From 98311b125ed5aa67deb61372b24cc9e06e4c56b8 Mon Sep 17 00:00:00 2001 From: nick evans Date: Mon, 24 Nov 2025 12:24:32 -0500 Subject: [PATCH] =?UTF-8?q?=E2=9C=A8=20Add=20`SequenceSet#normalized=3F`?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Returns whether `#string` is fully normalized: entries have been sorted, deduplicated, and coalesced, and all entries are in normal form. In other words, `#normalized?` returns `true` if (and only if) `#string` is equal to `#normalized_string`. (`#entries` and `#elements` can be identical for non-normalized strings.) Because this is a new method, the benchmarks monkey-patch a naive implementation of the method for prior versions of net-imap without it. --- benchmarks/sequence_set-normalized_p.yml | 116 +++++++++++++++++++++++ lib/net/imap/sequence_set.rb | 58 +++++++++++- test/net/imap/test_sequence_set.rb | 9 ++ 3 files changed, 178 insertions(+), 5 deletions(-) create mode 100644 benchmarks/sequence_set-normalized_p.yml diff --git a/benchmarks/sequence_set-normalized_p.yml b/benchmarks/sequence_set-normalized_p.yml new file mode 100644 index 00000000..7617b3b3 --- /dev/null +++ b/benchmarks/sequence_set-normalized_p.yml @@ -0,0 +1,116 @@ +--- +prelude: | + require "yaml" + require "net/imap" + + SAMPLES = Integer ENV.fetch("BENCHMARK_SAMPLES", 100) + INPUT_COUNT = Integer ENV.fetch("BENCHMARK_INPUT_COUNT", 1000) + MAX_INPUT = Integer ENV.fetch("BENCHMARK_MAX_INPUT", 1400) + WARMUP_RUNS = Integer ENV.fetch("BENCHMARK_WARMUP_RUNS", 200) + SHUFFLE_PCT = Float ENV.fetch("BENCHMARK_SHUFFLE_PCT", 0.2) + ABNORMAL_PCT = Float ENV.fetch("BENCHMARK_ABNORMAL_PCT", 0.2) + + def init_sets(count: 100, set_size: INPUT_COUNT, max: MAX_INPUT) + Array.new(count) { + Net::IMAP::SequenceSet.new(Array.new(set_size) { rand(1..max) }) + } + end + + def init_normal_sets(...) + init_sets(...) + end + + def init_frozen_normal_sets(...) + init_sets(...) 
+ .map(&:freeze) + end + + def shuffle_entries(seqset) + case SHUFFLE_PCT + in 1.0... then seqset.entries.shuffle + in ...0.0 then raise RangeError, "SHUFFLE_PCT should be positive" + else + unsorted, entries = seqset.entries.partition { rand < SHUFFLE_PCT } + unsorted.each do |entry| + entries.insert(rand(0..entries.size), entry) + end + entries + end + end + + def init_unsorted_sets(...) + init_sets(...) + .each do |seqset| + entries = shuffle_entries(seqset) + seqset.clear + entries.each do |entry| + seqset.append entry + end + end + end + + def abnormal_form(seqset) + seqset.entries + .map {|entry| + if ABNORMAL_PCT < rand + entry.is_a?(Range) ? "#{entry.begin}:#{entry.end || :*}" : entry + elsif entry.is_a? Range + "#{entry.end || "*"}:#{entry.begin}" + else + "#{entry}:#{entry}" + end + } + .join(",") + end + + def init_abnormal_sets(...) + init_sets(...) + .each do |seqset| + seqset.string = abnormal_form(seqset) + end + end + + # Benchmark against a naive version that could be used in earlier releases + unless Net::IMAP::SequenceSet.instance_methods.include?(:normalized?) + class Net::IMAP::SequenceSet + def normalized? + @string.nil? || @string == normalized_string + end + end + end + + # warmup (esp. for JIT) + WARMUP_RUNS.times do + init_sets(count: 20, set_size: 100, max: 120).each do |set| + set.normalized? + end + end + +benchmark: + - name: "normal (#string not called)" + prelude: $sets = init_normal_sets + script: $sets.sample.normalized? + - name: "normal (#string called)" + prelude: $sets = init_normal_sets.tap do _1.each(&:string) end + script: $sets.sample.normalized? + - name: "frozen and normal" + prelude: $sets = init_frozen_normal_sets + script: $sets.sample.normalized? + - name: "unsorted" + prelude: $sets = init_unsorted_sets + script: $sets.sample.normalized? + - name: "abnormal" + prelude: $sets = init_abnormal_sets + script: $sets.sample.normalized? + +contexts: + # n.b: can't use anything newer as the baseline: it's over 500x faster! 
+ - name: v0.5.12 + gems: + net-imap: 0.5.12 + require: false + - name: local + prelude: | + $LOAD_PATH.unshift "./lib" + $allowed_to_profile = true # only profile local code + require: false diff --git a/lib/net/imap/sequence_set.rb b/lib/net/imap/sequence_set.rb index cdabb4b7..37b1cd31 100644 --- a/lib/net/imap/sequence_set.rb +++ b/lib/net/imap/sequence_set.rb @@ -284,6 +284,8 @@ class IMAP # *. # # Denormalized properties: + # - #normalized?: Returns whether #entries are sorted, deduplicated, and + # coalesced, and all #string entries are in normal form. # - #has_duplicates?: Returns whether the ordered entries repeat any # numbers. # - #count_duplicates: Returns the count of repeated numbers in the ordered @@ -1696,6 +1698,53 @@ def xor!(other) merge(other).subtract(both) end + # Returns whether #string is fully normalized: entries have been sorted, + # deduplicated, and coalesced, and all entries are in normal form. See + # SequenceSet@Ordered+and+Normalized+sets. + # + # Net::IMAP::SequenceSet["1,3,5"].normalized? #=> true + # Net::IMAP::SequenceSet["20:30"].normalized? #=> true + # + # Net::IMAP::SequenceSet["3,5,1"].normalized? #=> false, not sorted + # Net::IMAP::SequenceSet["1,2,3"].normalized? #=> false, not coalesced + # Net::IMAP::SequenceSet["1:5,2"].normalized? #=> false, repeated number + # + # Net::IMAP::SequenceSet["1:1"].normalized? #=> false, number as range + # Net::IMAP::SequenceSet["5:1"].normalized? #=> false, backwards range + # + # Returns +true+ if (and only if) #string is equal to #normalized_string: + # seqset = Net::IMAP::SequenceSet["1:3,5"] + # seqset.string #=> "1:3,5" + # seqset.normalized_string #=> "1:3,5" + # seqset.entries #=> [1..3, 5] + # seqset.elements #=> [1..3, 5] + # seqset.normalized? #=> true + # + # seqset = Net::IMAP::SequenceSet["3,1,2"] + # seqset.string #=> "3,1,2" + # seqset.normalized_string #=> "1:3" + # seqset.entries #=> [3, 1, 2] + # seqset.elements #=> [1..3] + # seqset.normalized? 
#=> false + # + # Can return +false+ even when #entries and #elements are the same: + # seqset = Net::IMAP::SequenceSet["5:1"] + # seqset.string #=> "5:1" + # seqset.normalized_string #=> "1:5" + # seqset.entries #=> [1..5] + # seqset.elements #=> [1..5] + # seqset.normalized? #=> false + # + # Note that empty sets are normalized, even though they are not #valid?: + # seqset = Net::IMAP::SequenceSet.empty + # seqset.normalized? #=> true + # seqset.valid? #=> false + # + # Related: #normalize, #normalize!, #normalized_string + def normalized? + @string.nil? || normal_string?(@string) + end + # Returns a SequenceSet with a normalized string representation: entries # have been sorted, deduplicated, and coalesced, and all entries # are in normal form. Returns +self+ for frozen normalized sets, and a @@ -1706,16 +1755,15 @@ def xor!(other) # Net::IMAP::SequenceSet["1:5,3:7,10:9,10:11"].normalize # #=> Net::IMAP::SequenceSet["1:7,9:11"] # - # Related: #normalize!, #normalized_string + # Related: #normalize!, #normalized_string, #normalized? def normalize - return self if frozen? && (@string.nil? || normal_string?(@string)) - remain_frozen dup.normalize! + frozen? && normalized? ? self : remain_frozen(dup.normalize!) end # Resets #string to be sorted, deduplicated, and coalesced. Returns # +self+. See SequenceSet@Ordered+and+Normalized+sets. # - # Related: #normalize, #normalized_string + # Related: #normalize, #normalized_string, #normalized? def normalize! modifying! # redundant check, to normalize the error message for JRuby @string = nil @@ -1731,7 +1779,7 @@ def normalize! # # Returns +nil+ when the set is empty. # - # Related: #normalize!, #normalize, #string, #to_s + # Related: #normalize!, #normalize, #string, #to_s, #normalized? def normalized_string @tuples.empty? ? 
nil : -@tuples.map { tuple_to_str _1 }.join(",") end diff --git a/test/net/imap/test_sequence_set.rb b/test/net/imap/test_sequence_set.rb index 99c06d07..9cfac1da 100644 --- a/test/net/imap/test_sequence_set.rb +++ b/test/net/imap/test_sequence_set.rb @@ -1299,6 +1299,15 @@ def assert_seqset_enum(expected, seqset, enum) assert_equal data[:normalize], set.normalized_string end + test "#normalized?" do |data| + set = SequenceSet.new(data[:input]) + eql = set.normalized_string == set.string + msg = "expect string=%p and normalized_string=%p %s normal" % [ + set.string, set.normalized_string, eql ? "to be" : "not to be" + ] + assert set.normalized? == eql, msg + end + test "#normalize" do |data| set = SequenceSet.new(data[:input]) assert_equal data[:normalize], set.normalize.string