Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
116 changes: 116 additions & 0 deletions benchmarks/sequence_set-normalized_p.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,116 @@
---
prelude: |
require "yaml"
require "net/imap"

# Benchmark tunables. Each one is read from a BENCHMARK_* environment variable
# and falls back to the literal default given here. Integer()/Float() raise on
# malformed input instead of silently coercing it.
SAMPLES      = Integer(ENV.fetch("BENCHMARK_SAMPLES", 100))
INPUT_COUNT  = Integer(ENV.fetch("BENCHMARK_INPUT_COUNT", 1000)) # default set_size for init_sets
MAX_INPUT    = Integer(ENV.fetch("BENCHMARK_MAX_INPUT", 1400))   # default max for init_sets
WARMUP_RUNS  = Integer(ENV.fetch("BENCHMARK_WARMUP_RUNS", 200))  # iterations of the warmup loop
SHUFFLE_PCT  = Float(ENV.fetch("BENCHMARK_SHUFFLE_PCT", 0.2))    # read by shuffle_entries
ABNORMAL_PCT = Float(ENV.fetch("BENCHMARK_ABNORMAL_PCT", 0.2))   # read by abnormal_form

# Builds an Array of +count+ SequenceSets. Each set is created from
# +set_size+ random integers drawn from <tt>1..max</tt>.
#
# +count+ defaults to SAMPLES so that the BENCHMARK_SAMPLES environment
# variable controls the fixture size; with the variable unset this is still
# 100. +set_size+ and +max+ default to INPUT_COUNT and MAX_INPUT.
def init_sets(count: SAMPLES, set_size: INPUT_COUNT, max: MAX_INPUT)
  Array.new(count) do
    Net::IMAP::SequenceSet.new(Array.new(set_size) { rand(1..max) })
  end
end

# Fixture for the "normal" cases: forwards every argument to init_sets and
# returns its result unchanged.
def init_normal_sets(...) = init_sets(...)

# Fixture for the "frozen and normal" case: forwards every argument to
# init_sets and returns a new Array holding the same sets, each one frozen.
def init_frozen_normal_sets(...)
  init_sets(...).map { |seqset| seqset.freeze }
end

# Returns the entries of +seqset+ in a partially randomized order.
#
# * SHUFFLE_PCT >= 1.0: every entry is shuffled.
# * SHUFFLE_PCT <  0.0: raises RangeError.
# * otherwise: each entry is picked with probability SHUFFLE_PCT, and every
#   picked entry is re-inserted at a random position among the others.
def shuffle_entries(seqset)
  if SHUFFLE_PCT >= 1.0
    seqset.entries.shuffle
  elsif SHUFFLE_PCT < 0.0
    raise RangeError, "SHUFFLE_PCT should be positive"
  else
    displaced, in_place = seqset.entries.partition { rand < SHUFFLE_PCT }
    displaced.each do |entry|
      # 0..size is inclusive, so an entry may also land after the last one
      in_place.insert(rand(0..in_place.size), entry)
    end
    in_place
  end
end

# Fixture for the "unsorted" case: forwards every argument to init_sets, then
# rebuilds each set in place by clearing it and appending its entries in the
# order returned by shuffle_entries. Returns the Array from init_sets.
def init_unsorted_sets(...)
  init_sets(...).each do |seqset|
    # capture the new order before clearing the set
    reordered = shuffle_entries(seqset)
    seqset.clear
    reordered.each { |entry| seqset.append(entry) }
  end
end

# Returns a comma-joined string of the entries of +seqset+ in which some
# entries are written in a non-normal form.
#
# One random number is drawn per entry. When it is greater than ABNORMAL_PCT
# the entry is written normally ("begin:end" for ranges, the entry itself
# otherwise). Otherwise ranges are written backwards ("end:begin") and single
# entries are written as a range ("n:n"). A nil range end is written as "*".
def abnormal_form(seqset)
  parts = seqset.entries.map do |entry|
    keep_normal = ABNORMAL_PCT < rand
    case entry
    when Range
      first = entry.begin
      last  = entry.end || "*"
      keep_normal ? "#{first}:#{last}" : "#{last}:#{first}"
    else
      keep_normal ? entry : "#{entry}:#{entry}"
    end
  end
  parts.join(",")
end

# Fixture for the "abnormal" case: forwards every argument to init_sets, then
# assigns each set's #string from abnormal_form. Returns the Array from
# init_sets.
def init_abnormal_sets(...)
  init_sets(...).each { |seqset| seqset.string = abnormal_form(seqset) }
end

# Benchmark against a naive version that could be used in earlier releases.
# Only defined when the loaded SequenceSet has no #normalized? of its own.
unless Net::IMAP::SequenceSet.method_defined?(:normalized?)
  Net::IMAP::SequenceSet.class_eval do
    # Naive check: no stored string, or the stored string already equals
    # the normalized string.
    def normalized?
      return true if @string.nil?

      @string == normalized_string
    end
  end
end

# warmup (esp. for JIT): run #normalized? over small throwaway fixtures
# WARMUP_RUNS times before any benchmark case starts.
WARMUP_RUNS.times do
  warmup_sets = init_sets(count: 20, set_size: 100, max: 120)
  warmup_sets.each(&:normalized?)
end

# Benchmark cases. Each case stores its own fixture in $sets from its prelude;
# its script then calls #normalized? on one randomly sampled set.
benchmark:
# Sets as returned by init_normal_sets.
- name: "normal (#string not called)"
prelude: $sets = init_normal_sets
script: $sets.sample.normalized?
# Same fixture, but #string has been called once on every set beforehand.
- name: "normal (#string called)"
prelude: $sets = init_normal_sets.tap do _1.each(&:string) end
script: $sets.sample.normalized?
# Same fixture, with every set frozen.
- name: "frozen and normal"
prelude: $sets = init_frozen_normal_sets
script: $sets.sample.normalized?
# Sets rebuilt by appending their entries in partially shuffled order.
- name: "unsorted"
prelude: $sets = init_unsorted_sets
script: $sets.sample.normalized?
# Sets whose #string was assigned from abnormal_form.
- name: "abnormal"
prelude: $sets = init_abnormal_sets
script: $sets.sample.normalized?

# Contexts the cases run under: a pinned net-imap gem release and the local
# checkout.
contexts:
# n.b: can't use anything newer as the baseline: it's over 500x faster!
- name: v0.5.12
gems:
net-imap: 0.5.12
# NOTE(review): presumably tells the benchmark runner not to require the gem
# itself, leaving that to the shared prelude's `require "net/imap"` — confirm
# against the runner's documentation.
require: false
# Local checkout: this context's prelude puts ./lib at the front of $LOAD_PATH.
- name: local
prelude: |
$LOAD_PATH.unshift "./lib"
$allowed_to_profile = true # only profile local code
require: false
58 changes: 53 additions & 5 deletions lib/net/imap/sequence_set.rb
Original file line number Diff line number Diff line change
Expand Up @@ -284,6 +284,8 @@ class IMAP
# <tt>*</tt>.
#
# <i>Denormalized properties:</i>
# - #normalized?: Returns whether #entries are sorted, deduplicated, and
# coalesced, and all #string entries are in normalized form.
# - #has_duplicates?: Returns whether the ordered entries repeat any
# numbers.
# - #count_duplicates: Returns the count of repeated numbers in the ordered
Expand Down Expand Up @@ -1696,6 +1698,53 @@ def xor!(other)
merge(other).subtract(both)
end

# Returns whether #string is fully normalized: entries have been sorted,
# deduplicated, and coalesced, and all entries are in normal form. See
# SequenceSet@Ordered+and+Normalized+sets.
#
# Net::IMAP::SequenceSet["1,3,5"].normalized? #=> true
# Net::IMAP::SequenceSet["20:30"].normalized? #=> true
#
# Net::IMAP::SequenceSet["3,5,1"].normalized? #=> false, not sorted
# Net::IMAP::SequenceSet["1,2,3"].normalized? #=> false, not coalesced
# Net::IMAP::SequenceSet["1:5,2"].normalized? #=> false, repeated number
#
# Net::IMAP::SequenceSet["1:1"].normalized? #=> false, number as range
# Net::IMAP::SequenceSet["5:1"].normalized? #=> false, backwards range
#
# Returns +true+ if (and only if) #string is equal to #normalized_string:
# seqset = Net::IMAP::SequenceSet["1:3,5"]
# seqset.string #=> "1:3,5"
# seqset.normalized_string #=> "1:3,5"
# seqset.entries #=> [1..3, 5]
# seqset.elements #=> [1..3, 5]
# seqset.normalized? #=> true
#
# seqset = Net::IMAP::SequenceSet["3,1,2"]
# seqset.string #=> "3,1,2"
# seqset.normalized_string #=> "1:3"
# seqset.entries #=> [3, 1, 2]
# seqset.elements #=> [1..3]
# seqset.normalized? #=> false
#
# Can return +false+ even when #entries and #elements are the same:
# seqset = Net::IMAP::SequenceSet["5:1"]
# seqset.string #=> "5:1"
# seqset.normalized_string #=> "1:5"
# seqset.entries #=> [1..5]
# seqset.elements #=> [1..5]
# seqset.normalized? #=> false
#
# Note that empty sets are normalized, even though they are not #valid?:
# seqset = Net::IMAP::SequenceSet.empty
# seqset.normalized? #=> true
# seqset.valid? #=> false
#
# Related: #normalize, #normalize!, #normalized_string
def normalized?
  # No stored string means there is nothing that could be denormalized.
  return true if @string.nil?

  normal_string?(@string)
end

# Returns a SequenceSet with a normalized string representation: entries
# have been sorted, deduplicated, and coalesced, and all entries
# are in normal form. Returns +self+ for frozen normalized sets, and a
Expand All @@ -1706,16 +1755,15 @@ def xor!(other)
# Net::IMAP::SequenceSet["1:5,3:7,10:9,10:11"].normalize
# #=> Net::IMAP::SequenceSet["1:7,9:11"]
#
# Related: #normalize!, #normalized_string, #normalized?
def normalize
  # A frozen set that is already normalized can be returned as-is.
  return self if frozen? && normalized?

  # Otherwise normalize a copy, exactly once; the receiver is left untouched.
  remain_frozen dup.normalize!
end

# Resets #string to be sorted, deduplicated, and coalesced. Returns
# +self+. See SequenceSet@Ordered+and+Normalized+sets.
#
# Related: #normalize, #normalized_string, #normalized?
def normalize!
modifying! # redundant check, to normalize the error message for JRuby
@string = nil
Expand All @@ -1731,7 +1779,7 @@ def normalize!
#
# Returns +nil+ when the set is empty.
#
# Related: #normalize!, #normalize, #string, #to_s, #normalized?
def normalized_string
  # Each tuple is rendered with tuple_to_str and the pieces are joined with
  # commas. The leading unary minus (String#-@) returns the result as a
  # frozen, deduplicated string.
  @tuples.empty? ? nil : -@tuples.map { tuple_to_str _1 }.join(",")
end
Expand Down
9 changes: 9 additions & 0 deletions test/net/imap/test_sequence_set.rb
Original file line number Diff line number Diff line change
Expand Up @@ -1299,6 +1299,15 @@ def assert_seqset_enum(expected, seqset, enum)
assert_equal data[:normalize], set.normalized_string
end

# #normalized? must agree with a direct comparison of #normalized_string and
# #string for every data-driven input.
test "#normalized?" do |data|
  seqset   = SequenceSet.new(data[:input])
  expected = seqset.normalized_string == seqset.string
  verb     = expected ? "to be" : "not to be"
  message  = format(
    "expect string=%p and normalized_string=%p %s normal",
    seqset.string, seqset.normalized_string, verb
  )
  assert seqset.normalized? == expected, message
end

test "#normalize" do |data|
set = SequenceSet.new(data[:input])
assert_equal data[:normalize], set.normalize.string
Expand Down