Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 0 additions & 4 deletions spellpy/spell.py
Original file line number Diff line number Diff line change
Expand Up @@ -122,12 +122,8 @@ def LCSMatch(self, LCSMap, seq):

maxLen = -1
maxLCSObject = None
set_seq = set(seq)
size_seq = len(seq)
for LCSObject in LCSMap:
set_template = set(LCSObject.logTemplate)
if len(set_seq & set_template) < 0.5 * size_seq:
continue
lcs = self.LCS(seq, LCSObject.logTemplate)
if len(lcs) > maxLen or (len(lcs) == maxLen and len(LCSObject.logTemplate) < len(maxLCSObject.logTemplate)):
maxLen = len(lcs)
Expand Down
13 changes: 13 additions & 0 deletions tests/test_spellpy.py
Original file line number Diff line number Diff line change
Expand Up @@ -94,6 +94,19 @@ def test_getTemplate(self):
new_template = self.parser.getTemplate(lcs, seq)
self.assertListEqual(new_template, expected_template)

def test_LCSMatch_with_repeated_tokens(self):
# Regression test: LCSMatch should still return the matching cluster when
# 'seq' carries many repeated tokens in addition to the template tokens.
# 'seq' is the 10 template tokens followed by ten 'K' (20 tokens in all),
# so it shares exactly 10 distinct tokens with the template.
# NOTE(review): the pre-filter removed from LCSMatch skipped a template only
# when len(set(seq) & set(template)) < 0.5 * len(seq). For this data that is
# 10 < 10.0, which is False, so the pre-filter alone would not have skipped
# this template. Confirm that this test really fails on the old
# implementation before relying on it as a regression guard.
logmessageL = ['A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J']
seq = ['A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'K', 'K', 'K', 'K', 'K', 'K', 'K', 'K', 'K']
logID = 0
# Single candidate cluster whose template is the un-repeated prefix of 'seq'.
newCluster = LCSObject(logTemplate=logmessageL, logIDL=[logID])

retLogClust = self.parser.LCSMatch([newCluster], seq)
# A match must be found, and it must be the only candidate supplied.
self.assertIsNotNone(retLogClust)
self.assertListEqual(retLogClust.logTemplate, newCluster.logTemplate)


def helper(rootNode):
if rootNode.childD == dict():
Expand Down