nailo2c · nailo2c · Jul 25, 2025
diff --git a/spellpy/spell.py b/spellpy/spell.py
@@ -122,12 +122,8 @@ def LCSMatch(self, LCSMap, seq):
 
         maxLen = -1
         maxLCSObject = None
-        set_seq = set(seq)
         size_seq = len(seq)
         for LCSObject in LCSMap:
-            set_template = set(LCSObject.logTemplate)
-            if len(set_seq & set_template) < 0.5 * size_seq:
-                continue
             lcs = self.LCS(seq, LCSObject.logTemplate)
             if len(lcs) > maxLen or (len(lcs) == maxLen and len(LCSObject.logTemplate) < len(maxLCSObject.logTemplate)):
                 maxLen = len(lcs)

diff --git a/tests/test_spellpy.py b/tests/test_spellpy.py
@@ -94,6 +94,19 @@ def test_getTemplate(self):
         new_template = self.parser.getTemplate(lcs, seq)
         self.assertListEqual(new_template, expected_template)
 
+    def test_LCSMatch_with_repeated_tokens(self):
+        # Regression test for the set-overlap pre-filter removed from LCSMatch.
+        # Repeated tokens make len(set(seq) & set(template)) == 2, which is below
+        # 0.5 * len(seq) == 3.0, so that pre-filter skipped this exact match.
+        logmessageL = ['A', 'A', 'A', 'A', 'A', 'B']
+        seq = ['A', 'A', 'A', 'A', 'A', 'B']
+        logID = 0
+        newCluster = LCSObject(logTemplate=logmessageL, logIDL=[logID])
+
+        retLogClust = self.parser.LCSMatch([newCluster], seq)
+        self.assertIsNotNone(retLogClust)
+        self.assertListEqual(retLogClust.logTemplate, newCluster.logTemplate)
+
 
 def helper(rootNode):
     if rootNode.childD == dict():