milvus-io
diff --git a/‎tests/python_client/chaos/checker.py‎
Lines changed: 724 additions & 403 deletions b/‎tests/python_client/chaos/checker.py‎
Lines changed: 724 additions & 403 deletions
diff --git a/‎tests/python_client/chaos/constants.py‎
Lines changed: 1 addition & 0 deletions b/‎tests/python_client/chaos/constants.py‎
Lines changed: 1 addition & 0 deletions
diff --git a/‎tests/python_client/chaos/testcases/test_concurrent_operation.py‎
Lines changed: 2 additions & 0 deletions b/‎tests/python_client/chaos/testcases/test_concurrent_operation.py‎
Lines changed: 2 additions & 0 deletions
diff --git a/‎tests/python_client/chaos/testcases/test_single_request_operation.py‎
Lines changed: 5 additions & 1 deletion b/‎tests/python_client/chaos/testcases/test_single_request_operation.py‎
Lines changed: 5 additions & 1 deletion
diff --git a/‎tests/python_client/check/func_check.py‎
Lines changed: 2 additions & 2 deletions b/‎tests/python_client/check/func_check.py‎
Lines changed: 2 additions & 2 deletions
diff --git a/‎tests/python_client/check/param_check.py‎
Lines changed: 93 additions & 1 deletion b/‎tests/python_client/check/param_check.py‎
Lines changed: 93 additions & 1 deletion
diff --git a/‎tests/python_client/common/bulk_insert_data.py‎
Lines changed: 85 additions & 0 deletions b/‎tests/python_client/common/bulk_insert_data.py‎
Lines changed: 85 additions & 0 deletions
@@ -22,6 +22,7 @@
 WAIT_PER_OP = 10  # time to wait in seconds between operations
 CHAOS_DURATION = 120  # chaos duration time in seconds
 DEFAULT_INDEX_PARAM = {"index_type": "HNSW", "metric_type": "L2", "params": {"M": 48, "efConstruction": 500}}
+DEFAULT_EMB_LIST_INDEX_PARAM = {"index_type": "HNSW", "metric_type": "MAX_SIM_COSINE", "params": {"M": 16, "efConstruction": 200}}
 DEFAULT_SEARCH_PARAM = {"metric_type": "L2", "params": {"ef": 64}}
 DEFAULT_INT8_INDEX_PARAM = {"index_type": "HNSW", "metric_type": "L2", "params": {"M": 48, "efConstruction": 500}}
 DEFAULT_INT8_SEARCH_PARAM = {"metric_type": "L2", "params": {"ef": 64}}
 
@@ -13,6 +13,7 @@
                            TextMatchChecker,
                            PhraseMatchChecker,
                            JsonQueryChecker,
+                           GeoQueryChecker,
                            DeleteChecker,
                            AddFieldChecker,
                            Op,
@@ -86,6 +87,7 @@ def init_health_checkers(self, collection_name=None):
             Op.text_match: TextMatchChecker(collection_name=c_name),
             Op.phrase_match: PhraseMatchChecker(collection_name=c_name),
             Op.json_query: JsonQueryChecker(collection_name=c_name),
+            Op.geo_query: GeoQueryChecker(collection_name=c_name),
             Op.delete: DeleteChecker(collection_name=c_name),
             Op.add_field: AddFieldChecker(collection_name=c_name),
         }
 
@@ -17,12 +17,14 @@
                            TextMatchChecker,
                            PhraseMatchChecker,
                            JsonQueryChecker,
+                           GeoQueryChecker,
                            IndexCreateChecker,
                            DeleteChecker,
                            CollectionDropChecker,
                            AlterCollectionChecker,
                            AddFieldChecker,
                            CollectionRenameChecker,
+                           TensorSearchChecker,
                            Op,
                            EventRecords,
                            ResultAnalyzer
@@ -82,8 +84,9 @@ def init_health_checkers(self, collection_name=None):
         checkers = {
             Op.create: CollectionCreateChecker(collection_name=c_name),
             Op.insert: InsertChecker(collection_name=c_name),
+            Op.tensor_search :TensorSearchChecker(collection_name=c_name),
             Op.upsert: UpsertChecker(collection_name=c_name),
-            Op.partial_update: PartialUpdateChecker(collection_name=c_name),
+            Op.partial_update: PartialUpdateChecker(collection_name=c_name), 
             Op.flush: FlushChecker(collection_name=c_name),
             Op.index: IndexCreateChecker(collection_name=c_name),
             Op.search: SearchChecker(collection_name=c_name),
@@ -93,6 +96,7 @@ def init_health_checkers(self, collection_name=None):
             Op.text_match: TextMatchChecker(collection_name=c_name),
             Op.phrase_match: PhraseMatchChecker(collection_name=c_name),
             Op.json_query: JsonQueryChecker(collection_name=c_name),
+            Op.geo_query: GeoQueryChecker(collection_name=c_name),
             Op.delete: DeleteChecker(collection_name=c_name),
             Op.drop: CollectionDropChecker(collection_name=c_name),
             Op.alter_collection: AlterCollectionChecker(collection_name=c_name),
 
@@ -588,8 +588,8 @@ def check_query_results(query_res, func_name, check_items):
             if isinstance(query_res, list):
                 result = pc.compare_lists_with_epsilon_ignore_dict_order(a=query_res, b=exp_res)
                 if result is False:
-                    log.debug(f"query expected: {exp_res}")
-                    log.debug(f"query actual: {query_res}")
+                    # Only for debug, compare the result with deepdiff
+                    pc.compare_lists_with_epsilon_ignore_dict_order_deepdiff(a=query_res, b=exp_res)
                 assert result
                 return result
             else:
 
@@ -7,6 +7,9 @@
 
 import numpy as np
 from collections.abc import Iterable
+import json
+from datetime import datetime
+from deepdiff import DeepDiff
 
 epsilon = ct.epsilon
 
@@ -69,6 +72,75 @@ def deep_approx_compare(x, y, epsilon=epsilon):
     return x == y
 
 
+import re
+# Compiled once at import time; both patterns are reused by normalize_geo_string
+_GEO_PATTERN = re.compile(r'(POINT|LINESTRING|POLYGON)\s+\(')
+_WHITESPACE_PATTERN = re.compile(r'\s+')
+
+def normalize_geo_string(s):
+    """
+    Canonicalize the whitespace of a GEO (WKT-style) string.
+
+    Only str values that begin with POINT, LINESTRING or POLYGON are touched:
+    whitespace between such a keyword and the following "(" is removed, every
+    remaining whitespace run is collapsed to a single space, and the ends are
+    stripped.
+
+    Args:
+        s: Value to normalize; may be of any type
+
+    Returns:
+        The normalized string, or s unchanged when it is not a str starting
+        with one of the three keywords
+    """
+    if not isinstance(s, str):
+        return s
+    if not s.startswith(('POINT', 'LINESTRING', 'POLYGON')):
+        return s
+    # "POINT   (1 2)" -> "POINT(1 2)"
+    without_keyword_gap = _GEO_PATTERN.sub(r'\1(', s)
+    collapsed = _WHITESPACE_PATTERN.sub(' ', without_keyword_gap)
+    return collapsed.strip()
+
+
+def normalize_value(value):
+    """
+    Convert a value into plain Python types so two results can be compared.
+
+    Scalars: None, bool and int are returned untouched; numpy integers/floats
+    become int/float; strings go through normalize_geo_string.
+
+    Sequences: list-like containers recognised by class name are converted to
+    a list first. For a list/tuple a new list is returned in which every dict
+    row has its values converted one level deep (GEO strings, numpy scalars,
+    numpy arrays, list-like containers); non-dict items are copied as-is.
+
+    Anything else is returned unchanged.
+    """
+    # List-like protobuf/custom container classes, matched by class name
+    container_names = ('RepeatedScalarContainer', 'HybridExtraList', 'RepeatedCompositeContainer')
+
+    def _convert_field(field):
+        # Conversion of a single dict value; the str check must stay first
+        if isinstance(field, str):
+            return normalize_geo_string(field)
+        if isinstance(field, np.floating):
+            return float(field)
+        if isinstance(field, np.integer):
+            return int(field)
+        if isinstance(field, np.ndarray):
+            return field.tolist()
+        if type(field).__name__ in container_names:
+            return list(field)
+        return field
+
+    # Fast path: nothing to normalize for None and native bool/int
+    if value is None or isinstance(value, (bool, int)):
+        return value
+
+    # numpy scalars -> native Python numbers
+    if isinstance(value, np.floating):
+        return float(value)
+    if isinstance(value, np.integer):
+        return int(value)
+
+    # Strings may be GEO values
+    if isinstance(value, str):
+        return normalize_geo_string(value)
+
+    if type(value).__name__ in container_names:
+        value = list(value)
+
+    # Only list/tuple inputs get row-level normalization
+    if not isinstance(value, (list, tuple)):
+        return value
+
+    return [
+        {key: _convert_field(field) for key, field in row.items()} if isinstance(row, dict) else row
+        for row in value
+    ]
+
+
 def compare_lists_with_epsilon_ignore_dict_order(a, b, epsilon=epsilon):
     """
     Compares two lists of dictionaries for equality (order-insensitive) with floating-point tolerance.
@@ -87,7 +159,8 @@ def compare_lists_with_epsilon_ignore_dict_order(a, b, epsilon=epsilon):
     """
     if len(a) != len(b):
         return False
-
+    a = normalize_value(a)
+    b = normalize_value(b)
     # Create a set of available indices for b
     available_indices = set(range(len(b)))
 
@@ -110,6 +183,25 @@ def compare_lists_with_epsilon_ignore_dict_order(a, b, epsilon=epsilon):
 
     return True
 
+def compare_lists_with_epsilon_ignore_dict_order_deepdiff(a, b, epsilon=epsilon):
+    """
+    Debug helper: log the DeepDiff of two lists of dictionaries, row by row.
+
+    Both inputs are normalized with normalize_value, then rows are paired by
+    position (a[i] against b[i]) and every non-empty diff is written to the
+    debug log. Nothing is returned; this function never decides equality.
+
+    Args:
+        a: First list of rows (e.g. the actual query result)
+        b: Second list of rows (e.g. the expected result)
+        epsilon: Passed to DeepDiff as math_epsilon for numeric comparison
+    """
+    # Normalize both lists to handle type differences
+    a_normalized = normalize_value(a)
+    b_normalized = normalize_value(b)
+    # This helper runs after a failed comparison, which includes the case where
+    # the lengths differ; report that instead of indexing past the shorter list.
+    if len(a_normalized) != len(b_normalized):
+        log.debug(
+            f"[COMPARE_LISTS] Length mismatch: {len(a_normalized)} vs {len(b_normalized)}, "
+            f"only the common prefix is compared"
+        )
+    # zip stops at the shorter list, so no IndexError on unequal lengths
+    for i, (row_a, row_b) in enumerate(zip(a_normalized, b_normalized)):
+        diff = DeepDiff(
+            row_a,
+            row_b,
+            ignore_order=True,
+            math_epsilon=epsilon,
+            significant_digits=1,
+            ignore_type_in_groups=[(list, tuple)],
+            ignore_string_type_changes=True,
+        )
+        if diff:
+            log.debug(f"[COMPARE_LISTS] Found differences at row {i}: {diff}")
 
 def ip_check(ip):
     if ip == "localhost":
 
@@ -12,6 +12,7 @@
 from faker import Faker
 from sklearn import preprocessing
 from common.common_func import gen_unique_str
+from common.common_func import gen_timestamptz_str
 from common.minio_comm import copy_files_to_minio
 from utils.util_log import test_log as log
 import pyarrow as pa
@@ -45,6 +46,8 @@ class DataField:
     array_float_field = "array_float"
     array_string_field = "array_string"
     new_field = "new_field"
+    geo_field = "geo"
+    timestamp_field = "timestamptz"
 
 
 class DataErrorType:
@@ -100,6 +103,51 @@ def gen_binary_vectors(nb, dim):
     return vectors
 
 
+def gen_wkt_geometry(nb, bounds=(0, 100, 0, 100)):
+    """
+    Build a list of random WKT geometry strings for bulk insert.
+
+    Each entry is randomly a POINT, a LINESTRING with 2 to 5 vertices, or an
+    axis-aligned rectangular POLYGON; coordinates are printed with two decimals.
+
+    Args:
+        nb: Number of geometry strings to generate
+        bounds: Coordinate bounds as (min_x, max_x, min_y, max_y)
+
+    Returns:
+        List of WKT strings
+    """
+    geom_types = ["POINT", "LINESTRING", "POLYGON"]
+    geometries = []
+
+    for _ in range(nb):
+        kind = random.choice(geom_types)
+
+        if kind == "POLYGON":
+            # Rectangle: the origin is drawn 20 units short of the upper bounds,
+            # then a side of 10 to 20 units is added in each direction
+            left = random.uniform(bounds[0], bounds[1] - 20)
+            bottom = random.uniform(bounds[2], bounds[3] - 20)
+            right = left + random.uniform(10, 20)
+            top = bottom + random.uniform(10, 20)
+            wkt = f"POLYGON (({left:.2f} {bottom:.2f}, {right:.2f} {bottom:.2f}, {right:.2f} {top:.2f}, {left:.2f} {top:.2f}, {left:.2f} {bottom:.2f}))"
+
+        elif kind == "LINESTRING":
+            vertex_count = random.randint(2, 5)
+            # x is drawn before y for each vertex
+            vertices = [
+                f"{random.uniform(bounds[0], bounds[1]):.2f} {random.uniform(bounds[2], bounds[3]):.2f}"
+                for _ in range(vertex_count)
+            ]
+            wkt = f"LINESTRING ({', '.join(vertices)})"
+
+        else:  # POINT
+            px = random.uniform(bounds[0], bounds[1])
+            py = random.uniform(bounds[2], bounds[3])
+            wkt = f"POINT ({px:.2f} {py:.2f})"
+
+        geometries.append(wkt)
+
+    return geometries
+
+
 def gen_fp16_vectors(num, dim, for_json=False):
     """
     generate float16 vector data
@@ -468,6 +516,19 @@ def gen_json_in_numpy_file(dir, data_field, rows, start=0, force=False):
     return file_name
 
 
+def gen_geometry_in_numpy_file(dir, data_field, rows, start=0, force=False):
+    """
+    Write random WKT geometry strings to "<dir>/<data_field>.npy".
+
+    An existing file is kept unless force is set. The start argument is
+    accepted but not used by this generator.
+
+    Returns:
+        The file name (without the directory part)
+    """
+    file_name = f"{data_field}.npy"
+    file = f"{dir}/{file_name}"
+    # Reuse an existing file unless regeneration is forced
+    if os.path.exists(file) and not force:
+        return file_name
+    data = gen_wkt_geometry(rows) if rows > 0 else []
+    arr = np.array(data)
+    log.info(f"file_name: {file_name} data type: {arr.dtype} data shape: {arr.shape}")
+    np.save(file, arr)
+    return file_name
+
+
 def gen_int_or_float_in_numpy_file(dir, data_field, rows, start=0, force=False, nullable=False, **kwargs):
     file_name = f"{data_field}.npy"
     file = f"{dir}/{file_name}"
@@ -635,6 +696,17 @@ def gen_data_by_data_field(data_field, rows, start=0, float_vector=True, dim=128
                      for i in range(start, rows + start)])
             else:
                 data = [None for _ in range(start, rows + start)]
+        elif data_field == DataField.geo_field:
+            if not nullable:
+                # Generate WKT geometry strings for parquet
+                data = gen_wkt_geometry(rows)
+            else:
+                data = [None for _ in range(start, rows + start)]
+        elif data_field == DataField.timestamp_field:
+            if not nullable:
+                data = [gen_timestamptz_str() for _ in range(start, rows + start)]
+            else:
+                data = [None for _ in range(start, rows + start)]
         else:
             raise Exception("unsupported field name")
 
@@ -796,6 +868,17 @@ def gen_dict_data_by_data_field(data_fields, rows, start=0, float_vector=True, d
                     d[data_field] = [gen_unique_str(str(i)) for i in range(array_length)]
                 else:
                     d[data_field] = None
+            elif data_field == DataField.geo_field:
+                if not nullable:
+                    # Generate a single WKT geometry string
+                    d[data_field] = gen_wkt_geometry(1)[0]
+                else:
+                    d[data_field] = None
+            elif data_field == DataField.timestamp_field:
+                if not nullable:
+                    d[data_field] = gen_timestamptz_str()
+                else:
+                    d[data_field] = None
             else:
                 raise Exception("unsupported field name")
         if enable_dynamic_field:
@@ -906,6 +989,8 @@ def gen_npy_files(float_vector, rows, dim, data_fields, file_size=None, file_num
                 file_name = gen_bool_in_numpy_file(dir=data_source_new, data_field=data_field, rows=rows, force=force)
             elif data_field == DataField.json_field:
                 file_name = gen_json_in_numpy_file(dir=data_source_new, data_field=data_field, rows=rows, force=force)
+            elif data_field == DataField.geo_field:
+                file_name = gen_geometry_in_numpy_file(dir=data_source_new, data_field=data_field, rows=rows, force=force)
             else:
                 file_name = gen_int_or_float_in_numpy_file(dir=data_source_new, data_field=data_field,
                                                            rows=rows, force=force, nullable=nullable, shuffle_pk=shuffle_pk)