Skip to content

Commit eaed105

Browse files
FeilongHou, Eric Hou, zhuwenxing
authored
test: add e2e test cases for Timestamptz (#45800)
Issue: #44518, #45756 pr: #44871, #45128, #45770, #45524, #44794, #45014 --------- Signed-off-by: Eric Hou <[email protected]> Signed-off-by: zhuwenxing <[email protected]> Co-authored-by: Eric Hou <[email protected]> Co-authored-by: zhuwenxing <[email protected]>
1 parent b948c62 commit eaed105

File tree

15 files changed

+2068
-768
lines changed

15 files changed

+2068
-768
lines changed

tests/python_client/chaos/checker.py

Lines changed: 724 additions & 403 deletions
Large diffs are not rendered by default.

tests/python_client/chaos/constants.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@
2222
WAIT_PER_OP = 10 # time to wait in seconds between operations
2323
CHAOS_DURATION = 120 # chaos duration time in seconds
2424
DEFAULT_INDEX_PARAM = {"index_type": "HNSW", "metric_type": "L2", "params": {"M": 48, "efConstruction": 500}}
25+
DEFAULT_EMB_LIST_INDEX_PARAM = {"index_type": "HNSW", "metric_type": "MAX_SIM_COSINE", "params": {"M": 16, "efConstruction": 200}}
2526
DEFAULT_SEARCH_PARAM = {"metric_type": "L2", "params": {"ef": 64}}
2627
DEFAULT_INT8_INDEX_PARAM = {"index_type": "HNSW", "metric_type": "L2", "params": {"M": 48, "efConstruction": 500}}
2728
DEFAULT_INT8_SEARCH_PARAM = {"metric_type": "L2", "params": {"ef": 64}}

tests/python_client/chaos/testcases/test_concurrent_operation.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@
1313
TextMatchChecker,
1414
PhraseMatchChecker,
1515
JsonQueryChecker,
16+
GeoQueryChecker,
1617
DeleteChecker,
1718
AddFieldChecker,
1819
Op,
@@ -86,6 +87,7 @@ def init_health_checkers(self, collection_name=None):
8687
Op.text_match: TextMatchChecker(collection_name=c_name),
8788
Op.phrase_match: PhraseMatchChecker(collection_name=c_name),
8889
Op.json_query: JsonQueryChecker(collection_name=c_name),
90+
Op.geo_query: GeoQueryChecker(collection_name=c_name),
8991
Op.delete: DeleteChecker(collection_name=c_name),
9092
Op.add_field: AddFieldChecker(collection_name=c_name),
9193
}

tests/python_client/chaos/testcases/test_single_request_operation.py

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,12 +17,14 @@
1717
TextMatchChecker,
1818
PhraseMatchChecker,
1919
JsonQueryChecker,
20+
GeoQueryChecker,
2021
IndexCreateChecker,
2122
DeleteChecker,
2223
CollectionDropChecker,
2324
AlterCollectionChecker,
2425
AddFieldChecker,
2526
CollectionRenameChecker,
27+
TensorSearchChecker,
2628
Op,
2729
EventRecords,
2830
ResultAnalyzer
@@ -82,8 +84,9 @@ def init_health_checkers(self, collection_name=None):
8284
checkers = {
8385
Op.create: CollectionCreateChecker(collection_name=c_name),
8486
Op.insert: InsertChecker(collection_name=c_name),
87+
Op.tensor_search :TensorSearchChecker(collection_name=c_name),
8588
Op.upsert: UpsertChecker(collection_name=c_name),
86-
Op.partial_update: PartialUpdateChecker(collection_name=c_name),
89+
Op.partial_update: PartialUpdateChecker(collection_name=c_name),
8790
Op.flush: FlushChecker(collection_name=c_name),
8891
Op.index: IndexCreateChecker(collection_name=c_name),
8992
Op.search: SearchChecker(collection_name=c_name),
@@ -93,6 +96,7 @@ def init_health_checkers(self, collection_name=None):
9396
Op.text_match: TextMatchChecker(collection_name=c_name),
9497
Op.phrase_match: PhraseMatchChecker(collection_name=c_name),
9598
Op.json_query: JsonQueryChecker(collection_name=c_name),
99+
Op.geo_query: GeoQueryChecker(collection_name=c_name),
96100
Op.delete: DeleteChecker(collection_name=c_name),
97101
Op.drop: CollectionDropChecker(collection_name=c_name),
98102
Op.alter_collection: AlterCollectionChecker(collection_name=c_name),

tests/python_client/check/func_check.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -588,8 +588,8 @@ def check_query_results(query_res, func_name, check_items):
588588
if isinstance(query_res, list):
589589
result = pc.compare_lists_with_epsilon_ignore_dict_order(a=query_res, b=exp_res)
590590
if result is False:
591-
log.debug(f"query expected: {exp_res}")
592-
log.debug(f"query actual: {query_res}")
591+
# Only for debug, compare the result with deepdiff
592+
pc.compare_lists_with_epsilon_ignore_dict_order_deepdiff(a=query_res, b=exp_res)
593593
assert result
594594
return result
595595
else:

tests/python_client/check/param_check.py

Lines changed: 93 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,9 @@
77

88
import numpy as np
99
from collections.abc import Iterable
10+
import json
11+
from datetime import datetime
12+
from deepdiff import DeepDiff
1013

1114
epsilon = ct.epsilon
1215

@@ -69,6 +72,75 @@ def deep_approx_compare(x, y, epsilon=epsilon):
6972
return x == y
7073

7174

75+
import re
# Pre-compile regex patterns for better performance
_GEO_PATTERN = re.compile(r'(POINT|LINESTRING|POLYGON)\s+\(')
_WHITESPACE_PATTERN = re.compile(r'\s+')
# Padding just inside parentheses and around coordinate separators
_GEO_OPEN_PAREN_PATTERN = re.compile(r'\(\s+')
_GEO_CLOSE_PAREN_PATTERN = re.compile(r'\s+\)')
_GEO_COMMA_PATTERN = re.compile(r'\s*,\s*')


def normalize_geo_string(s):
    """
    Normalize a GEO string by removing extra whitespace.

    Only strings starting with POINT, LINESTRING or POLYGON are touched.
    For those, the result has:
      - runs of whitespace collapsed to a single space,
      - no whitespace between the geometry keyword and its opening parenthesis,
      - no whitespace right after "(" or right before ")",
      - every comma followed by exactly one space and preceded by none,
      - no leading/trailing whitespace left over from the steps above.

    Args:
        s: String value that might be a GEO type (POINT, LINESTRING, POLYGON)

    Returns:
        Normalized GEO string or original value if not a GEO string
    """
    if not (isinstance(s, str) and s.startswith(('POINT', 'LINESTRING', 'POLYGON'))):
        return s

    # Collapse whitespace first so the later patterns only ever see single spaces
    s = _WHITESPACE_PATTERN.sub(' ', s).strip()
    s = _GEO_PATTERN.sub(r'\1(', s)
    # Padding inside parentheses or around commas must not make two
    # otherwise identical geometries compare as different strings
    s = _GEO_OPEN_PAREN_PATTERN.sub('(', s)
    s = _GEO_CLOSE_PAREN_PATTERN.sub(')', s)
    s = _GEO_COMMA_PATTERN.sub(', ', s)
    return s.strip()
94+
95+
96+
def normalize_value(value):
    """
    Normalize values for comparison by converting to standard types and formats.

    The same rules are applied recursively at every nesting level:
      - None, bool and int are returned unchanged,
      - numpy integer/floating scalars become Python int/float,
      - strings go through normalize_geo_string,
      - numpy arrays become (nested) Python lists via tolist(),
      - dicts are rebuilt with the same keys and normalized values,
      - lists, tuples and list-like containers (matched by type name:
        RepeatedScalarContainer, HybridExtraList, RepeatedCompositeContainer)
        become plain lists of normalized items,
      - anything else is returned as-is.

    Args:
        value: Any value, typically a list of row dicts from search/query results

    Returns:
        The normalized value; containers are returned as new objects
    """
    # Fast path for None and simple immutable types
    if value is None or isinstance(value, (bool, int)):
        return value

    # Convert numpy scalars to Python native types. This must run before any
    # plain-float handling because np.float64 is also a Python float subclass.
    if isinstance(value, np.floating):
        return float(value)
    if isinstance(value, np.integer):
        return int(value)

    # Handle strings (common case for GEO fields)
    if isinstance(value, str):
        return normalize_geo_string(value)

    if isinstance(value, np.ndarray):
        return value.tolist()

    if isinstance(value, dict):
        return {k: normalize_value(v) for k, v in value.items()}

    # Protobuf/custom containers are matched by type name so that this module
    # does not need to import the classes themselves
    list_like_type_names = ('RepeatedScalarContainer', 'HybridExtraList', 'RepeatedCompositeContainer')
    if isinstance(value, (list, tuple)) or type(value).__name__ in list_like_type_names:
        return [normalize_value(item) for item in value]

    # Return as-is for other types
    return value
143+
72144
def compare_lists_with_epsilon_ignore_dict_order(a, b, epsilon=epsilon):
73145
"""
74146
Compares two lists of dictionaries for equality (order-insensitive) with floating-point tolerance.
@@ -87,7 +159,8 @@ def compare_lists_with_epsilon_ignore_dict_order(a, b, epsilon=epsilon):
87159
"""
88160
if len(a) != len(b):
89161
return False
90-
162+
a = normalize_value(a)
163+
b = normalize_value(b)
91164
# Create a set of available indices for b
92165
available_indices = set(range(len(b)))
93166

@@ -110,6 +183,25 @@ def compare_lists_with_epsilon_ignore_dict_order(a, b, epsilon=epsilon):
110183

111184
return True
112185

186+
def compare_lists_with_epsilon_ignore_dict_order_deepdiff(a, b, epsilon=epsilon):
    """
    Log the differences between two lists of dictionaries using DeepDiff.

    This is a debugging aid: it returns nothing and only writes debug logs.
    Both inputs are normalized with normalize_value, then rows are compared
    pairwise by position (row i of a against row i of b). ignore_order applies
    to the contents of each row pair, not to the order of the rows themselves.

    A length mismatch is logged and only the common prefix is compared, so
    this helper never raises IndexError when one list is longer.

    Args:
        a: First list of rows (e.g. the actual query result)
        b: Second list of rows (e.g. the expected result)
        epsilon: Tolerance passed to DeepDiff as math_epsilon
    """
    # Normalize both lists to handle type differences
    a_normalized = normalize_value(a)
    b_normalized = normalize_value(b)
    if len(a_normalized) != len(b_normalized):
        log.debug(f"[COMPARE_LISTS] Length mismatch: {len(a_normalized)} rows vs {len(b_normalized)} rows; "
                  f"comparing the first {min(len(a_normalized), len(b_normalized))} rows only")
    for i, (row_a, row_b) in enumerate(zip(a_normalized, b_normalized)):
        diff = DeepDiff(
            row_a,
            row_b,
            ignore_order=True,
            math_epsilon=epsilon,
            significant_digits=1,
            ignore_type_in_groups=[(list, tuple)],
            ignore_string_type_changes=True,
        )
        if diff:
            log.debug(f"[COMPARE_LISTS] Found differences at row {i}: {diff}")
113205

114206
def ip_check(ip):
115207
if ip == "localhost":

tests/python_client/common/bulk_insert_data.py

Lines changed: 85 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@
1212
from faker import Faker
1313
from sklearn import preprocessing
1414
from common.common_func import gen_unique_str
15+
from common.common_func import gen_timestamptz_str
1516
from common.minio_comm import copy_files_to_minio
1617
from utils.util_log import test_log as log
1718
import pyarrow as pa
@@ -45,6 +46,8 @@ class DataField:
4546
array_float_field = "array_float"
4647
array_string_field = "array_string"
4748
new_field = "new_field"
49+
geo_field = "geo"
50+
timestamp_field = "timestamptz"
4851

4952

5053
class DataErrorType:
@@ -100,6 +103,51 @@ def gen_binary_vectors(nb, dim):
100103
return vectors
101104

102105

106+
def gen_wkt_geometry(nb, bounds=(0, 100, 0, 100)):
    """
    Generate random WKT geometry strings for bulk insert
    Generates a mix of POINT, LINESTRING, and POLYGON types

    Every coordinate is formatted with two decimals. Polygons are
    axis-aligned rectangles whose sides are at most 20 units long; when the
    bounds are narrower than 20 units the sides shrink to the available span
    so the rectangle still lies inside the bounds.

    Args:
        nb: Number of geometry strings to generate
        bounds: Coordinate bounds as (min_x, max_x, min_y, max_y),
            expected to satisfy min_x <= max_x and min_y <= max_y

    Returns:
        List of WKT strings
    """
    min_x, max_x, min_y, max_y = bounds[:4]
    # Largest rectangle side that still fits inside the bounds (20 by default)
    max_width = min(20, max_x - min_x)
    max_height = min(20, max_y - min_y)

    geom_types = ["POINT", "LINESTRING", "POLYGON"]
    geometries = []

    for _ in range(nb):
        geom_type = random.choice(geom_types)

        if geom_type == "POINT":
            x = random.uniform(min_x, max_x)
            y = random.uniform(min_y, max_y)
            wkt = f"POINT ({x:.2f} {y:.2f})"

        elif geom_type == "LINESTRING":
            num_points = random.randint(2, 5)
            points = []
            for _point in range(num_points):
                x = random.uniform(min_x, max_x)
                y = random.uniform(min_y, max_y)
                points.append(f"{x:.2f} {y:.2f}")
            wkt = f"LINESTRING ({', '.join(points)})"

        else:  # POLYGON
            # Generate a simple rectangle polygon. The lower-left corner is
            # kept far enough from the upper bounds that the largest
            # possible rectangle cannot cross them.
            x = random.uniform(min_x, max_x - max_width)
            y = random.uniform(min_y, max_y - max_height)
            width = random.uniform(max_width / 2, max_width)
            height = random.uniform(max_height / 2, max_height)
            wkt = (
                f"POLYGON (({x:.2f} {y:.2f}, {x + width:.2f} {y:.2f}, "
                f"{x + width:.2f} {y + height:.2f}, {x:.2f} {y + height:.2f}, "
                f"{x:.2f} {y:.2f}))"
            )

        geometries.append(wkt)

    return geometries
149+
150+
103151
def gen_fp16_vectors(num, dim, for_json=False):
104152
"""
105153
generate float16 vector data
@@ -468,6 +516,19 @@ def gen_json_in_numpy_file(dir, data_field, rows, start=0, force=False):
468516
return file_name
469517

470518

519+
def gen_geometry_in_numpy_file(dir, data_field, rows, start=0, force=False):
    """
    Write a "<data_field>.npy" file of WKT geometry strings into dir.

    The file is (re)generated only when it does not exist yet or when force
    is set; otherwise the existing file is left untouched. With rows <= 0 an
    empty array is saved. The start argument is accepted but not used here.

    Returns:
        The file name (without the directory part)
    """
    file_name = f"{data_field}.npy"
    target_path = f"{dir}/{file_name}"

    # Nothing to do when the file is already there and no rewrite was requested
    if os.path.exists(target_path) and not force:
        return file_name

    wkt_rows = gen_wkt_geometry(rows) if rows > 0 else []
    arr = np.array(wkt_rows)
    log.info(f"file_name: {file_name} data type: {arr.dtype} data shape: {arr.shape}")
    np.save(target_path, arr)
    return file_name
530+
531+
471532
def gen_int_or_float_in_numpy_file(dir, data_field, rows, start=0, force=False, nullable=False, **kwargs):
472533
file_name = f"{data_field}.npy"
473534
file = f"{dir}/{file_name}"
@@ -635,6 +696,17 @@ def gen_data_by_data_field(data_field, rows, start=0, float_vector=True, dim=128
635696
for i in range(start, rows + start)])
636697
else:
637698
data = [None for _ in range(start, rows + start)]
699+
elif data_field == DataField.geo_field:
700+
if not nullable:
701+
# Generate WKT geometry strings for parquet
702+
data = gen_wkt_geometry(rows)
703+
else:
704+
data = [None for _ in range(start, rows + start)]
705+
elif data_field == DataField.timestamp_field:
706+
if not nullable:
707+
data = [gen_timestamptz_str() for _ in range(start, rows + start)]
708+
else:
709+
data = [None for _ in range(start, rows + start)]
638710
else:
639711
raise Exception("unsupported field name")
640712

@@ -796,6 +868,17 @@ def gen_dict_data_by_data_field(data_fields, rows, start=0, float_vector=True, d
796868
d[data_field] = [gen_unique_str(str(i)) for i in range(array_length)]
797869
else:
798870
d[data_field] = None
871+
elif data_field == DataField.geo_field:
872+
if not nullable:
873+
# Generate a single WKT geometry string
874+
d[data_field] = gen_wkt_geometry(1)[0]
875+
else:
876+
d[data_field] = None
877+
elif data_field == DataField.timestamp_field:
878+
if not nullable:
879+
d[data_field] = gen_timestamptz_str()
880+
else:
881+
d[data_field] = None
799882
else:
800883
raise Exception("unsupported field name")
801884
if enable_dynamic_field:
@@ -906,6 +989,8 @@ def gen_npy_files(float_vector, rows, dim, data_fields, file_size=None, file_num
906989
file_name = gen_bool_in_numpy_file(dir=data_source_new, data_field=data_field, rows=rows, force=force)
907990
elif data_field == DataField.json_field:
908991
file_name = gen_json_in_numpy_file(dir=data_source_new, data_field=data_field, rows=rows, force=force)
992+
elif data_field == DataField.geo_field:
993+
file_name = gen_geometry_in_numpy_file(dir=data_source_new, data_field=data_field, rows=rows, force=force)
909994
else:
910995
file_name = gen_int_or_float_in_numpy_file(dir=data_source_new, data_field=data_field,
911996
rows=rows, force=force, nullable=nullable, shuffle_pk=shuffle_pk)

0 commit comments

Comments
 (0)