Skip to content

Commit 12136e1

Browse files
committed
feat(utils): Support multiple order_by columns in RangeQuerySetWrapper
1 parent 7fc5c75 commit 12136e1

File tree

2 files changed

+119
-24
lines changed

2 files changed

+119
-24
lines changed

src/sentry/utils/query.py

Lines changed: 66 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,9 @@
77

88
import click
99
from django.db import connections, router
10+
from django.db.models import F, Func, Value
1011
from django.db.models.fields import Field
12+
from django.db.models.lookups import BuiltinLookup
1113
from django.db.models.query import QuerySet
1214
from django.db.models.query_utils import Q
1315
from django.db.models.sql.constants import ROW_COUNT
@@ -26,6 +28,17 @@ class InvalidQuerySetError(ValueError):
2628
pass
2729

2830

31+
class Tuple(Func):
32+
"""PostgreSQL ROW constructor for tuple expressions."""
33+
34+
function = "" # Empty string renders as (expr, ...) without function name
35+
36+
def __init__(self, *expressions: Any, output_field: Field | None = None, **extra: Any) -> None:
37+
if output_field is None:
38+
output_field = Field()
39+
super().__init__(*expressions, output_field=output_field, **extra)
40+
41+
2942
class TaskBulkQueryState(TypedDict):
3043
timestamp: str
3144
event_id: str
@@ -95,7 +108,10 @@ class RangeQuerySetWrapper[V]:
95108
Iterates through a queryset by chunking results by ``step`` and using GREATER THAN
96109
and LESS THAN queries on the primary key.
97110
98-
Very efficient, but ORDER BY statements will not work.
111+
Supports compound ordering for efficient multi-column pagination:
112+
RangeQuerySetWrapper(qs, order_by=("project_id", "id"))
113+
114+
The final field in a compound ordering must be unique.
99115
"""
100116

101117
def __init__[M: Model](
@@ -105,7 +121,7 @@ def __init__[M: Model](
105121
step: int = 1000,
106122
limit: int | None = None,
107123
min_id: int | None = None,
108-
order_by: str = "pk",
124+
order_by: str | tuple[str, ...] = "pk",
109125
callbacks: Sequence[Callable[[list[V]], None]] = (),
110126
result_value_getter: Callable[[V], int] | None = None,
111127
override_unique_safety_check: bool = False,
@@ -133,32 +149,45 @@ def __init__[M: Model](
133149
self.callbacks = callbacks
134150
self.result_value_getter = result_value_getter
135151

136-
order_by_col = queryset.model._meta.get_field(order_by if order_by != "pk" else "id")
152+
self.order_by_fields: tuple[str, ...] = (
153+
(order_by,) if isinstance(order_by, str) else order_by
154+
)
155+
156+
if not self.order_by_fields:
157+
raise InvalidQuerySetError("order_by cannot be empty")
158+
159+
# For compound ordering, only the final field needs to be unique
160+
final_field = self.order_by_fields[-1]
161+
order_by_col = queryset.model._meta.get_field(final_field if final_field != "pk" else "id")
137162
if not override_unique_safety_check and (
138163
not isinstance(order_by_col, Field) or not order_by_col.unique
139164
):
140165
# TODO: Ideally we could fix this bug and support ordering by a non unique col
166+
field_desc = (
167+
f"Final field '{final_field}'"
168+
if len(self.order_by_fields) > 1
169+
else "Order by column"
170+
)
141171
raise InvalidQuerySetError(
142-
"Order by column must be unique, otherwise this wrapper can get "
172+
f"{field_desc} must be unique, otherwise this wrapper can get "
143173
"stuck in an infinite loop. If you're sure your data is unique, "
144174
"you can disable this by passing "
145175
"`override_unique_safety_check=True`"
146176
)
147177

148178
def __iter__(self) -> Iterator[V]:
149179
if self.min_value is not None:
150-
cur_value = self.min_value
180+
# For compound ordering, min_value applies to the final field only
181+
cur_values = [None] * (len(self.order_by_fields) - 1) + [self.min_value]
151182
else:
152-
cur_value = None
183+
cur_values = [None] * len(self.order_by_fields)
153184

154185
num = 0
155186
limit = self.limit
156187

157-
queryset = self.queryset
158-
if self.desc:
159-
queryset = queryset.order_by("-%s" % self.order_by)
160-
else:
161-
queryset = queryset.order_by(self.order_by)
188+
queryset = self.queryset.order_by(
189+
*(f"-{field}" if self.desc else field for field in self.order_by_fields)
190+
)
162191

163192
# we implement basic cursor pagination for columns that are not unique
164193
last_object_pk: int | None = None
@@ -169,12 +198,11 @@ def __iter__(self) -> Iterator[V]:
169198

170199
start = num
171200

172-
if cur_value is None:
201+
if all(v is None for v in cur_values):
173202
results_qs = queryset
174-
elif self.desc:
175-
results_qs = queryset.filter(**{"%s__lte" % self.order_by: cur_value})
176203
else:
177-
results_qs = queryset.filter(**{"%s__gte" % self.order_by: cur_value})
204+
# Build compound cursor condition
205+
results_qs = queryset.filter(self._build_cursor_filter(cur_values))
178206

179207
results = list(results_qs[0 : self.step])
180208

@@ -197,19 +225,36 @@ def __iter__(self) -> Iterator[V]:
197225
# to `None` causing the loop to exit early.
198226
num += 1
199227
last_object_pk = pk
200-
cur_value = (
201-
self.result_value_getter(result)
202-
if self.result_value_getter
203-
else getattr(result, self.order_by)
204-
)
228+
229+
cur_values = [
230+
(
231+
self.result_value_getter(result)
232+
if self.result_value_getter and field == self.order_by_fields[-1]
233+
else getattr(result, field)
234+
)
235+
for field in self.order_by_fields
236+
]
205237

206238
yield result
207239

208-
if cur_value is None:
240+
if all(v is None for v in cur_values):
209241
break
210242

211243
has_results = num > start
212244

245+
def _build_cursor_filter(self, cur_values: list[Any]) -> Q:
246+
"""Build cursor filter using PostgreSQL tuple comparison."""
247+
if len(self.order_by_fields) == 1:
248+
op = "lte" if self.desc else "gte"
249+
return Q(**{f"{self.order_by_fields[0]}__{op}": cur_values[0]})
250+
251+
lookup = BuiltinLookup(
252+
Tuple(*[F(field) for field in self.order_by_fields]),
253+
Tuple(*[Value(v) for v in cur_values]),
254+
)
255+
lookup.lookup_name = "lte" if self.desc else "gte"
256+
return Q(lookup)
257+
213258

214259
class RangeQuerySetWrapperWithProgressBar[V](RangeQuerySetWrapper[V]):
215260
def get_total_count(self) -> int:

tests/sentry/utils/test_query.py

Lines changed: 53 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -68,16 +68,66 @@ def test_order_by_non_unique_fails(self) -> None:
6868
self.range_wrapper(qs, order_by="name", override_unique_safety_check=True)
6969

7070
def test_order_by_unique(self) -> None:
71-
self.create_user()
71+
total = 10
72+
for _ in range(total):
73+
self.create_user()
74+
7275
qs = User.objects.all()
73-
self.range_wrapper(qs, order_by="username")
74-
assert len(list(self.range_wrapper(qs, order_by="username", step=2))) == 1
76+
77+
wrapper = self.range_wrapper(qs, order_by="username", step=3)
78+
assert len(list(wrapper)) == total
7579

7680
def test_wrapper_over_values_list(self) -> None:
7781
self.create_user()
7882
qs = User.objects.all().values_list("id")
7983
assert list(qs) == list(self.range_wrapper(qs, result_value_getter=lambda r: r[0]))
8084

85+
def test_compound_order_by_tuple(self) -> None:
86+
"""Test that tuple order_by is accepted and works correctly."""
87+
total = 5
88+
for _ in range(total):
89+
self.create_user()
90+
91+
qs = User.objects.all()
92+
93+
wrapper = self.range_wrapper(qs, order_by=("date_joined", "id"), step=2)
94+
results = list(wrapper)
95+
assert len(results) == total
96+
97+
def test_compound_order_by_unique_final_field(self) -> None:
98+
"""Test that compound order_by requires final field to be unique."""
99+
qs = User.objects.all()
100+
101+
# Should fail with non-unique final field
102+
with pytest.raises(InvalidQuerySetError, match="Final field"):
103+
self.range_wrapper(qs, order_by=("date_joined", "name"))
104+
105+
def test_compound_order_by_descending(self) -> None:
106+
"""Test that compound order_by works with descending order."""
107+
total = 5
108+
for _ in range(total):
109+
self.create_user()
110+
111+
qs = User.objects.all()
112+
113+
# Test descending order with negative step
114+
wrapper = self.range_wrapper(qs, order_by=("date_joined", "id"), step=-2)
115+
results = list(wrapper)
116+
assert len(results) == total
117+
118+
for i in range(len(results) - 1):
119+
assert (results[i].date_joined, results[i].id) >= (
120+
results[i + 1].date_joined,
121+
results[i + 1].id,
122+
)
123+
124+
def test_order_by_empty_tuple(self) -> None:
125+
"""Test that empty order_by tuple raises an error."""
126+
qs = User.objects.all()
127+
128+
with pytest.raises(InvalidQuerySetError, match="order_by cannot be empty"):
129+
self.range_wrapper(qs, order_by=())
130+
81131

82132
@no_silo_test
83133
class RangeQuerySetWrapperWithProgressBarTest(RangeQuerySetWrapperTest):

0 commit comments

Comments
 (0)