Skip to content

Commit a05b580

Browse files
chaunceyjiang authored and njhill committed
[Bugfix] fix --scheduling-policy=priority & n>1 crashes engine (#29764)
Signed-off-by: chaunceyjiang <[email protected]>
Signed-off-by: Nick Hill <[email protected]>
Co-authored-by: Nick Hill <[email protected]>
(cherry picked from commit 0a9caca)
1 parent b6ae5ae commit a05b580

File tree

3 files changed

+34
-15
lines changed

3 files changed

+34
-15
lines changed

tests/v1/core/test_priority_scheduler_random.py

Lines changed: 11 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -219,7 +219,17 @@ def test_priority_scheduling_blast(
219219
vllm_config=scheduler.vllm_config,
220220
)
221221
scheduler.add_request(req)
222-
222+
num_initial_requests = 2
223+
for _ in range(num_initial_requests):
224+
req = _create_random_request(
225+
max_tokens_range=(1, max_output_tokens),
226+
num_tokens_range=(1, max_input_tokens),
227+
arrival_time_range=(0, 0),
228+
priority_range=(4, 4),
229+
num_mm_item_range=(0, 2),
230+
vllm_config=scheduler.vllm_config,
231+
)
232+
scheduler.add_request(req)
223233
for _ in range(20000):
224234
if len(scheduler.waiting) == 0:
225235
num_new_requests = random.randint(0, 2)

vllm/v1/core/sched/request_queue.py

Lines changed: 10 additions & 14 deletions
Original file line number | Diff line number | Diff line change
@@ -137,31 +137,30 @@ class PriorityRequestQueue(RequestQueue):
137137
"""
138138
A priority queue that supports heap operations.
139139
140-
Requests with a smaller value of `priority` are processed first.
140+
Respects the ordering defined in the Request class, where
141+
requests with a smaller value of `priority` are processed first.
141142
If multiple requests have the same priority, the one with the earlier
142143
`arrival_time` is processed first.
143144
"""
144145

145146
def __init__(self) -> None:
146-
self._heap: list[tuple[int, float, Request]] = []
147+
self._heap: list[Request] = []
147148

148149
def add_request(self, request: Request) -> None:
149150
"""Add a request to the queue according to priority policy."""
150-
heapq.heappush(self._heap, (request.priority, request.arrival_time, request))
151+
heapq.heappush(self._heap, request)
151152

152153
def pop_request(self) -> Request:
153154
"""Pop a request from the queue according to priority policy."""
154155
if not self._heap:
155156
raise IndexError("pop from empty heap")
156-
_, _, request = heapq.heappop(self._heap)
157-
return request
157+
return heapq.heappop(self._heap)
158158

159159
def peek_request(self) -> Request:
160160
"""Peek at the next request in the queue without removing it."""
161161
if not self._heap:
162162
raise IndexError("peek from empty heap")
163-
_, _, request = self._heap[0]
164-
return request
163+
return self._heap[0]
165164

166165
def prepend_request(self, request: Request) -> None:
167166
"""Add a request to the queue according to priority policy.
@@ -180,15 +179,13 @@ def prepend_requests(self, requests: RequestQueue) -> None:
180179

181180
def remove_request(self, request: Request) -> None:
182181
"""Remove a specific request from the queue."""
183-
self._heap = [(p, t, r) for p, t, r in self._heap if r != request]
182+
self._heap.remove(request)
184183
heapq.heapify(self._heap)
185184

186185
def remove_requests(self, requests: Iterable[Request]) -> None:
187186
"""Remove multiple specific requests from the queue."""
188-
requests_to_remove = set(requests)
189-
self._heap = [
190-
(p, t, r) for p, t, r in self._heap if r not in requests_to_remove
191-
]
187+
requests_to_remove = requests if isinstance(requests, set) else set(requests)
188+
self._heap = [r for r in self._heap if r not in requests_to_remove]
192189
heapq.heapify(self._heap)
193190

194191
def __bool__(self) -> bool:
@@ -203,8 +200,7 @@ def __iter__(self) -> Iterator[Request]:
203200
"""Iterate over the queue according to priority policy."""
204201
heap_copy = self._heap[:]
205202
while heap_copy:
206-
_, _, request = heapq.heappop(heap_copy)
207-
yield request
203+
yield heapq.heappop(heap_copy)
208204

209205
def __reversed__(self) -> Iterator[Request]:
210206
"""Iterate over the queue in reverse priority order."""

vllm/v1/request.py

Lines changed: 13 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -227,6 +227,19 @@ def take_events(self) -> list[EngineCoreEvent] | None:
227227
events, self.events = self.events, []
228228
return events
229229

230+
def __lt__(self, other: "Request") -> bool:
231+
"""
232+
Compare two requests based on priority, arrival time, and request ID.
233+
Used in priority scheduling.
234+
"""
235+
if self.priority != other.priority:
236+
return self.priority < other.priority
237+
if self.arrival_time != other.arrival_time:
238+
return self.arrival_time < other.arrival_time
239+
if self.request_id != other.request_id:
240+
return self.request_id < other.request_id
241+
return id(self) < id(other)
242+
230243

231244
class RequestStatus(enum.IntEnum):
232245
"""Status of a request."""

0 commit comments

Comments
 (0)