
Commit cfeff8c

bbartels authored and ZhengHongming888 committed
Adds anthropic /v1/messages endpoint to openai api_server (vllm-project#27882)
Signed-off-by: bbartels <[email protected]>
Signed-off-by: Benjamin Bartels <[email protected]>
1 parent e72ba68 commit cfeff8c
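
In effect, the Anthropic Python SDK can now be pointed straight at vLLM's OpenAI-compatible server. A minimal sketch, assuming a server started with `vllm serve Qwen/Qwen3-0.6B --port 8000` on localhost; the host, port, dummy API key, and model name are illustrative assumptions, not part of this commit:

    import anthropic

    # The SDK appends /v1/messages itself, so the server root is enough.
    client = anthropic.Anthropic(
        base_url="http://localhost:8000",
        api_key="token-abc123",  # assumed dummy key; vLLM does not check it by default
        max_retries=0,
    )

    resp = client.messages.create(
        model="Qwen/Qwen3-0.6B",
        max_tokens=256,
        messages=[{"role": "user", "content": "Say hello in one sentence."}],
    )
    print(resp.content[0].text)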

File tree

5 files changed: +138 -461 lines changed

tests/entrypoints/anthropic/__init__.py

Whitespace-only changes.

tests/entrypoints/anthropic/test_messages.py renamed to tests/entrypoints/openai/test_messages.py

Lines changed: 35 additions & 35 deletions
@@ -5,7 +5,7 @@
 import pytest
 import pytest_asyncio
 
-from ...utils import RemoteAnthropicServer
+from ...utils import RemoteOpenAIServer
 
 MODEL_NAME = "Qwen/Qwen3-0.6B"
 
@@ -23,13 +23,13 @@ def server(): # noqa: F811
         "claude-3-7-sonnet-latest",
     ]
 
-    with RemoteAnthropicServer(MODEL_NAME, args) as remote_server:
+    with RemoteOpenAIServer(MODEL_NAME, args) as remote_server:
         yield remote_server
 
 
 @pytest_asyncio.fixture
 async def client(server):
-    async with server.get_async_client() as async_client:
+    async with server.get_async_client_anthropic() as async_client:
         yield async_client
 
 
@@ -105,37 +105,37 @@ async def test_anthropic_tool_call(client: anthropic.AsyncAnthropic):
 
     print(f"Anthropic response: {resp.model_dump_json()}")
 
-@pytest.mark.asyncio
-async def test_anthropic_tool_call_streaming(client: anthropic.AsyncAnthropic):
-    resp = await client.messages.create(
-        model="claude-3-7-sonnet-latest",
-        max_tokens=1024,
-        messages=[
-            {
-                "role": "user",
-                "content": "What's the weather like in New York today?",
-            }
-        ],
-        tools=[
-            {
-                "name": "get_current_weather",
-                "description": "Useful for querying the weather "
-                "in a specified city.",
-                "input_schema": {
-                    "type": "object",
-                    "properties": {
-                        "location": {
-                            "type": "string",
-                            "description": "City or region, for example: "
-                            "New York, London, Tokyo, etc.",
-                        }
-                    },
-                    "required": ["location"],
+
+@pytest.mark.asyncio
+async def test_anthropic_tool_call_streaming(client: anthropic.AsyncAnthropic):
+    resp = await client.messages.create(
+        model="claude-3-7-sonnet-latest",
+        max_tokens=1024,
+        messages=[
+            {
+                "role": "user",
+                "content": "What's the weather like in New York today?",
+            }
+        ],
+        tools=[
+            {
+                "name": "get_current_weather",
+                "description": "Useful for querying the weather in a specified city.",
+                "input_schema": {
+                    "type": "object",
+                    "properties": {
+                        "location": {
+                            "type": "string",
+                            "description": "City or region, for example: "
+                            "New York, London, Tokyo, etc.",
+                        }
                     },
-            }
-        ],
-        stream=True,
-    )
+                    "required": ["location"],
+                },
+            }
+        ],
+        stream=True,
+    )
 
-    async for chunk in resp:
-        print(chunk.model_dump_json())
+    async for chunk in resp:
+        print(chunk.model_dump_json())
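
For comparison, a sketch of consuming the stream by accumulating text deltas instead of printing raw events, reusing the `client` fixture above. The test name is hypothetical, and the event types (`content_block_delta` / `text_delta`) come from the Anthropic streaming protocol; full coverage of that protocol by this endpoint is an assumption here, not something this diff shows:

    @pytest.mark.asyncio
    async def test_streaming_text_accumulation(client: anthropic.AsyncAnthropic):
        # Hypothetical companion test: collect streamed text into one string.
        stream = await client.messages.create(
            model="claude-3-7-sonnet-latest",
            max_tokens=256,
            messages=[{"role": "user", "content": "Count from 1 to 5."}],
            stream=True,
        )
        text = ""
        async for event in stream:
            # content_block_delta events carry incremental text in delta.text
            if event.type == "content_block_delta" and event.delta.type == "text_delta":
                text += event.delta.text
        assert text, "expected at least one streamed text delta"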

tests/utils.py

Lines changed: 17 additions & 125 deletions
@@ -247,6 +247,23 @@ def get_async_client(self, **kwargs):
             **kwargs,
         )
 
+    def get_client_anthropic(self, **kwargs):
+        if "timeout" not in kwargs:
+            kwargs["timeout"] = 600
+        return anthropic.Anthropic(
+            base_url=self.url_for(),
+            api_key=self.DUMMY_API_KEY,
+            max_retries=0,
+            **kwargs,
+        )
+
+    def get_async_client_anthropic(self, **kwargs):
+        if "timeout" not in kwargs:
+            kwargs["timeout"] = 600
+        return anthropic.AsyncAnthropic(
+            base_url=self.url_for(), api_key=self.DUMMY_API_KEY, max_retries=0, **kwargs
+        )
+
 
 class RemoteOpenAIServerCustom(RemoteOpenAIServer):
     """Launch test server with custom child process"""
@@ -293,131 +310,6 @@ def __exit__(self, exc_type, exc_value, traceback):
             self.proc.kill()
 
 
-class RemoteAnthropicServer:
-    DUMMY_API_KEY = "token-abc123"  # vLLM's Anthropic server does not need API key
-
-    def __init__(
-        self,
-        model: str,
-        vllm_serve_args: list[str],
-        *,
-        env_dict: dict[str, str] | None = None,
-        seed: int | None = 0,
-        auto_port: bool = True,
-        max_wait_seconds: float | None = None,
-    ) -> None:
-        if auto_port:
-            if "-p" in vllm_serve_args or "--port" in vllm_serve_args:
-                raise ValueError(
-                    "You have manually specified the port when `auto_port=True`."
-                )
-
-            # Don't mutate the input args
-            vllm_serve_args = vllm_serve_args + ["--port", str(get_open_port())]
-        if seed is not None:
-            if "--seed" in vllm_serve_args:
-                raise ValueError(
-                    f"You have manually specified the seed when `seed={seed}`."
-                )
-
-            vllm_serve_args = vllm_serve_args + ["--seed", str(seed)]
-
-        parser = FlexibleArgumentParser(description="vLLM's remote Anthropic server.")
-        subparsers = parser.add_subparsers(required=False, dest="subparser")
-        parser = ServeSubcommand().subparser_init(subparsers)
-        args = parser.parse_args(["--model", model, *vllm_serve_args])
-        self.host = str(args.host or "localhost")
-        self.port = int(args.port)
-
-        self.show_hidden_metrics = args.show_hidden_metrics_for_version is not None
-
-        # download the model before starting the server to avoid timeout
-        is_local = os.path.isdir(model)
-        if not is_local:
-            engine_args = AsyncEngineArgs.from_cli_args(args)
-            model_config = engine_args.create_model_config()
-            load_config = engine_args.create_load_config()
-
-            model_loader = get_model_loader(load_config)
-            model_loader.download_model(model_config)
-
-        env = os.environ.copy()
-        # the current process might initialize cuda,
-        # to be safe, we should use spawn method
-        env["VLLM_WORKER_MULTIPROC_METHOD"] = "spawn"
-        if env_dict is not None:
-            env.update(env_dict)
-        self.proc = subprocess.Popen(
-            [
-                sys.executable,
-                "-m",
-                "vllm.entrypoints.anthropic.api_server",
-                model,
-                *vllm_serve_args,
-            ],
-            env=env,
-            stdout=sys.stdout,
-            stderr=sys.stderr,
-        )
-        max_wait_seconds = max_wait_seconds or 240
-        self._wait_for_server(url=self.url_for("health"), timeout=max_wait_seconds)
-
-    def __enter__(self):
-        return self
-
-    def __exit__(self, exc_type, exc_value, traceback):
-        self.proc.terminate()
-        try:
-            self.proc.wait(8)
-        except subprocess.TimeoutExpired:
-            # force kill if needed
-            self.proc.kill()
-
-    def _wait_for_server(self, *, url: str, timeout: float):
-        # run health check
-        start = time.time()
-        while True:
-            try:
-                if requests.get(url).status_code == 200:
-                    break
-            except Exception:
-                # this exception can only be raised by requests.get,
-                # which means the server is not ready yet.
-                # the stack trace is not useful, so we suppress it
-                # by using `raise from None`.
-                result = self.proc.poll()
-                if result is not None and result != 0:
-                    raise RuntimeError("Server exited unexpectedly.") from None
-
-                time.sleep(0.5)
-                if time.time() - start > timeout:
-                    raise RuntimeError("Server failed to start in time.") from None
-
-    @property
-    def url_root(self) -> str:
-        return f"http://{self.host}:{self.port}"
-
-    def url_for(self, *parts: str) -> str:
-        return self.url_root + "/" + "/".join(parts)
-
-    def get_client(self, **kwargs):
-        if "timeout" not in kwargs:
-            kwargs["timeout"] = 600
-        return anthropic.Anthropic(
-            base_url=self.url_for(),
-            api_key=self.DUMMY_API_KEY,
-            max_retries=0,
-            **kwargs,
-        )
-
-    def get_async_client(self, **kwargs):
-        if "timeout" not in kwargs:
-            kwargs["timeout"] = 600
-        return anthropic.AsyncAnthropic(
-            base_url=self.url_for(), api_key=self.DUMMY_API_KEY, max_retries=0, **kwargs
-        )
-
-
 def _test_completion(
     client: openai.OpenAI,
     model: str,
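
With RemoteAnthropicServer gone, one RemoteOpenAIServer process now backs both SDKs. A minimal sketch of how a test might use the new helpers; the import path, model, and extra args are illustrative (in-tree tests would use the relative `from ...utils import RemoteOpenAIServer`):

    from tests.utils import RemoteOpenAIServer

    with RemoteOpenAIServer("Qwen/Qwen3-0.6B", ["--max-model-len", "2048"]) as server:
        openai_client = server.get_client()            # existing OpenAI helper
        claude_client = server.get_client_anthropic()  # new Anthropic helper
        openai_client.models.list()  # same process also serves the OpenAI routes
        msg = claude_client.messages.create(
            model="Qwen/Qwen3-0.6B",
            max_tokens=64,
            messages=[{"role": "user", "content": "ping"}],
        )
        print(msg.content[0].text)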
