Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions app/openai_constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,8 @@
GPT_4_1_NANO_MODEL = "gpt-4.1-nano"
GPT_4_1_NANO_2025_04_14_MODEL = "gpt-4.1-nano-2025-04-14"
GPT_5_CHAT_LATEST_MODEL = "gpt-5-chat-latest"
GPT_5_SEARCH_API_MODEL = "gpt-5-search-api"
GPT_5_SEARCH_API_2025_10_14_MODEL = "gpt-5-search-api-2025-10-14"
GPT_5_MODEL = "gpt-5"
GPT_5_MINI_MODEL = "gpt-5-mini"
GPT_5_NANO_MODEL = "gpt-5-nano"
Expand Down Expand Up @@ -75,6 +77,7 @@
GPT_4O_MINI_2024_07_18_MODEL: (3, 1),
# GPT-5 chat latest
GPT_5_CHAT_LATEST_MODEL: (3, 1),
GPT_5_SEARCH_API_2025_10_14_MODEL: (3, 1),
# GPT-5 family (dated)
GPT_5_2025_08_07_MODEL: (3, 1),
GPT_5_MINI_2025_08_07_MODEL: (3, 1),
Expand All @@ -100,6 +103,7 @@
GPT_5_MODEL: GPT_5_2025_08_07_MODEL,
GPT_5_MINI_MODEL: GPT_5_MINI_2025_08_07_MODEL,
GPT_5_NANO_MODEL: GPT_5_NANO_2025_08_07_MODEL,
GPT_5_SEARCH_API_MODEL: GPT_5_SEARCH_API_2025_10_14_MODEL,
O3_MODEL: O3_2025_04_16_MODEL,
O4_MINI_MODEL: O4_MINI_2025_04_16_MODEL,
}
Expand Down Expand Up @@ -130,6 +134,7 @@
GPT_4_1_NANO_2025_04_14_MODEL: 1048576,
# GPT-5 chat latest
GPT_5_CHAT_LATEST_MODEL: 128000,
GPT_5_SEARCH_API_2025_10_14_MODEL: 128000,
# GPT-5 family (dated)
GPT_5_2025_08_07_MODEL: 128000,
GPT_5_MINI_2025_08_07_MODEL: 128000,
Expand Down
18 changes: 14 additions & 4 deletions app/openai_ops.py
Original file line number Diff line number Diff line change
Expand Up @@ -86,13 +86,13 @@ def messages_within_context_window(
def _is_reasoning(model: str) -> bool:
"""Returns True if the model is a reasoning model under Chat Completions.

Excludes chat models like gpt-5-chat-latest. Matches o3*, o4*, and
Excludes chat models like gpt-5-chat-latest and gpt-5-search-api. Matches o3*, o4*, and
non-chat gpt-5* families. Case-insensitive and safe with None/empty.
"""
if not model:
return False
ml = model.lower()
if ml.startswith("gpt-5-chat"):
if ml.startswith("gpt-5-chat") or ml.startswith("gpt-5-search"):
return False
return (
ml.startswith("o1")
Expand All @@ -102,6 +102,13 @@ def _is_reasoning(model: str) -> bool:
)


def _is_search_model(model: str) -> bool:
"""Returns True for search-specific chat models."""
if not model:
return False
return model.lower().startswith("gpt-5-search")


def _normalize_base_url(value: Optional[str]) -> Optional[str]:
"""Normalizes falsy/empty base URLs to None for SDK compatibility."""
if value is None:
Expand Down Expand Up @@ -159,17 +166,20 @@ def _create_chat_completion(
raise ValueError("timeout_seconds must be None for streaming calls")

is_reasoning = _is_reasoning(model)
is_search = _is_search_model(model)
# Reasoning models use max_completion_tokens; others use max_tokens
token_kwarg = _token_budget_kwarg(model, MAX_TOKENS)

base_kwargs = dict(
model=model,
messages=messages,
n=1,
user=user,
stream=stream,
)
if not is_reasoning:
if not is_search:
base_kwargs["n"] = 1

if not is_reasoning and not is_search:
base_kwargs["temperature"] = temperature
base_kwargs["presence_penalty"] = 0
base_kwargs["frequency_penalty"] = 0
Expand Down
28 changes: 22 additions & 6 deletions tests/openai_ops_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,11 @@
format_assistant_reply,
format_openai_message_content,
)
from app.openai_constants import GPT_4O_MODEL
from app.openai_constants import MAX_TOKENS
from app.openai_constants import (
GPT_4O_MODEL,
GPT_5_SEARCH_API_MODEL,
MAX_TOKENS,
)
import pytest


Expand Down Expand Up @@ -152,6 +155,7 @@ def fake_calculate_num_tokens(messages, model=None): # type: ignore[no-redef]
[
(GPT_4O_MODEL, False, 0.7, 12, "U123"),
("o3", True, 1.0, 5, "U234"),
(GPT_5_SEARCH_API_MODEL, False, 0.5, 8, "U345"),
],
)
def test_sync_tokens_and_sampling_behavior(fake_clients, api_type, model, is_reasoning, temperature, timeout, user):
Expand All @@ -172,22 +176,34 @@ def test_sync_tokens_and_sampling_behavior(fake_clients, api_type, model, is_rea
)

kwargs = fake_clients["create_kwargs"]
is_search = kwargs.get("model", "").startswith("gpt-5-search")
if is_reasoning:
assert kwargs.get("max_completion_tokens") == MAX_TOKENS
assert "max_tokens" not in kwargs
for k in ("temperature", "presence_penalty", "frequency_penalty", "logit_bias"):
assert k not in kwargs
assert "top_p" not in kwargs
elif is_search:
assert kwargs.get("max_tokens") == MAX_TOKENS
for k in (
"temperature",
"presence_penalty",
"frequency_penalty",
"logit_bias",
"top_p",
):
assert k not in kwargs
else:
assert kwargs.get("max_tokens") == MAX_TOKENS
assert kwargs.get("temperature") == temperature
assert kwargs.get("presence_penalty") == 0
assert kwargs.get("frequency_penalty") == 0
assert isinstance(kwargs.get("logit_bias"), dict)
if is_reasoning:
assert "top_p" not in kwargs
else:
assert kwargs.get("top_p") == 1
assert kwargs.get("n") == 1
if is_search:
assert "n" not in kwargs
else:
assert kwargs.get("n") == 1
assert kwargs.get("user") == user
assert kwargs.get("stream") is False
assert kwargs.get("timeout") == timeout
Expand Down