
Commit b7ebd02

Add gpt-5-search-api model support (#125)
Reference:
- https://platform.openai.com/docs/guides/tools-web-search?api-mode=chat
1 parent 3e86212 commit b7ebd02
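
For orientation, here is a minimal sketch of calling the new model directly through the OpenAI Python SDK's Chat Completions API, mirroring the parameter handling this commit introduces (the token budget is sent as max_tokens, while sampling parameters and n are omitted for search models). The prompt, token budget, and user id below are illustrative assumptions, not values from this repository.

from openai import OpenAI

client = OpenAI()  # assumes OPENAI_API_KEY is set in the environment

# gpt-5-search-api is treated as a non-reasoning chat model, so the token
# budget goes in max_tokens; temperature, penalties, logit_bias, top_p, and n
# are deliberately omitted, matching _create_chat_completion in app/openai_ops.py.
response = client.chat.completions.create(
    model="gpt-5-search-api",
    messages=[{"role": "user", "content": "What is the latest OpenAI API news?"}],
    max_tokens=1024,
    user="U123",
    stream=False,
)
print(response.choices[0].message.content)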


3 files changed: 41 additions, 10 deletions


app/openai_constants.py

Lines changed: 5 additions & 0 deletions
@@ -28,6 +28,8 @@
 GPT_4_1_NANO_MODEL = "gpt-4.1-nano"
 GPT_4_1_NANO_2025_04_14_MODEL = "gpt-4.1-nano-2025-04-14"
 GPT_5_CHAT_LATEST_MODEL = "gpt-5-chat-latest"
+GPT_5_SEARCH_API_MODEL = "gpt-5-search-api"
+GPT_5_SEARCH_API_2025_10_14_MODEL = "gpt-5-search-api-2025-10-14"
 GPT_5_MODEL = "gpt-5"
 GPT_5_MINI_MODEL = "gpt-5-mini"
 GPT_5_NANO_MODEL = "gpt-5-nano"
@@ -75,6 +77,7 @@
     GPT_4O_MINI_2024_07_18_MODEL: (3, 1),
     # GPT-5 chat latest
     GPT_5_CHAT_LATEST_MODEL: (3, 1),
+    GPT_5_SEARCH_API_2025_10_14_MODEL: (3, 1),
     # GPT-5 family (dated)
     GPT_5_2025_08_07_MODEL: (3, 1),
     GPT_5_MINI_2025_08_07_MODEL: (3, 1),
@@ -100,6 +103,7 @@
     GPT_5_MODEL: GPT_5_2025_08_07_MODEL,
     GPT_5_MINI_MODEL: GPT_5_MINI_2025_08_07_MODEL,
     GPT_5_NANO_MODEL: GPT_5_NANO_2025_08_07_MODEL,
+    GPT_5_SEARCH_API_MODEL: GPT_5_SEARCH_API_2025_10_14_MODEL,
     O3_MODEL: O3_2025_04_16_MODEL,
     O4_MINI_MODEL: O4_MINI_2025_04_16_MODEL,
 }
@@ -130,6 +134,7 @@
     GPT_4_1_NANO_2025_04_14_MODEL: 1048576,
     # GPT-5 chat latest
     GPT_5_CHAT_LATEST_MODEL: 128000,
+    GPT_5_SEARCH_API_2025_10_14_MODEL: 128000,
     # GPT-5 family (dated)
     GPT_5_2025_08_07_MODEL: 128000,
     GPT_5_MINI_2025_08_07_MODEL: 128000,
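
The four hunks above register the new model in what appear to be the per-message token-counting table, the alias map from bare model names to dated snapshots, and the context-window table. A minimal sketch of how a caller might resolve the alias and look up its context window; the dict names MODEL_ALIASES and CONTEXT_WINDOWS are placeholders, since the actual variable names are outside the visible hunks.

# Placeholder names; the real dict identifiers are not shown in these hunks.
GPT_5_SEARCH_API_MODEL = "gpt-5-search-api"
GPT_5_SEARCH_API_2025_10_14_MODEL = "gpt-5-search-api-2025-10-14"

MODEL_ALIASES = {GPT_5_SEARCH_API_MODEL: GPT_5_SEARCH_API_2025_10_14_MODEL}
CONTEXT_WINDOWS = {GPT_5_SEARCH_API_2025_10_14_MODEL: 128000}


def resolve(model: str) -> tuple[str, int]:
    """Return the dated snapshot and its context window for a model name."""
    dated = MODEL_ALIASES.get(model, model)
    return dated, CONTEXT_WINDOWS[dated]


print(resolve("gpt-5-search-api"))  # ('gpt-5-search-api-2025-10-14', 128000)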

app/openai_ops.py

Lines changed: 14 additions & 4 deletions
@@ -86,13 +86,13 @@ def messages_within_context_window(
 def _is_reasoning(model: str) -> bool:
     """Returns True if the model is a reasoning model under Chat Completions.
 
-    Excludes chat models like gpt-5-chat-latest. Matches o3*, o4*, and
+    Excludes chat models like gpt-5-chat-latest and gpt-5-search-api. Matches o3*, o4*, and
     non-chat gpt-5* families. Case-insensitive and safe with None/empty.
     """
     if not model:
         return False
     ml = model.lower()
-    if ml.startswith("gpt-5-chat"):
+    if ml.startswith("gpt-5-chat") or ml.startswith("gpt-5-search"):
         return False
     return (
         ml.startswith("o1")
@@ -102,6 +102,13 @@ def _is_reasoning(model: str) -> bool:
     )
 
 
+def _is_search_model(model: str) -> bool:
+    """Returns True for search-specific chat models."""
+    if not model:
+        return False
+    return model.lower().startswith("gpt-5-search")
+
+
 def _normalize_base_url(value: Optional[str]) -> Optional[str]:
     """Normalizes falsy/empty base URLs to None for SDK compatibility."""
     if value is None:
@@ -159,17 +166,20 @@ def _create_chat_completion(
         raise ValueError("timeout_seconds must be None for streaming calls")
 
     is_reasoning = _is_reasoning(model)
+    is_search = _is_search_model(model)
     # Reasoning models use max_completion_tokens; others use max_tokens
     token_kwarg = _token_budget_kwarg(model, MAX_TOKENS)
 
     base_kwargs = dict(
         model=model,
         messages=messages,
-        n=1,
         user=user,
         stream=stream,
     )
-    if not is_reasoning:
+    if not is_search:
+        base_kwargs["n"] = 1
+
+    if not is_reasoning and not is_search:
         base_kwargs["temperature"] = temperature
         base_kwargs["presence_penalty"] = 0
         base_kwargs["frequency_penalty"] = 0
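
Net effect of the new branches: search models keep a plain max_tokens budget but never receive n or any sampling parameter, reasoning models additionally switch to max_completion_tokens, and ordinary chat models get everything. A self-contained sketch of the resulting request kwargs follows; the model detection is a simplified stand-in for _is_reasoning/_is_search_model and the token budget is an illustrative value.

def build_request_kwargs(model: str, temperature: float, max_tokens: int) -> dict:
    """Simplified sketch of the kwargs assembly in _create_chat_completion."""
    ml = model.lower()
    is_search = ml.startswith("gpt-5-search")
    # Rough stand-in for _is_reasoning: o-series plus non-chat, non-search gpt-5*.
    is_reasoning = (
        ml.startswith(("o1", "o3", "o4"))
        or (ml.startswith("gpt-5") and not ml.startswith("gpt-5-chat") and not is_search)
    )

    kwargs = {"model": model}
    if not is_search:  # search models do not accept n
        kwargs["n"] = 1
    if not is_reasoning and not is_search:  # sampling controls only for plain chat models
        kwargs["temperature"] = temperature
        kwargs["presence_penalty"] = 0
        kwargs["frequency_penalty"] = 0
    # Token budget: reasoning models use max_completion_tokens, others max_tokens.
    kwargs["max_completion_tokens" if is_reasoning else "max_tokens"] = max_tokens
    return kwargs


print(build_request_kwargs("gpt-5-search-api", 0.5, 1024))
# -> {'model': 'gpt-5-search-api', 'max_tokens': 1024}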

tests/openai_ops_test.py

Lines changed: 22 additions & 6 deletions
@@ -3,8 +3,11 @@
     format_assistant_reply,
     format_openai_message_content,
 )
-from app.openai_constants import GPT_4O_MODEL
-from app.openai_constants import MAX_TOKENS
+from app.openai_constants import (
+    GPT_4O_MODEL,
+    GPT_5_SEARCH_API_MODEL,
+    MAX_TOKENS,
+)
 import pytest
 
 
@@ -152,6 +155,7 @@ def fake_calculate_num_tokens(messages, model=None): # type: ignore[no-redef]
     [
         (GPT_4O_MODEL, False, 0.7, 12, "U123"),
         ("o3", True, 1.0, 5, "U234"),
+        (GPT_5_SEARCH_API_MODEL, False, 0.5, 8, "U345"),
     ],
 )
 def test_sync_tokens_and_sampling_behavior(fake_clients, api_type, model, is_reasoning, temperature, timeout, user):
@@ -172,22 +176,34 @@ def test_sync_tokens_and_sampling_behavior(fake_clients, api_type, model, is_rea
     )
 
     kwargs = fake_clients["create_kwargs"]
+    is_search = kwargs.get("model", "").startswith("gpt-5-search")
     if is_reasoning:
         assert kwargs.get("max_completion_tokens") == MAX_TOKENS
         assert "max_tokens" not in kwargs
         for k in ("temperature", "presence_penalty", "frequency_penalty", "logit_bias"):
             assert k not in kwargs
+        assert "top_p" not in kwargs
+    elif is_search:
+        assert kwargs.get("max_tokens") == MAX_TOKENS
+        for k in (
+            "temperature",
+            "presence_penalty",
+            "frequency_penalty",
+            "logit_bias",
+            "top_p",
+        ):
+            assert k not in kwargs
     else:
         assert kwargs.get("max_tokens") == MAX_TOKENS
         assert kwargs.get("temperature") == temperature
         assert kwargs.get("presence_penalty") == 0
         assert kwargs.get("frequency_penalty") == 0
         assert isinstance(kwargs.get("logit_bias"), dict)
-    if is_reasoning:
-        assert "top_p" not in kwargs
-    else:
         assert kwargs.get("top_p") == 1
-    assert kwargs.get("n") == 1
+    if is_search:
+        assert "n" not in kwargs
+    else:
+        assert kwargs.get("n") == 1
     assert kwargs.get("user") == user
     assert kwargs.get("stream") is False
     assert kwargs.get("timeout") == timeout
