What happened?
When calling gpt-5.1 via litellm-proxy 1.80.0 and passing the new reasoning_effort="none" argument together with stream=True, litellm returns an error as if the "none" value were not supported:
```python
from openai import OpenAI

client = OpenAI(base_url="<litellm-proxy>")
completion = client.chat.completions.create(
    model="openai/gpt-5.1",
    messages=[{"role": "user", "content": "tell me joke"}],
    reasoning_effort="none",
    stream=True,
)
```

```json
{"error":
  {"message":
    "litellm.ServiceUnavailableError: litellm.MidStreamFallbackError: litellm.APIConnectionError: APIConnectionError:
     OpenAIException - Error receiving chunk from stream: 1 validation error for ResponseCreatedEvent
     response.reasoning.effort\n  Input should be 'minimal', 'low', 'medium' or 'high' [type=literal_error, input_value='none', input_type=str]
     For further information visit https://errors.pydantic.dev/2.10/v/literal_error. Received Model Group=openai/gpt-5.1
     Available Model Group Fallbacks=None", "type": null, "param": null, "code": "503"
  }
}
```
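The underlying failure is a Pydantic Literal validation error raised while the stream events are parsed. A minimal sketch of the kind of check that rejects the value (a hypothetical simplification for illustration, not the SDK's actual ResponseCreatedEvent model):

```python
# Hypothetical simplification of the Literal-typed field that rejects "none"
# mid-stream; not the real ResponseCreatedEvent definition.
from typing import Literal, Optional

from pydantic import BaseModel, ValidationError

class Reasoning(BaseModel):
    effort: Optional[Literal["minimal", "low", "medium", "high"]] = None

try:
    Reasoning(effort="none")  # the string "none" is not in the allowed Literal set
except ValidationError as exc:
    print(exc)  # -> Input should be 'minimal', 'low', 'medium' or 'high'
```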
Without streaming, the same request works as expected:
```python
from openai import OpenAI

client = OpenAI(base_url="<litellm-proxy>")
completion = client.chat.completions.create(
    model="openai/gpt-5.1",
    messages=[{"role": "user", "content": "tell me joke"}],
    reasoning_effort="none",
)
completion.choices[0].message.content
```

```
'Why don’t programmers like nature?\n\nIt has too many bugs.'
```

It also works as expected when calling the OpenAI API endpoint directly.
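For comparison, the equivalent streaming request sent straight to the OpenAI API, with no proxy in between, succeeds per the observation above. A sketch, assuming OPENAI_API_KEY is set in the environment:

```python
# Same streaming request without litellm-proxy in the middle; per the report,
# reasoning_effort="none" is accepted here.
from openai import OpenAI

client = OpenAI()  # defaults to https://api.openai.com/v1
stream = client.chat.completions.create(
    model="gpt-5.1",
    messages=[{"role": "user", "content": "tell me joke"}],
    reasoning_effort="none",
    stream=True,
)
for chunk in stream:
    if chunk.choices and chunk.choices[0].delta.content:
        print(chunk.choices[0].delta.content, end="")
```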
Relevant log output
```
InternalServerError Traceback (most recent call last)
Cell In[3], line 1
----> 1 completion = client.chat.completions.create(model="openai/gpt-5.1", messages=[{"role": "user", "content": "tell me joke"}], reasoning_effort="none", stream=True)
File ~/Downloads/litellm/.venv/lib/python3.12/site-packages/openai/_utils/_utils.py:286, in required_args.<locals>.inner.<locals>.wrapper(*args, **kwargs)
284 msg = f"Missing required argument: {quote(missing[0])}"
285 raise TypeError(msg)
--> 286 return func(*args, **kwargs)
File ~/Downloads/litellm/.venv/lib/python3.12/site-packages/openai/resources/chat/completions/completions.py:1189, in Completions.create(self, messages, model, audio, frequency_penalty, function_call, functions, logit_bias, logprobs, max_completion_tokens, max_tokens, metadata, modalities, n, parallel_tool_calls, prediction, presence_penalty, prompt_cache_key, prompt_cache_retention, reasoning_effort, response_format, safety_identifier, seed, service_tier, stop, store, stream, stream_options, temperature, tool_choice, tools, top_logprobs, top_p, user, verbosity, web_search_options, extra_headers, extra_query, extra_body, timeout)
1142 @required_args(["messages", "model"], ["messages", "model", "stream"])
1143 def create(
1144 self,
(...) 1186 timeout: float | httpx.Timeout | None | NotGiven = not_given,
1187 ) -> ChatCompletion | Stream[ChatCompletionChunk]:
1188 validate_response_format(response_format)
-> 1189 return self._post(
1190 "/chat/completions",
1191 body=maybe_transform(
1192 {
1193 "messages": messages,
1194 "model": model,
1195 "audio": audio,
1196 "frequency_penalty": frequency_penalty,
1197 "function_call": function_call,
1198 "functions": functions,
1199 "logit_bias": logit_bias,
1200 "logprobs": logprobs,
1201 "max_completion_tokens": max_completion_tokens,
1202 "max_tokens": max_tokens,
1203 "metadata": metadata,
1204 "modalities": modalities,
1205 "n": n,
1206 "parallel_tool_calls": parallel_tool_calls,
1207 "prediction": prediction,
1208 "presence_penalty": presence_penalty,
1209 "prompt_cache_key": prompt_cache_key,
1210 "prompt_cache_retention": prompt_cache_retention,
1211 "reasoning_effort": reasoning_effort,
1212 "response_format": response_format,
1213 "safety_identifier": safety_identifier,
1214 "seed": seed,
1215 "service_tier": service_tier,
1216 "stop": stop,
1217 "store": store,
1218 "stream": stream,
1219 "stream_options": stream_options,
1220 "temperature": temperature,
1221 "tool_choice": tool_choice,
1222 "tools": tools,
1223 "top_logprobs": top_logprobs,
1224 "top_p": top_p,
1225 "user": user,
1226 "verbosity": verbosity,
1227 "web_search_options": web_search_options,
1228 },
1229 completion_create_params.CompletionCreateParamsStreaming
1230 if stream
1231 else completion_create_params.CompletionCreateParamsNonStreaming,
1232 ),
1233 options=make_request_options(
1234 extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
1235 ),
1236 cast_to=ChatCompletion,
1237 stream=stream or False,
1238 stream_cls=Stream[ChatCompletionChunk],
1239 )
File ~/Downloads/litellm/.venv/lib/python3.12/site-packages/openai/_base_client.py:1259, in SyncAPIClient.post(self, path, cast_to, body, options, files, stream, stream_cls)
1245 def post(
1246 self,
1247 path: str,
(...) 1254 stream_cls: type[_StreamT] | None = None,
1255 ) -> ResponseT | _StreamT:
1256 opts = FinalRequestOptions.construct(
1257 method="post", url=path, json_data=body, files=to_httpx_files(files), **options
1258 )
-> 1259 return cast(ResponseT, self.request(cast_to, opts, stream=stream, stream_cls=stream_cls))
File ~/Downloads/litellm/.venv/lib/python3.12/site-packages/openai/_base_client.py:1047, in SyncAPIClient.request(self, cast_to, options, stream, stream_cls)
1044 err.response.read()
1046 log.debug("Re-raising status error")
-> 1047 raise self._make_status_error_from_response(err.response) from None
1049 break
1051 assert response is not None, "could not resolve response (should never happen)"
InternalServerError: data: {"error": {"message": "litellm.ServiceUnavailableError: litellm.MidStreamFallbackError: litellm.APIConnectionError: APIConnectionError: OpenAIException - Error receiving chunk from stream: 1 validation error for ResponseCreatedEvent\nresponse.reasoning.effort\n  Input should be 'minimal', 'low', 'medium' or 'high' [type=literal_error, input_value='none', input_type=str]\n    For further information visit https://errors.pydantic.dev/2.10/v/literal_error. Received Model Group=openai/gpt-5.1\nAvailable Model Group Fallbacks=None", "type": null, "param": null, "code": "503"}}
```
Are you a ML Ops Team?
Yes
What LiteLLM version are you on?
v1.80.0