From 4813a9ab34b2864f94f25d2cdad83fa02c80eca5 Mon Sep 17 00:00:00 2001 From: zhangzhefang Date: Fri, 21 Nov 2025 23:35:10 +0800 Subject: [PATCH 1/4] fix(core): include llm_output in streaming LLMResult Fixes #34057 Previously, streaming mode did not include the `llm_output` field in the `LLMResult` object passed to `on_llm_end` callbacks. This broke integrations like Langfuse that rely on this field to extract metadata such as model name. This commit ensures that `llm_output` is always present in streaming mode by passing an empty dict `{}` in all streaming methods (`stream` and `astream`) for both `BaseLLM` and `BaseChatModel`. Changes: - Updated `BaseLLM.stream()` to include `llm_output={}` in LLMResult - Updated `BaseLLM.astream()` to include `llm_output={}` in LLMResult - Updated `BaseChatModel.stream()` to include `llm_output={}` in LLMResult - Updated `BaseChatModel.astream()` to include `llm_output={}` in LLMResult - Added test to verify `llm_output` is present in streaming callbacks --- .../language_models/chat_models.py | 4 +- .../langchain_core/language_models/llms.py | 6 ++- .../unit_tests/fake/test_fake_chat_model.py | 44 ++++++++++++++++++- 3 files changed, 48 insertions(+), 6 deletions(-) diff --git a/libs/core/langchain_core/language_models/chat_models.py b/libs/core/langchain_core/language_models/chat_models.py index bfd37ea58835a..83c472f2ea9e0 100644 --- a/libs/core/langchain_core/language_models/chat_models.py +++ b/libs/core/langchain_core/language_models/chat_models.py @@ -583,7 +583,7 @@ def stream( run_manager.on_llm_error(err, response=LLMResult(generations=[])) raise err - run_manager.on_llm_end(LLMResult(generations=[[generation]])) + run_manager.on_llm_end(LLMResult(generations=[[generation]], llm_output={})) @override async def astream( @@ -712,7 +712,7 @@ async def astream( raise err await run_manager.on_llm_end( - LLMResult(generations=[[generation]]), + LLMResult(generations=[[generation]], llm_output={}), ) # --- Custom methods --- diff --git a/libs/core/langchain_core/language_models/llms.py b/libs/core/langchain_core/language_models/llms.py index 813ae7b21b907..10facb84b8398 100644 --- a/libs/core/langchain_core/language_models/llms.py +++ b/libs/core/langchain_core/language_models/llms.py @@ -564,7 +564,7 @@ def stream( run_manager.on_llm_error(err, response=LLMResult(generations=[])) raise err - run_manager.on_llm_end(LLMResult(generations=[[generation]])) + run_manager.on_llm_end(LLMResult(generations=[[generation]], llm_output={})) @override async def astream( @@ -635,7 +635,9 @@ async def astream( await run_manager.on_llm_error(err, response=LLMResult(generations=[])) raise err - await run_manager.on_llm_end(LLMResult(generations=[[generation]])) + await run_manager.on_llm_end( + LLMResult(generations=[[generation]], llm_output={}) + ) # --- Custom methods --- diff --git a/libs/core/tests/unit_tests/fake/test_fake_chat_model.py b/libs/core/tests/unit_tests/fake/test_fake_chat_model.py index bf5629a12c54d..f13c5ef0c6126 100644 --- a/libs/core/tests/unit_tests/fake/test_fake_chat_model.py +++ b/libs/core/tests/unit_tests/fake/test_fake_chat_model.py @@ -7,7 +7,7 @@ from typing_extensions import override -from langchain_core.callbacks.base import AsyncCallbackHandler +from langchain_core.callbacks.base import AsyncCallbackHandler, BaseCallbackHandler from langchain_core.language_models import ( FakeListChatModel, FakeMessagesListChatModel, @@ -15,7 +15,7 @@ ParrotFakeChatModel, ) from langchain_core.messages import AIMessage, AIMessageChunk, BaseMessage, 
HumanMessage -from langchain_core.outputs import ChatGenerationChunk, GenerationChunk +from langchain_core.outputs import ChatGenerationChunk, GenerationChunk, LLMResult from tests.unit_tests.stubs import ( _any_id_ai_message, _any_id_ai_message_chunk, @@ -253,3 +253,43 @@ def test_fake_messages_list_chat_model_sleep_delay() -> None: elapsed = time.time() - start assert elapsed >= sleep_time + + +def test_stream_llm_result_contains_llm_output() -> None: + """Test that streaming mode includes llm_output in LLMResult.""" + + class LLMResultCaptureHandler(BaseCallbackHandler): + """Callback handler that captures LLMResult from on_llm_end.""" + + def __init__(self) -> None: + self.llm_results: list[LLMResult] = [] + + @override + def on_llm_end( + self, + response: LLMResult, + *, + run_id: UUID, + parent_run_id: UUID | None = None, + **kwargs: Any, + ) -> None: + """Capture the LLMResult.""" + self.llm_results.append(response) + + model = GenericFakeChatModel(messages=cycle([AIMessage(content="hello world")])) + handler = LLMResultCaptureHandler() + + # Consume the stream to trigger on_llm_end + chunks = list(model.stream("test", config={"callbacks": [handler]})) + + # Verify we got chunks + assert len(chunks) > 0 + + # Verify on_llm_end was called + assert len(handler.llm_results) == 1 + + # Verify llm_output field exists in the LLMResult + llm_result = handler.llm_results[0] + assert hasattr(llm_result, "llm_output") + assert llm_result.llm_output is not None + assert isinstance(llm_result.llm_output, dict) From 0d9a3e2fe598667baa7ce3b87c0300f8f7707a08 Mon Sep 17 00:00:00 2001 From: zhangzhefang Date: Sun, 23 Nov 2025 16:08:38 +0800 Subject: [PATCH 2/4] test(core): update test expectations for llm_output in streaming mode Update test_runnable_events_v1.py to expect llm_output={} instead of llm_output=None in streaming mode, consistent with the fix for issue #34057. This ensures that llm_output is always a dict ({}) rather than None when callbacks receive LLMResult in streaming mode. 
--- .../tests/unit_tests/runnables/test_runnable_events_v1.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/libs/core/tests/unit_tests/runnables/test_runnable_events_v1.py b/libs/core/tests/unit_tests/runnables/test_runnable_events_v1.py index 0b30aa58be517..afbfe57e78c56 100644 --- a/libs/core/tests/unit_tests/runnables/test_runnable_events_v1.py +++ b/libs/core/tests/unit_tests/runnables/test_runnable_events_v1.py @@ -648,7 +648,7 @@ def i_dont_stream(value: Any, config: RunnableConfig) -> Any: } ] ], - "llm_output": None, + "llm_output": {}, "run": None, "type": "LLMResult", }, @@ -780,7 +780,7 @@ async def ai_dont_stream(value: Any, config: RunnableConfig) -> Any: } ] ], - "llm_output": None, + "llm_output": {}, "run": None, "type": "LLMResult", }, @@ -1030,7 +1030,7 @@ async def test_event_stream_with_simple_chain() -> None: } ] ], - "llm_output": None, + "llm_output": {}, "run": None, "type": "LLMResult", }, @@ -1809,7 +1809,7 @@ async def test_with_llm() -> None: } ] ], - "llm_output": None, + "llm_output": {}, "run": None, "type": "LLMResult", }, From 20827bb5a2a3acb56a9836474bf4fefcf67a4b0f Mon Sep 17 00:00:00 2001 From: zhangzhefang Date: Sun, 23 Nov 2025 16:18:53 +0800 Subject: [PATCH 3/4] fix(core): ensure llm_output is always dict in all code paths This commit comprehensively fixes issue #34057 by ensuring llm_output={} in ALL code paths, not just streaming: Changes to chat_models.py: - Added llm_output={} to cache retrieval paths (sync/async) - Added llm_output={} to generate_from_stream() - Added llm_output={} to SimpleChatModel._generate() Changes to llms.py: - Added llm_output={} to SimpleLLM._generate() and _agenerate() Changes to fake_chat_models.py: - Added llm_output={} to all fake model _generate() methods: - FakeMessagesListChatModel - GenericFakeChatModel - ParrotFakeChatModel This ensures that llm_output is consistently an empty dict rather than None across streaming, non-streaming, cached, and fake model paths. 
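As an illustration of the invariant described above (not part of the patch itself): a downstream callback in the style of a Langfuse integration can now read `llm_output` without a `None` guard on both the streaming and non-streaming paths. A minimal sketch, using the same `GenericFakeChatModel`, `BaseCallbackHandler`, and `stream()`/`invoke()` APIs already exercised by this PR's test; the `MetadataHandler` class and the `model_name` key are hypothetical placeholders:

    # Illustrative only -- MetadataHandler is a hypothetical consumer; the
    # langchain_core imports are real APIs used elsewhere in this PR.
    from itertools import cycle
    from typing import Any
    from uuid import UUID

    from langchain_core.callbacks.base import BaseCallbackHandler
    from langchain_core.language_models import GenericFakeChatModel
    from langchain_core.messages import AIMessage
    from langchain_core.outputs import LLMResult


    class MetadataHandler(BaseCallbackHandler):
        """Hypothetical integration-style handler that reads llm_output."""

        def on_llm_end(
            self,
            response: LLMResult,
            *,
            run_id: UUID,
            parent_run_id: UUID | None = None,
            **kwargs: Any,
        ) -> None:
            # With llm_output guaranteed to be a dict, .get() works directly,
            # with no `or {}` fallback needed.
            model_name = response.llm_output.get("model_name", "<unknown>")
            print(f"model: {model_name}")


    model = GenericFakeChatModel(messages=cycle([AIMessage(content="hello")]))
    handler = MetadataHandler()
    list(model.stream("test", config={"callbacks": [handler]}))  # streaming path
    model.invoke("test", config={"callbacks": [handler]})        # non-streaming path
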
--- libs/core/langchain_core/language_models/chat_models.py | 9 +++++---- .../langchain_core/language_models/fake_chat_models.py | 6 +++--- libs/core/langchain_core/language_models/llms.py | 4 ++-- 3 files changed, 10 insertions(+), 9 deletions(-) diff --git a/libs/core/langchain_core/language_models/chat_models.py b/libs/core/langchain_core/language_models/chat_models.py index 83c472f2ea9e0..e1fe5d4483540 100644 --- a/libs/core/langchain_core/language_models/chat_models.py +++ b/libs/core/langchain_core/language_models/chat_models.py @@ -206,7 +206,8 @@ def generate_from_stream(stream: Iterator[ChatGenerationChunk]) -> ChatResult: message=message_chunk_to_message(generation.message), generation_info=generation.generation_info, ) - ] + ], + llm_output={}, ) @@ -1135,7 +1136,7 @@ def _generate_with_cache( cache_val = llm_cache.lookup(prompt, llm_string) if isinstance(cache_val, list): converted_generations = self._convert_cached_generations(cache_val) - return ChatResult(generations=converted_generations) + return ChatResult(generations=converted_generations, llm_output={}) elif self.cache is None: pass else: @@ -1253,7 +1254,7 @@ async def _agenerate_with_cache( cache_val = await llm_cache.alookup(prompt, llm_string) if isinstance(cache_val, list): converted_generations = self._convert_cached_generations(cache_val) - return ChatResult(generations=converted_generations) + return ChatResult(generations=converted_generations, llm_output={}) elif self.cache is None: pass else: @@ -1742,7 +1743,7 @@ def _generate( output_str = self._call(messages, stop=stop, run_manager=run_manager, **kwargs) message = AIMessage(content=output_str) generation = ChatGeneration(message=message) - return ChatResult(generations=[generation]) + return ChatResult(generations=[generation], llm_output={}) @abstractmethod def _call( diff --git a/libs/core/langchain_core/language_models/fake_chat_models.py b/libs/core/langchain_core/language_models/fake_chat_models.py index 7ffb589601300..2addd56cfaf73 100644 --- a/libs/core/langchain_core/language_models/fake_chat_models.py +++ b/libs/core/langchain_core/language_models/fake_chat_models.py @@ -44,7 +44,7 @@ def _generate( else: self.i = 0 generation = ChatGeneration(message=response) - return ChatResult(generations=[generation]) + return ChatResult(generations=[generation], llm_output={}) @property @override @@ -261,7 +261,7 @@ def _generate( message = next(self.messages) message_ = AIMessage(content=message) if isinstance(message, str) else message generation = ChatGeneration(message=message_) - return ChatResult(generations=[generation]) + return ChatResult(generations=[generation], llm_output={}) def _stream( self, @@ -386,7 +386,7 @@ def _generate( run_manager: CallbackManagerForLLMRun | None = None, **kwargs: Any, ) -> ChatResult: - return ChatResult(generations=[ChatGeneration(message=messages[-1])]) + return ChatResult(generations=[ChatGeneration(message=messages[-1])], llm_output={}) @property def _llm_type(self) -> str: diff --git a/libs/core/langchain_core/language_models/llms.py b/libs/core/langchain_core/language_models/llms.py index 10facb84b8398..283ef1aad0b9a 100644 --- a/libs/core/langchain_core/language_models/llms.py +++ b/libs/core/langchain_core/language_models/llms.py @@ -1504,7 +1504,7 @@ def _generate( else self._call(prompt, stop=stop, **kwargs) ) generations.append([Generation(text=text)]) - return LLMResult(generations=generations) + return LLMResult(generations=generations, llm_output={}) async def _agenerate( self, @@ -1522,4 +1522,4 @@ async 
def _agenerate( else await self._acall(prompt, stop=stop, **kwargs) ) generations.append([Generation(text=text)]) - return LLMResult(generations=generations) + return LLMResult(generations=generations, llm_output={}) From dcbe68a6688d4e0e5323b0eac46dc200cb07624c Mon Sep 17 00:00:00 2001 From: zhangzhefang Date: Sun, 23 Nov 2025 16:25:34 +0800 Subject: [PATCH 4/4] style: fix line length in fake_chat_models.py Split long line to comply with max line length of 88 characters. --- libs/core/langchain_core/language_models/fake_chat_models.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/libs/core/langchain_core/language_models/fake_chat_models.py b/libs/core/langchain_core/language_models/fake_chat_models.py index 2addd56cfaf73..99f71f6cf8788 100644 --- a/libs/core/langchain_core/language_models/fake_chat_models.py +++ b/libs/core/langchain_core/language_models/fake_chat_models.py @@ -386,7 +386,9 @@ def _generate( run_manager: CallbackManagerForLLMRun | None = None, **kwargs: Any, ) -> ChatResult: - return ChatResult(generations=[ChatGeneration(message=messages[-1])], llm_output={}) + return ChatResult( + generations=[ChatGeneration(message=messages[-1])], llm_output={} + ) @property def _llm_type(self) -> str:
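
A possible follow-up not included in these patches: the new unit test covers the sync `stream()` path only, and an async counterpart could exercise `astream()` the same way. A minimal sketch, assuming the same `GenericFakeChatModel` and the `AsyncCallbackHandler` base class already imported by the test module; the handler name and the `main()` wrapper are placeholders:

    import asyncio
    from itertools import cycle
    from typing import Any
    from uuid import UUID

    from langchain_core.callbacks.base import AsyncCallbackHandler
    from langchain_core.language_models import GenericFakeChatModel
    from langchain_core.messages import AIMessage
    from langchain_core.outputs import LLMResult


    class AsyncLLMResultCaptureHandler(AsyncCallbackHandler):
        """Placeholder async handler that records LLMResult from on_llm_end."""

        def __init__(self) -> None:
            self.llm_results: list[LLMResult] = []

        async def on_llm_end(
            self,
            response: LLMResult,
            *,
            run_id: UUID,
            parent_run_id: UUID | None = None,
            **kwargs: Any,
        ) -> None:
            self.llm_results.append(response)


    async def main() -> None:
        model = GenericFakeChatModel(messages=cycle([AIMessage(content="hello world")]))
        handler = AsyncLLMResultCaptureHandler()
        # Consume the async stream so on_llm_end fires on the astream() path.
        chunks = [
            chunk
            async for chunk in model.astream("test", config={"callbacks": [handler]})
        ]
        assert chunks
        assert isinstance(handler.llm_results[0].llm_output, dict)


    asyncio.run(main())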