From 4813a9ab34b2864f94f25d2cdad83fa02c80eca5 Mon Sep 17 00:00:00 2001 From: zhangzhefang Date: Fri, 21 Nov 2025 23:35:10 +0800 Subject: [PATCH 1/4] fix(core): include llm_output in streaming LLMResult Fixes #34057 Previously, streaming mode did not include the `llm_output` field in the `LLMResult` object passed to `on_llm_end` callbacks. This broke integrations like Langfuse that rely on this field to extract metadata such as model name. This commit ensures that `llm_output` is always present in streaming mode by passing an empty dict `{}` in all streaming methods (`stream` and `astream`) for both `BaseLLM` and `BaseChatModel`. Changes: - Updated `BaseLLM.stream()` to include `llm_output={}` in LLMResult - Updated `BaseLLM.astream()` to include `llm_output={}` in LLMResult - Updated `BaseChatModel.stream()` to include `llm_output={}` in LLMResult - Updated `BaseChatModel.astream()` to include `llm_output={}` in LLMResult - Added test to verify `llm_output` is present in streaming callbacks --- .../language_models/chat_models.py | 4 +- .../langchain_core/language_models/llms.py | 6 ++- .../unit_tests/fake/test_fake_chat_model.py | 44 ++++++++++++++++++- 3 files changed, 48 insertions(+), 6 deletions(-) diff --git a/libs/core/langchain_core/language_models/chat_models.py b/libs/core/langchain_core/language_models/chat_models.py index bfd37ea58835a..83c472f2ea9e0 100644 --- a/libs/core/langchain_core/language_models/chat_models.py +++ b/libs/core/langchain_core/language_models/chat_models.py @@ -583,7 +583,7 @@ def stream( run_manager.on_llm_error(err, response=LLMResult(generations=[])) raise err - run_manager.on_llm_end(LLMResult(generations=[[generation]])) + run_manager.on_llm_end(LLMResult(generations=[[generation]], llm_output={})) @override async def astream( @@ -712,7 +712,7 @@ async def astream( raise err await run_manager.on_llm_end( - LLMResult(generations=[[generation]]), + LLMResult(generations=[[generation]], llm_output={}), ) # --- Custom methods --- diff --git a/libs/core/langchain_core/language_models/llms.py b/libs/core/langchain_core/language_models/llms.py index 813ae7b21b907..10facb84b8398 100644 --- a/libs/core/langchain_core/language_models/llms.py +++ b/libs/core/langchain_core/language_models/llms.py @@ -564,7 +564,7 @@ def stream( run_manager.on_llm_error(err, response=LLMResult(generations=[])) raise err - run_manager.on_llm_end(LLMResult(generations=[[generation]])) + run_manager.on_llm_end(LLMResult(generations=[[generation]], llm_output={})) @override async def astream( @@ -635,7 +635,9 @@ async def astream( await run_manager.on_llm_error(err, response=LLMResult(generations=[])) raise err - await run_manager.on_llm_end(LLMResult(generations=[[generation]])) + await run_manager.on_llm_end( + LLMResult(generations=[[generation]], llm_output={}) + ) # --- Custom methods --- diff --git a/libs/core/tests/unit_tests/fake/test_fake_chat_model.py b/libs/core/tests/unit_tests/fake/test_fake_chat_model.py index bf5629a12c54d..f13c5ef0c6126 100644 --- a/libs/core/tests/unit_tests/fake/test_fake_chat_model.py +++ b/libs/core/tests/unit_tests/fake/test_fake_chat_model.py @@ -7,7 +7,7 @@ from typing_extensions import override -from langchain_core.callbacks.base import AsyncCallbackHandler +from langchain_core.callbacks.base import AsyncCallbackHandler, BaseCallbackHandler from langchain_core.language_models import ( FakeListChatModel, FakeMessagesListChatModel, @@ -15,7 +15,7 @@ ParrotFakeChatModel, ) from langchain_core.messages import AIMessage, AIMessageChunk, BaseMessage, 
HumanMessage -from langchain_core.outputs import ChatGenerationChunk, GenerationChunk +from langchain_core.outputs import ChatGenerationChunk, GenerationChunk, LLMResult from tests.unit_tests.stubs import ( _any_id_ai_message, _any_id_ai_message_chunk, @@ -253,3 +253,43 @@ def test_fake_messages_list_chat_model_sleep_delay() -> None: elapsed = time.time() - start assert elapsed >= sleep_time + + +def test_stream_llm_result_contains_llm_output() -> None: + """Test that streaming mode includes llm_output in LLMResult.""" + + class LLMResultCaptureHandler(BaseCallbackHandler): + """Callback handler that captures LLMResult from on_llm_end.""" + + def __init__(self) -> None: + self.llm_results: list[LLMResult] = [] + + @override + def on_llm_end( + self, + response: LLMResult, + *, + run_id: UUID, + parent_run_id: UUID | None = None, + **kwargs: Any, + ) -> None: + """Capture the LLMResult.""" + self.llm_results.append(response) + + model = GenericFakeChatModel(messages=cycle([AIMessage(content="hello world")])) + handler = LLMResultCaptureHandler() + + # Consume the stream to trigger on_llm_end + chunks = list(model.stream("test", config={"callbacks": [handler]})) + + # Verify we got chunks + assert len(chunks) > 0 + + # Verify on_llm_end was called + assert len(handler.llm_results) == 1 + + # Verify llm_output field exists in the LLMResult + llm_result = handler.llm_results[0] + assert hasattr(llm_result, "llm_output") + assert llm_result.llm_output is not None + assert isinstance(llm_result.llm_output, dict) From 0d9a3e2fe598667baa7ce3b87c0300f8f7707a08 Mon Sep 17 00:00:00 2001 From: zhangzhefang Date: Sun, 23 Nov 2025 16:08:38 +0800 Subject: [PATCH 2/4] test(core): update test expectations for llm_output in streaming mode Update test_runnable_events_v1.py to expect llm_output={} instead of llm_output=None in streaming mode, consistent with the fix for issue #34057. This ensures that llm_output is always a dict ({}) rather than None when callbacks receive LLMResult in streaming mode. 
--- .../tests/unit_tests/runnables/test_runnable_events_v1.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/libs/core/tests/unit_tests/runnables/test_runnable_events_v1.py b/libs/core/tests/unit_tests/runnables/test_runnable_events_v1.py index 0b30aa58be517..afbfe57e78c56 100644 --- a/libs/core/tests/unit_tests/runnables/test_runnable_events_v1.py +++ b/libs/core/tests/unit_tests/runnables/test_runnable_events_v1.py @@ -648,7 +648,7 @@ def i_dont_stream(value: Any, config: RunnableConfig) -> Any: } ] ], - "llm_output": None, + "llm_output": {}, "run": None, "type": "LLMResult", }, @@ -780,7 +780,7 @@ async def ai_dont_stream(value: Any, config: RunnableConfig) -> Any: } ] ], - "llm_output": None, + "llm_output": {}, "run": None, "type": "LLMResult", }, @@ -1030,7 +1030,7 @@ async def test_event_stream_with_simple_chain() -> None: } ] ], - "llm_output": None, + "llm_output": {}, "run": None, "type": "LLMResult", }, @@ -1809,7 +1809,7 @@ async def test_with_llm() -> None: } ] ], - "llm_output": None, + "llm_output": {}, "run": None, "type": "LLMResult", }, From 20827bb5a2a3acb56a9836474bf4fefcf67a4b0f Mon Sep 17 00:00:00 2001 From: zhangzhefang Date: Sun, 23 Nov 2025 16:18:53 +0800 Subject: [PATCH 3/4] fix(core): ensure llm_output is always dict in all code paths This commit comprehensively fixes issue #34057 by ensuring llm_output={} in ALL code paths, not just streaming: Changes to chat_models.py: - Added llm_output={} to cache retrieval paths (sync/async) - Added llm_output={} to generate_from_stream() - Added llm_output={} to SimpleChatModel._generate() Changes to llms.py: - Added llm_output={} to SimpleLLM._generate() and _agenerate() Changes to fake_chat_models.py: - Added llm_output={} to all fake model _generate() methods: - FakeMessagesListChatModel - GenericFakeChatModel - ParrotFakeChatModel This ensures that llm_output is consistently an empty dict rather than None across streaming, non-streaming, cached, and fake model paths. 
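As an illustration of the invariant described above (not part of the patch itself): a downstream callback in the style of a Langfuse integration can now read `llm_output` without a `None` guard on both the streaming and non-streaming paths. A minimal sketch, using the same `GenericFakeChatModel`, `BaseCallbackHandler`, and `stream()`/`invoke()` APIs already exercised by this PR's test; the `MetadataHandler` class and the `model_name` key are hypothetical placeholders:

    # Illustrative only -- MetadataHandler is a hypothetical consumer; the
    # langchain_core imports are real APIs used elsewhere in this PR.
    from itertools import cycle
    from typing import Any
    from uuid import UUID

    from langchain_core.callbacks.base import BaseCallbackHandler
    from langchain_core.language_models import GenericFakeChatModel
    from langchain_core.messages import AIMessage
    from langchain_core.outputs import LLMResult


    class MetadataHandler(BaseCallbackHandler):
        """Hypothetical integration-style handler that reads llm_output."""

        def on_llm_end(
            self,
            response: LLMResult,
            *,
            run_id: UUID,
            parent_run_id: UUID | None = None,
            **kwargs: Any,
        ) -> None:
            # With llm_output guaranteed to be a dict, .get() works directly,
            # with no `or {}` fallback needed.
            model_name = response.llm_output.get("model_name", "<unknown>")
            print(f"model: {model_name}")


    model = GenericFakeChatModel(messages=cycle([AIMessage(content="hello")]))
    handler = MetadataHandler()
    list(model.stream("test", config={"callbacks": [handler]}))  # streaming path
    model.invoke("test", config={"callbacks": [handler]})        # non-streaming path
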
--- libs/core/langchain_core/language_models/chat_models.py | 9 +++++---- .../langchain_core/language_models/fake_chat_models.py | 6 +++--- libs/core/langchain_core/language_models/llms.py | 4 ++-- 3 files changed, 10 insertions(+), 9 deletions(-) diff --git a/libs/core/langchain_core/language_models/chat_models.py b/libs/core/langchain_core/language_models/chat_models.py index 83c472f2ea9e0..e1fe5d4483540 100644 --- a/libs/core/langchain_core/language_models/chat_models.py +++ b/libs/core/langchain_core/language_models/chat_models.py @@ -206,7 +206,8 @@ def generate_from_stream(stream: Iterator[ChatGenerationChunk]) -> ChatResult: message=message_chunk_to_message(generation.message), generation_info=generation.generation_info, ) - ] + ], + llm_output={}, ) @@ -1135,7 +1136,7 @@ def _generate_with_cache( cache_val = llm_cache.lookup(prompt, llm_string) if isinstance(cache_val, list): converted_generations = self._convert_cached_generations(cache_val) - return ChatResult(generations=converted_generations) + return ChatResult(generations=converted_generations, llm_output={}) elif self.cache is None: pass else: @@ -1253,7 +1254,7 @@ async def _agenerate_with_cache( cache_val = await llm_cache.alookup(prompt, llm_string) if isinstance(cache_val, list): converted_generations = self._convert_cached_generations(cache_val) - return ChatResult(generations=converted_generations) + return ChatResult(generations=converted_generations, llm_output={}) elif self.cache is None: pass else: @@ -1742,7 +1743,7 @@ def _generate( output_str = self._call(messages, stop=stop, run_manager=run_manager, **kwargs) message = AIMessage(content=output_str) generation = ChatGeneration(message=message) - return ChatResult(generations=[generation]) + return ChatResult(generations=[generation], llm_output={}) @abstractmethod def _call( diff --git a/libs/core/langchain_core/language_models/fake_chat_models.py b/libs/core/langchain_core/language_models/fake_chat_models.py index 7ffb589601300..2addd56cfaf73 100644 --- a/libs/core/langchain_core/language_models/fake_chat_models.py +++ b/libs/core/langchain_core/language_models/fake_chat_models.py @@ -44,7 +44,7 @@ def _generate( else: self.i = 0 generation = ChatGeneration(message=response) - return ChatResult(generations=[generation]) + return ChatResult(generations=[generation], llm_output={}) @property @override @@ -261,7 +261,7 @@ def _generate( message = next(self.messages) message_ = AIMessage(content=message) if isinstance(message, str) else message generation = ChatGeneration(message=message_) - return ChatResult(generations=[generation]) + return ChatResult(generations=[generation], llm_output={}) def _stream( self, @@ -386,7 +386,7 @@ def _generate( run_manager: CallbackManagerForLLMRun | None = None, **kwargs: Any, ) -> ChatResult: - return ChatResult(generations=[ChatGeneration(message=messages[-1])]) + return ChatResult(generations=[ChatGeneration(message=messages[-1])], llm_output={}) @property def _llm_type(self) -> str: diff --git a/libs/core/langchain_core/language_models/llms.py b/libs/core/langchain_core/language_models/llms.py index 10facb84b8398..283ef1aad0b9a 100644 --- a/libs/core/langchain_core/language_models/llms.py +++ b/libs/core/langchain_core/language_models/llms.py @@ -1504,7 +1504,7 @@ def _generate( else self._call(prompt, stop=stop, **kwargs) ) generations.append([Generation(text=text)]) - return LLMResult(generations=generations) + return LLMResult(generations=generations, llm_output={}) async def _agenerate( self, @@ -1522,4 +1522,4 @@ async 
def _agenerate( else await self._acall(prompt, stop=stop, **kwargs) ) generations.append([Generation(text=text)]) - return LLMResult(generations=generations) + return LLMResult(generations=generations, llm_output={}) From dcbe68a6688d4e0e5323b0eac46dc200cb07624c Mon Sep 17 00:00:00 2001 From: zhangzhefang Date: Sun, 23 Nov 2025 16:25:34 +0800 Subject: [PATCH 4/4] style: fix line length in fake_chat_models.py Split long line to comply with max line length of 88 characters. --- libs/core/langchain_core/language_models/fake_chat_models.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/libs/core/langchain_core/language_models/fake_chat_models.py b/libs/core/langchain_core/language_models/fake_chat_models.py index 2addd56cfaf73..99f71f6cf8788 100644 --- a/libs/core/langchain_core/language_models/fake_chat_models.py +++ b/libs/core/langchain_core/language_models/fake_chat_models.py @@ -386,7 +386,9 @@ def _generate( run_manager: CallbackManagerForLLMRun | None = None, **kwargs: Any, ) -> ChatResult: - return ChatResult(generations=[ChatGeneration(message=messages[-1])], llm_output={}) + return ChatResult( + generations=[ChatGeneration(message=messages[-1])], llm_output={} + ) @property def _llm_type(self) -> str:
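
A possible follow-up not included in these patches: the new unit test covers the sync `stream()` path only, and an async counterpart could exercise `astream()` the same way. A minimal sketch, assuming the same `GenericFakeChatModel` and the `AsyncCallbackHandler` base class already imported by the test module; the handler name and the `main()` wrapper are placeholders:

    import asyncio
    from itertools import cycle
    from typing import Any
    from uuid import UUID

    from langchain_core.callbacks.base import AsyncCallbackHandler
    from langchain_core.language_models import GenericFakeChatModel
    from langchain_core.messages import AIMessage
    from langchain_core.outputs import LLMResult


    class AsyncLLMResultCaptureHandler(AsyncCallbackHandler):
        """Placeholder async handler that records LLMResult from on_llm_end."""

        def __init__(self) -> None:
            self.llm_results: list[LLMResult] = []

        async def on_llm_end(
            self,
            response: LLMResult,
            *,
            run_id: UUID,
            parent_run_id: UUID | None = None,
            **kwargs: Any,
        ) -> None:
            self.llm_results.append(response)


    async def main() -> None:
        model = GenericFakeChatModel(messages=cycle([AIMessage(content="hello world")]))
        handler = AsyncLLMResultCaptureHandler()
        # Consume the async stream so on_llm_end fires on the astream() path.
        chunks = [
            chunk
            async for chunk in model.astream("test", config={"callbacks": [handler]})
        ]
        assert chunks
        assert isinstance(handler.llm_results[0].llm_output, dict)


    asyncio.run(main())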