Fixing gen_answer failover leaving raw_answer blank (#1077)

jamesbraza · jamesbraza · commit a39959e43b0f · 2025-08-26T14:36:50.000-07:00
diff --git a/src/paperqa/agents/tools.py b/src/paperqa/agents/tools.py
@@ -309,9 +309,6 @@ async def gen_answer(self, state: EnvironmentState) -> str:
         Args:
             state: Current state.
         """
-        if not state.docs.docs:
-            raise EmptyDocsError("Not generating an answer due to having no papers.")
-
         logger.info(f"Generating answer for '{state.session.question}'.")
 
         if f"{self.TOOL_FN_NAME}_initialized" in self.settings.agent.callbacks:
diff --git a/src/paperqa/docs.py b/src/paperqa/docs.py
@@ -26,7 +26,7 @@
     NumpyVectorStore,
     VectorStore,
 )
-from paperqa.prompts import CANNOT_ANSWER_PHRASE
+from paperqa.prompts import CANNOT_ANSWER_PHRASE, EMPTY_CONTEXTS
 from paperqa.readers import read_doc
 from paperqa.settings import MaybeSettings, get_settings
 from paperqa.types import Doc, DocDetails, DocKey, PQASession, Text
@@ -738,7 +738,7 @@ async def aquery(
         contexts = session.contexts
         if answer_config.get_evidence_if_no_contexts and not contexts:
             session = await self.aget_evidence(
-                query=session,
+                session,
                 callbacks=callbacks,
                 settings=settings,
                 embedding_model=embedding_model,
@@ -770,9 +770,10 @@ async def aquery(
             pre_str=pre_str,
         )
 
-        if len(context_str.strip()) < 10:  # noqa: PLR2004
+        if len(context_str.strip()) <= EMPTY_CONTEXTS:
             answer_text = (
-                f"{CANNOT_ANSWER_PHRASE} this question due to insufficient information."
+                f"{CANNOT_ANSWER_PHRASE} this question due to"
+                f" {'having no papers' if not self.docs else 'insufficient information'}."
             )
             answer_reasoning = None
         else:
diff --git a/src/paperqa/prompts.py b/src/paperqa/prompts.py
@@ -136,5 +136,6 @@
 )
 
 CONTEXT_OUTER_PROMPT = "{context_str}\n\nValid Keys: {valid_keys}"
+EMPTY_CONTEXTS = len(CONTEXT_OUTER_PROMPT.format(context_str="", valid_keys="").strip())
 CONTEXT_INNER_PROMPT_NOT_DETAILED = "{name}: {text}"
 CONTEXT_INNER_PROMPT = f"{CONTEXT_INNER_PROMPT_NOT_DETAILED}\nFrom {{citation}}"
diff --git a/tests/test_agents.py b/tests/test_agents.py
@@ -12,7 +12,7 @@
 from functools import wraps
 from pathlib import Path
 from typing import cast
-from unittest.mock import AsyncMock, MagicMock, patch
+from unittest.mock import AsyncMock, patch
 from uuid import uuid4
 
 import ldp.agent
@@ -21,6 +21,7 @@
     Environment,
     Tool,
     ToolRequestMessage,
+    ToolResponseMessage,
     ToolsAdapter,
     ToolSelector,
 )
@@ -469,26 +470,27 @@ async def test_timeout(agent_test_settings: Settings, agent_type: str | type) ->
     agent_test_settings.agent.timeout = 0.05  # Give time for Environment.reset()
     agent_test_settings.llm = "gpt-4o-mini"
     agent_test_settings.agent.tool_names = {"gen_answer", "complete"}
-    docs = Docs()
+    orig_exec_tool_calls = PaperQAEnvironment.exec_tool_calls
+    tool_responses: list[list[ToolResponseMessage]] = []
 
-    async def custom_aget_evidence(*_, **kwargs) -> PQASession:  # noqa: RUF029
-        return kwargs["query"]
+    async def spy_exec_tool_calls(*args, **kwargs) -> list[ToolResponseMessage]:
+        responses = await orig_exec_tool_calls(*args, **kwargs)
+        tool_responses.append(responses)
+        return responses
 
-    with (
-        patch.object(docs, "docs", {"stub_key": MagicMock(spec_set=Doc)}),
-        patch.multiple(
-            Docs, clear_docs=MagicMock(), aget_evidence=custom_aget_evidence
-        ),
-    ):
+    with patch.object(PaperQAEnvironment, "exec_tool_calls", spy_exec_tool_calls):
         response = await agent_query(
             query="Are COVID-19 vaccines effective?",
             settings=agent_test_settings,
-            docs=docs,
             agent_type=agent_type,
         )
     # Ensure that GenerateAnswerTool was called in truncation's failover
     assert response.status == AgentStatus.TRUNCATED, "Agent did not timeout"
     assert CANNOT_ANSWER_PHRASE in response.session.answer
+    (last_response,) = tool_responses[-1]
+    assert (
+        "no papers" in last_response.content
+    ), "Expecting agent to have been shown specifics on the failure"
 
 
 @pytest.mark.flaky(reruns=5, only_rerun=["AssertionError"])

Original file line number	Diff line number	Diff line change
`@@ -136,5 +136,6 @@`
`136`	`136`	`)`
`137`	`137`
`138`	`138`	`CONTEXT_OUTER_PROMPT = "{context_str}\n\nValid Keys: {valid_keys}"`
	`139`	`+EMPTY_CONTEXTS = len(CONTEXT_OUTER_PROMPT.format(context_str="", valid_keys="").strip())`
`139`	`140`	`CONTEXT_INNER_PROMPT_NOT_DETAILED = "{name}: {text}"`
`140`	`141`	`CONTEXT_INNER_PROMPT = f"{CONTEXT_INNER_PROMPT_NOT_DETAILED}\nFrom {{citation}}"`