Skip to content

Commit 52436e8

Browse files
committed
Fix for bulk run with LiteLLM backend
1 parent ae497af commit 52436e8

File tree

2 files changed

+7
-10
lines changed

2 files changed

+7
-10
lines changed

klaudbiusz/cli/bulk_run.py

Lines changed: 6 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -8,18 +8,17 @@
88
from pathlib import Path
99
from typing import TypedDict
1010

11-
# Disable LiteLLM's async logging to avoid event loop issues with joblib
12-
import litellm
11+
from litellm_multiprocess_fix import patch_litellm_for_multiprocessing
12+
13+
patch_litellm_for_multiprocessing()
14+
1315
from codegen import ClaudeAppBuilder
1416
from codegen import GenerationMetrics as ClaudeGenerationMetrics
1517
from codegen_multi import LiteLLMAppBuilder
1618
from dotenv import load_dotenv
1719
from joblib import Parallel, delayed
1820
from prompts_databricks import PROMPTS as DATABRICKS_PROMPTS
1921

20-
litellm.turn_off_message_logging = True
21-
litellm.drop_params = True # silently drop unsupported params instead of warning
22-
2322
# Unified type for metrics from both backends
2423
GenerationMetrics = ClaudeGenerationMetrics
2524

@@ -48,12 +47,9 @@ def run_single_generation(
4847
suppress_logs: bool = True,
4948
mcp_binary: str | None = None,
5049
) -> RunResult:
51-
# Ensure LiteLLM is configured fresh in each worker process
50+
# re-apply litellm patch in worker process (joblib uses spawn/fork)
5251
if backend == "litellm":
53-
import litellm
54-
55-
litellm.turn_off_message_logging = True
56-
litellm.drop_params = True
52+
patch_litellm_for_multiprocessing()
5753

5854
def timeout_handler(signum, frame):
5955
raise TimeoutError("Generation timed out after 1200 seconds")

klaudbiusz/cli/codegen_multi.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -347,6 +347,7 @@ def cli(
347347
app_name: str | None = None,
348348
model: str = "openrouter/minimax/minimax-m2", # other good options: "openrouter/moonshotai/kimi-k2-thinking", "gemini/gemini-2.5-pro",
349349
# some open-weights platforms provide an OpenAI/Anthropic-like API that can be used like
350+
# OPENAI_API_KEY=$DATABRICKS_TOKEN OPENAI_API_BASE=https://$DATABRICKS_HOST/serving-endpoints uv run cli/single_run.py "..." --backend=litellm --model="openai/databricks-gpt-oss-120b"
350351
# OPENAI_API_BASE="https://api.minimax.io/v1" OPENAI_API_KEY="$MINIMAX_API_KEY" uv run cli/single_run.py "..." --backend=litellm --model="openai/MiniMax-M2"
351352
# ANTHROPIC_BASE_URL="https://api.minimax.io/anthropic" ANTHROPIC_API_KEY="$MINIMAX_API_KEY" uv run cli/single_run.py "..." --backend=litellm --model="anthropic/MiniMax-M2"
352353
suppress_logs: bool = False,

0 commit comments

Comments
 (0)