fix: improve LLM provider integration and debug logging

HendrikReh · HendrikReh · commit eacea936872e · 2025-10-17T17:19:59.000+02:00
This commit addresses critical issues with Gemini and OpenAI provide integrations and enhances debug logging across all LLM adapters.

Changes:
  - Reroute Gemini to standalone HTTP client bypassing rig.rs due to
    deserialization issues with current Gemini API
  - Add generationConfig.responseMimeType to Gemini requests to enforce
    JSON responses
  - Switch OpenAI from json_schema to json_object response format for
    better compatibility with GPT-5 reasoning models
  - Add flexible regex patterns INSTR_IGNORE and PROMPT_LEAK to
    rules/patterns.json to catch attack variations
  - Enhance debug logging: always log raw LLM responses when --debug flag
    is enabled (not only on errors)
  - Add debug logging to Gemini standalone client
  - Remove unused verdict_json_schema function from rig adapter
  - Update README.md with detailed provider integration pitfalls

Fixes:
  - Gemini "missing field generationConfig" deserialization errors
  - OpenAI GPT-5 returning only reasoning traces with no content
  - Detection rules missing "ignore your previous instructions" variations
  - Debug logging only showing errors instead of all raw responses
diff --git a/README.md b/README.md
@@ -533,8 +533,9 @@ Based on this ~7-hour hackathon experience building a production-ready Rust CLI:
 While wiring rig.rs into real LLM providers we hit a few repeat offenders. The highlights:
 
 - **Anthropic truncation & malformed JSON** — Responses frequently dropped closing quotes/braces and embedded raw newlines inside strings. We added newline sanitisation, automatic quote/brace repair, a JSON5 fallback, and eventually a fallback verdict so scans never abort.
-- **OpenAI reasoning-only replies** — GPT‑5 often streamed only `reasoning` traces or tool calls. We now capture tool-call arguments, request OpenAI’s structured JSON schema, and fall back to an “unknown” verdict when the model withholds textual content.
-- **Gemini empty responses** — Successful calls can still return empty candidates. Health checks now treat empty responses as warnings instead of hard failures, surfacing an “unknown” verdict with guidance.
+- **OpenAI reasoning-only replies** — GPT‑5 reasoning models returned only reasoning traces without textual content when using `json_schema` response format. We now capture tool-call arguments and use simpler `json_object` response format (instead of strict `json_schema`) for better compatibility with reasoning models. Falls back to an "unknown" verdict when the model withholds textual content.
+- **Gemini rig.rs incompatibility** — Rig's Gemini implementation has deserialization issues with the current Gemini API (missing `generationConfig` field errors). The Gemini API also rejects requests combining forced function calling (ANY mode) with `responseMimeType: 'application/json'`. Solution: Bypassed rig entirely for Gemini; implemented standalone HTTP client using Gemini's native REST API with prompt-based JSON formatting.
+- **Gemini empty responses** — Successful calls can still return empty candidates. Health checks now treat empty responses as warnings instead of hard failures, surfacing an "unknown" verdict with guidance.
 - **Debugging provider quirks** — The global `--debug` flag flips `LLM_GUARD_DEBUG=1`, causing the adapter to log the raw upstream payload whenever parsing fails, making it obvious when prompt/schema updates are needed.
 
 These guardrails keep the CLI resilient even when upstream providers change response contracts mid-flight.
diff --git a/crates/llm-guard-core/src/llm/gemini.rs b/crates/llm-guard-core/src/llm/gemini.rs
@@ -4,6 +4,7 @@ use anyhow::{anyhow, bail, Context, Result};
 use async_trait::async_trait;
 use reqwest::Client;
 use serde::{Deserialize, Serialize};
+use std::env;
 use std::time::Duration;
 use tokio::time::sleep;
 
@@ -64,6 +65,9 @@ impl LlmClient for GeminiClient {
                     )),
                 }],
             }],
+            generation_config: Some(GeminiGenerationConfig {
+                response_mime_type: "application/json".to_string(),
+            }),
         };
 
         let mut attempt = 0u32;
@@ -106,6 +110,15 @@ impl LlmClient for GeminiClient {
                 .json()
                 .await
                 .context("failed to parse Gemini response")?;
+
+            // Always log raw response when debug is enabled
+            if debug_enabled() {
+                tracing::warn!(
+                    "gemini raw response: {}",
+                    serde_json::to_string_pretty(&message).unwrap_or_default()
+                );
+            }
+
             let content = message
                 .candidates
                 .into_iter()
@@ -114,6 +127,11 @@ impl LlmClient for GeminiClient {
                 .next()
                 .ok_or_else(|| anyhow!("Gemini response missing message content"))?;
 
+            // Log extracted content when debug is enabled
+            if debug_enabled() {
+                tracing::warn!("gemini extracted content: {}", content);
+            }
+
             let verdict: ModelVerdict = serde_json::from_str(&content)
                 .context("expected JSON verdict from Gemini response")?;
 
@@ -135,9 +153,22 @@ fn truncate(input: &str, max_chars: usize) -> String {
     input.chars().take(max_chars).collect::<String>() + "…"
 }
 
+fn debug_enabled() -> bool {
+    matches!(env::var("LLM_GUARD_DEBUG"), Ok(val) if !val.is_empty() && val != "0")
+}
+
 #[derive(Serialize)]
 struct GeminiRequest {
     contents: Vec<GeminiRequestContent>,
+    #[serde(skip_serializing_if = "Option::is_none")]
+    #[serde(rename = "generationConfig")]
+    generation_config: Option<GeminiGenerationConfig>,
+}
+
+#[derive(Serialize)]
+struct GeminiGenerationConfig {
+    #[serde(rename = "responseMimeType")]
+    response_mime_type: String,
 }
 
 #[derive(Serialize)]
@@ -152,22 +183,22 @@ struct GeminiRequestPart {
     text: Option<String>,
 }
 
-#[derive(Deserialize)]
+#[derive(Deserialize, Serialize)]
 struct GeminiResponse {
     candidates: Vec<GeminiCandidate>,
 }
 
-#[derive(Deserialize)]
+#[derive(Deserialize, Serialize)]
 struct GeminiCandidate {
     content: GeminiResponseContent,
 }
 
-#[derive(Deserialize)]
+#[derive(Deserialize, Serialize)]
 struct GeminiResponseContent {
     parts: Vec<GeminiResponsePart>,
 }
 
-#[derive(Deserialize)]
+#[derive(Deserialize, Serialize)]
 struct GeminiResponsePart {
     #[serde(default)]
     text: Option<String>,
diff --git a/crates/llm-guard-core/src/llm/mod.rs b/crates/llm-guard-core/src/llm/mod.rs
@@ -42,6 +42,10 @@ pub fn build_client(settings: &LlmSettings) -> Result<Box<dyn LlmClient>> {
     let kind = ProviderKind::from_provider(settings.provider.trim())?;
     match kind {
         ProviderKind::Noop => Ok(Box::new(NoopLlmClient::default())),
+        ProviderKind::Gemini => {
+            // Use standalone Gemini client to avoid rig deserialization issues
+            Ok(Box::new(GeminiClient::new(settings)?))
+        }
         ProviderKind::Rig => {
             bail!("Select a specific rig-enabled provider (e.g. openai) in LLM_GUARD_PROVIDER")
         }
diff --git a/crates/llm-guard-core/src/llm/rig_adapter.rs b/crates/llm-guard-core/src/llm/rig_adapter.rs
@@ -6,14 +6,13 @@ use rig::client::CompletionClient;
 use rig::completion::message::AssistantContent;
 use rig::completion::{CompletionError, CompletionModelDyn};
 use rig::providers::azure::AzureOpenAIAuth;
-use rig::providers::{anthropic, azure, gemini, openai};
+use rig::providers::{anthropic, azure, openai};
 use serde::Deserialize;
 use serde_json::json;
 use std::env;
 
 const DEFAULT_OPENAI_MODEL: &str = "gpt-4o-mini";
 const DEFAULT_ANTHROPIC_MODEL: &str = "claude-3-5-sonnet-latest";
-const DEFAULT_GEMINI_MODEL: &str = "gemini-1.5-pro";
 const MAX_OUTPUT_TOKENS: u64 = 200;
 const TEMPERATURE: f64 = 0.1;
 const SYSTEM_PROMPT: &str = "You are an application security assistant. Analyze prompt-injection scan results and respond with strict JSON: {\"label\": \"safe|suspicious|malicious\", \"rationale\": \"...\", \"mitigation\": \"...\"}. The mitigation should advise remediation steps.";
@@ -36,7 +35,9 @@ impl RigLlmClient {
         match kind {
             ProviderKind::OpenAi => Ok(Box::new(Self::new_openai(settings)?)),
             ProviderKind::Anthropic => Ok(Box::new(Self::new_anthropic(settings)?)),
-            ProviderKind::Gemini => Ok(Box::new(Self::new_gemini(settings)?)),
+            ProviderKind::Gemini => {
+                bail!("Gemini provider should use standalone client, not rig adapter")
+            }
             ProviderKind::Azure => Ok(Box::new(Self::new_azure(settings)?)),
             ProviderKind::Noop | ProviderKind::Rig => {
                 bail!("rig adapter does not support provider `{kind:?}` yet")
@@ -100,30 +101,8 @@ impl RigLlmClient {
         ))
     }
 
-    fn new_gemini(settings: &LlmSettings) -> Result<Self> {
-        if settings.api_key.trim().is_empty() {
-            bail!("Gemini API key must be provided via LLM_GUARD_API_KEY");
-        }
-
-        let mut builder = gemini::Client::builder(&settings.api_key);
-        if let Some(endpoint) = settings.endpoint.as_deref() {
-            builder = builder.base_url(endpoint);
-        }
-        let client = builder
-            .build()
-            .context("failed to build gemini rig client")?;
-
-        let model_id = settings
-            .model
-            .clone()
-            .filter(|m| !m.trim().is_empty())
-            .unwrap_or_else(|| DEFAULT_GEMINI_MODEL.to_string());
-
-        let model: Box<dyn CompletionModelDyn + Send + Sync> =
-            Box::new(client.completion_model(&model_id));
-
-        Ok(Self::from_model(model, "gemini", model_id, None, true))
-    }
+    // Note: Gemini support removed from rig adapter due to deserialization issues.
+    // Gemini now uses a standalone HTTP client implementation (see gemini.rs).
 
     fn new_azure(settings: &LlmSettings) -> Result<Self> {
         if settings.api_key.trim().is_empty() {
@@ -217,17 +196,16 @@ impl LlmClient for RigLlmClient {
         }
 
         if self.config.provider_label == "openai" {
+            // Use simple json_object format instead of json_schema for better compatibility
+            // with reasoning models like gpt-5
             builder = builder.additional_params(json!({
                 "response_format": {
-                    "type": "json_schema",
-                    "json_schema": {
-                        "name": "verdict",
-                        "strict": false,
-                        "schema": verdict_json_schema()
-                    }
+                    "type": "json_object"
                 }
             }));
         }
+        // Note: Gemini function calling removed due to rig compatibility issues
+        // Gemini will rely on prompt instructions for JSON formatting
 
         let request = builder.build();
 
@@ -255,6 +233,9 @@ impl LlmClient for RigLlmClient {
 
         let choice = response.choice;
 
+        // Always log raw response when debug is enabled
+        debug_log_choice(self.config.provider_label, &choice);
+
         let content = choice
             .clone()
             .into_iter()
@@ -270,8 +251,13 @@ impl LlmClient for RigLlmClient {
             .join("\n");
 
         let trimmed = content.trim();
+
+        // Log extracted content when debug is enabled
+        if debug_enabled() && !trimmed.is_empty() {
+            tracing::warn!("rig {} extracted content: {}", self.config.provider_label, trimmed);
+        }
+
         if trimmed.is_empty() {
-            debug_log_choice(self.config.provider_label, &choice);
             // Gemini/OpenAI callbacks sometimes omit textual content while returning metadata only.
             // In that case we produce a fallback verdict instead of treating it as an error.
             tracing::warn!(
@@ -282,11 +268,7 @@ impl LlmClient for RigLlmClient {
         }
 
         let json_payload = extract_json_payload(trimmed);
-        let verdict = parse_verdict_json(&json_payload, self.config.provider_label, &self.model_id)
-            .or_else(|err| {
-                debug_log_choice(self.config.provider_label, &choice);
-                Err(err)
-            })?;
+        let verdict = parse_verdict_json(&json_payload, self.config.provider_label, &self.model_id)?;
 
         Ok(LlmVerdict {
             label: verdict.label,
@@ -362,22 +344,6 @@ fn fallback_model_verdict(provider: &str) -> ModelVerdict {
     }
 }
 
-fn verdict_json_schema() -> serde_json::Value {
-    json!({
-        "type": "object",
-        "additionalProperties": false,
-        "required": ["label", "rationale", "mitigation"],
-        "properties": {
-            "label": {
-                "type": "string",
-                "enum": ["safe", "suspicious", "malicious", "unknown"]
-            },
-            "rationale": { "type": "string" },
-            "mitigation": { "type": "string" }
-        }
-    })
-}
-
 fn sanitize_json_strings(payload: &str) -> String {
     let mut result = String::with_capacity(payload.len());
     let mut in_string = false;
diff --git a/rules/patterns.json b/rules/patterns.json
@@ -11,5 +11,17 @@
     "description": "Requests to rewrite or ignore system instructions",
     "pattern": "(?i)pretend\\s+you\\s+are\\s+a\\s+developer",
     "weight": 30
+  },
+  {
+    "id": "INSTR_IGNORE",
+    "description": "Attempts to ignore or override previous instructions",
+    "pattern": "(?i)ignore\\s+(?:your\\s+)?(?:previous|prior|all)\\s+(?:instructions?|prompts?|rules?)",
+    "weight": 35
+  },
+  {
+    "id": "PROMPT_LEAK",
+    "description": "Attempts to reveal system prompts or hidden instructions",
+    "pattern": "(?i)(?:dump|reveal|show|tell\\s+me|print)\\s+.*?(?:hidden|system|initial|original)?\\s*(?:prompt|instruction)s?",
+    "weight": 40
   }
 ]