Skip to content

Commit 1c06176

Browse files
Merge pull request #109 from JimStenstrom/fix/ollama-json-error-handling-87
Fix/ollama json error handling 87
2 parents 39c567a + 4a4413d commit 1c06176

File tree

4 files changed

+326
-3
lines changed

4 files changed

+326
-3
lines changed
Lines changed: 174 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,174 @@
1+
import test from 'ava';
2+
import {parseAPIError} from './ai-sdk-client.js';
3+
4+
// Tests for parseAPIError function
5+
// Now using the actual exported function instead of a duplicated copy
6+
7+
test('parseAPIError - handles Ollama unmarshal error from issue #87', t => {
8+
const error = new Error(
9+
"RetryError [AI_RetryError]: Failed after 3 attempts. Last error: unmarshal: invalid character '{' after top-level value",
10+
);
11+
12+
const result = parseAPIError(error);
13+
14+
t.true(result.includes('Ollama server error'));
15+
t.true(result.includes('malformed JSON'));
16+
t.true(result.includes('Restart Ollama'));
17+
t.true(result.includes('Re-pull the model'));
18+
t.true(result.includes('Check Ollama logs'));
19+
t.true(result.includes('Try a different model'));
20+
t.true(result.includes('Original error:'));
21+
});
22+
23+
test('parseAPIError - handles unmarshal error without retry wrapper', t => {
24+
const error = new Error("unmarshal: invalid character '{' after top-level value");
25+
26+
const result = parseAPIError(error);
27+
28+
t.true(result.includes('Ollama server error'));
29+
t.true(result.includes('malformed JSON'));
30+
});
31+
32+
test('parseAPIError - handles 500 error with invalid character (status code takes precedence)', t => {
33+
// This test verifies that HTTP status codes are parsed FIRST,
34+
// so a 500 error with "invalid character" in the message is treated
35+
// as a server error, not an Ollama-specific error
36+
const error = new Error(
37+
"500 Internal Server Error: invalid character 'x' after top-level value",
38+
);
39+
40+
const result = parseAPIError(error);
41+
42+
// Status code parsing takes precedence over Ollama-specific pattern matching
43+
t.is(result, "Server error: invalid character 'x' after top-level value");
44+
});
45+
46+
test('parseAPIError - handles 500 error without JSON parsing issue', t => {
47+
const error = new Error('500 Internal Server Error: database connection failed');
48+
49+
const result = parseAPIError(error);
50+
51+
t.is(result, 'Server error: database connection failed');
52+
});
53+
54+
test('parseAPIError - handles 404 error', t => {
55+
const error = new Error('404 Not Found: model not available');
56+
57+
const result = parseAPIError(error);
58+
59+
t.is(
60+
result,
61+
'Model not found: The requested model may not exist or is unavailable',
62+
);
63+
});
64+
65+
test('parseAPIError - handles connection refused', t => {
66+
const error = new Error('ECONNREFUSED: Connection refused');
67+
68+
const result = parseAPIError(error);
69+
70+
t.is(result, 'Connection failed: Unable to reach the model server');
71+
});
72+
73+
test('parseAPIError - handles timeout error', t => {
74+
const error = new Error('Request timeout: ETIMEDOUT');
75+
76+
const result = parseAPIError(error);
77+
78+
t.is(result, 'Request timed out: The model took too long to respond');
79+
});
80+
81+
test('parseAPIError - handles non-Error objects', t => {
82+
const result = parseAPIError('string error');
83+
84+
t.is(result, 'An unknown error occurred while communicating with the model');
85+
});
86+
87+
test('parseAPIError - handles context length errors', t => {
88+
const error = new Error(
89+
'context length exceeded',
90+
);
91+
92+
const result = parseAPIError(error);
93+
94+
// Use exact assertion instead of OR condition
95+
t.is(result, 'Context too large: Please reduce the conversation length or message size');
96+
});
97+
98+
test('parseAPIError - handles too many tokens errors', t => {
99+
const error = new Error(
100+
'too many tokens in the request',
101+
);
102+
103+
const result = parseAPIError(error);
104+
105+
t.is(result, 'Context too large: Please reduce the conversation length or message size');
106+
});
107+
108+
test('parseAPIError - handles 400 with context length in message', t => {
109+
const error = new Error(
110+
'400 Bad Request: context length exceeded',
111+
);
112+
113+
const result = parseAPIError(error);
114+
115+
// The 400 status code pattern matches first, so we get the full message
116+
t.is(result, 'Bad request: context length exceeded');
117+
});
118+
119+
test('parseAPIError - handles 401 authentication error', t => {
120+
const error = new Error('401 Unauthorized: Invalid API key');
121+
122+
const result = parseAPIError(error);
123+
124+
t.is(result, 'Authentication failed: Invalid API key or credentials');
125+
});
126+
127+
test('parseAPIError - handles 403 forbidden error', t => {
128+
const error = new Error('403 Forbidden: Access denied');
129+
130+
const result = parseAPIError(error);
131+
132+
t.is(result, 'Access forbidden: Check your API permissions');
133+
});
134+
135+
test('parseAPIError - handles 429 rate limit error', t => {
136+
const error = new Error('429 Too Many Requests: Rate limit exceeded');
137+
138+
const result = parseAPIError(error);
139+
140+
t.is(result, 'Rate limit exceeded: Too many requests. Please wait and try again');
141+
});
142+
143+
test('parseAPIError - handles 502 bad gateway error', t => {
144+
const error = new Error('502 Bad Gateway: upstream error');
145+
146+
const result = parseAPIError(error);
147+
148+
t.is(result, 'Server error: upstream error');
149+
});
150+
151+
test('parseAPIError - handles 503 service unavailable error', t => {
152+
const error = new Error('503 Service Unavailable: server overloaded');
153+
154+
const result = parseAPIError(error);
155+
156+
t.is(result, 'Server error: server overloaded');
157+
});
158+
159+
test('parseAPIError - handles reduce tokens message', t => {
160+
const error = new Error('Please reduce the number of tokens in your request');
161+
162+
const result = parseAPIError(error);
163+
164+
t.is(result, 'Too many tokens: Please shorten your message or clear conversation history');
165+
});
166+
167+
test('parseAPIError - cleans up unknown errors', t => {
168+
const error = new Error('Error: Something unexpected happened\nWith more details');
169+
170+
const result = parseAPIError(error);
171+
172+
// Should strip "Error: " prefix and only return first line
173+
t.is(result, 'Something unexpected happened');
174+
});
Lines changed: 114 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,114 @@
1+
import test from 'ava';
2+
import {AISDKClient} from './ai-sdk-client.js';
3+
import type {AIProviderConfig} from './types/config.js';
4+
5+
// Tests for maxRetries configuration
6+
// Now tests actual AISDKClient instantiation and behavior
7+
8+
test('AISDKClient - maxRetries defaults to 2 when not specified', t => {
9+
const config: AIProviderConfig = {
10+
name: 'TestProvider',
11+
type: 'openai-compatible',
12+
models: ['test-model'],
13+
config: {
14+
baseURL: 'http://localhost:11434/v1',
15+
apiKey: 'test-key',
16+
},
17+
};
18+
19+
const client = new AISDKClient(config);
20+
21+
// Verify the client's internal maxRetries is set to default of 2
22+
t.is(client.getMaxRetries(), 2);
23+
});
24+
25+
test('AISDKClient - maxRetries respects custom value', t => {
26+
const config: AIProviderConfig = {
27+
name: 'TestProvider',
28+
type: 'openai-compatible',
29+
models: ['test-model'],
30+
maxRetries: 5,
31+
config: {
32+
baseURL: 'http://localhost:11434/v1',
33+
apiKey: 'test-key',
34+
},
35+
};
36+
37+
const client = new AISDKClient(config);
38+
39+
// Verify the client uses the custom maxRetries value
40+
t.is(client.getMaxRetries(), 5);
41+
});
42+
43+
test('AISDKClient - maxRetries can be set to 0 to disable retries', t => {
44+
// Important: This test verifies that 0 is treated as a valid value,
45+
// not as falsy (which would incorrectly default to 2)
46+
const config: AIProviderConfig = {
47+
name: 'TestProvider',
48+
type: 'openai-compatible',
49+
models: ['test-model'],
50+
maxRetries: 0,
51+
config: {
52+
baseURL: 'http://localhost:11434/v1',
53+
apiKey: 'test-key',
54+
},
55+
};
56+
57+
const client = new AISDKClient(config);
58+
59+
// Verify that 0 is respected (nullish coalescing handles this correctly)
60+
t.is(client.getMaxRetries(), 0);
61+
});
62+
63+
test('AISDKClient - maxRetries handles value of 1', t => {
64+
const config: AIProviderConfig = {
65+
name: 'TestProvider',
66+
type: 'openai-compatible',
67+
models: ['test-model'],
68+
maxRetries: 1,
69+
config: {
70+
baseURL: 'http://localhost:11434/v1',
71+
apiKey: 'test-key',
72+
},
73+
};
74+
75+
const client = new AISDKClient(config);
76+
77+
t.is(client.getMaxRetries(), 1);
78+
});
79+
80+
test('AIProviderConfig type - includes maxRetries in interface', t => {
81+
// Compile-time test that maxRetries is part of the interface
82+
const config: AIProviderConfig = {
83+
name: 'TestProvider',
84+
type: 'openai-compatible',
85+
models: ['test-model'],
86+
maxRetries: 3,
87+
config: {
88+
baseURL: 'http://localhost:11434/v1',
89+
},
90+
};
91+
92+
// TypeScript should not complain about maxRetries property
93+
t.is(typeof config.maxRetries, 'number');
94+
t.true('maxRetries' in config);
95+
});
96+
97+
test('AISDKClient - undefined maxRetries uses default', t => {
98+
// Explicitly set to undefined to test fallback behavior
99+
const config: AIProviderConfig = {
100+
name: 'TestProvider',
101+
type: 'openai-compatible',
102+
models: ['test-model'],
103+
maxRetries: undefined,
104+
config: {
105+
baseURL: 'http://localhost:11434/v1',
106+
apiKey: 'test-key',
107+
},
108+
};
109+
110+
const client = new AISDKClient(config);
111+
112+
// Verify undefined falls back to default of 2
113+
t.is(client.getMaxRetries(), 2);
114+
});

source/ai-sdk-client.ts

Lines changed: 35 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -15,16 +15,19 @@ import {XMLToolCallParser} from '@/tool-calling/xml-parser';
1515
import {getModelContextLimit} from '@/models/index.js';
1616

1717
/**
18-
* Parses API errors into user-friendly messages
18+
* Parses API errors into user-friendly messages.
19+
* Exported for testing purposes.
1920
*/
20-
function parseAPIError(error: unknown): string {
21+
export function parseAPIError(error: unknown): string {
2122
if (!(error instanceof Error)) {
2223
return 'An unknown error occurred while communicating with the model';
2324
}
2425

2526
const errorMessage = error.message;
2627

27-
// Extract status code and clean message from common error patterns
28+
// Extract status code and clean message from common error patterns FIRST
29+
// This ensures HTTP status codes are properly parsed before falling through
30+
// to more generic pattern matching (like Ollama-specific errors)
2831
const statusMatch = errorMessage.match(
2932
/(?:Error: )?(\d{3})\s+(?:\d{3}\s+)?(?:Bad Request|[^:]+):\s*(.+)/i,
3033
);
@@ -52,6 +55,26 @@ function parseAPIError(error: unknown): string {
5255
}
5356
}
5457

58+
// Handle Ollama-specific unmarshal/JSON parsing errors
59+
// This runs AFTER status code parsing to avoid misclassifying HTTP errors
60+
// that happen to contain JSON parsing error text in their message
61+
if (
62+
errorMessage.includes('unmarshal') ||
63+
(errorMessage.includes('invalid character') &&
64+
errorMessage.includes('after top-level value'))
65+
) {
66+
return (
67+
'Ollama server error: The model returned malformed JSON. ' +
68+
'This usually indicates an issue with the Ollama server or model. ' +
69+
'Try:\n' +
70+
' 1. Restart Ollama: systemctl restart ollama (Linux) or restart the Ollama app\n' +
71+
' 2. Re-pull the model: ollama pull <model-name>\n' +
72+
' 3. Check Ollama logs for more details\n' +
73+
' 4. Try a different model to see if the issue is model-specific\n' +
74+
`Original error: ${errorMessage}`
75+
);
76+
}
77+
5578
// Handle timeout errors
5679
if (errorMessage.includes('timeout') || errorMessage.includes('ETIMEDOUT')) {
5780
return 'Request timed out: The model took too long to respond';
@@ -138,12 +161,15 @@ export class AISDKClient implements LLMClient {
138161
private providerConfig: AIProviderConfig;
139162
private undiciAgent: Agent;
140163
private cachedContextSize: number;
164+
private maxRetries: number;
141165

142166
constructor(providerConfig: AIProviderConfig) {
143167
this.providerConfig = providerConfig;
144168
this.availableModels = providerConfig.models;
145169
this.currentModel = providerConfig.models[0] || '';
146170
this.cachedContextSize = 0;
171+
// Default to 2 retries (same as AI SDK default), or use configured value
172+
this.maxRetries = providerConfig.maxRetries ?? 2;
147173

148174
const {requestTimeout, socketTimeout, connectionPool} = this.providerConfig;
149175
const resolvedSocketTimeout =
@@ -233,6 +259,10 @@ export class AISDKClient implements LLMClient {
233259
return this.cachedContextSize;
234260
}
235261

262+
getMaxRetries(): number {
263+
return this.maxRetries;
264+
}
265+
236266
getAvailableModels(): Promise<string[]> {
237267
return Promise.resolve(this.availableModels);
238268
}
@@ -263,6 +293,7 @@ export class AISDKClient implements LLMClient {
263293
messages: modelMessages,
264294
tools: aiTools,
265295
abortSignal: signal,
296+
maxRetries: this.maxRetries,
266297
});
267298

268299
// Extract tool calls from result
@@ -384,6 +415,7 @@ export class AISDKClient implements LLMClient {
384415
messages: modelMessages,
385416
tools: aiTools,
386417
abortSignal: signal,
418+
maxRetries: this.maxRetries,
387419
});
388420

389421
// Stream tokens

0 commit comments

Comments
 (0)