diff --git a/app/_data/plugins/ai-proxy.yaml b/app/_data/plugins/ai-proxy.yaml
index 550af4a64b..d51fa26720 100644
--- a/app/_data/plugins/ai-proxy.yaml
+++ b/app/_data/plugins/ai-proxy.yaml
@@ -5,39 +5,65 @@ providers:
chat:
supported: true
streaming: true
- upstream_path: 'Use the LLM chat upstream path'
+ upstream_path: 'Uses the Converse and ConverseStream APIs'
route_type: 'llm/v1/chat'
model_example: 'Use the model name for the specific LLM provider'
min_version: '3.8'
completions:
supported: true
streaming: true
- upstream_path: 'Use the LLM completions upstream path'
+ upstream_path: 'Uses the Converse and ConverseStream APIs'
route_type: 'llm/v1/completions'
model_example: 'Use the model name for the specific LLM provider'
min_version: '3.8'
embeddings:
supported: true
streaming: false
- upstream_path: 'Use the LLM embeddings upstream path'
+ upstream_path: 'Uses the InvokeModel and InvokeWithResponseStream APIs'
route_type: 'llm/v1/embeddings'
model_example: 'Use the model name for the specific LLM provider'
min_version: '3.11'
+ batches: # Native format from SDK only
+ supported: 'n/a'
+ streaming: false
+ upstream_path: 'Uses the ModelInvocationJob API'
+ route_type: 'llm/v1/batches'
+ model_example: 'n/a'
+ min_version: ''
+ note:
+ content: 'Batch processing for Bedrock is supported only in the native SDK format'
+ files:
+ supported: 'n/a'
+ streaming: false
+ upstream_path: '/openai/files'
+ route_type: 'llm/v1/files'
+ model_example: 'n/a'
+ min_version: ''
+ note:
+ content: 'Bedrock does not have a dedicated Files API. File storage uses Amazon S3 instead.'
image:
generations:
supported: true
streaming: false
- upstream_path: 'Use the LLM image/generations upstream path'
+ upstream_path: 'Uses the InvokeModel API'
route_type: 'image/v1/images/generations'
model_example: 'Use the model name for the specific LLM provider'
min_version: '3.11'
edits:
supported: true
streaming: false
- upstream_path: 'Use the LLM image/edits upstream path'
+ upstream_path: 'Uses the InvokeModel API'
route_type: 'image/v1/images/edits'
model_example: 'Use the model name for the specific LLM provider'
min_version: '3.11'
+ video:
+ generations:
+ supported: true
+ streaming: false
+ upstream_path: 'Uses the StartAsyncInvoke API'
+ route_type: 'video/v1/videos/generations'
+ model_example: 'Use the model name for the specific LLM provider'
+ min_version: '3.13'
- name: 'Anthropic'
url_pattern: 'https://api.anthropic.com:443/{route_type_path}'
@@ -56,6 +82,15 @@ providers:
route_type: 'llm/v1/completions'
model_example: 'claude-2.1'
min_version: '3.6'
+ batches: # Native format from SDK only
+ supported: 'n/a'
+ streaming: false
+ upstream_path: '/v1/messages/batches'
+ route_type: 'llm/v1/batches'
+ model_example: 'n/a'
+ min_version: ''
+ note:
+ content: 'Batch processing for Anthropic is supported only in the native SDK format'
- name: 'Azure'
url_pattern: 'https://{azure_instance}.openai.azure.com:443/openai/deployments/{deployment_name}/{route_type_path}'
@@ -153,6 +188,25 @@ providers:
route_type: 'realtime/v1/realtime'
model_example: 'n/a'
min_version: '3.11'
+ video:
+ generations:
+ supported: true
+ streaming: false
+ upstream_path: '/openai/v1/video/generations/jobs'
+ route_type: 'video/v1/videos/generations'
+ model_example: 'sora-2'
+ min_version: '3.13'
+
+ - name: 'Cerebras'
+ url_pattern: 'https://api.cerebras.ai/{route_type_path}'
+ min_version: '3.13'
+ chat:
+ supported: true
+ streaming: true
+ upstream_path: '/v1/chat/completions'
+ route_type: 'llm/v1/chat'
+ model_example: 'llama3.1-8b'
+ min_version: '3.13'
- name: 'Cohere'
url_pattern: 'https://api.cohere.com:443/{route_type_path}'
@@ -179,39 +233,106 @@ providers:
model_example: 'embed-english-v3.0'
min_version: '3.11'
+ - name: 'Dashscope'
+ url_pattern: 'https://dashscope.aliyuncs.com or https://dashscope-intl.aliyuncs.com'
+ min_version: '3.13'
+ chat:
+ supported: true
+ streaming: true
+ upstream_path: '/compatible-mode/v1/chat/completions'
+ route_type: 'llm/v1/chat'
+ model_example: 'qwen-plus'
+ min_version: '3.13'
+ embeddings:
+ supported: true
+ streaming: false
+ upstream_path: '/compatible-mode/v1/embeddings'
+ route_type: 'llm/v1/embeddings'
+ model_example: 'text-embedding-v1'
+ min_version: '3.13'
+ image:
+ generations:
+ supported: true
+ streaming: false
+ upstream_path: '/api/v1/services/aigc/multimodal-generation/generation'
+ route_type: 'image/v1/images/generations'
+ model_example: 'qwen-image-plus'
+ min_version: '3.13'
+ edits:
+ supported: true
+ streaming: false
+ upstream_path: '/api/v1/services/aigc/image2image/image-synthesis'
+ route_type: 'image/v1/images/edits'
+ model_example: 'qwen-image-plus'
+ min_version: '3.13'
+
- name: 'Gemini'
url_pattern: 'https://generativelanguage.googleapis.com'
min_version: '3.8'
chat:
supported: true
streaming: true
- upstream_path: 'llm/v1/chat'
+ upstream_path: 'Uses the generateContent API'
route_type: 'llm/v1/chat'
model_example: 'gemini-2.0-flash'
min_version: '3.8'
-
embeddings:
supported: true
streaming: false
- upstream_path: 'llm/v1/embeddings'
+ upstream_path: 'Uses the batchEmbedContents API'
route_type: 'llm/v1/embeddings'
model_example: 'text-embedding-004'
min_version: '3.11'
+ files: # Native format from SDK only
+ supported: 'n/a'
+ streaming: false
+ upstream_path: 'Uses the uploadFile and files APIs'
+ route_type: 'llm/v1/files'
+ model_example: 'n/a'
+ min_version: ''
+ note:
+ content: 'File processing for Gemini is supported only in the native SDK format'
+ batches: # Native format from SDK only
+ supported: 'n/a'
+ streaming: false
+ upstream_path: 'Uses the batches API'
+ route_type: 'llm/v1/batches'
+ model_example: 'n/a'
+ min_version: ''
+ note:
+ content: 'Batch processing for Gemini is supported only in the native SDK format'
image:
generations:
supported: true
streaming: false
- upstream_path: 'image/v1/images/generations'
+ upstream_path: 'Uses the generateContent API'
route_type: 'image/v1/images/generations'
model_example: 'gemini-2.0-flash-preview-image-generation1'
min_version: '3.11'
edits:
supported: true
streaming: false
- upstream_path: 'image/v1/images/edits'
+ upstream_path: 'Uses the generateContent API'
route_type: 'image/v1/images/edits'
model_example: 'gemini-2.0-flash-preview-image-generation1'
min_version: '3.11'
+ realtime: # Native format from SDK only
+ supported: true
+ streaming: true
+ upstream_path: 'Uses the BidiGenerateContent API'
+ route_type: 'realtime/v1/realtime'
+ model_example: 'gemini-live-2.5-flash-preview-native-audio-09-2025'
+ min_version: '3.13'
+ note:
+ content: 'Realtime processing for Gemini is supported only in the native SDK format'
+ video:
+ generations:
+ supported: true
+ streaming: false
+ upstream_path: 'Uses the predictLongRunning API'
+ route_type: 'video/v1/videos/generations'
+ model_example: 'veo-3.1-generate-001'
+ min_version: '3.13'
- name: 'Gemini Vertex'
url_pattern: 'https://aiplatform.googleapis.com/'
@@ -219,39 +340,63 @@ providers:
chat:
supported: true
streaming: true
- upstream_path: 'llm/v1/chat'
+ upstream_path: 'Uses the generateContent API'
route_type: 'llm/v1/chat'
model_example: 'gemini-2.0-flash'
min_version: '3.8'
completions:
supported: true
streaming: false
- upstream_path: 'llm/v1/completions'
+ upstream_path: 'Uses the generateContent API'
route_type: 'llm/v1/completions'
model_example: 'gemini-2.0-flash'
min_version: '3.8'
embeddings:
supported: true
streaming: false
- upstream_path: 'llm/v1/embeddings'
+ upstream_path: 'Uses the embedContent API'
route_type: 'llm/v1/embeddings'
model_example: 'text-embedding-004'
min_version: '3.11'
+ files:
+ supported: 'n/a'
+ streaming: false
+ upstream_path: '/openai/files'
+ route_type: 'llm/v1/files'
+ model_example: 'n/a'
+ min_version: '3.11'
+ note:
+ content: 'Gemini Vertex does not have a dedicated Files API. File storage uses Google Cloud Storage, similar to AWS S3.'
+ batches:
+ supported: true
+ streaming: false
+ upstream_path: 'Uses the batchPredictionJobs API'
+ route_type: 'llm/v1/batches'
+ model_example: 'n/a'
+ min_version: '3.13'
image:
generations:
supported: true
streaming: false
- upstream_path: 'image/v1/images/generations'
+ upstream_path: 'Uses the generateContent API'
route_type: 'image/v1/images/generations'
model_example: 'gemini-2.0-flash-preview-image-generation1'
min_version: '3.11'
edits:
supported: true
streaming: false
- upstream_path: 'image/v1/images/edits'
+ upstream_path: 'Uses the generateContent API'
route_type: 'image/v1/images/edits'
model_example: 'gemini-2.0-flash-preview-image-generation1'
min_version: '3.11'
+ video:
+ generations:
+ supported: true
+ streaming: false
+ upstream_path: 'Uses the predictLongRunning API'
+ route_type: 'video/v1/videos/generations'
+ model_example: 'veo-3.1-generate-001'
+ min_version: '3.13'
- name: 'Hugging Face'
url_pattern: 'https://api-inference.huggingface.co'
@@ -259,24 +404,25 @@ providers:
chat:
supported: true
streaming: true
- upstream_path: '/models/{model_provider}/{model_name}'
+ upstream_path: '/v1/chat/completions'
route_type: 'llm/v1/chat'
model_example: 'Use the model name for the specific LLM provider'
min_version: '3.9'
- completions:
- supported: true
- streaming: true
- upstream_path: '/models/{model_provider}/{model_name}'
- route_type: 'llm/v1/completions'
- model_example: 'Use the model name for the specific LLM provider'
- min_version: '3.9'
embeddings:
supported: true
streaming: false
- upstream_path: '/models/{model_provider}/{model_name}'
+ upstream_path: '/hf-inference/models/{model_name}/pipeline/feature-extraction'
route_type: 'llm/v1/embeddings'
model_example: 'Use the embedding model name'
min_version: '3.11'
+ video:
+ generations:
+ supported: true
+ streaming: false
+ upstream_path: '/v1/videos'
+ route_type: 'video/v1/videos/generations'
+ model_example: 'Use the video generation model name'
+ min_version: '3.13'
- name: 'Llama2'
formats: 'supports Llama2 and Llama3 models and raw, OLLAMA, and OpenAI formats'
@@ -311,21 +457,14 @@ providers:
chat:
supported: true
streaming: true
- upstream_path: 'User-defined'
+ upstream_path: '/v1/chat/completions or user-defined'
route_type: 'llm/v1/chat'
model_example: 'mistral-tiny'
min_version: '3.6'
- completions:
- supported: true
- streaming: true
- upstream_path: 'User-defined'
- route_type: 'llm/v1/completions'
- model_example: 'mistral-tiny'
- min_version: '3.6'
embeddings:
supported: true
streaming: false
- upstream_path: 'User-defined'
+ upstream_path: '/v1/embeddings or user-defined'
route_type: 'llm/v1/embeddings'
model_example: 'mistral-embed'
min_version: '3.11'
@@ -427,6 +566,40 @@ providers:
route_type: 'realtime/v1/realtime'
model_example: 'gpt-4o'
min_version: '3.11'
+ video:
+ generations:
+ supported: true
+ streaming: false
+ upstream_path: '/v1/videos'
+ route_type: 'video/v1/videos/generations'
+ model_example: 'sora-2'
+ min_version: '3.13'
+
+ - name: 'xAI'
+ url_pattern: 'https://api.x.ai/{route_type_path}'
+ min_version: '3.13'
+ chat:
+ supported: true
+ streaming: true
+ upstream_path: '/v1/chat/completions'
+ route_type: 'llm/v1/chat'
+ model_example: 'grok-4-1-fast-reasoning'
+ min_version: '3.13'
+ responses:
+ supported: true
+ streaming: false
+ upstream_path: '/v1/responses'
+ route_type: 'llm/v1/responses'
+ model_example: 'grok-4-1-fast-reasoning'
+ min_version: '3.13'
+ image:
+ generations:
+ supported: true
+ streaming: false
+ upstream_path: '/v1/images/generations'
+ route_type: 'image/v1/images/generations'
+ model_example: 'grok-2-image-1212'
+ min_version: '3.13'
- name: 'xAI'
url_pattern: 'https://api.x.ai:443/{route_type_path}'
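For orientation, each provider entry in this data file maps onto an ai-proxy plugin configuration. A route for the Cerebras chat entry added above might look roughly like this (a minimal sketch: the API key is a placeholder, and field support for providers new in 3.13 should be verified against the plugin schema):

```yaml
plugins:
  - name: ai-proxy
    config:
      route_type: llm/v1/chat                    # route_type from the provider table
      auth:
        header_name: Authorization
        header_value: Bearer <CEREBRAS_API_KEY>  # placeholder credential
      model:
        provider: cerebras                       # provider added in 3.13
        name: llama3.1-8b                        # model_example from the table
```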
diff --git a/app/_includes/plugins/ai-proxy/formats.md b/app/_includes/plugins/ai-proxy/formats.md
index 108d6cf400..c981c2a52c 100644
--- a/app/_includes/plugins/ai-proxy/formats.md
+++ b/app/_includes/plugins/ai-proxy/formats.md
@@ -99,29 +99,54 @@ columns:
- title: Supported APIs
key: apis
rows:
- - llm_format: "[`gemini`](./examples/gemini-native-routes/)"
+ - llm_format: "`gemini`"
provider: Gemini
apis: |
- - `/generateContent`
- - `/streamGenerateContent`
- - llm_format: "[`bedrock`](./examples/bedrock-native-routes/)"
+ - `/v1beta/models/{model_name}:generateContent`
+ - `/v1beta/models/{model_name}:streamGenerateContent`
+ - `/v1beta/models/{model_name}:embedContent`
+ - `/v1beta/models/{model_name}:batchEmbedContents`
+ - `/v1beta/batches`
+ - `/upload/v1beta/files`
+ - `/v1beta/files`
+ - llm_format: "`gemini`"
+ provider: Vertex
+ apis: |
+ - `/v1/projects/{project_id}/locations/{location}/models/{model_name}:generateContent`
+ - `/v1/projects/{project_id}/locations/{location}/models/{model_name}:streamGenerateContent`
+ - `/v1/projects/{project_id}/locations/{location}/models/{model_name}:embedContent`
+ - `/v1/projects/{project_id}/locations/{location}/models/{model_name}:batchEmbedContent`
+ - `/v1/projects/{project_id}/locations/{location}/models/{model_name}:predictLongRunning`
+ - `/v1/projects/{project_id}/locations/{location}/rankingConfigs/{config_name}:rank`
+ - `/v1/projects/{project_id}/locations/{location}/batchPredictionJobs`
+ - llm_format: "`bedrock`"
provider: Bedrock
apis: |
- - `/converse`
- - `/converse-stream`
- - `/retrieveAndGenerate`
- - `/retrieveAndGenerateStream`
- - `/rerank`
- - llm_format: "[`cohere`](./examples/cohere-native-routes/)"
+ - `/model/{model_name}/converse`
+ - `/model/{model_name}/converse-stream`
+ - `/model/{model_name}/invoke`
+ - `/model/{model_name}/invoke-with-response-stream`
+ - `/model/{model_name}/retrieveAndGenerate`
+ - `/model/{model_name}/retrieveAndGenerateStream`
+ - `/model/{model_name}/rerank`
+ - `/model/{model_name}/async-invoke`
+ - `/model-invocations`
+ - llm_format: "`cohere`"
provider: Cohere
apis: |
- `/v1/rerank`
- `/v2/rerank`
- - llm_format: "[`huggingface`](./examples/hugging-face-native-routes/)"
+ - llm_format: "`huggingface`"
provider: Hugging Face
apis: |
- `/generate`
- `/generate_stream`
+ - llm_format: "`anthropic`"
+ provider: Anthropic
+ apis: |
+ - `/v1/messages`
+ - `/v1/messages/batches`
+
{% endtable %}
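The native-format routes in this table are selected with the plugin's `llm_format` option rather than a dedicated route type. A Gemini native route might be declared roughly as follows (a sketch, assuming the `gemini` format value shown above; the auth fields and key are placeholders):

```yaml
plugins:
  - name: ai-proxy
    config:
      llm_format: gemini              # accept and emit the native Gemini wire format
      route_type: llm/v1/chat
      auth:
        param_name: key
        param_location: query
        param_value: <GEMINI_API_KEY>  # placeholder credential
      model:
        provider: gemini
        name: gemini-2.0-flash
```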
@@ -133,7 +158,7 @@ The following sections detail the provider and statistic logging limitations.
* **Anthropic**: Does not support `llm/v1/completions` or `llm/v1/embeddings`.
* **Llama2**: Raw format lacks support for `llm/v1/embeddings`.
-* **Bedrock** and **Gemini**: Only support `auth.allow_override = false`.
+* **Gemini**: Only supports `auth.allow_override = false`.
#### Statistics logging limitations
diff --git a/app/_includes/plugins/ai-proxy/grouped-upstreams.md b/app/_includes/plugins/ai-proxy/grouped-upstreams.md
index dad8e98412..16c043737b 100644
--- a/app/_includes/plugins/ai-proxy/grouped-upstreams.md
+++ b/app/_includes/plugins/ai-proxy/grouped-upstreams.md
@@ -63,6 +63,13 @@
plugin=plugin %}
{% endnavtab %}
+{% navtab "Cerebras" %}
+ {% include plugins/ai-proxy/tables/upstream-paths/upstream-paths.html
+ providers=providers
+ provider_name="Cerebras"
+ plugin=plugin %}
+{% endnavtab %}
+
{% navtab "Cohere" %}
{% include plugins/ai-proxy/tables/upstream-paths/upstream-paths.html
providers=providers
@@ -76,7 +83,7 @@
provider_name="Gemini"
plugin=plugin %}
{:.warning}
-> **[1]**: Kong AI Gateway does **not** support the [Imagen](https://console.cloud.google.com/vertex-ai/publishers/google/model-garden/imagen-4.0-generate-preview-06-06?inv=1&invt=Ab46EA&project=summit-demo-2022) model family. For image generation with Google Vertex AI, use [Gemini models](https://cloud.google.com/vertex-ai/generative-ai/docs/multimodal/image-generation) instead.
+> **[1]**: Kong AI Gateway before 3.13 does **not** support the [Imagen](https://console.cloud.google.com/vertex-ai/publishers/google/model-garden/imagen-4.0-generate-preview-06-06?inv=1&invt=Ab46EA) model family. For image generation with Google Vertex AI, use [Gemini models](https://cloud.google.com/vertex-ai/generative-ai/docs/multimodal/image-generation) instead.
{% endnavtab %}
{% navtab "Gemini Vertex" %}
@@ -85,7 +92,7 @@
provider_name="Gemini Vertex"
plugin=plugin %}
{:.warning}
-> **[1]**: Kong AI Gateway does **not** support the [Imagen](https://console.cloud.google.com/vertex-ai/publishers/google/model-garden/imagen-4.0-generate-preview-06-06?inv=1&invt=Ab46EA&project=summit-demo-2022) model family. For image generation with Google Vertex AI, use [Gemini models](https://cloud.google.com/vertex-ai/generative-ai/docs/multimodal/image-generation) instead.
+> **[1]**: Kong AI Gateway before 3.13 does **not** support the [Imagen](https://console.cloud.google.com/vertex-ai/publishers/google/model-garden/imagen-4.0-generate-preview-06-06?inv=1&invt=Ab46EA) model family. For image generation with Google Vertex AI, use [Gemini models](https://cloud.google.com/vertex-ai/generative-ai/docs/multimodal/image-generation) instead.
{% endnavtab %}
{% navtab "Hugging Face" %}
@@ -109,4 +116,11 @@
plugin=plugin %}
{% endnavtab %}
+{% navtab "xAI" %}
+ {% include plugins/ai-proxy/tables/upstream-paths/upstream-paths.html
+ providers=providers
+ provider_name="xAI"
+ plugin=plugin %}
+{% endnavtab %}
+
{% endnavtabs %}
diff --git a/app/_includes/plugins/ai-proxy/overview.md b/app/_includes/plugins/ai-proxy/overview.md
index 19d90cfb36..bdd355c472 100644
--- a/app/_includes/plugins/ai-proxy/overview.md
+++ b/app/_includes/plugins/ai-proxy/overview.md
@@ -51,7 +51,6 @@ features:
examples: |
* [`llm/v1/assistants`](./examples/assistants-route-type/)
* [`llm/v1/responses`](./examples/responses-route-type/)
- * [Secure GitHub MCP Server traffic using `llm/v1/responses` route type](/mcp/secure-mcp-traffic/)
- title: "Batch and files"
description: Supports parallel LLM requests and file upload for long documents and structured input.
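This feature pairs with the `llm/v1/batches` and `llm/v1/files` route types from the provider tables; a files route might be configured along these lines (a sketch using the OpenAI provider, which supports both route types; the key is a placeholder):

```yaml
plugins:
  - name: ai-proxy
    config:
      route_type: llm/v1/files
      auth:
        header_name: Authorization
        header_value: Bearer <OPENAI_API_KEY>  # placeholder credential
      model:
        provider: openai
```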
diff --git a/app/_includes/plugins/ai-proxy/tables/supported-providers-audio.html b/app/_includes/plugins/ai-proxy/tables/supported-providers-audio.html
index dc2ad912dd..e27465de38 100644
--- a/app/_includes/plugins/ai-proxy/tables/supported-providers-audio.html
+++ b/app/_includes/plugins/ai-proxy/tables/supported-providers-audio.html
@@ -10,11 +10,20 @@
-| {{ provider.name }}{% if provider.formats %} ({{ provider.formats }}){% endif %} |
-{{ provider.files.supported | to_check }} |
-{{ provider.batches.supported | to_check }} |
-{{ provider.assistants.supported | to_check }} |
-{{ provider.responses.supported | to_check }} |
+| {{ provider.name }}{% if provider.formats %} ({{ provider.formats }}){% endif %}
+  {% for feature in features %}
+  {% assign feature_data = provider[feature] %}
+  {% if feature_data.supported == true and feature_data.min_version != '' and feature_data.min_version %}
+  {% new_in {{ feature_data.min_version }} %}
+  {% break %}
+  {% endif %}
+  {% endfor %}
+|
+{% for feature in features %}
+  {% assign feature_data = provider[feature] %}
+  {% if feature_data.supported == 'n/a' %}
+  n/a{% if feature_data.note.content %}{{ current_note | plus: 1 }}{% assign current_note = current_note | plus: 1 %}{% endif %}
+  {% elsif feature_data.note.content %}
+  {{ feature_data.supported | to_check }}{{ current_note | plus: 1 }}{% assign current_note = current_note | plus: 1 %}
+  {% else %}
+  {{ feature_data.supported | to_check }}
+  {% endif %}
+  |
+{% endfor %}
+{{ forloop.index }}) {{ note }}
+{% endfor %}
+{% endif %}
\ No newline at end of file
diff --git a/app/_includes/plugins/ai-proxy/tables/supported-providers-text.html b/app/_includes/plugins/ai-proxy/tables/supported-providers-text.html
index fc08cf40fb..96c3452dc3 100644
--- a/app/_includes/plugins/ai-proxy/tables/supported-providers-text.html
+++ b/app/_includes/plugins/ai-proxy/tables/supported-providers-text.html
@@ -10,11 +10,18 @@
 {% for provider in include.providers %}
+{% endfor %} +{% endif %} \ No newline at end of file diff --git a/app/_includes/plugins/ai-proxy/tables/supported-providers-text.html b/app/_includes/plugins/ai-proxy/tables/supported-providers-text.html index fc08cf40fb..96c3452dc3 100644 --- a/app/_includes/plugins/ai-proxy/tables/supported-providers-text.html +++ b/app/_includes/plugins/ai-proxy/tables/supported-providers-text.html @@ -10,11 +10,18 @@ {% for provider in include.providers %}