Merge branch 'main' into update-notebook-link

dmaniloff · web-flow · commit 69d1eaf9e48b · 2025-10-31T12:47:01.000-04:00
diff --git a/demos/basic_demo.ipynb b/demos/basic_demo.ipynb
diff --git a/distribution/run.yaml b/distribution/run.yaml
@@ -1,12 +1,11 @@
-version: "2"
-image_name: trustyai_ragas_distro_remote
+version: 2
+image_name: trustyai_ragas_distro
 apis:
-  - eval
-  - inference
-  - files
-  - benchmarks
-  - telemetry
-  - datasetio
+- eval
+- inference
+- files
+- benchmarks
+- datasetio
 providers:
   eval:
     - provider_id: ${env.KUBEFLOW_LLAMA_STACK_URL:+trustyai_ragas_remote}
@@ -22,41 +21,59 @@ providers:
           llama_stack_url: ${env.KUBEFLOW_LLAMA_STACK_URL}
           base_image: ${env.KUBEFLOW_BASE_IMAGE}
           pipelines_api_token: ${env.KUBEFLOW_PIPELINES_TOKEN:=}
+        kvstore:
+          namespace: ragas
+          backend: kv_default
     - provider_id: ${env.EMBEDDING_MODEL:+trustyai_ragas_inline}
       provider_type: inline::trustyai_ragas
       module: llama_stack_provider_ragas.inline
       config:
         embedding_model: ${env.EMBEDDING_MODEL}
-  datasetio:
-    - provider_id: localfs
-      provider_type: inline::localfs
-      config:
         kvstore:
-          type: sqlite
-          namespace: null
-          db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/trustyai_ragas_distro_remote}/localfs_datasetio.db
+          namespace: ragas
+          backend: kv_default
+  datasetio:
+  - provider_id: localfs
+    provider_type: inline::localfs
+    config:
+      kvstore:
+        namespace: datasetio::localfs
+        backend: kv_default
   inference:
     - provider_id: ollama
       provider_type: remote::ollama
       config:
         url: ${env.OLLAMA_URL:=http://localhost:11434}
-  telemetry:
-    - provider_id: meta-reference
-      provider_type: inline::meta-reference
-      config:
-        service_name: "${env.OTEL_SERVICE_NAME:=\u200B}"
-        sinks: ${env.TELEMETRY_SINKS:=console,sqlite}
-        sqlite_db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/trustyai_ragas_distro_remote}/trace_store.db
-        otel_exporter_otlp_endpoint: ${env.OTEL_EXPORTER_OTLP_ENDPOINT:=}
   files:
-    - provider_id: meta-reference-files
-      provider_type: inline::localfs
-      config:
-        storage_dir: ${env.FILES_STORAGE_DIR:=~/.llama/distributions/trustyai_ragas_distro_remote/files}
-        metadata_store:
-          type: sqlite
-          db_path: ${env.METADATA_STORE_DB_PATH:=~/.llama/distributions/trustyai_ragas_distro_remote}/registry.db}
-models:
+  - provider_id: meta-reference-files
+    provider_type: inline::localfs
+    config:
+      storage_dir: ${env.FILES_STORAGE_DIR:=~/.llama/distributions/trustyai_ragas_distro/files}
+      metadata_store:
+        table_name: files_metadata
+        backend: sql_default
+storage:
+  backends:
+    kv_default:
+      type: kv_sqlite
+      db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/trustyai_ragas_distro}/kvstore.db
+    sql_default:
+      type: sql_sqlite
+      db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/trustyai_ragas_distro}/sql_store.db
+  stores:
+    metadata:
+      namespace: registry
+      backend: kv_default
+    inference:
+      table_name: inference_store
+      backend: sql_default
+      max_write_queue_size: 10000
+      num_writers: 4
+    conversations:
+      table_name: openai_conversations
+      backend: sql_default
+registered_resources:
+  models:
   - metadata:
       embedding_dimension: 384
     model_id: all-MiniLM-L6-v2
@@ -68,6 +85,13 @@ models:
     provider_id: ollama
     provider_model_id: granite3.3:2b
     model_type: llm
+  shields: []
+  vector_dbs: []
+  datasets: []
+  scoring_fns: []
+  benchmarks: []
+  tool_groups: []
 server:
-  host: localhost
   port: 8321
+telemetry:
+  enabled: true
diff --git a/pyproject.toml b/pyproject.toml
@@ -4,7 +4,7 @@ build-backend = "hatchling.build"
 
 [project]
 name = "llama-stack-provider-ragas"
-version = "0.4.1"
+version = "0.4.2"
 description = "Ragas evaluation as an out-of-tree Llama Stack provider"
 readme = "README.md"
 requires-python = ">=3.12"
@@ -25,7 +25,7 @@ authors = [
 keywords = ["llama-stack", "ragas", "evaluation"]
 dependencies = [
     "setuptools-scm",
-    "llama-stack>=0.2.23",
+    "llama-stack>0.2.23",
     "greenlet==3.2.4", # inline/files/localfs errors saying greenlet not found
     "ragas==0.3.0",
     "pandas<2.3.0",
@@ -39,7 +39,7 @@ homepage = "https://github.com/trustyai-explainability/llama-stack-provider-raga
 repository = "https://github.com/trustyai-explainability/llama-stack-provider-ragas"
 
 [project.optional-dependencies]
-remote = ["kfp>=2.5.0", "kfp-kubernetes>=2.0.0", "s3fs>=2024.12.0", "kubernetes>=30.0.0"]
+remote = ["kfp>=2.5.0", "kfp-kubernetes>=2.0.0", "kfp-pipeline-spec>=2.0.0", "kfp-server-api>=2.0.0", "s3fs>=2024.12.0", "kubernetes>=30.0.0"]
 distro = ["opentelemetry-api", "opentelemetry-exporter-otlp", "aiosqlite", "ollama", "uvicorn"]
 dev = [
     "llama-stack-provider-ragas[distro]",
diff --git a/src/llama_stack_provider_ragas/config.py b/src/llama_stack_provider_ragas/config.py
@@ -33,11 +33,11 @@ class RagasProviderBaseConfig(BaseModel):
 
     # Looking for the model?
     # It's in the benchmark config's eval_candidate.
-    # You set it as part of the call to `client.eval.run_eval`.
+    # You set it as part of the call to `client.alpha.eval.run_eval`.
 
     # Looking for the sampling params?
     # It's in the benchmark config's eval_candidate.
-    # You set them as part of the call to `client.eval.run_eval`.
+    # You set them as part of the call to `client.alpha.eval.run_eval`.
 
     # Looking for the dataset?
     # It's in the benchmark config's dataset_id.
diff --git a/src/llama_stack_provider_ragas/inline/wrappers_inline.py b/src/llama_stack_provider_ragas/inline/wrappers_inline.py
@@ -3,7 +3,12 @@
 
 from langchain_core.language_models.llms import Generation, LLMResult
 from langchain_core.prompt_values import PromptValue
-from llama_stack.apis.inference import SamplingParams, TopPSamplingStrategy
+from llama_stack.apis.inference import (
+    OpenAICompletionRequestWithExtraBody,
+    OpenAIEmbeddingsRequestWithExtraBody,
+    SamplingParams,
+    TopPSamplingStrategy,
+)
 from ragas.embeddings.base import BaseRagasEmbeddings
 from ragas.llms.base import BaseRagasLLM
 from ragas.run_config import RunConfig
@@ -39,10 +44,11 @@ def embed_documents(self, texts: list[str]) -> list[list[float]]:
     async def aembed_documents(self, texts: list[str]) -> list[list[float]]:
         """Embed documents using Llama Stack inference API."""
         try:
-            response = await self.inference_api.openai_embeddings(
+            request = OpenAIEmbeddingsRequestWithExtraBody(
                 model=self.embedding_model_id,
                 input=texts,
             )
+            response = await self.inference_api.openai_embeddings(request)
             return [data.embedding for data in response.data]
         except Exception as e:
             logger.error(f"Document embedding failed: {str(e)}")
@@ -51,10 +57,11 @@ async def aembed_documents(self, texts: list[str]) -> list[list[float]]:
     async def aembed_query(self, text: str) -> list[float]:
         """Embed query using Llama Stack inference API."""
         try:
-            response = await self.inference_api.openai_embeddings(
+            request = OpenAIEmbeddingsRequestWithExtraBody(
                 model=self.embedding_model_id,
                 input=text,
             )
+            response = await self.inference_api.openai_embeddings(request)
             return response.data[0].embedding  # type: ignore
         except Exception as e:
             logger.error(f"Query embedding failed: {str(e)}")
@@ -109,7 +116,7 @@ async def agenerate_text(
             # sampling params for this generation should be set via the benchmark config
             # we will ignore the temperature and stop params passed in here
             for _ in range(n):
-                response = await self.inference_api.openai_completion(
+                request = OpenAICompletionRequestWithExtraBody(
                     model=self.model_id,
                     prompt=prompt.to_string(),
                     max_tokens=self.sampling_params.max_tokens
@@ -125,6 +132,7 @@ async def agenerate_text(
                     else None,
                     stop=self.sampling_params.stop if self.sampling_params else None,
                 )
+                response = await self.inference_api.openai_completion(request)
 
                 if not response.choices:
                     logger.warning("Completion response returned no choices")
diff --git a/tests/conftest.py b/tests/conftest.py
@@ -35,7 +35,7 @@ def model():
 
 @pytest.fixture
 def embedding_model():
-    return "all-MiniLM-L6-v2"
+    return "ollama/all-minilm:latest"
 
 
 @pytest.fixture
diff --git a/tests/test_inline_evaluation.py b/tests/test_inline_evaluation.py
@@ -39,7 +39,7 @@ def test_single_metric_evaluation(
         provider_id=PROVIDER_ID_INLINE,
     )
 
-    job = lls_client.eval.run_eval(
+    job = lls_client.alpha.eval.run_eval(
         benchmark_id=benchmark_id,
         benchmark_config={
             "eval_candidate": {
diff --git a/uv.lock b/uv.lock

| Original file line number | Diff line number | Diff line change |
| --- | --- | --- |
| | | `@@ -39,7 +39,7 @@ def test_single_metric_evaluation(` |
| `39` | `39` | `provider_id=PROVIDER_ID_INLINE,` |
| `40` | `40` | `)` |
| `41` | `41` | |
| `42` | | `- job = lls_client.eval.run_eval(` |
| | `42` | `+ job = lls_client.alpha.eval.run_eval(` |
| `43` | `43` | `benchmark_id=benchmark_id,` |
| `44` | `44` | `benchmark_config={` |
| `45` | `45` | `"eval_candidate": {` |