neondatabase
diff --git a/‎klaudbiusz/README.md‎
Lines changed: 17 additions & 3 deletions b/‎klaudbiusz/README.md‎
Lines changed: 17 additions & 3 deletions
diff --git a/‎klaudbiusz/cli/dagger_utils.py‎
Lines changed: 26 additions & 0 deletions b/‎klaudbiusz/cli/dagger_utils.py‎
Lines changed: 26 additions & 0 deletions
diff --git a/‎klaudbiusz/cli/eval/dbx-sdk/build.sh‎
Lines changed: 19 additions & 0 deletions b/‎klaudbiusz/cli/eval/dbx-sdk/build.sh‎
Lines changed: 19 additions & 0 deletions
diff --git a/‎klaudbiusz/cli/eval/dbx-sdk/install.sh‎
Lines changed: 15 additions & 0 deletions b/‎klaudbiusz/cli/eval/dbx-sdk/install.sh‎
Lines changed: 15 additions & 0 deletions
diff --git a/‎klaudbiusz/cli/eval/dbx-sdk/start.sh‎
Lines changed: 5 additions & 2 deletions b/‎klaudbiusz/cli/eval/dbx-sdk/start.sh‎
Lines changed: 5 additions & 2 deletions
diff --git a/‎klaudbiusz/cli/eval/docker/build.sh‎
Lines changed: 18 additions & 0 deletions b/‎klaudbiusz/cli/eval/docker/build.sh‎
Lines changed: 18 additions & 0 deletions
diff --git a/‎klaudbiusz/cli/eval/docker/install.sh‎
Lines changed: 33 additions & 0 deletions b/‎klaudbiusz/cli/eval/docker/install.sh‎
Lines changed: 33 additions & 0 deletions
diff --git a/‎klaudbiusz/cli/eval/docker/start.sh‎
Lines changed: 2 additions & 2 deletions b/‎klaudbiusz/cli/eval/docker/start.sh‎
Lines changed: 2 additions & 2 deletions
diff --git a/‎klaudbiusz/cli/eval/trpc/build.sh‎
Lines changed: 35 additions & 0 deletions b/‎klaudbiusz/cli/eval/trpc/build.sh‎
Lines changed: 35 additions & 0 deletions
diff --git a/‎klaudbiusz/cli/eval/trpc/install.sh‎
Lines changed: 30 additions & 0 deletions b/‎klaudbiusz/cli/eval/trpc/install.sh‎
Lines changed: 30 additions & 0 deletions
@@ -54,18 +54,32 @@ cd klaudbiusz
 # Evaluate all apps
 uv run cli/evaluate_all.py
 
+# Parallel evaluation (faster for large batches)
+uv run cli/evaluate_all.py -j 4                         # Run 4 evaluations in parallel
+uv run cli/evaluate_all.py -j 0                         # Auto-detect CPU count
+uv run cli/evaluate_all.py --parallel 8                 # Long form
+
 # Partial evaluation (filter apps)
 uv run cli/evaluate_all.py --limit 5                    # First 5 apps
 uv run cli/evaluate_all.py --apps app1 app2             # Specific apps
 uv run cli/evaluate_all.py --pattern "customer*"        # Pattern matching
 uv run cli/evaluate_all.py --skip 10 --limit 5          # Skip first 10, evaluate next 5
+uv run cli/evaluate_all.py --start-from app5            # Start from specific app
+
+# Custom directory
+uv run cli/evaluate_all.py --dir /path/to/apps          # Evaluate apps in custom directory
+
+# Staging environment (for testing)
+uv run cli/evaluate_all.py --staging                    # Log to staging MLflow experiment
 
 # Evaluate single app
 uv run cli/evaluate_app.py ../app/customer-churn-analysis
 ```
 
 **Results are automatically logged to MLflow:** Navigate to `ML → Experiments → /Shared/klaudbiusz-evaluations` in Databricks UI / Dogfooding.
 
+**Performance:** Parallel evaluation with `-j` can provide 3-4x speedup for large batches (e.g., 20 apps in 5 min vs 15+ min sequential).
+
 ## Evaluation Framework
 
 We use **9 objective metrics** to measure autonomous deployability:
@@ -143,7 +157,7 @@ klaudbiusz/
 
 1. Write natural language prompt
 2. Generate: `uv run cli/single_run.py "your prompt"` or `uv run cli/bulk_run.py`
-3. Evaluate: `uv run cli/evaluate_all.py`
+3. Evaluate: `uv run cli/evaluate_all.py -j 0` (parallel, auto-detect CPUs)
 4. Review: `cat EVALUATION_REPORT.md`
 5. Deploy apps that pass checks
 
@@ -169,9 +183,9 @@ shasum -a 256 -c klaudbiusz_evaluation_*.tar.gz.sha256
 
 ## Requirements
 
-- Python 3.11+
+- Python 3.12+
 - uv (Python package manager)
-- Docker (for builds and runtime checks)
+- Docker (for Dagger containerized evaluations)
 - Node.js 18+ (for generated apps)
 - Databricks workspace with access token
 
 
@@ -0,0 +1,26 @@
+"""Simplified Dagger utilities for klaudbiusz evaluation."""
+
+import dagger
+from typing import Self
+
+
+class ExecResult:
+    """Result of executing a command in a Dagger container."""
+
+    exit_code: int
+    stdout: str
+    stderr: str
+
+    def __init__(self, exit_code: int, stdout: str, stderr: str):
+        self.exit_code = exit_code
+        self.stdout = stdout
+        self.stderr = stderr
+
+    @classmethod
+    async def from_ctr(cls, ctr: dagger.Container) -> Self:
+        """Create ExecResult from a Dagger container."""
+        return cls(
+            exit_code=await ctr.exit_code(),
+            stdout=await ctr.stdout(),
+            stderr=await ctr.stderr(),
+        )
@@ -0,0 +1,19 @@
+#!/bin/bash
+set -e
+
+# DBX SDK template: build step.
+# The build is driven by the package.json in the current directory: when it
+# declares a "build" script, that script is run through npm. A missing
+# package.json or a missing "build" entry only produces a warning on stderr;
+# the script does not fail in those cases.
+
+echo "Building application..." >&2
+
+if [ ! -f "package.json" ]; then
+    echo "⚠️  No package.json found" >&2
+elif ! grep -q '"build"' package.json 2>/dev/null; then
+    echo "⚠️  No build script found in package.json" >&2
+else
+    echo "Building from root..." >&2
+    # With `set -e` a failing build aborts the script before the success line.
+    npm run build
+    echo "✅ Build successful" >&2
+fi
@@ -0,0 +1,15 @@
+#!/bin/bash
+set -e
+
+# DBX SDK template: dependency installation.
+# Expects a single package.json in the current directory and installs from it.
+
+echo "Installing dependencies..." >&2
+
+# Without a package.json there is nothing to install; report it and fail.
+if [ ! -f "package.json" ]; then
+    echo "⚠️  No package.json found" >&2
+    exit 1
+fi
+
+# With `set -e` a failing install aborts the script before the success line.
+npm install
+echo "✅ Dependencies installed" >&2
@@ -39,6 +39,9 @@ if [ -z "$DATABRICKS_HOST" ] || [ -z "$DATABRICKS_TOKEN" ]; then
     exit 1
 fi
 
+# Set default port if not provided
+DATABRICKS_APP_PORT="${DATABRICKS_APP_PORT:-8000}"
+
 # Verify package.json exists
 if [ ! -f "package.json" ]; then
     echo "❌ Error: No package.json found in root directory" >&2
@@ -61,13 +64,13 @@ fi
 # Health check with retries (3 attempts, 2s timeout each, 1s apart)
 for i in {1..3}; do
     # Try healthcheck endpoint first
-    if curl -f -s --max-time 2 http://localhost:8000/healthcheck >/dev/null 2>&1; then
+    if curl -f -s --max-time 2 http://localhost:${DATABRICKS_APP_PORT}/healthcheck >/dev/null 2>&1; then
         echo "✅ App ready (healthcheck)" >&2
         exit 0
     fi
 
     # Fallback to root endpoint for npm apps
-    if curl -f -s --max-time 2 http://localhost:8000/ >/dev/null 2>&1; then
+    if curl -f -s --max-time 2 http://localhost:${DATABRICKS_APP_PORT}/ >/dev/null 2>&1; then
         echo "✅ App ready (root)" >&2
         exit 0
     fi
 
@@ -0,0 +1,18 @@
+#!/bin/bash
+set -e
+
+# Docker template: build step.
+# Builds an image from the Dockerfile in the current directory and tags it
+# "eval-<name>", where <name> is DATABRICKS_APP_NAME or "app" when that
+# variable is unset or empty.
+
+echo "Building Docker image..." >&2
+
+if [ -f "Dockerfile" ]; then
+    # Get app name from DATABRICKS_APP_NAME env var or use default
+    APP_NAME="${DATABRICKS_APP_NAME:-app}"
+
+    # With `set -e` a failing build aborts the script before the success line.
+    docker build -t "eval-${APP_NAME}" .
+    echo "✅ Docker image built successfully" >&2
+else
+    echo "⚠️  No Dockerfile found" >&2
+    exit 1
+fi
@@ -0,0 +1,33 @@
+#!/bin/bash
+set -e
+
+# Docker template: Install dependencies
+# This script handles various project structures (trpc, dbx-sdk, or custom).
+#
+# Resolution order:
+#   1. Root package.json declaring an "install:all" script -> npm run install:all
+#   2. Root package.json without that script               -> npm install
+#   3. No root package.json -> install server/ and then client/ (or, failing
+#      that, frontend/), for whichever of them has its own package.json
+#
+# Per-directory installs run inside a subshell on purpose. `set -e` ignores a
+# failure of any command in an `&&` list other than the last one, so a plain
+# `cd dir && npm install && cd ..` would let a failed install fall through to
+# the success message and would leave the script inside the subdirectory.
+# With `(cd dir && npm install)` the install is the final command of the list,
+# its failure aborts the script, and this shell's working directory never
+# changes.
+
+echo "Installing dependencies..." >&2
+
+# Check if root package.json has install:all script (trpc style)
+if [ -f "package.json" ] && grep -q '"install:all"' package.json 2>/dev/null; then
+    echo "Running npm run install:all..." >&2
+    npm run install:all
+elif [ -f "package.json" ]; then
+    # Root-level app (dbx-sdk style)
+    echo "Installing root dependencies..." >&2
+    npm install
+else
+    # Install server/client separately if they exist
+    if [ -d "server" ] && [ -f "server/package.json" ]; then
+        echo "Installing server dependencies..." >&2
+        (cd server && npm install)
+    fi
+
+    if [ -d "client" ] && [ -f "client/package.json" ]; then
+        echo "Installing client dependencies..." >&2
+        (cd client && npm install)
+    elif [ -d "frontend" ] && [ -f "frontend/package.json" ]; then
+        echo "Installing frontend dependencies..." >&2
+        (cd frontend && npm install)
+    fi
+fi
+
+echo "✅ Dependencies installed" >&2
@@ -66,7 +66,7 @@ ENV_VARS+=("-e" "DATABRICKS_APP_PORT=${DATABRICKS_APP_PORT}")
 ENV_VARS+=("-e" "FLASK_RUN_HOST=${FLASK_RUN_HOST}")
 
 # Run the container
-docker run -d -p 8000:8000 \
+docker run -d -p ${DATABRICKS_APP_PORT}:8000 \
     --name "${CONTAINER_NAME}" \
     ${ENV_FILE_ARGS} \
     "${ENV_VARS[@]}" \
@@ -84,7 +84,7 @@ fi
 # Health check with retries (3 attempts, 2s timeout each, 1s apart)
 # Docker apps should have proper /healthcheck endpoint
 for i in {1..3}; do
-    if curl -f -s --max-time 2 http://localhost:8000/healthcheck >/dev/null 2>&1; then
+    if curl -f -s --max-time 2 http://localhost:${DATABRICKS_APP_PORT}/healthcheck >/dev/null 2>&1; then
         echo "✅ App ready (healthcheck)" >&2
         exit 0
     fi
 
@@ -0,0 +1,35 @@
+#!/bin/bash
+set -e
+
+# tRPC template: Build the application
+# For tRPC, we build the client (React frontend).
+#
+# Resolution order:
+#   1. client/package.json exists   -> run its "build" script, if declared
+#   2. frontend/package.json exists -> run its "build" script, if declared
+#   3. otherwise                    -> run the root package.json "build" script,
+#                                      if declared
+# A missing "build" script only produces a warning on stderr.
+#
+# Sub-directory builds run inside a subshell on purpose. `set -e` ignores a
+# failure of any command in an `&&` list other than the last one, so a plain
+# `cd dir && npm run build && cd ..` would let a failed build fall through to
+# the success message. With `(cd dir && npm run build)` the build is the final
+# command of the list, its failure aborts the script with a non-zero status,
+# and this shell's working directory never changes.
+
+echo "Building application..." >&2
+
+# Build client if it exists
+if [ -d "client" ] && [ -f "client/package.json" ]; then
+    if grep -q '"build"' client/package.json 2>/dev/null; then
+        echo "Building client..." >&2
+        (cd client && npm run build)
+        echo "✅ Client built successfully" >&2
+    else
+        echo "⚠️  No build script found in client/package.json" >&2
+    fi
+elif [ -d "frontend" ] && [ -f "frontend/package.json" ]; then
+    if grep -q '"build"' frontend/package.json 2>/dev/null; then
+        echo "Building frontend..." >&2
+        (cd frontend && npm run build)
+        echo "✅ Frontend built successfully" >&2
+    else
+        echo "⚠️  No build script found in frontend/package.json" >&2
+    fi
+else
+    # Try root-level build
+    if [ -f "package.json" ] && grep -q '"build"' package.json 2>/dev/null; then
+        echo "Building from root..." >&2
+        npm run build
+        echo "✅ Build successful" >&2
+    else
+        echo "⚠️  No build script found" >&2
+    fi
+fi
@@ -0,0 +1,30 @@
+#!/bin/bash
+set -e
+
+# tRPC template: Install dependencies
+# This script handles dependency installation for tRPC monorepo structure
+
+echo "Installing dependencies..." >&2
+
+# Check if root package.json has install:all script
+if [ -f "package.json" ] && grep -q '"install:all"' package.json 2>/dev/null; then
+    echo "Running npm run install:all..." >&2
+    npm run install:all
+else
+    # Install server dependencies
+    if [ -d "server" ] && [ -f "server/package.json" ]; then
+        echo "Installing server dependencies..." >&2
+        cd server && npm install && cd ..
+    fi
+
+    # Install client dependencies (try both client/ and frontend/)
+    if [ -d "client" ] && [ -f "client/package.json" ]; then
+        echo "Installing client dependencies..." >&2
+        cd client && npm install && cd ..
+    elif [ -d "frontend" ] && [ -f "frontend/package.json" ]; then
+        echo "Installing frontend dependencies..." >&2
+        cd frontend && npm install && cd ..
+    fi
+fi
+
+echo "✅ Dependencies installed" >&2