neondatabase
diff --git a/.github/workflows/python.yml b/.github/workflows/python.yml
Lines changed: 54 additions & 4 deletions
diff --git a/klaudbiusz/cli/bulk_run.py b/klaudbiusz/cli/bulk_run.py
Lines changed: 5 additions & 4 deletions
diff --git a/klaudbiusz/cli/evaluate_all.py b/klaudbiusz/cli/evaluate_all.py
Lines changed: 12 additions & 11 deletions
diff --git a/klaudbiusz/cli/evaluate_app.py b/klaudbiusz/cli/evaluate_app.py
Lines changed: 9 additions & 8 deletions
diff --git a/klaudbiusz/cli/generate_eval_viewer.py b/klaudbiusz/cli/generate_eval_viewer.py
Lines changed: 2 additions & 5 deletions
diff --git a/klaudbiusz/pyproject.toml b/klaudbiusz/pyproject.toml
Lines changed: 1 addition & 0 deletions
@@ -6,9 +6,13 @@ on:
       - main
     paths:
       - "agent/**"
+      - "klaudbiusz/**"
+      - "scripts/**"
   pull_request:
     paths:
       - "agent/**"
+      - "klaudbiusz/**"
+      - "scripts/**"
 
 env:
   CONTAINER_NAME: agent-apiserver-1
@@ -37,7 +41,55 @@ jobs:
         with:
           version: ">=0.11.5"
           args: check .
-        continue-on-error: true
+
+      - name: Show fix instructions on failure
+        if: failure()
+        run: |
+          echo "❌ Ruff checks failed!"
+          echo ""
+          echo "To fix automatically, run:"
+          echo "  uv run ruff check . --fix"
+          echo ""
+          echo "To check specific directories:"
+          echo "  uv run ruff check klaudbiusz/ --extend-exclude='node_modules'"
+          echo "  uv run ruff check scripts/"
+
+  type-check:
+    name: Type Check (Pyright)
+    runs-on: ubuntu-latest
+    timeout-minutes: 5
+    steps:
+      - name: Checkout repository
+        uses: actions/checkout@v3
+
+      - name: Install uv
+        uses: astral-sh/setup-uv@v5
+        with:
+          version: "0.7.3"
+
+      - name: Type check agent
+        working-directory: ./agent/
+        run: uv run pyright .
+
+      - name: Type check klaudbiusz
+        working-directory: ./klaudbiusz/
+        run: uv run pyright .
+
+      - name: Type check scripts
+        working-directory: ./
+        run: uv run pyright scripts/
+
+      - name: Show fix instructions on failure
+        if: failure()
+        run: |
+          echo "❌ Pyright type checks failed!"
+          echo ""
+          echo "To check types locally, run:"
+          echo "  cd agent && uv run pyright ."
+          echo "  cd klaudbiusz && uv run pyright ."
+          echo "  uv run pyright scripts/"
+          echo ""
+          echo "Fix type errors by adding proper type annotations or type: ignore comments."
 
   rust-checks:
     name: Rust Checks
@@ -70,7 +122,6 @@ jobs:
       - name: Run cargo clippy
         working-directory: ./dabgent
         run: cargo clippy -- -W warnings
-        continue-on-error: true
 
   rust-tests:
     name: Rust Tests
@@ -97,12 +148,10 @@ jobs:
       - name: Run unit tests
         working-directory: ./dabgent
         run: cargo test --lib --all
-        continue-on-error: true
 
       - name: Run integration tests (excluding e2e)
         working-directory: ./dabgent
         run: cargo test --all --test '*' -- --skip e2e_generation
-        continue-on-error: true
 
   # E2E tests disabled until e2e_generation.rs is properly implemented
   # rust-e2e-tests:
@@ -317,6 +366,7 @@ jobs:
         health-check,
         e2e-tests,
         lint,
+        type-check,
         rust-checks,
         rust-tests,
       ]
 
@@ -68,11 +68,12 @@ def enrich_results_with_screenshots(results: list[RunResult]) -> None:
         result["screenshot_path"] = str(screenshot_path) if screenshot_path.exists() else None
 
         # check if logs exist and are non-empty
-        has_logs = False
         if logs_path.exists():
             try:
-                has_logs = logs_path.stat().st_size > 0
-                result["browser_logs_path"] = str(logs_path)
+                if logs_path.stat().st_size > 0:
+                    result["browser_logs_path"] = str(logs_path)
+                else:
+                    result["browser_logs_path"] = None
             except Exception:
                 result["browser_logs_path"] = None
         else:
@@ -81,7 +82,7 @@ def enrich_results_with_screenshots(results: list[RunResult]) -> None:
 
 def run_single_generation(app_name: str, prompt: str, wipe_db: bool = False, use_subagents: bool = False) -> RunResult:
     def timeout_handler(signum, frame):
-        raise TimeoutError(f"Generation timed out after 900 seconds")
+        raise TimeoutError("Generation timed out after 900 seconds")
 
     try:
         # set 15 minute timeout for entire generation
 
@@ -13,17 +13,18 @@
 import os
 import subprocess
 import sys
-from datetime import datetime
-from dotenv import load_dotenv
-
-# Load environment variables from .env file
-load_dotenv()
 import time
 from collections import Counter, defaultdict
 from dataclasses import asdict, dataclass
+from datetime import datetime
 from pathlib import Path
 from typing import Any
 
+from dotenv import load_dotenv
+
+# load environment variables from .env file
+load_dotenv()
+
 # Load environment variables
 try:
     from dotenv import load_dotenv
@@ -39,10 +40,11 @@
     pass
 
 try:
-    import anthropic
+    import anthropic  # type: ignore[import-untyped]
     ANTHROPIC_AVAILABLE = True
 except ImportError:
     ANTHROPIC_AVAILABLE = False
+    anthropic = None  # type: ignore[assignment]
 
 
 @dataclass
@@ -221,7 +223,7 @@ def evaluate_app(app_dir: Path, prompt: str | None = None) -> EvalResult:
                 import base64
                 image_data = base64.standard_b64encode(screenshot_path.read_bytes()).decode("utf-8")
 
-                client = anthropic.Anthropic(api_key=os.environ.get("ANTHROPIC_API_KEY"))
+                client = anthropic.Anthropic(api_key=os.environ.get("ANTHROPIC_API_KEY"))  # type: ignore[union-attr]
                 message = client.messages.create(
                     model="claude-sonnet-4-5-20250929",
                     max_tokens=100,
@@ -266,7 +268,6 @@ def evaluate_app(app_dir: Path, prompt: str | None = None) -> EvalResult:
 
     # Metric 8: Local runability
     local_score = 0
-    local_details = []
     readme = app_dir / "README.md"
     if readme.exists() and any(w in readme.read_text().lower() for w in ["setup", "installation"]):
         local_score += 1
@@ -285,8 +286,8 @@ def evaluate_app(app_dir: Path, prompt: str | None = None) -> EvalResult:
             local_score += 1
             if "start" in pkg_data.get("scripts", {}):
                 local_score += 1
-        except Exception as e:
-            # Silently fail but at least we tried
+        except Exception:
+            # silently fail but at least we tried
             pass
     if (app_dir / "server" / "src" / "index.ts").exists():
         local_score += 1
@@ -902,7 +903,7 @@ def main():
     print(f"  8. Local Runability:      {metrics['local_runability_avg']:.1f}/5 ⭐")
     print(f"  9. Deployability:         {metrics['deployability_avg']:.1f}/5 ⭐")
 
-    print(f"\nQuality Distribution:")
+    print("\nQuality Distribution:")
     qual = summary["quality_distribution"]
     print(f"  🟢 Excellent: {len(qual['excellent'])}")
     print(f"  🟡 Good:      {len(qual['good'])}")
 
@@ -18,16 +18,17 @@
 import json
 import os
 import subprocess
-from dotenv import load_dotenv
-
-# Load environment variables from .env file
-load_dotenv()
 import sys
 import time
 from dataclasses import asdict, dataclass
 from pathlib import Path
 from typing import Any
 
+from dotenv import load_dotenv
+
+# load environment variables from .env file
+load_dotenv()
+
 # Load environment variables from .env file
 try:
     from dotenv import load_dotenv
@@ -46,9 +47,9 @@
     print("Warning: python-dotenv not installed, relying on system environment variables")
 
 try:
-    import anthropic
+    import anthropic  # type: ignore[import-untyped]
 except ImportError:
-    anthropic = None
+    anthropic = None  # type: ignore[assignment]
 
 
 @dataclass
@@ -351,7 +352,7 @@ def check_data_validity_llm(app_dir: Path, prompt: str | None) -> tuple[int, str
             ],
         )
 
-        response_text = message.content[0].text
+        response_text = message.content[0].text  # type: ignore[union-attr]
         score = 0
         issues = "Unknown"
 
@@ -435,7 +436,7 @@ def check_ui_functional_vlm(app_dir: Path, prompt: str | None) -> tuple[bool, st
             ],
         )
 
-        response_text = message.content[0].text.strip().upper()
+        response_text = message.content[0].text.strip().upper()  # type: ignore[union-attr]
 
         # Binary check: PASS or FAIL
         if "PASS" in response_text:
 
@@ -9,14 +9,11 @@
 def generate_html_viewer(eval_json_path: Path, output_path: Path):
     """Generate a standalone HTML viewer for evaluation results."""
 
-    # Read evaluation data
+    # read evaluation data
     with open(eval_json_path) as f:
         data = json.load(f)
 
-    summary = data.get("summary", {})
-    apps = data.get("apps", [])
-
-    # Embed the JSON data directly in the HTML
+    # embed the JSON data directly in the HTML
     json_data = json.dumps(data, indent=2)
 
     html_content = f"""<!DOCTYPE html>
 
@@ -11,6 +11,7 @@ dependencies = [
     "python-dotenv>=1.0.0",
     "tqdm>=4.66.0",
     "joblib>=1.4.0",
+    "anthropic>=0.40.0",
 ]
 
 [tool.ruff]
Original file line number	Diff line number	Diff line change
`@@ -11,6 +11,7 @@ dependencies = [`
`11`	`11`	`"python-dotenv>=1.0.0",`
`12`	`12`	`"tqdm>=4.66.0",`
`13`	`13`	`"joblib>=1.4.0",`
	`14`	`+ "anthropic>=0.40.0",`
`14`	`15`	`]`
`15`	`16`
`16`	`17`	`[tool.ruff]`