Skip to content

Commit 0cebca4

Browse files
committed
Extend Python checks to klaudbiusz and scripts
- Add ruff and pyright checks for klaudbiusz/ and scripts/
- Fix all linting and type errors
- Make CI checks blocking (remove continue-on-error)
- Add helpful failure messages with fix commands
- Drop unused streamlit dependency
- Add missing anthropic dependency to klaudbiusz
1 parent 7eed0d6 commit 0cebca4

File tree

10 files changed

+199
-35
lines changed

10 files changed

+199
-35
lines changed

.github/workflows/python.yml

Lines changed: 54 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -6,9 +6,13 @@ on:
66
- main
77
paths:
88
- "agent/**"
9+
- "klaudbiusz/**"
10+
- "scripts/**"
911
pull_request:
1012
paths:
1113
- "agent/**"
14+
- "klaudbiusz/**"
15+
- "scripts/**"
1216

1317
env:
1418
CONTAINER_NAME: agent-apiserver-1
@@ -37,7 +41,55 @@ jobs:
3741
with:
3842
version: ">=0.11.5"
3943
args: check .
40-
continue-on-error: true
44+
45+
- name: Show fix instructions on failure
46+
if: failure()
47+
run: |
48+
echo "❌ Ruff checks failed!"
49+
echo ""
50+
echo "To fix automatically, run:"
51+
echo " uv run ruff check . --fix"
52+
echo ""
53+
echo "To check specific directories:"
54+
echo " uv run ruff check klaudbiusz/ --extend-exclude='node_modules'"
55+
echo " uv run ruff check scripts/"
56+
57+
type-check:
58+
name: Type Check (Pyright)
59+
runs-on: ubuntu-latest
60+
timeout-minutes: 5
61+
steps:
62+
- name: Checkout repository
63+
uses: actions/checkout@v3
64+
65+
- name: Install uv
66+
uses: astral-sh/setup-uv@v5
67+
with:
68+
version: "0.7.3"
69+
70+
- name: Type check agent
71+
working-directory: ./agent/
72+
run: uv run pyright .
73+
74+
- name: Type check klaudbiusz
75+
working-directory: ./klaudbiusz/
76+
run: uv run pyright .
77+
78+
- name: Type check scripts
79+
working-directory: ./
80+
run: uv run pyright scripts/
81+
82+
- name: Show fix instructions on failure
83+
if: failure()
84+
run: |
85+
echo "❌ Pyright type checks failed!"
86+
echo ""
87+
echo "To check types locally, run:"
88+
echo " cd agent && uv run pyright ."
89+
echo " cd klaudbiusz && uv run pyright ."
90+
echo " uv run pyright scripts/"
91+
echo ""
92+
echo "Fix type errors by adding proper type annotations or type: ignore comments."
4193
4294
rust-checks:
4395
name: Rust Checks
@@ -70,7 +122,6 @@ jobs:
70122
- name: Run cargo clippy
71123
working-directory: ./dabgent
72124
run: cargo clippy -- -W warnings
73-
continue-on-error: true
74125

75126
rust-tests:
76127
name: Rust Tests
@@ -97,12 +148,10 @@ jobs:
97148
- name: Run unit tests
98149
working-directory: ./dabgent
99150
run: cargo test --lib --all
100-
continue-on-error: true
101151

102152
- name: Run integration tests (excluding e2e)
103153
working-directory: ./dabgent
104154
run: cargo test --all --test '*' -- --skip e2e_generation
105-
continue-on-error: true
106155

107156
# E2E tests disabled until e2e_generation.rs is properly implemented
108157
# rust-e2e-tests:
@@ -317,6 +366,7 @@ jobs:
317366
health-check,
318367
e2e-tests,
319368
lint,
369+
type-check,
320370
rust-checks,
321371
rust-tests,
322372
]

klaudbiusz/cli/bulk_run.py

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -68,11 +68,12 @@ def enrich_results_with_screenshots(results: list[RunResult]) -> None:
6868
result["screenshot_path"] = str(screenshot_path) if screenshot_path.exists() else None
6969

7070
# check if logs exist and are non-empty
71-
has_logs = False
7271
if logs_path.exists():
7372
try:
74-
has_logs = logs_path.stat().st_size > 0
75-
result["browser_logs_path"] = str(logs_path)
73+
if logs_path.stat().st_size > 0:
74+
result["browser_logs_path"] = str(logs_path)
75+
else:
76+
result["browser_logs_path"] = None
7677
except Exception:
7778
result["browser_logs_path"] = None
7879
else:
@@ -81,7 +82,7 @@ def enrich_results_with_screenshots(results: list[RunResult]) -> None:
8182

8283
def run_single_generation(app_name: str, prompt: str, wipe_db: bool = False, use_subagents: bool = False) -> RunResult:
8384
def timeout_handler(signum, frame):
84-
raise TimeoutError(f"Generation timed out after 900 seconds")
85+
raise TimeoutError("Generation timed out after 900 seconds")
8586

8687
try:
8788
# set 15 minute timeout for entire generation

klaudbiusz/cli/evaluate_all.py

Lines changed: 12 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -13,17 +13,18 @@
1313
import os
1414
import subprocess
1515
import sys
16-
from datetime import datetime
17-
from dotenv import load_dotenv
18-
19-
# Load environment variables from .env file
20-
load_dotenv()
2116
import time
2217
from collections import Counter, defaultdict
2318
from dataclasses import asdict, dataclass
19+
from datetime import datetime
2420
from pathlib import Path
2521
from typing import Any
2622

23+
from dotenv import load_dotenv
24+
25+
# load environment variables from .env file
26+
load_dotenv()
27+
2728
# Load environment variables
2829
try:
2930
from dotenv import load_dotenv
@@ -39,10 +40,11 @@
3940
pass
4041

4142
try:
42-
import anthropic
43+
import anthropic # type: ignore[import-untyped]
4344
ANTHROPIC_AVAILABLE = True
4445
except ImportError:
4546
ANTHROPIC_AVAILABLE = False
47+
anthropic = None # type: ignore[assignment]
4648

4749

4850
@dataclass
@@ -221,7 +223,7 @@ def evaluate_app(app_dir: Path, prompt: str | None = None) -> EvalResult:
221223
import base64
222224
image_data = base64.standard_b64encode(screenshot_path.read_bytes()).decode("utf-8")
223225

224-
client = anthropic.Anthropic(api_key=os.environ.get("ANTHROPIC_API_KEY"))
226+
client = anthropic.Anthropic(api_key=os.environ.get("ANTHROPIC_API_KEY")) # type: ignore[union-attr]
225227
message = client.messages.create(
226228
model="claude-sonnet-4-5-20250929",
227229
max_tokens=100,
@@ -266,7 +268,6 @@ def evaluate_app(app_dir: Path, prompt: str | None = None) -> EvalResult:
266268

267269
# Metric 8: Local runability
268270
local_score = 0
269-
local_details = []
270271
readme = app_dir / "README.md"
271272
if readme.exists() and any(w in readme.read_text().lower() for w in ["setup", "installation"]):
272273
local_score += 1
@@ -285,8 +286,8 @@ def evaluate_app(app_dir: Path, prompt: str | None = None) -> EvalResult:
285286
local_score += 1
286287
if "start" in pkg_data.get("scripts", {}):
287288
local_score += 1
288-
except Exception as e:
289-
# Silently fail but at least we tried
289+
except Exception:
290+
# silently fail but at least we tried
290291
pass
291292
if (app_dir / "server" / "src" / "index.ts").exists():
292293
local_score += 1
@@ -902,7 +903,7 @@ def main():
902903
print(f" 8. Local Runability: {metrics['local_runability_avg']:.1f}/5 ⭐")
903904
print(f" 9. Deployability: {metrics['deployability_avg']:.1f}/5 ⭐")
904905

905-
print(f"\nQuality Distribution:")
906+
print("\nQuality Distribution:")
906907
qual = summary["quality_distribution"]
907908
print(f" 🟢 Excellent: {len(qual['excellent'])}")
908909
print(f" 🟡 Good: {len(qual['good'])}")

klaudbiusz/cli/evaluate_app.py

Lines changed: 9 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -18,16 +18,17 @@
1818
import json
1919
import os
2020
import subprocess
21-
from dotenv import load_dotenv
22-
23-
# Load environment variables from .env file
24-
load_dotenv()
2521
import sys
2622
import time
2723
from dataclasses import asdict, dataclass
2824
from pathlib import Path
2925
from typing import Any
3026

27+
from dotenv import load_dotenv
28+
29+
# load environment variables from .env file
30+
load_dotenv()
31+
3132
# Load environment variables from .env file
3233
try:
3334
from dotenv import load_dotenv
@@ -46,9 +47,9 @@
4647
print("Warning: python-dotenv not installed, relying on system environment variables")
4748

4849
try:
49-
import anthropic
50+
import anthropic # type: ignore[import-untyped]
5051
except ImportError:
51-
anthropic = None
52+
anthropic = None # type: ignore[assignment]
5253

5354

5455
@dataclass
@@ -351,7 +352,7 @@ def check_data_validity_llm(app_dir: Path, prompt: str | None) -> tuple[int, str
351352
],
352353
)
353354

354-
response_text = message.content[0].text
355+
response_text = message.content[0].text # type: ignore[union-attr]
355356
score = 0
356357
issues = "Unknown"
357358

@@ -435,7 +436,7 @@ def check_ui_functional_vlm(app_dir: Path, prompt: str | None) -> tuple[bool, st
435436
],
436437
)
437438

438-
response_text = message.content[0].text.strip().upper()
439+
response_text = message.content[0].text.strip().upper()  # type: ignore[union-attr]
439440

440441
# Binary check: PASS or FAIL
441442
if "PASS" in response_text:

klaudbiusz/cli/generate_eval_viewer.py

Lines changed: 2 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -9,14 +9,11 @@
99
def generate_html_viewer(eval_json_path: Path, output_path: Path):
1010
"""Generate a standalone HTML viewer for evaluation results."""
1111

12-
# Read evaluation data
12+
# read evaluation data
1313
with open(eval_json_path) as f:
1414
data = json.load(f)
1515

16-
summary = data.get("summary", {})
17-
apps = data.get("apps", [])
18-
19-
# Embed the JSON data directly in the HTML
16+
# embed the JSON data directly in the HTML
2017
json_data = json.dumps(data, indent=2)
2118

2219
html_content = f"""<!DOCTYPE html>

klaudbiusz/pyproject.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@ dependencies = [
1111
"python-dotenv>=1.0.0",
1212
"tqdm>=4.66.0",
1313
"joblib>=1.4.0",
14+
"anthropic>=0.40.0",
1415
]
1516

1617
[tool.ruff]

0 commit comments

Comments
 (0)