Skip to content

Commit 144fd0a

Browse files
committed
Reliably calculate metrics even if runs fail at runtime
1 parent 73f5a5f commit 144fd0a

File tree

1 file changed

+37
-37
lines changed

1 file changed

+37
-37
lines changed

klaudbiusz/cli/evaluate_app_dagger.py

Lines changed: 37 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -250,6 +250,12 @@ async def evaluate_app_async(
250250
else:
251251
print(" [5-7/7] Skipping DB/data/UI checks (runtime failed)")
252252

253+
except Exception as e:
254+
issues.append(f"Evaluation error: {str(e)}")
255+
print(f" ⚠️ Exception during evaluation: {e}")
256+
257+
# Calculate DevX metrics (run even if evaluation failed)
258+
try:
253259
# Metric 8: Local runability
254260
local_score, local_details = check_local_runability(app_dir, template)
255261
metrics.local_runability_score = local_score
@@ -267,9 +273,12 @@ async def evaluate_app_async(
267273
issues.append(
268274
f"Deployability concerns ({deploy_score}/5): {'; '.join([d for d in deploy_details if '✗' in d])}"
269275
)
276+
except Exception as e:
277+
print(f" ⚠️ Could not calculate DevX metrics: {e}")
270278

271-
# Calculate composite score
272-
from eval_metrics import calculate_appeval_100, eff_units
279+
# Calculate composite score (run even if evaluation failed)
280+
try:
281+
from eval_metrics import calculate_appeval_100
273282

274283
metrics.appeval_100 = calculate_appeval_100(
275284
build_success=metrics.build_success,
@@ -282,44 +291,35 @@ async def evaluate_app_async(
282291
local_runability_score=metrics.local_runability_score,
283292
deployability_score=metrics.deployability_score,
284293
)
285-
286-
# Calculate efficiency metric
287-
generation_metrics_file = app_dir / "generation_metrics.json"
288-
if generation_metrics_file.exists():
289-
generation_metrics = json.loads(generation_metrics_file.read_text())
290-
tokens = generation_metrics.get("input_tokens", 0) + generation_metrics.get("output_tokens", 0)
291-
turns = generation_metrics.get("turns")
292-
validations = generation_metrics.get("validation_runs")
293-
294-
metrics.eff_units = eff_units(
295-
tokens_used=tokens if tokens > 0 else None, agent_turns=turns, validation_runs=validations
296-
)
297-
298-
# Add LOC count
299-
metrics.total_loc = sum(1 for f in app_dir.rglob("*.ts") if f.is_file() and "node_modules" not in str(f))
300-
301294
except Exception as e:
302-
issues.append(f"Evaluation error: {str(e)}")
303-
print(f" ⚠️ Exception during evaluation: {e}")
295+
print(f" ⚠️ Could not calculate appeval_100: {e}")
304296

305297
# Calculate efficiency metric (run even if evaluation failed)
306-
try:
307-
import json
308-
from eval_metrics import eff_units
309-
generation_metrics_file = app_dir / "generation_metrics.json"
310-
if generation_metrics_file.exists():
311-
generation_metrics = json.loads(generation_metrics_file.read_text())
312-
tokens = generation_metrics.get("input_tokens", 0) + generation_metrics.get("output_tokens", 0)
313-
turns = generation_metrics.get("turns")
314-
validations = generation_metrics.get("validation_runs")
315-
316-
metrics.eff_units = eff_units(
317-
tokens_used=tokens if tokens > 0 else None,
318-
agent_turns=turns,
319-
validation_runs=validations
320-
)
321-
except Exception as e:
322-
print(f" ⚠️ Could not calculate efficiency: {e}")
298+
if metrics.eff_units is None:
299+
try:
300+
import json
301+
from eval_metrics import eff_units
302+
generation_metrics_file = app_dir / "generation_metrics.json"
303+
if generation_metrics_file.exists():
304+
generation_metrics = json.loads(generation_metrics_file.read_text())
305+
tokens = generation_metrics.get("input_tokens", 0) + generation_metrics.get("output_tokens", 0)
306+
turns = generation_metrics.get("turns")
307+
validations = generation_metrics.get("validation_runs")
308+
309+
metrics.eff_units = eff_units(
310+
tokens_used=tokens if tokens > 0 else None,
311+
agent_turns=turns,
312+
validation_runs=validations
313+
)
314+
except Exception as e:
315+
print(f" ⚠️ Could not calculate efficiency: {e}")
316+
317+
# Calculate LOC count (run even if evaluation failed)
318+
if metrics.total_loc == 0:
319+
try:
320+
metrics.total_loc = sum(1 for f in app_dir.rglob("*.ts") if f.is_file() and "node_modules" not in str(f))
321+
except Exception as e:
322+
print(f" ⚠️ Could not calculate LOC: {e}")
323323

324324
print(f"\nIssues: {len(issues)}")
325325

0 commit comments

Comments
 (0)