Fix critical trajectory analysis issues (#567)

arsenyinfo · web-flow · commit 27be244eb697 · 2025-11-12T14:39:39.000+01:00
diff --git a/edda/edda_mcp/src/providers/io.rs b/edda/edda_mcp/src/providers/io.rs
@@ -36,17 +36,19 @@ pub struct InitiateProjectResult {
     pub work_dir: String,
     pub template_name: String,
     pub template_description: String,
+    pub file_tree: String,
 }
 
 impl ToolResultDisplay for InitiateProjectResult {
     fn display(&self) -> String {
         format!(
-            "Successfully copied {} files from {} template to {}\n\nTemplate: {}\n\n{}",
+            "Successfully copied {} files from {} template to {}\n\nTemplate: {}\n\n{}\n\nFile structure:\n{}",
             self.files_copied,
             self.template_name,
             self.work_dir,
             self.template_name,
-            self.template_description
+            self.template_description,
+            self.file_tree
         )
     }
 }
@@ -168,14 +170,77 @@ impl IOProvider {
         let template_description = template.description().unwrap_or("".to_string());
         let files = template.extract(work_dir)?;
 
+        // generate file tree
+        let file_tree = Self::generate_file_tree(work_dir, &files)?;
+
         Ok(InitiateProjectResult {
             files_copied: files.len(),
             work_dir: work_dir.display().to_string(),
             template_name,
             template_description,
+            file_tree,
         })
     }
 
+    /// Render the extracted template files as a directory-grouped listing.
+    ///
+    /// Paths located under `base_dir` are shown relative to it; any other path
+    /// is used as given. Files are grouped by their parent directory. Both the
+    /// directories and the file names inside them are sorted, so the output
+    /// does not depend on the order of `files`.
+    ///
+    /// Root-level files are always listed. A directory holding more than
+    /// `MAX_FILES_TO_SHOW` files is collapsed into a single `(N files)` line
+    /// to avoid clutter.
+    ///
+    /// Paths are split with `Path::components` instead of on a literal `'/'`,
+    /// so grouping does not depend on the platform separator; directories are
+    /// always printed with `/`.
+    fn generate_file_tree(base_dir: &Path, files: &[PathBuf]) -> Result<String> {
+        use std::collections::BTreeMap;
+        use std::path::Component;
+
+        const MAX_FILES_TO_SHOW: usize = 10;
+
+        // parent directory ("" for the root) -> names of the files directly inside it
+        let mut tree: BTreeMap<String, Vec<String>> = BTreeMap::new();
+
+        for file in files {
+            let relative = file.strip_prefix(base_dir).unwrap_or(file.as_path());
+
+            // keep only named segments; root, prefix and `.` markers carry no name
+            let mut parts: Vec<String> = relative
+                .components()
+                .filter(|part| matches!(part, Component::Normal(_) | Component::ParentDir))
+                .map(|part| part.as_os_str().to_string_lossy().into_owned())
+                .collect();
+
+            // an entry without segments (e.g. `base_dir` itself) has nothing to list
+            if let Some(file_name) = parts.pop() {
+                tree.entry(parts.join("/")).or_default().push(file_name);
+            }
+        }
+
+        // BTreeMap iterates in key order, so the root entry ("") comes first
+        let mut output = String::new();
+        for (dir, names) in &mut tree {
+            names.sort_unstable();
+
+            if dir.is_empty() {
+                // root files - always show all
+                for name in names.iter() {
+                    output.push_str(name);
+                    output.push('\n');
+                }
+                continue;
+            }
+
+            output.push_str(dir);
+            output.push_str("/\n");
+            if names.len() <= MAX_FILES_TO_SHOW {
+                for name in names.iter() {
+                    output.push_str("  ");
+                    output.push_str(name);
+                    output.push('\n');
+                }
+            } else {
+                // collapse large directories
+                output.push_str(&format!("  ({} files)\n", names.len()));
+            }
+        }
+
+        Ok(output)
+    }
+
     #[tool(
         name = "scaffold_data_app",
         description = "Initialize a project by copying template files from the default TypeScript (tRPC + React) template to a work directory. Supports force rewrite to wipe and recreate the directory. It sets up a basic project structure, and should be ALWAYS used as the first step in creating a new data or web app."
diff --git a/edda/edda_templates/template_trpc/CLAUDE.md b/edda/edda_templates/template_trpc/CLAUDE.md
@@ -14,12 +14,39 @@ import { strict as assert } from "node:assert";
 
 ## Databricks Type Handling:
 
+- **executeQuery REQUIRES Zod schema**: Pass the Zod schema object as second parameter, NOT a TypeScript type annotation
+  ```typescript
+  // ❌ WRONG - Do NOT use generic type parameter
+  const result = await client.executeQuery<MyType>(sql);
+
+  // ✅ CORRECT - Pass Zod schema as parameter
+  const mySchema = z.object({ id: z.number(), name: z.string() });
+  const result = await client.executeQuery(sql, mySchema);
+  ```
 - **QueryResult access**: `executeQuery()` returns `{rows: T[], rowCount: number}`. Always use `.rows` property: `const {rows} = await client.executeQuery(...)` or `result.rows.map(...)`
 - **Type imports**: Use `import type { T }` (not `import { T }`) when `verbatimModuleSyntax` is enabled
 - **Column access**: Use bracket notation `row['column_name']` (TypeScript strict mode requirement)
 - **DATE/TIMESTAMP columns**: Databricks returns Date objects. Use `z.coerce.date()` in schemas (never `z.string()` for dates)
 - **Dynamic properties**: Cast explicitly `row['order_id'] as number`
 
+### Helper Utilities:
+
+**`mapRows<T>(rows, schema)`** - Validates and maps raw SQL rows using a Zod schema:
+```typescript
+import { mapRows } from './databricks';
+
+// When you have raw rows and need manual mapping
+const rawRows = [{id: 1, name: "Alice"}, {id: 2, name: "Bob"}];
+const userSchema = z.object({ id: z.number(), name: z.string() });
+const users = mapRows(rawRows, userSchema);
+// users is now typed as { id: number; name: string }[]
+```
+
+Use this when:
+- Processing nested query results
+- Manually mapping row data before returning from tRPC
+- Validating data from non-Databricks sources
+
 ## Frontend Styling Guidelines:
 
 ### Component Structure Pattern:
diff --git a/edda/edda_templates/template_trpc/server/package.json b/edda/edda_templates/template_trpc/server/package.json
@@ -9,7 +9,7 @@
     "db:push": "drizzle-kit push --force",
     "db:push-ci": "yes | npm run db:push",
     "lint": "eslint --cache src/index.ts",
-    "test": "node --test --import tsx src/*.test.ts"
+    "test": "sh -c 'files=$(ls src/*.test.ts src/*.test.tsx 2>/dev/null); if [ -z \"$files\" ]; then echo \"Error: No test files found (*.test.ts or *.test.tsx)\"; exit 1; fi; node --test --import tsx $files'"
   },
   "dependencies": {
     "@databricks/sql": "^1.12.0",
diff --git a/edda/edda_templates/template_trpc/server/src/databricks.ts b/edda/edda_templates/template_trpc/server/src/databricks.ts
@@ -5,12 +5,18 @@
 //   const myTableSchema = z.object({
 //     id: z.number(),
 //     name: z.string(),
-//     created_at: z.string(),
+//     created_at: z.coerce.date(),
 //   });
 //
 //   const client = new DatabricksClient();
+//
+//   // ✅ CORRECT - Pass Zod schema (not TypeScript type)
 //   const result = await client.executeQuery("SELECT * FROM my_table", myTableSchema);
-//   // result.rows is now validated and typed as MyTable[]
+//   // result.rows is now validated and typed as z.infer<typeof myTableSchema>[]
+//
+//   // ❌ WRONG - Do NOT use generic type parameter alone
+//   // const result = await client.executeQuery<MyType>("SELECT ...");
+//   // Fails type-checking (schema is a required argument) and throws at runtime when rows are parsed.
 
 import { DBSQLClient } from "@databricks/sql";
 import type { ConnectionOptions } from "@databricks/sql/dist/contracts/IDBSQLClient";
@@ -75,9 +81,21 @@ export class DatabricksClient {
     }
   }
 
-  async executeQuery<T extends z.ZodTypeAny = typeof defaultRowSchema>(
+  /**
+   * Execute a SQL query against Databricks and validate results with Zod schema.
+   *
+   * @param sql - SQL query string
+   * @param schema - Zod schema for row validation (REQUIRED - pass the schema, not a TypeScript type)
+   * @returns QueryResult with validated and typed rows
+   *
+   * @example
+   * const schema = z.object({ id: z.number(), name: z.string() });
+   * const result = await client.executeQuery("SELECT id, name FROM users", schema);
+   * // result.rows is typed as { id: number; name: string }[]
+   */
+  async executeQuery<T extends z.ZodTypeAny>(
     sql: string,
-    schema?: T,
+    schema: T,
   ): Promise<QueryResult<z.infer<T>>> {
     try {
       const client = new DBSQLClient();
@@ -92,8 +110,8 @@ export class DatabricksClient {
       await session.close();
       await connection.close();
 
-      // Apply schema validation if provided
-      const rows = schema ? result.map((row) => schema.parse(row)) : result;
+      // Apply schema validation
+      const rows = result.map((row) => schema.parse(row));
       return { rows: rows as z.infer<T>[], rowCount: rows.length };
     } catch (error) {
       console.error("Databricks SQL query error:", error);
@@ -106,3 +124,21 @@ export class DatabricksClient {
     }
   }
 }
+
+/**
+ * Validate raw SQL rows against a Zod schema and return the parsed results.
+ * Handy for rows obtained from nested queries or assembled by hand.
+ *
+ * `schema.parse` is called once per row, in order; nothing is caught here,
+ * so any error it throws propagates to the caller.
+ *
+ * @param rows - Raw SQL rows to validate
+ * @param schema - Zod schema each row is parsed with
+ * @returns The parsed rows, in the same order as `rows`
+ *
+ * @example
+ * const rawRows = [{id: 1, name: "Alice"}, {id: 2, name: "Bob"}];
+ * const schema = z.object({ id: z.number(), name: z.string() });
+ * const users = mapRows(rawRows, schema);
+ * // users is typed as { id: number; name: string }[]
+ */
+export function mapRows<T>(rows: SqlRow[], schema: z.ZodSchema<T>): T[] {
+  const parseRow = (row: SqlRow): T => schema.parse(row);
+  return rows.map(parseRow);
+}
diff --git a/edda/edda_templates/template_trpc/server/src/server.test.ts b/edda/edda_templates/template_trpc/server/src/server.test.ts
@@ -6,6 +6,8 @@ import type { Server } from "node:http";
 process.env["DATABRICKS_HOST"] =
   process.env["DATABRICKS_HOST"] || "https://dummy.databricks.com";
 process.env["DATABRICKS_TOKEN"] = process.env["DATABRICKS_TOKEN"] || "dummy_token";
+process.env["DATABRICKS_WAREHOUSE_ID"] =
+  process.env["DATABRICKS_WAREHOUSE_ID"] || "dummy_warehouse_id";
 
 test("server starts and responds to healthcheck", async () => {
   // dynamic import to ensure env vars are set first
@@ -34,3 +36,37 @@ test("server starts and responds to healthcheck", async () => {
     }
   }
 });
+
+// Example: Testing tRPC procedures directly without HTTP server
+// This is faster and simpler for most tests
+//
+// test("getUsers returns array of users", async () => {
+//   const { appRouter } = await import("./index");
+//   const { initTRPC } = await import("@trpc/server");
+//
+//   // create tRPC caller - no HTTP server needed
+//   const t = initTRPC.create();
+//   const caller = t.createCallerFactory(appRouter)({});
+//
+//   const result = await caller.getUsers();
+//
+//   // validate structure
+//   assert.ok(Array.isArray(result));
+//   if (result.length > 0) {
+//     assert.ok(result[0].id);
+//     assert.ok(result[0].name);
+//   }
+// });
+//
+// test("getMetrics with input parameter", async () => {
+//   const { appRouter } = await import("./index");
+//   const { initTRPC } = await import("@trpc/server");
+//
+//   const t = initTRPC.create();
+//   const caller = t.createCallerFactory(appRouter)({});
+//
+//   const result = await caller.getMetrics({ category: "sales" });
+//
+//   assert.ok(Array.isArray(result));
+//   // add assertions for your expected data structure
+// });
diff --git a/klaudbiusz/cli/analyze_trajectories.py b/klaudbiusz/cli/analyze_trajectories.py
@@ -38,9 +38,20 @@ def load_trajectory(path: Path) -> list[TrajectoryStep]:
     return steps
 
 
-def format_tool_arguments(args: dict) -> str:
-    """Format tool arguments as readable JSON."""
-    return json.dumps(args, indent=2)
+def format_tool_arguments(args: dict, max_length: int = 8192) -> str:
+    """Format tool arguments as readable JSON, truncating long strings."""
+
+    def truncate_value(value):
+        if isinstance(value, str) and len(value) > max_length:
+            return f"[truncated {len(value)} chars]"
+        elif isinstance(value, dict):
+            return {k: truncate_value(v) for k, v in value.items()}
+        elif isinstance(value, list):
+            return [truncate_value(item) for item in value]
+        return value
+
+    truncated_args = truncate_value(args)
+    return json.dumps(truncated_args, indent=2)
 
 
 def format_trajectory_to_markdown(steps: list[TrajectoryStep]) -> str:
@@ -73,12 +84,16 @@ def format_trajectory_to_markdown(steps: list[TrajectoryStep]) -> str:
                     if result.get("is_error"):
                         lines.append("**⚠️ ERROR**")
                     lines.append("```")
-                    lines.append(result.get("content", ""))
+                    content = result.get("content", "")
+                    # truncate long tool results (e.g. base64 screenshots)
+                    if isinstance(content, str) and len(content) > 8192:
+                        lines.append(f"[truncated {len(content)} chars]")
+                    else:
+                        lines.append(content)
                     lines.append("```")
                     lines.append("")
 
         lines.append("---\n")
-
     return "\n".join(lines)
 
 
@@ -101,7 +116,6 @@ async def analyze_single_trajectory(trajectory_md: str, app_name: str, model: st
 Provide a concise analysis focusing on actionable insights."""
 
     logger.info(f"🔍 Analyzing trajectory: {app_name}")
-
     response = await litellm.acompletion(
         model=model,
         messages=[{"role": "user", "content": prompt}],
@@ -165,7 +179,6 @@ async def analyze_trajectories_async(
     trajectory_data = [
         (path.parent.name, format_trajectory_to_markdown(load_trajectory(path))) for path in trajectory_paths
     ]
-
     tasks = [
         analyze_single_trajectory(trajectory_md, app_name, map_model) for app_name, trajectory_md in trajectory_data
     ]
diff --git a/klaudbiusz/pyproject.toml b/klaudbiusz/pyproject.toml
@@ -35,6 +35,7 @@ venv = ".venv"
 
 [dependency-groups]
 dev = [
+    "pdbpp>=0.11.7",
     "pyright>=1.1.406",
     "ruff>=0.14.3",
 ]
diff --git a/klaudbiusz/uv.lock b/klaudbiusz/uv.lock
diff --git a/uv.lock b/uv.lock

Original file line number	Diff line number	Diff line change
`@@ -35,6 +35,7 @@ venv = ".venv"`
`35`	`35`
`36`	`36`	`[dependency-groups]`
`37`	`37`	`dev = [`
	`38`	`+ "pdbpp>=0.11.7",`
`38`	`39`	`"pyright>=1.1.406",`
`39`	`40`	`"ruff>=0.14.3",`
`40`	`41`	`]`