Skip to content

Commit f317249

Browse files
authored
Add databricks v2 prompts (#579)
1 parent 5faec0e commit f317249

File tree

14 files changed

+124
-81
lines changed

14 files changed

+124
-81
lines changed

klaudbiusz/cli/__init__.py

Whitespace-only changes.

klaudbiusz/cli/analyze_trajectories.py

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -20,6 +20,7 @@
2020
query,
2121
)
2222
from dotenv import load_dotenv
23+
2324
from cli.utils.shared import build_mcp_command, validate_mcp_manifest
2425

2526
logger = logging.getLogger(__name__)
@@ -348,6 +349,7 @@ async def analyze_with_agent(
348349
system_prompt=base_instructions,
349350
permission_mode="bypassPermissions",
350351
disallowed_tools=disallowed_tools,
352+
model="claude-opus-4-5",
351353
allowed_tools=[
352354
"Read",
353355
"Glob",

klaudbiusz/cli/evaluation/evaluate_all.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -34,7 +34,7 @@
3434

3535
# Load environment variables from .env file
3636
env_paths = [
37-
Path(__file__).parent.parent.parent / "edda" / ".env",
37+
Path(__file__).parent.parent.parent.parent / "edda" / ".env",
3838
Path(__file__).parent.parent / ".env",
3939
]
4040
for env_path in env_paths:

klaudbiusz/cli/evaluation/evaluate_app.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -35,7 +35,7 @@
3535

3636
# Load environment variables from .env file - try multiple locations
3737
env_paths = [
38-
Path(__file__).parent.parent.parent / "edda" / ".env",
38+
Path(__file__).parent.parent.parent.parent / "edda" / ".env",
3939
Path(__file__).parent.parent / ".env",
4040
Path(__file__).parent / ".env",
4141
]

klaudbiusz/cli/evaluation/evaluate_app_dagger.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -45,7 +45,7 @@
4545

4646
# Load environment variables
4747
env_paths = [
48-
Path(__file__).parent.parent.parent / "edda" / ".env",
48+
Path(__file__).parent.parent.parent.parent / "edda" / ".env",
4949
Path(__file__).parent.parent / ".env",
5050
Path(__file__).parent / ".env",
5151
]

klaudbiusz/cli/generation/bulk_run.py

Lines changed: 13 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -46,10 +46,10 @@ def run_single_generation(
4646
backend: str,
4747
model: str | None,
4848
wipe_db: bool = False,
49-
use_subagents: bool = False,
5049
suppress_logs: bool = True,
5150
mcp_binary: str | None = None,
5251
mcp_json: str | None = None,
52+
mcp_args: list[str] | None = None,
5353
output_dir: str | None = None,
5454
) -> RunResult:
5555
# re-apply litellm patch in worker process (joblib uses spawn/fork)
@@ -70,9 +70,9 @@ def timeout_handler(signum, frame):
7070
app_name=app_name,
7171
wipe_db=wipe_db,
7272
suppress_logs=suppress_logs,
73-
use_subagents=use_subagents,
7473
mcp_binary=mcp_binary,
7574
mcp_json_path=mcp_json,
75+
mcp_args=mcp_args,
7676
output_dir=output_dir,
7777
)
7878
metrics = codegen.run(prompt, wipe_db=wipe_db)
@@ -85,6 +85,7 @@ def timeout_handler(signum, frame):
8585
model=model,
8686
mcp_binary=mcp_binary,
8787
mcp_json_path=mcp_json,
88+
mcp_args=mcp_args,
8889
suppress_logs=suppress_logs,
8990
output_dir=output_dir,
9091
)
@@ -147,20 +148,20 @@ def main(
147148
model: str | None = None,
148149
wipe_db: bool = False,
149150
n_jobs: int = -1,
150-
use_subagents: bool = False,
151151
mcp_binary: str | None = None,
152152
mcp_json: str | None = None,
153+
mcp_args: list[str] | None = None,
153154
output_dir: str | None = None,
154155
) -> None:
155156
"""Bulk app generation from predefined prompt sets.
156157
157158
Args:
158-
prompts: Prompt set to use ("databricks" or "test", default: "databricks")
159+
prompts: Prompt set to use ("databricks", "databricks_v2", or "test", default: "databricks")
159160
backend: Backend to use ("claude" or "litellm", default: "claude")
160161
model: LLM model (required if backend=litellm, e.g., "openrouter/minimax/minimax-m2")
161162
wipe_db: Whether to wipe database on start
162163
n_jobs: Number of parallel jobs (-1 for all cores)
163-
use_subagents: Whether to enable subagent delegation (claude backend only)
164+
mcp_args: Optional list of args passed to the MCP server (overrides defaults)
164165
mcp_binary: Optional path to pre-built edda-mcp binary (default: use cargo run)
165166
mcp_json: Optional path to JSON config file for edda_mcp
166167
output_dir: Custom output directory for generated apps (default: ./app)
@@ -169,6 +170,9 @@ def main(
169170
# Claude backend (default) with databricks prompts (default)
170171
python bulk_run.py
171172
173+
# Claude backend with databricks_v2 prompts
174+
python bulk_run.py --prompts=databricks_v2
175+
172176
# Claude backend with test prompts
173177
python bulk_run.py --prompts=test
174178
@@ -195,10 +199,12 @@ def main(
195199
match prompts:
196200
case "databricks":
197201
from cli.generation.prompts.databricks import PROMPTS as selected_prompts
202+
case "databricks_v2":
203+
from cli.generation.prompts.databricks_v2 import PROMPTS as selected_prompts
198204
case "test":
199205
from cli.generation.prompts.web import PROMPTS as selected_prompts
200206
case _:
201-
raise ValueError(f"Unknown prompt set: {prompts}. Use 'databricks' or 'test'")
207+
raise ValueError(f"Unknown prompt set: {prompts}. Use 'databricks', 'databricks_v2', or 'test'")
202208

203209
# validate backend-specific requirements
204210
if backend == "litellm" and not model:
@@ -216,14 +222,13 @@ def main(
216222
print(f"Parallel jobs: {n_jobs}")
217223
if backend == "claude":
218224
print(f"Wipe DB: {wipe_db}")
219-
print(f"Use subagents: {use_subagents}")
220225
print(f"MCP binary: {mcp_binary if mcp_binary else 'cargo run (default)'}")
221226
print(f"Output dir: {output_dir if output_dir else './app (default)'}\n")
222227

223228
# generate all apps
224229
results: list[RunResult] = Parallel(n_jobs=n_jobs, backend="loky", verbose=10)( # type: ignore[assignment]
225230
delayed(run_single_generation)(
226-
app_name, prompt, backend, model, wipe_db, use_subagents, suppress_logs, mcp_binary, mcp_json, output_dir
231+
app_name, prompt, backend, model, wipe_db, suppress_logs, mcp_binary, mcp_json, mcp_args, output_dir
227232
)
228233
for app_name, prompt in selected_prompts.items()
229234
)

klaudbiusz/cli/generation/codegen.py

Lines changed: 11 additions & 60 deletions
Original file line number | Diff line number | Diff line change
@@ -8,7 +8,6 @@
88
from uuid import UUID, uuid4
99

1010
from claude_agent_sdk import (
11-
AgentDefinition,
1211
AssistantMessage,
1312
ClaudeAgentOptions,
1413
ResultMessage,
@@ -19,6 +18,7 @@
1918
query,
2019
)
2120
from dotenv import load_dotenv
21+
2222
from cli.utils.shared import ScaffoldTracker, Tracker, build_mcp_command, setup_logging, validate_mcp_manifest
2323

2424
try:
@@ -44,35 +44,6 @@ class ToolInput:
4444
prompt: str = ""
4545

4646

47-
def _parse_agent_definition(agent_file: Path) -> tuple[dict[str, str], str] | None:
48-
"""Parse agent markdown file with YAML frontmatter.
49-
50-
Returns:
51-
Tuple of (frontmatter_dict, content) or None if parsing fails
52-
"""
53-
if not agent_file.exists():
54-
return None
55-
56-
content = agent_file.read_text()
57-
58-
# frontmatter must start and end with ---
59-
if not content.startswith("---"):
60-
return None
61-
62-
parts = content.split("---", 2)
63-
if len(parts) < 3:
64-
return None
65-
66-
# parse simple yaml-like frontmatter manually
67-
frontmatter = {}
68-
for line in parts[1].strip().split("\n"):
69-
if ":" in line:
70-
key, value = line.split(":", 1)
71-
frontmatter[key.strip()] = value.strip()
72-
73-
return frontmatter, parts[2].strip()
74-
75-
7647
class GenerationMetrics(TypedDict):
7748
cost_usd: float
7849
input_tokens: int
@@ -88,22 +59,22 @@ def __init__(
8859
app_name: str,
8960
wipe_db: bool = True,
9061
suppress_logs: bool = False,
91-
use_subagents: bool = False,
9262
mcp_binary: str | None = None,
9363
mcp_json_path: str | None = None,
64+
mcp_args: list[str] | None = None,
9465
output_dir: str | None = None,
9566
):
9667
load_dotenv()
97-
self.project_root = Path(__file__).parent.parent.parent
68+
self.project_root = Path(__file__).parent.parent.parent.parent
9869
self.mcp_manifest = validate_mcp_manifest(mcp_binary, self.project_root)
9970

10071
self.wipe_db = wipe_db
10172
self.run_id: UUID = uuid4()
10273
self.app_name = app_name
103-
self.use_subagents = use_subagents
10474
self.suppress_logs = suppress_logs
10575
self.mcp_binary = mcp_binary
10676
self.mcp_json_path = mcp_json_path
77+
self.mcp_args = mcp_args
10778
self.output_dir = Path(output_dir) if output_dir else Path.cwd() / "app"
10879
self.tracker = Tracker(self.run_id, app_name, suppress_logs)
10980
self.scaffold_tracker = ScaffoldTracker()
@@ -115,43 +86,23 @@ async def run_async(self, prompt: str) -> GenerationMetrics:
11586
await self.tracker.init(wipe_db=self.wipe_db)
11687

11788
agents = {}
118-
if self.use_subagents:
119-
agents_dir = self.project_root / "klaudbiusz" / "agents"
120-
dataresearch_file = agents_dir / "dataresearch.md"
121-
122-
if parsed := _parse_agent_definition(dataresearch_file):
123-
frontmatter, content = parsed
124-
tools_str = frontmatter.get("tools", "")
125-
tools = [t.strip() for t in tools_str.split(",")] if tools_str else None
126-
127-
agents["dataresearch"] = AgentDefinition(
128-
description=frontmatter.get("description", ""),
129-
prompt=content,
130-
tools=tools,
131-
model=frontmatter.get("model"), # type: ignore[arg-type]
132-
)
13389

13490
# workflow and template best practices are now in the MCP tool description
135-
base_instructions = "Use Edda MCP tools to scaffold, build, and test the app as needed.\n Use data from Databricks when relevant.\n"
136-
137-
if self.use_subagents:
138-
base_instructions += """When you need to explore Databricks tables, schemas, or execute SQL queries, use the Task tool to delegate to the 'dataresearch' subagent. Do NOT use databricks_* tools directly.\n"""
139-
140-
base_instructions += """Be concise and to the point in your responses.\n
141-
Use up to 10 tools per call to speed up the process.\n"""
91+
base_instructions = """Use MCP tools to scaffold, build, and test the app as needed.
92+
Use data from Databricks when relevant.
93+
Be concise and to the point in your responses.
94+
Use up to 10 tools per call to speed up the process.
95+
Never deploy the app, just scaffold and build it.
96+
"""
14297

14398
disallowed_tools = [
14499
"NotebookEdit",
145100
"WebSearch",
146101
"WebFetch",
147102
]
148103

149-
# NOTE: We cannot use disallowed_tools to block Databricks tools from the main agent
150-
# because disallowed_tools applies globally to ALL agents (including subagents).
151-
# The CLI doesn't support per-agent tool permissions yet.
152-
# Instead, we rely on system prompt instructions to enforce delegation.
104+
command, args = build_mcp_command(self.mcp_binary, self.mcp_manifest, self.mcp_json_path, self.mcp_args)
153105

154-
command, args = build_mcp_command(self.mcp_binary, self.mcp_manifest, self.mcp_json_path)
155106
mcp_config = {
156107
"type": "stdio",
157108
"command": command,

klaudbiusz/cli/generation/codegen_multi.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -34,7 +34,7 @@ class MCPSession:
3434
def __init__(self, mcp_binary: str | None = None, mcp_json_path: str | None = None):
3535
self.mcp_binary = mcp_binary
3636
self.mcp_json_path = mcp_json_path
37-
self.project_root = Path(__file__).parent.parent.parent
37+
self.project_root = Path(__file__).parent.parent.parent.parent
3838
self.mcp_manifest = validate_mcp_manifest(mcp_binary, self.project_root)
3939

4040
self._context = None

0 commit comments

Comments (0)