Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions rdagent/components/coder/CoSTEER/evolving_strategy.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,9 @@


class MultiProcessEvolvingStrategy(EvolvingStrategy):

KEY_CHANGE_SUMMARY = "__change_summary__" # Optional key for the summary of the change of evolving subjects

def __init__(self, scen: Scenario, settings: CoSTEERSettings):
super().__init__(scen)
self.settings = settings
Expand Down Expand Up @@ -51,6 +54,7 @@ def implement_one_task(
Return
------
The new files {<filename>: <content>} to update the workspace.
- Special Keys: self.KEY_CHANGE_SUMMARY;
"""
raise NotImplementedError

Expand Down
3 changes: 0 additions & 3 deletions rdagent/components/workflow/rd_loop.py
Original file line number Diff line number Diff line change
Expand Up @@ -87,6 +87,3 @@ def feedback(self, prev_out: dict[str, Any]):
feedback = self.summarizer.generate_feedback(prev_out["running"], self.trace)
logger.log_object(feedback, tag="feedback")
self.trace.hist.append((prev_out["running"], feedback))

# TODO: `def record(self, prev_out: dict[str, Any]):` has already been hard coded into LoopBase
# So we should add it into RDLoop class to make sure every RDLoop Sub Class be aware of it.
1 change: 1 addition & 0 deletions rdagent/core/experiment.py
Original file line number Diff line number Diff line change
Expand Up @@ -138,6 +138,7 @@ def __init__(self, *args: Any, **kwargs: Any) -> None:
{}
) # The code injected into the folder, store them in the variable to reproduce the former result
self.workspace_path: Path = RD_AGENT_SETTINGS.workspace_path / uuid.uuid4().hex
self.change_summary: str | None = None # The change from the previous version of workspace

@staticmethod
def _format_code_dict(code_dict: dict[str, str]) -> str:
Expand Down
87 changes: 58 additions & 29 deletions rdagent/scenarios/data_science/dev/runner/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,57 +51,84 @@ def implement_one_task(
# if no prev_task_feedback, it is the first loop; we do not make any changes and goto evaluators directly.
return {}

# Output Agent Map
output_map = {
True: (PythonBatchPatchOut.get_spec(), PythonBatchPatchOut.extract_output),
False: (
PythonBatchEditOut.get_spec(with_del=False),
PythonBatchEditOut.extract_output,
),
}
output_spec, extract_output_fn = output_map[self.settings.diff_mode]
# Get previous runner loops
task_info = target_task.get_task_information()
queried_former_failed_knowledge = (
queried_knowledge.task_to_former_failed_traces[task_info] if queried_knowledge is not None else []
)
queried_former_failed_knowledge = (
[
knowledge
for knowledge in queried_former_failed_knowledge[0]
if knowledge.implementation.file_dict.get("main.py") != workspace.file_dict.get("main.py")
],
queried_former_failed_knowledge[1],
)

# Set output agent
if self.settings.diff_mode:
output_spec = PythonBatchPatchOut.get_spec()
extract_output_fn = PythonBatchPatchOut.extract_output
else:
output_spec = PythonBatchEditOut.get_spec(with_del=False)
extract_output_fn = PythonBatchEditOut.extract_output

if prev_task_feedback.hyperparameter_tuning_decision:
# Use system_refine for hyperparameter tuning
system_prompt = T(".prompts:DSCoSTEER.system_refine").r(
status_desc=self.scen.describe_current_status(
stage="Running",
step="coder",
max_loop=DS_RD_SETTING.runner_max_loop,
cur_loop=len(queried_former_failed_knowledge), # FIXME: any better way to get this?
),
out_spec=output_spec,
diff_mode=self.settings.diff_mode,
)
else:
task_information_str = target_task.get_task_information()
# Use system_debugger for error fixing and debugging
system_prompt = T(".prompts:DSCoSTEER.system_refine").r(
system_prompt = T(".prompts:DSCoSTEER.system_debugger").r(
status_desc=self.scen.describe_current_status(
stage="Running",
step="coder",
max_loop=DS_RD_SETTING.runner_max_loop,
cur_loop=len(queried_former_failed_knowledge), # FIXME: any better way to get this?
),
task_desc=task_information_str,
out_spec=output_spec,
diff_mode=self.settings.diff_mode,
)

# Generate user prompt for both cases
# Multi-turn chat session
session = APIBackend().build_chat_session(
session_system_prompt=system_prompt,
)

# Code
user_prompt = T(".prompts:DSCoSTEER.user").r(
code=workspace.all_codes,
feedback=prev_task_feedback,
hyperparameter_tuning_suggestion=prev_task_feedback.hyperparameter_tuning_suggestion,
queried_former_failed_knowledge=queried_former_failed_knowledge[0],
)

code = session.build_chat_completion(user_prompt=user_prompt)
if self.settings.diff_mode:
batch_edit = extract_output_fn(
APIBackend().build_messages_and_create_chat_completion(
user_prompt=user_prompt,
system_prompt=system_prompt,
),
prefix=workspace.workspace_path,
)
code_batch_edit = extract_output_fn(code, prefix=workspace.workspace_path)
else:
batch_edit = extract_output_fn(
APIBackend().build_messages_and_create_chat_completion(
user_prompt=user_prompt,
system_prompt=system_prompt,
)
)

batch_edit = {k: v for k, v in batch_edit.items() if k in workspace.file_dict.keys()}

return batch_edit
code_batch_edit = extract_output_fn(code)
code_batch_edit = {k: v for k, v in code_batch_edit.items() if k in workspace.file_dict.keys()}

# Change Summary
user_prompt = (
"Based on the previous conversation and your latest code modifications, "
"please provide a concise and structured summary of the changes you made to the original code. "
"Clearly specify what was changed and how, focusing on key modifications. "
"Limit your summary to plain text, no more than three sentences."
)
change_summary = session.build_chat_completion(user_prompt=user_prompt)
code_batch_edit.update({"__change_summary__": change_summary})
return code_batch_edit

def assign_code_list_to_evo(self, code_list: list[dict[str, str]], evo):
"""
Expand All @@ -116,6 +143,8 @@ def assign_code_list_to_evo(self, code_list: list[dict[str, str]], evo):
if evo.sub_workspace_list[index] is None:
# evo.sub_workspace_list[index] = FBWorkspace(target_task=evo.sub_tasks[index])
evo.sub_workspace_list[index] = evo.experiment_workspace
if self.KEY_CHANGE_SUMMARY in code_list[index]:
evo.sub_workspace_list[index].change_summary = code_list[index].pop(self.KEY_CHANGE_SUMMARY)
evo.sub_workspace_list[index].inject_files(**code_list[index])
return evo

Expand Down
40 changes: 38 additions & 2 deletions rdagent/scenarios/data_science/dev/runner/eval.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,22 @@ def __init__(
self.hyperparameter_tuning_decision = hyperparameter_tuning_decision
self.hyperparameter_tuning_suggestion = hyperparameter_tuning_suggestion

def __str__(self) -> str:
    """Render this feedback as a human-readable, sectioned Markdown report.

    Sections (in order): execution output, return checking, code feedback,
    and the final SUCCESS/FAIL decision.  A hyperparameter-tuning
    suggestion section is appended only when
    ``hyperparameter_tuning_decision`` is truthy.
    """
    parts = [
        "### Execution",
        str(self.execution),
        "### Return Check",
        # Fall back to an explicit placeholder when no return checking was recorded.
        self.return_checking if self.return_checking is not None else "No return checking",
        "### Code",
        str(self.code),
        "### Final Decision",
        f"This implementation is {'SUCCESS' if self.final_decision else 'FAIL'}.",
    ]
    if self.hyperparameter_tuning_decision:
        parts.append("### Hyperparameter Tuning Suggestion")
        # str() guards against a non-string (e.g. None) suggestion value.
        parts.append(str(self.hyperparameter_tuning_suggestion))
    return "\n".join(parts)


class DSCoSTEERCoSTEEREvaluator(CoSTEEREvaluator):

Expand All @@ -50,7 +66,6 @@ def evaluate(
queried_knowledge: QueriedKnowledge = None,
**kwargs,
) -> DSCoSTEEREvalFeedback:

env = get_ds_env(
extra_volumes={
f"{DS_RD_SETTING.local_data_path}/{self.scen.competition}": T(
Expand All @@ -62,7 +77,21 @@ def evaluate(

stdout = implementation.execute(
env=env, entry=get_clear_ws_cmd()
) # Remove previous submission and scores files generated by worklfow.
) # Remove previous submission and scores files generated by workflow.

# get previous runner loops
task_info = target_task.get_task_information()
queried_former_failed_knowledge = (
queried_knowledge.task_to_former_failed_traces[task_info] if queried_knowledge is not None else []
)
queried_former_failed_knowledge = (
[
knowledge
for knowledge in queried_former_failed_knowledge[0]
if knowledge.implementation.file_dict.get("main.py") != implementation.file_dict.get("main.py")
],
queried_former_failed_knowledge[1],
)

# execute workflow
result = implementation.run(env=env, entry="python -m coverage run main.py")
Expand Down Expand Up @@ -130,6 +159,12 @@ def evaluate(
stdout += f"\nSubmission check:\n{submission_check_out}\nIf Submission check returns a 'Submission is valid' or similar message, despite some warning messages, you should still consider the submission as valid and give a positive final decision. "

system_prompt = T(".prompts:DSCoSTEER_eval.system").r(
status_desc=self.scen.describe_current_status(
stage="Running",
step="evaluator",
max_loop=DS_RD_SETTING.runner_max_loop,
cur_loop=len(queried_former_failed_knowledge), # FIXME: any better way to get this?
),
scenario=self.scen.get_scenario_all_desc(eda_output=implementation.file_dict.get("EDA.md", None)),
is_sub_enabled=test_eval.is_sub_enabled(self.scen.competition),
task_desc=target_task.get_task_information(),
Expand All @@ -140,6 +175,7 @@ def evaluate(
time_spent=f"{implementation.running_info.running_time:.2f} seconds",
timeout=f"{env.conf.running_timeout_period} seconds",
percent_of_timeout_used=f"{(implementation.running_info.running_time / env.conf.running_timeout_period) * 100:.2f}%",
queried_former_failed_knowledge=queried_former_failed_knowledge[0],
)

feedback = build_cls_from_json_with_retry(
Expand Down
Loading