Implemented initial version for debugging

2026-01-10 20:57:46 +01:00 · 2023-08-04 08:23:21 +02:00
parent 1ed59ed820
commit 72e4a1cf64
4 changed files with 87 additions and 7 deletions
--- a/euclid/const/function_calls.py
+++ b/euclid/const/function_calls.py
@@ -444,4 +444,58 @@ GET_TEST_TYPE = {
    'functions': {
        'test_changes': lambda type, command=None, automated_test_description=None, manual_test_description=None: (type, command, automated_test_description, manual_test_description)
    }
+}
+
+DEBUG_STEPS_BREAKDOWN = {
+    'definitions': [
+        {
+            'name': 'start_debugging',
+            'description': 'Starts the debugging process based on the list of steps that need to be done to debug the problem.',
+            'parameters': {
+                'type': 'object',
+                "properties": {
+                    "steps": {
+                        'type': 'array',
+                        'description': 'List of steps that need to be done to debug the problem.',
+                        'items': {
+                            'type': 'object',
+                            'description': 'A single step that needs to be done to get closer to debugging this issue.  Remember, if you need to run a command that doesnt\'t finish by itself (eg. a command to run an app), put the timeout to 3000 milliseconds.',
+                            'properties': {
+                                'type': {
+                                    'type': 'string',
+                                    'enum': ['command', 'code_change', 'human_intervention'],
+                                    'description': 'Type of the step that needs to be done to debug this issue.',
+                                },
+                                'command': {
+                                    'type': 'string',
+                                    'description': 'Command that needs to be complete this step in debugging. This should be used only if the task is of a type "command".',
+                                },
+                                'command_timeout': {
+                                    'type': 'number',
+                                    'description': 'Timeout in milliseconds that represent the approximate time the command takes to finish. This should be used only if the task is of a type "command". If you need to run a command that doesnt\'t finish by itself (eg. a command to run an app), put the timeout to 3000 milliseconds.',
+                                },
+                                'code_change_description': {
+                                    'type': 'string',
+                                    'description': 'Description of a step in debugging this issue when there are code changes required. This should be used only if the task is of a type "code_change" and it should thoroughly describe what needs to be done to implement the code change for a single file - it cannot include changes for multiple files.',
+                                },
+                                'human_intervention_description': {
+                                    'type': 'string',
+                                    'description': 'Description of a step in debugging this issue when there is a human intervention needed. This should be used only if the task is of a type "human_intervention".',
+                                },
+                                "check_if_fixed": {
+                                    'type': 'boolean',
+                                    'description': 'Flag that indicates if the original command that triggered the error that\'s being debugged should be tried after this step to check if the error is fixed. If this step is just one step that can\'t fix the error by itself, then `check_if_fixed` should be FALSE. If this step can fix the error by itself, then `check_if_fixed` should be TRUE.',
+                                }
+                            },
+                            'required': ['type', 'check_if_fixed'],
+                        }
+                    }
+                },
+                "required": ['steps'],
+            },
+        },
+    ],
+    'functions': {
+        'start_debugging': lambda steps: steps
+    },
 }
--- a/euclid/helpers/agents/Developer.py
+++ b/euclid/helpers/agents/Developer.py
@@ -52,22 +52,33 @@ class Developer(Agent):

        self.execute_task(convo_dev_task, task_steps)

-    def execute_task(self, convo, task_steps):
+    def execute_task(self, convo, task_steps, test_command=None, reset_convo=True):
        convo.save_branch('after_task_breakdown')
+
        for (i, step) in enumerate(task_steps):
-            convo.load_branch('after_task_breakdown')
+            if reset_convo:
+                convo.load_branch('after_task_breakdown')
+
            if step['type'] == 'command':
                run_command_until_success(step['command'], step['command_timeout'], convo)
+
            elif step['type'] == 'code_change':
                print(f'Implementing code changes for `{step["code_change_description"]}`')
                code_monkey = CodeMonkey(self.project, self)
                updated_convo = code_monkey.implement_code_changes(convo, step['code_change_description'], i)
                self.test_code_changes(code_monkey, updated_convo)
+
            elif step['type'] == 'human_intervention':
-                self.project.ask_for_human_intervention(step['human_intervention_description'])
+                self.project.ask_for_human_intervention('I need your help! Can you try debugging this yourself and let me take over afterwards? Here are the details about the issue:', step['human_intervention_description'])
+
            else:
                raise Exception('Step type must be either run_command or code_change.')
            
+            if test_command is not None and step['check_if_fixed']:
+                response = execute_command_and_check_cli_response(test_command['command'], test_command['timeout'], convo)
+                if response == 'DONE':
+                    return True
+
    def set_up_environment(self):
        self.project.current_step = 'environment_setup'
        self.convo_os_specific_tech = AgentConvo(self)
--- a/euclid/helpers/cli.py
+++ b/euclid/helpers/cli.py
@@ -7,6 +7,7 @@ import time

 from termcolor import colored
 from database.database import get_command_run_from_hash_id, save_command_run
+from const.function_calls import DEBUG_STEPS_BREAKDOWN

 from utils.questionary import styled_text
 from const.code_execution import MAX_COMMAND_DEBUG_TRIES
@@ -144,7 +145,7 @@ def execute_command_and_check_cli_response(command, timeout, convo):

 def run_command_until_success(command, timeout, convo):
    command_executed = False
-    for _ in range(MAX_COMMAND_DEBUG_TRIES):
+    for i in range(MAX_COMMAND_DEBUG_TRIES):
        cli_response = execute_command(convo.agent.project, command, timeout)
        response = convo.send_message('dev_ops/ran_command.prompt',
            {'cli_response': cli_response, 'command': command})
@@ -153,9 +154,20 @@ def run_command_until_success(command, timeout, convo):
        if command_executed:
            break

-        command = response
+        debugging_plan = convo.send_message('dev_ops/debug.prompt',
+            { 'command': command, 'debugging_try_num': i },
+            DEBUG_STEPS_BREAKDOWN)
+
+        # TODO refactor to nicely get the developer agent
+        convo.agent.project.developer.execute_task(
+            convo,
+            debugging_plan,
+            {'command': command, 'timeout': timeout},
+            False)

    if not command_executed:
+        # TODO: explain better how the user should approach debugging
+        # (we could copy the entire convo to the clipboard so they can paste it into the playground)
        convo.agent.project.ask_for_human_intervention(
            'It seems like I cannot debug this problem by myself. Can you please help me and try debugging it yourself?',
            command
--- a/euclid/prompts/dev_ops/debug.prompt
+++ b/euclid/prompts/dev_ops/debug.prompt
@@ -1,2 +1,5 @@
-Ok, we need to debug this issue so we can execute `{{ command }}` successfully. In case you cannot debug this by running any command and need a human assistance, respond with `NEED_HUMAN`. Write the command I need to run next to solve this issue in the following format:
-COMMAND: {command_that_needs_to_be_ran}
+{% if debugging_try_num == 0 %}
+Ok, we need to debug this issue so we can execute `{{ command }}` successfully. In case you cannot debug this by running any command and need human assistance, respond with `NEED_HUMAN`. Write a step-by-step explanation of what needs to be done to debug this issue.
+{% else %}
+I've tried all you suggested but it's still not working. Can you suggest other things I can try to debug this issue?
+{% endif %}