From 186b1312f632ed6bd92b2fa6326b26ca65a058bf Mon Sep 17 00:00:00 2001
From: Zvonimir Sabljic
Date: Thu, 3 Aug 2023 11:20:50 +0200
Subject: [PATCH] Implemented testing of code changes

---
 euclid/const/function_calls.py | 43 +++++++++++++++++++
 euclid/helpers/agents/CodeMonkey.py | 17 +++++++-
 euclid/helpers/agents/Developer.py | 26 +++++------
 .../task/request_test_files.prompt | 4 +-
 .../task/step/write_automated_test.prompt | 2 +-
 .../development/task/step_check.prompt | 8 ++--
 6 files changed, 80 insertions(+), 20 deletions(-)

diff --git a/euclid/const/function_calls.py b/euclid/const/function_calls.py
index 88e3a55..6f565b9 100644
--- a/euclid/const/function_calls.py
+++ b/euclid/const/function_calls.py
@@ -351,4 +351,47 @@ IMPLEMENT_CHANGES = {
     'functions': {
         'save_files': lambda files: files
     }
+}
+
+GET_TEST_TYPE = {
+    'definitions': [{
+        'name': 'test_changes',
+        'description': 'Tests the changes based on the test type.',
+        'parameters': {
+            'type': 'object',
+            'properties': {
+                'type': {
+                    'type': 'string',
+                    'description': 'Type of test that needs to be run. It can be "automated_test", "command_test" or "manual_test".',
+                    'enum': ['automated_test', 'command_test', 'manual_test']
+                },
+                'command': {
+                    'type': 'object',
+                    'description': 'Command that needs to be run to test the changes. This should be used only if the test type is "command_test". Remember, if you need to run a command that doesn\'t finish by itself (e.g. a command to run an app), set the timeout to 3 seconds.',
+                    'properties': {
+                        'command': {
+                            'type': 'string',
+                            'description': 'Command that needs to be run to test the changes.',
+                        },
+                        'timeout': {
+                            'type': 'number',
+                            'description': 'Timeout in seconds that represents the approximate time this command takes to finish. If you need to run a command that doesn\'t finish by itself (e.g. a command to run an app), set the timeout to 3 seconds.',
+                        }
+                    },
+                },
+                'automated_test_description': {
+                    'type': 'string',
+                    'description': 'Description of an automated test that needs to be run to test the changes. This should be used only if the test type is "automated_test".',
+                },
+                'manual_test_description': {
+                    'type': 'string',
+                    'description': 'Description of a manual test that needs to be run to test the changes. This should be used only if the test type is "manual_test".',
+                }
+            },
+            'required': ['type'],
+        },
+    }],
+    'functions': {
+        'test_changes': lambda type, command=None, automated_test_description=None, manual_test_description=None: (type, command, automated_test_description, manual_test_description)
+    }
 }
\ No newline at end of file
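
Illustrative sketch (not part of the patch): how a definition like GET_TEST_TYPE gets consumed. The dispatch_function_call helper below is hypothetical; in this codebase the equivalent mapping happens inside AgentConvo.send_message(). Only GET_TEST_TYPE itself comes from const.function_calls:

import json

from const.function_calls import GET_TEST_TYPE

def dispatch_function_call(function_calls, message):
    # 'message' mimics an LLM function-call payload: a function name plus
    # JSON-encoded arguments that follow the schema in 'definitions'.
    args = json.loads(message['arguments'])
    return function_calls['functions'][message['name']](**args)

message = {
    'name': 'test_changes',
    'arguments': json.dumps({
        'type': 'command_test',
        'command': {'command': 'npm test', 'timeout': 3},
    }),
}

# The 'test_changes' lambda maps the arguments onto the 4-tuple that
# Developer.test_code_changes() unpacks:
# ('command_test', {'command': 'npm test', 'timeout': 3}, None, None)
test_type, command, automated_desc, manual_desc = dispatch_function_call(GET_TEST_TYPE, message)
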
diff --git a/euclid/helpers/agents/CodeMonkey.py b/euclid/helpers/agents/CodeMonkey.py
index f2c3688..dd8ad2e 100644
--- a/euclid/helpers/agents/CodeMonkey.py
+++ b/euclid/helpers/agents/CodeMonkey.py
@@ -40,4 +40,19 @@ class CodeMonkey(Agent):
         for file_data in changes:
             update_file(self.project.get_full_file_path(file_data['name']), file_data['content'])
 
-        self.developer.test_changes()
\ No newline at end of file
+        self.developer.test_code_changes(self, convo)
+
+    def implement_test(self, convo, automated_test_description):
+        files_needed = convo.send_message('development/task/request_test_files.prompt', {
+            "testing_files_tree": self.project.get_directory_tree(),
+        }, GET_FILES)
+
+        changes = convo.send_message('development/task/step/write_automated_test.prompt', {
+            "files": self.project.get_files(files_needed),
+        }, IMPLEMENT_CHANGES)
+
+        for file_data in changes:
+            update_file(self.project.get_full_file_path(file_data['name']), file_data['content'])
+
+        self.developer.run_test_and_debug()
+        self.developer.run_all_tests_and_debug()
diff --git a/euclid/helpers/agents/Developer.py b/euclid/helpers/agents/Developer.py
index 6be4757..d6fac6f 100644
--- a/euclid/helpers/agents/Developer.py
+++ b/euclid/helpers/agents/Developer.py
@@ -7,7 +7,7 @@ from helpers.Agent import Agent
 from helpers.AgentConvo import AgentConvo
 from utils.utils import execute_step, array_of_objects_to_string, generate_app_data
 from helpers.cli import build_directory_tree, run_command_until_success, execute_command_and_check_cli_response
-from const.function_calls import FILTER_OS_TECHNOLOGIES, DEVELOPMENT_PLAN, EXECUTE_COMMANDS, DEV_STEPS
+from const.function_calls import FILTER_OS_TECHNOLOGIES, DEVELOPMENT_PLAN, EXECUTE_COMMANDS, DEV_STEPS, GET_TEST_TYPE
 from database.database import save_progress, get_progress_steps
 from utils.utils import get_os_info
 from helpers.cli import execute_command
@@ -163,19 +163,19 @@ class Developer(Agent):
         code_monkey = CodeMonkey(self.project, self)
         code_monkey.implement_code_changes(code_changes_description)
 
-    def test_code_changes(self, code_changes_description):
-        verification_type = convo.send_message('development/step_check.prompt', {
-            "instructions": code_changes_description,
-            "directory_tree": self.project.get_directory_tree(),
-            "files": self.project.get_files(files_needed),
-        }, CHANGE_VERIFICATION)
+    def test_code_changes(self, code_monkey, convo):
+        (test_type, command, automated_test_description, manual_test_description) = convo.send_message('development/task/step_check.prompt', {}, GET_TEST_TYPE)
 
-        if verification_type == 'command':
-            pass
-        elif verification_type == 'automated_test':
-            pass
-        elif verification_type == 'manual_test':
-            pass
+        if test_type == 'command_test':
+            run_command_until_success(command['command'], command['timeout'], convo)
+        elif test_type == 'automated_test':
+            code_monkey.implement_test(convo, automated_test_description)
+        elif test_type == 'manual_test':
+            # TODO make the message better
+            self.project.ask_for_human_verification(
+                'Message from Euclid: I need your help. Can you please test if this was successful?',
+                manual_test_description
+            )
 
     def implement_step(self, convo, step_index, type, description):
         # TODO remove hardcoded folder path
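
Another illustrative sketch, not part of the patch: the CodeMonkey/Developer handshake this change introduces. implement_code_changes() passes the CodeMonkey instance into test_code_changes() so that, when the model picks the automated path, the Developer can call back into implement_test(). A self-contained round trip with all LLM interaction stubbed out:

class Developer:
    def test_code_changes(self, code_monkey, convo):
        # Stand-in for the GET_TEST_TYPE call; pretend the model chose
        # the automated path.
        test_type = 'automated_test'
        if test_type == 'automated_test':
            code_monkey.implement_test(convo, 'cover the new endpoint with a Jest test')

class CodeMonkey:
    def __init__(self, developer):
        self.developer = developer

    def implement_code_changes(self, convo):
        # ...files written here... then verification is handed back to the
        # Developer, passing ourselves along for the automated-test callback.
        self.developer.test_code_changes(self, convo)

    def implement_test(self, convo, automated_test_description):
        print('writing test:', automated_test_description)

dev = Developer()
CodeMonkey(dev).implement_code_changes(convo=None)
# -> writing test: cover the new endpoint with a Jest test
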
diff --git a/euclid/prompts/development/task/request_test_files.prompt b/euclid/prompts/development/task/request_test_files.prompt
index 905ce75..8c5ffe8 100644
--- a/euclid/prompts/development/task/request_test_files.prompt
+++ b/euclid/prompts/development/task/request_test_files.prompt
@@ -1,2 +1,4 @@
 Ok, now, I will show you the list of all files with automated tests that are written so far and I want you to tell me which automated tests do you want to see so that you can propriatelly modify tests or create new ones.
-{{ testing_files_tree }}
\ No newline at end of file
+{{ testing_files_tree }}
+
+Remember, ask for files relative to the project root. For example, if you need a file with path `{project_root}/models/model.py`, you need to request the file `models/model.py`.
\ No newline at end of file
diff --git a/euclid/prompts/development/task/step/write_automated_test.prompt b/euclid/prompts/development/task/step/write_automated_test.prompt
index 0fb6b87..e895396 100644
--- a/euclid/prompts/development/task/step/write_automated_test.prompt
+++ b/euclid/prompts/development/task/step/write_automated_test.prompt
@@ -2,7 +2,7 @@ Here are the requested files:
 
 {% for file in files %}
 **{{ file.name }}**
-```{{ file.language }}
+```{# file.language #}
 {{ file.content }}
 ```
 
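
A quick illustration, outside the patch: the switch from {{ file.language }} to {# file.language #} drops the language tag from the code fence rather than rendering it, because {# ... #} is a Jinja comment and renders to nothing (assuming these .prompt files are rendered with Jinja2, as their {{ }} and {% %} syntax suggests):

from jinja2 import Template

print(Template('```{{ language }}').render(language='js'))  # prints: ```js
print(Template('```{# language #}').render(language='js'))  # prints: ```
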
diff --git a/euclid/prompts/development/task/step_check.prompt b/euclid/prompts/development/task/step_check.prompt
index e106194..81c5a23 100644
--- a/euclid/prompts/development/task/step_check.prompt
+++ b/euclid/prompts/development/task/step_check.prompt
@@ -1,8 +1,8 @@
 Now, we need to verify if this change was successfully implemented. We can do that in 3 ways:
 
-1. By writing an automated test or by running a previously written test - this is the preferred way since we can then test if this functionality works in the future. You write automated tests in Jest and you always try finding a way to test a functionality with an automated test. Even if changes seem visual or UI-based, try to find a way to validate them using an automated test, such as verifying HTTP responses or elements rendered on the page. If you think we can write an automated test, start the response with `AUTOMATED_TEST`
+1. By writing an automated test or by running a previously written test - this is the preferred way since we can then test if this functionality works in the future. You write automated tests in Jest and you always try to find a way to test functionality with an automated test. Even if changes seem visual or UI-based, try to find a way to validate them using an automated test, such as verifying HTTP responses or elements rendered on the page.
 
-2. By running a command - this is good for when an automated test is an overkill. For example, if we installed a new package or changed some configuration. If you just want to run a command (or multiple commands), respond with `COMMAND_TEST: {explanation on how to test this with a command}`. Keep in mind that in this case, there shouldn't be any human intervention needed - I will run the commands you will give me and show you the CLI output and from that, you should be able to determine if the test passed or failed.
+2. By running a command (or multiple commands) - this is good for when an automated test is overkill. For example, if we installed a new package or changed some configuration. Keep in mind that in this case, there shouldn't be any human intervention needed - I will run the commands you give me and show you the CLI output, and from that you should be able to determine whether the test passed or failed.
 
-3. By requesting that a human checks if everything works as expected - this is the last option that we want to avoid but if we can't test the functionality programmatically, we should ask a human to check if it works as expected. For example, if something was visually changed in the UI. If you need a human to check the functionality, start the response with `MANUAL_TEST`.
+3. By requesting that a human checks if everything works as expected - this is the last option, which we want to avoid, but if we can't test the functionality programmatically, we should ask a human to check if it works as expected. For example, if something was visually changed in the UI.
-Ok, now, tell me how can we verify if this change was successful and respond only with a keyword for a type of test.
\ No newline at end of file
+Ok, now, tell me how we can verify if this change was successful and respond only with a keyword for a type of test.
\ No newline at end of file
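
One last illustration, outside the patch: with the AUTOMATED_TEST / COMMAND_TEST / MANUAL_TEST keyword protocol removed from step_check.prompt, the model is now expected to answer through the test_changes function call defined in GET_TEST_TYPE. One plausible argument payload per verification path (all values invented):

automated = {
    'type': 'automated_test',
    'automated_test_description': 'Assert that GET /users returns 200 and a JSON array.',
}
command = {
    'type': 'command_test',
    # a command that never exits on its own, so the 3-second timeout rule applies
    'command': {'command': 'npm start', 'timeout': 3},
}
manual = {
    'type': 'manual_test',
    'manual_test_description': 'Open the app and confirm the new header renders correctly.',
}
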