From e5e0f56d2e6accb2c0078cf7e75a8dc524576907 Mon Sep 17 00:00:00 2001 From: Nicholas Albion Date: Tue, 26 Sep 2023 19:38:24 +1000 Subject: [PATCH 01/10] JSON validation working --- pilot/const/function_calls.py | 2 +- pilot/utils/function_calling.py | 4 ++-- pilot/utils/llm_connection.py | 15 +++++------- pilot/utils/test_llm_connection.py | 37 +++++++++++++++++++++++++++--- requirements.txt | 1 + 5 files changed, 44 insertions(+), 15 deletions(-) diff --git a/pilot/const/function_calls.py b/pilot/const/function_calls.py index c271943..2020b7e 100644 --- a/pilot/const/function_calls.py +++ b/pilot/const/function_calls.py @@ -369,7 +369,7 @@ DEVELOPMENT_PLAN = { 'description': 'user-review goal that will determine if a task is done or not but from a user perspective since it will be reviewed by a human', } }, - 'required': ['task_description', 'programmatic_goal', 'user_review_goal'], + 'required': ['description', 'programmatic_goal', 'user_review_goal'], }, }, }, diff --git a/pilot/utils/function_calling.py b/pilot/utils/function_calling.py index 469bc53..4b738f9 100644 --- a/pilot/utils/function_calling.py +++ b/pilot/utils/function_calling.py @@ -140,7 +140,7 @@ class JsonPrompter: return "\n".join( self.function_descriptions(functions, function_to_call) + [ - "The response should be a JSON object matching this schema:", + "The response MUST be a JSON object matching this schema:", "```json", self.function_parameters(functions, function_to_call), "```", @@ -195,7 +195,7 @@ class JsonPrompter: "Help choose the appropriate function to call to answer the user's question." if function_to_call is None else f"Define the arguments for {function_to_call} to answer the user's question." - ) + "\nThe response should contain only the JSON object, with no additional text or explanation." + ) + "\nThe response must contain ONLY the JSON object, with NO additional text or explanation." 
data = ( self.function_data(functions, function_to_call) diff --git a/pilot/utils/llm_connection.py b/pilot/utils/llm_connection.py index 2492598..f3afcf1 100644 --- a/pilot/utils/llm_connection.py +++ b/pilot/utils/llm_connection.py @@ -7,6 +7,7 @@ import json import tiktoken import questionary +from jsonschema import validate from utils.style import red from typing import List from const.llm import MIN_TOKENS_FOR_GPT_RESPONSE, MAX_GPT_MODEL_TOKENS @@ -15,6 +16,7 @@ from helpers.exceptions.TokenLimitError import TokenLimitError from utils.utils import fix_json from utils.function_calling import add_function_calls_to_request, FunctionCallSet, FunctionType + def get_tokens_in_messages(messages: List[str]) -> int: tokenizer = tiktoken.get_encoding("cl100k_base") # GPT-4 tokenizer tokenized_messages = [tokenizer.encode(message['content']) for message in messages] @@ -347,16 +349,11 @@ def assert_json_response(response: str, or_fail=True) -> bool: def assert_json_schema(response: str, functions: list[FunctionType]) -> True: + for function in functions: + schema = function['parameters'] + parsed = json.loads(response) + validate(parsed, schema) return True - # TODO: validation always fails - # for function in functions: - # schema = function['parameters'] - # parser = parser_for_schema(schema) - # validated = parser.validate(response) - # if validated.valid and validated.end_index: - # return True - # - # raise ValueError('LLM responded with invalid JSON') def postprocessing(gpt_response, req_type): diff --git a/pilot/utils/test_llm_connection.py b/pilot/utils/test_llm_connection.py index ec55633..7f21d97 100644 --- a/pilot/utils/test_llm_connection.py +++ b/pilot/utils/test_llm_connection.py @@ -1,6 +1,9 @@ import builtins +from json import JSONDecodeError + import pytest from dotenv import load_dotenv +from jsonschema import ValidationError from const.function_calls import ARCHITECTURE, DEVELOPMENT_PLAN from helpers.AgentConvo import AgentConvo @@ -45,13 +48,13 @@ class TestSchemaValidation: def test_assert_json_schema_invalid(self): # When assert_json_schema is called with invalid JSON # Then error is raised - with pytest.raises(ValueError, match='LLM responded with invalid JSON'): + with pytest.raises(ValidationError, match="1 is not of type 'string'"): assert_json_schema('{"foo": 1}', [self.function]) def test_assert_json_schema_incomplete(self): # When assert_json_schema is called with incomplete JSON # Then error is raised - with pytest.raises(ValueError, match='LLM responded with invalid JSON'): + with pytest.raises(JSONDecodeError): assert_json_schema('{"foo": "b', [self.function]) def test_assert_json_schema_required(self): @@ -60,9 +63,37 @@ class TestSchemaValidation: self.function['parameters']['properties']['other'] = {'type': 'string'} self.function['parameters']['required'] = ['foo', 'other'] - with pytest.raises(ValueError, match='LLM responded with invalid JSON'): + with pytest.raises(ValidationError, match="'other' is a required property"): assert_json_schema('{"foo": "bar"}', [self.function]) + def test_DEVELOPMENT_PLAN(self): + assert(assert_json_schema(''' +{ + "plan": [ + { + "description": "Set up project structure including creation of necessary directories and files. Initialize Node.js and install necessary libraries such as express and socket.io.", + "programmatic_goal": "Project structure should be set up and Node.js initialized. 
Express and socket.io libraries should be installed and reflected in the package.json file.", + "user_review_goal": "Developer should be able to start an empty express server by running `npm start` command without any errors." + }, + { + "description": "Create a simple front-end HTML page with CSS and JavaScript that includes input for typing messages and area for displaying messages.", + "programmatic_goal": "There should be an HTML file containing an input box for typing messages and an area for displaying the messages. This HTML page should be served when user navigates to the root URL.", + "user_review_goal": "Navigating to the root URL (http://localhost:3000) should display the chat front-end with an input box and a message area." + }, + { + "description": "Set up socket.io on the back-end to handle websocket connections and broadcasting messages to the clients.", + "programmatic_goal": "Server should be able to handle websocket connections using socket.io and broadcast messages to all connected clients.", + "user_review_goal": "By using two different browsers or browser tabs, when one user sends a message from one tab, it should appear in the other user's browser tab in real-time." + }, + { + "description": "Integrate front-end with socket.io client to send messages from the input field to the server and display incoming messages in the message area.", + "programmatic_goal": "Front-end should be able to send messages to server and display incoming messages in the message area using socket.io client.", + "user_review_goal": "Typing a message in the chat input and sending it should then display the message in the chat area." + } + ] +} +'''.strip(), DEVELOPMENT_PLAN['definitions'])) + class TestLlmConnection: def setup_method(self): builtins.print, ipc_client_instance = get_custom_print({}) diff --git a/requirements.txt b/requirements.txt index 7a4eeca..fbb89d8 100644 --- a/requirements.txt +++ b/requirements.txt @@ -3,6 +3,7 @@ certifi==2023.5.7 charset-normalizer==3.2.0 distro==1.8.0 idna==3.4 +jsonschema==4.19.1 Jinja2==3.1.2 MarkupSafe==2.1.3 peewee==3.16.2 From cf97a1be5e06a699e2713fa823c6c2ebf27d0789 Mon Sep 17 00:00:00 2001 From: Nicholas Albion Date: Tue, 26 Sep 2023 19:44:52 +1000 Subject: [PATCH 02/10] removed TODOs --- pilot/utils/test_llm_connection.py | 6 ------ 1 file changed, 6 deletions(-) diff --git a/pilot/utils/test_llm_connection.py b/pilot/utils/test_llm_connection.py index 7f21d97..bb3672f 100644 --- a/pilot/utils/test_llm_connection.py +++ b/pilot/utils/test_llm_connection.py @@ -98,8 +98,6 @@ class TestLlmConnection: def setup_method(self): builtins.print, ipc_client_instance = get_custom_print({}) - - @pytest.mark.uses_tokens @pytest.mark.parametrize("endpoint, model", [ ("OPENAI", "gpt-4"), # role: system @@ -107,8 +105,6 @@ class TestLlmConnection: ("OPENROUTER", "meta-llama/codellama-34b-instruct"), # rule: user, is_llama ("OPENROUTER", "google/palm-2-chat-bison"), # role: user/system ("OPENROUTER", "google/palm-2-codechat-bison"), - # TODO: See https://github.com/1rgs/jsonformer-claude/blob/main/jsonformer_claude/main.py - # https://github.com/guidance-ai/guidance - token healing ("OPENROUTER", "anthropic/claude-2"), # role: user, is_llama ]) def test_chat_completion_Architect(self, endpoint, model, monkeypatch): @@ -164,8 +160,6 @@ solution-oriented decision-making in areas where precise instructions were not p ("OPENROUTER", "meta-llama/codellama-34b-instruct"), # rule: user, is_llama ("OPENROUTER", "google/palm-2-chat-bison"), # role: user/system 
("OPENROUTER", "google/palm-2-codechat-bison"), - # TODO: See https://github.com/1rgs/jsonformer-claude/blob/main/jsonformer_claude/main.py - # https://github.com/guidance-ai/guidance - token healing ("OPENROUTER", "anthropic/claude-2"), # role: user, is_llama ]) def test_chat_completion_TechLead(self, endpoint, model, monkeypatch): From 6dd5a032fa3720efc6103d636ab00e8a8b70c76f Mon Sep 17 00:00:00 2001 From: Nicholas Albion Date: Wed, 27 Sep 2023 10:53:44 +1000 Subject: [PATCH 03/10] Improved JSON prompting for GPT-4 and recover incomplete JSON responses from Code Llama --- pilot/helpers/agents/test_TechLead.py | 5 +- pilot/prompts/development/plan.prompt | 2 +- pilot/prompts/utils/incomplete_json.prompt | 7 ++ pilot/utils/function_calling.py | 12 +--- pilot/utils/llm_connection.py | 41 ++++++----- pilot/utils/test_files.py | 1 - pilot/utils/test_function_calling.py | 25 ++++--- pilot/utils/test_llm_connection.py | 82 +++++++++++++++------- 8 files changed, 108 insertions(+), 67 deletions(-) create mode 100644 pilot/prompts/utils/incomplete_json.prompt diff --git a/pilot/helpers/agents/test_TechLead.py b/pilot/helpers/agents/test_TechLead.py index f06d93c..1da468d 100644 --- a/pilot/helpers/agents/test_TechLead.py +++ b/pilot/helpers/agents/test_TechLead.py @@ -8,9 +8,8 @@ load_dotenv() from main import get_custom_print from helpers.agents.TechLead import TechLead, DEVELOPMENT_PLANNING_STEP from helpers.Project import Project -from test.test_utils import assert_non_empty_string, mock_terminal_size +from test.test_utils import assert_non_empty_string from test.mock_questionary import MockQuestionary -from utils.function_calling import parse_agent_response class TestTechLead: @@ -51,10 +50,8 @@ The development process will include the creation of user stories and tasks, bas self.project.current_step = DEVELOPMENT_PLANNING_STEP @pytest.mark.uses_tokens - # @patch('database.database.get_progress_steps', return_value=None) @patch('helpers.AgentConvo.get_saved_development_step', return_value=None) @patch('helpers.agents.TechLead.save_progress', return_value=None) - # @patch('os.get_terminal_size', mock_terminal_size) @patch('helpers.agents.TechLead.get_progress_steps', return_value=None) def test_create_development_plan(self, mock_get_saved_step, mock_save_progress, mock_get_progress_steps): self.techLead = TechLead(self.project) diff --git a/pilot/prompts/development/plan.prompt b/pilot/prompts/development/plan.prompt index 2c34a6e..a45ba3d 100644 --- a/pilot/prompts/development/plan.prompt +++ b/pilot/prompts/development/plan.prompt @@ -40,4 +40,4 @@ Each task needs to be related only to the development of this app and nothing el For each task, there must be a way for human developer to check if the task is done or not. Write how should the developer check if the task is done. -Now, based on the app's description, user stories and user tasks, and the technologies that you need to use, think task by task and write up the entire plan for the development. Start from the project setup and specify each task until the moment when the entire app should be fully working. For each task, write a description and a user-review goal. \ No newline at end of file +Now, based on the app's description, user stories and user tasks, and the technologies that you need to use, think task by task and create the entire development plan. Start from the project setup and specify each task until the moment when the entire app should be fully working. For each task, write a description and a user-review goal. 
\ No newline at end of file diff --git a/pilot/prompts/utils/incomplete_json.prompt b/pilot/prompts/utils/incomplete_json.prompt new file mode 100644 index 0000000..5400a8d --- /dev/null +++ b/pilot/prompts/utils/incomplete_json.prompt @@ -0,0 +1,7 @@ +[INST]I received an incomplete JSON response. Please provide the remainder of the JSON object. I will append your entire response to the incomplete JSON data below, so it is important that you do not include any of the data already received or any text that does not complete the JSON data.
+A response which starts with "Here is the remainder of the JSON object" would be an example of an invalid response; a preamble must NOT be included.
+Note that because the JSON data I have already received is an incomplete JSON object, you must NOT start a new JSON object with opening and closing curly braces in your response, but rather continue from EXACTLY where the received JSON ends.
+
+JSON received:
+[/INST]
+{{ received_json }} diff --git a/pilot/utils/function_calling.py b/pilot/utils/function_calling.py index 4b738f9..a4b2821 100644 --- a/pilot/utils/function_calling.py +++ b/pilot/utils/function_calling.py @@ -70,8 +70,7 @@ def parse_agent_response(response, function_calls: FunctionCallSet | None): """ if function_calls: - text = re.sub(r'^.*```json\s*', '', response['text'], flags=re.DOTALL) - text = text.strip('` \n') + text = response['text'] values = list(json.loads(text).values()) if len(values) == 1: return values[0] @@ -140,7 +139,7 @@ class JsonPrompter: return "\n".join( self.function_descriptions(functions, function_to_call) + [ - "The response MUST be a JSON object matching this schema:", + "Here is the schema for the expected JSON object:", "```json", self.function_parameters(functions, function_to_call), "```", @@ -194,7 +193,7 @@ class JsonPrompter: system = ( "Help choose the appropriate function to call to answer the user's question." if function_to_call is None - else f"Define the arguments for {function_to_call} to answer the user's question." + else f"Please provide a JSON object that defines the arguments for the `{function_to_call}` function to answer the user's question." ) + "\nThe response must contain ONLY the JSON object, with NO additional text or explanation." 
data = ( @@ -202,11 +201,6 @@ class JsonPrompter: if function_to_call else self.functions_summary(functions) ) - response_start = ( - f"Here are the arguments for the `{function_to_call}` function: ```json\n" - if function_to_call - else "Here's the function the user should call: " - ) if self.is_instruct: return f"[INST] <>\n{system}\n\n{data}\n<>\n\n{prompt} [/INST]" diff --git a/pilot/utils/llm_connection.py b/pilot/utils/llm_connection.py index f3afcf1..04bdadb 100644 --- a/pilot/utils/llm_connection.py +++ b/pilot/utils/llm_connection.py @@ -13,7 +13,7 @@ from typing import List from const.llm import MIN_TOKENS_FOR_GPT_RESPONSE, MAX_GPT_MODEL_TOKENS from logger.logger import logger from helpers.exceptions.TokenLimitError import TokenLimitError -from utils.utils import fix_json +from utils.utils import fix_json, get_prompt from utils.function_calling import add_function_calls_to_request, FunctionCallSet, FunctionType @@ -148,6 +148,11 @@ def retry_on_exception(func): err_str = str(e) # If the specific error "context_length_exceeded" is present, simply return without retry + if isinstance(e, json.JSONDecodeError): + # codellama-34b-instruct seems to send incomplete JSON responses + if e.msg == 'Expecting value': + args[0]['function_buffer'] = e.doc + continue if "context_length_exceeded" in err_str: raise TokenLimitError(get_tokens_in_messages_from_openai_error(err_str), MAX_GPT_MODEL_TOKENS) if "rate_limit_exceeded" in err_str: @@ -187,14 +192,20 @@ def stream_gpt_completion(data, req_type): # TODO add type dynamically - this isn't working when connected to the external process terminal_width = 50 # os.get_terminal_size().columns lines_printed = 2 + gpt_response = '' buffer = '' # A buffer to accumulate incoming data - expecting_json = False + expecting_json = None received_json = False if 'functions' in data: expecting_json = data['functions'] + if 'function_buffer' in data: + incomplete_json = get_prompt('utils/incomplete_json.prompt', {'received_json': data['function_buffer']}) + data['messages'].append({'role': 'user', 'content': incomplete_json}) + gpt_response = data['function_buffer'] + received_json = True # Don't send the `functions` parameter to Open AI, but don't remove it from `data` in case we need to retry - data = {key: value for key, value in data.items() if key != "functions"} + data = {key: value for key, value in data.items() if not key.startswith('function')} def return_result(result_data, lines_printed): if buffer: @@ -251,7 +262,6 @@ def stream_gpt_completion(data, req_type): logger.debug(f'problem with request: {response.text}') raise Exception(f"API responded with status code: {response.status_code}. Response text: {response.text}") - gpt_response = '' # function_calls = {'name': '', 'arguments': ''} for line in response.iter_lines(): @@ -283,11 +293,9 @@ def stream_gpt_completion(data, req_type): # return return_result({'function_calls': function_calls}, lines_printed) json_line = choice['delta'] - # TODO: token healing? https://github.com/1rgs/jsonformer-claude - # ...Is this what local_llm_function_calling.constrainer is for? 
- except json.JSONDecodeError: - logger.error(f'Unable to decode line: {line}') + except json.JSONDecodeError as e: + logger.error(f'Unable to decode line: {line} {e.msg}') continue # skip to the next line # handle the streaming response @@ -306,16 +314,9 @@ def stream_gpt_completion(data, req_type): buffer += content # accumulate the data # If you detect a natural breakpoint (e.g., line break or end of a response object), print & count: - if buffer.endswith("\n"): + if buffer.endswith('\n'): if expecting_json and not received_json: received_json = assert_json_response(buffer, lines_printed > 2) - if received_json: - gpt_response = "" - # if not received_json: - # # Don't append to gpt_response, but increment lines_printed - # lines_printed += 1 - # buffer = "" - # continue # or some other condition that denotes a breakpoint lines_printed += count_lines_based_on_width(buffer, terminal_width) @@ -333,6 +334,7 @@ def stream_gpt_completion(data, req_type): logger.info(f'Response message: {gpt_response}') if expecting_json: + gpt_response = clean_json_response(gpt_response) assert_json_schema(gpt_response, expecting_json) new_code = postprocessing(gpt_response, req_type) # TODO add type dynamically @@ -348,12 +350,17 @@ def assert_json_response(response: str, or_fail=True) -> bool: return False +def clean_json_response(response: str) -> str: + response = re.sub(r'^.*```json\s*', '', response, flags=re.DOTALL) + return response.strip('` \n') + + def assert_json_schema(response: str, functions: list[FunctionType]) -> True: for function in functions: schema = function['parameters'] parsed = json.loads(response) validate(parsed, schema) - return True + return True def postprocessing(gpt_response, req_type): diff --git a/pilot/utils/test_files.py b/pilot/utils/test_files.py index 74aa277..8f0785a 100644 --- a/pilot/utils/test_files.py +++ b/pilot/utils/test_files.py @@ -1,4 +1,3 @@ -import pytest from .files import setup_workspace diff --git a/pilot/utils/test_function_calling.py b/pilot/utils/test_function_calling.py index 0e5ea69..bffa7d0 100644 --- a/pilot/utils/test_function_calling.py +++ b/pilot/utils/test_function_calling.py @@ -1,4 +1,5 @@ -from const.function_calls import ARCHITECTURE, DEV_STEPS +from const.function_calls import ARCHITECTURE +from utils.llm_connection import clean_json_response from .function_calling import parse_agent_response, JsonPrompter @@ -30,6 +31,7 @@ class TestFunctionCalling: function_calls = {'definitions': [], 'functions': {}} # When + response['text'] = clean_json_response(response['text']) response = parse_agent_response(response, function_calls) # Then @@ -41,6 +43,7 @@ class TestFunctionCalling: function_calls = {'definitions': [], 'functions': {}} # When + response['text'] = clean_json_response(response['text']) response = parse_agent_response(response, function_calls) # Then @@ -68,7 +71,7 @@ def test_json_prompter(): # Then assert prompt == '''Help choose the appropriate function to call to answer the user's question. -The response should contain only the JSON object, with no additional text or explanation. +The response must contain ONLY the JSON object, with NO additional text or explanation. Available functions: - process_technologies - Print the list of technologies that are created. @@ -86,7 +89,7 @@ def test_llama_json_prompter(): # Then assert prompt == '''[INST] <> Help choose the appropriate function to call to answer the user's question. -The response should contain only the JSON object, with no additional text or explanation. 
+The response must contain ONLY the JSON object, with NO additional text or explanation. Available functions: - process_technologies - Print the list of technologies that are created. @@ -103,11 +106,11 @@ def test_json_prompter_named(): prompt = prompter.prompt('Create a web-based chat app', ARCHITECTURE['definitions'], 'process_technologies') # Then - assert prompt == '''Define the arguments for process_technologies to answer the user's question. -The response should contain only the JSON object, with no additional text or explanation. + assert prompt == '''Please provide a JSON object that defines the arguments for the `process_technologies` function to answer the user's question. +The response must contain ONLY the JSON object, with NO additional text or explanation. -Print the list of technologies that are created. -The response should be a JSON object matching this schema: +# process_technologies: Print the list of technologies that are created. +Here is the schema for the expected JSON object: ```json { "technologies": { @@ -133,11 +136,11 @@ def test_llama_json_prompter_named(): # Then assert prompt == '''[INST] <> -Define the arguments for process_technologies to answer the user's question. -The response should contain only the JSON object, with no additional text or explanation. +Please provide a JSON object that defines the arguments for the `process_technologies` function to answer the user's question. +The response must contain ONLY the JSON object, with NO additional text or explanation. -Print the list of technologies that are created. -The response should be a JSON object matching this schema: +# process_technologies: Print the list of technologies that are created. +Here is the schema for the expected JSON object: ```json { "technologies": { diff --git a/pilot/utils/test_llm_connection.py b/pilot/utils/test_llm_connection.py index bb3672f..5e6ebbc 100644 --- a/pilot/utils/test_llm_connection.py +++ b/pilot/utils/test_llm_connection.py @@ -2,6 +2,7 @@ import builtins from json import JSONDecodeError import pytest +from unittest.mock import patch, Mock from dotenv import load_dotenv from jsonschema import ValidationError @@ -12,7 +13,8 @@ from helpers.agents.Architect import Architect from helpers.agents.TechLead import TechLead from utils.function_calling import parse_agent_response, FunctionType from test.test_utils import assert_non_empty_string -from .llm_connection import create_gpt_chat_completion, assert_json_response, assert_json_schema +from test.mock_questionary import MockQuestionary +from utils.llm_connection import create_gpt_chat_completion, stream_gpt_completion, assert_json_response, assert_json_schema from main import get_custom_print load_dotenv() @@ -98,14 +100,42 @@ class TestLlmConnection: def setup_method(self): builtins.print, ipc_client_instance = get_custom_print({}) + @patch('utils.llm_connection.requests.post') + def test_stream_gpt_completion(self, mock_post): + # Given streaming JSON response + deltas = ['{', '\\n', + ' \\"foo\\": \\"bar\\",', '\\n', + ' \\"prompt\\": \\"Hello\\",', '\\n', + ' \\"choices\\": []', '\\n', + '}'] + lines_to_yield = [ + ('{"id": "gen-123", "choices": [{"index": 0, "delta": {"role": "assistant", "content": "' + delta + '"}}]}') + .encode('utf-8') + for delta in deltas + ] + lines_to_yield.insert(1, b': OPENROUTER PROCESSING') # Simulate OpenRoute keep-alive pings + mock_response = Mock() + mock_response.status_code = 200 + mock_response.iter_lines.return_value = lines_to_yield + + mock_post.return_value = 
mock_response + + # When + with patch('utils.llm_connection.requests.post', return_value=mock_response): + response = stream_gpt_completion({}, '') + + # Then + assert response == {'text': '{\n "foo": "bar",\n "prompt": "Hello",\n "choices": []\n}'} + + @pytest.mark.uses_tokens - @pytest.mark.parametrize("endpoint, model", [ - ("OPENAI", "gpt-4"), # role: system - ("OPENROUTER", "openai/gpt-3.5-turbo"), # role: user - ("OPENROUTER", "meta-llama/codellama-34b-instruct"), # rule: user, is_llama - ("OPENROUTER", "google/palm-2-chat-bison"), # role: user/system - ("OPENROUTER", "google/palm-2-codechat-bison"), - ("OPENROUTER", "anthropic/claude-2"), # role: user, is_llama + @pytest.mark.parametrize('endpoint, model', [ + ('OPENAI', 'gpt-4'), # role: system + ('OPENROUTER', 'openai/gpt-3.5-turbo'), # role: user + ('OPENROUTER', 'meta-llama/codellama-34b-instruct'), # rule: user, is_llama + ('OPENROUTER', 'google/palm-2-chat-bison'), # role: user/system + ('OPENROUTER', 'google/palm-2-codechat-bison'), + ('OPENROUTER', 'anthropic/claude-2'), # role: user, is_llama ]) def test_chat_completion_Architect(self, endpoint, model, monkeypatch): # Given @@ -154,13 +184,13 @@ solution-oriented decision-making in areas where precise instructions were not p assert 'Node.js' in response @pytest.mark.uses_tokens - @pytest.mark.parametrize("endpoint, model", [ - ("OPENAI", "gpt-4"), # role: system - ("OPENROUTER", "openai/gpt-3.5-turbo"), # role: user - ("OPENROUTER", "meta-llama/codellama-34b-instruct"), # rule: user, is_llama - ("OPENROUTER", "google/palm-2-chat-bison"), # role: user/system - ("OPENROUTER", "google/palm-2-codechat-bison"), - ("OPENROUTER", "anthropic/claude-2"), # role: user, is_llama + @pytest.mark.parametrize('endpoint, model', [ + ('OPENAI', 'gpt-4'), + ('OPENROUTER', 'openai/gpt-3.5-turbo'), + ('OPENROUTER', 'meta-llama/codellama-34b-instruct'), + ('OPENROUTER', 'google/palm-2-chat-bison'), + ('OPENROUTER', 'google/palm-2-codechat-bison'), + ('OPENROUTER', 'anthropic/claude-2'), ]) def test_chat_completion_TechLead(self, endpoint, model, monkeypatch): # Given @@ -191,18 +221,22 @@ The development process will include the creation of user stories and tasks, bas }) function_calls = DEVELOPMENT_PLAN + # Retry on bad LLM responses + mock_questionary = MockQuestionary(['', '', 'no']) + # When - response = create_gpt_chat_completion(convo.messages, '', function_calls=function_calls) + with patch('utils.llm_connection.questionary', mock_questionary): + response = create_gpt_chat_completion(convo.messages, '', function_calls=function_calls) - # Then - assert convo.messages[0]['content'].startswith('You are a tech lead in a software development agency') - assert convo.messages[1]['content'].startswith('You are working in a software development agency and a project manager and software architect approach you') + # Then + assert convo.messages[0]['content'].startswith('You are a tech lead in a software development agency') + assert convo.messages[1]['content'].startswith('You are working in a software development agency and a project manager and software architect approach you') - assert response is not None - response = parse_agent_response(response, function_calls) - assert_non_empty_string(response[0]['description']) - assert_non_empty_string(response[0]['programmatic_goal']) - assert_non_empty_string(response[0]['user_review_goal']) + assert response is not None + response = parse_agent_response(response, function_calls) + assert_non_empty_string(response[0]['description']) + 
assert_non_empty_string(response[0]['programmatic_goal']) + assert_non_empty_string(response[0]['user_review_goal']) # def test_break_down_development_task(self): From af75e8ebdcff9e7ab6896c21d312eaab1139e7b8 Mon Sep 17 00:00:00 2001 From: Nicholas Albion Date: Wed, 27 Sep 2023 11:37:15 +1000 Subject: [PATCH 04/10] switched to `colorama` which works on Windows and in VS Code --- pilot/helpers/Project.py | 3 ++- pilot/utils/arguments.py | 1 + pilot/utils/style.py | 24 ++++++++++++------------ requirements.txt | 1 + 4 files changed, 16 insertions(+), 13 deletions(-) diff --git a/pilot/helpers/Project.py b/pilot/helpers/Project.py index 7497cd0..97722d2 100644 --- a/pilot/helpers/Project.py +++ b/pilot/helpers/Project.py @@ -67,7 +67,8 @@ class Project: # if development_plan is not None: # self.development_plan = development_plan - print(green_bold('\n------------------ STARTING NEW PROJECT ----------------------')) + # TODO: When running from the CLI, this would be displayed in addition to ---- LOADING PROJECT ---- + print(green_bold('\n------------------ STARTING NEW PROJECT Project ----------------------')) print(f"If you wish to continue with this project in future run:") print(green_bold(f'python main.py app_id={args["app_id"]}')) print(green_bold('--------------------------------------------------------------\n')) diff --git a/pilot/utils/arguments.py b/pilot/utils/arguments.py index e4409eb..53aa26b 100644 --- a/pilot/utils/arguments.py +++ b/pilot/utils/arguments.py @@ -52,6 +52,7 @@ def get_arguments(): # Handle the error as needed, possibly exiting the script else: arguments['app_id'] = str(uuid.uuid4()) + # TODO: This intro is also presented by Project.py. This version is not presented in the VS Code extension print(colored('\n------------------ STARTING NEW PROJECT ----------------------', 'green', attrs=['bold'])) print("If you wish to continue with this project in future run:") print(colored(f'python {sys.argv[0]} app_id={arguments["app_id"]}', 'green', attrs=['bold'])) diff --git a/pilot/utils/style.py b/pilot/utils/style.py index 157c255..c36af9b 100644 --- a/pilot/utils/style.py +++ b/pilot/utils/style.py @@ -1,45 +1,45 @@ from termcolor import colored - +from colorama import Fore, Style def red(text): - return colored(text, 'red') + return f'{Fore.RED}{text}{Style.RESET_ALL}' def red_bold(text): - return colored(text, 'red', attrs=['bold']) + return f'{Fore.RED}{Style.BRIGHT}{text}{Style.RESET_ALL}' def yellow(text): - return colored(text, 'yellow') + return f'{Fore.YELLOW}{text}{Style.RESET_ALL}' def yellow_bold(text): - return colored(text, 'yellow', attrs=['bold']) + return f'{Fore.YELLOW}{Style.BRIGHT}{text}{Style.RESET_ALL}' def green(text): - return colored(text, 'green') + return f'{Fore.GREEN}{text}{Style.RESET_ALL}' def green_bold(text): - return colored(text, 'green', attrs=['bold']) + return f'{Fore.GREEN}{Style.BRIGHT}{text}{Style.RESET_ALL}' def blue(text): - return colored(text, 'blue') + return f'{Fore.BLUE}{text}{Style.RESET_ALL}' def blue_bold(text): - return colored(text, 'blue', attrs=['bold']) + return f'{Fore.BLUE}{Style.BRIGHT}{text}{Style.RESET_ALL}' def cyan(text): - return colored(text, 'light_cyan') + return f'{Fore.CYAN}{text}{Style.RESET_ALL}' def white(text): - return colored(text, 'white') + return f'{Fore.WHITE}{text}{Style.RESET_ALL}' def white_bold(text): - return colored(text, 'white', attrs=['bold']) + return f'{Fore.WHITE}{Style.BRIGHT}{text}{Style.RESET_ALL}' diff --git a/requirements.txt b/requirements.txt index fbb89d8..1d11082 100644 
--- a/requirements.txt +++ b/requirements.txt @@ -1,6 +1,7 @@ blessed==1.20.0 certifi==2023.5.7 charset-normalizer==3.2.0 +colorama==0.4.6 distro==1.8.0 idna==3.4 jsonschema==4.19.1 From 0ba4c1976c9b2db11b533c427ced0a631e765c87 Mon Sep 17 00:00:00 2001 From: Nicholas Albion Date: Wed, 27 Sep 2023 11:44:04 +1000 Subject: [PATCH 05/10] removed debugging aid --- pilot/helpers/Project.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pilot/helpers/Project.py b/pilot/helpers/Project.py index 97722d2..ddd0a8a 100644 --- a/pilot/helpers/Project.py +++ b/pilot/helpers/Project.py @@ -68,7 +68,7 @@ class Project: # self.development_plan = development_plan # TODO: When running from the CLI, this would be displayed in addition to ---- LOADING PROJECT ---- - print(green_bold('\n------------------ STARTING NEW PROJECT Project ----------------------')) + print(green_bold('\n------------------ STARTING NEW PROJECT ----------------------')) print(f"If you wish to continue with this project in future run:") print(green_bold(f'python main.py app_id={args["app_id"]}')) print(green_bold('--------------------------------------------------------------\n')) From 5f110322ed18c76b6a3e4ee9e76c68a61180e490 Mon Sep 17 00:00:00 2001 From: Nicholas Albion Date: Wed, 27 Sep 2023 11:49:51 +1000 Subject: [PATCH 06/10] fixed failing tests --- pilot/helpers/agents/test_CodeMonkey.py | 2 +- pilot/helpers/agents/test_Developer.py | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/pilot/helpers/agents/test_CodeMonkey.py b/pilot/helpers/agents/test_CodeMonkey.py index bb04b97..0b1aa74 100644 --- a/pilot/helpers/agents/test_CodeMonkey.py +++ b/pilot/helpers/agents/test_CodeMonkey.py @@ -10,7 +10,7 @@ from database.models.files import File from database.models.development_steps import DevelopmentSteps from helpers.Project import Project, update_file, clear_directory from helpers.AgentConvo import AgentConvo -from test.mock_terminal_size import mock_terminal_size +from test.test_utils import mock_terminal_size SEND_TO_LLM = False WRITE_TO_FILE = False diff --git a/pilot/helpers/agents/test_Developer.py b/pilot/helpers/agents/test_Developer.py index 2009bce..fe2ca9c 100644 --- a/pilot/helpers/agents/test_Developer.py +++ b/pilot/helpers/agents/test_Developer.py @@ -1,6 +1,7 @@ import builtins import os -from unittest.mock import patch, Mock +import pytest +from unittest.mock import patch from helpers.AgentConvo import AgentConvo from dotenv import load_dotenv @@ -9,7 +10,6 @@ load_dotenv() from main import get_custom_print from .Developer import Developer, ENVIRONMENT_SETUP_STEP from helpers.Project import Project -from test.mock_terminal_size import mock_terminal_size class TestDeveloper: @@ -33,7 +33,7 @@ class TestDeveloper: self.project.current_step = ENVIRONMENT_SETUP_STEP self.developer = Developer(self.project) - # @pytest.mark.uses_tokens + @pytest.mark.uses_tokens @patch('helpers.AgentConvo.get_saved_development_step') @patch('helpers.AgentConvo.save_development_step') @patch('helpers.AgentConvo.create_gpt_chat_completion', From 7a81b599c7bac6511dff076c3c5285893740e7b9 Mon Sep 17 00:00:00 2001 From: Nicholas Albion Date: Wed, 27 Sep 2023 11:51:48 +1000 Subject: [PATCH 07/10] Removed `fabulous` and test against Python 3.9 --- .github/workflows/ci.yml | 2 +- requirements.txt | 1 - 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 026a8a3..c9d1fc0 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ 
-16,7 +16,7 @@ jobs: matrix: # 3.10 - 04 Oct 2021 # 3.11 - 24 Oct 2022 - python-version: ['3.10', '3.11'] + python-version: ['3.9', '3.10', '3.11'] steps: - uses: actions/checkout@v4 diff --git a/requirements.txt b/requirements.txt index 2a23dc8..1d11082 100644 --- a/requirements.txt +++ b/requirements.txt @@ -3,7 +3,6 @@ certifi==2023.5.7 charset-normalizer==3.2.0 colorama==0.4.6 distro==1.8.0 -fabulous==0.4.0 idna==3.4 jsonschema==4.19.1 Jinja2==3.1.2 From b0fea79c7be5c5085f8fd6d979b23bd446449f04 Mon Sep 17 00:00:00 2001 From: Nicholas Albion Date: Wed, 27 Sep 2023 12:45:20 +1000 Subject: [PATCH 08/10] fixed test for CI --- pilot/utils/test_llm_connection.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pilot/utils/test_llm_connection.py b/pilot/utils/test_llm_connection.py index 5e6ebbc..c7d0193 100644 --- a/pilot/utils/test_llm_connection.py +++ b/pilot/utils/test_llm_connection.py @@ -101,8 +101,9 @@ class TestLlmConnection: builtins.print, ipc_client_instance = get_custom_print({}) @patch('utils.llm_connection.requests.post') - def test_stream_gpt_completion(self, mock_post): + def test_stream_gpt_completion(self, mock_post, monkeypatch): # Given streaming JSON response + monkeypatch.setenv('OPENAI_API_KEY', 'secret') deltas = ['{', '\\n', ' \\"foo\\": \\"bar\\",', '\\n', ' \\"prompt\\": \\"Hello\\",', '\\n', From c84ad714d49947813941a27904e99cec5584b5b1 Mon Sep 17 00:00:00 2001 From: Nicholas Albion Date: Wed, 27 Sep 2023 16:04:44 +1000 Subject: [PATCH 09/10] "hint" message type --- pilot/const/ipc.py | 7 ++++--- pilot/helpers/AgentConvo.py | 5 +++-- pilot/prompts/prompts.py | 8 +++++--- 3 files changed, 12 insertions(+), 8 deletions(-) diff --git a/pilot/const/ipc.py b/pilot/const/ipc.py index 22729e5..fb02916 100644 --- a/pilot/const/ipc.py +++ b/pilot/const/ipc.py @@ -1,7 +1,8 @@ MESSAGE_TYPE = { 'verbose': 'verbose', 'stream': 'stream', - 'user_input_request': 'user_input_request', - 'info': 'info', + 'user_input_request': 'user_input_request', # Displayed above the + 'hint': 'hint', # Hint text, eg "Do you want to add anything else? If not, just press ENTER." + 'info': 'info', # JSON data can be sent to progress `progress_stage` 'local': 'local', -} \ No newline at end of file +} diff --git a/pilot/helpers/AgentConvo.py b/pilot/helpers/AgentConvo.py index 928320b..36c08a9 100644 --- a/pilot/helpers/AgentConvo.py +++ b/pilot/helpers/AgentConvo.py @@ -125,8 +125,9 @@ class AgentConvo: # Continue conversation until GPT response equals END_RESPONSE while response != END_RESPONSE: - print(yellow("Do you want to add anything else? If not, ") + yellow_bold('just press ENTER.')) - user_message = ask_user(self.agent.project, response, False) + user_message = ask_user(self.agent.project, response, + hint=yellow("Do you want to add anything else? 
If not, ") + yellow_bold('just press ENTER.'), + require_some_input=False) if user_message == "": accepted_messages.append(response) diff --git a/pilot/prompts/prompts.py b/pilot/prompts/prompts.py index 0491080..965db3f 100644 --- a/pilot/prompts/prompts.py +++ b/pilot/prompts/prompts.py @@ -49,8 +49,10 @@ def ask_for_main_app_definition(project): return description -def ask_user(project, question, require_some_input=True): +def ask_user(project, question: str, require_some_input=True, hint: str = None): while True: + if hint is not None: + print(hint, type='hint') answer = styled_text(project, question) if answer is None: @@ -72,7 +74,7 @@ def get_additional_info_from_openai(project, messages): if response is not None: if response['text'].strip() == END_RESPONSE: - print(response['text'] + '\n') + # print(response['text'] + '\n') return messages # Ask the question to the user @@ -100,7 +102,7 @@ def get_additional_info_from_user(project, messages, role): if isinstance(message, dict) and 'text' in message: message = message['text'] print(yellow(f"Please check this message and say what needs to be changed. If everything is ok just press ENTER",)) - answer = ask_user(project, message, False) + answer = ask_user(project, message, require_some_input=False) if answer.lower() == '': break response = create_gpt_chat_completion( From bbc163480c8701a2dc67c70ff4abefe02aa36d9d Mon Sep 17 00:00:00 2001 From: Nicholas Albion Date: Wed, 27 Sep 2023 16:43:43 +1000 Subject: [PATCH 10/10] moved "Can I execute the command" into `ask_user()` call --- pilot/helpers/cli.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/pilot/helpers/cli.py b/pilot/helpers/cli.py index 5ff36db..1d9f407 100644 --- a/pilot/helpers/cli.py +++ b/pilot/helpers/cli.py @@ -12,7 +12,7 @@ from database.database import get_saved_command_run, save_command_run from const.function_calls import DEBUG_STEPS_BREAKDOWN from helpers.exceptions.TooDeepRecursionError import TooDeepRecursionError from helpers.exceptions.TokenLimitError import TokenLimitError - +from prompts.prompts import ask_user from utils.questionary import styled_text from const.code_execution import MAX_COMMAND_DEBUG_TRIES, MIN_COMMAND_RUN_TIME, MAX_COMMAND_RUN_TIME, MAX_COMMAND_OUTPUT_LENGTH @@ -105,11 +105,10 @@ def execute_command(project, command, timeout=None, force=False): if not force: print(yellow_bold(f'\n--------- EXECUTE COMMAND ----------')) - print(f'Can i execute the command: `' + yellow_bold(command) + f'` with {timeout}ms timeout?') - - answer = styled_text( + answer = ask_user( project, - 'If yes, just press ENTER' + f'Can I execute the command: `' + yellow_bold(command) + f'` with {timeout}ms timeout?', + hint='If yes, just press ENTER' ) # TODO: I think AutoGPT allows other feedback here, like: @@ -149,6 +148,7 @@ def execute_command(project, command, timeout=None, force=False): while True and return_value is None: elapsed_time = time.time() - start_time if timeout is not None: + # TODO: print to IPC using a different message type so VS Code can ignore it or update the previous value print(white_bold(f'\rt: {round(elapsed_time * 1000)}ms : '), end='', flush=True) # Check if process has finished