Rejecting responses that are not JSON.

Need to fix prompts for GPT-4
Nicholas Albion
2023-09-26 17:27:54 +10:00
parent b8965f527d
commit 8a024c2ff2
11 changed files with 298 additions and 66 deletions

View File

@@ -61,7 +61,7 @@ After you have Python and PostgreSQL installed, follow these steps:
1. `git clone https://github.com/Pythagora-io/gpt-pilot.git` (clone the repo)
2. `cd gpt-pilot`
3. `python -m venv pilot-env` (create a virtual environment)
-4. `source pilot-env/bin/activate` (activate the virtual environment)
+4. `source pilot-env/bin/activate` (or on Windows `pilot-env\Scripts\activate`) (activate the virtual environment)
5. `pip install -r requirements.txt` (install the dependencies)
6. `cd pilot`
7. `mv .env.example .env` (create the .env file)
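For Windows users, step 7 also needs a substitute for `mv`; a parallel sketch for cmd.exe (editorial note, not part of the README diff, which only covers step 4):

```
pilot-env\Scripts\activate
copy .env.example .env
```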

View File

@@ -5,7 +5,7 @@ from utils.style import yellow, yellow_bold
from database.database import get_saved_development_step, save_development_step, delete_all_subsequent_steps
from helpers.exceptions.TokenLimitError import TokenLimitError
-from utils.function_calling import parse_agent_response
+from utils.function_calling import parse_agent_response, FunctionCallSet
from utils.llm_connection import create_gpt_chat_completion
from utils.utils import array_of_objects_to_string, get_prompt, get_sys_message, capitalize_first_word_with_underscores
from logger.logger import logger
@@ -31,7 +31,7 @@ class AgentConvo:
        # add system message
        self.messages.append(get_sys_message(self.agent.role))

-    def send_message(self, prompt_path=None, prompt_data=None, function_calls=None):
+    def send_message(self, prompt_path=None, prompt_data=None, function_calls: FunctionCallSet = None):
        """
        Sends a message in the conversation.
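For context: the `FunctionCallSet` dicts referenced here live in `pilot/const/function_calls.py` and carry at least a `definitions` list of `FunctionType` entries, each with a name, description, and a JSON-schema `parameters` object (see the `FunctionType` literal in the tests further down). A minimal sketch of that shape — any fields beyond `definitions` are an assumption:

```python
from typing import TypedDict


class FunctionType(TypedDict):
    name: str
    description: str
    parameters: dict  # JSON schema describing the expected arguments


class FunctionCallSet(TypedDict):
    definitions: list[FunctionType]


# illustrative example, mirroring the ARCHITECTURE / DEVELOPMENT_PLAN constants
EXAMPLE: FunctionCallSet = {
    'definitions': [{
        'name': 'process_technologies',
        'description': 'Record the technologies to be used for the project',
        'parameters': {
            'type': 'object',
            'properties': {'technologies': {'type': 'array', 'items': {'type': 'string'}}},
            'required': ['technologies'],
        },
    }],
}
```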

View File

@@ -10,17 +10,12 @@ from database.models.files import File
from database.models.development_steps import DevelopmentSteps
from helpers.Project import Project, update_file, clear_directory
from helpers.AgentConvo import AgentConvo
+from test.mock_terminal_size import mock_terminal_size
SEND_TO_LLM = False
WRITE_TO_FILE = False
-def mock_terminal_size():
-    mock_size = Mock()
-    mock_size.columns = 80  # or whatever width you want
-    return mock_size
class TestCodeMonkey:
    def setup_method(self):
        name = 'TestDeveloper'

View File

@@ -9,12 +9,7 @@ load_dotenv()
from main import get_custom_print
from .Developer import Developer, ENVIRONMENT_SETUP_STEP
from helpers.Project import Project
-def mock_terminal_size():
-    mock_size = Mock()
-    mock_size.columns = 80  # or whatever width you want
-    return mock_size
+from test.mock_terminal_size import mock_terminal_size
class TestDeveloper:

View File

@@ -0,0 +1,72 @@
import builtins
import os
import pytest
from unittest.mock import patch
from dotenv import load_dotenv
load_dotenv()
from main import get_custom_print
from helpers.agents.TechLead import TechLead, DEVELOPMENT_PLANNING_STEP
from helpers.Project import Project
from test.test_utils import assert_non_empty_string, mock_terminal_size
from test.mock_questionary import MockQuestionary
from utils.function_calling import parse_agent_response
class TestTechLead:
    def setup_method(self):
        builtins.print, ipc_client_instance = get_custom_print({})

        name = 'TestTechLead'
        self.project = Project({
                'app_id': 'test-tech-lead',
                'name': name,
                'app_type': ''
            },
            name=name,
            architecture=[],
            user_stories=[]
        )
        self.project.root_path = os.path.abspath(os.path.join(os.path.dirname(os.path.abspath(__file__)),
                                                              '../../../workspace/TestTechLead'))
        self.project.technologies = []
        self.project.project_description = '''
The project entails creating a web-based chat application, tentatively named "chat_app."
This application does not require user authentication or chat history storage.
It solely supports one-on-one messaging, excluding group chats or multimedia sharing like photos, videos, or files.
Additionally, there are no specific requirements for real-time functionality, like live typing indicators or read receipts.
The development of this application will strictly follow a monolithic structure, avoiding the use of microservices, as per the client's demand.
The development process will include the creation of user stories and tasks, based on detailed discussions with the client.
'''
        self.project.user_stories = [
            'User Story 1: As a user, I can access the web-based "chat_app" directly without needing to authenticate or log in. Do you want to add anything else? If not, just press ENTER.',
            'User Story 2: As a user, I can start one-on-one conversations with another user on the "chat_app". Do you want to add anything else? If not, just press ENTER.',
            'User Story 3: As a user, I can send and receive messages in real-time within my one-on-one conversation on the "chat_app". Do you want to add anything else? If not, just press ENTER.',
            'User Story 4: As a user, I do not need to worry about deleting or storing my chats because the "chat_app" does not store chat histories. Do you want to add anything else? If not, just press ENTER.',
            'User Story 5: As a user, I will only be able to send text messages, as the "chat_app" does not support any kind of multimedia sharing like photos, videos, or files. Do you want to add anything else? If not, just press ENTER.',
            'User Story 6: As a user, I will not see any live typing indicators or read receipts since the "chat_app" does not provide any additional real-time functionality beyond message exchange. Do you want to add anything else? If not, just press ENTER.',
        ]
        self.project.architecture = ['Node.js', 'Socket.io', 'Bootstrap', 'JavaScript', 'HTML5', 'CSS3']
        self.project.current_step = DEVELOPMENT_PLANNING_STEP

    @pytest.mark.uses_tokens
    # @patch('database.database.get_progress_steps', return_value=None)
    @patch('helpers.AgentConvo.get_saved_development_step', return_value=None)
    @patch('helpers.agents.TechLead.save_progress', return_value=None)
    # @patch('os.get_terminal_size', mock_terminal_size)
    @patch('helpers.agents.TechLead.get_progress_steps', return_value=None)
    def test_create_development_plan(self, mock_get_saved_step, mock_save_progress, mock_get_progress_steps):
        self.techLead = TechLead(self.project)

        mock_questionary = MockQuestionary(['', '', 'no'])

        with patch('utils.llm_connection.questionary', mock_questionary):
            # When
            development_plan = self.techLead.create_development_plan()

            # Then
            assert development_plan is not None
            assert_non_empty_string(development_plan[0]['description'])
            assert_non_empty_string(development_plan[0]['programmatic_goal'])
            assert_non_empty_string(development_plan[0]['user_review_goal'])
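Judging by the assertions, `create_development_plan()` is expected to return a list of task dicts; an illustrative element (values invented for the example, only the three keys are grounded in the test):

```python
development_plan = [{
    'description': 'Set up a Node.js project with Express and Socket.io',
    'programmatic_goal': 'Server starts and accepts Socket.io connections on port 3000',
    'user_review_goal': 'User can open the chat page in a browser',
}]
```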

View File

@@ -1,9 +1,13 @@
class MockQuestionary:
-    def __init__(self, answers=None):
+    def __init__(self, answers=None, initial_state='project_description'):
        if answers is None:
            answers = []
        self.answers = iter(answers)
-        self.state = 'project_description'
+        self.state = initial_state

+    class Style:
+        def __init__(self, *args, **kwargs):
+            pass

    def text(self, question: str, style=None):
        print('AI: ' + question)
@@ -13,6 +17,9 @@ class MockQuestionary:
            self.state = 'DONE'
        return self

+    def ask(self):
+        return self.unsafe_ask()

    def unsafe_ask(self):
        if self.state == 'user_stories':
            answer = ''
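Usage is unchanged from the agent tests above; a condensed sketch of how the mock is wired in:

```python
from unittest.mock import patch

mock_questionary = MockQuestionary(['', '', 'no'])
with patch('utils.llm_connection.questionary', mock_questionary):
    # the code under test now receives scripted answers via .ask() / .unsafe_ask()
    ...
```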

pilot/test/test_utils.py Normal file
View File

@@ -0,0 +1,11 @@
from unittest.mock import Mock


def mock_terminal_size():
    mock_size = Mock()
    mock_size.columns = 80  # or whatever width you want
    return mock_size


def assert_non_empty_string(value):
    assert isinstance(value, str)
    assert len(value) > 0
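A quick usage sketch (hypothetical test, not part of the commit): `mock_terminal_size` is meant to be handed to `unittest.mock.patch` so `os.get_terminal_size()` returns a fixed width, as the commented-out decorator in the TechLead test hints:

```python
import os
from unittest.mock import patch

from test.test_utils import assert_non_empty_string, mock_terminal_size


@patch('os.get_terminal_size', mock_terminal_size)
def test_example():
    assert os.get_terminal_size().columns == 80
    assert_non_empty_string('hello')
```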

View File

@@ -38,17 +38,11 @@ def add_function_calls_to_request(gpt_data, function_calls: FunctionCallSet | No
        return

    model: str = gpt_data['model']
-    is_llama = 'llama' in model or 'anthropic' in model
+    is_instruct = 'llama' in model or 'anthropic' in model

-    # if model == 'gpt-4':
-    #     gpt_data['functions'] = function_calls['definitions']
-    #     if len(function_calls['definitions']) > 1:
-    #         gpt_data['function_call'] = 'auto'
-    #     else:
-    #         gpt_data['function_call'] = {'name': function_calls['definitions'][0]['name']}
-    #     return

+    gpt_data['functions'] = function_calls['definitions']

-    prompter = JsonPrompter(is_llama)
+    prompter = JsonPrompter(is_instruct)

    if len(function_calls['definitions']) > 1:
        function_call = None
@@ -77,7 +71,8 @@ def parse_agent_response(response, function_calls: FunctionCallSet | None):
    if function_calls:
        text = re.sub(r'^.*```json\s*', '', response['text'], flags=re.DOTALL)
-        values = list(json.loads(text.strip('` \n')).values())
+        text = text.strip('` \n')
+        values = list(json.loads(text).values())
        if len(values) == 1:
            return values[0]
        else:
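For illustration, with a single function defined the parsed payload collapses to its lone value (hypothetical response text; `ARCHITECTURE` is the repo's own constant):

```python
response = {'text': '```json\n{"technologies": ["Node.js"]}\n```'}
parse_agent_response(response, ARCHITECTURE)  # -> ['Node.js']
```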
@@ -90,8 +85,8 @@ class JsonPrompter:
"""
Adapted from local_llm_function_calling
"""
def __init__(self, is_llama: bool = False):
self.is_llama = is_llama
def __init__(self, is_instruct: bool = False):
self.is_instruct = is_instruct
def function_descriptions(
self, functions: list[FunctionType], function_to_call: str
@@ -107,7 +102,7 @@ class JsonPrompter:
        (empty if the function doesn't exist or has no description)
        """
        return [
-            function["description"]
+            f'# {function["name"]}: {function["description"]}'
            for function in functions
            if function["name"] == function_to_call and "description" in function
        ]
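The change prefixes each description with a markdown-style heading naming the function; an illustrative call (invented values):

```python
prompter = JsonPrompter()
prompter.function_descriptions(
    [{'name': 'process_technologies', 'description': 'Pick the stack'}],
    'process_technologies',
)  # before: ['Pick the stack']  after: ['# process_technologies: Pick the stack']
```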
@@ -213,7 +208,7 @@ class JsonPrompter:
else "Here's the function the user should call: "
)
if self.is_llama:
if self.is_instruct:
return f"[INST] <<SYS>>\n{system}\n\n{data}\n<</SYS>>\n\n{prompt} [/INST]"
else:
return f"{system}\n\n{data}\n\n{prompt}"

View File

@@ -7,14 +7,13 @@ import json
import tiktoken
import questionary
from utils.style import red
from typing import List
from const.llm import MIN_TOKENS_FOR_GPT_RESPONSE, MAX_GPT_MODEL_TOKENS
from logger.logger import logger
from helpers.exceptions.TokenLimitError import TokenLimitError
from utils.utils import fix_json
-from utils.function_calling import add_function_calls_to_request
+from utils.function_calling import add_function_calls_to_request, FunctionCallSet, FunctionType


def get_tokens_in_messages(messages: List[str]) -> int:
    tokenizer = tiktoken.get_encoding("cl100k_base")  # GPT-4 tokenizer
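As a standalone reminder of the tiktoken call used here (the `cl100k_base` encoding is the one GPT-4 uses):

```python
import tiktoken

tokenizer = tiktoken.get_encoding('cl100k_base')
print(len(tokenizer.encode('hello world')))  # 2 tokens
```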
@@ -58,7 +57,7 @@ def num_tokens_from_functions(functions):
def create_gpt_chat_completion(messages: List[dict], req_type, min_tokens=MIN_TOKENS_FOR_GPT_RESPONSE,
-                              function_calls=None):
+                              function_calls: FunctionCallSet = None):
    """
    Called from:
      - AgentConvo.send_message() - these calls often have `function_calls`, usually from `pilot/const/function_calls.py`
@@ -167,6 +166,7 @@ def retry_on_exception(func):
        ('answer', 'fg:orange')
    ])).ask()

+    # TODO: take user's input into consideration - send to LLM?
    if user_message != '':
        return {}
@@ -183,9 +183,16 @@ def stream_gpt_completion(data, req_type):
"""
    # TODO add type dynamically - this isn't working when connected to the external process
-    terminal_width = 50#os.get_terminal_size().columns
+    terminal_width = 50  # os.get_terminal_size().columns
    lines_printed = 2
-    buffer = ""  # A buffer to accumulate incoming data
+    buffer = ''  # A buffer to accumulate incoming data
+    expecting_json = False
+    received_json = False

+    if 'functions' in data:
+        expecting_json = data['functions']
+        # Don't send the `functions` parameter to Open AI, but don't remove it from `data` in case we need to retry
+        data = {key: value for key, value in data.items() if key != "functions"}

    def return_result(result_data, lines_printed):
        if buffer:
@@ -197,7 +204,6 @@ def stream_gpt_completion(data, req_type):
    # spinner = spinner_start(yellow("Waiting for OpenAI API response..."))
    # print(yellow("Stream response from OpenAI:"))

-    api_key = os.getenv("OPENAI_API_KEY")
    logger.info(f'Request data: {data}')
@@ -208,15 +214,26 @@ def stream_gpt_completion(data, req_type):
    if endpoint == 'AZURE':
        # If yes, get the AZURE_ENDPOINT from .ENV file
        endpoint_url = os.getenv('AZURE_ENDPOINT') + '/openai/deployments/' + model + '/chat/completions?api-version=2023-05-15'
-        headers = {'Content-Type': 'application/json', 'api-key': os.getenv('AZURE_API_KEY')}
+        headers = {
+            'Content-Type': 'application/json',
+            'api-key': os.getenv('AZURE_API_KEY')
+        }
    elif endpoint == 'OPENROUTER':
        # If so, send the request to the OpenRouter API endpoint
-        headers = {'Content-Type': 'application/json', 'Authorization': 'Bearer ' + os.getenv("OPENROUTER_API_KEY"), 'HTTP-Referer': 'http://localhost:3000', 'X-Title': 'GPT Pilot (LOCAL)'}
-        endpoint_url = os.getenv("OPENROUTER_ENDPOINT", 'https://openrouter.ai/api/v1/chat/completions')
+        endpoint_url = os.getenv('OPENROUTER_ENDPOINT', 'https://openrouter.ai/api/v1/chat/completions')
+        headers = {
+            'Content-Type': 'application/json',
+            'Authorization': 'Bearer ' + os.getenv('OPENROUTER_API_KEY'),
+            'HTTP-Referer': 'http://localhost:3000',
+            'X-Title': 'GPT Pilot (LOCAL)'
+        }
    else:
        # If not, send the request to the OpenAI endpoint
-        headers = {'Content-Type': 'application/json', 'Authorization': 'Bearer ' + os.getenv("OPENAI_API_KEY")}
-        endpoint_url = os.getenv("OPENAI_ENDPOINT", 'https://api.openai.com/v1/chat/completions')
+        endpoint_url = os.getenv('OPENAI_ENDPOINT', 'https://api.openai.com/v1/chat/completions')
+        headers = {
+            'Content-Type': 'application/json',
+            'Authorization': 'Bearer ' + os.getenv('OPENAI_API_KEY')
+        }

    response = requests.post(
        endpoint_url,
@@ -233,7 +250,7 @@ def stream_gpt_completion(data, req_type):
        raise Exception(f"API responded with status code: {response.status_code}. Response text: {response.text}")

    gpt_response = ''
-    function_calls = {'name': '', 'arguments': ''}
+    # function_calls = {'name': '', 'arguments': ''}

    for line in response.iter_lines():
        # Ignore keep-alive new lines
@@ -259,9 +276,9 @@ def stream_gpt_completion(data, req_type):
            choice = json_line['choices'][0]

-            if 'finish_reason' in choice and choice['finish_reason'] == 'function_call':
-                function_calls['arguments'] = load_data_to_json(function_calls['arguments'])
-                return return_result({'function_calls': function_calls}, lines_printed)
+            # if 'finish_reason' in choice and choice['finish_reason'] == 'function_call':
+            #     function_calls['arguments'] = load_data_to_json(function_calls['arguments'])
+            #     return return_result({'function_calls': function_calls}, lines_printed)

            json_line = choice['delta']

            # TODO: token healing? https://github.com/1rgs/jsonformer-claude
@@ -272,14 +289,14 @@ def stream_gpt_completion(data, req_type):
                continue  # skip to the next line

            # handle the streaming response
-            if 'function_call' in json_line:
-                if 'name' in json_line['function_call']:
-                    function_calls['name'] = json_line['function_call']['name']
-                    print(f'Function call: {function_calls["name"]}')
-
-                if 'arguments' in json_line['function_call']:
-                    function_calls['arguments'] += json_line['function_call']['arguments']
-                    print(json_line['function_call']['arguments'], type='stream', end='', flush=True)
+            # if 'function_call' in json_line:
+            #     if 'name' in json_line['function_call']:
+            #         function_calls['name'] = json_line['function_call']['name']
+            #         print(f'Function call: {function_calls["name"]}')
+            #
+            #     if 'arguments' in json_line['function_call']:
+            #         function_calls['arguments'] += json_line['function_call']['arguments']
+            #         print(json_line['function_call']['arguments'], type='stream', end='', flush=True)

            if 'content' in json_line:
                content = json_line.get('content')
@@ -287,7 +304,18 @@ def stream_gpt_completion(data, req_type):
                buffer += content  # accumulate the data

                # If you detect a natural breakpoint (e.g., line break or end of a response object), print & count:
-                if buffer.endswith("\n"):  # or some other condition that denotes a breakpoint
+                if buffer.endswith("\n"):
+                    if expecting_json and not received_json:
+                        received_json = assert_json_response(buffer, lines_printed > 2)
+                        if received_json:
+                            gpt_response = ""
+
+                    # if not received_json:
+                    #     # Don't append to gpt_response, but increment lines_printed
+                    #     lines_printed += 1
+                    #     buffer = ""
+                    #     continue

+                    # or some other condition that denotes a breakpoint
                    lines_printed += count_lines_based_on_width(buffer, terminal_width)
                    buffer = ""  # reset the buffer
@@ -295,15 +323,42 @@ def stream_gpt_completion(data, req_type):
                print(content, type='stream', end='', flush=True)

    print('\n', type='stream')

-    if function_calls['arguments'] != '':
-        logger.info(f'Response via function call: {function_calls["arguments"]}')
-        function_calls['arguments'] = load_data_to_json(function_calls['arguments'])
-        return return_result({'function_calls': function_calls}, lines_printed)
+    # if function_calls['arguments'] != '':
+    #     logger.info(f'Response via function call: {function_calls["arguments"]}')
+    #     function_calls['arguments'] = load_data_to_json(function_calls['arguments'])
+    #     return return_result({'function_calls': function_calls}, lines_printed)

    logger.info(f'Response message: {gpt_response}')

+    if expecting_json:
+        assert_json_schema(gpt_response, expecting_json)

    new_code = postprocessing(gpt_response, req_type)  # TODO add type dynamically
    return return_result({'text': new_code}, lines_printed)


+def assert_json_response(response: str, or_fail=True) -> bool:
+    if re.match(r'.*(```(json)?|{|\[)', response):
+        return True
+    elif or_fail:
+        raise ValueError('LLM did not respond with JSON')
+    else:
+        return False


+def assert_json_schema(response: str, functions: list[FunctionType]) -> True:
+    return True
+    # TODO: validation always fails
+    # for function in functions:
+    #     schema = function['parameters']
+    #     parser = parser_for_schema(schema)
+    #     validated = parser.validate(response)
+    #     if validated.valid and validated.end_index:
+    #         return True
+    #
+    # raise ValueError('LLM responded with invalid JSON')


def postprocessing(gpt_response, req_type):
    return gpt_response
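Note that `assert_json_schema` is stubbed to `return True` for now (per the TODO, the commented-out local_llm_function_calling validation always failed), yet the new tests below expect a ValueError on invalid, incomplete, or schema-violating JSON. A minimal sketch of a version that would satisfy those tests, using the third-party `jsonschema` package instead of the commented-out parser (an editorial assumption, not what this commit ships):

```python
import json

from jsonschema import ValidationError, validate  # pip install jsonschema


def assert_json_schema(response: str, functions: list) -> bool:
    # `functions` is the definitions list that stream_gpt_completion stored in `expecting_json`
    try:
        instance = json.loads(response)
    except json.JSONDecodeError:
        # covers truncated output such as '{"foo": "b'
        raise ValueError('LLM responded with invalid JSON')
    for function in functions:
        try:
            validate(instance, function['parameters'])
            return True
        except ValidationError:
            continue
    raise ValueError('LLM responded with invalid JSON')
```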

View File

@@ -78,7 +78,7 @@ Create a web-based chat app'''
def test_llama_json_prompter():
    # Given
-    prompter = JsonPrompter(is_llama=True)
+    prompter = JsonPrompter(is_instruct=True)

    # When
    prompt = prompter.prompt('Create a web-based chat app', ARCHITECTURE['definitions'])  # , 'process_technologies')
@@ -126,7 +126,7 @@ Create a web-based chat app'''
def test_llama_json_prompter_named():
    # Given
-    prompter = JsonPrompter(is_llama=True)
+    prompter = JsonPrompter(is_instruct=True)

    # When
    prompt = prompter.prompt('Create a web-based chat app', ARCHITECTURE['definitions'], 'process_technologies')

View File

@@ -2,13 +2,14 @@ import builtins
import pytest
from dotenv import load_dotenv
-from const.function_calls import ARCHITECTURE, DEV_STEPS
+from const.function_calls import ARCHITECTURE, DEVELOPMENT_PLAN
from helpers.AgentConvo import AgentConvo
from helpers.Project import Project
from helpers.agents.Architect import Architect
from helpers.agents.Developer import Developer
-from utils.function_calling import parse_agent_response
-from .llm_connection import create_gpt_chat_completion
+from helpers.agents.TechLead import TechLead
+from utils.function_calling import parse_agent_response, FunctionType
+from test.test_utils import assert_non_empty_string
+from .llm_connection import create_gpt_chat_completion, assert_json_response, assert_json_schema
from main import get_custom_print

load_dotenv()
@@ -16,10 +17,58 @@ load_dotenv()
project = Project({'app_id': 'test-app'}, current_step='test')
class TestSchemaValidation:
    def setup_method(self):
        self.function: FunctionType = {
            'name': 'test',
            'description': 'test schema',
            'parameters': {
                'type': 'object',
                'properties': {'foo': {'type': 'string'}},
                'required': ['foo']
            }
        }

    def test_assert_json_response(self):
        assert assert_json_response('{"foo": "bar"}')
        assert assert_json_response('{\n"foo": "bar"}')
        assert assert_json_response('```\n{"foo": "bar"}')
        assert assert_json_response('```json\n{\n"foo": "bar"}')
        with pytest.raises(ValueError, match='LLM did not respond with JSON'):
            assert assert_json_response('# Foo\n bar')

    def test_assert_json_schema(self):
        # When assert_json_schema is called with valid JSON
        # Then no errors
        assert(assert_json_schema('{"foo": "bar"}', [self.function]))

    def test_assert_json_schema_invalid(self):
        # When assert_json_schema is called with invalid JSON
        # Then error is raised
        with pytest.raises(ValueError, match='LLM responded with invalid JSON'):
            assert_json_schema('{"foo": 1}', [self.function])

    def test_assert_json_schema_incomplete(self):
        # When assert_json_schema is called with incomplete JSON
        # Then error is raised
        with pytest.raises(ValueError, match='LLM responded with invalid JSON'):
            assert_json_schema('{"foo": "b', [self.function])

    def test_assert_json_schema_required(self):
        # When assert_json_schema is called with missing required property
        # Then error is raised
        self.function['parameters']['properties']['other'] = {'type': 'string'}
        self.function['parameters']['required'] = ['foo', 'other']
        with pytest.raises(ValueError, match='LLM responded with invalid JSON'):
            assert_json_schema('{"foo": "bar"}', [self.function])


class TestLlmConnection:
    def setup_method(self):
        builtins.print, ipc_client_instance = get_custom_print({})

    @pytest.mark.uses_tokens
    @pytest.mark.parametrize("endpoint, model", [
        ("OPENAI", "gpt-4"),  # role: system
@@ -64,7 +113,6 @@ solution-oriented decision-making in areas where precise instructions were not p
                'User will be able to register a new account on Test_App.',
            ]
        })

        function_calls = ARCHITECTURE

        # When
@@ -78,6 +126,60 @@ solution-oriented decision-making in areas where precise instructions were not p
        response = parse_agent_response(response, function_calls)
        assert 'Node.js' in response

    @pytest.mark.uses_tokens
    @pytest.mark.parametrize("endpoint, model", [
        ("OPENAI", "gpt-4"),  # role: system
        ("OPENROUTER", "openai/gpt-3.5-turbo"),  # role: user
        ("OPENROUTER", "meta-llama/codellama-34b-instruct"),  # role: user, is_llama
        ("OPENROUTER", "google/palm-2-chat-bison"),  # role: user/system
        ("OPENROUTER", "google/palm-2-codechat-bison"),
        # TODO: See https://github.com/1rgs/jsonformer-claude/blob/main/jsonformer_claude/main.py
        #       https://github.com/guidance-ai/guidance - token healing
        ("OPENROUTER", "anthropic/claude-2"),  # role: user, is_llama
    ])
    def test_chat_completion_TechLead(self, endpoint, model, monkeypatch):
        # Given
        monkeypatch.setenv('ENDPOINT', endpoint)
        monkeypatch.setenv('MODEL_NAME', model)

        agent = TechLead(project)
        convo = AgentConvo(agent)
        convo.construct_and_add_message_from_prompt('development/plan.prompt', {
            'name': 'Test App',
            'app_summary': '''
The project entails creating a web-based chat application, tentatively named "chat_app."
This application does not require user authentication or chat history storage.
It solely supports one-on-one messaging, excluding group chats or multimedia sharing like photos, videos, or files.
Additionally, there are no specific requirements for real-time functionality, like live typing indicators or read receipts.
The development of this application will strictly follow a monolithic structure, avoiding the use of microservices, as per the client's demand.
The development process will include the creation of user stories and tasks, based on detailed discussions with the client.''',
            'app_type': 'web app',
            'user_stories': [
                'User Story 1: As a user, I can access the web-based "chat_app" directly without needing to authenticate or log in. Do you want to add anything else? If not, just press ENTER.',
                'User Story 2: As a user, I can start one-on-one conversations with another user on the "chat_app". Do you want to add anything else? If not, just press ENTER.',
                'User Story 3: As a user, I can send and receive messages in real-time within my one-on-one conversation on the "chat_app". Do you want to add anything else? If not, just press ENTER.',
                'User Story 4: As a user, I do not need to worry about deleting or storing my chats because the "chat_app" does not store chat histories. Do you want to add anything else? If not, just press ENTER.',
                'User Story 5: As a user, I will only be able to send text messages, as the "chat_app" does not support any kind of multimedia sharing like photos, videos, or files. Do you want to add anything else? If not, just press ENTER.',
                'User Story 6: As a user, I will not see any live typing indicators or read receipts since the "chat_app" does not provide any additional real-time functionality beyond message exchange. Do you want to add anything else? If not, just press ENTER.',
            ]
        })

        function_calls = DEVELOPMENT_PLAN

        # When
        response = create_gpt_chat_completion(convo.messages, '', function_calls=function_calls)

        # Then
        assert convo.messages[0]['content'].startswith('You are a tech lead in a software development agency')
        assert convo.messages[1]['content'].startswith('You are working in a software development agency and a project manager and software architect approach you')
        assert response is not None
        response = parse_agent_response(response, function_calls)
        assert_non_empty_string(response[0]['description'])
        assert_non_empty_string(response[0]['programmatic_goal'])
        assert_non_empty_string(response[0]['user_review_goal'])

    # def test_break_down_development_task(self):
    #     # Given
    #     agent = Developer(project)