Merge pull request #102 from nalbion/feature/99-remove-function-calling

Remove function calling
Authored by LeonOstrez on 2023-09-26 10:01:50 +01:00, committed by GitHub
33 changed files with 1118 additions and 220 deletions


@@ -7,21 +7,25 @@ on:
pull_request:
branches:
- main
- debugging_ipc
jobs:
build:
runs-on: ubuntu-latest
strategy:
matrix:
python-version: ['3.8', '3.9', '3.10', '3.11']
# 3.10 - 04 Oct 2021
# 3.11 - 24 Oct 2022
python-version: ['3.10', '3.11']
steps:
- uses: actions/checkout@v4
- name: Set up Python ${{ matrix.python-version }}
uses: actions/setup-python@v2
uses: actions/setup-python@v4
with:
python-version: ${{ matrix.python-version }}
cache: 'pip'
- name: Install dependencies
run: |
@@ -41,4 +45,4 @@ jobs:
run: |
pip install pytest
cd pilot
PYTHONPATH=. pytest
PYTHONPATH=. pytest -m "not slow and not uses_tokens"
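The slow and uses_tokens markers deselected above are ordinary pytest markers that the new test files apply directly; a minimal sketch (the test body is a hypothetical placeholder, and registering the markers in a pytest config is assumed rather than shown in this diff):

import pytest

@pytest.mark.slow
@pytest.mark.uses_tokens
def test_talks_to_a_real_llm():
    # Deselected in CI by: pytest -m "not slow and not uses_tokens"
    ...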


@@ -51,7 +51,7 @@ https://github.com/Pythagora-io/gpt-pilot/assets/10895136/0495631b-511e-451b-93d
# 🔌 Requirements
- **Python**
- **Python >= 3.10**
- **PostgreSQL** (optional; the project's default is SQLite)
- A DB is needed for several reasons: continuing app development if you had to stop at any point or the app crashed, going back to a specific step so you can change later steps in development, and easier debugging; in the future we will add functionality to update a project (change things in an existing project, add new features, and so on)...
@@ -61,7 +61,7 @@ After you have Python and PostgreSQL installed, follow these steps:
1. `git clone https://github.com/Pythagora-io/gpt-pilot.git` (clone the repo)
2. `cd gpt-pilot`
3. `python -m venv pilot-env` (create a virtual environment)
4. `source pilot-env/bin/activate` (activate the virtual environment)
4. `source pilot-env/bin/activate` (or on Windows `pilot-env\Scripts\activate`) (activate the virtual environment)
5. `pip install -r requirements.txt` (install the dependencies)
6. `cd pilot`
7. `mv .env.example .env` (create the .env file)


@@ -28,7 +28,7 @@ def return_array_from_prompt(name_plural, name_singular, return_var_name):
"properties": {
f"{return_var_name}": {
"type": "array",
"description": f"List of {name_plural} that are created in a list.",
"description": f"List of {name_plural}.",
"items": {
"type": "string",
"description": f"{name_singular}"

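Judging by the expected prompts asserted in the new utils/test_function_calling.py further down, a call such as return_array_from_prompt('technologies', 'technology', 'technologies') presumably renders properties along these lines (a sketch of the generated schema, not the helper itself):

{
    "technologies": {
        "type": "array",
        "description": "List of technologies.",
        "items": {
            "type": "string",
            "description": "technology"
        }
    }
}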

@@ -1,6 +1,6 @@
from playhouse.shortcuts import model_to_dict
from peewee import *
from fabulous.color import yellow, red
from utils.style import yellow, red
from functools import reduce
import operator
import psycopg2


@@ -1,15 +1,13 @@
import re
import subprocess
import uuid
from fabulous.color import yellow, bold
from utils.style import yellow, yellow_bold
from database.database import get_saved_development_step, save_development_step, delete_all_subsequent_steps
from helpers.files import get_files_content
from const.common import IGNORE_FOLDERS
from helpers.exceptions.TokenLimitError import TokenLimitError
from utils.utils import array_of_objects_to_string
from utils.llm_connection import get_prompt, create_gpt_chat_completion
from utils.utils import get_sys_message, find_role_from_step, capitalize_first_word_with_underscores
from utils.function_calling import parse_agent_response, FunctionCallSet
from utils.llm_connection import create_gpt_chat_completion
from utils.utils import array_of_objects_to_string, get_prompt, get_sys_message, capitalize_first_word_with_underscores
from logger.logger import logger
from prompts.prompts import ask_user
from const.llm import END_RESPONSE
@@ -23,7 +21,8 @@ class AgentConvo:
agent: An instance of the agent participating in the conversation.
"""
def __init__(self, agent):
self.messages = []
# [{'role': 'system'|'user'|'assistant', 'content': ''}, ...]
self.messages: list[dict] = []
self.branches = {}
self.log_to_user = True
self.agent = agent
@@ -32,7 +31,7 @@ class AgentConvo:
# add system message
self.messages.append(get_sys_message(self.agent.role))
def send_message(self, prompt_path=None, prompt_data=None, function_calls=None):
def send_message(self, prompt_path=None, prompt_data=None, function_calls: FunctionCallSet = None):
"""
Sends a message in the conversation.
@@ -83,7 +82,7 @@ class AgentConvo:
if response == {}:
raise Exception("OpenAI API error happened.")
response = self.postprocess_response(response, function_calls)
response = parse_agent_response(response, function_calls)
# TODO remove this once the database is set up properly
message_content = response[0] if type(response) == tuple else response
@@ -126,7 +125,7 @@ class AgentConvo:
# Continue conversation until GPT response equals END_RESPONSE
while response != END_RESPONSE:
print(yellow("Do you want to add anything else? If not, ") + yellow(bold('just press ENTER.')))
print(yellow("Do you want to add anything else? If not, ") + yellow_bold('just press ENTER.'))
user_message = ask_user(self.agent.project, response, False)
if user_message == "":
@@ -174,25 +173,6 @@ class AgentConvo:
def convo_length(self):
return len([msg for msg in self.messages if msg['role'] != 'system'])
def postprocess_response(self, response, function_calls):
"""
Post-processes the response from the agent.
Args:
response: The response from the agent.
function_calls: Optional function calls associated with the response.
Returns:
The post-processed response.
"""
if 'function_calls' in response and function_calls is not None:
if 'send_convo' in function_calls:
response['function_calls']['arguments']['convo'] = self
response = function_calls['functions'][response['function_calls']['name']](**response['function_calls']['arguments'])
elif 'text' in response:
response = response['text']
return response
def log_message(self, content):
"""
@@ -204,7 +184,7 @@ class AgentConvo:
print_msg = capitalize_first_word_with_underscores(self.high_level_step)
if self.log_to_user:
if self.agent.project.checkpoints['last_development_step'] is not None:
print(yellow("\nDev step ") + yellow(bold(str(self.agent.project.checkpoints['last_development_step']))) + '\n', end='')
print(yellow("\nDev step ") + yellow_bold(str(self.agent.project.checkpoints['last_development_step'])) + '\n', end='')
print(f"\n{content}\n")
logger.info(f"{print_msg}: {content}\n")
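For orientation, the messages list this class accumulates and ultimately ships to the LLM is plain chat format, as the new inline comment notes. A minimal illustration (the contents are invented for the example):

messages: list[dict] = [
    {'role': 'system', 'content': 'You are a full stack software developer...'},
    {'role': 'user', 'content': 'Implement task #1'},
    {'role': 'assistant', 'content': 'DONE'},
]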


@@ -1,6 +1,6 @@
import json
from fabulous.color import bold, green, yellow, cyan, white
from utils.style import green_bold, yellow_bold, cyan, white_bold
from const.common import IGNORE_FOLDERS, STEPS
from database.database import delete_unconnected_steps_from, delete_all_app_development_data
from const.ipc import MESSAGE_TYPE
@@ -67,10 +67,10 @@ class Project:
# if development_plan is not None:
# self.development_plan = development_plan
print(green(bold('\n------------------ STARTING NEW PROJECT ----------------------')))
print(green_bold('\n------------------ STARTING NEW PROJECT ----------------------'))
print(f"If you wish to continue with this project in future run:")
print(green(bold(f'python main.py app_id={args["app_id"]}')))
print(green(bold('--------------------------------------------------------------\n')))
print(green_bold(f'python main.py app_id={args["app_id"]}'))
print(green_bold('--------------------------------------------------------------\n'))
def start(self):
"""
@@ -306,10 +306,10 @@ class Project:
reset_branch_id = convo.save_branch()
while answer != 'continue':
print(yellow(bold(message)))
print(yellow_bold(message))
if description is not None:
print('\n' + '-'*100 + '\n' +
white(bold(description)) +
white_bold(description) +
'\n' + '-'*100 + '\n')
answer = styled_text(


@@ -1,7 +1,7 @@
from utils.utils import step_already_finished
from helpers.Agent import Agent
import json
from fabulous.color import green, bold
from utils.style import green_bold
from const.function_calls import ARCHITECTURE
from utils.utils import should_execute_step, find_role_from_step, generate_app_data
@@ -28,7 +28,7 @@ class Architect(Agent):
return step['architecture']
# ARCHITECTURE
print(green(bold(f"Planning project architecture...\n")))
print(green_bold(f"Planning project architecture...\n"))
logger.info(f"Planning project architecture...")
self.convo_architecture = AgentConvo(self)


@@ -1,9 +1,8 @@
from const.function_calls import GET_FILES, DEV_STEPS, IMPLEMENT_CHANGES, CODE_CHANGES
from database.models.files import File
from helpers.files import update_file
from helpers.AgentConvo import AgentConvo
from helpers.Agent import Agent
class CodeMonkey(Agent):
def __init__(self, project, developer):
super().__init__('code_monkey', project)
@@ -20,12 +19,11 @@ class CodeMonkey(Agent):
# "finished_steps": ', '.join(f"#{j}" for j in range(step_index))
# }, GET_FILES)
changes = convo.send_message('development/implement_changes.prompt', {
"step_description": code_changes_description,
"step_index": step_index,
"directory_tree": self.project.get_directory_tree(True),
"files": []#self.project.get_files(files_needed),
"files": [] # self.project.get_files(files_needed),
}, IMPLEMENT_CHANGES)
convo.remove_last_x_messages(1)


@@ -1,5 +1,5 @@
import uuid
from fabulous.color import yellow, green, red, bold, blue, white
from utils.style import yellow, green, red, blue, white, green_bold, yellow_bold, red_bold, blue_bold, white_bold
from helpers.exceptions.TokenLimitError import TokenLimitError
from const.code_execution import MAX_COMMAND_DEBUG_TRIES
from helpers.exceptions.TooDeepRecursionError import TooDeepRecursionError
@@ -11,7 +11,7 @@ from logger.logger import logger
from helpers.Agent import Agent
from helpers.AgentConvo import AgentConvo
from utils.utils import should_execute_step, array_of_objects_to_string, generate_app_data
from helpers.cli import run_command_until_success, execute_command_and_check_cli_response, debug
from helpers.cli import run_command_until_success, execute_command_and_check_cli_response
from const.function_calls import FILTER_OS_TECHNOLOGIES, EXECUTE_COMMANDS, GET_TEST_TYPE, IMPLEMENT_TASK
from database.database import save_progress, get_progress_steps
from utils.utils import get_os_info
@@ -31,7 +31,7 @@ class Developer(Agent):
self.project.skip_steps = False if ('skip_until_dev_step' in self.project.args and self.project.args['skip_until_dev_step'] == '0') else True
# DEVELOPMENT
print(green(bold(f"Ok, great, now, let's start with the actual development...\n")))
print(green_bold(f"Ok, great, now, let's start with the actual development...\n"))
logger.info(f"Starting to create the actual code...")
for i, dev_task in enumerate(self.project.development_plan):
@@ -42,7 +42,7 @@ class Developer(Agent):
logger.info('The app is DONE!!! Yay...you can use it now.')
def implement_task(self, i, development_task=None):
print(green(bold(f'Implementing task #{i + 1}: ')) + green(f' {development_task["description"]}\n'))
print(green_bold(f'Implementing task #{i + 1}: ') + green(f' {development_task["description"]}\n'))
convo_dev_task = AgentConvo(self)
task_description = convo_dev_task.send_message('development/task/breakdown.prompt', {
@@ -96,7 +96,7 @@ class Developer(Agent):
def step_human_intervention(self, convo, step):
while True:
human_intervention_description = step['human_intervention_description'] + yellow(bold('\n\nIf you want to run the app, just type "r" and press ENTER and that will run `' + self.run_command + '`')) if self.run_command is not None else step['human_intervention_description']
human_intervention_description = step['human_intervention_description'] + yellow_bold('\n\nIf you want to run the app, just type "r" and press ENTER and that will run `' + self.run_command + '`') if self.run_command is not None else step['human_intervention_description']
response = self.project.ask_for_human_intervention('I need human intervention:',
human_intervention_description,
cbs={ 'r': lambda conv: run_command_until_success(self.run_command, None, conv, force=True, return_cli_response=True) },
@@ -151,8 +151,8 @@ class Developer(Agent):
if step_implementation_try >= MAX_COMMAND_DEBUG_TRIES:
self.dev_help_needed(step)
print(red(bold(f'\n--------- LLM Reached Token Limit ----------')))
print(red(bold(f'Can I retry implementing the entire development step?')))
print(red_bold(f'\n--------- LLM Reached Token Limit ----------'))
print(red_bold(f'Can I retry implementing the entire development step?'))
answer = ''
while answer != 'y':
@@ -169,9 +169,9 @@ class Developer(Agent):
def dev_help_needed(self, step):
if step['type'] == 'command':
help_description = (red(bold(f'I tried running the following command but it doesn\'t seem to work:\n\n')) +
white(bold(step['command']['command'])) +
red(bold(f'\n\nCan you please make it work?')))
help_description = (red_bold(f'I tried running the following command but it doesn\'t seem to work:\n\n') +
white_bold(step['command']['command']) +
red_bold(f'\n\nCan you please make it work?'))
elif step['type'] == 'code_change':
help_description = step['code_change_description']
elif step['type'] == 'human_intervention':
@@ -190,9 +190,9 @@ class Developer(Agent):
answer = ''
while answer != 'continue':
print(red(bold(f'\n----------------------------- I need your help ------------------------------')))
print(red_bold(f'\n----------------------------- I need your help ------------------------------'))
print(extract_substring(str(help_description)))
print(red(bold(f'\n-----------------------------------------------------------------------------')))
print(red_bold(f'\n-----------------------------------------------------------------------------'))
answer = styled_text(
self.project,
'Once you\'re done, type "continue"?'
@@ -256,8 +256,8 @@ class Developer(Agent):
def continue_development(self, iteration_convo, last_branch_name, continue_description=''):
while True:
iteration_convo.load_branch(last_branch_name)
user_description = ('Here is a description of what should be working: \n\n' + blue(bold(continue_description)) + '\n') if continue_description != '' else ''
user_description = 'Can you check if the app works please? ' + user_description + '\nIf you want to run the app, ' + yellow(bold('just type "r" and press ENTER and that will run `' + self.run_command + '`'))
user_description = ('Here is a description of what should be working: \n\n' + blue_bold(continue_description) + '\n') if continue_description != '' else ''
user_description = 'Can you check if the app works please? ' + user_description + '\nIf you want to run the app, ' + yellow_bold('just type "r" and press ENTER and that will run `' + self.run_command + '`')
# continue_description = ''
response = self.project.ask_for_human_intervention(
user_description,
@@ -324,36 +324,15 @@ class Developer(Agent):
}, FILTER_OS_TECHNOLOGIES)
for technology in os_specific_technologies:
# TODO move the functions definitions to function_calls.py
cli_response, llm_response = self.convo_os_specific_tech.send_message('development/env_setup/install_next_technology.prompt',
{ 'technology': technology}, {
'definitions': [{
'name': 'execute_command',
'description': f'Executes a command that should check if {technology} is installed on the machine. ',
'parameters': {
'type': 'object',
'properties': {
'command': {
'type': 'string',
'description': f'Command that needs to be executed to check if {technology} is installed on the machine.',
},
'timeout': {
'type': 'number',
'description': 'Timeout in seconds for the approcimate time this command takes to finish.',
}
},
'required': ['command', 'timeout'],
},
}],
'functions': {
'execute_command': execute_command_and_check_cli_response
},
'send_convo': True
})
llm_response = self.install_technology(technology)
# TODO: I don't think llm_response would ever be 'DONE'?
if llm_response != 'DONE':
installation_commands = self.convo_os_specific_tech.send_message('development/env_setup/unsuccessful_installation.prompt',
{ 'technology': technology }, EXECUTE_COMMANDS)
installation_commands = self.convo_os_specific_tech.send_message(
'development/env_setup/unsuccessful_installation.prompt',
{'technology': technology},
EXECUTE_COMMANDS)
if installation_commands is not None:
for cmd in installation_commands:
run_command_until_success(cmd['command'], cmd['timeout'], self.convo_os_specific_tech)
@@ -361,11 +340,46 @@ class Developer(Agent):
logger.info('The entire tech stack needed is installed and ready to be used.')
save_progress(self.project.args['app_id'], self.project.current_step, {
"os_specific_technologies": os_specific_technologies, "newly_installed_technologies": [], "app_data": generate_app_data(self.project.args)
"os_specific_technologies": os_specific_technologies,
"newly_installed_technologies": [],
"app_data": generate_app_data(self.project.args)
})
# ENVIRONMENT SETUP END
# TODO: This is only called from the unreachable section of set_up_environment()
def install_technology(self, technology):
# TODO move the functions definitions to function_calls.py
cmd, timeout_val = self.convo_os_specific_tech.send_message(
'development/env_setup/install_next_technology.prompt',
{'technology': technology}, {
'definitions': [{
'name': 'execute_command',
'description': f'Executes a command that should check if {technology} is installed on the machine. ',
'parameters': {
'type': 'object',
'properties': {
'command': {
'type': 'string',
'description': f'Command that needs to be executed to check if {technology} is installed on the machine.',
},
'timeout': {
'type': 'number',
'description': 'Timeout in seconds for the approximate time this command takes to finish.',
}
},
'required': ['command', 'timeout'],
},
}],
'functions': {
'execute_command': lambda command, timeout: (command, timeout)
}
})
cli_response, llm_response = execute_command_and_check_cli_response(cmd, timeout_val, self.convo_os_specific_tech)
return llm_response
def test_code_changes(self, code_monkey, convo):
(test_type, command, automated_test_description, manual_test_description) = convo.send_message(
'development/task/step_check.prompt',


@@ -1,4 +1,4 @@
from fabulous.color import bold, green, yellow
from utils.style import green_bold
from helpers.AgentConvo import AgentConvo
from helpers.Agent import Agent
@@ -48,7 +48,7 @@ class ProductOwner(Agent):
self.project,
generate_messages_from_description(main_prompt, self.project.args['app_type'], self.project.args['name']))
print(green(bold('Project Summary:\n')))
print(green_bold('Project Summary:\n'))
convo_project_description = AgentConvo(self)
high_level_summary = convo_project_description.send_message('utils/summary.prompt',
{'conversation': '\n'.join(
@@ -80,7 +80,7 @@ class ProductOwner(Agent):
# USER STORIES
msg = f"User Stories:\n"
print(green(bold(msg)))
print(green_bold(msg))
logger.info(msg)
self.project.user_stories = self.convo_user_stories.continuous_conversation('user_stories/specs.prompt', {
@@ -114,7 +114,7 @@ class ProductOwner(Agent):
# USER TASKS
msg = f"User Tasks:\n"
print(green(bold(msg)))
print(green_bold(msg))
logger.info(msg)
self.project.user_tasks = self.convo_user_stories.continuous_conversation('user_stories/user_tasks.prompt',


@@ -1,7 +1,7 @@
from utils.utils import step_already_finished
from helpers.Agent import Agent
import json
from fabulous.color import green, bold
from utils.style import green_bold
from const.function_calls import DEV_STEPS
from helpers.cli import build_directory_tree
from helpers.AgentConvo import AgentConvo
@@ -32,7 +32,7 @@ class TechLead(Agent):
return step['development_plan']
# DEVELOPMENT PLANNING
print(green(bold(f"Starting to create the action plan for development...\n")))
print(green_bold(f"Starting to create the action plan for development...\n"))
logger.info(f"Starting to create the action plan for development...")
# TODO add clarifications


@@ -7,19 +7,15 @@ load_dotenv()
from .CodeMonkey import CodeMonkey
from .Developer import Developer
from database.models.files import File
from database.models.development_steps import DevelopmentSteps
from helpers.Project import Project, update_file, clear_directory
from helpers.AgentConvo import AgentConvo
from test.mock_terminal_size import mock_terminal_size
SEND_TO_LLM = False
WRITE_TO_FILE = False
def mock_terminal_size():
mock_size = Mock()
mock_size.columns = 80 # or whatever width you want
return mock_size
class TestCodeMonkey:
def setup_method(self):
name = 'TestDeveloper'
@@ -37,11 +33,14 @@ class TestCodeMonkey:
self.project.root_path = os.path.abspath(os.path.join(os.path.dirname(os.path.abspath(__file__)),
'../../../workspace/TestDeveloper'))
self.project.technologies = []
last_step = DevelopmentSteps()
last_step.id = 1
self.project.checkpoints = {'last_development_step': last_step}
self.project.app = None
self.developer = Developer(self.project)
self.codeMonkey = CodeMonkey(self.project, developer=self.developer)
@patch('helpers.AgentConvo.get_development_step_from_hash_id', return_value=None)
@patch('helpers.AgentConvo.get_saved_development_step', return_value=None)
@patch('helpers.AgentConvo.save_development_step', return_value=None)
@patch('os.get_terminal_size', mock_terminal_size)
@patch.object(File, 'insert')
@@ -54,7 +53,7 @@ class TestCodeMonkey:
else:
convo = MagicMock()
mock_responses = [
[],
# [],
[{
'content': 'Washington',
'description': "A new .txt file with the word 'Washington' in it.",
@@ -79,7 +78,7 @@ class TestCodeMonkey:
assert (called_data['path'] == '/' or called_data['path'] == called_data['name'])
assert called_data['content'] == 'Washington'
@patch('helpers.AgentConvo.get_development_step_from_hash_id', return_value=None)
@patch('helpers.AgentConvo.get_saved_development_step', return_value=None)
@patch('helpers.AgentConvo.save_development_step', return_value=None)
@patch('os.get_terminal_size', mock_terminal_size)
@patch.object(File, 'insert')
@@ -94,7 +93,7 @@ class TestCodeMonkey:
else:
convo = MagicMock()
mock_responses = [
['file_to_read.txt', 'output.txt'],
# ['file_to_read.txt', 'output.txt'],
[{
'content': 'Hello World!\n',
'description': 'This file is the output file. The content of file_to_read.txt is copied into this file.',


@@ -0,0 +1,53 @@
import builtins
import os
from unittest.mock import patch, Mock
from helpers.AgentConvo import AgentConvo
from dotenv import load_dotenv
load_dotenv()
from main import get_custom_print
from .Developer import Developer, ENVIRONMENT_SETUP_STEP
from helpers.Project import Project
from test.mock_terminal_size import mock_terminal_size
class TestDeveloper:
def setup_method(self):
builtins.print, ipc_client_instance = get_custom_print({})
name = 'TestDeveloper'
self.project = Project({
'app_id': 'test-developer',
'name': name,
'app_type': ''
},
name=name,
architecture=[],
user_stories=[]
)
self.project.root_path = os.path.abspath(os.path.join(os.path.dirname(os.path.abspath(__file__)),
'../../../workspace/TestDeveloper'))
self.project.technologies = []
self.project.current_step = ENVIRONMENT_SETUP_STEP
self.developer = Developer(self.project)
# @pytest.mark.uses_tokens
@patch('helpers.AgentConvo.get_saved_development_step')
@patch('helpers.AgentConvo.save_development_step')
@patch('helpers.AgentConvo.create_gpt_chat_completion',
return_value={'text': '{"command": "python --version", "timeout": 10}'})
@patch('helpers.cli.styled_text', return_value='no')
@patch('helpers.cli.execute_command', return_value=('', 'DONE'))
def test_install_technology(self, mock_execute_command, mock_styled_text,
mock_completion, mock_save, mock_get_saved_step):
# Given
self.developer.convo_os_specific_tech = AgentConvo(self.developer)
# When
llm_response = self.developer.install_technology('python')
# Then
assert llm_response == 'DONE'
mock_execute_command.assert_called_once_with(self.project, 'python --version', 10)


@@ -0,0 +1,72 @@
import builtins
import os
import pytest
from unittest.mock import patch
from dotenv import load_dotenv
load_dotenv()
from main import get_custom_print
from helpers.agents.TechLead import TechLead, DEVELOPMENT_PLANNING_STEP
from helpers.Project import Project
from test.test_utils import assert_non_empty_string, mock_terminal_size
from test.mock_questionary import MockQuestionary
from utils.function_calling import parse_agent_response
class TestTechLead:
def setup_method(self):
builtins.print, ipc_client_instance = get_custom_print({})
name = 'TestTechLead'
self.project = Project({
'app_id': 'test-tech-lead',
'name': name,
'app_type': ''
},
name=name,
architecture=[],
user_stories=[]
)
self.project.root_path = os.path.abspath(os.path.join(os.path.dirname(os.path.abspath(__file__)),
'../../../workspace/TestTechLead'))
self.project.technologies = []
self.project.project_description = '''
The project entails creating a web-based chat application, tentatively named "chat_app."
This application does not require user authentication or chat history storage.
It solely supports one-on-one messaging, excluding group chats or multimedia sharing like photos, videos, or files.
Additionally, there are no specific requirements for real-time functionality, like live typing indicators or read receipts.
The development of this application will strictly follow a monolithic structure, avoiding the use of microservices, as per the client's demand.
The development process will include the creation of user stories and tasks, based on detailed discussions with the client.
'''
self.project.user_stories = [
'User Story 1: As a user, I can access the web-based "chat_app" directly without needing to authenticate or log in. Do you want to add anything else? If not, just press ENTER.',
'User Story 2: As a user, I can start one-on-one conversations with another user on the "chat_app". Do you want to add anything else? If not, just press ENTER.',
'User Story 3: As a user, I can send and receive messages in real-time within my one-on-one conversation on the "chat_app". Do you want to add anything else? If not, just press ENTER.',
'User Story 4: As a user, I do not need to worry about deleting or storing my chats because the "chat_app" does not store chat histories. Do you want to add anything else? If not, just press ENTER.',
'User Story 5: As a user, I will only be able to send text messages, as the "chat_app" does not support any kind of multimedia sharing like photos, videos, or files. Do you want to add anything else? If not, just press ENTER.',
'User Story 6: As a user, I will not see any live typing indicators or read receipts since the "chat_app" does not provide any additional real-time functionality beyond message exchange. Do you want to add anything else? If not, just press ENTER.',
]
self.project.architecture = ['Node.js', 'Socket.io', 'Bootstrap', 'JavaScript', 'HTML5', 'CSS3']
self.project.current_step = DEVELOPMENT_PLANNING_STEP
@pytest.mark.uses_tokens
# @patch('database.database.get_progress_steps', return_value=None)
@patch('helpers.AgentConvo.get_saved_development_step', return_value=None)
@patch('helpers.agents.TechLead.save_progress', return_value=None)
# @patch('os.get_terminal_size', mock_terminal_size)
@patch('helpers.agents.TechLead.get_progress_steps', return_value=None)
def test_create_development_plan(self, mock_get_saved_step, mock_save_progress, mock_get_progress_steps):
self.techLead = TechLead(self.project)
mock_questionary = MockQuestionary(['', '', 'no'])
with patch('utils.llm_connection.questionary', mock_questionary):
# When
development_plan = self.techLead.create_development_plan()
# Then
assert development_plan is not None
assert_non_empty_string(development_plan[0]['description'])
assert_non_empty_string(development_plan[0]['programmatic_goal'])
assert_non_empty_string(development_plan[0]['user_review_goal'])


@@ -7,7 +7,7 @@ import time
import uuid
import platform
from fabulous.color import yellow, green, white, red, bold
from utils.style import yellow, green, white, red, yellow_bold, white_bold
from database.database import get_saved_command_run, save_command_run
from const.function_calls import DEBUG_STEPS_BREAKDOWN
from helpers.exceptions.TooDeepRecursionError import TooDeepRecursionError
@@ -93,7 +93,10 @@ def execute_command(project, command, timeout=None, force=False):
force (bool, optional): Whether to execute the command without confirmation. Default is False.
Returns:
str: The command output.
cli_response (str): The command output
or: '', 'DONE' if user answered 'no' or 'skip'
llm_response (str): The response from the agent.
TODO: this seems to be 'DONE' (no or skip) or None
"""
if timeout is not None:
if timeout < 1000:
@@ -101,14 +104,17 @@ def execute_command(project, command, timeout=None, force=False):
timeout = min(max(timeout, MIN_COMMAND_RUN_TIME), MAX_COMMAND_RUN_TIME)
if not force:
print(yellow(bold(f'\n--------- EXECUTE COMMAND ----------')))
print(f'Can i execute the command: `' + yellow(bold(command)) + f'` with {timeout}ms timeout?')
print(yellow_bold(f'\n--------- EXECUTE COMMAND ----------'))
print(f'Can i execute the command: `' + yellow_bold(command) + f'` with {timeout}ms timeout?')
answer = styled_text(
project,
'If yes, just press ENTER'
)
# TODO: I think AutoGPT allows other feedback here, like:
# "That's not going to work, let's do X instead"
# We don't explicitly make "no" or "skip" options to the user
if answer == 'no':
return '', 'DONE'
elif answer == 'skip':
@@ -143,7 +149,7 @@ def execute_command(project, command, timeout=None, force=False):
while True and return_value is None:
elapsed_time = time.time() - start_time
if timeout is not None:
print(white(bold(f'\rt: {round(elapsed_time * 1000)}ms : ')), end='', flush=True)
print(white_bold(f'\rt: {round(elapsed_time * 1000)}ms : '), end='', flush=True)
# Check if process has finished
if process.poll() is not None:
@@ -252,12 +258,15 @@ def execute_command_and_check_cli_response(command, timeout, convo):
Returns:
tuple: A tuple containing the CLI response and the agent's response.
- cli_response (str): The command output.
- llm_response (str): 'DONE' or 'NEEDS_DEBUGGING'
"""
cli_response, response = execute_command(convo.agent.project, command, timeout)
if response is None:
response = convo.send_message('dev_ops/ran_command.prompt',
# TODO: Prompt mentions `command` could be `INSTALLED` or `NOT_INSTALLED`, where is this handled?
cli_response, llm_response = execute_command(convo.agent.project, command, timeout)
if llm_response is None:
llm_response = convo.send_message('dev_ops/ran_command.prompt',
{ 'cli_response': cli_response, 'command': command })
return cli_response, response
return cli_response, llm_response
def run_command_until_success(command, timeout, convo, additional_message=None, force=False, return_cli_response=False, is_root_task=False):
"""

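The rename from response to llm_response makes the contract easier to follow. A usage sketch mirroring the new TestDeveloper test (convo is assumed to be an AgentConvo wired to a project):

cli_response, llm_response = execute_command_and_check_cli_response('python --version', 10, convo)
# cli_response: the command's output, or '' if the user answered 'no' or 'skip'
# llm_response: 'DONE' or 'NEEDS_DEBUGGING' once dev_ops/ran_command.prompt has been consulted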

@@ -1,4 +1,4 @@
from fabulous.color import green
from utils.style import green
import os


@@ -1,4 +1,4 @@
# logger.py
import os
import logging
@@ -7,7 +7,7 @@ def setup_logger():
log_format = "%(asctime)s [%(filename)s:%(lineno)s - %(funcName)20s() ] %(levelname)s: %(message)s"
# Create a log handler for file output
file_handler = logging.FileHandler(filename='logger/debug.log', mode='w')
file_handler = logging.FileHandler(filename=os.path.join(os.path.dirname(__file__), 'debug.log'), mode='w')
file_handler.setLevel(logging.DEBUG)
# Apply the custom format to the handler


@@ -11,7 +11,7 @@ from termcolor import colored
from helpers.ipc import IPCClient
from const.ipc import MESSAGE_TYPE
from utils.utils import json_serial
from fabulous.color import red
from utils.style import red
from helpers.Project import Project
from utils.arguments import get_arguments
@@ -36,8 +36,6 @@ def init():
return arguments
def get_custom_print(args):
built_in_print = builtins.print


@@ -27,7 +27,7 @@ Here are user tasks that specify what users need to do to interact with "{{ name
{% endfor %}
```#}
Now, based on the app's description, user stories and user tasks, think step by step and write up all technologies that will be used by your development team to create the app "{{ name }}". Do not write any explanations behind your choices but only a list of technologies that will be used.
Now, based on the app's description, user stories and user tasks, think step by step and list the names of the technologies that will be used by your development team to create the app "{{ name }}". Do not write any explanations behind your choices but only a list of technologies that will be used.
You do not need to list any technologies related to automated tests like Jest, Cypress, Mocha, Selenium, etc.


@@ -1,12 +1,9 @@
# prompts/prompts.py
from fabulous.color import yellow
import questionary
from utils.style import yellow
from const import common
from const.llm import MAX_QUESTIONS, END_RESPONSE
from utils.llm_connection import create_gpt_chat_completion, get_prompt
from utils.utils import capitalize_first_word_with_underscores, get_sys_message, find_role_from_step
from utils.llm_connection import create_gpt_chat_completion
from utils.utils import capitalize_first_word_with_underscores, get_sys_message, find_role_from_step, get_prompt
from utils.questionary import styled_select, styled_text
from logger.logger import logger


@@ -1,7 +1,7 @@
You are an experienced software architect. Your expertise is in creating an architecture for an MVP (minimum viable product) for {{ app_type }}s that can be developed as fast as possible by using as many ready-made technologies as possible. The technologies that you prefer using when other technologies are not explicitly specified are:
**Scripts**: you prefer using Node.js for writing scripts that are meant to be run just from the CLI.
**Backend**: you prefer using Node.js with Mongo database if not explicitely specified otherwise. When you're using Mongo, you always use Mongoose and when you're using Postgresql, you always use PeeWee as an ORM.
**Backend**: you prefer using Node.js with Mongo database if not explicitly specified otherwise. When you're using Mongo, you always use Mongoose and when you're using Postgresql, you always use PeeWee as an ORM.
**Testing**: To create unit and integration tests, you prefer using Jest for Node.js projects and pytest for Python projects. To create end-to-end tests, you prefer using Cypress.

pilot/test/__init__.py Normal file


@@ -0,0 +1,32 @@
class MockQuestionary:
def __init__(self, answers=None, initial_state='project_description'):
if answers is None:
answers = []
self.answers = iter(answers)
self.state = initial_state
class Style:
def __init__(self, *args, **kwargs):
pass
def text(self, question: str, style=None):
print('AI: ' + question)
if question.startswith('User Story'):
self.state = 'user_stories'
elif question.endswith('write "DONE"'):
self.state = 'DONE'
return self
def ask(self):
return self.unsafe_ask()
def unsafe_ask(self):
if self.state == 'user_stories':
answer = ''
elif self.state == 'DONE':
answer = 'DONE'
else: # if self.state == 'project_description':
answer = next(self.answers, '')
print('User:', answer)
return answer
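Usage, as the new TechLead and end-to-end tests do it: patch the real questionary module so the canned answers drive the conversation (tech_lead here stands in for any agent instance):

from unittest.mock import patch

mock_questionary = MockQuestionary(['', '', 'no'])
with patch('utils.llm_connection.questionary', mock_questionary):
    development_plan = tech_lead.create_development_plan()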

pilot/test/test_utils.py Normal file

@@ -0,0 +1,11 @@
from unittest.mock import Mock
def mock_terminal_size():
mock_size = Mock()
mock_size.columns = 80 # or whatever width you want
return mock_size
def assert_non_empty_string(value):
assert isinstance(value, str)
assert len(value) > 0

pilot/test_main_e2e.py Normal file

@@ -0,0 +1,61 @@
import builtins
import pytest
from unittest.mock import patch
from dotenv import load_dotenv
load_dotenv()
from database.database import create_tables
from helpers.Project import Project
from test.mock_questionary import MockQuestionary
from .main import init, get_custom_print
def test_init():
# When
args = init()
# Then
for field in ['app_id', 'user_id', 'email']:
assert args[field] is not None
for field in ['workspace', 'step']:
assert args[field] is None
@pytest.mark.slow
@pytest.mark.uses_tokens
@pytest.mark.skip(reason="Uses lots of tokens")
@pytest.mark.parametrize("endpoint, model", [
# ("OPENAI", "gpt-4"),
# ("OPENROUTER", "openai/gpt-3.5-turbo"),
# ("OPENROUTER", "meta-llama/codellama-34b-instruct"),
("OPENROUTER", "google/palm-2-chat-bison"),
("OPENROUTER", "google/palm-2-codechat-bison"),
# TODO: See https://github.com/1rgs/jsonformer-claude/blob/main/jsonformer_claude/main.py
# https://github.com/guidance-ai/guidance - token healing
("OPENROUTER", "anthropic/claude-2"),
])
def test_end_to_end(endpoint, model, monkeypatch):
# Given
monkeypatch.setenv('ENDPOINT', endpoint)
monkeypatch.setenv('MODEL_NAME', model)
create_tables()
args = init()
builtins.print, ipc_client_instance = get_custom_print(args)
project = Project(args)
mock_questionary = MockQuestionary([
'Test App',
'A web-based chat app',
# 5 clarifying questions
'Users can send direct messages to each other but with no group chat functionality',
'No authentication is required at this stage',
'Use your best judgement',
'Use your best judgement',
'Use your best judgement',
])
# When
with patch('utils.questionary.questionary', mock_questionary):
project.start()


@@ -0,0 +1,214 @@
import json
import re
from typing import Literal, Optional, TypedDict, Callable
JsonType = str | int | float | bool | None | list["JsonType"] | dict[str, "JsonType"]
class FunctionParameters(TypedDict):
"""Function parameters"""
type: Literal["object"]
properties: dict[str, JsonType]
required: Optional[list[str]]
class FunctionType(TypedDict):
"""Function type"""
name: str
description: Optional[str]
parameters: FunctionParameters
class FunctionCall(TypedDict):
"""Function call"""
name: str
parameters: str
class FunctionCallSet(TypedDict):
definitions: list[FunctionType]
functions: dict[str, Callable]
def add_function_calls_to_request(gpt_data, function_calls: FunctionCallSet | None):
if function_calls is None:
return
model: str = gpt_data['model']
is_instruct = 'llama' in model or 'anthropic' in model
gpt_data['functions'] = function_calls['definitions']
prompter = JsonPrompter(is_instruct)
if len(function_calls['definitions']) > 1:
function_call = None
else:
function_call = function_calls['definitions'][0]['name']
role = 'user' if '/' in model else 'system'
gpt_data['messages'].append({
'role': role,
'content': prompter.prompt('', function_calls['definitions'], function_call)
})
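In short, rather than relying on OpenAI's function_call API, the JSON schema is now folded into the prompt itself. A sketch of the mutation, assuming ARCHITECTURE carries a single process_technologies definition as the tests below suggest:

from const.function_calls import ARCHITECTURE

gpt_data = {'model': 'gpt-4', 'messages': []}
add_function_calls_to_request(gpt_data, ARCHITECTURE)
# gpt_data['functions'] now holds the definitions (stripped again in stream_gpt_completion
# before the HTTP request goes out), and gpt_data['messages'] gains a 'system' message
# carrying the JSON-schema instructions ('user' for models with '/' in the name).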
def parse_agent_response(response, function_calls: FunctionCallSet | None):
"""
Post-processes the response from the agent.
Args:
response: The response from the agent.
function_calls: Optional function calls associated with the response.
Returns:
The post-processed response.
"""
if function_calls:
text = re.sub(r'^.*```json\s*', '', response['text'], flags=re.DOTALL)
text = text.strip('` \n')
values = list(json.loads(text).values())
if len(values) == 1:
return values[0]
else:
return tuple(values)
return response['text']
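Worked examples, taken from the new tests in utils/test_function_calling.py:

parse_agent_response({'text': 'Hello world!'}, None)
# -> 'Hello world!' (no function calls: the text passes through)

fc = {'definitions': [], 'functions': {}}
parse_agent_response({'text': '```json\n{"greeting": "Hello world!"}\n```'}, fc)
# -> 'Hello world!' (markdown fences stripped, a single value is unwrapped)

parse_agent_response({'text': '{"greeting": "Hello", "name": "John"}'}, fc)
# -> ('Hello', 'John') (multiple values come back as a tuple)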
class JsonPrompter:
"""
Adapted from local_llm_function_calling
"""
def __init__(self, is_instruct: bool = False):
self.is_instruct = is_instruct
def function_descriptions(
self, functions: list[FunctionType], function_to_call: str
) -> list[str]:
"""Get the descriptions of the functions
Args:
functions (list[FunctionType]): The functions to get the descriptions of
function_to_call (str): The function to call
Returns:
list[str]: The descriptions of the functions
(empty if the function doesn't exist or has no description)
"""
return [
f'# {function["name"]}: {function["description"]}'
for function in functions
if function["name"] == function_to_call and "description" in function
]
def function_parameters(
self, functions: list[FunctionType], function_to_call: str
) -> str:
"""Get the parameters of the function
Args:
functions (list[FunctionType]): The functions to get the parameters of
function_to_call (str): The function to call
Returns:
str: The parameters of the function as a JSON schema
"""
return next(
json.dumps(function["parameters"]["properties"], indent=4)
for function in functions
if function["name"] == function_to_call
)
def function_data(
self, functions: list[FunctionType], function_to_call: str
) -> str:
"""Get the data for the function
Args:
functions (list[FunctionType]): The functions to get the data for
function_to_call (str): The function to call
Returns:
str: The data necessary to generate the arguments for the function
"""
return "\n".join(
self.function_descriptions(functions, function_to_call)
+ [
"The response should be a JSON object matching this schema:",
"```json",
self.function_parameters(functions, function_to_call),
"```",
]
)
def function_summary(self, function: FunctionType) -> str:
"""Get a summary of a function
Args:
function (FunctionType): The function to get the summary of
Returns:
str: The summary of the function, as a bullet point
"""
return f"- {function['name']}" + (
f" - {function['description']}" if "description" in function else ""
)
def functions_summary(self, functions: list[FunctionType]) -> str:
"""Get a summary of the functions
Args:
functions (list[FunctionType]): The functions to get the summary of
Returns:
str: The summary of the functions, as a bulleted list
"""
return "Available functions:\n" + "\n".join(
self.function_summary(function) for function in functions
)
def prompt(
self,
prompt: str,
functions: list[FunctionType],
function_to_call: str | None = None,
) -> str:
"""Generate the llama prompt
Args:
prompt (str): The prompt to generate the response to
functions (list[FunctionType]): The functions to generate the response from
function_to_call (str | None): The function to call. Defaults to None.
Returns:
list[bytes | int]: The llama prompt, a function selection prompt if no
function is specified, or a function argument prompt if a function is
specified
"""
system = (
"Help choose the appropriate function to call to answer the user's question."
if function_to_call is None
else f"Define the arguments for {function_to_call} to answer the user's question."
) + "\nThe response should contain only the JSON object, with no additional text or explanation."
data = (
self.function_data(functions, function_to_call)
if function_to_call
else self.functions_summary(functions)
)
response_start = (
f"Here are the arguments for the `{function_to_call}` function: ```json\n"
if function_to_call
else "Here's the function the user should call: "
)
if self.is_instruct:
return f"[INST] <<SYS>>\n{system}\n\n{data}\n<</SYS>>\n\n{prompt} [/INST]"
else:
return f"{system}\n\n{data}\n\n{prompt}"


@@ -7,49 +7,21 @@ import json
import tiktoken
import questionary
from utils.style import red
from typing import List
from jinja2 import Environment, FileSystemLoader
from const.llm import MIN_TOKENS_FOR_GPT_RESPONSE, MAX_GPT_MODEL_TOKENS, MAX_QUESTIONS, END_RESPONSE
from const.llm import MIN_TOKENS_FOR_GPT_RESPONSE, MAX_GPT_MODEL_TOKENS
from logger.logger import logger
from fabulous.color import red
from helpers.exceptions.TokenLimitError import TokenLimitError
from utils.utils import get_prompt_components, fix_json
from utils.spinner import spinner_start, spinner_stop
def get_prompt(prompt_name, data=None):
if data is None:
data = {}
data.update(get_prompt_components())
logger.debug(f"Getting prompt for {prompt_name}") # logging here
# Create a file system loader with the directory of the templates
file_loader = FileSystemLoader('prompts')
# Create the Jinja2 environment
env = Environment(loader=file_loader)
# Load the template
template = env.get_template(prompt_name)
# Render the template with the provided data
output = template.render(data)
return output
from utils.utils import fix_json
from utils.function_calling import add_function_calls_to_request, FunctionCallSet, FunctionType
def get_tokens_in_messages(messages: List[str]) -> int:
tokenizer = tiktoken.get_encoding("cl100k_base") # GPT-4 tokenizer
tokenized_messages = [tokenizer.encode(message['content']) for message in messages]
return sum(len(tokens) for tokens in tokenized_messages)
#get endpoint and model name from .ENV file
model = os.getenv('MODEL_NAME')
endpoint = os.getenv('ENDPOINT')
def num_tokens_from_functions(functions, model=model):
def num_tokens_from_functions(functions):
"""Return the number of tokens used by a list of functions."""
encoding = tiktoken.get_encoding("cl100k_base")
@@ -85,7 +57,7 @@ def num_tokens_from_functions(functions, model=model):
def create_gpt_chat_completion(messages: List[dict], req_type, min_tokens=MIN_TOKENS_FOR_GPT_RESPONSE,
function_calls=None):
function_calls: FunctionCallSet = None):
"""
Called from:
- AgentConvo.send_message() - these calls often have `function_calls`, usually from `pilot/const/function_calls.py`
@@ -115,19 +87,13 @@ def create_gpt_chat_completion(messages: List[dict], req_type, min_tokens=MIN_TO
}
# delete some keys if using "OpenRouter" API
if os.getenv('ENDPOINT') == "OPENROUTER":
if os.getenv('ENDPOINT') == 'OPENROUTER':
keys_to_delete = ['n', 'max_tokens', 'temperature', 'top_p', 'presence_penalty', 'frequency_penalty']
for key in keys_to_delete:
if key in gpt_data:
del gpt_data[key]
if function_calls is not None:
# Advise the LLM of the JSON response schema we are expecting
gpt_data['functions'] = function_calls['definitions']
if len(function_calls['definitions']) > 1:
gpt_data['function_call'] = 'auto'
else:
gpt_data['function_call'] = {'name': function_calls['definitions'][0]['name']}
add_function_calls_to_request(gpt_data, function_calls)
try:
response = stream_gpt_completion(gpt_data, req_type)
@@ -135,7 +101,7 @@ def create_gpt_chat_completion(messages: List[dict], req_type, min_tokens=MIN_TO
except TokenLimitError as e:
raise e
except Exception as e:
print('The request to OpenAI API failed. Here is the error message:')
print(f'The request to {os.getenv("ENDPOINT")} API failed. Here is the error message:')
print(e)
@@ -151,6 +117,7 @@ def count_lines_based_on_width(content, width):
lines_required = sum(len(line) // width + 1 for line in content.split('\n'))
return lines_required
def get_tokens_in_messages_from_openai_error(error_message):
"""
Extract the token count from a message.
@@ -199,6 +166,7 @@ def retry_on_exception(func):
('answer', 'fg:orange')
])).ask()
# TODO: take user's input into consideration - send to LLM?
if user_message != '':
return {}
@@ -215,9 +183,16 @@ def stream_gpt_completion(data, req_type):
"""
# TODO add type dynamically - this isn't working when connected to the external process
terminal_width = 50#os.get_terminal_size().columns
terminal_width = 50 # os.get_terminal_size().columns
lines_printed = 2
buffer = "" # A buffer to accumulate incoming data
buffer = '' # A buffer to accumulate incoming data
expecting_json = False
received_json = False
if 'functions' in data:
expecting_json = data['functions']
# Don't send the `functions` parameter to Open AI, but don't remove it from `data` in case we need to retry
data = {key: value for key, value in data.items() if key != "functions"}
def return_result(result_data, lines_printed):
if buffer:
@@ -229,23 +204,36 @@ def stream_gpt_completion(data, req_type):
# spinner = spinner_start(yellow("Waiting for OpenAI API response..."))
# print(yellow("Stream response from OpenAI:"))
api_key = os.getenv("OPENAI_API_KEY")
logger.info(f'Request data: {data}')
# Check if the ENDPOINT is AZURE
# Configure for the selected ENDPOINT
model = os.getenv('MODEL_NAME')
endpoint = os.getenv('ENDPOINT')
if endpoint == 'AZURE':
# If yes, get the AZURE_ENDPOINT from .ENV file
endpoint_url = os.getenv('AZURE_ENDPOINT') + '/openai/deployments/' + model + '/chat/completions?api-version=2023-05-15'
headers = {'Content-Type': 'application/json', 'api-key': os.getenv('AZURE_API_KEY')}
headers = {
'Content-Type': 'application/json',
'api-key': os.getenv('AZURE_API_KEY')
}
elif endpoint == 'OPENROUTER':
# If so, send the request to the OpenRouter API endpoint
headers = {'Content-Type': 'application/json', 'Authorization': 'Bearer ' + os.getenv("OPENROUTER_API_KEY"), 'HTTP-Referer': 'http://localhost:3000', 'X-Title': 'GPT Pilot (LOCAL)'}
endpoint_url = os.getenv("OPENROUTER_ENDPOINT", 'https://openrouter.ai/api/v1/chat/completions')
endpoint_url = os.getenv('OPENROUTER_ENDPOINT', 'https://openrouter.ai/api/v1/chat/completions')
headers = {
'Content-Type': 'application/json',
'Authorization': 'Bearer ' + os.getenv('OPENROUTER_API_KEY'),
'HTTP-Referer': 'http://localhost:3000',
'X-Title': 'GPT Pilot (LOCAL)'
}
else:
# If not, send the request to the OpenAI endpoint
headers = {'Content-Type': 'application/json', 'Authorization': 'Bearer ' + os.getenv("OPENAI_API_KEY")}
endpoint_url = os.getenv("OPENAI_ENDPOINT", 'https://api.openai.com/v1/chat/completions')
endpoint_url = os.getenv('OPENAI_ENDPOINT', 'https://api.openai.com/v1/chat/completions')
headers = {
'Content-Type': 'application/json',
'Authorization': 'Bearer ' + os.getenv('OPENAI_API_KEY')
}
response = requests.post(
endpoint_url,
@@ -262,12 +250,11 @@ def stream_gpt_completion(data, req_type):
raise Exception(f"API responded with status code: {response.status_code}. Response text: {response.text}")
gpt_response = ''
function_calls = {'name': '', 'arguments': ''}
# function_calls = {'name': '', 'arguments': ''}
for line in response.iter_lines():
# Ignore keep-alive new lines
if line:
if line and line != b': OPENROUTER PROCESSING':
line = line.decode("utf-8") # decode the bytes to string
if line.startswith('data: '):
@@ -287,25 +274,29 @@ def stream_gpt_completion(data, req_type):
logger.error(f'Error in LLM response: {json_line}')
raise ValueError(f'Error in LLM response: {json_line["error"]["message"]}')
if json_line['choices'][0]['finish_reason'] == 'function_call':
function_calls['arguments'] = load_data_to_json(function_calls['arguments'])
return return_result({'function_calls': function_calls}, lines_printed)
choice = json_line['choices'][0]
json_line = json_line['choices'][0]['delta']
# if 'finish_reason' in choice and choice['finish_reason'] == 'function_call':
# function_calls['arguments'] = load_data_to_json(function_calls['arguments'])
# return return_result({'function_calls': function_calls}, lines_printed)
json_line = choice['delta']
# TODO: token healing? https://github.com/1rgs/jsonformer-claude
# ...Is this what local_llm_function_calling.constrainer is for?
except json.JSONDecodeError:
logger.error(f'Unable to decode line: {line}')
continue # skip to the next line
# handle the streaming response
if 'function_call' in json_line:
if 'name' in json_line['function_call']:
function_calls['name'] = json_line['function_call']['name']
print(f'Function call: {function_calls["name"]}')
if 'arguments' in json_line['function_call']:
function_calls['arguments'] += json_line['function_call']['arguments']
print(json_line['function_call']['arguments'], type='stream', end='', flush=True)
# if 'function_call' in json_line:
# if 'name' in json_line['function_call']:
# function_calls['name'] = json_line['function_call']['name']
# print(f'Function call: {function_calls["name"]}')
#
# if 'arguments' in json_line['function_call']:
# function_calls['arguments'] += json_line['function_call']['arguments']
# print(json_line['function_call']['arguments'], type='stream', end='', flush=True)
if 'content' in json_line:
content = json_line.get('content')
@@ -313,7 +304,18 @@ def stream_gpt_completion(data, req_type):
buffer += content # accumulate the data
# If you detect a natural breakpoint (e.g., line break or end of a response object), print & count:
if buffer.endswith("\n"): # or some other condition that denotes a breakpoint
if buffer.endswith("\n"):
if expecting_json and not received_json:
received_json = assert_json_response(buffer, lines_printed > 2)
if received_json:
gpt_response = ""
# if not received_json:
# # Don't append to gpt_response, but increment lines_printed
# lines_printed += 1
# buffer = ""
# continue
# or some other condition that denotes a breakpoint
lines_printed += count_lines_based_on_width(buffer, terminal_width)
buffer = "" # reset the buffer
@@ -321,15 +323,42 @@ def stream_gpt_completion(data, req_type):
print(content, type='stream', end='', flush=True)
print('\n', type='stream')
if function_calls['arguments'] != '':
logger.info(f'Response via function call: {function_calls["arguments"]}')
function_calls['arguments'] = load_data_to_json(function_calls['arguments'])
return return_result({'function_calls': function_calls}, lines_printed)
# if function_calls['arguments'] != '':
# logger.info(f'Response via function call: {function_calls["arguments"]}')
# function_calls['arguments'] = load_data_to_json(function_calls['arguments'])
# return return_result({'function_calls': function_calls}, lines_printed)
logger.info(f'Response message: {gpt_response}')
if expecting_json:
assert_json_schema(gpt_response, expecting_json)
new_code = postprocessing(gpt_response, req_type) # TODO add type dynamically
return return_result({'text': new_code}, lines_printed)
def assert_json_response(response: str, or_fail=True) -> bool:
if re.match(r'.*(```(json)?|{|\[)', response):
return True
elif or_fail:
raise ValueError('LLM did not respond with JSON')
else:
return False
def assert_json_schema(response: str, functions: list[FunctionType]) -> True:
return True
# TODO: validation always fails
# for function in functions:
# schema = function['parameters']
# parser = parser_for_schema(schema)
# validated = parser.validate(response)
# if validated.valid and validated.end_index:
# return True
#
# raise ValueError('LLM responded with invalid JSON')
def postprocessing(gpt_response, req_type):
return gpt_response
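assert_json_schema above is a stub for now; per the TODO, the commented-out parser-based validation always fails. One possible replacement is the standalone jsonschema package (an assumption, not something this PR ships; it also presumes any markdown fences were already stripped from the response):

import json
import jsonschema  # pip install jsonschema

def assert_json_schema(response: str, functions: list[FunctionType]) -> bool:
    for function in functions:
        try:
            # raises on any mismatch against the function's parameter schema
            jsonschema.validate(json.loads(response), function['parameters'])
        except jsonschema.ValidationError as err:
            raise ValueError('LLM responded with invalid JSON') from err
    return True

Something along these lines would also let test_assert_json_schema_invalid, at the bottom of this page, exercise a real failure path.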


@@ -1,6 +1,6 @@
from prompt_toolkit.styles import Style
import questionary
from fabulous.color import yellow, bold
from utils.style import yellow_bold
from database.database import save_user_input, get_saved_user_input
from const.ipc import MESSAGE_TYPE
@@ -26,8 +26,8 @@ def styled_text(project, question, ignore_user_input_count=False):
if user_input is not None and user_input.user_input is not None and project.skip_steps:
# if we do, use it
project.checkpoints['last_user_input'] = user_input
print(yellow(bold(f'Restoring user input id {user_input.id}: ')), end='')
print(yellow(bold(f'{user_input.user_input}')))
print(yellow_bold(f'Restoring user input id {user_input.id}: '), end='')
print(yellow_bold(f'{user_input.user_input}'))
return user_input.user_input
if project.ipc_client_instance is None or project.ipc_client_instance.client is None:

pilot/utils/style.py Normal file

@@ -0,0 +1,45 @@
from termcolor import colored
def red(text):
return colored(text, 'red')
def red_bold(text):
return colored(text, 'red', attrs=['bold'])
def yellow(text):
return colored(text, 'yellow')
def yellow_bold(text):
return colored(text, 'yellow', attrs=['bold'])
def green(text):
return colored(text, 'green')
def green_bold(text):
return colored(text, 'green', attrs=['bold'])
def blue(text):
return colored(text, 'blue')
def blue_bold(text):
return colored(text, 'blue', attrs=['bold'])
def cyan(text):
return colored(text, 'light_cyan')
def white(text):
return colored(text, 'white')
def white_bold(text):
return colored(text, 'white', attrs=['bold'])
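These helpers are drop-in replacements for the removed fabulous calls, so usage throughout the diff stays a one-liner:

from utils.style import green_bold, yellow, yellow_bold

print(green_bold('Project Summary:\n'))
print(yellow('Do you want to add anything else? If not, ') + yellow_bold('just press ENTER.'))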


@@ -0,0 +1,155 @@
from const.function_calls import ARCHITECTURE, DEV_STEPS
from .function_calling import parse_agent_response, JsonPrompter
class TestFunctionCalling:
def test_parse_agent_response_text(self):
# Given
response = {'text': 'Hello world!'}
# When
response = parse_agent_response(response, None)
# Then
assert response == 'Hello world!'
def test_parse_agent_response_json(self):
# Given
response = {'text': '{"greeting": "Hello world!"}'}
function_calls = {'definitions': [], 'functions': {}}
# When
response = parse_agent_response(response, function_calls)
# Then
assert response == 'Hello world!'
def test_parse_agent_response_json_markdown(self):
# Given
response = {'text': '```json\n{"greeting": "Hello world!"}\n```'}
function_calls = {'definitions': [], 'functions': {}}
# When
response = parse_agent_response(response, function_calls)
# Then
assert response == 'Hello world!'
def test_parse_agent_response_markdown(self):
# Given
response = {'text': '```\n{"greeting": "Hello world!"}\n```'}
function_calls = {'definitions': [], 'functions': {}}
# When
response = parse_agent_response(response, function_calls)
# Then
assert response == 'Hello world!'
def test_parse_agent_response_multiple_args(self):
# Given
response = {'text': '{"greeting": "Hello", "name": "John"}'}
function_calls = {'definitions': [], 'functions': {}}
# When
greeting, name = parse_agent_response(response, function_calls)
# Then
assert greeting == 'Hello'
assert name == 'John'
def test_json_prompter():
# Given
prompter = JsonPrompter()
# When
prompt = prompter.prompt('Create a web-based chat app', ARCHITECTURE['definitions']) # , 'process_technologies')
# Then
assert prompt == '''Help choose the appropriate function to call to answer the user's question.
The response should contain only the JSON object, with no additional text or explanation.
Available functions:
- process_technologies - Print the list of technologies that are created.
Create a web-based chat app'''
def test_llama_json_prompter():
# Given
prompter = JsonPrompter(is_instruct=True)
# When
prompt = prompter.prompt('Create a web-based chat app', ARCHITECTURE['definitions']) # , 'process_technologies')
# Then
assert prompt == '''[INST] <<SYS>>
Help choose the appropriate function to call to answer the user's question.
The response should contain only the JSON object, with no additional text or explanation.
Available functions:
- process_technologies - Print the list of technologies that are created.
<</SYS>>
Create a web-based chat app [/INST]'''
def test_json_prompter_named():
# Given
prompter = JsonPrompter()
# When
prompt = prompter.prompt('Create a web-based chat app', ARCHITECTURE['definitions'], 'process_technologies')
# Then
assert prompt == '''Define the arguments for process_technologies to answer the user's question.
The response should contain only the JSON object, with no additional text or explanation.
Print the list of technologies that are created.
The response should be a JSON object matching this schema:
```json
{
"technologies": {
"type": "array",
"description": "List of technologies.",
"items": {
"type": "string",
"description": "technology"
}
}
}
```
Create a web-based chat app'''
def test_llama_json_prompter_named():
# Given
prompter = JsonPrompter(is_instruct=True)
# When
prompt = prompter.prompt('Create a web-based chat app', ARCHITECTURE['definitions'], 'process_technologies')
# Then
assert prompt == '''[INST] <<SYS>>
Define the arguments for process_technologies to answer the user's question.
The response should contain only the JSON object, with no additional text or explanation.
Print the list of technologies that are created.
The response should be a JSON object matching this schema:
```json
{
"technologies": {
"type": "array",
"description": "List of technologies.",
"items": {
"type": "string",
"description": "technology"
}
}
}
```
<</SYS>>
Create a web-based chat app [/INST]'''
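
Taken together, the parse_agent_response tests above pin down a contract: with no function definitions the raw text is returned; otherwise an optional markdown fence is stripped, the JSON object is parsed, a single property collapses to its value, and multiple properties come back as a tuple. A minimal sketch consistent with that contract (an illustration, not necessarily the repository's implementation):

import json

def parse_agent_response_sketch(response, function_calls):
    # Sketch only: mirrors the behaviour the tests above assert
    text = response['text']
    if function_calls is None:
        return text
    text = text.strip()
    # Strip an optional ```json ... ``` or ``` ... ``` fence
    if text.startswith('```'):
        text = text.split('\n', 1)[1].rsplit('```', 1)[0]
    values = list(json.loads(text).values())
    return values[0] if len(values) == 1 else tuple(values)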

View File

@@ -0,0 +1,207 @@
import builtins
import pytest
from dotenv import load_dotenv
from const.function_calls import ARCHITECTURE, DEVELOPMENT_PLAN
from helpers.AgentConvo import AgentConvo
from helpers.Project import Project
from helpers.agents.Architect import Architect
from helpers.agents.TechLead import TechLead
from utils.function_calling import parse_agent_response, FunctionType
from test.test_utils import assert_non_empty_string
from .llm_connection import create_gpt_chat_completion, assert_json_response, assert_json_schema
from main import get_custom_print
load_dotenv()
project = Project({'app_id': 'test-app'}, current_step='test')
class TestSchemaValidation:
def setup_method(self):
self.function: FunctionType = {
'name': 'test',
'description': 'test schema',
'parameters': {
'type': 'object',
'properties': {'foo': {'type': 'string'}},
'required': ['foo']
}
}
def test_assert_json_response(self):
assert assert_json_response('{"foo": "bar"}')
assert assert_json_response('{\n"foo": "bar"}')
assert assert_json_response('```\n{"foo": "bar"}')
assert assert_json_response('```json\n{\n"foo": "bar"}')
with pytest.raises(ValueError, match='LLM did not respond with JSON'):
assert assert_json_response('# Foo\n bar')
def test_assert_json_schema(self):
# When assert_json_schema is called with valid JSON
# Then no errors
assert(assert_json_schema('{"foo": "bar"}', [self.function]))
def test_assert_json_schema_invalid(self):
# When assert_json_schema is called with invalid JSON
# Then error is raised
with pytest.raises(ValueError, match='LLM responded with invalid JSON'):
assert_json_schema('{"foo": 1}', [self.function])
def test_assert_json_schema_incomplete(self):
# When assert_json_schema is called with incomplete JSON
# Then error is raised
with pytest.raises(ValueError, match='LLM responded with invalid JSON'):
assert_json_schema('{"foo": "b', [self.function])
def test_assert_json_schema_required(self):
# When assert_json_schema is called with missing required property
# Then error is raised
self.function['parameters']['properties']['other'] = {'type': 'string'}
self.function['parameters']['required'] = ['foo', 'other']
with pytest.raises(ValueError, match='LLM responded with invalid JSON'):
assert_json_schema('{"foo": "bar"}', [self.function])
class TestLlmConnection:
def setup_method(self):
builtins.print, ipc_client_instance = get_custom_print({})
@pytest.mark.uses_tokens
@pytest.mark.parametrize("endpoint, model", [
("OPENAI", "gpt-4"), # role: system
("OPENROUTER", "openai/gpt-3.5-turbo"), # role: user
("OPENROUTER", "meta-llama/codellama-34b-instruct"), # rule: user, is_llama
("OPENROUTER", "google/palm-2-chat-bison"), # role: user/system
("OPENROUTER", "google/palm-2-codechat-bison"),
# TODO: See https://github.com/1rgs/jsonformer-claude/blob/main/jsonformer_claude/main.py
# https://github.com/guidance-ai/guidance - token healing
("OPENROUTER", "anthropic/claude-2"), # role: user, is_llama
])
def test_chat_completion_Architect(self, endpoint, model, monkeypatch):
# Given
monkeypatch.setenv('ENDPOINT', endpoint)
monkeypatch.setenv('MODEL_NAME', model)
agent = Architect(project)
convo = AgentConvo(agent)
convo.construct_and_add_message_from_prompt('architecture/technologies.prompt',
{
'name': 'Test App',
'prompt': '''
The project involves the development of a web-based chat application named "Test_App".
In this application, users can send direct messages to each other.
However, it does not include a group chat functionality.
Multimedia messaging, such as the exchange of images and videos, is not a requirement for this application.
No clear instructions were given for the inclusion of user profile customization features like profile
picture and status updates, as well as a feature for chat history. The project must be developed strictly
as a monolithic application, regardless of any other suggested methods.
The project's specifications are subject to the project manager's discretion, implying a need for
solution-oriented decision-making in areas where precise instructions were not provided.''',
'app_type': 'web app',
'user_stories': [
'User will be able to send direct messages to another user.',
'User will receive direct messages from other users.',
'User will view the sent and received messages in a conversation view.',
'User will select a user to send a direct message.',
'User will be able to search for users to send direct messages to.',
'Users can view the online status of other users.',
'User will be able to log into the application using their credentials.',
'User will be able to logout from the Test_App.',
'User will be able to register a new account on Test_App.',
]
})
function_calls = ARCHITECTURE
# When
response = create_gpt_chat_completion(convo.messages, '', function_calls=function_calls)
# Then
assert convo.messages[0]['content'].startswith('You are an experienced software architect')
assert convo.messages[1]['content'].startswith('You are working in a software development agency')
assert response is not None
response = parse_agent_response(response, function_calls)
assert 'Node.js' in response
@pytest.mark.uses_tokens
@pytest.mark.parametrize("endpoint, model", [
("OPENAI", "gpt-4"), # role: system
("OPENROUTER", "openai/gpt-3.5-turbo"), # role: user
("OPENROUTER", "meta-llama/codellama-34b-instruct"), # rule: user, is_llama
("OPENROUTER", "google/palm-2-chat-bison"), # role: user/system
("OPENROUTER", "google/palm-2-codechat-bison"),
# TODO: See https://github.com/1rgs/jsonformer-claude/blob/main/jsonformer_claude/main.py
# https://github.com/guidance-ai/guidance - token healing
("OPENROUTER", "anthropic/claude-2"), # role: user, is_llama
])
def test_chat_completion_TechLead(self, endpoint, model, monkeypatch):
# Given
monkeypatch.setenv('ENDPOINT', endpoint)
monkeypatch.setenv('MODEL_NAME', model)
agent = TechLead(project)
convo = AgentConvo(agent)
convo.construct_and_add_message_from_prompt('development/plan.prompt',
{
'name': 'Test App',
'app_summary': '''
The project entails creating a web-based chat application, tentatively named "chat_app."
This application does not require user authentication or chat history storage.
It solely supports one-on-one messaging, excluding group chats or multimedia sharing like photos, videos, or files.
Additionally, there are no specific requirements for real-time functionality, like live typing indicators or read receipts.
The development of this application will strictly follow a monolithic structure, avoiding the use of microservices, as per the client's demand.
The development process will include the creation of user stories and tasks, based on detailed discussions with the client.''',
'app_type': 'web app',
'user_stories': [
'User Story 1: As a user, I can access the web-based "chat_app" directly without needing to authenticate or log in. Do you want to add anything else? If not, just press ENTER.',
'User Story 2: As a user, I can start one-on-one conversations with another user on the "chat_app". Do you want to add anything else? If not, just press ENTER.',
'User Story 3: As a user, I can send and receive messages in real-time within my one-on-one conversation on the "chat_app". Do you want to add anything else? If not, just press ENTER.',
'User Story 4: As a user, I do not need to worry about deleting or storing my chats because the "chat_app" does not store chat histories. Do you want to add anything else? If not, just press ENTER.',
'User Story 5: As a user, I will only be able to send text messages, as the "chat_app" does not support any kind of multimedia sharing like photos, videos, or files. Do you want to add anything else? If not, just press ENTER.',
'User Story 6: As a user, I will not see any live typing indicators or read receipts since the "chat_app" does not provide any additional real-time functionality beyond message exchange. Do you want to add anything else? If not, just press ENTER.',
]
})
function_calls = DEVELOPMENT_PLAN
# When
response = create_gpt_chat_completion(convo.messages, '', function_calls=function_calls)
# Then
assert convo.messages[0]['content'].startswith('You are a tech lead in a software development agency')
assert convo.messages[1]['content'].startswith('You are working in a software development agency and a project manager and software architect approach you')
assert response is not None
response = parse_agent_response(response, function_calls)
assert_non_empty_string(response[0]['description'])
assert_non_empty_string(response[0]['programmatic_goal'])
assert_non_empty_string(response[0]['user_review_goal'])
# def test_break_down_development_task(self):
# # Given
# agent = Developer(project)
# convo = AgentConvo(agent)
# # convo.construct_and_add_message_from_prompt('architecture/technologies.prompt',
# # {
# # 'name': 'Test App',
# # 'prompt': '''
#
# function_calls = DEV_STEPS
#
# # When
# response = create_gpt_chat_completion(convo.messages, '', function_calls=function_calls)
# # response = {'function_calls': {
# # 'name': 'break_down_development_task',
# # 'arguments': {'tasks': [{'type': 'command', 'description': 'Run the app'}]}
# # }}
# response = parse_agent_response(response, function_calls)
#
# # Then
# # assert len(convo.messages) == 2
# assert response == ([{'type': 'command', 'description': 'Run the app'}], 'more_tasks')
def _create_convo(self, agent):
convo = AgentConvo(agent)

View File

@@ -9,12 +9,16 @@ import json
import hashlib
import re
from jinja2 import Environment, FileSystemLoader
from fabulous.color import green
from .style import green
from const.llm import MAX_QUESTIONS, END_RESPONSE
from const.common import ROLES, STEPS
from logger.logger import logger
prompts_path = os.path.join(os.path.dirname(__file__), '..', 'prompts')
file_loader = FileSystemLoader(prompts_path)
env = Environment(loader=file_loader)
def capitalize_first_word_with_underscores(s):
# Split the string into words based on underscores.
@@ -29,6 +33,23 @@ def capitalize_first_word_with_underscores(s):
return capitalized_string
def get_prompt(prompt_name, data=None):
if data is None:
data = {}
data.update(get_prompt_components())
logger.debug(f"Getting prompt for {prompt_name}") # logging here
# Load the template
template = env.get_template(prompt_name)
# Render the template with the provided data
output = template.render(data)
return output
def get_prompt_components():
# This function reads and renders all prompts inside /prompts/components and returns them in a dictionary
@@ -40,7 +61,8 @@ def get_prompt_components():
}
# Create a FileSystemLoader
file_loader = FileSystemLoader('prompts/components')
prompts_path = os.path.join(os.path.dirname(__file__), '..', 'prompts/components')
file_loader = FileSystemLoader(prompts_path)
# Create the Jinja2 environment
env = Environment(loader=file_loader)
@@ -63,17 +85,7 @@ def get_prompt_components():
def get_sys_message(role):
# Create a FileSystemLoader
file_loader = FileSystemLoader('prompts/system_messages')
# Create the Jinja2 environment
env = Environment(loader=file_loader)
# Load the template
template = env.get_template(f'{role}.prompt')
# Render the template with no variables
content = template.render()
content = get_prompt(f'system_messages/{role}.prompt')
return {
"role": "system",
@@ -186,4 +198,4 @@ def json_serial(obj):
elif isinstance(obj, uuid.UUID):
return str(obj)
else:
return str(obj)
return str(obj)
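
With get_sys_message now delegating to get_prompt, a role's system message is rendered through the shared Jinja2 environment; a quick usage sketch ('architect' is just an illustrative role name):

from utils.utils import get_sys_message

# Renders prompts/system_messages/architect.prompt via get_prompt()
message = get_sys_message('architect')
assert message['role'] == 'system'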

pytest.ini Normal file
View File

@@ -0,0 +1,8 @@
[pytest]
testpaths = .
python_files = test_*.py
markers =
slow: marks tests as slow (deselect with '-m "not slow"')
uses_tokens: Integration tests which use tokens
daily: tests which should be run daily
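
With these markers registered, individual tests opt in as in the sketch below (the test names are illustrative), and a local or CI run can deselect them with pytest -m "not slow and not uses_tokens":

import pytest

@pytest.mark.slow
def test_full_app_build():        # hypothetical: a long end-to-end build
    ...

@pytest.mark.uses_tokens
def test_live_chat_completion():  # hypothetical: calls a paid LLM endpoint
    ...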