Mirror of https://github.com/OMGeeky/gpt-pilot.git, synced 2025-12-31 16:40:03 +01:00
Rejecting responses that are not JSON.
Need to fix prompts for GPT-4.
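The streamed completion is now gated: when the request includes function definitions, the first buffered line of the reply must already look like JSON, otherwise the stream is rejected. A minimal, runnable sketch of that gate, using the same logic as the assert_json_response() helper added to pilot/utils/llm_connection.py in this commit (the sample strings are invented):

import re

def assert_json_response(response: str, or_fail=True) -> bool:
    # Accept a line that opens a ```json fence or contains a '{' or '['
    if re.match(r'.*(```(json)?|{|\[)', response):
        return True
    elif or_fail:
        raise ValueError('LLM did not respond with JSON')
    else:
        return False

assert assert_json_response('```json')               # fenced JSON passes
assert assert_json_response('{"technologies": []}')  # bare JSON passes
try:
    assert_json_response('Sure! Here is my plan:')   # prose is rejected
except ValueError as e:
    print(e)  # -> LLM did not respond with JSON
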
@@ -61,7 +61,7 @@ After you have Python and PostgreSQL installed, follow these steps:
 1. `git clone https://github.com/Pythagora-io/gpt-pilot.git` (clone the repo)
 2. `cd gpt-pilot`
 3. `python -m venv pilot-env` (create a virtual environment)
-4. `source pilot-env/bin/activate` (activate the virtual environment)
+4. `source pilot-env/bin/activate` (or on Windows `pilot-env\Scripts\activate`) (activate the virtual environment)
 5. `pip install -r requirements.txt` (install the dependencies)
 6. `cd pilot`
 7. `mv .env.example .env` (create the .env file)
@@ -5,7 +5,7 @@ from utils.style import yellow, yellow_bold
 
 from database.database import get_saved_development_step, save_development_step, delete_all_subsequent_steps
 from helpers.exceptions.TokenLimitError import TokenLimitError
-from utils.function_calling import parse_agent_response
+from utils.function_calling import parse_agent_response, FunctionCallSet
 from utils.llm_connection import create_gpt_chat_completion
 from utils.utils import array_of_objects_to_string, get_prompt, get_sys_message, capitalize_first_word_with_underscores
 from logger.logger import logger
@@ -31,7 +31,7 @@ class AgentConvo:
         # add system message
         self.messages.append(get_sys_message(self.agent.role))
 
-    def send_message(self, prompt_path=None, prompt_data=None, function_calls=None):
+    def send_message(self, prompt_path=None, prompt_data=None, function_calls: FunctionCallSet = None):
         """
         Sends a message in the conversation.
 
@@ -10,17 +10,12 @@ from database.models.files import File
 from database.models.development_steps import DevelopmentSteps
 from helpers.Project import Project, update_file, clear_directory
 from helpers.AgentConvo import AgentConvo
+from test.mock_terminal_size import mock_terminal_size
 
 SEND_TO_LLM = False
 WRITE_TO_FILE = False
 
 
-def mock_terminal_size():
-    mock_size = Mock()
-    mock_size.columns = 80  # or whatever width you want
-    return mock_size
-
-
 class TestCodeMonkey:
     def setup_method(self):
         name = 'TestDeveloper'
@@ -9,12 +9,7 @@ load_dotenv()
 from main import get_custom_print
 from .Developer import Developer, ENVIRONMENT_SETUP_STEP
 from helpers.Project import Project
-
-
-def mock_terminal_size():
-    mock_size = Mock()
-    mock_size.columns = 80  # or whatever width you want
-    return mock_size
+from test.mock_terminal_size import mock_terminal_size
 
 
 class TestDeveloper:
72  pilot/helpers/agents/test_TechLead.py  Normal file
@@ -0,0 +1,72 @@
+import builtins
+import os
+import pytest
+from unittest.mock import patch
+from dotenv import load_dotenv
+load_dotenv()
+
+from main import get_custom_print
+from helpers.agents.TechLead import TechLead, DEVELOPMENT_PLANNING_STEP
+from helpers.Project import Project
+from test.test_utils import assert_non_empty_string, mock_terminal_size
+from test.mock_questionary import MockQuestionary
+from utils.function_calling import parse_agent_response
+
+
+class TestTechLead:
+    def setup_method(self):
+        builtins.print, ipc_client_instance = get_custom_print({})
+
+        name = 'TestTechLead'
+        self.project = Project({
+                'app_id': 'test-tech-lead',
+                'name': name,
+                'app_type': ''
+            },
+            name=name,
+            architecture=[],
+            user_stories=[]
+        )
+
+        self.project.root_path = os.path.abspath(os.path.join(os.path.dirname(os.path.abspath(__file__)),
+                                                              '../../../workspace/TestTechLead'))
+        self.project.technologies = []
+        self.project.project_description = '''
+The project entails creating a web-based chat application, tentatively named "chat_app."
+This application does not require user authentication or chat history storage.
+It solely supports one-on-one messaging, excluding group chats or multimedia sharing like photos, videos, or files.
+Additionally, there are no specific requirements for real-time functionality, like live typing indicators or read receipts.
+The development of this application will strictly follow a monolithic structure, avoiding the use of microservices, as per the client's demand.
+The development process will include the creation of user stories and tasks, based on detailed discussions with the client.
+'''
+        self.project.user_stories = [
+            'User Story 1: As a user, I can access the web-based "chat_app" directly without needing to authenticate or log in. Do you want to add anything else? If not, just press ENTER.',
+            'User Story 2: As a user, I can start one-on-one conversations with another user on the "chat_app". Do you want to add anything else? If not, just press ENTER.',
+            'User Story 3: As a user, I can send and receive messages in real-time within my one-on-one conversation on the "chat_app". Do you want to add anything else? If not, just press ENTER.',
+            'User Story 4: As a user, I do not need to worry about deleting or storing my chats because the "chat_app" does not store chat histories. Do you want to add anything else? If not, just press ENTER.',
+            'User Story 5: As a user, I will only be able to send text messages, as the "chat_app" does not support any kind of multimedia sharing like photos, videos, or files. Do you want to add anything else? If not, just press ENTER.',
+            'User Story 6: As a user, I will not see any live typing indicators or read receipts since the "chat_app" does not provide any additional real-time functionality beyond message exchange. Do you want to add anything else? If not, just press ENTER.',
+        ]
+        self.project.architecture = ['Node.js', 'Socket.io', 'Bootstrap', 'JavaScript', 'HTML5', 'CSS3']
+        self.project.current_step = DEVELOPMENT_PLANNING_STEP
+
+    @pytest.mark.uses_tokens
+    # @patch('database.database.get_progress_steps', return_value=None)
+    @patch('helpers.AgentConvo.get_saved_development_step', return_value=None)
+    @patch('helpers.agents.TechLead.save_progress', return_value=None)
+    # @patch('os.get_terminal_size', mock_terminal_size)
+    @patch('helpers.agents.TechLead.get_progress_steps', return_value=None)
+    def test_create_development_plan(self, mock_get_saved_step, mock_save_progress, mock_get_progress_steps):
+        self.techLead = TechLead(self.project)
+
+        mock_questionary = MockQuestionary(['', '', 'no'])
+
+        with patch('utils.llm_connection.questionary', mock_questionary):
+            # When
+            development_plan = self.techLead.create_development_plan()
+
+            # Then
+            assert development_plan is not None
+            assert_non_empty_string(development_plan[0]['description'])
+            assert_non_empty_string(development_plan[0]['programmatic_goal'])
+            assert_non_empty_string(development_plan[0]['user_review_goal'])
@@ -1,9 +1,13 @@
 class MockQuestionary:
-    def __init__(self, answers=None):
+    def __init__(self, answers=None, initial_state='project_description'):
         if answers is None:
             answers = []
         self.answers = iter(answers)
-        self.state = 'project_description'
+        self.state = initial_state
 
+    class Style:
+        def __init__(self, *args, **kwargs):
+            pass
+
     def text(self, question: str, style=None):
         print('AI: ' + question)
@@ -13,6 +17,9 @@ class MockQuestionary:
             self.state = 'DONE'
         return self
 
+    def ask(self):
+        return self.unsafe_ask()
+
     def unsafe_ask(self):
         if self.state == 'user_stories':
             answer = ''
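Together, the new `initial_state` parameter and the `ask()` alias let a test drive the mock from an arbitrary point in the conversation. A hypothetical usage sketch (assuming the MockQuestionary class from this file is importable):

from test.mock_questionary import MockQuestionary

mq = MockQuestionary(['looks good', ''], initial_state='user_stories')
answer = mq.text('Do you want to add anything else?').ask()  # ask() delegates to unsafe_ask()
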
11  pilot/test/test_utils.py  Normal file
@@ -0,0 +1,11 @@
+from unittest.mock import Mock
+
+
+def mock_terminal_size():
+    mock_size = Mock()
+    mock_size.columns = 80  # or whatever width you want
+    return mock_size
+
+def assert_non_empty_string(value):
+    assert isinstance(value, str)
+    assert len(value) > 0
@@ -38,17 +38,11 @@ def add_function_calls_to_request(gpt_data, function_calls: FunctionCallSet | None):
         return
 
     model: str = gpt_data['model']
-    is_llama = 'llama' in model or 'anthropic' in model
+    is_instruct = 'llama' in model or 'anthropic' in model
 
-    # if model == 'gpt-4':
-    #     gpt_data['functions'] = function_calls['definitions']
-    #     if len(function_calls['definitions']) > 1:
-    #         gpt_data['function_call'] = 'auto'
-    #     else:
-    #         gpt_data['function_call'] = {'name': function_calls['definitions'][0]['name']}
-    #     return
     gpt_data['functions'] = function_calls['definitions']
 
-    prompter = JsonPrompter(is_llama)
+    prompter = JsonPrompter(is_instruct)
 
     if len(function_calls['definitions']) > 1:
         function_call = None
@@ -77,7 +71,8 @@ def parse_agent_response(response, function_calls: FunctionCallSet | None):
 
     if function_calls:
         text = re.sub(r'^.*```json\s*', '', response['text'], flags=re.DOTALL)
-        values = list(json.loads(text.strip('` \n')).values())
+        text = text.strip('` \n')
+        values = list(json.loads(text).values())
         if len(values) == 1:
             return values[0]
         else:
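For reference, a runnable replay of this parsing path on a fenced reply (the input string is invented; the regex and strip are exactly the ones above):

import json
import re

response = {'text': 'Here you go:\n```json\n{"plan": ["step 1"]}\n```'}

text = re.sub(r'^.*```json\s*', '', response['text'], flags=re.DOTALL)  # drop everything through the opening fence
text = text.strip('` \n')                                               # drop the closing fence
values = list(json.loads(text).values())
print(values[0])  # -> ['step 1']  (a single value is unwrapped, as above)
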
@@ -90,8 +85,8 @@ class JsonPrompter:
     """
     Adapted from local_llm_function_calling
    """
-    def __init__(self, is_llama: bool = False):
-        self.is_llama = is_llama
+    def __init__(self, is_instruct: bool = False):
+        self.is_instruct = is_instruct
 
     def function_descriptions(
         self, functions: list[FunctionType], function_to_call: str
@@ -107,7 +102,7 @@ class JsonPrompter:
         (empty if the function doesn't exist or has no description)
         """
         return [
-            function["description"]
+            f'# {function["name"]}: {function["description"]}'
             for function in functions
             if function["name"] == function_to_call and "description" in function
         ]
@@ -213,7 +208,7 @@
             else "Here's the function the user should call: "
         )
 
-        if self.is_llama:
+        if self.is_instruct:
             return f"[INST] <<SYS>>\n{system}\n\n{data}\n<</SYS>>\n\n{prompt} [/INST]"
         else:
             return f"{system}\n\n{data}\n\n{prompt}"
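The rename from is_llama to is_instruct reflects that the same [INST] formatting is also applied to Anthropic models via OpenRouter. A runnable sketch of the two prompt shapes the flag selects between, with invented system/data/prompt values:

system = 'Help the user out.'
data = 'Available functions: ...'
prompt = 'Create a web-based chat app'

instruct = f"[INST] <<SYS>>\n{system}\n\n{data}\n<</SYS>>\n\n{prompt} [/INST]"  # instruct-tuned models
plain = f"{system}\n\n{data}\n\n{prompt}"                                       # everything else
print(instruct)
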
@@ -7,14 +7,13 @@ import json
 import tiktoken
 import questionary
-
 
 from utils.style import red
 from typing import List
 from const.llm import MIN_TOKENS_FOR_GPT_RESPONSE, MAX_GPT_MODEL_TOKENS
 from logger.logger import logger
 from helpers.exceptions.TokenLimitError import TokenLimitError
 from utils.utils import fix_json
-from utils.function_calling import add_function_calls_to_request
+from utils.function_calling import add_function_calls_to_request, FunctionCallSet, FunctionType
 
 def get_tokens_in_messages(messages: List[str]) -> int:
     tokenizer = tiktoken.get_encoding("cl100k_base")  # GPT-4 tokenizer
@@ -58,7 +57,7 @@ def num_tokens_from_functions(functions):
 
 
 def create_gpt_chat_completion(messages: List[dict], req_type, min_tokens=MIN_TOKENS_FOR_GPT_RESPONSE,
-                               function_calls=None):
+                               function_calls: FunctionCallSet = None):
     """
     Called from:
     - AgentConvo.send_message() - these calls often have `function_calls`, usually from `pilot/const/function_calls.py`
@@ -167,6 +166,7 @@ def retry_on_exception(func):
                     ('answer', 'fg:orange')
                 ])).ask()
 
+                # TODO: take user's input into consideration - send to LLM?
                 if user_message != '':
                     return {}
 
@@ -183,9 +183,16 @@ def stream_gpt_completion(data, req_type):
     """
 
     # TODO add type dynamically - this isn't working when connected to the external process
-    terminal_width = 50#os.get_terminal_size().columns
+    terminal_width = 50  # os.get_terminal_size().columns
     lines_printed = 2
-    buffer = ""  # A buffer to accumulate incoming data
+    buffer = ''  # A buffer to accumulate incoming data
+    expecting_json = False
+    received_json = False
+
+    if 'functions' in data:
+        expecting_json = data['functions']
+        # Don't send the `functions` parameter to Open AI, but don't remove it from `data` in case we need to retry
+        data = {key: value for key, value in data.items() if key != "functions"}
 
     def return_result(result_data, lines_printed):
         if buffer:
@@ -197,7 +204,6 @@ def stream_gpt_completion(data, req_type):
 
     # spinner = spinner_start(yellow("Waiting for OpenAI API response..."))
     # print(yellow("Stream response from OpenAI:"))
-    api_key = os.getenv("OPENAI_API_KEY")
 
     logger.info(f'Request data: {data}')
 
@@ -208,15 +214,26 @@ def stream_gpt_completion(data, req_type):
     if endpoint == 'AZURE':
         # If yes, get the AZURE_ENDPOINT from .ENV file
         endpoint_url = os.getenv('AZURE_ENDPOINT') + '/openai/deployments/' + model + '/chat/completions?api-version=2023-05-15'
-        headers = {'Content-Type': 'application/json', 'api-key': os.getenv('AZURE_API_KEY')}
+        headers = {
+            'Content-Type': 'application/json',
+            'api-key': os.getenv('AZURE_API_KEY')
+        }
     elif endpoint == 'OPENROUTER':
         # If so, send the request to the OpenRouter API endpoint
-        headers = {'Content-Type': 'application/json', 'Authorization': 'Bearer ' + os.getenv("OPENROUTER_API_KEY"), 'HTTP-Referer': 'http://localhost:3000', 'X-Title': 'GPT Pilot (LOCAL)'}
-        endpoint_url = os.getenv("OPENROUTER_ENDPOINT", 'https://openrouter.ai/api/v1/chat/completions')
+        endpoint_url = os.getenv('OPENROUTER_ENDPOINT', 'https://openrouter.ai/api/v1/chat/completions')
+        headers = {
+            'Content-Type': 'application/json',
+            'Authorization': 'Bearer ' + os.getenv('OPENROUTER_API_KEY'),
+            'HTTP-Referer': 'http://localhost:3000',
+            'X-Title': 'GPT Pilot (LOCAL)'
+        }
     else:
         # If not, send the request to the OpenAI endpoint
-        headers = {'Content-Type': 'application/json', 'Authorization': 'Bearer ' + os.getenv("OPENAI_API_KEY")}
-        endpoint_url = os.getenv("OPENAI_ENDPOINT", 'https://api.openai.com/v1/chat/completions')
+        endpoint_url = os.getenv('OPENAI_ENDPOINT', 'https://api.openai.com/v1/chat/completions')
+        headers = {
+            'Content-Type': 'application/json',
+            'Authorization': 'Bearer ' + os.getenv('OPENAI_API_KEY')
+        }
 
     response = requests.post(
         endpoint_url,
@@ -233,7 +250,7 @@ def stream_gpt_completion(data, req_type):
         raise Exception(f"API responded with status code: {response.status_code}. Response text: {response.text}")
 
     gpt_response = ''
-    function_calls = {'name': '', 'arguments': ''}
+    # function_calls = {'name': '', 'arguments': ''}
 
     for line in response.iter_lines():
         # Ignore keep-alive new lines
@@ -259,9 +276,9 @@ def stream_gpt_completion(data, req_type):
 
             choice = json_line['choices'][0]
 
-            if 'finish_reason' in choice and choice['finish_reason'] == 'function_call':
-                function_calls['arguments'] = load_data_to_json(function_calls['arguments'])
-                return return_result({'function_calls': function_calls}, lines_printed)
+            # if 'finish_reason' in choice and choice['finish_reason'] == 'function_call':
+            #     function_calls['arguments'] = load_data_to_json(function_calls['arguments'])
+            #     return return_result({'function_calls': function_calls}, lines_printed)
 
             json_line = choice['delta']
             # TODO: token healing? https://github.com/1rgs/jsonformer-claude
@@ -272,14 +289,14 @@ def stream_gpt_completion(data, req_type):
                 continue  # skip to the next line
 
             # handle the streaming response
-            if 'function_call' in json_line:
-                if 'name' in json_line['function_call']:
-                    function_calls['name'] = json_line['function_call']['name']
-                    print(f'Function call: {function_calls["name"]}')
-
-                if 'arguments' in json_line['function_call']:
-                    function_calls['arguments'] += json_line['function_call']['arguments']
-                    print(json_line['function_call']['arguments'], type='stream', end='', flush=True)
+            # if 'function_call' in json_line:
+            #     if 'name' in json_line['function_call']:
+            #         function_calls['name'] = json_line['function_call']['name']
+            #         print(f'Function call: {function_calls["name"]}')
+            #
+            #     if 'arguments' in json_line['function_call']:
+            #         function_calls['arguments'] += json_line['function_call']['arguments']
+            #         print(json_line['function_call']['arguments'], type='stream', end='', flush=True)
 
             if 'content' in json_line:
                 content = json_line.get('content')
@@ -287,7 +304,18 @@ def stream_gpt_completion(data, req_type):
                 buffer += content  # accumulate the data
 
                 # If you detect a natural breakpoint (e.g., line break or end of a response object), print & count:
-                if buffer.endswith("\n"):  # or some other condition that denotes a breakpoint
+                if buffer.endswith("\n"):
+                    if expecting_json and not received_json:
+                        received_json = assert_json_response(buffer, lines_printed > 2)
+                        if received_json:
+                            gpt_response = ""
+                        # if not received_json:
+                        #     # Don't append to gpt_response, but increment lines_printed
+                        #     lines_printed += 1
+                        #     buffer = ""
+                        #     continue
+
+                    # or some other condition that denotes a breakpoint
                     lines_printed += count_lines_based_on_width(buffer, terminal_width)
                     buffer = ""  # reset the buffer
 
@@ -295,15 +323,42 @@ def stream_gpt_completion(data, req_type):
             print(content, type='stream', end='', flush=True)
 
     print('\n', type='stream')
-    if function_calls['arguments'] != '':
-        logger.info(f'Response via function call: {function_calls["arguments"]}')
-        function_calls['arguments'] = load_data_to_json(function_calls['arguments'])
-        return return_result({'function_calls': function_calls}, lines_printed)
-
+    # if function_calls['arguments'] != '':
+    #     logger.info(f'Response via function call: {function_calls["arguments"]}')
+    #     function_calls['arguments'] = load_data_to_json(function_calls['arguments'])
+    #     return return_result({'function_calls': function_calls}, lines_printed)
     logger.info(f'Response message: {gpt_response}')
 
+    if expecting_json:
+        assert_json_schema(gpt_response, expecting_json)
+
     new_code = postprocessing(gpt_response, req_type)  # TODO add type dynamically
     return return_result({'text': new_code}, lines_printed)
+
+
+def assert_json_response(response: str, or_fail=True) -> bool:
+    if re.match(r'.*(```(json)?|{|\[)', response):
+        return True
+    elif or_fail:
+        raise ValueError('LLM did not respond with JSON')
+    else:
+        return False
+
+
+def assert_json_schema(response: str, functions: list[FunctionType]) -> True:
+    return True
+    # TODO: validation always fails
+    # for function in functions:
+    #     schema = function['parameters']
+    #     parser = parser_for_schema(schema)
+    #     validated = parser.validate(response)
+    #     if validated.valid and validated.end_index:
+    #         return True
+    #
+    # raise ValueError('LLM responded with invalid JSON')
 
 
 def postprocessing(gpt_response, req_type):
     return gpt_response
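One subtlety in assert_json_response() above: without re.DOTALL, the `.*` in the pattern cannot cross newlines, so only the single buffered line passed in is inspected, and any `{`, `[`, or code fence anywhere in that line counts. A runnable illustration (sample strings invented):

import re

PATTERN = r'.*(```(json)?|{|\[)'  # the pattern used by assert_json_response() above

assert re.match(PATTERN, '{"foo": "bar"}')        # bare JSON object
assert re.match(PATTERN, '```json')               # opening fence
assert re.match(PATTERN, 'The plan: ["step 1"]')  # a '[' anywhere in the line matches too
assert not re.match(PATTERN, '# Foo')             # a prose line does not match
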
@@ -78,7 +78,7 @@ Create a web-based chat app'''
 
 def test_llama_json_prompter():
     # Given
-    prompter = JsonPrompter(is_llama=True)
+    prompter = JsonPrompter(is_instruct=True)
 
     # When
     prompt = prompter.prompt('Create a web-based chat app', ARCHITECTURE['definitions'])  # , 'process_technologies')
@@ -126,7 +126,7 @@ Create a web-based chat app'''
 
 def test_llama_json_prompter_named():
     # Given
-    prompter = JsonPrompter(is_llama=True)
+    prompter = JsonPrompter(is_instruct=True)
 
     # When
     prompt = prompter.prompt('Create a web-based chat app', ARCHITECTURE['definitions'], 'process_technologies')
@@ -2,13 +2,14 @@ import builtins
 import pytest
 from dotenv import load_dotenv
 
-from const.function_calls import ARCHITECTURE, DEV_STEPS
+from const.function_calls import ARCHITECTURE, DEVELOPMENT_PLAN
 from helpers.AgentConvo import AgentConvo
 from helpers.Project import Project
 from helpers.agents.Architect import Architect
 from helpers.agents.Developer import Developer
-from utils.function_calling import parse_agent_response
-from .llm_connection import create_gpt_chat_completion
+from helpers.agents.TechLead import TechLead
+from utils.function_calling import parse_agent_response, FunctionType
+from test.test_utils import assert_non_empty_string
+from .llm_connection import create_gpt_chat_completion, assert_json_response, assert_json_schema
 from main import get_custom_print
 
 load_dotenv()
@@ -16,10 +17,58 @@ load_dotenv()
 project = Project({'app_id': 'test-app'}, current_step='test')
 
 
+class TestSchemaValidation:
+    def setup_method(self):
+        self.function: FunctionType = {
+            'name': 'test',
+            'description': 'test schema',
+            'parameters': {
+                'type': 'object',
+                'properties': {'foo': {'type': 'string'}},
+                'required': ['foo']
+            }
+        }
+
+    def test_assert_json_response(self):
+        assert assert_json_response('{"foo": "bar"}')
+        assert assert_json_response('{\n"foo": "bar"}')
+        assert assert_json_response('```\n{"foo": "bar"}')
+        assert assert_json_response('```json\n{\n"foo": "bar"}')
+        with pytest.raises(ValueError, match='LLM did not respond with JSON'):
+            assert assert_json_response('# Foo\n bar')
+
+    def test_assert_json_schema(self):
+        # When assert_json_schema is called with valid JSON
+        # Then no errors
+        assert(assert_json_schema('{"foo": "bar"}', [self.function]))
+
+    def test_assert_json_schema_invalid(self):
+        # When assert_json_schema is called with invalid JSON
+        # Then error is raised
+        with pytest.raises(ValueError, match='LLM responded with invalid JSON'):
+            assert_json_schema('{"foo": 1}', [self.function])
+
+    def test_assert_json_schema_incomplete(self):
+        # When assert_json_schema is called with incomplete JSON
+        # Then error is raised
+        with pytest.raises(ValueError, match='LLM responded with invalid JSON'):
+            assert_json_schema('{"foo": "b', [self.function])
+
+    def test_assert_json_schema_required(self):
+        # When assert_json_schema is called with missing required property
+        # Then error is raised
+        self.function['parameters']['properties']['other'] = {'type': 'string'}
+        self.function['parameters']['required'] = ['foo', 'other']
+
+        with pytest.raises(ValueError, match='LLM responded with invalid JSON'):
+            assert_json_schema('{"foo": "bar"}', [self.function])
+
+
 class TestLlmConnection:
     def setup_method(self):
         builtins.print, ipc_client_instance = get_custom_print({})
 
 
     @pytest.mark.uses_tokens
     @pytest.mark.parametrize("endpoint, model", [
         ("OPENAI", "gpt-4"),  # role: system
@@ -64,7 +113,6 @@ solution-oriented decision-making in areas where precise instructions were not provided
                 'User will be able to register a new account on Test_App.',
             ]
         })
-
         function_calls = ARCHITECTURE
 
         # When
@@ -78,6 +126,60 @@
         response = parse_agent_response(response, function_calls)
         assert 'Node.js' in response
 
+    @pytest.mark.uses_tokens
+    @pytest.mark.parametrize("endpoint, model", [
+        ("OPENAI", "gpt-4"),  # role: system
+        ("OPENROUTER", "openai/gpt-3.5-turbo"),  # role: user
+        ("OPENROUTER", "meta-llama/codellama-34b-instruct"),  # rule: user, is_llama
+        ("OPENROUTER", "google/palm-2-chat-bison"),  # role: user/system
+        ("OPENROUTER", "google/palm-2-codechat-bison"),
+        # TODO: See https://github.com/1rgs/jsonformer-claude/blob/main/jsonformer_claude/main.py
+        # https://github.com/guidance-ai/guidance - token healing
+        ("OPENROUTER", "anthropic/claude-2"),  # role: user, is_llama
+    ])
+    def test_chat_completion_TechLead(self, endpoint, model, monkeypatch):
+        # Given
+        monkeypatch.setenv('ENDPOINT', endpoint)
+        monkeypatch.setenv('MODEL_NAME', model)
+
+        agent = TechLead(project)
+        convo = AgentConvo(agent)
+        convo.construct_and_add_message_from_prompt('development/plan.prompt',
+            {
+                'name': 'Test App',
+                'app_summary': '''
+The project entails creating a web-based chat application, tentatively named "chat_app."
+This application does not require user authentication or chat history storage.
+It solely supports one-on-one messaging, excluding group chats or multimedia sharing like photos, videos, or files.
+Additionally, there are no specific requirements for real-time functionality, like live typing indicators or read receipts.
+The development of this application will strictly follow a monolithic structure, avoiding the use of microservices, as per the client's demand.
+The development process will include the creation of user stories and tasks, based on detailed discussions with the client.''',
+                'app_type': 'web app',
+                'user_stories': [
+                    'User Story 1: As a user, I can access the web-based "chat_app" directly without needing to authenticate or log in. Do you want to add anything else? If not, just press ENTER.',
+                    'User Story 2: As a user, I can start one-on-one conversations with another user on the "chat_app". Do you want to add anything else? If not, just press ENTER.',
+                    'User Story 3: As a user, I can send and receive messages in real-time within my one-on-one conversation on the "chat_app". Do you want to add anything else? If not, just press ENTER.',
+                    'User Story 4: As a user, I do not need to worry about deleting or storing my chats because the "chat_app" does not store chat histories. Do you want to add anything else? If not, just press ENTER.',
+                    'User Story 5: As a user, I will only be able to send text messages, as the "chat_app" does not support any kind of multimedia sharing like photos, videos, or files. Do you want to add anything else? If not, just press ENTER.',
+                    'User Story 6: As a user, I will not see any live typing indicators or read receipts since the "chat_app" does not provide any additional real-time functionality beyond message exchange. Do you want to add anything else? If not, just press ENTER.',
+                ]
+            })
+        function_calls = DEVELOPMENT_PLAN
+
+        # When
+        response = create_gpt_chat_completion(convo.messages, '', function_calls=function_calls)
+
+        # Then
+        assert convo.messages[0]['content'].startswith('You are a tech lead in a software development agency')
+        assert convo.messages[1]['content'].startswith('You are working in a software development agency and a project manager and software architect approach you')
+
+        assert response is not None
+        response = parse_agent_response(response, function_calls)
+        assert_non_empty_string(response[0]['description'])
+        assert_non_empty_string(response[0]['programmatic_goal'])
+        assert_non_empty_string(response[0]['user_review_goal'])
 
 
 # def test_break_down_development_task(self):
 #     # Given
 #     agent = Developer(project)