Rejecting responses that are not JSON.

Need to fix prompts for GPT-4
Nicholas Albion
2023-09-26 17:27:54 +10:00
parent b8965f527d
commit 8a024c2ff2
11 changed files with 298 additions and 66 deletions

View File

@@ -61,7 +61,7 @@ After you have Python and PostgreSQL installed, follow these steps:
1. `git clone https://github.com/Pythagora-io/gpt-pilot.git` (clone the repo)
2. `cd gpt-pilot`
3. `python -m venv pilot-env` (create a virtual environment)
-4. `source pilot-env/bin/activate` (activate the virtual environment)
+4. `source pilot-env/bin/activate` (or on Windows `pilot-env\Scripts\activate`) (activate the virtual environment)
5. `pip install -r requirements.txt` (install the dependencies)
6. `cd pilot`
7. `mv .env.example .env` (create the .env file)
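For Windows users, step 7 also needs a substitute for `mv`; a parallel sketch for cmd.exe (editorial note, not part of the README diff, which only covers step 4):

```
pilot-env\Scripts\activate
copy .env.example .env
```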

View File

@@ -5,7 +5,7 @@ from utils.style import yellow, yellow_bold
from database.database import get_saved_development_step, save_development_step, delete_all_subsequent_steps
from helpers.exceptions.TokenLimitError import TokenLimitError
-from utils.function_calling import parse_agent_response
+from utils.function_calling import parse_agent_response, FunctionCallSet
from utils.llm_connection import create_gpt_chat_completion
from utils.utils import array_of_objects_to_string, get_prompt, get_sys_message, capitalize_first_word_with_underscores
from logger.logger import logger
@@ -31,7 +31,7 @@ class AgentConvo:
        # add system message
        self.messages.append(get_sys_message(self.agent.role))

-    def send_message(self, prompt_path=None, prompt_data=None, function_calls=None):
+    def send_message(self, prompt_path=None, prompt_data=None, function_calls: FunctionCallSet = None):
        """
        Sends a message in the conversation.
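For context: the `FunctionCallSet` dicts referenced here live in `pilot/const/function_calls.py` and carry at least a `definitions` list of `FunctionType` entries, each with a name, description, and a JSON-schema `parameters` object (see the `FunctionType` literal in the tests further down). A minimal sketch of that shape — any fields beyond `definitions` are an assumption:

```python
from typing import TypedDict


class FunctionType(TypedDict):
    name: str
    description: str
    parameters: dict  # JSON schema describing the expected arguments


class FunctionCallSet(TypedDict):
    definitions: list[FunctionType]


# illustrative example, mirroring the ARCHITECTURE / DEVELOPMENT_PLAN constants
EXAMPLE: FunctionCallSet = {
    'definitions': [{
        'name': 'process_technologies',
        'description': 'Record the technologies to be used for the project',
        'parameters': {
            'type': 'object',
            'properties': {'technologies': {'type': 'array', 'items': {'type': 'string'}}},
            'required': ['technologies'],
        },
    }],
}
```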

View File

@@ -10,17 +10,12 @@ from database.models.files import File
from database.models.development_steps import DevelopmentSteps
from helpers.Project import Project, update_file, clear_directory
from helpers.AgentConvo import AgentConvo
+from test.mock_terminal_size import mock_terminal_size
SEND_TO_LLM = False
WRITE_TO_FILE = False
-def mock_terminal_size():
-    mock_size = Mock()
-    mock_size.columns = 80  # or whatever width you want
-    return mock_size
class TestCodeMonkey:
    def setup_method(self):
        name = 'TestDeveloper'

View File

@@ -9,12 +9,7 @@ load_dotenv()
from main import get_custom_print
from .Developer import Developer, ENVIRONMENT_SETUP_STEP
from helpers.Project import Project
-def mock_terminal_size():
-    mock_size = Mock()
-    mock_size.columns = 80  # or whatever width you want
-    return mock_size
+from test.mock_terminal_size import mock_terminal_size
class TestDeveloper:

View File

@@ -0,0 +1,72 @@
import builtins
import os
import pytest
from unittest.mock import patch
from dotenv import load_dotenv
load_dotenv()
from main import get_custom_print
from helpers.agents.TechLead import TechLead, DEVELOPMENT_PLANNING_STEP
from helpers.Project import Project
from test.test_utils import assert_non_empty_string, mock_terminal_size
from test.mock_questionary import MockQuestionary
from utils.function_calling import parse_agent_response
class TestTechLead:
    def setup_method(self):
        builtins.print, ipc_client_instance = get_custom_print({})

        name = 'TestTechLead'
        self.project = Project({
                'app_id': 'test-tech-lead',
                'name': name,
                'app_type': ''
            },
            name=name,
            architecture=[],
            user_stories=[]
        )
        self.project.root_path = os.path.abspath(os.path.join(os.path.dirname(os.path.abspath(__file__)),
                                                              '../../../workspace/TestTechLead'))
        self.project.technologies = []
        self.project.project_description = '''
The project entails creating a web-based chat application, tentatively named "chat_app."
This application does not require user authentication or chat history storage.
It solely supports one-on-one messaging, excluding group chats or multimedia sharing like photos, videos, or files.
Additionally, there are no specific requirements for real-time functionality, like live typing indicators or read receipts.
The development of this application will strictly follow a monolithic structure, avoiding the use of microservices, as per the client's demand.
The development process will include the creation of user stories and tasks, based on detailed discussions with the client.
'''
        self.project.user_stories = [
            'User Story 1: As a user, I can access the web-based "chat_app" directly without needing to authenticate or log in. Do you want to add anything else? If not, just press ENTER.',
            'User Story 2: As a user, I can start one-on-one conversations with another user on the "chat_app". Do you want to add anything else? If not, just press ENTER.',
            'User Story 3: As a user, I can send and receive messages in real-time within my one-on-one conversation on the "chat_app". Do you want to add anything else? If not, just press ENTER.',
            'User Story 4: As a user, I do not need to worry about deleting or storing my chats because the "chat_app" does not store chat histories. Do you want to add anything else? If not, just press ENTER.',
            'User Story 5: As a user, I will only be able to send text messages, as the "chat_app" does not support any kind of multimedia sharing like photos, videos, or files. Do you want to add anything else? If not, just press ENTER.',
            'User Story 6: As a user, I will not see any live typing indicators or read receipts since the "chat_app" does not provide any additional real-time functionality beyond message exchange. Do you want to add anything else? If not, just press ENTER.',
        ]
        self.project.architecture = ['Node.js', 'Socket.io', 'Bootstrap', 'JavaScript', 'HTML5', 'CSS3']
        self.project.current_step = DEVELOPMENT_PLANNING_STEP

    @pytest.mark.uses_tokens
    # @patch('database.database.get_progress_steps', return_value=None)
    @patch('helpers.AgentConvo.get_saved_development_step', return_value=None)
    @patch('helpers.agents.TechLead.save_progress', return_value=None)
    # @patch('os.get_terminal_size', mock_terminal_size)
    @patch('helpers.agents.TechLead.get_progress_steps', return_value=None)
    def test_create_development_plan(self, mock_get_saved_step, mock_save_progress, mock_get_progress_steps):
        self.techLead = TechLead(self.project)

        mock_questionary = MockQuestionary(['', '', 'no'])

        with patch('utils.llm_connection.questionary', mock_questionary):
            # When
            development_plan = self.techLead.create_development_plan()

            # Then
            assert development_plan is not None
            assert_non_empty_string(development_plan[0]['description'])
            assert_non_empty_string(development_plan[0]['programmatic_goal'])
            assert_non_empty_string(development_plan[0]['user_review_goal'])
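Judging by the assertions, `create_development_plan()` is expected to return a list of task dicts; an illustrative element (values invented for the example, only the three keys are grounded in the test):

```python
development_plan = [{
    'description': 'Set up a Node.js project with Express and Socket.io',
    'programmatic_goal': 'Server starts and accepts Socket.io connections on port 3000',
    'user_review_goal': 'User can open the chat page in a browser',
}]
```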

View File

@@ -1,9 +1,13 @@
class MockQuestionary:
-    def __init__(self, answers=None):
+    def __init__(self, answers=None, initial_state='project_description'):
        if answers is None:
            answers = []
        self.answers = iter(answers)
-        self.state = 'project_description'
+        self.state = initial_state

+    class Style:
+        def __init__(self, *args, **kwargs):
+            pass

    def text(self, question: str, style=None):
        print('AI: ' + question)
@@ -13,6 +17,9 @@ class MockQuestionary:
            self.state = 'DONE'
        return self

+    def ask(self):
+        return self.unsafe_ask()

    def unsafe_ask(self):
        if self.state == 'user_stories':
            answer = ''
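Usage is unchanged from the agent tests above; a condensed sketch of how the mock is wired in:

```python
from unittest.mock import patch

mock_questionary = MockQuestionary(['', '', 'no'])
with patch('utils.llm_connection.questionary', mock_questionary):
    # the code under test now receives scripted answers via .ask() / .unsafe_ask()
    ...
```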

pilot/test/test_utils.py Normal file
View File

@@ -0,0 +1,11 @@
from unittest.mock import Mock


def mock_terminal_size():
    mock_size = Mock()
    mock_size.columns = 80  # or whatever width you want
    return mock_size


def assert_non_empty_string(value):
    assert isinstance(value, str)
    assert len(value) > 0
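A quick usage sketch (hypothetical test, not part of the commit): `mock_terminal_size` is meant to be handed to `unittest.mock.patch` so `os.get_terminal_size()` returns a fixed width, as the commented-out decorator in the TechLead test hints:

```python
import os
from unittest.mock import patch

from test.test_utils import assert_non_empty_string, mock_terminal_size


@patch('os.get_terminal_size', mock_terminal_size)
def test_example():
    assert os.get_terminal_size().columns == 80
    assert_non_empty_string('hello')
```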

View File

@@ -38,17 +38,11 @@ def add_function_calls_to_request(gpt_data, function_calls: FunctionCallSet | No
        return

    model: str = gpt_data['model']
-    is_llama = 'llama' in model or 'anthropic' in model
+    is_instruct = 'llama' in model or 'anthropic' in model

-    # if model == 'gpt-4':
-    #     gpt_data['functions'] = function_calls['definitions']
-    #     if len(function_calls['definitions']) > 1:
-    #         gpt_data['function_call'] = 'auto'
-    #     else:
-    #         gpt_data['function_call'] = {'name': function_calls['definitions'][0]['name']}
-    #     return

+    gpt_data['functions'] = function_calls['definitions']

-    prompter = JsonPrompter(is_llama)
+    prompter = JsonPrompter(is_instruct)

    if len(function_calls['definitions']) > 1:
        function_call = None
@@ -77,7 +71,8 @@ def parse_agent_response(response, function_calls: FunctionCallSet | None):
    if function_calls:
        text = re.sub(r'^.*```json\s*', '', response['text'], flags=re.DOTALL)
-        values = list(json.loads(text.strip('` \n')).values())
+        text = text.strip('` \n')
+        values = list(json.loads(text).values())
        if len(values) == 1:
            return values[0]
        else:
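For illustration, with a single function defined the parsed payload collapses to its lone value (hypothetical response text; `ARCHITECTURE` is the repo's own constant):

```python
response = {'text': '```json\n{"technologies": ["Node.js"]}\n```'}
parse_agent_response(response, ARCHITECTURE)  # -> ['Node.js']
```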
@@ -90,8 +85,8 @@ class JsonPrompter:
"""
Adapted from local_llm_function_calling
"""
def __init__(self, is_llama: bool = False):
self.is_llama = is_llama
def __init__(self, is_instruct: bool = False):
self.is_instruct = is_instruct
def function_descriptions(
self, functions: list[FunctionType], function_to_call: str
@@ -107,7 +102,7 @@ class JsonPrompter:
        (empty if the function doesn't exist or has no description)
        """
        return [
-            function["description"]
+            f'# {function["name"]}: {function["description"]}'
            for function in functions
            if function["name"] == function_to_call and "description" in function
        ]
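The change prefixes each description with a markdown-style heading naming the function; an illustrative call (invented values):

```python
prompter = JsonPrompter()
prompter.function_descriptions(
    [{'name': 'process_technologies', 'description': 'Pick the stack'}],
    'process_technologies',
)  # before: ['Pick the stack']  after: ['# process_technologies: Pick the stack']
```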
@@ -213,7 +208,7 @@ class JsonPrompter:
else "Here's the function the user should call: "
)
if self.is_llama:
if self.is_instruct:
return f"[INST] <<SYS>>\n{system}\n\n{data}\n<</SYS>>\n\n{prompt} [/INST]"
else:
return f"{system}\n\n{data}\n\n{prompt}"

View File

@@ -7,14 +7,13 @@ import json
import tiktoken
import questionary
from utils.style import red
from typing import List
from const.llm import MIN_TOKENS_FOR_GPT_RESPONSE, MAX_GPT_MODEL_TOKENS
from logger.logger import logger
from helpers.exceptions.TokenLimitError import TokenLimitError
from utils.utils import fix_json
-from utils.function_calling import add_function_calls_to_request
+from utils.function_calling import add_function_calls_to_request, FunctionCallSet, FunctionType


def get_tokens_in_messages(messages: List[str]) -> int:
    tokenizer = tiktoken.get_encoding("cl100k_base")  # GPT-4 tokenizer
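As a standalone reminder of the tiktoken call used here (the `cl100k_base` encoding is the one GPT-4 uses):

```python
import tiktoken

tokenizer = tiktoken.get_encoding('cl100k_base')
print(len(tokenizer.encode('hello world')))  # 2 tokens
```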
@@ -58,7 +57,7 @@ def num_tokens_from_functions(functions):
def create_gpt_chat_completion(messages: List[dict], req_type, min_tokens=MIN_TOKENS_FOR_GPT_RESPONSE,
-                              function_calls=None):
+                              function_calls: FunctionCallSet = None):
    """
    Called from:
      - AgentConvo.send_message() - these calls often have `function_calls`, usually from `pilot/const/function_calls.py`
@@ -167,6 +166,7 @@ def retry_on_exception(func):
        ('answer', 'fg:orange')
    ])).ask()

+    # TODO: take user's input into consideration - send to LLM?
    if user_message != '':
        return {}
@@ -183,9 +183,16 @@ def stream_gpt_completion(data, req_type):
"""
    # TODO add type dynamically - this isn't working when connected to the external process
-    terminal_width = 50#os.get_terminal_size().columns
+    terminal_width = 50  # os.get_terminal_size().columns
    lines_printed = 2
-    buffer = ""  # A buffer to accumulate incoming data
+    buffer = ''  # A buffer to accumulate incoming data
+    expecting_json = False
+    received_json = False

+    if 'functions' in data:
+        expecting_json = data['functions']
+        # Don't send the `functions` parameter to Open AI, but don't remove it from `data` in case we need to retry
+        data = {key: value for key, value in data.items() if key != "functions"}

    def return_result(result_data, lines_printed):
        if buffer:
@@ -197,7 +204,6 @@ def stream_gpt_completion(data, req_type):
    # spinner = spinner_start(yellow("Waiting for OpenAI API response..."))
    # print(yellow("Stream response from OpenAI:"))

-    api_key = os.getenv("OPENAI_API_KEY")
    logger.info(f'Request data: {data}')
@@ -208,15 +214,26 @@ def stream_gpt_completion(data, req_type):
    if endpoint == 'AZURE':
        # If yes, get the AZURE_ENDPOINT from .ENV file
        endpoint_url = os.getenv('AZURE_ENDPOINT') + '/openai/deployments/' + model + '/chat/completions?api-version=2023-05-15'
-        headers = {'Content-Type': 'application/json', 'api-key': os.getenv('AZURE_API_KEY')}
+        headers = {
+            'Content-Type': 'application/json',
+            'api-key': os.getenv('AZURE_API_KEY')
+        }
    elif endpoint == 'OPENROUTER':
        # If so, send the request to the OpenRouter API endpoint
-        headers = {'Content-Type': 'application/json', 'Authorization': 'Bearer ' + os.getenv("OPENROUTER_API_KEY"), 'HTTP-Referer': 'http://localhost:3000', 'X-Title': 'GPT Pilot (LOCAL)'}
-        endpoint_url = os.getenv("OPENROUTER_ENDPOINT", 'https://openrouter.ai/api/v1/chat/completions')
+        endpoint_url = os.getenv('OPENROUTER_ENDPOINT', 'https://openrouter.ai/api/v1/chat/completions')
+        headers = {
+            'Content-Type': 'application/json',
+            'Authorization': 'Bearer ' + os.getenv('OPENROUTER_API_KEY'),
+            'HTTP-Referer': 'http://localhost:3000',
+            'X-Title': 'GPT Pilot (LOCAL)'
+        }
    else:
        # If not, send the request to the OpenAI endpoint
-        headers = {'Content-Type': 'application/json', 'Authorization': 'Bearer ' + os.getenv("OPENAI_API_KEY")}
-        endpoint_url = os.getenv("OPENAI_ENDPOINT", 'https://api.openai.com/v1/chat/completions')
+        endpoint_url = os.getenv('OPENAI_ENDPOINT', 'https://api.openai.com/v1/chat/completions')
+        headers = {
+            'Content-Type': 'application/json',
+            'Authorization': 'Bearer ' + os.getenv('OPENAI_API_KEY')
+        }

    response = requests.post(
        endpoint_url,
@@ -233,7 +250,7 @@ def stream_gpt_completion(data, req_type):
        raise Exception(f"API responded with status code: {response.status_code}. Response text: {response.text}")

    gpt_response = ''
-    function_calls = {'name': '', 'arguments': ''}
+    # function_calls = {'name': '', 'arguments': ''}

    for line in response.iter_lines():
        # Ignore keep-alive new lines
@@ -259,9 +276,9 @@ def stream_gpt_completion(data, req_type):
            choice = json_line['choices'][0]

-            if 'finish_reason' in choice and choice['finish_reason'] == 'function_call':
-                function_calls['arguments'] = load_data_to_json(function_calls['arguments'])
-                return return_result({'function_calls': function_calls}, lines_printed)
+            # if 'finish_reason' in choice and choice['finish_reason'] == 'function_call':
+            #     function_calls['arguments'] = load_data_to_json(function_calls['arguments'])
+            #     return return_result({'function_calls': function_calls}, lines_printed)

            json_line = choice['delta']

            # TODO: token healing? https://github.com/1rgs/jsonformer-claude
@@ -272,14 +289,14 @@ def stream_gpt_completion(data, req_type):
                continue  # skip to the next line

            # handle the streaming response
-            if 'function_call' in json_line:
-                if 'name' in json_line['function_call']:
-                    function_calls['name'] = json_line['function_call']['name']
-                    print(f'Function call: {function_calls["name"]}')
-
-                if 'arguments' in json_line['function_call']:
-                    function_calls['arguments'] += json_line['function_call']['arguments']
-                    print(json_line['function_call']['arguments'], type='stream', end='', flush=True)
+            # if 'function_call' in json_line:
+            #     if 'name' in json_line['function_call']:
+            #         function_calls['name'] = json_line['function_call']['name']
+            #         print(f'Function call: {function_calls["name"]}')
+            #
+            #     if 'arguments' in json_line['function_call']:
+            #         function_calls['arguments'] += json_line['function_call']['arguments']
+            #         print(json_line['function_call']['arguments'], type='stream', end='', flush=True)

            if 'content' in json_line:
                content = json_line.get('content')
@@ -287,7 +304,18 @@ def stream_gpt_completion(data, req_type):
                buffer += content  # accumulate the data

                # If you detect a natural breakpoint (e.g., line break or end of a response object), print & count:
-                if buffer.endswith("\n"):  # or some other condition that denotes a breakpoint
+                if buffer.endswith("\n"):
+                    if expecting_json and not received_json:
+                        received_json = assert_json_response(buffer, lines_printed > 2)
+                        if received_json:
+                            gpt_response = ""
+
+                    # if not received_json:
+                    #     # Don't append to gpt_response, but increment lines_printed
+                    #     lines_printed += 1
+                    #     buffer = ""
+                    #     continue

+                    # or some other condition that denotes a breakpoint
                    lines_printed += count_lines_based_on_width(buffer, terminal_width)
                    buffer = ""  # reset the buffer
@@ -295,15 +323,42 @@ def stream_gpt_completion(data, req_type):
                print(content, type='stream', end='', flush=True)

    print('\n', type='stream')

-    if function_calls['arguments'] != '':
-        logger.info(f'Response via function call: {function_calls["arguments"]}')
-        function_calls['arguments'] = load_data_to_json(function_calls['arguments'])
-        return return_result({'function_calls': function_calls}, lines_printed)
+    # if function_calls['arguments'] != '':
+    #     logger.info(f'Response via function call: {function_calls["arguments"]}')
+    #     function_calls['arguments'] = load_data_to_json(function_calls['arguments'])
+    #     return return_result({'function_calls': function_calls}, lines_printed)

    logger.info(f'Response message: {gpt_response}')

+    if expecting_json:
+        assert_json_schema(gpt_response, expecting_json)

    new_code = postprocessing(gpt_response, req_type)  # TODO add type dynamically
    return return_result({'text': new_code}, lines_printed)


+def assert_json_response(response: str, or_fail=True) -> bool:
+    if re.match(r'.*(```(json)?|{|\[)', response):
+        return True
+    elif or_fail:
+        raise ValueError('LLM did not respond with JSON')
+    else:
+        return False


+def assert_json_schema(response: str, functions: list[FunctionType]) -> True:
+    return True
+    # TODO: validation always fails
+    # for function in functions:
+    #     schema = function['parameters']
+    #     parser = parser_for_schema(schema)
+    #     validated = parser.validate(response)
+    #     if validated.valid and validated.end_index:
+    #         return True
+    #
+    # raise ValueError('LLM responded with invalid JSON')


def postprocessing(gpt_response, req_type):
    return gpt_response
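Note that `assert_json_schema` is stubbed to `return True` for now (per the TODO, the commented-out local_llm_function_calling validation always failed), yet the new tests below expect a ValueError on invalid, incomplete, or schema-violating JSON. A minimal sketch of a version that would satisfy those tests, using the third-party `jsonschema` package instead of the commented-out parser (an editorial assumption, not what this commit ships):

```python
import json

from jsonschema import ValidationError, validate  # pip install jsonschema


def assert_json_schema(response: str, functions: list) -> bool:
    # `functions` is the definitions list that stream_gpt_completion stored in `expecting_json`
    try:
        instance = json.loads(response)
    except json.JSONDecodeError:
        # covers truncated output such as '{"foo": "b'
        raise ValueError('LLM responded with invalid JSON')
    for function in functions:
        try:
            validate(instance, function['parameters'])
            return True
        except ValidationError:
            continue
    raise ValueError('LLM responded with invalid JSON')
```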

View File

@@ -78,7 +78,7 @@ Create a web-based chat app'''
def test_llama_json_prompter():
    # Given
-    prompter = JsonPrompter(is_llama=True)
+    prompter = JsonPrompter(is_instruct=True)

    # When
    prompt = prompter.prompt('Create a web-based chat app', ARCHITECTURE['definitions'])  # , 'process_technologies')
@@ -126,7 +126,7 @@ Create a web-based chat app'''
def test_llama_json_prompter_named():
    # Given
-    prompter = JsonPrompter(is_llama=True)
+    prompter = JsonPrompter(is_instruct=True)

    # When
    prompt = prompter.prompt('Create a web-based chat app', ARCHITECTURE['definitions'], 'process_technologies')

View File

@@ -2,13 +2,14 @@ import builtins
import pytest
from dotenv import load_dotenv
-from const.function_calls import ARCHITECTURE, DEV_STEPS
+from const.function_calls import ARCHITECTURE, DEVELOPMENT_PLAN
from helpers.AgentConvo import AgentConvo
from helpers.Project import Project
from helpers.agents.Architect import Architect
from helpers.agents.Developer import Developer
-from utils.function_calling import parse_agent_response
-from .llm_connection import create_gpt_chat_completion
+from helpers.agents.TechLead import TechLead
+from utils.function_calling import parse_agent_response, FunctionType
+from test.test_utils import assert_non_empty_string
+from .llm_connection import create_gpt_chat_completion, assert_json_response, assert_json_schema
from main import get_custom_print

load_dotenv()
@@ -16,10 +17,58 @@ load_dotenv()
project = Project({'app_id': 'test-app'}, current_step='test')
class TestSchemaValidation:
    def setup_method(self):
        self.function: FunctionType = {
            'name': 'test',
            'description': 'test schema',
            'parameters': {
                'type': 'object',
                'properties': {'foo': {'type': 'string'}},
                'required': ['foo']
            }
        }

    def test_assert_json_response(self):
        assert assert_json_response('{"foo": "bar"}')
        assert assert_json_response('{\n"foo": "bar"}')
        assert assert_json_response('```\n{"foo": "bar"}')
        assert assert_json_response('```json\n{\n"foo": "bar"}')
        with pytest.raises(ValueError, match='LLM did not respond with JSON'):
            assert assert_json_response('# Foo\n bar')

    def test_assert_json_schema(self):
        # When assert_json_schema is called with valid JSON
        # Then no errors
        assert(assert_json_schema('{"foo": "bar"}', [self.function]))

    def test_assert_json_schema_invalid(self):
        # When assert_json_schema is called with invalid JSON
        # Then error is raised
        with pytest.raises(ValueError, match='LLM responded with invalid JSON'):
            assert_json_schema('{"foo": 1}', [self.function])

    def test_assert_json_schema_incomplete(self):
        # When assert_json_schema is called with incomplete JSON
        # Then error is raised
        with pytest.raises(ValueError, match='LLM responded with invalid JSON'):
            assert_json_schema('{"foo": "b', [self.function])

    def test_assert_json_schema_required(self):
        # When assert_json_schema is called with missing required property
        # Then error is raised
        self.function['parameters']['properties']['other'] = {'type': 'string'}
        self.function['parameters']['required'] = ['foo', 'other']
        with pytest.raises(ValueError, match='LLM responded with invalid JSON'):
            assert_json_schema('{"foo": "bar"}', [self.function])


class TestLlmConnection:
    def setup_method(self):
        builtins.print, ipc_client_instance = get_custom_print({})

    @pytest.mark.uses_tokens
    @pytest.mark.parametrize("endpoint, model", [
        ("OPENAI", "gpt-4"),  # role: system
@@ -64,7 +113,6 @@ solution-oriented decision-making in areas where precise instructions were not p
                'User will be able to register a new account on Test_App.',
            ]
        })

        function_calls = ARCHITECTURE

        # When
@@ -78,6 +126,60 @@ solution-oriented decision-making in areas where precise instructions were not p
        response = parse_agent_response(response, function_calls)
        assert 'Node.js' in response

    @pytest.mark.uses_tokens
    @pytest.mark.parametrize("endpoint, model", [
        ("OPENAI", "gpt-4"),  # role: system
        ("OPENROUTER", "openai/gpt-3.5-turbo"),  # role: user
        ("OPENROUTER", "meta-llama/codellama-34b-instruct"),  # role: user, is_llama
        ("OPENROUTER", "google/palm-2-chat-bison"),  # role: user/system
        ("OPENROUTER", "google/palm-2-codechat-bison"),
        # TODO: See https://github.com/1rgs/jsonformer-claude/blob/main/jsonformer_claude/main.py
        #       https://github.com/guidance-ai/guidance - token healing
        ("OPENROUTER", "anthropic/claude-2"),  # role: user, is_llama
    ])
    def test_chat_completion_TechLead(self, endpoint, model, monkeypatch):
        # Given
        monkeypatch.setenv('ENDPOINT', endpoint)
        monkeypatch.setenv('MODEL_NAME', model)

        agent = TechLead(project)
        convo = AgentConvo(agent)
        convo.construct_and_add_message_from_prompt('development/plan.prompt', {
            'name': 'Test App',
            'app_summary': '''
The project entails creating a web-based chat application, tentatively named "chat_app."
This application does not require user authentication or chat history storage.
It solely supports one-on-one messaging, excluding group chats or multimedia sharing like photos, videos, or files.
Additionally, there are no specific requirements for real-time functionality, like live typing indicators or read receipts.
The development of this application will strictly follow a monolithic structure, avoiding the use of microservices, as per the client's demand.
The development process will include the creation of user stories and tasks, based on detailed discussions with the client.''',
            'app_type': 'web app',
            'user_stories': [
                'User Story 1: As a user, I can access the web-based "chat_app" directly without needing to authenticate or log in. Do you want to add anything else? If not, just press ENTER.',
                'User Story 2: As a user, I can start one-on-one conversations with another user on the "chat_app". Do you want to add anything else? If not, just press ENTER.',
                'User Story 3: As a user, I can send and receive messages in real-time within my one-on-one conversation on the "chat_app". Do you want to add anything else? If not, just press ENTER.',
                'User Story 4: As a user, I do not need to worry about deleting or storing my chats because the "chat_app" does not store chat histories. Do you want to add anything else? If not, just press ENTER.',
                'User Story 5: As a user, I will only be able to send text messages, as the "chat_app" does not support any kind of multimedia sharing like photos, videos, or files. Do you want to add anything else? If not, just press ENTER.',
                'User Story 6: As a user, I will not see any live typing indicators or read receipts since the "chat_app" does not provide any additional real-time functionality beyond message exchange. Do you want to add anything else? If not, just press ENTER.',
            ]
        })

        function_calls = DEVELOPMENT_PLAN

        # When
        response = create_gpt_chat_completion(convo.messages, '', function_calls=function_calls)

        # Then
        assert convo.messages[0]['content'].startswith('You are a tech lead in a software development agency')
        assert convo.messages[1]['content'].startswith('You are working in a software development agency and a project manager and software architect approach you')
        assert response is not None
        response = parse_agent_response(response, function_calls)
        assert_non_empty_string(response[0]['description'])
        assert_non_empty_string(response[0]['programmatic_goal'])
        assert_non_empty_string(response[0]['user_review_goal'])

    # def test_break_down_development_task(self):
    #     # Given
    #     agent = Developer(project)