# gpt-pilot/pilot/utils/llm_connection.py
import re
import requests
import os
import sys
import time
import json
import tiktoken
import questionary
from jsonschema import validate
from utils.style import red
from typing import List
from const.llm import MIN_TOKENS_FOR_GPT_RESPONSE, MAX_GPT_MODEL_TOKENS
from logger.logger import logger
from helpers.exceptions.TokenLimitError import TokenLimitError
from utils.utils import fix_json, get_prompt
from utils.function_calling import add_function_calls_to_request, FunctionCallSet, FunctionType


def get_tokens_in_messages(messages: List[dict]) -> int:
    tokenizer = tiktoken.get_encoding("cl100k_base")  # GPT-4 tokenizer
    tokenized_messages = [tokenizer.encode(message['content']) for message in messages]
    return sum(len(tokens) for tokens in tokenized_messages)
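
# Illustrative usage (a hypothetical sketch, not executed as part of this module;
# the message contents are made up):
#
#     messages = [
#         {'role': 'system', 'content': 'You are a helpful assistant.'},
#         {'role': 'user', 'content': 'Write a hello world script.'},
#     ]
#     total = get_tokens_in_messages(messages)  # token count of both contents combined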


def num_tokens_from_functions(functions):
    """Return the number of tokens used by a list of functions."""
    encoding = tiktoken.get_encoding("cl100k_base")

    num_tokens = 0
    for function in functions:
        function_tokens = len(encoding.encode(function['name']))
        function_tokens += len(encoding.encode(function['description']))

        if 'parameters' in function:
            parameters = function['parameters']
            if 'properties' in parameters:
                for propertiesKey in parameters['properties']:
                    function_tokens += len(encoding.encode(propertiesKey))
                    v = parameters['properties'][propertiesKey]
                    for field in v:
                        if field == 'type':
                            function_tokens += 2
                            function_tokens += len(encoding.encode(v['type']))
                        elif field == 'description':
                            function_tokens += 2
                            function_tokens += len(encoding.encode(v['description']))
                        elif field == 'enum':
                            function_tokens -= 3
                            for o in v['enum']:
                                function_tokens += 3
                                function_tokens += len(encoding.encode(o))
                function_tokens += 11

        num_tokens += function_tokens

    num_tokens += 12
    return num_tokens
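
# Illustrative input (a hypothetical function definition in the OpenAI
# function-calling schema shape that this counter walks; 'get_weather' and its
# fields are made up for the example):
#
#     functions = [{
#         'name': 'get_weather',
#         'description': 'Get the current weather for a city.',
#         'parameters': {
#             'type': 'object',
#             'properties': {
#                 'city': {'type': 'string', 'description': 'City name'},
#                 'unit': {'type': 'string', 'enum': ['celsius', 'fahrenheit']},
#             },
#         },
#     }]
#     overhead = num_tokens_from_functions(functions)  # estimated prompt-token cost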


def create_gpt_chat_completion(messages: List[dict], req_type, min_tokens=MIN_TOKENS_FOR_GPT_RESPONSE,
                               function_calls: FunctionCallSet = None):
    """
    Called from:
      - AgentConvo.send_message() - these calls often have `function_calls`,
        usually from `pilot/const/function_calls.py`
      - convo.continuous_conversation()
      - prompts.get_additional_info_from_openai()
      - prompts.get_additional_info_from_user() after the user responds to each
        "Please check this message and say what needs to be changed... {message}"

    :param messages: [{ "role": "system"|"assistant"|"user", "content": string }, ... ]
    :param req_type: 'project_description' etc. See common.STEPS
    :param min_tokens: defaults to 600
    :param function_calls: (optional) {'definitions': [{ 'name': str }, ...]}
        see `IMPLEMENT_CHANGES` etc. in `pilot/const/function_calls.py`
    :return: {'text': new_code}
        or if `function_calls` param provided
        {'function_calls': {'name': str, 'arguments': {...}}}
    """
    gpt_data = {
        'model': os.getenv('MODEL_NAME', 'gpt-4'),
        'n': 1,
        'temperature': 1,
        'top_p': 1,
        'presence_penalty': 0,
        'frequency_penalty': 0,
        'messages': messages,
        'stream': True
    }

    # Delete some keys if using the OpenRouter API
    if os.getenv('ENDPOINT') == 'OPENROUTER':
        keys_to_delete = ['n', 'max_tokens', 'temperature', 'top_p', 'presence_penalty', 'frequency_penalty']
        for key in keys_to_delete:
            if key in gpt_data:
                del gpt_data[key]

    # Advise the LLM of the JSON response schema we are expecting
    add_function_calls_to_request(gpt_data, function_calls)

    try:
        response = stream_gpt_completion(gpt_data, req_type)
        return response
    except TokenLimitError as e:
        raise e
    except Exception as e:
        print(f'The request to {os.getenv("ENDPOINT")} API failed. Here is the error message:')
        print(e)
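
# Illustrative call (a hypothetical sketch; the messages are examples, not
# values used by this project):
#
#     response = create_gpt_chat_completion(
#         [{'role': 'system', 'content': 'You are a senior developer.'},
#          {'role': 'user', 'content': 'Describe the project setup.'}],
#         req_type='project_description',
#     )
#     # -> {'text': '...'}, or {'function_calls': {...}} when function_calls was given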


def delete_last_n_lines(n):
    for _ in range(n):
        # Move the cursor up one line
        sys.stdout.write('\033[F')
        # Clear the current line
        sys.stdout.write('\033[K')
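
# For reference: '\033[F' and '\033[K' are standard ANSI escape sequences
# (cursor up one line; erase to end of line). A hypothetical sketch:
#
#     print('line 1')
#     print('line 2')
#     delete_last_n_lines(2)  # the terminal now shows neither line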


def count_lines_based_on_width(content, width):
    lines_required = sum(len(line) // width + 1 for line in content.split('\n'))
    return lines_required
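
# Worked example: with width=10, a 25-character line wraps onto
# 25 // 10 + 1 = 3 terminal rows, and the empty line after a trailing '\n'
# still occupies 1 row, so:
#
#     count_lines_based_on_width('x' * 25 + '\n', 10)  # -> 3 + 1 = 4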


def get_tokens_in_messages_from_openai_error(error_message):
    """
    Extract the token count from an OpenAI error message.

    Args:
        error_message (str): The error message to extract the token count from.

    Returns:
        int or None: The token count if found, otherwise None.
    """
    match = re.search(r"your messages resulted in (\d+) tokens", error_message)
    if match:
        return int(match.group(1))
    else:
        return None
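
# Illustrative input (the wording mirrors OpenAI's context-length error at the
# time this was written; treat the exact phrasing as an assumption):
#
#     err = ("This model's maximum context length is 8192 tokens. "
#            "However, your messages resulted in 10250 tokens.")
#     get_tokens_in_messages_from_openai_error(err)  # -> 10250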


def retry_on_exception(func):
    def wrapper(*args, **kwargs):
        # spinner = None

        while True:
            try:
                # spinner_stop(spinner)
                return func(*args, **kwargs)
            except Exception as e:
                # Convert exception to string
                err_str = str(e)

                if isinstance(e, json.JSONDecodeError):
                    # codellama-34b-instruct seems to send incomplete JSON responses
                    if e.msg == 'Expecting value':
                        logger.info('Received incomplete JSON response from LLM. Asking for the rest...')
                        args[0]['function_buffer'] = e.doc
                        continue

                # If the specific error "context_length_exceeded" is present, raise TokenLimitError without retrying
                if "context_length_exceeded" in err_str:
                    # spinner_stop(spinner)
                    raise TokenLimitError(get_tokens_in_messages_from_openai_error(err_str), MAX_GPT_MODEL_TOKENS)

                if "rate_limit_exceeded" in err_str:
                    # Extract the wait duration from the error string, e.g. "Please try again in 20ms."
                    match = re.search(r"Please try again in (\d+)ms.", err_str)
                    if match:
                        # spinner = spinner_start(colored("Rate limited. Waiting...", 'yellow'))
                        logger.debug('Rate limited. Waiting...')
                        wait_duration = int(match.group(1)) / 1000
                        time.sleep(wait_duration)
                        continue

                print(red('There was a problem with the request to the OpenAI API:'))
                # spinner_stop(spinner)
                print(err_str)
                logger.error(f'There was a problem with the request to the OpenAI API: {err_str}')

                user_message = questionary.text(
                    "Do you want to try to make the same request again? If yes, just press ENTER. Otherwise, type 'no'.",
                    style=questionary.Style([
                        ('question', 'fg:red'),
                        ('answer', 'fg:orange')
                    ])).ask()

                # TODO: take user's input into consideration - send to LLM?
                if user_message != '':
                    return {}

    return wrapper
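
# Illustrative flow (hypothetical; `make_request` is made up): because the
# decorator mutates args[0] (the request dict) before retrying, an
# incomplete-JSON failure becomes a follow-up request asking the model to
# continue from what it already sent:
#
#     @retry_on_exception
#     def make_request(data, req_type):
#         ...  # may raise json.JSONDecodeError('Expecting value', doc=partial, pos=...)
#
#     make_request({'messages': [...]}, 'project_description')
#     # -> retried with data['function_buffer'] set to the partial JSON received so far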


@retry_on_exception
def stream_gpt_completion(data, req_type):
    """
    Called from create_gpt_chat_completion()
    :param data:
    :param req_type: 'project_description' etc. See common.STEPS
    :return: {'text': str} or {'function_calls': {'name': str, 'arguments': '{...}'}}
    """

    # TODO add type dynamically - this isn't working when connected to the external process
    try:
        terminal_width = os.get_terminal_size().columns
    except OSError:
        terminal_width = 50  # fallback when there is no attached terminal
    lines_printed = 2
    gpt_response = ''
    buffer = ''  # A buffer to accumulate incoming data
    expecting_json = None
    received_json = False

    if 'functions' in data:
        expecting_json = data['functions']
        if 'function_buffer' in data:
            incomplete_json = get_prompt('utils/incomplete_json.prompt', {'received_json': data['function_buffer']})
            data['messages'].append({'role': 'user', 'content': incomplete_json})
            gpt_response = data['function_buffer']
            received_json = True

        # Don't send the `functions` parameter to OpenAI, but don't remove it from `data` in case we need to retry
        data = {key: value for key, value in data.items() if not key.startswith('function')}

    def return_result(result_data, lines_printed):
        if buffer:
            lines_printed += count_lines_based_on_width(buffer, terminal_width)
        logger.debug(f'lines printed: {lines_printed} - {terminal_width}')

        delete_last_n_lines(lines_printed)
        return result_data

    # spinner = spinner_start(yellow("Waiting for OpenAI API response..."))
    # print(yellow("Stream response from OpenAI:"))

    # Configure for the selected ENDPOINT
    model = os.getenv('MODEL_NAME')
    endpoint = os.getenv('ENDPOINT')

    logger.info(f'> Request model: {model} ({data["model"]}) messages: {data["messages"]}')

    if endpoint == 'AZURE':
        # If so, get the AZURE_ENDPOINT from the .env file
        endpoint_url = os.getenv('AZURE_ENDPOINT') + '/openai/deployments/' + model + '/chat/completions?api-version=2023-05-15'
        headers = {
            'Content-Type': 'application/json',
            'api-key': os.getenv('AZURE_API_KEY')
        }
    elif endpoint == 'OPENROUTER':
        # If so, send the request to the OpenRouter API endpoint
        endpoint_url = os.getenv('OPENROUTER_ENDPOINT', 'https://openrouter.ai/api/v1/chat/completions')
        headers = {
            'Content-Type': 'application/json',
            'Authorization': 'Bearer ' + os.getenv('OPENROUTER_API_KEY'),
            'HTTP-Referer': 'http://localhost:3000',
            'X-Title': 'GPT Pilot (LOCAL)'
        }
    else:
        # Otherwise, send the request to the OpenAI endpoint
        endpoint_url = os.getenv('OPENAI_ENDPOINT', 'https://api.openai.com/v1/chat/completions')
        headers = {
            'Content-Type': 'application/json',
            'Authorization': 'Bearer ' + os.getenv('OPENAI_API_KEY')
        }

    response = requests.post(
        endpoint_url,
        headers=headers,
        json=data,
        stream=True
    )

    # Log the response status code and message
    logger.debug(f'Response status code: {response.status_code}')

    if response.status_code != 200:
        logger.info(f'problem with request: {response.text}')
        raise Exception(f"API responded with status code: {response.status_code}. Response text: {response.text}")

    # function_calls = {'name': '', 'arguments': ''}

    for line in response.iter_lines():
        # Ignore keep-alive new lines
        if line and line != b': OPENROUTER PROCESSING':
            line = line.decode("utf-8")  # decode the bytes to string

            if line.startswith('data: '):
                line = line[6:]  # remove the 'data: ' prefix

            # Check if the line is "[DONE]" before trying to parse it as JSON
            if line == "[DONE]":
                continue

            try:
                json_line = json.loads(line)

                # Check for an API error before touching 'choices', which an error payload may not contain
                if 'error' in json_line:
                    logger.error(f'Error in LLM response: {json_line}')
                    raise ValueError(f'Error in LLM response: {json_line["error"]["message"]}')

                if len(json_line['choices']) == 0:
                    continue

                choice = json_line['choices'][0]
                # if 'finish_reason' in choice and choice['finish_reason'] == 'function_call':
                #     function_calls['arguments'] = load_data_to_json(function_calls['arguments'])
                #     return return_result({'function_calls': function_calls}, lines_printed)

                json_line = choice['delta']

            except json.JSONDecodeError as e:
                logger.error(f'Unable to decode line: {line} {e.msg}')
                continue  # skip to the next line

            # handle the streaming response
            # if 'function_call' in json_line:
            #     if 'name' in json_line['function_call']:
            #         function_calls['name'] = json_line['function_call']['name']
            #         print(f'Function call: {function_calls["name"]}')
            #
            #     if 'arguments' in json_line['function_call']:
            #         function_calls['arguments'] += json_line['function_call']['arguments']
            #         print(json_line['function_call']['arguments'], type='stream', end='', flush=True)

            if 'content' in json_line:
                content = json_line.get('content')
                if content:
                    buffer += content  # accumulate the data

                    # If we detect a natural breakpoint (e.g., a line break), print & count:
                    if buffer.endswith('\n'):
                        if expecting_json and not received_json:
                            received_json = assert_json_response(buffer, lines_printed > 2)

                        # or some other condition that denotes a breakpoint
                        lines_printed += count_lines_based_on_width(buffer, terminal_width)
                        buffer = ""  # reset the buffer

                    gpt_response += content
                    # NOTE: `type='stream'` relies on the project's custom print() override,
                    # which accepts a `type` kwarg that the builtin print() does not
                    print(content, type='stream', end='', flush=True)

    print('\n', type='stream')

    # if function_calls['arguments'] != '':
    #     logger.info(f'Response via function call: {function_calls["arguments"]}')
    #     function_calls['arguments'] = load_data_to_json(function_calls['arguments'])
    #     return return_result({'function_calls': function_calls}, lines_printed)

    logger.info(f'< Response message: {gpt_response}')

    if expecting_json:
        gpt_response = clean_json_response(gpt_response)
        assert_json_schema(gpt_response, expecting_json)

    new_code = postprocessing(gpt_response, req_type)  # TODO add type dynamically
    return return_result({'text': new_code}, lines_printed)


def assert_json_response(response: str, or_fail=True) -> bool:
    if re.match(r'.*(```(json)?|{|\[)', response):
        return True
    elif or_fail:
        logger.error(f'LLM did not respond with JSON: {response}')
        raise ValueError('LLM did not respond with JSON')
    else:
        return False
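
# Illustrative behavior (hypothetical inputs):
#
#     assert_json_response('{"name": "value"}')          # True
#     assert_json_response('```json\n{"name": ...')      # True (fenced JSON counts)
#     assert_json_response('Sure! Here you go:', False)  # False (no exception raised)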


def clean_json_response(response: str) -> str:
    response = re.sub(r'^.*```json\s*', '', response, flags=re.DOTALL)
    return response.strip('` \n')
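
# Illustrative behavior (hypothetical input): strips a leading markdown fence
# and any chatter before it, plus the trailing fence:
#
#     clean_json_response('Here is the plan:\n```json\n{"steps": []}\n```')
#     # -> '{"steps": []}'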


def assert_json_schema(response: str, functions: list[FunctionType]) -> bool:
    for function in functions:
        schema = function['parameters']
        parsed = json.loads(response)
        validate(parsed, schema)
    return True
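
# Illustrative check (hypothetical schema): jsonschema.validate() raises
# ValidationError on mismatch, so this returns True only for conforming JSON:
#
#     functions = [{'name': 'f', 'description': '', 'parameters': {
#         'type': 'object',
#         'properties': {'steps': {'type': 'array'}},
#         'required': ['steps'],
#     }}]
#     assert_json_schema('{"steps": []}', functions)  # True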


def postprocessing(gpt_response: str, req_type) -> str:
    return gpt_response


def load_data_to_json(string):
    return json.loads(fix_json(string))