notes and testing

This commit is contained in:
Nicholas Albion
2023-09-25 10:36:37 +10:00
parent 68f8368eaf
commit b8965f527d
4 changed files with 31 additions and 9 deletions

View File

@@ -93,7 +93,10 @@ def execute_command(project, command, timeout=None, force=False):
force (bool, optional): Whether to execute the command without confirmation. Default is False.
Returns:
str: The command output.
cli_response (str): The command output
or: '', 'DONE' if user answered 'no' or 'skip'
llm_response (str): The response from the agent.
TODO: this seems to be 'DONE' (no or skip) or None
"""
if timeout is not None:
if timeout < 1000:
@@ -109,6 +112,9 @@ def execute_command(project, command, timeout=None, force=False):
'If yes, just press ENTER'
)
# TODO: I think AutoGPT allows other feedback here, like:
# "That's not going to work, let's do X instead"
# We don't explicitly offer "no" or "skip" as options to the user
if answer == 'no':
return '', 'DONE'
elif answer == 'skip':
@@ -252,12 +258,15 @@ def execute_command_and_check_cli_response(command, timeout, convo):
Returns:
tuple: A tuple containing the CLI response and the agent's response.
- cli_response (str): The command output.
- llm_response (str): 'DONE' or 'NEEDS_DEBUGGING'
"""
cli_response, response = execute_command(convo.agent.project, command, timeout)
if response is None:
response = convo.send_message('dev_ops/ran_command.prompt',
# TODO: Prompt mentions `command` could be `INSTALLED` or `NOT_INSTALLED`, where is this handled?
cli_response, llm_response = execute_command(convo.agent.project, command, timeout)
if llm_response is None:
llm_response = convo.send_message('dev_ops/ran_command.prompt',
{ 'cli_response': cli_response, 'command': command })
return cli_response, response
return cli_response, llm_response
def run_command_until_success(command, timeout, convo, additional_message=None, force=False, return_cli_response=False, is_root_task=False):
"""

View File

@@ -25,8 +25,21 @@ def test_init():
@pytest.mark.slow
@pytest.mark.uses_tokens
@pytest.mark.skip(reason="Uses lots of tokens")
def test_end_to_end():
@pytest.mark.parametrize("endpoint, model", [
# ("OPENAI", "gpt-4"),
# ("OPENROUTER", "openai/gpt-3.5-turbo"),
# ("OPENROUTER", "meta-llama/codellama-34b-instruct"),
("OPENROUTER", "google/palm-2-chat-bison"),
("OPENROUTER", "google/palm-2-codechat-bison"),
# TODO: See https://github.com/1rgs/jsonformer-claude/blob/main/jsonformer_claude/main.py
# https://github.com/guidance-ai/guidance - token healing
("OPENROUTER", "anthropic/claude-2"),
])
def test_end_to_end(endpoint, model, monkeypatch):
# Given
monkeypatch.setenv('ENDPOINT', endpoint)
monkeypatch.setenv('MODEL_NAME', model)
create_tables()
args = init()
builtins.print, ipc_client_instance = get_custom_print(args)

View File

@@ -24,7 +24,7 @@ class TestLlmConnection:
@pytest.mark.parametrize("endpoint, model", [
("OPENAI", "gpt-4"), # role: system
("OPENROUTER", "openai/gpt-3.5-turbo"), # role: user
("OPENROUTER", "meta-llama/codellama-34b-instruct"), # role: user, is_llama missed "choices"
("OPENROUTER", "meta-llama/codellama-34b-instruct"), # role: user, is_llama
("OPENROUTER", "google/palm-2-chat-bison"), # role: user/system
("OPENROUTER", "google/palm-2-codechat-bison"),
# TODO: See https://github.com/1rgs/jsonformer-claude/blob/main/jsonformer_claude/main.py

View File

@@ -9,7 +9,7 @@ import json
import hashlib
import re
from jinja2 import Environment, FileSystemLoader
from termcolor import colored
from .style import green
from const.llm import MAX_QUESTIONS, END_RESPONSE
from const.common import ROLES, STEPS
@@ -138,7 +138,7 @@ def step_already_finished(args, step):
args.update(step['app_data'])
message = f"{capitalize_first_word_with_underscores(step['step'])} already done for this app_id: {args['app_id']}. Moving to next step..."
print(colored(message, 'green'))
print(green(message))
logger.info(message)