notes and testing

2026-02-23 15:49:50 +01:00 · 2023-09-25 10:36:37 +10:00
parent 68f8368eaf
commit b8965f527d
4 changed files with 31 additions and 9 deletions
--- a/pilot/helpers/cli.py
+++ b/pilot/helpers/cli.py
@@ -93,7 +93,10 @@ def execute_command(project, command, timeout=None, force=False):
        force (bool, optional): Whether to execute the command without confirmation. Default is False.

    Returns:
-        str: The command output.
+        cli_response (str): The command output
+                            (the returned pair is '', 'DONE' if the user answered 'no' or 'skip')
+        llm_response (str): The response from the agent.
+                            TODO: this seems to be 'DONE' (no or skip) or None
    """
    if timeout is not None:
        if timeout < 1000:
@@ -109,6 +112,9 @@ def execute_command(project, command, timeout=None, force=False):
            'If yes, just press ENTER'
        )

+        # TODO: I think AutoGPT allows other feedback here, like:
+        #       "That's not going to work, let's do X instead"
+        #       We don't explicitly offer "no" or "skip" as options to the user
        if answer == 'no':
            return '', 'DONE'
        elif answer == 'skip':
@@ -252,12 +258,15 @@ def execute_command_and_check_cli_response(command, timeout, convo):

    Returns:
        tuple: A tuple containing the CLI response and the agent's response.
+            - cli_response (str): The command output.
+            - llm_response (str): 'DONE' or 'NEEDS_DEBUGGING'
    """
-    cli_response, response = execute_command(convo.agent.project, command, timeout)
-    if response is None:
-        response = convo.send_message('dev_ops/ran_command.prompt',
+    # TODO: Prompt mentions `command` could be `INSTALLED` or `NOT_INSTALLED`, where is this handled?
+    cli_response, llm_response = execute_command(convo.agent.project, command, timeout)
+    if llm_response is None:
+        llm_response = convo.send_message('dev_ops/ran_command.prompt',
            { 'cli_response': cli_response, 'command': command })
-    return cli_response, response
+    return cli_response, llm_response

 def run_command_until_success(command, timeout, convo, additional_message=None, force=False, return_cli_response=False, is_root_task=False):
    """
--- a/pilot/test_main_e2e.py
+++ b/pilot/test_main_e2e.py
@@ -25,8 +25,21 @@ def test_init():
@pytest.mark.slow
@pytest.mark.uses_tokens
@pytest.mark.skip(reason="Uses lots of tokens")
-def test_end_to_end():
+@pytest.mark.parametrize("endpoint, model", [
+    # ("OPENAI", "gpt-4"),
+    # ("OPENROUTER", "openai/gpt-3.5-turbo"),
+    # ("OPENROUTER", "meta-llama/codellama-34b-instruct"),
+    ("OPENROUTER", "google/palm-2-chat-bison"),
+    ("OPENROUTER", "google/palm-2-codechat-bison"),
+    # TODO: See https://github.com/1rgs/jsonformer-claude/blob/main/jsonformer_claude/main.py
+    #           https://github.com/guidance-ai/guidance - token healing
+    ("OPENROUTER", "anthropic/claude-2"),
+])
+def test_end_to_end(endpoint, model, monkeypatch):
    # Given
+    monkeypatch.setenv('ENDPOINT', endpoint)
+    monkeypatch.setenv('MODEL_NAME', model)
+
    create_tables()
    args = init()
    builtins.print, ipc_client_instance = get_custom_print(args)
--- a/pilot/utils/test_llm_connection.py
+++ b/pilot/utils/test_llm_connection.py
@@ -24,7 +24,7 @@ class TestLlmConnection:
    @pytest.mark.parametrize("endpoint, model", [
        ("OPENAI", "gpt-4"),                                 # role: system
        ("OPENROUTER", "openai/gpt-3.5-turbo"),              # role: user
-        ("OPENROUTER", "meta-llama/codellama-34b-instruct"), # rule: user, is_llama   missed "choices"
+        ("OPENROUTER", "meta-llama/codellama-34b-instruct"), # role: user, is_llama
        ("OPENROUTER", "google/palm-2-chat-bison"),          # role: user/system
        ("OPENROUTER", "google/palm-2-codechat-bison"),
        # TODO: See https://github.com/1rgs/jsonformer-claude/blob/main/jsonformer_claude/main.py
--- a/pilot/utils/utils.py
+++ b/pilot/utils/utils.py
@@ -9,7 +9,7 @@ import json
 import hashlib
 import re
 from jinja2 import Environment, FileSystemLoader
-from termcolor import colored
+from .style import green

 from const.llm import MAX_QUESTIONS, END_RESPONSE
 from const.common import ROLES, STEPS
@@ -138,7 +138,7 @@ def step_already_finished(args, step):
    args.update(step['app_data'])

    message = f"{capitalize_first_word_with_underscores(step['step'])} already done for this app_id: {args['app_id']}. Moving to next step..."
-    print(colored(message, 'green'))
+    print(green(message))
    logger.info(message)