From 156b36126363974817ad944298f0fb159d006b2e Mon Sep 17 00:00:00 2001
From: Nicholas Albion
Date: Sat, 23 Sep 2023 00:45:23 +1000
Subject: [PATCH] function_call-style JSON response from gpt-4, gpt-3_5, codellama, palm-2-chat-bison

---
 .github/workflows/ci.yml             |  3 +-
 pilot/utils/function_calling.py      | 43 +++++++-----
 pilot/utils/llm_connection.py        |  2 +
 pilot/utils/test_function_calling.py |  4 +-
 pilot/utils/test_llm_connection.py   | 97 ++++++++++++++++++----------
 5 files changed, 96 insertions(+), 53 deletions(-)

diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index c27c8fb..2fca911 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -21,9 +21,10 @@ jobs:
       - uses: actions/checkout@v4
 
       - name: Set up Python ${{ matrix.python-version }}
-        uses: actions/setup-python@v2
+        uses: actions/setup-python@v4
         with:
           python-version: ${{ matrix.python-version }}
+          cache: 'pip'
 
       - name: Install dependencies
         run: |
diff --git a/pilot/utils/function_calling.py b/pilot/utils/function_calling.py
index 0ec3360..03eac27 100644
--- a/pilot/utils/function_calling.py
+++ b/pilot/utils/function_calling.py
@@ -18,25 +18,33 @@ def add_function_calls_to_request(gpt_data, function_calls: FunctionCallSet | No
     if function_calls is None:
         return
 
-    if gpt_data['model'] == 'gpt-4':
-        gpt_data['functions'] = function_calls['definitions']
-        if len(function_calls['definitions']) > 1:
-            gpt_data['function_call'] = 'auto'
-        else:
-            gpt_data['function_call'] = {'name': function_calls['definitions'][0]['name']}
-        return
+    model: str = gpt_data['model']
+    is_llama = 'llama' in model
+
+    # if model == 'gpt-4':
+    #     gpt_data['functions'] = function_calls['definitions']
+    #     if len(function_calls['definitions']) > 1:
+    #         gpt_data['function_call'] = 'auto'
+    #     else:
+    #         gpt_data['function_call'] = {'name': function_calls['definitions'][0]['name']}
+    #     return
 
     # prompter = CompletionModelPrompter()
     # prompter = InstructModelPrompter()
-    prompter = LlamaInstructPrompter()
+    prompter = JsonPrompter(is_llama)
 
     if len(function_calls['definitions']) > 1:
         function_call = None
     else:
         function_call = function_calls['definitions'][0]['name']
 
+    role = 'user' if '/' in model else 'system'
+    # role = 'user'
+    # role = 'system'
+    # is_llama = True
+
     gpt_data['messages'].append({
-        'role': 'user',
+        'role': role,
         'content': prompter.prompt('', function_calls['definitions'], function_call)
     })
 
@@ -54,7 +62,7 @@ def parse_agent_response(response, function_calls: FunctionCallSet | None):
     """
 
     if function_calls:
-        text = re.sub(r'^```json\n', '', response['text'])
+        text = re.sub(r'^.*```json\s*', '', response['text'], flags=re.DOTALL)
         values = list(json.loads(text.strip('` \n')).values())
         if len(values) == 1:
             return values[0]
@@ -64,11 +72,12 @@ def parse_agent_response(response, function_calls: FunctionCallSet | None):
     return response['text']
 
 
-class LlamaInstructPrompter:
+class JsonPrompter:
     """
-    A prompter for Llama2 instruct models.
     Adapted from local_llm_function_calling
     """
+    def __init__(self, is_llama: bool = False):
+        self.is_llama = is_llama
 
     def function_descriptions(
         self, functions: list[FunctionType], function_to_call: str
@@ -177,7 +186,9 @@ class LlamaInstructPrompter:
             "Help choose the appropriate function to call to answer the user's question."
             if function_to_call is None
             else f"Define the arguments for {function_to_call} to answer the user's question."
-        ) + "In your response you must only use JSON output and provide no notes or commentary."
+        # ) + "\nYou must return a JSON object without notes or commentary."
+ ) + " \nIn your response you must only use JSON output and provide no explanation or commentary." + data = ( self.function_data(functions, function_to_call) if function_to_call @@ -188,6 +199,8 @@ class LlamaInstructPrompter: if function_to_call else "Here's the function the user should call: " ) - return f"[INST] <>\n{system}\n\n{data}\n<>\n\n{prompt} [/INST]" - # {response_start}" + if self.is_llama: + return f"[INST] <>\n{system}\n\n{data}\n<>\n\n{prompt} [/INST]" + else: + return f"{system}\n\n{data}\n\n{prompt}" diff --git a/pilot/utils/llm_connection.py b/pilot/utils/llm_connection.py index cf20bcb..2aac35a 100644 --- a/pilot/utils/llm_connection.py +++ b/pilot/utils/llm_connection.py @@ -264,6 +264,8 @@ def stream_gpt_completion(data, req_type): return return_result({'function_calls': function_calls}, lines_printed) json_line = choice['delta'] + # TODO: token healing? https://github.com/1rgs/jsonformer-claude + # ...Is this what local_llm_function_calling.constrainer is for? except json.JSONDecodeError: logger.error(f'Unable to decode line: {line}') diff --git a/pilot/utils/test_function_calling.py b/pilot/utils/test_function_calling.py index 635e1c6..c64b2ce 100644 --- a/pilot/utils/test_function_calling.py +++ b/pilot/utils/test_function_calling.py @@ -1,7 +1,7 @@ from local_llm_function_calling.prompter import CompletionModelPrompter, InstructModelPrompter from const.function_calls import ARCHITECTURE, DEV_STEPS -from .function_calling import parse_agent_response, LlamaInstructPrompter +from .function_calling import parse_agent_response, JsonPrompter class TestFunctionCalling: @@ -140,7 +140,7 @@ Function call: ''' def test_llama_instruct_function_prompter_named(): # Given - prompter = LlamaInstructPrompter() + prompter = JsonPrompter() # When prompt = prompter.prompt('Create a web-based chat app', ARCHITECTURE['definitions'], 'process_technologies') diff --git a/pilot/utils/test_llm_connection.py b/pilot/utils/test_llm_connection.py index a5c82da..fcba855 100644 --- a/pilot/utils/test_llm_connection.py +++ b/pilot/utils/test_llm_connection.py @@ -1,14 +1,14 @@ import builtins import os +import pytest from dotenv import load_dotenv -from unittest.mock import patch - from const.function_calls import ARCHITECTURE, DEV_STEPS from helpers.AgentConvo import AgentConvo from helpers.Project import Project from helpers.agents.Architect import Architect from helpers.agents.Developer import Developer +from utils.function_calling import parse_agent_response from .llm_connection import create_gpt_chat_completion from main import get_custom_print @@ -45,44 +45,72 @@ class TestLlmConnection: # # assert len(convo.messages) == 2 # assert response == ([{'type': 'command', 'description': 'Run the app'}], 'more_tasks') - def test_chat_completion_Architect(self, monkeypatch): - """Test the chat completion method.""" + # @pytest.fixture(params=[ + # {"endpoint": "OPENAI", "model": "gpt-4"}, + # {"endpoint": "OPENROUTER", "model": "openai/gpt-3.5-turbo"}, + # {"endpoint": "OPENROUTER", "model": "meta-llama/codellama-34b-instruct"}, + # {"endpoint": "OPENROUTER", "model": "anthropic/claude-2"}, + # {"endpoint": "OPENROUTER", "model": "google/palm-2-codechat-bison"}, + # {"endpoint": "OPENROUTER", "model": "google/palm-2-chat-bison"}, + # ]) + # def params(self, request): + # return request.param + + @pytest.mark.slow + @pytest.mark.uses_tokens + @pytest.mark.parametrize("endpoint, model", [ + ("OPENAI", "gpt-4"), # role: system + ("OPENROUTER", "openai/gpt-3.5-turbo"), # role: user + ("OPENROUTER", 
"meta-llama/codellama-34b-instruct"), # rule: user, is_llama + ("OPENROUTER", "google/palm-2-chat-bison"), # role: user/system + + # See https://github.com/1rgs/jsonformer-claude/blob/main/jsonformer_claude/main.py + # ("OPENROUTER", "anthropic/claude-2"), # role: user, prompt 2 + # ("OPENROUTER", "google/palm-2-codechat-bison"), # not working + ]) + def test_chat_completion_Architect(self, endpoint, model, monkeypatch): # Given agent = Architect(project) convo = AgentConvo(agent) convo.construct_and_add_message_from_prompt('architecture/technologies.prompt', - { - 'name': 'Test App', - 'prompt': ''' - The project involves the development of a web-based chat application named "Test_App". - In this application, users can send direct messages to each other. - However, it does not include a group chat functionality. - Multimedia messaging, such as the exchange of images and videos, is not a requirement for this application. - No clear instructions were given for the inclusion of user profile customization features like profile - picture and status updates, as well as a feature for chat history. The project must be developed strictly - as a monolithic application, regardless of any other suggested methods. - The project's specifications are subject to the project manager's discretion, implying a need for - solution-oriented decision-making in areas where precise instructions were not provided.''', - 'app_type': 'web app', - 'user_stories': [ - 'User will be able to send direct messages to another user.', - 'User will receive direct messages from other users.', - 'User will view the sent and received messages in a conversation view.', - 'User will select a user to send a direct message.', - 'User will be able to search for users to send direct messages to.', - 'Users can view the online status of other users.', - 'User will be able to log into the application using their credentials.', - 'User will be able to logout from the Test_App.', - 'User will be able to register a new account on Test_App.', - ] - }) + { + 'name': 'Test App', + 'prompt': ''' +The project involves the development of a web-based chat application named "Test_App". +In this application, users can send direct messages to each other. +However, it does not include a group chat functionality. +Multimedia messaging, such as the exchange of images and videos, is not a requirement for this application. +No clear instructions were given for the inclusion of user profile customization features like profile +picture and status updates, as well as a feature for chat history. The project must be developed strictly +as a monolithic application, regardless of any other suggested methods. 
+The project's specifications are subject to the project manager's discretion, implying a need for
+solution-oriented decision-making in areas where precise instructions were not provided.''',
+                'app_type': 'web app',
+                'user_stories': [
+                    'User will be able to send direct messages to another user.',
+                    'User will receive direct messages from other users.',
+                    'User will view the sent and received messages in a conversation view.',
+                    'User will select a user to send a direct message.',
+                    'User will be able to search for users to send direct messages to.',
+                    'Users can view the online status of other users.',
+                    'User will be able to log into the application using their credentials.',
+                    'User will be able to logout from the Test_App.',
+                    'User will be able to register a new account on Test_App.',
+                ]
+            })
+
+        # endpoint = 'OPENROUTER'
+        # monkeypatch.setattr('utils.llm_connection.endpoint', endpoint)
+        monkeypatch.setenv('ENDPOINT', endpoint)
+        monkeypatch.setenv('MODEL_NAME', model)
+        # monkeypatch.setenv('MODEL_NAME', 'meta-llama/codellama-34b-instruct')
+        # monkeypatch.setenv('MODEL_NAME', 'openai/gpt-3.5-turbo-16k-0613')
+        # monkeypatch.setenv('MODEL_NAME', 'anthropic/claude-2')  # TODO: remove ```json\n ... ```
+        # monkeypatch.setenv('MODEL_NAME', 'google/palm-2-codechat-bison')  # TODO: not JSON
+        # monkeypatch.setenv('MODEL_NAME', 'google/palm-2-chat-bison')  # TODO: not JSON
 
         messages = convo.messages
         function_calls = ARCHITECTURE
-        endpoint = 'OPENROUTER'
-        # monkeypatch.setattr('utils.llm_connection.endpoint', endpoint)
-        monkeypatch.setenv('ENDPOINT', endpoint)
-        monkeypatch.setenv('MODEL_NAME', 'meta-llama/codellama-34b-instruct')
 
         # with patch('.llm_connection.endpoint', endpoint):
         # When
@@ -93,11 +121,10 @@ class TestLlmConnection:
         assert convo.messages[1]['content'].startswith('You are working in a software development agency')
 
         assert response is not None
-        response = convo.postprocess_response(response, function_calls)
+        response = parse_agent_response(response, function_calls)
         # response = response['function_calls']['arguments']['technologies']
         assert 'Node.js' in response
 
-
     def _create_convo(self, agent):
         convo = AgentConvo(agent)
\ No newline at end of file
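
Reviewer note, not part of the patch: a minimal sketch of the two prompt shapes
JsonPrompter.prompt() now produces, using the ARCHITECTURE definitions and the
'process_technologies' call name from the tests. The system sentence is quoted
from the patch; the schema portion comes from function_data(), which this patch
does not show, so it is abbreviated in the comments below.

    from const.function_calls import ARCHITECTURE
    from utils.function_calling import JsonPrompter

    # Llama-style instruct wrapping, selected when 'llama' appears in the model name.
    llama_prompter = JsonPrompter(is_llama=True)
    print(llama_prompter.prompt('Create a web-based chat app',
                                ARCHITECTURE['definitions'], 'process_technologies'))
    # [INST] <<SYS>>
    # Define the arguments for process_technologies to answer the user's question. 
    # In your response you must only use JSON output and provide no explanation or commentary.
    #
    # <schema for process_technologies, produced by function_data()>
    # <</SYS>>
    #
    # Create a web-based chat app [/INST]

    # Plain system/data/prompt text for every other chat model.
    plain_prompter = JsonPrompter()
    print(plain_prompter.prompt('Create a web-based chat app',
                                ARCHITECTURE['definitions'], 'process_technologies'))
    # Same body as above, without the [INST]/<<SYS>> wrapper.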
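
Reviewer note, not part of the patch: a hedged end-to-end sketch of the new
request/parse flow. The 'process_technologies' schema below is a hypothetical
stand-in for the real ARCHITECTURE definition; everything else uses only
functions introduced or touched by this patch.

    from utils.function_calling import add_function_calls_to_request, parse_agent_response

    function_calls = {
        'definitions': [{
            'name': 'process_technologies',  # hypothetical schema, for illustration only
            'description': 'Lists the technologies to be used in the project.',
            'parameters': {
                'type': 'object',
                'properties': {
                    'technologies': {'type': 'array', 'items': {'type': 'string'}},
                },
            },
        }],
    }

    # 'llama' in the model name selects the [INST] template; the '/' in an
    # OpenRouter-style model name selects role 'user' instead of 'system'.
    gpt_data = {'model': 'meta-llama/codellama-34b-instruct', 'messages': []}
    add_function_calls_to_request(gpt_data, function_calls)
    assert gpt_data['messages'][0]['role'] == 'user'

    # parse_agent_response() strips an optional ```json ... ``` fence, parses the
    # object, and returns the bare value when the object has a single key.
    response = {'text': '```json\n{"technologies": ["Node.js", "Express"]}\n```'}
    assert parse_agent_response(response, function_calls) == ['Node.js', 'Express']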