Mirror of https://github.com/OMGeeky/gpt-pilot.git, synced 2026-01-04 18:30:20 +01:00

Commit: Merge remote-tracking branch 'origin/main'

# Conflicts:
#   README.md
#   pilot/helpers/AgentConvo.py
#   pilot/utils/llm_connection.py
.gitignore (vendored, 342 changed lines)
@@ -1,171 +1,171 @@
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class

# C extensions
*.so

# Distribution / packaging
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
share/python-wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST

# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec

# Installer logs
pip-log.txt
pip-delete-this-directory.txt

# Unit test / coverage reports
htmlcov/
.tox/
.nox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
*.py,cover
.hypothesis/
.pytest_cache/
cover/

# Translations
*.mo
*.pot

# Django stuff:
*.log
local_settings.py
db.sqlite3
db.sqlite3-journal

# Flask stuff:
instance/
.webassets-cache

# Scrapy stuff:
.scrapy

# Sphinx documentation
docs/_build/

# PyBuilder
.pybuilder/
target/

# Jupyter Notebook
.ipynb_checkpoints

# IPython
profile_default/
ipython_config.py

# pyenv
# For a library or package, you might want to ignore these files since the code is
# intended to run in multiple environments; otherwise, check them in:
# .python-version

# pipenv
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
# However, in case of collaboration, if having platform-specific dependencies or dependencies
# having no cross-platform support, pipenv may install dependencies that don't work, or not
# install all needed dependencies.
#Pipfile.lock

# poetry
# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
# This is especially recommended for binary packages to ensure reproducibility, and is more
# commonly ignored for libraries.
# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
#poetry.lock

# pdm
# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
#pdm.lock
# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
# in version control.
# https://pdm.fming.dev/#use-with-ide
.pdm.toml

# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
__pypackages__/

# Celery stuff
celerybeat-schedule
celerybeat.pid

# SageMath parsed files
*.sage.py

# Environments
.env
.venv
env/
venv/
ENV/
env.bak/
venv.bak/

# Spyder project settings
.spyderproject
.spyproject

# Rope project settings
.ropeproject

# mkdocs documentation
/site

# mypy
.mypy_cache/
.dmypy.json
dmypy.json

# Pyre type checker
.pyre/

# pytype static type analyzer
.pytype/

# Cython debug symbols
cython_debug/

# PyCharm
# JetBrains specific template is maintained in a separate JetBrains.gitignore that can
# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
# and can be added to the global gitignore or merged into this file. For a more nuclear
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
.idea/


# Logger
/pilot/logger/debug.log

#sqlite
/pilot/gpt-pilot

# workspace
workspace
pilot-env/
Dockerfile (new file, 29 lines)
@@ -0,0 +1,29 @@
FROM python:3

# Download precompiled ttyd binary from GitHub releases
RUN apt-get update && \
    apt-get install -y wget && \
    wget https://github.com/tsl0922/ttyd/releases/download/1.6.3/ttyd.x86_64 -O /usr/bin/ttyd && \
    chmod +x /usr/bin/ttyd && \
    apt-get remove -y wget && \
    apt-get autoremove -y && \
    rm -rf /var/lib/apt/lists/*

ENV NVM_DIR /root/.nvm

RUN curl -o- https://raw.githubusercontent.com/nvm-sh/nvm/v0.39.5/install.sh | bash \
    && . "$NVM_DIR/nvm.sh" \
    && nvm install node \
    && nvm use node

WORKDIR /usr/src/app
COPY . .
RUN pip install --no-cache-dir -r requirements.txt
RUN python -m venv pilot-env
RUN /bin/bash -c "source pilot-env/bin/activate"

WORKDIR /usr/src/app/pilot
RUN pip install -r requirements.txt

EXPOSE 7681
CMD ["ttyd", "bash"]
README.md (96 changed lines)
@@ -1,9 +1,30 @@
# 🧑✈️ GPT PILOT
### GPT Pilot codes the entire app as you oversee the code being written
### GPT Pilot helps developers build apps 20x faster

You specify what kind of an app you want to build. Then, GPT Pilot asks clarifying questions, creates the product and technical requirements, sets up the environment, and **starts coding the app step by step, like in real life, while you oversee the development process**. It asks you to review each task it finishes or to help when it gets stuck. This way, GPT Pilot acts as a coder while you are a lead dev who reviews code and helps when needed.

---

This is a research project to see how GPT-4 can be utilized to generate fully working, production-ready apps. **The main idea is that AI can write most of the code for an app (maybe 95%), but for the remaining 5%, a developer is and will be needed until we get full AGI**.
<!-- TOC -->
* [🔌 Requirements](#-requirements)
* [🚦How to start using gpt-pilot?](#how-to-start-using-gpt-pilot)
* [🧑💻️ Other arguments](#%EF%B8%8F-other-arguments)
* [🔎 Examples](#-examples)
  * [Real-time chat app](#-real-time-chat-app)
  * [Markdown editor](#-markdown-editor)
  * [Timer app](#%EF%B8%8F-timer-app)
* [🏛 Main pillars of GPT Pilot](#-main-pillars-of-gpt-pilot)
* [🏗 How GPT Pilot works?](#-how-gpt-pilot-works)
* [🕴How's GPT Pilot different from _Smol developer_ and _GPT engineer_?](#hows-gpt-pilot-different-from-smol-developer-and-gpt-engineer)
* [🍻 Contributing](#-contributing)
* [🔗 Connect with us](#-connect-with-us)
<!-- TOC -->

---

The goal of GPT Pilot is to research how far GPT-4 can be utilized to generate fully working, production-ready apps while the developer oversees the implementation.

**The main idea is that AI can write most of the code for an app (maybe 95%), but for the remaining 5%, a developer is and will be needed until we get full AGI**.

I've broken down the idea behind GPT Pilot and how it works in the following blog posts:

@@ -15,26 +36,17 @@ I've broken down the idea behind GPT Pilot and how it works in the following blo

---


<div align="center">

### **[👉 Examples of apps written by GPT Pilot 👈](#-examples)**

</div>

<br>

https://github.com/Pythagora-io/gpt-pilot/assets/10895136/0495631b-511e-451b-93d5-8a42acf22d3d

<br>

## Main pillars of GPT Pilot:
1. For AI to create a fully working app, **a developer needs to be involved** in the process of app creation. They need to be able to change the code at any moment, and GPT Pilot needs to continue working with those changes (e.g. add an API key, or fix an issue if the AI gets stuck). <br><br>
2. **The app needs to be written step by step, as a developer would write it** - let's say you want to create a simple app, you know everything you need to code, and you have the entire architecture in your head. Even then, you won't code it out entirely, then run it for the first time and debug all the issues at once. Rather, you will implement something simple, like adding routes, run it, see how it works, and then move on to the next task. This way, you can debug issues as they arise. The same should hold when AI codes. It will make mistakes for sure, so to give it an easier time debugging and to let the developer understand what is happening, the AI shouldn't just spit out the entire codebase at once. Rather, the app should be developed step by step, just as a developer would code it - e.g. set up routes, add a database connection, etc. <br><br>
3. **The approach needs to be scalable** so that AI can create a production-ready app
   1. **Context rewinding** - for solving each development task, the context size of the first message to the LLM has to be roughly the same. For example, the context size of the first LLM message while implementing development task #5 has to be more or less the same as the first message while developing task #50. Because of this, the conversation needs to be rewound to the first message upon each task. [See the diagram here](https://blogpythagora.files.wordpress.com/2023/08/pythagora-product-development-frame-3-1.jpg?w=1714).
   2. **Recursive conversations** are LLM conversations that are set up in a way that they can be used “recursively”. For example, if GPT Pilot detects an error, it needs to debug it, but let’s say that, during the debugging process, another error happens. Then, GPT Pilot needs to stop debugging the first issue, fix the second one, and then get back to fixing the first issue. This is a very important concept that, I believe, needs to work in order for AI to build large and scalable apps by itself. It works by rewinding the context and explaining each error in the recursion separately. Once the deepest-level error is fixed, we move up in the recursion and continue fixing that error. We do this until the entire recursion is completed.
   3. **TDD (Test Driven Development)** - for GPT Pilot to be able to scale the codebase, it will need to be able to create new code without breaking previously written code. There is no better way to do this than working with the TDD methodology. For each piece of code GPT Pilot writes, it needs to write tests that check whether the code works as intended, so that whenever new changes are made, all previous tests can be run.

The idea is that AI won't be able to (at least in the near future) create apps from scratch without the developer being involved. That's why we created an interactive tool that generates code but also requires the developer to check each step so that they can understand what's going on and so that the AI can have a better overview of the entire codebase.

Obviously, it still can't create any production-ready app, but the general concept of how this could work is there.

# 🔌 Requirements

- **Python**
@@ -63,6 +75,19 @@ All generated code will be stored in the folder `workspace` inside the folder na
**IMPORTANT: To run GPT Pilot, you need to have PostgreSQL set up on your machine**
<br>


# 🐳 How to start gpt-pilot in docker?
1. `git clone https://github.com/Pythagora-io/gpt-pilot.git` (clone the repo)
2. Update the `docker-compose.yml` environment variables
3. Run `docker compose build`. This builds a gpt-pilot container for you.
4. Run `docker compose up`.
5. Access the web terminal on port 7681
6. `python db_init.py` (initialize the database)
7. `python main.py` (start GPT Pilot)

This will start two containers, one being a new image built from the `Dockerfile` and the other a Postgres database. The new image also has [ttyd](https://github.com/tsl0922/ttyd) installed so that you can easily interact with gpt-pilot.


# 🧑💻️ CLI arguments

## `app_type` and `name`
@@ -153,34 +178,36 @@ python main.py app_id=<ID_OF_THE_APP> skip_until_dev_step=0

Here are a couple of example apps GPT Pilot created by itself:

### Real-time chat app
### 📱 Real-time chat app
- 💬 Prompt: `A simple chat app with real time communication`
- ▶️ [Video of the app creation process](https://youtu.be/bUj9DbMRYhA)
- 💻️ [GitHub repo](https://github.com/Pythagora-io/gpt-pilot-chat-app-demo)

<p align="left">
<img src="https://github.com/Pythagora-io/gpt-pilot/assets/10895136/85bc705c-be88-4ca1-9a3b-033700b97a22" alt="gpt-pilot demo chat app" width="500px"/>
</p>


### Markdown editor
### 📝 Markdown editor
- 💬 Prompt: `Build a simple markdown editor using HTML, CSS, and JavaScript. Allow users to input markdown text and display the formatted output in real-time.`
- ▶️ [Video of the app creation process](https://youtu.be/uZeA1iX9dgg)
- 💻️ [GitHub repo](https://github.com/Pythagora-io/gpt-pilot-demo-markdown-editor.git)

<p align="left">
<img src="https://github.com/Pythagora-io/gpt-pilot/assets/10895136/dbe1ccc3-b126-4df0-bddb-a524d6a386a8" alt="gpt-pilot demo markdown editor" width="500px"/>
</p>


### Timer app
### ⏱️ Timer app
- 💬 Prompt: `Create a simple timer app using HTML, CSS, and JavaScript that allows users to set a countdown timer and receive an alert when the time is up.`
- ▶️ [Video of the app creation process](https://youtu.be/CMN3W18zfiE)
- 💻️ [GitHub repo](https://github.com/Pythagora-io/gpt-pilot-timer-app-demo)

<p align="left">
<img src="https://github.com/Pythagora-io/gpt-pilot/assets/10895136/93bed40b-b769-4c8b-b16d-b80fb6fc73e0" alt="gpt-pilot demo timer app" width="500px"/>
</p>
<br>

# 🏛 Main pillars of GPT Pilot:
1. For AI to create a fully working app, **a developer needs to be involved** in the process of app creation. They need to be able to change the code at any moment, and GPT Pilot needs to continue working with those changes (e.g. add an API key, or fix an issue if the AI gets stuck). <br><br>
2. **The app needs to be written step by step, as a developer would write it** - let's say you want to create a simple app, you know everything you need to code, and you have the entire architecture in your head. Even then, you won't code it out entirely, then run it for the first time and debug all the issues at once. Rather, you will implement something simple, like adding routes, run it, see how it works, and then move on to the next task. This way, you can debug issues as they arise. The same should hold when AI codes. It will make mistakes for sure, so to give it an easier time debugging and to let the developer understand what is happening, the AI shouldn't just spit out the entire codebase at once. Rather, the app should be developed step by step, just as a developer would code it - e.g. set up routes, add a database connection, etc. <br><br>
3. **The approach needs to be scalable** so that AI can create a production-ready app
   1. **Context rewinding** - for solving each development task, the context size of the first message to the LLM has to be roughly the same. For example, the context size of the first LLM message while implementing development task #5 has to be more or less the same as the first message while developing task #50. Because of this, the conversation needs to be rewound to the first message upon each task. [See the diagram here](https://blogpythagora.files.wordpress.com/2023/08/pythagora-product-development-frame-3-1.jpg?w=1714).
   2. **Recursive conversations** are LLM conversations that are set up in a way that they can be used “recursively”. For example, if GPT Pilot detects an error, it needs to debug it, but let’s say that, during the debugging process, another error happens. Then, GPT Pilot needs to stop debugging the first issue, fix the second one, and then get back to fixing the first issue. This is a very important concept that, I believe, needs to work in order for AI to build large and scalable apps by itself. It works by rewinding the context and explaining each error in the recursion separately. Once the deepest-level error is fixed, we move up in the recursion and continue fixing that error. We do this until the entire recursion is completed (see the sketch after this list).
   3. **TDD (Test Driven Development)** - for GPT Pilot to be able to scale the codebase, it will need to be able to create new code without breaking previously written code. There is no better way to do this than working with the TDD methodology. For each piece of code GPT Pilot writes, it needs to write tests that check whether the code works as intended, so that whenever new changes are made, all previous tests can be run.
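
To make the recursive-conversation pillar concrete, here is a minimal sketch of the control flow it describes. This is illustrative only, not GPT Pilot's actual code: `convo`, `send_message`, and `TaskError` stand in for whatever conversation object and failure signal the implementation uses.

```python
# Illustrative sketch of "recursive conversations": each error gets its own
# debugging sub-conversation; an error raised while debugging is fixed first,
# then control returns to the outer error. All names here are hypothetical.
class TaskError(Exception):
    """Raised when executing a task surfaces an error that needs debugging."""


def run_with_recursive_debugging(convo, task, depth=0, max_depth=5):
    if depth > max_depth:
        raise RuntimeError('giving up: debugging recursion too deep')
    checkpoint = len(convo.messages)       # remember where this attempt started
    try:
        return convo.send_message('execute_task', task)
    except TaskError as inner_error:
        del convo.messages[checkpoint:]    # rewind context to keep it small
        fix = f'Fix this error before continuing: {inner_error}'
        run_with_recursive_debugging(convo, fix, depth + 1, max_depth)
        # deepest error fixed; move back up and retry the original task
        return run_with_recursive_debugging(convo, task, depth + 1, max_depth)
```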

The idea is that AI won't be able to (at least in the near future) create apps from scratch without the developer being involved. That's why we created an interactive tool that generates code but also requires the developer to check each step so that they can understand what's going on and so that the AI can have a better overview of the entire codebase.

Obviously, it still can't create any production-ready app, but the general concept of how this could work is there.

# 🏗 How GPT Pilot works?
Here are the steps GPT Pilot takes to create an app:
@@ -198,16 +225,15 @@ Here are the steps GPT Pilot takes to create an app:

For more details on the roles of agents employed by GPT Pilot, refer to [AGENTS.md](https://github.com/Pythagora-io/gpt-pilot/blob/main/pilot/helpers/agents/AGENTS.md)




<br>

# 🕴How's GPT Pilot different from _Smol developer_ and _GPT engineer_?
- **Human developer is involved throughout the process** - I don't think that AI can (at least in the near future) create apps without a developer being involved. Also, I think it's hard for a developer to get into a big codebase and try debugging it. That's why my idea was for AI to develop the app step by step, where each step is reviewed by the developer. If you want to change some code yourself, you can just change it, and GPT Pilot will continue developing on top of those changes.
- **GPT Pilot works with the developer to create a fully working, production-ready app** - I don't think that AI can (at least in the near future) create apps without a developer being involved. So, **GPT Pilot codes the app step by step** just like a developer would in real life. This way, it can debug issues as they arise throughout the development process. If it gets stuck, you, the developer in charge, can review the code and fix the issue. Other similar tools give you the entire codebase at once; that way, bugs are much harder to fix, both for the AI and for you as a developer.
<br><br>
- **Continuous development loops** - The goal behind this project was to see how we can create recursive conversations with GPT so that it can debug any issue and implement any feature. For example, after the app is generated, you can always add more instructions about what you want to implement or debug. I wanted to see if this can be so flexible that, regardless of the app's size, it can just iterate and build bigger and bigger apps.
<br><br>
- **Auto debugging** - when it detects an error, it debugs it by itself. I still haven't implemented writing automated tests, which should make this fully autonomous, but for now, you can input the error that's happening (e.g. within a UI) and GPT Pilot will debug it from there. The plan is to make it write automated tests in Cypress as well, so that it can test by itself and debug without the developer's explanation.
- **Works at scale** - GPT Pilot isn't meant to create simple apps; rather, it should work at any scale. It has mechanisms that filter the code so that in each LLM conversation it doesn't need to store the entire codebase in context; it shows the LLM only the code that is relevant for the current task it's working on. Once an app is finished, you can always continue working on it by writing instructions about what feature you want to add.

# 🍻 Contributing
If you are interested in contributing to GPT Pilot, I would be more than happy to have you on board and to help you get started. Feel free to ping [zvonimir@pythagora.ai](mailto:zvonimir@pythagora.ai) and I'll help you get started.

docker-compose.yml (new file, 40 lines)
@@ -0,0 +1,40 @@
version: '3'
services:
  gpt-pilot:
    environment:
      # OPENAI or AZURE
      - ENDPOINT=OPENAI
      - OPENAI_API_KEY=
      # - AZURE_API_KEY=
      # - AZURE_ENDPOINT=
      # In case of Azure endpoint, change this to your deployed model name
      - MODEL_NAME=gpt-4
      - MAX_TOKENS=8192
      - DATABASE_TYPE=postgres
      - DB_NAME=pilot
      - DB_HOST=postgres
      - DB_PORT=5432
      - DB_USER=pilot
      - DB_PASSWORD=pilot
    build:
      context: .
      dockerfile: Dockerfile
    ports:
      - "7681:7681"
    depends_on:
      postgres:
        condition: service_healthy
  postgres:
    image: postgres
    restart: always
    environment:
      POSTGRES_USER: pilot
      POSTGRES_PASSWORD: pilot
      POSTGRES_DB: pilot
    ports:
      - "5432:5432"
    healthcheck:
      test: ["CMD-SHELL", "pg_isready -U pilot"]
      interval: 30s
      timeout: 10s
      retries: 3
@@ -1,13 +1,25 @@
- #OPENAI or AZURE
+ # OPENAI or AZURE or OPENROUTER
ENDPOINT=OPENAI

OPENAI_ENDPOINT=
OPENAI_API_KEY=

AZURE_API_KEY=
AZURE_ENDPOINT=
- #In case of Azure endpoint, change this to your deployed model name

OPENROUTER_API_KEY=
OPENROUTER_ENDPOINT=https://openrouter.ai/api/v1/chat/completions

+ # In case of Azure/OpenRouter endpoint, change this to your deployed model name
MODEL_NAME=gpt-4
# MODEL_NAME=openai/gpt-3.5-turbo-16k
MAX_TOKENS=8192

# Database
# DATABASE_TYPE=postgres

DB_NAME=gpt-pilot
- DB_HOST=localhost
- DB_PORT=5432
- DB_USER=admin
- DB_PASSWORD=admin
+ DB_HOST=
+ DB_PORT=
+ DB_USER=
+ DB_PASSWORD=
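These values are loaded at runtime with `python-dotenv` (the test file later in this commit calls `load_dotenv()` the same way). A minimal sketch of consuming the database variables, with `psycopg2` as an illustrative driver choice rather than necessarily the one the project itself uses:

```python
# Minimal sketch: read the .env database settings and open the connection
# they describe. psycopg2 is an illustrative driver choice, not necessarily
# what gpt-pilot uses internally.
import os

import psycopg2
from dotenv import load_dotenv

load_dotenv()

conn = psycopg2.connect(
    dbname=os.getenv('DB_NAME', 'gpt-pilot'),
    host=os.getenv('DB_HOST', 'localhost'),
    port=os.getenv('DB_PORT', '5432'),
    user=os.getenv('DB_USER'),
    password=os.getenv('DB_PASSWORD'),
)
```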
@@ -231,10 +231,8 @@ def save_development_step(project, prompt_path, prompt_data, messages, llm_respo
    development_step = hash_and_save_step(DevelopmentSteps, project.args['app_id'], hash_data_args, data_fields, "Saved Development Step")
    project.checkpoints['last_development_step'] = development_step

    project.save_files_snapshot(development_step.id)

    return development_step

@@ -42,10 +42,6 @@ class AgentConvo:
        # craft message
        self.construct_and_add_message_from_prompt(prompt_path, prompt_data)

        # TODO: should this be "... 'functions' in function_calls:"?
        if function_calls is not None and 'function_calls' in function_calls:
            self.messages[-1]['content'] += '\nMAKE SURE THAT YOU RESPOND WITH A CORRECT JSON FORMAT!!!'

        # check if we already have the LLM response saved
        if self.agent.__class__.__name__ == 'Developer':
            self.agent.project.llm_req_num += 1
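
For reference, `function_calls` here has the shape documented on `create_gpt_chat_completion` further down, i.e. `{'definitions': [{'name': str}, ...]}`. A hypothetical value is shown below; the fields beyond `definitions` and `name` are illustrative assumptions, not taken from this repo.

```python
# Hypothetical `function_calls` value matching the documented shape
# {'definitions': [{'name': str}, ...]}. Note it has no 'function_calls' key,
# so the guard above would not append the JSON-format reminder for it, which
# is exactly what the TODO in this hunk is questioning.
function_calls = {
    'definitions': [
        {
            'name': 'implement_changes',
            'description': 'Return the full content of every changed file.',  # assumed field
            'parameters': {                                                   # assumed field
                'type': 'object',
                'properties': {
                    'files': {'type': 'array', 'items': {'type': 'object'}},
                },
            },
        },
    ],
}
```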
@@ -168,7 +168,7 @@ class Project:
        Save a file.

        Args:
-            data (dict): File data.
+            data: { name: 'hello.py', path: 'path/to/hello.py', content: 'print("Hello!")' }
        """
        # TODO fix this in prompts
        if ' ' in data['name'] or '.' not in data['name']:
pilot/helpers/agents/test_CodeMonkey.py (new file, 120 lines)
@@ -0,0 +1,120 @@
import re
import os
from unittest.mock import patch, Mock, MagicMock
from dotenv import load_dotenv
load_dotenv()

from .CodeMonkey import CodeMonkey
from .Developer import Developer
from database.models.files import File
from helpers.Project import Project, update_file, clear_directory
from helpers.AgentConvo import AgentConvo

SEND_TO_LLM = False
WRITE_TO_FILE = False


def mock_terminal_size():
    mock_size = Mock()
    mock_size.columns = 80  # or whatever width you want
    return mock_size


class TestCodeMonkey:
    def setup_method(self):
        name = 'TestDeveloper'
        self.project = Project({
                'app_id': 'test-developer',
                'name': name,
                'app_type': ''
            },
            name=name,
            architecture=[],
            user_stories=[],
            current_step='coding',
        )

        self.project.root_path = os.path.abspath(os.path.join(os.path.dirname(os.path.abspath(__file__)),
                                                              '../../../workspace/TestDeveloper'))
        self.project.technologies = []
        self.project.app = None
        self.developer = Developer(self.project)
        self.codeMonkey = CodeMonkey(self.project, developer=self.developer)

    @patch('helpers.AgentConvo.get_development_step_from_hash_id', return_value=None)
    @patch('helpers.AgentConvo.save_development_step', return_value=None)
    @patch('os.get_terminal_size', mock_terminal_size)
    @patch.object(File, 'insert')
    def test_implement_code_changes(self, mock_get_dev, mock_save_dev, mock_file_insert):
        # Given
        code_changes_description = "Write the word 'Washington' to a .txt file"

        if SEND_TO_LLM:
            convo = AgentConvo(self.codeMonkey)
        else:
            convo = MagicMock()
            mock_responses = [
                [],
                [{
                    'content': 'Washington',
                    'description': "A new .txt file with the word 'Washington' in it.",
                    'name': 'washington.txt',
                    'path': 'washington.txt'
                }]
            ]
            convo.send_message.side_effect = mock_responses

        if WRITE_TO_FILE:
            self.codeMonkey.implement_code_changes(convo, code_changes_description)
        else:
            # don't write the file, just assert what would have been saved
            with patch.object(Project, 'save_file') as mock_save_file:
                # When
                self.codeMonkey.implement_code_changes(convo, code_changes_description)

                # Then
                mock_save_file.assert_called_once()
                called_data = mock_save_file.call_args[0][0]
                assert re.match(r'\w+\.txt$', called_data['name'])
                assert (called_data['path'] == '/' or called_data['path'] == called_data['name'])
                assert called_data['content'] == 'Washington'

    @patch('helpers.AgentConvo.get_development_step_from_hash_id', return_value=None)
    @patch('helpers.AgentConvo.save_development_step', return_value=None)
    @patch('os.get_terminal_size', mock_terminal_size)
    @patch.object(File, 'insert')
    def test_implement_code_changes_with_read(self, mock_get_dev, mock_save_dev, mock_file_insert):
        # Given
        code_changes_description = "Read the file called file_to_read.txt and write its content to a file called output.txt"
        workspace = self.project.root_path
        update_file(os.path.join(workspace, 'file_to_read.txt'), 'Hello World!\n')

        if SEND_TO_LLM:
            convo = AgentConvo(self.codeMonkey)
        else:
            convo = MagicMock()
            mock_responses = [
                ['file_to_read.txt', 'output.txt'],
                [{
                    'content': 'Hello World!\n',
                    'description': 'This file is the output file. The content of file_to_read.txt is copied into this file.',
                    'name': 'output.txt',
                    'path': 'output.txt'
                }]
            ]
            convo.send_message.side_effect = mock_responses

        if WRITE_TO_FILE:
            self.codeMonkey.implement_code_changes(convo, code_changes_description)
        else:
            with patch.object(Project, 'save_file') as mock_save_file:
                # When
                self.codeMonkey.implement_code_changes(convo, code_changes_description)

                # Then
                clear_directory(workspace)
                mock_save_file.assert_called_once()
                called_data = mock_save_file.call_args[0][0]
                assert called_data['name'] == 'output.txt'
                assert (called_data['path'] == '/' or called_data['path'] == called_data['name'])
                assert called_data['content'] == 'Hello World!\n'
@@ -1 +1 @@
- Ok, now, take your previous message and convert it to actionable items. An item might be a code change or a command run. When you need to change code, make sure that you put the entire content of the file in the value of `content` key even though you will likely copy and paste the most of the previous messsage.
+ Ok, now, take your previous message and convert it to actionable items. An item might be a code change or a command run. When you need to change code, make sure that you put the entire content of the file in the value of `content` key even though you will likely copy and paste the most of the previous message.
@@ -103,10 +103,12 @@ def create_gpt_chat_completion(messages: List[dict], req_type, min_tokens=MIN_TO
    :param min_tokens: defaults to 600
    :param function_calls: (optional) {'definitions': [{ 'name': str }, ...]}
        see `IMPLEMENT_CHANGES` etc. in `pilot/const/function_calls.py`
-    :return: {'text': new_code} or (if `function_calls` param provided) {'function_calls': function_calls}
+    :return: {'text': new_code}
+        or if `function_calls` param provided
+        {'function_calls': {'name': str, arguments: {...}}}
    """
    gpt_data = {
-        'model': os.getenv('OPENAI_MODEL', 'gpt-4'),
+        'model': os.getenv('MODEL_NAME', 'gpt-4'),
        'n': 1,
        'max_tokens': 4096,
        'temperature': 1,
@@ -117,7 +119,15 @@ def create_gpt_chat_completion(messages: List[dict], req_type, min_tokens=MIN_TO
        'stream': True
    }

+    # delete some keys if using "OpenRouter" API
+    if os.getenv('ENDPOINT') == "OPENROUTER":
+        keys_to_delete = ['n', 'max_tokens', 'temperature', 'top_p', 'presence_penalty', 'frequency_penalty']
+        for key in keys_to_delete:
+            if key in gpt_data:
+                del gpt_data[key]

    if function_calls is not None:
        # Advise the LLM of the JSON response schema we are expecting
        gpt_data['functions'] = function_calls['definitions']
        if len(function_calls['definitions']) > 1:
            # DEV_STEPS
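
Based on the docstring above, a caller would branch on the two documented return shapes roughly like this. This is a sketch under those documented shapes only; `messages`, `function_calls`, and `handle_function_call` are placeholders, not code from this repo.

```python
# Sketch: consume the two return shapes create_gpt_chat_completion documents.
import json

response = create_gpt_chat_completion(messages, 'coding', function_calls=function_calls)

if 'function_calls' in response:
    call = response['function_calls']   # {'name': str, 'arguments': ...}
    args = call['arguments']
    if isinstance(args, str):           # stream_gpt_completion documents the
        args = json.loads(args)         # arguments as a JSON string: '{...}'
    handle_function_call(call['name'], args)  # hypothetical dispatcher
else:
    new_code = response['text']         # plain text completion
```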
@@ -200,7 +210,7 @@ def stream_gpt_completion(data, req_type):
    Called from create_gpt_chat_completion()
    :param data:
    :param req_type: 'project_description' etc. See common.STEPS
-    :return: {'text': str} or {'function_calls': function_calls}
+    :return: {'text': str} or {'function_calls': {'name': str, arguments: '{...}'}}
    """
    terminal_width = os.get_terminal_size().columns
    lines_printed = 2
@@ -223,17 +233,15 @@ def stream_gpt_completion(data, req_type):
    if endpoint == 'AZURE':
        # If yes, get the AZURE_ENDPOINT from .ENV file
        endpoint_url = os.getenv('AZURE_ENDPOINT') + '/openai/deployments/' + model + '/chat/completions?api-version=2023-05-15'
-        headers = {
-            'Content-Type': 'application/json',
-            'api-key': os.getenv('AZURE_API_KEY')
-        }
+        headers = {'Content-Type': 'application/json', 'api-key': os.getenv('AZURE_API_KEY')}
+    elif endpoint == 'OPENROUTER':
+        # If so, send the request to the OpenRouter API endpoint
+        headers = {'Content-Type': 'application/json', 'Authorization': 'Bearer ' + os.getenv("OPENROUTER_API_KEY"), 'HTTP-Referer': 'http://localhost:3000', 'X-Title': 'GPT Pilot (LOCAL)'}
+        endpoint_url = os.getenv("OPENROUTER_ENDPOINT", 'https://openrouter.ai/api/v1/chat/completions')
    else:
        # If not, send the request to the OpenAI endpoint
-        headers = {
-            'Content-Type': 'application/json',
-            'Authorization': 'Bearer ' + os.getenv("OPENAI_API_KEY")
-        }
-        endpoint_url = 'https://api.openai.com/v1/chat/completions'
+        headers = {'Content-Type': 'application/json', 'Authorization': 'Bearer ' + os.getenv("OPENAI_API_KEY")}
+        endpoint_url = os.getenv("OPENAI_ENDPOINT", 'https://api.openai.com/v1/chat/completions')

    response = requests.post(
        endpoint_url,
@@ -266,6 +274,10 @@ def stream_gpt_completion(data, req_type):

        try:
            json_line = json.loads(line)

+            if len(json_line['choices']) == 0:
+                continue

            if 'error' in json_line:
                logger.error(f'Error in LLM response: {json_line}')
                raise ValueError(f'Error in LLM response: {json_line["error"]["message"]}')
@@ -280,6 +292,7 @@ def stream_gpt_completion(data, req_type):
            logger.error(f'Unable to decode line: {line}')
            continue  # skip to the next line

        # handle the streaming response
        if 'function_call' in json_line:
            if 'name' in json_line['function_call']:
                function_calls['name'] = json_line['function_call']['name']
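
For context, the loop these hunks modify consumes an OpenAI-style streaming response, where each non-empty line is `data: {...}` and text arrives as `choices[0].delta` fragments. A simplified, standalone sketch of that pattern follows; it is an assumption about the surrounding code, which is not fully shown in this diff.

```python
# Simplified sketch of OpenAI-style SSE stream handling; `response` is a
# `requests` response opened with stream=True, as in the hunk above.
import json

def accumulate_stream_text(response):
    text = ''
    for raw_line in response.iter_lines():
        if not raw_line:
            continue
        line = raw_line.decode('utf-8')
        if line.startswith('data: '):   # SSE framing
            line = line[len('data: '):]
        if line == '[DONE]':            # end-of-stream sentinel
            break
        json_line = json.loads(line)
        if len(json_line.get('choices', [])) == 0:
            continue                    # skip chunks with no choices
        delta = json_line['choices'][0].get('delta', {})
        text += delta.get('content') or ''
    return text
```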