Niek · mkellerman · Apr 10, 2023 · Apr 10, 2023 · Apr 10, 2023 · Apr 10, 2023
diff --git a/.env b/.env
@@ -1,2 +1,3 @@
-# Uncomment the following line to use the mocked API
-#VITE_API_BASE=http://localhost:5174
+# The following line points the app at the local llama or mocked API; comment it out to use the default API
+VITE_API_BASE=http://localhost:5174
+
diff --git a/.gitignore b/.gitignore
@@ -25,3 +25,6 @@ dist-ssr
 *.sln
 *.sw?
 *.env
+
+# llama-api
+models
diff --git a/README.md b/README.md
@@ -44,6 +44,15 @@ git subtree pull --prefix src/awesome-chatgpt-prompts https://github.com/f/aweso
 docker compose up -d
 ```
 
+## Llama api
+If you want to run local/offline models with llama.cpp, you can use the llama-cpp-python API instead.
+
+To use the llama API:
+- Copy your models into a folder called `models` at the root of the project.
+- Edit the `docker-compose.yml` file at the root of the project and uncomment the `llama_api` service and its `depends_on` entry in the chatgpt-web service.
+- Edit the `.env` file at the root of the project and uncomment the key `VITE_API_BASE=http://localhost:5174` in it. You will also need to change the model path (the `--default-model` argument of the `llama_api` command in `docker-compose.yml`) to point to the model you wish to use.
+- Run the `docker compose up -d` command above.
+
 ## Mocked api
 If you don't want to wait for the API to respond, you can use the mocked API instead. To use the mocked API, edit the `.env` file at root of the project ans set the key `VITE_API_BASE=http://localhost:5174` in it. Then, run the `docker compose up -d` command above.
 

diff --git a/docker-compose.yml b/docker-compose.yml
@@ -5,7 +5,8 @@ services:
     container_name: chatgpt_web
     restart: always
     depends_on:
-      - mocked_api
+    #   - mocked_api
+      - llama_api
     env_file:
       - .env
     ports:
@@ -16,11 +17,19 @@ services:
       context: "."
       dockerfile: Dockerfile
 
-  mocked_api:
-    container_name: mocked_api
-    build:
-        context: "."
-        dockerfile: mocked_api/Dockerfile-mockapi
-    restart: always
+  # mocked_api:
+  #   container_name: mocked_api
+  #   build:
+  #       context: "."
+  #       dockerfile: mocked_api/Dockerfile-mockapi
+  #   restart: always
+  #   ports:
+  #     - 5174:5174
+
+  llama_api:
+    image: quay.io/go-skynet/llama-cli:latest
     ports:
-      - 5174:5174
+      - "5174:8080"  # host 5174 (the port used by VITE_API_BASE in .env) -> container 8080
+    volumes:
+      - ./models/:/models:cached  # host ./models directory mounted at /models
+    command: api --context-size 700 --threads 10 --models-path /models/7B --default-model /models/7B/ggml-vicuna-7b-4bit.bin