feat(llm): autopull ollama models (#2019)
* chore: update ollama (llm)

* feat: allow to autopull ollama models

* fix: mypy

* chore: install always ollama client

* refactor: check connection and pull ollama method to utils

* docs: update ollama config with autopulling info
jaluma authored Jul 29, 2024
1 parent dabf556 commit 20bad17
Showing 8 changed files with 129 additions and 21 deletions.
16 changes: 10 additions & 6 deletions fern/docs/pages/installation/installation.mdx
@@ -130,16 +130,20 @@ Go to [ollama.ai](https://ollama.ai/) and follow the instructions to install Ollama

After the installation, make sure the Ollama desktop app is closed.

Now, start the Ollama service (it will start a local inference server, serving both the LLM and the Embeddings):

```bash
ollama serve
```

Install the models to be used; the default `settings-ollama.yaml` is configured to use the `mistral 7b` LLM (~4GB) and `nomic-embed-text` Embeddings (~275MB).

By default, PGPT will automatically pull models as needed. This behavior can be changed by modifying the `ollama.autopull_models` property.

In any case, if you want to manually pull models, run the following commands:

```bash
ollama pull mistral
ollama pull nomic-embed-text
```
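If you want to confirm which models are already available locally, the official `ollama` Python client can list them. A minimal sketch, assuming the default host (this is not part of the official docs):

```python
# Sketch: list locally available Ollama models (assumes default host).
from ollama import Client

client = Client(host="http://localhost:11434")
for model in client.list().get("models", []):
    print(model["name"])  # e.g. "mistral:latest"
```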

Once done, on a different terminal, you can install PrivateGPT with the following command:
33 changes: 24 additions & 9 deletions poetry.lock

Some generated files are not rendered by default.

32 changes: 31 additions & 1 deletion private_gpt/components/embedding/embedding_component.py
@@ -71,16 +71,46 @@ def __init__(self, settings: Settings) -> None:
                    from llama_index.embeddings.ollama import (  # type: ignore
                        OllamaEmbedding,
                    )
                    from ollama import Client  # type: ignore
                except ImportError as e:
                    raise ImportError(
                        "Local dependencies not found, install with `poetry install --extras embeddings-ollama`"
                    ) from e

                ollama_settings = settings.ollama

                # Calculate the embedding model name. If no tag is provided, use ":latest".
                model_name = (
                    ollama_settings.embedding_model + ":latest"
                    if ":" not in ollama_settings.embedding_model
                    else ollama_settings.embedding_model
                )

                self.embedding_model = OllamaEmbedding(
                    model_name=model_name,
                    base_url=ollama_settings.embedding_api_base,
                )

                if ollama_settings.autopull_models:
                    from private_gpt.utils.ollama import (
                        check_connection,
                        pull_model,
                    )

                    # TODO: Reuse llama-index client when llama-index is updated
                    client = Client(
                        host=ollama_settings.embedding_api_base,
                        timeout=ollama_settings.request_timeout,
                    )

                    if not check_connection(client):
                        raise ValueError(
                            f"Failed to connect to Ollama, "
                            f"check if Ollama server is running on {ollama_settings.embedding_api_base}"
                        )
                    pull_model(client, model_name)

case "azopenai":
try:
from llama_index.embeddings.azure_openai import ( # type: ignore
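Both components repeat the same `:latest` tag-normalization rule. Extracted as a standalone function (the name `normalize_model_name` is ours, not the repo's), the rule behaves like this:

```python
# Hypothetical helper mirroring the inline conditional used above.
def normalize_model_name(model: str) -> str:
    """Append ':latest' when the Ollama model name carries no explicit tag."""
    return model if ":" in model else model + ":latest"


assert normalize_model_name("nomic-embed-text") == "nomic-embed-text:latest"
assert normalize_model_name("mistral:7b-instruct") == "mistral:7b-instruct"
```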
23 changes: 21 additions & 2 deletions private_gpt/components/llm/llm_component.py
@@ -146,15 +146,32 @@ def __init__(self, settings: Settings) -> None:
"repeat_penalty": ollama_settings.repeat_penalty, # ollama llama-cpp
}

self.llm = Ollama(
model=ollama_settings.llm_model,
# calculate llm model. If not provided tag, it will be use latest
model_name = (
ollama_settings.llm_model + ":latest"
if ":" not in ollama_settings.llm_model
else ollama_settings.llm_model
)

llm = Ollama(
model=model_name,
base_url=ollama_settings.api_base,
temperature=settings.llm.temperature,
context_window=settings.llm.context_window,
additional_kwargs=settings_kwargs,
request_timeout=ollama_settings.request_timeout,
)

if ollama_settings.autopull_models:
from private_gpt.utils.ollama import check_connection, pull_model

if not check_connection(llm.client):
raise ValueError(
f"Failed to connect to Ollama, "
f"check if Ollama server is running on {ollama_settings.api_base}"
)
pull_model(llm.client, model_name)

if (
ollama_settings.keep_alive
!= ollama_settings.model_fields["keep_alive"].default
Expand All @@ -172,6 +189,8 @@ def wrapper(*args: Any, **kwargs: Any) -> Any:
Ollama.complete = add_keep_alive(Ollama.complete)
Ollama.stream_complete = add_keep_alive(Ollama.stream_complete)

self.llm = llm

case "azopenai":
try:
from llama_index.llms.azure_openai import ( # type: ignore
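The body of `add_keep_alive` is elided by the diff; only the `def wrapper(...)` line in the hunk header and the two patched methods are visible. A plausible minimal sketch, assuming the decorator simply injects the configured `keep_alive` into every call (the kwarg-injection detail is our assumption):

```python
from typing import Any, Callable


def add_keep_alive(func: Callable[..., Any]) -> Callable[..., Any]:
    """Sketch: wrap an Ollama method so every call carries keep_alive."""

    def wrapper(*args: Any, **kwargs: Any) -> Any:
        kwargs["keep_alive"] = "5m"  # assumed: ollama_settings.keep_alive in the real code
        return func(*args, **kwargs)

    return wrapper
```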
4 changes: 4 additions & 0 deletions private_gpt/settings/settings.py
@@ -290,6 +290,10 @@ class OllamaSettings(BaseModel):
        120.0,
        description="Time elapsed until Ollama times out the request. Default is 120s. Format is float.",
    )
    autopull_models: bool = Field(
        False,
        description="If set to True, Ollama will automatically pull models from the API base.",
    )


class AzureOpenAISettings(BaseModel):
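A minimal pydantic sketch (a trimmed, hypothetical version of the class above) showing how the new flag defaults to `False` and can be overridden per deployment:

```python
from pydantic import BaseModel, Field


class OllamaSettings(BaseModel):  # trimmed sketch, not the full settings class
    autopull_models: bool = Field(
        False,
        description="If set to True, Ollama will automatically pull models from the API base.",
    )


print(OllamaSettings().autopull_models)                      # False (default)
print(OllamaSettings(autopull_models=True).autopull_models)  # True
```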
32 changes: 32 additions & 0 deletions private_gpt/utils/ollama.py
@@ -0,0 +1,32 @@
import logging

try:
    from ollama import Client  # type: ignore
except ImportError as e:
    raise ImportError(
        "Ollama dependencies not found, install with `poetry install --extras llms-ollama or embeddings-ollama`"
    ) from e

logger = logging.getLogger(__name__)


def check_connection(client: Client) -> bool:
    try:
        client.list()
        return True
    except Exception as e:
        logger.error(f"Failed to connect to Ollama: {e!s}")
        return False


def pull_model(client: Client, model_name: str, raise_error: bool = True) -> None:
    try:
        installed_models = [model["name"] for model in client.list().get("models", [])]
        if model_name not in installed_models:
            logger.info(f"Pulling model {model_name}. Please wait...")
            client.pull(model_name)
            logger.info(f"Model {model_name} pulled successfully")
    except Exception as e:
        logger.error(f"Failed to pull model {model_name}: {e!s}")
        if raise_error:
            raise e
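For reference, a short usage sketch of these helpers; the host, timeout, and model name are illustrative values, not repo defaults pulled from this file:

```python
# Illustrative usage of the helpers above; values are assumptions.
from ollama import Client

from private_gpt.utils.ollama import check_connection, pull_model

client = Client(host="http://localhost:11434", timeout=120.0)
if check_connection(client):
    pull_model(client, "mistral:latest")
```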
9 changes: 6 additions & 3 deletions pyproject.toml
@@ -22,7 +22,7 @@ llama-index-readers-file = "^0.1.27"
llama-index-llms-llama-cpp = {version = "^0.1.4", optional = true}
llama-index-llms-openai = {version = "^0.1.25", optional = true}
llama-index-llms-openai-like = {version ="^0.1.3", optional = true}
llama-index-llms-ollama = {version ="^0.2.2", optional = true}
llama-index-llms-azure-openai = {version ="^0.1.8", optional = true}
llama-index-llms-gemini = {version ="^0.1.11", optional = true}
llama-index-embeddings-ollama = {version ="^0.1.2", optional = true}
@@ -62,16 +62,19 @@ ffmpy = {git = "https://github.com/EuDs63/ffmpy.git", rev = "333a19ee4d21f32537c
# Optional Google Gemini dependency
google-generativeai = {version ="^0.5.4", optional = true}

# Optional Ollama client
ollama = {version ="^0.3.0", optional = true}

[tool.poetry.extras]
ui = ["gradio", "ffmpy"]
llms-llama-cpp = ["llama-index-llms-llama-cpp"]
llms-openai = ["llama-index-llms-openai"]
llms-openai-like = ["llama-index-llms-openai-like"]
llms-ollama = ["llama-index-llms-ollama", "ollama"]
llms-sagemaker = ["boto3"]
llms-azopenai = ["llama-index-llms-azure-openai"]
llms-gemini = ["llama-index-llms-gemini", "google-generativeai"]
embeddings-ollama = ["llama-index-embeddings-ollama", "ollama"]
embeddings-huggingface = ["llama-index-embeddings-huggingface"]
embeddings-openai = ["llama-index-embeddings-openai"]
embeddings-sagemaker = ["boto3"]
1 change: 1 addition & 0 deletions settings.yaml
@@ -117,6 +117,7 @@ ollama:
  embedding_api_base: http://localhost:11434  # change if your embedding model runs on another Ollama instance
  keep_alive: 5m
  request_timeout: 120.0
  autopull_models: true

azopenai:
  api_key: ${AZ_OPENAI_API_KEY:}
