feat(llm): autopull ollama models (#2019)
* chore: update ollama (llm)

* feat: allow to autopull ollama models

* fix: mypy

* chore: install always ollama client

* refactor: check connection and pull ollama method to utils

* docs: update ollama config with autopulling info
jaluma authored Jul 29, 2024
1 parent dabf556 commit 20bad17
Showing 8 changed files with 129 additions and 21 deletions.
16 changes: 10 additions & 6 deletions fern/docs/pages/installation/installation.mdx
@@ -130,16 +130,20 @@ Go to [ollama.ai](https://ollama.ai/) and follow the instructions to install Ollama

After the installation, make sure the Ollama desktop app is closed.

Now, start the Ollama service (it will start a local inference server, serving both the LLM and the Embeddings):

```bash
ollama serve
```

Install the models to be used; the default `settings-ollama.yaml` is configured to use the `mistral 7b` LLM (~4GB) and `nomic-embed-text` Embeddings (~275MB).

By default, PGPT will automatically pull models as needed. This behavior can be changed by modifying the `ollama.autopull_models` property.

In any case, if you want to manually pull models, run the following commands:

```bash
ollama pull mistral
ollama pull nomic-embed-text
```
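If you want to confirm which models are already available locally, the official `ollama` Python client can list them. A minimal sketch, assuming the default host (this is not part of the official docs):

```python
# Sketch: list locally available Ollama models (assumes default host).
from ollama import Client

client = Client(host="http://localhost:11434")
for model in client.list().get("models", []):
    print(model["name"])  # e.g. "mistral:latest"
```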

Once done, on a different terminal, you can install PrivateGPT with the following command:
33 changes: 24 additions & 9 deletions poetry.lock

Some generated files are not rendered by default.

32 changes: 31 additions & 1 deletion private_gpt/components/embedding/embedding_component.py
@@ -71,16 +71,46 @@ def __init__(self, settings: Settings) -> None:
                    from llama_index.embeddings.ollama import (  # type: ignore
                        OllamaEmbedding,
                    )
                    from ollama import Client  # type: ignore
                except ImportError as e:
                    raise ImportError(
                        "Local dependencies not found, install with `poetry install --extras embeddings-ollama`"
                    ) from e

                ollama_settings = settings.ollama

                # Calculate the embedding model name. If no tag is provided, use ":latest".
                model_name = (
                    ollama_settings.embedding_model + ":latest"
                    if ":" not in ollama_settings.embedding_model
                    else ollama_settings.embedding_model
                )

                self.embedding_model = OllamaEmbedding(
                    model_name=model_name,
                    base_url=ollama_settings.embedding_api_base,
                )

                if ollama_settings.autopull_models:
                    from private_gpt.utils.ollama import (
                        check_connection,
                        pull_model,
                    )

                    # TODO: Reuse llama-index client when llama-index is updated
                    client = Client(
                        host=ollama_settings.embedding_api_base,
                        timeout=ollama_settings.request_timeout,
                    )

                    if not check_connection(client):
                        raise ValueError(
                            f"Failed to connect to Ollama, "
                            f"check if Ollama server is running on {ollama_settings.embedding_api_base}"
                        )
                    pull_model(client, model_name)

case "azopenai":
try:
from llama_index.embeddings.azure_openai import ( # type: ignore
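Both components repeat the same `:latest` tag-normalization rule. Extracted as a standalone function (the name `normalize_model_name` is ours, not the repo's), the rule behaves like this:

```python
# Hypothetical helper mirroring the inline conditional used above.
def normalize_model_name(model: str) -> str:
    """Append ':latest' when the Ollama model name carries no explicit tag."""
    return model if ":" in model else model + ":latest"


assert normalize_model_name("nomic-embed-text") == "nomic-embed-text:latest"
assert normalize_model_name("mistral:7b-instruct") == "mistral:7b-instruct"
```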
23 changes: 21 additions & 2 deletions private_gpt/components/llm/llm_component.py
@@ -146,15 +146,32 @@ def __init__(self, settings: Settings) -> None:
"repeat_penalty": ollama_settings.repeat_penalty, # ollama llama-cpp
}

self.llm = Ollama(
model=ollama_settings.llm_model,
# calculate llm model. If not provided tag, it will be use latest
model_name = (
ollama_settings.llm_model + ":latest"
if ":" not in ollama_settings.llm_model
else ollama_settings.llm_model
)

llm = Ollama(
model=model_name,
base_url=ollama_settings.api_base,
temperature=settings.llm.temperature,
context_window=settings.llm.context_window,
additional_kwargs=settings_kwargs,
request_timeout=ollama_settings.request_timeout,
)

if ollama_settings.autopull_models:
from private_gpt.utils.ollama import check_connection, pull_model

if not check_connection(llm.client):
raise ValueError(
f"Failed to connect to Ollama, "
f"check if Ollama server is running on {ollama_settings.api_base}"
)
pull_model(llm.client, model_name)

if (
ollama_settings.keep_alive
!= ollama_settings.model_fields["keep_alive"].default
Expand All @@ -172,6 +189,8 @@ def wrapper(*args: Any, **kwargs: Any) -> Any:
Ollama.complete = add_keep_alive(Ollama.complete)
Ollama.stream_complete = add_keep_alive(Ollama.stream_complete)

self.llm = llm

case "azopenai":
try:
from llama_index.llms.azure_openai import ( # type: ignore
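The body of `add_keep_alive` is elided by the diff; only the `def wrapper(...)` line in the hunk header and the two patched methods are visible. A plausible minimal sketch, assuming the decorator simply injects the configured `keep_alive` into every call (the kwarg-injection detail is our assumption):

```python
from typing import Any, Callable


def add_keep_alive(func: Callable[..., Any]) -> Callable[..., Any]:
    """Sketch: wrap an Ollama method so every call carries keep_alive."""

    def wrapper(*args: Any, **kwargs: Any) -> Any:
        kwargs["keep_alive"] = "5m"  # assumed: ollama_settings.keep_alive in the real code
        return func(*args, **kwargs)

    return wrapper
```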
4 changes: 4 additions & 0 deletions private_gpt/settings/settings.py
@@ -290,6 +290,10 @@ class OllamaSettings(BaseModel):
        120.0,
        description="Time elapsed until Ollama times out the request. Default is 120s. Format is float.",
    )
    autopull_models: bool = Field(
        False,
        description="If set to True, Ollama will automatically pull models from the API base.",
    )


class AzureOpenAISettings(BaseModel):
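A minimal pydantic sketch (a trimmed, hypothetical version of the class above) showing how the new flag defaults to `False` and can be overridden per deployment:

```python
from pydantic import BaseModel, Field


class OllamaSettings(BaseModel):  # trimmed sketch, not the full settings class
    autopull_models: bool = Field(
        False,
        description="If set to True, Ollama will automatically pull models from the API base.",
    )


print(OllamaSettings().autopull_models)                      # False (default)
print(OllamaSettings(autopull_models=True).autopull_models)  # True
```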
32 changes: 32 additions & 0 deletions private_gpt/utils/ollama.py
@@ -0,0 +1,32 @@
import logging

try:
    from ollama import Client  # type: ignore
except ImportError as e:
    raise ImportError(
        "Ollama dependencies not found, install with `poetry install --extras llms-ollama or embeddings-ollama`"
    ) from e

logger = logging.getLogger(__name__)


def check_connection(client: Client) -> bool:
    try:
        client.list()
        return True
    except Exception as e:
        logger.error(f"Failed to connect to Ollama: {e!s}")
        return False


def pull_model(client: Client, model_name: str, raise_error: bool = True) -> None:
    try:
        installed_models = [model["name"] for model in client.list().get("models", [])]
        if model_name not in installed_models:
            logger.info(f"Pulling model {model_name}. Please wait...")
            client.pull(model_name)
            logger.info(f"Model {model_name} pulled successfully")
    except Exception as e:
        logger.error(f"Failed to pull model {model_name}: {e!s}")
        if raise_error:
            raise e
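For reference, a short usage sketch of these helpers; the host, timeout, and model name are illustrative values, not repo defaults pulled from this file:

```python
# Illustrative usage of the helpers above; values are assumptions.
from ollama import Client

from private_gpt.utils.ollama import check_connection, pull_model

client = Client(host="http://localhost:11434", timeout=120.0)
if check_connection(client):
    pull_model(client, "mistral:latest")
```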
9 changes: 6 additions & 3 deletions pyproject.toml
@@ -22,7 +22,7 @@ llama-index-readers-file = "^0.1.27"
llama-index-llms-llama-cpp = {version = "^0.1.4", optional = true}
llama-index-llms-openai = {version = "^0.1.25", optional = true}
llama-index-llms-openai-like = {version ="^0.1.3", optional = true}
llama-index-llms-ollama = {version ="^0.2.2", optional = true}
llama-index-llms-azure-openai = {version ="^0.1.8", optional = true}
llama-index-llms-gemini = {version ="^0.1.11", optional = true}
llama-index-embeddings-ollama = {version ="^0.1.2", optional = true}
@@ -62,16 +62,19 @@ ffmpy = {git = "https://github.com/EuDs63/ffmpy.git", rev = "333a19ee4d21f32537c
# Optional Google Gemini dependency
google-generativeai = {version ="^0.5.4", optional = true}

# Optional Ollama client
ollama = {version ="^0.3.0", optional = true}

[tool.poetry.extras]
ui = ["gradio", "ffmpy"]
llms-llama-cpp = ["llama-index-llms-llama-cpp"]
llms-openai = ["llama-index-llms-openai"]
llms-openai-like = ["llama-index-llms-openai-like"]
llms-ollama = ["llama-index-llms-ollama", "ollama"]
llms-sagemaker = ["boto3"]
llms-azopenai = ["llama-index-llms-azure-openai"]
llms-gemini = ["llama-index-llms-gemini", "google-generativeai"]
embeddings-ollama = ["llama-index-embeddings-ollama", "ollama"]
embeddings-huggingface = ["llama-index-embeddings-huggingface"]
embeddings-openai = ["llama-index-embeddings-openai"]
embeddings-sagemaker = ["boto3"]
1 change: 1 addition & 0 deletions settings.yaml
@@ -117,6 +117,7 @@ ollama:
  embedding_api_base: http://localhost:11434  # change if your embedding model runs on another Ollama instance
  keep_alive: 5m
  request_timeout: 120.0
  autopull_models: true

azopenai:
  api_key: ${AZ_OPENAI_API_KEY:}
