Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

NIFI-13764: Embedding model name property is ignored in multiple vector db processors #9

Merged
merged 3 commits into from
Sep 20, 2024
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
23 changes: 21 additions & 2 deletions src/extensions/vectorstores/EmbeddingUtils.py
Original file line number Diff line number Diff line change
Expand Up @@ -101,6 +101,23 @@
default_value=OPENAI,
required=True,
)
OPENAI_API_MODEL = PropertyDescriptor(
lordgamez marked this conversation as resolved.
Show resolved Hide resolved
name="OpenAI Model",
description="The name of the OpenAI model to use",
default_value="text-embedding-ada-002",
required=True,
validators=[StandardValidators.NON_EMPTY_VALIDATOR],
dependencies=[PropertyDependency(EMBEDDING_MODEL, OPENAI)],
)
# Processor property holding the name of the HuggingFace model to use.
# It is required, must be non-empty, and defaults to
# "sentence-transformers/all-MiniLM-L6-v2". It is declared with a
# PropertyDependency on (EMBEDDING_MODEL, HUGGING_FACE), so it presumably only
# applies when the HuggingFace embedding model is selected.
HUGGING_FACE_MODEL = PropertyDescriptor(
name="HuggingFace Model",
description="The name of the HuggingFace model to use",
default_value="sentence-transformers/all-MiniLM-L6-v2",
required=True,
validators=[StandardValidators.NON_EMPTY_VALIDATOR],
dependencies=[PropertyDependency(EMBEDDING_MODEL, HUGGING_FACE)],
)

PROPERTIES = [
EMBEDDING_FUNCTION,
HUGGING_FACE_MODEL_NAME,
Expand Down Expand Up @@ -160,6 +177,8 @@ def create_embedding_service(context):

if embedding_service == OPENAI:
openai_api_key = context.getProperty(OPENAI_API_KEY).getValue()
return OpenAIEmbeddings(openai_api_key=openai_api_key)
openai_model = context.getProperty(OPENAI_API_MODEL).getValue()
return OpenAIEmbeddings(openai_api_key=openai_api_key, model=openai_model)
huggingface_api_key = context.getProperty(HUGGING_FACE_API_KEY).getValue()
return HuggingFaceInferenceAPIEmbeddings(api_key=huggingface_api_key)
huggingface_model = context.getProperty(HUGGING_FACE_MODEL).getValue()
return HuggingFaceInferenceAPIEmbeddings(api_key=huggingface_api_key, model_name=huggingface_model)
16 changes: 0 additions & 16 deletions src/extensions/vectorstores/OpenSearchVectorUtils.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,14 +17,6 @@
validators=[StandardValidators.NON_EMPTY_VALIDATOR],
dependencies=[PropertyDependency(EMBEDDING_MODEL, HUGGING_FACE)],
)
HUGGING_FACE_MODEL = PropertyDescriptor(
name="HuggingFace Model",
description="The name of the HuggingFace model to use",
default_value="sentence-transformers/all-MiniLM-L6-v2",
required=True,
validators=[StandardValidators.NON_EMPTY_VALIDATOR],
dependencies=[PropertyDependency(EMBEDDING_MODEL, HUGGING_FACE)],
)
OPENAI_API_KEY = PropertyDescriptor(
name="OpenAI API Key",
description="The API Key for OpenAI in order to create embeddings",
Expand All @@ -33,14 +25,6 @@
validators=[StandardValidators.NON_EMPTY_VALIDATOR],
dependencies=[PropertyDependency(EMBEDDING_MODEL, OPENAI)],
)
OPENAI_API_MODEL = PropertyDescriptor(
name="OpenAI Model",
description="The API Key for OpenAI in order to create embeddings",
default_value="text-embedding-ada-002",
required=True,
validators=[StandardValidators.NON_EMPTY_VALIDATOR],
dependencies=[PropertyDependency(EMBEDDING_MODEL, OPENAI)],
)
HTTP_HOST = PropertyDescriptor(
name="HTTP Host",
description="URL where OpenSearch is hosted.",
Expand Down
4 changes: 1 addition & 3 deletions src/extensions/vectorstores/PutOpenSearchVector.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
# SPDX-License-Identifier: Apache-2.0

from EmbeddingUtils import EMBEDDING_MODEL, create_embedding_service
from EmbeddingUtils import EMBEDDING_MODEL, HUGGING_FACE_MODEL, OPENAI_API_MODEL, create_embedding_service
from langchain.vectorstores import OpenSearchVectorSearch
from nifiapi.documentation import use_case
from nifiapi.flowfiletransform import FlowFileTransform, FlowFileTransformResult
Expand All @@ -9,13 +9,11 @@
COSINESIMIL,
HTTP_HOST,
HUGGING_FACE_API_KEY,
HUGGING_FACE_MODEL,
INDEX_NAME,
L1,
L2,
LINF,
OPENAI_API_KEY,
OPENAI_API_MODEL,
PASSWORD,
TEXT_FIELD,
USERNAME,
Expand Down
25 changes: 8 additions & 17 deletions src/extensions/vectorstores/PutPinecone.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,14 @@
import json

import langchain.vectorstores
from EmbeddingUtils import EMBEDDING_MODEL, HUGGING_FACE, OPENAI, create_embedding_service
from EmbeddingUtils import (
EMBEDDING_MODEL,
HUGGING_FACE,
HUGGING_FACE_MODEL,
OPENAI,
OPENAI_API_MODEL,
create_embedding_service,
)
from nifiapi.documentation import use_case
from nifiapi.flowfiletransform import FlowFileTransform, FlowFileTransformResult
from nifiapi.properties import ExpressionLanguageScope, PropertyDependency, PropertyDescriptor, StandardValidators
Expand Down Expand Up @@ -77,14 +84,6 @@ class ProcessorDetails:
sensitive=True,
dependencies=[PropertyDependency(EMBEDDING_MODEL, HUGGING_FACE)],
)
HUGGING_FACE_MODEL = PropertyDescriptor(
name="HuggingFace Model",
description="The name of the HuggingFace model to use",
validators=[StandardValidators.NON_EMPTY_VALIDATOR],
required=True,
default_value="sentence-transformers/all-MiniLM-L6-v2",
dependencies=[PropertyDependency(EMBEDDING_MODEL, HUGGING_FACE)],
)
OPENAI_API_KEY = PropertyDescriptor(
name="OpenAI API Key",
description="The API Key for OpenAI in order to create embeddings",
Expand All @@ -93,14 +92,6 @@ class ProcessorDetails:
validators=[StandardValidators.NON_EMPTY_VALIDATOR],
dependencies=[PropertyDependency(EMBEDDING_MODEL, OPENAI)],
)
OPENAI_API_MODEL = PropertyDescriptor(
name="OpenAI Model",
description="The API Key for OpenAI in order to create embeddings",
required=True,
validators=[StandardValidators.NON_EMPTY_VALIDATOR],
default_value="text-embedding-ada-002",
dependencies=[PropertyDependency(EMBEDDING_MODEL, OPENAI)],
)
PINECONE_ENV = PropertyDescriptor(
name="Pinecone Environment",
description="The name of the Pinecone Environment. This can be found in the Pinecone console next to the API Key.",
Expand Down
18 changes: 2 additions & 16 deletions src/extensions/vectorstores/QdrantUtils.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,9 @@
from EmbeddingUtils import (
EMBEDDING_MODEL,
HUGGING_FACE,
HUGGING_FACE_MODEL,
OPENAI,
OPENAI_API_MODEL,
)
from nifiapi.properties import (
ExpressionLanguageScope,
Expand Down Expand Up @@ -68,14 +70,6 @@
sensitive=True,
dependencies=[PropertyDependency(EMBEDDING_MODEL, HUGGING_FACE)],
)
HUGGING_FACE_MODEL = PropertyDescriptor(
name="HuggingFace Model",
description="The name of the HuggingFace model to use.",
validators=[StandardValidators.NON_EMPTY_VALIDATOR],
required=True,
default_value="sentence-transformers/all-MiniLM-L6-v2",
dependencies=[PropertyDependency(EMBEDDING_MODEL, HUGGING_FACE)],
)
OPENAI_API_KEY = PropertyDescriptor(
name="OpenAI API Key",
description="The API Key for OpenAI in order to create embeddings.",
Expand All @@ -84,14 +78,6 @@
validators=[StandardValidators.NON_EMPTY_VALIDATOR],
dependencies=[PropertyDependency(EMBEDDING_MODEL, OPENAI)],
)
OPENAI_API_MODEL = PropertyDescriptor(
name="OpenAI Model",
description="The name of the OpenAI model to use.",
required=True,
validators=[StandardValidators.NON_EMPTY_VALIDATOR],
default_value="text-embedding-ada-002",
dependencies=[PropertyDependency(EMBEDDING_MODEL, OPENAI)],
)

EMBEDDING_MODEL_PROPERTIES = [
EMBEDDING_MODEL,
Expand Down
4 changes: 1 addition & 3 deletions src/extensions/vectorstores/QueryOpenSearchVector.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,21 +2,19 @@

import json

from EmbeddingUtils import EMBEDDING_MODEL, create_embedding_service
from EmbeddingUtils import EMBEDDING_MODEL, HUGGING_FACE_MODEL, OPENAI_API_MODEL, create_embedding_service
from langchain.vectorstores import OpenSearchVectorSearch
from nifiapi.flowfiletransform import FlowFileTransform, FlowFileTransformResult
from nifiapi.properties import ExpressionLanguageScope, PropertyDependency, PropertyDescriptor, StandardValidators
from OpenSearchVectorUtils import (
COSINESIMIL,
HTTP_HOST,
HUGGING_FACE_API_KEY,
HUGGING_FACE_MODEL,
INDEX_NAME,
L1,
L2,
LINF,
OPENAI_API_KEY,
OPENAI_API_MODEL,
PASSWORD,
TEXT_FIELD,
USERNAME,
Expand Down
27 changes: 9 additions & 18 deletions src/extensions/vectorstores/QueryPinecone.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,14 @@

import langchain.vectorstores
import QueryUtils
from EmbeddingUtils import EMBEDDING_MODEL, HUGGING_FACE, OPENAI, create_embedding_service
from EmbeddingUtils import (
EMBEDDING_MODEL,
HUGGING_FACE,
HUGGING_FACE_MODEL,
OPENAI,
OPENAI_API_MODEL,
create_embedding_service,
)
from nifiapi.flowfiletransform import FlowFileTransform, FlowFileTransformResult
from nifiapi.properties import ExpressionLanguageScope, PropertyDependency, PropertyDescriptor, StandardValidators
from pinecone import Pinecone
Expand Down Expand Up @@ -54,22 +61,6 @@ class ProcessorDetails:
sensitive=True,
dependencies=[PropertyDependency(EMBEDDING_MODEL, HUGGING_FACE)],
)
OPENAI_MODEL = PropertyDescriptor(
name="OpenAI Model",
description="The API Key for OpenAI in order to create embeddings",
required=True,
validators=[StandardValidators.NON_EMPTY_VALIDATOR],
default_value="text-embedding-ada-002",
dependencies=[PropertyDependency(EMBEDDING_MODEL, OPENAI)],
)
HUGGING_FACE_MODEL = PropertyDescriptor(
name="HuggingFace Model",
description="The name of the HuggingFace model to use",
validators=[StandardValidators.NON_EMPTY_VALIDATOR],
required=True,
default_value="sentence-transformers/all-MiniLM-L6-v2",
dependencies=[PropertyDependency(EMBEDDING_MODEL, HUGGING_FACE)],
)
PINECONE_ENV = PropertyDescriptor(
name="Pinecone Environment",
description="The name of the Pinecone Environment. This can be found in the Pinecone console next to the API Key.",
Expand Down Expand Up @@ -127,7 +118,7 @@ class ProcessorDetails:
PINECONE_API_KEY,
EMBEDDING_MODEL,
OPENAI_API_KEY,
OPENAI_MODEL,
OPENAI_API_MODEL,
HUGGING_FACE_API_KEY,
HUGGING_FACE_MODEL,
PINECONE_ENV,
Expand Down