Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: Support MetadataFilters for Milvus and SimpleVectorStore #1033

Merged
Merged
Show file tree
Hide file tree
Changes from 16 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions .changeset/famous-poets-hammer.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
---
"llamaindex": patch
"@llamaindex/llamaindex-test": patch
---

Add support for metadata filters (`MetadataFilters`) in the Milvus vector store and `SimpleVectorStore`
40 changes: 40 additions & 0 deletions examples/metadata-filter/milvus.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
import { MilvusVectorStore, VectorStoreIndex } from "llamaindex";

const collectionName = "movie_reviews";

/**
 * Example entry point: opens the existing Milvus collection, builds a query
 * engine that applies metadata pre-filters, runs a single query and prints
 * the number of source nodes together with the response text.
 *
 * Errors raised along the way are logged to stderr instead of being rethrown.
 */
async function main() {
  try {
    const store = new MilvusVectorStore({ collection: collectionName });
    const vectorIndex = await VectorStoreIndex.fromVectorStore(store);
    const topKRetriever = vectorIndex.asRetriever({ similarityTopK: 20 });

    console.log("\n=====\nQuerying the index with filters");

    // Two filters on the same key ("==" and "!=" against the same value),
    // combined with the "or" condition.
    const filteredEngine = vectorIndex.asQueryEngine({
      retriever: topKRetriever,
      preFilters: {
        condition: "or",
        filters: [
          {
            key: "document_id",
            operator: "==",
            value: "./data/movie_reviews.csv_37",
          },
          {
            key: "document_id",
            operator: "!=",
            value: "./data/movie_reviews.csv_37",
          },
        ],
      },
    });

    const answer = await filteredEngine.query({
      query: "Get all movie titles.",
    });

    const usedNodeCount = answer.sourceNodes?.length;
    console.log(`Query from ${usedNodeCount} nodes`);
    console.log(answer.response);
  } catch (e) {
    console.error(e);
  }
}

void main();
61 changes: 55 additions & 6 deletions examples/metadata-filter/simple.ts
Original file line number Diff line number Diff line change
Expand Up @@ -66,29 +66,78 @@ async function main() {
console.log("No filter response:", noFilterResponse.toString());

console.log(
"\n=============\nQuerying index with dogId 2. The output always should be red.",
"\n=============\nQuerying index with dogId 2 and private false. The output always should be red.",
);
const queryEngineDogId2 = index.asQueryEngine({
const queryEngineEQ = index.asQueryEngine({
preFilters: {
filters: [
{
key: "private",
value: "false",
filterType: "ExactMatch",
operator: "==",
},
{
key: "dogId",
value: "3",
filterType: "ExactMatch",
operator: "==",
},
],
},
similarityTopK: 3,
});
const response = await queryEngineDogId2.query({
const responseEQ = await queryEngineEQ.query({
query: "What is the color of the dog?",
});
console.log("Filter with dogId 2 response:", response.toString());
console.log("Filter with dogId 2 response:", responseEQ.toString());

console.log(
"\n=============\nQuerying index with dogId IN (1, 3). The output should be brown and red.",
);
const queryEngineIN = index.asQueryEngine({
preFilters: {
filters: [
{
key: "dogId",
value: ["1", "3"],
operator: "in",
},
],
},
similarityTopK: 3,
});
const responseIN = await queryEngineIN.query({
query: "What is the color of the dog?",
});
console.log("Filter with dogId IN (1, 3) response:", responseIN.toString());

console.log(
"\n=============\nQuerying index with dogId IN (1, 3). The output should be any.",
);
const queryEngineOR = index.asQueryEngine({
preFilters: {
filters: [
{
key: "private",
value: "false",
operator: "==",
},
{
key: "dogId",
value: ["1", "3"],
operator: "in",
},
],
condition: "or",
},
similarityTopK: 3,
});
const responseOR = await queryEngineOR.query({
query: "What is the color of the dog?",
});
console.log(
"Filter with dogId with OR operator response:",
responseOR.toString(),
);
}

void main();
64 changes: 63 additions & 1 deletion packages/llamaindex/src/storage/vectorStore/MilvusVectorStore.ts
Original file line number Diff line number Diff line change
Expand Up @@ -11,11 +11,66 @@ import {
import {
VectorStoreBase,
type IEmbedModel,
type MetadataFilters,
type VectorStoreNoEmbedModel,
type VectorStoreQuery,
type VectorStoreQueryResult,
} from "./types.js";
import { metadataDictToNode, nodeToMetadata } from "./utils.js";
import {
metadataDictToNode,
nodeToMetadata,
parseArrayValue,
parseNumberValue,
parsePrimitiveValue,
} from "./utils.js";

/**
 * Translates generic metadata filters into a Milvus boolean expression that
 * addresses keys of the `metadata` field (`metadata["<key>"]`).
 *
 * Every filter becomes one clause; the clauses are joined with the filters'
 * `condition`, which defaults to "and" when it is not set.
 *
 * @param scalarFilters - The filters to translate and the optional condition
 *   used to combine them.
 * @returns The filter expression. The string is empty when no clause was
 *   produced, or when an "or" combination contains a clause that is always
 *   true (so nothing needs to be filtered).
 * @throws Error if a filter uses an operator that is not handled here.
 */
function parseScalarFilters(scalarFilters: MetadataFilters): string {
  const condition = scalarFilters.condition ?? "and";
  const clauses: string[] = [];
  // Becomes true when a filter excludes nothing (a `nin` with an empty list).
  let hasAlwaysTrueClause = false;

  for (const filter of scalarFilters.filters) {
    const field = `metadata["${filter.key}"]`;

    switch (filter.operator) {
      case "==":
      case "!=": {
        clauses.push(
          `${field} ${filter.operator} "${parsePrimitiveValue(filter.value)}"`,
        );
        break;
      }
      case "in": {
        const quotedValues = parseArrayValue(filter.value)
          .map((v) => `"${v}"`)
          .join(", ");
        clauses.push(`${field} ${filter.operator} [${quotedValues}]`);
        break;
      }
      case "nin": {
        // `nin` is not emitted as an operator; it is expanded into one
        // inequality per value instead.
        // Result: (metadata["key"] != "value1" && metadata["key"] != "value2")
        const excludedValues = parseArrayValue(filter.value);
        if (excludedValues.length === 0) {
          // Nothing to exclude: the clause is always true. Pushing an empty
          // string here would leave a dangling and/or in the joined result.
          hasAlwaysTrueClause = true;
          break;
        }
        const inequalities = excludedValues
          .map((v) => `${field} != "${v}"`)
          .join(" && ");
        // Parenthesized so the group stays intact whatever condition joins
        // it with the neighbouring clauses.
        clauses.push(`(${inequalities})`);
        break;
      }
      case "<":
      case "<=":
      case ">":
      case ">=": {
        clauses.push(
          `${field} ${filter.operator} ${parseNumberValue(filter.value)}`,
        );
        break;
      }
      default:
        throw new Error(`Operator ${filter.operator} is not supported.`);
    }
  }

  // "true or X" is always true, so there is nothing left to filter on.
  if (hasAlwaysTrueClause && condition === "or") {
    return "";
  }

  // Under "and" an always-true clause is simply omitted.
  return clauses.join(` ${condition} `);
}

export class MilvusVectorStore
extends VectorStoreBase
Expand Down Expand Up @@ -183,6 +238,12 @@ export class MilvusVectorStore
});
}

/**
 * Builds the Milvus filter expression for a set of metadata filters.
 *
 * @param filters - Metadata filters to translate, if any.
 * @returns The expression produced by `parseScalarFilters`, or `undefined`
 *   when no filters were supplied.
 */
public toMilvusFilter(filters?: MetadataFilters): string | undefined {
  if (!filters) return undefined;
  // TODO: Milvus also supports standard filters; we can add them later.
  return parseScalarFilters(filters);
}

public async query(
query: VectorStoreQuery,
_options?: any,
Expand All @@ -193,6 +254,7 @@ export class MilvusVectorStore
collection_name: this.collectionName,
limit: query.similarityTopK,
vector: query.queryEmbedding,
filter: this.toMilvusFilter(query.filters),
});

const nodes: BaseNode<Metadata>[] = [];
Expand Down
5 changes: 4 additions & 1 deletion packages/llamaindex/src/storage/vectorStore/PGVectorStore.ts
Original file line number Diff line number Diff line change
Expand Up @@ -272,7 +272,10 @@ export class PGVectorStore
query.filters?.filters.forEach((filter, index) => {
const paramIndex = params.length + 1;
whereClauses.push(`metadata->>'${filter.key}' = $${paramIndex}`);
params.push(filter.value);
// TODO: support filter with other operators
if (!Array.isArray(filter.value)) {
params.push(filter.value);
}
});

const where =
Expand Down
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import {
VectorStoreBase,
type ExactMatchFilter,
type IEmbedModel,
type MetadataFilter,
type MetadataFilters,
type VectorStoreNoEmbedModel,
type VectorStoreQuery,
Expand Down Expand Up @@ -199,8 +199,12 @@ export class PineconeVectorStore
}

/**
 * Collapses generic metadata filters into a plain object that maps each
 * filter key to its value.
 *
 * Only filters whose operator is "==" are copied; filters with any other
 * operator are currently skipped. When several "==" filters share a key, the
 * last one wins.
 *
 * @param stdFilters - Metadata filters to convert, if any.
 * @returns The key/value object, or `undefined` when `stdFilters` or its
 *   `filters` list is missing.
 */
toPineconeFilter(stdFilters?: MetadataFilters) {
  return stdFilters?.filters?.reduce((carry: any, item: MetadataFilter) => {
    // A MetadataFilter with the "==" operator replaces the former ExactMatchFilter.
    // TODO: support filters with other operators
    if (item.operator === "==") {
      carry[item.key] = item.value;
    }
    return carry;
  }, {});
}
Expand Down
Loading