Skip to content

Commit

Permalink
feat: Support MetadataFilters for Milvus and SimpleVectorStore (#1033)
Browse files Browse the repository at this point in the history
Co-authored-by: Marcus Schiesser <[email protected]>
  • Loading branch information
thucpn and marcusschiesser committed Jul 17, 2024
1 parent 4cc3c1b commit 1ebc973
Show file tree
Hide file tree
Showing 12 changed files with 921 additions and 68 deletions.
6 changes: 6 additions & 0 deletions .changeset/famous-poets-hammer.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
---
"llamaindex": patch
"@llamaindex/llamaindex-test": patch
---

Add support for metadata filters in MilvusVectorStore and SimpleVectorStore
40 changes: 40 additions & 0 deletions examples/metadata-filter/milvus.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
import { MilvusVectorStore, VectorStoreIndex } from "llamaindex";

const collectionName = "movie_reviews";

/**
 * Example entry point: queries an existing Milvus collection through a query
 * engine configured with metadata pre-filters.
 *
 * Two filters on the `document_id` key (one `==`, one `!=`, same value) are
 * combined with the `or` condition. Any error raised along the way is logged
 * to stderr instead of being rethrown.
 */
async function main() {
  // Document id used by both the `==` and the `!=` filter below.
  const documentId = "./data/movie_reviews.csv_37";

  try {
    const vectorStore = new MilvusVectorStore({ collection: collectionName });
    const index = await VectorStoreIndex.fromVectorStore(vectorStore);
    const retriever = index.asRetriever({ similarityTopK: 20 });

    console.log("\n=====\nQuerying the index with filters");
    const filteredEngine = index.asQueryEngine({
      retriever,
      preFilters: {
        condition: "or",
        filters: [
          { key: "document_id", value: documentId, operator: "==" },
          { key: "document_id", value: documentId, operator: "!=" },
        ],
      },
    });

    const filteredResult = await filteredEngine.query({
      query: "Get all movie titles.",
    });

    // Report how many source nodes backed the answer, then the answer itself.
    const nodeCount = filteredResult.sourceNodes?.length;
    console.log(`Query from ${nodeCount} nodes`);
    console.log(filteredResult.response);
  } catch (err) {
    console.error(err);
  }
}

void main();
61 changes: 55 additions & 6 deletions examples/metadata-filter/simple.ts
Original file line number Diff line number Diff line change
Expand Up @@ -66,29 +66,78 @@ async function main() {
console.log("No filter response:", noFilterResponse.toString());

console.log(
"\n=============\nQuerying index with dogId 2. The output always should be red.",
"\n=============\nQuerying index with dogId 2 and private false. The output always should be red.",
);
const queryEngineDogId2 = index.asQueryEngine({
const queryEngineEQ = index.asQueryEngine({
preFilters: {
filters: [
{
key: "private",
value: "false",
filterType: "ExactMatch",
operator: "==",
},
{
key: "dogId",
value: "3",
filterType: "ExactMatch",
operator: "==",
},
],
},
similarityTopK: 3,
});
const response = await queryEngineDogId2.query({
const responseEQ = await queryEngineEQ.query({
query: "What is the color of the dog?",
});
console.log("Filter with dogId 2 response:", response.toString());
console.log("Filter with dogId 2 response:", responseEQ.toString());

console.log(
"\n=============\nQuerying index with dogId IN (1, 3). The output should be brown and red.",
);
const queryEngineIN = index.asQueryEngine({
preFilters: {
filters: [
{
key: "dogId",
value: ["1", "3"],
operator: "in",
},
],
},
similarityTopK: 3,
});
const responseIN = await queryEngineIN.query({
query: "What is the color of the dog?",
});
console.log("Filter with dogId IN (1, 3) response:", responseIN.toString());

console.log(
"\n=============\nQuerying index with dogId IN (1, 3). The output should be any.",
);
const queryEngineOR = index.asQueryEngine({
preFilters: {
filters: [
{
key: "private",
value: "false",
operator: "==",
},
{
key: "dogId",
value: ["1", "3"],
operator: "in",
},
],
condition: "or",
},
similarityTopK: 3,
});
const responseOR = await queryEngineOR.query({
query: "What is the color of the dog?",
});
console.log(
"Filter with dogId with OR operator response:",
responseOR.toString(),
);
}

void main();
64 changes: 63 additions & 1 deletion packages/llamaindex/src/storage/vectorStore/MilvusVectorStore.ts
Original file line number Diff line number Diff line change
Expand Up @@ -11,11 +11,66 @@ import {
import {
VectorStoreBase,
type IEmbedModel,
type MetadataFilters,
type VectorStoreNoEmbedModel,
type VectorStoreQuery,
type VectorStoreQueryResult,
} from "./types.js";
import { metadataDictToNode, nodeToMetadata } from "./utils.js";
import {
metadataDictToNode,
nodeToMetadata,
parseArrayValue,
parseNumberValue,
parsePrimitiveValue,
} from "./utils.js";

/**
 * Escapes a key or value so it can be embedded inside a double-quoted string
 * literal of a Milvus filter expression. Backslashes and double quotes are
 * prefixed with a backslash so they cannot terminate the literal early.
 */
function escapeMilvusString(value: string | number): string {
  return String(value).replace(/\\/g, "\\\\").replace(/"/g, '\\"');
}

/**
 * Builds a Milvus boolean filter expression over the `metadata` JSON field
 * from a set of metadata filters.
 *
 * Each filter becomes one sub-expression; the sub-expressions are joined with
 * the filters' `condition` (`and` when none is given).
 *
 * @param scalarFilters - The filters and the condition used to combine them.
 * @returns The filter expression; an empty string when no filter contributes
 *   a sub-expression.
 * @throws Error if a filter uses an operator that is not handled here (the
 *   value parsers imported from `./utils.js` may also throw on values they
 *   reject).
 */
function parseScalarFilters(scalarFilters: MetadataFilters): string {
  const condition = scalarFilters.condition ?? "and";
  const filters: string[] = [];

  for (const filter of scalarFilters.filters) {
    const field = `metadata["${escapeMilvusString(filter.key)}"]`;

    switch (filter.operator) {
      case "==":
      case "!=": {
        const value = escapeMilvusString(parsePrimitiveValue(filter.value));
        filters.push(`${field} ${filter.operator} "${value}"`);
        break;
      }
      case "in": {
        const filterValue = parseArrayValue(filter.value)
          .map((v) => `"${escapeMilvusString(v)}"`)
          .join(", ");
        filters.push(`${field} ${filter.operator} [${filterValue}]`);
        break;
      }
      case "nin": {
        // `nin` is expanded into one inequality per value, all of which must hold:
        //   metadata["key"] != "value1" && metadata["key"] != "value2"
        const values = parseArrayValue(filter.value);
        // An empty exclusion list excludes nothing; emitting an empty
        // sub-expression would leave a dangling `and`/`or` in the result.
        if (values.length === 0) break;
        const filterStr = values
          .map((v) => `${field} != "${escapeMilvusString(v)}"`)
          .join(" && ");
        filters.push(filterStr);
        break;
      }
      case "<":
      case "<=":
      case ">":
      case ">=": {
        // Numeric comparisons are emitted unquoted.
        filters.push(
          `${field} ${filter.operator} ${parseNumberValue(filter.value)}`,
        );
        break;
      }
      default:
        throw new Error(`Operator ${filter.operator} is not supported.`);
    }
  }

  return filters.join(` ${condition} `);
}

export class MilvusVectorStore
extends VectorStoreBase
Expand Down Expand Up @@ -183,6 +238,12 @@ export class MilvusVectorStore
});
}

/**
 * Converts metadata filters into a Milvus filter expression string.
 *
 * @param filters - Optional metadata filters attached to a query.
 * @returns The expression produced by `parseScalarFilters`, or `undefined`
 *   when no filters are given.
 */
public toMilvusFilter(filters?: MetadataFilters): string | undefined {
  // TODO: Milvus also supports standard filters; support for them can be added later.
  return filters ? parseScalarFilters(filters) : undefined;
}

public async query(
query: VectorStoreQuery,
_options?: any,
Expand All @@ -193,6 +254,7 @@ export class MilvusVectorStore
collection_name: this.collectionName,
limit: query.similarityTopK,
vector: query.queryEmbedding,
filter: this.toMilvusFilter(query.filters),
});

const nodes: BaseNode<Metadata>[] = [];
Expand Down
5 changes: 4 additions & 1 deletion packages/llamaindex/src/storage/vectorStore/PGVectorStore.ts
Original file line number Diff line number Diff line change
Expand Up @@ -272,7 +272,10 @@ export class PGVectorStore
query.filters?.filters.forEach((filter, index) => {
const paramIndex = params.length + 1;
whereClauses.push(`metadata->>'${filter.key}' = $${paramIndex}`);
params.push(filter.value);
// TODO: support filter with other operators
if (!Array.isArray(filter.value)) {
params.push(filter.value);
}
});

const where =
Expand Down
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import {
VectorStoreBase,
type ExactMatchFilter,
type IEmbedModel,
type MetadataFilter,
type MetadataFilters,
type VectorStoreNoEmbedModel,
type VectorStoreQuery,
Expand Down Expand Up @@ -199,8 +199,12 @@ export class PineconeVectorStore
}

toPineconeFilter(stdFilters?: MetadataFilters) {
return stdFilters?.filters?.reduce((carry: any, item: ExactMatchFilter) => {
carry[item.key] = item.value;
return stdFilters?.filters?.reduce((carry: any, item: MetadataFilter) => {
// Use MetadataFilter with EQ operator to replace ExactMatchFilter
// TODO: support filter with other operators
if (item.operator === "==") {
carry[item.key] = item.value;
}
return carry;
}, {});
}
Expand Down
Loading

0 comments on commit 1ebc973

Please sign in to comment.