Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: add filter for SimpleVectorStore #1030

Merged
merged 12 commits into from
Jul 17, 2024
7 changes: 7 additions & 0 deletions .changeset/famous-poets-hammer.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
---
"llamaindex": patch
"@llamaindex/llamaindex-test": patch
"@llamaindex/core": patch
---

Add support for Metadata filters
4 changes: 2 additions & 2 deletions apps/docs/docs/modules/query_engines/metadata_filtering.md
Original file line number Diff line number Diff line change
Expand Up @@ -75,7 +75,7 @@ const queryEngine = index.asQueryEngine({
{
key: "dogId",
value: "2",
filterType: "ExactMatch",
operator: "==",
},
],
},
Expand Down Expand Up @@ -135,7 +135,7 @@ async function main() {
{
key: "dogId",
value: "2",
filterType: "ExactMatch",
operator: "==",
},
],
},
Expand Down
2 changes: 1 addition & 1 deletion examples/chromadb/preFilters.ts
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@ async function main() {
{
key: "dogId",
value: "2",
filterType: "ExactMatch",
operator: "==",
},
],
},
Expand Down
40 changes: 40 additions & 0 deletions examples/metadata-filter/milvus.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
import { MilvusVectorStore, VectorStoreIndex } from "llamaindex";

const collectionName = "movie_reviews";

/**
 * Demonstrates metadata pre-filtering against an existing Milvus collection.
 *
 * Wraps the collection in a vector index, then runs a single query whose two
 * filters (`==` and `!=` on the same `document_id` value) are combined with
 * the "or" condition. The number of source nodes and the response are printed.
 * Any error raised along the way is logged instead of being rethrown.
 */
async function main() {
  try {
    const vectorStore = new MilvusVectorStore({ collection: collectionName });
    const index = await VectorStoreIndex.fromVectorStore(vectorStore);
    const topKRetriever = index.asRetriever({ similarityTopK: 20 });

    console.log("\n=====\nQuerying the index with filters");
    const filteredEngine = index.asQueryEngine({
      retriever: topKRetriever,
      preFilters: {
        condition: "or",
        filters: [
          {
            key: "document_id",
            operator: "==",
            value: "./data/movie_reviews.csv_37",
          },
          {
            key: "document_id",
            operator: "!=",
            value: "./data/movie_reviews.csv_37",
          },
        ],
      },
    });

    const answer = await filteredEngine.query({
      query: "Get all movie titles.",
    });
    const sourceCount = answer.sourceNodes?.length;
    console.log(`Query from ${sourceCount} nodes`);
    console.log(answer.response);
  } catch (err) {
    console.error(err);
  }
}

void main();
143 changes: 143 additions & 0 deletions examples/metadata-filter/simple.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,143 @@
import {
Document,
Settings,
SimpleDocumentStore,
VectorStoreIndex,
storageContextFromDefaults,
} from "llamaindex";

Settings.callbackManager.on("retrieve-end", (event) => {
const { nodes } = event.detail;
console.log("Number of retrieved nodes:", nodes.length);
});

/**
 * Returns a vector index backed by the `./cache` persist directory.
 *
 * When the document store loaded from that directory holds no documents, the
 * index is built from three sample "dog" documents (each tagged with `dogId`
 * and `private` metadata). Otherwise the index is initialised from the
 * existing storage context.
 */
async function getDataSource() {
  // Sample rows: [text, dogId, private]
  const seedRows: Array<[text: string, dogId: string, isPrivate: boolean]> = [
    ["The dog is brown", "1", true],
    ["The dog is yellow", "2", false],
    ["The dog is red", "3", false],
  ];
  const docs = seedRows.map(
    ([text, dogId, isPrivate]) =>
      new Document({ text, metadata: { dogId, private: isPrivate } }),
  );

  const storageContext = await storageContextFromDefaults({
    persistDir: "./cache",
  });
  const storedDocs = (storageContext.docStore as SimpleDocumentStore).toDict();
  const isStoreEmpty = Object.keys(storedDocs).length === 0;

  if (!isStoreEmpty) {
    // Documents are already present in the store: reuse them.
    return await VectorStoreIndex.init({ storageContext });
  }

  // Nothing stored yet: generate the data source from the sample documents.
  return await VectorStoreIndex.fromDocuments(docs, { storageContext });
}

/**
 * Runs the same question against the sample index four times to show how
 * metadata pre-filters change the result:
 *
 * 1. no filters,
 * 2. `private == "false"` together with `dogId == "3"` (no explicit condition),
 * 3. `dogId in ["1", "3"]`,
 * 4. `private == "false"` combined with `dogId in ["1", "3"]` using "or".
 *
 * Each response is printed to the console; the log text names the filters
 * that are actually applied by the corresponding query engine.
 */
async function main() {
  const index = await getDataSource();
  console.log(
    "=============\nQuerying index with no filters. The output should be any color.",
  );
  const queryEngineNoFilters = index.asQueryEngine({
    similarityTopK: 3,
  });
  const noFilterResponse = await queryEngineNoFilters.query({
    query: "What is the color of the dog?",
  });
  console.log("No filter response:", noFilterResponse.toString());

  // The filter below targets dogId "3" (the red dog), so the messages say 3.
  console.log(
    "\n=============\nQuerying index with dogId 3 and private false. The output always should be red.",
  );
  const queryEngineEQ = index.asQueryEngine({
    preFilters: {
      filters: [
        {
          key: "private",
          // NOTE(review): the documents store `private` as a boolean while the
          // filter value is the string "false" — confirm the store compares
          // these as intended.
          value: "false",
          operator: "==",
        },
        {
          key: "dogId",
          value: "3",
          operator: "==",
        },
      ],
    },
    similarityTopK: 3,
  });
  const responseEQ = await queryEngineEQ.query({
    query: "What is the color of the dog?",
  });
  console.log("Filter with dogId 3 response:", responseEQ.toString());

  console.log(
    "\n=============\nQuerying index with dogId IN (1, 3). The output should be brown and red.",
  );
  const queryEngineIN = index.asQueryEngine({
    preFilters: {
      filters: [
        {
          key: "dogId",
          value: ["1", "3"],
          operator: "in",
        },
      ],
    },
    similarityTopK: 3,
  });
  const responseIN = await queryEngineIN.query({
    query: "What is the color of the dog?",
  });
  console.log("Filter with dogId IN (1, 3) response:", responseIN.toString());

  // Two filters joined with "or": either one matching is enough.
  console.log(
    "\n=============\nQuerying index with private false OR dogId IN (1, 3). The output should be any.",
  );
  const queryEngineOR = index.asQueryEngine({
    preFilters: {
      filters: [
        {
          key: "private",
          value: "false",
          operator: "==",
        },
        {
          key: "dogId",
          value: ["1", "3"],
          operator: "in",
        },
      ],
      condition: "or",
    },
    similarityTopK: 3,
  });
  const responseOR = await queryEngineOR.query({
    query: "What is the color of the dog?",
  });
  console.log(
    "Filter with dogId with OR operator response:",
    responseOR.toString(),
  );
}

void main();
2 changes: 1 addition & 1 deletion examples/qdrantdb/preFilters.ts
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,7 @@ async function main() {
{
key: "dogId",
value: "2",
filterType: "ExactMatch",
operator: "==",
},
],
},
Expand Down
64 changes: 63 additions & 1 deletion packages/llamaindex/src/storage/vectorStore/MilvusVectorStore.ts
Original file line number Diff line number Diff line change
Expand Up @@ -11,11 +11,66 @@ import {
import {
VectorStoreBase,
type IEmbedModel,
type MetadataFilters,
type VectorStoreNoEmbedModel,
type VectorStoreQuery,
type VectorStoreQueryResult,
} from "./types.js";
import { metadataDictToNode, nodeToMetadata } from "./utils.js";
import {
metadataDictToNode,
nodeToMetadata,
parseArrayValue,
parseNumberValue,
parsePrimitiveValue,
} from "./utils.js";

/**
 * Builds a filter expression string over the `metadata` field from a set of
 * metadata filters.
 *
 * Each filter becomes one clause; clauses are joined with the filters'
 * `condition` ("and" when none is given).
 *
 * - `==`, `!=`: compares against the primitive value, emitted in double quotes.
 * - `in`: compares against a bracketed list of double-quoted values.
 * - `nin`: expanded into one `!=` comparison per excluded value, joined with
 *   `&&` and wrapped in parentheses when there is more than one value.
 *   An empty exclusion list is vacuously true and contributes no clause.
 * - `<`, `<=`, `>`, `>=`: compares against the numeric value, unquoted.
 *
 * @param scalarFilters filters and optional combining condition
 * @returns the expression string; empty when there is nothing to restrict
 * @throws Error when a filter uses an operator not listed above
 */
function parseScalarFilters(scalarFilters: MetadataFilters): string {
  const condition = scalarFilters.condition ?? "and";
  const filters: string[] = [];
  // Set when some filter matches every row (currently: `nin` with no values).
  let hasAlwaysTrueClause = false;

  // NOTE(review): keys and values are interpolated without escaping; a value
  // containing `"` would break the expression — confirm inputs are trusted.
  for (const filter of scalarFilters.filters) {
    switch (filter.operator) {
      case "==":
      case "!=": {
        filters.push(
          `metadata["${filter.key}"] ${filter.operator} "${parsePrimitiveValue(filter.value)}"`,
        );
        break;
      }
      case "in": {
        const filterValue = parseArrayValue(filter.value)
          .map((v) => `"${v}"`)
          .join(", ");
        filters.push(
          `metadata["${filter.key}"] ${filter.operator} [${filterValue}]`,
        );
        break;
      }
      case "nin": {
        // No `nin` operator is emitted; every excluded value is checked
        // manually instead.
        // Produces: (metadata["key"] != "value1" && metadata["key"] != "value2")
        // NOTE(review): Milvus's expression docs list a `not in` operator —
        // confirm whether it could replace this expansion.
        const excluded = parseArrayValue(filter.value);
        if (excluded.length === 0) {
          // Excluding nothing restricts nothing. Pushing an empty clause here
          // would leave a dangling connective in the joined expression.
          hasAlwaysTrueClause = true;
          break;
        }
        const filterStr = excluded
          .map((v) => `metadata["${filter.key}"] != "${v}"`)
          .join(" && ");
        // Keep the group explicit when it sits next to other clauses.
        filters.push(excluded.length > 1 ? `(${filterStr})` : filterStr);
        break;
      }
      case "<":
      case "<=":
      case ">":
      case ">=": {
        filters.push(
          `metadata["${filter.key}"] ${filter.operator} ${parseNumberValue(filter.value)}`,
        );
        break;
      }
      default:
        throw new Error(`Operator ${filter.operator} is not supported.`);
    }
  }

  // Under "or", a single always-true clause makes the whole expression true,
  // so no restriction is emitted (same result as an empty filter list).
  if (hasAlwaysTrueClause && condition === "or") {
    return "";
  }

  return filters.join(` ${condition} `);
}

export class MilvusVectorStore
extends VectorStoreBase
Expand Down Expand Up @@ -183,6 +238,12 @@ export class MilvusVectorStore
});
}

/**
 * Converts optional metadata filters into a filter expression string.
 *
 * @param filters metadata filters of a query, if any
 * @returns the expression built by `parseScalarFilters`, or `undefined` when
 *   no filters were supplied
 */
public toMilvusFilter(filters?: MetadataFilters): string | undefined {
  // TODO: only scalar filters are translated for now; support for Milvus
  // standard filters can be added later.
  return filters ? parseScalarFilters(filters) : undefined;
}

public async query(
query: VectorStoreQuery,
_options?: any,
Expand All @@ -193,6 +254,7 @@ export class MilvusVectorStore
collection_name: this.collectionName,
limit: query.similarityTopK,
vector: query.queryEmbedding,
filter: this.toMilvusFilter(query.filters),
});

const nodes: BaseNode<Metadata>[] = [];
Expand Down
5 changes: 4 additions & 1 deletion packages/llamaindex/src/storage/vectorStore/PGVectorStore.ts
Original file line number Diff line number Diff line change
Expand Up @@ -272,7 +272,10 @@ export class PGVectorStore
query.filters?.filters.forEach((filter, index) => {
const paramIndex = params.length + 1;
whereClauses.push(`metadata->>'${filter.key}' = $${paramIndex}`);
params.push(filter.value);
// TODO: support filter with other operators
if (!Array.isArray(filter.value)) {
params.push(filter.value);
}
});

const where =
Expand Down
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import {
VectorStoreBase,
type ExactMatchFilter,
type IEmbedModel,
type MetadataFilter,
type MetadataFilters,
type VectorStoreNoEmbedModel,
type VectorStoreQuery,
Expand Down Expand Up @@ -199,8 +199,12 @@ export class PineconeVectorStore
}

toPineconeFilter(stdFilters?: MetadataFilters) {
return stdFilters?.filters?.reduce((carry: any, item: ExactMatchFilter) => {
carry[item.key] = item.value;
return stdFilters?.filters?.reduce((carry: any, item: MetadataFilter) => {
// Use MetadataFilter with EQ operator to replace ExactMatchFilter
// TODO: support filter with other operators
if (item.operator === "==") {
carry[item.key] = item.value;
}
return carry;
}, {});
}
Expand Down
Loading