Skip to content

Commit

Permalink
Merge pull request #2 from awhiteside1/search
Browse files Browse the repository at this point in the history
Semantic Search Happy Path
  • Loading branch information
awhiteside1 authored Sep 16, 2024
2 parents 70d5c9f + 8c8968d commit 10adc6d
Show file tree
Hide file tree
Showing 15 changed files with 3,440 additions and 35 deletions.
17 changes: 8 additions & 9 deletions .github/workflows/build.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -3,25 +3,24 @@ on:
push:

jobs:
build:
runs-on: ubuntu-22.04
strategy:
matrix:
node-version: [20]
build_and_test:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
- name: Install pnpm
uses: pnpm/action-setup@v4
with:
version: 9
- name: Use Node.js ${{ matrix.node-version }}
- name: Use Node.js
uses: actions/setup-node@v4
with:
node-version: ${{ matrix.node-version }}
node-version: 22
cache: 'pnpm'
- name: Install dependencies
run: pnpm install
- name: Build
run: pnpm build
- name: Lint
run: pnpm lint
- name: Build
run: pnpm build
- name: Test
run: pnpm test
1 change: 1 addition & 0 deletions .npmrc
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
auto-install-peers=true
2 changes: 1 addition & 1 deletion package.json
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
"description": "",
"main": "index.js",
"scripts": {
"test": "echo \"Error: no test specified\" && exit 1",
"test": "pnpm run -r test",
"lint": "biome check .",
"prettier": "prettier -w .",
"lint:fix": "pnpm run prettier && biome check --write . ",
Expand Down
2 changes: 2 additions & 0 deletions packages/llm-utils/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
"type": "module",
"types": "./dist/index.d.ts",
"module": "./dist/index.mjs",
"version": "0.0.1",
"files": ["dist/"],
"exports": {
".": {
Expand All @@ -11,6 +12,7 @@
}
},
"scripts": {
"prepack": "pnpm run build",
"test": "vitest --run",
"bench": "vitest --run bench",
"build:stub": "unbuild --stub",
Expand Down
42 changes: 42 additions & 0 deletions packages/semantic-search/e2e/_setup/createConfig.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
import { mongooseAdapter } from '@payloadcms/db-mongodb'
import defu from 'defu'
import { MongoMemoryReplSet } from 'mongodb-memory-server'
import type { CollectionConfig, Config } from 'payload'
import { uid } from 'radash'
import { inject } from 'vitest'
import type { VectorDB } from '../../src'

/**
 * Test fixture: produces a collection config by layering the caller's
 * overrides on top of a minimal default collection (`myCollection` with a
 * single `description` text field).
 *
 * @param base - Partial collection config whose values are merged over the defaults via `defu`.
 * @returns The merged result, asserted to be a `CollectionConfig`.
 */
export const givenACollectionConfig = (
  base: Partial<CollectionConfig> = {},
): CollectionConfig => {
  const fallback = {
    slug: 'myCollection',
    fields: [{ name: 'description', type: 'text' }],
  }
  const merged = defu(base, fallback)
  return merged as CollectionConfig
}
/**
 * Test fixture: builds a Payload `Config` by layering the caller's overrides
 * on top of defaults that point at the MongoDB instance whose URL is injected
 * under the `mongoURL` key.
 *
 * @param base - Partial config merged over the defaults via `defu`. Defaults to
 *   an empty object, matching `givenACollectionConfig`.
 * @returns The merged result, asserted to be a `Config`.
 */
export const givenAnEnvironment = (base: Partial<Config> = {}): Config => {
  const config: Config = {
    secret: 'hello',
    // Postgres alternative, kept for reference:
    // db: postgresAdapter({
    //   pool: {
    //     connectionString: inject('postgresURL'),
    //   },
    // }),
    db: mongooseAdapter({ url: inject('mongoURL') }),
    plugins: [],
  }

  return defu(base, config) as Config
}

/**
 * Test fixture: produces a `VectorDB` whose methods are `vi.fn()` mocks unless
 * the caller supplies its own implementation for them.
 *
 * @param base - Partial `VectorDB` merged over the mock defaults via `defu`.
 * @returns The merged result, asserted to be a `VectorDB`.
 */
export const givenAVectorDB = (base: Partial<VectorDB>) => {
  const mockDefaults = {
    name: 'mock',
    upsert: vi.fn(),
    search: vi.fn(),
    createTable: vi.fn(),
    delete: vi.fn(),
  }
  return defu(base, mockDefaults) as VectorDB
}
53 changes: 53 additions & 0 deletions packages/semantic-search/e2e/_setup/vitest.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
import Docker from 'dockerode'
import { MongoMemoryReplSet } from 'mongodb-memory-server'
import { sleep, uid } from 'radash'
import type { GlobalSetupContext } from 'vitest/node'

/**
 * Starts a throwaway Postgres container through the local Docker daemon and
 * returns its connection URL together with a `shutdown` callback.
 *
 * The container is force-removed (not merely stopped) on shutdown and on any
 * start-up failure, so repeated test runs do not accumulate stopped
 * `postgres-*` containers.
 *
 * @returns `url` — connection string for the `postgres` database;
 *          `shutdown` — best-effort cleanup that never throws.
 * @throws Error('no port') when no host port bound on 0.0.0.0 is reported,
 *         or whatever error Docker raised while starting/inspecting.
 */
export const givenAPostgres = async () => {
  const docker = new Docker()
  const instance = await docker.createContainer({
    name: `postgres-${uid(5)}`,
    Image: 'postgres',
    Env: ['POSTGRES_PASSWORD=postgres'],
    ExposedPorts: { '5432/tcp': {} },
    HostConfig: {
      // HostPort is left empty; the actual host port is read back from inspect() below.
      PortBindings: { '5432/tcp': [{ HostPort: '' }] },
    },
  })

  // Best-effort teardown: failures are reported but must not fail the test run.
  const dispose = async () => {
    try {
      await instance.remove({ force: true })
    } catch (err) {
      console.warn('failed to remove postgres test container', err)
    }
  }

  try {
    await instance.start()
    const details = await instance.inspect()
    // The port entry can be missing entirely, hence the optional chain before find().
    const port = details.NetworkSettings.Ports['5432/tcp']?.find(
      (x) => x.HostIp === '0.0.0.0',
    )?.HostPort
    if (!port) {
      throw new Error('no port')
    }
    // Fixed delay before handing out the URL — presumably to let Postgres
    // begin accepting connections; this is not a real readiness check.
    await sleep(1500)
    return {
      url: `postgres://postgres:postgres@localhost:${port}/postgres`,
      shutdown: dispose,
    }
  } catch (err) {
    await dispose()
    throw err
  }
}

/**
 * Vitest global setup: creates an in-memory MongoDB replica set, publishes its
 * URI to tests under the `mongoURL` key, and returns a teardown function that
 * stops the replica set.
 *
 * @param context - Global setup context; only `provide` is used.
 * @returns Async teardown callback.
 */
export default async function setup({ provide }: GlobalSetupContext) {
  const replSet = await MongoMemoryReplSet.create({})
  const needsStart = replSet.state === 'stopped'
  if (needsStart) {
    await replSet.start()
  }

  provide('mongoURL', replSet.getUri())

  const teardown = async () => {
    await replSet.stop()
  }
  return teardown
}

// Augments vitest's ProvidedContext so the keys passed to `provide(...)` in
// global setup and read back with `inject(...)` in tests are typed.
declare module 'vitest' {
export interface ProvidedContext {
// Declared for the Postgres variant; the setup function in this file does not provide it.
postgresURL: string
// URI of the in-memory MongoDB replica set, provided by the default-exported setup().
mongoURL: string
}
}
64 changes: 59 additions & 5 deletions packages/semantic-search/e2e/index.spec.ts
Original file line number Diff line number Diff line change
@@ -1,8 +1,62 @@
import { expect } from 'vitest'
import { adder } from '../src'
import { mongooseAdapter } from '@payloadcms/db-mongodb'
import { MongoMemoryReplSet } from 'mongodb-memory-server'
import payload, { buildConfig, type Payload } from 'payload'
import { list } from 'radash'
import { afterAll, beforeAll, expect } from 'vitest'
import { semanticSearchPlugin } from '../src'
import {
givenACollectionConfig,
givenAVectorDB,
givenAnEnvironment,
} from './_setup/createConfig'

describe('Semantic Search', () => {
it('should work', () => {
expect(adder(1, 1)).toEqual(2)
// End-to-end happy path: creating a document in an indexed collection must
// embed the indexed field and upsert the resulting vector into the vector DB.
describe('Semantic Search', async () => {
  let instance: Payload
  // Dedicated in-memory replica set for this suite; stopped in afterAll.
  const mongo = await MongoMemoryReplSet.create()

  const spys = {
    // Stand-in embedding function resolving to a short list of random numbers.
    embeddingSpy: vi.fn(() => Promise.resolve(list(0, 5, () => Math.random()))),
    upsertSpy: vi.fn(),
  }

  beforeAll(async () => {
    const environment = givenAnEnvironment({
      collections: [givenACollectionConfig({ slug: 'myCollection' })],
      plugins: [
        semanticSearchPlugin({
          vectorDB: givenAVectorDB({ upsert: spys.upsertSpy }),
          indexableFields: ['myCollection.description'],
          enabled: true,
          dimensions: 768,
          embeddingFn: spys.embeddingSpy,
        }),
      ],
      db: mongooseAdapter({ mongoMemoryServer: mongo, url: mongo.getUri() }),
    })
    instance = await payload.init({
      config: buildConfig(environment),
      loggerOptions: { enabled: false },
    })
  })

  afterAll(async () => {
    await mongo.stop()
  })

  it('should insert a vector on create', async () => {
    const item = await instance.create({
      collection: 'myCollection',
      data: { description: 'hello' },
    })
    expect(item.id).toBeTruthy()
    expect(spys.embeddingSpy).toHaveBeenCalledWith('hello')
    // The field hook fires the embedding/upsert without awaiting it, so the
    // upsert may land after `create` resolves; poll instead of asserting once.
    await vi.waitFor(() => {
      expect(spys.upsertSpy).toHaveBeenCalledWith(
        expect.objectContaining({
          documentId: item.id,
          collection: 'myCollection',
          field: 'description',
        }),
      )
    })
  })
})
20 changes: 16 additions & 4 deletions packages/semantic-search/package.json
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
{
"name": "@payload-llm-plugins/semantic-search",
"type": "module",
"version": "0.0.1",
"types": "./dist/index.d.ts",
"module": "./dist/index.mjs",
"files": ["dist/"],
Expand All @@ -11,18 +12,29 @@
}
},
"scripts": {
"prepack": "pnpm run build",
"test": "vitest --run",
"bench": "vitest --run bench",
"build:stub": "unbuild --stub",
"build": "unbuild",
"postinstall": "pnpm build:stub"
},
"devDependencies": {},
"devDependencies": {
"@apache-arrow/ts": "^17.0.0",
"@lancedb/lancedb": "^0.10.0",
"@payloadcms/db-mongodb": "^3.0.0-beta.107",
"@payloadcms/db-postgres": "^3.0.0-beta.107",
"@types/dockerode": "^3.3.31",
"dockerode": "^4.0.2",
"mongodb-memory-server": "^10.0.0",
"ollama": "^0.5.9"
},
"dependencies": {
"@workspace/llm-utils": "workspace:*"
"@workspace/llm-utils": "workspace:*",
"defu": "^6.1.4",
"radash": "^12.1.0"
},
"bundledDependencies": ["@workspace/llm-utils"],
"peerDependencies": {
"payload": "^3.0.0-beta.106"
"payload": "^3.0.0-beta.107"
}
}
65 changes: 65 additions & 0 deletions packages/semantic-search/src/hooks/afterChangeHook.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
import type { FieldHook, FieldHookArgs } from 'payload'
import { isObject, isString } from 'radash'
import { getSemanticSearchCustom } from '../utils/customContext'

/**
 * Field-level hook that keeps the vector store in sync with a field's value.
 *
 * - create / update: re-embeds the value when it differs from the previous one.
 * - delete: removes the stored embedding.
 *
 * The work is started but not awaited; rejections are routed to `console.error`.
 */
export const afterChangeHook: FieldHook = (args) => {
  switch (args.operation) {
    case 'create':
    case 'update':
      if (args.previousValue !== args.value) {
        insertEmbedding(args).catch(console.error)
      }
      break
    case 'delete':
      deleteEmbedding(args).catch(console.error)
      break
  }
}

/**
 * Embeds a field's string value and upserts the vector for the owning document.
 *
 * Does nothing unless the value and field name are strings, the collection and
 * original document are objects, and the original document carries an `id`.
 */
const insertEmbedding = async ({
  req,
  value,
  collection,
  field,
  originalDoc,
}: Pick<
  FieldHookArgs,
  'field' | 'collection' | 'originalDoc' | 'value' | 'req'
>) => {
  const semanticSearch = getSemanticSearchCustom(req.payload.config.custom)

  // Guard clauses, evaluated in the same order as a single short-circuit chain.
  if (!isString(value) || !isString(field.name)) return
  if (!isObject(collection) || !isObject(originalDoc)) return
  if (!('id' in originalDoc)) return

  const embedding = await semanticSearch.embeddingFn(value)
  const record = {
    collection: collection.slug,
    vector: embedding,
    field: field.name,
    documentId: originalDoc.id as string | number,
  }
  await semanticSearch.vectorDB.upsert(record)
}

/**
 * Removes the stored embedding for one field of one document.
 *
 * Does nothing unless the field name is a string, the collection and `data`
 * are objects, and `data` carries an `id`.
 */
const deleteEmbedding = async ({
  field,
  collection,
  req,
  data,
}: Pick<FieldHookArgs, 'field' | 'collection' | 'data' | 'req'>) => {
  // Pass the same argument insertEmbedding does (`config.custom`), rather than
  // the whole payload instance, so both paths resolve the plugin context alike.
  const semanticSearch = getSemanticSearchCustom(req.payload.config.custom)

  if (
    !isString(field.name) ||
    !isObject(collection) ||
    !isObject(data) ||
    !('id' in data)
  )
    return

  await semanticSearch.vectorDB.delete({
    collection: collection.slug,
    documentId: data.id,
    field: field.name,
  })
}
38 changes: 37 additions & 1 deletion packages/semantic-search/src/index.ts
Original file line number Diff line number Diff line change
@@ -1 +1,37 @@
export { adder } from '@workspace/llm-utils'
import defu from 'defu'
import type { Config, FieldBase } from 'payload'
import { afterChangeHook } from './hooks/afterChangeHook'
import type { SemanticSearchPluginConfig } from './types'
import { setupSemanticSearchCustom } from './utils/customContext'
import { getField, parseFields } from './utils/fields'

/**
 * Payload plugin factory for semantic search.
 *
 * When the plugin is disabled the incoming config is returned untouched.
 * Otherwise the after-change hook is attached to every indexable field, and
 * the vector DB and embedding function are registered on the config through
 * `setupSemanticSearchCustom`.
 *
 * @param incomingPluginConfig - Plugin options supplied by the user.
 * @returns A function that takes the Payload config and returns the resulting config.
 */
export const semanticSearchPlugin =
  (incomingPluginConfig: SemanticSearchPluginConfig) =>
  (config: Config): Config => {
    if (!incomingPluginConfig.enabled) return config

    const { indexableFields, vectorDB, embeddingFn } = incomingPluginConfig
    setupFields(config, indexableFields)

    return setupSemanticSearchCustom(config, { vectorDB, embeddingFn })
  }

/**
 * Attaches `afterChangeHook` to each configured indexable field, mutating the
 * field configs inside `config` in place. Entries for which `getField` finds
 * nothing are skipped.
 *
 * @param config - Payload config whose fields are updated.
 * @param indexableFields - Field references, handed to `parseFields`.
 */
const setupFields = (config: Config, indexableFields: Array<string>) => {
  for (const { collection, field: fieldName } of parseFields(indexableFields)) {
    const target = getField(config, collection, fieldName)
    if (target) {
      // The defaults literal is built per field so merged `hooks` objects are never shared.
      const withHook = defu(target.fieldConfig, {
        hooks: { afterChange: [afterChangeHook] } satisfies FieldBase['hooks'],
      })
      Object.assign(target.fieldConfig, withHook)
    }
  }
}

export type { SemanticSearchPluginConfig, VectorDB } from './types'
Loading

0 comments on commit 10adc6d

Please sign in to comment.