-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
1 parent
10adc6d
commit 187412d
Showing
13 changed files
with
7,529 additions
and
621 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,57 @@ | ||
import { type Table, connect } from '@lancedb/lancedb' | ||
import { LanceSchema } from '@lancedb/lancedb/embedding' | ||
import { Utf8 } from 'apache-arrow' | ||
import type { Identifier, InsertFields, VectorDB } from '../../types' | ||
import { OllamaEmbeddings } from '../ollama' | ||
|
||
/**
 * `VectorDB` implementation backed by a single LanceDB table.
 *
 * Rows carry the source `text`, its embedding `vector`, and the
 * `documentId` / `field` / `collection` triple used to filter and delete.
 * Instances are normally obtained through {@link LanceDB.create}.
 */
export class LanceDB implements VectorDB {
  public name = 'lancedb'

  constructor(private readonly table: Table) {}

  /**
   * Quotes a value as a SQL string literal, doubling embedded single quotes
   * so caller-supplied values cannot terminate the literal early.
   */
  private static quote(value: string): string {
    return `'${String(value).replace(/'/g, "''")}'`
  }

  /**
   * Searches the table for `query`, restricted to one collection/field pair.
   *
   * @param index - collection and field the matches must belong to
   * @param query - text handed to the table's `search`
   * @returns at most 5 matching rows, each converted with `toJSON()`
   */
  async search(index: { collection: string; field: string }, query: string) {
    // The filter is a bare SQL predicate: no leading `WHERE`, string values
    // quoted, and column names backtick-escaped.
    const predicate = [
      `\`field\` = ${LanceDB.quote(index.field)}`,
      `\`collection\` = ${LanceDB.quote(index.collection)}`,
    ].join(' AND ')

    const rows = await this.table
      .search(query)
      .where(predicate)
      .limit(5)
      .toArray()
    return rows.map((row) => row.toJSON())
  }

  /** Returns the underlying LanceDB table handle. */
  getTable() {
    return this.table
  }

  /**
   * Connects to the database at `path`, creates the `payloadDocuments` table
   * if needed (`existOk: true`), and wraps it in a `LanceDB` instance.
   *
   * @param path - database location passed to `connect`; defaults to `./lancedb`
   */
  static async create(path = './lancedb') {
    // NOTE(review): the embedding host is hard-coded to a specific address;
    // consider making it configurable before deploying elsewhere.
    const func = new OllamaEmbeddings({
      host: 'http://100.67.29.127:11434',
      model: 'nomic-embed-text',
      timeout: 10000,
    })
    const schema = LanceSchema({
      text: func.sourceField(),
      documentId: new Utf8(),
      field: new Utf8(),
      collection: new Utf8(),
      vector: func.vectorField(),
    })

    const connection = await connect(path)
    const table = await connection.createEmptyTable(
      'payloadDocuments',
      schema,
      { existOk: true },
    )
    return new LanceDB(table)
  }

  /**
   * Deletes every row matching the given document/field/collection triple.
   *
   * @param record - identifier of the rows to remove
   */
  async delete(record: Identifier) {
    // `documentId` contains upper-case characters, so it must be
    // backtick-escaped or the SQL engine will case-fold the name.
    const predicate = [
      `\`documentId\` = ${LanceDB.quote(record.documentId)}`,
      `\`field\` = ${LanceDB.quote(record.field)}`,
      `\`collection\` = ${LanceDB.quote(record.collection)}`,
    ].join(' AND ')
    await this.table.delete(predicate)
  }

  /**
   * Appends `fields` to the table as one row.
   *
   * NOTE(review): despite the name this only adds; any existing row for the
   * same document is left in place. Presumably callers invoke `delete`
   * first — verify against the call sites.
   */
  async upsert(fields: InsertFields) {
    await this.table.add([fields])
  }
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,59 @@ | ||
import { embedding } from '@lancedb/lancedb' | ||
import { | ||
EmbeddingFunction, | ||
TextEmbeddingFunction, | ||
getRegistry, | ||
} from '@lancedb/lancedb/embedding' | ||
import type { Float } from 'apache-arrow' | ||
import { Ollama } from 'ollama' | ||
|
||
interface Options { | ||
model: string | ||
timeout: number | ||
host: string | ||
} | ||
// @ts-ignore
@embedding.register('ollama')
/**
 * LanceDB text-embedding function that obtains vectors from an Ollama server.
 *
 * Registered with the embedding registry under the alias `ollama` by the
 * decorator above.
 */
export class OllamaEmbeddings extends TextEmbeddingFunction<Partial<Options>> {
  private client: Ollama

  /**
   * @param modelOptions - Ollama settings; `host` selects the server, `model`
   *   the embedding model. `timeout` is stored but not read by this class.
   */
  constructor(private modelOptions: Options) {
    super()
    const { host } = modelOptions
    this.client = new Ollama({ host })
  }

  /** Defers to the base class for the Arrow float type of the vectors. */
  embeddingDataType(): Float {
    return super.embeddingDataType()
  }

  /**
   * Fixed vector width reported to LanceDB.
   * NOTE(review): presumably the output size of `nomic-embed-text` — confirm
   * before configuring a different model.
   */
  override ndims() {
    return 768
  }

  /** Serialises the options together with the registry alias. */
  toJSON(): object {
    return { ...this.modelOptions, type: 'ollama' }
  }

  /**
   * Embeds each text with one Ollama `embeddings` call; all calls are issued
   * concurrently and the results keep the order of `texts`.
   *
   * @param texts - strings to embed
   * @returns one embedding per input text
   */
  async generateEmbeddings(
    texts: string[],
  ): Promise<number[][] | Float32Array[] | Float64Array[]> {
    const embedOne = async (prompt: string) => {
      const model = this.modelOptions?.model ?? 'nomic-embed-text'
      const { embedding: vector } = await this.client.embeddings({ model, prompt })
      return vector
    }
    return Promise.all(texts.map((text) => embedOne(text)))
  }
}
|
||
/**
 * Ensures `OllamaEmbeddings` is registered with LanceDB's embedding registry
 * under the alias `ollama`.
 *
 * The class is already registered by its `@embedding.register('ollama')`
 * decorator when this module loads, so this function is written to be
 * idempotent: it only registers when the alias is still missing.
 */
export const register = () => {
  const registry = getRegistry()
  // NOTE(review): the registry is expected to reject a duplicate alias —
  // confirm against the LanceDB version in use. Skipping is safe either way.
  if (registry.get('ollama') !== undefined) {
    return
  }
  // @ts-ignore
  registry.register('ollama')(OllamaEmbeddings)
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.