Skip to content

Commit

Permalink
fix(llm.gblib): Talk to data local db use fix.
Browse files (browse the repository at this point in the history)
  • Loading branch information
rodrigorodriguez committed Nov 24, 2024
1 parent 6e46bcf commit 9c2c1bc
Show file tree
Hide file tree
Showing 2 changed files with 11 additions and 15 deletions.
18 changes: 11 additions & 7 deletions packages/llm.gblib/services/ChatServices.ts
Original file line number Diff line number Diff line change
Expand Up @@ -180,27 +180,31 @@ export class ChatServices {
if (sanitizedQuestion === '' || !vectorStore) {
return '';
}

let documents = await vectorStore.similaritySearch(sanitizedQuestion, numDocuments);
let documents = await vectorStore.similaritySearch(sanitizedQuestion, numDocuments * 10);
const uniqueDocuments = {};
const MAX_DOCUMENTS = numDocuments;

for (const document of documents) {
if (!GBUtil.isContentPage(document.pageContent)) {
continue;
}

if (!uniqueDocuments[document.metadata.source]) {
uniqueDocuments[document.metadata.source] = document;
}
}

// Stop once we have max unique documents
if (Object.keys(uniqueDocuments).length >= MAX_DOCUMENTS) {
break;
}
}
let output = '';

for (const filePaths of Object.keys(uniqueDocuments)) {
const doc = uniqueDocuments[filePaths];
const metadata = doc.metadata;
const filename = path.basename(metadata.source);

if (!GBUtil.isContentPage(doc.pageContent)){
continue;
}

let page = 0;
if (metadata.source.endsWith('.pdf')) {
page = await ChatServices.findPageForText(metadata.source, doc.pageContent);
Expand Down
8 changes: 0 additions & 8 deletions src/util.ts
Original file line number Diff line number Diff line change
Expand Up @@ -365,15 +365,7 @@ export class GBUtil {
// Common patterns that indicate non-content pages
const nonContentPatterns = [
/^index$/i,
/^contents$/i,
/^table of contents$/i,
/^appendix/i,
/^glossary$/i,
/^bibliography$/i,
/^references$/i,
/^acknowledgments?$/i,
/^copyright/i,
/^about the author/i
];

// Check if page is mostly dots, numbers or blank
Expand Down

0 comments on commit 9c2c1bc

Please sign in to comment.