diff --git a/manifest.json b/manifest.json
index 5f27c38..e7d97bc 100644
--- a/manifest.json
+++ b/manifest.json
@@ -1,7 +1,7 @@
 {
 	"id": "smart-seeker",
 	"name": "Smart Seeker",
-	"version": "0.0.12",
+	"version": "0.0.13",
 	"minAppVersion": "0.15.0",
 	"description": "Demonstrates some of the capabilities of the Obsidian API.",
 	"author": "Obsidian",
diff --git a/package.json b/package.json
index f357f63..273df3b 100644
--- a/package.json
+++ b/package.json
@@ -1,6 +1,6 @@
 {
 	"name": "smart-seeker",
-	"version": "0.0.12",
+	"version": "0.0.13",
 	"description": "An Obsidian plugin that enables fast and intelligent note search using RAG (Retrieval Augmented Generation) with Pinecone vector database and OpenAI",
 	"main": "main.js",
 	"scripts": {
diff --git a/src/helpers/document/DocumentProcessor.ts b/src/helpers/document/DocumentProcessor.ts
index 832d542..064b135 100644
--- a/src/helpers/document/DocumentProcessor.ts
+++ b/src/helpers/document/DocumentProcessor.ts
@@ -20,13 +20,6 @@ import { getFileNameSafe } from "../utils/fileUtils";
 import getEmbeddingModel from "../utils/getEmbeddingModel";
 import { createContentHash, createHash } from "../utils/hash";
 
-interface ProcessingResult {
-	totalDocuments: number; // 입력된 전체 문서 수
-	processedDocuments: number; // 처리된 문서 수
-	skippedDocuments: number; // 건너뛴 문서 수
-	processedChunks: number; // 처리된 청크 수
-}
-
 interface DocumentChunk {
 	ids: string[];
 	chunks: Document[];
@@ -70,81 +63,11 @@ export default class DocumentProcessor {
 		return pinecone.Index(settings.selectedIndex);
 	}
 
-	// 기존 파인콘DB에 있는 문서는 필터링한다.
-	async filterDocuments(documents: Document[]): Promise<Document[]> {
-		if (!documents?.length) return [];
-
-		try {
-			const documentIds = this.generateDocumentIds(documents);
-			const existingHashes = await this.fetchExistingHashes(documentIds);
-
-			return documents.filter((doc) => !existingHashes.has(doc.metadata.hash));
-		} catch (error) {
-			this.logger.error("Error filtering documents:", error);
-		}
-
-		return [];
-	}
-
-	private async fetchExistingHashes(
-		documentIds: string[],
-	): Promise<Set<string>> {
-		const { records } = await this.pineconeIndex.fetch(documentIds);
-		return new Set(
-			Object.values(records).map(
-				(record) => (record.metadata as { hash: string }).hash,
-			),
-		);
-	}
-
 	private generateDocumentIds(documents: Document[]): string[] {
 		return documents.map((doc) => `${doc.metadata.id}-0`);
 	}
 
-	async processSingleDocument(document: Document) {
-		const { ids, chunks } = await this.createChunks([document]);
-		this.logger.debug("chunks", chunks);
-		return await this.saveToVectorStore(chunks, ids);
-	}
-
-	async processSingleFile(file: TFile) {
-		const document = await this.createDocument(file);
-		const { ids, chunks } = await this.createChunks([document]);
-		return await this.saveToVectorStore(chunks, ids);
-	}
-
-	async processDocuments(documents: Document[]): Promise<ProcessingResult> {
-		try {
-			const totalDocuments = documents.length;
-			const filteredDocs = await this.filterDocuments(documents);
-			this.logger.debug("Filtered documents count:", filteredDocs.length);
-
-			if (!filteredDocs.length) {
-				return {
-					totalDocuments: totalDocuments,
-					processedDocuments: 0,
-					skippedDocuments: totalDocuments,
-					processedChunks: 0,
-				};
-			}
-
-			const { ids, chunks } = await this.createChunks(filteredDocs);
-			console.log("chunks", chunks);
-			await this.saveToVectorStore(chunks, ids);
-
-			return {
-				totalDocuments: totalDocuments,
-				processedDocuments: filteredDocs.length,
-				skippedDocuments: totalDocuments - filteredDocs.length,
-				processedChunks: chunks.length,
-			};
-		} catch (error) {
-			this.logger.error("Error processing documents:", error);
-			throw error;
-		}
-	}
-
-	async createDocumentsFromFiles(
+	private async createDocumentsFromFiles(
 		files: TFile[],
 	): Promise<Document<Record<string, unknown>>[]> {
 		const documents: Document[] = [];
@@ -155,13 +78,7 @@ export default class DocumentProcessor {
 		return documents;
 	}
 
-	async processMultiFiles(files: TFile[]) {
-		const documents = await this.createDocumentsFromFiles(files);
-		const { ids, chunks } = await this.createChunks(documents);
-		await this.saveToVectorStore(chunks, ids);
-	}
-
-	async createChunks(documents: Document[]): Promise<DocumentChunk> {
+	private async createChunks(documents: Document[]): Promise<DocumentChunk> {
 		const result: DocumentChunk = { ids: [], chunks: [] };
 
 		for (const document of documents) {
@@ -181,7 +98,7 @@ export default class DocumentProcessor {
 		return result;
 	}
 
-	async saveToVectorStore(
+	private async saveToVectorStore(
 		chunks: Document[],
 		ids: string[],
 	): Promise<string[]> {
@@ -193,7 +110,7 @@ export default class DocumentProcessor {
 		return await vectorStore.addDocuments(chunks, { ids });
 	}
 
-	private async filterDocumentsByQuery(documents: Document[]) {
+	async filterDocumentsByQuery(documents: Document[]) {
 		const filterPromises = documents.map(async (doc) => {
 			try {
 				const queryResult = await this.pineconeIndex.query({
@@ -223,7 +140,7 @@ export default class DocumentProcessor {
 		return results.filter((doc): doc is Document => doc !== null);
 	}
 
-	public async createDocument(file: TFile) {
+	private async createDocument(file: TFile) {
 		const content = await this.plugin.app.vault.cachedRead(file);
 		const hash = await createContentHash(content);
 		const id = await createHash(file.path);
@@ -254,4 +171,37 @@ export default class DocumentProcessor {
 		console.log("--→ document", document);
 		return document;
 	}
+
+	// 기존 파인콘DB에 있는 문서는 필터링한다.
+	public async filterDocuments(documents: Document[]): Promise<Document[]> {
+		if (!documents?.length) return [];
+
+		try {
+			const documentIds = this.generateDocumentIds(documents);
+			const { records } = await this.pineconeIndex.fetch(documentIds);
+			const existingHashes = new Set(
+				Object.values(records).map(
+					(record) => (record.metadata as { hash: string }).hash,
+				),
+			);
+
+			return documents.filter((doc) => !existingHashes.has(doc.metadata.hash));
+		} catch (error) {
+			this.logger.error("Error filtering documents:", error);
+		}
+
+		return [];
+	}
+
+	public async processSingleFile(file: TFile) {
+		const document = await this.createDocument(file);
+		const { ids, chunks } = await this.createChunks([document]);
+		return await this.saveToVectorStore(chunks, ids);
+	}
+
+	public async processMultiFiles(files: TFile[]): Promise<string[]> {
+		const documents = await this.createDocumentsFromFiles(files);
+		const { ids, chunks } = await this.createChunks(documents);
+		return await this.saveToVectorStore(chunks, ids);
+	}
 }
diff --git a/src/main.ts b/src/main.ts
index d0fb86f..225a023 100644
--- a/src/main.ts
+++ b/src/main.ts
@@ -332,10 +332,7 @@ export default class SmartSeekerPlugin extends Plugin {
 			return;
 		}
 
-		if (Object.keys(this.taskQueue).length === 0) {
-			this.logger.debug("📭 처리할 taskQueue가 없습니다.");
-			return;
-		}
+		if (Object.keys(this.taskQueue).length === 0) return;
 
 		this.isProcessing = true;
 
@@ -345,41 +342,21 @@ export default class SmartSeekerPlugin extends Plugin {
 			}
 
 			const files = Object.values(this.taskQueue);
-			const documents =
-				await this.documentProcessor.createDocumentsFromFiles(files);
-			const filteredDocs =
-				await this.documentProcessor.filterDocuments(documents);
-			const totalCount = documents.length;
-			const filterdCount = filteredDocs.length;
-			if (filteredDocs.length === 0) {
-				new Notice(
-					this.createResultMessage(totalCount, filterdCount, totalCount),
-					5000,
-				);
-			}
+			await this.documentProcessor.processMultiFiles(files);
+			const totalCount = files.length;
-			const { ids, chunks } =
-				await this.documentProcessor.createChunks(filteredDocs);
-			await this.documentProcessor.saveToVectorStore(chunks, ids);
+			this.logger.debug(`${totalCount} notes successfully saved to PineconeDB`);
 
-			this.logger.debug(
-				`${filterdCount} notes successfully saved to PineconeDB`,
-			);
-
-			new Notice(
-				this.createResultMessage(totalCount, filterdCount, totalCount),
-				5000,
-			);
+			new Notice(`📊 총 ${totalCount}개 노트 처리`, 5000);
 
 			// 처리된 노트 제거
 			for (const file of files) {
 				delete this.taskQueue[file.path];
 			}
 		} catch (error) {
-			const errorMessage =
-				error instanceof Error ? error.message : "Unknown error";
-			this.logger.error(`Failed to process notes: ${errorMessage}`);
-			new Notice(`Failed to save notes: ${errorMessage}`);
+			this.logger.error(
+				`Failed to process notes: ${error?.message || error.toString()}`,
+			);
 		} finally {
 			this.isProcessing = false;
 		}
diff --git a/versions.json b/versions.json
index 1c3c0d1..d79e934 100644
--- a/versions.json
+++ b/versions.json
@@ -10,5 +10,6 @@
 	"0.0.9": "0.15.0",
 	"0.0.10": "0.15.0",
 	"0.0.11": "0.15.0",
-	"0.0.12": "0.15.0"
+	"0.0.12": "0.15.0",
+	"0.0.13": "0.15.0"
 }
\ No newline at end of file