diff --git a/frontends/search/src/components/UploadFile.tsx b/frontends/search/src/components/UploadFile.tsx index f93185f8f7..293ad4a3fc 100644 --- a/frontends/search/src/components/UploadFile.tsx +++ b/frontends/search/src/components/UploadFile.tsx @@ -17,6 +17,7 @@ interface RequestBody { group_tracking_id?: string; metadata: any; time_stamp?: string; + use_pdf2md_ocr?: boolean; } export const UploadFile = () => { @@ -38,6 +39,7 @@ export const UploadFile = () => { const [splitDelimiters, setSplitDelimiters] = createSignal([".", "?", "\\n"]); const [targetSplitsPerChunk, setTargetSplitsPerChunk] = createSignal(20); const [rebalanceChunks, setRebalanceChunks] = createSignal(false); + const [useGptChunking, setUseGptChunking] = createSignal(false); const [groupTrackingId, setGroupTrackingId] = createSignal(""); const [showFileInput, setShowFileInput] = createSignal(true); @@ -136,19 +138,20 @@ export const UploadFile = () => { }); const requestBodyTemplate: Omit = - { - link: link() === "" ? undefined : link(), - tag_set: - tagSet().split(",").length > 0 ? undefined : tagSet().split(","), - split_delimiters: splitDelimiters(), - target_splits_per_chunk: targetSplitsPerChunk(), - rebalance_chunks: rebalanceChunks(), - group_tracking_id: - groupTrackingId() === "" ? undefined : groupTrackingId(), - // eslint-disable-next-line @typescript-eslint/no-unsafe-assignment - metadata: metadata(), - time_stamp: timestamp() ? timestamp() + " 00:00:00" : undefined, - }; + { + link: link() === "" ? undefined : link(), + tag_set: + tagSet().split(",").length > 0 ? undefined : tagSet().split(","), + split_delimiters: splitDelimiters(), + target_splits_per_chunk: targetSplitsPerChunk(), + rebalance_chunks: rebalanceChunks(), + use_pdf2md_ocr: useGptChunking(), + group_tracking_id: + groupTrackingId() === "" ? undefined : groupTrackingId(), + // eslint-disable-next-line @typescript-eslint/no-unsafe-assignment + metadata: metadata(), + time_stamp: timestamp() ? timestamp() + " 00:00:00" : undefined, + }; const uploadFilePromises = files().map(async (file) => { let base64File = await toBase64(file); @@ -323,15 +326,27 @@ export const UploadFile = () => { onInput={(e) => setRebalanceChunks(e.currentTarget.checked)} class="h-4 w-4 rounded-md border border-gray-300 bg-neutral-100 px-4 py-1 dark:bg-neutral-700" /> +
+
Use gpt4o chunking
+ } + tooltipText="Use gpt4o chunking. If set to true, Trieve will use the gpt4o model to chunk the document if it is a pdf file. This is an experimental feature and may not work as expected." + /> +
+ setUseGptChunking(e.currentTarget.checked)} + class="h-4 w-4 rounded-md border border-gray-300 bg-neutral-100 px-4 py-1 dark:bg-neutral-700" + />