From 30f0b623b306aff6ec5a2278207cf2481a609b85 Mon Sep 17 00:00:00 2001 From: Pionxzh Date: Mon, 9 Sep 2024 03:28:45 +0800 Subject: [PATCH] fix: fix unexpected latex formula escape --- src/exporter/markdown.ts | 47 +++++++++++++++++++++++++++++----------- src/exporter/text.ts | 20 +++++++++++++++++ 2 files changed, 54 insertions(+), 13 deletions(-) diff --git a/src/exporter/markdown.ts b/src/exporter/markdown.ts index 71cd174..c3b111b 100644 --- a/src/exporter/markdown.ts +++ b/src/exporter/markdown.ts @@ -63,6 +63,8 @@ export async function exportAllToMarkdown(fileNameFormat: string, apiConversatio return true } +const LatexRegex = /(\s\$\$.+\$\$\s|\s\$.+\$\s|\\\[.+\\\]|\\\(.+\\\))|(^\$$[\S\s]+^\$$)|(^\$\$[\S\s]+^\$\$$)/gm + function conversationToMarkdown(conversation: ConversationResult, metaList?: ExportMeta[]) { const { id, title, model, modelSlug, createTime, updateTime, conversationNodes } = conversation const source = `${baseUrl}/c/${id}` @@ -125,25 +127,44 @@ function conversationToMarkdown(conversation: ConversationResult, metaList?: Exp const author = transformAuthor(message.author) - let postSteps: Array<(input: string) => string> = [] + const postSteps: Array<(input: string) => string> = [] if (message.author.role === 'assistant') { - postSteps = [...postSteps, input => transformFootNotes(input, message.metadata)] + postSteps.push(input => transformFootNotes(input, message.metadata)) } // Only message from assistant will be reformatted if (message.author.role === 'assistant') { - postSteps = [...postSteps, (input) => { + postSteps.push((input) => { + // Replace mathematical formula annotation + input = input + .replace(/^\\\[(.+)\\\]$/gm, '$$$$$1$$$$') + .replace(/\\\[/g, '$') + .replace(/\\\]/g, '$') + .replace(/\\\(/g, '$') + .replace(/\\\)/g, '$') + + const matches = input.match(LatexRegex) + // Skip code block as the following steps can potentially break the code - if (!(/```/.test(input))) { - // Replace mathematical formula annotation - input = input - .replace(/^\\\[(.+)\\\]$/gm, '$$$$$1$$$$') - .replace(/\\\[/g, '$') - .replace(/\\\]/g, '$') - .replace(/\\\(/g, '$') - .replace(/\\\)/g, '$') + const isCodeBlock = /```/.test(input) + if (!isCodeBlock && matches) { + let index = 0 + input = input.replace(LatexRegex, () => { + // Replace it with `╬${index}╬` to avoid markdown processor ruin the formula + return `╬${index++}╬` + }) } - return toMarkdown(fromMarkdown(input)) - }] + + let transformed = toMarkdown(fromMarkdown(input)) + + if (!isCodeBlock && matches) { + // Replace `╬${index}╬` back to the original latex + transformed = transformed.replace(/╬(\d+)╬/g, (_, index) => { + return matches[+index] + }) + } + + return transformed + }) } const postProcess = (input: string) => postSteps.reduce((acc, fn) => fn(acc), input) const content = transformContent(message.content, message.metadata, postProcess) diff --git a/src/exporter/text.ts b/src/exporter/text.ts index dbb3c1b..04b2852 100644 --- a/src/exporter/text.ts +++ b/src/exporter/text.ts @@ -29,6 +29,8 @@ export async function exportToText() { return true } +const LatexRegex = /(\s\$\$.+\$\$\s|\s\$.+\$\s|\\\[.+\\\]|\\\(.+\\\))|(^\$$[\S\s]+^\$$)|(^\$\$[\S\s]+^\$\$$)/gm + function transformMessage(message?: ConversationNodeMessage) { if (!message || !message.content) return null @@ -53,6 +55,16 @@ function transformMessage(message?: ConversationNodeMessage) { const author = transformAuthor(message.author) let content = transformContent(message.content, message.metadata) + + const matches = content.match(LatexRegex) + if (matches) { + let index = 0 + content = content.replace(LatexRegex, () => { + // Replace it with `╬${index}╬` to avoid markdown processor ruin the formula + return `╬${index++}╬` + }) + } + if (message.author.role === 'assistant') { content = transformFootNotes(content, message.metadata) } @@ -61,6 +73,14 @@ function transformMessage(message?: ConversationNodeMessage) { if (message.author.role === 'assistant' && content) { content = reformatContent(content) } + + if (matches) { + // Replace `╬${index}╬` back to the original latex + content = content.replace(/╬(\d+)╬/g, (_, index) => { + return matches[+index] + }) + } + return `${author}:\n${content}` }