From 4cdb48d887c9ed0968cfe3be04b290ba75b77641 Mon Sep 17 00:00:00 2001 From: David Bosschaert Date: Fri, 27 Sep 2024 15:25:18 +0100 Subject: [PATCH] Paste handler: optimization and unit test --- .../edit/prose/plugins/sectionPasteHandler.js | 111 +++---- .../prose/plugins/sectionPasteHandler.test.js | 286 ++++++++++++++++++ 2 files changed, 336 insertions(+), 61 deletions(-) create mode 100644 test/unit/blocks/edit/prose/plugins/sectionPasteHandler.test.js diff --git a/blocks/edit/prose/plugins/sectionPasteHandler.js b/blocks/edit/prose/plugins/sectionPasteHandler.js index a348d2f..f2dda3b 100644 --- a/blocks/edit/prose/plugins/sectionPasteHandler.js +++ b/blocks/edit/prose/plugins/sectionPasteHandler.js @@ -17,38 +17,24 @@ function closeParagraph(paraContent, newContent) { * In Desktop Word each section is represented as a top-level div element, right * under the body element. */ -function handleDesktopWordSectionBreaks(html) { - try { - const parser = new DOMParser(); - const doc = parser.parseFromString(html, 'text/html'); - - if (doc.querySelector('meta[name="ProgId"]')?.content !== 'Word.Document') { - // This is not a word document - return html; - } - - let modified = false; - // Add a hr element after all top-level div elements - const sections = doc.querySelectorAll('body > div'); - sections.forEach((section) => { - if (section.nextElementSibling) { - // only add the hr if there is something after the section - section.after(doc.createElement('hr')); - modified = true; - } - }); +function handleDesktopWordSectionBreaks(doc) { + if (doc.querySelector('meta[name="ProgId"]')?.content !== 'Word.Document') { + // This is not a word document + return false; + } - if (!modified) { - return html; + let modified = false; + // Add a hr element after all top-level div elements + const sections = doc.querySelectorAll('body > div'); + sections.forEach((section) => { + if (section.nextElementSibling) { + // only add the hr if there is something after the section + 
section.after(doc.createElement('hr')); + modified = true; } + }); - const serializer = new XMLSerializer(); - return serializer.serializeToString(doc); - } catch (error) { - // eslint-disable-next-line no-console - console.error('Error handling desktop Word section breaks:', error); - return html; - } + return modified; } /** @@ -60,38 +46,24 @@ function handleDesktopWordSectionBreaks(html) { * to be the only way to find them. In the future Word online might provide a * better way to identify section breaks. */ -function handleWordOnlineSectionBreaks(html) { - try { - const parser = new DOMParser(); - const doc = parser.parseFromString(html, 'text/html'); - - let modified = false; - // The span[data-ccp-props] are the magic indicator if one of the JSON values in there is the - // word 'single' then we need to add a section break. - const sections = doc.querySelectorAll('div > p > span[data-ccp-props]'); - sections.forEach((section) => { - const props = JSON.parse(section.getAttribute('data-ccp-props')); - for (const key of Object.keys(props)) { - if (props[key] === 'single') { - const hr = doc.createElement('hr'); - section.parentNode.after(hr); - modified = true; - break; - } +function handleWordOnlineSectionBreaks(doc) { + let modified = false; + // The span[data-ccp-props] are the magic indicator if one of the JSON values in there is the + // word 'single' then we need to add a section break. 
+ const sections = doc.querySelectorAll('div > p > span[data-ccp-props]'); + sections.forEach((section) => { + const props = JSON.parse(section.getAttribute('data-ccp-props')); + for (const key of Object.keys(props)) { + if (props[key] === 'single') { + const hr = doc.createElement('hr'); + section.parentNode.after(hr); + modified = true; + break; } - }); - - if (!modified) { - return html; } + }); - const serializer = new XMLSerializer(); - return serializer.serializeToString(doc); - } catch (error) { - // eslint-disable-next-line no-console - console.error('Error handling Word online section breaks:', error); - return html; - } + return modified; } /* When text is pasted, handle section breaks. */ @@ -103,9 +75,26 @@ export default function sectionPasteHandler(schema) { * these section breaks and adds a
<hr> element for them. */ transformPastedHTML: (html) => { - const newHTML = handleDesktopWordSectionBreaks(html); - const newHTML2 = handleWordOnlineSectionBreaks(newHTML); - return newHTML2; + try { + const parser = new DOMParser(); + const doc = parser.parseFromString(html, 'text/html'); + + let modified = handleDesktopWordSectionBreaks(doc); + if (!modified) { + modified = handleWordOnlineSectionBreaks(doc); + } + + if (!modified) { + return html; + } + + const serializer = new XMLSerializer(); + return serializer.serializeToString(doc); + } catch (error) { + // eslint-disable-next-line no-console + console.error('Error handling Word section breaks:', error); + return html; + } }, /* Convert 3 dashes on a line by itself (top level only) to a horizontal rule, diff --git a/test/unit/blocks/edit/prose/plugins/sectionPasteHandler.test.js b/test/unit/blocks/edit/prose/plugins/sectionPasteHandler.test.js new file mode 100644 index 0000000..9e9795e --- /dev/null +++ b/test/unit/blocks/edit/prose/plugins/sectionPasteHandler.test.js @@ -0,0 +1,286 @@ +import { expect } from '@esm-bundle/chai'; +import { baseSchema, Slice } from 'da-y-wrapper'; +import sectionPasteHandler from '../../../../../../blocks/edit/prose/plugins/sectionPasteHandler.js'; + +function normalizeHTML(html) { + return html.replace(/[\n\s]+/g, ' ').replace(/> </g, '><').trim(); +} + +describe('Section paste handler', () => { + it('Test paste from desktop Word inserts hr elements', () => { + const plugin = sectionPasteHandler(); + const wordPasteHandler = plugin.props.transformPastedHTML; + + const inputHTML = ` + + + + + +
Section 1
+
Section 2
+

Section 3

+ +`; + + const result = normalizeHTML(wordPasteHandler(inputHTML)); + + // Note the added
<hr> tags + const expectedHTML = normalizeHTML(` + + + + + +
Section 1

+
Section 2

+

Section 3

+ +`); + expect(result).to.equal(expectedHTML); + }); + + it('Test paste from desktop Word no hr after last element', () => { + const plugin = sectionPasteHandler(); + const wordPasteHandler = plugin.props.transformPastedHTML; + + const inputHTML = ` + + + + + +
Section 1
+
Section 2
+ +`; + + const result = normalizeHTML(wordPasteHandler(inputHTML)); + + // Note the added
<hr> tags + const expectedHTML = normalizeHTML(` + + + + + +
Section 1

+
Section 2
+ +`); + expect(result).to.equal(expectedHTML); + }); + + it('Test desktop Word handler ignores alien content', () => { + const plugin = sectionPasteHandler(); + const wordPasteHandler = plugin.props.transformPastedHTML; + + const inputHTML = ` + + + + + +
Section 1
+
Section 2
+

Section 3

+ +`; + + const result = wordPasteHandler(inputHTML); + expect(result).to.equal(inputHTML); + }); + + it('Test paste from desktop Word ignores non-matching content', () => { + const plugin = sectionPasteHandler(); + const wordPasteHandler = plugin.props.transformPastedHTML; + + const inputHTML = ` + + + + + +
Section 1
+ +`; + + const result = wordPasteHandler(inputHTML); + expect(result).to.equal(inputHTML); + }); + + it('Test paste from online Word inserts hr elements', () => { + const plugin = sectionPasteHandler(); + const wordPasteHandler = plugin.props.transformPastedHTML; + + // Note the 'special marker' is in the data-ccp-props attribute + // the value "469789806":"single" in there indicates a section break + const inputHTML = ` + + +

+ Section 1 + +

+
Section 2
+ +`; + + const result = normalizeHTML(wordPasteHandler(inputHTML)); + + // Note the added
<hr> tag + const expectedHTML = normalizeHTML(` + + + +

+ Section 1 + +


+
Section 2
+ +`); + + expect(result).to.equal(expectedHTML); + }); + + it('Test transform pasted dashes', () => { + const json = { + content: [{ + type: 'paragraph', + content: [{ type: 'text', text: 'Aaa' }], + }, { + type: 'paragraph', + content: [{ type: 'text', text: '---' }], + }, { + type: 'paragraph', + content: [{ type: 'text', text: 'Bbb' }], + }], + openStart: 1, + openEnd: 1, + }; + const slice = Slice.fromJSON(baseSchema, json); + + const plugin = sectionPasteHandler(baseSchema); + const pasteHandler = plugin.props.transformPasted; + + const newSlice = pasteHandler(slice); + expect(newSlice.openStart).to.equal(slice.openStart); + expect(newSlice.openEnd).to.equal(slice.openEnd); + + const expectedJSON = { + content: [{ + type: 'paragraph', + content: [{ type: 'text', text: 'Aaa' }], + }, { type: 'horizontal_rule' }, { + type: 'paragraph', + content: [{ type: 'text', text: 'Bbb' }], + }], + openStart: 1, + openEnd: 1, + }; + + const newJSON = JSON.stringify(newSlice.content.toJSON()); + expect(newJSON).to.equal(JSON.stringify(expectedJSON.content)); + }); + + it('Test transform pasted dashes 2', async () => { + const json = { + content: [ + { + type: 'heading', + attrs: { level: 1 }, + content: [ + { + type: 'text', + text: 'Heading 1', + }, + ], + }, + { + type: 'paragraph', + content: [{ type: 'hard_break' }], + }, + { + type: 'paragraph', + content: [ + { + type: 'text', + text: 'Hi there', + }, + ], + }, + { + type: 'paragraph', + content: [ + { + type: 'text', + text: '---', + }, + ], + }, + { + type: 'paragraph', + content: [ + { + type: 'text', + text: 'Goodbye', + }, + ], + }, + ], + openStart: 1, + openEnd: 1, + }; + + const slice = Slice.fromJSON(baseSchema, json); + + const plugin = sectionPasteHandler(baseSchema); + const pasteHandler = plugin.props.transformPasted; + + const newSlice = pasteHandler(slice); + expect(newSlice.openStart).to.equal(slice.openStart); + expect(newSlice.openEnd).to.equal(slice.openEnd); + + const expectedJSON = { + content: [ 
+ { + type: 'heading', + attrs: { level: 1 }, + content: [ + { + type: 'text', + text: 'Heading 1', + }, + ], + }, + { + type: 'paragraph', + content: [{ type: 'hard_break' }], + }, + { + type: 'paragraph', + content: [ + { + type: 'text', + text: 'Hi there', + }, + ], + }, { type: 'horizontal_rule' }, { + type: 'paragraph', + content: [ + { + type: 'text', + text: 'Goodbye', + }, + ], + }, + ], + openStart: 1, + openEnd: 1, + }; + + const newJSON = JSON.stringify(newSlice.content.toJSON()); + expect(newJSON).to.equal(JSON.stringify(expectedJSON.content)); + }); +});