Skip to content

Commit

Permalink
avoid "growing" code block; avoid removal of empty code blocks
Browse files Browse the repository at this point in the history
  • Loading branch information
as-op committed Sep 19, 2024
1 parent c28ba0d commit 25f9b88
Show file tree
Hide file tree
Showing 6 changed files with 92 additions and 115 deletions.
52 changes: 38 additions & 14 deletions src/commonmark/commonmarkdataprocessor.js
Original file line number Diff line number Diff line change
Expand Up @@ -13,18 +13,13 @@ import {HtmlDataProcessor, DomConverter} from '@ckeditor/ckeditor5-engine';
import {highlightedCodeBlock} from 'turndown-plugin-gfm';
import TurndownService from 'turndown';
import {textNodesPreprocessor, linkPreprocessor, breaksPreprocessor} from './utils/preprocessor';
import {fixBreaksInCodeBlocks, fixCodeBlocks} from "./utils/fix-code-blocks";
import {fixTasklistWhitespaces} from './utils/fix-tasklist-whitespaces';
import {fixBreaksInTables, fixBreaksInLists, fixBreaksOnRootLevel} from "./utils/fix-breaks";
import markdownIt from 'markdown-it';
import markdownItTaskLists from 'markdown-it-task-lists';

export const originalSrcAttribute = 'data-original-src';

/**
 * Debug helper: serializes every child node of a fragment and logs the result.
 *
 * Each child contributes its `outerHTML` when that is truthy, otherwise its
 * `nodeValue`. The pieces are concatenated in order and written with a single
 * `console.log` call.
 *
 * @param {{childNodes: ArrayLike<{outerHTML?: string, nodeValue?: ?string}>}} fragment
 *   Object exposing an array-like `childNodes` collection.
 */
function debugOutFragment(fragment) {
  let serialized = '';

  for (const child of Array.from(fragment.childNodes)) {
    serialized += child.outerHTML || child.nodeValue;
  }

  console.log(serialized);
}

/**
* This data processor implementation uses CommonMark as input/output data.
*
Expand Down Expand Up @@ -54,17 +49,18 @@ export default class CommonMarkDataProcessor {
// Use tasklist plugin
let parser = md.use(markdownItTaskLists, {label: true});

// Keep a reference to the current code_block rule so the override below can delegate to it.
const previousRenderer = parser.renderer.rules.code_block;
// NOTE(review): the rule is read from `parser` but the override is installed on `md`.
// Presumably both are the same instance, since `parser` is the return value of `md.use(...)` — confirm.
// NOTE(review): in markdown-it the `code_block` rule renders indented code, while fenced
// blocks go through the `fence` rule — confirm this override covers the intended cases.
md.renderer.rules.code_block = function (tokens, idx, options, env, self) {
// markdown-it adds a newline to the end of code blocks, we need to remove it
// (the token is modified in place; only a single trailing '\n' is stripped)
tokens[idx].content = tokens[idx].content.replace(/\n$/, '');
return previousRenderer(tokens, idx, options, env, self);
};

const html = parser.render(data);

// Convert input HTML data to DOM DocumentFragment.
const domFragment = this._htmlDP._toDom(html);

// Fix empty line on the end of code blocks
fixBreaksInCodeBlocks(domFragment)

// Fix empty code blocks
fixCodeBlocks(domFragment);

// Fix duplicate whitespace in task lists
fixTasklistWhitespaces(domFragment);

Expand Down Expand Up @@ -113,7 +109,20 @@ export default class CommonMarkDataProcessor {
// Use Turndown to convert DOM fragment to markdown
const turndownService = new TurndownService({
headingStyle: 'atx',
codeBlockStyle: 'fenced'
codeBlockStyle: 'fenced',
// Turndown `blankReplacement` option: decides what markdown is emitted for a node that
// Turndown treats as blank. Overridden so that empty <code>/<pre> elements are not dropped.
//   content - passed in by Turndown; returned unchanged for PRE nodes
//   node    - the DOM node being replaced
// Returns the markdown string to emit for `node`.
blankReplacement: function (content, node) {
if (node.tagName === 'CODE') {
// We don't want to remove code silently: emit a fenced block instead.
// The class attribute, with the first 'language-' occurrence removed, is used as the fence info string.
const prefix = (node.getAttribute('class') || '').replace('language-', '');
const textContent = node.textContent || '';

// Keep the node's text as the fence body; a completely empty node still gets one empty line.
return "```" + prefix + '\n' + (textContent.length ? textContent : '\n') + "```\n";
} else if (node.tagName === 'PRE') {
// We don't want to remove pre silently: hand back the `content` argument as-is.
return content;
}
// Any other node: a blank line for block-level nodes, nothing for the rest.
return node.isBlock ? '\n\n' : ''
},
});

turndownService.use([
Expand Down Expand Up @@ -226,7 +235,7 @@ export default class CommonMarkDataProcessor {
)
);
},
replacement: (_content, node) => {
replacement: (_content, node) => {
if (!node.parentElement && !node.nextSibling && !node.previousSibling) { //document with only one empty paragraph
return '';
} else {
Expand All @@ -235,9 +244,24 @@ export default class CommonMarkDataProcessor {
},
});

// turndownService.addRule('emptyCode', {
// filter: (node) => {
// console.log(node);
// // return (
// // (node.nodeName === 'CODE' && node.textContent && node.textContent.includes('###turndown-ignore###'))
// // );
// return false;
// },
// replacement: (_content, node) => {
// const s = node.textContent.replace('###turndown-ignore###', '');
// console.log(s);
// return s;
// },
// });

let turndown = turndownService.turndown(domFragment);

// Escape non-breaking space characters
return turndown.replace(/\u00A0/, ' ');
return turndown.replace(/\u00A0/, ' ').replace('###turndown-ignore###\n', '');
}
}
56 changes: 0 additions & 56 deletions src/commonmark/utils/fix-code-blocks.js

This file was deleted.

1 change: 0 additions & 1 deletion src/commonmark/utils/preprocessor.js
Original file line number Diff line number Diff line change
Expand Up @@ -76,7 +76,6 @@ export function breaksPreprocessor(root, allowed_whitespace_nodes, allowed_raw_n
}
}


export function hasParentOfType(node, tagNames) {
let parent = node.parentElement;

Expand Down
4 changes: 2 additions & 2 deletions tests/commonmark/blockquotes.test.js
Original file line number Diff line number Diff line change
Expand Up @@ -107,13 +107,13 @@ describe('CommonMarkProcessor', () => {
'<p>Example 1:</p>' +
'<pre>' +
'<code>' +
'code 1' +
'code 1\n' +
'</code>' +
'</pre>' +
'<p>Example 2:</p>' +
'<pre>' +
'<code>' +
'code 2' +
'code 2\n' +
'</code>' +
'</pre>' +
'</blockquote>',
Expand Down
92 changes: 51 additions & 41 deletions tests/commonmark/code.test.js
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,9 @@ describe('CommonMarkProcessor', () => {
'<p><code>some `backticks` inside</code></p>'
);
});
});

describe('code block', () => {
it('should process code blocks indented with tabs', () => {
testDataProcessor(
' code block',
Expand Down Expand Up @@ -161,7 +163,7 @@ describe('CommonMarkProcessor', () => {
// We will need to handle this separately by some feature.

'<pre><code class="language-js">var a = \'hello\';\n' +
'console.log(a + \' world\');</code></pre>'
'console.log(a + \' world\');\n</code></pre>'
);
});

Expand All @@ -174,7 +176,7 @@ describe('CommonMarkProcessor', () => {
// GitHub is rendering as special html with syntax highlighting.
// We will need to handle this separately by some feature.

'<pre><code class="language-bash">#!/bin/bash</code></pre>',
'<pre><code class="language-bash">#!/bin/bash\n</code></pre>',

// When converting back ~~~ are normalized to ```.

Expand All @@ -195,7 +197,7 @@ describe('CommonMarkProcessor', () => {
// We will need to handle this separately by some feature.

'<pre><code class="language-js">var a = \'hello\';\n' +
'console.log(a + \' world\');</code></pre>',
'console.log(a + \' world\');\n</code></pre>',

// When converting back ``````` are normalized to ```.

Expand All @@ -217,7 +219,7 @@ describe('CommonMarkProcessor', () => {
// We will need to handle this separately by some feature.

'<pre><code class="language-js">var a = \'hello\';\n' +
'console.log(a + \' world\');</code></pre>',
'console.log(a + \' world\');\n</code></pre>',

// When converting back ~~~~~~~~~~ are normalized to ```.

Expand All @@ -228,41 +230,6 @@ describe('CommonMarkProcessor', () => {
);
});

it('should process empty code block', () => {
testDataProcessor(
'```js\n' +
'```',

// GitHub is rendering as special html with syntax highlighting.
// We will need to handle this separately by some feature.

'<pre><code class="language-js">\n</code></pre>',

// When converting back, empty code blocks will be removed.
// This might be an issue when switching from source to editor
// but changing this cannot be done in to-markdown converters.
''
);
});

it('should process code block with empty line', () => {
testDataProcessor(
'```js\n' +
'\n' +
'```',

// GitHub is rendering as special html with syntax highlighting.
// We will need to handle this separately by some feature.

'<pre><code class="language-js">\n</code></pre>',

// When converting back, empty code blocks will be removed.
// This might be an issue when switching from source to editor
// but changing this cannot be done in to-markdown converters.
''
);
});

it('should process nested code', () => {
testDataProcessor(
'````` code `` code ``` `````',
Expand All @@ -288,7 +255,7 @@ describe('CommonMarkProcessor', () => {
'<pre><code>' +
'```\n' +
'Code\n' +
'```' +
'```\n' +
'</code></pre>'
);
});
Expand All @@ -308,9 +275,52 @@ describe('CommonMarkProcessor', () => {
'```\n' +
'Code\n' +
'```\n' +
'````' +
'````\n' +
'</code></pre>'
);
});

it('should process empty code block', () => {
testDataProcessor(
'```js\n' +
'```',
'<pre><code class="language-js"></code></pre>',
// we always keep at least one line in a code block
'```js\n' +
'\n' +
'```',
);
});

it('should process code block with empty line', () => {
testDataProcessor(
'```js\n' +
'\n' +
'```',

// GitHub is rendering as special html with syntax highlighting.
// We will need to handle this separately by some feature.

'<pre><code class="language-js">\n</code></pre>',

'```js\n' +
'\n' +
'```',
);
});

it('should keep the amount of empty lines', () => {
testDataProcessor(
'```js\n' +
'\n\n\n' +
'```',
'<pre><code class="language-js">\n\n\n</code></pre>',

'```js\n' +
'\n\n\n' +
'```',
);
});

});
});
2 changes: 1 addition & 1 deletion tests/commonmark/escaping.test.js
Original file line number Diff line number Diff line change
Expand Up @@ -94,7 +94,7 @@ describe('Commonmark', () => {
'\\<h1>Test\\</h1>\n' +
'```',
'<pre><code>' +
'\\<h1>Test\\</h1>' +
'\\<h1>Test\\</h1>\n' +
'</code></pre>');
});

Expand Down

0 comments on commit 25f9b88

Please sign in to comment.