Skip to content

Commit

Permalink
Add CopyAsMarkDown-TablesTest. See #1187
Browse files Browse the repository at this point in the history
  • Loading branch information
pilotmoon committed May 2, 2022
1 parent 14d7bdc commit c62701c
Show file tree
Hide file tree
Showing 12 changed files with 385 additions and 0 deletions.
11 changes: 11 additions & 0 deletions package-lock.json

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions package.json
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
"uglify-js": "^3.14.2"
},
"dependencies": {
"@joplin/turndown-plugin-gfm": "^1.0.43",
"@types/voca": "^1.4.1",
"evernote": "^2.0.5",
"html-entities": "^2.3.2",
Expand Down
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Original file line number Diff line number Diff line change
@@ -0,0 +1,243 @@
(function(f){if(typeof exports==="object"&&typeof module!=="undefined"){module.exports=f()}else if(typeof define==="function"&&define.amd){define([],f)}else{var g;if(typeof window!=="undefined"){g=window}else if(typeof global!=="undefined"){g=global}else if(typeof self!=="undefined"){g=self}else{g=this}g.joplinturndownPluginGfm = f()}})(function(){var define,module,exports;return (function(){function r(e,n,t){function o(i,f){if(!n[i]){if(!e[i]){var c="function"==typeof require&&require;if(!f&&c)return c(i,!0);if(u)return u(i,!0);var a=new Error("Cannot find module '"+i+"'");throw a.code="MODULE_NOT_FOUND",a}var p=n[i]={exports:{}};e[i][0].call(p.exports,function(r){var n=e[i][1][r];return o(n||r)},p,p.exports,r,e,n,t)}return n[i].exports}for(var u="function"==typeof require&&require,i=0;i<t.length;i++)o(t[i]);return o}return r})()({1:[function(require,module,exports){
// Strict-mode directive for this bundled module body; only comments may precede it.
'use strict';

// Define `__esModule: true` on the exports object of this bundled module.
Object.defineProperty(exports, '__esModule', { value: true });

// Matches class names such as "highlight-source-js" / "highlight-text-md"
// and captures the trailing language identifier.
var highlightRegExp = /highlight-(?:text|source)-([a-z0-9]+)/;

// Turndown plugin: registers the 'highlightedCodeBlock' rule, which turns a
// <div class="highlight-...-LANG"> whose first child is a <pre> into a fenced
// code block tagged with LANG.
function highlightedCodeBlock (turndownService) {
  var rule = {
    // Truthy only for a DIV with a highlight class whose first child is a PRE.
    filter: function (node) {
      var pre = node.firstChild;
      var isHighlightDiv = node.nodeName === 'DIV' && highlightRegExp.test(node.className);
      return isHighlightDiv && pre && pre.nodeName === 'PRE';
    },
    // Emits the raw text of the PRE between two fences; the language falls
    // back to an empty string when the class name carries none.
    replacement: function (content, node, options) {
      var match = (node.className || '').match(highlightRegExp);
      var language = match ? match[1] : '';
      var fence = options.fence;
      var body = node.firstChild.textContent;
      return `\n\n${fence}${language}\n${body}\n${fence}\n\n`;
    }
  };

  turndownService.addRule('highlightedCodeBlock', rule);
}

// Turndown plugin: registers the 'strikethrough' rule, which wraps the content
// of <del>, <s> and <strike> elements in double tildes.
function strikethrough (turndownService) {
  const wrapInTildes = (content) => '~~' + content + '~~';

  turndownService.addRule('strikethrough', {
    filter: ['del', 's', 'strike'],
    replacement: wrapInTildes
  });
}

// Array helpers borrowed so they can be applied to array-like node lists.
var { indexOf, every } = Array.prototype;
// Registry of table-related Turndown rules, keyed by rule name.
var rules = {};

// TH/TD cells: rendered through cell(), unless the enclosing table is one
// that tableShouldBeSkipped() rejects, in which case the content passes
// through untouched.
rules.tableCell = {
  filter: ['th', 'td'],
  replacement: function (content, node) {
    var table = nodeParentTable(node);
    return tableShouldBeSkipped(table) ? content : cell(content, node);
  }
};

// TR rows. Every row is emitted on its own line; a heading row is additionally
// followed by the Markdown delimiter row ("| --- | :-: | ...").
rules.tableRow = {
  filter: 'tr',
  replacement: function (content, node) {
    const parentTable = nodeParentTable(node);
    if (tableShouldBeSkipped(parentTable)) return content;

    var borderCells = '';
    // HTML `align` attribute value -> delimiter-row marker.
    var alignMap = { left: ':--', right: '--:', center: ':-:' };

    if (isHeadingRow(node)) {
      // The delimiter row must be as wide as the widest row of the table.
      const colCount = tableColCount(parentTable);
      for (var i = 0; i < colCount; i++) {
        // Columns beyond this row's own cells have no node to read from.
        // (The index is compared here; comparing colCount made this always
        // null and silently discarded every `align` attribute.)
        const childNode = i < node.childNodes.length ? node.childNodes[i] : null;
        var border = '---';
        // Only element nodes expose getAttribute; anything else has no alignment.
        var align = childNode && typeof childNode.getAttribute === 'function'
          ? (childNode.getAttribute('align') || '').toLowerCase()
          : '';

        // Own-property check so values such as "constructor" fall back to '---'.
        if (align && Object.prototype.hasOwnProperty.call(alignMap, align)) {
          border = alignMap[align];
        }

        // Always pass an explicit index and no node: the delimiter row gets
        // exactly one cell per column and no colspan padding.
        borderCells += cell(border, null, i);
      }
    }
    return '\n' + content + (borderCells ? '\n' + borderCells : '')
  }
};

// TABLE elements. Every table is handled here; tables rejected by
// tableShouldBeSkipped() have their content passed through unchanged, and
// tables without a heading row get an empty header prepended.
rules.table = {
  filter: function (node) {
    return node.nodeName === 'TABLE'
  },

  replacement: function (content, node) {
    if (tableShouldBeSkipped(node)) return content;

    // Ensure there are no blank lines
    content = content.replace(/\n+/g, '\n');

    // If table has no heading, add an empty one so as to get a valid Markdown table.
    // The second rendered line is a delimiter row when its first cell is one
    // of the markers tableRow can emit ('---', ':--', '--:' or ':-:').
    var lines = content.trim().split('\n');
    var secondLine = lines.length >= 2 ? lines[1] : '';
    var secondLineIsDivider = /^\| (?:---|:--|--:|:-:)/.test(secondLine);

    var columnCount = tableColCount(node);
    var emptyHeader = '';
    if (columnCount && !secondLineIsDivider) {
      emptyHeader = '|' + '     |'.repeat(columnCount) + '\n' + '|' + ' --- |'.repeat(columnCount);
    }

    return '\n\n' + emptyHeader + content + '\n\n'
  }
};

// THEAD/TBODY/TFOOT wrappers add no Markdown of their own: their already
// converted content is returned as-is.
rules.tableSection = {
  filter: ['thead', 'tbody', 'tfoot'],
  replacement: (content) => content
};

// Decides whether a TR is a heading row. It is one when either:
// - its parent is a THEAD, or
// - it is the first child of its parent, that parent is the TABLE itself or
//   the first TBODY (see isFirstTbody), and every child node of the row is a TH.
function isHeadingRow (tr) {
  var container = tr.parentNode;

  if (container.nodeName === 'THEAD') return true;
  if (container.firstChild !== tr) return false;
  if (container.nodeName !== 'TABLE' && !isFirstTbody(container)) return false;

  return every.call(tr.childNodes, function (child) {
    return child.nodeName === 'TH';
  });
}

// True when `element` is a TBODY that either has no previous sibling or is
// preceded by a THEAD whose text content is empty or whitespace-only.
function isFirstTbody (element) {
  if (element.nodeName !== 'TBODY') return false;

  var before = element.previousSibling;
  if (!before) return true;

  return before.nodeName === 'THEAD' && /^\s*$/i.test(before.textContent);
}

// Renders one Markdown table cell.
//
// content - already converted text of the cell
// node    - the TH/TD node, or null for synthetic cells (e.g. delimiter cells)
// index   - zero-based column index; when null it is looked up from the
//           node's position among its parent's child nodes (so `node` is
//           required in that case)
//
// Returns the padded cell text followed by ' |'; the first cell of a row is
// also prefixed with the opening '| '.
function cell (content, node = null, index = null) {
  if (index === null) index = indexOf.call(node.parentNode.childNodes, node);
  var prefix = index === 0 ? '| ' : ' ';

  // A table row must stay on one line: every line break (CRLF, LFCR or a bare
  // LF) becomes a <br>. Matching CRLF as a unit avoids leaving a stray CR.
  let filteredContent = content.trim().replace(/\r\n|\n\r|\n/g, '<br>');
  // Escape each pipe individually so that runs such as "||" keep their length.
  filteredContent = filteredContent.replace(/\|/g, '\\|');
  // Pad short content so a cell is at least as wide as the '---' delimiter.
  while (filteredContent.length < 3) filteredContent += ' ';
  if (node) filteredContent = handleColSpan(filteredContent, node, ' ');
  return prefix + filteredContent + ' |'
}

// Depth-first search: true when any descendant of `node` is a TABLE element.
// The node itself is not tested; a node without `childNodes` has no descendants.
function nodeContainsTable(node) {
  const children = node.childNodes;
  if (!children) return false;

  return Array.prototype.some.call(children, function (child) {
    return child.nodeName === 'TABLE' || nodeContainsTable(child);
  });
}

// Tells whether a table must NOT be rendered as a Markdown table, in which
// case its cells are emitted one after the other as if they were paragraphs.
// A table is skipped when:
// - there is no table node, or it exposes no `rows` collection;
// - it consists of a single row holding at most one child node;
// - it contains another table somewhere below it.
function tableShouldBeSkipped(tableNode) {
  if (!tableNode || !tableNode.rows) return true;

  const rows = tableNode.rows;
  const isSingleCell = rows.length === 1 && rows[0].childNodes.length <= 1;
  if (isSingleCell) return true;

  return nodeContainsTable(tableNode) ? true : false;
}

// Walks up the ancestors of `node` and returns the nearest one whose nodeName
// is 'TABLE', or null when there is none. A node that has no parent at all
// also yields null instead of throwing.
function nodeParentTable(node) {
  let parent = node.parentNode;
  while (parent && parent.nodeName !== 'TABLE') {
    parent = parent.parentNode;
  }
  return parent || null;
}

// Pads a rendered cell for its `colspan`: for every column spanned beyond the
// first, appends ' | ' followed by three copies of `emptyChar`. A missing or
// empty colspan attribute counts as 1 (no padding).
function handleColSpan(content, node, emptyChar) {
  const span = Number(node.getAttribute('colspan') || 1);
  let padded = content;
  let column = 1;
  while (column < span) {
    padded += ' | ' + emptyChar.repeat(3);
    column += 1;
  }
  return padded
}

// Returns the largest number of child nodes found in any row of the table
// (0 when the table has no rows).
function tableColCount(node) {
  return Array.prototype.reduce.call(node.rows, function (widest, row) {
    const width = row.childNodes.length;
    return width > widest ? width : widest;
  }, 0)
}

// Turndown plugin: first registers TABLE elements with keep(), then adds
// every rule collected in `rules` under its own key.
function tables (turndownService) {
  const isTableNode = (node) => node.nodeName === 'TABLE';
  turndownService.keep(isTableNode);

  for (const ruleName in rules) {
    turndownService.addRule(ruleName, rules[ruleName]);
  }
}

// Turndown plugin: registers the 'taskListItems' rule, which renders a
// checkbox placed directly inside an LI as '[x] ' or '[ ] '.
function taskListItems (turndownService) {
  const isListCheckbox = (node) =>
    node.type === 'checkbox' && node.parentNode.nodeName === 'LI';

  const renderCheckbox = (content, node) => {
    const mark = node.checked ? '[x]' : '[ ]';
    return mark + ' ';
  };

  turndownService.addRule('taskListItems', {
    filter: isListCheckbox,
    replacement: renderCheckbox
  });
}

// Turndown plugin bundle: applies the four plugins of this module in one
// use() call, in the order listed.
function gfm (turndownService) {
  const plugins = [highlightedCodeBlock, strikethrough, tables, taskListItems];
  turndownService.use(plugins);
}

// Public API of the bundled module: the combined plugin plus each individual one.
Object.assign(exports, {
  gfm: gfm,
  highlightedCodeBlock: highlightedCodeBlock,
  strikethrough: strikethrough,
  tables: tables,
  taskListItems: taskListItems
});

},{}],2:[function(require,module,exports){
module.exports=require("@joplin/turndown-plugin-gfm")

},{"@joplin/turndown-plugin-gfm":1}]},{},[2])(2)
});
12 changes: 12 additions & 0 deletions source-contrib/CopyAsMarkdown-TablesTest.popclipext/Config.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
{
"popclip version": 3785,
"identifier": "com.pilotmoon.popclip.extension.copy-as-markdown",
"name": "Copy as Markdown",
"icon": ">md.png",
"capture html": true,
"module": "copy-as-markdown.js",
"after": "copy-result",
"long name": "Copy as Markdown",
"description": "Copy web content as Markdown.",
"note": "Updated 3 Feb 2022 for compatibility with macOS 12.3."
}
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file not shown.
12 changes: 12 additions & 0 deletions source-contrib/CopyAsMarkdown-TablesTest.popclipext/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
# Copy as Markdown — Tables Test

An attempt at integrating @joplin/turndown-plugin-gfm into turndown
in order to get Markdown tables.

So far it seems that linkedom (which we use instead of jsdom)
may not produce the required output. Specifically, the table node has no
`rows` property.

So it doesn't work.

Nick, 2 May 2022
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
"use strict";
Object.defineProperty(exports, "__esModule", { value: true });
exports.action = exports.htmlToMarkdown = void 0;
const linkedom = require("@popclip/linkedom");
const TurndownService = require("@popclip/turndown");
const turndownPluginGfm = require("./@joplin+turndown-plugin-gfm");
// Converts an HTML string to Markdown: the HTML is parsed with linkedom, and
// the resulting document is handed to a Turndown service configured with
// ATX headings and the GFM plugin.
function htmlToMarkdown(html) {
    // parseHTML returns an object exposing `document`
    const { document } = linkedom.parseHTML(html);
    const turndownService = new TurndownService({ headingStyle: 'atx' });
    turndownService.use(turndownPluginGfm.gfm);
    return turndownService.turndown(document);
}
exports.htmlToMarkdown = htmlToMarkdown;
// Extension action: returns the Markdown rendering of the input's `html` field.
const action = ({ html }) => htmlToMarkdown(html);
exports.action = action;
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
import linkedom = require('@popclip/linkedom')
import TurndownService = require('@popclip/turndown')
import turndownPluginGfm = require('./@joplin+turndown-plugin-gfm')

/**
 * Converts an HTML string to Markdown.
 *
 * The HTML is parsed with linkedom and the resulting document is handed to a
 * Turndown service configured with ATX headings and the GFM plugin.
 *
 * @param html - HTML source to convert
 * @returns the value produced by `turndownService.turndown` for the parsed document
 */
export function htmlToMarkdown (html: string): string {
  // parseHTML is called directly: the former jsdom-style facade required an
  // untyped parameter and a `new (… as any)` cast just to reach this same object.
  const { document } = linkedom.parseHTML(html)
  const turndownService = new TurndownService({ headingStyle: 'atx' })
  turndownService.use(turndownPluginGfm.gfm)
  return turndownService.turndown(document)
}

/** Extension action: returns the Markdown rendering of the input's `html` field. */
export const action: Action = ({ html }) => htmlToMarkdown(html)
35 changes: 35 additions & 0 deletions source-contrib/CopyAsMarkdown-TablesTest.popclipext/test.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
"use strict";
Object.defineProperty(exports, "__esModule", { value: true });
// run with /Applications/PopClip.app/Contents/MacOS/PopClip runjs test.js
const { htmlToMarkdown } = require("./copy-as-markdown");
// Table sample: a three-column header row followed by three body rows,
// every cell carrying align="left".
const table = `
<table>
<thead>
<tr>
<th align="left">Option</th>
<th align="left">Valid values</th>
<th align="left">Default</th>
</tr>
</thead>
<tbody>
<tr>
<td align="left"><code>blankReplacement</code></td>
<td align="left">rule replacement function</td>
<td align="left">See <strong>Special Rules</strong> below</td>
</tr>
<tr>
<td align="left"><code>keepReplacement</code></td>
<td align="left">rule replacement function</td>
<td align="left">See <strong>Special Rules</strong> below</td>
</tr>
<tr>
<td align="left"><code>defaultReplacement</code></td>
<td align="left">rule replacement function</td>
<td align="left">See <strong>Special Rules</strong> below</td>
</tr>
</tbody>
</table>`;
// Convert each sample in order and print the resulting Markdown.
const samples = ['<p>hello</p>', '<h1>head1</h1>', '<strike>strike</strike>', table];
for (const html of samples) {
    print(htmlToMarkdown(html));
}
Loading

0 comments on commit c62701c

Please sign in to comment.