-
Notifications
You must be signed in to change notification settings - Fork 0
/
simplify.js
55 lines (49 loc) · 1.7 KB
/
simplify.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
const { JSDOM } = require("jsdom")
const { Readability } = require("@mozilla/readability")
const TurndownService = require("turndown")
const yargs = require("yargs")
const { hideBin } = require("yargs/helpers")
const fs = require("fs")
const path = require("path")
// Read the command-line arguments: the default command (`*`) takes the
// HTML files to convert as its positional argument.
const args = yargs(hideBin(process.argv))
  .command(
    "* <files..>",
    "extract content from given HTML files to Markdown",
    (cmd) => {
      // NOTE: the name inside `<>` in the command string above must match the
      // positional argument declared here.
      // ALSO: the `..` suffix in the command string marks it as an array.
      cmd.positional("files", {
        type: "string",
        normalize: true,
        default: [],
      })
    }
  )
  .parseSync()
/**
 * Run Readability over an HTML document.
 *
 * @param {string} html - HTML markup to load into a JSDOM document
 * @returns {object} the result of `Readability#parse()` for that document
 */
function simplify(html) {
  const { document } = new JSDOM(html).window
  return new Readability(document).parse()
}
/**
 * Convert the HTML in `readable.content` to Markdown with Turndown.
 *
 * @param {object} readable - object exposing the HTML to convert as `content`
 * @returns {string} the value returned by `TurndownService#turndown`
 */
function generateMarkdown(readable) {
  const turndownOptions = {
    headingStyle: "atx",
    bulletListMarker: "-",
    codeBlockStyle: "fenced",
  }
  const converter = new TurndownService(turndownOptions)
  // NOTE(review): `keep` is expected to leave <table> elements as HTML in the
  // output instead of converting them — confirm against the Turndown docs.
  converter.keep(["table"])
  const markdown = converter.turndown(readable.content)
  return markdown
}
/**
 * Render a value as a YAML scalar that is safe to place after `key: `.
 *
 * Missing values (`null`/`undefined`) become the YAML literal `null`. Anything
 * else is stringified and emitted as a JSON string, which is also a valid YAML
 * double-quoted scalar, so colons, quotes, `#` and newlines in the value
 * cannot break the front matter.
 *
 * @param {*} value - value to render
 * @returns {string} YAML scalar text
 */
function toYamlScalar(value) {
  if (value == null) {
    return "null"
  }
  return JSON.stringify(String(value))
}

/**
 * Build the YAML front-matter block for an extracted article.
 *
 * @param {object} readable - object with `title`, `siteName`, `publishedTime`,
 *   `byline` and `excerpt` properties (any of which may be missing)
 * @returns {string} front matter delimited by `---` lines, ending in a newline
 */
function generateFrontMatter(readable) {
  const fields = [
    ["title", readable.title],
    ["source", readable.siteName],
    ["date", readable.publishedTime],
    ["author", readable.byline],
    ["description", readable.excerpt],
  ]
  // Lines are joined explicitly so the YAML always starts at column 0
  // regardless of how this function is indented.
  const lines = fields.map(([key, value]) => `${key}: ${toYamlScalar(value)}`)
  return `---\n${lines.join("\n")}\n---\n`
}
// Convert each HTML file named on the command line into a Markdown file
// (front matter + body) written next to the source with a `.md` extension.
for (const arg of args.files) {
  // path.resolve, unlike path.join, leaves an absolute `arg` untouched and
  // only resolves relative paths against the current working directory.
  const fullPath = path.resolve(process.cwd(), arg)
  console.log(`Processing ${fullPath}...`)

  const html = fs.readFileSync(fullPath, "utf8")
  const readable = simplify(html)
  // Readability's parse() can return null when it finds no article content;
  // skip the file instead of crashing on property access below.
  if (readable == null) {
    console.warn(`Skipping ${fullPath}: no readable content found.`)
    continue
  }

  const output = generateFrontMatter(readable) + generateMarkdown(readable)
  const sourceFile = path.parse(fullPath)
  const targetPath = path.join(sourceFile.dir, sourceFile.name + ".md")
  fs.writeFileSync(targetPath, output, "utf8")
  console.log("Done!")
}