update-internal-links.js
#!/usr/bin/env node
// [start-readme]
//
// Run this script to find internal links in all content and data Markdown files, check whether the title, the link,
// or both are outdated, and automatically update them if so.
//
// Exceptions:
// * Links with fragments (e.g., [Bar](/foo#bar)) will get their root links updated if necessary, but the fragment
// and title will be unchanged (e.g., [Bar](/noo#bar)).
// * Links with hardcoded versions (e.g., [Foo](/enterprise-server/baz)) will get their root links updated if
// necessary, but the hardcoded versions will be preserved (e.g., [Foo](/enterprise-server/qux)).
// * Links with Liquid in the titles will have their root links updated if necessary, but the titles will be preserved.
//
// [end-readme]
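//
// Typical invocation, assuming you run it from the directory containing this file (the exact location of
// the script in the repository may differ):
//
//   ./update-internal-links.js
//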
import { fileURLToPath } from 'url'
import path from 'path'
import fs from 'fs'
import walk from 'walk-sync'
import { fromMarkdown } from 'mdast-util-from-markdown'
import visit from 'unist-util-visit'
import { loadPages, loadPageMap } from '../lib/page-data.js'
import loadSiteData from '../lib/site-data.js'
import loadRedirects from '../lib/redirects/precompile.js'
import { getPathWithoutLanguage, getPathWithoutVersion } from '../lib/path-utils.js'
import { allVersionKeys } from '../lib/all-versions.js'
import frontmatter from '../lib/read-frontmatter.js'
import renderContent from '../lib/render-content/index.js'
import patterns from '../lib/patterns.js'
const __dirname = path.dirname(fileURLToPath(import.meta.url))
const walkFiles = (pathToWalk) => {
return walk(path.posix.join(__dirname, '..', pathToWalk), {
includeBasePath: true,
directories: false,
})
.filter((file) => file.endsWith('.md') && !file.endsWith('README.md'))
.filter((file) => !file.includes('/early-access/')) // ignore EA for now
}
const allFiles = walkFiles('content').concat(walkFiles('data'))
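// Illustration only: allFiles now holds absolute paths to every Markdown file under content/ and data/
// (hypothetically something like '<repo>/content/get-started/quickstart.md'), minus README.md files and
// anything under early-access.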
// The script will exit with an error if it finds any inline link-title markup not represented here.
// This is hacky, but it captures the current rare edge cases.
const linkInlineMarkup = {
emphasis: '*',
strong: '**',
}
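// Illustration only: [*Foo*](/foo) parses as a link node whose first child is an 'emphasis' node, so its
// title gets re-wrapped with '*'; [**Foo**](/foo) gives a 'strong' child and gets '**'. Any other nested
// markup makes the script bail out below.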
const currentVersionWithSpacesRegex = /\/enterprise\/{{ currentVersion }}/g
const currentVersionWithoutSpaces = '/enterprise/{{currentVersion}}'
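// Illustration only (hypothetical path): '/enterprise/{{ currentVersion }}/admin/guide' becomes
// '/enterprise/{{currentVersion}}/admin/guide' after the replacement below.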
main()
async function main() {
console.log('Working...')
const pageList = await loadPages()
const pageMap = await loadPageMap(pageList)
const redirects = await loadRedirects(pageList)
const site = await loadSiteData()
const context = {
pages: pageMap,
redirects,
site: site.en.site,
currentLanguage: 'en',
}
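// A minimal rendering context: the page map, redirects, and English site data that the renderContent
// pipeline uses to resolve internal links. context.currentVersion is filled in per-version inside the loop below.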
for (const file of allFiles) {
const { data, content } = frontmatter(fs.readFileSync(file, 'utf8'))
let newContent = content
// Do a blanket find-replace for /enterprise/{{ currentVersion }}/ to /enterprise/{{currentVersion}}/
// so that the AST parser recognizes the link as a link node. The spaces prevent it from doing so.
newContent = newContent.replace(currentVersionWithSpacesRegex, currentVersionWithoutSpaces)
const ast = fromMarkdown(newContent)
// We can't call async functions inside visit, so gather the link nodes upfront
const nodesPerFile = []
visit(ast, (node) => {
if (node.type !== 'link') return
if (!node.url.startsWith('/')) return
if (node.url.startsWith('/assets')) return
if (node.url.startsWith('/public')) return
if (node.url.includes('/11.10.340/')) return
if (node.url.includes('/2.1/')) return
if (node.url === '/') return
nodesPerFile.push(node)
})
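// Illustration only: a Markdown link like [Foo](/foo) arrives here as an mdast node shaped roughly like
// { type: 'link', url: '/foo', children: [{ type: 'text', value: 'Foo' }] }; nodesPerFile collects every
// such node that points at internal documentation.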
// For every Markdown link...
for (const node of nodesPerFile) {
const oldLink = node.url
// Find and preserve any inline markup in link titles, like [*Foo*](/foo)
let inlineMarkup = ''
if (node.children[0].children) {
inlineMarkup = linkInlineMarkup[node.children[0].type]
if (!inlineMarkup) {
console.error(`Cannot find an inline markup entry for ${node.children[0].type}!`)
process.exit(1)
}
}
const oldTitle = node.children[0].value || node.children[0].children[0].value
const oldMarkdownLink = `[${inlineMarkup}${oldTitle}${inlineMarkup}](${oldLink})`
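// For example, a node parsed from '[**Foo**](/foo)' is reassembled here as the string '[**Foo**](/foo)',
// so it can be searched for (and later replaced) in the raw file content.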
// As a blanket rule, only update titles in links that begin with quotes. (Many links
// have punctuation before the closing quotes, so we'll only check for opening quotes.)
// Update: "[Foo](/foo)
// Do not update: [Bar](/bar)
const hasQuotesAroundLink = newContent.includes(`"${oldMarkdownLink}`)
let foundPage, fragmentMatch, versionMatch
// Run through all supported versions...
for (const version of allVersionKeys) {
context.currentVersion = version
// Render the link for each version using the renderContent pipeline, which includes the rewrite-local-links plugin.
const $ = await renderContent(oldMarkdownLink, context, { cheerioObject: true })
let linkToCheck = $('a').attr('href')
// We need to preserve fragments and hardcoded versions if any are found.
fragmentMatch = oldLink.match(/(#.*$)/)
versionMatch = oldLink.match(/(enterprise-server(?:@.[^/]*?)?)\//)
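// Illustration only (hypothetical link): for oldLink = '/enterprise-server@2.22/admin/guide#section',
// fragmentMatch[1] is '#section' and versionMatch[1] is 'enterprise-server@2.22'; for a plain '/foo'
// link, both are null.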
// Remove the fragment for now.
linkToCheck = linkToCheck.replace(/#.*$/, '').replace(patterns.trailingSlash, '$1')
// Try to find the rendered link in the set of pages!
foundPage = findPage(linkToCheck, pageMap, redirects)
// Once a page is found for a particular version, stop looking; we don't need to check the other versions
// because all we care about is the page title and path.
if (foundPage) {
break
}
}
if (!foundPage) {
console.error(
`Can't find link in pageMap! ${oldLink} in ${file.replace(process.cwd(), '')}`
)
process.exit(1)
}
// If the original link includes a fragment OR the original title includes Liquid, do not change;
// otherwise, use the found page title. (We don't want to update the title if a fragment is found because
// the title likely points to the fragment section header, not the page title.)
const newTitle =
fragmentMatch || oldTitle.includes('{%') || !hasQuotesAroundLink
? oldTitle
: foundPage.title
// If the original link includes a fragment, append it to the found page path.
// Also remove the language code because Markdown links don't include language codes.
let newLink = getPathWithoutLanguage(
fragmentMatch ? foundPage.path + fragmentMatch[1] : foundPage.path
)
// If the original link includes a hardcoded version, preserve it; otherwise, remove versioning
// because Markdown links don't include versioning.
newLink = versionMatch
? `/${versionMatch[1]}${getPathWithoutVersion(newLink)}`
: getPathWithoutVersion(newLink)
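// Sketch of the rebuild, assuming the usual path-utils behavior: a found page at
// '/en/enterprise-server@2.22/admin/guide' with a hardcoded 'enterprise-server@2.22' in the original link
// becomes '/enterprise-server@2.22/admin/guide'; without a hardcoded version it becomes '/admin/guide'.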
let newMarkdownLink = `[${inlineMarkup}${newTitle}${inlineMarkup}](${newLink})`
// Handle a few misplaced quotation marks.
if (oldMarkdownLink.includes('["')) {
newMarkdownLink = `"${newMarkdownLink}`
}
// Stream the results to console as we find them.
if (oldMarkdownLink !== newMarkdownLink) {
console.log('old link', oldMarkdownLink)
console.log('new link', newMarkdownLink)
console.log('-------')
}
newContent = newContent.replace(oldMarkdownLink, newMarkdownLink)
}
fs.writeFileSync(file, frontmatter.stringify(newContent, data, { lineWidth: 10000 }))
}
console.log('Done!')
}
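// Look a link path up in the page map: first as-is, then through the redirects table.
// Returns { title, path } for the page when found, and undefined otherwise.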
function findPage(tryPath, pageMap, redirects) {
if (pageMap[tryPath]) {
return {
title: pageMap[tryPath].title,
path: tryPath,
}
}
if (pageMap[redirects[tryPath]]) {
return {
title: pageMap[redirects[tryPath]].title,
path: redirects[tryPath],
}
}
}