-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathmain.js
28 lines (23 loc) · 1.38 KB
/
main.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
import { readFileSync, writeFileSync } from 'fs';
// Matches the scrambled number of the /Pages object: 8 to 10 digits immediately after a line
// feed, followed by ` 0 obj`, a CRLF, and a dictionary that opens with `<< /Type /Pages /Kids `.
const pagesObjRegexGroupIndex = /(?<=\n)(\d{8,10})(?= 0 obj\r\n<< \/Type \/Pages \/Kids )/;
// Matches the true number of the /Pages object, i.e. the parent of the first /Page object it
// finds. Note that PDF syntax leaves some freedom in terms of white space, so this regex was
// updated to consider (hopefully) all formatting possibilities.
// NOTE(review): `\/Page` is also a prefix of `/Pages`, so the lookbehind is satisfied by any
// earlier `<< /Type /Page...` opener, not only by a leaf /Page dictionary - confirm this is intended.
const firstPageIndexRegexGroupPagesIndex = /(?<=<< ?\/Type ?\/Page[\s\S]+?\/Parent )(\d+)(?= 0 R)/;
// Matches all the scrambled object numbers (8 to 10 digits before ` 0 obj`) and xref byte indices
// (exactly 10 digits before ` 00000 n`), each immediately after a line feed. The `g` flag lets a
// single replace call visit every occurrence in the file.
const objIndexRegex = /(?<=\n)(\d{8,10})(?= 0 obj)|(?<=\n)(\d{10})(?= 00000 n)/g;
/**
 * Derives the XOR key that maps scrambled object numbers back to their true values.
 *
 * The key is the scrambled /Pages object number XOR-ed with the /Pages number that a page's
 * `/Parent N 0 R` entry refers to.
 *
 * @param {string} scrambled - Full text of the PDF.
 * @returns {number} The XOR key (a signed 32-bit integer, as produced by `^`). It is 0 when the
 *   PDF is already unscrambled, and also 0 when no scrambled /Pages object header is present.
 * @throws {Error} If a scrambled /Pages object header is found but no `/Parent N 0 R` reference is.
 */
const findKey = (scrambled) => {
  const scrambledMatch = scrambled.match(pagesObjRegexGroupIndex);
  if (scrambledMatch === null) {
    // Nothing looks scrambled. Falling through would compute NaN ^ trueIndex === trueIndex and
    // hand back a bogus non-zero key, so report "no change needed" instead.
    return 0;
  }

  const trueMatch = scrambled.match(firstPageIndexRegexGroupPagesIndex);
  if (trueMatch === null) {
    // Without the true number the key cannot be derived; scrambledIndex ^ NaN would silently
    // yield scrambledIndex and corrupt the file.
    throw new Error('Could not find a /Parent reference to the /Pages object; cannot derive the key');
  }

  const scrambledIndex = Number.parseInt(scrambledMatch[1], 10);
  const trueIndex = Number.parseInt(trueMatch[1], 10);
  // Note that the key will be zero if the PDF is already unscrambled, meaning this algorithm is
  // idempotent and can also be run on working PDFs without breaking them
  return scrambledIndex ^ trueIndex;
};
/**
 * Rewrites every scrambled object number and xref byte index by XOR-ing it with `key`.
 *
 * Each replacement is zero-padded to the width of the text it replaces, so the length of the
 * document does not change as long as the result has no more digits than the original.
 *
 * @param {string} scrambled - Full text of the PDF.
 * @param {number} key - XOR key; 0 leaves every number's value unchanged.
 * @returns {string} The PDF text with all matched numbers XOR-ed with `key`.
 */
const unscramble = (scrambled, key) =>
  scrambled.replace(objIndexRegex, (match) =>
    // `^` produces a signed 32-bit integer; `>>> 0` reinterprets it as unsigned so values at or
    // above 2^31 are not written out as negative numbers with a leading minus sign.
    ((Number.parseInt(match, 10) ^ key) >>> 0).toString(10).padStart(match.length, '0'),
  );
// Command-line entry point: unscramble the PDF named by the first argument, in place.
const pdfPath = process.argv[2];
if (pdfPath === undefined) {
  // Fail with a usage hint rather than the TypeError readFileSync raises for a missing path.
  console.error('Usage: node main.js <path-to-pdf>');
  process.exit(1);
}
// The same 'binary' encoding is used for reading and writing so that the bytes the regexes do
// not touch are written back unchanged.
const pdfText = readFileSync(pdfPath, 'binary');
writeFileSync(pdfPath, unscramble(pdfText, findKey(pdfText)), 'binary');