Skip to content

Commit

Permalink
Merge pull request #18390 from alexcat3/fix-issue-18099
Browse files Browse the repository at this point in the history
Handle toUnicode cMaps that omit leading zeros in hex encoded UTF-16 (issue 18099)
  • Loading branch information
Snuffleupagus authored Jul 6, 2024
2 parents 145951d + 1c36442 commit 5ee6169
Show file tree
Hide file tree
Showing 4 changed files with 21 additions and 0 deletions.
5 changes: 5 additions & 0 deletions src/core/evaluator.js
Original file line number Diff line number Diff line change
Expand Up @@ -3852,6 +3852,11 @@ class PartialEvaluator {
map[charCode] = String.fromCodePoint(token);
return;
}
// Add back omitted leading zeros on odd length tokens
// (fixes issue #18099)
if (token.length % 2 !== 0) {
token = "\u0000" + token;
}
const str = [];
for (let k = 0; k < token.length; k += 2) {
const w1 = (token.charCodeAt(k) << 8) | token.charCodeAt(k + 1);
Expand Down
1 change: 1 addition & 0 deletions test/pdfs/.gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -653,3 +653,4 @@
!bug1539074.1.pdf
!issue18305.pdf
!issue18360.pdf
!issue18099_reduced.pdf
Binary file added test/pdfs/issue18099_reduced.pdf
Binary file not shown.
15 changes: 15 additions & 0 deletions test/unit/api_spec.js
Original file line number Diff line number Diff line change
Expand Up @@ -3419,6 +3419,21 @@ Caron Broadcasting, Inc., an Ohio corporation (“Lessee”).`)
await loadingTask.destroy();
});

// Regression test for issue 18099: loads the reduced PDF, extracts the text of
// page 1 with normalization disabled, and checks the merged text items.
it("gets text content, correctly handling documents with toUnicode cmaps that omit leading zeros on hex-encoded UTF-16", async function () {
  const params = buildGetDocumentParams("issue18099_reduced.pdf");
  const loadingTask = getDocument(params);

  const doc = await loadingTask.promise;
  const firstPage = await doc.getPage(1);
  const textContent = await firstPage.getTextContent({
    disableNormalization: true,
  });

  // The individual text items are merged before comparing with the expected
  // string.
  expect(mergeText(textContent.items)).toEqual("Hello world!");

  await loadingTask.destroy();
});

it("gets text content, and check that out-of-page text is not present (bug 1755201)", async function () {
if (isNodeJS) {
pending("Linked test-cases are not supported in Node.js.");
Expand Down

0 comments on commit 5ee6169

Please sign in to comment.