From 94d53d5b4527dd491c76852e4f89ba4b5feae800 Mon Sep 17 00:00:00 2001 From: Calixte Denizet Date: Thu, 28 Nov 2024 16:43:48 +0100 Subject: [PATCH] Very slightly improve the performance when searching in a pdf It helps to slightly decrease memory use by reducing the number of created arrays. When searching for "a" in pdf.pdf, the time spent in getOriginalIndex is decreased by around 30%. --- web/pdf_find_controller.js | 45 ++++++++++++++++++++++---------------- 1 file changed, 26 insertions(+), 19 deletions(-) diff --git a/web/pdf_find_controller.js b/web/pdf_find_controller.js index 11e6b73947b39..fc6250c65c854 100644 --- a/web/pdf_find_controller.js +++ b/web/pdf_find_controller.js @@ -184,7 +184,7 @@ function normalize(text) { } let normalized = text.normalize("NFD"); - const positions = [[0, 0]]; + const positions = [0, 0]; let rawDiacriticsIndex = 0; let syllableIndex = 0; let shift = 0; @@ -201,7 +201,7 @@ function normalize(text) { const replacement = CHARACTERS_TO_NORMALIZE[p1]; const jj = replacement.length; for (let j = 1; j < jj; j++) { - positions.push([i - shift + j, shift - j]); + positions.push(i - shift + j, shift - j); } shift -= jj - 1; return replacement; @@ -216,7 +216,7 @@ function normalize(text) { } const jj = replacement.length; for (let j = 1; j < jj; j++) { - positions.push([i - shift + j, shift - j]); + positions.push(i - shift + j, shift - j); } shift -= jj - 1; return replacement; @@ -233,13 +233,13 @@ function normalize(text) { } else { // i is the position of the first diacritic // so (i - 1) is the position for the letter before. - positions.push([i - 1 - shift + 1, shift - 1]); + positions.push(i - 1 - shift + 1, shift - 1); shift -= 1; shiftOrigin += 1; } // End-of-line. 
- positions.push([i - shift + 1, shift]); + positions.push(i - shift + 1, shift); shiftOrigin += 1; eol += 1; @@ -261,7 +261,7 @@ function normalize(text) { for (let j = 1; j <= jj; j++) { // i is the position of the first diacritic // so (i - 1) is the position for the letter before. - positions.push([i - 1 - shift + j, shift - j]); + positions.push(i - 1 - shift + j, shift - j); } shift -= jj; shiftOrigin += jj; @@ -270,7 +270,7 @@ function normalize(text) { // Diacritics are followed by a -\n. // See comments in `if (p6)` block. i += len - 1; - positions.push([i - shift + 1, 1 + shift]); + positions.push(i - shift + 1, 1 + shift); shift += 1; shiftOrigin += 1; eol += 1; @@ -296,7 +296,7 @@ function normalize(text) { // The \n isn't in the original text so here y = i, n = X.len - 2 and // o = X.len - 1. const len = p6.length - 2; - positions.push([i - shift + len, 1 + shift]); + positions.push(i - shift + len, 1 + shift); shift += 1; shiftOrigin += 1; eol += 1; @@ -308,7 +308,7 @@ function normalize(text) { // white space. // A CJK can be encoded in UTF-32, hence their length isn't always 1. const len = p7.length - 1; - positions.push([i - shift + len, shift]); + positions.push(i - shift + len, shift); shiftOrigin += 1; eol += 1; return p7.slice(0, -1); @@ -317,7 +317,7 @@ function normalize(text) { if (p8) { // eol is replaced by space: "foo\nbar" is likely equivalent to // "foo bar". 
- positions.push([i - shift + 1, shift - 1]); + positions.push(i - shift + 1, shift - 1); shift -= 1; shiftOrigin += 1; eol += 1; @@ -331,7 +331,7 @@ function normalize(text) { const newCharLen = syllablePositions[syllableIndex][0] - 1; ++syllableIndex; for (let j = 1; j <= newCharLen; j++) { - positions.push([i - (shift - j), shift - j]); + positions.push(i - (shift - j), shift - j); } shift -= newCharLen; shiftOrigin += newCharLen; @@ -340,9 +340,15 @@ function normalize(text) { } ); - positions.push([normalized.length, shift]); + positions.push(normalized.length, shift); + const starts = new Uint32Array(positions.length >> 1); + const shifts = new Int32Array(positions.length >> 1); + for (let i = 0, ii = positions.length; i < ii; i += 2) { + starts[i >> 1] = positions[i]; + shifts[i >> 1] = positions[i + 1]; + } - return [normalized, positions, hasDiacritics]; + return [normalized, [starts, shifts], hasDiacritics]; } // Determine the original, non-normalized, match index such that highlighting of @@ -353,25 +359,26 @@ function getOriginalIndex(diffs, pos, len) { return [pos, len]; } + const [starts, shifts] = diffs; // First char in the new string. const start = pos; // Last char in the new string. const end = pos + len - 1; - let i = binarySearchFirstItem(diffs, x => x[0] >= start); - if (diffs[i][0] > start) { + let i = binarySearchFirstItem(starts, x => x >= start); + if (starts[i] > start) { --i; } - let j = binarySearchFirstItem(diffs, x => x[0] >= end, i); - if (diffs[j][0] > end) { + let j = binarySearchFirstItem(starts, x => x >= end, i); + if (starts[j] > end) { --j; } // First char in the old string. - const oldStart = start + diffs[i][1]; + const oldStart = start + shifts[i]; // Last char in the old string. - const oldEnd = end + diffs[j][1]; + const oldEnd = end + shifts[j]; const oldLen = oldEnd + 1 - oldStart; return [oldStart, oldLen];