Skip to content

Commit

Permalink
Merge pull request #19123 from calixteman/improve_search_perf
Browse files Browse the repository at this point in the history
Very slightly improve the performance when searching in a PDF
  • Loading branch information
calixteman authored Nov 28, 2024
2 parents 65f20b0 + 94d53d5 commit 308ca2a
Showing 1 changed file with 26 additions and 19 deletions.
45 changes: 26 additions & 19 deletions web/pdf_find_controller.js
Original file line number Diff line number Diff line change
Expand Up @@ -184,7 +184,7 @@ function normalize(text) {
}

let normalized = text.normalize("NFD");
const positions = [[0, 0]];
const positions = [0, 0];
let rawDiacriticsIndex = 0;
let syllableIndex = 0;
let shift = 0;
Expand All @@ -201,7 +201,7 @@ function normalize(text) {
const replacement = CHARACTERS_TO_NORMALIZE[p1];
const jj = replacement.length;
for (let j = 1; j < jj; j++) {
positions.push([i - shift + j, shift - j]);
positions.push(i - shift + j, shift - j);
}
shift -= jj - 1;
return replacement;
Expand All @@ -216,7 +216,7 @@ function normalize(text) {
}
const jj = replacement.length;
for (let j = 1; j < jj; j++) {
positions.push([i - shift + j, shift - j]);
positions.push(i - shift + j, shift - j);
}
shift -= jj - 1;
return replacement;
Expand All @@ -233,13 +233,13 @@ function normalize(text) {
} else {
// i is the position of the first diacritic
// so (i - 1) is the position for the letter before.
positions.push([i - 1 - shift + 1, shift - 1]);
positions.push(i - 1 - shift + 1, shift - 1);
shift -= 1;
shiftOrigin += 1;
}

// End-of-line.
positions.push([i - shift + 1, shift]);
positions.push(i - shift + 1, shift);
shiftOrigin += 1;
eol += 1;

Expand All @@ -261,7 +261,7 @@ function normalize(text) {
for (let j = 1; j <= jj; j++) {
// i is the position of the first diacritic
// so (i - 1) is the position for the letter before.
positions.push([i - 1 - shift + j, shift - j]);
positions.push(i - 1 - shift + j, shift - j);
}
shift -= jj;
shiftOrigin += jj;
Expand All @@ -270,7 +270,7 @@ function normalize(text) {
// Diacritics are followed by a -\n.
// See comments in `if (p6)` block.
i += len - 1;
positions.push([i - shift + 1, 1 + shift]);
positions.push(i - shift + 1, 1 + shift);
shift += 1;
shiftOrigin += 1;
eol += 1;
Expand All @@ -296,7 +296,7 @@ function normalize(text) {
// The \n isn't in the original text so here y = i, n = X.len - 2 and
// o = X.len - 1.
const len = p6.length - 2;
positions.push([i - shift + len, 1 + shift]);
positions.push(i - shift + len, 1 + shift);
shift += 1;
shiftOrigin += 1;
eol += 1;
Expand All @@ -308,7 +308,7 @@ function normalize(text) {
// white space.
// A CJK character can be encoded as a surrogate pair (two UTF-16 code
// units), hence its length isn't always 1.
const len = p7.length - 1;
positions.push([i - shift + len, shift]);
positions.push(i - shift + len, shift);
shiftOrigin += 1;
eol += 1;
return p7.slice(0, -1);
Expand All @@ -317,7 +317,7 @@ function normalize(text) {
if (p8) {
// eol is replaced by space: "foo\nbar" is likely equivalent to
// "foo bar".
positions.push([i - shift + 1, shift - 1]);
positions.push(i - shift + 1, shift - 1);
shift -= 1;
shiftOrigin += 1;
eol += 1;
Expand All @@ -331,7 +331,7 @@ function normalize(text) {
const newCharLen = syllablePositions[syllableIndex][0] - 1;
++syllableIndex;
for (let j = 1; j <= newCharLen; j++) {
positions.push([i - (shift - j), shift - j]);
positions.push(i - (shift - j), shift - j);
}
shift -= newCharLen;
shiftOrigin += newCharLen;
Expand All @@ -340,9 +340,15 @@ function normalize(text) {
}
);

positions.push([normalized.length, shift]);
positions.push(normalized.length, shift);
const starts = new Uint32Array(positions.length >> 1);
const shifts = new Int32Array(positions.length >> 1);
for (let i = 0, ii = positions.length; i < ii; i += 2) {
starts[i >> 1] = positions[i];
shifts[i >> 1] = positions[i + 1];
}

return [normalized, positions, hasDiacritics];
return [normalized, [starts, shifts], hasDiacritics];
}

// Determine the original, non-normalized, match index such that highlighting of
Expand All @@ -353,25 +359,26 @@ function getOriginalIndex(diffs, pos, len) {
return [pos, len];
}

const [starts, shifts] = diffs;
// First char in the new string.
const start = pos;
// Last char in the new string.
const end = pos + len - 1;
let i = binarySearchFirstItem(diffs, x => x[0] >= start);
if (diffs[i][0] > start) {
let i = binarySearchFirstItem(starts, x => x >= start);
if (starts[i] > start) {
--i;
}

let j = binarySearchFirstItem(diffs, x => x[0] >= end, i);
if (diffs[j][0] > end) {
let j = binarySearchFirstItem(starts, x => x >= end, i);
if (starts[j] > end) {
--j;
}

// First char in the old string.
const oldStart = start + diffs[i][1];
const oldStart = start + shifts[i];

// Last char in the old string.
const oldEnd = end + diffs[j][1];
const oldEnd = end + shifts[j];
const oldLen = oldEnd + 1 - oldStart;

return [oldStart, oldLen];
Expand Down

0 comments on commit 308ca2a

Please sign in to comment.