diff --git a/test/unit/pdf_find_controller_spec.js b/test/unit/pdf_find_controller_spec.js
index e455d5048c58f4..e1f3169d58fb8a 100644
--- a/test/unit/pdf_find_controller_spec.js
+++ b/test/unit/pdf_find_controller_spec.js
@@ -51,7 +51,8 @@ class MockLinkService extends SimpleLinkService {
 
 async function initPdfFindController(
   filename,
-  updateMatchesCountOnProgress = true
+  updateMatchesCountOnProgress = true,
+  matcher = undefined
 ) {
   const loadingTask = getDocument(
     buildGetDocumentParams(filename || tracemonkeyFileName, {
@@ -65,7 +66,13 @@ async function initPdfFindController(
   const linkService = new MockLinkService();
   linkService.setDocument(pdfDocument);
 
-  const pdfFindController = new PDFFindController({
+  let FindControllerClass = PDFFindController;
+  if (matcher !== undefined) {
+    FindControllerClass = class extends PDFFindController {};
+    FindControllerClass.prototype.match = matcher;
+  }
+
+  const pdfFindController = new FindControllerClass({
     linkService,
     eventBus,
     updateMatchesCountOnProgress,
@@ -1054,4 +1061,80 @@ describe("pdf_find_controller", function () {
     const { eventBus } = await initPdfFindController();
     await testOnFind({ eventBus });
   });
+
+  describe("custom matcher", () => {
+    it("calls to the matcher with the right arguments", async () => {
+      const QUERY = "Foo bar";
+
+      const spy = jasmine
+        .createSpy("custom find matcher")
+        .and.callFake(() => [{ index: 0, length: 1 }]);
+
+      const { eventBus, pdfFindController } = await initPdfFindController(
+        null,
+        false,
+        spy
+      );
+
+      const PAGES_COUNT = 14;
+
+      await testSearch({
+        eventBus,
+        pdfFindController,
+        state: { query: QUERY },
+        selectedMatch: { pageIndex: 0, matchIndex: 0 },
+        matchesPerPage: [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1],
+      });
+
+      expect(spy).toHaveBeenCalledTimes(PAGES_COUNT);
+
+      for (let i = 0; i < PAGES_COUNT; i++) {
+        const args = spy.calls.argsFor(i);
+        expect(args[0]).withContext(`page ${i}`).toBe(QUERY);
+        expect(args[2]).withContext(`page ${i}`).toBe(i);
+      }
+
+      expect(spy.calls.argsFor(0)[1]).toMatch(/^Trace-based /);
+      expect(spy.calls.argsFor(1)[1]).toMatch(/^Hence, recording and /);
+      expect(spy.calls.argsFor(12)[1]).toMatch(/Figure 12. Fraction of time /);
+      expect(spy.calls.argsFor(13)[1]).toMatch(/^not be interpreted as /);
+    });
+
+    it("uses the results returned by the custom matcher", async () => {
+      const QUERY = "Foo bar";
+
+      // prettier-ignore
+      const spy = jasmine.createSpy("custom find matcher")
+        .and.returnValue(undefined)
+        .withArgs(QUERY, jasmine.anything(), 0)
+          .and.returnValue([
+            { index: 20, length: 3 },
+            { index: 50, length: 8 },
+          ])
+        .withArgs(QUERY, jasmine.anything(), 2)
+          .and.returnValue([
+            { index: 7, length: 19 }
+          ])
+        .withArgs(QUERY, jasmine.anything(), 13)
+          .and.returnValue([
+            { index: 50, length: 2 },
+            { index: 54, length: 9 },
+            { index: 80, length: 4 },
+          ]);
+
+      const { eventBus, pdfFindController } = await initPdfFindController(
+        null,
+        false,
+        spy
+      );
+
+      await testSearch({
+        eventBus,
+        pdfFindController,
+        state: { query: QUERY },
+        selectedMatch: { pageIndex: 0, matchIndex: 0 },
+        matchesPerPage: [2, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3],
+      });
+    });
+  });
 });
diff --git a/web/pdf_find_controller.js b/web/pdf_find_controller.js
index 3990b2eb6668fa..88d259c8bff6dd 100644
--- a/web/pdf_find_controller.js
+++ b/web/pdf_find_controller.js
@@ -670,37 +670,6 @@ class PDFFindController {
     return true;
   }
 
-  #calculateRegExpMatch(query, entireWord, pageIndex, pageContent) {
-    const matches = (this._pageMatches[pageIndex] = []);
-    const matchesLength = (this._pageMatchesLength[pageIndex] = []);
-    if (!query) {
-      // The query can be empty because some chars like diacritics could have
-      // been stripped out.
-      return;
-    }
-    const diffs = this._pageDiffs[pageIndex];
-    let match;
-    while ((match = query.exec(pageContent)) !== null) {
-      if (
-        entireWord &&
-        !this.#isEntireWord(pageContent, match.index, match[0].length)
-      ) {
-        continue;
-      }
-
-      const [matchPos, matchLen] = getOriginalIndex(
-        diffs,
-        match.index,
-        match[0].length
-      );
-
-      if (matchLen) {
-        matches.push(matchPos);
-        matchesLength.push(matchLen);
-      }
-    }
-  }
-
   #convertToRegExpString(query, hasDiacritics) {
     const { matchDiacritics } = this.#state;
     let isUnicode = false;
@@ -771,13 +740,65 @@ class PDFFindController {
     return [isUnicode, query];
   }
 
-  #calculateMatch(pageIndex) {
-    let query = this.#query;
+  async #calculateMatch(pageIndex) {
+    const query = this.#query;
     if (query.length === 0) {
       return; // Do nothing: the matches should be wiped out already.
     }
-    const { caseSensitive, entireWord } = this.#state;
     const pageContent = this._pageContents[pageIndex];
+    const matcherResult = await this.match(query, pageContent, pageIndex);
+
+    const matches = (this._pageMatches[pageIndex] = []);
+    const matchesLength = (this._pageMatchesLength[pageIndex] = []);
+    const diffs = this._pageDiffs[pageIndex];
+
+    matcherResult?.forEach(({ index, length }) => {
+      const [matchPos, matchLen] = getOriginalIndex(diffs, index, length);
+      if (matchLen) {
+        matches.push(matchPos);
+        matchesLength.push(matchLen);
+      }
+    });
+
+    // When `highlightAll` is set, ensure that the matches on previously
+    // rendered (and still active) pages are correctly highlighted.
+    if (this.#state.highlightAll) {
+      this.#updatePage(pageIndex);
+    }
+    if (this._resumePageIdx === pageIndex) {
+      this._resumePageIdx = null;
+      this.#nextPageMatch();
+    }
+
+    // Update the match count.
+    const pageMatchesCount = this._pageMatches[pageIndex].length;
+    this._matchesCountTotal += pageMatchesCount;
+    if (this.#updateMatchesCountOnProgress) {
+      if (pageMatchesCount > 0) {
+        this.#updateUIResultsCount();
+      }
+    } else if (++this.#visitedPagesCount === this._linkService.pagesCount) {
+      // For example, in GeckoView we want to have only the final update because
+      // the Java side provides only one object to update the counts.
+      this.#updateUIResultsCount();
+    }
+  }
+
+  /**
+   * @typedef {Object} SingleFindMatch
+   * @property {number} index - The start of the matched text in the page's string
+   *   contents.
+   * @property {number} length - The length of the matched text.
+   */
+
+  /**
+   * @param {string | string[]} query - The search query.
+   * @param {string} pageContent - The text content of the page to search in.
+   * @param {number} pageIndex - The index of the page that is being processed.
+   * @returns {Promise<SingleFindMatch[]> | SingleFindMatch[] | undefined} An
+   *   array of matches in the provided page.
+   */
+  match(query, pageContent, pageIndex) {
     const hasDiacritics = this._hasDiacritics[pageIndex];
 
     let isUnicode = false;
@@ -799,34 +820,22 @@ class PDFFindController {
         })
         .join("|");
     }
+    if (!query) {
+      return undefined;
+    }
 
+    const { caseSensitive, entireWord } = this.#state;
     const flags = `g${isUnicode ? "u" : ""}${caseSensitive ? "" : "i"}`;
-    query = query ? new RegExp(query, flags) : null;
-
-    this.#calculateRegExpMatch(query, entireWord, pageIndex, pageContent);
+    query = new RegExp(query, flags);
 
-    // When `highlightAll` is set, ensure that the matches on previously
-    // rendered (and still active) pages are correctly highlighted.
-    if (this.#state.highlightAll) {
-      this.#updatePage(pageIndex);
-    }
-    if (this._resumePageIdx === pageIndex) {
-      this._resumePageIdx = null;
-      this.#nextPageMatch();
-    }
-
-    // Update the match count.
-    const pageMatchesCount = this._pageMatches[pageIndex].length;
-    this._matchesCountTotal += pageMatchesCount;
-    if (this.#updateMatchesCountOnProgress) {
-      if (pageMatchesCount > 0) {
-        this.#updateUIResultsCount();
+    const matches = [];
+    for (const { index, 0: match } of pageContent.matchAll(query)) {
+      if (entireWord && !this.#isEntireWord(pageContent, index, match.length)) {
+        continue;
       }
-    } else if (++this.#visitedPagesCount === this._linkService.pagesCount) {
-      // For example, in GeckoView we want to have only the final update because
-      // the Java side provides only one object to update the counts.
-      this.#updateUIResultsCount();
+      matches.push({ index, length: match.length });
     }
+    return matches;
   }
 
   #extractText() {
@@ -930,10 +939,9 @@ class PDFFindController {
           continue;
         }
         this._pendingFindMatches.add(i);
-        this._extractTextPromises[i].then(() => {
-          this._pendingFindMatches.delete(i);
-          this.#calculateMatch(i);
-        });
+        this._extractTextPromises[i]
+          .then(() => this.#calculateMatch(i))
+          .finally(() => this._pendingFindMatches.delete(i));
       }
     }
 