diff --git a/src/core/struct_tree.js b/src/core/struct_tree.js index 49be8eabfb9ed..f11d4b964a90e 100644 --- a/src/core/struct_tree.js +++ b/src/core/struct_tree.js @@ -15,8 +15,8 @@ import { AnnotationPrefix, stringToPDFString, warn } from "../shared/util.js"; import { Dict, isName, Name, Ref, RefSetCache } from "./primitives.js"; +import { lookupNormalRect, stringToAsciiOrUTF16BE } from "./core_utils.js"; import { NumberTree } from "./name_number_tree.js"; -import { stringToAsciiOrUTF16BE } from "./core_utils.js"; import { writeObject } from "./writer.js"; const MAX_DEPTH = 40; @@ -751,10 +751,38 @@ class StructTreePage { obj.role = node.role; obj.children = []; parent.children.push(obj); - const alt = node.dict.get("Alt"); + let alt = node.dict.get("Alt"); + if (typeof alt !== "string") { + alt = node.dict.get("ActualText"); + } if (typeof alt === "string") { obj.alt = stringToPDFString(alt); } + + const a = node.dict.get("A"); + if (a instanceof Dict) { + const bbox = lookupNormalRect(a.getArray("BBox"), null); + if (bbox) { + obj.bbox = bbox; + } else { + const width = a.get("Width"); + const height = a.get("Height"); + if ( + typeof width === "number" && + width > 0 && + typeof height === "number" && + height > 0 + ) { + obj.bbox = [0, 0, width, height]; + } + } + // TODO: If the bbox is not available, we should try to get it from + // the content stream. + // For example when rendering on the canvas the commands between the + // beginning and the end of the marked-content sequence, we can + // compute the overall bbox. + } + const lang = node.dict.get("Lang"); if (typeof lang === "string") { obj.lang = stringToPDFString(lang); diff --git a/src/display/editor/annotation_editor_layer.js b/src/display/editor/annotation_editor_layer.js index 5bd3e778c7dd4..d25e97d2b6f1e 100644 --- a/src/display/editor/annotation_editor_layer.js +++ b/src/display/editor/annotation_editor_layer.js @@ -395,7 +395,8 @@ class AnnotationEditorLayer { const { target } = event; if ( target === this.#textLayer.div || - (target.classList.contains("endOfContent") && + ((target.getAttribute("role") === "img" || + target.classList.contains("endOfContent")) && this.#textLayer.div.contains(target)) ) { const { isMac } = FeatureTest.platform; @@ -413,7 +414,7 @@ class AnnotationEditorLayer { HighlightEditor.startHighlighting( this, this.#uiManager.direction === "ltr", - event + { target: this.#textLayer.div, x: event.x, y: event.y } ); this.#textLayer.div.addEventListener( "pointerup", diff --git a/test/integration/accessibility_spec.mjs b/test/integration/accessibility_spec.mjs index ed7414ebb0db6..f3e8e39cb4246 100644 --- a/test/integration/accessibility_spec.mjs +++ b/test/integration/accessibility_spec.mjs @@ -241,4 +241,44 @@ describe("accessibility", () => { ); }); }); + + describe("Figure in the content stream", () => { + let pages; + + beforeAll(async () => { + pages = await loadAndWait("bug1708040.pdf", ".textLayer"); + }); + + afterAll(async () => { + await closePages(pages); + }); + + it("must check that an image is correctly inserted in the text layer", async () => { + await Promise.all( + pages.map(async ([browserName, page]) => { + expect(await isStructTreeVisible(page)) + .withContext(`In ${browserName}`) + .toBeTrue(); + + const spanId = await page.evaluate(() => { + const el = document.querySelector( + `.structTree span[role="figure"]` + ); + return el.getAttribute("aria-owns") || null; + }); + + expect(spanId).withContext(`In ${browserName}`).not.toBeNull(); + + const ariaLabel = await page.evaluate(id => { + const img = document.querySelector(`#${id} > span[role="img"]`); + return img.getAttribute("aria-label"); + }, spanId); + + expect(ariaLabel) + .withContext(`In ${browserName}`) + .toEqual("A logo of a fox and a globe"); + }) + ); + }); + }); }); diff --git a/test/integration/highlight_editor_spec.mjs b/test/integration/highlight_editor_spec.mjs index 31379b6c89fa5..6a7c820e2aed7 100644 --- a/test/integration/highlight_editor_spec.mjs +++ b/test/integration/highlight_editor_spec.mjs @@ -2053,4 +2053,51 @@ describe("Highlight Editor", () => { ); }); }); + + describe("Free Highlight with an image in the struct tree", () => { + let pages; + + beforeAll(async () => { + pages = await loadAndWait( + "bug1708040.pdf", + ".annotationEditorLayer", + null, + null, + { highlightEditorColors: "red=#AB0000" } + ); + }); + + afterAll(async () => { + await closePages(pages); + }); + + it("must check that it's possible to draw on an image in a struct tree", async () => { + await Promise.all( + pages.map(async ([browserName, page]) => { + await switchToHighlight(page); + + const rect = await getRect(page, `.textLayer span[role="img"]`); + + const x = rect.x + rect.width / 2; + const y = rect.y + rect.height / 2; + const clickHandle = await waitForPointerUp(page); + await page.mouse.move(x, y); + await page.mouse.down(); + await page.mouse.move(rect.x - 1, rect.y - 1); + await page.mouse.up(); + await awaitPromise(clickHandle); + + await page.waitForSelector(getEditorSelector(0)); + const usedColor = await page.evaluate(() => { + const highlight = document.querySelector( + `.page[data-page-number = "1"] .canvasWrapper > svg.highlight` + ); + return highlight.getAttribute("fill"); + }); + + expect(usedColor).withContext(`In ${browserName}`).toEqual("#AB0000"); + }) + ); + }); + }); }); diff --git a/test/pdfs/.gitignore b/test/pdfs/.gitignore index 0ea0abe0585c2..0e3f5cf3c1168 100644 --- a/test/pdfs/.gitignore +++ b/test/pdfs/.gitignore @@ -664,3 +664,4 @@ !issue18561.pdf !highlights.pdf !highlight.pdf +!bug1708040.pdf diff --git a/test/pdfs/bug1708040.pdf b/test/pdfs/bug1708040.pdf new file mode 100755 index 0000000000000..2d0dbb29b03b2 Binary files /dev/null and b/test/pdfs/bug1708040.pdf differ diff --git a/test/unit/api_spec.js b/test/unit/api_spec.js index 94b1458923e39..dc5140fd1e692 100644 --- a/test/unit/api_spec.js +++ b/test/unit/api_spec.js @@ -3807,11 +3807,13 @@ Caron Broadcasting, Inc., an Ohio corporation (“Lessee”).`) role: "Figure", children: [{ type: "content", id: "p406R_mc11" }], alt: "d h c s logo", + bbox: [57.75, 676, 133.35, 752], }, { role: "Figure", children: [{ type: "content", id: "p406R_mc1" }], alt: "Great Seal of the State of California", + bbox: [481.5, 678, 544.5, 741], }, { role: "P", diff --git a/test/unit/struct_tree_spec.js b/test/unit/struct_tree_spec.js index a4841bffcc3ee..0551565597c79 100644 --- a/test/unit/struct_tree_spec.js +++ b/test/unit/struct_tree_spec.js @@ -107,4 +107,48 @@ describe("struct tree", function () { await loadingTask.destroy(); }); }); + + it("parses structure with a figure and its bounding box", async function () { + const filename = "bug1708040.pdf"; + const params = buildGetDocumentParams(filename); + const loadingTask = getDocument(params); + const doc = await loadingTask.promise; + const page = await doc.getPage(1); + const struct = await page.getStructTree(); + equalTrees( + { + children: [ + { + role: "Document", + children: [ + { + role: "Sect", + children: [ + { + role: "P", + children: [{ type: "content", id: "p21R_mc0" }], + lang: "EN-US", + }, + { + role: "P", + children: [{ type: "content", id: "p21R_mc1" }], + lang: "EN-US", + }, + { + role: "Figure", + children: [{ type: "content", id: "p21R_mc2" }], + alt: "A logo of a fox and a globe\u0000", + bbox: [72, 287.782, 456, 695.032], + }, + ], + }, + ], + }, + ], + role: "Root", + }, + struct + ); + await loadingTask.destroy(); + }); }); diff --git a/web/annotation_editor_layer_builder.css b/web/annotation_editor_layer_builder.css index aea5879b89527..8d01e4671821e 100644 --- a/web/annotation_editor_layer_builder.css +++ b/web/annotation_editor_layer_builder.css @@ -71,6 +71,10 @@ &:not(.free) span { cursor: var(--editorHighlight-editing-cursor); + + &[role="img"] { + cursor: var(--editorFreeHighlight-editing-cursor); + } } &.free span { diff --git a/web/pdf_page_view.js b/web/pdf_page_view.js index 3193e3785440e..e3c154b841880 100644 --- a/web/pdf_page_view.js +++ b/web/pdf_page_view.js @@ -474,10 +474,13 @@ class PDFPageView { } const treeDom = await this.structTreeLayer?.render(); - if (treeDom && this.canvas && treeDom.parentNode !== this.canvas) { - // Pause translation when inserting the structTree in the DOM. + if (treeDom) { this.l10n.pause(); - this.canvas.append(treeDom); + this.structTreeLayer?.addElementsToTextLayer(); + if (this.canvas && treeDom.parentNode !== this.canvas) { + // Pause translation when inserting the structTree in the DOM. + this.canvas.append(treeDom); + } this.l10n.resume(); } this.structTreeLayer?.show(); @@ -768,7 +771,7 @@ class PDFPageView { this.annotationLayer = null; this._annotationCanvasMap = null; } - if (this.structTreeLayer && !(this.textLayer || this.annotationLayer)) { + if (this.structTreeLayer && !this.textLayer) { this.structTreeLayer = null; } if ( @@ -1068,7 +1071,10 @@ class PDFPageView { await this.#finishRenderTask(renderTask); if (this.textLayer || this.annotationLayer) { - this.structTreeLayer ||= new StructTreeLayerBuilder(pdfPage); + this.structTreeLayer ||= new StructTreeLayerBuilder( + pdfPage, + viewport.rawDims + ); } this.#renderTextLayer(); diff --git a/web/struct_tree_layer_builder.js b/web/struct_tree_layer_builder.js index 4e117eacec99e..ead85bbf12507 100644 --- a/web/struct_tree_layer_builder.js +++ b/web/struct_tree_layer_builder.js @@ -82,8 +82,13 @@ class StructTreeLayerBuilder { #elementAttributes = new Map(); - constructor(pdfPage) { + #rawDims; + + #elementsToAddToTextLayer = null; + + constructor(pdfPage, rawDims) { this.#promise = pdfPage.getStructTree(); + this.#rawDims = rawDims; } async render() { @@ -156,6 +161,50 @@ class StructTreeLayerBuilder { } } + #addImageInTextLayer(node, element) { + const { alt, bbox, children } = node; + const child = children?.[0]; + if (!this.#rawDims || !alt || !bbox || child?.type !== "content") { + return false; + } + + const { id } = child; + if (!id) { + return false; + } + + // We cannot add the created element to the text layer immediately, as the + // text layer might not be ready yet. Instead, we store the element and add + // it later in `addElementsToTextLayer`. + + element.setAttribute("aria-owns", id); + const img = document.createElement("span"); + (this.#elementsToAddToTextLayer ||= new Map()).set(id, img); + img.setAttribute("role", "img"); + img.setAttribute("aria-label", removeNullCharacters(alt)); + + const { pageHeight, pageX, pageY } = this.#rawDims; + const calc = "calc(var(--scale-factor)*"; + const { style } = img; + style.width = `${calc}${bbox[2] - bbox[0]}px)`; + style.height = `${calc}${bbox[3] - bbox[1]}px)`; + style.left = `${calc}${bbox[0] - pageX}px)`; + style.top = `${calc}${pageHeight - bbox[3] + pageY}px)`; + + return true; + } + + addElementsToTextLayer() { + if (!this.#elementsToAddToTextLayer) { + return; + } + for (const [id, img] of this.#elementsToAddToTextLayer) { + document.getElementById(id)?.append(img); + } + this.#elementsToAddToTextLayer.clear(); + this.#elementsToAddToTextLayer = null; + } + #walk(node) { if (!node) { return null; @@ -171,6 +220,9 @@ class StructTreeLayerBuilder { } else if (PDF_ROLE_TO_HTML_ROLE[role]) { element.setAttribute("role", PDF_ROLE_TO_HTML_ROLE[role]); } + if (role === "Figure" && this.#addImageInTextLayer(node, element)) { + return element; + } } this.#setAttributes(node, element); diff --git a/web/text_layer_builder.css b/web/text_layer_builder.css index 841b47be7057a..8dbac995cbe35 100644 --- a/web/text_layer_builder.css +++ b/web/text_layer_builder.css @@ -52,6 +52,11 @@ } /*#endif*/ + span[role="img"] { + user-select: none; + cursor: default; + } + .highlight { --highlight-bg-color: rgb(180 0 170 / 0.25); --highlight-selected-bg-color: rgb(0 100 0 / 0.25);