diff --git a/docs/page.rst b/docs/page.rst index a6a63f71e..b58909fad 100644 --- a/docs/page.rst +++ b/docs/page.rst @@ -551,7 +551,7 @@ In a nutshell, this is what you can do with PyMuPDF: pair: fill_opacity; insert_text pair: oc; insert_text - .. method:: insert_text(point, text, fontsize=11, fontname="helv", fontfile=None, idx=0, color=None, fill=None, render_mode=0, border_width=1, encoding=TEXT_ENCODING_LATIN, rotate=0, morph=None, stroke_opacity=1, fill_opacity=1, overlay=True, oc=0) + .. method:: insert_text(point, text, fontsize=11, fontname="helv", fontfile=None, idx=0, color=None, fill=None, render_mode=0, border_width=0.05, encoding=TEXT_ENCODING_LATIN, rotate=0, morph=None, stroke_opacity=1, fill_opacity=1, overlay=True, oc=0) * Changed in v1.18.4 @@ -575,7 +575,7 @@ In a nutshell, this is what you can do with PyMuPDF: pair: fill_opacity; insert_textbox pair: oc; insert_textbox - .. method:: insert_textbox(rect, buffer, fontsize=11, fontname="helv", fontfile=None, idx=0, color=None, fill=None, render_mode=0, border_width=1, encoding=TEXT_ENCODING_LATIN, expandtabs=8, align=TEXT_ALIGN_LEFT, charwidths=None, rotate=0, morph=None, stroke_opacity=1, fill_opacity=1, oc=0, overlay=True) + .. method:: insert_textbox(rect, buffer, fontsize=11, fontname="helv", fontfile=None, idx=0, color=None, fill=None, render_mode=0, border_width=0.05, encoding=TEXT_ENCODING_LATIN, expandtabs=8, align=TEXT_ALIGN_LEFT, charwidths=None, rotate=0, morph=None, stroke_opacity=1, fill_opacity=1, oc=0, overlay=True) * Changed in v1.18.4 diff --git a/docs/shape.rst b/docs/shape.rst index 1e895e489..60cf81c88 100644 --- a/docs/shape.rst +++ b/docs/shape.rst @@ -289,7 +289,7 @@ Several draw methods can be executed in a row and each one of them will contribu Of the four shapes in above image, the top two each show three circles drawn in standard manner (anti-clockwise, look at the arrows). The lower two shapes contain one (the top-left) circle drawn clockwise. 
As can be seen, area orientation is irrelevant for the right column (even-odd rule). - .. method:: insert_text(point, text, fontsize=11, fontname="helv", fontfile=None, set_simple=False, encoding=TEXT_ENCODING_LATIN, color=None, lineheight=None, fill=None, render_mode=0, border_width=1, rotate=0, morph=None, stroke_opacity=1, fill_opacity=1, oc=0) + .. method:: insert_text(point, text, fontsize=11, fontname="helv", fontfile=None, set_simple=False, encoding=TEXT_ENCODING_LATIN, color=None, lineheight=None, fill=None, render_mode=0, border_width=0.05, rotate=0, morph=None, stroke_opacity=1, fill_opacity=1, oc=0) Insert text lines start at *point*. @@ -328,7 +328,7 @@ Several draw methods can be executed in a row and each one of them will contribu pair: rotate; insert_textbox pair: oc; insert_textbox - .. method:: insert_textbox(rect, buffer, fontsize=11, fontname="helv", fontfile=None, set_simple=False, encoding=TEXT_ENCODING_LATIN, color=None, fill=None, render_mode=0, border_width=1, expandtabs=8, align=TEXT_ALIGN_LEFT, rotate=0, morph=None, stroke_opacity=1, fill_opacity=1, oc=0) + .. method:: insert_textbox(rect, buffer, fontsize=11, fontname="helv", fontfile=None, set_simple=False, encoding=TEXT_ENCODING_LATIN, color=None, fill=None, render_mode=0, border_width=0.05, expandtabs=8, align=TEXT_ALIGN_LEFT, rotate=0, morph=None, stroke_opacity=1, fill_opacity=1, oc=0) PDF only: Insert text into the specified rectangle. The text will be split into lines and words and then filled into the available space, starting from one of the four rectangle corners, which depends on *rotate*. Line feeds and multiple space will be respected. @@ -583,7 +583,7 @@ Common Parameters **border_width** (*float*) - Set the border width for text insertions. New in v1.14.9. Relevant only if the render mode argument is used with a value greater zero. + New in v1.14.9. Set the border width for text insertions, as a percentage of the font size. Default value is 0.05 (5%). 
Relevant only if `render mode > 0`. ---- @@ -596,7 +596,7 @@ Common Parameters * For render mode 2, the glyphs are filled and stroked, using both color parameters and the specified border width. You can use this value to simulate **bold text** without using another font: choose the same value for *fill* and *color* and an appropriate value for *border_width*. * For render mode 3, the glyphs are neither stroked nor filled: the text becomes invisible. - The following examples use border_width=0.3, together with a fontsize of 15. Stroke color is blue and fill color is some yellow. + The following examples use border_width=0.02 (2% of the fontsize), together with a fontsize of 15. Stroke color is blue and fill color is some yellow. .. image:: images/img-rendermode.* diff --git a/docs/textwriter.rst b/docs/textwriter.rst index 6cc326c01..ac1fd3a54 100644 --- a/docs/textwriter.rst +++ b/docs/textwriter.rst @@ -136,9 +136,9 @@ Using this object entails three steps: .. note:: Use these methods as often as is required -- there is no technical limit (except memory constraints of your system). You can also mix :meth:`append` and text boxes and have multiple of both. Text positioning is exclusively controlled by the insertion point. Therefore there is no need to adhere to any order. *(Changed in v1.18.0:)* Raise an exception for an unsupported font -- checked via :attr:`Font.is_writable`. - .. method:: write_text(page, opacity=None, color=None, morph=None, overlay=True, oc=0, render_mode=0) + .. method:: write_text(page, opacity=None, color=None, morph=None, overlay=True, oc=0, render_mode=0, border_width=0.05) - Write the TextWriter text to a page, which is the only mandatory parameter. The other parameters can be used to temporarily override the values used when the TextWriter was created. + Write the TextWriter text to a page, which is the only mandatory parameter. 
The other parameters can be used to set or temporarily override the values used when the TextWriter was created. :arg page: write to this :ref:`Page`. :arg float opacity: override the value of the TextWriter for this output. @@ -147,8 +147,20 @@ Using this object entails three steps: :arg bool overlay: put in foreground (default) or background. :arg int oc: *(new in v1.18.4)* the :data:`xref` of an :data:`OCG` or :data:`OCMD`. :arg int render_mode: The PDF `Tr` operator value. Values: 0 (default), 1, 2, 3 (invisible). + :arg float border_width: The width of the glyph border, given as a fraction of the font size (default 0.05, i.e. 5%). Only relevant for `render_mode > 0`. - .. image:: images/img-rendermode.* + In contrast to methods `.insert_text()` and `.insert_textbox()` of classes :ref:`Page` and :ref:`Shape`, TextWriter **does not support different colors** for the border and the interior of glyphs. + + To still accomplish this effect, write the same TextWriter multiple times with different parameters like this: + + :: + + (1) tw.write_text(page, color=yellow, render_mode=0) # write the interior + (2) tw.write_text(page, color=blue, render_mode=1, border_width=0.05) # write the border + + In the following picture, the first text line was generated by executing statement (1), the second line resulted from statement (2), and the last line is the result from executing both. + + .. image:: images/img-rendermode.* .. 
attribute:: text_rect diff --git a/fitz/fitz.i b/fitz/fitz.i index 8d1423b06..91d4b655f 100644 --- a/fitz/fitz.i +++ b/fitz/fitz.i @@ -4025,7 +4025,7 @@ if rbgroups: if not type(x) in (list, tuple): raise ValueError("bad RBGroup '%s'" % x) s = set(x).difference(ocgs) - if f != set(): + if s != set(): raise ValueError("bad OCGs in RBGroup: %s" % s) if basestate: @@ -5535,8 +5535,6 @@ struct Page { r = pdf_annot_rect(gctx, annot); r = fz_make_rect(p.x, p.y, p.x + r.x1 - r.x0, p.y + r.y1 - r.y0); pdf_set_annot_rect(gctx, annot, r); - int flags = PDF_ANNOT_IS_PRINT; - pdf_set_annot_flags(gctx, annot, flags); if (icon) pdf_set_annot_icon_name(gctx, annot, icon); @@ -5547,7 +5545,6 @@ struct Page { pdf_dict_put_text_string(gctx, annot_obj, PDF_NAME(Contents), filename); pdf_update_annot(gctx, annot); pdf_set_annot_rect(gctx, annot, r); - pdf_set_annot_flags(gctx, annot, flags); JM_add_annot_id(gctx, annot, "A"); } fz_always(gctx) { @@ -7609,10 +7606,9 @@ def insert_font(self, fontname="helv", fontfile=None, fontbuffer=None, annot_xrefs = [a[0] for a in self.annot_xrefs() if a[1] not in skip_types] else: annot_xrefs = [a[0] for a in self.annot_xrefs() if a[1] in types and a[1] not in skip_types] + for xref in annot_xrefs: - annot = self.load_annot(xref) - annot._yielded=True - yield annot + yield self.load_annot(xref) def widgets(self, types=None): @@ -11799,10 +11795,10 @@ struct TextPage { fz_print_stext_page_as_xhtml(gctx, out, this_tpage, 0); break; default: - JM_print_stext_page_as_text(gctx, out, this_tpage); + JM_print_stext_page_as_text(gctx, res, this_tpage); break; } - text = JM_UnicodeFromBuffer(gctx, res); + text = JM_EscapeStrFromBuffer(gctx, res); } fz_always(gctx) { @@ -12161,6 +12157,7 @@ struct TextWriter morph: tuple(Point, Matrix), apply a matrix with a fixpoint. matrix: Matrix to be used instead of 'morph' argument. render_mode: (int) PDF render mode operator 'Tr'. + border_width: (float) stroke line Width. Relevant for render mode > 0. 
""" CheckParent(page) @@ -12178,6 +12175,8 @@ struct TextWriter opacity = self.opacity if color is None: color = self.color + if render_mode < 0: + render_mode = 0 %} %pythonappend write_text%{ @@ -12227,7 +12226,7 @@ struct TextWriter temp = line.split() fsize = float(temp[1]) if render_mode != 0: - w = fsize * 0.05 + w = fsize * border_width else: w = 1 new_cont_lines.append("%g w" % w) @@ -12250,7 +12249,7 @@ struct TextWriter repair_mono_font(page, font) %} PyObject *write_text(struct Page *page, PyObject *color=NULL, float opacity=-1, int overlay=1, - PyObject *morph=NULL, PyObject *matrix=NULL, int render_mode=0, int oc=0) + PyObject *morph=NULL, PyObject *matrix=NULL, int render_mode=0, int oc=0, float border_width=0.05) { pdf_page *pdfpage = pdf_page_from_fz_page(gctx, (fz_page *) page); pdf_obj *resources = NULL; diff --git a/fitz/helper-devices.i b/fitz/helper-devices.i index 631d0bcfa..86c5eb59a 100644 --- a/fitz/helper-devices.i +++ b/fitz/helper-devices.i @@ -307,7 +307,7 @@ jm_lineart_path(fz_context *ctx, jm_lineart_device *dev, const fz_path *path) DICT_SETITEM_DROP(dev_pathdict, dictkey_items, PyList_New(0)); fz_walk_path(ctx, path, &trace_path_walker, dev); // Check if any items were added ... 
- if (!PyList_Size(PyDict_GetItem(dev_pathdict, dictkey_items))) { + if (!PyDict_GetItem(dev_pathdict, dictkey_items) || !PyList_Size(PyDict_GetItem(dev_pathdict, dictkey_items))) { Py_CLEAR(dev_pathdict); } } @@ -468,6 +468,9 @@ jm_lineart_clip_path(fz_context *ctx, fz_device *dev_, const fz_path *path, int trace_device_ctm = ctm; //fz_concat(ctm, trace_device_ptm); path_type = CLIP_PATH; jm_lineart_path(ctx, dev, path); + if (!dev_pathdict) { + return; + } DICT_SETITEM_DROP(dev_pathdict, dictkey_type, PyUnicode_FromString("clip")); DICT_SETITEMSTR_DROP(dev_pathdict, "even_odd", JM_BOOL(even_odd)); if (!PyDict_GetItemString(dev_pathdict, "closePath")) { @@ -489,6 +492,9 @@ jm_lineart_clip_stroke_path(fz_context *ctx, fz_device *dev_, const fz_path *pat trace_device_ctm = ctm; //fz_concat(ctm, trace_device_ptm); path_type = CLIP_STROKE_PATH; jm_lineart_path(ctx, dev, path); + if (!dev_pathdict) { + return; + } DICT_SETITEM_DROP(dev_pathdict, dictkey_type, PyUnicode_FromString("clip")); DICT_SETITEMSTR_DROP(dev_pathdict, "even_odd", Py_BuildValue("s", NULL)); if (!PyDict_GetItemString(dev_pathdict, "closePath")) { @@ -601,9 +607,11 @@ jm_trace_text_span(fz_context *ctx, PyObject *out, fz_text_span *span, int type, const char *fontname = JM_font_name(ctx, span->font); float rgb[3]; PyObject *chars = PyTuple_New(span->len); - fz_matrix join = fz_concat(span->trm, ctm); - fz_point dir = fz_transform_vector(fz_make_point(1, 0), join); - double fsize = sqrt(fabs((double) span->trm.a * (double) span->trm.d)); + fz_matrix mat = fz_concat(span->trm, ctm); // text transformation matrix + fz_point dir = fz_transform_vector(fz_make_point(1, 0), mat); // writing direction + dir = fz_normalize_vector(dir); + + double fsize = sqrt(fabs((double) span->trm.a * (double) span->trm.d)); // font size double linewidth, adv, asc, dsc; double space_adv = 0; float x0, y0, x1, y1; @@ -613,35 +621,35 @@ jm_trace_text_span(fz_context *ctx, PyObject *out, fz_text_span *span, int type, dsc = 
-0.1; asc = 0.9; } - + // compute effective ascender / descender double ascsize = asc * fsize / (asc - dsc); double dscsize = dsc * fsize / (asc - dsc); - int fflags = 0; + + int fflags = 0; // font flags int mono = fz_font_is_monospaced(ctx, span->font); fflags += mono * TEXT_FONT_MONOSPACED; fflags += fz_font_is_italic(ctx, span->font) * TEXT_FONT_ITALIC; fflags += fz_font_is_serif(ctx, span->font) * TEXT_FONT_SERIFED; fflags += fz_font_is_bold(ctx, span->font) * TEXT_FONT_BOLD; - fz_matrix mat = trace_device_ptm; - fz_matrix ctm_rot = fz_concat(ctm, trace_device_rot); - mat = fz_concat(mat, ctm_rot); - if (dev_linewidth > 0) { + if (dev_linewidth > 0) { // width of character border linewidth = (double) dev_linewidth; } else { - linewidth = fsize * 0.05; + linewidth = fsize * 0.05; // default: 5% of font size } fz_point char_orig; double last_adv = 0; // walk through characters of span fz_rect span_bbox; - dir = fz_normalize_vector(dir); fz_matrix rot = fz_make_matrix(dir.x, dir.y, -dir.y, dir.x, 0, 0); if (dir.x == -1) { // left-right flip rot.d = 1; } + //PySys_WriteStdout("mat: (%g, %g, %g, %g)\n", mat.a, mat.b, mat.c, mat.d); + //PySys_WriteStdout("rot: (%g, %g, %g, %g)\n", rot.a, rot.b, rot.c, rot.d); + for (i = 0; i < span->len; i++) { adv = 0; if (span->items[i].gid >= 0) { @@ -653,14 +661,14 @@ jm_trace_text_span(fz_context *ctx, PyObject *out, fz_text_span *span, int type, space_adv = adv; } char_orig = fz_make_point(span->items[i].x, span->items[i].y); - char_orig.y = trace_device_ptm.f - char_orig.y; - char_orig = fz_transform_point(char_orig, mat); + char_orig = fz_transform_point(char_orig, ctm); fz_matrix m1 = fz_make_matrix(1, 0, 0, 1, -char_orig.x, -char_orig.y); m1 = fz_concat(m1, rot); m1 = fz_concat(m1, fz_make_matrix(1, 0, 0, 1, char_orig.x, char_orig.y)); x0 = char_orig.x; x1 = x0 + adv; - if (dir.x == 1 && span->trm.d < 0) { // up-down flip + if (mat.d > 0 && (dir.x == 1 || dir.x == -1) || + mat.b !=0 && mat.b == -mat.c) { // up-down flip y0 
= char_orig.y + dscsize; y1 = char_orig.y + ascsize; } else { @@ -688,7 +696,7 @@ jm_trace_text_span(fz_context *ctx, PyObject *out, fz_text_span *span, int type, space_adv = last_adv; } } else { - space_adv = last_adv; // for mono fonts this suffices + space_adv = last_adv; // for mono, any char width suffices } } // make the span dictionary @@ -701,24 +709,25 @@ jm_trace_text_span(fz_context *ctx, PyObject *out, fz_text_span *span, int type, DICT_SETITEMSTR_DROP(span_dict, "bidi_dir", PyLong_FromLong((long) span->markup_dir)); DICT_SETITEM_DROP(span_dict, dictkey_ascender, PyFloat_FromDouble(asc)); DICT_SETITEM_DROP(span_dict, dictkey_descender, PyFloat_FromDouble(dsc)); + DICT_SETITEM_DROP(span_dict, dictkey_colorspace, PyLong_FromLong(3)); + if (colorspace) { fz_convert_color(ctx, colorspace, color, fz_device_rgb(ctx), rgb, NULL, fz_default_color_params); - DICT_SETITEM_DROP(span_dict, dictkey_colorspace, PyLong_FromLong(3)); - DICT_SETITEM_DROP(span_dict, dictkey_color, Py_BuildValue("fff", rgb[0], rgb[1], rgb[2])); } else { - DICT_SETITEM_DROP(span_dict, dictkey_colorspace, PyLong_FromLong(1)); - DICT_SETITEM_DROP(span_dict, dictkey_color, PyFloat_FromDouble(1)); + rgb[0] = rgb[1] = rgb[2] = 0; } + + DICT_SETITEM_DROP(span_dict, dictkey_color, Py_BuildValue("fff", rgb[0], rgb[1], rgb[2])); DICT_SETITEM_DROP(span_dict, dictkey_size, PyFloat_FromDouble(fsize)); DICT_SETITEMSTR_DROP(span_dict, "opacity", PyFloat_FromDouble((double) alpha)); DICT_SETITEMSTR_DROP(span_dict, "linewidth", PyFloat_FromDouble((double) linewidth)); DICT_SETITEMSTR_DROP(span_dict, "spacewidth", PyFloat_FromDouble(space_adv)); DICT_SETITEM_DROP(span_dict, dictkey_type, PyLong_FromLong((long) type)); - DICT_SETITEM_DROP(span_dict, dictkey_chars, chars); DICT_SETITEM_DROP(span_dict, dictkey_bbox, JM_py_from_rect(span_bbox)); DICT_SETITEMSTR_DROP(span_dict, "layer", JM_EscapeStrFromStr(layer_name)); DICT_SETITEMSTR_DROP(span_dict, "seqno", PyLong_FromSize_t(seqno)); + 
DICT_SETITEM_DROP(span_dict, dictkey_chars, chars); LIST_APPEND_DROP(out, span_dict); } diff --git a/fitz/utils.py b/fitz/utils.py index c6c009bf5..1611f19a9 100644 --- a/fitz/utils.py +++ b/fitz/utils.py @@ -1725,7 +1725,7 @@ def insert_textbox( align: int = 0, rotate: int = 0, render_mode: int = 0, - border_width: float = 1, + border_width: float = 0.05, morph: OptSeq = None, overlay: bool = True, stroke_opacity: float = 1, @@ -1791,7 +1791,7 @@ def insert_text( encoding: int = 0, color: OptSeq = None, fill: OptSeq = None, - border_width: float = 1, + border_width: float = 0.05, render_mode: int = 0, rotate: int = 0, morph: OptSeq = None, @@ -3430,7 +3430,7 @@ def insert_text( color: OptSeq = None, fill: OptSeq = None, render_mode: int = 0, - border_width: float = 1, + border_width: float = 0.05, rotate: int = 0, morph: OptSeq = None, stroke_opacity: float = 1, @@ -3561,10 +3561,11 @@ def insert_text( else: alpha = "/%s gs\n" % alpha nres = templ1 % (bdc, alpha, cm, left, top, fname, fontsize) + if render_mode > 0: nres += "%i Tr " % render_mode - if border_width != 1: - nres += "%g w " % border_width + nres += "%g w " % (border_width * fontsize) + if color is not None: nres += color_str if fill is not None: @@ -3613,7 +3614,7 @@ def insert_textbox( color: OptSeq = None, fill: OptSeq = None, expandtabs: int = 1, - border_width: float = 1, + border_width: float = 0.05, align: int = 0, render_mode: int = 0, rotate: int = 0, @@ -3634,7 +3635,7 @@ def insert_textbox( color -- RGB stroke color triple fill -- RGB fill color triple render_mode -- text rendering control - border_width -- thickness of glyph borders + border_width -- thickness of glyph borders as percentage of fontsize expandtabs -- handles tabulators with string function align -- left, center, right, justified rotate -- 0, 90, 180, or 270 degrees @@ -3891,8 +3892,11 @@ def pixlen(x): top = -height + pnt.y + self.y nres += templ % (left, top, fname, fontsize) + if render_mode > 0: nres += "%i Tr " % 
render_mode + nres += "%g w " % (border_width * fontsize) + if align == 3: nres += "%g Tw " % spacing @@ -3900,8 +3904,6 @@ def pixlen(x): nres += color_str if fill is not None: nres += fill_str - if border_width != 1: - nres += "%g w " % border_width nres += "%sTJ\n" % getTJstr(t, tj_glyphs, simple, ordering) nres += "ET\n%sQ\n" % emc diff --git a/tests/resources/test_2533.pdf b/tests/resources/test_2533.pdf new file mode 100644 index 000000000..cf88301f0 Binary files /dev/null and b/tests/resources/test_2533.pdf differ diff --git a/tests/test_drawings.py b/tests/test_drawings.py index e03ee36c9..414a39b9b 100644 --- a/tests/test_drawings.py +++ b/tests/test_drawings.py @@ -175,3 +175,17 @@ def test_2462(): doc = fitz.open(f"{scriptdir}/resources/test-2462.pdf") page = doc[0] vg = page.get_drawings(extended=True) + + +def test_2556(): + """Ensure that incomplete clip paths will be properly ignored.""" + doc = fitz.open() # new empty PDF + page = doc.new_page() # new page + # following contains an incomplete clip + c = b"q 50 697.6 400 100.0 re W n q 0 0 m W n Q " + xref = doc.get_new_xref() # prepare /Contents object for page + doc.update_object(xref,"<<>>") # new xref now is a dictionary + doc.update_stream(xref, c) # store drawing commands + page.set_contents(xref) # give the page this xref as /Contents + # following will bring down interpreter if fix not installed + assert page.get_drawings(extended=True) diff --git a/tests/test_general.py b/tests/test_general.py index fa8aeea6a..4b037320b 100644 --- a/tests/test_general.py +++ b/tests/test_general.py @@ -256,6 +256,24 @@ def test_texttrace(): print( f'page {i} json:\n{json.dumps(tt, indent=" ")}', file=f) +def test_2533(): + """Assert correct char bbox in page.get_texttrace(). + + Search for a unique char on page and confirm that page.get_texttrace() + returns the same bbox as the search method. 
+ """ + doc = fitz.open(os.path.join(scriptdir, "resources", "test_2533.pdf")) + page = doc[0] + NEEDLE = "民" + ord_NEEDLE = ord(NEEDLE) + for span in page.get_texttrace(): + for char in span["chars"]: + if char[0] == ord_NEEDLE: + bbox = fitz.Rect(char[3]) + break + assert page.search_for(NEEDLE)[0] == bbox + + def test_2506(): """Ensure expected font size across text writing angles.""" doc = fitz.open()