From 3c157494826503bd2065d6472c83157bad9d300f Mon Sep 17 00:00:00 2001 From: Thomas Beutlich Date: Mon, 20 May 2024 17:08:46 +0200 Subject: [PATCH] Update libxml2 to v2.12.7 --- .travis.yml | 2 +- .../Resources/C-Sources/libxml2/HTMLparser.c | 5 ++- ExternData/Resources/C-Sources/libxml2/SAX2.c | 12 +------ .../libxml2/include/libxml/xmlversion.h | 8 ++--- .../libxml2/include/private/parser.h | 2 ++ .../Resources/C-Sources/libxml2/parser.c | 2 +- .../C-Sources/libxml2/parserInternals.c | 24 +++++++++++++ .../Resources/C-Sources/libxml2/xmlreader.c | 22 ++++++------ .../Resources/C-Sources/libxml2/xmlsave.c | 34 ++++++++++++------- 9 files changed, 69 insertions(+), 42 deletions(-) diff --git a/.travis.yml b/.travis.yml index 3f5ff1e4..86b43542 100644 --- a/.travis.yml +++ b/.travis.yml @@ -109,7 +109,7 @@ before_install: esac - curl https://bitbucket.org/Swyter/bitbucket-curl-upload-to-repo-downloads/raw/default/upload-to-bitbucket.sh -O -J -L - chmod +x ./upload-to-bitbucket.sh - - git clone --branch v2.12.5 --depth 1 https://gitlab.gnome.org/GNOME/libxml2 + - git clone --branch v2.12.7 --depth 1 https://gitlab.gnome.org/GNOME/libxml2 - mkdir -p libxml2/$PLATFORM before_cache: diff --git a/ExternData/Resources/C-Sources/libxml2/HTMLparser.c b/ExternData/Resources/C-Sources/libxml2/HTMLparser.c index 097ed236..ea6a4f26 100644 --- a/ExternData/Resources/C-Sources/libxml2/HTMLparser.c +++ b/ExternData/Resources/C-Sources/libxml2/HTMLparser.c @@ -5987,7 +5987,7 @@ htmlCreatePushParserCtxt(htmlSAXHandlerPtr sax, void *user_data, xmlInitParser(); - buf = xmlAllocParserInputBuffer(enc); + buf = xmlAllocParserInputBuffer(XML_CHAR_ENCODING_NONE); if (buf == NULL) return(NULL); ctxt = htmlNewSAXParserCtxt(sax, user_data); @@ -6018,6 +6018,9 @@ htmlCreatePushParserCtxt(htmlSAXHandlerPtr sax, void *user_data, inputPush(ctxt, inputStream); + if (enc != XML_CHAR_ENCODING_NONE) + xmlSwitchEncoding(ctxt, enc); + if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) && 
(ctxt->input->buf != NULL)) { size_t pos = ctxt->input->cur - ctxt->input->base; diff --git a/ExternData/Resources/C-Sources/libxml2/SAX2.c b/ExternData/Resources/C-Sources/libxml2/SAX2.c index ed21a559..bb72e160 100644 --- a/ExternData/Resources/C-Sources/libxml2/SAX2.c +++ b/ExternData/Resources/C-Sources/libxml2/SAX2.c @@ -955,17 +955,7 @@ xmlSAX2EndDocument(void *ctx) doc = ctxt->myDoc; if ((doc != NULL) && (doc->encoding == NULL)) { - const xmlChar *encoding = NULL; - - if ((ctxt->input->flags & XML_INPUT_USES_ENC_DECL) || - (ctxt->input->flags & XML_INPUT_AUTO_ENCODING)) { - /* Preserve encoding exactly */ - encoding = ctxt->encoding; - } else if ((ctxt->input->buf) && (ctxt->input->buf->encoder)) { - encoding = BAD_CAST ctxt->input->buf->encoder->name; - } else if (ctxt->input->flags & XML_INPUT_HAS_ENCODING) { - encoding = BAD_CAST "UTF-8"; - } + const xmlChar *encoding = xmlGetActualEncoding(ctxt); if (encoding != NULL) { doc->encoding = xmlStrdup(encoding); diff --git a/ExternData/Resources/C-Sources/libxml2/include/libxml/xmlversion.h b/ExternData/Resources/C-Sources/libxml2/include/libxml/xmlversion.h index cf7a0647..85b695e7 100644 --- a/ExternData/Resources/C-Sources/libxml2/include/libxml/xmlversion.h +++ b/ExternData/Resources/C-Sources/libxml2/include/libxml/xmlversion.h @@ -29,21 +29,21 @@ XMLPUBFUN void xmlCheckVersion(int version); * * the version string like "1.2.3" */ -#define LIBXML_DOTTED_VERSION "2.12.5" +#define LIBXML_DOTTED_VERSION "2.12.7" /** * LIBXML_VERSION: * * the version number: 1.2.3 value is 10203 */ -#define LIBXML_VERSION 21205 +#define LIBXML_VERSION 21207 /** * LIBXML_VERSION_STRING: * * the version number string, 1.2.3 value is "10203" */ -#define LIBXML_VERSION_STRING "21205" +#define LIBXML_VERSION_STRING "21207" /** * LIBXML_VERSION_EXTRA: @@ -58,7 +58,7 @@ XMLPUBFUN void xmlCheckVersion(int version); * Macro to check that the libxml version in use is compatible with * the version the software has been compiled against 
*/ -#define LIBXML_TEST_VERSION xmlCheckVersion(21205); +#define LIBXML_TEST_VERSION xmlCheckVersion(21207); #ifndef VMS #if 0 diff --git a/ExternData/Resources/C-Sources/libxml2/include/private/parser.h b/ExternData/Resources/C-Sources/libxml2/include/private/parser.h index 40d179fe..7f8f6912 100644 --- a/ExternData/Resources/C-Sources/libxml2/include/private/parser.h +++ b/ExternData/Resources/C-Sources/libxml2/include/private/parser.h @@ -48,6 +48,8 @@ XML_HIDDEN void xmlDetectEncoding(xmlParserCtxtPtr ctxt); XML_HIDDEN void xmlSetDeclaredEncoding(xmlParserCtxtPtr ctxt, xmlChar *encoding); +XML_HIDDEN const xmlChar * +xmlGetActualEncoding(xmlParserCtxtPtr ctxt); XML_HIDDEN xmlParserNsData * xmlParserNsCreate(void); diff --git a/ExternData/Resources/C-Sources/libxml2/parser.c b/ExternData/Resources/C-Sources/libxml2/parser.c index b7534ae3..1d99a48f 100644 --- a/ExternData/Resources/C-Sources/libxml2/parser.c +++ b/ExternData/Resources/C-Sources/libxml2/parser.c @@ -9424,7 +9424,7 @@ xmlAttrHashInsert(xmlParserCtxtPtr ctxt, unsigned size, const xmlChar *name, int nsIndex = (int) (ptrdiff_t) atts[2]; if ((nsIndex == NS_INDEX_EMPTY) ? (uri == NULL) : - (nsIndex == NS_INDEX_XML) ? (uri == ctxt->str_xml) : + (nsIndex == NS_INDEX_XML) ? (uri == ctxt->str_xml_ns) : (uri == ctxt->nsTab[nsIndex * 2 + 1])) return(bucket->index); } diff --git a/ExternData/Resources/C-Sources/libxml2/parserInternals.c b/ExternData/Resources/C-Sources/libxml2/parserInternals.c index e6b4cb14..166397bd 100644 --- a/ExternData/Resources/C-Sources/libxml2/parserInternals.c +++ b/ExternData/Resources/C-Sources/libxml2/parserInternals.c @@ -1479,6 +1479,30 @@ xmlSetDeclaredEncoding(xmlParserCtxtPtr ctxt, xmlChar *encoding) { } } +/** + * xmlGetActualEncoding: + * @ctxt: the parser context + * + * Returns the actual encoding used to parse the document. This can differ from + * the declared encoding. 
+ */ +const xmlChar * +xmlGetActualEncoding(xmlParserCtxtPtr ctxt) { + const xmlChar *encoding = NULL; + + if ((ctxt->input->flags & XML_INPUT_USES_ENC_DECL) || + (ctxt->input->flags & XML_INPUT_AUTO_ENCODING)) { + /* Preserve encoding exactly */ + encoding = ctxt->encoding; + } else if ((ctxt->input->buf) && (ctxt->input->buf->encoder)) { + encoding = BAD_CAST ctxt->input->buf->encoder->name; + } else if (ctxt->input->flags & XML_INPUT_HAS_ENCODING) { + encoding = BAD_CAST "UTF-8"; + } + + return(encoding); +} + /************************************************************************ * * * Commodity functions to handle entities processing * diff --git a/ExternData/Resources/C-Sources/libxml2/xmlreader.c b/ExternData/Resources/C-Sources/libxml2/xmlreader.c index 1f903306..5fdeb2b8 100644 --- a/ExternData/Resources/C-Sources/libxml2/xmlreader.c +++ b/ExternData/Resources/C-Sources/libxml2/xmlreader.c @@ -40,6 +40,7 @@ #endif #include "private/buf.h" +#include "private/parser.h" #include "private/tree.h" #ifdef LIBXML_XINCLUDE_ENABLED #include "private/xinclude.h" @@ -2795,20 +2796,17 @@ xmlTextReaderReadAttributeValue(xmlTextReaderPtr reader) { */ const xmlChar * xmlTextReaderConstEncoding(xmlTextReaderPtr reader) { - xmlDocPtr doc = NULL; + const xmlChar *encoding = NULL; + if (reader == NULL) - return(NULL); - if (reader->doc != NULL) - doc = reader->doc; - else if (reader->ctxt != NULL) - doc = reader->ctxt->myDoc; - if (doc == NULL) - return(NULL); + return(NULL); - if (doc->encoding == NULL) - return(NULL); - else - return(CONSTSTR(doc->encoding)); + if (reader->ctxt != NULL) + encoding = xmlGetActualEncoding(reader->ctxt); + else if (reader->doc != NULL) + encoding = reader->doc->encoding; + + return(CONSTSTR(encoding)); } diff --git a/ExternData/Resources/C-Sources/libxml2/xmlsave.c b/ExternData/Resources/C-Sources/libxml2/xmlsave.c index 125853ff..e4cda3e5 100644 --- a/ExternData/Resources/C-Sources/libxml2/xmlsave.c +++ 
b/ExternData/Resources/C-Sources/libxml2/xmlsave.c @@ -1391,13 +1391,14 @@ xhtmlAttrListDumpOutput(xmlSaveCtxtPtr ctxt, xmlAttrPtr cur) { static void xhtmlNodeDumpOutput(xmlSaveCtxtPtr ctxt, xmlNodePtr cur) { int format = ctxt->format, addmeta; - xmlNodePtr tmp, root, unformattedNode = NULL; + xmlNodePtr tmp, root, unformattedNode = NULL, parent; xmlChar *start, *end; xmlOutputBufferPtr buf = ctxt->buf; if (cur == NULL) return; root = cur; + parent = cur->parent; while (1) { switch (cur->type) { case XML_DOCUMENT_NODE: @@ -1414,7 +1415,9 @@ xhtmlNodeDumpOutput(xmlSaveCtxtPtr ctxt, xmlNodePtr cur) { break; case XML_DOCUMENT_FRAG_NODE: - if (cur->children) { + /* Always validate cur->parent when descending. */ + if ((cur->parent == parent) && (cur->children != NULL)) { + parent = cur; cur = cur->children; continue; } @@ -1441,6 +1444,16 @@ xhtmlNodeDumpOutput(xmlSaveCtxtPtr ctxt, xmlNodePtr cur) { ctxt->indent_nr : ctxt->level), ctxt->indent); + /* + * Some users like lxml are known to pass nodes with a corrupted + * tree structure. Fall back to a recursive call to handle this + * case. 
+ */ + if ((cur->parent != parent) && (cur->children != NULL)) { + xhtmlNodeDumpOutput(ctxt, cur); + break; + } + xmlOutputBufferWrite(buf, 1, "<"); if ((cur->ns != NULL) && (cur->ns->prefix != NULL)) { xmlOutputBufferWriteString(buf, (const char *)cur->ns->prefix); @@ -1461,10 +1474,10 @@ xhtmlNodeDumpOutput(xmlSaveCtxtPtr ctxt, xmlNodePtr cur) { if (cur->properties != NULL) xhtmlAttrListDumpOutput(ctxt, cur->properties); - if ((cur->parent != NULL) && - (cur->parent->parent == (xmlNodePtr) cur->doc) && + if ((parent != NULL) && + (parent->parent == (xmlNodePtr) cur->doc) && xmlStrEqual(cur->name, BAD_CAST"head") && - xmlStrEqual(cur->parent->name, BAD_CAST"html")) { + xmlStrEqual(parent->name, BAD_CAST"html")) { tmp = cur->children; while (tmp != NULL) { @@ -1570,6 +1583,7 @@ xhtmlNodeDumpOutput(xmlSaveCtxtPtr ctxt, xmlNodePtr cur) { if (ctxt->format == 1) xmlOutputBufferWrite(buf, 1, "\n"); if (ctxt->level >= 0) ctxt->level++; + parent = cur; cur = cur->children; continue; } @@ -1664,13 +1678,9 @@ xhtmlNodeDumpOutput(xmlSaveCtxtPtr ctxt, xmlNodePtr cur) { break; } - /* - * The parent should never be NULL here but we want to handle - * corrupted documents gracefully. - */ - if (cur->parent == NULL) - return; - cur = cur->parent; + cur = parent; + /* cur->parent was validated when descending. */ + parent = cur->parent; if (cur->type == XML_ELEMENT_NODE) { if (ctxt->level > 0) ctxt->level--;