From 6cb54628e42174fd8181feeb7d4b9122516e07f6 Mon Sep 17 00:00:00 2001 From: Kesara Rathnayake Date: Thu, 26 Sep 2024 14:00:52 +1200 Subject: [PATCH] feat: Add option to update datatracker references With the new command-line option `--use-bib`, xincludes that point to datatracker.ietf.org are replaced with bib.ietf.org. Fixes #1167 --- xml2rfc/__init__.py | 4 ++-- xml2rfc/run.py | 13 ++++++++++- xml2rfc/writers/__init__.py | 3 ++- xml2rfc/writers/base.py | 1 + xml2rfc/writers/bib.py | 43 +++++++++++++++++++++++++++++++++++++ 5 files changed, 60 insertions(+), 4 deletions(-) create mode 100644 xml2rfc/writers/bib.py diff --git a/xml2rfc/__init__.py b/xml2rfc/__init__.py index 34b548489..ee70621d0 100644 --- a/xml2rfc/__init__.py +++ b/xml2rfc/__init__.py @@ -16,7 +16,7 @@ from xml2rfc.writers import ( BaseRfcWriter, RawTextRfcWriter, PaginatedTextRfcWriter, HtmlRfcWriter, NroffRfcWriter, ExpandedXmlWriter, RfcWriterError, V2v3XmlWriter, PrepToolWriter, TextWriter, HtmlWriter, PdfWriter, - ExpandV3XmlWriter, UnPrepWriter, DocWriter + ExpandV3XmlWriter, UnPrepWriter, DocWriter, DatatrackerToBibConverter, ) # This defines what 'from xml2rfc import *' actually imports: @@ -25,7 +25,7 @@ 'HtmlRfcWriter', 'NroffRfcWriter', 'ExpandedXmlWriter', 'RfcWriterError', 'V2v3XmlWriter', 'PrepToolWriter', 'TextWriter', 'HtmlWriter', 'PdfWriter', 'ExpandV3XmlWriter', 'UnPrepWriter', - 'DocWriter', + 'DocWriter', 'DatatrackerToBibConverter', ] try: diff --git a/xml2rfc/run.py b/xml2rfc/run.py index b1aee812f..71d9fa0d5 100755 --- a/xml2rfc/run.py +++ b/xml2rfc/run.py @@ -198,6 +198,8 @@ def main(): help='outputs formatted text to file, unpaginated (only v2 input)') formatgroup.add_argument('--expand', action='store_true', help='outputs XML to file with all references expanded') + formatgroup.add_argument('--use-bib', action='store_true', + help='update all datatracker references with bib.ietf.org') formatgroup.add_argument('--v2v3', action='store_true', help='convert vocabulary version 2 XML to 
version 3') formatgroup.add_argument('--preptool', action='store_true', @@ -456,7 +458,7 @@ def main(): options.output_path = options.basename options.basename = None # - num_formats = len([ o for o in [options.raw, options.text, options.nroff, options.html, options.expand, options.v2v3, options.preptool, options.info, options.pdf, options.unprep ] if o]) + num_formats = len([ o for o in [options.raw, options.text, options.nroff, options.html, options.expand, options.use_bib, options.v2v3, options.preptool, options.info, options.pdf, options.unprep ] if o]) if num_formats > 1 and (options.filename or options.output_filename): sys.exit('Cannot use an explicit output filename when generating more than one format, ' 'use --path instead.') @@ -662,6 +664,15 @@ def main(): options.output_filename = None # --- End of legacy formatter invocations --- + if options.use_bib: + xmlrfc = parser.parse(remove_comments=False, quiet=True, normalize=False, strip_cdata=False, add_xmlns=True) + filename = options.output_filename + if not filename: + filename = basename + '.bib.xml' + options.output_filename = filename + expander = xml2rfc.DatatrackerToBibConverter(xmlrfc, options=options, date=options.date) + expander.write(filename) + options.output_filename = None if options.expand and not options.legacy: xmlrfc = parser.parse(remove_comments=False, quiet=True, normalize=False, strip_cdata=False, add_xmlns=True) diff --git a/xml2rfc/writers/__init__.py b/xml2rfc/writers/__init__.py index 2b0b82812..42a49cbe7 100644 --- a/xml2rfc/writers/__init__.py +++ b/xml2rfc/writers/__init__.py @@ -14,11 +14,12 @@ from xml2rfc.writers.pdf import PdfWriter from xml2rfc.writers.unprep import UnPrepWriter from xml2rfc.writers.doc import DocWriter +from xml2rfc.writers.bib import DatatrackerToBibConverter # This defines what 'from xml2rfc.writers import *' actually imports: __all__ = ['BaseRfcWriter', 'RawTextRfcWriter', 'PaginatedTextRfcWriter', 'HtmlRfcWriter', 'NroffRfcWriter', 
'ExpandedXmlWriter', 'RfcWriterError', 'V2v3XmlWriter', 'PrepToolWriter', 'TextWriter', 'HtmlWriter', 'PdfWriter', 'ExpandV3XmlWriter', 'UnPrepWriter', - 'DocWriter', + 'DocWriter', 'DatatrackerToBibConverter', ] diff --git a/xml2rfc/writers/base.py b/xml2rfc/writers/base.py index 16cf7819e..070935036 100644 --- a/xml2rfc/writers/base.py +++ b/xml2rfc/writers/base.py @@ -117,6 +117,7 @@ 'template_dir': os.path.join(os.path.dirname(os.path.dirname(__file__)), 'templates'), 'text': True, 'unprep': False, + 'use_bib': False, 'utf8': False, 'values': False, 'verbose': False, diff --git a/xml2rfc/writers/bib.py b/xml2rfc/writers/bib.py new file mode 100644 index 000000000..dba6c73d0 --- /dev/null +++ b/xml2rfc/writers/bib.py @@ -0,0 +1,43 @@ +# Copyright The IETF Trust 2024, All Rights Reserved +# -*- coding: utf-8 -*- +from __future__ import unicode_literals, print_function, division + +from io import open +from lxml import etree +from urllib.parse import urlparse + +from xml2rfc.writers.preptool import PrepToolWriter + + +class DatatrackerToBibConverter(PrepToolWriter): + """Writes a duplicate XML file but with datatracker references replaced with bib.ietf.org""" + + def write(self, filename): + """Public method to write the XML document to a file""" + self.convert() + with open(filename, "w", encoding="utf-8") as file: + text = etree.tostring(self.tree, encoding="unicode") + file.write("\n") + file.write(text) + if not self.options.quiet: + self.log(" Created file %s" % filename) + + def convert(self): + version = self.root.get("version", "3") + if version not in [ + "3", + ]: + self.die(self.root, 'Expected version="3", but found "%s"' % version) + self.convert_xincludes() + + def convert_xincludes(self): + ns = {"xi": b"http://www.w3.org/2001/XInclude"} + xincludes = self.root.xpath("//xi:include", namespaces=ns) + for xinclude in xincludes: + href = urlparse(xinclude.get("href")) + + if href.netloc == "datatracker.ietf.org": + reference_file = 
href.path.split("/")[-1] + xinclude.set( + "href", f"https://bib.ietf.org/public/rfc/bibxml-ids/{reference_file}" + )