From a5157d012d09fad58e4d28d2e26e2e9c698d337c Mon Sep 17 00:00:00 2001 From: voisardf Date: Thu, 29 Aug 2024 16:35:56 +0200 Subject: [PATCH] improved code as discussed and suggested --- dev/config/pyramid_oereb.yml.mako | 25 +++-- doc/source/changes.rst | 6 +- .../mapfish_print/mapfish_print.py | 92 +++++++------------ .../resources/test_config.yml | 2 +- .../test_mapfish_print_configuration.py | 4 +- 5 files changed, 51 insertions(+), 78 deletions(-) diff --git a/dev/config/pyramid_oereb.yml.mako b/dev/config/pyramid_oereb.yml.mako index 335fd302c6..991f220fd4 100644 --- a/dev/config/pyramid_oereb.yml.mako +++ b/dev/config/pyramid_oereb.yml.mako @@ -85,19 +85,18 @@ pyramid_oereb: # Will make an estimation of the total length of the Table of Content (TOC) and control that the page # numbering in the output pdf is consistent with TOC numbering. If it is known that the TOC is very long and # could run over more than one page, it is preferred to set this to true. The drawback is that it might need - # more time to generate the PDF. If set to false, it will assume that only one TOC page exists, and this can - # lead to wrong numbering in the TOC. - compute_toc_pages: true - # To avoid the potentially time consuming second computing of the PDF extract and skip the the computation - # of the estimated TOC length, you can specify a default length for the number of TOC pages. - # For most of the cantons the length of the TOC is pretty consistent unless a real estate is concerned by none - # or a huge number of restrictions. - # An additional page break might also occur if the number of published topics is close to a threshold number - # where the TOC fits just about on one or two pages. - for those case estimate the TOC length ist preferable. 
- # In both cases (computing an estimated length or setting a default length) the exact number of TOC pages is - # extracted from the created PDF and if it is different from the expected value the PDF extract is called a - # second time with the correct page numbers. - default_toc_length: 2 + # more time to generate the PDF. If set to false, the general_toc_length setting below will be used. If that + # setting is not set either, a single TOC page is assumed, which can lead to wrong numbering in the TOC; this + # is then fixed by a second PDF extract call, which has an impact on performance. + compute_toc_pages: false + # In order to skip the computation of the estimated number of TOC pages, which might return an erroneous result + # for your setting, you can specify a default for the number of TOC pages. For most of the cantons the number of + # TOC pages is pretty constant unless a real estate is affected by no restrictions or by a huge number of them. + # In both cases (computing an estimate or setting a default for the number of TOC pages) the exact number of TOC + # pages is extracted from the created PDF and, if it differs from the expected value, the PDF is created a second + # time with the correct page numbers. + # Note that if "compute_toc_pages" is set to true, the "general_toc_length" is not taken into account. + general_toc_length: 2 # Specify any additional URL parameters that the print shall use for WMS calls wms_url_params: TRANSPARENT: 'true' diff --git a/doc/source/changes.rst b/doc/source/changes.rst index 70e651fd30..af31a10664 100644 --- a/doc/source/changes.rst +++ b/doc/source/changes.rst @@ -8,9 +8,9 @@ to adapt in your project configuration, database etc. when upgrading to a new ve Version 2.6.0 ------------- -* New parameter 'default_toc_length' allows to define a default table of content pages number avoiding a second -call for the pdf extract in most cases. This value should be set if >95% of the PDF have the same number of TOC -pages. 
+* New parameter 'general_toc_length' allows defining a default number of table of contents (TOC) pages, avoiding a +second call for the PDF extract in most cases. This value should be set if most of the PDF extracts have the same +number of TOC pages. Default setting: 2 Version 2.5.3 diff --git a/pyramid_oereb/contrib/print_proxy/mapfish_print/mapfish_print.py b/pyramid_oereb/contrib/print_proxy/mapfish_print/mapfish_print.py index a34f986cfb..61491862d0 100644 --- a/pyramid_oereb/contrib/print_proxy/mapfish_print/mapfish_print.py +++ b/pyramid_oereb/contrib/print_proxy/mapfish_print/mapfish_print.py @@ -75,11 +75,11 @@ def __call__(self, value, system): print_config = Config.get('print', {}) - if Config.get('print', {}).get('compute_toc_pages', False): + if print_config.get('compute_toc_pages', False): extract_as_dict['nbTocPages'] = TocPages(extract_as_dict).getNbPages() else: - if Config.get('print', {}).get('default_toc_length', False): - extract_as_dict['nbTocPages'] = print_config.get('default_toc_length', 2) + if print_config.get('general_toc_length') and int(print_config.get('general_toc_length')) > 0: + extract_as_dict['nbTocPages'] = print_config.get('general_toc_length', 2) else: extract_as_dict['nbTocPages'] = 1 @@ -100,7 +100,7 @@ def __call__(self, value, system): ) spec = { - 'layout': Config.get('print', {})['template_name'], + 'layout': print_config['template_name'], 'outputFormat': 'pdf', 'lang': self._language, 'attributes': extract_as_dict, @@ -111,67 +111,41 @@ def __call__(self, value, system): if self._request.GET.get('getspec', 'no') != 'no': response.headers['Content-Type'] = 'application/json; charset=UTF-8' return json.dumps(spec, sort_keys=True, indent=4) - pdf_url = urlparse.urljoin(Config.get('print', {})['base_url'] + '/', 'buildreport.pdf') - pdf_headers = Config.get('print', {})['headers'] + pdf_url = urlparse.urljoin(print_config['base_url'] + '/', 'buildreport.pdf') + pdf_headers = print_config['headers'] print_result = requests.post( 
pdf_url, headers=pdf_headers, data=json.dumps(spec) ) try: - log.debug('Validation of the TOC length with compute_toc_pages set to {} and default_toc_length set to {}'.format(print_config.get('compute_toc_pages'), print_config.get('default_toc_length'))) # noqa - if Config.get('print', {}).get('compute_toc_pages', False): - with io.BytesIO() as pdf: - pdf.write(print_result.content) - pdf_reader = PdfReader(pdf) - x = [] - for i in range(len(pdf_reader.outline)): - if isinstance(pdf_reader.outline[i], list): - x.append(pdf_reader.outline[i][0]['/Page']['/StructParents']) - else: - x.append(pdf_reader.outline[i]['/Page']['/StructParents']) - try: - true_nb_of_toc = min(x)-1 - except ValueError: - true_nb_of_toc = 1 - - log.debug('True number of TOC pages is {}'.format(true_nb_of_toc)) - if true_nb_of_toc != extract_as_dict['nbTocPages']: - log.warning('nbTocPages in result pdf: {} are not equal to the one predicted : {}, request new pdf'.format(true_nb_of_toc,extract_as_dict['nbTocPages'])) # noqa - log.debug('Secondary PDF extract call STARTED') - extract_as_dict['nbTocPages'] = true_nb_of_toc - print_result = requests.post( - pdf_url, - headers=pdf_headers, - data=json.dumps(spec) - ) - log.debug('Secondary PDF extract call to fix TOC pages number FINISHED') - elif Config.get('print', {}).get('default_toc_length', 2): - with io.BytesIO() as pdf: - pdf.write(print_result.content) - pdf_reader = PdfReader(pdf) - x = [] - for i in range(len(pdf_reader.outline)): - if isinstance(pdf_reader.outline[i], list): - x.append(pdf_reader.outline[i][0]['/Page']['/StructParents']) - else: - x.append(pdf_reader.outline[i]['/Page']['/StructParents']) - try: - true_nb_of_toc = min(x)-1 - except ValueError: - true_nb_of_toc = 1 - - log.debug('True number of TOC pages is {}'.format(true_nb_of_toc)) - if true_nb_of_toc != extract_as_dict['nbTocPages']: - log.warning('nbTocPages in result pdf: {} are not equal to the one predicted : {}, request new 
pdf'.format(true_nb_of_toc,extract_as_dict['nbTocPages'])) # noqa - extract_as_dict['nbTocPages'] = true_nb_of_toc - log.debug('Secondary PDF extract call STARTED') - print_result = requests.post( - pdf_url, - headers=pdf_headers, - data=json.dumps(spec) - ) - log.debug('Secondary PDF extract call FINISHED') + log.debug('Validation of the TOC length with compute_toc_pages set to {} and general_toc_length set to {}'.format(print_config.get('compute_toc_pages'), print_config.get('general_toc_length'))) # noqa + with io.BytesIO() as pdf: + pdf.write(print_result.content) + pdf_reader = PdfReader(pdf) + x = [] + for i in range(len(pdf_reader.outline)): + if isinstance(pdf_reader.outline[i], list): + x.append(pdf_reader.outline[i][0]['/Page']['/StructParents']) + else: + x.append(pdf_reader.outline[i]['/Page']['/StructParents']) + try: + true_nb_of_toc = min(x)-1 + except ValueError: + true_nb_of_toc = 1 + + log.debug('True number of TOC pages is {}, expected number was {}'.format(true_nb_of_toc, extract_as_dict['nbTocPages'])) #noqa + if true_nb_of_toc != extract_as_dict['nbTocPages']: + log.warning('nbTocPages in result pdf: {} are not equal to the one predicted : {}, request new pdf'.format(true_nb_of_toc,extract_as_dict['nbTocPages'])) # noqa + log.debug('Secondary PDF extract call STARTED') + extract_as_dict['nbTocPages'] = true_nb_of_toc + print_result = requests.post( + pdf_url, + headers=pdf_headers, + data=json.dumps(spec) + ) + log.debug('Secondary PDF extract call to fix TOC pages number DONE') + except PdfReadError as e: err_msg = 'a problem occurred while generating the pdf file' log.error(err_msg + ': ' + str(e)) diff --git a/tests/contrib.print_proxy.mapfish_print/resources/test_config.yml b/tests/contrib.print_proxy.mapfish_print/resources/test_config.yml index 4776cc2245..6189143f97 100644 --- a/tests/contrib.print_proxy.mapfish_print/resources/test_config.yml +++ b/tests/contrib.print_proxy.mapfish_print/resources/test_config.yml @@ -5,7 +5,7 @@ 
pyramid_oereb: TRANSPARENT: 'true' OTHERCUSTOM: 'myvalue' compute_toc_pages: false - default_toc_length: 2 + general_toc_length: 2 theme: source: diff --git a/tests/contrib.print_proxy.mapfish_print/test_mapfish_print_configuration.py b/tests/contrib.print_proxy.mapfish_print/test_mapfish_print_configuration.py index f19e8268e8..cab5ea78c3 100644 --- a/tests/contrib.print_proxy.mapfish_print/test_mapfish_print_configuration.py +++ b/tests/contrib.print_proxy.mapfish_print/test_mapfish_print_configuration.py @@ -106,8 +106,8 @@ def test_toc_pages_default_config(): Config._config = None Config.init('./tests/contrib.print_proxy.mapfish_print/resources/test_config.yml', 'pyramid_oereb') compute_toc_pages = Config.get('print', {}).get('compute_toc_pages') - default_toc_length = Config.get('print', {}).get('default_toc_length') + general_toc_length = Config.get('print', {}).get('general_toc_length') assert isinstance(compute_toc_pages, bool) assert bool(compute_toc_pages) is False - assert default_toc_length == 2 + assert general_toc_length == 2