improved code as discussed and suggested

openoereb · Aug 29, 2024 · a5157d0 · a5157d0
1 parent e9023cb
commit a5157d0
Show file tree

Hide file tree

Showing 5 changed files with 51 additions and 78 deletions.
diff --git a/dev/config/pyramid_oereb.yml.mako b/dev/config/pyramid_oereb.yml.mako
@@ -85,19 +85,18 @@ pyramid_oereb:
     # Will make an estimation of the total length of the Table of Content (TOC) and control that the page
     # numbering in the output pdf is consistent with TOC numbering. If it is known that the TOC is very long and
     # could run over more than one page, it is preferred to set this to true. The drawback is that it might need
-    # more time to generate the PDF. If set to false, it will assume that only one TOC page exists, and this can
-    # lead to wrong numbering in the TOC.
-    compute_toc_pages: true
-    # To avoid the potentially time consuming second computing of the PDF extract and skip the the computation
-    # of the estimated TOC length, you can specify a default length for the number of TOC pages.
-    # For most of the cantons the length of the TOC is pretty consistent unless a real estate is concerned by none 
-    # or a huge number of restrictions.
-    # An additional page break might also occur if the number of published topics is close to a threshold number
-    # where the TOC fits just about on one or two pages. - for those case estimate the TOC length ist preferable.
-    # In both cases (computing an estimated length or setting a default length) the exact number of TOC pages is
-    # extracted from the created PDF and if it is different from the expected value the PDF extract is called a 
-    # second time with the correct page numbers.
-    default_toc_length: 2
+    # more time to generate the PDF. If set to false, the general_toc_length setting below will be used. If
+    # general_toc_length is not set either, a single TOC page is assumed; this can lead to wrong numbering in
+    # the TOC, which is then fixed by a second PDF extract call that has an impact on performance.
+    compute_toc_pages: false
+    # To skip the computation of the estimated number of TOC pages, which might return an erroneous result
+    # for your setup, you can specify a default number of TOC pages. For most cantons the number of TOC pages
+    # is fairly constant, unless a real estate is affected by no restrictions or by a very large number of them.
+    # In both cases (computing an estimate or setting a default number of TOC pages) the exact number of TOC
+    # pages is extracted from the created PDF and, if it differs from the expected value, the PDF is created a
+    # second time with the correct page numbers.
+    # Note that if "compute_toc_pages" is set to true, "general_toc_length" is not taken into account.
+    general_toc_length: 2
     # Specify any additional URL parameters that the print shall use for WMS calls
     wms_url_params:
       TRANSPARENT: 'true'

diff --git a/doc/source/changes.rst b/doc/source/changes.rst
@@ -8,9 +8,9 @@ to adapt in your project configuration, database etc. when upgrading to a new ve
 
 Version 2.6.0
 -------------
-* New parameter 'default_toc_length' allows to define a default table of content pages number avoiding a second
-call for the pdf extract in most cases. This value should be set if >95% of the PDF have the same number of TOC 
-pages.
+* New parameter 'general_toc_length' allows defining a default number of table of contents (TOC) pages, which
+avoids a second call for the PDF extract in most cases. This value should be set if most of the PDF extracts have
+the same number of TOC pages.
 Default setting: 2
 
 Version 2.5.3

diff --git a/pyramid_oereb/contrib/print_proxy/mapfish_print/mapfish_print.py b/pyramid_oereb/contrib/print_proxy/mapfish_print/mapfish_print.py
@@ -75,11 +75,11 @@ def __call__(self, value, system):
 
         print_config = Config.get('print', {})
 
-        if Config.get('print', {}).get('compute_toc_pages', False):
+        if print_config.get('compute_toc_pages', False):
             extract_as_dict['nbTocPages'] = TocPages(extract_as_dict).getNbPages()
         else:
-            if Config.get('print', {}).get('default_toc_length', False):
-                extract_as_dict['nbTocPages'] = print_config.get('default_toc_length', 2)
+            if print_config.get('general_toc_length') and int(print_config.get('general_toc_length')) > 0:
+                extract_as_dict['nbTocPages'] = print_config.get('general_toc_length', 2)
             else:
                 extract_as_dict['nbTocPages'] = 1
 
@@ -100,7 +100,7 @@ def __call__(self, value, system):
         )
 
         spec = {
-            'layout': Config.get('print', {})['template_name'],
+            'layout': print_config['template_name'],
             'outputFormat': 'pdf',
             'lang': self._language,
             'attributes': extract_as_dict,
@@ -111,67 +111,41 @@ def __call__(self, value, system):
         if self._request.GET.get('getspec', 'no') != 'no':
             response.headers['Content-Type'] = 'application/json; charset=UTF-8'
             return json.dumps(spec, sort_keys=True, indent=4)
-        pdf_url = urlparse.urljoin(Config.get('print', {})['base_url'] + '/', 'buildreport.pdf')
-        pdf_headers = Config.get('print', {})['headers']
+        pdf_url = urlparse.urljoin(print_config['base_url'] + '/', 'buildreport.pdf')
+        pdf_headers = print_config['headers']
         print_result = requests.post(
             pdf_url,
             headers=pdf_headers,
             data=json.dumps(spec)
         )
         try:
-            log.debug('Validation of the TOC length with compute_toc_pages set to {} and default_toc_length set to {}'.format(print_config.get('compute_toc_pages'), print_config.get('default_toc_length'))) # noqa
-            if Config.get('print', {}).get('compute_toc_pages', False):
-                with io.BytesIO() as pdf:
-                    pdf.write(print_result.content)
-                    pdf_reader = PdfReader(pdf)
-                    x = []
-                    for i in range(len(pdf_reader.outline)):
-                        if isinstance(pdf_reader.outline[i], list):
-                            x.append(pdf_reader.outline[i][0]['/Page']['/StructParents'])
-                        else:
-                            x.append(pdf_reader.outline[i]['/Page']['/StructParents'])
-                    try:
-                        true_nb_of_toc = min(x)-1
-                    except ValueError:
-                        true_nb_of_toc = 1
-
-                    log.debug('True number of TOC pages is {}'.format(true_nb_of_toc))
-                    if true_nb_of_toc != extract_as_dict['nbTocPages']:
-                        log.warning('nbTocPages in result pdf: {} are not equal to the one predicted : {}, request new pdf'.format(true_nb_of_toc,extract_as_dict['nbTocPages'])) # noqa
-                        log.debug('Secondary PDF extract call STARTED')
-                        extract_as_dict['nbTocPages'] = true_nb_of_toc
-                        print_result = requests.post(
-                            pdf_url,
-                            headers=pdf_headers,
-                            data=json.dumps(spec)
-                        )
-                        log.debug('Secondary PDF extract call to fix TOC pages number FINISHED')
-            elif Config.get('print', {}).get('default_toc_length', 2):
-                with io.BytesIO() as pdf:
-                    pdf.write(print_result.content)
-                    pdf_reader = PdfReader(pdf)
-                    x = []
-                    for i in range(len(pdf_reader.outline)):
-                        if isinstance(pdf_reader.outline[i], list):
-                            x.append(pdf_reader.outline[i][0]['/Page']['/StructParents'])
-                        else:
-                            x.append(pdf_reader.outline[i]['/Page']['/StructParents'])
-                    try:
-                        true_nb_of_toc = min(x)-1
-                    except ValueError:
-                        true_nb_of_toc = 1
-
-                    log.debug('True number of TOC pages is {}'.format(true_nb_of_toc))
-                    if true_nb_of_toc != extract_as_dict['nbTocPages']:
-                        log.warning('nbTocPages in result pdf: {} are not equal to the one predicted : {}, request new pdf'.format(true_nb_of_toc,extract_as_dict['nbTocPages'])) # noqa
-                        extract_as_dict['nbTocPages'] = true_nb_of_toc
-                        log.debug('Secondary PDF extract call STARTED')
-                        print_result = requests.post(
-                            pdf_url,
-                            headers=pdf_headers,
-                            data=json.dumps(spec)
-                        )
-                        log.debug('Secondary PDF extract call FINISHED')
+            log.debug('Validation of the TOC length with compute_toc_pages set to {} and general_toc_length set to {}'.format(print_config.get('compute_toc_pages'), print_config.get('general_toc_length'))) # noqa
+            with io.BytesIO() as pdf:
+                pdf.write(print_result.content)
+                pdf_reader = PdfReader(pdf)
+                x = []
+                for i in range(len(pdf_reader.outline)):
+                    if isinstance(pdf_reader.outline[i], list):
+                        x.append(pdf_reader.outline[i][0]['/Page']['/StructParents'])
+                    else:
+                        x.append(pdf_reader.outline[i]['/Page']['/StructParents'])
+                try:
+                    true_nb_of_toc = min(x)-1
+                except ValueError:
+                    true_nb_of_toc = 1
+
+                log.debug('True number of TOC pages is {}, expected number was {}'.format(true_nb_of_toc, extract_as_dict['nbTocPages'])) #noqa
+                if true_nb_of_toc != extract_as_dict['nbTocPages']:
+                    log.warning('nbTocPages in result pdf: {} are not equal to the one predicted : {}, request new pdf'.format(true_nb_of_toc,extract_as_dict['nbTocPages'])) # noqa
+                    log.debug('Secondary PDF extract call STARTED')
+                    extract_as_dict['nbTocPages'] = true_nb_of_toc
+                    print_result = requests.post(
+                        pdf_url,
+                        headers=pdf_headers,
+                        data=json.dumps(spec)
+                    )
+                    log.debug('Secondary PDF extract call to fix TOC pages number DONE')
+
         except PdfReadError as e:
             err_msg = 'a problem occurred while generating the pdf file'
             log.error(err_msg + ': ' + str(e))

diff --git a/tests/contrib.print_proxy.mapfish_print/resources/test_config.yml b/tests/contrib.print_proxy.mapfish_print/resources/test_config.yml
@@ -5,7 +5,7 @@ pyramid_oereb:
       TRANSPARENT: 'true'
       OTHERCUSTOM: 'myvalue'
     compute_toc_pages: false
-    default_toc_length: 2
+    general_toc_length: 2
 
   theme:
     source:

diff --git a/tests/contrib.print_proxy.mapfish_print/test_mapfish_print_configuration.py b/tests/contrib.print_proxy.mapfish_print/test_mapfish_print_configuration.py
@@ -106,8 +106,8 @@ def test_toc_pages_default_config():
     Config._config = None
     Config.init('./tests/contrib.print_proxy.mapfish_print/resources/test_config.yml', 'pyramid_oereb')
     compute_toc_pages = Config.get('print', {}).get('compute_toc_pages')
-    default_toc_length = Config.get('print', {}).get('default_toc_length')
+    general_toc_length = Config.get('print', {}).get('general_toc_length')
 
     assert isinstance(compute_toc_pages, bool)
     assert bool(compute_toc_pages) is False
-    assert default_toc_length == 2
+    assert general_toc_length == 2