Skip to content

Commit

Permalink
improved code as discussed and suggested
Browse files Browse the repository at this point in the history
  • Loading branch information
voisardf committed Aug 29, 2024
1 parent e9023cb commit a5157d0
Show file tree
Hide file tree
Showing 5 changed files with 51 additions and 78 deletions.
25 changes: 12 additions & 13 deletions dev/config/pyramid_oereb.yml.mako
Original file line number Diff line number Diff line change
Expand Up @@ -85,19 +85,18 @@ pyramid_oereb:
# Will make an estimation of the total length of the Table of Content (TOC) and control that the page
# numbering in the output pdf is consistent with TOC numbering. If it is known that the TOC is very long and
# could run over more than one page, it is preferred to set this to true. The drawback is that it might need
# more time to generate the PDF. If set to false, it will assume that only one TOC page exists, and this can
# lead to wrong numbering in the TOC.
compute_toc_pages: true
# To avoid the potentially time-consuming second computation of the PDF extract and to skip the computation
# of the estimated TOC length, you can specify a default length for the number of TOC pages.
# For most of the cantons the length of the TOC is pretty consistent unless a real estate is concerned by none
# or a huge number of restrictions.
# An additional page break might also occur if the number of published topics is close to a threshold number
# where the TOC only just fits on one or two pages. In those cases, estimating the TOC length is preferable.
# In both cases (computing an estimated length or setting a default length) the exact number of TOC pages is
# extracted from the created PDF and if it is different from the expected value the PDF extract is called a
# second time with the correct page numbers.
default_toc_length: 2
# more time to generate the PDF. If set to false, the general_toc_length setting below will be used. If
# general_toc_length is not set either, a single TOC page is assumed; this can lead to wrong numbering in the
# TOC, which is then fixed by a second PDF extract call that has an impact on performance.
compute_toc_pages: false
# In order to skip the computation of the estimated number of TOC pages, which might return an erroneous result
# for your setting, you can specify a default for the number of TOC pages. For most of the cantons the number of
# TOC pages is fairly constant, unless a real estate is affected by no restrictions or by a huge number of them.
# In both cases (computing an estimate or setting a default for the number of TOC pages) the exact number of TOC
# pages is extracted from the created PDF and if it differs from the expected value the PDF is created a second
# time with the correct page numbers.
# Note that if "compute_toc_pages" is set to true, "general_toc_length" is not taken into account.
general_toc_length: 2
# Specify any additional URL parameters that the print shall use for WMS calls
wms_url_params:
TRANSPARENT: 'true'
Expand Down
6 changes: 3 additions & 3 deletions doc/source/changes.rst
Original file line number Diff line number Diff line change
Expand Up @@ -8,9 +8,9 @@ to adapt in your project configuration, database etc. when upgrading to a new ve

Version 2.6.0
-------------
* New parameter 'default_toc_length' allows to define a default table of content pages number avoiding a second
call for the pdf extract in most cases. This value should be set if >95% of the PDF have the same number of TOC
pages.
* New parameter 'general_toc_length' allows defining a default number of table of contents (TOC) pages, which
avoids a second call for the PDF extract in most cases. This value should be set if most of the PDF extracts
have the same number of TOC pages.
Default setting: 2

Version 2.5.3
Expand Down
92 changes: 33 additions & 59 deletions pyramid_oereb/contrib/print_proxy/mapfish_print/mapfish_print.py
Original file line number Diff line number Diff line change
Expand Up @@ -75,11 +75,11 @@ def __call__(self, value, system):

print_config = Config.get('print', {})

if Config.get('print', {}).get('compute_toc_pages', False):
if print_config.get('compute_toc_pages', False):
extract_as_dict['nbTocPages'] = TocPages(extract_as_dict).getNbPages()
else:
if Config.get('print', {}).get('default_toc_length', False):
extract_as_dict['nbTocPages'] = print_config.get('default_toc_length', 2)
if print_config.get('general_toc_length') and int(print_config.get('general_toc_length')) > 0:
extract_as_dict['nbTocPages'] = print_config.get('general_toc_length', 2)

Check warning on line 82 in pyramid_oereb/contrib/print_proxy/mapfish_print/mapfish_print.py

View check run for this annotation

Codecov / codecov/patch

pyramid_oereb/contrib/print_proxy/mapfish_print/mapfish_print.py#L81-L82

Added lines #L81 - L82 were not covered by tests
else:
extract_as_dict['nbTocPages'] = 1

Check warning on line 84 in pyramid_oereb/contrib/print_proxy/mapfish_print/mapfish_print.py

View check run for this annotation

Codecov / codecov/patch

pyramid_oereb/contrib/print_proxy/mapfish_print/mapfish_print.py#L84

Added line #L84 was not covered by tests

Expand All @@ -100,7 +100,7 @@ def __call__(self, value, system):
)

spec = {
'layout': Config.get('print', {})['template_name'],
'layout': print_config['template_name'],
'outputFormat': 'pdf',
'lang': self._language,
'attributes': extract_as_dict,
Expand All @@ -111,67 +111,41 @@ def __call__(self, value, system):
if self._request.GET.get('getspec', 'no') != 'no':
response.headers['Content-Type'] = 'application/json; charset=UTF-8'
return json.dumps(spec, sort_keys=True, indent=4)
pdf_url = urlparse.urljoin(Config.get('print', {})['base_url'] + '/', 'buildreport.pdf')
pdf_headers = Config.get('print', {})['headers']
pdf_url = urlparse.urljoin(print_config['base_url'] + '/', 'buildreport.pdf')
pdf_headers = print_config['headers']
print_result = requests.post(
pdf_url,
headers=pdf_headers,
data=json.dumps(spec)
)
try:
log.debug('Validation of the TOC length with compute_toc_pages set to {} and default_toc_length set to {}'.format(print_config.get('compute_toc_pages'), print_config.get('default_toc_length'))) # noqa
if Config.get('print', {}).get('compute_toc_pages', False):
with io.BytesIO() as pdf:
pdf.write(print_result.content)
pdf_reader = PdfReader(pdf)
x = []
for i in range(len(pdf_reader.outline)):
if isinstance(pdf_reader.outline[i], list):
x.append(pdf_reader.outline[i][0]['/Page']['/StructParents'])
else:
x.append(pdf_reader.outline[i]['/Page']['/StructParents'])
try:
true_nb_of_toc = min(x)-1
except ValueError:
true_nb_of_toc = 1

log.debug('True number of TOC pages is {}'.format(true_nb_of_toc))
if true_nb_of_toc != extract_as_dict['nbTocPages']:
log.warning('nbTocPages in result pdf: {} are not equal to the one predicted : {}, request new pdf'.format(true_nb_of_toc,extract_as_dict['nbTocPages'])) # noqa
log.debug('Secondary PDF extract call STARTED')
extract_as_dict['nbTocPages'] = true_nb_of_toc
print_result = requests.post(
pdf_url,
headers=pdf_headers,
data=json.dumps(spec)
)
log.debug('Secondary PDF extract call to fix TOC pages number FINISHED')
elif Config.get('print', {}).get('default_toc_length', 2):
with io.BytesIO() as pdf:
pdf.write(print_result.content)
pdf_reader = PdfReader(pdf)
x = []
for i in range(len(pdf_reader.outline)):
if isinstance(pdf_reader.outline[i], list):
x.append(pdf_reader.outline[i][0]['/Page']['/StructParents'])
else:
x.append(pdf_reader.outline[i]['/Page']['/StructParents'])
try:
true_nb_of_toc = min(x)-1
except ValueError:
true_nb_of_toc = 1

log.debug('True number of TOC pages is {}'.format(true_nb_of_toc))
if true_nb_of_toc != extract_as_dict['nbTocPages']:
log.warning('nbTocPages in result pdf: {} are not equal to the one predicted : {}, request new pdf'.format(true_nb_of_toc,extract_as_dict['nbTocPages'])) # noqa
extract_as_dict['nbTocPages'] = true_nb_of_toc
log.debug('Secondary PDF extract call STARTED')
print_result = requests.post(
pdf_url,
headers=pdf_headers,
data=json.dumps(spec)
)
log.debug('Secondary PDF extract call FINISHED')
log.debug('Validation of the TOC length with compute_toc_pages set to {} and general_toc_length set to {}'.format(print_config.get('compute_toc_pages'), print_config.get('general_toc_length'))) # noqa
with io.BytesIO() as pdf:
pdf.write(print_result.content)
pdf_reader = PdfReader(pdf)
x = []
for i in range(len(pdf_reader.outline)):
if isinstance(pdf_reader.outline[i], list):
x.append(pdf_reader.outline[i][0]['/Page']['/StructParents'])

Check warning on line 129 in pyramid_oereb/contrib/print_proxy/mapfish_print/mapfish_print.py

View check run for this annotation

Codecov / codecov/patch

pyramid_oereb/contrib/print_proxy/mapfish_print/mapfish_print.py#L128-L129

Added lines #L128 - L129 were not covered by tests
else:
x.append(pdf_reader.outline[i]['/Page']['/StructParents'])

Check warning on line 131 in pyramid_oereb/contrib/print_proxy/mapfish_print/mapfish_print.py

View check run for this annotation

Codecov / codecov/patch

pyramid_oereb/contrib/print_proxy/mapfish_print/mapfish_print.py#L131

Added line #L131 was not covered by tests
try:
true_nb_of_toc = min(x)-1
except ValueError:
true_nb_of_toc = 1

log.debug('True number of TOC pages is {}, expected number was {}'.format(true_nb_of_toc, extract_as_dict['nbTocPages'])) #noqa
if true_nb_of_toc != extract_as_dict['nbTocPages']:
log.warning('nbTocPages in result pdf: {} are not equal to the one predicted : {}, request new pdf'.format(true_nb_of_toc,extract_as_dict['nbTocPages'])) # noqa
log.debug('Secondary PDF extract call STARTED')
extract_as_dict['nbTocPages'] = true_nb_of_toc
print_result = requests.post(

Check warning on line 142 in pyramid_oereb/contrib/print_proxy/mapfish_print/mapfish_print.py

View check run for this annotation

Codecov / codecov/patch

pyramid_oereb/contrib/print_proxy/mapfish_print/mapfish_print.py#L139-L142

Added lines #L139 - L142 were not covered by tests
pdf_url,
headers=pdf_headers,
data=json.dumps(spec)
)
log.debug('Secondary PDF extract call to fix TOC pages number DONE')

Check warning on line 147 in pyramid_oereb/contrib/print_proxy/mapfish_print/mapfish_print.py

View check run for this annotation

Codecov / codecov/patch

pyramid_oereb/contrib/print_proxy/mapfish_print/mapfish_print.py#L147

Added line #L147 was not covered by tests

except PdfReadError as e:
err_msg = 'a problem occurred while generating the pdf file'
log.error(err_msg + ': ' + str(e))
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ pyramid_oereb:
TRANSPARENT: 'true'
OTHERCUSTOM: 'myvalue'
compute_toc_pages: false
default_toc_length: 2
general_toc_length: 2

theme:
source:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -106,8 +106,8 @@ def test_toc_pages_default_config():
Config._config = None
Config.init('./tests/contrib.print_proxy.mapfish_print/resources/test_config.yml', 'pyramid_oereb')
compute_toc_pages = Config.get('print', {}).get('compute_toc_pages')
default_toc_length = Config.get('print', {}).get('default_toc_length')
general_toc_length = Config.get('print', {}).get('general_toc_length')

assert isinstance(compute_toc_pages, bool)
assert bool(compute_toc_pages) is False
assert default_toc_length == 2
assert general_toc_length == 2

0 comments on commit a5157d0

Please sign in to comment.