From e64f1ca15ddd3bcb6197191bc7650708d900f843 Mon Sep 17 00:00:00 2001 From: John A Stevenson Date: Wed, 25 Aug 2021 12:05:14 +0100 Subject: [PATCH 01/13] Use python_ags4 backend to convert successful files --- app/ags.py | 73 +++++++++++++++++++++++++------------------ test/unit/test_ags.py | 5 +-- 2 files changed, 45 insertions(+), 33 deletions(-) diff --git a/app/ags.py b/app/ags.py index 17391ec7..5e4e21fe 100644 --- a/app/ags.py +++ b/app/ags.py @@ -32,10 +32,7 @@ def validate(filename: Path, standard_AGS4_dictionary: Optional[str] = None) -> logger.info("Validate called for %", filename.name) # Prepare response with metadata - response = {'filename': filename.name, - 'filesize': filename.stat().st_size, - 'checker': f'python_ags4 v{python_ags4.__version__}', - 'time': dt.datetime.now(tz=dt.timezone.utc)} + response = _prepare_response_metadata(filename) # Select dictionary file if exists if standard_AGS4_dictionary: @@ -89,44 +86,46 @@ def to_plain_text(response: dict) -> str: return PLAIN_TEXT_TEMPLATE.render(response) -def convert(filename: Path, results_dir: Path) -> Tuple[Optional[Path], str]: +def convert(filename: Path, results_dir: Path) -> Tuple[Optional[Path], dict]: """ Convert filename between .ags and .xlsx. Write output to file in - results_dir and return path alongside processing log.""" + results_dir and return path alongside job status data in dictionary.""" + # Prepare variables and directory new_extension = '.ags' if filename.suffix == '.xlsx' else '.xlsx' converted_file = results_dir / (filename.stem + new_extension) logger.info("Converting %s to %s", filename.name, converted_file.name) if not results_dir.exists(): results_dir.mkdir() - args = [ - 'ags4_cli', 'convert', filename, converted_file - ] - # Use subprocess to run the file. It will swallow errors. - # A timeout prevents the whole process hanging indefinitely. - result = subprocess.run(args, capture_output=True, - text=True, timeout=30) - logger.debug(result) - # Generate response based on result of subprocess - filesize = filename.stat().st_size / 1024 - time_utc = dt.datetime.now(tz=dt.timezone.utc).strftime('%Y-%m-%d %H:%M:%S') - if result.returncode != 0: - message = 'ERROR: ' + result.stderr - converted_file.unlink(missing_ok=True) - converted_file = None - elif result.stdout.startswith('ERROR: '): - message = result.stdout - converted_file.unlink(missing_ok=True) - converted_file = None - else: + # Prepare response with metadata + response = _prepare_response_metadata(filename) + + # Do the conversion + if filename.suffix == '.ags': + AGS4.AGS4_to_excel(filename, converted_file) + message = f"SUCCESS: {filename.name} converted to {converted_file.name}" + elif filename.suffix == '.xlsx': + AGS4.excel_to_AGS4(filename, converted_file) message = f"SUCCESS: {filename.name} converted to {converted_file.name}" + else: + message = f"ERROR: {filename.name} is not .ags or .xlsx format" + + # Generate response based on result of subprocess + #filesize = filename.stat().st_size / 1024 + #if result.returncode != 0: + # message = 'ERROR: ' + result.stderr + # converted_file.unlink(missing_ok=True) + # converted_file = None + #elif result.stdout.startswith('ERROR: '): + # message = result.stdout + # converted_file.unlink(missing_ok=True) + # converted_file = None + #else: + # message = f"SUCCESS: {filename.name} converted to {converted_file.name}" - log = RESPONSE_TEMPLATE.format(filename=filename.name, - filesize=filesize, - time_utc=time_utc, - message=message) + response['message'] = message - return (converted_file, log) + return (converted_file, response) def is_valid(filename: Path, standard_AGS4_dictionary: Optional[str] = None) -> bool: @@ -160,6 +159,18 @@ def line_of_error(filename: Path, char_no: int) -> Tuple[int, int, str, str]: return char_no, line_no, line, char +def _prepare_response_metadata(filename: Path) -> dict: + """ + Prepare a dictionary containing metadata to include in the response. + """ + response = {'filename': filename.name, + 'filesize': filename.stat().st_size, + 'checker': f'python_ags4 v{python_ags4.__version__}', + 'dictionary': '', # This is usually overwritten + 'time': dt.datetime.now(tz=dt.timezone.utc)} + return response + + class Ags4CliError(Exception): """Class for exceptions resulting from ags4_cli call.""" pass diff --git a/test/unit/test_ags.py b/test/unit/test_ags.py index a28733fb..e57325ad 100644 --- a/test/unit/test_ags.py +++ b/test/unit/test_ags.py @@ -71,11 +71,12 @@ def test_convert(tmp_path, filename, expected): results_dir.mkdir() # Act - converted_file, log = ags.convert(filename, results_dir) + converted_file, response = ags.convert(filename, results_dir) # Assert assert converted_file is not None and converted_file.exists() - assert re.search(expected, log) + assert filename.name == response['filename'] + assert re.search(expected, response['message']) @pytest.mark.parametrize('filename, expected', BAD_FILE_DATA) From 14e8ae2bacec43e65d52f68e16684adf7e837e16 Mon Sep 17 00:00:00 2001 From: John A Stevenson Date: Wed, 25 Aug 2021 12:56:29 +0100 Subject: [PATCH 02/13] Handle bad files in python_ags4 conversion --- app/ags.py | 50 +++++++++++++++++++++++---------------- app/response_templates.py | 10 -------- test/fixtures.py | 10 ++++---- test/unit/test_ags.py | 11 ++++----- 4 files changed, 39 insertions(+), 42 deletions(-) diff --git a/app/ags.py b/app/ags.py index 5e4e21fe..e9818b32 100644 --- a/app/ags.py +++ b/app/ags.py @@ -4,14 +4,13 @@ import logging from pathlib import Path import re -import subprocess from typing import Tuple, Optional import python_ags4 from python_ags4 import AGS4 +from app.response_templates import PLAIN_TEXT_TEMPLATE -from app.response_templates import PLAIN_TEXT_TEMPLATE, RESPONSE_TEMPLATE logger = logging.getLogger(__name__) @@ -101,29 +100,38 @@ def convert(filename: Path, results_dir: Path) -> Tuple[Optional[Path], dict]: response = _prepare_response_metadata(filename) # Do the conversion + success = True if filename.suffix == '.ags': - AGS4.AGS4_to_excel(filename, converted_file) - message = f"SUCCESS: {filename.name} converted to {converted_file.name}" + try: + AGS4.AGS4_to_excel(filename, converted_file) + except IndexError: + success = False + error_message = "ERROR: File does not have AGS format layout" + except UnboundLocalError: + # This error is thrown in response to a bug in the upstream code, + # which in turn is only triggered if the AGS file has duplicate + # headers. + success = False + error_message = "ERROR: File contains duplicate headers" elif filename.suffix == '.xlsx': - AGS4.excel_to_AGS4(filename, converted_file) - message = f"SUCCESS: {filename.name} converted to {converted_file.name}" + try: + AGS4.excel_to_AGS4(filename, converted_file) + except AttributeError as err: + # Include error details here in case they provide a clue e.g. which + # attribute is missing + success = False + error_message = f"ERROR: Bad spreadsheet layout ({err.args[0]})" else: - message = f"ERROR: {filename.name} is not .ags or .xlsx format" - - # Generate response based on result of subprocess - #filesize = filename.stat().st_size / 1024 - #if result.returncode != 0: - # message = 'ERROR: ' + result.stderr - # converted_file.unlink(missing_ok=True) - # converted_file = None - #elif result.stdout.startswith('ERROR: '): - # message = result.stdout - # converted_file.unlink(missing_ok=True) - # converted_file = None - #else: - # message = f"SUCCESS: {filename.name} converted to {converted_file.name}" + success = False + error_message = f"ERROR: {filename.name} is not .ags or .xlsx format" - response['message'] = message + # Update response and clean failed files + if success: + response['message'] = f"SUCCESS: {filename.name} converted to {converted_file.name}" + else: + response['message'] = error_message + converted_file.unlink(missing_ok=True) + converted_file = None return (converted_file, response) diff --git a/app/response_templates.py b/app/response_templates.py index 00a99784..49c6cd48 100644 --- a/app/response_templates.py +++ b/app/response_templates.py @@ -1,16 +1,6 @@ # Text templates used to build responses. -from textwrap import dedent - from jinja2 import Template -RESPONSE_TEMPLATE = dedent(""" - File Name: \t {filename} - File Size: \t {filesize:0.0f} kB - Time (UTC): \t {time_utc} - - {message} - """).strip() - PLAIN_TEXT_TEMPLATE = Template(""" {{ filename }}: {{ message }} diff --git a/test/fixtures.py b/test/fixtures.py index dc58be50..859679d4 100644 --- a/test/fixtures.py +++ b/test/fixtures.py @@ -19,11 +19,11 @@ ] BAD_FILE_DATA = [ - ('nonsense.ags', ('IndexError: At least one sheet must be visible', 0)), - ('empty.ags', ('IndexError: At least one sheet must be visible', 0)), - ('dummy.xlsx', ("AttributeError: 'DataFrame' object has no attribute 'HEADING'", 5)), - ('random_binary.ags', ('IndexError: At least one sheet must be visible', 1)), - ('real/A3040_03.ags', ("UnboundLocalError: local variable 'group' referenced before assignment", 258)), + ('nonsense.ags', ('ERROR: File does not have AGS format layout', 9)), + ('empty.ags', ('ERROR: File does not have AGS format layout', 0)), + ('dummy.xlsx', ("ERROR: Bad spreadsheet layout ('DataFrame' object has no attribute 'HEADING')", 4787)), + ('random_binary.ags', ('ERROR: File does not have AGS format layout', 1024)), + ('real/A3040_03.ags', ("ERROR: File contains duplicate headers", 264526)), ] DICTIONARIES = { diff --git a/test/unit/test_ags.py b/test/unit/test_ags.py index e57325ad..4d56bad0 100644 --- a/test/unit/test_ags.py +++ b/test/unit/test_ags.py @@ -75,7 +75,7 @@ def test_convert(tmp_path, filename, expected): # Assert assert converted_file is not None and converted_file.exists() - assert filename.name == response['filename'] + assert response['filename'] == filename.name assert re.search(expected, response['message']) @@ -89,14 +89,13 @@ def test_convert_bad_files(tmp_path, filename, expected): expected_message, expected_size = expected # Act - converted_file, log = ags.convert(filename, results_dir) + converted_file, response = ags.convert(filename, results_dir) # Assert assert converted_file is None - assert f"File Name: \t {filename.name}" in log - assert f"File Size: \t {expected_size:n} kB" in log - assert 'ERROR:' in log - assert re.search(expected_message, log) + assert response['filename'] == filename.name + assert response['filesize'] == expected_size + assert response['message'] == expected_message @pytest.mark.parametrize('filename, expected', ISVALID_RSP_DATA) From 9a913848b7f8936c5e66395fd337b8ed92d82cbf Mon Sep 17 00:00:00 2001 From: John A Stevenson Date: Wed, 25 Aug 2021 13:14:56 +0100 Subject: [PATCH 03/13] Handle bad file extensions --- app/ags.py | 22 +++++++++++++++++++--- test/fixtures.py | 1 + test/fixtures_json.py | 13 ++++++++++++- 3 files changed, 32 insertions(+), 4 deletions(-) diff --git a/app/ags.py b/app/ags.py index e9818b32..e533f724 100644 --- a/app/ags.py +++ b/app/ags.py @@ -44,6 +44,11 @@ def validate(filename: Path, standard_AGS4_dictionary: Optional[str] = None) -> else: dictionary_file = None + # Return early if file is not .ags format + if filename.suffix != '.ags': + response['message'] = f"ERROR: {filename.name} is not .ags format" + return response + # Get error information from file try: errors = AGS4.check_file(filename, standard_AGS4_dictionary=dictionary_file) @@ -128,8 +133,10 @@ def convert(filename: Path, results_dir: Path) -> Tuple[Optional[Path], dict]: # Update response and clean failed files if success: response['message'] = f"SUCCESS: {filename.name} converted to {converted_file.name}" + response['valid'] = True else: response['message'] = error_message + response['valid'] = False converted_file.unlink(missing_ok=True) converted_file = None @@ -171,11 +178,20 @@ def _prepare_response_metadata(filename: Path) -> dict: """ Prepare a dictionary containing metadata to include in the response. """ + try: + filesize = filename.stat().st_size + except FileNotFoundError: + filesize = 0 + response = {'filename': filename.name, - 'filesize': filename.stat().st_size, + 'filesize': filesize, 'checker': f'python_ags4 v{python_ags4.__version__}', - 'dictionary': '', # This is usually overwritten - 'time': dt.datetime.now(tz=dt.timezone.utc)} + 'time': dt.datetime.now(tz=dt.timezone.utc), + # The following are usually overwritten + 'message': '', + 'dictionary': '', + 'errors': {}, + 'valid': False} return response diff --git a/test/fixtures.py b/test/fixtures.py index 859679d4..b32bfe36 100644 --- a/test/fixtures.py +++ b/test/fixtures.py @@ -24,6 +24,7 @@ ('dummy.xlsx', ("ERROR: Bad spreadsheet layout ('DataFrame' object has no attribute 'HEADING')", 4787)), ('random_binary.ags', ('ERROR: File does not have AGS format layout', 1024)), ('real/A3040_03.ags', ("ERROR: File contains duplicate headers", 264526)), + ('extension_is.bad', ("ERROR: extension_is.bad is not .ags or .xlsx format", 0)) ] DICTIONARIES = { diff --git a/test/fixtures_json.py b/test/fixtures_json.py index 268b40ab..0a0a764b 100644 --- a/test/fixtures_json.py +++ b/test/fixtures_json.py @@ -21102,7 +21102,18 @@ 'double quotes.', 'group': '', 'line': 2081}]}, - 'valid': False} + 'valid': False}, + 'extension_is.bad': { + 'filename': 'extension_is.bad', + 'filesize': 0, + 'checker': 'python_ags4 v0.3.6', + 'dictionary': '', + 'time': dt.datetime(2021, 8, 23, 14, 25, 43, tzinfo=dt.timezone.utc), + 'message': 'ERROR: extension_is.bad is not .ags format', + 'errors': {}, + 'valid': False + }, + } # These response values break the schema From 84376b5acdbef106d1f1b3d2689390d964117281 Mon Sep 17 00:00:00 2001 From: John A Stevenson Date: Wed, 25 Aug 2021 15:48:54 +0100 Subject: [PATCH 04/13] Add extra test cases for convert --- test/fixtures.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/test/fixtures.py b/test/fixtures.py index b32bfe36..89259890 100644 --- a/test/fixtures.py +++ b/test/fixtures.py @@ -24,7 +24,8 @@ ('dummy.xlsx', ("ERROR: Bad spreadsheet layout ('DataFrame' object has no attribute 'HEADING')", 4787)), ('random_binary.ags', ('ERROR: File does not have AGS format layout', 1024)), ('real/A3040_03.ags', ("ERROR: File contains duplicate headers", 264526)), - ('extension_is.bad', ("ERROR: extension_is.bad is not .ags or .xlsx format", 0)) + ('extension_is.bad', ("ERROR: extension_is.bad is not .ags or .xlsx format", 0)), + # ('real/E52A4379 (2).ags', ("ERROR: File contains duplicate headers", 0)) # This file crashes because it asks for user input ] DICTIONARIES = { From 77c1217ba1fa65924604b76c07577f6600764523 Mon Sep 17 00:00:00 2001 From: John A Stevenson Date: Wed, 25 Aug 2021 17:42:50 +0100 Subject: [PATCH 05/13] Add dummy bad extension file --- test/fixtures.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/test/fixtures.py b/test/fixtures.py index 89259890..b794184b 100644 --- a/test/fixtures.py +++ b/test/fixtures.py @@ -11,6 +11,7 @@ ('random_binary.ags', False), ('real/CG014058_F.ags', False), ('real/Blackburn Southern Bypass.ags', False), # this file contains BOM character + ('extension_is.bad', False), ] GOOD_FILE_DATA = [ @@ -25,7 +26,8 @@ ('random_binary.ags', ('ERROR: File does not have AGS format layout', 1024)), ('real/A3040_03.ags', ("ERROR: File contains duplicate headers", 264526)), ('extension_is.bad', ("ERROR: extension_is.bad is not .ags or .xlsx format", 0)), - # ('real/E52A4379 (2).ags', ("ERROR: File contains duplicate headers", 0)) # This file crashes because it asks for user input + # This file crashes because it asks for user input + # ('real/E52A4379 (2).ags', ("ERROR: File contains duplicate headers", 0)) ] DICTIONARIES = { From 4a9cfbbc0ffd6cfa21504d05f035c89eca3d9be9 Mon Sep 17 00:00:00 2001 From: John A Stevenson Date: Wed, 25 Aug 2021 17:51:55 +0100 Subject: [PATCH 06/13] Remove unused functions and exceptions --- app/ags.py | 29 ----------------------------- test/files/extension_is.bad | 0 2 files changed, 29 deletions(-) create mode 100644 test/files/extension_is.bad diff --git a/app/ags.py b/app/ags.py index e533f724..d70a4ea5 100644 --- a/app/ags.py +++ b/app/ags.py @@ -150,30 +150,6 @@ def is_valid(filename: Path, standard_AGS4_dictionary: Optional[str] = None) -> return validate(filename, standard_AGS4_dictionary=standard_AGS4_dictionary)['valid'] -def get_unicode_message(stderr: str, filename: str) -> str: - """ - Generate useful message from Unicode error - """ - m = re.search(r'.*in position (\d+):.*', stderr) - char_no, line_no, line, char = line_of_error(filename, int(m.group(1))) - message = f'ERROR: Unreadable character "{char}" at position {char_no} on line: {line_no}\nStarting: {line}\n\n' - return message - - -def line_of_error(filename: Path, char_no: int) -> Tuple[int, int, str, str]: - """ - Return character, line number and start of line containing character at char_no. - Also return problem character - """ - with open(filename, encoding='ISO-8859-1') as f: - upto = f.read(char_no) - line_no = upto.count('\n') + 1 - line = upto.split('\n')[-1] - char_no = len(line) + 1 - char = f.read(1) - return char_no, line_no, line, char - - def _prepare_response_metadata(filename: Path) -> dict: """ Prepare a dictionary containing metadata to include in the response. @@ -193,8 +169,3 @@ def _prepare_response_metadata(filename: Path) -> dict: 'errors': {}, 'valid': False} return response - - -class Ags4CliError(Exception): - """Class for exceptions resulting from ags4_cli call.""" - pass diff --git a/test/files/extension_is.bad b/test/files/extension_is.bad new file mode 100644 index 00000000..e69de29b From 1073d45b87b24acde3191da1e4b29ee402efff3d Mon Sep 17 00:00:00 2001 From: John A Stevenson Date: Wed, 25 Aug 2021 17:52:07 +0100 Subject: [PATCH 07/13] Ignore .coverage directory --- .gitignore | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.gitignore b/.gitignore index 3a921c83..9fddf2c8 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,4 @@ __pycache__ venv -.idea \ No newline at end of file +.idea +.coverage From b23443646e650d1509c3747ba4b0678a4625c751 Mon Sep 17 00:00:00 2001 From: Colin Blackburn Date: Thu, 26 Aug 2021 07:05:53 +0100 Subject: [PATCH 08/13] Convert dict to text for log --- app/routes.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/app/routes.py b/app/routes.py index 6887b0a1..8bc99b63 100644 --- a/app/routes.py +++ b/app/routes.py @@ -173,7 +173,8 @@ async def convert_many(background_tasks: BackgroundTasks, contents = await file.read() local_file = tmp_dir / file.filename local_file.write_bytes(contents) - converted, log = ags.convert(local_file, results_dir) + converted, result = ags.convert(local_file, results_dir) + log = ags.to_plain_text(result) f.write(log) f.write('\n' + '=' * 80 + '\n') zipped_file = tmp_dir / RESULTS From 60cf473bc444f104ef82e43dcb93b5f1fba36e71 Mon Sep 17 00:00:00 2001 From: Colin Blackburn Date: Thu, 26 Aug 2021 07:35:00 +0100 Subject: [PATCH 09/13] Add two basic api tests for convert --- test/integration/test_api.py | 45 +++++++++++++++++++++++++++++++++++- 1 file changed, 44 insertions(+), 1 deletion(-) diff --git a/test/integration/test_api.py b/test/integration/test_api.py index e5e3286b..fd92fa6e 100644 --- a/test/integration/test_api.py +++ b/test/integration/test_api.py @@ -8,7 +8,8 @@ from requests_toolbelt.multipart.encoder import MultipartEncoder from app.main import app -from test.fixtures import DICTIONARIES, FROZEN_TIME, ISVALID_RSP_DATA +from test.fixtures import (BAD_FILE_DATA, DICTIONARIES, FROZEN_TIME, + GOOD_FILE_DATA, ISVALID_RSP_DATA) from test.fixtures_json import JSON_RESPONSES from test.fixtures_plain_text import PLAIN_TEXT_RESPONSES @@ -203,6 +204,48 @@ async def test_validatemany_text(async_client): assert log.strip() in response.text +@pytest.mark.asyncio +async def test_convert_good_files(async_client): + # Arrange + fields = [] + for name, data in GOOD_FILE_DATA: + filename = TEST_FILE_DIR / name + file = ('files', (filename.name, open(filename, 'rb'), 'text/plain')) + fields.append(file) + mp_encoder = MultipartEncoder(fields=fields) + + # Act + async with async_client as ac: + response = await ac.post( + '/convert/', + headers={'Content-Type': mp_encoder.content_type}, + data=mp_encoder.to_string()) + + # Assert + assert response.status_code == 200 + + +@pytest.mark.asyncio +async def test_convert_bad_files(async_client): + # Arrange + fields = [] + for name, data in BAD_FILE_DATA: + filename = TEST_FILE_DIR / name + file = ('files', (filename.name, open(filename, 'rb'), 'text/plain')) + fields.append(file) + mp_encoder = MultipartEncoder(fields=fields) + + # Act + async with async_client as ac: + response = await ac.post( + '/convert/', + headers={'Content-Type': mp_encoder.content_type}, + data=mp_encoder.to_string()) + + # Assert + assert response.status_code == 200 + + @pytest.fixture(scope="function") def client(): return TestClient(app) From 5b019e3703f44fa677f576bf4acd6cc1f794c079 Mon Sep 17 00:00:00 2001 From: Colin Blackburn Date: Thu, 26 Aug 2021 07:36:17 +0100 Subject: [PATCH 10/13] Fix flake8 issues from previous work --- app/ags.py | 1 - app/routes.py | 2 +- 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/app/ags.py b/app/ags.py index d70a4ea5..93429f47 100644 --- a/app/ags.py +++ b/app/ags.py @@ -3,7 +3,6 @@ from functools import reduce import logging from pathlib import Path -import re from typing import Tuple, Optional import python_ags4 diff --git a/app/routes.py b/app/routes.py index 8bc99b63..721e62cd 100644 --- a/app/routes.py +++ b/app/routes.py @@ -6,7 +6,7 @@ from typing import List from fastapi import APIRouter, BackgroundTasks, File, Form, Request, UploadFile -from fastapi.responses import FileResponse, HTMLResponse, StreamingResponse +from fastapi.responses import FileResponse, StreamingResponse from app import ags from app.errors import error_responses, InvalidPayloadError From 4c193d7833fcbb90ca0c440338a915a985cb63d2 Mon Sep 17 00:00:00 2001 From: Colin Blackburn Date: Thu, 26 Aug 2021 08:09:17 +0100 Subject: [PATCH 11/13] Extend test to check for files --- test/integration/test_api.py | 31 +++++++++++++++++++++++++++---- 1 file changed, 27 insertions(+), 4 deletions(-) diff --git a/test/integration/test_api.py b/test/integration/test_api.py index fd92fa6e..9c0aae54 100644 --- a/test/integration/test_api.py +++ b/test/integration/test_api.py @@ -1,5 +1,6 @@ """Tests for API responses.""" from pathlib import Path +import shutil from fastapi.testclient import TestClient from freezegun import freeze_time @@ -205,10 +206,10 @@ async def test_validatemany_text(async_client): @pytest.mark.asyncio -async def test_convert_good_files(async_client): +async def test_convert_good_files(async_client, tmp_path): # Arrange fields = [] - for name, data in GOOD_FILE_DATA: + for name, expected in GOOD_FILE_DATA: filename = TEST_FILE_DIR / name file = ('files', (filename.name, open(filename, 'rb'), 'text/plain')) fields.append(file) @@ -223,13 +224,24 @@ async def test_convert_good_files(async_client): # Assert assert response.status_code == 200 + assert response.headers['content-type'] == 'application/x-zip-compressed' + assert response.headers['content-disposition'] == 'attachment; filename=results.zip' + + zip_file = tmp_path / 'results.zip' + unzipped_files = tmp_path / 'results' + with open(zip_file, 'wb') as f: + f.write(response.content) + shutil.unpack_archive(zip_file, unzipped_files, 'zip') + assert (unzipped_files / 'conversion.log').is_file() + for name, expected in GOOD_FILE_DATA: + assert (unzipped_files / name).is_file() @pytest.mark.asyncio -async def test_convert_bad_files(async_client): +async def test_convert_bad_files(async_client, tmp_path): # Arrange fields = [] - for name, data in BAD_FILE_DATA: + for name, expected in BAD_FILE_DATA: filename = TEST_FILE_DIR / name file = ('files', (filename.name, open(filename, 'rb'), 'text/plain')) fields.append(file) @@ -244,6 +256,17 @@ async def test_convert_bad_files(async_client): # Assert assert response.status_code == 200 + assert response.headers['content-type'] == 'application/x-zip-compressed' + assert response.headers['content-disposition'] == 'attachment; filename=results.zip' + + zip_file = tmp_path / 'results.zip' + unzipped_files = tmp_path / 'results' + with open(zip_file, 'wb') as f: + f.write(response.content) + shutil.unpack_archive(zip_file, unzipped_files, 'zip') + assert (unzipped_files / 'conversion.log').is_file() + for name, expected in BAD_FILE_DATA: + assert not (unzipped_files / name).is_file() @pytest.fixture(scope="function") From 51437052159028546b3bc834da3d33eec011a0bf Mon Sep 17 00:00:00 2001 From: Colin Blackburn Date: Thu, 26 Aug 2021 08:36:01 +0100 Subject: [PATCH 12/13] Fully extend convert tests, rename example files --- test/files/{example1.ags => example_ags.ags} | 0 test/files/{example1.xlsx => example_xlsx.xlsx} | Bin test/fixtures.py | 8 ++++---- test/fixtures_json.py | 4 ++-- test/fixtures_plain_text.py | 4 ++-- test/integration/test_api.py | 16 ++++++++++++---- test/unit/test_ags.py | 13 +++++++------ 7 files changed, 27 insertions(+), 18 deletions(-) rename test/files/{example1.ags => example_ags.ags} (100%) rename test/files/{example1.xlsx => example_xlsx.xlsx} (100%) diff --git a/test/files/example1.ags b/test/files/example_ags.ags similarity index 100% rename from test/files/example1.ags rename to test/files/example_ags.ags diff --git a/test/files/example1.xlsx b/test/files/example_xlsx.xlsx similarity index 100% rename from test/files/example1.xlsx rename to test/files/example_xlsx.xlsx diff --git a/test/fixtures.py b/test/fixtures.py index b794184b..790ea41a 100644 --- a/test/fixtures.py +++ b/test/fixtures.py @@ -3,11 +3,11 @@ FROZEN_TIME = "2021-08-23 14:25:43" ISVALID_RSP_DATA = [ - ('example1.ags', True), + ('example_ags.ags', True), ('nonsense.ags', False), ('empty.ags', False), ('real/A3040_03.ags', False), - ('example1.xlsx', False), + ('example_xlsx.xlsx', False), ('random_binary.ags', False), ('real/CG014058_F.ags', False), ('real/Blackburn Southern Bypass.ags', False), # this file contains BOM character @@ -15,8 +15,8 @@ ] GOOD_FILE_DATA = [ - ('example1.ags', 'SUCCESS: example1.ags converted to example1.xlsx'), - ('example1.xlsx', 'SUCCESS: example1.xlsx converted to example1.ags'), + ('example_ags.ags', ('SUCCESS: example_ags.ags converted to example_ags.xlsx', 'example_ags.xlsx')), + ('example_xlsx.xlsx', ('SUCCESS: example_xlsx.xlsx converted to example_xlsx.ags', 'example_xlsx.ags')), ] BAD_FILE_DATA = [ diff --git a/test/fixtures_json.py b/test/fixtures_json.py index 0a0a764b..8512d3bf 100644 --- a/test/fixtures_json.py +++ b/test/fixtures_json.py @@ -1,8 +1,8 @@ import datetime as dt JSON_RESPONSES = { - 'example1.ags': { - 'filename': 'example1.ags', + 'example_ags.ags': { + 'filename': 'example_ags.ags', 'filesize': 4039, 'checker': 'python_ags4 v0.3.6', 'dictionary': 'Standard_dictionary_v4_1.ags', diff --git a/test/fixtures_plain_text.py b/test/fixtures_plain_text.py index 90db39b4..9b25e5b7 100644 --- a/test/fixtures_plain_text.py +++ b/test/fixtures_plain_text.py @@ -1,6 +1,6 @@ PLAIN_TEXT_RESPONSES = { - 'example1.ags': """ -example1.ags: All checks passed! + 'example_ags.ags': """ +example_ags.ags: All checks passed! # Metadata diff --git a/test/integration/test_api.py b/test/integration/test_api.py index 9c0aae54..bbb099d1 100644 --- a/test/integration/test_api.py +++ b/test/integration/test_api.py @@ -54,7 +54,7 @@ async def test_isvalid(async_client, filename, expected): @pytest.mark.asyncio async def test_isvalid_custom_dictionary(async_client, dictionary): # Arrange - filename = TEST_FILE_DIR / 'example1.ags' + filename = TEST_FILE_DIR / 'example_ags.ags' mp_encoder = MultipartEncoder( fields={'file': (filename.name, open(filename, 'rb'), 'text/plain'), 'std_dictionary': dictionary}) @@ -135,7 +135,7 @@ async def test_validatemany_json(async_client): @pytest.mark.asyncio async def test_validate_custom_dictionary(async_client, dictionary, expected): # Arrange - filename = TEST_FILE_DIR / 'example1.ags' + filename = TEST_FILE_DIR / 'example_ags.ags' mp_encoder = MultipartEncoder( fields={'file': (filename.name, open(filename, 'rb'), 'text/plain'), 'std_dictionary': dictionary}) @@ -152,7 +152,7 @@ async def test_validate_custom_dictionary(async_client, dictionary, expected): body = response.json() assert len(body['data']) == 1 # Assert - assert body['data'][0]['filename'] == 'example1.ags' + assert body['data'][0]['filename'] == 'example_ags.ags' assert body['data'][0]['dictionary'] == expected @@ -233,8 +233,12 @@ async def test_convert_good_files(async_client, tmp_path): f.write(response.content) shutil.unpack_archive(zip_file, unzipped_files, 'zip') assert (unzipped_files / 'conversion.log').is_file() + with open(unzipped_files / 'conversion.log', 'rt') as f: + log = f.read() for name, expected in GOOD_FILE_DATA: - assert (unzipped_files / name).is_file() + expected_message, expected_new_file_name = expected + assert (unzipped_files / expected_new_file_name).is_file() + assert expected_message in log @pytest.mark.asyncio @@ -265,8 +269,12 @@ async def test_convert_bad_files(async_client, tmp_path): f.write(response.content) shutil.unpack_archive(zip_file, unzipped_files, 'zip') assert (unzipped_files / 'conversion.log').is_file() + with open(unzipped_files / 'conversion.log', 'rt') as f: + log = f.read() for name, expected in BAD_FILE_DATA: + expected_message, expected_file_size = expected assert not (unzipped_files / name).is_file() + assert expected_message in log @pytest.fixture(scope="function") diff --git a/test/unit/test_ags.py b/test/unit/test_ags.py index 4d56bad0..d0c6a912 100644 --- a/test/unit/test_ags.py +++ b/test/unit/test_ags.py @@ -35,20 +35,20 @@ def test_validate(filename, expected): @pytest.mark.parametrize('dictionary', DICTIONARIES.values()) def test_validate_custom_dictionary(dictionary): # Arrange - filename = TEST_FILE_DIR / 'example1.ags' + filename = TEST_FILE_DIR / 'example_ags.ags' # Act response = ags.validate(filename, standard_AGS4_dictionary=dictionary) # Assert - assert response['filename'] == 'example1.ags' + assert response['filename'] == 'example_ags.ags' assert response['dictionary'] == dictionary def test_validate_custom_dictionary_bad_file(): # Arrange - filename = TEST_FILE_DIR / 'example1.ags' + filename = TEST_FILE_DIR / 'example_ags.ags' dictionary = 'bad_file.ags' # Act @@ -69,6 +69,7 @@ def test_convert(tmp_path, filename, expected): results_dir = tmp_path / 'results' if not results_dir.exists: results_dir.mkdir() + expected_message, expected_new_file_name = expected # Act converted_file, response = ags.convert(filename, results_dir) @@ -76,7 +77,7 @@ def test_convert(tmp_path, filename, expected): # Assert assert converted_file is not None and converted_file.exists() assert response['filename'] == filename.name - assert re.search(expected, response['message']) + assert re.search(expected_message, response['message']) @pytest.mark.parametrize('filename, expected', BAD_FILE_DATA) @@ -113,7 +114,7 @@ def test_is_valid(filename, expected): @pytest.mark.parametrize('dictionary', DICTIONARIES.values()) def test_is_valid_custom_dictionary(dictionary): # Arrange - filename = TEST_FILE_DIR / 'example1.ags' + filename = TEST_FILE_DIR / 'example_ags.ags' # Act result = ags.is_valid(filename, @@ -124,7 +125,7 @@ def test_is_valid_custom_dictionary(dictionary): @pytest.mark.parametrize('filename', [ - 'example1.ags', 'nonsense.ags', 'random_binary.ags', + 'example_ags.ags', 'nonsense.ags', 'random_binary.ags', 'real/Blackburn Southern Bypass.ags']) def test_to_plain_text(filename): # Arrange From 681d4ace74f727ed2e444ea5438665746dcb63a8 Mon Sep 17 00:00:00 2001 From: Colin Blackburn Date: Thu, 26 Aug 2021 08:42:40 +0100 Subject: [PATCH 13/13] Test all content types --- test/integration/test_api.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/test/integration/test_api.py b/test/integration/test_api.py index bbb099d1..ab291978 100644 --- a/test/integration/test_api.py +++ b/test/integration/test_api.py @@ -21,6 +21,7 @@ def test_openapi_json(client): """A hello-world type test to confirm testing framework works.""" response = client.get('/openapi.json') assert response.status_code == 200 + assert response.headers['content-type'] == 'application/json' assert '/validate' in response.text @@ -41,6 +42,7 @@ async def test_isvalid(async_client, filename, expected): # Assert assert response.status_code == 200 + assert response.headers['content-type'] == 'application/json' body = response.json() assert set(body.keys()) == {'msg', 'type', 'self', 'data'} assert body['msg'] is not None @@ -68,6 +70,7 @@ async def test_isvalid_custom_dictionary(async_client, dictionary): # Assert assert response.status_code == 200 + assert response.headers['content-type'] == 'application/json' body = response.json() assert len(body['data']) == 1 assert body['data'][0] @@ -91,6 +94,7 @@ async def test_validate_json(async_client, filename, expected): # Assert assert response.status_code == 200 + assert response.headers['content-type'] == 'application/json' body = response.json() assert set(body.keys()) == {'msg', 'type', 'self', 'data'} assert body['msg'] is not None @@ -122,6 +126,7 @@ async def test_validatemany_json(async_client): # Assert assert response.status_code == 200 + assert response.headers['content-type'] == 'application/json' body = response.json() assert set(body.keys()) == {'msg', 'type', 'self', 'data'} assert body['msg'] is not None @@ -149,6 +154,7 @@ async def test_validate_custom_dictionary(async_client, dictionary, expected): # Assert assert response.status_code == 200 + assert response.headers['content-type'] == 'application/json' body = response.json() assert len(body['data']) == 1 # Assert @@ -176,6 +182,7 @@ async def test_validate_text(async_client, filename, expected): # Assert assert response.status_code == 200 + assert 'text/plain' in response.headers['content-type'] assert response.text.strip() == expected.strip() @@ -201,6 +208,7 @@ async def test_validatemany_text(async_client): # Assert assert response.status_code == 200 + assert 'text/plain' in response.headers['content-type'] for log in PLAIN_TEXT_RESPONSES.values(): assert log.strip() in response.text