From e64f1ca15ddd3bcb6197191bc7650708d900f843 Mon Sep 17 00:00:00 2001
From: John A Stevenson <jostev@bgs.ac.uk>
Date: Wed, 25 Aug 2021 12:05:14 +0100
Subject: [PATCH 01/13] Use python_ags4 backend to convert successful files

---
 app/ags.py            | 73 +++++++++++++++++++++++++------------------
 test/unit/test_ags.py |  5 +--
 2 files changed, 45 insertions(+), 33 deletions(-)

diff --git a/app/ags.py b/app/ags.py
index 17391ec7..5e4e21fe 100644
--- a/app/ags.py
+++ b/app/ags.py
@@ -32,10 +32,7 @@ def validate(filename: Path, standard_AGS4_dictionary: Optional[str] = None) ->
     logger.info("Validate called for %", filename.name)
 
     # Prepare response with metadata
-    response = {'filename': filename.name,
-                'filesize': filename.stat().st_size,
-                'checker': f'python_ags4 v{python_ags4.__version__}',
-                'time': dt.datetime.now(tz=dt.timezone.utc)}
+    response = _prepare_response_metadata(filename)
 
     # Select dictionary file if exists
     if standard_AGS4_dictionary:
@@ -89,44 +86,46 @@ def to_plain_text(response: dict) -> str:
     return PLAIN_TEXT_TEMPLATE.render(response)
 
 
-def convert(filename: Path, results_dir: Path) -> Tuple[Optional[Path], str]:
+def convert(filename: Path, results_dir: Path) -> Tuple[Optional[Path], dict]:
     """
     Convert filename between .ags and .xlsx.  Write output to file in
-    results_dir and return path alongside processing log."""
+    results_dir and return path alongside job status data in dictionary."""
+    # Prepare variables and directory
     new_extension = '.ags' if filename.suffix == '.xlsx' else '.xlsx'
     converted_file = results_dir / (filename.stem + new_extension)
     logger.info("Converting %s to %s", filename.name, converted_file.name)
     if not results_dir.exists():
         results_dir.mkdir()
 
-    args = [
-        'ags4_cli', 'convert', filename, converted_file
-    ]
-    # Use subprocess to run the file.  It will swallow errors.
-    # A timeout prevents the whole process hanging indefinitely.
-    result = subprocess.run(args, capture_output=True,
-                            text=True, timeout=30)
-    logger.debug(result)
-    # Generate response based on result of subprocess
-    filesize = filename.stat().st_size / 1024
-    time_utc = dt.datetime.now(tz=dt.timezone.utc).strftime('%Y-%m-%d %H:%M:%S')
-    if result.returncode != 0:
-        message = 'ERROR: ' + result.stderr
-        converted_file.unlink(missing_ok=True)
-        converted_file = None
-    elif result.stdout.startswith('ERROR: '):
-        message = result.stdout
-        converted_file.unlink(missing_ok=True)
-        converted_file = None
-    else:
+    # Prepare response with metadata
+    response = _prepare_response_metadata(filename)
+
+    # Do the conversion
+    if filename.suffix == '.ags':
+        AGS4.AGS4_to_excel(filename, converted_file)
+        message = f"SUCCESS: {filename.name} converted to {converted_file.name}"
+    elif filename.suffix == '.xlsx':
+        AGS4.excel_to_AGS4(filename, converted_file)
         message = f"SUCCESS: {filename.name} converted to {converted_file.name}"
+    else:
+        message = f"ERROR: {filename.name} is not .ags or .xlsx format"
+
+    # Generate response based on result of subprocess
+    #filesize = filename.stat().st_size / 1024
+    #if result.returncode != 0:
+    #    message = 'ERROR: ' + result.stderr
+    #    converted_file.unlink(missing_ok=True)
+    #    converted_file = None
+    #elif result.stdout.startswith('ERROR: '):
+    #    message = result.stdout
+    #    converted_file.unlink(missing_ok=True)
+    #    converted_file = None
+    #else:
+    #    message = f"SUCCESS: {filename.name} converted to {converted_file.name}"
 
-    log = RESPONSE_TEMPLATE.format(filename=filename.name,
-                                   filesize=filesize,
-                                   time_utc=time_utc,
-                                   message=message)
+    response['message'] = message
 
-    return (converted_file, log)
+    return (converted_file, response)
 
 
 def is_valid(filename: Path, standard_AGS4_dictionary: Optional[str] = None) -> bool:
@@ -160,6 +159,18 @@ def line_of_error(filename: Path, char_no: int) -> Tuple[int, int, str, str]:
     return char_no, line_no, line, char
 
 
+def _prepare_response_metadata(filename: Path) -> dict:
+    """
+    Prepare a dictionary containing metadata to include in the response.
+    """
+    response = {'filename': filename.name,
+                'filesize': filename.stat().st_size,
+                'checker': f'python_ags4 v{python_ags4.__version__}',
+                'dictionary': '',  # This is usually overwritten
+                'time': dt.datetime.now(tz=dt.timezone.utc)}
+    return response
+
+
 class Ags4CliError(Exception):
     """Class for exceptions resulting from ags4_cli call."""
     pass
diff --git a/test/unit/test_ags.py b/test/unit/test_ags.py
index a28733fb..e57325ad 100644
--- a/test/unit/test_ags.py
+++ b/test/unit/test_ags.py
@@ -71,11 +71,12 @@ def test_convert(tmp_path, filename, expected):
         results_dir.mkdir()
 
     # Act
-    converted_file, log = ags.convert(filename, results_dir)
+    converted_file, response = ags.convert(filename, results_dir)
 
     # Assert
     assert converted_file is not None and converted_file.exists()
-    assert re.search(expected, log)
+    assert filename.name == response['filename']
+    assert re.search(expected, response['message'])
 
 
 @pytest.mark.parametrize('filename, expected', BAD_FILE_DATA)

From 14e8ae2bacec43e65d52f68e16684adf7e837e16 Mon Sep 17 00:00:00 2001
From: John A Stevenson <jostev@bgs.ac.uk>
Date: Wed, 25 Aug 2021 12:56:29 +0100
Subject: [PATCH 02/13] Handle bad files in python_ags4 conversion

---
 app/ags.py                | 50 +++++++++++++++++++++++----------------
 app/response_templates.py | 10 --------
 test/fixtures.py          | 10 ++++----
 test/unit/test_ags.py     | 11 ++++-----
 4 files changed, 39 insertions(+), 42 deletions(-)

diff --git a/app/ags.py b/app/ags.py
index 5e4e21fe..e9818b32 100644
--- a/app/ags.py
+++ b/app/ags.py
@@ -4,14 +4,13 @@
 import logging
 from pathlib import Path
 import re
-import subprocess
 from typing import Tuple, Optional
 
 import python_ags4
 from python_ags4 import AGS4
 
+from app.response_templates import PLAIN_TEXT_TEMPLATE
 
-from app.response_templates import PLAIN_TEXT_TEMPLATE, RESPONSE_TEMPLATE
 
 logger = logging.getLogger(__name__)
 
@@ -101,29 +100,38 @@ def convert(filename: Path, results_dir: Path) -> Tuple[Optional[Path], dict]:
     response = _prepare_response_metadata(filename)
 
     # Do the conversion
+    success = True
     if filename.suffix == '.ags':
-        AGS4.AGS4_to_excel(filename, converted_file)
-        message = f"SUCCESS: {filename.name} converted to {converted_file.name}"
+        try:
+            AGS4.AGS4_to_excel(filename, converted_file)
+        except IndexError:
+            success = False
+            error_message = "ERROR: File does not have AGS format layout"
+        except UnboundLocalError:
+            # This error is thrown in response to a bug in the upstream code,
+            # which in turn is only triggered if the AGS file has duplicate
+            # headers.
+            success = False
+            error_message = "ERROR: File contains duplicate headers"
     elif filename.suffix == '.xlsx':
-        AGS4.excel_to_AGS4(filename, converted_file)
-        message = f"SUCCESS: {filename.name} converted to {converted_file.name}"
+        try:
+            AGS4.excel_to_AGS4(filename, converted_file)
+        except AttributeError as err:
+            # Include error details here in case they provide a clue e.g. which
+            # attribute is missing
+            success = False
+            error_message = f"ERROR: Bad spreadsheet layout ({err.args[0]})"
     else:
-        message = f"ERROR: {filename.name} is not .ags or .xlsx format"
-
-    # Generate response based on result of subprocess
-    #filesize = filename.stat().st_size / 1024
-    #if result.returncode != 0:
-    #    message = 'ERROR: ' + result.stderr
-    #    converted_file.unlink(missing_ok=True)
-    #    converted_file = None
-    #elif result.stdout.startswith('ERROR: '):
-    #    message = result.stdout
-    #    converted_file.unlink(missing_ok=True)
-    #    converted_file = None
-    #else:
-    #    message = f"SUCCESS: {filename.name} converted to {converted_file.name}"
+        success = False
+        error_message = f"ERROR: {filename.name} is not .ags or .xlsx format"
 
-    response['message'] = message
+    # Update response and clean failed files
+    if success:
+        response['message'] = f"SUCCESS: {filename.name} converted to {converted_file.name}"
+    else:
+        response['message'] = error_message
+        converted_file.unlink(missing_ok=True)
+        converted_file = None
 
     return (converted_file, response)
 
diff --git a/app/response_templates.py b/app/response_templates.py
index 00a99784..49c6cd48 100644
--- a/app/response_templates.py
+++ b/app/response_templates.py
@@ -1,16 +1,6 @@
 # Text templates used to build responses.
-from textwrap import dedent
-
 from jinja2 import Template
 
-RESPONSE_TEMPLATE = dedent("""
-    File Name: \t {filename}
-    File Size: \t {filesize:0.0f} kB
-    Time (UTC): \t {time_utc}
-
-    {message}
-    """).strip()
-
 PLAIN_TEXT_TEMPLATE = Template("""
 {{ filename }}: {{ message }}
 
diff --git a/test/fixtures.py b/test/fixtures.py
index dc58be50..859679d4 100644
--- a/test/fixtures.py
+++ b/test/fixtures.py
@@ -19,11 +19,11 @@
 ]
 
 BAD_FILE_DATA = [
-    ('nonsense.ags', ('IndexError: At least one sheet must be visible', 0)),
-    ('empty.ags', ('IndexError: At least one sheet must be visible', 0)),
-    ('dummy.xlsx', ("AttributeError: 'DataFrame' object has no attribute 'HEADING'", 5)),
-    ('random_binary.ags', ('IndexError: At least one sheet must be visible', 1)),
-    ('real/A3040_03.ags', ("UnboundLocalError: local variable 'group' referenced before assignment", 258)),
+    ('nonsense.ags', ('ERROR: File does not have AGS format layout', 9)),
+    ('empty.ags', ('ERROR: File does not have AGS format layout', 0)),
+    ('dummy.xlsx', ("ERROR: Bad spreadsheet layout ('DataFrame' object has no attribute 'HEADING')", 4787)),
+    ('random_binary.ags', ('ERROR: File does not have AGS format layout', 1024)),
+    ('real/A3040_03.ags', ("ERROR: File contains duplicate headers", 264526)),
 ]
 
 DICTIONARIES = {
diff --git a/test/unit/test_ags.py b/test/unit/test_ags.py
index e57325ad..4d56bad0 100644
--- a/test/unit/test_ags.py
+++ b/test/unit/test_ags.py
@@ -75,7 +75,7 @@ def test_convert(tmp_path, filename, expected):
 
     # Assert
     assert converted_file is not None and converted_file.exists()
-    assert filename.name == response['filename']
+    assert response['filename'] == filename.name
     assert re.search(expected, response['message'])
 
 
@@ -89,14 +89,13 @@ def test_convert_bad_files(tmp_path, filename, expected):
     expected_message, expected_size = expected
 
     # Act
-    converted_file, log = ags.convert(filename, results_dir)
+    converted_file, response = ags.convert(filename, results_dir)
 
     # Assert
     assert converted_file is None
-    assert f"File Name: \t {filename.name}" in log
-    assert f"File Size: \t {expected_size:n} kB" in log
-    assert 'ERROR:' in log
-    assert re.search(expected_message, log)
+    assert response['filename'] == filename.name
+    assert response['filesize'] == expected_size
+    assert response['message'] == expected_message
 
 
 @pytest.mark.parametrize('filename, expected', ISVALID_RSP_DATA)

From 9a913848b7f8936c5e66395fd337b8ed92d82cbf Mon Sep 17 00:00:00 2001
From: John A Stevenson <jostev@bgs.ac.uk>
Date: Wed, 25 Aug 2021 13:14:56 +0100
Subject: [PATCH 03/13] Handle bad file extensions

---
 app/ags.py            | 22 +++++++++++++++++++---
 test/fixtures.py      |  1 +
 test/fixtures_json.py | 13 ++++++++++++-
 3 files changed, 32 insertions(+), 4 deletions(-)

diff --git a/app/ags.py b/app/ags.py
index e9818b32..e533f724 100644
--- a/app/ags.py
+++ b/app/ags.py
@@ -44,6 +44,11 @@ def validate(filename: Path, standard_AGS4_dictionary: Optional[str] = None) ->
     else:
         dictionary_file = None
 
+    # Return early if file is not .ags format
+    if filename.suffix != '.ags':
+        response['message'] = f"ERROR: {filename.name} is not .ags format"
+        return response
+
     # Get error information from file
     try:
         errors = AGS4.check_file(filename, standard_AGS4_dictionary=dictionary_file)
@@ -128,8 +133,10 @@ def convert(filename: Path, results_dir: Path) -> Tuple[Optional[Path], dict]:
     # Update response and clean failed files
     if success:
         response['message'] = f"SUCCESS: {filename.name} converted to {converted_file.name}"
+        response['valid'] = True
     else:
         response['message'] = error_message
+        response['valid'] = False
         converted_file.unlink(missing_ok=True)
         converted_file = None
 
@@ -171,11 +178,20 @@ def _prepare_response_metadata(filename: Path) -> dict:
     """
     Prepare a dictionary containing metadata to include in the response.
     """
+    try:
+        filesize = filename.stat().st_size
+    except FileNotFoundError:
+        filesize = 0
+
     response = {'filename': filename.name,
-                'filesize': filename.stat().st_size,
+                'filesize': filesize,
                 'checker': f'python_ags4 v{python_ags4.__version__}',
-                'dictionary': '',  # This is usually overwritten
-                'time': dt.datetime.now(tz=dt.timezone.utc)}
+                'time': dt.datetime.now(tz=dt.timezone.utc),
+                # The following are usually overwritten
+                'message': '',
+                'dictionary': '',
+                'errors': {},
+                'valid': False}
     return response
 
 
diff --git a/test/fixtures.py b/test/fixtures.py
index 859679d4..b32bfe36 100644
--- a/test/fixtures.py
+++ b/test/fixtures.py
@@ -24,6 +24,7 @@
     ('dummy.xlsx', ("ERROR: Bad spreadsheet layout ('DataFrame' object has no attribute 'HEADING')", 4787)),
     ('random_binary.ags', ('ERROR: File does not have AGS format layout', 1024)),
     ('real/A3040_03.ags', ("ERROR: File contains duplicate headers", 264526)),
+    ('extension_is.bad', ("ERROR: extension_is.bad is not .ags or .xlsx format", 0))
 ]
 
 DICTIONARIES = {
diff --git a/test/fixtures_json.py b/test/fixtures_json.py
index 268b40ab..0a0a764b 100644
--- a/test/fixtures_json.py
+++ b/test/fixtures_json.py
@@ -21102,7 +21102,18 @@
                                  'double quotes.',
                          'group': '',
                          'line': 2081}]},
-        'valid': False}
+        'valid': False},
+    'extension_is.bad': {
+        'filename': 'extension_is.bad',
+        'filesize': 0,
+        'checker': 'python_ags4 v0.3.6',
+        'dictionary': '',
+        'time': dt.datetime(2021, 8, 23, 14, 25, 43, tzinfo=dt.timezone.utc),
+        'message': 'ERROR: extension_is.bad is not .ags format',
+        'errors': {},
+        'valid': False
+    },
+
 }
 
 # These response values break the schema

From 84376b5acdbef106d1f1b3d2689390d964117281 Mon Sep 17 00:00:00 2001
From: John A Stevenson <jostev@bgs.ac.uk>
Date: Wed, 25 Aug 2021 15:48:54 +0100
Subject: [PATCH 04/13] Add extra test cases for convert

---
 test/fixtures.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/test/fixtures.py b/test/fixtures.py
index b32bfe36..89259890 100644
--- a/test/fixtures.py
+++ b/test/fixtures.py
@@ -24,7 +24,8 @@
     ('dummy.xlsx', ("ERROR: Bad spreadsheet layout ('DataFrame' object has no attribute 'HEADING')", 4787)),
     ('random_binary.ags', ('ERROR: File does not have AGS format layout', 1024)),
     ('real/A3040_03.ags', ("ERROR: File contains duplicate headers", 264526)),
-    ('extension_is.bad', ("ERROR: extension_is.bad is not .ags or .xlsx format", 0))
+    ('extension_is.bad', ("ERROR: extension_is.bad is not .ags or .xlsx format", 0)),
+    # ('real/E52A4379 (2).ags', ("ERROR: File contains duplicate headers", 0))  # This file crashes because it asks for user input
 ]
 
 DICTIONARIES = {

From 77c1217ba1fa65924604b76c07577f6600764523 Mon Sep 17 00:00:00 2001
From: John A Stevenson <jostev@bgs.ac.uk>
Date: Wed, 25 Aug 2021 17:42:50 +0100
Subject: [PATCH 05/13] Add dummy bad extension file

---
 test/fixtures.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/test/fixtures.py b/test/fixtures.py
index 89259890..b794184b 100644
--- a/test/fixtures.py
+++ b/test/fixtures.py
@@ -11,6 +11,7 @@
     ('random_binary.ags', False),
     ('real/CG014058_F.ags', False),
     ('real/Blackburn Southern Bypass.ags', False),  # this file contains BOM character
+    ('extension_is.bad', False),
 ]
 
 GOOD_FILE_DATA = [
@@ -25,7 +26,8 @@
     ('random_binary.ags', ('ERROR: File does not have AGS format layout', 1024)),
     ('real/A3040_03.ags', ("ERROR: File contains duplicate headers", 264526)),
     ('extension_is.bad', ("ERROR: extension_is.bad is not .ags or .xlsx format", 0)),
-    # ('real/E52A4379 (2).ags', ("ERROR: File contains duplicate headers", 0))  # This file crashes because it asks for user input
+    # This file crashes because it asks for user input
+    # ('real/E52A4379 (2).ags', ("ERROR: File contains duplicate headers", 0))
 ]
 
 DICTIONARIES = {

From 4a9cfbbc0ffd6cfa21504d05f035c89eca3d9be9 Mon Sep 17 00:00:00 2001
From: John A Stevenson <jostev@bgs.ac.uk>
Date: Wed, 25 Aug 2021 17:51:55 +0100
Subject: [PATCH 06/13] Remove unused functions and exceptions

---
 app/ags.py                  | 29 -----------------------------
 test/files/extension_is.bad |  0
 2 files changed, 29 deletions(-)
 create mode 100644 test/files/extension_is.bad

diff --git a/app/ags.py b/app/ags.py
index e533f724..d70a4ea5 100644
--- a/app/ags.py
+++ b/app/ags.py
@@ -150,30 +150,6 @@ def is_valid(filename: Path, standard_AGS4_dictionary: Optional[str] = None) ->
     return validate(filename, standard_AGS4_dictionary=standard_AGS4_dictionary)['valid']
 
 
-def get_unicode_message(stderr: str, filename: str) -> str:
-    """
-    Generate useful message from Unicode error
-    """
-    m = re.search(r'.*in position (\d+):.*', stderr)
-    char_no, line_no, line, char = line_of_error(filename, int(m.group(1)))
-    message = f'ERROR: Unreadable character "{char}" at position {char_no} on line: {line_no}\nStarting: {line}\n\n'
-    return message
-
-
-def line_of_error(filename: Path, char_no: int) -> Tuple[int, int, str, str]:
-    """
-    Return character, line number and start of line containing character at char_no.
-    Also return problem character
-    """
-    with open(filename, encoding='ISO-8859-1') as f:
-        upto = f.read(char_no)
-        line_no = upto.count('\n') + 1
-        line = upto.split('\n')[-1]
-        char_no = len(line) + 1
-        char = f.read(1)
-    return char_no, line_no, line, char
-
-
 def _prepare_response_metadata(filename: Path) -> dict:
     """
     Prepare a dictionary containing metadata to include in the response.
@@ -193,8 +169,3 @@ def _prepare_response_metadata(filename: Path) -> dict:
                 'errors': {},
                 'valid': False}
     return response
-
-
-class Ags4CliError(Exception):
-    """Class for exceptions resulting from ags4_cli call."""
-    pass
diff --git a/test/files/extension_is.bad b/test/files/extension_is.bad
new file mode 100644
index 00000000..e69de29b

From 1073d45b87b24acde3191da1e4b29ee402efff3d Mon Sep 17 00:00:00 2001
From: John A Stevenson <jostev@bgs.ac.uk>
Date: Wed, 25 Aug 2021 17:52:07 +0100
Subject: [PATCH 07/13] Ignore .coverage directory

---
 .gitignore | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/.gitignore b/.gitignore
index 3a921c83..9fddf2c8 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,3 +1,4 @@
 __pycache__
 venv
-.idea
\ No newline at end of file
+.idea
+.coverage

From b23443646e650d1509c3747ba4b0678a4625c751 Mon Sep 17 00:00:00 2001
From: Colin Blackburn <colb@bgs.ac.uk>
Date: Thu, 26 Aug 2021 07:05:53 +0100
Subject: [PATCH 08/13] Convert dict to text for log

---
 app/routes.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/app/routes.py b/app/routes.py
index 6887b0a1..8bc99b63 100644
--- a/app/routes.py
+++ b/app/routes.py
@@ -173,7 +173,8 @@ async def convert_many(background_tasks: BackgroundTasks,
             contents = await file.read()
             local_file = tmp_dir / file.filename
             local_file.write_bytes(contents)
-            converted, log = ags.convert(local_file, results_dir)
+            converted, result = ags.convert(local_file, results_dir)
+            log = ags.to_plain_text(result)
             f.write(log)
             f.write('\n' + '=' * 80 + '\n')
     zipped_file = tmp_dir / RESULTS

From 60cf473bc444f104ef82e43dcb93b5f1fba36e71 Mon Sep 17 00:00:00 2001
From: Colin Blackburn <colb@bgs.ac.uk>
Date: Thu, 26 Aug 2021 07:35:00 +0100
Subject: [PATCH 09/13] Add two basic api tests for convert

---
 test/integration/test_api.py | 45 +++++++++++++++++++++++++++++++++++-
 1 file changed, 44 insertions(+), 1 deletion(-)

diff --git a/test/integration/test_api.py b/test/integration/test_api.py
index e5e3286b..fd92fa6e 100644
--- a/test/integration/test_api.py
+++ b/test/integration/test_api.py
@@ -8,7 +8,8 @@
 from requests_toolbelt.multipart.encoder import MultipartEncoder
 
 from app.main import app
-from test.fixtures import DICTIONARIES, FROZEN_TIME, ISVALID_RSP_DATA
+from test.fixtures import (BAD_FILE_DATA, DICTIONARIES, FROZEN_TIME,
+                           GOOD_FILE_DATA, ISVALID_RSP_DATA)
 from test.fixtures_json import JSON_RESPONSES
 from test.fixtures_plain_text import PLAIN_TEXT_RESPONSES
 
@@ -203,6 +204,48 @@ async def test_validatemany_text(async_client):
         assert log.strip() in response.text
 
 
+@pytest.mark.asyncio
+async def test_convert_good_files(async_client):
+    # Arrange
+    fields = []
+    for name, data in GOOD_FILE_DATA:
+        filename = TEST_FILE_DIR / name
+        file = ('files', (filename.name, open(filename, 'rb'), 'text/plain'))
+        fields.append(file)
+    mp_encoder = MultipartEncoder(fields=fields)
+
+    # Act
+    async with async_client as ac:
+        response = await ac.post(
+            '/convert/',
+            headers={'Content-Type': mp_encoder.content_type},
+            data=mp_encoder.to_string())
+
+    # Assert
+    assert response.status_code == 200
+
+
+@pytest.mark.asyncio
+async def test_convert_bad_files(async_client):
+    # Arrange
+    fields = []
+    for name, data in BAD_FILE_DATA:
+        filename = TEST_FILE_DIR / name
+        file = ('files', (filename.name, open(filename, 'rb'), 'text/plain'))
+        fields.append(file)
+    mp_encoder = MultipartEncoder(fields=fields)
+
+    # Act
+    async with async_client as ac:
+        response = await ac.post(
+            '/convert/',
+            headers={'Content-Type': mp_encoder.content_type},
+            data=mp_encoder.to_string())
+
+    # Assert
+    assert response.status_code == 200
+
+
 @pytest.fixture(scope="function")
 def client():
     return TestClient(app)

From 5b019e3703f44fa677f576bf4acd6cc1f794c079 Mon Sep 17 00:00:00 2001
From: Colin Blackburn <colb@bgs.ac.uk>
Date: Thu, 26 Aug 2021 07:36:17 +0100
Subject: [PATCH 10/13] Fix flake8 issues from previous work

---
 app/ags.py    | 1 -
 app/routes.py | 2 +-
 2 files changed, 1 insertion(+), 2 deletions(-)

diff --git a/app/ags.py b/app/ags.py
index d70a4ea5..93429f47 100644
--- a/app/ags.py
+++ b/app/ags.py
@@ -3,7 +3,6 @@
 from functools import reduce
 import logging
 from pathlib import Path
-import re
 from typing import Tuple, Optional
 
 import python_ags4
diff --git a/app/routes.py b/app/routes.py
index 8bc99b63..721e62cd 100644
--- a/app/routes.py
+++ b/app/routes.py
@@ -6,7 +6,7 @@
 from typing import List
 
 from fastapi import APIRouter, BackgroundTasks, File, Form, Request, UploadFile
-from fastapi.responses import FileResponse, HTMLResponse, StreamingResponse
+from fastapi.responses import FileResponse, StreamingResponse
 
 from app import ags
 from app.errors import error_responses, InvalidPayloadError

From 4c193d7833fcbb90ca0c440338a915a985cb63d2 Mon Sep 17 00:00:00 2001
From: Colin Blackburn <colb@bgs.ac.uk>
Date: Thu, 26 Aug 2021 08:09:17 +0100
Subject: [PATCH 11/13] Extend test to check for files

---
 test/integration/test_api.py | 31 +++++++++++++++++++++++++++----
 1 file changed, 27 insertions(+), 4 deletions(-)

diff --git a/test/integration/test_api.py b/test/integration/test_api.py
index fd92fa6e..9c0aae54 100644
--- a/test/integration/test_api.py
+++ b/test/integration/test_api.py
@@ -1,5 +1,6 @@
 """Tests for API responses."""
 from pathlib import Path
+import shutil
 
 from fastapi.testclient import TestClient
 from freezegun import freeze_time
@@ -205,10 +206,10 @@ async def test_validatemany_text(async_client):
 
 
 @pytest.mark.asyncio
-async def test_convert_good_files(async_client):
+async def test_convert_good_files(async_client, tmp_path):
     # Arrange
     fields = []
-    for name, data in GOOD_FILE_DATA:
+    for name, expected in GOOD_FILE_DATA:
         filename = TEST_FILE_DIR / name
         file = ('files', (filename.name, open(filename, 'rb'), 'text/plain'))
         fields.append(file)
@@ -223,13 +224,24 @@ async def test_convert_good_files(async_client):
 
     # Assert
     assert response.status_code == 200
+    assert response.headers['content-type'] == 'application/x-zip-compressed'
+    assert response.headers['content-disposition'] == 'attachment; filename=results.zip'
+
+    zip_file = tmp_path / 'results.zip'
+    unzipped_files = tmp_path / 'results'
+    with open(zip_file, 'wb') as f:
+        f.write(response.content)
+    shutil.unpack_archive(zip_file, unzipped_files, 'zip')
+    assert (unzipped_files / 'conversion.log').is_file()
+    for name, expected in GOOD_FILE_DATA:
+        assert (unzipped_files / name).is_file()
 
 
 @pytest.mark.asyncio
-async def test_convert_bad_files(async_client):
+async def test_convert_bad_files(async_client, tmp_path):
     # Arrange
     fields = []
-    for name, data in BAD_FILE_DATA:
+    for name, expected in BAD_FILE_DATA:
         filename = TEST_FILE_DIR / name
         file = ('files', (filename.name, open(filename, 'rb'), 'text/plain'))
         fields.append(file)
@@ -244,6 +256,17 @@ async def test_convert_bad_files(async_client):
 
     # Assert
     assert response.status_code == 200
+    assert response.headers['content-type'] == 'application/x-zip-compressed'
+    assert response.headers['content-disposition'] == 'attachment; filename=results.zip'
+
+    zip_file = tmp_path / 'results.zip'
+    unzipped_files = tmp_path / 'results'
+    with open(zip_file, 'wb') as f:
+        f.write(response.content)
+    shutil.unpack_archive(zip_file, unzipped_files, 'zip')
+    assert (unzipped_files / 'conversion.log').is_file()
+    for name, expected in BAD_FILE_DATA:
+        assert not (unzipped_files / name).is_file()
 
 
 @pytest.fixture(scope="function")

From 51437052159028546b3bc834da3d33eec011a0bf Mon Sep 17 00:00:00 2001
From: Colin Blackburn <colb@bgs.ac.uk>
Date: Thu, 26 Aug 2021 08:36:01 +0100
Subject: [PATCH 12/13] Fully extend convert tests, rename example files

---
 test/files/{example1.ags => example_ags.ags}    |   0
 test/files/{example1.xlsx => example_xlsx.xlsx} | Bin
 test/fixtures.py                                |   8 ++++----
 test/fixtures_json.py                           |   4 ++--
 test/fixtures_plain_text.py                     |   4 ++--
 test/integration/test_api.py                    |  16 ++++++++++++----
 test/unit/test_ags.py                           |  13 +++++++------
 7 files changed, 27 insertions(+), 18 deletions(-)
 rename test/files/{example1.ags => example_ags.ags} (100%)
 rename test/files/{example1.xlsx => example_xlsx.xlsx} (100%)

diff --git a/test/files/example1.ags b/test/files/example_ags.ags
similarity index 100%
rename from test/files/example1.ags
rename to test/files/example_ags.ags
diff --git a/test/files/example1.xlsx b/test/files/example_xlsx.xlsx
similarity index 100%
rename from test/files/example1.xlsx
rename to test/files/example_xlsx.xlsx
diff --git a/test/fixtures.py b/test/fixtures.py
index b794184b..790ea41a 100644
--- a/test/fixtures.py
+++ b/test/fixtures.py
@@ -3,11 +3,11 @@
 FROZEN_TIME = "2021-08-23 14:25:43"
 
 ISVALID_RSP_DATA = [
-    ('example1.ags', True),
+    ('example_ags.ags', True),
     ('nonsense.ags', False),
     ('empty.ags', False),
     ('real/A3040_03.ags', False),
-    ('example1.xlsx', False),
+    ('example_xlsx.xlsx', False),
     ('random_binary.ags', False),
     ('real/CG014058_F.ags', False),
     ('real/Blackburn Southern Bypass.ags', False),  # this file contains BOM character
@@ -15,8 +15,8 @@
 ]
 
 GOOD_FILE_DATA = [
-    ('example1.ags', 'SUCCESS: example1.ags converted to example1.xlsx'),
-    ('example1.xlsx', 'SUCCESS: example1.xlsx converted to example1.ags'),
+    ('example_ags.ags', ('SUCCESS: example_ags.ags converted to example_ags.xlsx', 'example_ags.xlsx')),
+    ('example_xlsx.xlsx', ('SUCCESS: example_xlsx.xlsx converted to example_xlsx.ags', 'example_xlsx.ags')),
 ]
 
 BAD_FILE_DATA = [
diff --git a/test/fixtures_json.py b/test/fixtures_json.py
index 0a0a764b..8512d3bf 100644
--- a/test/fixtures_json.py
+++ b/test/fixtures_json.py
@@ -1,8 +1,8 @@
 import datetime as dt
 
 JSON_RESPONSES = {
-    'example1.ags': {
-        'filename': 'example1.ags',
+    'example_ags.ags': {
+        'filename': 'example_ags.ags',
         'filesize': 4039,
         'checker': 'python_ags4 v0.3.6',
         'dictionary': 'Standard_dictionary_v4_1.ags',
diff --git a/test/fixtures_plain_text.py b/test/fixtures_plain_text.py
index 90db39b4..9b25e5b7 100644
--- a/test/fixtures_plain_text.py
+++ b/test/fixtures_plain_text.py
@@ -1,6 +1,6 @@
 PLAIN_TEXT_RESPONSES = {
-    'example1.ags': """
-example1.ags: All checks passed!
+    'example_ags.ags': """
+example_ags.ags: All checks passed!
 
 # Metadata
 
diff --git a/test/integration/test_api.py b/test/integration/test_api.py
index 9c0aae54..bbb099d1 100644
--- a/test/integration/test_api.py
+++ b/test/integration/test_api.py
@@ -54,7 +54,7 @@ async def test_isvalid(async_client, filename, expected):
 @pytest.mark.asyncio
 async def test_isvalid_custom_dictionary(async_client, dictionary):
     # Arrange
-    filename = TEST_FILE_DIR / 'example1.ags'
+    filename = TEST_FILE_DIR / 'example_ags.ags'
     mp_encoder = MultipartEncoder(
         fields={'file': (filename.name, open(filename, 'rb'), 'text/plain'),
                 'std_dictionary': dictionary})
@@ -135,7 +135,7 @@ async def test_validatemany_json(async_client):
 @pytest.mark.asyncio
 async def test_validate_custom_dictionary(async_client, dictionary, expected):
     # Arrange
-    filename = TEST_FILE_DIR / 'example1.ags'
+    filename = TEST_FILE_DIR / 'example_ags.ags'
     mp_encoder = MultipartEncoder(
         fields={'file': (filename.name, open(filename, 'rb'), 'text/plain'),
                 'std_dictionary': dictionary})
@@ -152,7 +152,7 @@ async def test_validate_custom_dictionary(async_client, dictionary, expected):
     body = response.json()
     assert len(body['data']) == 1
     # Assert
-    assert body['data'][0]['filename'] == 'example1.ags'
+    assert body['data'][0]['filename'] == 'example_ags.ags'
     assert body['data'][0]['dictionary'] == expected
 
 
@@ -233,8 +233,12 @@ async def test_convert_good_files(async_client, tmp_path):
         f.write(response.content)
     shutil.unpack_archive(zip_file, unzipped_files, 'zip')
     assert (unzipped_files / 'conversion.log').is_file()
+    with open(unzipped_files / 'conversion.log', 'rt') as f:
+        log = f.read()
     for name, expected in GOOD_FILE_DATA:
-        assert (unzipped_files / name).is_file()
+        expected_message, expected_new_file_name = expected
+        assert (unzipped_files / expected_new_file_name).is_file()
+        assert expected_message in log
 
 
 @pytest.mark.asyncio
@@ -265,8 +269,12 @@ async def test_convert_bad_files(async_client, tmp_path):
         f.write(response.content)
     shutil.unpack_archive(zip_file, unzipped_files, 'zip')
     assert (unzipped_files / 'conversion.log').is_file()
+    with open(unzipped_files / 'conversion.log', 'rt') as f:
+        log = f.read()
     for name, expected in BAD_FILE_DATA:
+        expected_message, expected_file_size = expected
         assert not (unzipped_files / name).is_file()
+        assert expected_message in log
 
 
 @pytest.fixture(scope="function")
diff --git a/test/unit/test_ags.py b/test/unit/test_ags.py
index 4d56bad0..d0c6a912 100644
--- a/test/unit/test_ags.py
+++ b/test/unit/test_ags.py
@@ -35,20 +35,20 @@ def test_validate(filename, expected):
 @pytest.mark.parametrize('dictionary', DICTIONARIES.values())
 def test_validate_custom_dictionary(dictionary):
     # Arrange
-    filename = TEST_FILE_DIR / 'example1.ags'
+    filename = TEST_FILE_DIR / 'example_ags.ags'
 
     # Act
     response = ags.validate(filename,
                             standard_AGS4_dictionary=dictionary)
 
     # Assert
-    assert response['filename'] == 'example1.ags'
+    assert response['filename'] == 'example_ags.ags'
     assert response['dictionary'] == dictionary
 
 
 def test_validate_custom_dictionary_bad_file():
     # Arrange
-    filename = TEST_FILE_DIR / 'example1.ags'
+    filename = TEST_FILE_DIR / 'example_ags.ags'
     dictionary = 'bad_file.ags'
 
     # Act
@@ -69,6 +69,7 @@ def test_convert(tmp_path, filename, expected):
     results_dir = tmp_path / 'results'
     if not results_dir.exists:
         results_dir.mkdir()
+    expected_message, expected_new_file_name = expected
 
     # Act
     converted_file, response = ags.convert(filename, results_dir)
@@ -76,7 +77,7 @@ def test_convert(tmp_path, filename, expected):
     # Assert
     assert converted_file is not None and converted_file.exists()
     assert response['filename'] == filename.name
-    assert re.search(expected, response['message'])
+    assert re.search(expected_message, response['message'])
 
 
 @pytest.mark.parametrize('filename, expected', BAD_FILE_DATA)
@@ -113,7 +114,7 @@ def test_is_valid(filename, expected):
 @pytest.mark.parametrize('dictionary', DICTIONARIES.values())
 def test_is_valid_custom_dictionary(dictionary):
     # Arrange
-    filename = TEST_FILE_DIR / 'example1.ags'
+    filename = TEST_FILE_DIR / 'example_ags.ags'
 
     # Act
     result = ags.is_valid(filename,
@@ -124,7 +125,7 @@ def test_is_valid_custom_dictionary(dictionary):
 
 
 @pytest.mark.parametrize('filename', [
-    'example1.ags', 'nonsense.ags', 'random_binary.ags',
+    'example_ags.ags', 'nonsense.ags', 'random_binary.ags',
     'real/Blackburn Southern Bypass.ags'])
 def test_to_plain_text(filename):
     # Arrange

From 681d4ace74f727ed2e444ea5438665746dcb63a8 Mon Sep 17 00:00:00 2001
From: Colin Blackburn <colb@bgs.ac.uk>
Date: Thu, 26 Aug 2021 08:42:40 +0100
Subject: [PATCH 13/13] Test all content types

---
 test/integration/test_api.py | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/test/integration/test_api.py b/test/integration/test_api.py
index bbb099d1..ab291978 100644
--- a/test/integration/test_api.py
+++ b/test/integration/test_api.py
@@ -21,6 +21,7 @@ def test_openapi_json(client):
     """A hello-world type test to confirm testing framework works."""
     response = client.get('/openapi.json')
     assert response.status_code == 200
+    assert response.headers['content-type'] == 'application/json'
     assert '/validate' in response.text
 
 
@@ -41,6 +42,7 @@ async def test_isvalid(async_client, filename, expected):
 
     # Assert
     assert response.status_code == 200
+    assert response.headers['content-type'] == 'application/json'
     body = response.json()
     assert set(body.keys()) == {'msg', 'type', 'self', 'data'}
     assert body['msg'] is not None
@@ -68,6 +70,7 @@ async def test_isvalid_custom_dictionary(async_client, dictionary):
 
     # Assert
     assert response.status_code == 200
+    assert response.headers['content-type'] == 'application/json'
     body = response.json()
     assert len(body['data']) == 1
     assert body['data'][0]
@@ -91,6 +94,7 @@ async def test_validate_json(async_client, filename, expected):
 
     # Assert
     assert response.status_code == 200
+    assert response.headers['content-type'] == 'application/json'
     body = response.json()
     assert set(body.keys()) == {'msg', 'type', 'self', 'data'}
     assert body['msg'] is not None
@@ -122,6 +126,7 @@ async def test_validatemany_json(async_client):
 
     # Assert
     assert response.status_code == 200
+    assert response.headers['content-type'] == 'application/json'
     body = response.json()
     assert set(body.keys()) == {'msg', 'type', 'self', 'data'}
     assert body['msg'] is not None
@@ -149,6 +154,7 @@ async def test_validate_custom_dictionary(async_client, dictionary, expected):
 
     # Assert
     assert response.status_code == 200
+    assert response.headers['content-type'] == 'application/json'
     body = response.json()
     assert len(body['data']) == 1
     # Assert
@@ -176,6 +182,7 @@ async def test_validate_text(async_client, filename, expected):
 
     # Assert
     assert response.status_code == 200
+    assert 'text/plain' in response.headers['content-type']
     assert response.text.strip() == expected.strip()
 
 
@@ -201,6 +208,7 @@ async def test_validatemany_text(async_client):
 
     # Assert
     assert response.status_code == 200
+    assert 'text/plain' in response.headers['content-type']
     for log in PLAIN_TEXT_RESPONSES.values():
         assert log.strip() in response.text