From 8742013b6ce0325e6c743ba1b0507a7b71acb524 Mon Sep 17 00:00:00 2001 From: "Douglas Cerna (Soy Douglas)" Date: Tue, 20 Aug 2024 15:40:25 +0000 Subject: [PATCH] Add type hints to the characterize_file script This also removes an unnecessary argument from the workflow definition and adds a typed argument parser to the script. --- pyproject.toml | 2 + .../lib/clientScripts/characterize_file.py | 43 ++++++++--- src/MCPServer/lib/assets/workflow.json | 6 +- src/archivematicaCommon/lib/dicts.py | 5 +- tests/MCPClient/test_characterize_file.py | 76 ++++++++++--------- 5 files changed, 82 insertions(+), 50 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index ea2f1907fc..1b3a778dca 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -57,11 +57,13 @@ warn_unused_configs = true [[tool.mypy.overrides]] module = [ "src.MCPClient.lib.client.*", + "src.MCPClient.lib.clientScripts.characterize_file", "src.MCPClient.lib.clientScripts.identify_file_format", "src.MCPClient.lib.clientScripts.normalize", "src.MCPClient.lib.clientScripts.policy_check", "src.MCPClient.lib.clientScripts.validate_file", "tests.MCPClient.conftest", + "tests.MCPClient.test_characterize_file", "tests.MCPClient.test_identify_file_format", "tests.MCPClient.test_normalize", "tests.MCPClient.test_policy_check", diff --git a/src/MCPClient/lib/clientScripts/characterize_file.py b/src/MCPClient/lib/clientScripts/characterize_file.py index 3fe46fabad..8c1497a17b 100755 --- a/src/MCPClient/lib/clientScripts/characterize_file.py +++ b/src/MCPClient/lib/clientScripts/characterize_file.py @@ -6,34 +6,42 @@ # # If a tool has no defined characterization commands, then the default # will be run instead (currently FITS). +import argparse +import dataclasses import multiprocessing +import uuid +from typing import List import django -from lxml import etree django.setup() + +from client.job import Job from databaseFunctions import insertIntoFPCommandOutput from dicts import ReplacementDict from dicts import replace_string_values from django.conf import settings as mcpclient_settings from django.core.exceptions import ValidationError from django.db import transaction - -# archivematicaCommon from executeOrRunSubProcess import executeOrRun from fpr.models import FormatVersion from fpr.models import FPRule from lib import setup_dicts - -# dashboard +from lxml import etree from main.models import FPCommandOutput -def concurrent_instances(): +@dataclasses.dataclass +class CharacterizeFileArgs: + file_uuid: uuid.UUID + sip_uuid: uuid.UUID + + +def concurrent_instances() -> int: return multiprocessing.cpu_count() -def main(job, file_path, file_uuid, sip_uuid): +def main(job: Job, file_uuid: uuid.UUID, sip_uuid: uuid.UUID) -> int: setup_dicts(mcpclient_settings) failed = False @@ -117,8 +125,25 @@ def main(job, file_path, file_uuid, sip_uuid): return 0 -def call(jobs): +def get_parser() -> argparse.ArgumentParser: + parser = argparse.ArgumentParser(description="Identify file formats.") + parser.add_argument("file_uuid", type=uuid.UUID) + parser.add_argument("sip_uuid", type=uuid.UUID) + + return parser + + +def parse_args(parser: argparse.ArgumentParser, job: Job) -> CharacterizeFileArgs: + namespace = parser.parse_args(job.args[1:]) + + return CharacterizeFileArgs(**vars(namespace)) + + +def call(jobs: List[Job]) -> None: + parser = get_parser() + with transaction.atomic(): for job in jobs: with job.JobContext(): - job.set_status(main(job, *job.args[1:])) + args = parse_args(parser, job) + job.set_status(main(job, args.file_uuid, args.sip_uuid)) diff --git a/src/MCPServer/lib/assets/workflow.json b/src/MCPServer/lib/assets/workflow.json index 8f0ce2e563..9a917cea55 100644 --- a/src/MCPServer/lib/assets/workflow.json +++ b/src/MCPServer/lib/assets/workflow.json @@ -1286,7 +1286,7 @@ "config": { "@manager": "linkTaskManagerFiles", "@model": "StandardTaskConfig", - "arguments": "\"%relativeLocation%\" \"%fileUUID%\" \"%SIPUUID%\"", + "arguments": "\"%fileUUID%\" \"%SIPUUID%\"", "execute": "characterizeFile_v0.0", "filter_subdir": "objects/metadata/" }, @@ -3480,7 +3480,7 @@ "config": { "@manager": "linkTaskManagerFiles", "@model": "StandardTaskConfig", - "arguments": "\"%relativeLocation%\" \"%fileUUID%\" \"%SIPUUID%\"", + "arguments": "\"%fileUUID%\" \"%SIPUUID%\"", "execute": "characterizeFile_v0.0", "filter_subdir": "objects" }, @@ -3703,7 +3703,7 @@ "config": { "@manager": "linkTaskManagerFiles", "@model": "StandardTaskConfig", - "arguments": "\"%relativeLocation%\" \"%fileUUID%\" \"%SIPUUID%\"", + "arguments": "\"%fileUUID%\" \"%SIPUUID%\"", "execute": "characterizeFile_v0.0", "filter_subdir": "objects/submissionDocumentation" }, diff --git a/src/archivematicaCommon/lib/dicts.py b/src/archivematicaCommon/lib/dicts.py index fecaf1be33..18218dc17b 100644 --- a/src/archivematicaCommon/lib/dicts.py +++ b/src/archivematicaCommon/lib/dicts.py @@ -17,6 +17,7 @@ import ast import os import re +import uuid from main import models @@ -83,9 +84,9 @@ def frommodel(type_="file", sip=None, file_=None, expand_path=True): # In order to make this code accessible to MCPServer, # we need to support passing in UUID strings instead # of models. - if isinstance(file_, str): + if isinstance(file_, (str, uuid.UUID)): file_ = models.File.objects.get(uuid=file_) - if isinstance(sip, str): + if isinstance(sip, (str, uuid.UUID)): # sip can be a SIP or Transfer try: sip = models.SIP.objects.get(uuid=sip) diff --git a/tests/MCPClient/test_characterize_file.py b/tests/MCPClient/test_characterize_file.py index f89faf66d1..5267971b0d 100644 --- a/tests/MCPClient/test_characterize_file.py +++ b/tests/MCPClient/test_characterize_file.py @@ -8,7 +8,9 @@ @pytest.fixture -def rule_with_xml_output_format(format_version): +def rule_with_xml_output_format( + format_version: fprmodels.FormatVersion, +) -> fprmodels.FPRule: return fprmodels.FPRule.objects.create( command=fprmodels.FPCommand.objects.create( output_format=fprmodels.FormatVersion.objects.get(pronom_id="fmt/101") @@ -19,25 +21,28 @@ def rule_with_xml_output_format(format_version): @pytest.fixture -def fpcommand_output(sip_file, fprule_characterization): +def fpcommand_output( + sip_file: models.File, fprule_characterization: fprmodels.FPRule +) -> models.FPCommandOutput: return models.FPCommandOutput.objects.create( file=sip_file, rule=fprule_characterization ) @pytest.fixture -def delete_characterization_rules(db): +def delete_characterization_rules() -> None: fprmodels.FPRule.objects.filter( purpose__in=["characterization", "default_characterization"] ).delete() @pytest.mark.django_db -def test_job_succeeds_if_file_is_already_characterized(sip_file, sip, fpcommand_output): +def test_job_succeeds_if_file_is_already_characterized( + sip_file: models.File, sip: models.SIP, fpcommand_output: models.FPCommandOutput +) -> None: job = mock.Mock( args=[ "characterize_file", - "file_path_not_used", str(sip_file.uuid), str(sip.uuid), ], @@ -52,12 +57,11 @@ def test_job_succeeds_if_file_is_already_characterized(sip_file, sip, fpcommand_ @pytest.mark.django_db def test_job_succeeds_if_no_characterization_rules_exist( - sip_file, sip, delete_characterization_rules -): + sip_file: models.File, sip: models.SIP, delete_characterization_rules: None +) -> None: job = mock.Mock( args=[ "characterize_file", - "file_path_not_used", str(sip_file.uuid), str(sip.uuid), ], @@ -78,13 +82,13 @@ def test_job_succeeds_if_no_characterization_rules_exist( ) @mock.patch("characterize_file.executeOrRun") def test_job_executes_command( - execute_or_run, - script_type, - sip_file, - sip, - fprule_characterization, - sip_file_format_version, -): + execute_or_run: mock.Mock, + script_type: str, + sip_file: models.File, + sip: models.SIP, + fprule_characterization: fprmodels.FPRule, + sip_file_format_version: models.FileFormatVersion, +) -> None: fprule_characterization.command.script_type = script_type fprule_characterization.command.save() exit_code = 0 @@ -94,7 +98,6 @@ def test_job_executes_command( job = mock.Mock( args=[ "characterize_file", - "file_path_not_used", str(sip_file.uuid), str(sip.uuid), ], @@ -117,8 +120,12 @@ def test_job_executes_command( @pytest.mark.django_db @mock.patch("characterize_file.executeOrRun") def test_job_fails_if_command_fails( - execute_or_run, sip_file, sip, fprule_characterization, sip_file_format_version -): + execute_or_run: mock.Mock, + sip_file: models.File, + sip: models.SIP, + fprule_characterization: fprmodels.FPRule, + sip_file_format_version: models.FileFormatVersion, +) -> None: exit_code = 1 stdout = "" stderr = "error!" @@ -126,7 +133,6 @@ def test_job_fails_if_command_fails( job = mock.Mock( args=[ "characterize_file", - "file_path_not_used", str(sip_file.uuid), str(sip.uuid), ], @@ -151,14 +157,14 @@ def test_job_fails_if_command_fails( @mock.patch("characterize_file.etree") @mock.patch("characterize_file.executeOrRun") def test_job_saves_valid_xml_command_output( - execute_or_run, - etree, - insert_into_fp_command_output, - sip_file, - sip, - rule_with_xml_output_format, - sip_file_format_version, -): + execute_or_run: mock.Mock, + etree: mock.Mock, + insert_into_fp_command_output: mock.Mock, + sip_file: models.File, + sip: models.SIP, + rule_with_xml_output_format: fprmodels.FPRule, + sip_file_format_version: models.FileFormatVersion, +) -> None: exit_code = 0 stdout = "success" stderr = "" @@ -166,7 +172,6 @@ def test_job_saves_valid_xml_command_output( job = mock.Mock( args=[ "characterize_file", - "file_path_not_used", str(sip_file.uuid), str(sip.uuid), ], @@ -186,19 +191,19 @@ def test_job_saves_valid_xml_command_output( job.write_error.assert_called_once_with(stderr) etree.fromstring.assert_called_once_with(stdout.encode()) insert_into_fp_command_output.assert_called_once_with( - str(sip_file.uuid), stdout, rule_with_xml_output_format.uuid + sip_file.uuid, stdout, rule_with_xml_output_format.uuid ) @pytest.mark.django_db @mock.patch("characterize_file.executeOrRun") def test_job_fails_with_invalid_xml_command_output( - execute_or_run, - sip_file, - sip, - rule_with_xml_output_format, - sip_file_format_version, -): + execute_or_run: mock.Mock, + sip_file: models.File, + sip: models.SIP, + rule_with_xml_output_format: fprmodels.FPRule, + sip_file_format_version: models.FileFormatVersion, +) -> None: exit_code = 0 stdout = "invalid xml" stderr = "" @@ -206,7 +211,6 @@ def test_job_fails_with_invalid_xml_command_output( job = mock.Mock( args=[ "characterize_file", - "file_path_not_used", str(sip_file.uuid), str(sip.uuid), ],