From 247309f3dc4b3fdd154da2bea743fcbfc1e482ea Mon Sep 17 00:00:00 2001 From: Ross Spencer Date: Sat, 13 Apr 2019 17:02:49 +0200 Subject: [PATCH] Fixes to anticipate Archivematica's name cleanup When a user submits a structmap they don't need to anticipate the transformation likely to be completed on that path by the Archivematica name clean-up task. As such users don't have to worry about characters such as spaces or Unicode. --- .../lib/clientScripts/create_mets_v2.py | 20 +++++++++++++++---- .../dir-with-dashes/file with spaces.bin | 0 .../path_with_spaces_structmap.xml | 10 ++++++++++ .../custom_structmaps/model/files.json | 18 +++++++++++++++++ src/MCPClient/tests/test_create_aip_mets.py | 17 +++++++++++++++- 5 files changed, 60 insertions(+), 5 deletions(-) create mode 100644 src/MCPClient/tests/fixtures/custom_structmaps/custom-structmap-3a915449-d1bb-4920-b274-c917c7bb5929/objects/dir-with-dashes/file with spaces.bin create mode 100644 src/MCPClient/tests/fixtures/custom_structmaps/custom-structmap-3a915449-d1bb-4920-b274-c917c7bb5929/objects/metadata/transfers/custom-structmap-41ab1f1a-34d0-4a83-a2a3-0ad1b1ee1c51/path_with_spaces_structmap.xml diff --git a/src/MCPClient/lib/clientScripts/create_mets_v2.py b/src/MCPClient/lib/clientScripts/create_mets_v2.py index 3c3b9a3df6..2b08c34d91 100755 --- a/src/MCPClient/lib/clientScripts/create_mets_v2.py +++ b/src/MCPClient/lib/clientScripts/create_mets_v2.py @@ -73,6 +73,7 @@ ) from custom_handlers import get_script_logger import namespaces as ns +from sanitize_names import sanitizeName from bagit import Bag, BagError @@ -872,6 +873,15 @@ def getAMDSec( return ret +def fixup_path_input_by_user(job, path): + """Fix-up paths submitted by a user, e.g. in custom structmap examples so + that they don't have to anticipate the Archivematica normalization process. 
+ """ + return os.path.join( + "", *[sanitizeName(name.encode("utf8")) for name in path.split(os.path.sep)] + ) + + def include_custom_structmap( job, baseDirectoryPath, state, custom_structmap="mets_structmap.xml" ): @@ -904,13 +914,15 @@ def include_custom_structmap( "//*[@CONTENTIDS]", namespaces={"mets:": ns.metsNS} ) for item in fileids: - fileName = item.get("CONTENTIDS") - if fileName in state.fileNameToFileID: - item.set("FILEID", state.fileNameToFileID[fileName]) + file_path = item.get("CONTENTIDS") + normalized_path = fixup_path_input_by_user(job, file_path) + if normalized_path in state.fileNameToFileID: + item.set("FILEID", state.fileNameToFileID[normalized_path]) else: job.pyprint( "Custom structmap error: no fileUUID for", - fileName, + file_path, + normalized_path, file=sys.stderr, ) state.error_accumulator.error_count += 1 diff --git a/src/MCPClient/tests/fixtures/custom_structmaps/custom-structmap-3a915449-d1bb-4920-b274-c917c7bb5929/objects/dir-with-dashes/file with spaces.bin b/src/MCPClient/tests/fixtures/custom_structmaps/custom-structmap-3a915449-d1bb-4920-b274-c917c7bb5929/objects/dir-with-dashes/file with spaces.bin new file mode 100644 index 0000000000..e69de29bb2 diff --git a/src/MCPClient/tests/fixtures/custom_structmaps/custom-structmap-3a915449-d1bb-4920-b274-c917c7bb5929/objects/metadata/transfers/custom-structmap-41ab1f1a-34d0-4a83-a2a3-0ad1b1ee1c51/path_with_spaces_structmap.xml b/src/MCPClient/tests/fixtures/custom_structmaps/custom-structmap-3a915449-d1bb-4920-b274-c917c7bb5929/objects/metadata/transfers/custom-structmap-41ab1f1a-34d0-4a83-a2a3-0ad1b1ee1c51/path_with_spaces_structmap.xml new file mode 100644 index 0000000000..8c797830b3 --- /dev/null +++ b/src/MCPClient/tests/fixtures/custom_structmaps/custom-structmap-3a915449-d1bb-4920-b274-c917c7bb5929/objects/metadata/transfers/custom-structmap-41ab1f1a-34d0-4a83-a2a3-0ad1b1ee1c51/path_with_spaces_structmap.xml @@ -0,0 +1,10 @@ + + + + + + + + + + diff --git 
a/src/MCPClient/tests/fixtures/custom_structmaps/model/files.json b/src/MCPClient/tests/fixtures/custom_structmaps/model/files.json index 8d6bc2798b..7ad49f53d5 100644 --- a/src/MCPClient/tests/fixtures/custom_structmaps/model/files.json +++ b/src/MCPClient/tests/fixtures/custom_structmaps/model/files.json @@ -161,6 +161,24 @@ "size": 2600 } }, +{ + "pk": "2cd8d06a-62cf-4814-b5b3-1896e0b8464f", + "model": "main.file", + "fields": { + "filegrpuuid": "", + "sip": "3a915449-d1bb-4920-b274-c917c7bb5929", + "originallocation": "%SIPDirectory%objects/dir-with-dashes/file with spaces.bin", + "transfer": null, + "filegrpuse": "original", + "removedtime": null, + "label": "", + "checksum": "", + "enteredsystem": "2019-04-10T23:13:00Z", + "modificationtime": "1970-01-01T00:00:00Z", + "currentlocation": "%SIPDirectory%objects/dir-with-dashes/file with spaces.bin", + "size": 2600 + } +}, { "pk": "9691d9cd-10b4-4c50-abcd-f8c22a37aac5", "model": "main.file", diff --git a/src/MCPClient/tests/test_create_aip_mets.py b/src/MCPClient/tests/test_create_aip_mets.py index 5628bf0f7c..0d7a167e89 100644 --- a/src/MCPClient/tests/test_create_aip_mets.py +++ b/src/MCPClient/tests/test_create_aip_mets.py @@ -788,8 +788,15 @@ def validate_mets(mets_xsd, mets_structmap): ) ) + def fixup_fileid_state(self): + """For items on-disk we have to mimic the filename cleanup process.""" + for key, _ in dict(self.state.fileNameToFileID).items(): + self.state.fileNameToFileID[ + create_mets_v2.fixup_path_input_by_user(Job("stub", "stub", []), key) + ] = self.state.fileNameToFileID.pop(key) + def generate_aip_mets_v2_state(self): - """Generate fileSec + """Generate fileSec state State will be generated that we will help us to test the units involved with creating a custom structmap in the AIP METS. 
@@ -879,6 +886,7 @@ def test_create_file_sec(self): def test_get_included_structmap(self): """Test the output of custom structmaps in create_mets_v2.""" self.generate_aip_mets_v2_state() + self.fixup_fileid_state() default_structmap = "mets_structmap.xml" Result = collections.namedtuple( "Result", "structmap_name files replaced_count structmap_id broken" @@ -918,6 +926,13 @@ def test_get_included_structmap(self): None, False, ), + Result( + "path_with_spaces_structmap.xml", + ["objects/dir-with-dashes/file with spaces.bin"], + 1, + None, + False, + ), ] for res in results: structmap_path = os.path.join(