Skip to content

Commit

Permalink
Fixes to anticipate Archivematica's name cleanup
Browse files Browse the repository at this point in the history
When a user submits a custom structmap, they no longer need to anticipate
the transformation that the Archivematica name clean-up task is likely to
apply to the file paths it references. As such, users don't have to worry
about characters such as spaces or Unicode in those paths.
  • Loading branch information
ross-spencer committed Apr 13, 2019
1 parent 3d18779 commit 247309f
Show file tree
Hide file tree
Showing 5 changed files with 60 additions and 5 deletions.
20 changes: 16 additions & 4 deletions src/MCPClient/lib/clientScripts/create_mets_v2.py
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,7 @@
)
from custom_handlers import get_script_logger
import namespaces as ns
from sanitize_names import sanitizeName

from bagit import Bag, BagError

Expand Down Expand Up @@ -872,6 +873,15 @@ def getAMDSec(
return ret


def fixup_path_input_by_user(job, path):
    """Return ``path`` with every component passed through ``sanitizeName``.

    Used for paths supplied by a user (e.g. in a custom structmap) so that
    the user does not have to anticipate the Archivematica name clean-up
    step themselves.

    The path is split on ``os.path.sep``; each component is UTF-8 encoded,
    handed to ``sanitizeName``, and the results are re-joined with
    ``os.path.join``.

    :param job: Accepted for the caller's convenience; not used in the body.
    :param path: The user-supplied path to clean up.
    :return: The path rebuilt from the sanitized components.
    """
    cleaned_components = []
    for component in path.split(os.path.sep):
        # sanitizeName is given the UTF-8 encoded form of each component.
        cleaned_components.append(sanitizeName(component.encode("utf8")))
    # Joining onto "" leaves the result relative to whatever the first
    # sanitized component is.
    return os.path.join("", *cleaned_components)


def include_custom_structmap(
job, baseDirectoryPath, state, custom_structmap="mets_structmap.xml"
):
Expand Down Expand Up @@ -904,13 +914,15 @@ def include_custom_structmap(
"//*[@CONTENTIDS]", namespaces={"mets:": ns.metsNS}
)
for item in fileids:
fileName = item.get("CONTENTIDS")
if fileName in state.fileNameToFileID:
item.set("FILEID", state.fileNameToFileID[fileName])
file_path = item.get("CONTENTIDS")
normalized_path = fixup_path_input_by_user(job, file_path)
if normalized_path in state.fileNameToFileID:
item.set("FILEID", state.fileNameToFileID[normalized_path])
else:
job.pyprint(
"Custom structmap error: no fileUUID for",
fileName,
file_path,
normalized_path,
file=sys.stderr,
)
state.error_accumulator.error_count += 1
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
<?xml version="1.0" encoding="utf-8"?>
<mets:mets xmlns:mets="http://www.loc.gov/METS/">
<mets:structMap TYPE="logical">
<mets:div TYPE="Testing a path with spaces" LABEL="Archivemtica Tests">
<mets:div TYPE="binary" LABEL="Complete test">
<mets:fptr FILEID="FILE001" CONTENTIDS="objects/dir-with-dashes/file with spaces.bin"/>
</mets:div>
</mets:div>
</mets:structMap>
</mets:mets>
18 changes: 18 additions & 0 deletions src/MCPClient/tests/fixtures/custom_structmaps/model/files.json
Original file line number Diff line number Diff line change
Expand Up @@ -161,6 +161,24 @@
"size": 2600
}
},
{
"pk": "2cd8d06a-62cf-4814-b5b3-1896e0b8464f",
"model": "main.file",
"fields": {
"filegrpuuid": "",
"sip": "3a915449-d1bb-4920-b274-c917c7bb5929",
"originallocation": "%SIPDirectory%objects/dir-with-dashes/file with spaces.bin",
"transfer": null,
"filegrpuse": "original",
"removedtime": null,
"label": "",
"checksum": "",
"enteredsystem": "2019-04-10T23:13:00Z",
"modificationtime": "1970-01-01T00:00:00Z",
"currentlocation": "%SIPDirectory%objects/dir-with-dashes/file with spaces.bin",
"size": 2600
}
},
{
"pk": "9691d9cd-10b4-4c50-abcd-f8c22a37aac5",
"model": "main.file",
Expand Down
17 changes: 16 additions & 1 deletion src/MCPClient/tests/test_create_aip_mets.py
Original file line number Diff line number Diff line change
Expand Up @@ -788,8 +788,15 @@ def validate_mets(mets_xsd, mets_structmap):
)
)

def fixup_fileid_state(self):
    """Re-key ``state.fileNameToFileID`` using cleaned-up file paths.

    For items on-disk we have to mimic the filename clean-up process, so
    every key in the mapping is replaced by the result of
    ``create_mets_v2.fixup_path_input_by_user`` for that key, keeping the
    associated value.
    """
    name_to_id = self.state.fileNameToFileID
    # Iterate over a snapshot of the keys because the mapping is mutated
    # inside the loop.
    for original_path in list(name_to_id):
        # Remove the old entry first, then compute the cleaned key and
        # store the value under it.
        file_id = name_to_id.pop(original_path)
        cleaned_path = create_mets_v2.fixup_path_input_by_user(
            Job("stub", "stub", []), original_path
        )
        name_to_id[cleaned_path] = file_id

def generate_aip_mets_v2_state(self):
"""Generate fileSec
"""Generate fileSec state
State will be generated that will help us to test the units involved
with creating a custom structmap in the AIP METS.
Expand Down Expand Up @@ -879,6 +886,7 @@ def test_create_file_sec(self):
def test_get_included_structmap(self):
"""Test the output of custom structmaps in create_mets_v2."""
self.generate_aip_mets_v2_state()
self.fixup_fileid_state()
default_structmap = "mets_structmap.xml"
Result = collections.namedtuple(
"Result", "structmap_name files replaced_count structmap_id broken"
Expand Down Expand Up @@ -918,6 +926,13 @@ def test_get_included_structmap(self):
None,
False,
),
Result(
"path_with_spaces_structmap.xml",
["objects/dir-with-dashes/file with spaces.bin"],
1,
None,
False,
),
]
for res in results:
structmap_path = os.path.join(
Expand Down

0 comments on commit 247309f

Please sign in to comment.