diff --git a/hack/Dockerfile b/hack/Dockerfile index 3625f501e1..e02c1cef97 100644 --- a/hack/Dockerfile +++ b/hack/Dockerfile @@ -168,7 +168,6 @@ RUN set -ex \ clamav \ coreutils \ ffmpeg \ - fits \ g++ \ gcc \ gearman \ @@ -190,7 +189,6 @@ RUN set -ex \ md5deep \ mediaconch \ mediainfo \ - nailgun \ nfs-common \ openjdk-8-jre-headless \ p7zip-full \ diff --git a/hack/README.md b/hack/README.md index 72d2d3b1e1..1c0063b104 100644 --- a/hack/README.md +++ b/hack/README.md @@ -64,7 +64,6 @@ am-archivematica-mcp-server-1 39.43MiB / 7.763GiB am-archivematica-storage-service-1 83.96MiB / 7.763GiB am-nginx-1 2.715MiB / 7.763GiB am-elasticsearch-1 900.2MiB / 7.763GiB -am-fits-1 71.09MiB / 7.763GiB am-gearmand-1 3.395MiB / 7.763GiB am-mysql-1 551.9MiB / 7.763GiB am-clamavd-1 570MiB / 7.763GiB @@ -312,7 +311,6 @@ echo workers | socat - tcp:127.0.0.1:62004,shut-none | grep "_v0.0" | awk '{prin | mysql | `tcp/3306` | `tcp/62001` | | elasticsearch | `tcp/9200` | `tcp/62002` | | gearman | `tcp/4730` | `tcp/62004` | -| fits | `tcp/2113` | `tcp/62005` | | clamavd | `tcp/3310` | `tcp/62006` | | nginx » archivematica-dashboard | `tcp/80` | `tcp/62080` | | nginx » archivematica-storage-service | `tcp/8000` | `tcp/62081` | diff --git a/hack/docker-compose.yml b/hack/docker-compose.yml index 9e4be90de0..b123bb4d32 100644 --- a/hack/docker-compose.yml +++ b/hack/docker-compose.yml @@ -76,14 +76,6 @@ services: ports: - "127.0.0.1:62004:4730" - fits: - image: "artefactual/fits-ngserver:0.8.4" - user: ${USER_ID:-1000} - ports: - - "127.0.0.1:62005:2113" - volumes: - - "archivematica_pipeline_data:/var/archivematica/sharedDirectory:rw" # Read and write needed! 
- clamavd: image: "artefactual/clamav:latest" environment: @@ -146,8 +138,6 @@ services: environment: DJANGO_SECRET_KEY: "12345" DJANGO_SETTINGS_MODULE: "settings.common" - NAILGUN_SERVER: "fits" - NAILGUN_PORT: "2113" ARCHIVEMATICA_MCPCLIENT_CLIENT_USER: "archivematica" ARCHIVEMATICA_MCPCLIENT_CLIENT_PASSWORD: "demo" ARCHIVEMATICA_MCPCLIENT_CLIENT_HOST: "mysql" @@ -169,7 +159,6 @@ services: - "../:/src" - "archivematica_pipeline_data:/var/archivematica/sharedDirectory:rw" links: - - "fits" - "clamavd" - "mysql" - "gearmand" diff --git a/src/MCPClient/lib/archivematicaClientModules b/src/MCPClient/lib/archivematicaClientModules index 28f259eabb..4dff8ceab2 100644 --- a/src/MCPClient/lib/archivematicaClientModules +++ b/src/MCPClient/lib/archivematicaClientModules @@ -26,7 +26,6 @@ removeunneededfiles_v0.0 = remove_unneeded_files archivematicaclamscan_v0.0 = archivematica_clamscan createevent_v0.0 = create_event examinecontents_v0.0 = examine_contents -fits_v0.0 = fits identifydspacefiles_v0.0 = identify_dspace_files identifydspacemetsfiles_v0.0 = identify_dspace_mets_files identifyfileformat_v0.0 = identify_file_format diff --git a/src/MCPClient/lib/clientScripts/characterize_file.py b/src/MCPClient/lib/clientScripts/characterize_file.py index f9c0cc239e..dd875d5a2a 100755 --- a/src/MCPClient/lib/clientScripts/characterize_file.py +++ b/src/MCPClient/lib/clientScripts/characterize_file.py @@ -5,7 +5,7 @@ # b) Prints the tool's stdout, for tools which do not output XML # # If a tool has no defined characterization commands, then the default -# will be run instead (currently FITS). +# will be run instead. import argparse import dataclasses import multiprocessing diff --git a/src/MCPClient/lib/clientScripts/fits.py b/src/MCPClient/lib/clientScripts/fits.py deleted file mode 100755 index 6012d0e969..0000000000 --- a/src/MCPClient/lib/clientScripts/fits.py +++ /dev/null @@ -1,124 +0,0 @@ -#!/usr/bin/env python -# This file is part of Archivematica. 
-# -# Copyright 2010-2013 Artefactual Systems Inc. -# -# Archivematica is free software: you can redistribute it and/or modify -# it under the terms of the GNU Affero General Public License as published by -# the Free Software Foundation, either version 3 of the License, or -# (at your option) any later version. -# -# Archivematica is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. -# -# You should have received a copy of the GNU General Public License -# along with Archivematica. If not, see . -import os -import tempfile - -import django -import lxml.etree as etree -from archivematicaFunctions import getTagged -from custom_handlers import get_script_logger -from databaseFunctions import insertIntoFPCommandOutput -from django.db import transaction -from executeOrRunSubProcess import executeOrRun - -# archivematicaCommon - -django.setup() -# dashboard -from main.models import FPCommandOutput - -logger = get_script_logger("archivematica.mcp.client.FITS") - -FITSNS = "{http://hul.harvard.edu/ois/xml/ns/fits/fits_output}" - - -def exclude_jhove_properties(fits): - """ - Exclude from "/fits/toolOutput/tool[name=Jhove]/repInfo" - because that field contains unnecessary excess data and the key data are - covered by output from other FITS tools. - """ - format_validation = None - tools = getTagged(getTagged(fits, FITSNS + "toolOutput")[0], FITSNS + "tool") - for tool in tools: - if tool.get("name") == "Jhove": - format_validation = tool - break - if format_validation is None: - return fits - repInfo = getTagged(format_validation, "repInfo")[0] - properties = getTagged(repInfo, "properties") - if len(properties): - repInfo.remove(properties[0]) - return fits - - -def main(target, xml_file, date, event_uuid, file_uuid, file_grpuse): - """ - Note: xml_file, date and event_uuid are not being used. 
- """ - if file_grpuse in ("DSPACEMETS", "maildirFile"): - logger.error("File's fileGrpUse in exclusion list, skipping") - return 0 - - if not FPCommandOutput.objects.filter(file=file_uuid).exists(): - logger.error("Warning: Fits has already run on this file. Not running again.") - return 0 - - _, temp_file = tempfile.mkstemp() - args = ["fits.sh", "-i", target, "-o", temp_file] - try: - logger.info("Executing %s", args) - retcode, stdout, stderr = executeOrRun( - "command", args, printing=False, capture_output=True - ) - - if retcode != 0: - logger.error( - "fits.sh exited with status code %s, %s, %s", retcode, stdout, stderr - ) - return retcode - - try: - tree = etree.parse(temp_file) - except Exception: - logger.exception("Failed to read Fits's XML.") - return 2 - - fits = tree.getroot() - fits = exclude_jhove_properties(fits) - - # NOTE: This is hardcoded for now because FPCommandOutput references FPRule for future development, - # when characterization will become user-configurable and be decoupled from FITS specifically. - # Thus a stub rule must exist for FITS; this will be replaced with a real rule in the future. - logger.info("Storing output of file characterization...") - insertIntoFPCommandOutput( - file_uuid, - etree.tostring(fits, pretty_print=False, encoding="utf8"), - "3a19de70-0e42-4145-976b-3a248d43b462", - ) - - except (OSError, ValueError): - logger.exception("Execution failed") - return 1 - - finally: - # We are responsible for removing the temporary file and we do it here - # to ensure that it's going to happen whatever occurs inside our try - # block. 
- os.remove(temp_file) - - return 0 - - -def call(jobs): - with transaction.atomic(): - for job in jobs: - with job.JobContext(logger=logger): - args = job.args[1:] - job.set_status(main(*args)) diff --git a/src/MCPClient/lib/clientScripts/manual_normalization_create_metadata_and_restructure.py b/src/MCPClient/lib/clientScripts/manual_normalization_create_metadata_and_restructure.py index bbfd3e6d29..c0d711820a 100755 --- a/src/MCPClient/lib/clientScripts/manual_normalization_create_metadata_and_restructure.py +++ b/src/MCPClient/lib/clientScripts/manual_normalization_create_metadata_and_restructure.py @@ -120,7 +120,12 @@ def main(job): # We found the original file somewhere above job.print_output( "Matched original file %s (%s) to preservation file %s (%s)" - % (original_file.currentlocation, original_file.uuid, filePath, fileUUID) + % ( + original_file.currentlocation.decode(), + original_file.uuid, + filePath, + fileUUID, + ) ) # Generate the new preservation path: path/to/original/filename-uuid.ext basename = os.path.basename(filePath) diff --git a/src/MCPClient/lib/clientScripts/manual_normalization_move_access_files_to_dip.py b/src/MCPClient/lib/clientScripts/manual_normalization_move_access_files_to_dip.py index cbecf6fd78..2b0345b889 100755 --- a/src/MCPClient/lib/clientScripts/manual_normalization_move_access_files_to_dip.py +++ b/src/MCPClient/lib/clientScripts/manual_normalization_move_access_files_to_dip.py @@ -106,7 +106,7 @@ def main(job): } f = File.objects.get(**kwargs) else: - if isinstance(e, File.DoesNotExist, ValidationError): + if isinstance(e, (File.DoesNotExist, ValidationError)): job.print_error( "No matching file for: ", opts.filePath.replace(opts.SIPDirectory, "%SIPDirectory%", 1), diff --git a/src/MCPServer/lib/assets/workflow.json b/src/MCPServer/lib/assets/workflow.json index 9a917cea55..a1d72cf024 100644 --- a/src/MCPServer/lib/assets/workflow.json +++ b/src/MCPServer/lib/assets/workflow.json @@ -1927,15 +1927,15 @@ "config": { 
"@manager": "linkTaskManagerFiles", "@model": "StandardTaskConfig", - "arguments": "\"%relativeLocation%\" \"%SIPLogsDirectory%fileMeta/%fileUUID%.xml\" \"%date%\" \"%taskUUID%\" \"%fileUUID%\" \"%fileGrpUse%\"", - "execute": "FITS_v0.0", + "arguments": "\"%fileUUID%\" \"%SIPUUID%\"", + "execute": "characterizeFile_v0.0", "filter_subdir": "objects/manualNormalization/preservation" }, "description": { - "en": "Run FITS on manual normalized preservation files", - "no": "Kjør FITS på manuelt normaliserte bevaringsfiler", - "pt_BR": "Executar o FITS em arquivos de preservação normalizados manuaimente", - "sv": "Kör FITS på manuellt normaliserade bevarandefiler" + "en": "Characterize and extract metadata on manually normalized preservation files", + "no": "Karakteriser og hent ut metadata på manuelt normaliserte bevaringsfiler", + "pt_BR": "Caracterizar e extrair metadados em arquivos de preservação normalizados manualmente", + "sv": "Karaktärisera och extrahera metadata på manuellt normaliserade bevarandefiler" }, "exit_codes": { "0": { @@ -1948,7 +1948,7 @@ "group": { "en": "Process manually normalized files", "es": "Procesar manualmente ficheros normalizados", - "no": "Prosesser normaliserte filer manuelt", + "no": "Prosesser manuelt normaliserte filer", "pt_BR": "Processar arquivos normalizados manualmente", "sv": "Bearbeta manuellt normaliserade filer" } @@ -2934,11 +2934,11 @@ "exit_codes": { "0": { "job_status": "Completed successfully", - "link_id": "bd382151-afd0-41bf-bb7a-b39aef728a32" + "link_id": "1b1a4565-b501-407b-b40f-2f20889423f1" } }, "fallback_job_status": "Failed", - "fallback_link_id": "bd382151-afd0-41bf-bb7a-b39aef728a32", + "fallback_link_id": "1b1a4565-b501-407b-b40f-2f20889423f1", "group": { "en": "Extract packages", "es": "Extraer paquetes", @@ -7667,7 +7667,7 @@ } }, "fallback_job_status": "Failed", - "fallback_link_id": "bd382151-afd0-41bf-bb7a-b39aef728a32", + "fallback_link_id": "1b1a4565-b501-407b-b40f-2f20889423f1", "group": { "en": 
"Extract packages", "es": "Extraer paquetes", @@ -9044,41 +9044,6 @@ "sv": "Normalisera" } }, - "bd382151-afd0-41bf-bb7a-b39aef728a32": { - "config": { - "@manager": "linkTaskManagerFiles", - "@model": "StandardTaskConfig", - "arguments": "\"%relativeLocation%\" \"%SIPLogsDirectory%fileMeta/%fileUUID%.xml\" \"%date%\" \"%taskUUID%\" \"%fileUUID%\" \"%fileGrpUse%\"", - "execute": "FITS_v0.0", - "filter_subdir": "objects/attachments" - }, - "description": { - "en": "Characterize and extract metadata for attachments", - "es": "Caracterizar y extraer los metadatos de los adjuntos", - "fr": "Caractériser et extraire les métadonnées pour mettre en pièces jointes", - "ja": "添付ファイルのメタデータの特徴付けと抽出", - "no": "Karakteriser og hent ut metadata fra vedlegg", - "pt_BR": "Caracterizar e extrair metadados para anexos", - "sv": "Karaktärisera och extrahera metadata för bilagor" - }, - "exit_codes": { - "0": { - "job_status": "Completed successfully", - "link_id": "1b1a4565-b501-407b-b40f-2f20889423f1" - } - }, - "fallback_job_status": "Failed", - "fallback_link_id": "61c316a6-0a50-4f65-8767-1f44b1eeb6dd", - "group": { - "en": "Characterize and extract metadata", - "es": "Caracterizar y extraer metadatos", - "fr": "Caractériser et extraire les métadonnées", - "ja": "メタデータの特徴付けと抽出", - "no": "Karakteriser og hent ut metadata", - "pt_BR": "Caracterizar e extrair metadados", - "sv": "Karaktärisera och extrahera metadata" - } - }, "bd792750-a55b-42e9-903a-8c898bb77df1": { "config": { "@manager": "linkTaskManagerDirectories", @@ -9276,6 +9241,38 @@ "sv": "Byt namn på SIP-mappen med SIP UUID" } }, + "bf0ea0f6-211b-4b34-8f25-8a68145403c8": { + "config": { + "@manager": "linkTaskManagerFiles", + "@model": "StandardTaskConfig", + "arguments": "\"True\" \"%relativeLocation%\" \"%fileUUID%\" --disable-reidentify", + "execute": "identifyFileFormat_v0.0", + "filter_subdir": "objects/manualNormalization/preservation" + }, + "description": { + "en": "Identify file format", + "es": "Identificar 
formato de fichero", + "fr": "Identifier le format de fichier", + "no": "Identifiser filformat", + "pt_BR": "Identifique o formato do arquivo", + "sv": "Identifiera filformat" + }, + "exit_codes": { + "0": { + "job_status": "Completed successfully", + "link_id": "10c40e41-fb10-48b5-9d01-336cd958afe8" + } + }, + "fallback_job_status": "Failed", + "fallback_link_id": "10c40e41-fb10-48b5-9d01-336cd958afe8", + "group": { + "en": "Process manually normalized files", + "es": "Procesar manualmente ficheros normalizados", + "no": "Prosesser manuelt normaliserte filer", + "pt_BR": "Processar arquivos normalizados manualmente", + "sv": "Bearbeta manuellt normaliserade filer" + } + }, "c103b2fb-9a6b-4b68-8112-b70597a6cd14": { "config": { "@manager": "linkTaskManagerDirectories", @@ -10760,7 +10757,7 @@ "exit_codes": { "0": { "job_status": "Completed successfully", - "link_id": "10c40e41-fb10-48b5-9d01-336cd958afe8" + "link_id": "bf0ea0f6-211b-4b34-8f25-8a68145403c8" } }, "fallback_job_status": "Failed", diff --git a/src/dashboard/src/fpr/migrations/0044_remove_fits.py b/src/dashboard/src/fpr/migrations/0044_remove_fits.py new file mode 100644 index 0000000000..85f53d0099 --- /dev/null +++ b/src/dashboard/src/fpr/migrations/0044_remove_fits.py @@ -0,0 +1,12 @@ +from django.db import migrations + + +def data_migration(apps, schema_editor): + FPTool = apps.get_model("fpr", "FPTool") + FPTool.objects.filter(description="FITS").delete() + + +class Migration(migrations.Migration): + dependencies = [("fpr", "0043_update_default_thumbnail_command")] + + operations = [migrations.RunPython(data_migration, migrations.RunPython.noop)]