Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add type hints to the transcribe_file script #1981

Merged
merged 2 commits into from
Aug 21, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -61,12 +61,14 @@ module = [
"src.MCPClient.lib.clientScripts.identify_file_format",
"src.MCPClient.lib.clientScripts.normalize",
"src.MCPClient.lib.clientScripts.policy_check",
"src.MCPClient.lib.clientScripts.transcribe_file",
"src.MCPClient.lib.clientScripts.validate_file",
"tests.MCPClient.conftest",
"tests.MCPClient.test_characterize_file",
"tests.MCPClient.test_identify_file_format",
"tests.MCPClient.test_normalize",
"tests.MCPClient.test_policy_check",
"tests.MCPClient.test_transcribe_file",
"tests.MCPClient.test_validate_file",
]
check_untyped_defs = true
Expand Down
2 changes: 1 addition & 1 deletion src/MCPClient/lib/clientScripts/characterize_file.py
Original file line number Diff line number Diff line change
Expand Up @@ -126,7 +126,7 @@ def main(job: Job, file_uuid: uuid.UUID, sip_uuid: uuid.UUID) -> int:


def get_parser() -> argparse.ArgumentParser:
parser = argparse.ArgumentParser(description="Identify file formats.")
parser = argparse.ArgumentParser(description="Characterize file.")
parser.add_argument("file_uuid", type=uuid.UUID)
parser.add_argument("sip_uuid", type=uuid.UUID)

Expand Down
2 changes: 1 addition & 1 deletion src/MCPClient/lib/clientScripts/normalize.py
Original file line number Diff line number Diff line change
Expand Up @@ -578,7 +578,7 @@ def main(job: Job, opts: NormalizeArgs) -> int:


def get_parser() -> argparse.ArgumentParser:
parser = argparse.ArgumentParser(description="Identify file formats.")
parser = argparse.ArgumentParser(description="Normalize.")
parser.add_argument(
"purpose", type=str, help='"preservation", "access", "thumbnail"'
)
Expand Down
75 changes: 54 additions & 21 deletions src/MCPClient/lib/clientScripts/transcribe_file.py
Original file line number Diff line number Diff line change
@@ -1,20 +1,25 @@
#!/usr/bin/env python
import argparse
import dataclasses
import multiprocessing
import os
from uuid import uuid4
import uuid
from typing import List
from typing import Optional
from typing import Sequence
from typing import Tuple

import django
from django.db import transaction

django.setup()
# dashboard

import databaseFunctions
import fileOperations

# archivematicaCommon
from client.job import Job
from dicts import ReplacementDict
from django.conf import settings as mcpclient_settings
from django.core.exceptions import ValidationError
from django.db import transaction
from django.utils import timezone
from executeOrRunSubProcess import executeOrRun
from fpr.models import FPRule
Expand All @@ -24,19 +29,27 @@
from main.models import FileFormatVersion


def concurrent_instances():
@dataclasses.dataclass
class TranscribeFileArgs:
    """Typed container for the arguments of the transcribe_file script.

    The field names match the positional arguments declared by the
    script's argument parser, so a parsed namespace can be unpacked
    straight into this class.
    """

    # UUID passed as the ``task_uuid`` positional argument.
    task_uuid: uuid.UUID
    # UUID passed as the ``file_uuid`` positional argument.
    file_uuid: uuid.UUID


def concurrent_instances() -> int:
    """Return the CPU count reported by ``multiprocessing.cpu_count()``.

    NOTE(review): presumably the caller uses this value as the number of
    instances of this script allowed to run at once -- confirm.
    """
    cpu_total = multiprocessing.cpu_count()
    return cpu_total


def insert_transcription_event(status, file_uuid, rule, relative_location):
def insert_transcription_event(
status: int, file_uuid: uuid.UUID, rule: FPRule, relative_location: str
) -> str:
outcome = "transcribed" if status == 0 else "not transcribed"

tool = rule.command.tool
event_detail = 'program={}; version={}; command="{}"'.format(
tool.description, tool.version, rule.command.command.replace('"', r"\"")
)

event_uuid = str(uuid4())
event_uuid = str(uuid.uuid4())

databaseFunctions.insertIntoEvents(
fileUUID=file_uuid,
Expand All @@ -51,9 +64,14 @@


def insert_file_into_database(
task_uuid, file_uuid, sip_uuid, event_uuid, rule, output_path, relative_path
):
transcription_uuid = str(uuid4())
task_uuid: uuid.UUID,
file_uuid: uuid.UUID,
sip_uuid: str,
event_uuid: str,
output_path: str,
relative_path: str,
) -> None:
transcription_uuid = str(uuid.uuid4())
today = timezone.now()
fileOperations.addFileToSIP(
relative_path,
Expand All @@ -66,7 +84,7 @@
)

fileOperations.updateSizeAndChecksum(
transcription_uuid, output_path, today, str(uuid4())
transcription_uuid, output_path, today, str(uuid.uuid4())
)

databaseFunctions.insertIntoDerivations(
Expand All @@ -76,17 +94,18 @@
)


def fetch_rules_for(file_: File) -> Sequence[FPRule]:
    """Return the active "transcription" FPR rules for the format of ``file_``.

    The file's ``FileFormatVersion`` record is looked up first; its
    ``format_version`` is then used to filter the active rules whose
    purpose is ``"transcription"``.

    Returns an empty list when the file has no ``FileFormatVersion``
    record or when the lookup raises ``ValidationError``.
    """
    try:
        # Named ``file_format`` so the builtin ``format`` is not shadowed.
        file_format = FileFormatVersion.objects.get(file_uuid=file_)
        rules: Sequence[FPRule] = FPRule.active.filter(
            format=file_format.format_version, purpose="transcription"
        )
        return rules
    except (FileFormatVersion.DoesNotExist, ValidationError):
        return []


def fetch_rules_for_derivatives(file_):
def fetch_rules_for_derivatives(file_: File) -> Tuple[Optional[File], Sequence[FPRule]]:
derivs = Derivation.objects.filter(source_file=file_)
for deriv in derivs:
derived_file = deriv.derived_file
Expand All @@ -101,7 +120,7 @@
return None, []


def main(job, task_uuid, file_uuid):
def main(job: Job, task_uuid: uuid.UUID, file_uuid: uuid.UUID) -> int:
setup_dicts(mcpclient_settings)

succeeded = True
Expand Down Expand Up @@ -163,19 +182,33 @@
file_uuid,
rd["%SIPUUID%"],
event,
rule,
output_path,
relative_path,
)

return 0 if succeeded else 1


def call(jobs):
def get_parser() -> argparse.ArgumentParser:
    """Build the command-line parser for the transcribe_file script.

    The parser accepts two positional arguments, ``task_uuid`` and
    ``file_uuid`` (in that order), each converted to ``uuid.UUID``.
    """
    # NOTE: Codecov reported the lines added for this function as not
    # covered by tests.
    cli = argparse.ArgumentParser(description="Transcribe file.")
    for positional in ("task_uuid", "file_uuid"):
        cli.add_argument(positional, type=uuid.UUID)
    return cli

Check warning on line 197 in src/MCPClient/lib/clientScripts/transcribe_file.py

View check run for this annotation

Codecov / codecov/patch

src/MCPClient/lib/clientScripts/transcribe_file.py#L197

Added line #L197 was not covered by tests


def parse_args(parser: argparse.ArgumentParser, job: Job) -> TranscribeFileArgs:
    """Parse a job's arguments into a ``TranscribeFileArgs`` instance.

    ``job.args[0]`` is skipped (presumably the script name -- confirm
    against the caller); the remaining items are handed to ``parser``
    and the resulting namespace is unpacked into the dataclass.
    """
    # NOTE: Codecov reported the lines added for this function as not
    # covered by tests.
    parsed_fields = vars(parser.parse_args(job.args[1:]))
    return TranscribeFileArgs(**parsed_fields)

Check warning on line 203 in src/MCPClient/lib/clientScripts/transcribe_file.py

View check run for this annotation

Codecov / codecov/patch

src/MCPClient/lib/clientScripts/transcribe_file.py#L203

Added line #L203 was not covered by tests


def call(jobs: List[Job]) -> None:
    """Run ``main`` once for every job and record its result on the job.

    All jobs are processed inside a single ``transaction.atomic()``
    block, each within its own ``job.JobContext()``. The job's arguments
    are parsed into UUIDs before ``main`` is called, and the value
    ``main`` returns is stored with ``job.set_status``.
    """
    # NOTE: Codecov reported the lines added for this function as not
    # covered by tests.
    parser = get_parser()

    with transaction.atomic():
        for job in jobs:
            with job.JobContext():
                # Only the parsed arguments are used; the superseded raw
                # ``job.args[1]`` / ``job.args[2]`` path would call ``main``
                # a second time with unparsed strings.
                args = parse_args(parser, job)
                job.set_status(main(job, args.task_uuid, args.file_uuid))

Check warning on line 214 in src/MCPClient/lib/clientScripts/transcribe_file.py

View check run for this annotation

Codecov / codecov/patch

src/MCPClient/lib/clientScripts/transcribe_file.py#L214

Added line #L214 was not covered by tests
Loading