Skip to content

Commit

Permalink
Merge branch 'release/v2.12.3'
Browse files Browse the repository at this point in the history
  • Loading branch information
jtellnes committed Dec 9, 2024
2 parents df1fd15 + 5f1e8e4 commit a21bc37
Show file tree
Hide file tree
Showing 23 changed files with 183 additions and 109 deletions.
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -254,3 +254,5 @@ paket-files/
# Local launch settings
launchSettings.json

# Directory for Visual Studio extension ContextKeeper
.contextkeeper/
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,8 @@
</ItemGroup>

<ItemGroup>
<None Remove="TestData\DiskUsage\docx.docx" />
<None Remove="TestData\DiskUsage\pdf.pdf" />
<None Remove="TestData\FileTypes\docx.docx" />
<None Remove="TestData\FileTypes\pdf.pdf" />
<None Remove="TestData\FileTypes\pdfA-1b.pdf" />
Expand All @@ -39,6 +41,12 @@
</ItemGroup>

<ItemGroup>
<Content Include="TestData\DiskUsage\docx.docx">
<CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
</Content>
<Content Include="TestData\DiskUsage\pdf.pdf">
<CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
</Content>
<Content Include="TestData\FileTypes\docx.docx">
<CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
</Content>
Expand Down Expand Up @@ -103,6 +111,12 @@
<None Update="TestData\FileTypes\fileTypes.zip">
<CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
</None>
<None Update="TestData\FileTypes\nested_archive-files\A.tar">
<CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
</None>
<None Update="TestData\FileTypes\nested_archive-files\B.zip">
<CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
</None>
<None Update="TestData\jegerreg-98-dos\arkivuttrekk.xml">
<CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
</None>
Expand Down
35 changes: 23 additions & 12 deletions src/Arkivverket.Arkade.Core.Tests/Metadata/DiasMetsCreatorTest.cs
Original file line number Diff line number Diff line change
Expand Up @@ -111,7 +111,7 @@ public void ShouldCreateMetsFromMetadata()

// CREATEDATE:

metsHdr.CREATEDATE.Should().Be(new DateTime(2023, 01, 01));
metsHdr.CREATEDATE.Should().BeCloseTo(DateTime.Now, TimeSpan.FromSeconds(30));

// ARCHIVEDESCRIPTION:

Expand Down Expand Up @@ -288,17 +288,28 @@ public void ShouldCreateMetsFromMetadata()

var metsFile = mets.fileSec.fileGrp[0].Items[0] as fileType;

metsFile?.ID.Should().Be("fileId_1");
metsFile?.MIMETYPE.Should().Be(mdSecTypeMdRefMIMETYPE.imagepdf);
metsFile?.USE.Should().Be("Datafile");
metsFile?.CHECKSUMTYPE.Should().Be(mdSecTypeMdRefCHECKSUMTYPE.SHA256);
metsFile?.CHECKSUM.Should().Be("3b29dfcc4286e50b180af8f21904c86f8aa42a23c4055c3a71d0512f9ae3886f");
metsFile?.SIZE.Should().Be(2325452);
metsFile?.CREATED.Year.Should().Be(2017);
metsFile?.CREATED.Month.Should().Be(06);
metsFile?.CREATED.Day.Should().Be(30);
metsFile?.FLocat.href.Should().Be("file:someDirectory/someFileName.pdf");
metsFile?.FLocat.LOCTYPE.Should().Be(mdSecTypeMdRefLOCTYPE.URL);
metsFile.ID.Should().Be("fileId_1");
metsFile.MIMETYPE.Should().Be(mdSecTypeMdRefMIMETYPE.applicationxml);
metsFile.USE.Should().Be("Datafile");
metsFile.CHECKSUMTYPE.Should().Be(mdSecTypeMdRefCHECKSUMTYPE.SHA256);
metsFile.CHECKSUM.Should().Be("3b29dfcc4286e50b180af8f21904c86f8aa42a23c4055c3a71d0512f9ae3886f");
metsFile.SIZE.Should().Be(2325452);
metsFile.CREATED.Year.Should().Be(2017);
metsFile.CREATED.Month.Should().Be(06);
metsFile.CREATED.Day.Should().Be(30);
metsFile.FLocat.href.Should().Be("file:someFileName.xml");
metsFile.FLocat.LOCTYPE.Should().Be(mdSecTypeMdRefLOCTYPE.URL);

var metsArchiveExtractionFileGroup = mets.fileSec.fileGrp[0].Items[1] as fileGrpType;

metsArchiveExtractionFileGroup!.USE.Should().BeEquivalentTo(ArkadeConstants.MetsArchiveExtractionFileGroupUse);
metsArchiveExtractionFileGroup.VERSDATESpecified.Should().BeTrue();
metsArchiveExtractionFileGroup.VERSDATE.Should().Be(new DateTime(2022, 01, 01));

var metsArchiveExtractionFile = metsArchiveExtractionFileGroup.Items[0] as fileType;

metsArchiveExtractionFile!.ID.Should().Be("fileId_2");
metsArchiveExtractionFile.FLocat.href.Should().Be($"file:{ArkadeConstants.DirectoryNameContent}/someFileName.xml");

// PACKAGE TYPE

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -80,7 +80,7 @@ public void DiasMetsIsSuccessfullyLoadedIntoArchiveMetadataObject()

archiveMetadata.StartDate.Should().Be(new DateTime(2017, 01, 01));
archiveMetadata.EndDate.Should().Be(new DateTime(2020, 01, 01));
archiveMetadata.ExtractionDate.Should().Be(new DateTime(2023, 01, 01));
archiveMetadata.ExtractionDate.Should().Be(new DateTime(2022, 01, 01));
}
}
}
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
using System;
using System;
using System.IO;
using Arkivverket.Arkade.Core.ExternalModels.SubmissionDescription;
using Arkivverket.Arkade.Core.Metadata;
Expand Down Expand Up @@ -64,7 +64,7 @@ public void ShouldCreateMetsFromMetadata()

// CREATEDATE:

metsHdr.CREATEDATE.Should().Be(new DateTime(2023, 01, 01));
metsHdr.CREATEDATE.Should().BeCloseTo(DateTime.Now, TimeSpan.FromSeconds(30));

// ARCHIVEDESCRIPTION:

Expand Down Expand Up @@ -241,17 +241,17 @@ public void ShouldCreateMetsFromMetadata()

var metsFile = mets.fileSec.fileGrp[0].Items[0] as fileType;

metsFile?.ID.Should().Be("fileId_1");
metsFile?.MIMETYPE.Should().Be("application/pdf");
metsFile?.USE.Should().Be("Datafile");
metsFile?.CHECKSUMTYPE.Should().Be(fileTypeCHECKSUMTYPE.SHA256);
metsFile?.CHECKSUM.Should().Be("3b29dfcc4286e50b180af8f21904c86f8aa42a23c4055c3a71d0512f9ae3886f");
metsFile?.SIZE.Should().Be(2325452);
metsFile?.CREATED.Year.Should().Be(2017);
metsFile?.CREATED.Month.Should().Be(06);
metsFile?.CREATED.Day.Should().Be(30);
metsFile?.FLocat.href.Should().Be("file:someDirectory/someFileName.pdf");
metsFile?.FLocat.LOCTYPE.Should().Be(mdSecTypeMdRefLOCTYPE.URL);
metsFile.ID.Should().Be("fileId_1");
metsFile.MIMETYPE.Should().Be("application/xml");
metsFile.USE.Should().Be("Datafile");
metsFile.CHECKSUMTYPE.Should().Be(fileTypeCHECKSUMTYPE.SHA256);
metsFile.CHECKSUM.Should().Be("3b29dfcc4286e50b180af8f21904c86f8aa42a23c4055c3a71d0512f9ae3886f");
metsFile.SIZE.Should().Be(2325452);
metsFile.CREATED.Year.Should().Be(2017);
metsFile.CREATED.Month.Should().Be(06);
metsFile.CREATED.Day.Should().Be(30);
metsFile.FLocat.href.Should().Be("file:someFileName.xml");
metsFile.FLocat.LOCTYPE.Should().Be(mdSecTypeMdRefLOCTYPE.URL);

// PACKAGE TYPE

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -95,7 +95,7 @@ public void DiasMetsIsSuccessfullyLoadedIntoArchiveMetadataObject()

archiveMetadata.StartDate.Should().Be(new DateTime(2017, 01, 01));
archiveMetadata.EndDate.Should().Be(new DateTime(2020, 01, 01));
archiveMetadata.ExtractionDate.Should().Be(new DateTime(2023, 01, 01));
archiveMetadata.ExtractionDate.Should().NotHaveValue();
}
}
}
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Original file line number Diff line number Diff line change
Expand Up @@ -69,15 +69,17 @@
<file ID="fileId_1" MIMETYPE="application/xml" SIZE="5307" CREATED="2018-08-30T09:48:07.471132+02:00" CHECKSUM="c8b1b1993988e8126a556b724c173896c9e47600da6f75333c3f7092a9b297dc" CHECKSUMTYPE="SHA-256" USE="Datafile">
<FLocat LOCTYPE="URL" xlink:type="simple" xlink:href="file:dias-mets.xml" />
</file>
<file ID="fileId_2" MIMETYPE="image/pdf" SIZE="20637" CREATED="2017-11-13T15:26:06.1458311+01:00" CHECKSUM="3b29dfcc4286e50b180af8f21904c86f8aa42a23c4055c3a71d0512f9ae3886f" CHECKSUMTYPE="SHA-256" USE="Datafile">
<FLocat LOCTYPE="URL" xlink:type="simple" xlink:href="file:content/dokumenter/5000000.pdf" />
</file>
<file ID="fileId_3" MIMETYPE="image/pdf" SIZE="18432" CREATED="2017-11-13T15:26:06.1543543+01:00" CHECKSUM="2ea3a86de226d791a07abb5279b0e2813b037730730ba630de4f14dea7c32208" CHECKSUMTYPE="SHA-256" USE="Datafile">
<FLocat LOCTYPE="URL" xlink:type="simple" xlink:href="file:content/dokumenter/5000001.pdf" />
</file>
<file ID="fileId_4" MIMETYPE="image/pdf" SIZE="18432" CREATED="2017-11-13T15:26:06.1623847+01:00" CHECKSUM="2ea3a86de226d791a07abb5279b0e2813b037730730ba630de4f14dea7c32208" CHECKSUMTYPE="SHA-256" USE="Datafile">
<FLocat LOCTYPE="URL" xlink:type="simple" xlink:href="file:content/dokumenter/underkatalog/5000002.pdf" />
</file>
<fileGrp VERSDATE="2022-01-01T00:00:00" USE="ArchiveExtraction">
<file ID="fileId_2" MIMETYPE="image/pdf" SIZE="20637" CREATED="2017-11-13T15:26:06.1458311+01:00" CHECKSUM="3b29dfcc4286e50b180af8f21904c86f8aa42a23c4055c3a71d0512f9ae3886f" CHECKSUMTYPE="SHA-256" USE="Datafile">
<FLocat LOCTYPE="URL" xlink:type="simple" xlink:href="file:content/dokumenter/5000000.pdf" />
</file>
<file ID="fileId_3" MIMETYPE="image/pdf" SIZE="18432" CREATED="2017-11-13T15:26:06.1543543+01:00" CHECKSUM="2ea3a86de226d791a07abb5279b0e2813b037730730ba630de4f14dea7c32208" CHECKSUMTYPE="SHA-256" USE="Datafile">
<FLocat LOCTYPE="URL" xlink:type="simple" xlink:href="file:content/dokumenter/5000001.pdf" />
</file>
<file ID="fileId_4" MIMETYPE="image/pdf" SIZE="18432" CREATED="2017-11-13T15:26:06.1623847+01:00" CHECKSUM="2ea3a86de226d791a07abb5279b0e2813b037730730ba630de4f14dea7c32208" CHECKSUMTYPE="SHA-256" USE="Datafile">
<FLocat LOCTYPE="URL" xlink:type="simple" xlink:href="file:content/dokumenter/underkatalog/5000002.pdf" />
</file>
</fileGrp>
</fileGrp>
</fileSec>
<structMap>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -198,6 +198,34 @@ public void IdentifyArchiveFileContentTest()
r.Version.Equals("3a"));
}

/// <summary>
/// Verifies that archives nested inside other archives are identified, and that scanning
/// the containing directory yields the same results as scanning each top-level archive
/// file individually in archive mode.
/// </summary>
[Fact, Trait("Category", "Integration")]
[Trait("Dependency", "IO")]
public void IdentifyNestedArchiveFilesTest()
{
    IFileFormatIdentifier identifier = CreateFileFormatIdentifier();

    string nestedArchivesDirectory = Path.Combine("TestData", "FileTypes", "nested_archive-files");

    // Scan the directory as a whole.
    List<IFileFormatInfo> directoryScanResults = identifier
        .IdentifyFormats(nestedArchivesDirectory, FileFormatScanMode.Directory)
        .ToList();

    // Expected entries, three levels deep; each nesting level is appended after a '#'.
    string[] expectedRelativeNames =
    {
        "A.tar", "B.zip",
        "A.tar#AA.tar", "A.tar#AB.zip",
        "B.zip#BA.tar", "B.zip#BB.zip",
        "A.tar#AA.tar#AAA.tar", "A.tar#AA.tar#AAB.zip",
        "A.tar#AB.zip#ABA.tar", "A.tar#AB.zip#ABB.zip",
        "B.zip#BA.tar#BAA.tar", "B.zip#BA.tar#BAB.zip",
        "B.zip#BB.zip#BBA.tar", "B.zip#BB.zip#BBB.zip"
    };

    foreach (string relativeName in expectedRelativeNames)
    {
        directoryScanResults.Should().Contain(r => r.FileName.Equals(Path.Combine(nestedArchivesDirectory, relativeName)));
    }

    // Scan each top-level archive on its own; together they must match the directory scan.
    string tarArchivePath = Path.Combine(nestedArchivesDirectory, "A.tar");
    List<IFileFormatInfo> tarScanResults = identifier
        .IdentifyFormats(tarArchivePath, FileFormatScanMode.Archive)
        .ToList();

    string zipArchivePath = Path.Combine(nestedArchivesDirectory, "B.zip");
    List<IFileFormatInfo> zipScanResults = identifier
        .IdentifyFormats(zipArchivePath, FileFormatScanMode.Archive)
        .ToList();

    tarScanResults.Concat(zipScanResults).Should().BeEquivalentTo(directoryScanResults);
}

private static IFileFormatIdentifier CreateFileFormatIdentifier()
{
IStatusEventHandler statusEventHandler = new Mock<IStatusEventHandler>().Object;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ public FileSystemInfoSizeCalculatorTest()
[Trait("Category", "Integration")]
public void ShouldCalculateCorrectTotalFileSize()
{
string pathToFiles = Path.Combine("TestData", "FileTypes");
string pathToFiles = Path.Combine("TestData", "DiskUsage");

var fileSystemInfoSizeCalculator = new FileSystemInfoSizeCalculator(_statusEventHandler);

Expand All @@ -33,13 +33,9 @@ public void ShouldCalculateCorrectTotalFileSize()
}

long docxByteSize = 12895L;
long zipByteSize = 89899L;
long pdfByteSize = 27182L;
long pdfA1bByteSize = 34155L;
long pdfA3aByteSize = 32506L;

long totalSize = docxByteSize + zipByteSize + pdfByteSize + pdfA1bByteSize + pdfA3aByteSize;
_totalFileSize.Should().Be(totalSize); // 5 files + where one is a .zip with 4 files inside
_totalFileSize.Should().Be(docxByteSize + pdfByteSize);
}

private void OnTargetSizeCalculatorFinished(object o, TargetSizeCalculatorEventArgs e)
Expand Down
32 changes: 24 additions & 8 deletions src/Arkivverket.Arkade.Core/Metadata/DiasMetsCreator.cs
Original file line number Diff line number Diff line change
Expand Up @@ -135,10 +135,7 @@ private static void CreateMetsElementAttributes(mets mets, ArchiveMetadata metad

private static void CreateMetsHdr(mets mets, ArchiveMetadata metadata)
{
var metsHdr = new metsTypeMetsHdr();

if (metadata.ExtractionDate != null)
metsHdr.CREATEDATE = metadata.ExtractionDate.Value;
var metsHdr = new metsTypeMetsHdr { CREATEDATE = DateTime.Now };

if (!string.IsNullOrEmpty(metadata.RecordStatus))
metsHdr.RECORDSTATUS = metadata.RecordStatus;
Expand All @@ -149,8 +146,7 @@ private static void CreateMetsHdr(mets mets, ArchiveMetadata metadata)

CreateHdrAgents(metsHdr, metadata);

if (metadata.ExtractionDate != null || metsHdr.altRecordID != null || metsHdr.agent != null)
mets.metsHdr = metsHdr;
mets.metsHdr = metsHdr;
}

private static void CreateAltRecordIDs(metsTypeMetsHdr metsHdr, ArchiveMetadata metadata)
Expand Down Expand Up @@ -494,10 +490,11 @@ private static void CreateFileSec(mets mets, ArchiveMetadata metadata)
return;

var metsFiles = new List<object>();
var metsArchiveExtractionFiles = new List<object>();

foreach (FileDescription fileDescription in metadata.FileDescriptions)
{
metsFiles.Add(new fileType
var metsFile = new fileType
{
ID = $"fileId_{fileDescription.Id}",
MIMETYPE = MimeTypeParser(fileDescription),
Expand All @@ -511,9 +508,28 @@ private static void CreateFileSec(mets mets, ArchiveMetadata metadata)
href = "file:" + fileDescription.Name.Replace("\\", "/"),
LOCTYPE = mdSecTypeMdRefLOCTYPE.URL
}
});
};

if (fileDescription.Name.StartsWith(ArkadeConstants.DirectoryNameContent))
metsArchiveExtractionFiles.Add(metsFile);
else
metsFiles.Add(metsFile);
}

var archiveExtractionFileGroup = new fileGrpType
{
USE = ArkadeConstants.MetsArchiveExtractionFileGroupUse,
Items = metsArchiveExtractionFiles.ToArray()
};

if (metadata.ExtractionDate.HasValue)
{
archiveExtractionFileGroup.VERSDATESpecified = true;
archiveExtractionFileGroup.VERSDATE = metadata.ExtractionDate.Value;
}

metsFiles.Add(archiveExtractionFileGroup);

var metsTypeFileSecFileGrp = new metsTypeFileSecFileGrp
{
ID = "fileGroup001",
Expand Down
29 changes: 27 additions & 2 deletions src/Arkivverket.Arkade.Core/Metadata/DiasMetsLoader.cs
Original file line number Diff line number Diff line change
Expand Up @@ -20,20 +20,45 @@ public static ArchiveMetadata Load(string diasMetsFile)
if (mets.metsHdr != null)
LoadMetsHdr(archiveMetadata, mets.metsHdr);

if (mets.fileSec?.fileGrp != null)
LoadExtractionDate(archiveMetadata, mets.fileSec?.fileGrp);

MetadataLoader.HandleLabelPlaceholder(archiveMetadata);

return archiveMetadata;
}

/// <summary>
/// Sets <c>ExtractionDate</c> on <paramref name="archiveMetadata"/> from the VERSDATE of the
/// archive-extraction file group found in <paramref name="fileGroups"/> (including nested groups).
/// </summary>
/// <remarks>
/// Nothing is assigned unless exactly one such group is found and it has VERSDATE specified.
/// </remarks>
/// <param name="archiveMetadata">The metadata object to update.</param>
/// <param name="fileGroups">The top-level file groups to search.</param>
private static void LoadExtractionDate(ArchiveMetadata archiveMetadata, IEnumerable<fileGrpType> fileGroups)
{
    var matchingGroups = new List<fileGrpType>();

    foreach (fileGrpType topLevelGroup in fileGroups)
    {
        CollectArchiveExtractionFileGroups(topLevelGroup, matchingGroups);
    }

    // Zero or several candidate groups: leave the extraction date untouched.
    if (matchingGroups.Count != 1)
    {
        return;
    }

    fileGrpType onlyGroup = matchingGroups[0];

    if (onlyGroup is { VERSDATESpecified: true })
    {
        archiveMetadata.ExtractionDate = onlyGroup.VERSDATE;
    }
}

/// <summary>
/// Recursively adds <paramref name="fileGroup"/> and any nested file groups to
/// <paramref name="archiveExtractionFileGroups"/> when their USE attribute equals
/// <c>ArkadeConstants.MetsArchiveExtractionFileGroupUse</c>, ignoring case.
/// </summary>
/// <param name="fileGroup">The file group to inspect; its <c>Items</c> are searched for nested groups.</param>
/// <param name="archiveExtractionFileGroups">Accumulator that receives every matching group.</param>
private static void CollectArchiveExtractionFileGroups(fileGrpType fileGroup, List<fileGrpType> archiveExtractionFileGroups)
{
    // Ordinal, case-insensitive comparison: culture-sensitive ToLower() can mis-compare
    // identifiers under some cultures (e.g. the Turkish dotless i). A null USE never matches.
    if (string.Equals(fileGroup.USE, ArkadeConstants.MetsArchiveExtractionFileGroupUse, System.StringComparison.OrdinalIgnoreCase))
    {
        archiveExtractionFileGroups.Add(fileGroup);
    }

    if (fileGroup.Items == null)
    {
        return;
    }

    // Items may hold non-group elements too; only nested groups are descended into.
    foreach (fileGrpType subFileGroup in fileGroup.Items.OfType<fileGrpType>())
    {
        CollectArchiveExtractionFileGroups(subFileGroup, archiveExtractionFileGroups);
    }
}

/// <summary>
/// Copies the LABEL attribute of <paramref name="mets"/> to the <c>Label</c> of <paramref name="archiveMetadata"/>.
/// </summary>
private static void LoadMetsElementAttributes(ArchiveMetadata archiveMetadata, mets mets)
    => archiveMetadata.Label = mets.LABEL;

private static void LoadMetsHdr(ArchiveMetadata archiveMetadata, metsTypeMetsHdr metsHdr)
{
archiveMetadata.ExtractionDate = metsHdr.CREATEDATE;

archiveMetadata.RecordStatus = metsHdr.RECORDSTATUS;

if (metsHdr.altRecordID != null)
Expand Down
15 changes: 12 additions & 3 deletions src/Arkivverket.Arkade.Core/Metadata/MetadataExampleCreator.cs
Original file line number Diff line number Diff line change
Expand Up @@ -60,16 +60,25 @@ public static ArchiveMetadata Create(MetadataExamplePurpose purpose)
new FileDescription
{
Id = 1,
Name = "someDirectory\\someFileName.pdf",
Extension = "pdf",
Name = "someFileName.xml",
Extension = "xml",
Sha256Checksum = "3B29DFCC4286E50B180AF8F21904C86F8AA42A23C4055C3A71D0512F9AE3886F",
Size = 2325452,
ModifiedTime = new DateTime(2017, 06, 30)
},
new FileDescription
{
Id = 2,
Name = "content\\someFileName.xml",
Extension = "xml",
Sha256Checksum = "000CDCA105BD9722759FF81BCB2977E09E6A9A473735CCC540866989444198A2",
Size = 2427358,
ModifiedTime = new DateTime(2017, 06, 30)
}
},
StartDate = new DateTime(2017, 01, 01),
EndDate = new DateTime(2020, 01, 01),
ExtractionDate = new DateTime(2023, 01, 01),
ExtractionDate = new DateTime(2022, 01, 01),
};

if (purpose == MetadataExamplePurpose.UserExample)
Expand Down
Loading

0 comments on commit a21bc37

Please sign in to comment.