Skip to content

Commit

Permalink
[MAINTENANCE] Split magicGetPhysicalStructure() function to increas…
Browse files Browse the repository at this point in the history
…e readability (kitodo#1267)

Co-authored-by: Sebastian Meyer <[email protected]>
  • Loading branch information
beatrycze-volk and sebastian-meyer authored Jul 5, 2024
1 parent 231f76c commit a47a53e
Showing 1 changed file with 154 additions and 86 deletions.
240 changes: 154 additions & 86 deletions Classes/Common/MetsDocument.php
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,9 @@

namespace Kitodo\Dlf\Common;

use \DOMElement;
use \DOMXPath;
use \SimpleXMLElement;
use TYPO3\CMS\Core\Configuration\ExtensionConfiguration;
use TYPO3\CMS\Core\Database\ConnectionPool;
use TYPO3\CMS\Core\Database\Query\Restriction\HiddenRestriction;
Expand Down Expand Up @@ -53,14 +56,14 @@
* @property-read string $thumbnail this holds the document's thumbnail location
* @property bool $thumbnailLoaded flag with information if the thumbnail is loaded
* @property-read string $toplevelId this holds the toplevel structure's "@ID" (METS) or the manifest's "@id" (IIIF)
* @property \SimpleXMLElement $xml this holds the whole XML file as \SimpleXMLElement object
* @property SimpleXMLElement $xml this holds the whole XML file as SimpleXMLElement object
* @property-read array $mdSec associative array of METS metadata sections indexed by their IDs.
* @property bool $mdSecLoaded flag with information if the array of METS metadata sections is loaded
* @property-read array $dmdSec subset of `$mdSec` storing only the dmdSec entries; kept for compatibility.
* @property-read array $fileGrps this holds the file ID -> USE concordance
* @property bool $fileGrpsLoaded flag with information if file groups array is loaded
* @property-read array $fileInfos additional information about files (e.g., ADMID), indexed by ID.
* @property-read \SimpleXMLElement $mets this holds the XML file's METS part as \SimpleXMLElement object
* @property-read SimpleXMLElement $mets this holds the XML file's METS part as SimpleXMLElement object
* @property-read string $parentHref URL of the parent document (determined via mptr element), or empty string if none is available
*/
final class MetsDocument extends AbstractDocument
Expand Down Expand Up @@ -126,9 +129,9 @@ final class MetsDocument extends AbstractDocument

/**
* @access protected
* @var \SimpleXMLElement This holds the XML file's METS part as \SimpleXMLElement object
* @var SimpleXMLElement This holds the XML file's METS part as SimpleXMLElement object
*/
protected \SimpleXMLElement $mets;
protected SimpleXMLElement $mets;

/**
* @access protected
Expand Down Expand Up @@ -297,12 +300,12 @@ public function getLogicalStructure(string $id, bool $recursive = false): array
*
* @access protected
*
* @param \SimpleXMLElement $structure The logical structure node
* @param SimpleXMLElement $structure The logical structure node
* @param bool $recursive Whether to include the child elements
*
* @return array Array of the element's id, label, type and physical page indexes/mptr link
*/
protected function getLogicalStructureInfo(\SimpleXMLElement $structure, bool $recursive = false): array
protected function getLogicalStructureInfo(SimpleXMLElement $structure, bool $recursive = false): array
{
$attributes = $structure->attributes();

Expand Down Expand Up @@ -341,11 +344,61 @@ protected function getLogicalStructureInfo(\SimpleXMLElement $structure, bool $r
$this->magicGetSmLinks();
// Load physical structure.
$this->magicGetPhysicalStructure();
// Get the physical page or external file this structure element is pointing at.
// Is there a mptr node?
if (count($structure->children('http://www.loc.gov/METS/')->mptr)) {

$this->getPage($details, $structure->children('http://www.loc.gov/METS/')->mptr);
$this->getFiles($details, $structure->children('http://www.loc.gov/METS/')->fptr);

// Keep for later usage.
$this->logicalUnits[$details['id']] = $details;
// Walk the structure recursively? And are there any children of the current element?
if (
$recursive
&& count($structure->children('http://www.loc.gov/METS/')->div)
) {
$details['children'] = [];
foreach ($structure->children('http://www.loc.gov/METS/')->div as $child) {
// Repeat for all children.
$details['children'][] = $this->getLogicalStructureInfo($child, true);
}
}
return $details;
}

/**
 * Get the files this structure element is pointing at.
 *
 * For every file pointer whose FILEID has a non-empty entry in the file
 * group lookup, a "USE => FILEID" pair is stored in `$details['files']`.
 *
 * @access private
 *
 * @param array $details passed as reference
 * @param ?SimpleXMLElement $filePointers the file pointer nodes of the structure element, or null if there are none
 *
 * @return void
 */
private function getFiles(array &$details, ?SimpleXMLElement $filePointers): void
{
    $fileUse = $this->magicGetFileGrps();
    // The parameter is nullable, but iterating over null raises a warning,
    // so fall back to an empty list when no file pointers were given.
    foreach ($filePointers ?? [] as $filePointer) {
        $fileId = (string) $filePointer->attributes()->FILEID;
        // Check if file has valid @USE attribute.
        if (!empty($fileUse[$fileId])) {
            $details['files'][$fileUse[$fileId]] = $fileId;
        }
    }
}

/**
* Get the physical page or external file this structure element is pointing at.
*
* @access private
*
* @param array $details passed as reference
* @param ?SimpleXMLElement $metsPointers
*
* @return void
*/
private function getPage(array &$details, ?SimpleXMLElement $metsPointers): void
{
if (count($metsPointers)) {
// Yes. Get the file reference.
$details['points'] = (string) $structure->children('http://www.loc.gov/METS/')->mptr[0]->attributes('http://www.w3.org/1999/xlink')->href;
$details['points'] = (string) $metsPointers[0]->attributes('http://www.w3.org/1999/xlink')->href;
} elseif (
!empty($this->physicalStructure)
&& array_key_exists($details['id'], $this->smLinks['l2p'])
Expand All @@ -363,29 +416,6 @@ protected function getLogicalStructureInfo(\SimpleXMLElement $structure, bool $r
if ($details['thumbnailId'] === null) {
unset($details['thumbnailId']);
}
// Get the files this structure element is pointing at.
$fileUse = $this->magicGetFileGrps();
// Get the file representations from fileSec node.
foreach ($structure->children('http://www.loc.gov/METS/')->fptr as $fptr) {
// Check if file has valid @USE attribute.
if (!empty($fileUse[(string) $fptr->attributes()->FILEID])) {
$details['files'][$fileUse[(string) $fptr->attributes()->FILEID]] = (string) $fptr->attributes()->FILEID;
}
}
// Keep for later usage.
$this->logicalUnits[$details['id']] = $details;
// Walk the structure recursively? And are there any children of the current element?
if (
$recursive
&& count($structure->children('http://www.loc.gov/METS/')->div)
) {
$details['children'] = [];
foreach ($structure->children('http://www.loc.gov/METS/')->div as $child) {
// Repeat for all children.
$details['children'][] = $this->getLogicalStructureInfo($child, true);
}
}
return $details;
}

/**
Expand Down Expand Up @@ -583,7 +613,7 @@ private function extractAndProcessMetadata(string $dmdId, string $mdSectionType,
$additionalMetadata = $this->getAdditionalMetadataFromDatabase($cPid, $dmdId);
// We need a \DOMDocument here, because SimpleXML doesn't support XPath functions properly.
$domNode = dom_import_simplexml($this->mdSec[$dmdId]['xml']);
$domXPath = new \DOMXPath($domNode->ownerDocument);
$domXPath = new DOMXPath($domNode->ownerDocument);
$this->registerNamespaces($domXPath);

$this->processAdditionalMetadata($additionalMetadata, $domXPath, $domNode, $metadata);
Expand Down Expand Up @@ -613,13 +643,13 @@ private function hasMetadataSection(array $metadataSections, string $currentMeta
* @access private
*
* @param array $additionalMetadata
* @param \DOMXPath $domXPath
* @param \DOMElement $domNode
* @param DOMXPath $domXPath
* @param DOMElement $domNode
* @param array $metadata
*
* @return void
*/
private function processAdditionalMetadata(array $additionalMetadata, \DOMXPath $domXPath, \DOMElement $domNode, array &$metadata): void
private function processAdditionalMetadata(array $additionalMetadata, DOMXPath $domXPath, DOMElement $domNode, array &$metadata): void
{
foreach ($additionalMetadata as $resArray) {
$this->setMetadataFieldValues($resArray, $domXPath, $domNode, $metadata);
Expand All @@ -634,13 +664,13 @@ private function processAdditionalMetadata(array $additionalMetadata, \DOMXPath
* @access private
*
* @param array $resArray
* @param \DOMXPath $domXPath
* @param \DOMElement $domNode
* @param DOMXPath $domXPath
* @param DOMElement $domNode
* @param array $metadata
*
* @return void
*/
private function setMetadataFieldValues(array $resArray, \DOMXPath $domXPath, \DOMElement $domNode, array &$metadata): void
private function setMetadataFieldValues(array $resArray, DOMXPath $domXPath, DOMElement $domNode, array &$metadata): void
{
if ($resArray['format'] > 0 && !empty($resArray['xpath'])) {
$values = $domXPath->evaluate($resArray['xpath'], $domNode);
Expand Down Expand Up @@ -678,13 +708,13 @@ private function setDefaultMetadataValue(array $resArray, array &$metadata): voi
* @access private
*
* @param array $resArray
* @param \DOMXPath $domXPath
* @param \DOMElement $domNode
* @param DOMXPath $domXPath
* @param DOMElement $domNode
* @param array $metadata
*
* @return void
*/
private function setSortableMetadataValue(array $resArray, \DOMXPath $domXPath, \DOMElement $domNode, array &$metadata): void
private function setSortableMetadataValue(array $resArray, DOMXPath $domXPath, DOMElement $domNode, array &$metadata): void
{
if (!empty($metadata[$resArray['index_name']]) && $resArray['is_sortable']) {
if ($resArray['format'] > 0 && !empty($resArray['xpath_sorting'])) {
Expand Down Expand Up @@ -991,7 +1021,7 @@ protected function ensureHasFulltextIsSet(): void
protected function setPreloadedDocument($preloadedDocument): bool
{

if ($preloadedDocument instanceof \SimpleXMLElement) {
if ($preloadedDocument instanceof SimpleXMLElement) {
$this->xml = $preloadedDocument;
return true;
}
Expand All @@ -1001,7 +1031,7 @@ protected function setPreloadedDocument($preloadedDocument): bool
/**
* @see AbstractDocument::getDocument()
*/
protected function getDocument(): \SimpleXMLElement
protected function getDocument(): SimpleXMLElement
{
// Hand out the METS part of the XML file held in $this->mets.
return $this->mets;
}
Expand Down Expand Up @@ -1074,11 +1104,11 @@ protected function magicGetDmdSec(): array
*
* @access protected
*
* @param \SimpleXMLElement $element
* @param SimpleXMLElement $element
*
* @return array|null The processed metadata section
*/
protected function processMdSec(\SimpleXMLElement $element): ?array
protected function processMdSec(SimpleXMLElement $element): ?array
{
$mdId = (string) $element->attributes()->ID;
if (empty($mdId)) {
Expand Down Expand Up @@ -1188,9 +1218,9 @@ protected function prepareMetadataArray(int $cPid): void
*
* @access protected
*
* @return \SimpleXMLElement The XML's METS part as \SimpleXMLElement object
* @return SimpleXMLElement The XML's METS part as SimpleXMLElement object
*/
protected function magicGetMets(): \SimpleXMLElement
protected function magicGetMets(): SimpleXMLElement
{
// Plain accessor: returns the stored METS part without any further processing.
return $this->mets;
}
Expand Down Expand Up @@ -1219,48 +1249,86 @@ protected function magicGetPhysicalStructure(): array
$this->physicalStructureInfo[$id]['orderlabel'] = isset($firstNode['ORDERLABEL']) ? (string) $firstNode['ORDERLABEL'] : '';
$this->physicalStructureInfo[$id]['type'] = (string) $firstNode['TYPE'];
$this->physicalStructureInfo[$id]['contentIds'] = isset($firstNode['CONTENTIDS']) ? (string) $firstNode['CONTENTIDS'] : '';
// Get the file representations from fileSec node.
foreach ($physNode[0]->children('http://www.loc.gov/METS/')->fptr as $fptr) {
// Check if file has valid @USE attribute.
if (!empty($fileUse[(string) $fptr->attributes()->FILEID])) {
$this->physicalStructureInfo[$id]['files'][$fileUse[(string) $fptr->attributes()->FILEID]] = (string) $fptr->attributes()->FILEID;
}
}
// Build the physical elements' array from the physical structMap node.
$elements = [];
foreach ($elementNodes as $elementNode) {
$id = (string) $elementNode['ID'];
$order = (int) $elementNode['ORDER'];
$elements[$order] = $id;
$this->physicalStructureInfo[$elements[$order]]['id'] = $id;
$this->physicalStructureInfo[$elements[$order]]['dmdId'] = isset($elementNode['DMDID']) ? (string) $elementNode['DMDID'] : '';
$this->physicalStructureInfo[$elements[$order]]['admId'] = isset($elementNode['ADMID']) ? (string) $elementNode['ADMID'] : '';
$this->physicalStructureInfo[$elements[$order]]['order'] = isset($elementNode['ORDER']) ? (string) $elementNode['ORDER'] : '';
$this->physicalStructureInfo[$elements[$order]]['label'] = isset($elementNode['LABEL']) ? (string) $elementNode['LABEL'] : '';
$this->physicalStructureInfo[$elements[$order]]['orderlabel'] = isset($elementNode['ORDERLABEL']) ? (string) $elementNode['ORDERLABEL'] : '';
$this->physicalStructureInfo[$elements[$order]]['type'] = (string) $elementNode['TYPE'];
$this->physicalStructureInfo[$elements[$order]]['contentIds'] = isset($elementNode['CONTENTIDS']) ? (string) $elementNode['CONTENTIDS'] : '';
// Get the file representations from fileSec node.
foreach ($elementNode->children('http://www.loc.gov/METS/')->fptr as $fptr) {
// Check if file has valid @USE attribute.
if (!empty($fileUse[(string) $fptr->attributes()->FILEID])) {
$this->physicalStructureInfo[$elements[$order]]['files'][$fileUse[(string) $fptr->attributes()->FILEID]] = (string) $fptr->attributes()->FILEID;
}
}
}
// Sort array by keys (= @ORDER).
ksort($elements);
// Set total number of pages/tracks.
$this->numPages = count($elements);
// Merge and re-index the array to get numeric indexes.
array_unshift($elements, $id);
$this->physicalStructure = $elements;

$this->getFileRepresentation($id, $firstNode);

$this->physicalStructure = $this->getPhysicalElements($elementNodes, $fileUse);
}
$this->physicalStructureLoaded = true;
}
return $this->physicalStructure;
}

/**
 * Collect the file representations referenced by a physical node.
 *
 * Each file pointer of the node whose FILEID has a non-empty entry in the
 * file group lookup is stored as "USE => FILEID" under
 * `$this->physicalStructureInfo[$id]['files']`.
 *
 * @access private
 *
 * @param string $id key of the physical structure info entry to fill
 * @param SimpleXMLElement $physicalNode node whose file pointers are read
 *
 * @return void
 */
private function getFileRepresentation(string $id, SimpleXMLElement $physicalNode): void
{
    // Lookup table indexed by file ID.
    $useByFileId = $this->magicGetFileGrps();
    $filePointers = $physicalNode->children('http://www.loc.gov/METS/')->fptr;

    foreach ($filePointers as $pointer) {
        $fileId = (string) $pointer->attributes()->FILEID;
        // Skip files without a valid @USE attribute.
        if (empty($useByFileId[$fileId])) {
            continue;
        }
        $this->physicalStructureInfo[$id]['files'][$useByFileId[$fileId]] = $fileId;
    }
}

/**
 * Build the list of physical elements from the physical structMap nodes.
 *
 * For every node an entry in `$this->physicalStructureInfo` (keyed by the
 * node's ID) is filled with its attributes and file representations, and
 * `$this->numPages` is set to the number of distinct ORDER values found.
 *
 * @access private
 *
 * @param array $elementNodes nodes of the physical elements
 * @param array $fileUse lookup table indexed by file ID
 *
 * @return array element IDs sorted by ORDER, re-indexed numerically, with one extra ID prepended at index 0
 */
private function getPhysicalElements(array $elementNodes, array $fileUse): array
{
    // Optional attributes (info key => METS attribute); a missing one yields an empty string.
    $optionalAttributes = [
        'dmdId' => 'DMDID',
        'admId' => 'ADMID',
        'order' => 'ORDER',
        'label' => 'LABEL',
        'orderlabel' => 'ORDERLABEL',
    ];
    $pages = [];
    $lastId = '';

    foreach ($elementNodes as $node) {
        $lastId = (string) $node['ID'];
        // Nodes sharing the same ORDER overwrite each other here.
        $pages[(int) $node['ORDER']] = $lastId;

        $info = &$this->physicalStructureInfo[$lastId];
        $info['id'] = $lastId;
        foreach ($optionalAttributes as $key => $attribute) {
            $info[$key] = isset($node[$attribute]) ? (string) $node[$attribute] : '';
        }
        $info['type'] = (string) $node['TYPE'];
        $info['contentIds'] = isset($node['CONTENTIDS']) ? (string) $node['CONTENTIDS'] : '';

        // Get the file representations from fileSec node.
        foreach ($node->children('http://www.loc.gov/METS/')->fptr as $pointer) {
            $fileId = (string) $pointer->attributes()->FILEID;
            // Skip files without a valid @USE attribute.
            if (empty($fileUse[$fileId])) {
                continue;
            }
            $info['files'][$fileUse[$fileId]] = $fileId;
        }
        // Drop the reference so the next iteration cannot write into this entry.
        unset($info);
    }

    // Sort array by keys (= @ORDER).
    ksort($pages);
    // Set total number of pages/tracks.
    $this->numPages = count($pages);
    // Prepending re-indexes the array to get numeric indexes.
    // NOTE(review): the prepended value is the ID of the last node iterated (or '' when
    // there are no nodes); presumably index 0 should hold the ID of the physical
    // sequence itself - confirm against the caller.
    array_unshift($pages, $lastId);

    return $pages;
}

/**
* @see AbstractDocument::magicGetSmLinks()
*/
Expand Down Expand Up @@ -1416,7 +1484,7 @@ public function magicGetParentHref(): string
*/
public function __sleep(): array
{
    // SimpleXMLElement objects can't be serialized, so the XML is stored as a
    // string and only the properties listed below are serialized.
    $xmlString = $this->xml->asXML();
    $this->asXML = $xmlString;

    return ['pid', 'recordId', 'parentId', 'asXML'];
}
Expand Down

0 comments on commit a47a53e

Please sign in to comment.