From e0a62de93ecc422efb63161f1c7158ad96b0289d Mon Sep 17 00:00:00 2001 From: Ignacio Alles Date: Wed, 15 May 2024 11:08:48 -0300 Subject: [PATCH 1/3] update xml sample with utf8 char right after whitespace and update utf8 artist test --- tests/Controller/ParserControllerTest.php | 1 + tests/samples/016_utf8_artists.xml | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/tests/Controller/ParserControllerTest.php b/tests/Controller/ParserControllerTest.php index 465ccd5..5e553b5 100644 --- a/tests/Controller/ParserControllerTest.php +++ b/tests/Controller/ParserControllerTest.php @@ -206,6 +206,7 @@ public function testSample016Utf8Artist() { /* @var $srdbt_zero SoundRecordingDetailsByTerritoryType */ $srdbt_zero = $resource_zero->getSoundRecordingDetailsByTerritory()[0]; $this->assertEquals("Mirko Kordić", $srdbt_zero->getDisplayArtist()[0]->getPartyName()[0]->getFullName()); + $this->assertEquals("N. Áutor", $srdbt_zero->getResourceContributor()[1]->getPartyName()[0]->getFullName()); // Check that Reference Title of Sound Recording 3 (idx 2) is Zvečansko kolo /* @var $resource_two \DedexBundle\Entity\Ern382\SoundRecordingType */ diff --git a/tests/samples/016_utf8_artists.xml b/tests/samples/016_utf8_artists.xml index 9a54680..5303854 100644 --- a/tests/samples/016_utf8_artists.xml +++ b/tests/samples/016_utf8_artists.xml @@ -52,7 +52,7 @@ - N.Autor + N. 
Áutor Producer From 4384afae06a3ca4077328f494ca52a1b02c2d24c Mon Sep 17 00:00:00 2001 From: Ignacio Alles Date: Wed, 15 May 2024 12:14:20 -0300 Subject: [PATCH 2/3] do not store trimmed value in lastElement to avoid losing whitespace --- src/Controller/ErnParserController.php | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/src/Controller/ErnParserController.php b/src/Controller/ErnParserController.php index 01c3d31..3c8dfec 100644 --- a/src/Controller/ErnParserController.php +++ b/src/Controller/ErnParserController.php @@ -431,10 +431,11 @@ private function listPossibleFunctionNames($prefix, $tag) { * 'MESSAGERECIPIENT', 'PARTYID'], then will call * $this->ern->getMESSAGEHEADER()->getMESSAGERECIPIENT()->setPARTYID($value) * - * @param type $value Value to set + * @param string $value Value to set */ private function setCurrentElement($value) { - $this->log($value . ": " . implode("->", array_keys($this->pile))); + $value_clean = trim($value); + $this->log($value_clean . ": " . implode("->", array_keys($this->pile))); // Use last element in pile $keys = array_keys($this->pile); @@ -451,12 +452,13 @@ private function setCurrentElement($value) { // xml_parser is known to split values when encountering multibyte chars and call the character_data_handler multiple times if (!empty($this->lastElement) && $this->lastElement[0] === $elem && $this->lastElement[1] === $tag) { $value = $this->lastElement[2] . $value; + $value_clean = trim($value); } [$func_name, $elem] = $this->getValidFunctionName("set", $tag, $elem); // It's possible we're trying to set a text but it's expecting an // object (where text should be placed in value). 
- $value_inst = $this->instanciateTypeFromDoc($elem, $func_name, $value); + $value_inst = $this->instanciateTypeFromDoc($elem, $func_name, $value_clean); $this->lastElement = [$elem, $tag, $value]; @@ -603,13 +605,12 @@ private function instanciateTypeFromDoc($class, $function, $value_default) { * @param string $data */ private function callbackCharacterData($parser, string $data) { - $data_clean = trim($data); - if ($data_clean === "") { + if (trim($data) === "") { // do nothing return; } - $this->setCurrentElement($data_clean); + $this->setCurrentElement($data); } /** From 1fe7671060f78d8faa8f7ee595d04b99cc08abfd Mon Sep 17 00:00:00 2001 From: Ignacio Alles Date: Mon, 3 Jun 2024 12:08:59 -0300 Subject: [PATCH 3/3] clean value and log after concatenation --- src/Controller/ErnParserController.php | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/src/Controller/ErnParserController.php b/src/Controller/ErnParserController.php index 3c8dfec..e9fd527 100644 --- a/src/Controller/ErnParserController.php +++ b/src/Controller/ErnParserController.php @@ -434,9 +434,6 @@ private function listPossibleFunctionNames($prefix, $tag) { * @param string $value Value to set */ private function setCurrentElement($value) { - $value_clean = trim($value); - $this->log($value_clean . ": " . implode("->", array_keys($this->pile))); - // Use last element in pile $keys = array_keys($this->pile); @@ -452,8 +449,9 @@ private function setCurrentElement($value) { // xml_parser is known to split values when encountering multibyte chars and call the character_data_handler multiple times if (!empty($this->lastElement) && $this->lastElement[0] === $elem && $this->lastElement[1] === $tag) { $value = $this->lastElement[2] . $value; - $value_clean = trim($value); } + $value_clean = trim($value); + $this->log($value_clean . ": " . 
implode("->", array_keys($this->pile))); [$func_name, $elem] = $this->getValidFunctionName("set", $tag, $elem); // It's possible we're trying to set a text but it's expecting an