From b0b3447ef6b3abfb1e7bedc27c198e976893dc3a Mon Sep 17 00:00:00 2001 From: Andrei Khalipau Date: Fri, 3 Feb 2023 15:58:21 -0500 Subject: [PATCH] #143: Multi-byte character titles blank out citations. --- src/Util/StringHelper.php | 10 +- .../humans/bugfix-github-143.txt | 1257 +++++++++++++++++ tests/src/BugfixTest.php | 5 + 3 files changed, 1271 insertions(+), 1 deletion(-) create mode 100644 tests/fixtures/basic-tests/processor-tests/humans/bugfix-github-143.txt diff --git a/src/Util/StringHelper.php b/src/Util/StringHelper.php index 86685aa..843b233 100644 --- a/src/Util/StringHelper.php +++ b/src/Util/StringHelper.php @@ -142,12 +142,20 @@ public static function keepLowerCase($word) public static function mb_ucfirst($string, $encoding = 'UTF-8') {// phpcs:enable $strlen = mb_strlen($string, $encoding); + if ($strlen === 0) { + return ''; + } $firstChar = mb_substr($string, 0, 1, $encoding); $then = mb_substr($string, 1, $strlen - 1, $encoding); /** @noinspection PhpInternalEntityUsedInspection */ + // mb_detect_encoding() is a heuristic only; see https://www.php.net/manual/en/function.mb-detect-encoding.php. + // Only upper-case when the first character has the same code point in the detected ISO encoding; a multibyte + // character would otherwise be corrupted (e.g. 'こ' becomes 'Á�'). A failed detection (false) also skips it. + $originalOrd = mb_ord($firstChar, $encoding); $encoding = mb_detect_encoding($firstChar, self::ISO_ENCODINGS, true); - return in_array($encoding, self::ISO_ENCODINGS) ? + $newOrd = $encoding === false ? false : mb_ord($firstChar, $encoding); + return $originalOrd === $newOrd && in_array($encoding, self::ISO_ENCODINGS, true) ? 
mb_strtoupper($firstChar, $encoding).$then : $firstChar.$then; } // phpcs:disable diff --git a/tests/fixtures/basic-tests/processor-tests/humans/bugfix-github-143.txt b/tests/fixtures/basic-tests/processor-tests/humans/bugfix-github-143.txt new file mode 100644 index 0000000..4535eb8 --- /dev/null +++ b/tests/fixtures/basic-tests/processor-tests/humans/bugfix-github-143.txt @@ -0,0 +1,1257 @@ +>>===== MODE =====>> +bibliography +<<===== MODE =====<< + +>>===== RESULT =====>> +
+
Anderson, John. こんにちは世界.
+
+<<===== RESULT =====<< + +>>===== CSL =====>> + + +<<===== CSL =====<< + +>>===== INPUT =====>> +[ + { + "author": [ + { + "family": "Anderson", + "given": "John" + } + ], + "title": "こんにちは世界", + "id": "ITEM-2", + "type": "book" + } +] +<<===== INPUT =====<< diff --git a/tests/src/BugfixTest.php b/tests/src/BugfixTest.php index 2c52799..add064d 100644 --- a/tests/src/BugfixTest.php +++ b/tests/src/BugfixTest.php @@ -163,4 +163,9 @@ public function testBugfixGithub114() { $this->runTestSuite('bugfix-github-114'); } + + public function testBugfixGithub143() + { + $this->runTestSuite('bugfix-github-143'); + } }