From f45ba8bc7962b7356847724989a6949b68d975a2 Mon Sep 17 00:00:00 2001 From: Nicolas Grekas Date: Wed, 26 Mar 2014 11:47:25 +0100 Subject: [PATCH] fix #18 u::wordwrap() now relies on native behavior --- CHANGELOG.md | 5 ++ class/Patchwork/Utf8.php | 79 +++++++++++++----------------- tests/Patchwork/Tests/Utf8Test.php | 6 +++ 3 files changed, 45 insertions(+), 45 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index f843302..3289f8d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,8 @@ +## v1.1.21 (2014-03-26) + +- fix #18 u::wordwrap() now relies on native behavior + +## v1.1.20 (2014-03-01) ## v1.1.19 (2014-03-01) - fix mb_regex_encoding() being disabled on some hosting providers diff --git a/class/Patchwork/Utf8.php b/class/Patchwork/Utf8.php index 602526a..f260679 100644 --- a/class/Patchwork/Utf8.php +++ b/class/Patchwork/Utf8.php @@ -257,65 +257,54 @@ static function strtoupper($s) {return mb_strtoupper($s, 'UTF-8');} static function wordwrap($s, $width = 75, $break = "\n", $cut = false) { - // This implementation could be extended to handle unicode word boundaries, - // but that's enough work for today (see http://www.unicode.org/reports/tr29/) + if (false === wordwrap('-', $width, $break, $cut)) return false; - $width = (int) $width; - $s = explode($break, $s); + is_string($break) or $break = (string) $break; + $w = ''; + $s = explode($break, $s); $iLen = count($s); - $result = array(); - $line = ''; - $lineLen = 0; + $chars = array(); + + if (1 === $iLen && '' === $s[0]) + return ''; for ($i = 0; $i < $iLen; ++$i) { - $words = explode(' ', $s[$i]); - $line && $result[] = $line; - $lineLen = grapheme_strlen($line); - $jLen = count($words); - - for ($j = 0; $j < $jLen; ++$j) + if ($i) { - $w = $words[$j]; - $wLen = grapheme_strlen($w); - - if ($lineLen + $wLen < $width) - { - if ($j) $line .= ' '; - $line .= $w; - $lineLen += $wLen + 1; - } - else - { - if ($j || $i) $result[] = $line; - $line = ''; - $lineLen = 0; + $chars[] = $break; + $w .= '#'; + } - if ($cut && $wLen > $width) - { - $w = self::str_split($w); + $c = $s[$i]; + unset($s[$i]); - do - { - $result[] = implode('', array_slice($w, 0, $width)); - $line = implode('', $w = array_slice($w, $width)); - $lineLen = $wLen -= $width; - } - while ($wLen > $width); + foreach (self::str_split($c) as $c) + { + $chars[] = $c; + $w .= ' ' === $c ? ' ' : '?'; + } + } - $w = implode('', $w); - } + $s = ''; + $j = 0; + $b = $i = -1; + $w = wordwrap($w, $width, '#', $cut); - $line = $w; - $lineLen = $wLen; - } + while (false !== $b = strpos($w, '#', $b+1)) + { + for (++$i; $i < $b; ++$i) + { + $s .= $chars[$j]; + unset($chars[$j++]); } - } - $line && $result[] = $line; + if ($break === $chars[$j] || ' ' === $chars[$j]) unset($chars[$j++]); + $s .= $break; + } - return implode($break, $result); + return $s . implode('', $chars); } static function chr($c) diff --git a/tests/Patchwork/Tests/Utf8Test.php b/tests/Patchwork/Tests/Utf8Test.php index f721461..1b16b93 100644 --- a/tests/Patchwork/Tests/Utf8Test.php +++ b/tests/Patchwork/Tests/Utf8Test.php @@ -212,6 +212,12 @@ function testStrstr() */ function testWordwrap() { + $text = "string\nwith\nnew\nlines"; + $this->assertSame( $text, u::wordwrap($text) ); + + $text = "a #b"; + $this->assertSame( wordwrap($text, 2, '#', false), u::wordwrap($text, 2, '#', false) ); + $text = 'A very long woooooooooooord.'; $this->assertSame( wordwrap($text, 8, "\n", false), u::wordwrap($text, 8, "\n", false) );