Skip to content
This repository has been archived by the owner on Jan 8, 2021. It is now read-only.

Commit

Permalink
add Patchwork\Utf8::wrapPath() to ease portability
Browse files Browse the repository at this point in the history
  • Loading branch information
nicolas-grekas committed Aug 5, 2014
1 parent 8641674 commit 5084b91
Show file tree
Hide file tree
Showing 6 changed files with 61 additions and 32 deletions.
9 changes: 8 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,11 @@ It can also serve as a documentation source referencing the practical problems
that arise when handling UTF-8 in PHP: Unicode concepts, related algorithms,
bugs in PHP core, workarounds, etc.

Version 1.2 adds best-fit mappings for UTF-8 to *Code Page* approximations.
It also adds Unicode filesystem access under Windows,
using [wfio](https://github.com/kenjiuno/php-wfio) when possible
or a COM based fallback otherwise.

Portability
-----------

Expand Down Expand Up @@ -63,6 +68,7 @@ Some more functions are also provided to help handling UTF-8 strings:
- *toAscii()*: generic UTF-8 to ASCII transliteration,
- *strtocasefold()*: unicode transformation for caseless matching,
- *strtonatfold()*: generic case-sensitive transformation for collation matching,
- *wrapPath()*: unicode filesystem access under Windows and other OSes.

Mirrored string functions are:
*strlen, substr, strpos, stripos, strrpos, strripos, strstr, stristr, strrchr,
Expand Down Expand Up @@ -124,7 +130,8 @@ through. When dealing with badly formed UTF-8, you should not try to fix it
Instead, consider it as [CP-1252](http://wikipedia.org/wiki/CP-1252) and use
`Patchwork\Utf8::utf8_encode()` to get a UTF-8 string. Don't forget also to
choose one unicode normalization form and stick to it. NFC is now the de facto
standard. `Patchwork\Utf8::filter()` implements this behavior.
standard. `Patchwork\Utf8::filter()` implements this behavior: it treats badly
formed UTF-8 as CP-1252 and normalizes the result to NFC.

This library is orthogonal to `mbstring.func_overload` and will not work if the
php.ini setting is enabled.
Expand Down
22 changes: 22 additions & 0 deletions class/Patchwork/Utf8.php
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ class Utf8
{
protected static

$pathPrefix,
$commonCaseFold = array(
array('µ','ſ',"\xCD\x85",'ς',"\xCF\x90","\xCF\x91","\xCF\x95","\xCF\x96","\xCF\xB0","\xCF\xB1","\xCF\xB5","\xE1\xBA\x9B","\xE1\xBE\xBE"),
array('μ','s','ι', 'σ','β', 'θ', 'φ', 'π', 'κ', 'ρ', 'ε', "\xE1\xB9\xA1",'ι' )
Expand Down Expand Up @@ -89,6 +90,27 @@ static function toAscii($s, $subst_chr = '?')
return $s;
}

/**
 * Prefixes a path with the stream wrapper scheme used for filesystem access.
 *
 * The prefix is selected once and cached in static::$pathPrefix:
 *  - "wfio://" when the wfio extension is loaded,
 *  - a randomly named "utf8<N>://" scheme bound to the COM based
 *    Patchwork\Utf8\WindowsStreamWrapper on Windows when the com_dotnet
 *    extension is available and the registration succeeds,
 *  - "file://" in every other case.
 *
 * @param string $path The path to wrap; defaults to an empty string,
 *                     in which case only the prefix is returned.
 *
 * @return string The selected prefix followed by $path.
 */
static function wrapPath($path = '')
{
    if (null === static::$pathPrefix)
    {
        // Native filesystem wrapper, used unless a Unicode aware one is usable.
        $prefix = 'file://';

        if (extension_loaded('wfio'))
        {
            $prefix = 'wfio://';
        }
        elseif (defined('PHP_WINDOWS_VERSION_BUILD') && extension_loaded('com_dotnet'))
        {
            // WindowsStreamWrapper relies on COM, so it is only registered when
            // com_dotnet is loaded. The random suffix presumably avoids clashing
            // with wrappers registered by other code.
            $scheme = 'utf8'.mt_rand();

            // stream_wrapper_register() returns false on failure: keep the
            // "file://" fallback rather than caching an unusable scheme.
            if (stream_wrapper_register($scheme, 'Patchwork\Utf8\WindowsStreamWrapper'))
            {
                $prefix = $scheme . '://';
            }
        }

        static::$pathPrefix = $prefix;
    }

    return static::$pathPrefix . $path;
}

static function filter($var, $normalization_form = 4 /* n::NFC */, $leading_combining = '')
{
switch (gettype($var))
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,12 +14,12 @@
* Unicode UTF-8 aware stream based filesystem access on MS-Windows.
*
* Based on COM Scripting.FileSystemObject object and short paths.
* Enabled by e.g.: stream_wrapper_register('win', 'Patchwork\Utf8\WinFsStreamWrapper');
* See Patchwork\Utf8::wrapPath()
*
* See also https://code.google.com/p/php-wfio/ for a PHP extension
* and comments on http://www.rooftopsolutions.nl/blog/filesystem-encoding-and-php
*/
class WinFsStreamWrapper
class WindowsStreamWrapper
{
public $context;

Expand Down
2 changes: 1 addition & 1 deletion composer.json
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@
},
"extra": {
"branch-alias": {
"dev-master": "1.1-dev"
"dev-master": "1.2-dev"
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -13,19 +13,19 @@
}

/**
* @covers Patchwork\Utf8\WinFsStreamWrapper::<!public>
* @covers Patchwork\Utf8\WindowsStreamWrapper::<!public>
*/
class WinFsStreamWrapperTest extends \PHPUnit_Framework_TestCase
class WindowsStreamWrapperTest extends \PHPUnit_Framework_TestCase
{
protected static $dir;

static function setUpBeforeClass()
{
if (extension_loaded('com_dotnet'))
{
stream_wrapper_register('win', 'Patchwork\Utf8\WinFsStreamWrapper');
stream_wrapper_register('win', 'Patchwork\Utf8\WindowsStreamWrapper');
$dir = __DIR__;
list(,$dir) = \Patchwork\Utf8\WinFsStreamWrapper::fs($dir, false); // Convert $dir to UTF-8
list(,$dir) = \Patchwork\Utf8\WindowsStreamWrapper::fs($dir, false); // Convert $dir to UTF-8
self::$dir = 'win://' . $dir . '/../µ€';
mkdir(self::$dir);
}
Expand All @@ -35,7 +35,7 @@ static function tearDownAfterClass()
{
if (extension_loaded('com_dotnet'))
{
list($fs, $path) = \Patchwork\Utf8\WinFsStreamWrapper::fs(self::$dir);
list($fs, $path) = \Patchwork\Utf8\WindowsStreamWrapper::fs(self::$dir);
if ($fs->FolderExists($path)) $fs->GetFolder($path)->Delete(true);
stream_wrapper_unregister('win');
}
Expand All @@ -47,7 +47,7 @@ function setUp()
}

/**
* @covers Patchwork\Utf8\WinFsStreamWrapper::fs
* @covers Patchwork\Utf8\WindowsStreamWrapper::fs
*/
function testRelDir()
{
Expand All @@ -59,10 +59,10 @@ function testRelDir()
}

/**
* @covers Patchwork\Utf8\WinFsStreamWrapper::dir_opendir
* @covers Patchwork\Utf8\WinFsStreamWrapper::dir_readdir
* @covers Patchwork\Utf8\WinFsStreamWrapper::dir_rewinddir
* @covers Patchwork\Utf8\WinFsStreamWrapper::dir_closedir
* @covers Patchwork\Utf8\WindowsStreamWrapper::dir_opendir
* @covers Patchwork\Utf8\WindowsStreamWrapper::dir_readdir
* @covers Patchwork\Utf8\WindowsStreamWrapper::dir_rewinddir
* @covers Patchwork\Utf8\WindowsStreamWrapper::dir_closedir
*/
function testDir()
{
Expand Down Expand Up @@ -95,8 +95,8 @@ function testDir()
}

/**
* @covers Patchwork\Utf8\WinFsStreamWrapper::rename
* @covers Patchwork\Utf8\WinFsStreamWrapper::unlink
* @covers Patchwork\Utf8\WindowsStreamWrapper::rename
* @covers Patchwork\Utf8\WindowsStreamWrapper::unlink
*/
function testFileOp()
{
Expand All @@ -108,11 +108,11 @@ function testFileOp()
}

/**
* @covers Patchwork\Utf8\WinFsStreamWrapper::stream_open
* @covers Patchwork\Utf8\WinFsStreamWrapper::stream_write
* @covers Patchwork\Utf8\WinFsStreamWrapper::stream_read
* @covers Patchwork\Utf8\WinFsStreamWrapper::stream_eof
* @covers Patchwork\Utf8\WinFsStreamWrapper::stream_close
* @covers Patchwork\Utf8\WindowsStreamWrapper::stream_open
* @covers Patchwork\Utf8\WindowsStreamWrapper::stream_write
* @covers Patchwork\Utf8\WindowsStreamWrapper::stream_read
* @covers Patchwork\Utf8\WindowsStreamWrapper::stream_eof
* @covers Patchwork\Utf8\WindowsStreamWrapper::stream_close
*/
function testFilePutGetContents()
{
Expand All @@ -126,8 +126,8 @@ function testFilePutGetContents()
}

/**
* @covers Patchwork\Utf8\WinFsStreamWrapper::fopen
* @covers Patchwork\Utf8\WinFsStreamWrapper::fclose
* @covers Patchwork\Utf8\WindowsStreamWrapper::fopen
* @covers Patchwork\Utf8\WindowsStreamWrapper::fclose
*/
function testFopenX()
{
Expand All @@ -142,8 +142,8 @@ function testFopenX()
}

/**
* @covers Patchwork\Utf8\WinFsStreamWrapper::mkdir
* @covers Patchwork\Utf8\WinFsStreamWrapper::rmdir
* @covers Patchwork\Utf8\WindowsStreamWrapper::mkdir
* @covers Patchwork\Utf8\WindowsStreamWrapper::rmdir
*/
function testMkdir()
{
Expand Down Expand Up @@ -174,8 +174,8 @@ function testMkdir()
}

/**
* @covers Patchwork\Utf8\WinFsStreamWrapper::mkdir
* @covers Patchwork\Utf8\WinFsStreamWrapper::rmdir
* @covers Patchwork\Utf8\WindowsStreamWrapper::mkdir
* @covers Patchwork\Utf8\WindowsStreamWrapper::rmdir
*/
function testMkdirRecursive()
{
Expand All @@ -189,20 +189,20 @@ function testMkdirRecursive()
}

/**
* @covers Patchwork\Utf8\WinFsStreamWrapper::url_stat
* @covers Patchwork\Utf8\WindowsStreamWrapper::url_stat
*/
function testStat()
{
$this->assertTrue(is_dir(self::$dir));
}

/**
* @covers Patchwork\Utf8\WinFsStreamWrapper::stream_metadata
* @covers Patchwork\Utf8\WinFsStreamWrapper::unlink
* @covers Patchwork\Utf8\WindowsStreamWrapper::stream_metadata
* @covers Patchwork\Utf8\WindowsStreamWrapper::unlink
*/
function testStreamtMetadata()
{
$win = new \Patchwork\Utf8\WinFsStreamWrapper;
$win = new \Patchwork\Utf8\WindowsStreamWrapper;
$f = self::$dir . '/это';

$this->assertFalse(file_exists($f));
Expand Down
2 changes: 1 addition & 1 deletion tests/bootstrap.php
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@

require $dir . '/class/Patchwork/Utf8/Bootup.php';
require $dir . '/class/Patchwork/Utf8/BestFit.php';
require $dir . '/class/Patchwork/Utf8/WinFsStreamWrapper.php';
require $dir . '/class/Patchwork/Utf8/WindowsStreamWrapper.php';
require $dir . '/class/Patchwork/Utf8.php';
require $dir . '/class/Patchwork/TurkishUtf8.php';
require $dir . '/class/Patchwork/PHP/Shim/Xml.php';
Expand Down

0 comments on commit 5084b91

Please sign in to comment.