Skip to content

Commit

Permalink
TriGParser, NQuadsParser and JsonLdParser skip UTF-8 BOM in input streams
Browse files Browse the repository at this point in the history

(closes #10)
  • Loading branch information
zozlak committed Sep 14, 2024
1 parent f8d3214 commit 3f5a315
Show file tree
Hide file tree
Showing 6 changed files with 81 additions and 3 deletions.
3 changes: 3 additions & 0 deletions src/quickRdfIo/JsonLdParser.php
Original file line number Diff line number Diff line change
Expand Up @@ -114,6 +114,9 @@ public function next(): void {
}

public function parse(string $input): iQuadIterator {
    // A leading UTF-8 byte order mark (EF BB BF) is not part of the document
    // itself, so it is stripped before the input is handed over for conversion.
    $utf8Bom = "\xEF\xBB\xBF";
    if (strncmp($input, $utf8Bom, 3) === 0) {
        $input = substr($input, 3);
    }

    // The parser's base URI is passed to the converter as the 'base' option;
    // the conversion result is kept on the parser, which is returned as the iterator.
    $options     = ['base' => $this->baseUri];
    $this->quads = JsonLD::toRdf($input, $options);

    return $this;
}
Expand Down
5 changes: 4 additions & 1 deletion src/quickRdfIo/NQuadsParser.php
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,9 @@
*/
class NQuadsParser implements iParser, iQuadIterator {

use TmpStreamParserTrait;
use StreamSkipBomTrait;

Check failure on line 47 in src/quickRdfIo/NQuadsParser.php

View workflow job for this annotation

GitHub Actions / phpstan

Class quickRdfIo\NQuadsParser uses unknown trait quickRdfIo\StreamSkipBomTrait.

const MODE_TRIPLES = 1;
const MODE_QUADS = 2;
const MODE_TRIPLES_STAR = 3;
Expand All @@ -66,7 +69,6 @@ class NQuadsParser implements iParser, iQuadIterator {
const STAR_START = '%\\G\s*<<%';
const STAR_END = '%\\G\s*>>%';
const READ_BUF_SIZE = 8096;
use TmpStreamParserTrait;

/**
* See https://www.w3.org/TR/n-quads/#grammar-production-ECHAR
Expand Down Expand Up @@ -233,6 +235,7 @@ public function rewind(): void {
if ($this->input->tell() !== 0) {
$this->input->rewind();
}
$this->skipBom($this->input);

Check failure on line 238 in src/quickRdfIo/NQuadsParser.php

View workflow job for this annotation

GitHub Actions / phpstan

Call to an undefined method quickRdfIo\NQuadsParser::skipBom().
if ($this->mode === self::MODE_TRIPLES || $this->mode === self::MODE_QUADS) {
$this->quads = $this->quadGenerator();
} else {
Expand Down
2 changes: 2 additions & 0 deletions src/quickRdfIo/TriGParser.php
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,7 @@
class TriGParser implements iParser, iQuadIterator {

use TmpStreamParserTrait;
use StreamSkipBomTrait;

const CHUNK_SIZE = 8192;

Expand Down Expand Up @@ -181,6 +182,7 @@ public function rewind(): void {
if ($this->input->tell() !== 0) {
$this->input->rewind();
}
$this->skipBom($this->input);
$this->next();
}

Expand Down
22 changes: 20 additions & 2 deletions tests/JsonLdTest.php
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@

namespace quickRdfIo;

use quickRdf\DataFactory;
use quickRdf\DataFactory as DF;
use quickRdf\Dataset;

/**
Expand All @@ -42,7 +42,7 @@ class JsonLdTest extends \PHPUnit\Framework\TestCase {
private JsonLdSerializer $serializer;

public function setUp(): void {
$this->df = new DataFactory();
$this->df = new DF();
$this->refParser = new NQuadsParser($this->df, false, NQuadsParser::MODE_QUADS);
$this->parser = new JsonLdParser($this->df);
$this->serializer = new JsonLdSerializer(null);
Expand Down Expand Up @@ -71,4 +71,22 @@ public function testBig(): void {
$this->assertEquals($ref->count(), $dataset->count());
$this->assertTrue($ref->equals($dataset));
}

/**
* https://github.com/sweetrdf/quickRdfIo/issues/10
*/
public function testBom(): void {
    // Reference data: the single quad expected to survive the round trip.
    $quad = DF::quad(DF::namedNode('http://foo'), DF::namedNode('http://bar'), DF::namedNode('http://baz'));
    $ref  = new Dataset();
    $ref->add($quad);

    $output = tmpfile();
    if ($output === false) {
        $this->fail('Unable to create a temporary file');
    }

    try {
        // Write a UTF-8 BOM first so the serialized document starts with it.
        fwrite($output, "\xEF\xBB\xBF");
        $this->serializer->serializeStream($output, $ref);

        // Read the stream back from the very beginning, BOM included.
        fseek($output, 0);
        $dataset = new Dataset();
        $dataset->add($this->parser->parseStream($output));

        $this->assertCount(1, $dataset);
        $this->assertTrue($quad->equals($dataset[0]));
    } finally {
        // Release the temporary file even when an assertion fails; the guard
        // avoids closing a handle that has already been closed elsewhere.
        if (is_resource($output)) {
            fclose($output);
        }
    }
}
}
26 changes: 26 additions & 0 deletions tests/NQuadsParserTest.php
Original file line number Diff line number Diff line change
Expand Up @@ -264,4 +264,30 @@ public function testIssue7(): void {
$this->assertCount(2, $dataset);
}
}

/**
* https://github.com/sweetrdf/quickRdfIo/issues/10
*/
public function testBom(): void {
    $df     = new DF();
    $parser = new NQuadsParser($df);

    // Fixture file name => encoding name expected in the exception message.
    $inputs = [
        'issue10_utf16be.nq' => "UTF-16 BE",
        'issue10_utf32le.nq' => "UTF-32 LE",
        'issue10_utf7.nq'    => "UTF-7",
    ];
    foreach ($inputs as $file => $enc) {
        try {
            // Consume the returned iterator so a check performed only when
            // reading starts (e.g. in rewind()) is actually exercised.
            foreach ($parser->parseStream(fopen(__DIR__ . '/files/' . $file, 'r')) as $unused) {
                // nothing to do - only draining the iterator
            }
            // Without this the test would pass silently when no exception is thrown.
            $this->fail("No exception thrown for $enc encoded input");
        } catch (RdfIoException $ex) {
            $this->assertEquals("Input stream has wrong encoding $enc", $ex->getMessage());
        }
    }

    // An UTF-8 input starting with a BOM must be parsed as if the BOM was not there.
    $dataset = new \quickRdf\Dataset();
    $dataset->add($parser->parseStream(fopen(__DIR__ . '/files/issue10_utf8.nq', 'r')));
    $this->assertCount(1, $dataset);
    $q = DF::quad(DF::namedNode('http://foo'), DF::namedNode('http://bar'), DF::namedNode('http://baz'));
    $this->assertTrue($q->equals($dataset[0]));
}
}
26 changes: 26 additions & 0 deletions tests/TriGParserTest.php
Original file line number Diff line number Diff line change
Expand Up @@ -110,4 +110,30 @@ public function testUtfChunk(): void {
$triples = iterator_to_array($iter);
$this->assertCount(148, $triples);
}

/**
* https://github.com/sweetrdf/quickRdfIo/issues/10
*/
public function testBom(): void {
    $df     = new DF();
    $parser = new TriGParser($df);

    // Fixture file name => encoding name expected in the exception message.
    $inputs = [
        'issue10_utf16be.nq' => "UTF-16 BE",
        'issue10_utf32le.nq' => "UTF-32 LE",
        'issue10_utf7.nq'    => "UTF-7",
    ];
    foreach ($inputs as $file => $enc) {
        try {
            // Consume the returned iterator so a check performed only when
            // reading starts (e.g. in rewind()) is actually exercised.
            foreach ($parser->parseStream(fopen(__DIR__ . '/files/' . $file, 'r')) as $unused) {
                // nothing to do - only draining the iterator
            }
            // Without this the test would pass silently when no exception is thrown.
            $this->fail("No exception thrown for $enc encoded input");
        } catch (RdfIoException $ex) {
            $this->assertEquals("Input stream has wrong encoding $enc", $ex->getMessage());
        }
    }

    // An UTF-8 input starting with a BOM must be parsed as if the BOM was not there.
    $dataset = new \quickRdf\Dataset();
    $dataset->add($parser->parseStream(fopen(__DIR__ . '/files/issue10_utf8.nq', 'r')));
    $this->assertCount(1, $dataset);
    $q = DF::quad(DF::namedNode('http://foo'), DF::namedNode('http://bar'), DF::namedNode('http://baz'));
    $this->assertTrue($q->equals($dataset[0]));
}
}

0 comments on commit 3f5a315

Please sign in to comment.