Skip to content

Commit

Permalink
add XMLParser
Browse files Browse the repository at this point in the history
  • Loading branch information
sursir committed Nov 6, 2015
1 parent 1502548 commit 3222e15
Show file tree
Hide file tree
Showing 2 changed files with 146 additions and 13 deletions.
111 changes: 111 additions & 0 deletions stringXMLParser.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,111 @@
<?php

// Report every category of PHP error, warning and notice while this script runs.
error_reporting(E_ALL);

/* Data can be sent to coroutines using `$coroutine->send($data)`. The sent data will then
* be the result of the `yield` expression. Thus it can be received using code like
* `$data = yield;`.
*/

/* What we're building in this script is a coroutine-based streaming XML parser. The PHP
* extension for parsing streamed XML is xml_parser. It is used by defining a set of
* callback functions for various events (like start tag, end tag, content).
*
* This event model makes the parsing process very complicated, because you basically
* have to implement your own state machine (which is a lot of boilerplate code the
* more complicated the XML gets).
*
* To solve this problem, we build a wrapper (the following function), which redirects
* the events to a coroutine ($target). This is done simply using
* `$target->send([$eventName, $data])`.
*/
/**
 * Coroutine that feeds XML chunks to PHP's event-based XML parser and forwards
 * every parser event to $target.
 *
 * Chunks are pushed in with `$parser->send($chunk)`. Each parser callback is
 * turned into exactly one `$target->send()` call:
 *   - ['start', [$name, $attributes]] for an opening tag,
 *   - ['end', $name]                  for a closing tag,
 *   - ['text', $text]                 for character data.
 *
 * The coroutine finishes when it receives an empty value (null, false, '', ...).
 * The one-character chunk '0' is treated as data, not as an end marker.
 *
 * @param object $target Coroutine (anything exposing send()) that consumes the events.
 * @return Generator
 * @throws Exception If xml_parse() reports a failure for a chunk.
 */
function streamingXMLParser($target) {
    $xmlParser = xml_parser_create();
    xml_set_element_handler(
        $xmlParser,
        function ($xmlParser, $name, array $attributes) use ($target) {
            $target->send(['start', [$name, $attributes]]);
        },
        function ($xmlParser, $name) use ($target) {
            $target->send(['end', $name]);
        }
    );
    xml_set_character_data_handler(
        $xmlParser,
        function ($xmlParser, $text) use ($target) {
            $target->send(['text', $text]);
        }
    );

    // try/finally guarantees the parser is released on every exit path: normal
    // end of stream, a parse error, or the generator being destroyed mid-stream.
    try {
        while (true) {
            $data = yield;

            // PHP considers the string '0' falsy, but it is a perfectly valid
            // chunk of XML text, so it must not terminate the stream.
            if (!$data && $data !== '0') {
                break;
            }

            if (!xml_parse($xmlParser, $data)) {
                throw new Exception(sprintf(
                    'XML error "%s" on line %d',
                    xml_error_string(xml_get_error_code($xmlParser)),
                    xml_get_current_line_number($xmlParser)
                ));
            }
        }
    } finally {
        xml_parser_free($xmlParser);
    }
}

/* Inside the target coroutine the actual parsing happens. The events are received
* using `list($event, $data) = yield`. The main advantage that coroutines bring
* here is that you can fetch the events in nested loops. This way you are implicitly
* building a state machine (but the state is managed by PHP, not you!)
*
* This particular coroutine parses bus location data (for samples scroll down). The
* result is passed to another $target coroutine.
*/
/**
 * Coroutine that turns a stream of [event, data] pairs into one array per BUS element.
 *
 * It waits for a 'start' event whose element name is 'BUS', then records the text
 * of every element closed inside it, keyed by the lower-cased element name. When
 * the matching 'end' event for 'BUS' arrives, the collected array is sent to
 * $target and the coroutine goes back to waiting for the next BUS element.
 *
 * @param object $target Coroutine (anything exposing send()) that receives each bus array.
 * @return Generator
 */
function busXMLParser($target) {
    for (;;) {
        list($kind, $payload) = yield;

        // Ignore everything until a BUS element opens.
        if ($kind != 'start' || $payload[0] != 'BUS') {
            continue;
        }

        $record = [];
        $buffer = '';
        $insideBus = true;

        while ($insideBus) {
            list($kind, $payload) = yield;

            switch ($kind) {
                case 'start':
                    // A new child element begins: forget text gathered so far.
                    $buffer = '';
                    break;

                case 'text':
                    // Character data may arrive in several pieces; accumulate them.
                    $buffer .= $payload;
                    break;

                case 'end':
                    if ($payload == 'BUS') {
                        $target->send($record);
                        $insideBus = false;
                    } else {
                        $record[strtolower($payload)] = $buffer;
                    }
                    break;
            }
        }
    }
}

/* Terminal coroutine of the pipeline: prints one line for every array it is sent,
 * built from that array's 'id', 'latitude' and 'longitude' entries. */
function busLocationPrinter() {
    for (;;) {
        $bus = yield;
        $line = 'Bus ' . $bus['id'] . ' is currently at ' . $bus['latitude'] . '/' . $bus['longitude'] . "\n";
        echo $line;
    }
}

/* Assemble the coroutine pipeline, from the last stage to the first: the bus
 * location printer receives arrays from the bus XML parser, which in turn
 * receives events from the streaming XML parser.
 */
$printer = busLocationPrinter();
$busParser = busXMLParser($printer);
$parser = streamingXMLParser($busParser);

/* No real bus location API is available here, so fictional sample data is
 * streamed instead: the document header once, then one random <bus> element
 * per second, forever. */
$parser->send('<?xml version="1.0"?><buses>');
for (;;) {
    sleep(1);

    $busId = mt_rand(1, 1000);
    $latitude = lcg_value();
    $longitude = lcg_value();

    $chunk = sprintf(
        '<bus><id>%d</id><latitude>%f</latitude><longitude>%f</longitude></bus>',
        $busId,
        $latitude,
        $longitude
    );
    $parser->send($chunk);
}

/* If your head is buzzing now, that's a good thing :P */
48 changes: 35 additions & 13 deletions t.php
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ function gen()
{
$i = 1;
while (true) {
echo 'xx';
yield $i += yield;
echo $i, "\n";
}
Expand All @@ -32,17 +33,38 @@ function gen()
$gen = gen();
var_dump($gen->send(3));
$gen->next();
echo "----\n";
var_dump($gen->send(4));
$gen->next();
echo "----\n";
$gen->send(5);
$gen->next();
echo "----\n";
$gen->send(6);
$gen->next();
echo "----\n";
$gen->send(7);
$gen->next();
echo "----\n";
// echo "----\n";
// var_dump($gen->send(4));
// $gen->next();
// echo "----\n";
// $gen->send(5);
// $gen->next();
// echo "----\n";
// $gen->send(6);
// $gen->next();
// echo "----\n";
// $gen->send(7);
// $gen->next();
// echo "----\n";


echo "\n\n";
/**
 * Endless generator that dumps whatever it is resumed with.
 *
 * A value passed through send() is printed with var_dump(). An Exception
 * injected through throw() is caught, dumped the same way, and the loop
 * carries on, so the generator stays usable afterwards.
 *
 * @return Generator
 */
function agen()
{
    for (;;) {
        try {
            $received = yield;
            var_dump($received);
        } catch (Exception $error) {
            var_dump($error);
        }
    }
}

// Drive agen(): every send() resumes the generator, which var_dumps the value it was given.
$ag = agen();

$ag->send(1);
$ag->send(2);
// throw() raises the exception at the paused yield; agen() catches it, dumps it and
// keeps looping, so the send() calls below are still handled.
$ag->throw(new Exception('a Exception'));
$ag->send(3);
$ag->send(4);
$ag->send(5);

0 comments on commit 3222e15

Please sign in to comment.