diff --git a/highlight-html.php b/highlight-html.php new file mode 100644 index 0000000000000..1ddb46b621f30 --- /dev/null +++ b/highlight-html.php @@ -0,0 +1,176 @@ +set_bookmark('here'); + $here = $this->bookmarks['_here']; + return substr( $this->html, $here->start, $here->length ); + } +}; + +$p = $p::create_Full_parser( $html ); + +while ( $p->next_token() ) { + switch ( $p->get_token_type() ) { + case '#comment': + echo C_COMMENT . ''; + break; + + case '#doctype': + echo C_SYNTAX . 'get_modifiable_text() . '>'; + break; + + case '#tag': + print_tag( $p ); + break; + + case '#text': + print_text( $p ); + break; + + default: + die( "Unsupported syntax: {$p->get_token_type()}" ); + } +} + +echo "\e[m\n"; + +function print_text( $p ) { + $token_name = $p->get_token_name(); + if ( in_array( $token_name, [ 'IFRAME', 'NOEMBED', 'NOFRAMES', 'SCRIPT', 'STYLE', 'TEXTAREA', 'TITLE', 'XMP' ], true ) ) { + return C_TEXT . $p->get_modifiable_text(); + } + + $raw_token = $p->get_raw_token(); + $text = C_TEXT; + $at = 0; + $was_at = 0; + while ( $at < strlen( $raw_token ) ) { + $next_at = strpos( $raw_token, '&', $at ); + if ( false === $next_at ) { + break; + } + + $replacement = WP_HTML_Decoder::read_character_reference( 'data', $raw_token, $next_at, $skip_bytes ); + if ( isset( $replacement ) ) { + $text .= substr( $raw_token, $was_at, $next_at - $was_at ) . C_REF . substr( $raw_token, $next_at, $skip_bytes ) . C_TEXT; + $at = $next_at + $skip_bytes; + $was_at = $at; + continue; + } + + ++$at; + } + if ( $was_at < strlen( $raw_token ) ) { + $text .= substr( $raw_token, $was_at ); + } + echo C_TEXT . $text; +} + +function print_tag( $p ) { + global $do_format; + + static $depth = 0; + + $tag_name = $p->get_tag(); + $is_closer = $p->is_tag_closer(); + $closer = $is_closer ? '/' : ''; + $is_void = WP_HTML_Processor::is_void( $tag_name ); + $voider = $is_void ? 
'/' : ''; + + if ( $is_closer && in_array( $tag_name, [ 'HEAD', 'BODY', 'OL', 'UL', 'DIV' ], true ) ) { + $depth--; + } + + $indent = str_pad( '', $depth * 2, ' ' ); + + if ( $do_format && ( + ( + ! $is_closer && in_array( $tag_name, [ + 'DIV', 'P', 'UL', 'OL', 'DETAILS', 'SVG', 'PATH', 'G', + 'LINK', 'META', 'HTML', 'HEAD', 'BODY', 'TITLE', 'TEXTAREA', + 'PRE', 'H1', 'H2', 'H3', 'H4', 'H5', 'H6', 'HGROUP', + 'PICTURE', 'SOURCE', 'FIGURE', 'FORM', 'TABLE', 'TR', + 'FIGCAPTION', 'BLOCKQUOTE', 'OBJECT', 'EMBED', 'IFRAME', + 'SCRIPT', 'STYLE', 'NOSCRIPT', 'NAV', 'LI' + ], true ) + ) || ( + $is_closer && in_array( $tag_name, [ + 'HEAD', 'HTML', 'BODY', 'PICTURE', 'FIGURE', 'TABLE' + ], true ) + ) + ) ) { + echo "\n{$indent}"; + } + echo C_SYNTAX . '<' . $closer; + + echo C_TAGNAME . strtolower( $p->get_tag() ); + $attributes = $p->get_attribute_names_with_prefix( '' ) ?? array(); + + foreach( $attributes as $name ) { + $value = $p->get_attribute( $name ); + + echo ' ' . C_ANAME . $name; + if ( true === $value ) { + continue; + } + + echo C_SYNTAX . '="'; + echo C_AVALUE . str_replace( '"', '"', $value ); + echo C_SYNTAX . '"'; + } + echo C_SYNTAX . '>'; + + $text = $p->get_modifiable_text(); + if ( ! empty( $text ) ) { + echo 'TITLE' === $p->get_tag() ? C_TEXT : C_COMMENT; + + $add_newlines = ( + $do_format && + strlen( trim( $text ) ) > 0 && + ( + 'SCRIPT' === $tag_name || + 'STYLE' === $tag_name || + 'TEXTAREA' === $tag_name || + 'PRE' === $tag_name + ) + ); + + if ( $add_newlines ) { + echo "\n" . trim( $text, "\n" ) . "\n"; + } else { + echo $text; + } + + echo C_SYNTAX . 'get_tag() ) . C_SYNTAX . '>'; + } elseif ( in_array( $tag_name, [ 'SCRIPT', 'STYLE', 'TEXTAREA', 'PRE' ], true ) ) { + echo C_SYNTAX . 'get_tag() ) . C_SYNTAX . '>'; + } + + if ( ! 
$is_closer && in_array( $tag_name, [ 'HEAD', 'BODY', 'OL', 'UL', 'DIV' ], true ) ) { + $depth++; + } +} diff --git a/html-grep.php b/html-grep.php new file mode 100644 index 0000000000000..07741d6f5624e --- /dev/null +++ b/html-grep.php @@ -0,0 +1,187 @@ + 0 ) + ? (int) $opts['m'] + : 1; + + $input = in_array( '-', $argv, true ) ? 'php://stdin' : $opts['i']; + Grepper::scan( $input, $opts['p'], $lines_before, $lines_after, $max ); +} + +class Debugger extends WP_HTML_Tag_Processor { + public function h() { + return $this->html; + } + + public function extend( $line ) { + $this->html .= $line; + + if ( + $this->parser_state === self::STATE_COMPLETE || + $this->parser_state === self::STATE_INCOMPLETE_INPUT + ) { + $this->parser_state = self::STATE_READY; + } + } + + public function next_token() { + $r = parent::next_token(); + $this->set_bookmark( 'here' ); + return $r; + } + + public function at() { + return $this->bookmarks['here']; + } +} + +class Grepper { + public static function scan( $input, $pattern, $before, $after, $max ) { + $f = fopen( $input, 'r' ); + $c = 0; + $n = 0; + $lines = []; + $lc = 1 + $before + $after; + $o = static function ( $s ) { return WP_HTML_Decoder::decode_text_node( $s ); }; + $ws = static function ( $s ) { return preg_replace( '~[ \r\f\t\n]+~', ' ', $s ); }; + $pre_depth = 0; + $p = new Debugger( '' ); + $t = ''; + + while ( false !== ( $line = fgets( $f ) ) ) { + $n++; + + $p->extend( $line ); + while ( $p->next_token() ) { + $at = $p->at(); + $type = $p->get_token_type(); + $node_text = $o( $p->get_modifiable_text() ); + $node_text = $pre_depth > 0 ? $node_text : $ws( $node_text ); + + if ( '#tag' !== $type && '#text' !== $type ) { + continue; + } + + switch ( $p->get_token_name() ) { + case 'PRE': + $pre_depth += $p->is_tag_closer() ? -1 : 1; + break; + + case '#text': + $t .= $node_text; + } + + if ( preg_match( $pattern, $t, $match, PREG_OFFSET_CAPTURE ) ) { + $h = ( + "\e[32m" . + ltrim( mb_strcut( $t, 0, $match[0][1] ) ) . 
+ "\e[33m" . + $match[0][0] . + "\e[32m" . + rtrim( mb_strcut( $t, $match[0][1] + strlen( $match[0][0] ) ) ) . + "\e[90m" + ); + + for ( $i = 0; $i < $after; $i++ ) { + $line = fgets( $f ); + if ( false !== $line ) { + $p->extend( $line ); + } + } + + $cb = mb_strcut( $p->h(), 0, $at->start ); + $cc = mb_strcut( $p->h(), $at->start, $at->length ); + $ca = mb_strcut( $p->h(), $at->start + $at->length ); + + // Limit context to N lines preview + $cb = explode( "\n", $cb ); + $cb = array_slice( $cb, -$before ); + $cb = mb_strcut( implode( "\n", $cb ), -$before * 80 ); + + // Limit context to N lines preview + $ca = explode( "\n", $ca ); + $ca = array_slice( $ca, 0, $after ); + $ca = mb_strcut( implode( "\n", $ca ), 0, $after * 80 ); + + // If contained in last node. + $tt = $p->get_modifiable_text(); + if ( preg_match( $pattern, $tt, $mm, PREG_OFFSET_CAPTURE ) ) { + $cc = ( + "\e[90m" . + mb_strcut( $tt, 0, $mm[0][1] ) . + "\e[33m" . + $mm[0][0] . + "\e[90m" . + mb_strcut( $tt, $mm[0][1] + strlen( $mm[0][0] ) ) + ); + } + + echo "\n\e[32m{$n}\e[90m: \e[31m{$p->get_token_name()} \e[90m{$h}\e[m\n"; + echo "\e[90m{$cb}\e[33m{$cc}\e[90m{$ca}\e[m"; + + if ( ++$c >= $max ) { + fclose( $f ); + exit; + } + + $t = ''; + } + + $t = mb_strcut( $t, -100 ); + } + + } + } + + public static function indent( $lines ) { + return implode( "\n", array_map( + static function ( $line ) { return ' ' . 
$line; }, + explode( "\n", $lines ) + ) ); + } +} + +main(); + +function is_line_breaker( $tag_name ) { + switch ( $tag_name ) { + case 'BLOCKQUOTE': + case 'BR': + case 'DD': + case 'DIV': + case 'DL': + case 'DT': + case 'H1': + case 'H2': + case 'H3': + case 'H4': + case 'H5': + case 'H6': + case 'HR': + case 'LI': + case 'OL': + case 'P': + case 'UL': + return true; + } + + return false; +} diff --git a/html-to-text.php b/html-to-text.php new file mode 100644 index 0000000000000..93a2dc83355ac --- /dev/null +++ b/html-to-text.php @@ -0,0 +1,374 @@ +next_token() ) { + $node_name = $p->get_token_name(); + + if ( in_array( strtolower( $node_name ), $skip_first_nodes, true ) ) { + $depth = $p->get_current_depth(); + while ( $p->get_current_depth() >= $depth ) { + $p->next_token(); + } + array_shift( $skip_first_nodes ); + continue; + } + + if ( in_array( strtolower( $node_name ), $skip_nodes, true ) ) { + $depth = $p->get_current_depth(); + while ( $p->get_current_depth() >= $depth ) { + $p->next_token(); + } + continue; + } + + $node_text = WP_HTML_Decoder::decode_text_node( $p->get_modifiable_text() ); + $tag_name = '#tag' === $p->get_token_type() + ? ( ( $p->is_tag_closer() ? '-' : '+' ) . $node_name ) + : $node_name; + + if ( '#tag' === $p->get_token_type() && ! $p->is_tag_closer() && is_line_breaker( $node_name ) ) { + $needs_newline = ! $prev_was_li; + } + + if ( $ansi ) { + if ( + '+MAIN' === $tag_name || + 'main' === $p->get_attribute( 'role' ) || + 'main-content' === $p->get_attribute( 'id' ) || // cloudflare. + 'hnmain' === $p->get_attribute( 'id' ) // Hackernews. + ) { + $text_content .= "\e]1337;SetMark\x07"; + } + + switch ( $tag_name ) { + case '+A': + $href = $p->get_attribute( 'href' ); + if ( is_string( $href ) && preg_match( '~^https?://~', $href ) ) { + // External link, probably. + $text_content .= "\e[32m\e]8;;{$href}\x07"; + } elseif ( str_starts_with( $href, 'javascript:' ) ) { + break; + } else { + // Internal link, probably. 
+ $text_content .= "\e[90m\e]8;;{$base_url}{$href}\x07"; + } + break; + + case '-A': + $text_content .= "\e]8;;\x07\e[m"; + break; + + case '+B': + case '+STRONG': + $text_content .= "\e[2m"; + break; + + case '-B': + case '-STRONG': + $text_content .= "\e[22m"; + break; + + case '+C-': + $rgb = color_for_syntax_element( $p ); + if ( null !== $rgb ) { + $text_content .= "\e[38;2;{$rgb[0]};{$rgb[1]};{$rgb[2]}m"; + } + break; + + case '-C-': + $text_content .= "\e[m"; + break; + + case '+H1': + case '+H2': + case '+H3': + case '+H4': + case '+H5': + case '+H6': + $text_content .= "\e[1m"; + break; + + case '-H1': + case '-H2': + case '-H3': + case '-H4': + case '-H5': + case '-H6': + $text_content .= "\e[22m"; + break; + + case '+I': + case '+EM': + $text_content .= "\e[3m"; + break; + + case '-I': + case '-EM': + $text_content .= "\e[23m"; + break; + + case '+SUB': + $text_content .= "\e[74m"; + break; + + case '+SUP': + $text_content .= "\e[73m"; + break; + + case '-SUB': + case '-SUP': + $text_content .= "\e[75m"; + break; + + case '+TITLE': + $text_content .= "\e]0;{$node_text}\x07"; + break; + } + } + + switch ( $tag_name ) { + case '+LI': + $text_content .= "\n \e[31m•\e[39m "; + $needs_newline = false; + break; + + case '+H1': + case '+H2': + case '+H3': + case '+H4': + case '+H5': + case '+H6': + $text_content .= "\n\n" . str_pad( '', intval( $node_name[1] ), '#' ) . ' '; + $needs_newline = false; + break; + + case '+CITE': + $text_content .= ' «'; + break; + + case '-CITE': + $text_content .= '»'; + break; + + case '+CODE': + case '-CODE': + if ( $ansi && ! $p->is_tag_closer() ) { + $text_content .= "\e[90m"; + } + if ( $in_pre ) { + $text_content .= $p->is_tag_closer() ? 
"\n```" : "\n```\n"; + } else { + $text_content .= '`'; + } + if ( $ansi && $p->is_tag_closer() ) { + $text_content .= "\e[m"; + } + break; + + case '+DT': + $text_content .= "\n\n✏️ "; + $needs_newline = false; + break; + + case '+DD': + $text_content .= "\n 📝 "; + $needs_newline = false; + break; + + case '+IMG': + $alt = $p->get_attribute( 'alt' ); + if ( is_string( $alt ) && ! empty( $alt ) ) { + $text_content .= "[\e[31m{$alt}\e[m]"; + } + break; + + case '+PRE': + case '-PRE': + if ( $p->is_tag_closer() ) { + $in_pre = false; + $text_content .= "\e[90m```\e[m\n"; + } else { + $in_pre = true; + $text_content .= "\n\n\e[90m```"; + $lang = $p->get_attribute( 'lang' ); + if ( is_string( $lang ) ) { + $text_content .= $lang; + } + $text_content .= "\e[m\n"; + } + + break; + + case '+TABLE': + $text_content .= "\n\n"; + break; + + case '+TH': + $text_content .= "\e[1;3m"; + break; + + case '-TD': + case '-TH': + $text_content .= "\t\e[0;90m|\e[m "; + break; + + case '+TR': + $text_content .= "\e[90m| \e[m"; + break; + + case '-TR': + $text_content .= "\e[90m |\e[m\n"; + break; + + case '#text': + if ( $needs_newline ) { + $text_content .= "\n\n"; + $needs_newline = false; + } + $text_content .= $in_pre ? 
$node_text : preg_replace( '~[ \t\r\f\n]+~', ' ', $node_text ); + } + + $prev_was_li = '+LI' === $tag_name; +} + +echo trim( $text_content ); + +if ( null !== $p->get_last_error() ) { + echo "\n\e[31mFailed\e[90m because of '\e[2,31m{$p->get_last_error()}\e[0,90m'\e[m\n"; + $unsupported = $p->get_unsupported_exception(); + if ( isset( $unsupported ) ) { + echo "\e[90m ┤ {$unsupported->getMessage()}\e[m\n"; + } +} else if ( $p->paused_at_incomplete_token() ) { + echo trim( $text_content ); + echo "\n\e[31mIncomplete input\e[90m found at end of document; unable to proceed.\e[m\n"; +} + +function is_line_breaker( $tag_name ) { + switch ( $tag_name ) { + case 'BLOCKQUOTE': + case 'BR': + case 'DD': + case 'DIV': + case 'DL': + case 'DT': + case 'H1': + case 'H2': + case 'H3': + case 'H4': + case 'H5': + case 'H6': + case 'HR': + case 'LI': + case 'OL': + case 'P': + case 'UL': + return true; + } + + return false; +} + +function color_for_syntax_element( $processor ) { + static $colors = [ + 'a' => [0x99, 0x00, 0x55], + 'b' => [0x99, 0x00, 0x55], + 'c' => [0x70, 0x80, 0x90], + 'd' => [0x70, 0x80, 0x90], + 'e' => [0x00, 0x77, 0xaa], + 'f' => [0x66, 0x99, 0x00], + 'g' => [0x22, 0x22, 0x22], + 'k' => [0x99, 0x00, 0x55], + 'l' => [0x00, 0x00, 0x00], + 'm' => [0x00, 0x00, 0x00], + 'n' => [0x00, 0x77, 0xaa], + 'o' => [0x99, 0x99, 0x99], + 'p' => [0x99, 0x99, 0x99], + 's' => [0xa6, 0x7f, 0x59], + 't' => [0xa6, 0x7f, 0x59], + 'u' => [0xa6, 0x7f, 0x59], + 'cp' => [0x70, 0x80, 0x90], + 'c1' => [0x70, 0x80, 0x90], + 'cs' => [0x70, 0x80, 0x90], + 'kc' => [0x99, 0x00, 0x55], + 'kn' => [0x99, 0x00, 0x55], + 'kp' => [0x99, 0x00, 0x55], + 'kr' => [0x99, 0x00, 0x55], + 'ld' => [0x00, 0x00, 0x00], + 'nc' => [0x00, 0x77, 0xaa], + 'no' => [0x00, 0x77, 0xaa], + 'nd' => [0x00, 0x77, 0xaa], + 'ni' => [0x00, 0x77, 0xaa], + 'ne' => [0x00, 0x77, 0xaa], + 'nf' => [0x00, 0x77, 0xaa], + 'nl' => [0x00, 0x77, 0xaa], + 'nn' => [0x00, 0x77, 0xaa], + 'py' => [0x00, 0x77, 0xaa], + 'ow' => [0x99, 0x99, 
0x99], + 'mb' => [0x00, 0x00, 0x00], + 'mf' => [0x00, 0x00, 0x00], + 'mh' => [0x00, 0x00, 0x00], + 'mi' => [0x00, 0x00, 0x00], + 'mo' => [0x00, 0x00, 0x00], + 'sb' => [0xa6, 0x7f, 0x59], + 'sc' => [0xa6, 0x7f, 0x59], + 'sd' => [0xa6, 0x7f, 0x59], + 'se' => [0xa6, 0x7f, 0x59], + 'sh' => [0xa6, 0x7f, 0x59], + 'si' => [0xa6, 0x7f, 0x59], + 'sx' => [0xa6, 0x7f, 0x59], + 'sr' => [0xa6, 0x7f, 0x59], + 'ss' => [0xa6, 0x7f, 0x59], + 'vc' => [0x00, 0x77, 0xaa], + 'vg' => [0x00, 0x77, 0xaa], + 'vi' => [0x00, 0x77, 0xaa], + 'il' => [0x00, 0x00, 0x00], + ]; + + foreach ( $colors as $name => $rgb ) { + if ( $processor->get_attribute( $name ) ) { + return $rgb; + } + } + + return null; +} diff --git a/parse-tokens.php b/parse-tokens.php new file mode 100644 index 0000000000000..b291918594b8e --- /dev/null +++ b/parse-tokens.php @@ -0,0 +1,89 @@ + +Just another <img> post + + + +This is a <img> post + +
Not all is HTML
+

This is like , , and and .

+
An abridged CDATA is 3]]>
+ +There is a syntax. +HTML; + +//$html = ''; +//$html = file_get_contents( '~/Downloads/single-page.html' ); + +if ( isset( $argv[1] ) ) { + $html = file_get_contents( 'php://stdin' ); +} + +$p = new WP_HTML_Tag_Processor( $html ); + +echo "\e[32m{$html}\e[m\n\n"; + +$text_content = ''; +$pre_depth = 0; +while ( $p->next_token() ) { + $prefix = $p->is_tag_closer() ? '/' : ''; + $suffix = $p->has_self_closing_flag() ? '/' : ''; + $text = str_replace( "\n", '␤', $p->get_modifiable_text() ?? '' ); + $node_text = html_entity_decode( $p->get_modifiable_text(), ENT_HTML5 | ENT_QUOTES ); + echo "\e[35m{$p->get_token_type()}\e[90m \e[24G\e[36m{$prefix}\e[33m{$p->get_token_name()}\e[35m{$suffix}\e[90m \e[42G\"\e[34m{$text}\e[90m\"\e[m\n"; + + if ( 'PRE' === $p->get_token_name() ) { + $pre_depth += $p->is_tag_closer() ? -1 : 1; + } + + if ( is_line_breaker( $p->get_token_name() ) && ! $p->is_tag_closer() ) { + $text_content .= "\n"; + } + + switch ( $p->get_token_name() ) { + case '#text': + $text_content .= $pre_depth > 0 ? $node_text : preg_replace( '~[ \r\t\f\n]+~', ' ', $node_text ); + } +} + +echo "\n" . $text_content; + +function is_line_breaker( $tag_name ) { + switch ( $tag_name ) { + case 'BLOCKQUOTE': + case 'BR': + case 'DD': + case 'DIV': + case 'DL': + case 'DT': + case 'H1': + case 'H2': + case 'H3': + case 'H4': + case 'H5': + case 'H6': + case 'HR': + case 'LI': + case 'OL': + case 'P': + case 'UL': + return true; + } + + return false; +} diff --git a/scan-all-tags.php b/scan-all-tags.php new file mode 100644 index 0000000000000..ac5801c11090e --- /dev/null +++ b/scan-all-tags.php @@ -0,0 +1,65 @@ + 0 ) { + if ( ! $p->next_tag() ) { + if ( null === $p->get_last_error() ) { + echo "\e[90mFinished document \e[34msuccessfully\e[m\n"; + die(); + } + + echo "\e[90mAborted document: \e[33m{$p->get_last_error()}\e[m\n"; + die(); + } + + $closer = $p->is_tag_closer() ? '/' : ''; + $voider = WP_HTML_Processor::is_void( $p->get_tag() ) ? 
'/' : ''; + $crumbs = []; + $last_crumb = null; + foreach ( $p->get_breadcrumbs() as $tag ) { + if ( $tag !== $last_crumb ) { + $crumbs[] = [ $tag, 1 ]; + } else { + $crumb = array_pop( $crumbs ); + $crumb[1]++; + $crumbs[] = $crumb; + } + + $last_crumb = $tag; + } + foreach ( $crumbs as &$c ) { + $c = $c[1] === 1 ? $c[0] : "{$c[0]} (x{$c[1]})"; + } + $crumbs = implode( "\e[90m, \e[31m", $crumbs ); + + $first_attribute = ''; + foreach ( $p->get_attribute_names_with_prefix( '' ) ?? array() as $name ) { + $value = $p->get_attribute( $name ); + + if ( true === $value ) { + $first_attribute = " \e[38;2;102;153;0m{$name}\e[38;2;153;153;153m\e[m"; + } else { + $value = str_replace( "\n", "␤", $value ); + if ( strlen( $value ) > 23 ) { + $value = substr( $value, 0, 20 ) . "..."; + } + $first_attribute = " \e[38;2;102;153;0m{$name}\e[38;2;153;153;153m=\"\e[38;2;0;119;170m{$value}\e[38;2;153;153;153m\"\e[m"; + } + } + + $modifiable_text = str_replace( "\n", "␤", $p->get_modifiable_text() ); + if ( strlen( $modifiable_text ) > 30 ) { + $modifiable_text = substr( $modifiable_text, 0, 27 ) . "..."; + } + echo "\e[90mFound \e[36m{$closer}\e[32m{$p->get_tag()}\e[35m{$voider}\e[90m at \e[31m{$crumbs}{$first_attribute}\e[90m {$modifiable_text}\e[m\n"; +} diff --git a/src/wp-includes/html-api/class-wp-html-tag-processor.php b/src/wp-includes/html-api/class-wp-html-tag-processor.php index 233d47eb8da95..e98766b24034a 100644 --- a/src/wp-includes/html-api/class-wp-html-tag-processor.php +++ b/src/wp-includes/html-api/class-wp-html-tag-processor.php @@ -4500,4 +4500,31 @@ public function get_doctype_info(): ?WP_HTML_Doctype_Info { * @since 6.7.0 */ const TEXT_IS_WHITESPACE = 'TEXT_IS_WHITESPACE'; + + /* + * Debug helpers. 
+	 */
+
+	/**
+	 * Prints the whole HTML input followed by a caret line marking the parse offset.
+	 *
+	 * Output is written directly with `echo` and uses ANSI color escapes. The caret
+	 * is preceded by `bytes_already_parsed` spaces on its own line after the document.
+	 *
+	 * NOTE(review): the padding is a byte count, so the caret presumably only lines
+	 * up visually for single-line, single-byte input — confirm before relying on it.
+	 */
+	public function debug_current_position() {
+		echo "\n\e[31mCurrent Position\e[m\n";
+		echo "\e[32m{$this->html}\e[m\n";
+		echo str_pad( '', $this->bytes_already_parsed, ' ' );
+		echo "\e[33m^\e[m\n";
+	}
+
+	/**
+	 * Prints one line per entry in `classname_updates`, stating the pending operation.
+	 *
+	 * Each entry is keyed by class name. A value of exactly `true` is reported as
+	 * "added" and a value of exactly `false` as "removed".
+	 */
+	public function debug_classname_updates() {
+		echo "\n\e[31m\"\e[35mclass\e[31m\" Updates\e[m\n";
+		foreach ( $this->classname_updates as $name => $change ) {
+			if ( true === $change ) {
+				$op = 'added';
+			} elseif ( false === $change ) {
+				$op = 'removed';
+			} else {
+				/*
+				 * Anything that is neither strictly true nor strictly false is not a
+				 * removal; label it separately instead of misreporting it.
+				 * NOTE(review): the tag processor is believed to use a null "skip"
+				 * marker for this state — confirm against the class constants.
+				 */
+				$op = 'skipped';
+			}
+			echo " \e[32m{$name}\e[90m should be \e[35m{$op}\e[m\n";
+		}
+	}
+
+	/**
+	 * Prints one line per entry in `lexical_updates`.
+	 *
+	 * For every update this shows its index, its `start` and `length`, the text
+	 * currently occupying that span of the HTML input, and the update's `text`.
+	 */
+	public function debug_lexical_updates() {
+		echo "\n\e[31mLexical Updates\e[m\n";
+		foreach ( $this->lexical_updates as $index => $update ) {
+			// Slice the text that currently occupies the update's span of the input.
+			$old = substr( $this->html, $update->start, $update->length );
+			echo " \e[32m{$index}\e[90m changes (\e[34m{$update->start}\e[90m, \e[34m{$update->length}\e[90m) from \"\e[35m{$old}\e[90m\" to \"\e[31m{$update->text}\e[90m\"\e[m\n";
+		}
+	}
 }