Skip to content

Commit

Permalink
GH-244: Sanitize HTML
Browse files Browse the repository at this point in the history
  • Loading branch information
magicsunday committed Sep 27, 2023
1 parent 9fb0b0e commit 908d149
Showing 1 changed file with 19 additions and 2 deletions.
21 changes: 19 additions & 2 deletions Classes/Backend/Preview/RteImagePreviewRenderer.php
Original file line number Diff line number Diff line change
Expand Up @@ -43,11 +43,22 @@ class RteImagePreviewRenderer extends TextPreviewRenderer
*/
public function renderPageModulePreviewContent(GridColumnItem $item): string
{
$row = $item->getRecord();
$row = $item->getRecord();
$html = $row['bodytext'] ?? '';

// Sanitize HTML (replaces invalid chars with U+FFFD).
// - Invalid control chars: [\x00-\x08\x0B\x0C\x0E-\x1F]
// - UTF-16 surrogates: \xED[\xA0-\xBF].
// - Non-characters U+FFFE and U+FFFF: \xEF\xBF[\xBE\xBF]
$html = preg_replace(
'/[\x00-\x08\x0B\x0C\x0E-\x1F]|\xED[\xA0-\xBF].|\xEF\xBF[\xBE\xBF]/',
"\xEF\xBF\xBD",
$html
);

return $this
->linkEditContent(
$this->renderTextWithHtml(htmlentities($row['bodytext']) ?? ''),
$this->renderTextWithHtml($html),
$row
)
. '<br />';
Expand Down Expand Up @@ -79,12 +90,18 @@ protected function renderTextWithHtml(string $input): string
*/
private function truncate(string $html, int $length): string
{
// Collect libxml errors internally instead of emitting warnings; remember the previous setting
$internalErrors = libxml_use_internal_errors(true);

$dom = new DOMDocument();
$dom->loadHTML(
'<?xml encoding="UTF-8">' . $html,
LIBXML_HTML_NOIMPLIED | LIBXML_HTML_NODEFDTD
);

// Restore the previous libxml error handling setting
libxml_use_internal_errors($internalErrors);

$toRemove = $this->walk($dom, $length);

// Remove any nodes that exceed limit
Expand Down

0 comments on commit 908d149

Please sign in to comment.