From 871e40ac8aef298449578b16aafe65259dedcbd8 Mon Sep 17 00:00:00 2001 From: Eric Heikes Date: Wed, 13 Sep 2017 23:16:31 -0500 Subject: [PATCH] Clean up the HTML before searching for the <title>. Strips <script> and <style> tags from the page. Fixes sites like Kotaku. --- toreadapi.php | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/toreadapi.php b/toreadapi.php index 2063869..d8f2f5e 100644 --- a/toreadapi.php +++ b/toreadapi.php @@ -257,7 +257,9 @@ function postEntry() { } // Save the <title>. - if (preg_match("#<title[^>]*>(.*)#iU", $result, $matches) + $cleanedHtml = preg_replace('##isU', '', $result); + $cleanedHtml = preg_replace('##isU', '', $cleanedHtml); + if (preg_match("#]*>(.*)#iU", $cleanedHtml, $matches) and $matches[1] != "") { $title = mb_substr($matches[1], 0, $maxTitleLength, 'UTF-8');