From afa90209f3e2551d928543675c99df70b1afcfa0 Mon Sep 17 00:00:00 2001 From: svfcode Date: Thu, 24 Oct 2024 08:05:45 +0300 Subject: [PATCH] Upd. Scan. Updated heuristic module. --- .../HeuristicAnalyser/Modules/CodeStyle.php | 39 +++++++++++++++++-- 1 file changed, 35 insertions(+), 4 deletions(-) diff --git a/lib/CleantalkSP/Common/Scanner/HeuristicAnalyser/Modules/CodeStyle.php b/lib/CleantalkSP/Common/Scanner/HeuristicAnalyser/Modules/CodeStyle.php index dc25041cb..007be9a74 100644 --- a/lib/CleantalkSP/Common/Scanner/HeuristicAnalyser/Modules/CodeStyle.php +++ b/lib/CleantalkSP/Common/Scanner/HeuristicAnalyser/Modules/CodeStyle.php @@ -27,6 +27,10 @@ class CodeStyle * @const Maximum word length to check for random structures */ const RANDOM_MAX_WORD_LEN = 5; + /** + * @const Sensitivity for comments noise + */ + const COMMENTS_NOISE_THRESHOLD = 1.00; /** * @const Sensitivity for random total weight */ @@ -169,11 +173,11 @@ public function analyseHumanUnreadableCode($content) { $proportion_spec_symbols = $this->proportionOfSpecialSymbols(); $weight = $this->getWeightOfRandomCharStructures($content); + $comments_noise = $this->getWeightOfCommentsNoise($content); - if ( - $proportion_spec_symbols >= self::SPECIAL_CHARS_PROPORTION_THRESHOLD - || - $weight > self::RANDOM_TOTAL_WEIGHT_THRESHOLD + if ($proportion_spec_symbols >= self::SPECIAL_CHARS_PROPORTION_THRESHOLD || + $weight > self::RANDOM_TOTAL_WEIGHT_THRESHOLD || + $comments_noise > self::COMMENTS_NOISE_THRESHOLD ) { $this->is_unreadable = true; } @@ -301,6 +305,33 @@ private function proportionOfSpecialSymbols() return 0.0; } + /** + * Check if the content contains comments noise (3 or more multiline comments in one string). + * @param string $content File content. 
+     * @return float Sum of 0.1 per line holding 3+ short block comments. Normal value is < 1.00
+     */
+    private function getWeightOfCommentsNoise($content)
+    {
+        $weight = 0.0;
+
+        // preg_split() returns false on failure rather than throwing; report no noise then.
+        $lines = preg_split("/((\r?\n)|(\r\n?))/", $content);
+        if ($lines === false) {
+            return $weight;
+        }
+
+        foreach ($lines as $line) {
+            // preg_match_all() signals failure by returning false (it does not throw), so test
+            // its return value; $this->matches is still filled exactly as the original code did.
+            $found = preg_match_all('#\/\*\s*\w*\s*\*\/#', $line, $this->matches);
+            if ($found !== false && $found >= 3) {
+                $weight += 0.1;
+            }
+        }
+
+        return $weight;
+    }
+
     /**
      * Break the content to a several `words` and run a couple of checks
      * to calculate weight of random-char structures in this.