Skip to content

Commit

Permalink
Merge pull request #431 from CleanTalk/upd-heuristic
Browse files Browse the repository at this point in the history
Upd. Scan. Updated heuristic module.
  • Loading branch information
svfcode authored Nov 2, 2024
2 parents c65a936 + afa9020 commit 5e494a3
Showing 1 changed file with 35 additions and 4 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,10 @@ class CodeStyle
* @const Maximum word length to check for random structures
*/
const RANDOM_MAX_WORD_LEN = 5;
/**
* @const Comments-noise weight that must be exceeded for the code to be flagged as unreadable
*/
const COMMENTS_NOISE_THRESHOLD = 1.00;
/**
* @const Sensitivity for random total weight
*/
Expand Down Expand Up @@ -169,11 +173,11 @@ public function analyseHumanUnreadableCode($content)
{
$proportion_spec_symbols = $this->proportionOfSpecialSymbols();
$weight = $this->getWeightOfRandomCharStructures($content);
$comments_noise = $this->getWeightOfCommentsNoise($content);

if (
$proportion_spec_symbols >= self::SPECIAL_CHARS_PROPORTION_THRESHOLD
||
$weight > self::RANDOM_TOTAL_WEIGHT_THRESHOLD
if ($proportion_spec_symbols >= self::SPECIAL_CHARS_PROPORTION_THRESHOLD ||
$weight > self::RANDOM_TOTAL_WEIGHT_THRESHOLD ||
$comments_noise > self::COMMENTS_NOISE_THRESHOLD
) {
$this->is_unreadable = true;
}
Expand Down Expand Up @@ -301,6 +305,33 @@ private function proportionOfSpecialSymbols()
return 0.0;
}

/**
 * Calculates the "comments noise" weight of the content.
 *
 * The content is split into lines (LF, CRLF and lone CR are all treated as line breaks).
 * A line is noisy when it holds 3 or more tiny block comments, i.e. a slash-star opener,
 * optional whitespace, at most one word, optional whitespace and a star-slash closer.
 * Every noisy line adds 0.1 to the returned weight.
 *
 * @param string $content File content.
 * @return float Accumulated weight; 0.0 when no line is noisy. Normal value is < 1.00
 */
private function getWeightOfCommentsNoise($content)
{
    $weight = 0.0;

    $lines = preg_split("/((\r?\n)|(\r\n?))/", $content);
    if ( $lines === false ) {
        // preg_split() reports a PCRE failure by returning false, it does not throw.
        return $weight;
    }

    foreach ( $lines as $line ) {
        // NOTE(review): the matches are still written to $this->matches, exactly as before.
        // Whether anything else reads that property is not visible from this method;
        // if nothing does, a local variable would be the safer choice - confirm.
        $found = preg_match_all('#\/\*\s*\w*\s*\*\/#', $line, $this->matches);

        // preg_match_all() returns the number of full matches, or false on a PCRE error.
        // A failed line is skipped instead of relying on an exception that is never thrown.
        if ( $found !== false && $found >= 3 ) {
            $weight += 0.1;
        }
    }

    return $weight;
}

/**
* Break the content to a several `words` and run a couple of checks
* to calculate weight of random-char structures in this.
Expand Down

0 comments on commit 5e494a3

Please sign in to comment.