Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Upd. Scan. Updated heuristic module. #431

Merged
merged 1 commit into from
Nov 2, 2024
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,10 @@ class CodeStyle
* @const Maximum word length to check for random structures
*/
const RANDOM_MAX_WORD_LEN = 5;
/**
* @const Comments-noise weight above which the content is flagged as unreadable
*/
const COMMENTS_NOISE_THRESHOLD = 1.00;
/**
* @const Sensitivity for random total weight
*/
Expand Down Expand Up @@ -169,11 +173,11 @@ public function analyseHumanUnreadableCode($content)
{
$proportion_spec_symbols = $this->proportionOfSpecialSymbols();
$weight = $this->getWeightOfRandomCharStructures($content);
$comments_noise = $this->getWeightOfCommentsNoise($content);

if (
$proportion_spec_symbols >= self::SPECIAL_CHARS_PROPORTION_THRESHOLD
||
$weight > self::RANDOM_TOTAL_WEIGHT_THRESHOLD
if ($proportion_spec_symbols >= self::SPECIAL_CHARS_PROPORTION_THRESHOLD ||
$weight > self::RANDOM_TOTAL_WEIGHT_THRESHOLD ||
$comments_noise > self::COMMENTS_NOISE_THRESHOLD
) {
$this->is_unreadable = true;
}
Expand Down Expand Up @@ -301,6 +305,33 @@ private function proportionOfSpecialSymbols()
return 0.0;
}

/**
 * Calculate the weight of "comments noise" in the content.
 *
 * The content is split into lines (LF, CRLF and lone CR are all accepted as
 * line breaks). A line is considered noisy when it contains 3 or more short
 * block comments, i.e. a comment opener, optional whitespace, at most one
 * run of word characters, optional whitespace and a comment closer.
 * Every noisy line adds 0.1 to the resulting weight.
 *
 * @param string $content File content.
 * @return float Accumulated weight; 0.0 when nothing noisy is found or the
 *               content could not be split. Normal value is < 1.00
 */
private function getWeightOfCommentsNoise($content)
{
    // Minimum number of short block comments on a single line to treat it as noisy.
    $min_comments_per_line = 3;
    // Weight added for every noisy line.
    $weight_per_line = 0.1;

    $lines = preg_split('/\r\n|\r|\n/', $content);
    if ($lines === false) {
        // PCRE failure: nothing can be analysed, report no noise.
        return 0.0;
    }

    $weight = 0.0;

    foreach ($lines as $line) {
        // A local variable receives the matches so no object state is overwritten.
        // preg_match_all() returns the number of full-pattern matches, or false on error.
        $found = preg_match_all('#\/\*\s*\w*\s*\*\/#', $line, $matches);

        if ($found !== false && $found >= $min_comments_per_line) {
            $weight += $weight_per_line;
        }
    }

    return $weight;
}

/**
* Break the content to a several `words` and run a couple of checks
* to calculate weight of random-char structures in this.
Expand Down
Loading