Skip to content

Commit

Permalink
Revert "SONARPHP-1590 S1192 should not raise for HTML tags (#1343)"
Browse files Browse the repository at this point in the history
This reverts commit e37fe4f.
  • Loading branch information
jonas-wielage-sonarsource authored Dec 19, 2024
1 parent e37fe4f commit f2a888a
Show file tree
Hide file tree
Showing 5 changed files with 21 additions and 262 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,11 @@
"PHPWord:src/PhpWord/Writer/Word2007/Part/Styles.php": [
101
],
"PHPWord:src/PhpWord/Writer/Word2007/Part/Theme.php": [
224,
227,
228
],
"PHPWord:src/PhpWord/Writer/Word2007/Style/Font.php": [
45
],
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
import java.util.List;
import java.util.Map;
import java.util.regex.Pattern;
import org.apache.commons.lang3.StringUtils;
import org.sonar.check.Rule;
import org.sonar.check.RuleProperty;
import org.sonar.plugins.php.api.tree.CompilationUnitTree;
Expand All @@ -29,57 +30,20 @@
import org.sonar.plugins.php.api.visitors.PHPVisitorCheck;
import org.sonar.plugins.php.api.visitors.PreciseIssue;

import static org.sonar.php.checks.utils.RegexUtils.firstOf;
import static org.sonar.php.checks.utils.RegexUtils.oneOrMore;
import static org.sonar.php.checks.utils.RegexUtils.optional;
import static org.sonar.php.checks.utils.RegexUtils.zeroOrMore;

@Rule(key = "S1192")
public class StringLiteralDuplicatedCheck extends PHPVisitorCheck {

private static final String MESSAGE = "Define a constant instead of duplicating this literal \"%s\" %s times.";
private static final String SECONDARY_MESSAGE = "Duplication.";

private static final String ONLY_ALPHANUMERIC_UNDERSCORES_HYPHENS_AND_PERIODS = "^[a-zA-Z_][.\\-\\w]+$";

// Single elements
private static final String IDENTIFIER = "[a-zA-Z][a-zA-Z0-9\\-_:.]*+";
private static final String OPT_SPACING = "\\s*+";
private static final String OPT_TEXT_OUTSIDE_OF_TAGS = "[^<>]*+";
private static final String DOUBLE_QUOTED_STRING = "\"(?:\\\\.|[^\"])*+\"";
private static final String SINGLE_QUOTED_STRING = "'(?:\\\\.|[^'])*+'";
private static final String NO_QUOTED_STRING = "[a-zA-Z0-9\\-_:./]++";

// Partial elements matching
private static final String DOUBLE_QUOTED_STRING_PARTIAL_START = "\"(?:\\\\.|[^\"])*+";
private static final String SINGLE_QUOTED_STRING_PARTIAL_START = "'(?:\\\\.|[^'])*+";
private static final String NO_QUOTED_STRING_PARTIAL_START = "[a-zA-Z0-9\\-_:./]++";
private static final String TAG_ATTRIBUTE_PARTIAL_START = OPT_SPACING
+ optional("=", OPT_SPACING, optional(firstOf(DOUBLE_QUOTED_STRING_PARTIAL_START, SINGLE_QUOTED_STRING_PARTIAL_START, NO_QUOTED_STRING_PARTIAL_START)));

// Complex regexes
private static final String TAG_ATTRIBUTE = IDENTIFIER + OPT_SPACING + optional("=", OPT_SPACING, firstOf(DOUBLE_QUOTED_STRING, SINGLE_QUOTED_STRING, NO_QUOTED_STRING));
private static final String HTML_TAG_FULL = "</?" + OPT_SPACING + IDENTIFIER + OPT_SPACING + zeroOrMore(TAG_ATTRIBUTE, OPT_SPACING) + "/?+>";
private static final String HTML_TAG_PARTIAL_START = OPT_SPACING + "</?+" + OPT_SPACING
+ optional(IDENTIFIER, OPT_SPACING, zeroOrMore(TAG_ATTRIBUTE, OPT_SPACING), optional(TAG_ATTRIBUTE_PARTIAL_START));
private static final String HTML_TAG_PARTIAL_END = "[\"']?+" + OPT_SPACING + zeroOrMore(TAG_ATTRIBUTE, OPT_SPACING) + "/?+>";
private static final String HTML_CONTENT = optional(HTML_TAG_PARTIAL_END) + oneOrMore(OPT_TEXT_OUTSIDE_OF_TAGS, HTML_TAG_FULL) + OPT_TEXT_OUTSIDE_OF_TAGS
+ optional(HTML_TAG_PARTIAL_START);

private static final String FULL_ALLOWED_LITERALS_REGEX = firstOf(
HTML_CONTENT,
OPT_TEXT_OUTSIDE_OF_TAGS + HTML_TAG_PARTIAL_START,
HTML_TAG_PARTIAL_END + OPT_TEXT_OUTSIDE_OF_TAGS,
HTML_TAG_PARTIAL_END + OPT_TEXT_OUTSIDE_OF_TAGS + HTML_TAG_PARTIAL_START,
ONLY_ALPHANUMERIC_UNDERSCORES_HYPHENS_AND_PERIODS);
private static final Pattern ALLOWED_DUPLICATED_LITERALS = Pattern.compile(FULL_ALLOWED_LITERALS_REGEX);

public static final int THRESHOLD_DEFAULT = 3;
public static final int MINIMAL_LITERAL_LENGTH_DEFAULT = 5;
private static final Pattern ALLOWED_DUPLICATED_LITERALS = Pattern.compile("^[a-zA-Z_][.\\-\\w]+$");

private final Map<String, LiteralTree> firstOccurrenceTrees = new HashMap<>();
private final Map<String, List<LiteralTree>> sameLiteralOccurrences = new HashMap<>();

public static final int THRESHOLD_DEFAULT = 3;
public static final int MINIMAL_LITERAL_LENGTH_DEFAULT = 5;

@RuleProperty(
key = "threshold",
defaultValue = "" + THRESHOLD_DEFAULT)
Expand All @@ -102,10 +66,9 @@ public void visitCompilationUnit(CompilationUnitTree tree) {

private void finish() {
for (Map.Entry<String, List<LiteralTree>> literalOccurrences : sameLiteralOccurrences.entrySet()) {
String value = literalOccurrences.getKey();
List<LiteralTree> occurrences = literalOccurrences.getValue();

if (occurrences.size() >= threshold && !ALLOWED_DUPLICATED_LITERALS.matcher(value).matches()) {
if (occurrences.size() >= threshold) {
String literal = literalOccurrences.getKey();
String message = String.format(MESSAGE, literal, occurrences.size());
LiteralTree firstOccurrenceTree = firstOccurrenceTrees.get(literal);
Expand All @@ -120,25 +83,22 @@ private void finish() {
@Override
public void visitLiteral(LiteralTree tree) {
if (tree.is(Kind.REGULAR_STRING_LITERAL)) {
String literal = tree.value().replace("\\'", "'").replace("\\\"", "\"");
String value = removeQuotesAndQuotesEscaping(literal);
String literal = tree.value();
String value = StringUtils.substring(literal, 1, literal.length() - 1);

if (value.length() >= minimalLiteralLength && !ALLOWED_DUPLICATED_LITERALS.matcher(value).find()) {

if (value.length() >= minimalLiteralLength) {
if (!sameLiteralOccurrences.containsKey(value)) {
List<LiteralTree> occurrences = new ArrayList<>();
occurrences.add(tree);
sameLiteralOccurrences.put(value, occurrences);
firstOccurrenceTrees.put(value, tree);

} else {
sameLiteralOccurrences.get(value).add(tree);
}
}
}
}

private static String removeQuotesAndQuotesEscaping(String s) {
var quote = s.charAt(0);
return s.substring(1, s.length() - 1).replace("\\" + quote, String.valueOf(quote));
}

}

This file was deleted.

This file was deleted.

Original file line number Diff line number Diff line change
Expand Up @@ -32,11 +32,11 @@
echo "$totototo";


$a["name1_name-2."]; // OK - Only alphanumeric, -, _ and .
$a["name1_name-2."];
$a["name1_name-2."];
$a["name1_name-2."];
$a["name1_name-2."];
$a["name1_name-2"]; // OK - Only alphanumeric, - and _
$a["name1_name-2"];
$a["name1_name-2"];
$a["name1_name-2"];
$a["name1_name-2"];

$a["name1/name2"]; // Noncompliant {{Define a constant instead of duplicating this literal "name1/name2" 5 times.}}
$a["name1/name2"];
Expand All @@ -53,117 +53,3 @@
$score = $request->getParam('_score'); // OK - Only alphanumeric, -, _ and .
$score = $request->getParam('_score');
$score = $request->getParam('_score');

$output = '';
$output .= '<span class="payment">'.$total.'</span>';
$output .= '<span class="payment-summary">'.$total_payment.'</span>';
$output .= '<span class="badge normal">'.$inc_vat_text.'</span>';

$output .= '<div>'.$total.'</div>';
$output .= '<div>'.$total_payment.'</div>';
$output .= '<div>'.$inc_vat_text.'</div>';

$output = '<custom-tag_name:example.class>'.$var1.'</custom-tag_name:example.class>';
$output = '<custom-tag_name:example.class>'.$var2.'</custom-tag_name:example.class>';
$output = '<custom-tag_name:example.class>'.$var3.'</custom-tag_name:example.class>';

$output .= '<div class="beautiful">'.$total.'</div>';
$output .= '<div class="beautiful">'.$total_payment.'</div>';
$output .= '<div class="beautiful">'.$inc_vat_text.'</div>';

$output .= '<span class="payment">'.$total.'</span' + '>';
$output .= '<span class="payment-summary">'.$total_payment.'</span' + '>';
$output .= '<span class="badge normal">'.$inc_vat_text.'</span' + '>';

// HTML tag with attribute and escaping '
$output .= '<div some-attribute-1="value1" some-attribute-2=\'value2\' some-attribute-3=value3 some-attribute-4>'.$total.'</div>';
$output .= '<div some-attribute-1="value1" some-attribute-2=\'value2\' some-attribute-3=value3 some-attribute-4>'.$total.'</div>';
$output .= '<div some-attribute-1="value1" some-attribute-2=\'value2\' some-attribute-3=value3 some-attribute-4>'.$total.'</div>';

// HTML tag with attribute and escaping "
$output .= "<span class=\"payment\">".$total.'</span>';
$output .= "<span class=\"payment\">".$total.'</span>';
$output .= "<span class=\"payment\">".$total.'</span>';

// partial HTML tag start
$output .= '<div some-attr='.$value.'>';
$output .= '<div some-attr='.$value.'>';
$output .= '<div some-attr='.$value.'>';

// partial HTML tag start with content before
$test = 'When handling an <a href="http://';
$test = 'When handling an <a href="http://';
$test = 'When handling an <a href="http://';

// partial HTML tag end
$output .= '<div><'.$tag_name.' with="value">';
$output .= '<div><'.$tag_name.' with="value">';
$output .= '<div><'.$tag_name.' with="value">';

// partial HTML tag end with content after
$test = 'with="value"> some content after';
$test = 'with="value"> some content after';
$test = 'with="value"> some content after';

// multiple HTML tags
$output .= '<div><p><span>'.$tag_name.'</div></p></span>';
$output .= '<div><p><span>'.$tag_name.'</div></p></span>';
$output .= '<div><p><span>'.$tag_name.'</div></p></span>';

// multiple HTML tags with text content outside of them
$output .= '<div> some <p> content <span> here '.$tag_name.'</div> and </p> there </span>';
$output .= '<div> some <p> content <span> here '.$tag_name.'</div> and </p> there </span>';
$output .= '<div> some <p> content <span> here '.$tag_name.'</div> and </p> there </span>';

// multiple HTML tags with partial end or start tag
$output .= ' left-from-previous="string"> here '.$tag_name.'</div><div';
$output .= ' left-from-previous="string"> here '.$tag_name.'</div><div';
$output .= ' left-from-previous="string"> here '.$tag_name.'</div><div';

// end and start tag in the same string literal
$output .= 'end-of-tag="val"><div start-of-tag="val" '.$additionl_attribute;
$output .= 'end-of-tag="val"><div start-of-tag="val" '.$additionl_attribute;
$output .= 'end-of-tag="val"><div start-of-tag="val" '.$additionl_attribute;

// end and start tag with content between
$test = 'end-of-tag="val"> some content <div start-of-tag="val" ';
$test = 'end-of-tag="val"> some content <div start-of-tag="val" ';
$test = 'end-of-tag="val"> some content <div start-of-tag="val" ';

// end and start tag with content and HTML tags between
$test = 'end-of-tag="val"> some <i>extra</i> content <div start-of-tag="val" ';
$test = 'end-of-tag="val"> some <i>extra</i> content <div start-of-tag="val" ';
$test = 'end-of-tag="val"> some <i>extra</i> content <div start-of-tag="val" ';

// self closing tag
$test = "<br/>";
$test = "<br/>";
$test = "<br/>";
$test = "<br />";
$test = "<br />";
$test = "<br />";

// Examples that still raise an issue
$test = "<!-- /wp:query -->"; // Noncompliant
$test = "<!-- /wp:query -->";
$test = "<!-- /wp:query -->";

$test = "/^<div>Content<\/div>$/"; // Noncompliant
$test = "/^<div>Content<\/div>$/";
$test = "/^<div>Content<\/div>$/";

$test = '<<B>%s</B>%s>'; // Noncompliant
$test = '<<B>%s</B>%s>';
$test = '<<B>%s</B>%s>';

$test = '<div><!--[if (gte mso 9)|(IE)]>'; // Noncompliant
$test = '<div><!--[if (gte mso 9)|(IE)]>';
$test = '<div><!--[if (gte mso 9)|(IE)]>';

$test = '$age < 5'; // Noncompliant
$test = '$age < 5';
$test = '$age < 5';

$test = '$name => "abc"'; // Noncompliant
$test = '$name => "abc"';
$test = '$name => "abc"';

0 comments on commit f2a888a

Please sign in to comment.