Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[ruff] Auto-add r prefix when string has no backslashes for unraw-re-pattern (RUF039) #14536

Merged
merged 3 commits into from
Nov 22, 2024
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
28 changes: 25 additions & 3 deletions crates/ruff_linter/src/rules/ruff/rules/unraw_re_pattern.rs
Original file line number Diff line number Diff line change
@@ -1,13 +1,16 @@
use std::fmt::{Display, Formatter};
use std::str::FromStr;

use ruff_diagnostics::{Diagnostic, Violation};
use ruff_diagnostics::{Diagnostic, Edit, Fix, FixAvailability, Violation};
use ruff_macros::{derive_message_formats, violation};
use ruff_python_ast::{
BytesLiteral, Expr, ExprBytesLiteral, ExprCall, ExprStringLiteral, StringLiteral,
};
use ruff_python_semantic::{Modules, SemanticModel};

use memchr::memchr;
use ruff_text_size::Ranged;

use crate::checkers::ast::Checker;

/// ## What it does
Expand Down Expand Up @@ -41,6 +44,7 @@ pub struct UnrawRePattern {
}

impl Violation for UnrawRePattern {
const FIX_AVAILABILITY: FixAvailability = FixAvailability::Sometimes;
#[derive_message_formats]
fn message(&self) -> String {
let Self { module, func, kind } = &self;
Expand Down Expand Up @@ -158,8 +162,26 @@ fn check_string(checker: &mut Checker, literal: &StringLiteral, module: RegexMod
let kind = PatternKind::String;
let func = func.to_string();
let range = literal.range;
let diagnostic = Diagnostic::new(UnrawRePattern { module, func, kind }, range);

let mut diagnostic = Diagnostic::new(UnrawRePattern { module, func, kind }, range);

if
// The (no-op) `u` prefix is a syntax error when combined with `r`
!literal.flags.prefix().is_unicode()
&& memchr(
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't the we need the performance of memchr here. That's why I would use .contains which is also easier to read

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can you help me get a sense of what size strings the performance starts to make a difference? I only mention it because I have seen some long regexes in my life...

But happy to change it to contains!

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It's less about the length of a string and more that the fix code isn't a hot path.

I don't have too good a sense for when to use memchr otherwise, beyond what the crate documentation mentions

b'\\',
// We are looking for backslash characters
// in the raw source code here, because `\n`
// gets converted to a single character already
// at the lexing stage.
checker.locator().slice(literal.range()).as_bytes(),
)
.is_none()
{
diagnostic.set_fix(Fix::safe_edit(Edit::insertion(
"r".to_string(),
literal.range().start(),
)));
}
checker.diagnostics.push(diagnostic);
}

Expand Down
Original file line number Diff line number Diff line change
@@ -1,8 +1,7 @@
---
source: crates/ruff_linter/src/rules/ruff/mod.rs
snapshot_kind: text
---
RUF039.py:5:12: RUF039 First argument to `re.compile()` is not raw string
RUF039.py:5:12: RUF039 [*] First argument to `re.compile()` is not raw string
|
4 | # Errors
5 | re.compile('single free-spacing', flags=re.X)
Expand All @@ -12,6 +11,16 @@ RUF039.py:5:12: RUF039 First argument to `re.compile()` is not raw string
|
= help: Replace with raw string

ℹ Safe fix
2 2 | import regex
3 3 |
4 4 | # Errors
5 |-re.compile('single free-spacing', flags=re.X)
5 |+re.compile(r'single free-spacing', flags=re.X)
6 6 | re.findall('si\ngle')
7 7 | re.finditer("dou\ble")
8 8 | re.fullmatch('''t\riple single''')

RUF039.py:6:12: RUF039 First argument to `re.findall()` is not raw string
|
4 | # Errors
Expand Down Expand Up @@ -56,7 +65,7 @@ RUF039.py:9:10: RUF039 First argument to `re.match()` is not raw string
|
= help: Replace with raw string

RUF039.py:10:11: RUF039 First argument to `re.search()` is not raw string
RUF039.py:10:11: RUF039 [*] First argument to `re.search()` is not raw string
|
8 | re.fullmatch('''t\riple single''')
9 | re.match("""\triple double""")
Expand All @@ -67,7 +76,17 @@ RUF039.py:10:11: RUF039 First argument to `re.search()` is not raw string
|
= help: Replace with raw string

RUF039.py:11:10: RUF039 First argument to `re.split()` is not raw string
ℹ Safe fix
7 7 | re.finditer("dou\ble")
8 8 | re.fullmatch('''t\riple single''')
9 9 | re.match("""\triple double""")
10 |-re.search('two', 'args')
10 |+re.search(r'two', 'args')
11 11 | re.split("raw", r'second')
12 12 | re.sub(u'''nicode''', u"f(?i)rst")
13 13 | re.subn(b"""ytes are""", f"\u006e")

RUF039.py:11:10: RUF039 [*] First argument to `re.split()` is not raw string
|
9 | re.match("""\triple double""")
10 | re.search('two', 'args')
Expand All @@ -78,6 +97,16 @@ RUF039.py:11:10: RUF039 First argument to `re.split()` is not raw string
|
= help: Replace with raw string

ℹ Safe fix
8 8 | re.fullmatch('''t\riple single''')
9 9 | re.match("""\triple double""")
10 10 | re.search('two', 'args')
11 |-re.split("raw", r'second')
11 |+re.split(r"raw", r'second')
12 12 | re.sub(u'''nicode''', u"f(?i)rst")
13 13 | re.subn(b"""ytes are""", f"\u006e")
14 14 |

RUF039.py:12:8: RUF039 First argument to `re.sub()` is not raw string
|
10 | re.search('two', 'args')
Expand All @@ -99,7 +128,7 @@ RUF039.py:13:9: RUF039 First argument to `re.subn()` is not raw bytes literal
|
= help: Replace with raw bytes literal

RUF039.py:15:15: RUF039 First argument to `regex.compile()` is not raw string
RUF039.py:15:15: RUF039 [*] First argument to `regex.compile()` is not raw string
|
13 | re.subn(b"""ytes are""", f"\u006e")
14 |
Expand All @@ -110,6 +139,16 @@ RUF039.py:15:15: RUF039 First argument to `regex.compile()` is not raw string
|
= help: Replace with raw string

ℹ Safe fix
12 12 | re.sub(u'''nicode''', u"f(?i)rst")
13 13 | re.subn(b"""ytes are""", f"\u006e")
14 14 |
15 |-regex.compile('single free-spacing', flags=regex.X)
15 |+regex.compile(r'single free-spacing', flags=regex.X)
16 16 | regex.findall('si\ngle')
17 17 | regex.finditer("dou\ble")
18 18 | regex.fullmatch('''t\riple single''')

RUF039.py:16:15: RUF039 First argument to `regex.findall()` is not raw string
|
15 | regex.compile('single free-spacing', flags=regex.X)
Expand Down Expand Up @@ -153,7 +192,7 @@ RUF039.py:19:13: RUF039 First argument to `regex.match()` is not raw string
|
= help: Replace with raw string

RUF039.py:20:14: RUF039 First argument to `regex.search()` is not raw string
RUF039.py:20:14: RUF039 [*] First argument to `regex.search()` is not raw string
|
18 | regex.fullmatch('''t\riple single''')
19 | regex.match("""\triple double""")
Expand All @@ -164,7 +203,17 @@ RUF039.py:20:14: RUF039 First argument to `regex.search()` is not raw string
|
= help: Replace with raw string

RUF039.py:21:13: RUF039 First argument to `regex.split()` is not raw string
ℹ Safe fix
17 17 | regex.finditer("dou\ble")
18 18 | regex.fullmatch('''t\riple single''')
19 19 | regex.match("""\triple double""")
20 |-regex.search('two', 'args')
20 |+regex.search(r'two', 'args')
21 21 | regex.split("raw", r'second')
22 22 | regex.sub(u'''nicode''', u"f(?i)rst")
23 23 | regex.subn(b"""ytes are""", f"\u006e")

RUF039.py:21:13: RUF039 [*] First argument to `regex.split()` is not raw string
|
19 | regex.match("""\triple double""")
20 | regex.search('two', 'args')
Expand All @@ -175,6 +224,16 @@ RUF039.py:21:13: RUF039 First argument to `regex.split()` is not raw string
|
= help: Replace with raw string

ℹ Safe fix
18 18 | regex.fullmatch('''t\riple single''')
19 19 | regex.match("""\triple double""")
20 20 | regex.search('two', 'args')
21 |-regex.split("raw", r'second')
21 |+regex.split(r"raw", r'second')
22 22 | regex.sub(u'''nicode''', u"f(?i)rst")
23 23 | regex.subn(b"""ytes are""", f"\u006e")
24 24 |

RUF039.py:22:11: RUF039 First argument to `regex.sub()` is not raw string
|
20 | regex.search('two', 'args')
Expand All @@ -196,7 +255,7 @@ RUF039.py:23:12: RUF039 First argument to `regex.subn()` is not raw bytes litera
|
= help: Replace with raw bytes literal

RUF039.py:25:16: RUF039 First argument to `regex.template()` is not raw string
RUF039.py:25:16: RUF039 [*] First argument to `regex.template()` is not raw string
|
23 | regex.subn(b"""ytes are""", f"\u006e")
24 |
Expand All @@ -209,3 +268,13 @@ RUF039.py:25:16: RUF039 First argument to `regex.template()` is not raw string
| |___^ RUF039
|
= help: Replace with raw string

ℹ Safe fix
22 22 | regex.sub(u'''nicode''', u"f(?i)rst")
23 23 | regex.subn(b"""ytes are""", f"\u006e")
24 24 |
25 |-regex.template("""(?m)
25 |+regex.template(r"""(?m)
26 26 | (?:ulti)?
27 27 | (?=(?<!(?<=(?!l)))
28 28 | l(?i:ne)
Loading
Loading