Skip to content

Commit

Permalink
fix: Escape regex symbols in like operator (#22)
Browse files: browse the repository at this point in the history
  • Loading branch information
waralexrom authored Sep 27, 2023
1 parent 93c26ef commit c296882
Showing 1 changed file with 37 additions and 23 deletions.
60 changes: 37 additions & 23 deletions arrow/src/compute/kernels/comparison.rs
Original file line number Diff line number Diff line change
Expand Up @@ -261,29 +261,43 @@ fn like_utf8_impl<OffsetSize: StringOffsetSizeTrait>(
let re = if let Some(ref regex) = map.get(pat) {
regex
} else {
let mut prev_char = None;
let mut re_pattern = pat
.replace(
|c| {
let res = c == '%' && prev_char != Some('\\');
prev_char = Some(c);
res
},
".*",
)
.replace("\\%", "%");

let mut prev_char = None;
re_pattern = re_pattern
.replace(
|c| {
let res = c == '_' && prev_char != Some('\\');
prev_char = Some(c);
res
},
".",
)
.replace("\\_", "_");
let mut is_escaped = false;
let mut re_pattern = String::new();
let regex_chars = "-[]{}()*+?.,^$|#";
for c in pat.chars() {
if is_escaped {
is_escaped = false;
if c == '%' {
re_pattern.push('%');
continue;
} else if c == '_' {
re_pattern.push('_');
continue;
} else if c == '\\' {
re_pattern.push_str("\\\\");
continue;
}
}

if regex_chars.find(c).is_some() {
re_pattern.push('\\');
re_pattern.push(c);
} else if c == '%' {
re_pattern.push_str(".*");
} else if c == '_' {
re_pattern.push('.');
} else if c == '\\' {
is_escaped = true;
} else {
re_pattern.push(c);
}
}
if is_escaped {
return Err(ArrowError::InvalidArgumentError(format!(
"LIKE pattern must not end with escape character. Pattern {}",
pat
)));
}
let re = RegexBuilder::new(&format!("^{}$", re_pattern))
.case_insensitive(!case_sensitive)
.build()
Expand Down

0 comments on commit c296882

Please sign in to comment.