Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix string view ILIKE checks with NULL values #6705

Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
226 changes: 118 additions & 108 deletions arrow-string/src/like.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1720,33 +1720,36 @@ mod tests {
"%a%", // can_execute as contains("a")
"%a%b_c_d%e", // can_execute as regular expression
] {
let a = Scalar::new(StringArray::new_null(1));
let b = StringArray::new_scalar(pattern);
let r = like(&a, &b).unwrap();
assert_eq!(r.len(), 1, "With pattern {pattern}");
assert_eq!(r.null_count(), 1, "With pattern {pattern}");
assert!(r.is_null(0), "With pattern {pattern}");

let a = Scalar::new(StringArray::new_null(1));
let b = StringArray::from_iter_values([pattern]);
let r = like(&a, &b).unwrap();
assert_eq!(r.len(), 1, "With pattern {pattern}");
assert_eq!(r.null_count(), 1, "With pattern {pattern}");
assert!(r.is_null(0), "With pattern {pattern}");

let a = StringArray::new_null(1);
let b = StringArray::from_iter_values([pattern]);
let r = like(&a, &b).unwrap();
assert_eq!(r.len(), 1, "With pattern {pattern}");
assert_eq!(r.null_count(), 1, "With pattern {pattern}");
assert!(r.is_null(0), "With pattern {pattern}");

let a = StringArray::new_null(1);
let b = StringArray::new_scalar(pattern);
let r = like(&a, &b).unwrap();
assert_eq!(r.len(), 1, "With pattern {pattern}");
assert_eq!(r.null_count(), 1, "With pattern {pattern}");
assert!(r.is_null(0), "With pattern {pattern}");
// These tests focus on the null handling, but are case-insensitive
for like_f in [like, ilike, nlike, nilike] {
let a = Scalar::new(StringArray::new_null(1));
let b = StringArray::new_scalar(pattern);
let r = like_f(&a, &b).unwrap();
assert_eq!(r.len(), 1, "With pattern {pattern}");
assert_eq!(r.null_count(), 1, "With pattern {pattern}");
assert!(r.is_null(0), "With pattern {pattern}");

let a = Scalar::new(StringArray::new_null(1));
let b = StringArray::from_iter_values([pattern]);
let r = like_f(&a, &b).unwrap();
assert_eq!(r.len(), 1, "With pattern {pattern}");
assert_eq!(r.null_count(), 1, "With pattern {pattern}");
assert!(r.is_null(0), "With pattern {pattern}");

let a = StringArray::new_null(1);
let b = StringArray::from_iter_values([pattern]);
let r = like_f(&a, &b).unwrap();
assert_eq!(r.len(), 1, "With pattern {pattern}");
assert_eq!(r.null_count(), 1, "With pattern {pattern}");
assert!(r.is_null(0), "With pattern {pattern}");

let a = StringArray::new_null(1);
let b = StringArray::new_scalar(pattern);
let r = like_f(&a, &b).unwrap();
assert_eq!(r.len(), 1, "With pattern {pattern}");
assert_eq!(r.null_count(), 1, "With pattern {pattern}");
assert!(r.is_null(0), "With pattern {pattern}");
}
}
}

Expand All @@ -1763,95 +1766,102 @@ mod tests {
"%a%", // can_execute as contains("a")
"%a%b_c_d%e", // can_execute as regular expression
] {
let a = Scalar::new(StringViewArray::new_null(1));
let b = StringViewArray::new_scalar(pattern);
let r = like(&a, &b).unwrap();
assert_eq!(r.len(), 1, "With pattern {pattern}");
assert_eq!(r.null_count(), 1, "With pattern {pattern}");
assert!(r.is_null(0), "With pattern {pattern}");

let a = Scalar::new(StringViewArray::new_null(1));
let b = StringViewArray::from_iter_values([pattern]);
let r = like(&a, &b).unwrap();
assert_eq!(r.len(), 1, "With pattern {pattern}");
assert_eq!(r.null_count(), 1, "With pattern {pattern}");
assert!(r.is_null(0), "With pattern {pattern}");

let a = StringViewArray::new_null(1);
let b = StringViewArray::from_iter_values([pattern]);
let r = like(&a, &b).unwrap();
assert_eq!(r.len(), 1, "With pattern {pattern}");
assert_eq!(r.null_count(), 1, "With pattern {pattern}");
assert!(r.is_null(0), "With pattern {pattern}");

let a = StringViewArray::new_null(1);
let b = StringViewArray::new_scalar(pattern);
let r = like(&a, &b).unwrap();
assert_eq!(r.len(), 1, "With pattern {pattern}");
assert_eq!(r.null_count(), 1, "With pattern {pattern}");
assert!(r.is_null(0), "With pattern {pattern}");
// These tests focus on the null handling, but are case-insensitive
for like_f in [like, ilike, nlike, nilike] {
let a = Scalar::new(StringViewArray::new_null(1));
let b = StringViewArray::new_scalar(pattern);
let r = like_f(&a, &b).unwrap();
assert_eq!(r.len(), 1, "With pattern {pattern}");
assert_eq!(r.null_count(), 1, "With pattern {pattern}");
assert!(r.is_null(0), "With pattern {pattern}");

let a = Scalar::new(StringViewArray::new_null(1));
let b = StringViewArray::from_iter_values([pattern]);
let r = like_f(&a, &b).unwrap();
assert_eq!(r.len(), 1, "With pattern {pattern}");
assert_eq!(r.null_count(), 1, "With pattern {pattern}");
assert!(r.is_null(0), "With pattern {pattern}");

let a = StringViewArray::new_null(1);
let b = StringViewArray::from_iter_values([pattern]);
let r = like_f(&a, &b).unwrap();
assert_eq!(r.len(), 1, "With pattern {pattern}");
assert_eq!(r.null_count(), 1, "With pattern {pattern}");
assert!(r.is_null(0), "With pattern {pattern}");

let a = StringViewArray::new_null(1);
let b = StringViewArray::new_scalar(pattern);
let r = like_f(&a, &b).unwrap();
assert_eq!(r.len(), 1, "With pattern {pattern}");
assert_eq!(r.null_count(), 1, "With pattern {pattern}");
assert!(r.is_null(0), "With pattern {pattern}");
}
}
}

#[test]
fn string_like_scalar_null() {
let a = StringArray::new_scalar("a");
let b = Scalar::new(StringArray::new_null(1));
let r = like(&a, &b).unwrap();
assert_eq!(r.len(), 1);
assert_eq!(r.null_count(), 1);
assert!(r.is_null(0));

let a = StringArray::from_iter_values(["a"]);
let b = Scalar::new(StringArray::new_null(1));
let r = like(&a, &b).unwrap();
assert_eq!(r.len(), 1);
assert_eq!(r.null_count(), 1);
assert!(r.is_null(0));

let a = StringArray::from_iter_values(["a"]);
let b = StringArray::new_null(1);
let r = like(&a, &b).unwrap();
assert_eq!(r.len(), 1);
assert_eq!(r.null_count(), 1);
assert!(r.is_null(0));

let a = StringArray::new_scalar("a");
let b = StringArray::new_null(1);
let r = like(&a, &b).unwrap();
assert_eq!(r.len(), 1);
assert_eq!(r.null_count(), 1);
assert!(r.is_null(0));
for like_f in [like, ilike, nlike, nilike] {
let a = StringArray::new_scalar("a");
let b = Scalar::new(StringArray::new_null(1));
let r = like_f(&a, &b).unwrap();
assert_eq!(r.len(), 1);
assert_eq!(r.null_count(), 1);
assert!(r.is_null(0));

let a = StringArray::from_iter_values(["a"]);
let b = Scalar::new(StringArray::new_null(1));
let r = like_f(&a, &b).unwrap();
assert_eq!(r.len(), 1);
assert_eq!(r.null_count(), 1);
assert!(r.is_null(0));

let a = StringArray::from_iter_values(["a"]);
let b = StringArray::new_null(1);
let r = like_f(&a, &b).unwrap();
assert_eq!(r.len(), 1);
assert_eq!(r.null_count(), 1);
assert!(r.is_null(0));

let a = StringArray::new_scalar("a");
let b = StringArray::new_null(1);
let r = like_f(&a, &b).unwrap();
assert_eq!(r.len(), 1);
assert_eq!(r.null_count(), 1);
assert!(r.is_null(0));
}
}

#[test]
fn string_view_like_scalar_null() {
let a = StringViewArray::new_scalar("a");
let b = Scalar::new(StringViewArray::new_null(1));
let r = like(&a, &b).unwrap();
assert_eq!(r.len(), 1);
assert_eq!(r.null_count(), 1);
assert!(r.is_null(0));

let a = StringViewArray::from_iter_values(["a"]);
let b = Scalar::new(StringViewArray::new_null(1));
let r = like(&a, &b).unwrap();
assert_eq!(r.len(), 1);
assert_eq!(r.null_count(), 1);
assert!(r.is_null(0));

let a = StringViewArray::from_iter_values(["a"]);
let b = StringViewArray::new_null(1);
let r = like(&a, &b).unwrap();
assert_eq!(r.len(), 1);
assert_eq!(r.null_count(), 1);
assert!(r.is_null(0));

let a = StringViewArray::new_scalar("a");
let b = StringViewArray::new_null(1);
let r = like(&a, &b).unwrap();
assert_eq!(r.len(), 1);
assert_eq!(r.null_count(), 1);
assert!(r.is_null(0));
for like_f in [like, ilike, nlike, nilike] {
let a = StringViewArray::new_scalar("a");
let b = Scalar::new(StringViewArray::new_null(1));
let r = like_f(&a, &b).unwrap();
assert_eq!(r.len(), 1);
assert_eq!(r.null_count(), 1);
assert!(r.is_null(0));

let a = StringViewArray::from_iter_values(["a"]);
let b = Scalar::new(StringViewArray::new_null(1));
let r = like_f(&a, &b).unwrap();
assert_eq!(r.len(), 1);
assert_eq!(r.null_count(), 1);
assert!(r.is_null(0));

let a = StringViewArray::from_iter_values(["a"]);
let b = StringViewArray::new_null(1);
let r = like_f(&a, &b).unwrap();
assert_eq!(r.len(), 1);
assert_eq!(r.null_count(), 1);
assert!(r.is_null(0));

let a = StringViewArray::new_scalar("a");
let b = StringViewArray::new_null(1);
let r = like_f(&a, &b).unwrap();
assert_eq!(r.len(), 1);
assert_eq!(r.null_count(), 1);
assert!(r.is_null(0));
}
}
}
14 changes: 8 additions & 6 deletions arrow-string/src/predicate.rs
Original file line number Diff line number Diff line change
Expand Up @@ -138,8 +138,8 @@ impl<'a> Predicate<'a> {
}
Predicate::IStartsWithAscii(v) => {
if let Some(string_view_array) = array.as_any().downcast_ref::<StringViewArray>() {
// TODO respect null buffer
BooleanArray::from(
let nulls = string_view_array.logical_nulls();
let values = BooleanBuffer::from(
string_view_array
.prefix_bytes_iter(v.len())
.map(|haystack| {
Expand All @@ -150,7 +150,8 @@ impl<'a> Predicate<'a> {
) != negate
})
.collect::<Vec<_>>(),
)
);
BooleanArray::new(values, nulls)
} else {
BooleanArray::from_unary(array, |haystack| {
starts_with(haystack, v, equals_ignore_ascii_case_kernel) != negate
Expand All @@ -177,8 +178,8 @@ impl<'a> Predicate<'a> {
}
Predicate::IEndsWithAscii(v) => {
if let Some(string_view_array) = array.as_any().downcast_ref::<StringViewArray>() {
// TODO respect null buffer
BooleanArray::from(
let nulls = string_view_array.logical_nulls();
let values = BooleanBuffer::from(
string_view_array
.suffix_bytes_iter(v.len())
.map(|haystack| {
Expand All @@ -189,7 +190,8 @@ impl<'a> Predicate<'a> {
) != negate
})
.collect::<Vec<_>>(),
)
);
BooleanArray::new(values, nulls)
} else {
BooleanArray::from_unary(array, |haystack| {
ends_with(haystack, v, equals_ignore_ascii_case_kernel) != negate
Expand Down
Loading