diff --git a/tig-algorithms/src/vector_search/invector_hybrid/benchmarker_outbound.rs b/tig-algorithms/src/vector_search/invector_hybrid/benchmarker_outbound.rs new file mode 100644 index 00000000..6a1aa55e --- /dev/null +++ b/tig-algorithms/src/vector_search/invector_hybrid/benchmarker_outbound.rs @@ -0,0 +1,395 @@ +/*! +Copyright 2024 syebastian + +Licensed under the TIG Benchmarker Outbound Game License v1.0 (the "License"); you +may not use this file except in compliance with the License. You may obtain a copy +of the License at + +https://github.com/tig-foundation/tig-monorepo/tree/main/docs/licenses + +Unless required by applicable law or agreed to in writing, software distributed +under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +CONDITIONS OF ANY KIND, either express or implied. See the License for the specific +language governing permissions and limitations under the License. +*/ + + +use anyhow::Ok; +use tig_challenges::vector_search::*; +use std::cmp::Ordering; +use std::collections::BinaryHeap; + +struct KDNode<'a> { + point: &'a [f32], + left: Option>>, + right: Option>>, + index: usize, +} + +impl<'a> KDNode<'a> { + fn new(point: &'a [f32], index: usize) -> Self { + KDNode { + point, + left: None, + right: None, + index, + } + } +} +fn quickselect_by(arr: &mut [(&[f32], usize)], k: usize, compare: &F) +where + F: Fn(&(&[f32], usize), &(&[f32], usize)) -> Ordering, +{ + if arr.len() <= 1 { + return; + } + + let pivot_index = partition(arr, compare); + if k < pivot_index { + quickselect_by(&mut arr[..pivot_index], k, compare); + } else if k > pivot_index { + quickselect_by(&mut arr[pivot_index + 1..], k - pivot_index - 1, compare); + } +} + +fn partition(arr: &mut [(&[f32], usize)], compare: &F) -> usize +where + F: Fn(&(&[f32], usize), &(&[f32], usize)) -> Ordering, +{ + let pivot_index = arr.len() >> 1; + arr.swap(pivot_index, arr.len() - 1); + + let mut store_index = 0; + for i in 0..arr.len() - 1 { + if compare(&arr[i], &arr[arr.len() - 1]) == Ordering::Less { + arr.swap(i, store_index); + store_index += 1; + } + } + arr.swap(store_index, arr.len() - 1); + store_index +} + +fn build_kd_tree<'a>(points: &mut [(&'a [f32], usize)]) -> Option>> { + if points.is_empty() { + return None; + } + + const NUM_DIMENSIONS: usize = 250; + let mut stack: Vec<(usize, usize, usize, Option<*mut KDNode<'a>>, bool)> = Vec::new(); + let mut root: Option>> = None; + + stack.push((0, points.len(), 0, None, false)); + + while let Some((start, end, depth, parent_ptr, is_left)) = stack.pop() { + if start >= end { + continue; + } + + let axis = depth % NUM_DIMENSIONS; + let median = (start + end) / 2; + quickselect_by(&mut points[start..end], median - start, &|a, b| { + a.0[axis].partial_cmp(&b.0[axis]).unwrap() + }); + + let (median_point, median_index) = points[median]; + let mut new_node = Box::new(KDNode::new(median_point, median_index)); + let new_node_ptr: *mut KDNode = &mut *new_node; + + if let Some(parent_ptr) = parent_ptr { + unsafe { + if is_left { + (*parent_ptr).left = Some(new_node); + } else { + (*parent_ptr).right = Some(new_node); + } + } + } else { + root = Some(new_node); + } + + stack.push((median + 1, end, depth + 1, Some(new_node_ptr), false)); + stack.push((start, median, depth + 1, Some(new_node_ptr), true)); + } + + root +} + +#[inline(always)] +fn squared_euclidean_distance(a: &[f32], b: &[f32]) -> f32 { + let mut sum = 0.0; + let mut i = 0; + let len = a.len(); + + if a.len() != b.len() || a.len() < 8 { + return f32::MAX; + } + + while i + 7 < len { + unsafe { + let diff0 = *a.get_unchecked(i) - *b.get_unchecked(i); + let diff1 = *a.get_unchecked(i + 1) - *b.get_unchecked(i + 1); + let diff2 = *a.get_unchecked(i + 2) - *b.get_unchecked(i + 2); + let diff3 = *a.get_unchecked(i + 3) - *b.get_unchecked(i + 3); + let diff4 = *a.get_unchecked(i + 4) - *b.get_unchecked(i + 4); + let diff5 = *a.get_unchecked(i + 5) - *b.get_unchecked(i + 5); + let diff6 = *a.get_unchecked(i + 6) - *b.get_unchecked(i + 6); + let diff7 = *a.get_unchecked(i + 7) - *b.get_unchecked(i + 7); + + sum += diff0 * diff0 + diff1 * diff1 + diff2 * diff2 + diff3 * diff3 + + diff4 * diff4 + diff5 * diff5 + diff6 * diff6 + diff7 * diff7; + } + + i += 8; + } + + while i < len { + unsafe { + let diff = *a.get_unchecked(i) - *b.get_unchecked(i); + sum += diff * diff; + } + i += 1; + } + sum +} + +#[inline(always)] +fn early_stopping_distance(a: &[f32], b: &[f32], current_min: f32) -> f32 { + let mut sum = 0.0; + let mut i = 0; + let len = a.len(); + + if a.len() != b.len() || a.len() < 8 { + return f32::MAX; + } + + while i + 7 < len { + unsafe { + let diff0 = *a.get_unchecked(i) - *b.get_unchecked(i); + let diff1 = *a.get_unchecked(i + 1) - *b.get_unchecked(i + 1); + let diff2 = *a.get_unchecked(i + 2) - *b.get_unchecked(i + 2); + let diff3 = *a.get_unchecked(i + 3) - *b.get_unchecked(i + 3); + let diff4 = *a.get_unchecked(i + 4) - *b.get_unchecked(i + 4); + let diff5 = *a.get_unchecked(i + 5) - *b.get_unchecked(i + 5); + let diff6 = *a.get_unchecked(i + 6) - *b.get_unchecked(i + 6); + let diff7 = *a.get_unchecked(i + 7) - *b.get_unchecked(i + 7); + + sum += diff0 * diff0 + diff1 * diff1 + diff2 * diff2 + diff3 * diff3 + + diff4 * diff4 + diff5 * diff5 + diff6 * diff6 + diff7 * diff7; + } + + if sum > current_min { + return f32::MAX; + } + + i += 8; + } + + while i < len { + unsafe { + let diff = *a.get_unchecked(i) - *b.get_unchecked(i); + sum += diff * diff; + } + i += 1; + } + sum +} + +fn nearest_neighbor_search<'a>( + root: &Option>>, + target: &[f32], + best: &mut (f32, Option), +) { + let num_dimensions = target.len(); + let mut stack = Vec::with_capacity(64); + + if let Some(node) = root { + stack.push((node.as_ref(), 0)); + } + + while let Some((node, depth)) = stack.pop() { + let axis = depth % num_dimensions; + let dist = early_stopping_distance(&node.point, target, best.0); + + if dist < best.0 { + best.0 = dist; + best.1 = Some(node.index); + } + + let diff = target[axis] - node.point[axis]; + let sqr_diff = diff * diff; + + let (nearer, farther) = if diff < 0.0 { + (&node.left, &node.right) + } else { + (&node.right, &node.left) + }; + + if let Some(nearer_node) = nearer { + stack.push((nearer_node.as_ref(), depth + 1)); + } + + if sqr_diff < best.0 { + if let Some(farther_node) = farther { + stack.push((farther_node.as_ref(), depth + 1)); + } + } + } +} +fn calculate_mean_vector(vectors: &[&[f32]]) -> Vec { + let num_vectors = vectors.len(); + let num_dimensions = 250; + + let mut mean_vector = vec![0.0f64; num_dimensions]; + + for vector in vectors { + for i in 0..num_dimensions { + mean_vector[i] += vector[i] as f64; + } + } + for i in 0..num_dimensions { + mean_vector[i] /= num_vectors as f64; + } + mean_vector.into_iter().map(|x| x as f32).collect() +} + +#[derive(Debug)] +struct FloatOrd(f32); + +impl PartialEq for FloatOrd { + fn eq(&self, other: &Self) -> bool { + self.0 == other.0 + } +} + +impl Eq for FloatOrd {} + +impl PartialOrd for FloatOrd { + fn partial_cmp(&self, other: &Self) -> Option { + self.0.partial_cmp(&other.0) + } +} + +impl Ord for FloatOrd { + fn cmp(&self, other: &Self) -> Ordering { + + self.partial_cmp(other).unwrap_or(Ordering::Equal) + } +} + +fn filter_relevant_vectors<'a>( + database: &'a [Vec], + query_vectors: &[Vec], + k: usize, +) -> Vec<(f32, &'a [f32], usize)> { + let query_refs: Vec<&[f32]> = query_vectors.iter().map(|v| &v[..]).collect(); + let mean_query_vector = calculate_mean_vector(&query_refs); + + let mut heap: BinaryHeap<(FloatOrd, usize)> = BinaryHeap::with_capacity(k); + + for (index, vector) in database.iter().enumerate() { + if heap.len() < k + { + let dist = squared_euclidean_distance(&mean_query_vector, vector); + let ord_dist = FloatOrd(dist); + + heap.push((ord_dist, index)); + } else if let Some(&(FloatOrd(top_dist), _)) = heap.peek() + { + let dist = early_stopping_distance(&mean_query_vector, vector, top_dist); + let ord_dist = FloatOrd(dist); + if dist < top_dist { + heap.pop(); + heap.push((ord_dist, index)); + } + } + } + heap.into_sorted_vec() + .into_iter() + .map(|(FloatOrd(dist), index)| (dist, &database[index][..], index)) + .collect() +} + +pub fn solve_challenge(challenge: &Challenge) -> anyhow::Result> { + let query_count = challenge.query_vectors.len(); + + let max_fuel = 2000000000.0; + let base_fuel = 760000000.0; + let alpha = 1700.0 * challenge.difficulty.num_queries as f64; + + let m = ((max_fuel - base_fuel) / alpha) as usize; + let n = (m as f32 * 1.2) as usize; + let r = n - m; + + let closest_vectors = filter_relevant_vectors( + &challenge.vector_database, + &challenge.query_vectors, + n, + ); + + let (m_slice, r_slice) = closest_vectors.split_at(m); + let m_vectors: Vec<_> = m_slice.to_vec(); + let r_vectors: Vec<_> = r_slice.to_vec(); + + let mut kd_tree_vectors: Vec<(&[f32], usize)> = m_vectors.iter().map(|&(_, v, i)| (v, i)).collect(); + let kd_tree = build_kd_tree(&mut kd_tree_vectors); + + let mut best_indexes = Vec::with_capacity(query_count); + let mut distances = Vec::with_capacity(query_count); + + for query in &challenge.query_vectors { + let mut best = (std::f32::MAX, None); + nearest_neighbor_search(&kd_tree, query, &mut best); + + distances.push(best.0); + best_indexes.push(best.1.unwrap_or(0)); + } + + let brute_force_count = (query_count as f32 * 0.1) as usize; + let mut distance_indices: Vec<_> = distances.iter().enumerate().collect(); + distance_indices.sort_unstable_by(|a, b| b.1.partial_cmp(a.1).unwrap()); + let high_distance_indices: Vec<_> = distance_indices.into_iter() + .take(brute_force_count) + .map(|(index, _)| index) + .collect(); + + for &query_index in &high_distance_indices { + let query = &challenge.query_vectors[query_index]; + let mut best = (distances[query_index], best_indexes[query_index]); + + for &(_, vec, index) in &r_vectors { + let dist = squared_euclidean_distance(query, vec); + if dist < best.0 { + best = (dist, index); + } + } + + best_indexes[query_index] = best.1; + } + + Ok(Some(Solution { + indexes: best_indexes, + })) +} + +#[cfg(feature = "cuda")] +mod gpu_optimisation { + use super::*; + use cudarc::driver::*; + use std::{collections::HashMap, sync::Arc}; + use tig_challenges::CudaKernel; + + // set KERNEL to None if algorithm only has a CPU implementation + pub const KERNEL: Option = None; + + // Important! your GPU and CPU version of the algorithm should return the same result + pub fn cuda_solve_challenge( + challenge: &Challenge, + dev: &Arc, + mut funcs: HashMap<&'static str, CudaFunction>, + ) -> anyhow::Result> { + solve_challenge(challenge) + } +} +#[cfg(feature = "cuda")] +pub use gpu_optimisation::{cuda_solve_challenge, KERNEL}; diff --git a/tig-algorithms/src/vector_search/invector_hybrid/commercial.rs b/tig-algorithms/src/vector_search/invector_hybrid/commercial.rs new file mode 100644 index 00000000..c82abc35 --- /dev/null +++ b/tig-algorithms/src/vector_search/invector_hybrid/commercial.rs @@ -0,0 +1,395 @@ +/*! +Copyright 2024 syebastian + +Licensed under the TIG Commercial License v1.0 (the "License"); you +may not use this file except in compliance with the License. You may obtain a copy +of the License at + +https://github.com/tig-foundation/tig-monorepo/tree/main/docs/licenses + +Unless required by applicable law or agreed to in writing, software distributed +under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +CONDITIONS OF ANY KIND, either express or implied. See the License for the specific +language governing permissions and limitations under the License. +*/ + + +use anyhow::Ok; +use tig_challenges::vector_search::*; +use std::cmp::Ordering; +use std::collections::BinaryHeap; + +struct KDNode<'a> { + point: &'a [f32], + left: Option>>, + right: Option>>, + index: usize, +} + +impl<'a> KDNode<'a> { + fn new(point: &'a [f32], index: usize) -> Self { + KDNode { + point, + left: None, + right: None, + index, + } + } +} +fn quickselect_by(arr: &mut [(&[f32], usize)], k: usize, compare: &F) +where + F: Fn(&(&[f32], usize), &(&[f32], usize)) -> Ordering, +{ + if arr.len() <= 1 { + return; + } + + let pivot_index = partition(arr, compare); + if k < pivot_index { + quickselect_by(&mut arr[..pivot_index], k, compare); + } else if k > pivot_index { + quickselect_by(&mut arr[pivot_index + 1..], k - pivot_index - 1, compare); + } +} + +fn partition(arr: &mut [(&[f32], usize)], compare: &F) -> usize +where + F: Fn(&(&[f32], usize), &(&[f32], usize)) -> Ordering, +{ + let pivot_index = arr.len() >> 1; + arr.swap(pivot_index, arr.len() - 1); + + let mut store_index = 0; + for i in 0..arr.len() - 1 { + if compare(&arr[i], &arr[arr.len() - 1]) == Ordering::Less { + arr.swap(i, store_index); + store_index += 1; + } + } + arr.swap(store_index, arr.len() - 1); + store_index +} + +fn build_kd_tree<'a>(points: &mut [(&'a [f32], usize)]) -> Option>> { + if points.is_empty() { + return None; + } + + const NUM_DIMENSIONS: usize = 250; + let mut stack: Vec<(usize, usize, usize, Option<*mut KDNode<'a>>, bool)> = Vec::new(); + let mut root: Option>> = None; + + stack.push((0, points.len(), 0, None, false)); + + while let Some((start, end, depth, parent_ptr, is_left)) = stack.pop() { + if start >= end { + continue; + } + + let axis = depth % NUM_DIMENSIONS; + let median = (start + end) / 2; + quickselect_by(&mut points[start..end], median - start, &|a, b| { + a.0[axis].partial_cmp(&b.0[axis]).unwrap() + }); + + let (median_point, median_index) = points[median]; + let mut new_node = Box::new(KDNode::new(median_point, median_index)); + let new_node_ptr: *mut KDNode = &mut *new_node; + + if let Some(parent_ptr) = parent_ptr { + unsafe { + if is_left { + (*parent_ptr).left = Some(new_node); + } else { + (*parent_ptr).right = Some(new_node); + } + } + } else { + root = Some(new_node); + } + + stack.push((median + 1, end, depth + 1, Some(new_node_ptr), false)); + stack.push((start, median, depth + 1, Some(new_node_ptr), true)); + } + + root +} + +#[inline(always)] +fn squared_euclidean_distance(a: &[f32], b: &[f32]) -> f32 { + let mut sum = 0.0; + let mut i = 0; + let len = a.len(); + + if a.len() != b.len() || a.len() < 8 { + return f32::MAX; + } + + while i + 7 < len { + unsafe { + let diff0 = *a.get_unchecked(i) - *b.get_unchecked(i); + let diff1 = *a.get_unchecked(i + 1) - *b.get_unchecked(i + 1); + let diff2 = *a.get_unchecked(i + 2) - *b.get_unchecked(i + 2); + let diff3 = *a.get_unchecked(i + 3) - *b.get_unchecked(i + 3); + let diff4 = *a.get_unchecked(i + 4) - *b.get_unchecked(i + 4); + let diff5 = *a.get_unchecked(i + 5) - *b.get_unchecked(i + 5); + let diff6 = *a.get_unchecked(i + 6) - *b.get_unchecked(i + 6); + let diff7 = *a.get_unchecked(i + 7) - *b.get_unchecked(i + 7); + + sum += diff0 * diff0 + diff1 * diff1 + diff2 * diff2 + diff3 * diff3 + + diff4 * diff4 + diff5 * diff5 + diff6 * diff6 + diff7 * diff7; + } + + i += 8; + } + + while i < len { + unsafe { + let diff = *a.get_unchecked(i) - *b.get_unchecked(i); + sum += diff * diff; + } + i += 1; + } + sum +} + +#[inline(always)] +fn early_stopping_distance(a: &[f32], b: &[f32], current_min: f32) -> f32 { + let mut sum = 0.0; + let mut i = 0; + let len = a.len(); + + if a.len() != b.len() || a.len() < 8 { + return f32::MAX; + } + + while i + 7 < len { + unsafe { + let diff0 = *a.get_unchecked(i) - *b.get_unchecked(i); + let diff1 = *a.get_unchecked(i + 1) - *b.get_unchecked(i + 1); + let diff2 = *a.get_unchecked(i + 2) - *b.get_unchecked(i + 2); + let diff3 = *a.get_unchecked(i + 3) - *b.get_unchecked(i + 3); + let diff4 = *a.get_unchecked(i + 4) - *b.get_unchecked(i + 4); + let diff5 = *a.get_unchecked(i + 5) - *b.get_unchecked(i + 5); + let diff6 = *a.get_unchecked(i + 6) - *b.get_unchecked(i + 6); + let diff7 = *a.get_unchecked(i + 7) - *b.get_unchecked(i + 7); + + sum += diff0 * diff0 + diff1 * diff1 + diff2 * diff2 + diff3 * diff3 + + diff4 * diff4 + diff5 * diff5 + diff6 * diff6 + diff7 * diff7; + } + + if sum > current_min { + return f32::MAX; + } + + i += 8; + } + + while i < len { + unsafe { + let diff = *a.get_unchecked(i) - *b.get_unchecked(i); + sum += diff * diff; + } + i += 1; + } + sum +} + +fn nearest_neighbor_search<'a>( + root: &Option>>, + target: &[f32], + best: &mut (f32, Option), +) { + let num_dimensions = target.len(); + let mut stack = Vec::with_capacity(64); + + if let Some(node) = root { + stack.push((node.as_ref(), 0)); + } + + while let Some((node, depth)) = stack.pop() { + let axis = depth % num_dimensions; + let dist = early_stopping_distance(&node.point, target, best.0); + + if dist < best.0 { + best.0 = dist; + best.1 = Some(node.index); + } + + let diff = target[axis] - node.point[axis]; + let sqr_diff = diff * diff; + + let (nearer, farther) = if diff < 0.0 { + (&node.left, &node.right) + } else { + (&node.right, &node.left) + }; + + if let Some(nearer_node) = nearer { + stack.push((nearer_node.as_ref(), depth + 1)); + } + + if sqr_diff < best.0 { + if let Some(farther_node) = farther { + stack.push((farther_node.as_ref(), depth + 1)); + } + } + } +} +fn calculate_mean_vector(vectors: &[&[f32]]) -> Vec { + let num_vectors = vectors.len(); + let num_dimensions = 250; + + let mut mean_vector = vec![0.0f64; num_dimensions]; + + for vector in vectors { + for i in 0..num_dimensions { + mean_vector[i] += vector[i] as f64; + } + } + for i in 0..num_dimensions { + mean_vector[i] /= num_vectors as f64; + } + mean_vector.into_iter().map(|x| x as f32).collect() +} + +#[derive(Debug)] +struct FloatOrd(f32); + +impl PartialEq for FloatOrd { + fn eq(&self, other: &Self) -> bool { + self.0 == other.0 + } +} + +impl Eq for FloatOrd {} + +impl PartialOrd for FloatOrd { + fn partial_cmp(&self, other: &Self) -> Option { + self.0.partial_cmp(&other.0) + } +} + +impl Ord for FloatOrd { + fn cmp(&self, other: &Self) -> Ordering { + + self.partial_cmp(other).unwrap_or(Ordering::Equal) + } +} + +fn filter_relevant_vectors<'a>( + database: &'a [Vec], + query_vectors: &[Vec], + k: usize, +) -> Vec<(f32, &'a [f32], usize)> { + let query_refs: Vec<&[f32]> = query_vectors.iter().map(|v| &v[..]).collect(); + let mean_query_vector = calculate_mean_vector(&query_refs); + + let mut heap: BinaryHeap<(FloatOrd, usize)> = BinaryHeap::with_capacity(k); + + for (index, vector) in database.iter().enumerate() { + if heap.len() < k + { + let dist = squared_euclidean_distance(&mean_query_vector, vector); + let ord_dist = FloatOrd(dist); + + heap.push((ord_dist, index)); + } else if let Some(&(FloatOrd(top_dist), _)) = heap.peek() + { + let dist = early_stopping_distance(&mean_query_vector, vector, top_dist); + let ord_dist = FloatOrd(dist); + if dist < top_dist { + heap.pop(); + heap.push((ord_dist, index)); + } + } + } + heap.into_sorted_vec() + .into_iter() + .map(|(FloatOrd(dist), index)| (dist, &database[index][..], index)) + .collect() +} + +pub fn solve_challenge(challenge: &Challenge) -> anyhow::Result> { + let query_count = challenge.query_vectors.len(); + + let max_fuel = 2000000000.0; + let base_fuel = 760000000.0; + let alpha = 1700.0 * challenge.difficulty.num_queries as f64; + + let m = ((max_fuel - base_fuel) / alpha) as usize; + let n = (m as f32 * 1.2) as usize; + let r = n - m; + + let closest_vectors = filter_relevant_vectors( + &challenge.vector_database, + &challenge.query_vectors, + n, + ); + + let (m_slice, r_slice) = closest_vectors.split_at(m); + let m_vectors: Vec<_> = m_slice.to_vec(); + let r_vectors: Vec<_> = r_slice.to_vec(); + + let mut kd_tree_vectors: Vec<(&[f32], usize)> = m_vectors.iter().map(|&(_, v, i)| (v, i)).collect(); + let kd_tree = build_kd_tree(&mut kd_tree_vectors); + + let mut best_indexes = Vec::with_capacity(query_count); + let mut distances = Vec::with_capacity(query_count); + + for query in &challenge.query_vectors { + let mut best = (std::f32::MAX, None); + nearest_neighbor_search(&kd_tree, query, &mut best); + + distances.push(best.0); + best_indexes.push(best.1.unwrap_or(0)); + } + + let brute_force_count = (query_count as f32 * 0.1) as usize; + let mut distance_indices: Vec<_> = distances.iter().enumerate().collect(); + distance_indices.sort_unstable_by(|a, b| b.1.partial_cmp(a.1).unwrap()); + let high_distance_indices: Vec<_> = distance_indices.into_iter() + .take(brute_force_count) + .map(|(index, _)| index) + .collect(); + + for &query_index in &high_distance_indices { + let query = &challenge.query_vectors[query_index]; + let mut best = (distances[query_index], best_indexes[query_index]); + + for &(_, vec, index) in &r_vectors { + let dist = squared_euclidean_distance(query, vec); + if dist < best.0 { + best = (dist, index); + } + } + + best_indexes[query_index] = best.1; + } + + Ok(Some(Solution { + indexes: best_indexes, + })) +} + +#[cfg(feature = "cuda")] +mod gpu_optimisation { + use super::*; + use cudarc::driver::*; + use std::{collections::HashMap, sync::Arc}; + use tig_challenges::CudaKernel; + + // set KERNEL to None if algorithm only has a CPU implementation + pub const KERNEL: Option = None; + + // Important! your GPU and CPU version of the algorithm should return the same result + pub fn cuda_solve_challenge( + challenge: &Challenge, + dev: &Arc, + mut funcs: HashMap<&'static str, CudaFunction>, + ) -> anyhow::Result> { + solve_challenge(challenge) + } +} +#[cfg(feature = "cuda")] +pub use gpu_optimisation::{cuda_solve_challenge, KERNEL}; diff --git a/tig-algorithms/src/vector_search/invector_hybrid/inbound.rs b/tig-algorithms/src/vector_search/invector_hybrid/inbound.rs new file mode 100644 index 00000000..9a283e9d --- /dev/null +++ b/tig-algorithms/src/vector_search/invector_hybrid/inbound.rs @@ -0,0 +1,395 @@ +/*! +Copyright 2024 syebastian + +Licensed under the TIG Inbound Game License v1.0 or (at your option) any later +version (the "License"); you may not use this file except in compliance with the +License. You may obtain a copy of the License at + +https://github.com/tig-foundation/tig-monorepo/tree/main/docs/licenses + +Unless required by applicable law or agreed to in writing, software distributed +under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +CONDITIONS OF ANY KIND, either express or implied. See the License for the specific +language governing permissions and limitations under the License. +*/ + + +use anyhow::Ok; +use tig_challenges::vector_search::*; +use std::cmp::Ordering; +use std::collections::BinaryHeap; + +struct KDNode<'a> { + point: &'a [f32], + left: Option>>, + right: Option>>, + index: usize, +} + +impl<'a> KDNode<'a> { + fn new(point: &'a [f32], index: usize) -> Self { + KDNode { + point, + left: None, + right: None, + index, + } + } +} +fn quickselect_by(arr: &mut [(&[f32], usize)], k: usize, compare: &F) +where + F: Fn(&(&[f32], usize), &(&[f32], usize)) -> Ordering, +{ + if arr.len() <= 1 { + return; + } + + let pivot_index = partition(arr, compare); + if k < pivot_index { + quickselect_by(&mut arr[..pivot_index], k, compare); + } else if k > pivot_index { + quickselect_by(&mut arr[pivot_index + 1..], k - pivot_index - 1, compare); + } +} + +fn partition(arr: &mut [(&[f32], usize)], compare: &F) -> usize +where + F: Fn(&(&[f32], usize), &(&[f32], usize)) -> Ordering, +{ + let pivot_index = arr.len() >> 1; + arr.swap(pivot_index, arr.len() - 1); + + let mut store_index = 0; + for i in 0..arr.len() - 1 { + if compare(&arr[i], &arr[arr.len() - 1]) == Ordering::Less { + arr.swap(i, store_index); + store_index += 1; + } + } + arr.swap(store_index, arr.len() - 1); + store_index +} + +fn build_kd_tree<'a>(points: &mut [(&'a [f32], usize)]) -> Option>> { + if points.is_empty() { + return None; + } + + const NUM_DIMENSIONS: usize = 250; + let mut stack: Vec<(usize, usize, usize, Option<*mut KDNode<'a>>, bool)> = Vec::new(); + let mut root: Option>> = None; + + stack.push((0, points.len(), 0, None, false)); + + while let Some((start, end, depth, parent_ptr, is_left)) = stack.pop() { + if start >= end { + continue; + } + + let axis = depth % NUM_DIMENSIONS; + let median = (start + end) / 2; + quickselect_by(&mut points[start..end], median - start, &|a, b| { + a.0[axis].partial_cmp(&b.0[axis]).unwrap() + }); + + let (median_point, median_index) = points[median]; + let mut new_node = Box::new(KDNode::new(median_point, median_index)); + let new_node_ptr: *mut KDNode = &mut *new_node; + + if let Some(parent_ptr) = parent_ptr { + unsafe { + if is_left { + (*parent_ptr).left = Some(new_node); + } else { + (*parent_ptr).right = Some(new_node); + } + } + } else { + root = Some(new_node); + } + + stack.push((median + 1, end, depth + 1, Some(new_node_ptr), false)); + stack.push((start, median, depth + 1, Some(new_node_ptr), true)); + } + + root +} + +#[inline(always)] +fn squared_euclidean_distance(a: &[f32], b: &[f32]) -> f32 { + let mut sum = 0.0; + let mut i = 0; + let len = a.len(); + + if a.len() != b.len() || a.len() < 8 { + return f32::MAX; + } + + while i + 7 < len { + unsafe { + let diff0 = *a.get_unchecked(i) - *b.get_unchecked(i); + let diff1 = *a.get_unchecked(i + 1) - *b.get_unchecked(i + 1); + let diff2 = *a.get_unchecked(i + 2) - *b.get_unchecked(i + 2); + let diff3 = *a.get_unchecked(i + 3) - *b.get_unchecked(i + 3); + let diff4 = *a.get_unchecked(i + 4) - *b.get_unchecked(i + 4); + let diff5 = *a.get_unchecked(i + 5) - *b.get_unchecked(i + 5); + let diff6 = *a.get_unchecked(i + 6) - *b.get_unchecked(i + 6); + let diff7 = *a.get_unchecked(i + 7) - *b.get_unchecked(i + 7); + + sum += diff0 * diff0 + diff1 * diff1 + diff2 * diff2 + diff3 * diff3 + + diff4 * diff4 + diff5 * diff5 + diff6 * diff6 + diff7 * diff7; + } + + i += 8; + } + + while i < len { + unsafe { + let diff = *a.get_unchecked(i) - *b.get_unchecked(i); + sum += diff * diff; + } + i += 1; + } + sum +} + +#[inline(always)] +fn early_stopping_distance(a: &[f32], b: &[f32], current_min: f32) -> f32 { + let mut sum = 0.0; + let mut i = 0; + let len = a.len(); + + if a.len() != b.len() || a.len() < 8 { + return f32::MAX; + } + + while i + 7 < len { + unsafe { + let diff0 = *a.get_unchecked(i) - *b.get_unchecked(i); + let diff1 = *a.get_unchecked(i + 1) - *b.get_unchecked(i + 1); + let diff2 = *a.get_unchecked(i + 2) - *b.get_unchecked(i + 2); + let diff3 = *a.get_unchecked(i + 3) - *b.get_unchecked(i + 3); + let diff4 = *a.get_unchecked(i + 4) - *b.get_unchecked(i + 4); + let diff5 = *a.get_unchecked(i + 5) - *b.get_unchecked(i + 5); + let diff6 = *a.get_unchecked(i + 6) - *b.get_unchecked(i + 6); + let diff7 = *a.get_unchecked(i + 7) - *b.get_unchecked(i + 7); + + sum += diff0 * diff0 + diff1 * diff1 + diff2 * diff2 + diff3 * diff3 + + diff4 * diff4 + diff5 * diff5 + diff6 * diff6 + diff7 * diff7; + } + + if sum > current_min { + return f32::MAX; + } + + i += 8; + } + + while i < len { + unsafe { + let diff = *a.get_unchecked(i) - *b.get_unchecked(i); + sum += diff * diff; + } + i += 1; + } + sum +} + +fn nearest_neighbor_search<'a>( + root: &Option>>, + target: &[f32], + best: &mut (f32, Option), +) { + let num_dimensions = target.len(); + let mut stack = Vec::with_capacity(64); + + if let Some(node) = root { + stack.push((node.as_ref(), 0)); + } + + while let Some((node, depth)) = stack.pop() { + let axis = depth % num_dimensions; + let dist = early_stopping_distance(&node.point, target, best.0); + + if dist < best.0 { + best.0 = dist; + best.1 = Some(node.index); + } + + let diff = target[axis] - node.point[axis]; + let sqr_diff = diff * diff; + + let (nearer, farther) = if diff < 0.0 { + (&node.left, &node.right) + } else { + (&node.right, &node.left) + }; + + if let Some(nearer_node) = nearer { + stack.push((nearer_node.as_ref(), depth + 1)); + } + + if sqr_diff < best.0 { + if let Some(farther_node) = farther { + stack.push((farther_node.as_ref(), depth + 1)); + } + } + } +} +fn calculate_mean_vector(vectors: &[&[f32]]) -> Vec { + let num_vectors = vectors.len(); + let num_dimensions = 250; + + let mut mean_vector = vec![0.0f64; num_dimensions]; + + for vector in vectors { + for i in 0..num_dimensions { + mean_vector[i] += vector[i] as f64; + } + } + for i in 0..num_dimensions { + mean_vector[i] /= num_vectors as f64; + } + mean_vector.into_iter().map(|x| x as f32).collect() +} + +#[derive(Debug)] +struct FloatOrd(f32); + +impl PartialEq for FloatOrd { + fn eq(&self, other: &Self) -> bool { + self.0 == other.0 + } +} + +impl Eq for FloatOrd {} + +impl PartialOrd for FloatOrd { + fn partial_cmp(&self, other: &Self) -> Option { + self.0.partial_cmp(&other.0) + } +} + +impl Ord for FloatOrd { + fn cmp(&self, other: &Self) -> Ordering { + + self.partial_cmp(other).unwrap_or(Ordering::Equal) + } +} + +fn filter_relevant_vectors<'a>( + database: &'a [Vec], + query_vectors: &[Vec], + k: usize, +) -> Vec<(f32, &'a [f32], usize)> { + let query_refs: Vec<&[f32]> = query_vectors.iter().map(|v| &v[..]).collect(); + let mean_query_vector = calculate_mean_vector(&query_refs); + + let mut heap: BinaryHeap<(FloatOrd, usize)> = BinaryHeap::with_capacity(k); + + for (index, vector) in database.iter().enumerate() { + if heap.len() < k + { + let dist = squared_euclidean_distance(&mean_query_vector, vector); + let ord_dist = FloatOrd(dist); + + heap.push((ord_dist, index)); + } else if let Some(&(FloatOrd(top_dist), _)) = heap.peek() + { + let dist = early_stopping_distance(&mean_query_vector, vector, top_dist); + let ord_dist = FloatOrd(dist); + if dist < top_dist { + heap.pop(); + heap.push((ord_dist, index)); + } + } + } + heap.into_sorted_vec() + .into_iter() + .map(|(FloatOrd(dist), index)| (dist, &database[index][..], index)) + .collect() +} + +pub fn solve_challenge(challenge: &Challenge) -> anyhow::Result> { + let query_count = challenge.query_vectors.len(); + + let max_fuel = 2000000000.0; + let base_fuel = 760000000.0; + let alpha = 1700.0 * challenge.difficulty.num_queries as f64; + + let m = ((max_fuel - base_fuel) / alpha) as usize; + let n = (m as f32 * 1.2) as usize; + let r = n - m; + + let closest_vectors = filter_relevant_vectors( + &challenge.vector_database, + &challenge.query_vectors, + n, + ); + + let (m_slice, r_slice) = closest_vectors.split_at(m); + let m_vectors: Vec<_> = m_slice.to_vec(); + let r_vectors: Vec<_> = r_slice.to_vec(); + + let mut kd_tree_vectors: Vec<(&[f32], usize)> = m_vectors.iter().map(|&(_, v, i)| (v, i)).collect(); + let kd_tree = build_kd_tree(&mut kd_tree_vectors); + + let mut best_indexes = Vec::with_capacity(query_count); + let mut distances = Vec::with_capacity(query_count); + + for query in &challenge.query_vectors { + let mut best = (std::f32::MAX, None); + nearest_neighbor_search(&kd_tree, query, &mut best); + + distances.push(best.0); + best_indexes.push(best.1.unwrap_or(0)); + } + + let brute_force_count = (query_count as f32 * 0.1) as usize; + let mut distance_indices: Vec<_> = distances.iter().enumerate().collect(); + distance_indices.sort_unstable_by(|a, b| b.1.partial_cmp(a.1).unwrap()); + let high_distance_indices: Vec<_> = distance_indices.into_iter() + .take(brute_force_count) + .map(|(index, _)| index) + .collect(); + + for &query_index in &high_distance_indices { + let query = &challenge.query_vectors[query_index]; + let mut best = (distances[query_index], best_indexes[query_index]); + + for &(_, vec, index) in &r_vectors { + let dist = squared_euclidean_distance(query, vec); + if dist < best.0 { + best = (dist, index); + } + } + + best_indexes[query_index] = best.1; + } + + Ok(Some(Solution { + indexes: best_indexes, + })) +} + +#[cfg(feature = "cuda")] +mod gpu_optimisation { + use super::*; + use cudarc::driver::*; + use std::{collections::HashMap, sync::Arc}; + use tig_challenges::CudaKernel; + + // set KERNEL to None if algorithm only has a CPU implementation + pub const KERNEL: Option = None; + + // Important! your GPU and CPU version of the algorithm should return the same result + pub fn cuda_solve_challenge( + challenge: &Challenge, + dev: &Arc, + mut funcs: HashMap<&'static str, CudaFunction>, + ) -> anyhow::Result> { + solve_challenge(challenge) + } +} +#[cfg(feature = "cuda")] +pub use gpu_optimisation::{cuda_solve_challenge, KERNEL}; diff --git a/tig-algorithms/src/vector_search/invector_hybrid/innovator_outbound.rs b/tig-algorithms/src/vector_search/invector_hybrid/innovator_outbound.rs new file mode 100644 index 00000000..624671f6 --- /dev/null +++ b/tig-algorithms/src/vector_search/invector_hybrid/innovator_outbound.rs @@ -0,0 +1,395 @@ +/*! +Copyright 2024 syebastian + +Licensed under the TIG Innovator Outbound Game License v1.0 (the "License"); you +may not use this file except in compliance with the License. You may obtain a copy +of the License at + +https://github.com/tig-foundation/tig-monorepo/tree/main/docs/licenses + +Unless required by applicable law or agreed to in writing, software distributed +under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +CONDITIONS OF ANY KIND, either express or implied. See the License for the specific +language governing permissions and limitations under the License. +*/ + + +use anyhow::Ok; +use tig_challenges::vector_search::*; +use std::cmp::Ordering; +use std::collections::BinaryHeap; + +struct KDNode<'a> { + point: &'a [f32], + left: Option>>, + right: Option>>, + index: usize, +} + +impl<'a> KDNode<'a> { + fn new(point: &'a [f32], index: usize) -> Self { + KDNode { + point, + left: None, + right: None, + index, + } + } +} +fn quickselect_by(arr: &mut [(&[f32], usize)], k: usize, compare: &F) +where + F: Fn(&(&[f32], usize), &(&[f32], usize)) -> Ordering, +{ + if arr.len() <= 1 { + return; + } + + let pivot_index = partition(arr, compare); + if k < pivot_index { + quickselect_by(&mut arr[..pivot_index], k, compare); + } else if k > pivot_index { + quickselect_by(&mut arr[pivot_index + 1..], k - pivot_index - 1, compare); + } +} + +fn partition(arr: &mut [(&[f32], usize)], compare: &F) -> usize +where + F: Fn(&(&[f32], usize), &(&[f32], usize)) -> Ordering, +{ + let pivot_index = arr.len() >> 1; + arr.swap(pivot_index, arr.len() - 1); + + let mut store_index = 0; + for i in 0..arr.len() - 1 { + if compare(&arr[i], &arr[arr.len() - 1]) == Ordering::Less { + arr.swap(i, store_index); + store_index += 1; + } + } + arr.swap(store_index, arr.len() - 1); + store_index +} + +fn build_kd_tree<'a>(points: &mut [(&'a [f32], usize)]) -> Option>> { + if points.is_empty() { + return None; + } + + const NUM_DIMENSIONS: usize = 250; + let mut stack: Vec<(usize, usize, usize, Option<*mut KDNode<'a>>, bool)> = Vec::new(); + let mut root: Option>> = None; + + stack.push((0, points.len(), 0, None, false)); + + while let Some((start, end, depth, parent_ptr, is_left)) = stack.pop() { + if start >= end { + continue; + } + + let axis = depth % NUM_DIMENSIONS; + let median = (start + end) / 2; + quickselect_by(&mut points[start..end], median - start, &|a, b| { + a.0[axis].partial_cmp(&b.0[axis]).unwrap() + }); + + let (median_point, median_index) = points[median]; + let mut new_node = Box::new(KDNode::new(median_point, median_index)); + let new_node_ptr: *mut KDNode = &mut *new_node; + + if let Some(parent_ptr) = parent_ptr { + unsafe { + if is_left { + (*parent_ptr).left = Some(new_node); + } else { + (*parent_ptr).right = Some(new_node); + } + } + } else { + root = Some(new_node); + } + + stack.push((median + 1, end, depth + 1, Some(new_node_ptr), false)); + stack.push((start, median, depth + 1, Some(new_node_ptr), true)); + } + + root +} + +#[inline(always)] +fn squared_euclidean_distance(a: &[f32], b: &[f32]) -> f32 { + let mut sum = 0.0; + let mut i = 0; + let len = a.len(); + + if a.len() != b.len() || a.len() < 8 { + return f32::MAX; + } + + while i + 7 < len { + unsafe { + let diff0 = *a.get_unchecked(i) - *b.get_unchecked(i); + let diff1 = *a.get_unchecked(i + 1) - *b.get_unchecked(i + 1); + let diff2 = *a.get_unchecked(i + 2) - *b.get_unchecked(i + 2); + let diff3 = *a.get_unchecked(i + 3) - *b.get_unchecked(i + 3); + let diff4 = *a.get_unchecked(i + 4) - *b.get_unchecked(i + 4); + let diff5 = *a.get_unchecked(i + 5) - *b.get_unchecked(i + 5); + let diff6 = *a.get_unchecked(i + 6) - *b.get_unchecked(i + 6); + let diff7 = *a.get_unchecked(i + 7) - *b.get_unchecked(i + 7); + + sum += diff0 * diff0 + diff1 * diff1 + diff2 * diff2 + diff3 * diff3 + + diff4 * diff4 + diff5 * diff5 + diff6 * diff6 + diff7 * diff7; + } + + i += 8; + } + + while i < len { + unsafe { + let diff = *a.get_unchecked(i) - *b.get_unchecked(i); + sum += diff * diff; + } + i += 1; + } + sum +} + +#[inline(always)] +fn early_stopping_distance(a: &[f32], b: &[f32], current_min: f32) -> f32 { + let mut sum = 0.0; + let mut i = 0; + let len = a.len(); + + if a.len() != b.len() || a.len() < 8 { + return f32::MAX; + } + + while i + 7 < len { + unsafe { + let diff0 = *a.get_unchecked(i) - *b.get_unchecked(i); + let diff1 = *a.get_unchecked(i + 1) - *b.get_unchecked(i + 1); + let diff2 = *a.get_unchecked(i + 2) - *b.get_unchecked(i + 2); + let diff3 = *a.get_unchecked(i + 3) - *b.get_unchecked(i + 3); + let diff4 = *a.get_unchecked(i + 4) - *b.get_unchecked(i + 4); + let diff5 = *a.get_unchecked(i + 5) - *b.get_unchecked(i + 5); + let diff6 = *a.get_unchecked(i + 6) - *b.get_unchecked(i + 6); + let diff7 = *a.get_unchecked(i + 7) - *b.get_unchecked(i + 7); + + sum += diff0 * diff0 + diff1 * diff1 + diff2 * diff2 + diff3 * diff3 + + diff4 * diff4 + diff5 * diff5 + diff6 * diff6 + diff7 * diff7; + } + + if sum > current_min { + return f32::MAX; + } + + i += 8; + } + + while i < len { + unsafe { + let diff = *a.get_unchecked(i) - *b.get_unchecked(i); + sum += diff * diff; + } + i += 1; + } + sum +} + +fn nearest_neighbor_search<'a>( + root: &Option>>, + target: &[f32], + best: &mut (f32, Option), +) { + let num_dimensions = target.len(); + let mut stack = Vec::with_capacity(64); + + if let Some(node) = root { + stack.push((node.as_ref(), 0)); + } + + while let Some((node, depth)) = stack.pop() { + let axis = depth % num_dimensions; + let dist = early_stopping_distance(&node.point, target, best.0); + + if dist < best.0 { + best.0 = dist; + best.1 = Some(node.index); + } + + let diff = target[axis] - node.point[axis]; + let sqr_diff = diff * diff; + + let (nearer, farther) = if diff < 0.0 { + (&node.left, &node.right) + } else { + (&node.right, &node.left) + }; + + if let Some(nearer_node) = nearer { + stack.push((nearer_node.as_ref(), depth + 1)); + } + + if sqr_diff < best.0 { + if let Some(farther_node) = farther { + stack.push((farther_node.as_ref(), depth + 1)); + } + } + } +} +fn calculate_mean_vector(vectors: &[&[f32]]) -> Vec { + let num_vectors = vectors.len(); + let num_dimensions = 250; + + let mut mean_vector = vec![0.0f64; num_dimensions]; + + for vector in vectors { + for i in 0..num_dimensions { + mean_vector[i] += vector[i] as f64; + } + } + for i in 0..num_dimensions { + mean_vector[i] /= num_vectors as f64; + } + mean_vector.into_iter().map(|x| x as f32).collect() +} + +#[derive(Debug)] +struct FloatOrd(f32); + +impl PartialEq for FloatOrd { + fn eq(&self, other: &Self) -> bool { + self.0 == other.0 + } +} + +impl Eq for FloatOrd {} + +impl PartialOrd for FloatOrd { + fn partial_cmp(&self, other: &Self) -> Option { + self.0.partial_cmp(&other.0) + } +} + +impl Ord for FloatOrd { + fn cmp(&self, other: &Self) -> Ordering { + + self.partial_cmp(other).unwrap_or(Ordering::Equal) + } +} + +fn filter_relevant_vectors<'a>( + database: &'a [Vec], + query_vectors: &[Vec], + k: usize, +) -> Vec<(f32, &'a [f32], usize)> { + let query_refs: Vec<&[f32]> = query_vectors.iter().map(|v| &v[..]).collect(); + let mean_query_vector = calculate_mean_vector(&query_refs); + + let mut heap: BinaryHeap<(FloatOrd, usize)> = BinaryHeap::with_capacity(k); + + for (index, vector) in database.iter().enumerate() { + if heap.len() < k + { + let dist = squared_euclidean_distance(&mean_query_vector, vector); + let ord_dist = FloatOrd(dist); + + heap.push((ord_dist, index)); + } else if let Some(&(FloatOrd(top_dist), _)) = heap.peek() + { + let dist = early_stopping_distance(&mean_query_vector, vector, top_dist); + let ord_dist = FloatOrd(dist); + if dist < top_dist { + heap.pop(); + heap.push((ord_dist, index)); + } + } + } + heap.into_sorted_vec() + .into_iter() + .map(|(FloatOrd(dist), index)| (dist, &database[index][..], index)) + .collect() +} + +pub fn solve_challenge(challenge: &Challenge) -> anyhow::Result> { + let query_count = challenge.query_vectors.len(); + + let max_fuel = 2000000000.0; + let base_fuel = 760000000.0; + let alpha = 1700.0 * challenge.difficulty.num_queries as f64; + + let m = ((max_fuel - base_fuel) / alpha) as usize; + let n = (m as f32 * 1.2) as usize; + let r = n - m; + + let closest_vectors = filter_relevant_vectors( + &challenge.vector_database, + &challenge.query_vectors, + n, + ); + + let (m_slice, r_slice) = closest_vectors.split_at(m); + let m_vectors: Vec<_> = m_slice.to_vec(); + let r_vectors: Vec<_> = r_slice.to_vec(); + + let mut kd_tree_vectors: Vec<(&[f32], usize)> = m_vectors.iter().map(|&(_, v, i)| (v, i)).collect(); + let kd_tree = build_kd_tree(&mut kd_tree_vectors); + + let mut best_indexes = Vec::with_capacity(query_count); + let mut distances = Vec::with_capacity(query_count); + + for query in &challenge.query_vectors { + let mut best = (std::f32::MAX, None); + nearest_neighbor_search(&kd_tree, query, &mut best); + + distances.push(best.0); + best_indexes.push(best.1.unwrap_or(0)); + } + + let brute_force_count = (query_count as f32 * 0.1) as usize; + let mut distance_indices: Vec<_> = distances.iter().enumerate().collect(); + distance_indices.sort_unstable_by(|a, b| b.1.partial_cmp(a.1).unwrap()); + let high_distance_indices: Vec<_> = distance_indices.into_iter() + .take(brute_force_count) + .map(|(index, _)| index) + .collect(); + + for &query_index in &high_distance_indices { + let query = &challenge.query_vectors[query_index]; + let mut best = (distances[query_index], best_indexes[query_index]); + + for &(_, vec, index) in &r_vectors { + let dist = squared_euclidean_distance(query, vec); + if dist < best.0 { + best = (dist, index); + } + } + + best_indexes[query_index] = best.1; + } + + Ok(Some(Solution { + indexes: best_indexes, + })) +} + +#[cfg(feature = "cuda")] +mod gpu_optimisation { + use super::*; + use cudarc::driver::*; + use std::{collections::HashMap, sync::Arc}; + use tig_challenges::CudaKernel; + + // set KERNEL to None if algorithm only has a CPU implementation + pub const KERNEL: Option = None; + + // Important! your GPU and CPU version of the algorithm should return the same result + pub fn cuda_solve_challenge( + challenge: &Challenge, + dev: &Arc, + mut funcs: HashMap<&'static str, CudaFunction>, + ) -> anyhow::Result> { + solve_challenge(challenge) + } +} +#[cfg(feature = "cuda")] +pub use gpu_optimisation::{cuda_solve_challenge, KERNEL}; diff --git a/tig-algorithms/src/vector_search/invector_hybrid/mod.rs b/tig-algorithms/src/vector_search/invector_hybrid/mod.rs new file mode 100644 index 00000000..fcec9672 --- /dev/null +++ b/tig-algorithms/src/vector_search/invector_hybrid/mod.rs @@ -0,0 +1,4 @@ +mod benchmarker_outbound; +pub use benchmarker_outbound::solve_challenge; +#[cfg(feature = "cuda")] +pub use benchmarker_outbound::{cuda_solve_challenge, KERNEL}; \ No newline at end of file diff --git a/tig-algorithms/src/vector_search/invector_hybrid/open_data.rs b/tig-algorithms/src/vector_search/invector_hybrid/open_data.rs new file mode 100644 index 00000000..e5284668 --- /dev/null +++ b/tig-algorithms/src/vector_search/invector_hybrid/open_data.rs @@ -0,0 +1,395 @@ +/*! +Copyright 2024 syebastian + +Licensed under the TIG Open Data License v1.0 or (at your option) any later version +(the "License"); you may not use this file except in compliance with the License. +You may obtain a copy of the License at + +https://github.com/tig-foundation/tig-monorepo/tree/main/docs/licenses + +Unless required by applicable law or agreed to in writing, software distributed +under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +CONDITIONS OF ANY KIND, either express or implied. See the License for the specific +language governing permissions and limitations under the License. +*/ + + +use anyhow::Ok; +use tig_challenges::vector_search::*; +use std::cmp::Ordering; +use std::collections::BinaryHeap; + +struct KDNode<'a> { + point: &'a [f32], + left: Option>>, + right: Option>>, + index: usize, +} + +impl<'a> KDNode<'a> { + fn new(point: &'a [f32], index: usize) -> Self { + KDNode { + point, + left: None, + right: None, + index, + } + } +} +fn quickselect_by(arr: &mut [(&[f32], usize)], k: usize, compare: &F) +where + F: Fn(&(&[f32], usize), &(&[f32], usize)) -> Ordering, +{ + if arr.len() <= 1 { + return; + } + + let pivot_index = partition(arr, compare); + if k < pivot_index { + quickselect_by(&mut arr[..pivot_index], k, compare); + } else if k > pivot_index { + quickselect_by(&mut arr[pivot_index + 1..], k - pivot_index - 1, compare); + } +} + +fn partition(arr: &mut [(&[f32], usize)], compare: &F) -> usize +where + F: Fn(&(&[f32], usize), &(&[f32], usize)) -> Ordering, +{ + let pivot_index = arr.len() >> 1; + arr.swap(pivot_index, arr.len() - 1); + + let mut store_index = 0; + for i in 0..arr.len() - 1 { + if compare(&arr[i], &arr[arr.len() - 1]) == Ordering::Less { + arr.swap(i, store_index); + store_index += 1; + } + } + arr.swap(store_index, arr.len() - 1); + store_index +} + +fn build_kd_tree<'a>(points: &mut [(&'a [f32], usize)]) -> Option>> { + if points.is_empty() { + return None; + } + + const NUM_DIMENSIONS: usize = 250; + let mut stack: Vec<(usize, usize, usize, Option<*mut KDNode<'a>>, bool)> = Vec::new(); + let mut root: Option>> = None; + + stack.push((0, points.len(), 0, None, false)); + + while let Some((start, end, depth, parent_ptr, is_left)) = stack.pop() { + if start >= end { + continue; + } + + let axis = depth % NUM_DIMENSIONS; + let median = (start + end) / 2; + quickselect_by(&mut points[start..end], median - start, &|a, b| { + a.0[axis].partial_cmp(&b.0[axis]).unwrap() + }); + + let (median_point, median_index) = points[median]; + let mut new_node = Box::new(KDNode::new(median_point, median_index)); + let new_node_ptr: *mut KDNode = &mut *new_node; + + if let Some(parent_ptr) = parent_ptr { + unsafe { + if is_left { + (*parent_ptr).left = Some(new_node); + } else { + (*parent_ptr).right = Some(new_node); + } + } + } else { + root = Some(new_node); + } + + stack.push((median + 1, end, depth + 1, Some(new_node_ptr), false)); + stack.push((start, median, depth + 1, Some(new_node_ptr), true)); + } + + root +} + +#[inline(always)] +fn squared_euclidean_distance(a: &[f32], b: &[f32]) -> f32 { + let mut sum = 0.0; + let mut i = 0; + let len = a.len(); + + if a.len() != b.len() || a.len() < 8 { + return f32::MAX; + } + + while i + 7 < len { + unsafe { + let diff0 = *a.get_unchecked(i) - *b.get_unchecked(i); + let diff1 = *a.get_unchecked(i + 1) - *b.get_unchecked(i + 1); + let diff2 = *a.get_unchecked(i + 2) - *b.get_unchecked(i + 2); + let diff3 = *a.get_unchecked(i + 3) - *b.get_unchecked(i + 3); + let diff4 = *a.get_unchecked(i + 4) - *b.get_unchecked(i + 4); + let diff5 = *a.get_unchecked(i + 5) - *b.get_unchecked(i + 5); + let diff6 = *a.get_unchecked(i + 6) - *b.get_unchecked(i + 6); + let diff7 = *a.get_unchecked(i + 7) - *b.get_unchecked(i + 7); + + sum += diff0 * diff0 + diff1 * diff1 + diff2 * diff2 + diff3 * diff3 + + diff4 * diff4 + diff5 * diff5 + diff6 * diff6 + diff7 * diff7; + } + + i += 8; + } + + while i < len { + unsafe { + let diff = *a.get_unchecked(i) - *b.get_unchecked(i); + sum += diff * diff; + } + i += 1; + } + sum +} + +#[inline(always)] +fn early_stopping_distance(a: &[f32], b: &[f32], current_min: f32) -> f32 { + let mut sum = 0.0; + let mut i = 0; + let len = a.len(); + + if a.len() != b.len() || a.len() < 8 { + return f32::MAX; + } + + while i + 7 < len { + unsafe { + let diff0 = *a.get_unchecked(i) - *b.get_unchecked(i); + let diff1 = *a.get_unchecked(i + 1) - *b.get_unchecked(i + 1); + let diff2 = *a.get_unchecked(i + 2) - *b.get_unchecked(i + 2); + let diff3 = *a.get_unchecked(i + 3) - *b.get_unchecked(i + 3); + let diff4 = *a.get_unchecked(i + 4) - *b.get_unchecked(i + 4); + let diff5 = *a.get_unchecked(i + 5) - *b.get_unchecked(i + 5); + let diff6 = *a.get_unchecked(i + 6) - *b.get_unchecked(i + 6); + let diff7 = *a.get_unchecked(i + 7) - *b.get_unchecked(i + 7); + + sum += diff0 * diff0 + diff1 * diff1 + diff2 * diff2 + diff3 * diff3 + + diff4 * diff4 + diff5 * diff5 + diff6 * diff6 + diff7 * diff7; + } + + if sum > current_min { + return f32::MAX; + } + + i += 8; + } + + while i < len { + unsafe { + let diff = *a.get_unchecked(i) - *b.get_unchecked(i); + sum += diff * diff; + } + i += 1; + } + sum +} + +fn nearest_neighbor_search<'a>( + root: &Option>>, + target: &[f32], + best: &mut (f32, Option), +) { + let num_dimensions = target.len(); + let mut stack = Vec::with_capacity(64); + + if let Some(node) = root { + stack.push((node.as_ref(), 0)); + } + + while let Some((node, depth)) = stack.pop() { + let axis = depth % num_dimensions; + let dist = early_stopping_distance(&node.point, target, best.0); + + if dist < best.0 { + best.0 = dist; + best.1 = Some(node.index); + } + + let diff = target[axis] - node.point[axis]; + let sqr_diff = diff * diff; + + let (nearer, farther) = if diff < 0.0 { + (&node.left, &node.right) + } else { + (&node.right, &node.left) + }; + + if let Some(nearer_node) = nearer { + stack.push((nearer_node.as_ref(), depth + 1)); + } + + if sqr_diff < best.0 { + if let Some(farther_node) = farther { + stack.push((farther_node.as_ref(), depth + 1)); + } + } + } +} +fn calculate_mean_vector(vectors: &[&[f32]]) -> Vec { + let num_vectors = vectors.len(); + let num_dimensions = 250; + + let mut mean_vector = vec![0.0f64; num_dimensions]; + + for vector in vectors { + for i in 0..num_dimensions { + mean_vector[i] += vector[i] as f64; + } + } + for i in 0..num_dimensions { + mean_vector[i] /= num_vectors as f64; + } + mean_vector.into_iter().map(|x| x as f32).collect() +} + +#[derive(Debug)] +struct FloatOrd(f32); + +impl PartialEq for FloatOrd { + fn eq(&self, other: &Self) -> bool { + self.0 == other.0 + } +} + +impl Eq for FloatOrd {} + +impl PartialOrd for FloatOrd { + fn partial_cmp(&self, other: &Self) -> Option { + self.0.partial_cmp(&other.0) + } +} + +impl Ord for FloatOrd { + fn cmp(&self, other: &Self) -> Ordering { + + self.partial_cmp(other).unwrap_or(Ordering::Equal) + } +} + +fn filter_relevant_vectors<'a>( + database: &'a [Vec], + query_vectors: &[Vec], + k: usize, +) -> Vec<(f32, &'a [f32], usize)> { + let query_refs: Vec<&[f32]> = query_vectors.iter().map(|v| &v[..]).collect(); + let mean_query_vector = calculate_mean_vector(&query_refs); + + let mut heap: BinaryHeap<(FloatOrd, usize)> = BinaryHeap::with_capacity(k); + + for (index, vector) in database.iter().enumerate() { + if heap.len() < k + { + let dist = squared_euclidean_distance(&mean_query_vector, vector); + let ord_dist = FloatOrd(dist); + + heap.push((ord_dist, index)); + } else if let Some(&(FloatOrd(top_dist), _)) = heap.peek() + { + let dist = early_stopping_distance(&mean_query_vector, vector, top_dist); + let ord_dist = FloatOrd(dist); + if dist < top_dist { + heap.pop(); + heap.push((ord_dist, index)); + } + } + } + heap.into_sorted_vec() + .into_iter() + .map(|(FloatOrd(dist), index)| (dist, &database[index][..], index)) + .collect() +} + +pub fn solve_challenge(challenge: &Challenge) -> anyhow::Result> { + let query_count = challenge.query_vectors.len(); + + let max_fuel = 2000000000.0; + let base_fuel = 760000000.0; + let alpha = 1700.0 * challenge.difficulty.num_queries as f64; + + let m = ((max_fuel - base_fuel) / alpha) as usize; + let n = (m as f32 * 1.2) as usize; + let r = n - m; + + let closest_vectors = filter_relevant_vectors( + &challenge.vector_database, + &challenge.query_vectors, + n, + ); + + let (m_slice, r_slice) = closest_vectors.split_at(m); + let m_vectors: Vec<_> = m_slice.to_vec(); + let r_vectors: Vec<_> = r_slice.to_vec(); + + let mut kd_tree_vectors: Vec<(&[f32], usize)> = m_vectors.iter().map(|&(_, v, i)| (v, i)).collect(); + let kd_tree = build_kd_tree(&mut kd_tree_vectors); + + let mut best_indexes = Vec::with_capacity(query_count); + let mut distances = Vec::with_capacity(query_count); + + for query in &challenge.query_vectors { + let mut best = (std::f32::MAX, None); + nearest_neighbor_search(&kd_tree, query, &mut best); + + distances.push(best.0); + best_indexes.push(best.1.unwrap_or(0)); + } + + let brute_force_count = (query_count as f32 * 0.1) as usize; + let mut distance_indices: Vec<_> = distances.iter().enumerate().collect(); + distance_indices.sort_unstable_by(|a, b| b.1.partial_cmp(a.1).unwrap()); + let high_distance_indices: Vec<_> = distance_indices.into_iter() + .take(brute_force_count) + .map(|(index, _)| index) + .collect(); + + for &query_index in &high_distance_indices { + let query = &challenge.query_vectors[query_index]; + let mut best = (distances[query_index], best_indexes[query_index]); + + for &(_, vec, index) in &r_vectors { + let dist = squared_euclidean_distance(query, vec); + if dist < best.0 { + best = (dist, index); + } + } + + best_indexes[query_index] = best.1; + } + + Ok(Some(Solution { + indexes: best_indexes, + })) +} + +#[cfg(feature = "cuda")] +mod gpu_optimisation { + use super::*; + use cudarc::driver::*; + use std::{collections::HashMap, sync::Arc}; + use tig_challenges::CudaKernel; + + // set KERNEL to None if algorithm only has a CPU implementation + pub const KERNEL: Option = None; + + // Important! your GPU and CPU version of the algorithm should return the same result + pub fn cuda_solve_challenge( + challenge: &Challenge, + dev: &Arc, + mut funcs: HashMap<&'static str, CudaFunction>, + ) -> anyhow::Result> { + solve_challenge(challenge) + } +} +#[cfg(feature = "cuda")] +pub use gpu_optimisation::{cuda_solve_challenge, KERNEL}; diff --git a/tig-algorithms/src/vector_search/mod.rs b/tig-algorithms/src/vector_search/mod.rs index bf245d39..cee7ac52 100644 --- a/tig-algorithms/src/vector_search/mod.rs +++ b/tig-algorithms/src/vector_search/mod.rs @@ -83,7 +83,8 @@ pub use invector as c004_a034; // c004_a041 -// c004_a042 +pub mod invector_hybrid; +pub use invector_hybrid as c004_a042; // c004_a043 diff --git a/tig-algorithms/wasm/vector_search/invector_hybrid.wasm b/tig-algorithms/wasm/vector_search/invector_hybrid.wasm new file mode 100644 index 00000000..2ba2b5f4 Binary files /dev/null and b/tig-algorithms/wasm/vector_search/invector_hybrid.wasm differ