diff --git a/src/engine/chunk.rs b/src/engine/chunk.rs index 4de33fb..0e5bad5 100644 --- a/src/engine/chunk.rs +++ b/src/engine/chunk.rs @@ -14,10 +14,10 @@ impl EngineStrategy> for Array { let chunk_to_index = index as u64 % rows_per_shard(self); let ans = self.retrieve_chunk_subset_elements( &[index_to_chunk, 0], - &ArraySubset::new_with_start_end_inc( - vec![chunk_to_index, 0], - vec![chunk_to_index, columns_per_shard(self) - 1], - )?, + &ArraySubset::new_with_ranges(&[ + chunk_to_index..chunk_to_index + 1, + 0..columns_per_shard(self), + ]), )?; Ok(ans.to_vec()) } @@ -27,9 +27,9 @@ impl EngineStrategy> for Array { } fn get_third_term(&self, index: usize) -> EngineResult> { - let start = vec![0, index as u64]; - let end = vec![self.shape()[0], index as u64]; - let shape = &ArraySubset::new_with_start_end_inc(start, end)?; + let last_chunk = self.shape()[0] / rows_per_shard(self); + let col = index as u64; + let shape = &ArraySubset::new_with_ranges(&[0..last_chunk, col..col + 1]); let ans = self.retrieve_array_subset_elements(shape)?; Ok(ans.to_vec()) } diff --git a/src/error.rs b/src/error.rs index 7b41ac1..ed446fe 100644 --- a/src/error.rs +++ b/src/error.rs @@ -3,6 +3,7 @@ use thiserror::Error; use zarrs::array::codec::bytes_to_bytes::gzip::GzipCompressionLevelError; use zarrs::array::ArrayCreateError; use zarrs::array::ArrayError; +use zarrs::array::NonZeroError; use zarrs::array_subset::IncompatibleDimensionalityError; use zarrs::array_subset::IncompatibleStartEndIndicesError; use zarrs::group::GroupCreateError; @@ -56,6 +57,8 @@ pub enum RemoteHDTError { ReadOnlyBackend, #[error("Error while parsing the RDF graph")] RdfParse, + #[error(transparent)] + NonZero(#[from] NonZeroError), } #[derive(Error, Debug)] diff --git a/src/storage/layout/matrix.rs b/src/storage/layout/matrix.rs index 8d19e1b..9bd5d13 100644 --- a/src/storage/layout/matrix.rs +++ b/src/storage/layout/matrix.rs @@ -85,13 +85,8 @@ impl Layout for MatrixLayout { &self, dimensionality: &Dimensionality, ) -> StorageResult> { - let mut sharding_codec_builder = ShardingCodecBuilder::new( - vec![ - NonZeroU64::new(1).unwrap(), - NonZeroU64::new(dimensionality.get_third_term_size()).unwrap(), - ] - .into(), - ); + let mut sharding_codec_builder = + ShardingCodecBuilder::new(vec![1, dimensionality.get_third_term_size()].try_into()?); sharding_codec_builder.bytes_to_bytes_codecs(vec![Box::new(GzipCodec::new(5)?)]); Ok(Box::new(sharding_codec_builder.build())) } @@ -128,9 +123,7 @@ impl LayoutOps for MatrixLayout { fn retrieve_chunk_elements( &mut self, matrix: &Mutex>, - i: u64, - number_of_columns: u64, - first_term_idx: usize, + first_term_index: usize, chunk: &[usize], ) { chunk @@ -138,11 +131,9 @@ impl LayoutOps for MatrixLayout { .enumerate() .for_each(|(third_term_idx, &second_term_idx)| { if second_term_idx != 0 { - matrix.lock().add_triplet( - first_term_idx + (i * number_of_columns) as usize, - third_term_idx, - second_term_idx, - ); + matrix + .lock() + .add_triplet(first_term_index, third_term_idx, second_term_idx); } }) } diff --git a/src/storage/layout/mod.rs b/src/storage/layout/mod.rs index 03b5859..f21281b 100644 --- a/src/storage/layout/mod.rs +++ b/src/storage/layout/mod.rs @@ -29,7 +29,7 @@ type ArrayToBytesCodec = Box; pub mod matrix; pub mod tabular; -pub(crate) trait LayoutOps { +pub trait LayoutOps { fn retrieve_attributes(&mut self, arr: &Array) -> StorageResult { // 4. We get the attributes so we can obtain some values that we will need let attributes = arr.attributes(); @@ -70,13 +70,13 @@ pub(crate) trait LayoutOps { let iter = binding.chunks_exact(rows_per_shard(&arr) as usize); let remainder = iter.remainder(); - let _ = iter.map(|chunk| { - count.fetch_add(1, Ordering::Relaxed); + for chunk in iter { arr.store_chunk_elements( &[count.load(Ordering::Relaxed), 0], self.store_chunk_elements(chunk, columns), - ) - }); + )?; + count.fetch_add(1, Ordering::Relaxed); + } if !remainder.is_empty() { arr.store_array_subset_elements( @@ -108,10 +108,11 @@ pub(crate) trait LayoutOps { dimensionality.third_term_size, // we obtain the size of the third terms ))); - // We compute the number of chunks; for us to achieve so, we have to obtain + // We compute the number of shards; for us to achieve so, we have to obtain // first dimension of the chunk grid - let number_of_chunks = match arr.chunk_grid_shape() { + let number_of_shards = match arr.chunk_grid_shape() { Some(chunk_grid) => chunk_grid[0], + None => 0, }; @@ -123,16 +124,16 @@ pub(crate) trait LayoutOps { // low, as instead of parsing the whole array, we process smaller pieces // of it. Once we have all the pieces processed, we will have parsed the // whole array - for i in 0..number_of_chunks { - arr.retrieve_chunk_elements(&[i, 0])? + for shard in 0..number_of_shards { + arr.retrieve_chunk_elements(&[shard, 0])? + // We divide each shard by the number of columns, as a shard is + // composed of chunks having the size of [1, number of cols] .chunks(number_of_columns) .enumerate() .for_each(|(first_term_idx, chunk)| { self.retrieve_chunk_elements( &matrix, - i, - number_of_columns as u64, - first_term_idx, + first_term_idx + (shard * rows_per_shard(arr)) as usize, chunk, ); }) @@ -150,15 +151,13 @@ pub(crate) trait LayoutOps { fn retrieve_chunk_elements( &mut self, matrix: &Mutex>, - i: u64, - number_of_columns: u64, first_term_idx: usize, chunk: &[usize], ); fn sharding_factor(&self, dimensionality: &Dimensionality) -> usize; } -pub(crate) trait Layout: LayoutOps { +pub trait Layout: LayoutOps { fn shape(&self, dimensionality: &Dimensionality) -> Vec; fn data_type(&self) -> DataType; fn chunk_shape( diff --git a/src/storage/layout/tabular.rs b/src/storage/layout/tabular.rs index 7ee9c15..b6a5455 100644 --- a/src/storage/layout/tabular.rs +++ b/src/storage/layout/tabular.rs @@ -51,9 +51,7 @@ impl Layout for TabularLayout { &self, _: &Dimensionality, ) -> StorageResult> { - let mut sharding_codec_builder = ShardingCodecBuilder::new( - vec![NonZeroU64::new(1).unwrap(), NonZeroU64::new(3).unwrap()].into(), - ); + let mut sharding_codec_builder = ShardingCodecBuilder::new(vec![1, 3].try_into()?); sharding_codec_builder.bytes_to_bytes_codecs(vec![Box::new(GzipCodec::new(5)?)]); Ok(Box::new(sharding_codec_builder.build())) } @@ -86,9 +84,7 @@ impl LayoutOps for TabularLayout { fn retrieve_chunk_elements( &mut self, matrix: &Mutex>, - i: u64, - number_of_columns: u64, - first_term_idx: usize, + first_term_index: usize, // TODO: will first_term_index instead of chunk[0] do the trick? chunk: &[usize], ) { matrix diff --git a/tests/write_read_test.rs b/tests/write_read_test.rs index 0243266..6ee9550 100644 --- a/tests/write_read_test.rs +++ b/tests/write_read_test.rs @@ -13,7 +13,7 @@ fn write_read_tabular_test() { let mut storage = Storage::new(TabularLayout, Serialization::Sparse); common::setup( - common::MATRIX_ZARR, + common::TABULAR_ZARR, &mut storage, ChunkingStrategy::Chunk, ReferenceSystem::SPO, @@ -32,6 +32,7 @@ fn write_read_tabular_test() { #[test] fn write_read_matrix_test() { let mut storage = Storage::new(MatrixLayout, Serialization::Sparse); + common::setup( common::MATRIX_ZARR, &mut storage,