Skip to content

Commit

Permalink
fixing some tests
Browse files Browse the repository at this point in the history
  • Loading branch information
angelip2303 committed Feb 12, 2024
1 parent 3517f2d commit 1851df4
Show file tree
Hide file tree
Showing 6 changed files with 34 additions and 44 deletions.
14 changes: 7 additions & 7 deletions src/engine/chunk.rs
Original file line number Diff line number Diff line change
Expand Up @@ -14,10 +14,10 @@ impl<T: ReadableStorageTraits> EngineStrategy<Vec<usize>> for Array<T> {
let chunk_to_index = index as u64 % rows_per_shard(self);
let ans = self.retrieve_chunk_subset_elements(
&[index_to_chunk, 0],
&ArraySubset::new_with_start_end_inc(
vec![chunk_to_index, 0],
vec![chunk_to_index, columns_per_shard(self) - 1],
)?,
&ArraySubset::new_with_ranges(&[
chunk_to_index..chunk_to_index + 1,
0..columns_per_shard(self),
]),
)?;
Ok(ans.to_vec())
}
Expand All @@ -27,9 +27,9 @@ impl<T: ReadableStorageTraits> EngineStrategy<Vec<usize>> for Array<T> {
}

fn get_third_term(&self, index: usize) -> EngineResult<Vec<usize>> {
let start = vec![0, index as u64];
let end = vec![self.shape()[0], index as u64];
let shape = &ArraySubset::new_with_start_end_inc(start, end)?;
let last_chunk = self.shape()[0] / rows_per_shard(self);
let col = index as u64;
let shape = &ArraySubset::new_with_ranges(&[0..last_chunk, col..col + 1]);
let ans = self.retrieve_array_subset_elements(shape)?;
Ok(ans.to_vec())
}
Expand Down
3 changes: 3 additions & 0 deletions src/error.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ use thiserror::Error;
use zarrs::array::codec::bytes_to_bytes::gzip::GzipCompressionLevelError;
use zarrs::array::ArrayCreateError;
use zarrs::array::ArrayError;
use zarrs::array::NonZeroError;
use zarrs::array_subset::IncompatibleDimensionalityError;
use zarrs::array_subset::IncompatibleStartEndIndicesError;
use zarrs::group::GroupCreateError;
Expand Down Expand Up @@ -56,6 +57,8 @@ pub enum RemoteHDTError {
ReadOnlyBackend,
#[error("Error while parsing the RDF graph")]
RdfParse,
#[error(transparent)]
NonZero(#[from] NonZeroError),
}

#[derive(Error, Debug)]
Expand Down
21 changes: 6 additions & 15 deletions src/storage/layout/matrix.rs
Original file line number Diff line number Diff line change
Expand Up @@ -85,13 +85,8 @@ impl Layout<Chunk> for MatrixLayout {
&self,
dimensionality: &Dimensionality,
) -> StorageResult<Box<dyn ArrayToBytesCodecTraits>> {
let mut sharding_codec_builder = ShardingCodecBuilder::new(
vec![
NonZeroU64::new(1).unwrap(),
NonZeroU64::new(dimensionality.get_third_term_size()).unwrap(),
]
.into(),
);
let mut sharding_codec_builder =
ShardingCodecBuilder::new(vec![1, dimensionality.get_third_term_size()].try_into()?);
sharding_codec_builder.bytes_to_bytes_codecs(vec![Box::new(GzipCodec::new(5)?)]);
Ok(Box::new(sharding_codec_builder.build()))
}
Expand Down Expand Up @@ -128,21 +123,17 @@ impl LayoutOps<Chunk> for MatrixLayout {
fn retrieve_chunk_elements(
&mut self,
matrix: &Mutex<TriMat<usize>>,
i: u64,
number_of_columns: u64,
first_term_idx: usize,
first_term_index: usize,
chunk: &[usize],
) {
chunk
.iter()
.enumerate()
.for_each(|(third_term_idx, &second_term_idx)| {
if second_term_idx != 0 {
matrix.lock().add_triplet(
first_term_idx + (i * number_of_columns) as usize,
third_term_idx,
second_term_idx,
);
matrix
.lock()
.add_triplet(first_term_index, third_term_idx, second_term_idx);
}
})
}
Expand Down
29 changes: 14 additions & 15 deletions src/storage/layout/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ type ArrayToBytesCodec = Box<dyn ArrayToBytesCodecTraits>;
pub mod matrix;
pub mod tabular;

pub(crate) trait LayoutOps<C> {
pub trait LayoutOps<C> {
fn retrieve_attributes(&mut self, arr: &Array<OpendalStore>) -> StorageResult<Dictionary> {
// 4. We get the attributes so we can obtain some values that we will need
let attributes = arr.attributes();
Expand Down Expand Up @@ -70,13 +70,13 @@ pub(crate) trait LayoutOps<C> {
let iter = binding.chunks_exact(rows_per_shard(&arr) as usize);
let remainder = iter.remainder();

let _ = iter.map(|chunk| {
count.fetch_add(1, Ordering::Relaxed);
for chunk in iter {
arr.store_chunk_elements(
&[count.load(Ordering::Relaxed), 0],
self.store_chunk_elements(chunk, columns),
)
});
)?;
count.fetch_add(1, Ordering::Relaxed);
}

if !remainder.is_empty() {
arr.store_array_subset_elements(
Expand Down Expand Up @@ -108,10 +108,11 @@ pub(crate) trait LayoutOps<C> {
dimensionality.third_term_size, // we obtain the size of the third terms
)));

// We compute the number of chunks; for us to achieve so, we have to obtain
// We compute the number of shards; for us to achieve so, we have to obtain
// first dimension of the chunk grid
let number_of_chunks = match arr.chunk_grid_shape() {
let number_of_shards = match arr.chunk_grid_shape() {
Some(chunk_grid) => chunk_grid[0],

None => 0,
};

Expand All @@ -123,16 +124,16 @@ pub(crate) trait LayoutOps<C> {
// low, as instead of parsing the whole array, we process smaller pieces
// of it. Once we have all the pieces processed, we will have parsed the
// whole array
for i in 0..number_of_chunks {
arr.retrieve_chunk_elements(&[i, 0])?
for shard in 0..number_of_shards {
arr.retrieve_chunk_elements(&[shard, 0])?
// We divide each shard by the number of columns, as a shard is
// composed of chunks having the size of [1, number of cols]
.chunks(number_of_columns)
.enumerate()
.for_each(|(first_term_idx, chunk)| {
self.retrieve_chunk_elements(
&matrix,
i,
number_of_columns as u64,
first_term_idx,
first_term_idx + (shard * rows_per_shard(arr)) as usize,
chunk,
);
})
Expand All @@ -150,15 +151,13 @@ pub(crate) trait LayoutOps<C> {
fn retrieve_chunk_elements(
&mut self,
matrix: &Mutex<TriMat<usize>>,
i: u64,
number_of_columns: u64,
first_term_idx: usize,
chunk: &[usize],
);
fn sharding_factor(&self, dimensionality: &Dimensionality) -> usize;
}

pub(crate) trait Layout<C>: LayoutOps<C> {
pub trait Layout<C>: LayoutOps<C> {
fn shape(&self, dimensionality: &Dimensionality) -> Vec<u64>;
fn data_type(&self) -> DataType;
fn chunk_shape(
Expand Down
8 changes: 2 additions & 6 deletions src/storage/layout/tabular.rs
Original file line number Diff line number Diff line change
Expand Up @@ -51,9 +51,7 @@ impl Layout<Chunk> for TabularLayout {
&self,
_: &Dimensionality,
) -> StorageResult<Box<dyn ArrayToBytesCodecTraits>> {
let mut sharding_codec_builder = ShardingCodecBuilder::new(
vec![NonZeroU64::new(1).unwrap(), NonZeroU64::new(3).unwrap()].into(),
);
let mut sharding_codec_builder = ShardingCodecBuilder::new(vec![1, 3].try_into()?);
sharding_codec_builder.bytes_to_bytes_codecs(vec![Box::new(GzipCodec::new(5)?)]);
Ok(Box::new(sharding_codec_builder.build()))
}
Expand Down Expand Up @@ -86,9 +84,7 @@ impl LayoutOps<Chunk> for TabularLayout {
fn retrieve_chunk_elements(
&mut self,
matrix: &Mutex<TriMat<usize>>,
i: u64,
number_of_columns: u64,
first_term_idx: usize,
first_term_index: usize, // TODO: will first_term_index instead of chunk[0] do the trick?

Check warning on line 87 in src/storage/layout/tabular.rs

View workflow job for this annotation

GitHub Actions / Check (stable)

unused variable: `first_term_index`

Check failure on line 87 in src/storage/layout/tabular.rs

View workflow job for this annotation

GitHub Actions / Clippy (stable)

unused variable: `first_term_index`

Check warning on line 87 in src/storage/layout/tabular.rs

View workflow job for this annotation

GitHub Actions / Test Suite (stable)

unused variable: `first_term_index`

Check warning on line 87 in src/storage/layout/tabular.rs

View workflow job for this annotation

GitHub Actions / Check (stable)

unused variable: `first_term_index`

Check failure on line 87 in src/storage/layout/tabular.rs

View workflow job for this annotation

GitHub Actions / Clippy (stable)

unused variable: `first_term_index`

Check warning on line 87 in src/storage/layout/tabular.rs

View workflow job for this annotation

GitHub Actions / Test Suite (stable)

unused variable: `first_term_index`
chunk: &[usize],
) {
matrix
Expand Down
3 changes: 2 additions & 1 deletion tests/write_read_test.rs
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ fn write_read_tabular_test() {
let mut storage = Storage::new(TabularLayout, Serialization::Sparse);

common::setup(
common::MATRIX_ZARR,
common::TABULAR_ZARR,
&mut storage,
ChunkingStrategy::Chunk,
ReferenceSystem::SPO,
Expand All @@ -32,6 +32,7 @@ fn write_read_tabular_test() {
#[test]
fn write_read_matrix_test() {
let mut storage = Storage::new(MatrixLayout, Serialization::Sparse);

common::setup(
common::MATRIX_ZARR,
&mut storage,
Expand Down

0 comments on commit 1851df4

Please sign in to comment.