ds: add valid check
gauteh committed Jun 3, 2023
1 parent 3a2b35f commit ce62726
Showing 4 changed files with 56 additions and 10 deletions.
6 changes: 6 additions & 0 deletions src/idx/dataset/any.rs
@@ -116,6 +116,8 @@ pub trait DatasetExt {

fn chunk_shape(&self) -> &[u64];

fn valid(&self) -> anyhow::Result<bool>;

fn as_par_reader(&self, p: &dyn AsRef<Path>) -> anyhow::Result<Box<dyn DatasetExtReader + '_>>;
}

@@ -140,6 +142,10 @@ impl<'a> DatasetExt for DatasetD<'a> {
self.inner().chunk_shape()
}

fn valid(&self) -> anyhow::Result<bool> {
self.inner().valid()
}

fn as_par_reader(&self, p: &dyn AsRef<Path>) -> anyhow::Result<Box<dyn DatasetExtReader + '_>> {
self.inner().as_par_reader(p)
}
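
The new `valid()` method is part of the `DatasetExt` trait, so it is available both on the type-erased `DatasetD` and on a concrete `Dataset<D>`. A minimal usage sketch, not part of the commit (the `hidefix::idx::DatasetExt` import path is assumed):

use hidefix::idx::DatasetExt;

/// Bail out early if a dataset's chunk layout does not pass the new checks.
fn ensure_valid(ds: &impl DatasetExt) -> anyhow::Result<()> {
    anyhow::ensure!(ds.valid()?, "dataset failed chunk layout validation");
    Ok(())
}

As implemented in this commit, `valid()` only ever returns `Ok(true)` or an error, so callers can also just `?` or `unwrap()` it, as the tests below do.
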
24 changes: 24 additions & 0 deletions src/idx/dataset/dataset.rs
@@ -300,6 +300,26 @@ impl<const D: usize> Dataset<'_, D> {
self.shape.is_empty()
}

/// Test whether the dataset and chunk layout are valid.
pub fn valid(&self) -> anyhow::Result<bool> {
for chunk in self.chunks.iter() {
let offset = chunk.offset.iter().map(|u| u.get()).collect::<Vec<_>>();
ensure!(chunk.contains(&offset, &self.chunk_shape) == std::cmp::Ordering::Equal, "chunk does not contain its offset");
}

let end: u64 = self.shape.iter().product();
let chunk_sh: u64 = self.chunk_shape.iter().product();

ensure!(end % chunk_sh == 0, "chunks not modulo of dataset shape: {0:?} vs {1:?}", self.shape, self.chunk_shape);
let chunks = end / chunk_sh;
ensure!(chunks == self.chunks.len() as u64, "number of chunks does not match dataset shape: {chunks} != {}", self.chunks.len());

Ok(true)
}

/// Returns an iterator over chunk, offset and size which if joined will make up the specified slice through the
/// variable.
pub fn chunk_slices(
@@ -410,6 +430,10 @@ impl<const D: usize> DatasetExt for Dataset<'_, D> {
self.chunk_shape.as_slice()
}

fn valid(&self) -> anyhow::Result<bool> {
self.valid()
}

fn as_par_reader(&self, p: &dyn AsRef<Path>) -> anyhow::Result<Box<dyn DatasetExtReader + '_>> {
use crate::reader::direct::Direct;

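A standalone restatement of the arithmetic part of the check, as a sketch only (plain slices instead of the crate's `Dataset` type; the per-chunk `chunk.contains(..)` assertion is left out): the total number of elements must be a whole multiple of the elements per chunk, and the resulting chunk count must match the number of indexed chunks.

use anyhow::ensure;

fn layout_counts_ok(shape: &[u64], chunk_shape: &[u64], n_chunks: u64) -> anyhow::Result<bool> {
    let end: u64 = shape.iter().product();            // total number of elements
    let chunk_sh: u64 = chunk_shape.iter().product(); // elements per chunk

    ensure!(end % chunk_sh == 0,
        "chunks not modulo of dataset shape: {:?} vs {:?}", shape, chunk_shape);
    ensure!(end / chunk_sh == n_chunks,
        "number of chunks does not match dataset shape: {} != {}", end / chunk_sh, n_chunks);

    Ok(true)
}

// e.g. shape [2, 32, 580] with an (illustrative) chunk shape [1, 1, 580]:
// 37120 elements / 580 per chunk = 64 chunks expected.
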
32 changes: 22 additions & 10 deletions src/idx/dataset/slicer.rs
@@ -157,40 +157,49 @@ impl<'a, const D: usize> Iterator for ChunkSlice<'a, D> {
//
// chunk dimension will always be less or equal to the dataset
// dimension, so we do not need to check it.

// When a chunk dimension is of size 1 it does not make the chunk any
// greater, so we ignore it. Is the case when all dimensions are 1 special?
//
// When all the higher chunk dimensions are of size one we will reach the
// next chunk if we advance to the end of the chunk, and we can stop. We
// must however advance by at least one.
if self.dataset.chunk_shape[di] == 1 {
// if advance == 0 {
// advance = 1;
// }
continue;
}
// if self.dataset.chunk_shape[di] == 1 {
// if advance == 0 {
// advance = 1;
// }
// continue;
// }

// Assert that we have not advanced to the next chunk.
assert_eq!(chunk, self.dataset.chunk_at_coord(&I));
assert_eq!(chunk, self.dataset.chunk_at_coord(&I), "advanced into next chunk");

// Assert that the coordinates are in this chunk.
debug_assert!(
chunk.contains(&I, &self.dataset.chunk_shape) == std::cmp::Ordering::Equal
chunk.contains(&I, &self.dataset.chunk_shape) == std::cmp::Ordering::Equal,
"coordinates are not in this chunk."
);

// End of chunk dimension.
let chunk_d = chunk.offset[di].get() + self.dataset.chunk_shape[di];
debug_assert!(chunk_d <= self.dataset.shape[di]);

// End of count dimension.
let count_d = self.slice_start[di] + self.slice_counts[di];
debug_assert!(count_d <= self.dataset.shape[di]);

let Id = I[di]; // Coordinate in current dimension of entire
// dataset.
let nId = min(chunk_d, count_d); // New coordinate in current
// dimension of entire
// dataset.
debug_assert!(nId < self.dataset.shape[di]);
debug_assert!(nId <= self.dataset.shape[di], "coordinate above shape of dataset dimension.");

dbg!(chunk_d);
dbg!(count_d);

assert!(nId > Id);
assert!(nId > Id); // XXX: This one should probably go. There might be
// cases when we don't advance in this dim, but in the
// next.

let dim_sz = self.dataset.dim_sz[di];

@@ -223,6 +232,7 @@ impl<'a, const D: usize> Iterator for ChunkSlice<'a, D> {
let chunk_end = chunk_start + advance;

self.slice_offset += advance;
assert!(self.slice_offset <= self.slice_end, "advanced further than slice end.");

assert!(advance > 0, "Iterator not advancing");

@@ -382,6 +392,8 @@ mod tests {
)
.unwrap();

ds.valid().unwrap();

ChunkSlice::new(&ds, [0, 0, 0], [2, 32, 580]).for_each(drop);

// Should be all chunks.
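
The assertion on the new coordinate was relaxed from `<` to `<=`: at the end of a dimension, the end of the chunk (or of the requested slice) coincides with the dataset extent, so equality is legitimate. A small worked sketch of that bound, with illustrative values:

use std::cmp::min;

// nId is the next coordinate in dimension `di`, capped by whichever ends
// first: the current chunk or the requested slice.
fn next_coord(chunk_offset: u64, chunk_len: u64, start: u64, count: u64, shape: u64) -> u64 {
    let chunk_d = chunk_offset + chunk_len; // end of chunk in this dimension
    let count_d = start + count;            // end of slice in this dimension
    let n_id = min(chunk_d, count_d);
    assert!(n_id <= shape, "coordinate above shape of dataset dimension.");
    n_id
}

// A full read of a dimension of size 580 in one chunk:
// next_coord(0, 580, 0, 580, 580) == 580 == shape, hence `<=` rather than `<`.
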
4 changes: 4 additions & 0 deletions tests/read_norkyst.rs
@@ -57,6 +57,8 @@ fn wind() {
let hUw = hi.reader("Uwind").unwrap().values::<i32>(None, None).unwrap();
let hVw = hi.reader("Vwind").unwrap().values::<i32>(None, None).unwrap();

hi.dataset("Uwind").unwrap().valid().unwrap();

assert_eq!(Uw, hUw);
assert_eq!(Vw, hVw);
}
@@ -75,6 +77,8 @@ fn current() {

assert_eq!(u.len(), hi.dataset("u_eastward").unwrap().size());

// hi.dataset("u_eastward").unwrap().valid().unwrap();

let hu = hi.reader("u_eastward").unwrap().values::<f32>(None, None).unwrap();
let hv = hi.reader("v_northward").unwrap().values::<f32>(None, None).unwrap();

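The corresponding check for `u_eastward` is left commented out. A hedged aside, not stated in the commit: HDF5 allows datasets whose shape is not an exact multiple of the chunk shape, in which case the trailing edge chunks are partial and the strict modulo check in `valid()` would reject the layout. The chunk count is then a per-dimension ceiling division, as in this sketch:

/// Number of chunks for a dataset that may have partial edge chunks
/// (illustrative helper, not part of the crate).
fn chunk_count(shape: &[u64], chunk_shape: &[u64]) -> u64 {
    shape
        .iter()
        .zip(chunk_shape)
        .map(|(s, c)| (s + c - 1) / c) // ceiling division per dimension
        .product()
}

// e.g. a dimension of 10 with chunk size 4 has 3 chunks (4 + 4 + 2),
// even though 10 % 4 != 0.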
