Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Several optimizations have been implemented #14

Merged
merged 10 commits into from
Mar 11, 2024
Merged
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
improving the tests
angelip2303 committed Mar 11, 2024
commit a5ab4b1d51fc60b3245216a4aaf2030a24eb569f
2 changes: 1 addition & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
@@ -5,7 +5,7 @@ version = "0.0.1"
edition = "2021"

[dependencies]
zarrs = { version = "0.12.3", default-features = false, features = [ "http", "gzip", "sharding", "opendal", "async", "ndarray" ] }
zarrs = { version = "0.12.4", default-features = false, features = [ "http", "gzip", "sharding", "opendal", "async", "ndarray", "crc32c" ] }
clap = { version = "4.1.8", features = ["derive"] }
serde_json = "1.0.108"
thiserror = "1.0.50"
8 changes: 2 additions & 6 deletions src/engine/chunk.rs
Original file line number Diff line number Diff line change
@@ -42,11 +42,7 @@ impl<T: ReadableStorageTraits + 'static> EngineStrategy<Vec<u32>> for Array<T> {
let objects = self.shape()[0];
let col = index as u64;
let shape = ArraySubset::new_with_ranges(&[0..objects, col..col + 1]);
let array_subset = self.retrieve_array_subset(&shape).unwrap();
let third_term_subset = array_subset
.windows(4)
.map(|w| u32::from_ne_bytes(w.try_into().unwrap()))
.collect::<Vec<_>>();
Ok(third_term_subset)
let array_subset = self.retrieve_array_subset_elements::<u32>(&shape)?;
Ok(array_subset)
}
}
12 changes: 10 additions & 2 deletions src/storage/layout/mod.rs
Original file line number Diff line number Diff line change
@@ -77,10 +77,18 @@ pub trait LayoutOps<C> {
}

if !remainder.is_empty() {
// first we count the number of shards that have been processed, and
// multiply it by the number of chunks in every shard. Hence, we will
// obtain the number of rows that have been processed
let rows_processed = count.load(Ordering::Relaxed) * rows_per_shard(&arr);
// then we obtain the size of the last shard that is going to be
// processed; it is equal to the size of the remainder
let last_shard_size = remainder.len() as u64;
// lastly, we store the elements in the provided subset
arr.store_array_subset_elements::<u32>(
&ArraySubset::new_with_start_shape(
vec![count.load(Ordering::Relaxed) * rows_per_shard(&arr), 0],
vec![remainder.len() as u64, columns_per_shard(&arr)],
vec![rows_processed, 0],
vec![last_shard_size, columns_per_shard(&arr)],
)?,
self.store_chunk_elements(remainder, columns),
)?;
78 changes: 78 additions & 0 deletions tests/common/mod.rs
Original file line number Diff line number Diff line change
@@ -192,3 +192,81 @@ impl Graph {
ans.to_csc()
}
}

/// Records one triple in the expected result vector of a first-term query.
///
/// The subject, predicate and object are resolved to their dictionary
/// indices. Depending on `reference_system`, one of those indices selects the
/// slot of `expected` that is written and another one is the value stored:
///
/// | reference system | slot written | value stored |
/// |------------------|--------------|--------------|
/// | `SPO`            | object       | predicate    |
/// | `SOP`            | predicate    | object       |
/// | `PSO`            | object       | subject      |
/// | `POS`            | subject      | object       |
/// | `OSP`            | predicate    | subject      |
/// | `OPS`            | subject      | predicate    |
///
/// `expected` is taken as a slice because the function only writes to an
/// existing slot and never grows the buffer; callers holding a `Vec<u32>` can
/// keep passing `&mut vec` unchanged.
///
/// # Panics
///
/// Panics if the selected slot is out of bounds for `expected`, or if the
/// value to store does not fit in a `u32`.
pub fn set_expected_first_term_matrix(
    expected: &mut [u32],
    subject: Subject,
    predicate: Predicate,
    object: Object,
    dictionary: &Dictionary,
    reference_system: ReferenceSystem,
) {
    let subject_idx = subject.get_idx(dictionary);
    let predicate_idx = predicate.get_idx(dictionary);
    let object_idx = object.get_idx(dictionary);

    // Decide which index addresses the slot and which one is the payload.
    let (slot, value) = match reference_system {
        ReferenceSystem::SPO => (object_idx, predicate_idx),
        ReferenceSystem::SOP => (predicate_idx, object_idx),
        ReferenceSystem::PSO => (object_idx, subject_idx),
        ReferenceSystem::POS => (subject_idx, object_idx),
        ReferenceSystem::OSP => (predicate_idx, subject_idx),
        ReferenceSystem::OPS => (subject_idx, predicate_idx),
    };

    // A checked conversion makes an oversized index fail loudly instead of
    // being silently truncated into a wrong expectation.
    expected[slot] = u32::try_from(value).expect("dictionary index does not fit in a u32");
}

/// Records one triple in the expected result buffer of a second-term query.
///
/// `expected` is addressed as a flattened two-dimensional table using
/// `row * row_length + column`. The subject, predicate and object are
/// resolved to their dictionary indices, and `reference_system` decides which
/// of them act as row, column and stored value:
///
/// | reference system | row       | row length        | column    | value     |
/// |------------------|-----------|-------------------|-----------|-----------|
/// | `SPO`            | subject   | `objects_size`    | object    | predicate |
/// | `SOP`            | subject   | `predicates_size` | predicate | object    |
/// | `PSO`            | predicate | `objects_size`    | object    | subject   |
/// | `POS`            | predicate | `subjects_size`   | subject   | object    |
/// | `OSP`            | object    | `predicates_size` | predicate | subject   |
/// | `OPS`            | object    | `subjects_size`   | subject   | predicate |
///
/// `expected` is taken as a slice because the function only writes to an
/// existing slot and never grows the buffer; callers holding a `Vec<u32>` can
/// keep passing `&mut vec` unchanged.
///
/// # Panics
///
/// Panics if the computed offset is out of bounds for `expected`, or if the
/// value to store does not fit in a `u32`.
pub fn set_expected_second_term_matrix(
    expected: &mut [u32],
    subject: Subject,
    predicate: Predicate,
    object: Object,
    dictionary: &Dictionary,
    reference_system: ReferenceSystem,
) {
    let subject_idx = subject.get_idx(dictionary);
    let predicate_idx = predicate.get_idx(dictionary);
    let object_idx = object.get_idx(dictionary);

    // Compute the flat offset and pick the payload for the chosen ordering.
    let (offset, value) = match reference_system {
        ReferenceSystem::SPO => (
            subject_idx * dictionary.objects_size() + object_idx,
            predicate_idx,
        ),
        ReferenceSystem::SOP => (
            subject_idx * dictionary.predicates_size() + predicate_idx,
            object_idx,
        ),
        ReferenceSystem::PSO => (
            predicate_idx * dictionary.objects_size() + object_idx,
            subject_idx,
        ),
        ReferenceSystem::POS => (
            predicate_idx * dictionary.subjects_size() + subject_idx,
            object_idx,
        ),
        ReferenceSystem::OSP => (
            object_idx * dictionary.predicates_size() + predicate_idx,
            subject_idx,
        ),
        ReferenceSystem::OPS => (
            object_idx * dictionary.subjects_size() + subject_idx,
            predicate_idx,
        ),
    };

    // A checked conversion makes an oversized index fail loudly instead of
    // being silently truncated into a wrong expectation.
    expected[offset] = u32::try_from(value).expect("dictionary index does not fit in a u32");
}

/// Records one triple in the expected result vector of a third-term query.
///
/// The subject, predicate and object are resolved to their dictionary
/// indices. Depending on `reference_system`, one of those indices selects the
/// slot of `expected` that is written and another one is the value stored:
///
/// | reference system | slot written | value stored |
/// |------------------|--------------|--------------|
/// | `SPO`            | subject      | predicate    |
/// | `SOP`            | subject      | object       |
/// | `PSO`            | predicate    | subject      |
/// | `POS`            | predicate    | object       |
/// | `OSP`            | object       | subject      |
/// | `OPS`            | object       | predicate    |
///
/// `expected` is taken as a slice because the function only writes to an
/// existing slot and never grows the buffer; callers holding a `Vec<u32>` can
/// keep passing `&mut vec` unchanged.
///
/// # Panics
///
/// Panics if the selected slot is out of bounds for `expected`, or if the
/// value to store does not fit in a `u32`.
pub fn set_expected_third_term_matrix(
    expected: &mut [u32],
    subject: Subject,
    predicate: Predicate,
    object: Object,
    dictionary: &Dictionary,
    reference_system: ReferenceSystem,
) {
    let subject_idx = subject.get_idx(dictionary);
    let predicate_idx = predicate.get_idx(dictionary);
    let object_idx = object.get_idx(dictionary);

    // Decide which index addresses the slot and which one is the payload.
    let (slot, value) = match reference_system {
        ReferenceSystem::SPO => (subject_idx, predicate_idx),
        ReferenceSystem::SOP => (subject_idx, object_idx),
        ReferenceSystem::PSO => (predicate_idx, subject_idx),
        ReferenceSystem::POS => (predicate_idx, object_idx),
        ReferenceSystem::OSP => (object_idx, subject_idx),
        ReferenceSystem::OPS => (object_idx, predicate_idx),
    };

    // A checked conversion makes an oversized index fail loudly instead of
    // being silently truncated into a wrong expectation.
    expected[slot] = u32::try_from(value).expect("dictionary index does not fit in a u32");
}
30 changes: 23 additions & 7 deletions tests/get_object_test.rs
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
use common::set_expected_third_term_matrix;
use remote_hdt::storage::layout::matrix::MatrixLayout;
use remote_hdt::storage::layout::tabular::TabularLayout;
use remote_hdt::storage::ops::Ops;
@@ -19,7 +20,7 @@ fn get_object_matrix_sharding_test() -> Result<(), Box<dyn Error>> {
common::setup(
common::SHARDING_ZARR,
&mut storage,
ChunkingStrategy::Sharding(3),
ChunkingStrategy::Sharding(4),
ReferenceSystem::SPO,
);

@@ -31,10 +32,19 @@ fn get_object_matrix_sharding_test() -> Result<(), Box<dyn Error>> {
_ => unreachable!(),
};

if actual == vec![2, 0, 0, 0, 0] {
let mut expected = vec![0u32; storage.get_dictionary().subjects_size()];
set_expected_third_term_matrix(
&mut expected,
common::Subject::Alan,
common::Predicate::DateOfBirth,
common::Object::Date,
&storage.get_dictionary(),
ReferenceSystem::SPO,
);

if actual == expected {
Ok(())
} else {
println!("{:?}", actual);
Err(String::from("Expected and actual results are not equals").into())
}
}
@@ -58,11 +68,17 @@ fn get_object_tabular_test() -> Result<(), Box<dyn Error>> {
_ => unreachable!(),
};

let mut expected = TriMat::new((4, 9));
expected.add_triplet(1, 3, 3);
let expected = expected.to_csc();
let mut expected = TriMat::new((
storage.get_dictionary().subjects_size(),
storage.get_dictionary().objects_size(),
));
expected.add_triplet(
common::Subject::Bombe.get_idx(&storage.get_dictionary()),
common::Object::Alan.get_idx(&storage.get_dictionary()),
common::Predicate::Discoverer.get_idx(&storage.get_dictionary()),
);

if actual == expected {
if actual == expected.to_csc() {
Ok(())
} else {
Err(String::from("Expected and actual results are not equals").into())
43 changes: 36 additions & 7 deletions tests/get_predicate_test.rs
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
use common::set_expected_second_term_matrix;
use remote_hdt::storage::layout::matrix::MatrixLayout;
use remote_hdt::storage::layout::tabular::TabularLayout;
use remote_hdt::storage::ops::Ops;
@@ -31,12 +32,37 @@ fn get_predicate_matrix_chunk_test() -> Result<(), Box<dyn Error>> {
_ => unreachable!(),
};

if actual
== vec![
0, 0, 5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 5, 0, 0, 0,
]
{
let mut expected = vec![
0u32;
storage.get_dictionary().subjects_size()
* storage.get_dictionary().objects_size()
];
set_expected_second_term_matrix(
&mut expected,
common::Subject::Alan,
common::Predicate::InstanceOf,
common::Object::Human,
&storage.get_dictionary(),
ReferenceSystem::SPO,
);
set_expected_second_term_matrix(
&mut expected,
common::Subject::Wilmslow,
common::Predicate::InstanceOf,
common::Object::Town,
&storage.get_dictionary(),
ReferenceSystem::SPO,
);
set_expected_second_term_matrix(
&mut expected,
common::Subject::Bombe,
common::Predicate::InstanceOf,
common::Object::Computer,
&storage.get_dictionary(),
ReferenceSystem::SPO,
);

if actual == expected {
Ok(())
} else {
Err(String::from("Expected and actual results are not equals").into())
@@ -62,7 +88,10 @@ fn get_predicate_tabular_test() -> Result<(), Box<dyn Error>> {
_ => unreachable!(),
};

let mut expected = TriMat::new((4, 9));
let mut expected = TriMat::new((
storage.get_dictionary().subjects_size(),
storage.get_dictionary().objects_size(),
));
expected.add_triplet(
common::Subject::Alan.get_idx(&storage.get_dictionary()),
common::Object::Human.get_idx(&storage.get_dictionary()),
89 changes: 79 additions & 10 deletions tests/get_subject_test.rs
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
use common::set_expected_first_term_matrix;
use remote_hdt::storage::layout::matrix::MatrixLayout;
use remote_hdt::storage::layout::tabular::TabularLayout;
use remote_hdt::storage::ops::Ops;
@@ -31,7 +32,49 @@ fn get_subject_matrix_chunk_test() -> Result<(), Box<dyn Error>> {
_ => unreachable!(),
};

if actual == vec![2, 4, 5, 0, 0, 0, 0, 7, 8] {
let mut expected = vec![0u32; storage.get_dictionary().objects_size()];
set_expected_first_term_matrix(
&mut expected,
common::Subject::Alan,
common::Predicate::InstanceOf,
common::Object::Human,
&storage.get_dictionary(),
ReferenceSystem::SPO,
);
set_expected_first_term_matrix(
&mut expected,
common::Subject::Alan,
common::Predicate::PlaceOfBirth,
common::Object::Warrington,
&storage.get_dictionary(),
ReferenceSystem::SPO,
);
set_expected_first_term_matrix(
&mut expected,
common::Subject::Alan,
common::Predicate::PlaceOfDeath,
common::Object::Wilmslow,
&storage.get_dictionary(),
ReferenceSystem::SPO,
);
set_expected_first_term_matrix(
&mut expected,
common::Subject::Alan,
common::Predicate::DateOfBirth,
common::Object::Date,
&storage.get_dictionary(),
ReferenceSystem::SPO,
);
set_expected_first_term_matrix(
&mut expected,
common::Subject::Alan,
common::Predicate::Employer,
common::Object::GCHQ,
&storage.get_dictionary(),
ReferenceSystem::SPO,
);

if actual == expected {
Ok(())
} else {
Err(String::from("Expected and actual results are not equals").into())
@@ -57,7 +100,11 @@ fn get_subject_matrix_sharding_test() -> Result<(), Box<dyn Error>> {
_ => unreachable!(),
};

if actual == vec![0, 0, 0, 0, 0, 5, 1, 0, 0] {
let mut expected = vec![0u32; storage.get_dictionary().objects_size()];
expected[5] = common::Predicate::InstanceOf.get_idx(&storage.get_dictionary()) as u32;
expected[6] = common::Predicate::Country.get_idx(&storage.get_dictionary()) as u32;

if actual == expected {
Ok(())
} else {
Err(String::from("Expected and actual results are not equals").into())
@@ -83,15 +130,37 @@ fn get_subject_tabular_test() -> Result<(), Box<dyn Error>> {
_ => unreachable!(),
};

let mut expected = TriMat::new((4, 9));
expected.add_triplet(0, 0, 2);
expected.add_triplet(0, 1, 4);
expected.add_triplet(0, 2, 5);
expected.add_triplet(0, 7, 7);
expected.add_triplet(0, 8, 8);
let expected = expected.to_csc();
let mut expected = TriMat::new((
storage.get_dictionary().subjects_size(),
storage.get_dictionary().objects_size(),
));
expected.add_triplet(
common::Subject::Alan.get_idx(&storage.get_dictionary()),
common::Object::Human.get_idx(&storage.get_dictionary()),
common::Predicate::InstanceOf.get_idx(&storage.get_dictionary()),
);
expected.add_triplet(
common::Subject::Alan.get_idx(&storage.get_dictionary()),
common::Object::Warrington.get_idx(&storage.get_dictionary()),
common::Predicate::PlaceOfBirth.get_idx(&storage.get_dictionary()),
);
expected.add_triplet(
common::Subject::Alan.get_idx(&storage.get_dictionary()),
common::Object::Wilmslow.get_idx(&storage.get_dictionary()),
common::Predicate::PlaceOfDeath.get_idx(&storage.get_dictionary()),
);
expected.add_triplet(
common::Subject::Alan.get_idx(&storage.get_dictionary()),
common::Object::Date.get_idx(&storage.get_dictionary()),
common::Predicate::DateOfBirth.get_idx(&storage.get_dictionary()),
);
expected.add_triplet(
common::Subject::Alan.get_idx(&storage.get_dictionary()),
common::Object::GCHQ.get_idx(&storage.get_dictionary()),
common::Predicate::Employer.get_idx(&storage.get_dictionary()),
);

if actual == expected {
if actual == expected.to_csc() {
Ok(())
} else {
Err(String::from("Expected and actual results are not equals").into())
41 changes: 39 additions & 2 deletions tests/orientation.rs
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
use common::set_expected_first_term_matrix;
use remote_hdt::storage::layout::matrix::MatrixLayout;
use remote_hdt::storage::layout::tabular::TabularLayout;
use remote_hdt::storage::ops::Ops;
@@ -30,7 +31,33 @@ fn orientation_pso_matrix_test() -> Result<(), Box<dyn Error>> {
_ => unreachable!(),
};

if actual == vec![3, 0, 1] {
let mut expected = vec![0u32; storage.get_dictionary().objects_size()];
set_expected_first_term_matrix(
&mut expected,
common::Subject::Alan,
common::Predicate::InstanceOf,
common::Object::Human,
&storage.get_dictionary(),
ReferenceSystem::PSO,
);
set_expected_first_term_matrix(
&mut expected,
common::Subject::Wilmslow,
common::Predicate::InstanceOf,
common::Object::Town,
&storage.get_dictionary(),
ReferenceSystem::PSO,
);
set_expected_first_term_matrix(
&mut expected,
common::Subject::Bombe,
common::Predicate::InstanceOf,
common::Object::Computer,
&storage.get_dictionary(),
ReferenceSystem::PSO,
);

if actual == expected {
Ok(())
} else {
Err(String::from("Expected and actual results are not equals").into())
@@ -56,7 +83,17 @@ fn orientation_ops_matrix_test() -> Result<(), Box<dyn Error>> {
_ => unreachable!(),
};

if actual == vec![0, 3, 0, 0] {
let mut expected = vec![0u32; storage.get_dictionary().subjects_size()];
set_expected_first_term_matrix(
&mut expected,
common::Subject::Bombe,
common::Predicate::Discoverer,
common::Object::Alan,
&storage.get_dictionary(),
ReferenceSystem::OPS,
);

if actual == expected {
Ok(())
} else {
println!("{:?}", actual);