Skip to content

Commit

Permalink
clean up check
Browse files Browse the repository at this point in the history
  • Loading branch information
luizirber committed Sep 27, 2023
1 parent 0799adc commit feaafb1
Show file tree
Hide file tree
Showing 4 changed files with 31 additions and 64 deletions.
30 changes: 0 additions & 30 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

3 changes: 0 additions & 3 deletions src/core/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,6 @@ murmurhash3 = "0.0.5"
niffler = { version = "2.3.1", default-features = false, features = [ "gz" ] }
nohash-hasher = "0.2.0"
num-iter = "0.1.43"
numsep = "0.1.12"
once_cell = "1.18.0"
ouroboros = "0.18.0"
piz = "0.5.0"
Expand All @@ -56,14 +55,12 @@ rkyv = { version = "0.7.39", optional = true }
roaring = "0.10.0"
serde = { version = "1.0.168", features = ["derive"] }
serde_json = "1.0.107"
size = "0.4.0"
thiserror = "1.0"
twox-hash = "1.6.0"
typed-builder = "0.14.0"
vec-collections = "0.4.3"

[dev-dependencies]
assert_matches = "1.3.0"
criterion = "0.5.1"
needletail = { version = "0.5.1", default-features = false }
proptest = { version = "1.2.0", default-features = false, features = ["std"]}
Expand Down
9 changes: 4 additions & 5 deletions src/core/src/index/revindex/disk_revindex.rs
Original file line number Diff line number Diff line change
Expand Up @@ -10,10 +10,9 @@ use rocksdb::{ColumnFamilyDescriptor, MergeOperands, Options};

use crate::collection::{Collection, CollectionSet};
use crate::encodings::{Color, Idx};
use crate::index::revindex::prepare_query;
use crate::index::revindex::{
self as module, stats_for_cf, Datasets, HashToColor, QueryColors, RevIndexOps, DB, HASHES,
MANIFEST, METADATA, STORAGE_SPEC,
self as module, prepare_query, stats_for_cf, Datasets, DbStats, HashToColor, QueryColors,
RevIndexOps, DB, HASHES, MANIFEST, METADATA, STORAGE_SPEC,
};
use crate::index::{GatherResult, SigCounter};
use crate::manifest::Manifest;
Expand Down Expand Up @@ -409,8 +408,8 @@ impl RevIndexOps for RevIndex {
Ok(module::RevIndex::Plain(self))
}

fn check(&self, quick: bool) {
stats_for_cf(self.db.clone(), HASHES, true, quick);
fn check(&self, quick: bool) -> DbStats {
stats_for_cf(self.db.clone(), HASHES, true, quick)
}

fn compact(&self) {
Expand Down
53 changes: 27 additions & 26 deletions src/core/src/index/revindex/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ use std::sync::Arc;

use byteorder::{LittleEndian, WriteBytesExt};
use enum_dispatch::enum_dispatch;
use getset::{Getters, Setters};
use nohash_hasher::BuildNoHashHasher;
use roaring::RoaringBitmap;
use serde::{Deserialize, Serialize};
Expand Down Expand Up @@ -67,7 +68,7 @@ pub trait RevIndexOps {

fn convert(&self, output_db: RevIndex) -> Result<()>;

fn check(&self, quick: bool);
fn check(&self, quick: bool) -> DbStats;

fn gather(
&self,
Expand Down Expand Up @@ -381,11 +382,27 @@ impl Datasets {
*/
}

fn stats_for_cf(db: Arc<DB>, cf_name: &str, deep_check: bool, quick: bool) {
/// Summary statistics for one column family, as returned by `stats_for_cf`
/// (and therefore by `RevIndexOps::check`).
///
/// Each field is exposed read-only through a public getter generated by
/// `getset` (`#[getset(get = "pub")]`).
// NOTE(review): `Setters` is derived but no field requests `set`, so no setters
// appear to be generated — confirm whether the derive is still needed.
#[derive(Getters, Setters, Debug)]
pub struct DbStats {
/// Set from `datasets.len()` in `stats_for_cf`.
// NOTE(review): the pre-refactor logging only reported this when
// `!quick && cf_name == COLORS`; it is presumably not meaningful otherwise — confirm.
#[getset(get = "pub")]
total_datasets: usize,

/// Set to `kcount / 8` in `stats_for_cf`.
// presumably each key is 8 bytes wide — TODO confirm against the key encoding.
#[getset(get = "pub")]
total_keys: usize,

/// Running key counter accumulated by `stats_for_cf`.
// presumably total key bytes (the pre-refactor logging rendered it with
// `Size::from_bytes`) — TODO confirm.
#[getset(get = "pub")]
kcount: usize,

/// Running value counter accumulated by `stats_for_cf`.
// presumably total value bytes (the pre-refactor logging rendered it with
// `Size::from_bytes`) — TODO confirm.
#[getset(get = "pub")]
vcount: usize,

/// Histogram built by `stats_for_cf`; the pre-refactor logging reported its
/// maximum, mean, stddev and 25th/50th/75th percentiles.
// NOTE(review): that logging was guarded by `!quick && kcount > 0 && deep_check`,
// so the histogram may be empty otherwise — confirm before calling summary methods.
#[getset(get = "pub")]
vcounts: histogram::Histogram,
}

fn stats_for_cf(db: Arc<DB>, cf_name: &str, deep_check: bool, quick: bool) -> DbStats {
use byteorder::ReadBytesExt;
use histogram::Histogram;
use log::info;
use numsep::{separate, Locale};

let cf = db.cf_handle(cf_name).unwrap();

Expand All @@ -411,28 +428,12 @@ fn stats_for_cf(db: Arc<DB>, cf_name: &str, deep_check: bool, quick: bool) {
//println!("Saw {} {:?}", k, value);
}

info!("*** {} ***", cf_name);
use size::Size;
let ksize = Size::from_bytes(kcount);
let vsize = Size::from_bytes(vcount);
if !quick && cf_name == COLORS {
info!(
"total datasets: {}",
separate(datasets.len(), Locale::English)
);
}
info!("total keys: {}", separate(kcount / 8, Locale::English));

info!("k: {}", ksize.to_string());
info!("v: {}", vsize.to_string());

if !quick && kcount > 0 && deep_check {
info!("max v: {}", vcounts.maximum().unwrap());
info!("mean v: {}", vcounts.mean().unwrap());
info!("stddev: {}", vcounts.stddev().unwrap());
info!("median v: {}", vcounts.percentile(50.0).unwrap());
info!("p25 v: {}", vcounts.percentile(25.0).unwrap());
info!("p75 v: {}", vcounts.percentile(75.0).unwrap());
DbStats {
total_datasets: datasets.len(),
total_keys: kcount / 8,
kcount,
vcount,
vcounts,
}
}

Expand Down

0 comments on commit feaafb1

Please sign in to comment.