diff --git a/Cargo.lock b/Cargo.lock index 45130db..4478aeb 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -137,6 +137,12 @@ version = "0.21.7" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9d297deb1925b89f2ccc13d7635fa0714f12c87adce1c75356b39ca9b7178567" +[[package]] +name = "base64" +version = "0.22.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "72b3254f16251a8381aa12e40e3c4d2f0199f8c6508fbecb9d91f575e0fbb8c6" + [[package]] name = "base64ct" version = "1.6.0" @@ -673,12 +679,12 @@ checksum = "b1a46d1a171d865aa5f83f92695765caa047a9b4cbae2cbf37dbd613a793fd4c" [[package]] name = "jaq-core" -version = "1.2.1" +version = "1.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "03d6a5713b8f33675abfac79d1db0022a3f28764b2a6b96a185c199ad8dab86d" +checksum = "eaadb25ab6563759089aa897f8b8d609d108a8531d742b17a2a80220de685440" dependencies = [ "aho-corasick", - "base64", + "base64 0.22.1", "hifijson", "jaq-interpret", "libm", @@ -690,9 +696,9 @@ dependencies = [ [[package]] name = "jaq-interpret" -version = "1.2.1" +version = "1.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f569e38e5fc677db8dfda89ee0b4c25b3f53e811b16434fd14bdc5b43fc362ac" +checksum = "1bbde0d83ef94bb43e862a845e9ecc3fb87210d11b1a844850a9d1a678a496cf" dependencies = [ "ahash", "dyn-clone", @@ -715,9 +721,9 @@ dependencies = [ [[package]] name = "jaq-std" -version = "1.2.1" +version = "1.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5d7871c59297cbfdd18f6f1bbbafaad24e97fd555ee1e2a1be7a40a5a20f551a" +checksum = "6d80737e5c3dfdc0cb0ef05ec35ac1eaccd7e79ad712d1f934995ccf6a2cda39" dependencies = [ "bincode", "jaq-parse", @@ -963,7 +969,7 @@ version = "1.6.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e5699cc8a63d1aa2b1ee8e12b9ad70ac790d65788cd36101fa37f87ea46c4cef" dependencies = [ - "base64", + "base64 0.21.7", "indexmap", "line-wrap", "quick-xml 0.31.0", @@ -1261,7 +1267,7 @@ name = "scnr_core" version = "0.1.0" dependencies = [ "anyhow", - "base64", + "base64 0.21.7", "bytes", "flate2", "flume", diff --git a/Cargo.toml b/Cargo.toml index 533f802..ad181f4 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -108,9 +108,9 @@ csv = "1.3" plist = "1.6" # jq -jaq-core = "1.2" -jaq-std = "1.2" -jaq-interpret = "1.2" +jaq-core = "1.4" +jaq-std = "1.4" +jaq-interpret = "1.4" jaq-parse = "1.0" jaq-syn = "1.1" diff --git a/_samples/sakila_full.db b/_samples/sakila_full.db new file mode 100644 index 0000000..248a237 Binary files /dev/null and b/_samples/sakila_full.db differ diff --git a/justfile b/justfile index a4c5d96..1acf4e5 100644 --- a/justfile +++ b/justfile @@ -11,13 +11,6 @@ o________________INIT_COMMANDS: _default clean: cargo clean -install_python_venv: - cd py_scnr && python3 -m venv .venv - cd py_scnr && pip install -r requirements.txt - # cd py_scnr && pip freeze > requirements.txt - echo "now call ---->" - echo "source ./py_scnr/.venv/bin/activate" - # execute all commands to check workspace health, if this command pass, CI should pass as well all: clean test check check_deny install @@ -104,13 +97,30 @@ install: # ================================================================================================== o________________DEPS_COMMANDS: _default -# Installs build tools & dependencies -install_tooling: +# Installs cargo tools +install_cargo_tools: cargo install cargo-deny cargo install --locked maturin - sudo apt install python3-venv - sudo apt install python3-pip - - +# Installs python virtual env requirements +install_python_venv: + cd py_scnr && python3 -m venv .venv + cd py_scnr && pip install -r requirements.txt + # cd py_scnr && pip freeze > requirements.txt + echo "now call ---->" + echo "source ./py_scnr/.venv/bin/activate" +# Installs build tools & dependencies +[linux] +install_tooling: install_cargo_tools && install_python_venv + sudo apt install python3-venv + sudo apt install python3-pip + pip install virtualenv + +# Installs build tools & dependencies +[macos] +install_tooling: install_cargo_tools && install_python_venv + brew install python pipx + pipx ensurepath + pipx install pip + pip install virtualenv diff --git a/py_scnr/requirements.txt b/py_scnr/requirements.txt index 2dbbe3d..7bce869 100644 --- a/py_scnr/requirements.txt +++ b/py_scnr/requirements.txt @@ -1 +1,2 @@ maturin==1.4.0 +virtualenv diff --git a/py_scnr/src/iterators.rs b/py_scnr/src/iterators.rs index 1fd87f3..e2079e6 100644 --- a/py_scnr/src/iterators.rs +++ b/py_scnr/src/iterators.rs @@ -43,17 +43,13 @@ type JqInnerIterator = Box + Send>; impl JqIterator { pub fn new(result: ScanResult, query: &str) -> Result { - let filter = scnr_core::jq::make_jq_filter(query)?; + let filter = scnr_core::jq::JqFilter::new(query)?; let iter = result .into_iter() .filter_map(|c| c.map_err(|e| tracing::error!("{e:?}")).ok()) .filter_map(|c| c.content.json().map(|json| (c.rel_path, json))) - .flat_map(move |(_path, json)| { - scnr_core::jq::jq_from_filter(json, filter.clone()) - .map_err(|e| tracing::error!("{e:?}")) - .unwrap_or_default() - }); + .flat_map(move |(_path, json)| filter.run(json).map_err(|e| tracing::error!("{e:?}")).unwrap_or_default()); Ok(Self { iter: Box::new(iter) }) } diff --git a/py_scnr/src/lib.rs b/py_scnr/src/lib.rs index 2b6675e..598824c 100644 --- a/py_scnr/src/lib.rs +++ b/py_scnr/src/lib.rs @@ -41,27 +41,31 @@ fn activate_verbose(verbose: bool) { } #[pyfunction] -#[pyo3(signature = (*, input = DEFAULT_INPUT.to_string(), filter=vec![], starter=vec![], cfg=vec![], profile=CfgProfile::default(), verbose=false))] +#[allow(clippy::too_many_arguments)] +#[pyo3(signature = (*, input = DEFAULT_INPUT.to_string(), filter=vec![], starter=vec![], cfg=vec![], profile=CfgProfile::default(), print_file_names=false, pretty_print=false, verbose=false))] fn scan( input: String, filter: Vec, starter: Vec, cfg: Vec<(String, Plugin)>, profile: CfgProfile, + print_file_names: bool, + pretty_print: bool, verbose: bool, ) -> Result { activate_verbose(verbose); let starter = to_scnr_starter(starter); let cfg = to_scnr_cfg(cfg); let profile = profile.into(); - let common = CommonArgs { input, filter, starter, cfg, profile }; + let common = CommonArgs { input, filter, starter, cfg, profile, print_file_names, pretty_print }; let scanner = scnr::get_scanner_from_options(&common)?; let result = scanner.scan()?; Ok(result.into()) } #[pyfunction] -#[pyo3(signature = (*, input = DEFAULT_INPUT.to_string(), query = DEFAULT_JQ_QUERY, filter=vec![], starter=vec![], cfg=vec![], profile=CfgProfile::default(), verbose=false))] +#[allow(clippy::too_many_arguments)] +#[pyo3(signature = (*, input = DEFAULT_INPUT.to_string(), query = DEFAULT_JQ_QUERY, filter=vec![], starter=vec![], cfg=vec![], profile=CfgProfile::default(), print_file_names=false, pretty_print=false, verbose=false))] fn jq( input: String, query: &str, @@ -69,13 +73,15 @@ fn jq( starter: Vec, cfg: Vec<(String, Plugin)>, profile: CfgProfile, + print_file_names: bool, + pretty_print: bool, verbose: bool, ) -> Result { activate_verbose(verbose); let starter = to_scnr_starter(starter); let cfg = to_scnr_cfg(cfg); let profile = profile.into(); - let common = CommonArgs { input, filter, starter, cfg, profile }; + let common = CommonArgs { input, filter, starter, cfg, profile, print_file_names, pretty_print }; let scanner = scnr::get_scanner_from_options(&common)?; let result = scanner.scan()?; let iterator = JqIterator::new(result, query)?; diff --git a/rust-toolchain.toml b/rust-toolchain.toml index 1e67870..13fc52f 100644 --- a/rust-toolchain.toml +++ b/rust-toolchain.toml @@ -1,5 +1,5 @@ [toolchain] -channel = "1.76" +channel = "stable" # https://rust-lang.github.io/rustup-components-history/ components = ["rustfmt", "clippy"] diff --git a/scnr/src/lib.rs b/scnr/src/lib.rs index 815a37a..3e957bb 100644 --- a/scnr/src/lib.rs +++ b/scnr/src/lib.rs @@ -1,12 +1,13 @@ use options::CommonArgs; -use scnr_core::{filter::Glob, Scanner}; +use scnr_core::{filter::Glob, Scanner, ScannerOptions}; pub mod options; pub mod profiles; pub use scnr_core as core; pub fn get_scanner_from_options(common_args: &CommonArgs) -> Result { - let picker = profiles::get_plugin_picker(common_args.profile, &common_args.cfg, &common_args.starter)?; + let options = ScannerOptions::default(); + let picker = profiles::get_plugin_picker(common_args.profile, &common_args.cfg, &common_args.starter, &options)?; let scanner = Scanner::new(&common_args.input, picker); let scanner = config_scanner_filter(scanner, &common_args.filter)?; Ok(scanner) diff --git a/scnr/src/main.rs b/scnr/src/main.rs index 135c9f0..95be14d 100644 --- a/scnr/src/main.rs +++ b/scnr/src/main.rs @@ -1,8 +1,8 @@ #![allow(clippy::default_trait_access, clippy::module_name_repetitions, clippy::wildcard_imports)] #![deny(clippy::expect_used, clippy::unwrap_used, clippy::panic)] -use scnr_core::{bin_repr, jq, Scanner}; -use std::io::Write; +use scnr_core::{bin_repr, jq, Content, Scanner}; +use std::{io::Write, path::Path}; use scnr::options::*; @@ -26,6 +26,32 @@ fn main() -> anyhow::Result<()> { Ok(()) } +fn print_path(out: &mut impl Write, path: &Path, options: &CommonArgs) -> anyhow::Result<()> { + if options.print_file_names { + writeln!(out, "{}", path.display())?; + } + Ok(()) +} + +fn print_content(out: &mut impl Write, content: &Content, options: &CommonArgs) -> anyhow::Result<()> { + match &content { + scnr_core::Content::Json(json) => { + let consume_out = &mut *out; + if options.pretty_print { + serde_json::to_writer_pretty(consume_out, &json)?; + } else { + serde_json::to_writer(consume_out, &json)?; + } + } + scnr_core::Content::Text(text) => writeln!(out, "{text}")?, + scnr_core::Content::Bytes(bytes) => writeln!(out, "{}", bin_repr::BinRepr::Base64.to_string(bytes))?, + } + + writeln!(out)?; + + Ok(()) +} + #[tracing::instrument(skip(scanner), err)] fn scan(scanner: Scanner, args: ScanArgs) -> anyhow::Result<()> { let stdout = std::io::stdout(); @@ -36,12 +62,8 @@ fn scan(scanner: Scanner, args: ScanArgs) -> anyhow::Result<()> { for content in iter { match content { Ok(content) => { - writeln!(lock, "{}", content.rel_path.display())?; - match content.content { - scnr_core::Content::Json(json) => serde_json::to_writer_pretty(&mut lock, &json)?, - scnr_core::Content::Text(text) => writeln!(lock, "{text}")?, - scnr_core::Content::Bytes(bytes) => writeln!(lock, "{}", bin_repr::BinRepr::Base64.to_string(&bytes))?, - } + print_path(&mut lock, &content.rel_path, &args.common)?; + print_content(&mut lock, &content.content, &args.common)?; } Err(err) => tracing::error!("{err:?}"), } @@ -55,7 +77,7 @@ fn jq(scanner: Scanner, args: JqArgs) -> anyhow::Result<()> { let stdout = std::io::stdout(); let mut lock = stdout.lock(); - let jq_filter = jq::make_jq_filter(&args.query)?; + let jq_filter = jq::JqFilter::new(&args.query)?; let iter = scanner.scan()?; @@ -63,8 +85,9 @@ fn jq(scanner: Scanner, args: JqArgs) -> anyhow::Result<()> { match content { Ok(content) => { if let Some(json) = content.content.json() { - for element in jq::jq_from_filter(json, jq_filter.clone())? { - serde_json::to_writer_pretty(&mut lock, &element)?; + print_path(&mut lock, &content.rel_path, &args.common)?; + for element in jq_filter.run(json)? { + print_content(&mut lock, &Content::Json(element), &args.common)?; } } } @@ -72,8 +95,6 @@ fn jq(scanner: Scanner, args: JqArgs) -> anyhow::Result<()> { } } - writeln!(lock)?; - Ok(()) } @@ -141,13 +162,13 @@ mod tests { #[test] fn sample_to_console() -> anyhow::Result<()> { let samples = get_samples_path()?; - test_scnr_scan_output(&format!("scnr scan -i {samples}"), 24, 7, 4, 2) + test_scnr_scan_output(&format!("scnr scan -i {samples}"), 40, 7, 4, 2) } #[test] fn sample_to_console_sysdiag_profil() -> anyhow::Result<()> { let samples = get_samples_path()?; - test_scnr_scan_output(&format!("scnr scan -i {samples} -p sysdiagnose"), 39, 7, 1, 3) + test_scnr_scan_output(&format!("scnr scan -i {samples} -p sysdiagnose"), 55, 7, 1, 3) } fn test_scnr_scan_output( diff --git a/scnr/src/options.rs b/scnr/src/options.rs index 8dbb4aa..c860750 100644 --- a/scnr/src/options.rs +++ b/scnr/src/options.rs @@ -47,11 +47,25 @@ pub struct CommonArgs { #[arg(short, long, default_value_t = CfgProfile::default(), help = "Plugins configuration profile to start with. Profiles are cfg bundles and can be then overridden by cfg args")] pub profile: CfgProfile, + + #[arg(long, short = 'n', help = "DO print the file names (before the content)")] + pub print_file_names: bool, + + #[arg(long, short = 'b', help = "DO pretty(beautiful) print the output")] + pub pretty_print: bool, } impl Default for CommonArgs { fn default() -> Self { - CommonArgs { input: DEFAULT_INPUT.to_string(), filter: vec![], profile: CfgProfile::default(), cfg: vec![], starter: vec![] } + CommonArgs { + input: DEFAULT_INPUT.to_string(), + filter: vec![], + profile: CfgProfile::default(), + cfg: vec![], + starter: vec![], + print_file_names: false, + pretty_print: false, + } } } @@ -179,7 +193,7 @@ mod tests { #[test] fn parse_cmd_2() { let cmd = - "scnr -v extract --output /tmp -f *.json --filter=**/*.xml --force -p sysdiagnose --cfg img.svg=json --cfg *.toml=text -s file-system"; + "scnr -v extract --output /tmp -f *.json --filter=**/*.xml --force -p sysdiagnose --cfg img.svg=json --cfg *.toml=text -s file-system -nb"; let opts = Opts::parse_from(cmd.split(' ')); assert!(opts.verbose); assert_eq!( @@ -191,6 +205,8 @@ mod tests { profile: CfgProfile::Sysdiagnose, cfg: vec![("img.svg".into(), Plugin::Json), ("*.toml".into(), Plugin::Text)], starter: vec![Plugin::FileSystem], + print_file_names: true, + pretty_print: true }, output: PathBuf::from("/tmp"), force: true, diff --git a/scnr/src/profiles.rs b/scnr/src/profiles.rs index d769e84..43b07f4 100644 --- a/scnr/src/profiles.rs +++ b/scnr/src/profiles.rs @@ -3,16 +3,22 @@ use scnr_core::{ bin::BinPlugin, file_system::FileSystemPlugin, ips::IpsPlugin, json::JsonPlugin, targz::TarGzPlugin, tarxz::TarXzPlugin, text::TextPlugin, toml::TomlPlugin, xml::XmlPlugin, yaml::YamlPlugin, zip::ZipPlugin, DefaultPluginPicker, }, - ScanError, ScanPlugin, + ScanError, ScanPlugin, ScannerOptions, }; use scnr_plist::PlistPlugin; use scnr_sqlite::SqlitePlugin; use crate::options::{CfgProfile, Plugin}; -pub fn get_plugin_picker(profile: CfgProfile, cfg: &[(String, Plugin)], starter: &[Plugin]) -> Result { +pub fn get_plugin_picker( + profile: CfgProfile, + cfg: &[(String, Plugin)], + starter: &[Plugin], + options: &ScannerOptions, +) -> Result { use scnr_core::plugins::DefaultPluginPickerBuilder; - fn add_standard_plugins(builder: DefaultPluginPickerBuilder) -> Result { + + fn add_standard_plugins(builder: DefaultPluginPickerBuilder, options: &ScannerOptions) -> Result { builder .push_plugin("*.tar.gz", TarGzPlugin)? .push_plugin("*.tar.xz", TarXzPlugin)? @@ -27,28 +33,28 @@ pub fn get_plugin_picker(profile: CfgProfile, cfg: &[(String, Plugin)], starter: .push_plugin("*.rs", TextPlugin)? .push_plugin("*.log", TextPlugin)? .push_plugin("*.plist", PlistPlugin)? - .push_plugin("*.db", SqlitePlugin)? - .push_plugin("*.sqlite", SqlitePlugin)? - .push_plugin("*.sqlite3", SqlitePlugin)? - .push_plugin("*.sqlitedb", SqlitePlugin) + .push_plugin("*.db", SqlitePlugin::new(options))? + .push_plugin("*.sqlite", SqlitePlugin::new(options))? + .push_plugin("*.sqlite3", SqlitePlugin::new(options))? + .push_plugin("*.sqlitedb", SqlitePlugin::new(options)) } let mut builder = match profile { - CfgProfile::Standard => add_standard_plugins(DefaultPluginPicker::builder())?, - CfgProfile::Sysdiagnose => add_standard_plugins(DefaultPluginPicker::builder())? + CfgProfile::Standard => add_standard_plugins(DefaultPluginPicker::builder(), options)?, + CfgProfile::Sysdiagnose => add_standard_plugins(DefaultPluginPicker::builder(), options)? .push_plugin("*.stub", PlistPlugin)? - .push_plugin("*.plsql", SqlitePlugin)? - .push_plugin("*.epsql", SqlitePlugin)? + .push_plugin("*.plsql", SqlitePlugin::new(options))? + .push_plugin("*.epsql", SqlitePlugin::new(options))? .push_plugin("*.ips", IpsPlugin)?, CfgProfile::Nothing => DefaultPluginPicker::builder(), }; for (pattern, plugin) in cfg { - builder = builder.insert_boxed_plugin(pattern.as_str(), get_plugin(*plugin))?; + builder = builder.insert_boxed_plugin(pattern.as_str(), get_plugin(*plugin, options))?; } for plugin in starter { - builder = builder.push_starter_plugin(get_plugin(*plugin))?; + builder = builder.push_starter_plugin(get_plugin(*plugin, options))?; } Ok(match profile { @@ -57,7 +63,7 @@ pub fn get_plugin_picker(profile: CfgProfile, cfg: &[(String, Plugin)], starter: }) } -fn get_plugin(plugin: Plugin) -> Box { +fn get_plugin(plugin: Plugin, options: &ScannerOptions) -> Box { match plugin { Plugin::FileSystem => Box::new(FileSystemPlugin), Plugin::Json => Box::new(JsonPlugin), @@ -67,7 +73,7 @@ fn get_plugin(plugin: Plugin) -> Box { Plugin::TarXz => Box::new(TarXzPlugin), Plugin::Text => Box::new(TextPlugin), Plugin::Plist => Box::new(PlistPlugin), - Plugin::Sqlite => Box::new(SqlitePlugin), + Plugin::Sqlite => Box::new(SqlitePlugin::new(options)), Plugin::Bin => Box::new(BinPlugin), } } diff --git a/scnr_core/src/jq.rs b/scnr_core/src/jq.rs index 88250ab..60da8c3 100644 --- a/scnr_core/src/jq.rs +++ b/scnr_core/src/jq.rs @@ -30,27 +30,39 @@ impl From for JqError { } } -pub fn jq(json: Value, query: &str) -> Result, JqError> { - let filter = make_jq_filter(query)?; - jq_from_filter(json, filter) +pub struct JqFilter { + owned: jaq_interpret::Filter, } -pub fn jq_from_filter(json: Value, filter: Filter) -> Result, JqError> { - let mut ctx = make_default_context(); - let filter = ctx.compile(filter); +impl JqFilter { + pub fn new(query: &str) -> Result { + let filter = make_jq_filter(query)?; + Ok(Self::from_filter(filter)) + } + + #[must_use] + pub fn from_filter(filter: Filter) -> Self { + let mut ctx = make_default_context(); + let owned = ctx.compile(filter); - let jq_val: Val = json.into(); + Self { owned } + } - let null = Box::new(core::iter::once(Ok(Val::Null))) as Box>; - let null = RcIter::new(null); - let null_ctx = Ctx::new(vec![], &null); + pub fn run(&self, json: Value) -> Result, JqError> { + let jq_val: Val = json.into(); - let results = filter - .run((null_ctx.clone(), jq_val)) - .map(|x| x.map(Into::into)) - .collect::, _>>()?; + let null = Box::new(core::iter::once(Ok(Val::Null))) as Box>; + let null = RcIter::new(null); + let null_ctx = Ctx::new(vec![], &null); - Ok(results) + let results = self + .owned + .run((null_ctx.clone(), jq_val)) + .map(|x| x.map(Into::into)) + .collect::, _>>()?; + + Ok(results) + } } pub fn make_jq_filter(query: &str) -> Result { @@ -110,7 +122,7 @@ mod tests { let json: Value = serde_json::from_str(json)?; let expected: Vec = expected.iter().map(|s| serde_json::from_str(s)).collect::>()?; - let result = jq(json, query)?; + let result = JqFilter::new(query)?.run(json)?; assert_eq!(expected, result); Ok(()) diff --git a/scnr_core/src/lib.rs b/scnr_core/src/lib.rs index b65b385..cef3a1a 100644 --- a/scnr_core/src/lib.rs +++ b/scnr_core/src/lib.rs @@ -102,6 +102,22 @@ pub enum ScanError { Any(#[from] anyhow::Error), } +#[derive(Debug)] +pub struct ScannerOptions { + /// Enables tables output split for database plugins + pub split_tables_output: bool, + + /// Limit the size of json arrays output from plugins that handle this option (table in databases for instance) + /// Default is 5000, 0 will propably get you in troubles + pub json_array_limit: usize, +} + +impl Default for ScannerOptions { + fn default() -> Self { + Self { split_tables_output: false, json_array_limit: 5000 } + } +} + pub struct Scanner { root_start: String, filter: Arc>, @@ -122,7 +138,8 @@ impl Scanner { /// Start a thread and returns a content receiver pub fn scan(self) -> Result { - let (sender, receiver) = flume::unbounded::>(); + // this queue is bounded to avoid building up an insane amount of memory in case of slow iteration on the results + let (sender, receiver) = flume::bounded::>(10); // scan in a thread let _thread = std::thread::spawn(move || { diff --git a/scnr_sqlite/src/lib.rs b/scnr_sqlite/src/lib.rs index a0d1be1..a64f7f6 100644 --- a/scnr_sqlite/src/lib.rs +++ b/scnr_sqlite/src/lib.rs @@ -11,7 +11,17 @@ mod sqlite_ext; use sqlite_ext::SqliteExt; #[derive(Debug)] -pub struct SqlitePlugin; +pub struct SqlitePlugin { + json_limit: usize, +} + +impl SqlitePlugin { + #[must_use] + pub fn new(options: &ScannerOptions) -> Self { + let json_limit = if options.split_tables_output { options.json_array_limit } else { usize::MAX }; + Self { json_limit } + } +} impl ScanPlugin for SqlitePlugin { #[tracing::instrument(level = "debug", err)] @@ -32,8 +42,20 @@ impl ScanPlugin for SqlitePlugin { let mut big_json: Vec = vec![]; + let send_big_json = |json: Vec, already_sent_this_table: bool| { + if json.is_empty() && already_sent_this_table { + return Ok(()); + } + tracing::debug!("Sending json array of {} elements for table {}", json.len(), &table_name); + let json_array = Value::Array(json); + context.send_child_content(Content::Json(json_array), &table_name)?; + ScanPluginResult::Ok(()) + }; + let columns = conn.get_columns_infos(&table_name)?; + let mut already_sent_this_table = false; + while let Some(row) = rows.next()? { let mut json = Map::new(); for (i, column) in columns.iter().map(|c| &c.name).enumerate() { @@ -42,12 +64,15 @@ impl ScanPlugin for SqlitePlugin { } big_json.push(Value::Object(json)); + + if big_json.len() >= self.json_limit { + send_big_json(big_json, already_sent_this_table)?; + big_json = vec![]; + already_sent_this_table = true; + } } - tracing::debug!("Sending json array of {} elements for table {}", big_json.len(), &table_name); - let json_array = Value::Array(big_json); - // dbg!(&table_name, &json_array); - context.send_child_content(Content::Json(json_array), table_name)?; + send_big_json(big_json, already_sent_this_table)?; } drop(tmp_file); @@ -77,10 +102,17 @@ mod tests { ScanReader, }; - fn get_json_contents(sample_path: &str) -> anyhow::Result> { + fn get_json_contents( + sample_path: &str, + split_tables_output: bool, + json_array_limit: usize, + ) -> anyhow::Result> { let samples_dir = get_samples_path()?; let mut file = std::fs::File::open(format!("{samples_dir}/{sample_path}"))?; - let results = exec_plugin_scan(ScanReader::read_seek(&mut file), &SqlitePlugin)?; + + let plugin = SqlitePlugin::new(&ScannerOptions { split_tables_output, json_array_limit }); + + let results = exec_plugin_scan(ScanReader::read_seek(&mut file), &plugin)?; let mut json_contents = vec![]; for result in results { @@ -96,22 +128,78 @@ mod tests { #[test] fn test() -> anyhow::Result<()> { - let jsons = get_json_contents("sakila_country_only.db")?; + let jsons = get_json_contents("sakila_country_only.db", false, 0)?; assert_eq!(jsons.len(), 1); - assert_eq!(jsons[0].0, PathBuf::from("country")); assert_eq!(jsons[0].1.as_array().unwrap().len(), 109); Ok(()) } + #[test] + fn test_chunks_of_1() -> anyhow::Result<()> { + let jsons = get_json_contents("sakila_country_only.db", true, 1)?; + + assert_eq!(jsons.len(), 109); + assert_eq!(jsons[0].0, PathBuf::from("country")); + assert_eq!(jsons[0].1.as_array().unwrap().len(), 1); + + Ok(()) + } + + #[test] + fn test_read_all_tables() -> anyhow::Result<()> { + let jsons = get_json_contents("sakila_full.db", true, 5000)?; + + assert_eq!(jsons.len(), 23); + + let real_table_and_counts = jsons + .into_iter() + .map(|(path, json)| (path, json.as_array().unwrap().len())) + .collect::>(); + + let expected_tables_and_counts = [ + ("actor", 200), + ("address", 603), + ("category", 16), + ("city", 600), + ("country", 109), + ("customer", 599), + ("film", 1000), + ("film_actor", 5000), + ("film_actor", 462), + ("film_category", 1000), + ("film_text", 0), + ("inventory", 4581), + ("language", 6), + ("payment", 5000), + ("payment", 5000), + ("payment", 5000), + ("payment", 1049), + ("rental", 5000), + ("rental", 5000), + ("rental", 5000), + ("rental", 1044), + ("staff", 2), + ("store", 2), + ] + .map(|(p, c)| (PathBuf::from(p), c)) + .into_iter() + .collect::>(); + + assert_eq!(expected_tables_and_counts, real_table_and_counts); + + Ok(()) + } + #[test] fn failing_test() -> anyhow::Result<()> { let samples_dir = get_samples_path()?; let mut file = std::fs::File::open(format!("{samples_dir}/w.tar.gz"))?; - let result = exec_plugin_scan(ScanReader::read_seek(&mut file), &SqlitePlugin); + let plugin = SqlitePlugin::new(&ScannerOptions::default()); + let result = exec_plugin_scan(ScanReader::read_seek(&mut file), &plugin); assert!(result.is_err()); Ok(()) diff --git a/scnr_sqlite/src/sqlite_ext.rs b/scnr_sqlite/src/sqlite_ext.rs index ac6032b..1721918 100644 --- a/scnr_sqlite/src/sqlite_ext.rs +++ b/scnr_sqlite/src/sqlite_ext.rs @@ -14,6 +14,7 @@ pub use rusqlite::*; /// │ 4 │ `NodeID` │ BIGINT │ false │ │ false │ /// │ 5 │ `RootNodeID` │ BIGINT │ false │ │ false │ /// +#[allow(dead_code)] pub struct TableFieldInfos { pub column_id: i32, pub name: String, @@ -87,6 +88,7 @@ impl IntoDisplayable for types::Value { } } +#[allow(dead_code)] pub trait GetToString { fn get_to_string(&self, idx: usize) -> Result>; }