diff --git a/.gitignore b/.gitignore index 4691d50..291a950 100644 --- a/.gitignore +++ b/.gitignore @@ -1,5 +1,6 @@ /target /.cargo +/.idea Cargo.lock tags rusty-tags.vi diff --git a/Cargo.toml b/Cargo.toml index 7bf464b..f97c5c3 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "slog-kvfilter" version = "0.7.0" -authors = ["Dawid Ciężarkiewicz ", "Tony Przygienda "] +authors = ["Dawid Ciężarkiewicz ", "Tony Przygienda ", "Tomáš Dvořák "] description = "Key values and Regex based filter Drain for slog-rs" keywords = ["slog", "logging", "log", "filter"] license = "MPL-2.0/MIT/Apache-2.0" @@ -10,6 +10,26 @@ homepage = "https://github.com/slog-rs/slog" repository = "https://github.com/slog-rs/kvfilter" readme = "README.md" +[features] +# serde_regex should be enabled iff serde is enabled. +# I don't know how to express this in Cargo.toml automatically, though. +default=["serde", "serde_regex"] + [dependencies] -slog = "2" +serde = { version = "1", optional = true, features = ["derive"] } +serde_regex = { version = "0", optional = true } regex = "1" + + +[dependencies.slog] +version = "2.7" +default-features = false +# So that the tests can reliably use debug and trace macros +features = ["max_level_trace", "release_max_level_trace", "std", "dynamic-keys"] + +[dev-dependencies] +criterion = "0.2" + +[[bench]] +name = "kvfilter_benchmark" +harness = false diff --git a/README.md b/README.md index c73b46f..4d28a27 100644 --- a/README.md +++ b/README.md @@ -20,9 +20,22 @@ # slog-kvfilter - Key values and Regex based filter Drain for [slog-rs] -For more information, help, to report issues etc. see [slog-rs][slog-rs]. +Filter records by matching their messages, keys and values. Records can be matched -[slog-rs]: https://github.com/slog-rs/slog + - based on logging level (debug, info, warn, ...) + - regular expression on record message + - keys and values + - simple filters can be arbitrarily composed using `And`, `Or` and `Not` expressions -Documentation can be found on -[`slog-kvfilter` type](https://docs.rs/slog-kvfilter/*/slog-kvfilter/index.html). \ No newline at end of file + +**slog-kvfilter has undergone a complete rewrite as of version 0.7.0. +The API has changed. See [this issue](https://github.com/slog-rs/kvfilter/issues/15) +for details.** + +slog-kvfilter documentation can be found [here](https://docs.rs/slog-kvfilter/*/slog_kvfilter/). + +For more information, help, to report issues etc. see +[slog-kvfilter][slog-kvfilter] and [slog-rs][slog-rs]. + +[slog-kvfilter]: https://github.com/slog-rs/kvfilter/ +[slog-rs]: https://github.com/slog-rs/slog \ No newline at end of file diff --git a/benches/kvfilter_benchmark.rs b/benches/kvfilter_benchmark.rs new file mode 100644 index 0000000..935130c --- /dev/null +++ b/benches/kvfilter_benchmark.rs @@ -0,0 +1,264 @@ +#[macro_use] +extern crate criterion; +#[macro_use] +extern crate slog; +extern crate slog_kvfilter; + +use std::sync::Arc; +use std::sync::atomic::{AtomicUsize, Ordering}; + +use criterion::Criterion; +use slog::{Drain, Logger, Never, Record, OwnedKVList, Level}; +use slog_kvfilter::{EvaluationOrder, FilterSpec, KVFilter}; + +#[derive(Debug, Clone)] +pub struct CountingDrain { + count: Arc +} + +impl Drain for CountingDrain { + type Ok = (); + type Err = Never; + + fn log(&self, _: &Record, _: &OwnedKVList) -> Result<(), Never> { + self.count.fetch_add(1, Ordering::Relaxed); + Ok(()) + } + + #[inline] + fn is_enabled(&self, _: Level) -> bool { + true + } +} + +struct Tester { + log: Logger, + count: Arc, +} + +impl Tester { + fn assert_count(&self, expected_count: usize) { + let actual_count = self.count.load(Ordering::Relaxed); + assert_eq!(expected_count, actual_count) + } +} + +fn new_tester(spec: FilterSpec, evaluation_order: EvaluationOrder) -> Tester { + let count = Arc::new(AtomicUsize::new(0)); + let drain = KVFilter::new( + CountingDrain { count: Arc::clone(&count) }, + spec, + evaluation_order, + ); + Tester { + log: Logger::root(drain.fuse(), o!("key_foo" => "value_foo")), + count, + } +} + + +// simple AND use_case - useful for comparison with original KVFilter in simple cases +fn simple_and_benchmark(c: &mut Criterion) { + let tester = { + let match_kv = FilterSpec::match_kv; + + let filter_spec = FilterSpec::all_of(&[ + match_kv("some_key", "some_value"), + match_kv("another_key", "another_value"), + ]); + + new_tester(filter_spec, EvaluationOrder::LoggerAndMessage) + }; + + let mut first_iteration = true; + c.bench_function("simple AND", move |b| { + b.iter(|| { + info!(tester.log, "ACCEPT"; + "some_key" => "some_value", + "another_key" => "another_value", + ); + + debug!(tester.log, "REJECT"; + "some_key" => "some_value", + ); + + trace!(tester.log, "REJECT"; + "another_key" => "another_value", + "bad_key" => "bad_key", + ); + + if first_iteration { + tester.assert_count(1); + first_iteration = false; + } + }) + }); +} + +// simple OR use_case - not expressible in original KVFilter +fn simple_or_benchmark(c: &mut Criterion) { + let tester = { + let match_kv = FilterSpec::match_kv; + let level_at_least = FilterSpec::LevelAtLeast; + + let filter_spec = FilterSpec::any_of(&[ + match_kv("debug_key", "debug_value").and(level_at_least(Level::Debug)), + match_kv("trace_key", "trace_value").and(level_at_least(Level::Trace)), + ]); + + new_tester(filter_spec, EvaluationOrder::LoggerAndMessage) + }; + + let mut first_iteration = true; + c.bench_function("simple OR", move |b| { + b.iter(|| { + info!(tester.log, "REJECT"; + "some_key" => "some_value", + "another_key" => "another_value", + ); + + debug!(tester.log, "ACCEPT"; + "another_key" => "another_value", + "debug_key" => "debug_value", + ); + + trace!(tester.log, "REJECT"; + "debug_key" => "debug_value", + "another_key" => "another_value", + ); + + trace!(tester.log, "ACCEPT"; + "trace_key" => "trace_value", + "another_key" => "another_value", + ); + + if first_iteration { + tester.assert_count(2); + first_iteration = false; + } + }) + }); +} + +// @przygienda use-case +fn przygienda_tester() -> Tester { + let match_any = FilterSpec::match_any_value; + + // (key1 must be either value1a or value1b) AND key2 must be value2 + let positive_filter = FilterSpec::all_of(&[ + match_any( + "some_key", + &[ + "some_value_1", + "some_value_2", + "some_value_3", + "some_value_4", + "foo", + ], + ), + match_any( + "another_key", + &[ + "another_value_1", + "another_value_2", + "another_value_3", + "another_value_4", + "bar", + ], + ), + match_any( + "key_foo", + &[ + "foo_value_1", + "foo_value_2", + "foo_value_3", + "foo_value_4", + "value_foo", + ], + ), + match_any( + "bar_key", + &[ + "bar_value_1", + "bar_value_2", + "bar_value_3", + "bar_value_4", + "xyz", + ], + ), + match_any( + "ultimate_key", + &[ + "ultimate_value_1", + "ultimate_value_2", + "ultimate_value_3", + "ultimate_value_4", + "xyz", + ], + ), + ]); + + // neg_key1 must be neg_value1 OR neg_key2 must be neg_value2a OR neg_key2 must be neg_value2b + let negative_filter = FilterSpec::any_of(&[ + match_any( + "some_negative_key", + &[ + "some_value_1", + "some_value_2", + "some_value_3", + "some_value_4", + "foo", + ], + ), + match_any( + "another_negative_key", + &[ + "some_value_1", + "some_value_2", + "some_value_3", + "some_value_4", + "foo", + ], + ), + ]); + + let filter_spec = positive_filter.and(negative_filter.not()); + new_tester(filter_spec, EvaluationOrder::LoggerAndMessage) +} + + +fn przygienda_benchmark(c: &mut Criterion) { + let tester = przygienda_tester(); + let mut first_iteration = true; + c.bench_function("przygienda", move |b| { + b.iter(|| { + info!(tester.log, "ACCEPT"; + "some_key" => "some_value_4", + "another_key" => "another_value_1", + "bar_key" => "bar_value_3", + "ultimate_key" => "ultimate_value_3", + ); + + info!(tester.log, "REJECT - negative filter"; + "some_key" => "some_value_4", + "another_key" => "another_value_1", + "bar_key" => "bar_value_3", + "ultimate_key" => "ultimate_value_3", + "some_negative_key" => "foo" + ); + + info!(tester.log, "REJECT - not all keys present"; + "some_key" => "some_value_4", + "another_key" => "another_value_1", + ); + + if first_iteration { + tester.assert_count(1); + first_iteration = false; + } + }) + }); +} + +criterion_group!(benches, simple_and_benchmark, przygienda_benchmark, simple_or_benchmark); +criterion_main!(benches); diff --git a/src/lib.rs b/src/lib.rs index 1dfe6fa..129a30e 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,593 +1,1240 @@ -//! Filter records by checking for presence/absence of keys first. After this it is -//! matching some of the keys against a sets of values while allowing -//! for records of level high enough to pass. It also can apply a negative filter after the -//! positive filter to allow sophisticated 'hole-punching' into a matching category. Ultimately, -//! the resulting message (without keys and values) can be constrained by both presence of a regex -//! or its absence. +//! Filter records by matching their messages, keys and values. Records can be matched +//! +//! - based on logging level (debug, info, warn, ...) +//! - regular expression on record message +//! - keys and values +//! - simple filters can be arbitrarily composed using `And`, `Or` and `Not` expressions +//! +//! # Examples +//! +//! This is an example that implements the behavior requested at this [issue](https://github.com/sile/sloggers/issues/9#issuecomment-422685244) +//! +//! The requested behavior: +//! +//! 1. pass all records with ("subsystem": "A1" || "subsystem": "A2") && level at least debug +//! 2. pass all records with (subsystem": "B1" || "subsystem": "B2") && level at least info +//! 3. pass all records with level at least warn +//! 4. reject all other records +//! +//! In all cases, make decisions based only on the LOGGER keys and values, +//! don't take the message KVs into account (`EvaluationOrder::LoggerOnly`) +//! ``` +//! extern crate slog; +//! extern crate slog_kvfilter; +//! +//! use slog_kvfilter::{EvaluationOrder, FilterSpec, KVFilter}; +//! use slog::{Discard, Level}; +//! +//! let match_kv = FilterSpec::match_kv; +//! +//! let subsystem_a = (match_kv("subsystem", "A1").or(match_kv("subsystem", "A2"))) +//! .and(FilterSpec::LevelAtLeast(Level::Debug)); +//! +//! let subsystem_b = (match_kv("subsystem", "B1").or(match_kv("subsystem", "B2"))) +//! .and(FilterSpec::LevelAtLeast(Level::Info)); +//! +//! // `And` and `Or` rules are evaluated first-to-last, so put the simplest rules (`LevelAtLeast`) first so the filter +//! // doesn't have to evaluate the more complicated rules if the simpler one already decides a message's fate +//! let filter_spec = FilterSpec::LevelAtLeast(Level::Warning) +//! .or(subsystem_a) +//! .or(subsystem_b); +//! let root_drain = Discard; +//! let kv_filter = KVFilter::new(root_drain, filter_spec, EvaluationOrder::LoggerOnly); +//! // Use the kv_filter as your `Drain` +//!``` +//! +//! See the unit tests for more example usage. +//! +extern crate regex; +#[cfg(feature = "serde")] +#[macro_use] +extern crate serde; +#[cfg(feature = "serde")] +extern crate serde_regex; #[cfg(test)] #[macro_use] extern crate slog; - #[cfg(not(test))] extern crate slog; -extern crate regex; - -use std::collections::{HashMap, HashSet}; -use std::fmt; -use std::option::Option; -use std::panic::UnwindSafe; -use std::panic::RefUnwindSafe; -use std::fmt::format; +use std::{ + cell::Cell, + collections::HashSet, + fmt, + panic::{RefUnwindSafe, UnwindSafe}, +}; -use slog::KV; use regex::Regex; -// @todo: must that be thread-safe? -struct FilteringSerializer<'a> { - presence: KeyPresenceMapFlyWeight<'a>, - valuematches: KVFilterListFlyWeight<'a>, - tmp_str: String, +use slog::{Drain, Key, KV, Level, OwnedKVList, Record, Serializer}; + +//region KVFilterConfig +/// All the configuration for a KVFilter +/// +/// If compiled with the "serde" feature, the config can be serialized and deserialized using Serde. +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +#[derive(Clone, Debug, PartialEq)] +pub struct KVFilterConfig { + /// The specification of the filtering to be applied to the message. See the `FilterSpec` docs. + pub filter_spec: FilterSpec, + /// See the `EvaluationOrder` docs + pub evaluation_order: EvaluationOrder, } -impl<'a> slog::Serializer for FilteringSerializer<'a> { - fn emit_arguments(&mut self, key: slog::Key, val: &fmt::Arguments) -> slog::Result { - if self.valuematches.is_empty() && self.presence.is_empty() { - return Ok(()); - } +// https://serde.rs/remote-derive.html +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +#[cfg_attr(feature = "serde", serde(remote = "Level"))] +#[allow(dead_code)] +enum LevelSerdeDef { + Critical, + Error, + Warning, + Info, + Debug, + Trace, +} - // either filter is empty (always e'thing passes) or filter keys - let matched = !self.presence.is_empty(); +/// Just a wrapper around Regex to provide +/// +/// - Serde support +/// - PartialEq (compares the original Regex strings, not the regular expression semantics) +/// - `impl UnwindSafe` - the wrapped Regex is not UnwindSafe, we mark RegexWrapper as being so. +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +#[derive(Clone, Debug)] +pub struct RegexWrapper(#[cfg_attr(feature = "serde", serde(with = "serde_regex"))] Regex); + +// Just cargo-culting here. I'm not sure what are the actual implications of Regex +// not being Unwind-safe :-( +impl UnwindSafe for RegexWrapper {} + +impl RefUnwindSafe for RegexWrapper {} + +impl PartialEq for RegexWrapper { + fn eq(&self, other: &RegexWrapper) -> bool { + format!("{}", self.0) == format!("{}", other.0) + } +} - if matched { - self.presence.remove(&key); +/// Specification of a filter. Filters are either simple filters like "Some Key and value must match this:", +/// or compound filters like `And`, `Or` that are recursively composed from another filters. +/// +/// If compiled with the "serde" feature, the config can be serialized and deserialized using Serde. +/// +/// See the tests for examples. +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +#[derive(Clone, Debug, PartialEq)] +pub enum FilterSpec { + /// Always accept + Accept, + /// Always reject + Reject, + /// Accept when logging level is at least given treshold. + /// + /// Example: message with level *Warning* will pass `FilterSpec::LevelAtLeast(Info)` + #[cfg_attr(feature = "serde", serde(with = "LevelSerdeDef"))] + LevelAtLeast(Level), + /// Accept when the key and value match the specification + MatchKV { key: String, value: String }, + /// Accept when all `values` for a given `key` are present. + /// + /// Equivalent to `MatchKV {key, value1}.or(MatchKV {key, value2}).or(...)`, + /// but is implemented using a `HashSet` instead of a linked `Or` structure, so + /// it will be more performant for a larger number of values + /// (no benchmarks done, but my guess is > 3 values). + /// + /// Always accepts on empty `values` + MatchAnyValue { key: String, values: Vec }, + /// Accept when the log message matches the given regular expression. + MatchMsgRegex(RegexWrapper), + /// Accepts when the key is present + KeyExists(String), + /// Accept when all the sub-filters accept. Sub-filter are evaluated left-to-right. + /// + /// Please note that slog processes KV arguments to `log!` right to left, that is e. g. + /// `info!(log, "Msg"; "key1" => %value1, "key2" => %value2)` will first expand value2 and then value1. + And(Box, Box), + /// Accept when at least one of the sub-filters accepts. Sub-filter are evaluated left-to-right. + /// + /// Please note that slog processes KV arguments to `log!` right to left, that is e. g. + /// `info!(log, "Msg"; "key1" => %value1, "key2" => %value2)` will first expand value2 and then value1. + Or(Box, Box), + /// Turns an Accept into a Reject and vice versa. + Not(Box), +} + +impl FilterSpec { + pub fn match_kv(key: impl ToString, value: impl ToString) -> FilterSpec { + FilterSpec::MatchKV { + key: key.to_string(), + value: value.to_string(), + } + } + + pub fn match_any_value(key: impl ToString, values: &[impl ToString]) -> FilterSpec { + FilterSpec::MatchAnyValue { + key: key.to_string(), + values: values.iter().map(|v| v.to_string()).collect(), } + } - let matched = if let Some(keyvalues) = self.valuematches.get(&key) { - self.tmp_str.clear(); - fmt::write(&mut self.tmp_str, *val)?; + pub fn match_msg_regex(regex: Regex) -> FilterSpec { + FilterSpec::MatchMsgRegex(RegexWrapper(regex)) + } - keyvalues.contains(&self.tmp_str) - } else { - false - }; + pub fn try_match_msg_regex(regex: &str) -> Result { + Ok(Self::match_msg_regex(Regex::new(regex)?)) + } - if matched { - self.valuematches.remove(&key); + pub fn key_exists(key: impl ToString) -> FilterSpec { + FilterSpec::KeyExists(key.to_string()) + } + + pub fn and(self, other: FilterSpec) -> FilterSpec { + FilterSpec::And(Box::new(self), Box::new(other)) + } + + /// Creates a filter that accepts if any of the variant accepts. Rejects on empty `variants` collection. + /// + /// variants are composed using foldr, that is [a, b, c] -> Or(a, Or(b, c)) + pub fn any_of(variants: &[FilterSpec]) -> FilterSpec { + match variants.split_first() { + Some((head, tail)) => { + if tail.is_empty() { + head.clone() + } else { + head.clone().or(Self::any_of(tail)) + } + } + None => FilterSpec::Reject, } + } - Ok(()) + /// Creates a filter that accepts if all of the variants accept. Rejects on empty `variants` collection. + /// + /// variants are composed using foldr, that is [a, b, c] -> And(a, And(b, c)) + pub fn all_of(variants: &[FilterSpec]) -> FilterSpec { + match variants.split_first() { + Some((head, tail)) => { + if tail.is_empty() { + head.clone() + } else { + head.clone().and(Self::all_of(tail)) + } + } + None => FilterSpec::Reject, + } } -} -/// tests either for presence or lack of key if in this list -pub type KeyPresenceMap = HashMap; + pub fn or(self, other: FilterSpec) -> FilterSpec { + FilterSpec::Or(Box::new(self), Box::new(other)) + } -/// flyweight copy that is created upfront and given to every serializer -type KeyPresenceMapFlyWeight<'a> = HashMap<&'a str, bool>; + pub fn not(self) -> FilterSpec { + FilterSpec::Not(Box::new(self)) + } +} -/// Must be a hashmap since we do not rely on ordered keys -pub type KVFilterList = HashMap>; +/// The order of evaluation of message KVs and logger KVs +/// +/// The keys and values to be logged come from two sources: +/// +/// - some KVs are associated with the loggers, typically created with `new_log = log.new(o!("key": "value"))` +/// - some KVs are associated with the logged messages, e. g. `info!(log, "message"; "key" => "value")` +/// +/// Evaluation order allows us to specify, which KVs will be used for message filtering, and what order will be used. +/// +/// I presume in practice `LoggerOnly` and `LoggerAndMessage` will be the most commonly used orders. +/// +/// - `LoggerOnly` means that only KVs associated with the loggers will be used for message filtering +/// - `LoggerAndMessage` means that first KVs associated with the loggers will be used for message filtering. +/// If the filter isn't determined by the loggers KVs, message KVs will be used. +/// +/// This can have both performance and semantics implications. If you are curious, +/// see the comment at `KVFilter` for a more thorough discussion. +/// +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +#[derive(Copy, Clone, Debug, PartialEq)] +pub enum EvaluationOrder { + LoggerOnly, + MessageOnly, + LoggerAndMessage, + MessageAndLogger, +} -/// flyweight copy that is created upfront and given to every serializer -type KVFilterListFlyWeight<'a> = HashMap<&'a str, &'a HashSet>; +impl Default for EvaluationOrder { + fn default() -> Self { + EvaluationOrder::LoggerAndMessage + } +} +//endregion -/// `Drain` filtering records using list of keys and values they -/// must have unless they are of a higher level than filtering applied. -/// it can apply a negative filter as well that overrides any matches but -/// will let higher level than filtering applied as well. +//region KVFilter +/// The KVFilter itself. It implements `Drain`. +/// +/// It passes messages matching the criteria specified by a given `config` into an underlying `drain` +/// and discards the rest. +/// --- +/// The rest of this documentation is a little bit difficult, both for me as a writer and for the reader. +/// I try to explain what happens under the hood when a message is being processed, and what performance +/// implications it may have. +/// +/// ## TL;DR: I try hard not to do any unnecessary work within the limits set by slog implementation. +/// +/// ## Long version: /// -/// This `Drain` filters a log entry on a filtermap -/// that holds the key name in question and acceptable values -/// Key values are gathered up the whole hierarchy of inherited -/// loggers. +/// When a log message is to be processed, we obtain two KV sets from two sources: /// -/// Example -/// ======= +/// - KVs associated with the loggers, typically created with `new_log = log.new(o!("key": "value"))` +/// - KVs associated with the logged messages, e. g. `info!(log, "message"; "key" => "value")` /// -/// Logger( ... ; o!("thread" => "100"); -/// log( ... ; "packet" => "send"); -/// log( ... ; "packet" => "receive"); +/// Each of the sets can be evaluated independently. Each KV set is evaluated in two stages: /// -/// can be filtered on a map containing "thread" key component. If the -/// values contain "100" the log will be output, otherwise filtered. -/// The filtering map can contain further key "packet" and value "send". -/// With that the output for "receive" would be filtered. +/// ### 1. `kv_set.serialize(filtering_serializer)` is called for each KV set /// -/// More precisely +/// This will eventually make `EvaluateFilterSerializer::emit_arguments` +/// called for each Key and Value in the KV set. +/// The important thing is that if there are any closures (`FnValue` and friends), +/// **all** of the closures in the KV set will be called at this step. +/// That means that if e. g. you don't need to filter on message KVs, just logger KVs +/// is enough for you, you can save a some CPU by using `EvaluationOrder::LoggerOnly`. That will +/// prevent the message closures from being called when a particular message is rejected based on logger KVs. +/// Also, if you use e. g. `EvaluationOrder::LoggerAndMessage` and a particular message is resolved +/// to be `Reject`-ed or `Accept`-ed early by just looking at the logger KV set, the message KV set will not +/// be `serialize()`d because the result is already known. /// -/// * if any key presence filter is present it's tested for the presence (or lack) -/// of key (no value checks) first -/// * a key is ignored until present in `filters`, otherwise an entry must -/// match for all the keys present in `filters` for any of the values given -/// for the key to pass the filter. -/// * an entry that hits any value of any negative filter key is filtered, this -/// takes precedence over `filters` -/// * Behavior of empty `KVFilterList` is undefined but normally anything should pass. -/// * Behavior of `KVFilter` that has same key in both the matching and the suppressing -/// section is undefined even if we have different values there. Logically, it should -/// be matching the positive and pass and only suppress negative if it finds matching -/// value but it's untested. +/// ### 2. `EvaluateFilterSerializer::emit_arguments(key: String, value: std::fmt::Arguments)` gets called for each KV /// -/// Additionally, the resulting message (without keys and values) can be constrained -/// by both presence of a regex or its absence by applying the `only_pass_on_regex` -/// and `always_suppress_on_regex` API calls. As the names suggest, suppression wins -/// if both regex's are set. +/// The `value` parameter is not a String, but a ` std::fmt::Arguments` +/// that needs to be turned into a String first so we can compare it to filters specified in the config. /// -/// Usage -/// ===== +/// We do two optimizations here: /// -/// Filtering in large systems that consist of multiple threads of same -/// code or have functionality of interest spread across many components, -/// modules, such as e.g. "sending packet" or "running FSM". -pub struct KVFilter { +/// a. Reuse the same String for expanding all the values to avoid some allocation +/// +/// b. Only expand the value when we are not sure about the result yet. Again, if we don't need the value, because +/// we already know a message will be Rejected or Accepted, we don't expand the value at all. Remember from +/// bullet 1. that `kv_set.serialize(filtering_serializer)` will call `emit_arguments` with **all** the KVs from a +/// particular set, so we try hard not do any unnecessary work with them when not needed. +pub struct KVFilter { drain: D, - keypresence: Option, - filters: Option, - neg_filters: Option, - level: slog::Level, - regex: Option, - neg_regex: Option, + config: KVFilterConfig, } -impl UnwindSafe for KVFilter {} -impl RefUnwindSafe for KVFilter {} +impl KVFilter { + pub fn new_from_config(drain: D, config: KVFilterConfig) -> Self { + KVFilter { drain, config } + } -impl<'a, D: slog::Drain> KVFilter { - /// Create `KVFilter` letting e'thing pass unless filters are set. Anything more - /// important than `level` will pass in any case. - /// - /// * `drain` - drain to be sent to - /// * `level` - maximum level filtered, higher levels pass by without filtering - pub fn new(drain: D, level: slog::Level) -> Self { + pub fn new(drain: D, filter_spec: FilterSpec, evaluation_order: EvaluationOrder) -> Self { KVFilter { - drain: drain, - level: level, - keypresence: None, - filters: None, - neg_filters: None, - regex: None, - neg_regex: None, + drain, + config: KVFilterConfig { + filter_spec, + evaluation_order, + }, } } - pub fn only_pass_on_any_key_present<'b, I: Iterator>(mut self, keys: I) -> Self { - if let Some(ref mut v) = self.keypresence { - v.extend(keys.map(|v| (v.clone(), true))); - } else { - self.keypresence = Some(keys.map(|v| (v.clone(), true)).collect()); - } - self - } + fn is_match(&self, record: &Record, logger_values: &OwnedKVList) -> bool { + let mut evaluating_serializer = EvaluateFilterSerializer { + filter: Filter::from_spec(&self.config.filter_spec), + message_level: record.level(), + msg: record.msg(), + tmp_value_string: String::new(), + }; - pub fn only_pass_on_all_keys_absent<'b, I: Iterator>(mut self, keys: I) -> Self { - if let Some(ref mut v) = self.keypresence { - v.extend(keys.map(|v| (v.clone(), false))); + macro_rules! serialize_and_return_if_decided { + ($kv: expr) => { + $kv + .serialize(record, &mut evaluating_serializer) + .unwrap(); // Is unwrap the right thing to do here? + + match evaluating_serializer.filter { + Filter::Accept => return true, + Filter::Reject => return false, + _ => {} + } + } } - self - } - /// pass through entries with all keys with _any_ of the matching values in its entries - /// or ignore condition if None - pub fn only_pass_any_on_all_keys(mut self, filters: Option) -> Self { - self.filters = filters; - self - } + match self.config.evaluation_order { + EvaluationOrder::LoggerOnly => { + serialize_and_return_if_decided!(logger_values); + } + EvaluationOrder::MessageOnly => { + serialize_and_return_if_decided!(record.kv()); + } + EvaluationOrder::LoggerAndMessage => { + serialize_and_return_if_decided!(logger_values); + serialize_and_return_if_decided!(record.kv()); + } + EvaluationOrder::MessageAndLogger => { + serialize_and_return_if_decided!(record.kv()); + serialize_and_return_if_decided!(logger_values); + } + } - /// suppress _any_ key with _any_ of the matching values in its entries or ignore - /// condition if None. - /// @note: This takes precedence over `only_pass_any` - pub fn always_suppress_any(mut self, filters: Option) -> Self { - self.neg_filters = filters; - self - } + fn final_evaluate_filter(filter: &mut Filter, level: Level) -> AcceptOrReject { + match filter { + Filter::Accept => AcceptOrReject::Accept, + Filter::Reject => AcceptOrReject::Reject, + Filter::LevelAtLeast(treshold) => { + if level.is_at_least(*treshold) { + AcceptOrReject::Accept + } else { + AcceptOrReject::Reject + } + } + Filter::MatchKV { .. } => AcceptOrReject::Reject, + Filter::MatchAnyValue { values, .. } => { + if values.is_empty() { + AcceptOrReject::Accept + } else { + AcceptOrReject::Reject + } + } + Filter::MatchMsgRegex(_) => AcceptOrReject::Reject, + Filter::KeyExists(_) => AcceptOrReject::Reject, + Filter::And(a, b) => { + if final_evaluate_filter(a, level) == AcceptOrReject::Accept + && final_evaluate_filter(b, level) == AcceptOrReject::Accept + { + AcceptOrReject::Accept + } else { + AcceptOrReject::Reject + } + } + Filter::Or(a, b) => { + if final_evaluate_filter(a, level) == AcceptOrReject::Accept + || final_evaluate_filter(b, level) == AcceptOrReject::Accept + { + AcceptOrReject::Accept + } else { + AcceptOrReject::Reject + } + } + Filter::Not(f) => { + if final_evaluate_filter(f, level) == AcceptOrReject::Accept { + AcceptOrReject::Reject + } else { + AcceptOrReject::Accept + } + } + } + } - /// only pass when this regex is found in the log message output. - pub fn only_pass_on_regex(mut self, regex: Regex) -> Self { - self.regex = Some(regex); - self - } + let final_result = final_evaluate_filter(&mut evaluating_serializer.filter, record.level()); - /// suppress output if this regex if found in the log message output. - pub fn always_suppress_on_regex(mut self, regex: Regex) -> Self { - self.neg_regex = Some(regex); - self + match final_result { + AcceptOrReject::Accept => true, + AcceptOrReject::Reject => false, + } } +} - fn is_match(&self, record: &slog::Record, logger_values: &slog::OwnedKVList) -> bool { - // println!("------------"); - - // Can't use chaining here, as it's not possible to cast - // SyncSerialize to Serialize - let mut pres = FilteringSerializer { - presence: Default::default(), - valuematches: self.filters.as_ref().map_or(HashMap::new(), |f| { - f.iter().map(|(k, v)| (k.as_str(), v)).collect() - }), - tmp_str: String::new(), - }; - - // println!("{:?}", self.keypresence); +impl Drain for KVFilter + where + D: Drain, +{ + type Ok = Option; + type Err = Option; - let mut negser = FilteringSerializer { - presence: self.keypresence.as_ref().map_or(HashMap::new(), |f| { - f.iter().map(|(k, v)| (k.as_str(), *v)).collect() - }), - valuematches: self.neg_filters.as_ref().map_or(HashMap::new(), |ref f| { - f.iter().map(|(k, v)| (k.as_str(), v)).collect() - }), - tmp_str: String::new(), - }; + fn log( + &self, + record: &slog::Record, + logger_values: &slog::OwnedKVList, + ) -> Result { + if self.is_match(record, logger_values) { + self.drain + .log(record, logger_values) + .map(Some) + .map_err(Some) + } else { + Ok(None) + } + } +} +//endregion + +//region Filtering implementation +/// Simple enum to express a message is to be either Accepted or Rejected +#[derive(Clone, Copy, Debug, PartialEq)] +enum AcceptOrReject { + Reject, + Accept, +} - record.kv().serialize(record, &mut pres).unwrap(); +/// An actual filter in progress that get's progressively simplified during a log message evaluation. +/// A lightweight clone of FilterSpec. +#[derive(Debug, PartialEq)] +enum Filter<'a> { + Accept, + Reject, + LevelAtLeast(Level), + MatchKV { + key: &'a str, + value: &'a str, + }, + KeyExists(&'a str), + MatchAnyValue { + key: &'a str, + values: HashSet<&'a str>, + }, + MatchMsgRegex(&'a RegexWrapper), + And(Box>, Box>), + Or(Box>, Box>), + Not(Box>), +} - // negative we have to go all way down to check for _any_ key match - record.kv().serialize(record, &mut negser).unwrap(); - logger_values.serialize(record, &mut negser).unwrap(); +impl<'a> Filter<'a> { + fn from_spec<'b: 'a>(spec: &'b FilterSpec) -> Filter<'a> { + match spec { + FilterSpec::Accept => Filter::Accept, + FilterSpec::Reject => Filter::Reject, + FilterSpec::LevelAtLeast(level) => Filter::LevelAtLeast(*level), + FilterSpec::MatchKV { key, value } => Filter::MatchKV { + key: &key, + value: &value, + }, + FilterSpec::MatchAnyValue { key, values } => Filter::MatchAnyValue { + key: &key, + values: values.iter().map(|v| v.as_str()).collect(), + }, + FilterSpec::MatchMsgRegex(regex_wrapper) => Filter::MatchMsgRegex(®ex_wrapper), + FilterSpec::KeyExists(key) => Filter::KeyExists(&key), + FilterSpec::And(a, b) => Filter::And( + Box::new(Filter::from_spec(&a)), + Box::new(Filter::from_spec(&b)), + ), + FilterSpec::Or(a, b) => Filter::Or( + Box::new(Filter::from_spec(&a)), + Box::new(Filter::from_spec(&b)), + ), + FilterSpec::Not(f) => Filter::Not(Box::new(Filter::from_spec(&f))), + } + } +} - let anypositivematch = |pres: &FilteringSerializer| - pres.valuematches.is_empty(); +/// Helper struct to turn arguments into a string and cache it. +/// The actual cached string is passed around as `tmp_string` to +/// the `is_equal` method. That is quite ugly, but I just didn't succeed adding a +/// tmp_string: &'a mut String +/// member to this struct and successfully negotiate its usage with the borrow checker. +struct ArgumentsValueMemo<'a> { + arguments: &'a fmt::Arguments<'a>, + is_serialized: Cell, +} - let anynegativematch = |negser: &FilteringSerializer| - negser.valuematches.len() == self.neg_filters.as_ref() - .map_or(0, - |m| m.keys().len()); +impl<'a> ArgumentsValueMemo<'a> { + fn new(arguments: &'a fmt::Arguments) -> ArgumentsValueMemo<'a> { + ArgumentsValueMemo { + arguments, + is_serialized: Cell::new(false), + } + } - // at least one positive gone & all negatives remain (i.e. no negatives found) - let mut pass = - self.keypresence.as_ref().map_or(true, |kp| { - let pos = negser.presence.iter().filter(|(_, v)| **v).count(); - pos == 0 || pos < kp.iter().filter(|(_, v)| **v).count() - }); + fn is_equal(&self, value: &str, tmp_string: &mut String) -> Result { + if !self.is_serialized.get() { + tmp_string.clear(); + fmt::write(tmp_string, *self.arguments)?; + self.is_serialized.set(true); + } + Ok(tmp_string == value) + } - // println!("{} after checking for positive key hits {:?}", pass, &negser.presence); + fn is_contained_in_hash_set(&self, set: &HashSet<&str>, tmp_string: &mut String) -> Result { + if !self.is_serialized.get() { + tmp_string.clear(); + fmt::write(tmp_string, *self.arguments)?; + self.is_serialized.set(true); + } + Ok(set.contains(tmp_string.as_str())) + } +} - pass &= self.keypresence.as_ref() - .map_or(true, |kp| - negser.presence.iter().filter(|(_, v)| !**v).count() == kp.iter().filter(|(_, v)| !**v).count()); +/// This is the filtering workhorse. It used to process a single log message. +/// It is set up with the logging level of the log message and a filter. +/// `tmp_value_string` is there just to avoid repeated string allocation and serves as +/// a temporary serialized string for `ArgumentsValueMemo` +struct EvaluateFilterSerializer<'a> { + message_level: Level, + msg: &'a fmt::Arguments<'a>, + filter: Filter<'a>, + tmp_value_string: String, +} - // println!("{} after checking for negative key hits {:?}", pass, &negser.presence); +impl<'a> Serializer for EvaluateFilterSerializer<'a> { + fn emit_arguments(&mut self, key: Key, value: &fmt::Arguments) -> slog::Result { + let mut value = ArgumentsValueMemo::new(value); - pass &= - if anypositivematch(&pres) { - // if e'thing matched on the positive make sure _nothing_ matched on negative - anynegativematch(&negser) - } else { - // check inside whether we find more matches - logger_values.serialize(record, &mut pres).unwrap(); + struct Context<'a> { + level: Level, + msg: &'a fmt::Arguments<'a>, + key: Key, + value: &'a mut ArgumentsValueMemo<'a>, + tmp_value_string: &'a mut String, + } - if anypositivematch(&pres) { - anynegativematch(&negser) - } else { - false + /// (Partially) in-place evaluate the filter for a particular key and value + fn evaluate_filter_with_kv(filter: &mut Filter, context: &mut Context) -> slog::Result { + let maybe_simplified_filter = match filter { + Filter::Accept => None, + Filter::Reject => None, + Filter::MatchMsgRegex(RegexWrapper(regex)) => { + let message = fmt::format(*context.msg); + if regex.is_match(&message) { + Some(Filter::Accept) + } else { + Some(Filter::Reject) + } + } + Filter::LevelAtLeast(treshold) => { + if context.level.is_at_least(*treshold) { + Some(Filter::Accept) + } else { + Some(Filter::Reject) + } + } + Filter::MatchKV { + key: this_key, + value: this_value, + } => { + if &context.key == this_key { + if context.value.is_equal(this_value, context.tmp_value_string)? { + Some(Filter::Accept) + } else { + None + } + } else { + None + } + } + Filter::MatchAnyValue { + key: this_key, + values, + } => { + if &context.key == this_key && + context.value + .is_contained_in_hash_set(values, context.tmp_value_string)? { + Some(Filter::Accept) + } else { + None + } + } + Filter::KeyExists(this_key) => { + if &context.key == this_key { + Some(Filter::Accept) + } else { + None + } + } + Filter::And(a, b) => { + evaluate_filter_with_kv(a, context)?; + if **a == Filter::Reject { + Some(Filter::Reject) + } else { + evaluate_filter_with_kv(b, context)?; + if **a == Filter::Accept { + if **b == Filter::Accept { + Some(Filter::Accept) + } else if **b == Filter::Reject { + Some(Filter::Reject) + } else { + None + } + } else { + None + } + } + } + Filter::Or(a, b) => { + evaluate_filter_with_kv(a, context)?; + if **a == Filter::Accept { + Some(Filter::Accept) + } else { + evaluate_filter_with_kv(b, context)?; + if **b == Filter::Accept { + Some(Filter::Accept) + } else if **a == Filter::Reject && **b == Filter::Reject { + Some(Filter::Reject) + } else { + None + } + } + } + Filter::Not(f) => { + evaluate_filter_with_kv(f, context)?; + if **f == Filter::Accept { + Some(Filter::Reject) + } else if **f == Filter::Reject { + Some(Filter::Accept) + } else { + None + } } }; - if pass && (self.regex.is_some() || self.neg_regex.is_some()) { - let res = format(*record.msg()); - - if let Some(ref posmatch) = self.regex { - pass = posmatch.is_match(&res); - }; - - if pass { - if let Some(ref negmatch) = self.neg_regex { - pass = !negmatch.is_match(&res); - } + if let Some(simplified_filter) = maybe_simplified_filter { + *filter = simplified_filter } - } - pass - } -} + Ok(()) + } -impl<'a, D: slog::Drain> slog::Drain for KVFilter { - type Err = D::Err; - type Ok = Option; + let mut context = Context { + level: self.message_level, + msg: self.msg, + key, + value: &mut value, + tmp_value_string: &mut self.tmp_value_string, + }; - fn log( - &self, - info: &slog::Record, - logger_values: &slog::OwnedKVList, - ) -> Result { - // println!("{:#?}", info.msg()); + evaluate_filter_with_kv(&mut self.filter, &mut context)?; - if info.level() < self.level || self.is_match(info, logger_values) { - self.drain.log(info, logger_values).map(Some) - } else { - Ok(None) - } + Ok(()) } } +//endregion +//region tests #[cfg(test)] mod tests { - use super::KVFilter; - use slog::{Drain, Level, Logger, OwnedKVList, Record}; - use regex::Regex; - use std::collections::{HashSet, HashMap}; - use std::iter::FromIterator; - use std::sync::Mutex; - use std::fmt::Debug; - use std::fmt::Display; - use std::fmt::Formatter; - use std::fmt::Result as FmtResult; - use std::io; - use std::sync::Arc; - - const YES: &'static str = "YES"; - const NO: &'static str = "NO"; + use std::{ + fmt, + io, + sync::{ + Arc, + atomic::{AtomicUsize, Ordering}, + Mutex, + }, + }; + + use slog::{Drain, FnValue, Level, Logger, OwnedKVList, Record}; + + use super::*; + use super::FilterSpec::*; #[derive(Debug)] struct StringDrain { output: Arc>>, } - /// seriously hacked logger drain that just counts messages to make - /// sure we have tests behaving correcly impl<'a> Drain for StringDrain { - type Err = io::Error; type Ok = (); + type Err = io::Error; - fn log(&self, info: &Record, _: &OwnedKVList) -> io::Result<()> { - let mut lo = self.output.lock().unwrap(); - let fmt = format!("{:?}", info.msg()); - - if !fmt.contains(YES) && !fmt.contains(NO) { - panic!("{}", fmt); + fn log(&self, message: &Record, _: &OwnedKVList) -> io::Result<()> { + let formatted = format!("{:?}", message.msg()); + if !formatted.contains("ACCEPT") && !formatted.contains("REJECT") { + panic!(formatted); } - (*lo).push(fmt); + self.output.lock().unwrap().push(formatted); Ok(()) } } - impl<'a> Display for StringDrain { - fn fmt(&self, f: &mut Formatter) -> FmtResult { - write!(f, "none") + impl<'a> fmt::Display for StringDrain { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + write!(f, "String drain: {:#?}", self.output.lock().unwrap()) } } - fn testkvfilter(d: D) -> KVFilter { - KVFilter::new(d, Level::Info).only_pass_any_on_all_keys(Some( - vec![ - ( - "thread".to_string(), - HashSet::from_iter(vec!["100".to_string(), "200".to_string()]), - ), - ( - "direction".to_string(), - HashSet::from_iter(vec!["send".to_string(), "receive".to_string()]), - ), - ].into_iter() - .collect(), - )) + /// A message that adds the message into the formatted_messages Vec as a side-effect of debug format + struct TestingMessage { + message: String, + increment_on_format: Arc, } - fn testnegkvfilter(f: KVFilter) -> KVFilter { - f.always_suppress_any(Some( - vec![ - ( - "deepcomp".to_string(), - HashSet::from_iter(vec!["1".to_string(), "2".to_string()]), - ), - ( - "deepercomp".to_string(), - HashSet::from_iter(vec!["4".to_string(), "5".to_string()]), - ), - ].into_iter() - .collect(), - )) + impl TestingMessage { + fn new(message: &str, increment_on_format: &Arc) -> Self { + TestingMessage { + message: message.to_owned(), + increment_on_format: Arc::clone(increment_on_format), + } + } } - #[test] - /// get an asserting Drain, get a couple of loggers that - /// have different nodes, components and see whether filtering - /// is applied properly on the derived `Logger` copies - fn nodecomponentlogfilter() { - assert!(Level::Critical < Level::Warning); - - let out = Arc::new(Mutex::new(vec![])); - - let drain = StringDrain { - output: out.clone(), - }; + impl fmt::Debug for TestingMessage { + fn fmt(&self, f: &mut fmt::Formatter) -> Result<(), fmt::Error> { + self.increment_on_format.fetch_add(1, Ordering::Relaxed); + write!(f, "{}", self.message) + } + } - // build some small filter - let filter = testkvfilter(drain); + struct Tester { + accepted_messages: Arc>>, + evaluated_log_values: Arc, + log: Logger, + } - // Get a root logger that will log into a given drain. - let mainlog = Logger::root(filter.fuse(), o!("version" => env!("CARGO_PKG_VERSION"))); - let sublog = mainlog.new(o!("thread" => "200", "sub" => "sub")); - let subsublog = sublog.new(o!("direction" => "send")); - let subsubsublog = subsublog.new(o!()); + impl Tester { + fn new(filter_spec: FilterSpec, evaluation_order: EvaluationOrder) -> Self { + let accepted_messages = Arc::new(Mutex::new(Vec::new())); + let evaluated_log_values = Arc::new(AtomicUsize::new(0)); + let evaluated_log_values_clone = Arc::clone(&evaluated_log_values); + let drain = StringDrain { + output: accepted_messages.clone(), + }; + let filter = KVFilter::new(drain, filter_spec, evaluation_order); + let log = Logger::root( + filter.fuse(), + o!("main_log" => FnValue(move |_: &Record| { + evaluated_log_values_clone.fetch_add(1, Ordering::Relaxed); + "m" + })), + ); + + Tester { + accepted_messages, + evaluated_log_values, + log, + } + } - let wrongthread = mainlog.new(o!("thread" => "400", "sub" => "sub")); + fn assert_accepted(&self, expected_count: usize) { + let accepted_messages = self.accepted_messages.lock().unwrap(); + for message in accepted_messages.iter() { + assert!( + message.contains("ACCEPT"), + "Message without ACCEPT accepted: `{}`", + message + ) + } + let actual_count = accepted_messages.len(); + assert_eq!(expected_count, actual_count) + } - info!(mainlog, "NO: filtered, main, no keys"); - info!(mainlog, "YES: unfiltered, on of thread matches, direction matches"; - "thread" => "100", "direction" => "send"); - info!(mainlog, - "YES: unfiltered, on of thread matches, direction matches, different key order"; - "direction" => "send", "thread" => "100"); + fn assert_evaluated_log_values(&self, expected_count: usize) { + let actual_count = self.evaluated_log_values.load(Ordering::Relaxed); + assert_eq!(expected_count, actual_count) + } + } - warn!(mainlog, "YES: unfiltered, higher level"); // level high enough to pass anyway + #[test] + fn test_any_all_of() { + let a = FilterSpec::match_kv("k1", "v1"); + let b = FilterSpec::match_kv("k2", "v2"); + let c = FilterSpec::match_kv("k3", "v3"); + assert_eq!( + a.clone().or(b.clone().or(c.clone())), + FilterSpec::any_of(&[a.clone(), b.clone(), c.clone()]) + ); + assert_eq!( + a.clone().and(b.clone().and(c.clone())), + FilterSpec::all_of(&[a.clone(), b.clone(), c.clone()]) + ); + } - debug!(mainlog, "NO: filtered, level to low, no keys"); // level low + #[test] + fn test_lazy_format() { + let filter_spec = FilterSpec::match_kv("sub_log_key", "sub_log_value") + .or(FilterSpec::match_kv("key1", "value1")) + .or(FilterSpec::match_kv("key2", "value2")); + + let tester = Tester::new(filter_spec, EvaluationOrder::LoggerAndMessage); + + let value1_evaluations = Arc::new(AtomicUsize::new(0)); + let value1 = TestingMessage::new("value1", &value1_evaluations); + let value2_evaluations = Arc::new(AtomicUsize::new(0)); + let value2 = TestingMessage::new("value2", &value2_evaluations); + let invalid_value_evaluations = Arc::new(AtomicUsize::new(0)); + let invalid_value = TestingMessage::new("invalid", &invalid_value_evaluations); + + let assert_evaluations = |value1: usize, value2: usize, invalid: usize| { + assert_eq!(value1, value1_evaluations.load(Ordering::Relaxed)); + value1_evaluations.store(0, Ordering::Relaxed); + assert_eq!(value2, value2_evaluations.load(Ordering::Relaxed)); + value2_evaluations.store(0, Ordering::Relaxed); + assert_eq!(invalid, invalid_value_evaluations.load(Ordering::Relaxed)); + invalid_value_evaluations.store(0, Ordering::Relaxed); + }; - info!(mainlog, "NO: filtered, wrong thread on record"; - "thread" => "300", "direction" => "send"); + let sub_log = tester.log.new(o!("sub_log_key" => "sub_log_value")); + info!(sub_log, "ACCEPT because of sub_log_key"; "key1" => ?value1); + assert_evaluations(0, 0, 0); - info!(wrongthread, "NO: filtered, wrong thread on sublog"); + info!(tester.log, "ACCEPT because of key1"; "key1" => ?value1); + assert_evaluations(1, 0, 0); - info!(sublog, "NO: filtered sublog, missing dirction "); + info!(tester.log, "ACCEPT because of key1"; "key1" => ?value1); + assert_evaluations(1, 0, 0); - info!(sublog, "YES: unfiltered sublog with added directoin"; - "direction" => "receive"); + info!(tester.log, "ACCEPT because of key2"; "key2" => ?value2); + assert_evaluations(0, 1, 0); - info!( - subsubsublog, - "YES: unfiltered subsubsublog, direction on subsublog, thread on sublog" + // Slog processes KVs in reverse order. If this ever changes, please update the relevant documentation for FilterSpec. + info!(tester.log, + "ACCEPT because of key1, don't process key2."; + "key2" => ?invalid_value, "key1" => ?value1 ); + assert_evaluations(1, 0, 0); - // test twice same keyword with right value will give filter match - let stackedthreadslog = wrongthread.new(o!("thread" => "200")); - - info!(stackedthreadslog, - "YES: unfiltered since one of the threads matches from inherited"; - "direction" => "send"); + info!(tester.log, + "ACCEPT because of key2, key1 had to be processed first with negative result."; + "key2" => ?value2, "key1" => ?invalid_value + ); + assert_evaluations(0, 1, 1); - println!("resulting output: {:#?}", *out.lock().unwrap()); + info!(tester.log, "REJECT"; "key2" => ?invalid_value, "key1" => ?invalid_value ); + assert_evaluations(0, 0, 2); - assert_eq!(out.lock().unwrap().len(), 6); + tester.assert_evaluated_log_values(7); + tester.assert_accepted(6); } #[test] - /// get an asserting Drain, get a couple of loggers that - /// have different nodes, components and deep/deeper components and see whether filtering - /// is applied properly on the derived `Logger` copies while punching holes for the disallowed - /// values - fn negnodecomponentlogfilter() { - assert!(Level::Critical < Level::Warning); + fn test_evaluation_order() { + fn new_tester(evaluation_order: EvaluationOrder) -> Tester { + Tester::new( + FilterSpec::match_kv("key", "value").or(FilterSpec::match_kv("sub_log", "a")), + evaluation_order, + ) + } - let out = Arc::new(Mutex::new(vec![])); + { + let tester = new_tester(EvaluationOrder::LoggerOnly); + let sub_log = tester.log.new(o!("sub_log" => "a")); - let drain = StringDrain { - output: out.clone(), - }; + info!(sub_log, "ACCEPT: Will be accepted because of sub log"); + tester.assert_accepted(1); + info!(tester.log, "REJECT: Won't be accepted because message KVs are not evaluated"; "key" => "value"); + tester.assert_accepted(1); + tester.assert_evaluated_log_values(2); + } - // build some small filter - let filter = testnegkvfilter(testkvfilter(drain.fuse())); + { + let tester = new_tester(EvaluationOrder::MessageOnly); + let sub_log = tester.log.new(o!("sub_log" => "a")); + + info!( + sub_log, + "REJECT: Will be rejected because log KVs are ignored" + ); + tester.assert_accepted(0); + info!(tester.log, "ACCEPT"; "key" => "value"); + tester.assert_accepted(1); + tester.assert_evaluated_log_values(0); + } - // Get a root logger that will log into a given drain. - let mainlog = Logger::root(filter.fuse(), o!("version" => env!("CARGO_PKG_VERSION"))); - let sublog = mainlog.new(o!("thread" => "200", "sub" => "sub")); - let subsublog = sublog.new(o!("direction" => "send")); - // deep match won't match - let subsubsublog = subsublog.new(o!("deepcomp" => "0")); - // deep match will filter - let negsubsubsublog = subsublog.new(o!("deepcomp" => "1")); + { + let tester = new_tester(EvaluationOrder::LoggerAndMessage); + let sub_log = tester.log.new(o!("sub_log" => "a")); - info!(mainlog, "NO: filtered, main, no keys"); - info!(mainlog, "YES: unfiltered, on of thread matches, direction matches"; - "thread" => "100", "direction" => "send"); - info!(subsubsublog, "YES: unfiltered, one of thread matches, direction matches, deep doesn't apply"; - "thread" => "100", "direction" => "send"); - info!(negsubsubsublog, "NO: filtered, one of thread matches, direction matches, deep negative applies"; - "thread" => "100", "direction" => "send"); - info!(subsubsublog, "NO: filtered, one of thread matches, direction matches, deep doesn't apply but deeper does"; - "thread" => "100", "direction" => "send", "deepercomp" => "4"); - info!(subsubsublog, "YES: unfiltered, on of thread matches, direction matches, deep doesn't apply and deeper doesn't"; - "thread" => "100", "direction" => "send", "deepercomp" => "7"); + info!(tester.log, "ACCEPT"; "key" => "value"); + info!(sub_log, "ACCEPT: Will be accepted because of sub log"); + tester.assert_accepted(2); + tester.assert_evaluated_log_values(2); + } - println!("resulting output: {:#?}", *out.lock().unwrap()); + { + let tester = new_tester(EvaluationOrder::MessageAndLogger); + let sub_log = tester.log.new(o!("sub_log" => "a")); - assert_eq!(out.lock().unwrap().len(), 3); + info!(tester.log, "ACCEPT"; "key" => "value"); + info!(sub_log, "ACCEPT: Will be accepted because of sub log"); + tester.assert_accepted(2); + tester.assert_evaluated_log_values(1); + } } #[test] - /// test negative and positive - fn regextest() { - assert!(Level::Critical < Level::Warning); - - let out = Arc::new(Mutex::new(vec![])); - - let drain = StringDrain { - output: out.clone(), - }; - - // build some small filter - let filter = KVFilter::new(drain.fuse(), Level::Info) - .only_pass_on_regex(Regex::new(r"PASS\d:").unwrap()) - .always_suppress_on_regex(Regex::new(r"NOPE\d:").unwrap()); - - // Get a root logger that will log into a given drain. - let mainlog = Logger::root(filter.fuse(), o!("version" => env!("CARGO_PKG_VERSION"))); - - info!(mainlog, "NO: filtered, no positive"); - info!(mainlog, "NO: NOPE2 PASS0 filtered, negative"); - info!(mainlog, "NO: filtered, no positive"); - info!(mainlog, "YES: PASS2: not filtered, positive"); - info!(mainlog, "YES: {}: not filtered, positive", "PASS4"); + fn test_sub_log() { + { + let tester = Tester::new( + FilterSpec::match_kv("main_log", "m"), + EvaluationOrder::LoggerAndMessage, + ); + let sub_log = tester.log.new(o!("sub_log" => "a")); + let sub_sub_log = sub_log.new(o!("sub_sub_log" => "b")); + + info!(tester.log, "ACCEPT: main log"); + info!(sub_log, "ACCEPT: sub log inherits the main_log KV"); + info!(sub_sub_log, "ACCEPT: the same for sub sub log"); + tester.assert_accepted(3); + } - println!("resulting output: {:#?}", *out.lock().unwrap()); + { + let tester = Tester::new( + FilterSpec::match_kv("sub_sub_log", "b"), + EvaluationOrder::LoggerAndMessage, + ); + let sub_log = tester.log.new(o!("sub_log" => "a")); + let sub_sub_log = sub_log.new(o!("sub_sub_log" => "b")); + + info!( + tester.log, + "REJECT: main log doesn't have the sub_sub_log key" + ); + info!(sub_log, "REJECT: neither has the sub_log"); + info!(sub_sub_log, "ACCEPT: sub sub log has it"); + tester.assert_accepted(1); + } + } - assert_eq!(out.lock().unwrap().len(), 2); + #[test] + fn test_accept() { + let tester = Tester::new(Accept, EvaluationOrder::LoggerAndMessage); + info!(tester.log, "ACCEPT: Everything will pass"); + warn!(tester.log, "ACCEPT: Everything will pass"); + tester.assert_accepted(2); } #[test] - /// test presence key negative and positive - fn keypresence() { - assert!(Level::Critical < Level::Warning); + fn test_reject() { + let tester = Tester::new(Reject, EvaluationOrder::LoggerAndMessage); + info!(tester.log, "REJECT: Everything will be rejected"); + warn!(tester.log, "REJECT: Everything will be rejected"); + tester.assert_accepted(0); + } - let out = Arc::new(Mutex::new(vec![])); + #[test] + fn test_level_at_least() { + let tester = Tester::new( + LevelAtLeast(Level::Warning), + EvaluationOrder::LoggerAndMessage, + ); + info!(tester.log, "REJECT: Level too low"); + warn!(tester.log, "ACCEPT: Level good"); + error!(tester.log, "ACCEPT: Level good"); + tester.assert_accepted(2); + } - let drain = StringDrain { - output: out.clone(), - }; + #[test] + fn test_match_kv() { + let tester = Tester::new( + FilterSpec::match_kv("key", "value"), + EvaluationOrder::LoggerAndMessage, + ); + info!(tester.log, "ACCEPT: Good key"; "key" => "value"); + info!(tester.log, "REJECT"; "main_log" => "m"); + info!(tester.log, "REJECT"; "key" => "bad_value"); + info!(tester.log, "REJECT"; "bad_key" => "value"); + tester.assert_accepted(1); + } - const POS1: &str = "p1"; - const POS2: &str = "p2"; - const NEG1: &str = "n1"; - const NEG2: &str = "n2"; + #[test] + fn test_match_any_value() { + let tester = Tester::new( + FilterSpec::match_any_value("key", &["v1", "v2", "v3"]), + EvaluationOrder::LoggerAndMessage, + ); + info!(tester.log, "ACCEPT"; "key" => "v1"); + info!(tester.log, "ACCEPT"; "key" => "v3"); + info!(tester.log, "ACCEPT"; "key" => "foo", "key" => "v2", "key" => "bar"); + info!(tester.log, "ACCEPT"; "key" => "v3", "key" => "v1", "key" => "v2", "key" => "v5",); + info!(tester.log, "REJECT"); + info!(tester.log, "REJECT"; "key" => "foo", "key" => "bar"); + info!(tester.log, "REJECT"; "bad_key" => "v1"); + tester.assert_accepted(4); + } - let tostriter = |slice: &[&str]| - slice.iter().map(|v| v.to_string()) - .collect::>(); + #[test] + fn test_match_msg_regex() { + let tester = Tester::new( + FilterSpec::try_match_msg_regex(r"Foo.*Bar").unwrap(), + EvaluationOrder::LoggerAndMessage, + ); + info!(tester.log, "ACCEPT FooBar"); + info!(tester.log, "ACCEPT FooBarFooBar"); + info!(tester.log, "ACCEPT FooQuxBar"); + info!(tester.log, "REJECT Foo"); + info!(tester.log, "REJECT"); + tester.assert_accepted(3); + } - // build some small filter - let filter = KVFilter::new(drain.fuse(), Level::Info) - .only_pass_on_any_key_present(tostriter(&[POS1, POS2]).iter()) - .only_pass_on_all_keys_absent(tostriter(&[NEG1, NEG2]).iter()); + #[test] + fn test_key_exists() { + let tester = Tester::new( + FilterSpec::key_exists("key"), + EvaluationOrder::LoggerAndMessage, + ); + info!(tester.log, "ACCEPT: Good key"; "key" => "value"); + info!(tester.log, "ACCEPT: Good key"; "foo" => "bar", "key" => "value", "bar" => "baz"); + info!(tester.log, "REJECT"; "main_log" => "m"); + info!(tester.log, "REJECT"; "bad_key" => "value"); + info!(tester.log, "REJECT"); + tester.assert_accepted(2); + } - let mainlog = Logger::root(filter.fuse(), o!("version" => env!("CARGO_PKG_VERSION"))); - info!(mainlog, "NO: none of positive but negative"; NEG1 => "" ); - info!(mainlog, "NO: positive but negative also present"; POS1 => "", NEG1 => "" ); - info!(mainlog, "YES: positive"; POS1 => "", ); + #[test] + fn test_and() { + { + let tester = Tester::new( + FilterSpec::match_kv("key1", "value1").and(FilterSpec::match_kv("key2", "value2")), + EvaluationOrder::LoggerAndMessage, + ); + info!(tester.log, "ACCEPT"; "key1" => "value1", "key2" => "value2"); + info!(tester.log, "REJECT: key1"; "key1" => "value1"); + info!(tester.log, "REJECT: key1"; "key1" => "value1", "key2" => "boo"); + info!(tester.log, "REJECT: key2"; "key2" => "value2"); + info!(tester.log, "REJECT: key2"; "key1" => "boo", "key2" => "value2"); + info!(tester.log, "REJECT"; "key1" => "boo", "key2" => "boo"); + tester.assert_accepted(1); + } + } - println!("resulting output: {:#?}", *out.lock().unwrap()); + #[test] + fn test_or() { + { + let tester = Tester::new( + FilterSpec::match_kv("key1", "value1").or(FilterSpec::match_kv("key2", "value2")), + EvaluationOrder::LoggerAndMessage, + ); + info!(tester.log, "ACCEPT: key1"; "key1" => "value1"); + info!(tester.log, "ACCEPT: key1"; "key1" => "value1", "key2" => "boo"); + info!(tester.log, "ACCEPT: key2"; "key2" => "value2"); + info!(tester.log, "ACCEPT: key2"; "key1" => "boo", "key2" => "value2"); + info!(tester.log, "REJECT"; "key1" => "boo", "key2" => "boo"); + tester.assert_accepted(4); + } + } - assert_eq!(out.lock().unwrap().len(), 1); + #[test] + fn test_not() { + { + let tester = Tester::new( + FilterSpec::match_kv("key1", "value1").not(), + EvaluationOrder::LoggerAndMessage, + ); + info!(tester.log, "REJECT: key1 match was turned into negative"; "key1" => "value1"); + info!(tester.log, "ACCEPT: No match, we turn it into a negative, that is Accept"; "key1" => "boo"); + tester.assert_accepted(1); + } } #[test] - fn should_not_log_info_messages() { - let out = Arc::new(Mutex::new(vec![])); + fn test_complex_example() { + // This is an example that mimics the original KVFilter. + // We express the following example filter: + // KVFilter { + // filters: key1 -> [value1a, value1b], key2 -> [value2] + // neg_filters: neg_key1 -> [neg_value1], neg_key2 -> [neg_value2a, neg_value2b] + // level: Debug (that actually means level at least Info) + // } + + // (key1 must be either value1a or value1b) AND key2 must be value2 + let positive_filter = FilterSpec::all_of(&[ + FilterSpec::match_any_value("key1", &["value1a", "value1b"]), + FilterSpec::match_kv("key2", "value2") + ]); + + // neg_key1 must be neg_value1 OR neg_key2 must be neg_value2a OR neg_key2 must be neg_value2b + let negative_filter = FilterSpec::any_of(&[ + FilterSpec::match_kv("neg_key1", "neg_value1"), + FilterSpec::match_any_value("neg_key2", &["neg_value2a", "neg_value2b"]), + ]); + + // We pass everything with level at least info, OR anything that matches the positive filter but not the negative one. + // `And` and `Or` rules are evaluated first-to-last, so put the simplest rules (`LevelAtLeast`) first so the filter + // doesn't have to evaluate the more complicated rules if the simpler one already decides a message's fate + let filter = + FilterSpec::LevelAtLeast(Level::Info).or(positive_filter.and(negative_filter.not())); + + let tester = Tester::new(filter, EvaluationOrder::LoggerAndMessage); + + info!(tester.log, "ACCEPT: Info level"); + debug!(tester.log, "REJECT 1: Level too low"); + debug!(tester.log, "ACCEPT"; "key1" => "value1a", "key2" => "value2"); + debug!(tester.log, "ACCEPT"; "key1" => "value1b", "key2" => "value2"); + debug!(tester.log, "REJECT 2"; "key1" => "invalid", "key2" => "value2"); + debug!(tester.log, "REJECT 3"; "key1" => "value1b", "key2" => "invalid"); + debug!(tester.log, "REJECT 4"; "invalid" => "value1b", "key2" => "value2"); + debug!(tester.log, "REJECT 5"; "invalid" => "value1b", "key2" => "value2"); + debug!(tester.log, "REJECT 6"; "key1" => "value1a", "key2" => "value2", "neg_key1" => "neg_value1"); + debug!(tester.log, "REJECT 7"; "key1" => "value1a", "key2" => "value2", "neg_key2" => "neg_value2b"); + debug!(tester.log, "ACCEPT"; "key1" => "value1a", "key2" => "value2", "neg_key1" => "invalid"); + + tester.assert_accepted(4); + } - let drain = StringDrain { - output: out.clone(), - }; + #[test] + fn test_complex_example_2() { + // This is an example that implements the behavior requested at this issue: + // https://github.com/sile/sloggers/issues/9#issuecomment-422685244 + // + // The requested behavior: + // + // 1. pass all records with ("subsystem": "A1" || "subsystem": "A2") && level at least debug + // 2. pass all records with (subsystem": "B1" || "subsystem": "B2") && level at least info + // 3. pass all records with level at least warn + // 4. reject all other records + // In all cases, make decisions based only on the LOGGER keys and values, + // don't take the message KVs into account + + let match_kv = FilterSpec::match_kv; + + let subsystem_a = (match_kv("subsystem", "A1").or(match_kv("subsystem", "A2"))) + .and(FilterSpec::LevelAtLeast(Level::Debug)); + + let subsystem_b = (match_kv("subsystem", "B1").or(match_kv("subsystem", "B2"))) + .and(FilterSpec::LevelAtLeast(Level::Info)); + + // `And` and `Or` rules are evaluated first-to-last, so put the simplest rules (`LevelAtLeast`) first so the filter + // doesn't have to evaluate the more complicated rules if the simpler one already decides a message's fate + let filter = FilterSpec::LevelAtLeast(Level::Warning) + .or(subsystem_a) + .or(subsystem_b); + + // EvaluationOrder::Logger means that only the logger KVs will be used for message filtering + let tester = Tester::new(filter, EvaluationOrder::LoggerOnly); + let subsystem_a1 = tester.log.new(o!("subsystem" => "A1")); + let subsystem_a2 = tester.log.new(o!("subsystem" => "A2")); + let subsystem_b1 = tester.log.new(o!("subsystem" => "B1")); + let subsystem_b2 = tester.log.new(o!("subsystem" => "B2")); + let subsystem_xxx = tester.log.new(o!("subsystem" => "XXX")); + + // Rule 1 + debug!(subsystem_a1, "ACCEPT"); + debug!(subsystem_a2, "ACCEPT"); + debug!(subsystem_xxx, "REJECT debug XXX"); + debug!(subsystem_xxx, "REJECT debug XXX, message KV is discarded"; "subsystem" => "A1"); + + // Rule 2 + debug!(subsystem_b1, "REJECT"); + info!(subsystem_b1, "ACCEPT"); + info!(subsystem_b2, "ACCEPT"); + info!(subsystem_xxx, "REJECT info XXX"); + + // Rule 3 + warn!(tester.log, "ACCEPT: Info level"); + + // Rule 4 + info!(tester.log, "REJECT: Level too low"); + + tester.assert_accepted(5); + } - let filter = KVFilter::new(drain.fuse(), Level::Info) - .only_pass_on_any_key_present(["err".to_string()].iter()) - .always_suppress_any(Some( - HashMap::from_iter( - vec![( - "err".to_string(), - HashSet::from_iter(vec!["None".to_string(), "".to_string()]), - )] - ) - )); - let logger = Logger::root(filter.fuse(), o!()); + #[test] + fn test_complex_example_3() { + // Implements https://github.com/slog-rs/kvfilter/pull/16#issuecomment-795856834 + + let tester = Tester::new( + LevelAtLeast(Level::Warning).or( + LevelAtLeast(Level::Info).and( + FilterSpec::key_exists("err") + .and(FilterSpec::match_kv("err", "None").not()) + .and(FilterSpec::match_kv("err", "").not()) + )), + EvaluationOrder::LoggerAndMessage, + ); // should discard - info!(logger, "NO: test info"); - info!(logger, "NO: test info"; "count" => 10); - info!(logger, "NO: test error"; "err" => "None"); - info!(logger, "NO: test error"; "err" => ""); - info!(logger, "NO: test info"; "count" => 10); - debug!(logger, "NO: test debug"); + info!(tester.log, "REJECT: test info"); + info!(tester.log, "REJECT: test info"; "count" => 10); + info!(tester.log, "REJECT: test error"; "err" => "None"); + info!(tester.log, "REJECT: test error"; "err" => ""); + debug!(tester.log, "REJECT: test debug"; "err" => "Panic!"); + debug!(tester.log, "REJECT: test debug"); // should log to drain - info!(logger, "YES: test error"; "err" => "Panic!"); - error!(logger, "YES: test error"); - - println!("resulting output: {:#?}", *out.lock().unwrap()); - - assert_eq!(out.lock().unwrap().len(), 2); + info!(tester.log, "ACCEPT: test error"; "err" => "Panic!"); + error!(tester.log, "ACCEPT: test error"); + tester.assert_accepted(2); } } +//endregion