diff --git a/kayle_innate/Cargo.lock b/kayle_innate/Cargo.lock index 9799759..d7bafe5 100644 --- a/kayle_innate/Cargo.lock +++ b/kayle_innate/Cargo.lock @@ -2,6 +2,70 @@ # It is not intended for manual editing. version = 3 +[[package]] +name = "accessibility-rs" +version = "0.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "162de529358b740edb31af55e6d5f2d553a19ba0aa921db97d46acc7f129d29d" +dependencies = [ + "accessibility-scraper", + "accessibility-tree", + "cssparser 0.25.9", + "ego-tree", + "getrandom", + "lazy_static", + "markup5ever", + "selectors 0.21.0", + "serde", + "slotmap", + "smallvec 1.11.1", + "taffy", + "url", +] + +[[package]] +name = "accessibility-scraper" +version = "0.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "81990c4b7b08138e9a5a0901f50cda6ec0d70de0e05f594692e3feb8c174cbb7" +dependencies = [ + "ahash", + "cssparser 0.25.9", + "ego-tree", + "getopts", + "html5ever", + "once_cell", + "selectors 0.21.0", + "smallvec 0.6.14", + "tendril", +] + +[[package]] +name = "accessibility-tree" +version = "0.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "54561d0499b9ec59fa5d09b76a2ff98aff00ab336e2eda1a7f4fe0b2d21f8249" +dependencies = [ + "accessibility-scraper", + "atomic_refcell", + "cssparser 0.25.9", + "dtoa 0.4.8", + "euclid", + "html5ever", + "itoa 0.4.8", + "lazy_static", + "lock_api 0.1.5", + "num-traits", + "rayon", + "rayon_croissant", + "selectors 0.21.0", + "smallbitvec", + "smallvec 1.11.1", + "victor-tree-internal-proc-macros", + "xi-unicode", + "xml-rs", +] + [[package]] name = "ahash" version = "0.8.3" @@ -47,6 +111,12 @@ version = "1.3.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a" +[[package]] +name = "bitflags" +version = "2.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b4682ae6287fcf752ecaabbfcc7b6f9b72aa33933dc23a554d853aea8eea8635" + [[package]] name = "bumpalo" version = "3.14.0" @@ -83,7 +153,7 @@ version = "0.0.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ddfc5b9aa5d4507acaf872de71051dfd0e309860e88966e1051e462a077aac4f" dependencies = [ - "bitflags", + "bitflags 1.3.2", ] [[package]] @@ -136,7 +206,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "fbe18ca4efb9ba3716c6da66cc3d7e673bf59fa576353011f48c4cfddbdd740e" dependencies = [ "autocfg 0.1.8", - "cssparser-macros", + "cssparser-macros 0.3.6", "dtoa-short", "itoa 0.4.8", "matches", @@ -148,6 +218,19 @@ dependencies = [ "syn 1.0.109", ] +[[package]] +name = "cssparser" +version = "0.31.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5b3df4f93e5fbbe73ec01ec8d3f68bba73107993a5b1e7519273c32db9b0d5be" +dependencies = [ + "cssparser-macros 0.6.1", + "dtoa-short", + "itoa 1.0.9", + "phf 0.10.1", + "smallvec 1.11.1", +] + [[package]] name = "cssparser-macros" version = "0.3.6" @@ -161,6 +244,27 @@ dependencies = [ "syn 1.0.109", ] +[[package]] +name = "cssparser-macros" +version = "0.6.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "13b588ba4ac1a99f7f2964d24b3d896ddc6bf847ee3855dbd4366f058cfcd331" +dependencies = [ + "quote 1.0.33", + "syn 2.0.38", +] + +[[package]] +name = "derive_more" +version = "0.99.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4fb810d30a7c1953f91334de7244731fc3f3c10d7fe163338a35b9f640960321" +dependencies = [ + "proc-macro2 1.0.68", + "quote 1.0.33", + "syn 1.0.109", +] + [[package]] name = "dtoa" version = "0.4.8" @@ -194,12 +298,6 @@ version = "1.9.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a26ae43d7bcc3b814de94796a5e736d4029efb0ee900c12e2d54c993ad1a1e07" -[[package]] -name = "equivalent" -version = "1.0.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5443807d6dff69373d433ab9ef5378ad8df50ca6298caf15de6e52e24aaf54d5" - [[package]] name = "euclid" version = "0.19.9" @@ -283,12 +381,6 @@ version = "0.10.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "eec1c01eb1de97451ee0d60de7d81cf1e72aabefb021616027f3d1c3ec1c723c" -[[package]] -name = "hashbrown" -version = "0.14.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7dfda62a12f55daeae5015f81b0baea145391cb4520f86c248fc615d72640d12" - [[package]] name = "html5ever" version = "0.26.0" @@ -313,16 +405,6 @@ dependencies = [ "unicode-normalization", ] -[[package]] -name = "indexmap" -version = "2.0.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8adf3ddd720272c6ea8bf59463c04e0f93d0bbf7c5439b691bca2987e0270897" -dependencies = [ - "equivalent", - "hashbrown", -] - [[package]] name = "itoa" version = "0.4.8" @@ -348,23 +430,23 @@ dependencies = [ name = "kayle_innate" version = "0.0.22" dependencies = [ + "accessibility-rs", "case_insensitive_string", "console_error_panic_hook", - "cssparser", + "cssparser 0.25.9", "ego-tree", "getrandom", "js-sys", "lazy_static", "markup5ever", - "scraper_forky", - "selectors", + "scraper", + "selectors 0.21.0", "serde", "serde-wasm-bindgen", "slotmap", "smallvec 1.11.1", "taffy", "url", - "victor_tree", "wasm-bindgen", "wasm-bindgen-test", "wee_alloc", @@ -531,7 +613,9 @@ version = "0.10.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "fabbf1ead8a5bcbc20f5f8b939ee3f5b0f6f281b6ad3468b84656b658b455259" dependencies = [ + "phf_macros", "phf_shared 0.10.0", + "proc-macro-hack", ] [[package]] @@ -574,6 +658,20 @@ dependencies = [ "rand 0.8.5", ] +[[package]] +name = "phf_macros" +version = "0.10.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "58fdf3184dd560f160dd73922bea2d5cd6e8f064bf4b13110abd81b03697b4e0" +dependencies = [ + "phf_generator 0.10.0", + "phf_shared 0.10.0", + "proc-macro-hack", + "proc-macro2 1.0.68", + "quote 1.0.33", + "syn 1.0.109", +] + [[package]] name = "phf_shared" version = "0.7.24" @@ -604,6 +702,12 @@ version = "0.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "925383efa346730478fb4838dbe9137d2a47675ad789c546d150a6e1dd4ab31c" +[[package]] +name = "proc-macro-hack" +version = "0.5.20+deprecated" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dc375e1527247fe1a97d8b7156678dfe7c1af2fc075c9a4db3690ecd2a148068" + [[package]] name = "proc-macro2" version = "0.4.30" @@ -827,7 +931,7 @@ version = "0.3.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "567664f262709473930a4bf9e51bf2ebf3348f2e748ccc50dea20646858f8f29" dependencies = [ - "bitflags", + "bitflags 1.3.2", ] [[package]] @@ -855,18 +959,19 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49" [[package]] -name = "scraper_forky" +name = "scraper" version = "0.17.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c95a930e03325234c18c7071fd2b60118307e025d6fff3e12745ffbf63a3d29c" dependencies = [ "ahash", - "cssparser", + "cssparser 0.31.2", "ego-tree", "getopts", "html5ever", - "indexmap", "once_cell", - "selectors", - "smallvec 0.6.14", + "selectors 0.25.0", + "smallvec 1.11.1", "tendril", ] @@ -876,19 +981,38 @@ version = "0.21.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1b86b100bede4f651059740afc3b6cb83458d7401cb7c1ad96d8a11e91742c86" dependencies = [ - "bitflags", - "cssparser", + "bitflags 1.3.2", + "cssparser 0.25.9", "fxhash", "log", "matches", "phf 0.7.24", "phf_codegen 0.7.24", "precomputed-hash", - "servo_arc", + "servo_arc 0.1.1", "smallvec 0.6.14", "thin-slice", ] +[[package]] +name = "selectors" +version = "0.25.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4eb30575f3638fc8f6815f448d50cb1a2e255b0897985c8c59f4d37b72a07b06" +dependencies = [ + "bitflags 2.4.0", + "cssparser 0.31.2", + "derive_more", + "fxhash", + "log", + "new_debug_unreachable", + "phf 0.10.1", + "phf_codegen 0.10.0", + "precomputed-hash", + "servo_arc 0.3.0", + "smallvec 1.11.1", +] + [[package]] name = "serde" version = "1.0.188" @@ -941,6 +1065,15 @@ dependencies = [ "stable_deref_trait", ] +[[package]] +name = "servo_arc" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d036d71a959e00c77a63538b90a6c2390969f9772b096ea837205c6bd0491a44" +dependencies = [ + "stable_deref_trait", +] + [[package]] name = "siphasher" version = "0.2.3" @@ -1151,36 +1284,14 @@ checksum = "49874b5167b65d7193b8aba1567f5c7d93d001cafc34600cee003eda787e483f" [[package]] name = "victor-tree-internal-proc-macros" version = "0.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1fb9a26b1452763752832db93fa95e9949fe7e4f5d44001a8712537897f2bb37" dependencies = [ "proc-macro2 0.4.30", "quote 0.6.13", "syn 0.15.44", ] -[[package]] -name = "victor_tree" -version = "0.0.7" -dependencies = [ - "atomic_refcell", - "cssparser", - "dtoa 0.4.8", - "euclid", - "html5ever", - "itoa 0.4.8", - "lazy_static", - "lock_api 0.1.5", - "num-traits", - "rayon", - "rayon_croissant", - "scraper_forky", - "selectors", - "smallbitvec", - "smallvec 1.11.1", - "victor-tree-internal-proc-macros", - "xi-unicode", - "xml-rs", -] - [[package]] name = "wasi" version = "0.11.0+wasi-snapshot-preview1" diff --git a/kayle_innate/Cargo.toml b/kayle_innate/Cargo.toml index d26657e..d3323d8 100644 --- a/kayle_innate/Cargo.toml +++ b/kayle_innate/Cargo.toml @@ -1,9 +1,7 @@ [workspace] resolver = "2" members = [ - "kayle_innate", - "kayle_victor/victor", - "kayle_scraper" + "kayle_innate" ] [workspace.dependencies] diff --git a/kayle_innate/kayle_innate/Cargo.toml b/kayle_innate/kayle_innate/Cargo.toml index d5ff194..c61cd47 100644 --- a/kayle_innate/kayle_innate/Cargo.toml +++ b/kayle_innate/kayle_innate/Cargo.toml @@ -6,6 +6,7 @@ edition = "2018" license = "MIT" description = "Incredibly fast and precise universal web accessibility evaluator for puppeteer or playwright." repository = "https://github.com/a11ywatch/kayle" +categories = ["accessibility"] [lib] crate-type = ["cdylib", "rlib"] @@ -21,7 +22,7 @@ wee_alloc = { version = "0.4.5", optional = true } url = "2.4.0" lazy_static = { workspace = true } case_insensitive_string = "0.1.0" -scraper_forky = { version = "0.17.1", features = ["main"], default-features = false, path = "../kayle_scraper" } +scraper = { version = "0.17.1", features = ["main"], default-features = false } getrandom = { version = "0.2", features = ["js"] } taffy = { version = "0.3.13", optional = true } serde = { version = "1.0", features = ["derive"] } @@ -30,10 +31,10 @@ js-sys = "0.3.64" selectors = { workspace = true } smallvec = { workspace = true } ego-tree = { workspace = true } -victor_tree = { version = "0.0.7", path = "../kayle_victor/victor" } markup5ever = "0.11.0" cssparser = { workspace = true } slotmap = "1.0.6" +accessibility-rs = "0.0.2" [dev-dependencies] wasm-bindgen-test = "0.3.37" diff --git a/kayle_innate/kayle_innate/src/engine/audit/auditor.rs b/kayle_innate/kayle_innate/src/engine/audit/auditor.rs deleted file mode 100644 index 4eef968..0000000 --- a/kayle_innate/kayle_innate/src/engine/audit/auditor.rs +++ /dev/null @@ -1,69 +0,0 @@ -use super::tree::parse_accessibility_tree; -use crate::{console_log, now}; -use markup5ever::local_name; -use scraper_forky::ElementRef; -use scraper_forky::Html; -use slotmap::DefaultKey; -use taffy::Taffy; -use victor_tree::style::StyleSet; - -/// the intro to an audit -pub struct Auditor<'a> { - /// the html document - pub document: &'a Html, - /// the tree to map to nodes - pub tree: std::collections::BTreeMap<&'a str, Vec<(ElementRef<'a>, DefaultKey)>>, - /// styles for the audit - pub author: StyleSet, - /// the matching context for css selectors - pub match_context: selectors::matching::MatchingContext<'a, scraper_forky::selector::Simple>, - /// layout handling - pub taffy: Taffy, -} - -impl<'a> Auditor<'a> { - pub fn new( - document: &'a Html, - css_rules: &str, - match_context: selectors::matching::MatchingContext<'a, scraper_forky::selector::Simple>, - ) -> Auditor<'a> { - let tt = now(); - // TODO: make stylesheet building optional and only on first requirement - let author = { - let mut author = victor_tree::style::StyleSetBuilder::new(); - if !css_rules.is_empty() { - author.add_stylesheet(css_rules); - } else { - let selector = - unsafe { scraper_forky::Selector::parse("style").unwrap_unchecked() }; - let mut s = document.select(&selector); - - while let Some(node) = s.next() { - if let Some(type_attr) = node.attr(&local_name!("type")) { - if !type_attr.eq_ignore_ascii_case("text/css") { - continue; - } - author.add_stylesheet(&node.inner_html()) - } - } - } - author.finish() - }; - - console_log!("StyleSheets Build Time {:?}", now() - tt); - - let t = now(); - - let (tree, taffy, match_context) = - parse_accessibility_tree(&document, &author, match_context); - console_log!("Tree Build Time {:?}", now() - t); - - Auditor { - document, - tree, - author, - match_context, - taffy, - } - } -} diff --git a/kayle_innate/kayle_innate/src/engine/audit/mod.rs b/kayle_innate/kayle_innate/src/engine/audit/mod.rs deleted file mode 100644 index c31cc56..0000000 --- a/kayle_innate/kayle_innate/src/engine/audit/mod.rs +++ /dev/null @@ -1,6 +0,0 @@ -/// the auditor -pub mod auditor; -/// the node tree -pub mod tree; -/// WCAG audit -pub mod wcag; diff --git a/kayle_innate/kayle_innate/src/engine/audit/tree.rs b/kayle_innate/kayle_innate/src/engine/audit/tree.rs deleted file mode 100644 index 331fd66..0000000 --- a/kayle_innate/kayle_innate/src/engine/audit/tree.rs +++ /dev/null @@ -1,278 +0,0 @@ -use crate::console_log; -use ego_tree::NodeRef; -use scraper_forky::selector::Simple; -use scraper_forky::ElementRef; -use scraper_forky::Html; -use selectors::matching::MatchingContext; -use slotmap::DefaultKey; -use std::collections::BTreeMap; -use std::collections::HashSet; -use std::sync::Arc; -use taffy::prelude::*; -use taffy::style::Dimension; -use victor_tree::style::values::LengthOrPercentageOrAuto; -use victor_tree::style::ComputedValues; -use victor_tree::style::StyleSet; - -lazy_static! { - static ref NODE_IGNORE: HashSet<&'static str> = - HashSet::from(["meta", "style", "link", "script", "head", "html", "body"]); -} - -/// length to taffy dimensions -pub fn length_dimensions(v: &LengthOrPercentageOrAuto) -> Dimension { - match v { - LengthOrPercentageOrAuto::Length(l) => Dimension::Points(l.px), - LengthOrPercentageOrAuto::Percentage(l) => Dimension::Percent(l.unit_value), - LengthOrPercentageOrAuto::Auto => Dimension::Auto, - } -} - -/// layout style -pub fn node_layout_style(style: Arc, element: &ElementRef) -> Style { - let physical_size = style.box_size().size_to_physical(style.writing_mode()); - let mut size = Size { - width: length_dimensions(&physical_size.x), - height: length_dimensions(&physical_size.y), - }; - - // get the img raw height/width - if element.value().name() == "img" { - let width = element.attr("width"); - let height = element.attr("height"); - if physical_size.x.inner_px() == 0.0 { - match width { - Some(w) => { - let w = w.parse::(); - match w { - Ok(w) => { - size.width = points(w); - } - _ => (), - } - } - _ => (), - } - } - if physical_size.y.inner_px() == 0.0 { - match height { - Some(h) => { - let h = h.parse::(); - - match h { - Ok(h) => { - size.height = points(h); - } - _ => (), - } - } - _ => (), - } - } - } - - // todo: determine if all children at the top level have floats set to use flex-row - Style { - size, - border: points(style.border_width().inner_px()), - padding: points(style.padding().inner_px()), - margin: points(style.margin().inner_px()), - ..Default::default() - } -} - -/// push leaf -pub fn push_leaf<'a, 'b, 'c>( - node: &NodeRef<'_, scraper_forky::Node>, - author: &StyleSet, - document: &'a Html, - mut matching_context: &mut MatchingContext<'c, Simple>, - taffy: &mut Taffy, - mut l_leafs: &mut Vec, -) { - match ElementRef::wrap(*node) { - Some(element) => { - let name = element.value().name(); - if !NODE_IGNORE.contains(name) { - console_log!("LEAFY {:?}", name); - let style = victor_tree::style::cascade::style_for_element_ref( - &element, - &author, - &document, - &mut matching_context, - ); - - // TOOD: Only push leaf empty without children - - let leaf = taffy.new_leaf(node_layout_style(style, &element)); - - l_leafs.push(leaf.unwrap()); - - // TODO: If node has children push leaf with children - // push leaf until children finished - if node.has_children() { - let children = node.children(); - - // iterate all children - for child in children { - push_leaf( - &child, - author, - document, - matching_context, - taffy, - &mut l_leafs, - ); - } - } - } - } - _ => (), - } -} - -/// get a layout leaf a new leaf -pub fn leaf<'a, 'b, 'c>( - element: &ElementRef, - author: &StyleSet, - document: &'a Html, - mut matching_context: &mut MatchingContext<'c, Simple>, - taffy: &mut Taffy, -) -> DefaultKey { - let mut l_leafs: Vec = vec![]; - let mut children = element.children(); - - while let Some(child) = children.next() { - push_leaf( - &child, - author, - document, - matching_context, - taffy, - &mut l_leafs, - ); - } - - let style = victor_tree::style::cascade::style_for_element_ref( - &element, - &author, - &document, - &mut matching_context, - ); - - let leaf_style = node_layout_style(style, &element); - - // build leaf with children - if l_leafs.len() > 0 { - taffy.new_with_children(leaf_style, &l_leafs) - } else { - taffy.new_leaf(leaf_style) - } - .unwrap() -} - -/// try to fix all possible issues using a spec against the tree. -pub fn parse_accessibility_tree<'a, 'b, 'c>( - document: &'a Html, - author: &StyleSet, - match_context: MatchingContext<'c, Simple>, // todo: return the nodes with a tuple of the layout node and the element node -) -> ( - BTreeMap<&'a str, Vec<(ElementRef<'a>, slotmap::DefaultKey)>>, - Taffy, - MatchingContext<'c, Simple>, -) { - // TODO: make layout optional - let mut taffy = Taffy::new(); - let mut accessibility_tree: BTreeMap<&str, Vec<(ElementRef<'_>, DefaultKey)>> = - BTreeMap::from([("title".into(), Default::default())]); - let mut matching_context = match_context; - let mut layout_leafs: Vec = vec![]; - - // push taffy layout in order from elements - for node in document.tree.nodes() { - match ElementRef::wrap(node) { - Some(element) => { - let name = element.value().name(); - // TODO: determine if children are found to get entire layout of children to vector first - let layout_leaf = { - if NODE_IGNORE.contains(name) { - taffy.new_leaf(Default::default()).unwrap() - } else { - // all leafs created must be put into the body node at the end - leaf( - &element, - &author, - document, - &mut matching_context, - &mut taffy, - ) - } - }; - - // layout_leafs.push(layout_leaf.clone()); - - accessibility_tree - .entry(name) - .and_modify(|n| n.push((element, layout_leaf))) - .or_insert(Vec::from([(element, layout_leaf)])); - } - _ => (), - }; - } - - match accessibility_tree.get("body") { - Some(node) => { - for child in node[0].0.children() { - match ElementRef::wrap(child) { - Some(element) => { - let name = element.value().name(); - - if !NODE_IGNORE.contains(name) { - console_log!("BODY {:?}", name); - - let leaf = leaf( - &element, - &author, - document, - &mut matching_context, - &mut taffy, - ); - - layout_leafs.push(leaf) - } - } - _ => (), - } - } - } - _ => (), - }; - - let root_node = taffy - .new_with_children( - Style { - flex_direction: FlexDirection::Column, - // compute the default layout from CDP - size: Size { - width: points(800.0), - height: points(600.0), - }, - ..Default::default() - }, - &layout_leafs, - ) - .unwrap(); - - console_log!("Layout leafs {:?}", layout_leafs.len()); - - // TODO: set the root node to html, body in the accessibility_tree - taffy.compute_layout(root_node, Size::MAX_CONTENT).unwrap(); - - for lea in layout_leafs { - crate::console_log!("Leaf Position {:?}", taffy.layout(lea).unwrap()); - } - // console_log!("Getting tree links {:?}", accessibility_tree.get("a")); - // console_log!("Tree {:?}", accessibility_tree); - - (accessibility_tree, taffy, matching_context) -} diff --git a/kayle_innate/kayle_innate/src/engine/audit/wcag.rs b/kayle_innate/kayle_innate/src/engine/audit/wcag.rs deleted file mode 100644 index 51a0a98..0000000 --- a/kayle_innate/kayle_innate/src/engine/audit/wcag.rs +++ /dev/null @@ -1,65 +0,0 @@ -use crate::engine::rules::wcag_rule_map::RULES_A; -use crate::i18n::locales::{get_message, Langs}; -use crate::Auditor; -use crate::{console_log, engine::issue::Issue}; - -/// baseline for all rules -#[derive(Default)] -pub struct WCAG3AA; - -/// wcag rules to test for -impl WCAG3AA { - /// init the rules - pub fn audit( - // allow tree mutation until threads or setup the tree with initial elements. - auditor: &Auditor<'_>, - // todo: get configs like viewport - ) -> Vec { - let mut issues: Vec = Vec::new(); - - // go through nodes and map to validation rules - for node in &auditor.tree { - if RULES_A.contains_key(&*node.0) { - let rules = RULES_A.get(&*node.0); - match rules { - Some(rules) => { - for rule in rules { - let (valid, section, selector) = (rule.validate)(&node.0, &node.1); - - if !valid { - // get locales prior or from document - let message = - get_message(&rule.rule_id, §ion, &Langs::En.as_str()); - let issue = Issue::new( - message, - &node.0, - &[ - "WCAGAAA", - rule.principle.as_str(), - rule.guideline.as_str(), - rule.rule_id.as_str(), - ] - .join("."), - rule.criteria.as_str(), - selector, - ); - issues.push(issue); - } - - console_log!( - "RULE {:?} {:?} {:?} Valid: {:?}", - rule.rule_id, - rule.criteria, - section, - valid - ); - } - } - _ => (), - } - } - } - - issues - } -} diff --git a/kayle_innate/kayle_innate/src/engine/issue.rs b/kayle_innate/kayle_innate/src/engine/issue.rs deleted file mode 100644 index 2cb21c6..0000000 --- a/kayle_innate/kayle_innate/src/engine/issue.rs +++ /dev/null @@ -1,72 +0,0 @@ -use serde::{Deserialize, Serialize}; - -/// clip bounding box -#[derive(Default, Debug, Serialize, Deserialize)] -pub struct Clip { - /// the x coords - pub x: u32, - /// the y coords - pub y: u32, - /// the element height - pub height: u32, - /// the element width - pub width: u32, -} - -/// issue details -#[derive(Default, Debug, Serialize, Deserialize)] -pub struct RunnerExtras { - /// the url to get more information on the issue - pub help_url: &'static str, -} - -/// issue details -#[derive(Default, Debug, Serialize, Deserialize)] -pub struct Issue { - /// the context of the issue or raw html - pub context: String, - /// the selector to identify the issue with css, xpath, or raw path - pub selectors: Vec<&'static str>, - /// the type of code for the issue - pub code: String, - /// the type of issue - pub issue_type: &'static str, - /// the typecode of the issue 0,1,2 - pub type_code: u8, - /// the message of the issue - pub message: &'static str, - /// the type of runner - pub runner: &'static str, - /// extra details for the runner - pub runner_extras: RunnerExtras, - /// the amount of times the issue appeared - pub recurrence: u32, - /// the visual position of the element - pub clip: Option, -} - -impl Issue { - /// create a new issue - pub fn new( - message: &'static str, - context: &str, - code: &str, - issue_type: &'static str, - selectors: Vec<&'static str>, - ) -> Issue { - Issue { - message, - context: context.into(), - runner: "kayle", - code: code.into(), - issue_type, - type_code: match issue_type { - "error" => 0, - "warning" => 1, - _ => 2, - }, - selectors, - ..Default::default() - } - } -} diff --git a/kayle_innate/kayle_innate/src/engine/mod.rs b/kayle_innate/kayle_innate/src/engine/mod.rs deleted file mode 100644 index e2e79fc..0000000 --- a/kayle_innate/kayle_innate/src/engine/mod.rs +++ /dev/null @@ -1,8 +0,0 @@ -/// audits -pub mod audit; -/// issue handling and formats -pub mod issue; -/// rules to follow -pub mod rules; -/// styles -pub mod styles; diff --git a/kayle_innate/kayle_innate/src/engine/rules/ids.rs b/kayle_innate/kayle_innate/src/engine/rules/ids.rs deleted file mode 100644 index 5a531b0..0000000 --- a/kayle_innate/kayle_innate/src/engine/rules/ids.rs +++ /dev/null @@ -1,35 +0,0 @@ -use std::vec; - -#[derive(PartialOrd, Ord, std::cmp::Eq, PartialEq, Hash, Debug)] -/// techniques for WCAG https://www.w3.org/TR/WCAG20-TECHS/ -pub enum Techniques { - H25, - H32, -} - -impl Techniques { - /// get rule id to string - pub fn as_str(&self) -> &'static str { - match self { - Techniques::H25 => "H25", - Techniques::H32 => "H32", - } - } - /// get pairs for a rule - pub fn pairs(&self) -> Vec<&'static str> { - match self { - Techniques::H25 => vec!["H25.1.NoTitleEl", "H25.1.EmptyTitle"], - Techniques::H32 => vec!["H32.2"], - } - } - // /// get the value of the rule id with the pair - // pub fn index(&self, i: usize) -> String { - // let pair = self.pairs(); - - // if pair.len() <= i { - // self.as_str().to_owned() + pair[i] - // } else { - // self.as_str().into() - // } - // } -} diff --git a/kayle_innate/kayle_innate/src/engine/rules/mod.rs b/kayle_innate/kayle_innate/src/engine/rules/mod.rs deleted file mode 100644 index 9405d29..0000000 --- a/kayle_innate/kayle_innate/src/engine/rules/mod.rs +++ /dev/null @@ -1,8 +0,0 @@ -/// rules or techniques -pub mod ids; -/// the rule to follow -pub mod rule; -/// the base of the wcag set -pub mod wcag_base; -/// rules to map to -pub mod wcag_rule_map; diff --git a/kayle_innate/kayle_innate/src/engine/rules/rule.rs b/kayle_innate/kayle_innate/src/engine/rules/rule.rs deleted file mode 100644 index fc0d902..0000000 --- a/kayle_innate/kayle_innate/src/engine/rules/rule.rs +++ /dev/null @@ -1,42 +0,0 @@ -use slotmap::DefaultKey; - -use crate::engine::rules::ids::Techniques; -use crate::engine::rules::wcag_base::{Criteria, Guideline, Principle}; -use crate::ElementRef; - -/// the rule validation method that should be performed. -pub struct Rule { - /// the message id of the rule to point to the locale - pub rule_id: Techniques, - /// the type of rule - pub criteria: Criteria, - /// validate a test returns (valid, rule, selectors) - pub validate: - fn(&str, &Vec<(ElementRef<'_>, DefaultKey)>) -> (bool, &'static str, Vec<&'static str>), - /// the principle type - pub principle: Principle, - /// the guideline to follow - pub guideline: Guideline, -} - -impl Rule { - /// a new rule type - pub fn new( - rule_id: Techniques, - criteria: Criteria, - principle: Principle, - guideline: Guideline, - validate: fn( - &str, - &Vec<(ElementRef<'_>, DefaultKey)>, - ) -> (bool, &'static str, Vec<&'static str>), - ) -> Rule { - Rule { - rule_id, - criteria, - guideline, - principle, - validate, - } - } -} diff --git a/kayle_innate/kayle_innate/src/engine/rules/wcag_base.rs b/kayle_innate/kayle_innate/src/engine/rules/wcag_base.rs deleted file mode 100644 index acd26e4..0000000 --- a/kayle_innate/kayle_innate/src/engine/rules/wcag_base.rs +++ /dev/null @@ -1,61 +0,0 @@ -/// the success criteria to use -#[derive(Debug)] -pub enum Criteria { - /// a hard error that should be fixed - Error, - /// a warning that may be an issue - Warning, - /// a generic notice to help accessibility needs - Notice, -} - -impl Criteria { - /// get rule id to string - pub fn as_str(&self) -> &'static str { - match self { - Criteria::Error => "error", - Criteria::Warning => "warning", - Criteria::Notice => "notice", - } - } -} - -/// wcag principle to follow -pub enum Principle { - /// Provide text alternatives for any non-text content so that it can be changed into other forms people need, such as large print, braille, speech, symbols or simpler language. - Perceivable, - /// Make all functionality available from a keyboard. - Operable, - /// Make text content readable and understandable. - Understandable, - /// Maximize compatibility with current and future user agents, including assistive technologies. - Robust, -} - -impl Principle { - pub fn as_str(&self) -> &'static str { - match self { - Principle::Perceivable => "Principle1", - Principle::Operable => "Principle2", - Principle::Understandable => "Principle3", - Principle::Robust => "Principle4", - } - } -} - -/// wcag principle to follow -pub enum Guideline { - /// Provide ways to help users navigate, find content, and determine where they are. - Navigable, - /// Make Web pages appear and operate in predictable ways. - Predictable, -} - -impl Guideline { - pub fn as_str(&self) -> &'static str { - match self { - Guideline::Navigable => "Guideline2_4", - Guideline::Predictable => "Guideline3_2", - } - } -} diff --git a/kayle_innate/kayle_innate/src/engine/rules/wcag_rule_map.rs b/kayle_innate/kayle_innate/src/engine/rules/wcag_rule_map.rs deleted file mode 100644 index d77d07b..0000000 --- a/kayle_innate/kayle_innate/src/engine/rules/wcag_rule_map.rs +++ /dev/null @@ -1,42 +0,0 @@ -use crate::engine::rules::ids::Techniques; -use crate::engine::rules::rule::Rule; -use crate::engine::rules::wcag_base::{Criteria, Guideline, Principle}; -use scraper_forky::Selector; -use std::collections::BTreeMap; - -// todo: validate each element and add a shape that can prevent repitiion -lazy_static! { - /// a list of rules that should be applied for WCAG1 - pub static ref RULES_A: BTreeMap<&'static str, Vec> = - vec![ - // empty titles - ("title", Vec::from([ - Rule::new(Techniques::H25, Criteria::Error, Principle::Operable, Guideline::Navigable, |_rule, nodes| { - (!nodes.is_empty(), "1.NoTitleEl", Default::default()) - }), - Rule::new(Techniques::H25, Criteria::Error, Principle::Understandable, Guideline::Predictable, |_rule, nodes| { - (nodes.is_empty() || nodes[0].0.html().is_empty(), "2", Default::default()) - }), - ])), - // missing label - ("form", Vec::from([ - Rule::new(Techniques::H32, Criteria::Error, Principle::Operable, Guideline::Predictable, |_rule, nodes| { - // check the first element for now - let mut valid = false; - let selector = unsafe { Selector::parse("button[type=submit]").unwrap_unchecked() }; - - for ele in nodes { - let ele = ele.0; - valid = match ele.select(&selector).next() { - Some(_) => true, - _ => false - }; - } - - (valid, "2", Default::default()) - }), - ])) - ] - .into_iter() - .collect(); -} diff --git a/kayle_innate/kayle_innate/src/engine/styles/css_cache.rs b/kayle_innate/kayle_innate/src/engine/styles/css_cache.rs deleted file mode 100644 index 08ade4f..0000000 --- a/kayle_innate/kayle_innate/src/engine/styles/css_cache.rs +++ /dev/null @@ -1,11 +0,0 @@ -/// build matching context -pub fn build_matching_context<'a>( - nth_index_cache: &'a mut selectors::NthIndexCache, -) -> selectors::matching::MatchingContext<'a, scraper_forky::selector::Simple> { - selectors::matching::MatchingContext::new( - selectors::matching::MatchingMode::Normal, - None, - Some(nth_index_cache), - selectors::matching::QuirksMode::NoQuirks, - ) -} diff --git a/kayle_innate/kayle_innate/src/engine/styles/errors.rs b/kayle_innate/kayle_innate/src/engine/styles/errors.rs deleted file mode 100644 index 3114816..0000000 --- a/kayle_innate/kayle_innate/src/engine/styles/errors.rs +++ /dev/null @@ -1,19 +0,0 @@ -// use cssparser::{CowRcStr, ParseError}; -use selectors::parser::SelectorParseErrorKind; - -// pub type PropertyParseError<'i> = ParseError<'i, PropertyParseErrorKind<'i>>; - -// pub enum PropertyParseErrorKind<'i> { -// UnknownProperty(CowRcStr<'i>), -// UnknownUnit(CowRcStr<'i>), -// } - -pub enum RuleParseErrorKind<'i> { - Selector(SelectorParseErrorKind<'i>), -} - -impl<'i> From> for RuleParseErrorKind<'i> { - fn from(e: SelectorParseErrorKind<'i>) -> Self { - RuleParseErrorKind::Selector(e) - } -} diff --git a/kayle_innate/kayle_innate/src/engine/styles/mod.rs b/kayle_innate/kayle_innate/src/engine/styles/mod.rs deleted file mode 100644 index 0fda49b..0000000 --- a/kayle_innate/kayle_innate/src/engine/styles/mod.rs +++ /dev/null @@ -1,3 +0,0 @@ -pub mod css_cache; -pub mod errors; -pub mod rules; diff --git a/kayle_innate/kayle_innate/src/engine/styles/rules.rs b/kayle_innate/kayle_innate/src/engine/styles/rules.rs deleted file mode 100644 index e1a3de5..0000000 --- a/kayle_innate/kayle_innate/src/engine/styles/rules.rs +++ /dev/null @@ -1,57 +0,0 @@ -use crate::engine::styles::errors::RuleParseErrorKind; -use cssparser::{AtRuleParser, ParseError, QualifiedRuleParser}; -use scraper_forky::selector::Simple; -use std::sync::Arc; - -#[derive(Debug)] -pub enum CssRule { - StyleRule { - selectors: selectors::SelectorList, - block: Arc, - }, -} - -pub struct Parser; - -impl<'i> selectors::parser::Parser<'i> for Parser { - type Impl = Simple; - type Error = RuleParseErrorKind<'i>; -} - -pub type SelectorList = selectors::SelectorList; -// pub type Selector = selectors::parser::Selector; - -pub struct RulesParser; - -impl<'i> QualifiedRuleParser<'i> for RulesParser { - type Prelude = SelectorList; - type QualifiedRule = CssRule; - type Error = RuleParseErrorKind<'i>; - - fn parse_prelude<'t>( - &mut self, - parser: &mut cssparser::Parser<'i, 't>, - ) -> Result> { - SelectorList::parse(&Parser, parser) - } - - fn parse_block<'t>( - &mut self, - prelude: Self::Prelude, - _location: cssparser::SourceLocation, - _parser: &mut cssparser::Parser<'i, 't>, - ) -> Result> { - Ok(CssRule::StyleRule { - selectors: prelude, - block: Arc::new(String::new()), - }) - } -} - -impl<'i> AtRuleParser<'i> for RulesParser { - type PreludeBlock = (); - type PreludeNoBlock = (); - - type AtRule = CssRule; - type Error = RuleParseErrorKind<'i>; -} diff --git a/kayle_innate/kayle_innate/src/i18n/locales.rs b/kayle_innate/kayle_innate/src/i18n/locales.rs deleted file mode 100644 index 44699ff..0000000 --- a/kayle_innate/kayle_innate/src/i18n/locales.rs +++ /dev/null @@ -1,107 +0,0 @@ -use crate::engine::rules::ids::Techniques; -use std::collections::BTreeMap; - -/// messages to display for issues -#[derive(std::cmp::Eq, PartialEq, PartialOrd, Ord)] -pub struct Messages { - /// english - en: &'static str, - /// spanish - es: &'static str, - /// german - de: &'static str, - /// japanese - ja: &'static str, - /// portugese portugal - pt_pt: &'static str, - /// portugese brazil - pt_br: &'static str, - /// chinese cantanese - zh_cn: &'static str, - /// chinese traditional - zh_tw: &'static str, - /// hindi - hi: &'static str, -} - -pub enum Langs { - /// english - En, - /// spanish - Es, - /// german - De, - /// japanese - Ja, - /// portugese portugal - PtPt, - /// portugese brazil - PtBr, - /// chinese cantanese - ZhCn, - /// chinese traditional - ZhTw, - /// hindi - HI, -} - -impl Langs { - /// get the lang as a string - pub fn as_str(&self) -> &'static str { - match self { - Langs::En => "en", - Langs::Es => "es", - Langs::De => "de", - Langs::Ja => "ja", - Langs::PtPt => "pt_pt", - Langs::PtBr => "pt_br", - Langs::ZhCn => "zh_cn", - Langs::ZhTw => "zh_tw", - Langs::HI => "hi", - } - } -} - -/// the context of the issue -impl Messages { - /// create a new message - pub fn new(en: &'static str, es: &'static str, de: &'static str) -> Messages { - Messages { - en, - es, - de, - ja: &"", - pt_pt: &"", - pt_br: &"", - zh_cn: &"", - zh_tw: &"", - hi: &"", - } - } -} - -/// parse -pub fn get_message(rule_id: &Techniques, section: &str, lang: &str) -> &'static str { - let k = &[rule_id.as_str(), section].join("."); - let message = LOCALES.get(&k.as_str()); - - match message { - Some(m) => match lang { - "en" => m.en, - "es" => m.es, - _ => Default::default(), - }, - _ => Default::default(), - } -} - -lazy_static! { - /// message for an issue - pub static ref LOCALES: BTreeMap<&'static str, Messages> = { - BTreeMap::from([ - (Techniques::H25.pairs()[0], Messages::new(&"A title should be provided for the document, using a non-empty title element in the head section.", "", "")), - (Techniques::H25.pairs()[1], Messages::new(&"The title element in the head section should be non-empty.", "", "")), - (Techniques::H32.pairs()[0], Messages::new(&r###"Form does not contain a submit button (input type="submit", input type="image", or button type="submit")."###, "", "")) - ]) - }; -} diff --git a/kayle_innate/kayle_innate/src/i18n/mod.rs b/kayle_innate/kayle_innate/src/i18n/mod.rs deleted file mode 100644 index 998ffb9..0000000 --- a/kayle_innate/kayle_innate/src/i18n/mod.rs +++ /dev/null @@ -1,2 +0,0 @@ -/// localization -pub mod locales; diff --git a/kayle_innate/kayle_innate/src/lib.rs b/kayle_innate/kayle_innate/src/lib.rs index a1f851d..c7120e9 100644 --- a/kayle_innate/kayle_innate/src/lib.rs +++ b/kayle_innate/kayle_innate/src/lib.rs @@ -6,20 +6,11 @@ pub use wasm_bindgen::prelude::*; #[macro_use] extern crate lazy_static; -/// the main engine for audits. -mod engine; -/// locales for translations. -mod i18n; /// auto-kayle helpers to gather all links in a page. mod links; /// app utilities. mod utils; -#[cfg(feature = "accessibility")] -use crate::engine::audit::auditor::Auditor; -#[cfg(feature = "accessibility")] -use scraper_forky::ElementRef; - #[cfg(feature = "accessibility")] #[wasm_bindgen] extern "C" { @@ -43,33 +34,8 @@ macro_rules! console_log { #[cfg(feature = "accessibility")] /// audit a web page passing the html and css rules. pub fn _audit_not_ready(html: &str, css_rules: &str) -> Result { - let t = now(); - let document = scraper_forky::Html::parse_document(html); - console_log!("Parse Document Time {:?}", now() - t); - let mut nth_index_cache = selectors::NthIndexCache::from(Default::default()); - let auditor = Auditor::new( - &document, - &css_rules, - engine::styles::css_cache::build_matching_context(&mut nth_index_cache), - ); - let ttt = now(); - let _audit = engine::audit::wcag::WCAG3AA::audit(&auditor); - console_log!("Audit Time {:?}", now() - ttt); - - // let mut _match_context = auditor.match_context; - - // for item in auditor.tree { - // for node in item.1 { - // let _style = victor_tree::style::cascade::style_for_element_ref( - // &node, - // &auditor.author, - // &document, - // &mut _match_context, - // ); - // console_log!("{:?}", _style.as_ref().box_size()) - // } - // } + let audit = accessibility_rs::audit(&html, &css_rules); // todo: map to JsValues instead of serde - Ok(serde_wasm_bindgen::to_value(&_audit)?) + Ok(serde_wasm_bindgen::to_value(&audit)?) } diff --git a/kayle_innate/kayle_innate/src/links.rs b/kayle_innate/kayle_innate/src/links.rs index 4bb6b77..fcac3ed 100644 --- a/kayle_innate/kayle_innate/src/links.rs +++ b/kayle_innate/kayle_innate/src/links.rs @@ -32,7 +32,7 @@ pub fn get_document_links(res: &str, domain: &str) -> Box<[JsValue]> { let parent_host_scheme = base_url.scheme(); let parent_host = base_url.host_str().unwrap_or_default(); - let h = scraper_forky::Html::parse_fragment(res); + let h = scraper::Html::parse_fragment(res); h.tree .into_iter() @@ -90,7 +90,7 @@ pub fn get_document_links(res: &str, domain: &str) -> Box<[JsValue]> { .collect::>() } _ => { - let h = scraper_forky::Html::parse_fragment(res); + let h = scraper::Html::parse_fragment(res); h.tree .into_iter() diff --git a/kayle_innate/kayle_scraper/.editorconfig b/kayle_innate/kayle_scraper/.editorconfig deleted file mode 100644 index 6c52a14..0000000 --- a/kayle_innate/kayle_scraper/.editorconfig +++ /dev/null @@ -1,13 +0,0 @@ -# EditorConfig -root = true - -[*] -end_of_line = lf -charset = utf-8 -trim_trailing_whitespace = true -insert_final_newline = true -indent_style = space -indent_size = 4 - -[*.md] -trim_trailing_whitespace = false diff --git a/kayle_innate/kayle_scraper/.gitignore b/kayle_innate/kayle_scraper/.gitignore deleted file mode 100644 index cc563fb..0000000 --- a/kayle_innate/kayle_scraper/.gitignore +++ /dev/null @@ -1,3 +0,0 @@ -target -*.bk -.idea diff --git a/kayle_innate/kayle_scraper/Cargo.lock b/kayle_innate/kayle_scraper/Cargo.lock deleted file mode 100644 index 320b6b2..0000000 --- a/kayle_innate/kayle_scraper/Cargo.lock +++ /dev/null @@ -1,811 +0,0 @@ -# This file is automatically @generated by Cargo. -# It is not intended for manual editing. -version = 3 - -[[package]] -name = "ahash" -version = "0.8.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2c99f64d1e06488f620f932677e24bc6e2897582980441ae90a671415bd7ec2f" -dependencies = [ - "cfg-if", - "getrandom", - "once_cell", - "version_check", -] - -[[package]] -name = "autocfg" -version = "0.1.8" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0dde43e75fd43e8a1bf86103336bc699aa8d17ad1be60c76c0bdfd4828e19b78" -dependencies = [ - "autocfg 1.1.0", -] - -[[package]] -name = "autocfg" -version = "1.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d468802bab17cbc0cc575e9b053f41e72aa36bfa6b7f55e3529ffa43161b97fa" - -[[package]] -name = "bitflags" -version = "1.3.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a" - -[[package]] -name = "byteorder" -version = "1.4.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "14c189c53d098945499cdfa7ecc63567cf3886b3332b312a5b4585d8d3a6a610" - -[[package]] -name = "cfg-if" -version = "1.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" - -[[package]] -name = "cloudabi" -version = "0.0.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ddfc5b9aa5d4507acaf872de71051dfd0e309860e88966e1051e462a077aac4f" -dependencies = [ - "bitflags", -] - -[[package]] -name = "cssparser" -version = "0.25.9" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fbe18ca4efb9ba3716c6da66cc3d7e673bf59fa576353011f48c4cfddbdd740e" -dependencies = [ - "autocfg 0.1.8", - "cssparser-macros", - "dtoa-short", - "itoa", - "matches", - "phf 0.7.24", - "proc-macro2", - "procedural-masquerade", - "quote", - "smallvec 0.6.14", - "syn", -] - -[[package]] -name = "cssparser-macros" -version = "0.3.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5bb1c84e87c717666564ec056105052331431803d606bd45529b28547b611eef" -dependencies = [ - "phf_codegen 0.7.24", - "proc-macro2", - "procedural-masquerade", - "quote", - "syn", -] - -[[package]] -name = "dtoa" -version = "1.0.9" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dcbb2bf8e87535c23f7a8a321e364ce21462d0ff10cb6407820e8e96dfff6653" - -[[package]] -name = "dtoa-short" -version = "0.3.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dbaceec3c6e4211c79e7b1800fb9680527106beb2f9c51904a3210c03a448c74" -dependencies = [ - "dtoa", -] - -[[package]] -name = "ego-tree" -version = "0.6.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3a68a4904193147e0a8dec3314640e6db742afd5f6e634f428a6af230d9b3591" - -[[package]] -name = "equivalent" -version = "1.0.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5443807d6dff69373d433ab9ef5378ad8df50ca6298caf15de6e52e24aaf54d5" - -[[package]] -name = "fuchsia-cprng" -version = "0.1.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a06f77d526c1a601b7c4cdd98f54b5eaabffc14d5f2f0296febdc7f357c6d3ba" - -[[package]] -name = "futf" -version = "0.1.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "df420e2e84819663797d1ec6544b13c5be84629e7bb00dc960d6917db2987843" -dependencies = [ - "mac", - "new_debug_unreachable", -] - -[[package]] -name = "fxhash" -version = "0.2.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c31b6d751ae2c7f11320402d34e41349dd1016f8d5d45e48c4312bc8625af50c" -dependencies = [ - "byteorder", -] - -[[package]] -name = "getopts" -version = "0.2.21" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "14dbbfd5c71d70241ecf9e6f13737f7b5ce823821063188d7e46c41d371eebd5" -dependencies = [ - "unicode-width", -] - -[[package]] -name = "getrandom" -version = "0.2.10" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "be4136b2a15dd319360be1c07d9933517ccf0be8f16bf62a3bee4f0d618df427" -dependencies = [ - "cfg-if", - "libc", - "wasi", -] - -[[package]] -name = "hashbrown" -version = "0.14.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2c6201b9ff9fd90a5a3bac2e56a830d0caa509576f0e503818ee82c181b3437a" - -[[package]] -name = "html5ever" -version = "0.26.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bea68cab48b8459f17cf1c944c67ddc572d272d9f2b274140f223ecb1da4a3b7" -dependencies = [ - "log", - "mac", - "markup5ever", - "proc-macro2", - "quote", - "syn", -] - -[[package]] -name = "indexmap" -version = "2.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d5477fe2230a79769d8dc68e0eabf5437907c0457a5614a9e8dddb67f65eb65d" -dependencies = [ - "equivalent", - "hashbrown", -] - -[[package]] -name = "itoa" -version = "0.4.8" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b71991ff56294aa922b450139ee08b3bfc70982c6b2c7562771375cf73542dd4" - -[[package]] -name = "libc" -version = "0.2.147" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b4668fb0ea861c1df094127ac5f1da3409a82116a4ba74fca2e58ef927159bb3" - -[[package]] -name = "lock_api" -version = "0.4.10" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c1cc9717a20b1bb222f333e6a92fd32f7d8a18ddc5a3191a11af45dcbf4dcd16" -dependencies = [ - "autocfg 1.1.0", - "scopeguard", -] - -[[package]] -name = "log" -version = "0.4.19" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b06a4cde4c0f271a446782e3eff8de789548ce57dbc8eca9292c27f4a42004b4" - -[[package]] -name = "mac" -version = "0.1.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c41e0c4fef86961ac6d6f8a82609f55f31b05e4fce149ac5710e439df7619ba4" - -[[package]] -name = "markup5ever" -version = "0.11.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7a2629bb1404f3d34c2e921f21fd34ba00b206124c81f65c50b43b6aaefeb016" -dependencies = [ - "log", - "phf 0.10.1", - "phf_codegen 0.10.0", - "string_cache", - "string_cache_codegen", - "tendril", -] - -[[package]] -name = "matches" -version = "0.1.10" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2532096657941c2fea9c289d370a250971c689d4f143798ff67113ec042024a5" - -[[package]] -name = "maybe-uninit" -version = "2.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "60302e4db3a61da70c0cb7991976248362f30319e88850c487b9b95bbf059e00" - -[[package]] -name = "new_debug_unreachable" -version = "1.0.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e4a24736216ec316047a1fc4252e27dabb04218aa4a3f37c6e7ddbf1f9782b54" - -[[package]] -name = "nodrop" -version = "0.1.14" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "72ef4a56884ca558e5ddb05a1d1e7e1bfd9a68d9ed024c21704cc98872dae1bb" - -[[package]] -name = "once_cell" -version = "1.18.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dd8b5dd2ae5ed71462c540258bedcb51965123ad7e7ccf4b9a8cafaa4a63576d" - -[[package]] -name = "parking_lot" -version = "0.12.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3742b2c103b9f06bc9fff0a37ff4912935851bee6d36f3c02bcc755bcfec228f" -dependencies = [ - "lock_api", - "parking_lot_core", -] - -[[package]] -name = "parking_lot_core" -version = "0.9.8" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "93f00c865fe7cabf650081affecd3871070f26767e7b2070a3ffae14c654b447" -dependencies = [ - "cfg-if", - "libc", - "redox_syscall", - "smallvec 1.11.1", - "windows-targets", -] - -[[package]] -name = "phf" -version = "0.7.24" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b3da44b85f8e8dfaec21adae67f95d93244b2ecf6ad2a692320598dcc8e6dd18" -dependencies = [ - "phf_shared 0.7.24", -] - -[[package]] -name = "phf" -version = "0.10.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fabbf1ead8a5bcbc20f5f8b939ee3f5b0f6f281b6ad3468b84656b658b455259" -dependencies = [ - "phf_shared 0.10.0", -] - -[[package]] -name = "phf_codegen" -version = "0.7.24" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b03e85129e324ad4166b06b2c7491ae27fe3ec353af72e72cd1654c7225d517e" -dependencies = [ - "phf_generator 0.7.24", - "phf_shared 0.7.24", -] - -[[package]] -name = "phf_codegen" -version = "0.10.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4fb1c3a8bc4dd4e5cfce29b44ffc14bedd2ee294559a294e2a4d4c9e9a6a13cd" -dependencies = [ - "phf_generator 0.10.0", - "phf_shared 0.10.0", -] - -[[package]] -name = "phf_generator" -version = "0.7.24" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "09364cc93c159b8b06b1f4dd8a4398984503483891b0c26b867cf431fb132662" -dependencies = [ - "phf_shared 0.7.24", - "rand 0.6.5", -] - -[[package]] -name = "phf_generator" -version = "0.10.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5d5285893bb5eb82e6aaf5d59ee909a06a16737a8970984dd7746ba9283498d6" -dependencies = [ - "phf_shared 0.10.0", - "rand 0.8.5", -] - -[[package]] -name = "phf_shared" -version = "0.7.24" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "234f71a15de2288bcb7e3b6515828d22af7ec8598ee6d24c3b526fa0a80b67a0" -dependencies = [ - "siphasher 0.2.3", -] - -[[package]] -name = "phf_shared" -version = "0.10.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b6796ad771acdc0123d2a88dc428b5e38ef24456743ddb1744ed628f9815c096" -dependencies = [ - "siphasher 0.3.10", -] - -[[package]] -name = "ppv-lite86" -version = "0.2.17" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5b40af805b3121feab8a3c29f04d8ad262fa8e0561883e7653e024ae4479e6de" - -[[package]] -name = "precomputed-hash" -version = "0.1.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "925383efa346730478fb4838dbe9137d2a47675ad789c546d150a6e1dd4ab31c" - -[[package]] -name = "proc-macro2" -version = "1.0.66" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "18fb31db3f9bddb2ea821cde30a9f70117e3f119938b5ee630b7403aa6e2ead9" -dependencies = [ - "unicode-ident", -] - -[[package]] -name = "procedural-masquerade" -version = "0.1.7" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8f1383dff4092fe903ac180e391a8d4121cc48f08ccf850614b0290c6673b69d" - -[[package]] -name = "quote" -version = "1.0.32" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "50f3b39ccfb720540debaa0164757101c08ecb8d326b15358ce76a62c7e85965" -dependencies = [ - "proc-macro2", -] - -[[package]] -name = "rand" -version = "0.6.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6d71dacdc3c88c1fde3885a3be3fbab9f35724e6ce99467f7d9c5026132184ca" -dependencies = [ - "autocfg 0.1.8", - "libc", - "rand_chacha 0.1.1", - "rand_core 0.4.2", - "rand_hc", - "rand_isaac", - "rand_jitter", - "rand_os", - "rand_pcg", - "rand_xorshift", - "winapi", -] - -[[package]] -name = "rand" -version = "0.8.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "34af8d1a0e25924bc5b7c43c079c942339d8f0a8b57c39049bef581b46327404" -dependencies = [ - "libc", - "rand_chacha 0.3.1", - "rand_core 0.6.4", -] - -[[package]] -name = "rand_chacha" -version = "0.1.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "556d3a1ca6600bfcbab7c7c91ccb085ac7fbbcd70e008a98742e7847f4f7bcef" -dependencies = [ - "autocfg 0.1.8", - "rand_core 0.3.1", -] - -[[package]] -name = "rand_chacha" -version = "0.3.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e6c10a63a0fa32252be49d21e7709d4d4baf8d231c2dbce1eaa8141b9b127d88" -dependencies = [ - "ppv-lite86", - "rand_core 0.6.4", -] - -[[package]] -name = "rand_core" -version = "0.3.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7a6fdeb83b075e8266dcc8762c22776f6877a63111121f5f8c7411e5be7eed4b" -dependencies = [ - "rand_core 0.4.2", -] - -[[package]] -name = "rand_core" -version = "0.4.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9c33a3c44ca05fa6f1807d8e6743f3824e8509beca625669633be0acbdf509dc" - -[[package]] -name = "rand_core" -version = "0.6.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ec0be4795e2f6a28069bec0b5ff3e2ac9bafc99e6a9a7dc3547996c5c816922c" -dependencies = [ - "getrandom", -] - -[[package]] -name = "rand_hc" -version = "0.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7b40677c7be09ae76218dc623efbf7b18e34bced3f38883af07bb75630a21bc4" -dependencies = [ - "rand_core 0.3.1", -] - -[[package]] -name = "rand_isaac" -version = "0.1.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ded997c9d5f13925be2a6fd7e66bf1872597f759fd9dd93513dd7e92e5a5ee08" -dependencies = [ - "rand_core 0.3.1", -] - -[[package]] -name = "rand_jitter" -version = "0.1.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1166d5c91dc97b88d1decc3285bb0a99ed84b05cfd0bc2341bdf2d43fc41e39b" -dependencies = [ - "libc", - "rand_core 0.4.2", - "winapi", -] - -[[package]] -name = "rand_os" -version = "0.1.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7b75f676a1e053fc562eafbb47838d67c84801e38fc1ba459e8f180deabd5071" -dependencies = [ - "cloudabi", - "fuchsia-cprng", - "libc", - "rand_core 0.4.2", - "rdrand", - "winapi", -] - -[[package]] -name = "rand_pcg" -version = "0.1.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "abf9b09b01790cfe0364f52bf32995ea3c39f4d2dd011eac241d2914146d0b44" -dependencies = [ - "autocfg 0.1.8", - "rand_core 0.4.2", -] - -[[package]] -name = "rand_xorshift" -version = "0.1.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cbf7e9e623549b0e21f6e97cf8ecf247c1a8fd2e8a992ae265314300b2455d5c" -dependencies = [ - "rand_core 0.3.1", -] - -[[package]] -name = "rdrand" -version = "0.4.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "678054eb77286b51581ba43620cc911abf02758c91f93f479767aed0f90458b2" -dependencies = [ - "rand_core 0.3.1", -] - -[[package]] -name = "redox_syscall" -version = "0.3.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "567664f262709473930a4bf9e51bf2ebf3348f2e748ccc50dea20646858f8f29" -dependencies = [ - "bitflags", -] - -[[package]] -name = "scopeguard" -version = "1.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49" - -[[package]] -name = "scraper_forky" -version = "0.17.1" -dependencies = [ - "ahash", - "cssparser", - "ego-tree", - "getopts", - "html5ever", - "indexmap", - "once_cell", - "selectors", - "smallvec 0.6.14", - "tendril", -] - -[[package]] -name = "selectors" -version = "0.21.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1b86b100bede4f651059740afc3b6cb83458d7401cb7c1ad96d8a11e91742c86" -dependencies = [ - "bitflags", - "cssparser", - "fxhash", - "log", - "matches", - "phf 0.7.24", - "phf_codegen 0.7.24", - "precomputed-hash", - "servo_arc", - "smallvec 0.6.14", - "thin-slice", -] - -[[package]] -name = "serde" -version = "1.0.183" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "32ac8da02677876d532745a130fc9d8e6edfa81a269b107c5b00829b91d8eb3c" - -[[package]] -name = "servo_arc" -version = "0.1.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d98238b800e0d1576d8b6e3de32827c2d74bee68bb97748dcf5071fb53965432" -dependencies = [ - "nodrop", - "stable_deref_trait", -] - -[[package]] -name = "siphasher" -version = "0.2.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0b8de496cf83d4ed58b6be86c3a275b8602f6ffe98d3024a869e124147a9a3ac" - -[[package]] -name = "siphasher" -version = "0.3.10" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7bd3e3206899af3f8b12af284fafc038cc1dc2b41d1b89dd17297221c5d225de" - -[[package]] -name = "smallvec" -version = "0.6.14" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b97fcaeba89edba30f044a10c6a3cc39df9c3f17d7cd829dd1446cab35f890e0" -dependencies = [ - "maybe-uninit", -] - -[[package]] -name = "smallvec" -version = "1.11.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "942b4a808e05215192e39f4ab80813e599068285906cc91aa64f923db842bd5a" - -[[package]] -name = "stable_deref_trait" -version = "1.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a8f112729512f8e442d81f95a8a7ddf2b7c6b8a1a6f509a95864142b30cab2d3" - -[[package]] -name = "string_cache" -version = "0.8.7" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f91138e76242f575eb1d3b38b4f1362f10d3a43f47d182a5b359af488a02293b" -dependencies = [ - "new_debug_unreachable", - "once_cell", - "parking_lot", - "phf_shared 0.10.0", - "precomputed-hash", - "serde", -] - -[[package]] -name = "string_cache_codegen" -version = "0.5.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6bb30289b722be4ff74a408c3cc27edeaad656e06cb1fe8fa9231fa59c728988" -dependencies = [ - "phf_generator 0.10.0", - "phf_shared 0.10.0", - "proc-macro2", - "quote", -] - -[[package]] -name = "syn" -version = "1.0.109" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "72b64191b275b66ffe2469e8af2c1cfe3bafa67b529ead792a6d0160888b4237" -dependencies = [ - "proc-macro2", - "quote", - "unicode-ident", -] - -[[package]] -name = "tendril" -version = "0.4.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d24a120c5fc464a3458240ee02c299ebcb9d67b5249c8848b09d639dca8d7bb0" -dependencies = [ - "futf", - "mac", - "utf-8", -] - -[[package]] -name = "thin-slice" -version = "0.1.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8eaa81235c7058867fa8c0e7314f33dcce9c215f535d1913822a2b3f5e289f3c" - -[[package]] -name = "unicode-ident" -version = "1.0.11" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "301abaae475aa91687eb82514b328ab47a211a533026cb25fc3e519b86adfc3c" - -[[package]] -name = "unicode-width" -version = "0.1.10" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c0edd1e5b14653f783770bce4a4dabb4a5108a5370a5f5d8cfe8710c361f6c8b" - -[[package]] -name = "utf-8" -version = "0.7.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "09cc8ee72d2a9becf2f2febe0205bbed8fc6615b7cb429ad062dc7b7ddd036a9" - -[[package]] -name = "version_check" -version = "0.9.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "49874b5167b65d7193b8aba1567f5c7d93d001cafc34600cee003eda787e483f" - -[[package]] -name = "wasi" -version = "0.11.0+wasi-snapshot-preview1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9c8d87e72b64a3b4db28d11ce29237c246188f4f51057d65a7eab63b7987e423" - -[[package]] -name = "winapi" -version = "0.3.9" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5c839a674fcd7a98952e593242ea400abe93992746761e38641405d28b00f419" -dependencies = [ - "winapi-i686-pc-windows-gnu", - "winapi-x86_64-pc-windows-gnu", -] - -[[package]] -name = "winapi-i686-pc-windows-gnu" -version = "0.4.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6" - -[[package]] -name = "winapi-x86_64-pc-windows-gnu" -version = "0.4.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" - -[[package]] -name = "windows-targets" -version = "0.48.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "05d4b17490f70499f20b9e791dcf6a299785ce8af4d709018206dc5b4953e95f" -dependencies = [ - "windows_aarch64_gnullvm", - "windows_aarch64_msvc", - "windows_i686_gnu", - "windows_i686_msvc", - "windows_x86_64_gnu", - "windows_x86_64_gnullvm", - "windows_x86_64_msvc", -] - -[[package]] -name = "windows_aarch64_gnullvm" -version = "0.48.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "91ae572e1b79dba883e0d315474df7305d12f569b400fcf90581b06062f7e1bc" - -[[package]] -name = "windows_aarch64_msvc" -version = "0.48.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b2ef27e0d7bdfcfc7b868b317c1d32c641a6fe4629c171b8928c7b08d98d7cf3" - -[[package]] -name = "windows_i686_gnu" -version = "0.48.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "622a1962a7db830d6fd0a69683c80a18fda201879f0f447f065a3b7467daa241" - -[[package]] -name = "windows_i686_msvc" -version = "0.48.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4542c6e364ce21bf45d69fdd2a8e455fa38d316158cfd43b3ac1c5b1b19f8e00" - -[[package]] -name = "windows_x86_64_gnu" -version = "0.48.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ca2b8a661f7628cbd23440e50b05d705db3686f894fc9580820623656af974b1" - -[[package]] -name = "windows_x86_64_gnullvm" -version = "0.48.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7896dbc1f41e08872e9d5e8f8baa8fdd2677f29468c4e156210174edc7f7b953" - -[[package]] -name = "windows_x86_64_msvc" -version = "0.48.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1a515f5799fe4961cb532f983ce2b23082366b898e52ffbce459c86f67c8378a" diff --git a/kayle_innate/kayle_scraper/Cargo.toml b/kayle_innate/kayle_scraper/Cargo.toml deleted file mode 100644 index 51945d8..0000000 --- a/kayle_innate/kayle_scraper/Cargo.toml +++ /dev/null @@ -1,42 +0,0 @@ -[package] -name = "scraper_forky" -version = "0.17.1" -edition = "2021" - -description = "HTML parsing and querying with CSS selectors" -keywords = ["html", "css", "selector", "scraping"] - -authors = [ - "June McEnroe ", -] -license = "ISC" - -repository = "https://github.com/causal-agent/scraper" -readme = "README.md" - -[dependencies] -cssparser = { workspace = true } -ego-tree = { workspace = true } -html5ever = { workspace = true } -selectors = { workspace = true } -smallvec = "0.6.14" -tendril = "0.4.3" -ahash = "0.8" -indexmap = { version = "2.0.0", optional = true } -once_cell = "1.0" - -[dependencies.getopts] -version = "0.2.21" -optional = true - -[features] -default = ["main", "errors"] -deterministic = ["indexmap"] -main = ["getopts"] -atomic = [] -errors = [] - -[[bin]] -name = "scraper" -path = "src/main.rs" -required-features = ["main"] diff --git a/kayle_innate/kayle_scraper/LICENSE b/kayle_innate/kayle_scraper/LICENSE deleted file mode 100644 index 3c78752..0000000 --- a/kayle_innate/kayle_scraper/LICENSE +++ /dev/null @@ -1,14 +0,0 @@ -Copyright © 2016, June McEnroe -Copyright © 2017, Vivek Kushwaha - -Permission to use, copy, modify, and/or distribute this software for any -purpose with or without fee is hereby granted, provided that the above -copyright notice and this permission notice appear in all copies. - -THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES -WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF -MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR -ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES -WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN -ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF -OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. diff --git a/kayle_innate/kayle_scraper/README.md b/kayle_innate/kayle_scraper/README.md deleted file mode 100644 index 39450ec..0000000 --- a/kayle_innate/kayle_scraper/README.md +++ /dev/null @@ -1,152 +0,0 @@ -# scraper - -[![crates.io](https://img.shields.io/crates/v/scraper?color=dark-green)][crate] -[![downloads](https://img.shields.io/crates/d/scraper)][crate] -[![test](https://github.com/causal-agent/scraper/actions/workflows/test.yml/badge.svg)][tests] - -HTML parsing and querying with CSS selectors. - -`scraper` is on [Crates.io][crate] and [GitHub][github]. - -[crate]: https://crates.io/crates/scraper -[github]: https://github.com/causal-agent/scraper -[tests]: https://github.com/causal-agent/scraper/actions/workflows/test.yml - -Scraper provides an interface to Servo's `html5ever` and `selectors` crates, for browser-grade parsing and querying. - -## Examples - -### Parsing a document - -```rust -use scraper::Html; - -let html = r#" - - - Hello, world! -

Hello, world!

-"#; - -let document = Html::parse_document(html); -``` - -### Parsing a fragment - -```rust -use scraper::Html; -let fragment = Html::parse_fragment("

Hello, world!

"); -``` - -### Parsing a selector - -```rust -use scraper::Selector; -let selector = Selector::parse("h1.foo").unwrap(); -``` - -### Selecting elements - -```rust -use scraper::{Html, Selector}; - -let html = r#" -
    -
  • Foo
  • -
  • Bar
  • -
  • Baz
  • -
-"#; - -let fragment = Html::parse_fragment(html); -let selector = Selector::parse("li").unwrap(); - -for element in fragment.select(&selector) { - assert_eq!("li", element.value().name()); -} -``` - -### Selecting descendent elements - -```rust -use scraper::{Html, Selector}; - -let html = r#" -
    -
  • Foo
  • -
  • Bar
  • -
  • Baz
  • -
-"#; - -let fragment = Html::parse_fragment(html); -let ul_selector = Selector::parse("ul").unwrap(); -let li_selector = Selector::parse("li").unwrap(); - -let ul = fragment.select(&ul_selector).next().unwrap(); -for element in ul.select(&li_selector) { - assert_eq!("li", element.value().name()); -} -``` - -### Accessing element attributes - -```rust -use scraper::{Html, Selector}; - -let fragment = Html::parse_fragment(r#""#); -let selector = Selector::parse(r#"input[name="foo"]"#).unwrap(); - -let input = fragment.select(&selector).next().unwrap(); -assert_eq!(Some("bar"), input.value().attr("value")); -``` - -### Serializing HTML and inner HTML - -```rust -use scraper::{Html, Selector}; - -let fragment = Html::parse_fragment("

Hello, world!

"); -let selector = Selector::parse("h1").unwrap(); - -let h1 = fragment.select(&selector).next().unwrap(); - -assert_eq!("

Hello, world!

", h1.html()); -assert_eq!("Hello, world!", h1.inner_html()); -``` - -### Accessing descendent text - -```rust -use scraper::{Html, Selector}; - -let fragment = Html::parse_fragment("

Hello, world!

"); -let selector = Selector::parse("h1").unwrap(); - -let h1 = fragment.select(&selector).next().unwrap(); -let text = h1.text().collect::>(); - -assert_eq!(vec!["Hello, ", "world!"], text); -``` - -### Manipulating the DOM - -```rust -use html5ever::tree_builder::TreeSink; -use scraper::{Html, Selector}; - -let html = "hello

REMOVE ME

"; -let selector = Selector::parse(".hello").unwrap(); -let mut document = Html::parse_document(html); -let node_ids: Vec<_> = document.select(&selector).map(|x| x.id()).collect(); -for id in node_ids { - document.remove_from_parent(&id); -} -assert_eq!(document.html(), "hello"); -``` - -## Contributing - -Please feel free to open pull requests. If you're planning on implementing -something big (i.e. not fixing a typo, a small bug fix, minor refactor, etc) -then please open an issue first. diff --git a/kayle_innate/kayle_scraper/examples/document.rs b/kayle_innate/kayle_scraper/examples/document.rs deleted file mode 100644 index 1e0fde1..0000000 --- a/kayle_innate/kayle_scraper/examples/document.rs +++ /dev/null @@ -1,28 +0,0 @@ -extern crate scraper_forky; - -use std::io::{self, Read, Write}; - -use scraper_forky::{Html, Selector}; - -fn main() { - let mut input = String::new(); - let mut stdout = io::stdout(); - let mut stdin = io::stdin(); - - write!(stdout, "CSS selector: ").unwrap(); - stdout.flush().unwrap(); - stdin.read_line(&mut input).unwrap(); - let selector = Selector::parse(&input).unwrap(); - - writeln!(stdout, "HTML document:").unwrap(); - stdout.flush().unwrap(); - input.clear(); - stdin.read_to_string(&mut input).unwrap(); - let document = Html::parse_document(&input); - - println!("{:#?}", document); - - for node in document.select(&selector) { - println!("{:?}", node.value()); - } -} diff --git a/kayle_innate/kayle_scraper/examples/fragment.rs b/kayle_innate/kayle_scraper/examples/fragment.rs deleted file mode 100644 index 4bc814b..0000000 --- a/kayle_innate/kayle_scraper/examples/fragment.rs +++ /dev/null @@ -1,28 +0,0 @@ -extern crate scraper_forky; - -use std::io::{self, Read, Write}; - -use scraper_forky::{Html, Selector}; - -fn main() { - let mut input = String::new(); - let mut stdout = io::stdout(); - let mut stdin = io::stdin(); - - write!(stdout, "CSS selector: ").unwrap(); - stdout.flush().unwrap(); - stdin.read_line(&mut input).unwrap(); - let selector = Selector::parse(&input).unwrap(); - - writeln!(stdout, "HTML fragment:").unwrap(); - stdout.flush().unwrap(); - input.clear(); - stdin.read_to_string(&mut input).unwrap(); - let fragment = Html::parse_fragment(&input); - - println!("{:#?}", fragment); - - for node in fragment.select(&selector) { - println!("{:?}", node.value()); - } -} diff --git a/kayle_innate/kayle_scraper/scraper.1 b/kayle_innate/kayle_scraper/scraper.1 deleted file mode 100644 index daa2110..0000000 --- a/kayle_innate/kayle_scraper/scraper.1 +++ /dev/null @@ -1,72 +0,0 @@ -.Dd October 29, 2018 -.Dt SCRAPER 1 -.Os -. -.Sh NAME -.Nm scraper -.Nd HTML querying with CSS selectors -. -.Sh SYNOPSIS -.Nm -.Op Fl HIcint -.Op Fl a Ar attr -.Op Fl d | f -.Ar selector -.Op Ar -. -.Sh DESCRIPTION -The -.Nm -utility parses HTML and outputs elements matching CSS selectors. -. -.Pp -The arguments are as follows: -.Bl -tag -width Ds -. -.It Fl H , Fl \-html -Output the HTML of the matching elements. -This is the default. -. -.It Fl I , Fl \-inner\-html -Output the inner HTML of the matching elements. -. -.It Fl a Ar attr , Fl \-attr Ar attr -Output the value of the attribute -.Ar attr -of the matching elements. -. -.It Fl c , Fl \-classes -Output the classes of the matching elements. -. -.It Fl d , Fl \-document -Parse the input as HTML documents. -This is the default. -. -.It Fl f , Fl \-fragment -Parse the input as HTML fragments. -. -.It Fl i , Fl \-id -Output the IDs of the matching elements. -. -.It Fl n , Fl \-name -Output the names of the matching elements. -. -.It Fl t , Fl \-text -Output the text of the matching elements. -.El -. -.Sh EXIT STATUS -The -.Nm -utility exits 0 on success, -1 if no elements match, -and >1 if an error occurs. -. -.Sh AUTHORS -.An June McEnroe Aq Mt june@causal.agency -.An Vivek Kushwaha Aq Mt yoursvivek@gmail.com -.Pp -The -.Nm -utility relies heavily on code from the -.Lk https://servo.org "Servo project" . diff --git a/kayle_innate/kayle_scraper/src/element_ref/element.rs b/kayle_innate/kayle_scraper/src/element_ref/element.rs deleted file mode 100644 index 9e704e0..0000000 --- a/kayle_innate/kayle_scraper/src/element_ref/element.rs +++ /dev/null @@ -1,214 +0,0 @@ -use html5ever::Namespace; -use selectors::{ - attr::{AttrSelectorOperation, CaseSensitivity, NamespaceConstraint}, - matching, Element, OpaqueElement, -}; - -use super::ElementRef; -use crate::selector::{CssLocalName, CssString, NonTSPseudoClass, PseudoElement, Simple}; - -/// Note: will never match against non-tree-structure pseudo-classes. -impl<'a> Element for ElementRef<'a> { - type Impl = Simple; - - fn local_name(&self) -> &::BorrowedLocalName { - &self.value().local_name - } - - fn namespace(&self) -> &Namespace { - &self.value().name.ns - } - - fn opaque(&self) -> OpaqueElement { - OpaqueElement::new(self.node.value()) - } - - fn parent_element(&self) -> Option { - self.parent().and_then(ElementRef::wrap) - } - - fn parent_node_is_shadow_root(&self) -> bool { - false - } - - fn containing_shadow_host(&self) -> Option { - None - } - - // fn is_pseudo_element(&self) -> bool { - // false - // } - - // fn is_part(&self, _name: &CssLocalName) -> bool { - // false - // } - - // fn is_same_type(&self, other: &Self) -> bool { - // self.value().name == other.value().name - // } - - // fn imported_part(&self, _: &CssLocalName) -> Option { - // None - // } - - fn prev_sibling_element(&self) -> Option { - self.prev_siblings() - .find(|sibling| sibling.value().is_element()) - .map(ElementRef::new) - } - - fn next_sibling_element(&self) -> Option { - self.next_siblings() - .find(|sibling| sibling.value().is_element()) - .map(ElementRef::new) - } - - // fn first_element_child(&self) -> Option { - // self.children() - // .find(|child| child.value().is_element()) - // .map(ElementRef::new) - // } - - fn is_html_element_in_html_document(&self) -> bool { - // FIXME: Is there more to this? - self.value().name.ns == ns!(html) - } - - // fn has_local_name(&self, name: &CssLocalName) -> bool { - // self.value().name.local == name.0 - // } - - // fn has_namespace(&self, namespace: &Namespace) -> bool { - // &self.value().name.ns == namespace - // } - - fn attr_matches( - &self, - ns: &NamespaceConstraint<&Namespace>, - local_name: &CssLocalName, - operation: &AttrSelectorOperation<&CssString>, - ) -> bool { - self.value().attrs.iter().any(|(key, value)| { - !matches!(*ns, NamespaceConstraint::Specific(url) if *url != key.ns) - && local_name.0 == key.local - && operation.eval_str(value) - }) - } - - fn match_non_ts_pseudo_class( - &self, - pseudo_class: &NonTSPseudoClass, - _context: &mut selectors::matching::MatchingContext, - _flags_setter: &mut F, - ) -> bool - where - F: FnMut(&Self, selectors::matching::ElementSelectorFlags), - { - match *pseudo_class {} - } - - fn match_pseudo_element( - &self, - _pe: &PseudoElement, - _context: &mut matching::MatchingContext, - ) -> bool { - false - } - - fn is_link(&self) -> bool { - self.value().name() == "link" - } - - fn is_html_slot_element(&self) -> bool { - true - } - - fn has_id(&self, id: &CssLocalName, case_sensitivity: CaseSensitivity) -> bool { - match self.value().id() { - Some(val) => case_sensitivity.eq(id.0.as_bytes(), val.as_bytes()), - None => false, - } - } - - fn has_class(&self, name: &CssLocalName, case_sensitivity: CaseSensitivity) -> bool { - self.value().has_class(&name.0, case_sensitivity) - } - - fn is_empty(&self) -> bool { - !self - .children() - .any(|child| child.value().is_element() || child.value().is_text()) - } - - fn is_root(&self) -> bool { - self.parent() - .map_or(false, |parent| parent.value().is_document()) - } - - // fn apply_selector_flags(&self, _flags: matching::ElementSelectorFlags) {} -} - -#[cfg(test)] -mod tests { - use crate::html::Html; - use crate::selector::{CssLocalName, Selector}; - use selectors::attr::CaseSensitivity; - use selectors::Element; - - #[test] - fn test_has_id() { - let html = ""; - let fragment = Html::parse_fragment(html); - let sel = Selector::parse("p").unwrap(); - - let element = fragment.select(&sel).next().unwrap(); - assert!(element.has_id( - &CssLocalName::from("link_id_456"), - CaseSensitivity::CaseSensitive - )); - - let html = "

hey there

"; - let fragment = Html::parse_fragment(html); - let element = fragment.select(&sel).next().unwrap(); - assert!(!element.has_id( - &CssLocalName::from("any_link_id"), - CaseSensitivity::CaseSensitive - )); - } - - #[test] - fn test_is_link() { - let html = ""; - let fragment = Html::parse_fragment(html); - let sel = Selector::parse("link").unwrap(); - let element = fragment.select(&sel).next().unwrap(); - assert!(element.is_link()); - - let html = "

hey there

"; - let fragment = Html::parse_fragment(html); - let sel = Selector::parse("p").unwrap(); - let element = fragment.select(&sel).next().unwrap(); - assert!(!element.is_link()); - } - - #[test] - fn test_has_class() { - let html = "

hey there

"; - let fragment = Html::parse_fragment(html); - let sel = Selector::parse("p").unwrap(); - let element = fragment.select(&sel).next().unwrap(); - assert!(element.has_class( - &CssLocalName::from("my_class"), - CaseSensitivity::CaseSensitive - )); - - let html = "

hey there

"; - let fragment = Html::parse_fragment(html); - let sel = Selector::parse("p").unwrap(); - let element = fragment.select(&sel).next().unwrap(); - assert!(!element.has_class( - &CssLocalName::from("my_class"), - CaseSensitivity::CaseSensitive - )); - } -} diff --git a/kayle_innate/kayle_scraper/src/element_ref/mod.rs b/kayle_innate/kayle_scraper/src/element_ref/mod.rs deleted file mode 100644 index c53b078..0000000 --- a/kayle_innate/kayle_scraper/src/element_ref/mod.rs +++ /dev/null @@ -1,166 +0,0 @@ -//! Element references. - -use std::ops::Deref; - -use ego_tree::iter::{Edge, Traverse}; -use ego_tree::NodeRef; -use html5ever::serialize::{serialize, SerializeOpts, TraversalScope}; - -use crate::node::Element; -use crate::{Node, Selector}; - -/// Wrapper around a reference to an element node. -/// -/// This wrapper implements the `Element` trait from the `selectors` crate, which allows it to be -/// matched against CSS selectors. -#[derive(Debug, Clone, Copy, PartialEq, Eq)] -pub struct ElementRef<'a> { - node: NodeRef<'a, Node>, -} - -impl<'a> ElementRef<'a> { - fn new(node: NodeRef<'a, Node>) -> Self { - ElementRef { node } - } - - /// Wraps a `NodeRef` only if it references a `Node::Element`. - pub fn wrap(node: NodeRef<'a, Node>) -> Option { - if node.value().is_element() { - Some(ElementRef::new(node)) - } else { - None - } - } - - /// Returns the `Element` referenced by `self`. - pub fn value(&self) -> &'a Element { - self.node.value().as_element().unwrap() - } - - /// Returns an iterator over descendent elements matching a selector. - pub fn select<'b>(&self, selector: &'b Selector) -> Select<'a, 'b> { - let mut inner = self.traverse(); - inner.next(); // Skip Edge::Open(self). - - Select { - scope: *self, - inner, - selector, - } - } - - fn serialize(&self, traversal_scope: TraversalScope) -> String { - let opts = SerializeOpts { - scripting_enabled: false, // It's not clear what this does. - traversal_scope, - create_missing_parent: false, - }; - let mut buf = Vec::new(); - serialize(&mut buf, self, opts).unwrap(); - String::from_utf8(buf).unwrap() - } - - /// Returns the HTML of this element. - pub fn html(&self) -> String { - self.serialize(TraversalScope::IncludeNode) - } - - /// Returns the inner HTML of this element. - pub fn inner_html(&self) -> String { - self.serialize(TraversalScope::ChildrenOnly(None)) - } - - /// Returns the value of an attribute. - pub fn attr(&self, attr: &str) -> Option<&str> { - self.value().attr(attr) - } - - /// Returns an iterator over descendent text nodes. - pub fn text(&self) -> Text<'a> { - Text { - inner: self.traverse(), - } - } -} - -impl<'a> Deref for ElementRef<'a> { - type Target = NodeRef<'a, Node>; - fn deref(&self) -> &NodeRef<'a, Node> { - &self.node - } -} - -/// Iterator over descendent elements matching a selector. -#[derive(Debug, Clone)] -pub struct Select<'a, 'b> { - scope: ElementRef<'a>, - inner: Traverse<'a, Node>, - selector: &'b Selector, -} - -impl<'a, 'b> Iterator for Select<'a, 'b> { - type Item = ElementRef<'a>; - - fn next(&mut self) -> Option> { - for edge in &mut self.inner { - if let Edge::Open(node) = edge { - if let Some(element) = ElementRef::wrap(node) { - if self.selector.matches_with_scope(&element, Some(self.scope)) { - return Some(element); - } - } - } - } - None - } -} - -/// Iterator over descendent text nodes. -#[derive(Debug, Clone)] -pub struct Text<'a> { - inner: Traverse<'a, Node>, -} - -impl<'a> Iterator for Text<'a> { - type Item = &'a str; - - fn next(&mut self) -> Option<&'a str> { - for edge in &mut self.inner { - if let Edge::Open(node) = edge { - if let Node::Text(ref text) = node.value() { - return Some(&**text); - } - } - } - None - } -} - -mod element; -mod serializable; - -#[cfg(test)] -mod tests { - use crate::html::Html; - use crate::selector::Selector; - - #[test] - fn test_scope() { - let html = r" -
- 1 - - 2 - 3 - -
- "; - let fragment = Html::parse_fragment(html); - let sel1 = Selector::parse("div > span").unwrap(); - let sel2 = Selector::parse(":scope > b").unwrap(); - - let element1 = fragment.select(&sel1).next().unwrap(); - let element2 = element1.select(&sel2).next().unwrap(); - assert_eq!(element2.inner_html(), "3"); - } -} diff --git a/kayle_innate/kayle_scraper/src/element_ref/serializable.rs b/kayle_innate/kayle_scraper/src/element_ref/serializable.rs deleted file mode 100644 index 98dda70..0000000 --- a/kayle_innate/kayle_scraper/src/element_ref/serializable.rs +++ /dev/null @@ -1,15 +0,0 @@ -use std::io::Error; - -use html5ever::serialize::{Serialize, Serializer, TraversalScope}; - -use crate::ElementRef; - -impl<'a> Serialize for ElementRef<'a> { - fn serialize( - &self, - serializer: &mut S, - traversal_scope: TraversalScope, - ) -> Result<(), Error> { - crate::node::serializable::serialize(**self, serializer, traversal_scope) - } -} diff --git a/kayle_innate/kayle_scraper/src/error.rs b/kayle_innate/kayle_scraper/src/error.rs deleted file mode 100644 index 15141ee..0000000 --- a/kayle_innate/kayle_scraper/src/error.rs +++ /dev/null @@ -1,119 +0,0 @@ -//! Custom error types for diagnostics -//! Includes re-exported error types from dependencies - -mod utils; - -use std::{error::Error, fmt::Display}; - -use cssparser::{BasicParseErrorKind, ParseErrorKind, Token}; -use selectors::parser::SelectorParseErrorKind; - -/// Error type that is returned when calling `Selector::parse` -#[derive(Debug, Clone)] -pub enum SelectorErrorKind<'a> { - /// A `Token` was not expected - UnexpectedToken(Token<'a>), - - /// End-Of-Line was unexpected - EndOfLine, - - /// `@` rule is invalid - InvalidAtRule(String), - - /// The body of an `@` rule is invalid - InvalidAtRuleBody, - - /// The qualified rule is invalid - QualRuleInvalid, - - /// Expected a `::` for a pseudoelement - ExpectedColonOnPseudoElement(Token<'a>), - - /// Expected an identity for a pseudoelement - ExpectedIdentityOnPseudoElement(Token<'a>), - - /// A `SelectorParseErrorKind` error that isn't really supposed to happen did - UnexpectedSelectorParseError(SelectorParseErrorKind<'a>), -} - -impl<'a> From>> for SelectorErrorKind<'a> { - fn from(original: cssparser::ParseError<'a, SelectorParseErrorKind<'a>>) -> Self { - // NOTE: This could be improved, but I dont - // exactly know how - match original.kind { - ParseErrorKind::Basic(err) => SelectorErrorKind::from(err), - ParseErrorKind::Custom(err) => SelectorErrorKind::from(err), - } - } -} - -impl<'a> From> for SelectorErrorKind<'a> { - fn from(err: BasicParseErrorKind<'a>) -> Self { - match err { - BasicParseErrorKind::UnexpectedToken(token) => Self::UnexpectedToken(token), - BasicParseErrorKind::EndOfInput => Self::EndOfLine, - BasicParseErrorKind::AtRuleInvalid(rule) => Self::InvalidAtRule(rule.to_string()), - BasicParseErrorKind::AtRuleBodyInvalid => Self::InvalidAtRuleBody, - BasicParseErrorKind::QualifiedRuleInvalid => Self::QualRuleInvalid, - } - } -} - -impl<'a> From> for SelectorErrorKind<'a> { - fn from(err: SelectorParseErrorKind<'a>) -> Self { - match err { - SelectorParseErrorKind::PseudoElementExpectedColon(token) => { - Self::ExpectedColonOnPseudoElement(token) - } - SelectorParseErrorKind::PseudoElementExpectedIdent(token) => { - Self::ExpectedIdentityOnPseudoElement(token) - } - other => Self::UnexpectedSelectorParseError(other), - } - } -} - -impl<'a> Display for SelectorErrorKind<'a> { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - write!( - f, - "{}", - match self { - Self::UnexpectedToken(token) => { - format!("Token {:?} was not expected", utils::render_token(token)) - } - Self::EndOfLine => "Unexpected EOL".to_string(), - Self::InvalidAtRule(rule) => format!("Invalid @-rule {:?}", rule), - Self::InvalidAtRuleBody => "The body of an @-rule was invalid".to_string(), - Self::QualRuleInvalid => "The qualified name was invalid".to_string(), - Self::ExpectedColonOnPseudoElement(token) => format!( - "Expected a ':' token for pseudoelement, got {:?} instead", - utils::render_token(token) - ), - Self::ExpectedIdentityOnPseudoElement(token) => format!( - "Expected identity for pseudoelement, got {:?} instead", - utils::render_token(token) - ), - Self::UnexpectedSelectorParseError(err) => format!( - "Unexpected error occurred. Please report this to the developer\n{:#?}", - err - ), - } - ) - } -} - -impl<'a> Error for SelectorErrorKind<'a> { - fn description(&self) -> &str { - match self { - Self::UnexpectedToken(_) => "Token was not expected", - Self::EndOfLine => "Unexpected EOL", - Self::InvalidAtRule(_) => "Invalid @-rule", - Self::InvalidAtRuleBody => "The body of an @-rule was invalid", - Self::QualRuleInvalid => "The qualified name was invalid", - Self::ExpectedColonOnPseudoElement(_) => "Missing colon character on pseudoelement", - Self::ExpectedIdentityOnPseudoElement(_) => "Missing pseudoelement identity", - Self::UnexpectedSelectorParseError(_) => "Unexpected error", - } - } -} diff --git a/kayle_innate/kayle_scraper/src/error/utils.rs b/kayle_innate/kayle_scraper/src/error/utils.rs deleted file mode 100644 index a1bcb0e..0000000 --- a/kayle_innate/kayle_scraper/src/error/utils.rs +++ /dev/null @@ -1,91 +0,0 @@ -use cssparser::Token; - -pub(crate) fn render_token(token: &Token<'_>) -> String { - // THIS TOOK FOREVER TO IMPLEMENT - - match token { - // TODO: Give these guys some better names - Token::Ident(ident) => format!("{}", ident.clone()), - Token::AtKeyword(value) => format!("@{}", value.clone()), - Token::Hash(name) | Token::IDHash(name) => format!("#{}", name.clone()), - Token::QuotedString(value) => format!("\"{}\"", value.clone()), - Token::Number { - has_sign: signed, - value: num, - int_value: _, - } - | Token::Percentage { - has_sign: signed, - unit_value: num, - int_value: _, - } => render_number(*signed, *num, token), - Token::Dimension { - has_sign: signed, - value: num, - int_value: _, - unit, - } => format!("{}{}", render_int(*signed, *num), unit), - Token::WhiteSpace(_) => String::from(" "), - Token::Comment(comment) => format!("/* {} */", comment), - Token::Function(name) => format!("{}()", name.clone()), - Token::BadString(string) => format!("", string.clone()), - Token::BadUrl(url) => format!("", url.clone()), - // Single-character token - sc_token => render_single_char_token(sc_token), - } -} - -fn render_single_char_token(token: &Token) -> String { - String::from(match token { - Token::Colon => ":", - Token::Semicolon => ";", - Token::Comma => ",", - Token::IncludeMatch => "~=", - Token::DashMatch => "|=", - Token::PrefixMatch => "^=", - Token::SuffixMatch => "$=", - Token::SubstringMatch => "*=", - Token::CDO => "", - Token::ParenthesisBlock => "<(", - Token::SquareBracketBlock => "<[", - Token::CurlyBracketBlock => "<{", - Token::CloseParenthesis => "<)", - Token::CloseSquareBracket => "<]", - Token::CloseCurlyBracket => "<}", - other => panic!( - "Token {:?} is not supposed to match as a single-character token!", - other - ), - }) -} - -fn render_number(signed: bool, num: f32, token: &Token) -> String { - let num = render_int(signed, num); - - match token { - Token::Number { .. } => num, - Token::Percentage { .. } => format!("{}%", num), - _ => panic!("render_number is not supposed to be called on a non-numerical token"), - } -} - -fn render_int(signed: bool, num: f32) -> String { - if signed { - render_int_signed(num) - } else { - render_int_unsigned(num) - } -} - -fn render_int_signed(num: f32) -> String { - if num > 0.0 { - format!("+{}", num) - } else { - format!("-{}", num) - } -} - -fn render_int_unsigned(num: f32) -> String { - format!("{}", num) -} diff --git a/kayle_innate/kayle_scraper/src/html/mod.rs b/kayle_innate/kayle_scraper/src/html/mod.rs deleted file mode 100644 index 26ec5ea..0000000 --- a/kayle_innate/kayle_scraper/src/html/mod.rs +++ /dev/null @@ -1,233 +0,0 @@ -//! HTML documents and fragments. - -#[cfg(feature = "errors")] -use std::borrow::Cow; - -use ego_tree::iter::Nodes; -use ego_tree::Tree; -use html5ever::serialize::SerializeOpts; -use html5ever::tree_builder::QuirksMode; -use html5ever::QualName; -use html5ever::{driver, serialize}; -use tendril::TendrilSink; - -use crate::selector::Selector; -use crate::{ElementRef, Node}; - -/// An HTML tree. -/// -/// Parsing does not fail hard. Instead, the `quirks_mode` is set and errors are added to the -/// `errors` field. The `tree` will still be populated as best as possible. -/// -/// Implements the `TreeSink` trait from the `html5ever` crate, which allows HTML to be parsed. -#[derive(Debug, Clone, PartialEq, Eq)] -pub struct Html { - #[cfg(feature = "errors")] - /// Parse errors. - pub errors: Vec>, - - /// The quirks mode. - pub quirks_mode: QuirksMode, - - /// The node tree. - pub tree: Tree, -} - -impl Html { - /// Creates an empty HTML document. - pub fn new_document() -> Self { - Html { - #[cfg(feature = "errors")] - errors: Vec::new(), - quirks_mode: QuirksMode::NoQuirks, - tree: Tree::new(Node::Document), - } - } - - /// Creates an empty HTML fragment. - pub fn new_fragment() -> Self { - Html { - #[cfg(feature = "errors")] - errors: Vec::new(), - quirks_mode: QuirksMode::NoQuirks, - tree: Tree::new(Node::Fragment), - } - } - - /// Parses a string of HTML as a document. - /// - /// This is a convenience method for the following: - /// - /// ``` - /// # extern crate html5ever; - /// # extern crate scraper; - /// # extern crate tendril; - /// # fn main() { - /// # let document = ""; - /// use html5ever::driver::{self, ParseOpts}; - /// use scraper::Html; - /// use tendril::TendrilSink; - /// - /// let parser = driver::parse_document(Html::new_document(), ParseOpts::default()); - /// let html = parser.one(document); - /// # } - /// ``` - pub fn parse_document(document: &str) -> Self { - let parser = driver::parse_document(Self::new_document(), Default::default()); - parser.one(document) - } - - /// Parses a string of HTML as a fragment. - pub fn parse_fragment(fragment: &str) -> Self { - let parser = driver::parse_fragment( - Self::new_fragment(), - Default::default(), - QualName::new(None, ns!(html), local_name!("body")), - Vec::new(), - ); - parser.one(fragment) - } - - /// Returns an iterator over elements matching a selector. - pub fn select<'a, 'b>(&'a self, selector: &'b Selector) -> Select<'a, 'b> { - Select { - inner: self.tree.nodes(), - selector, - } - } - - /// Returns the root `` element. - pub fn root_element(&self) -> ElementRef { - let root_node = self - .tree - .root() - .children() - .find(|child| child.value().is_element()) - .expect("html node missing"); - ElementRef::wrap(root_node).unwrap() - } - - /// Serialize entire document into HTML. - pub fn html(&self) -> String { - let opts = SerializeOpts { - scripting_enabled: false, // It's not clear what this does. - traversal_scope: html5ever::serialize::TraversalScope::IncludeNode, - create_missing_parent: false, - }; - let mut buf = Vec::new(); - serialize(&mut buf, self, opts).unwrap(); - String::from_utf8(buf).unwrap() - } -} - -/// Iterator over elements matching a selector. -#[derive(Debug)] -pub struct Select<'a, 'b> { - inner: Nodes<'a, Node>, - selector: &'b Selector, -} - -impl<'a, 'b> Iterator for Select<'a, 'b> { - type Item = ElementRef<'a>; - - fn next(&mut self) -> Option> { - for node in self.inner.by_ref() { - if let Some(element) = ElementRef::wrap(node) { - if element.parent().is_some() && self.selector.matches(&element) { - return Some(element); - } - } - } - None - } - - fn size_hint(&self) -> (usize, Option) { - let (_lower, upper) = self.inner.size_hint(); - - (0, upper) - } -} - -impl<'a, 'b> DoubleEndedIterator for Select<'a, 'b> { - fn next_back(&mut self) -> Option { - for node in self.inner.by_ref().rev() { - if let Some(element) = ElementRef::wrap(node) { - if element.parent().is_some() && self.selector.matches(&element) { - return Some(element); - } - } - } - None - } -} - -mod serializable; -mod tree_sink; - -#[cfg(test)] -mod tests { - use super::Html; - use super::Selector; - - #[test] - fn root_element_fragment() { - let html = Html::parse_fragment(r#"1"#); - let root_ref = html.root_element(); - let href = root_ref - .select(&Selector::parse("a").unwrap()) - .next() - .unwrap(); - assert_eq!(href.inner_html(), "1"); - assert_eq!(href.value().attr("href").unwrap(), "http://github.com"); - } - - #[test] - fn root_element_document_doctype() { - let html = Html::parse_document("\nabc"); - let root_ref = html.root_element(); - let title = root_ref - .select(&Selector::parse("title").unwrap()) - .next() - .unwrap(); - assert_eq!(title.inner_html(), "abc"); - } - - #[test] - fn root_element_document_comment() { - let html = Html::parse_document("abc"); - let root_ref = html.root_element(); - let title = root_ref - .select(&Selector::parse("title").unwrap()) - .next() - .unwrap(); - assert_eq!(title.inner_html(), "abc"); - } - - #[test] - fn select_is_reversible() { - let html = Html::parse_document("

element1

element2

element3

"); - let selector = Selector::parse("p").unwrap(); - let result: Vec<_> = html - .select(&selector) - .rev() - .map(|e| e.inner_html()) - .collect(); - assert_eq!(result, vec!["element3", "element2", "element1"]); - } - - #[test] - fn select_has_a_size_hint() { - let html = Html::parse_document("

element1

element2

element3

"); - let selector = Selector::parse("p").unwrap(); - let (lower, upper) = html.select(&selector).size_hint(); - assert_eq!(lower, 0); - assert_eq!(upper, Some(10)); - } - - #[cfg(feature = "atomic")] - #[test] - fn html_is_send() { - fn send_sync() {} - send_sync::(); - } -} diff --git a/kayle_innate/kayle_scraper/src/html/serializable.rs b/kayle_innate/kayle_scraper/src/html/serializable.rs deleted file mode 100644 index a30a2cc..0000000 --- a/kayle_innate/kayle_scraper/src/html/serializable.rs +++ /dev/null @@ -1,27 +0,0 @@ -use std::io::Error; - -use html5ever::serialize::{Serialize, Serializer, TraversalScope}; - -use crate::Html; - -impl Serialize for Html { - fn serialize( - &self, - serializer: &mut S, - traversal_scope: TraversalScope, - ) -> Result<(), Error> { - crate::node::serializable::serialize(self.tree.root(), serializer, traversal_scope) - } -} - -#[cfg(test)] -mod tests { - use crate::Html; - - #[test] - fn test_serialize() { - let src = r#"

Hello world!

"#; - let html = Html::parse_document(src); - assert_eq!(html.html(), src); - } -} diff --git a/kayle_innate/kayle_scraper/src/html/tree_sink.rs b/kayle_innate/kayle_scraper/src/html/tree_sink.rs deleted file mode 100644 index 436c14b..0000000 --- a/kayle_innate/kayle_scraper/src/html/tree_sink.rs +++ /dev/null @@ -1,244 +0,0 @@ -use super::Html; -use crate::node::{Comment, Doctype, Element, Node, ProcessingInstruction, Text}; -use crate::tendril_util::make as make_tendril; -use ego_tree::NodeId; -use html5ever::tendril::StrTendril; -use html5ever::tree_builder::{ElementFlags, NodeOrText, QuirksMode, TreeSink}; -use html5ever::Attribute; -use html5ever::{ExpandedName, QualName}; -use std::borrow::Cow; - -/// Note: does not support the `