From a9e717000a18e7906545a7a61bd190a7b5e6a1b5 Mon Sep 17 00:00:00 2001 From: "Kim J. Nordmo" Date: Fri, 26 Mar 2021 05:05:04 +0100 Subject: [PATCH] feat: add ability to parse links on html pages (#2) --- .rustfmt.toml | 4 +- Cargo.lock | 967 +++++++++++++++++++- Cargo.toml | 3 +- pkg-upd/src/logging.rs | 8 + pkg-upd/src/parsers/toml.rs | 2 +- pkg-upd/test-data/deserialize-full.pkg.toml | 2 +- pkg-web/Cargo.toml | 14 + pkg-web/src/elements.rs | 102 +++ pkg-web/src/lib.rs | 31 + pkg-web/src/request.rs | 145 +++ pkg-web/src/response.rs | 153 ++++ pkg-web/src/response/html.rs | 342 +++++++ 12 files changed, 1765 insertions(+), 8 deletions(-) create mode 100644 pkg-web/Cargo.toml create mode 100644 pkg-web/src/elements.rs create mode 100644 pkg-web/src/lib.rs create mode 100644 pkg-web/src/request.rs create mode 100644 pkg-web/src/response.rs create mode 100644 pkg-web/src/response/html.rs diff --git a/.rustfmt.toml b/.rustfmt.toml index 74b3af4..7337e05 100644 --- a/.rustfmt.toml +++ b/.rustfmt.toml @@ -1,13 +1,13 @@ condense_wildcard_suffixes = true format_code_in_doc_comments = true format_strings = true -license_template_path = ".build/license-header.txt" +group_imports = "StdExternalCrate" imports_granularity = "Module" +license_template_path = ".build/license-header.txt" newline_style = "Unix" normalize_comments = true normalize_doc_attributes = true reorder_impl_items = true -group_imports = "StdExternalCrate" use_field_init_shorthand = true version = "Two" wrap_comments = true diff --git a/Cargo.lock b/Cargo.lock index 3d773f8..452eb16 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -17,6 +17,15 @@ version = "1.0.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f26201604c87b1e01bd3d98f8d5d9a8fcbb815e8cedb41ffccbeb4bf593a35fe" +[[package]] +name = "aho-corasick" +version = "0.7.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7404febffaa47dac81aa44dba71523c9d069b1bdc50a77db41195149e17f68e5" +dependencies = [ + "memchr", +] + [[package]] name = "ansi_term" version = "0.11.0" @@ -57,6 +66,27 @@ dependencies = [ "rustc-demangle", ] +[[package]] +name = "base64" +version = "0.13.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "904dfeac50f3cdaba28fc6f57fdcddb75f49ed61346676a78c4ffe55877802fd" + +[[package]] +name = "bit-set" +version = "0.5.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6e11e16035ea35e4e5997b393eacbf6f63983188f7a2ad25bfb13465f5ad59de" +dependencies = [ + "bit-vec", +] + +[[package]] +name = "bit-vec" +version = "0.6.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "349f9b6a179ed607305526ca489b34ad0a41aed5f7980fa90eb03160b69598fb" + [[package]] name = "bitflags" version = "1.2.1" @@ -69,6 +99,18 @@ version = "3.6.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "63396b8a4b9de3f4fdfb320ab6080762242f66a8ef174c49d8e19b674db4cdbe" +[[package]] +name = "bytes" +version = "1.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b700ce4376041dcd0a327fd0097c41095743c4c8af8887265942faf1100bd040" + +[[package]] +name = "cc" +version = "1.0.67" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e3c69b077ad434294d3ce9f1f6143a2a4b89a8a2d54ef813d85003a4fd1137fd" + [[package]] name = "cfg-if" version = "0.1.10" @@ -118,6 +160,31 @@ dependencies = [ "vec_map", ] +[[package]] +name = "core-foundation" +version = "0.9.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0a89e2ae426ea83155dccf10c0fa6b1463ef6d5fcb44cee0b224a408fa640a62" +dependencies = [ + "core-foundation-sys", + "libc", +] + +[[package]] +name = "core-foundation-sys" +version = "0.8.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ea221b5284a47e40033bf9b66f35f984ec0ea2931eb03505246cd27a963f981b" + +[[package]] +name = "encoding_rs" +version = "0.8.28" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "80df024fbc5ac80f87dfef0d9f5209a252f2a497f7f42944cff24d8253cac065" +dependencies = [ + "cfg-if 1.0.0", +] + [[package]] name = "envmnt" version = "0.8.4" @@ -138,6 +205,27 @@ dependencies = [ "log", ] +[[package]] +name = "fnv" +version = "1.0.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3f9eec918d3f24069decb9af1554cad7c880e2da24a9afd88aca000531ab82c1" + +[[package]] +name = "foreign-types" +version = "0.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f6f339eb8adc052cd2ca78910fda869aefa38d22d5cb648e6485e4d3fc06f3b1" +dependencies = [ + "foreign-types-shared", +] + +[[package]] +name = "foreign-types-shared" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "00b0228411908ca8685dba7fc2cdd70ec9990a6e753e89b6ac91a84c40fbaf4b" + [[package]] name = "form_urlencoded" version = "1.0.1" @@ -154,6 +242,64 @@ version = "0.1.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c1fd087255f739f4f1aeea69f11b72f8080e9c2e7645cd06955dad4a178a49e3" +[[package]] +name = "futf" +version = "0.1.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7c9c1ce3fa9336301af935ab852c437817d14cd33690446569392e65170aac3b" +dependencies = [ + "mac", + "new_debug_unreachable", +] + +[[package]] +name = "futures-channel" +version = "0.3.13" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8c2dd2df839b57db9ab69c2c9d8f3e8c81984781937fe2807dc6dcf3b2ad2939" +dependencies = [ + "futures-core", +] + +[[package]] +name = "futures-core" +version = "0.3.13" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "15496a72fabf0e62bdc3df11a59a3787429221dd0710ba8ef163d6f7a9112c94" + +[[package]] +name = "futures-io" +version = "0.3.13" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d71c2c65c57704c32f5241c1223167c2c3294fd34ac020c807ddbe6db287ba59" + +[[package]] +name = "futures-sink" +version = "0.3.13" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "85754d98985841b7d4f5e8e6fbfa4a4ac847916893ec511a2917ccd8525b8bb3" + +[[package]] +name = "futures-task" +version = "0.3.13" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fa189ef211c15ee602667a6fcfe1c1fd9e07d42250d2156382820fba33c9df80" + +[[package]] +name = "futures-util" +version = "0.3.13" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1812c7ab8aedf8d6f2701a43e1243acdbcc2b36ab26e2ad421eb99ac963d96d1" +dependencies = [ + "futures-core", + "futures-io", + "futures-task", + "memchr", + "pin-project-lite", + "pin-utils", + "slab", +] + [[package]] name = "getopts" version = "0.2.21" @@ -163,6 +309,17 @@ dependencies = [ "unicode-width", ] +[[package]] +name = "getrandom" +version = "0.1.16" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8fc3cb4d91f53b50155bdcfd23f6a4c39ae1969c2ae85982b135750cccaf5fce" +dependencies = [ + "cfg-if 1.0.0", + "libc", + "wasi 0.9.0+wasi-snapshot-preview1", +] + [[package]] name = "getrandom" version = "0.2.2" @@ -171,7 +328,7 @@ checksum = "c9495705279e7140bf035dde1f6e750c162df8b625267cd52cc44e0b156732c8" dependencies = [ "cfg-if 1.0.0", "libc", - "wasi", + "wasi 0.10.0+wasi-snapshot-preview1", ] [[package]] @@ -180,6 +337,25 @@ version = "0.23.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f6503fe142514ca4799d4c26297c4248239fe8838d827db6bd6065c6ed29a6ce" +[[package]] +name = "h2" +version = "0.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fc018e188373e2777d0ef2467ebff62a08e66c3f5857b23c8fbec3018210dc00" +dependencies = [ + "bytes", + "fnv", + "futures-core", + "futures-sink", + "futures-util", + "http", + "indexmap", + "slab", + "tokio", + "tokio-util", + "tracing", +] + [[package]] name = "hashbrown" version = "0.9.1" @@ -204,6 +380,54 @@ dependencies = [ "libc", ] +[[package]] +name = "html5ever" +version = "0.25.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "aafcf38a1a36118242d29b92e1b08ef84e67e4a5ed06e0a80be20e6a32bfed6b" +dependencies = [ + "log", + "mac", + "markup5ever", + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "http" +version = "0.2.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7245cd7449cc792608c3c8a9eaf69bd4eabbabf802713748fd739c98b82f0747" +dependencies = [ + "bytes", + "fnv", + "itoa", +] + +[[package]] +name = "http-body" +version = "0.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5dfb77c123b4e2f72a2069aeae0b4b4949cc7e966df277813fc16347e7549737" +dependencies = [ + "bytes", + "http", + "pin-project-lite", +] + +[[package]] +name = "httparse" +version = "1.3.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "615caabe2c3160b313d52ccc905335f4ed5f10881dd63dc5699d47e90be85691" + +[[package]] +name = "httpdate" +version = "0.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "494b4d60369511e7dea41cf646832512a94e542f68bb9c49e54518e0f468eb47" + [[package]] name = "human-panic" version = "1.0.4-alpha.0" @@ -218,6 +442,43 @@ dependencies = [ "uuid", ] +[[package]] +name = "hyper" +version = "0.14.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e8e946c2b1349055e0b72ae281b238baf1a3ea7307c7e9f9d64673bdd9c26ac7" +dependencies = [ + "bytes", + "futures-channel", + "futures-core", + "futures-util", + "h2", + "http", + "http-body", + "httparse", + "httpdate", + "itoa", + "pin-project", + "socket2", + "tokio", + "tower-service", + "tracing", + "want", +] + +[[package]] +name = "hyper-tls" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d6183ddfa99b85da61a140bea0efc93fdf56ceaa041b37d553518030827f9905" +dependencies = [ + "bytes", + "hyper", + "native-tls", + "tokio", + "tokio-native-tls", +] + [[package]] name = "idna" version = "0.2.2" @@ -239,6 +500,12 @@ dependencies = [ "hashbrown", ] +[[package]] +name = "ipnet" +version = "2.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "47be2f14c678be2fdcab04ab1171db51b2762ce6f0a8ee87c8dd4a04ed216135" + [[package]] name = "itoa" version = "0.4.7" @@ -260,6 +527,25 @@ version = "1.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646" +[[package]] +name = "lenient_semver" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a1e69422a47c615a5d4dc300df0aeceaf8db75316d7a96b80c41f1ae0d1c6e30" +dependencies = [ + "lenient_semver_parser", + "semver 0.11.0", +] + +[[package]] +name = "lenient_semver_parser" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c7cb776fddcf01a2bf698446332a08b38ad591f93126f3d78880bd2e1900cbbe" +dependencies = [ + "semver 0.11.0", +] + [[package]] name = "libc" version = "0.2.90" @@ -281,12 +567,59 @@ dependencies = [ "cfg-if 1.0.0", ] +[[package]] +name = "mac" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c41e0c4fef86961ac6d6f8a82609f55f31b05e4fce149ac5710e439df7619ba4" + +[[package]] +name = "markup5ever" +version = "0.10.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "aae38d669396ca9b707bfc3db254bc382ddb94f57cc5c235f34623a669a01dab" +dependencies = [ + "log", + "phf", + "phf_codegen", + "serde", + "serde_derive", + "serde_json", + "string_cache", + "string_cache_codegen", + "tendril", +] + +[[package]] +name = "markup5ever_rcdom" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f015da43bcd8d4f144559a3423f4591d69b8ce0652c905374da7205df336ae2b" +dependencies = [ + "html5ever", + "markup5ever", + "tendril", + "xml5ever", +] + [[package]] name = "matches" version = "0.1.8" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7ffc5c5338469d4d3ea17d269fa8ea3512ad247247c30bd2df69e68309ed0a08" +[[package]] +name = "memchr" +version = "2.3.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0ee1c47aaa256ecabcaea351eae4a9b01ef39ed810004e298d2511ed284b1525" + +[[package]] +name = "mime" +version = "0.3.16" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2a60c7ce501c71e03a9c9c0d35b861413ae925bd979cc7a4e30d060069aaac8d" + [[package]] name = "miniz_oxide" version = "0.4.4" @@ -297,12 +630,67 @@ dependencies = [ "autocfg", ] +[[package]] +name = "mio" +version = "0.7.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cf80d3e903b34e0bd7282b218398aec54e082c840d9baf8339e0080a0c542956" +dependencies = [ + "libc", + "log", + "miow", + "ntapi", + "winapi", +] + +[[package]] +name = "miow" +version = "0.3.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b9f1c5b025cda876f66ef43a113f91ebc9f4ccef34843000e0adf6ebbab84e21" +dependencies = [ + "winapi", +] + +[[package]] +name = "native-tls" +version = "0.2.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b8d96b2e1c8da3957d58100b09f102c6d9cfdfced01b7ec5a8974044bb09dbd4" +dependencies = [ + "lazy_static", + "libc", + "log", + "openssl", + "openssl-probe", + "openssl-sys", + "schannel", + "security-framework", + "security-framework-sys", + "tempfile", +] + +[[package]] +name = "new_debug_unreachable" +version = "1.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e4a24736216ec316047a1fc4252e27dabb04218aa4a3f37c6e7ddbf1f9782b54" + [[package]] name = "nias" version = "0.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ab250442c86f1850815b5d268639dff018c0627022bc1940eb2d642ca1ce12f0" +[[package]] +name = "ntapi" +version = "0.3.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3f6bb902e437b6d86e03cce10a7e2af662292c5dfef23b65899ea3ac9354ad44" +dependencies = [ + "winapi", +] + [[package]] name = "num-integer" version = "0.1.44" @@ -322,12 +710,61 @@ dependencies = [ "autocfg", ] +[[package]] +name = "num_cpus" +version = "1.13.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "05499f3756671c15885fee9034446956fff3f243d6077b91e5767df161f766b3" +dependencies = [ + "hermit-abi", + "libc", +] + [[package]] name = "object" version = "0.23.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a9a7ab5d64814df0fe4a4b5ead45ed6c5f181ee3ff04ba344313a6c80446c5d4" +[[package]] +name = "once_cell" +version = "1.7.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "af8b08b04175473088b46763e51ee54da5f9a164bc162f615b91bc179dbf15a3" + +[[package]] +name = "openssl" +version = "0.10.33" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a61075b62a23fef5a29815de7536d940aa35ce96d18ce0cc5076272db678a577" +dependencies = [ + "bitflags", + "cfg-if 1.0.0", + "foreign-types", + "libc", + "once_cell", + "openssl-sys", +] + +[[package]] +name = "openssl-probe" +version = "0.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "77af24da69f9d9341038eba93a073b1fdaaa1b788221b00a69bce9e762cb32de" + +[[package]] +name = "openssl-sys" +version = "0.9.61" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "313752393519e876837e09e1fa183ddef0be7735868dced3196f4472d536277f" +dependencies = [ + "autocfg", + "cc", + "libc", + "pkg-config", + "vcpkg", +] + [[package]] name = "os_info" version = "2.0.8" @@ -354,6 +791,82 @@ dependencies = [ "ucd-trie", ] +[[package]] +name = "phf" +version = "0.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3dfb61232e34fcb633f43d12c58f83c1df82962dcdfa565a4e866ffc17dafe12" +dependencies = [ + "phf_shared", +] + +[[package]] +name = "phf_codegen" +version = "0.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cbffee61585b0411840d3ece935cce9cb6321f01c45477d30066498cd5e1a815" +dependencies = [ + "phf_generator", + "phf_shared", +] + +[[package]] +name = "phf_generator" +version = "0.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "17367f0cc86f2d25802b2c26ee58a7b23faeccf78a396094c13dced0d0182526" +dependencies = [ + "phf_shared", + "rand 0.7.3", +] + +[[package]] +name = "phf_shared" +version = "0.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c00cf8b9eafe68dde5e9eaa2cef8ee84a9336a47d566ec55ca16589633b65af7" +dependencies = [ + "siphasher", +] + +[[package]] +name = "pin-project" +version = "1.0.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "96fa8ebb90271c4477f144354485b8068bd8f6b78b428b01ba892ca26caf0b63" +dependencies = [ + "pin-project-internal", +] + +[[package]] +name = "pin-project-internal" +version = "1.0.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "758669ae3558c6f74bd2a18b41f7ac0b5a195aea6639d6a9b5e5d1ad5ba24c0b" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "pin-project-lite" +version = "0.2.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dc0e1f259c92177c30a4c9d177246edd0a3568b25756a977d0632cf8fa37e905" + +[[package]] +name = "pin-utils" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8b870d8c151b6f2fb93e84a13146138f05d02ed11c7e7c54f8826aaaf7c9f184" + +[[package]] +name = "pkg-config" +version = "0.3.19" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3831453b3449ceb48b6d9c7ad7c96d5ea673e9b470a1dc578c2ce6521230884c" + [[package]] name = "pkg-data" version = "0.1.0" @@ -399,6 +912,31 @@ dependencies = [ "yansi", ] +[[package]] +name = "pkg-web" +version = "0.1.0" +dependencies = [ + "lazy_static", + "lenient_semver", + "log", + "regex", + "reqwest", + "select", + "semver 0.11.0", +] + +[[package]] +name = "ppv-lite86" +version = "0.2.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ac74c624d6b2d21f425f752262f42188365d7b8ff1aff74c82e45136510a4857" + +[[package]] +name = "precomputed-hash" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "925383efa346730478fb4838dbe9137d2a47675ad789c546d150a6e1dd4ab31c" + [[package]] name = "proc-macro-error" version = "1.0.4" @@ -441,6 +979,166 @@ dependencies = [ "proc-macro2", ] +[[package]] +name = "rand" +version = "0.7.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6a6b1679d49b24bbfe0c803429aa1874472f50d9b363131f0e89fc356b544d03" +dependencies = [ + "getrandom 0.1.16", + "libc", + "rand_chacha 0.2.2", + "rand_core 0.5.1", + "rand_hc 0.2.0", + "rand_pcg", +] + +[[package]] +name = "rand" +version = "0.8.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0ef9e7e66b4468674bfcb0c81af8b7fa0bb154fa9f28eb840da5c447baeb8d7e" +dependencies = [ + "libc", + "rand_chacha 0.3.0", + "rand_core 0.6.2", + "rand_hc 0.3.0", +] + +[[package]] +name = "rand_chacha" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f4c8ed856279c9737206bf725bf36935d8666ead7aa69b52be55af369d193402" +dependencies = [ + "ppv-lite86", + "rand_core 0.5.1", +] + +[[package]] +name = "rand_chacha" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e12735cf05c9e10bf21534da50a147b924d555dc7a547c42e6bb2d5b6017ae0d" +dependencies = [ + "ppv-lite86", + "rand_core 0.6.2", +] + +[[package]] +name = "rand_core" +version = "0.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "90bde5296fc891b0cef12a6d03ddccc162ce7b2aff54160af9338f8d40df6d19" +dependencies = [ + "getrandom 0.1.16", +] + +[[package]] +name = "rand_core" +version = "0.6.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "34cf66eb183df1c5876e2dcf6b13d57340741e8dc255b48e40a26de954d06ae7" +dependencies = [ + "getrandom 0.2.2", +] + +[[package]] +name = "rand_hc" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ca3129af7b92a17112d59ad498c6f81eaf463253766b90396d39ea7a39d6613c" +dependencies = [ + "rand_core 0.5.1", +] + +[[package]] +name = "rand_hc" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3190ef7066a446f2e7f42e239d161e905420ccab01eb967c9eb27d21b2322a73" +dependencies = [ + "rand_core 0.6.2", +] + +[[package]] +name = "rand_pcg" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "16abd0c1b639e9eb4d7c50c0b8100b0d0f849be2349829c740fe8e6eb4816429" +dependencies = [ + "rand_core 0.5.1", +] + +[[package]] +name = "redox_syscall" +version = "0.2.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "94341e4e44e24f6b591b59e47a8a027df12e008d73fd5672dbea9cc22f4507d9" +dependencies = [ + "bitflags", +] + +[[package]] +name = "regex" +version = "1.4.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "957056ecddbeba1b26965114e191d2e8589ce74db242b6ea25fc4062427a5c19" +dependencies = [ + "aho-corasick", + "memchr", + "regex-syntax", +] + +[[package]] +name = "regex-syntax" +version = "0.6.23" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "24d5f089152e60f62d28b835fbff2cd2e8dc0baf1ac13343bef92ab7eed84548" + +[[package]] +name = "remove_dir_all" +version = "0.5.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3acd125665422973a33ac9d3dd2df85edad0f4ae9b00dafb1a05e43a9f5ef8e7" +dependencies = [ + "winapi", +] + +[[package]] +name = "reqwest" +version = "0.11.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bf12057f289428dbf5c591c74bf10392e4a8003f993405a902f20117019022d4" +dependencies = [ + "base64", + "bytes", + "encoding_rs", + "futures-core", + "futures-util", + "http", + "http-body", + "hyper", + "hyper-tls", + "ipnet", + "js-sys", + "lazy_static", + "log", + "mime", + "native-tls", + "percent-encoding", + "pin-project-lite", + "serde", + "serde_urlencoded", + "tokio", + "tokio-native-tls", + "url", + "wasm-bindgen", + "wasm-bindgen-futures", + "web-sys", + "winreg", +] + [[package]] name = "rstest" version = "0.6.4" @@ -487,6 +1185,50 @@ version = "1.0.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "71d301d4193d031abdd79ff7e3dd721168a9572ef3fe51a1517aba235bd8f86e" +[[package]] +name = "schannel" +version = "0.1.19" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8f05ba609c234e60bee0d547fe94a4c7e9da733d1c962cf6e59efa4cd9c8bc75" +dependencies = [ + "lazy_static", + "winapi", +] + +[[package]] +name = "security-framework" +version = "2.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d493c5f39e02dfb062cd8f33301f90f9b13b650e8c1b1d0fd75c19dd64bff69d" +dependencies = [ + "bitflags", + "core-foundation", + "core-foundation-sys", + "libc", + "security-framework-sys", +] + +[[package]] +name = "security-framework-sys" +version = "2.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dee48cdde5ed250b0d3252818f646e174ab414036edb884dde62d80a3ac6082d" +dependencies = [ + "core-foundation-sys", + "libc", +] + +[[package]] +name = "select" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8ee061f90afcc8678bef7a78d0d121683f0ba753f740ff7005f833ec445876b7" +dependencies = [ + "bit-set", + "html5ever", + "markup5ever_rcdom", +] + [[package]] name = "semver" version = "0.9.0" @@ -552,6 +1294,66 @@ dependencies = [ "serde", ] +[[package]] +name = "serde_urlencoded" +version = "0.7.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "edfa57a7f8d9c1d260a549e7224100f6c43d43f9103e06dd8b4095a9b2b43ce9" +dependencies = [ + "form_urlencoded", + "itoa", + "ryu", + "serde", +] + +[[package]] +name = "siphasher" +version = "0.3.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cbce6d4507c7e4a3962091436e56e95290cb71fa302d0d270e32130b75fbff27" + +[[package]] +name = "slab" +version = "0.4.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c111b5bd5695e56cffe5129854aa230b39c93a305372fdbb2668ca2394eea9f8" + +[[package]] +name = "socket2" +version = "0.3.19" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "122e570113d28d773067fab24266b66753f6ea915758651696b6e35e49f88d6e" +dependencies = [ + "cfg-if 1.0.0", + "libc", + "winapi", +] + +[[package]] +name = "string_cache" +version = "0.8.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8ddb1139b5353f96e429e1a5e19fbaf663bddedaa06d1dbd49f82e352601209a" +dependencies = [ + "lazy_static", + "new_debug_unreachable", + "phf_shared", + "precomputed-hash", + "serde", +] + +[[package]] +name = "string_cache_codegen" +version = "0.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f24c8e5e19d22a726626f1a5e16fe15b132dcf21d10177fa5a45ce7962996b97" +dependencies = [ + "phf_generator", + "phf_shared", + "proc-macro2", + "quote", +] + [[package]] name = "strsim" version = "0.8.0" @@ -593,6 +1395,31 @@ dependencies = [ "unicode-xid", ] +[[package]] +name = "tempfile" +version = "3.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dac1c663cfc93810f88aed9b8941d48cabf856a1b111c29a40439018d870eb22" +dependencies = [ + "cfg-if 1.0.0", + "libc", + "rand 0.8.3", + "redox_syscall", + "remove_dir_all", + "winapi", +] + +[[package]] +name = "tendril" +version = "0.4.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a9ef557cb397a4f0a5a3a628f06515f78563f2209e64d47055d9dc6052bf5e33" +dependencies = [ + "futf", + "mac", + "utf-8", +] + [[package]] name = "termcolor" version = "1.1.2" @@ -618,7 +1445,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6db9e6914ab8b1ae1c260a4ae7a49b6c5611b40328a735b21862567685e73255" dependencies = [ "libc", - "wasi", + "wasi 0.10.0+wasi-snapshot-preview1", "winapi", ] @@ -637,6 +1464,45 @@ version = "0.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "cda74da7e1a664f795bb1f8a87ec406fb89a02522cf6e50620d016add6dbbf5c" +[[package]] +name = "tokio" +version = "1.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "134af885d758d645f0f0505c9a8b3f9bf8a348fd822e112ab5248138348f1722" +dependencies = [ + "autocfg", + "bytes", + "libc", + "memchr", + "mio", + "num_cpus", + "pin-project-lite", +] + +[[package]] +name = "tokio-native-tls" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f7d995660bd2b7f8c1568414c1126076c13fbb725c40112dc0120b78eb9b717b" +dependencies = [ + "native-tls", + "tokio", +] + +[[package]] +name = "tokio-util" +version = "0.6.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5143d049e85af7fbc36f5454d990e62c2df705b3589f123b71f441b6b59f443f" +dependencies = [ + "bytes", + "futures-core", + "futures-sink", + "log", + "pin-project-lite", + "tokio", +] + [[package]] name = "toml" version = "0.5.8" @@ -646,6 +1512,38 @@ dependencies = [ "serde", ] +[[package]] +name = "tower-service" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "360dfd1d6d30e05fda32ace2c8c70e9c0a9da713275777f5a4dbb8a1893930c6" + +[[package]] +name = "tracing" +version = "0.1.25" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "01ebdc2bb4498ab1ab5f5b73c5803825e60199229ccba0698170e3be0e7f959f" +dependencies = [ + "cfg-if 1.0.0", + "pin-project-lite", + "tracing-core", +] + +[[package]] +name = "tracing-core" +version = "0.1.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f50de3927f93d202783f4513cda820ab47ef17f624b03c096e86ef00c67e6b5f" +dependencies = [ + "lazy_static", +] + +[[package]] +name = "try-lock" +version = "0.2.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "59547bce71d9c38b83d9c0e92b6066c4253371f15005def0c30d9657f50c7642" + [[package]] name = "ucd-trie" version = "0.1.3" @@ -701,15 +1599,27 @@ dependencies = [ "serde", ] +[[package]] +name = "utf-8" +version = "0.7.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "05e42f7c18b8f902290b009cde6d651262f956c98bc51bca4cd1d511c9cd85c7" + [[package]] name = "uuid" version = "0.8.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "bc5cf98d8186244414c848017f0e2676b3fcb46807f6668a97dfe67359a3c4b7" dependencies = [ - "getrandom", + "getrandom 0.2.2", ] +[[package]] +name = "vcpkg" +version = "0.2.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b00bca6106a5e23f3eee943593759b7fcddb00554332e856d990c893966879fb" + [[package]] name = "vec_map" version = "0.8.2" @@ -722,6 +1632,22 @@ version = "0.9.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5fecdca9a5291cc2b8dcf7dc02453fee791a280f3743cb0905f8822ae463b3fe" +[[package]] +name = "want" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1ce8a968cb1cd110d136ff8b819a556d6fb6d919363c61534f6860c7eb172ba0" +dependencies = [ + "log", + "try-lock", +] + +[[package]] +name = "wasi" +version = "0.9.0+wasi-snapshot-preview1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cccddf32554fecc6acb585f82a32a72e28b48f8c4c1883ddfeeeaa96f7d8e519" + [[package]] name = "wasi" version = "0.10.0+wasi-snapshot-preview1" @@ -735,6 +1661,8 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7ee1280240b7c461d6a0071313e08f34a60b0365f14260362e5a2b17d1d31aa7" dependencies = [ "cfg-if 1.0.0", + "serde", + "serde_json", "wasm-bindgen-macro", ] @@ -753,6 +1681,18 @@ dependencies = [ "wasm-bindgen-shared", ] +[[package]] +name = "wasm-bindgen-futures" +version = "0.4.21" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8e67a5806118af01f0d9045915676b22aaebecf4178ae7021bc171dab0b897ab" +dependencies = [ + "cfg-if 1.0.0", + "js-sys", + "wasm-bindgen", + "web-sys", +] + [[package]] name = "wasm-bindgen-macro" version = "0.2.71" @@ -833,6 +1773,27 @@ version = "0.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" +[[package]] +name = "winreg" +version = "0.7.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0120db82e8a1e0b9fb3345a539c478767c0048d842860994d96113d5b667bd69" +dependencies = [ + "winapi", +] + +[[package]] +name = "xml5ever" +version = "0.16.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0b1b52e6e8614d4a58b8e70cf51ec0cc21b256ad8206708bcff8139b5bbd6a59" +dependencies = [ + "log", + "mac", + "markup5ever", + "time", +] + [[package]] name = "yansi" version = "0.5.0" diff --git a/Cargo.toml b/Cargo.toml index 88c63aa..8eefc2d 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -2,7 +2,8 @@ members = [ "pkg-data", "pkg-license", - "pkg-upd" + "pkg-upd", + "pkg-web" ] [profile.release] diff --git a/pkg-upd/src/logging.rs b/pkg-upd/src/logging.rs index 92b7b05..e71feb8 100644 --- a/pkg-upd/src/logging.rs +++ b/pkg-upd/src/logging.rs @@ -70,6 +70,12 @@ pub fn setup_logging(log: &LogData) -> Result<(), Box> { warn: Style::new(Color::Fixed(208)).bold(), error: Style::new(Color::Fixed(196)).bold(), }; + let html5ever_level = if log.level > log::LevelFilter::Info { + log::LevelFilter::Info + } else { + log.level + }; + let cli_info = if log.level > log::LevelFilter::Info { fern::Dispatch::new().format(move |out, message, record| { let level = record.level(); @@ -86,6 +92,7 @@ pub fn setup_logging(log: &LogData) -> Result<(), Box> { } .filter(move |metadata| metadata.level() >= log::Level::Info) .level(log.level) + .level_for("html5ever", html5ever_level) .chain(std::io::stdout()); let cli_warn = fern::Dispatch::new() .format(move |out, message, record| { @@ -122,6 +129,7 @@ pub fn setup_logging(log: &LogData) -> Result<(), Box> { } }) .level(log::LevelFilter::Trace) + .level_for("html5ever", log::LevelFilter::Info) .chain(fern::log_file(&log.path)?); fern::Dispatch::new() diff --git a/pkg-upd/src/parsers/toml.rs b/pkg-upd/src/parsers/toml.rs index f235714..7ec3fc8 100644 --- a/pkg-upd/src/parsers/toml.rs +++ b/pkg-upd/src/parsers/toml.rs @@ -292,7 +292,7 @@ mod tests { choco.parse_url = Some(ChocolateyParseUrl::UrlWithRegex { url: Url::parse("https://sourceforge.net/projects/astyle/files/astyle/") .unwrap(), - regex: r"astyle( |%30)(?P[\d\.]+)/$".into(), + regex: r"astyle( |%20)(?P[\d\.]+)/$".into(), }); choco.add_regex("arch32", r"windows\.zip/download$"); choco diff --git a/pkg-upd/test-data/deserialize-full.pkg.toml b/pkg-upd/test-data/deserialize-full.pkg.toml index 42a7e08..9f7baf7 100644 --- a/pkg-upd/test-data/deserialize-full.pkg.toml +++ b/pkg-upd/test-data/deserialize-full.pkg.toml @@ -25,7 +25,7 @@ description = { from = "./astyle.md", skip_start = 2, skip_end = 1 } [updater.chocolatey] embedded = true type = "Archive" -parse_url = { url = "https://sourceforge.net/projects/astyle/files/astyle/", regex = '''astyle( |%30)(?P[\d\.]+)/$''' } +parse_url = { url = "https://sourceforge.net/projects/astyle/files/astyle/", regex = '''astyle( |%20)(?P[\d\.]+)/$''' } [updater.chocolatey.regexes] arch32 = '''windows\.zip/download$''' diff --git a/pkg-web/Cargo.toml b/pkg-web/Cargo.toml new file mode 100644 index 0000000..3ab0d09 --- /dev/null +++ b/pkg-web/Cargo.toml @@ -0,0 +1,14 @@ +[package] +name = "pkg-web" +version = "0.1.0" +authors = ["AdmiringWorm "] +edition = "2018" + +[dependencies] +lazy_static = "1.4.0" +lenient_semver = "0.3.0" +log = "0.4.14" +regex = "1.4.5" +reqwest = {version = "0.11.2", features = ["blocking"] } +select = "0.5.0" +semver = "0.11.0" diff --git a/pkg-web/src/elements.rs b/pkg-web/src/elements.rs new file mode 100644 index 0000000..7dfd57d --- /dev/null +++ b/pkg-web/src/elements.rs @@ -0,0 +1,102 @@ +// Copyright (c) 2021 Kim J. Nordmo and WormieCorp. +// Licensed under the MIT license. See LICENSE.txt file in the project + +//! Contains information gathered when parsing an html page, or during manual +//! creation. + +use std::collections::HashMap; + +use reqwest::Url; +use semver::Version; + +/// Defines what type (MIME or extension) the current link +/// is for. +/// +/// This can be incorrect in cases +/// where the the link is only checked but not the request have been parsed. +#[derive(Debug, Copy, Clone, PartialEq)] +pub enum LinkType { + /// The current link uses an html extension, or have the mime type of + /// `text/html`. + Html, + /// The current link uses a text document extension, or report the mime type + /// of being `text/html`. + Text, + /// The current link uses a css document extension, or the response reports + /// the mime type of being `text/css`. + Css, + /// The current link uses a json document extension, or the response reports + /// the mime type of being `text/json`. + Json, + /// The current link uses one of the following extensions: + /// - `.exe` + /// - `.msi` + /// - `.7z` + /// - `.zip` + /// + /// or the response reports the mime type of being + /// `application/octet-stream`. + Binary, + /// The current link is not a known type, this could be because no file + /// extension is used, or the request have been sent to the url. + Unknown, +} + +impl Default for LinkType { + fn default() -> LinkType { + LinkType::Unknown + } +} + +/// Stores information that are know about the current link. +#[derive(Debug, PartialEq)] +pub struct LinkElement { + /// The full link of this element. + /// In most cases this is expected to include the domain, and will only be + /// without one when it has been created manually. + pub link: Url, + /// The title of the link, usually gotten from the html attribute `title`. + pub title: String, + /// The inner text or html of this link. + pub text: String, + /// The version that was parsed pased on any regex that a user specified + pub version: Option, + /// The type (either by extension, or mime type) that links are for. (*ie: + /// html, json, text, binary, etc.). + pub link_type: LinkType, + /// Any additional attributes specified for the link that are not stored in + /// any other field. + pub attributes: HashMap, +} + +impl LinkElement { + /// Creates a new edition of the link element, with the specified link url + /// and the link type. + pub fn new(url: Url, link_type: LinkType) -> LinkElement { + LinkElement { + link: url, + link_type, + ..Default::default() + } + } + + /// Returns true if the link element type have been set as being a binary + /// file, in all other cases it will return false. + pub fn is_binary(&self) -> bool { + self.link_type == LinkType::Binary + } +} + +impl Default for LinkElement { + /// Creates a new default link element, with the url set to example.org. + fn default() -> LinkElement { + LinkElement { + link: Url::parse("https://example.org").unwrap(), + title: Default::default(), + text: Default::default(), + version: None, + link_type: Default::default(), + attributes: Default::default(), + } + } +} diff --git a/pkg-web/src/lib.rs b/pkg-web/src/lib.rs new file mode 100644 index 0000000..2121b14 --- /dev/null +++ b/pkg-web/src/lib.rs @@ -0,0 +1,31 @@ +// Copyright (c) 2021 Kim J. Nordmo and WormieCorp. +// Licensed under the MIT license. See LICENSE.txt file in the project + +//! This crate allows requesting different kind of websites remotely, as well as +//! downloading binary files and extracting link items. +//! +//! ## Examples +//! +//! Aquiring the links from an html page, and asserting that 4 links was +//! returned! +//! +//! ``` +//! use pkg_web::*; +//! +//! let request = WebRequest::create(); +//! let response = request +//! .get_html_response("https://httpbin.org/links/5/2") +//! .unwrap(); +//! let (parent_link, links) = response.read(None).unwrap(); +//! +//! assert_eq!(links.len(), 4); +//! ``` + +mod elements; + +pub mod request; +pub mod response; + +pub use elements::{LinkElement, LinkType}; +pub use request::WebRequest; +pub use response::WebResponse; diff --git a/pkg-web/src/request.rs b/pkg-web/src/request.rs new file mode 100644 index 0000000..027218e --- /dev/null +++ b/pkg-web/src/request.rs @@ -0,0 +1,145 @@ +// Copyright (c) 2021 Kim J. Nordmo and WormieCorp. +// Licensed under the MIT license. See LICENSE.txt file in the project + +//! Section responsible for allowing requests to be sent to remote locations. + +use reqwest::blocking::Client; +use reqwest::{header, Url}; + +use crate::response::HtmlResponse; + +/// The name of the application + the version, which should be sent with every +/// request to the websites. +static APP_USER_AGENT: &str = concat!(env!("CARGO_PKG_NAME"), "/", env!("CARGO_PKG_VERSION")); + +/// Holds the necessary information to create requests to websites. +/// Also responsible for having a structure instance that can be used to get +/// different types of responses. +/// +/// ## Examples +/// +/// Aquiring html response. +/// ``` +/// use pkg_web::WebRequest; +/// +/// let request = WebRequest::create(); +/// let response = request +/// .get_html_response("https://httpbin.org/get") +/// .unwrap(); +/// ``` +pub struct WebRequest { + client: Client, +} + +macro_rules! headers { + ($($key:expr=>$value:literal),+) => { + { + let mut headers = ::reqwest::header::HeaderMap::new(); + $(headers.insert($key, ::reqwest::header::HeaderValue::from_static($value));)* + + headers + } + }; +} + +impl WebRequest { + /// Creates a new instance of a web request. This also creates a client with + /// the information set to the current application+version, a do not track + /// header and a header requesting to upgrade insecure requests. + pub fn create() -> WebRequest { + let client = Client::builder() + .user_agent(APP_USER_AGENT) + .default_headers(headers!( + header::ACCEPT_LANGUAGE => "en-US, en;q=0.8, *;q=0.5", + header::DNT => "1", + header::UPGRADE_INSECURE_REQUESTS => "1" + )) + .build() + .unwrap(); + + WebRequest { client } + } + + /// Makes a request to a website and requesting the html at the location + /// without downloading the actual upstream content. THe function also + /// verifies that the returned response have the mime type set to + /// `text/html`, otherwise an error is returned. + /// + /// The `Ok` value should be an instance of [HtmlResponse], and the links in + /// the response can be found by calling the + /// [read](crate::response::HtmlResponse::read) function. + pub fn get_html_response(&self, url: &str) -> Result> { + let url = Url::parse(url)?; + + let client = &self.client; + + let response = client + .get(url) + .header(header::ACCEPT, "text/html;charset=UTF-8") + .send()?; + + Ok(HtmlResponse::new(response)) + } +} + +#[cfg(test)] +mod tests { + use reqwest::StatusCode; + + use super::*; + use crate::response::*; + + #[test] + fn create_should_build_client_with_expected_values() { + let _ = WebRequest::create(); + + // Nothing more is done, as we only test if a panic happens which we do + // not expect. + } + + #[test] + fn get_html_response_should_create_response() { + let url = Url::parse("https://httpbin.org/get").unwrap(); + let request = WebRequest::create(); + + let response = request.get_html_response(url.as_str()).unwrap(); + + assert_eq!(response.status(), StatusCode::OK); + assert_eq!(response.response().url(), &url); + } + + #[test] + fn get_html_response_should_set_404_status_code() { + let request = WebRequest::create(); + + let response = request + .get_html_response("https://httpbin.org/status/404") + .unwrap(); + + assert_eq!(response.status(), StatusCode::NOT_FOUND); + } + + #[test] + fn get_html_response_should_follow_redirection() { + let final_url = + Url::parse("https://github.com/WormieCorp/Faker.NET.Portable/releases/tag/2.6.0") + .unwrap(); + let url = "https://github.com/WormieCorp/Faker.NET.Portable/releases/latest"; + let request = WebRequest::create(); + + let response = request.get_html_response(url).unwrap(); + + assert_eq!(response.status(), StatusCode::OK); + assert_eq!(response.response().url(), &final_url); + } + + #[test] + #[should_panic(expected = "failed to lookup address information: Name or service not known")] + fn get_html_response_should_be_error_on_non_existing_urls() { + let request = WebRequest::create(); + + request + .get_html_response("https://chocolatyyy.org") + .unwrap(); + } +} diff --git a/pkg-web/src/response.rs b/pkg-web/src/response.rs new file mode 100644 index 0000000..392ae43 --- /dev/null +++ b/pkg-web/src/response.rs @@ -0,0 +1,153 @@ +// Copyright (c) 2021 Kim J. Nordmo and WormieCorp. +// Licensed under the MIT license. See LICENSE.txt file in the project + +//! Holds all supported types of response types, that can be used when creating +//! a package. + +/// Contains code related to handling html responses. +mod html; + +use std::collections::HashMap; + +pub use html::HtmlResponse; +use lazy_static::lazy_static; +use reqwest::blocking::Response; +use reqwest::StatusCode; + +lazy_static! { + static ref MIME_TYPES: HashMap<&'static str, LinkType> = { + let mut map = HashMap::new(); + map.insert("text/html", LinkType::Html); + map.insert("text/plain", LinkType::Text); + map.insert("text/json", LinkType::Json); + map.insert("application/json", LinkType::Json); + map.insert("text/css", LinkType::Css); + map.insert("application/octet-stream", LinkType::Binary); + map + }; +} + +use crate::elements::LinkType; + +/// Common trait to allow multiple response types to have the same functions to +/// be used. +/// +/// ### See also +/// +/// The following structures implements the [WebResponse] trait. +/// +/// - [HtmlResponse](HtmlResponse): _Responsible of parsing html sites, +/// generally for aquiring links on a web page_. +pub trait WebResponse { + /// The response content that will be returned by any implementation of + /// [WebResponse]. This can be anything that would be expected by the + /// response parser. + type ResponseContent; + + /// Returns the actual response that was created by + /// [WebRequest](crate::WebRequest). + fn response(&self) -> &Response; + + /// Returns all of the headers that was returned by the web server. + /// The headers can alternatively be gotten through the + /// [response](WebResponse::response) function. + fn get_headers(&self) -> HashMap<&str, &str> { + let response = self.response(); + let mut headers = HashMap::with_capacity(response.headers().len()); + + for (key, value) in response.headers() { + if let Ok(val) = value.to_str() { + headers.insert(key.as_str(), val); + } + } + + headers + } + + /// Returns the status that was returned with the rest of the response. + fn status(&self) -> StatusCode { + self.response().status() + } + + /// Reads the current response content, and if successful returns the a + /// structure holding the necessary items found. This may return an + /// error if the status code is a success code, or if the reading of the + /// content failed. + fn read(self, re: Option<&str>) -> Result>; +} + +#[cfg(test)] +mod tests { + use reqwest::blocking::get; + + use super::*; + + struct DummyResponse { + response: Response, + } + + impl DummyResponse { + fn new(response: Response) -> DummyResponse { + DummyResponse { response } + } + } + + impl WebResponse for DummyResponse { + type ResponseContent = String; + + fn response(&self) -> &reqwest::blocking::Response { + &self.response + } + + fn read( + self, + _: Option<&str>, + ) -> std::result::Result< + ::ResponseContent, + std::boxed::Box<(dyn std::error::Error + 'static)>, + > { + unimplemented!() + } + } + + #[test] + fn status_should_get_the_actual_status_code_of_response() { + let response = get("https://httpbin.org/status/406").unwrap(); + + let response = DummyResponse::new(response); + + assert_eq!(response.status(), StatusCode::NOT_ACCEPTABLE); + } + + #[test] + fn get_headers_should_get_the_actual_headers_for_the_response() { + let response = get("https://httpbin.org/get").unwrap(); + + let response = DummyResponse::new(response); + + let mut headers = response.get_headers(); + // let us remove the date and server header + let _ = headers.remove("server"); + let _ = headers.remove("date"); + let _ = headers.remove("content-length"); // This can vary a little, so we remove it + + assert_eq!(headers, { + let mut map = HashMap::new(); + map.insert("access-control-allow-origin", "*"); + map.insert("access-control-allow-credentials", "true"); + map.insert("content-type", "application/json"); + map.insert("connection", "keep-alive"); + + map + }); + } + + #[test] + #[should_panic] + fn just_for_coverage_on_test_dummy_structure() { + let response = get("https://httpbin.org/get").unwrap(); + let response = DummyResponse::new(response); + + response.read(None).unwrap(); + } +} diff --git a/pkg-web/src/response/html.rs b/pkg-web/src/response/html.rs new file mode 100644 index 0000000..7fda1c0 --- /dev/null +++ b/pkg-web/src/response/html.rs @@ -0,0 +1,342 @@ +// Copyright (c) 2021 Kim J. Nordmo and WormieCorp. +// Licensed under the MIT license. See LICENSE.txt file in the project + +use regex::{Captures, Regex}; +use reqwest::blocking::Response; +use reqwest::{header, Url}; +use select::document::Document; +use select::predicate::Name; +use semver::Version; + +use crate::response::MIME_TYPES; +use crate::{LinkElement, LinkType, WebResponse}; + +/// Contains functions and structure for holding a single html response, and +/// extracting any necessary information out of the html page. +/// +/// Implements the [WebResponse] trait, and are not meant to be created directly +/// by a user. +#[derive(Debug)] +pub struct HtmlResponse { + response: Response, +} + +impl HtmlResponse { + /// Creates a new instance of the [HtmlResponse] structe to hold the current + /// response, and allow reading the content from that response. + pub fn new(response: Response) -> HtmlResponse { + HtmlResponse { response } + } +} + +impl WebResponse for HtmlResponse { + /// Sets the response type that will be returned when calling the + /// [read](HtmlResponse::read) function. The first item is the link the + /// response came from, and the second item holds a vector of different + /// link elements that were found on the html page. + type ResponseContent = (LinkElement, Vec); + + fn response(&self) -> &Response { + &self.response + } + + /// Reads the current response, and extracts any link elements that were + /// found in the body as well as the link that were used to get the response + /// itself. This function can return will return an error if the + /// response do not have a successful status code, or if the reading of the + /// body fails. + fn read(self, re: Option<&str>) -> Result> { + { + let response = &self.response; + if !response.status().is_success() { + let response = self.response; + response.error_for_status()?; + unreachable!(); + } + } + let response_url = self.response.url().clone(); + + let parent_link = get_parent_link_element(&self); + + let body = self.response.text()?; + let links = get_link_elements(body, response_url, re)?; + + Ok((parent_link, links)) + } +} + +fn get_parent_link_element(content: &T) -> LinkElement { + let headers = content.get_headers(); + let url = content.response().url(); + let response_type = headers + .get(header::CONTENT_TYPE.as_str()) + .unwrap_or(&"UNKNOWN"); + + for (key, val) in MIME_TYPES.iter() { + if response_type.contains(key) { + return LinkElement::new(url.clone(), *val); + } + } + + LinkElement::new(url.clone(), LinkType::Unknown) +} + +fn get_link_elements( + text: String, + parent_url: Url, + re: Option<&str>, +) -> Result, Box> { + let document = Document::from(text.as_str()); + + let re = if let Some(re) = re { + Some(Regex::new(&re)?) + } else { + None + }; + + let results = document + .find(Name("a")) + .filter_map(|n| { + let mut link = { + let href = match n.attr("href") { + Some(n) => { + if n.is_empty() { + return None; + } else { + n + } + } + _ => return None, + }; + + let href = + if href.starts_with('/') || href.starts_with('.') || href.starts_with('#') { + parent_url.join(&href) + } else { + Url::parse(href) + } + .ok()?; + LinkElement::new(href, LinkType::Unknown) + }; + + if let Some(re) = &re { + let capture = re.captures(link.link.as_str())?; + link.version = parse_version(capture); + } + + link.text = n.text().trim().into(); + + for (key, val) in n.attrs() { + let key = key.to_lowercase(); + if key == "href" { + continue; + } else if key == "title" { + link.title = val.into(); + } else { + let _ = link.attributes.insert(key, val.into()); + } + } + + let path = link.link.path(); + if path.ends_with(".html") { + link.link_type = LinkType::Html; + } else if path.ends_with(".json") { + link.link_type = LinkType::Json; + } else if path.ends_with(".css") { + link.link_type = LinkType::Css; + } else if path.ends_with(".txt") { + link.link_type = LinkType::Text; + } else if path.ends_with(".zip") + || path.ends_with(".7z") + || path.ends_with(".exe") + || path.ends_with(".msi") + || path.ends_with(".tar") + || path.ends_with(".tar.gz") + || path.ends_with(".tar.bz2") + || path.ends_with(".nupkg") + { + link.link_type = LinkType::Binary; + } + + Some(link) + }) + .collect(); + + Ok(results) +} + +fn parse_version(captures: Captures<'_>) -> Option { + lenient_semver::parse(captures.name("version")?.as_str()).ok() +} + +#[cfg(test)] +mod tests { + use std::collections::HashMap; + + use semver::Version; + + use super::*; + use crate::WebRequest; + + #[test] + fn read_should_get_links_from_page() { + let request = WebRequest::create(); + let url = Url::parse("https://httpbin.org/links/4/1").unwrap(); + let response = request.get_html_response(url.as_ref()).unwrap(); + + let (parent, links) = response.read(None).unwrap(); + + assert_eq!(parent, LinkElement::new(url, LinkType::Html)); + assert_eq!( + links, + [ + LinkElement { + link: Url::parse("https://httpbin.org/links/4/0").unwrap(), + text: "0".into(), + ..Default::default() + }, + LinkElement { + link: Url::parse("https://httpbin.org/links/4/2").unwrap(), + text: "2".into(), + ..Default::default() + }, + LinkElement { + link: Url::parse("https://httpbin.org/links/4/3").unwrap(), + text: "3".into(), + ..Default::default() + }, + ] + ); + } + + #[test] + fn read_should_extract_version_from_parsed_links() { + let request = WebRequest::create(); + let response = request + .get_html_response("https://github.com/MASGAU/MASGAU/releases/tag/v.1.0.6") + .unwrap(); + + let links = response + .read(Some(r"/([v\.]+)(?P[\d\.]+)/.*\.exe$")) + .unwrap() + .1; + + assert_eq!(links, [ + LinkElement { + link: Url::parse("https://github.com/MASGAU/MASGAU/releases/download/v.1.0.6/MASGAU-1.0.6-Release-Setup.exe").unwrap(), + link_type: LinkType::Binary, + title: "".into(), + text: "MASGAU v.1.0.6 for Windows".into(), + attributes: { + let mut map = HashMap::new(); + map.insert("rel".into(), "nofollow".into()); + map.insert("class".into(), "d-flex flex-items-center min-width-0".into()); + map + }, + version: Some(Version::parse("1.0.6").unwrap()) + } + ]) + } + + #[test] + fn read_should_only_return_links_matching_specified_regex() { + let request = WebRequest::create(); + let response = request + .get_html_response("https://github.com/GitTools/GitReleaseManager/releases/tag/0.11.0") + .unwrap(); + + let links = response.read(Some(r"\.nupkg$")).unwrap().1; + + let expected_items = [ + LinkElement { + link: Url::parse("https://github.com/GitTools/GitReleaseManager/releases/download/0.11.0/GitReleaseManager.0.11.0.nupkg".into()).unwrap(), + link_type: LinkType::Binary, + title: "".into(), + text: "GitReleaseManager.0.11.0.nupkg".into(), + attributes: { + let mut map = HashMap::new(); + map.insert("rel".into(), "nofollow".into()); + map.insert("class".into(), "d-flex flex-items-center min-width-0".into()); + + map + }, + version: None + }, + LinkElement { + link: Url::parse("https://github.com/GitTools/GitReleaseManager/releases/download/0.11.0/gitreleasemanager.portable.0.11.0.nupkg".into()).unwrap(), + link_type: LinkType::Binary, + title: "".into(), + text: "gitreleasemanager.portable.0.11.0.nupkg".into(), + attributes: { + let mut map = HashMap::new(); + map.insert("rel".into(), "nofollow".into()); + map.insert("class".into(), "d-flex flex-items-center min-width-0".into()); + + map + }, + version: None + }, + LinkElement { + link: Url::parse("https://github.com/GitTools/GitReleaseManager/releases/download/0.11.0/GitReleaseManager.Tool.0.11.0.nupkg".into()).unwrap(), + link_type: LinkType::Binary, + title: "".into(), + text: "GitReleaseManager.Tool.0.11.0.nupkg".into(), + attributes: { + let mut map = HashMap::new(); + map.insert("rel".into(), "nofollow".into()); + map.insert("class".into(), "d-flex flex-items-center min-width-0".into()); + + map + }, + version: None + }, + ]; + + assert_eq!(links, expected_items) + } + + #[test] + #[should_panic(expected = "Status(500)")] + fn read_should_return_error_on_error_response() { + let request = WebRequest::create(); + let response = request.get_html_response("https://httpbin.org/status/500"); + + if let Ok(response) = response { + let _ = response.read(None).unwrap(); + } + } + + #[test] + fn read_should_return_correct_links() { + let request = WebRequest::create(); + let response = request + .get_html_response("https://github.com/codecov/codecov-exe/releases/tag/1.13.0") + .unwrap(); + let (parent, links) = response.read(None).unwrap(); + + assert_eq!( + parent, + LinkElement::new( + Url::parse("https://github.com/codecov/codecov-exe/releases/tag/1.13.0").unwrap(), + LinkType::Html + ) + ); + + assert_eq!( + links + .iter() + .filter(|l| !l.title.is_empty()) + .collect::>() + .len(), + 3 + ); + assert_eq!( + links + .iter() + .filter(|l| l.link_type == LinkType::Binary) + .collect::>() + .len(), + 6 + ); + } +}