diff --git a/.cargo/config.toml b/.cargo/config.toml new file mode 100644 index 00000000000..16f1e73ac06 --- /dev/null +++ b/.cargo/config.toml @@ -0,0 +1,2 @@ +[build] +rustflags = ["--cfg", "tokio_unstable"] \ No newline at end of file diff --git a/.gitignore b/.gitignore index 37c8717f9a0..6bca7c191b8 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,4 @@ /target **/*.rs.bk .env +/data_new \ No newline at end of file diff --git a/Cargo.lock b/Cargo.lock index 88242eeef3c..9a0c3cf29e1 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -17,6 +17,12 @@ version = "1.0.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f26201604c87b1e01bd3d98f8d5d9a8fcbb815e8cedb41ffccbeb4bf593a35fe" +[[package]] +name = "adler2" +version = "2.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "512761e0bb2578dd7380c6baaa0f4ce03e84f95e960231d1dec8bf4d7d6e2627" + [[package]] name = "aho-corasick" version = "1.1.3" @@ -26,12 +32,104 @@ dependencies = [ "memchr", ] +[[package]] +name = "anyhow" +version = "1.0.90" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "37bf3594c4c988a53154954629820791dde498571819ae4ca50ca811e060cc95" + +[[package]] +name = "async-stream" +version = "0.3.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0b5a71a6f37880a80d1d7f19efd781e4b5de42c88f0722cc13bcb6cc2cfe8476" +dependencies = [ + "async-stream-impl", + "futures-core", + "pin-project-lite", +] + +[[package]] +name = "async-stream-impl" +version = "0.3.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c7c24de15d275a1ecfd47a380fb4d5ec9bfe0933f309ed5e705b775596a3574d" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "async-trait" +version = "0.1.83" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "721cae7de5c34fbb2acd27e21e6d2cf7b886dce0c27388d46c4e6c47ea4318dd" +dependencies = [ + "proc-macro2", + 
"quote", + "syn", +] + +[[package]] +name = "atomic-waker" +version = "1.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1505bd5d3d116872e7271a6d4e16d81d0c8570876c8de68093a09ac269d8aac0" + [[package]] name = "autocfg" version = "1.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d468802bab17cbc0cc575e9b053f41e72aa36bfa6b7f55e3529ffa43161b97fa" +[[package]] +name = "axum" +version = "0.7.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3a6c9af12842a67734c9a2e355436e5d03b22383ed60cf13cd0c18fbfe3dcbcf" +dependencies = [ + "async-trait", + "axum-core", + "bytes", + "futures-util", + "http", + "http-body", + "http-body-util", + "itoa", + "matchit", + "memchr", + "mime", + "percent-encoding", + "pin-project-lite", + "rustversion", + "serde", + "sync_wrapper", + "tower", + "tower-layer", + "tower-service", +] + +[[package]] +name = "axum-core" +version = "0.4.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "09f2bd6146b97ae3359fa0cc6d6b376d9539582c7b4220f041a33ec24c226199" +dependencies = [ + "async-trait", + "bytes", + "futures-util", + "http", + "http-body", + "http-body-util", + "mime", + "pin-project-lite", + "rustversion", + "sync_wrapper", + "tower-layer", + "tower-service", +] + [[package]] name = "backtrace" version = "0.3.66" @@ -42,11 +140,17 @@ dependencies = [ "cc", "cfg-if", "libc", - "miniz_oxide", + "miniz_oxide 0.5.4", "object", "rustc-demangle", ] +[[package]] +name = "base64" +version = "0.21.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9d297deb1925b89f2ccc13d7635fa0714f12c87adce1c75356b39ca9b7178567" + [[package]] name = "base64" version = "0.22.1" @@ -119,6 +223,69 @@ dependencies = [ "tracing-error", ] +[[package]] +name = "console-api" +version = "0.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"86ed14aa9c9f927213c6e4f3ef75faaad3406134efe84ba2cb7983431d5f0931" +dependencies = [ + "futures-core", + "prost", + "prost-types", + "tonic", + "tracing-core", +] + +[[package]] +name = "console-subscriber" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e2e3a111a37f3333946ebf9da370ba5c5577b18eb342ec683eb488dd21980302" +dependencies = [ + "console-api", + "crossbeam-channel", + "crossbeam-utils", + "futures-task", + "hdrhistogram", + "humantime", + "hyper-util", + "prost", + "prost-types", + "serde", + "serde_json", + "thread_local", + "tokio", + "tokio-stream", + "tonic", + "tracing", + "tracing-core", + "tracing-subscriber", +] + +[[package]] +name = "crc32fast" +version = "1.4.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a97769d94ddab943e4510d138150169a2758b5ef3eb191a9ee688de3e23ef7b3" +dependencies = [ + "cfg-if", +] + +[[package]] +name = "crossbeam-channel" +version = "0.5.13" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "33480d6946193aa8033910124896ca395333cae7e2d1113d1fef6c3272217df2" +dependencies = [ + "crossbeam-utils", +] + +[[package]] +name = "crossbeam-utils" +version = "0.8.20" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "22ec99545bb0ed0ea7bb9b8e1e9122ea386ff8a48c0922e43f36d45ab09e0e80" + [[package]] name = "csv" version = "1.3.0" @@ -146,6 +313,12 @@ version = "0.15.7" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1aaf95b3e5c8f23aa320147307562d361db0ae0d51242340f558153b4eb2439b" +[[package]] +name = "either" +version = "1.13.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "60b1af1c220855b6ceac025d3f6ecdd2b7c4894bfe9cd9bda4fbb4bc7c0d4cf0" + [[package]] name = "enum-map" version = "2.7.3" @@ -167,6 +340,12 @@ dependencies = [ "syn", ] +[[package]] +name = "equivalent" +version = "1.0.1" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "5443807d6dff69373d433ab9ef5378ad8df50ca6298caf15de6e52e24aaf54d5" + [[package]] name = "eyre" version = "0.6.12" @@ -177,6 +356,16 @@ dependencies = [ "once_cell", ] +[[package]] +name = "flate2" +version = "1.0.34" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a1b589b4dc103969ad3cf85c950899926ec64300a1a46d76c03a6072957036f0" +dependencies = [ + "crc32fast", + "miniz_oxide 0.8.0", +] + [[package]] name = "fnv" version = "1.0.7" @@ -259,6 +448,50 @@ version = "0.26.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "22030e2c5a68ec659fde1e949a745124b48e6fa8b045b7ed5bd1fe4ccc5c4e5d" +[[package]] +name = "h2" +version = "0.4.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "524e8ac6999421f49a846c2d4411f337e53497d8ec55d67753beffa43c5d9205" +dependencies = [ + "atomic-waker", + "bytes", + "fnv", + "futures-core", + "futures-sink", + "http", + "indexmap 2.6.0", + "slab", + "tokio", + "tokio-util", + "tracing", +] + +[[package]] +name = "hashbrown" +version = "0.12.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8a9ee70c43aaf417c914396645a0fa852624801b24ebb7ae78fe8272889ac888" + +[[package]] +name = "hashbrown" +version = "0.15.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1e087f84d4f86bf4b218b927129862374b72199ae7d8657835f1e89000eea4fb" + +[[package]] +name = "hdrhistogram" +version = "7.5.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "765c9198f173dd59ce26ff9f95ef0aafd0a0fe01fb9d72841bc5066a4c06511d" +dependencies = [ + "base64 0.21.7", + "byteorder", + "flate2", + "nom", + "num-traits", +] + [[package]] name = "hermit-abi" version = "0.3.9" @@ -305,6 +538,18 @@ version = "1.8.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = 
"d897f394bad6a705d5f4104762e116a75639e470d80901eed05a860a95cb1904" +[[package]] +name = "httpdate" +version = "1.0.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "df3b46402a9d5adb4c86a0cf463f42e19994e3ee891101b1841f30a545cb49a9" + +[[package]] +name = "humantime" +version = "2.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9a3a5bfb195931eeb336b2a7b4d761daec841b97f947d34394601737a7bba5e4" + [[package]] name = "hyper" version = "1.4.1" @@ -314,9 +559,11 @@ dependencies = [ "bytes", "futures-channel", "futures-util", + "h2", "http", "http-body", "httparse", + "httpdate", "itoa", "pin-project-lite", "smallvec", @@ -342,6 +589,19 @@ dependencies = [ "webpki-roots", ] +[[package]] +name = "hyper-timeout" +version = "0.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3203a961e5c83b6f5498933e78b6b263e208c197b63e9c6c53cc82ffd3f63793" +dependencies = [ + "hyper", + "hyper-util", + "pin-project-lite", + "tokio", + "tower-service", +] + [[package]] name = "hyper-util" version = "0.1.9" @@ -377,17 +637,46 @@ version = "0.3.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ce23b50ad8242c51a442f3ff322d56b02f08852c77e4c0b4d3fd684abc89c683" +[[package]] +name = "indexmap" +version = "1.9.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bd070e393353796e801d209ad339e89596eb4c8d430d18ede6a1cced8fafbd99" +dependencies = [ + "autocfg", + "hashbrown 0.12.3", +] + +[[package]] +name = "indexmap" +version = "2.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "707907fe3c25f5424cce2cb7e1cbcafee6bdbe735ca90ef77c29e84591e5b9da" +dependencies = [ + "equivalent", + "hashbrown 0.15.0", +] + [[package]] name = "ipnet" version = "2.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "879d54834c8c76457ef4293a689b2a8c59b076067ad77b15efafbb05f92a592b" +[[package]] +name = 
"itertools" +version = "0.13.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "413ee7dfc52ee1a4949ceeb7dbc8a33f2d6c088194d9f922fb8318faf1f01186" +dependencies = [ + "either", +] + [[package]] name = "itoa" -version = "1.0.4" +version = "1.0.11" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4217ad341ebadf8d8e724e264f13e593e0648f5b3e94b3896a5df283be015ecc" +checksum = "49f1f14873335454500d59611f1cf4a4b0f786f9ac11f4312a78e4cf2566695b" [[package]] name = "js-sys" @@ -435,6 +724,12 @@ dependencies = [ "regex-automata 0.1.10", ] +[[package]] +name = "matchit" +version = "0.7.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0e7465ac9959cc2b1404e8e2367b43684a6d13790fe23056cc8c6c5a6b7bcb94" + [[package]] name = "memchr" version = "2.5.0" @@ -447,6 +742,12 @@ version = "0.3.16" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2a60c7ce501c71e03a9c9c0d35b861413ae925bd979cc7a4e30d060069aaac8d" +[[package]] +name = "minimal-lexical" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "68354c5c6bd36d73ff3feceb05efa59b6acb7626617f4962be322a825e61f79a" + [[package]] name = "miniz_oxide" version = "0.5.4" @@ -456,6 +757,15 @@ dependencies = [ "adler", ] +[[package]] +name = "miniz_oxide" +version = "0.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e2d80299ef12ff69b16a84bb182e3b9df68b5a91574d3d4fa6e41b65deec4df1" +dependencies = [ + "adler2", +] + [[package]] name = "mio" version = "1.0.2" @@ -468,6 +778,16 @@ dependencies = [ "windows-sys 0.52.0", ] +[[package]] +name = "nom" +version = "7.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d273983c5a657a70a3e8f2a01329822f3b8c8172b73826411a55751e404a0a4a" +dependencies = [ + "memchr", + "minimal-lexical", +] + [[package]] name = "nu-ansi-term" version = "0.46.0" @@ -478,6 +798,15 @@ dependencies = 
[ "winapi", ] +[[package]] +name = "num-traits" +version = "0.2.19" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "071dfc062690e90b734c0b2273ce72ad0ffa95f0c74596bc250dcfd960262841" +dependencies = [ + "autocfg", +] + [[package]] name = "object" version = "0.29.0" @@ -534,6 +863,26 @@ version = "2.3.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e3148f5046208a5d56bcfc03053e3ca6334e51da8dfb19b6cdc8b306fae3283e" +[[package]] +name = "pin-project" +version = "1.1.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "baf123a161dde1e524adf36f90bc5d8d3462824a9c43553ad07a8183161189ec" +dependencies = [ + "pin-project-internal", +] + +[[package]] +name = "pin-project-internal" +version = "1.1.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a4502d8515ca9f32f1fb543d987f63d95a14934883db45bdb48060b6b69257f8" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + [[package]] name = "pin-project-lite" version = "0.2.14" @@ -564,6 +913,38 @@ dependencies = [ "unicode-ident", ] +[[package]] +name = "prost" +version = "0.13.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7b0487d90e047de87f984913713b85c601c05609aad5b0df4b4573fbf69aa13f" +dependencies = [ + "bytes", + "prost-derive", +] + +[[package]] +name = "prost-derive" +version = "0.13.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e9552f850d5f0964a4e4d0bf306459ac29323ddfbae05e35a7c0d35cb0803cc5" +dependencies = [ + "anyhow", + "itertools", + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "prost-types" +version = "0.13.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4759aa0d3a6232fb8dbdb97b61de2c20047c68aca932c7ed76da9d788508d670" +dependencies = [ + "prost", +] + [[package]] name = "quinn" version = "0.11.5" @@ -710,7 +1091,7 @@ version = "0.12.8" source = 
"registry+https://github.com/rust-lang/crates.io-index" checksum = "f713147fbe92361e52392c73b8c9e48c04c6625bce969ef54dc901e58e042a7b" dependencies = [ - "base64", + "base64 0.22.1", "bytes", "futures-channel", "futures-core", @@ -767,6 +1148,7 @@ name = "rust-repos" version = "0.2.0" dependencies = [ "color-eyre", + "console-subscriber", "csv", "dotenvy", "enum-map", @@ -832,6 +1214,12 @@ dependencies = [ "untrusted", ] +[[package]] +name = "rustversion" +version = "1.0.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0e819f2bc632f285be6d7cd36e25940d45b2391dd6d9b939e79de557f7014248" + [[package]] name = "ryu" version = "1.0.11" @@ -1055,6 +1443,86 @@ dependencies = [ "tokio", ] +[[package]] +name = "tokio-stream" +version = "0.1.16" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4f4e6ce100d0eb49a2734f8c0812bcd324cf357d21810932c5df6b96ef2b86f1" +dependencies = [ + "futures-core", + "pin-project-lite", + "tokio", +] + +[[package]] +name = "tokio-util" +version = "0.7.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "61e7c3654c13bcd040d4a03abee2c75b1d14a37b423cf5a813ceae1cc903ec6a" +dependencies = [ + "bytes", + "futures-core", + "futures-sink", + "pin-project-lite", + "tokio", +] + +[[package]] +name = "tonic" +version = "0.12.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "877c5b330756d856ffcc4553ab34a5684481ade925ecc54bcd1bf02b1d0d4d52" +dependencies = [ + "async-stream", + "async-trait", + "axum", + "base64 0.22.1", + "bytes", + "h2", + "http", + "http-body", + "http-body-util", + "hyper", + "hyper-timeout", + "hyper-util", + "percent-encoding", + "pin-project", + "prost", + "socket2", + "tokio", + "tokio-stream", + "tower", + "tower-layer", + "tower-service", + "tracing", +] + +[[package]] +name = "tower" +version = "0.4.13" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"b8fa9be0de6cf49e536ce1851f987bd21a43b771b09473c3549a6c853db37c1c" +dependencies = [ + "futures-core", + "futures-util", + "indexmap 1.9.3", + "pin-project", + "pin-project-lite", + "rand", + "slab", + "tokio", + "tokio-util", + "tower-layer", + "tower-service", + "tracing", +] + +[[package]] +name = "tower-layer" +version = "0.3.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "121c2a6cda46980bb0fcd1647ffaf6cd3fc79a013de288782836f6df9c48780e" + [[package]] name = "tower-service" version = "0.3.2" diff --git a/Cargo.toml b/Cargo.toml index 84e48c8e65e..50c7f40ce73 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -25,3 +25,5 @@ color-eyre = "0.6.3" tracing = "0.1.40" enum-map = { version = "2.7.3", features = ["serde"] } thiserror = "1" + +console-subscriber = "0.4" diff --git a/src/github/api.rs b/src/github/api.rs index ab092de3d7c..6ac6b5e95bb 100644 --- a/src/github/api.rs +++ b/src/github/api.rs @@ -12,7 +12,7 @@ use serde_derive::Deserialize; use serde_json::json; use thiserror::Error; use tokio::{task::yield_now, time::sleep}; -use tracing::{debug, error, warn}; +use tracing::{error, trace, warn}; use crate::{config::Config, data::Repo}; @@ -25,11 +25,12 @@ pub struct Github { current_token_index: AtomicUsize, } -#[derive(Deserialize)] +#[derive(Debug, Deserialize)] pub struct GitHubError { message: String, - #[serde(rename = "type")] - type_: Option, + + #[allow(unused)] + r#type: Option, } #[derive(Clone, Debug, Deserialize)] @@ -45,6 +46,7 @@ pub struct GithubTree { #[derive(Debug, Deserialize)] pub struct RestRepository { pub id: usize, + #[allow(unused, reason = "Useful for debugging, if something does go wrong")] pub full_name: String, pub node_id: String, pub fork: bool, @@ -98,11 +100,6 @@ pub struct GraphLanguage { pub name: String, } -#[derive(Debug, Deserialize)] -pub struct GraphRef { - pub name: String, -} - #[derive(Debug, Error)] pub enum Error { #[error("reqwest error occurred {0:?}")] @@ -110,7 +107,7 @@ pub enum 
Error { #[error("rate limit hit {0}")] RateLimit(StatusCode), #[error("other http error: {0}")] - HttpError(StatusCode), + HttpStatus(StatusCode), #[error("Response did not contain requested data")] EmptyData, @@ -118,6 +115,9 @@ pub enum Error { Io(#[from] std::io::Error), } +/// 100 is the max results per page of the GH API +pub(crate) const N: usize = 100; + const GRAPHQL_QUERY_REPOSITORIES: &str = " query($ids: [ID!]!) { nodes(ids: $ids) { @@ -153,12 +153,12 @@ impl Github { } fn build_request(&self, method: Method, url: &str) -> RequestBuilder { - let url = if !url.starts_with("https://") { - Cow::from(format!("https://api.github.com/{}", url)) - } else { + let url = if url.starts_with("https://") { Cow::from(url) + } else { + Cow::from(format!("https://api.github.com/{url}")) }; - debug!("Sending request to {url}"); + trace!("Sending request to {url}"); self.client .request(method, url.as_ref()) .header(header::AUTHORIZATION, format!("token {}", self.get_token())) @@ -182,6 +182,10 @@ impl Github { let data: GraphResponse = handle_response_json(resp).await?; + if let Some(errs) = data.errors { + warn!("GraphQL Errors: {:?}, \n {:#?}", data.message, errs); + } + data.data.ok_or_else(|| Error::EmptyData) } @@ -213,7 +217,7 @@ impl Github { pub async fn tree(&self, repo: &Repo, recursive: bool) -> Result { let mut url = format!("repos/{}/git/trees/HEAD", repo.name); if recursive { - url = format!("{url}?recursive=1") + url = format!("{url}?recursive=1"); } self.retry(|| async { @@ -230,7 +234,10 @@ impl Github { let output: Vec = self .retry(|| async { let resp = self - .build_request(Method::GET, &format!("repositories?since={}", since)) + .build_request( + Method::GET, + &format!("repositories?since={since}&per_page={N}"), + ) .send() .await?; @@ -238,6 +245,10 @@ impl Github { }) .await?; + if output.len() != N { + warn!("Github API returned {} instead of {N} repos", output.len()); + } + Ok(output) } @@ -265,7 +276,7 @@ impl Github { return
Err(Error::Reqwest(reqwest_error)); } } - Err(err @ Error::HttpError(_)) => return Err(err), + Err(err @ Error::HttpStatus(_)) => return Err(err), Err(Error::RateLimit(_)) => { let mut wait = false; self.current_token_index @@ -288,7 +299,7 @@ impl Github { } // Yield - yield_now().await + yield_now().await; } } } @@ -314,9 +325,9 @@ async fn handle_response(resp: Response) -> Result { Err(Error::RateLimit(status)) } else { warn!("Http Error ({}): {}", status.as_u16(), error.message); - Err(Error::HttpError(status)) + Err(Error::HttpStatus(status)) } } else { - Err(Error::HttpError(status)) + Err(Error::HttpStatus(status)) } } diff --git a/src/github/mod.rs b/src/github/mod.rs index 91ea33d7429..16a67e05f53 100644 --- a/src/github/mod.rs +++ b/src/github/mod.rs @@ -49,9 +49,8 @@ impl Scraper { } async fn load_repositories(&self, repos: Vec) -> color_eyre::Result<()> { - debug!("Loading {} repos", repos.len()); - let mut graph_repos = self.gh.load_repositories(&repos).await?; + let mut js = JoinSet::new(); for repo in graph_repos.drain(..) 
{ if repo .languages @@ -60,34 +59,39 @@ impl Scraper { .filter_map(Option::as_ref) .any(|el| el.name == "Rust") { - let mut repo = repo.into_repo(false, false); - let files = self.gh.tree(&repo, false).await; - match files { - Ok(tree) => { - for node in tree.tree { - if node.path == "Cargo.toml" { - repo.has_cargo_toml = true; - } else if node.path == "Cargo.lock" { - repo.has_cargo_lock = true; + let this = self.clone(); + js.spawn(async move { + let mut repo = repo.into_repo(false, false); + let files = this.gh.tree(&repo, false).await; + match files { + Ok(tree) => { + for node in tree.tree { + if node.path == "Cargo.toml" { + repo.has_cargo_toml = true; + } else if node.path == "Cargo.lock" { + repo.has_cargo_lock = true; + } } } + Err(e) => { + warn!("Could not get tree for {}, error: {e:?}", repo.name); + } } - Err(e) => { - warn!("Could not get tree for {}, error: {e:?}", repo.name); - } - } - self.data.store_repo(Forge::Github, repo).await; + this.data.store_repo(Forge::Github, repo).await; + }); } } + js.join_all().await; + Ok(()) } pub async fn scrape(&self) -> color_eyre::Result<()> { let start = Instant::now(); - let mut to_load = Vec::with_capacity(100); + let mut to_load = Vec::with_capacity(api::N); let mut last_id = self.data.get_last_id(Forge::Github); @@ -106,7 +110,7 @@ impl Scraper { to_load.push(repo.node_id); - if to_load.len() == 100 { + if to_load.len() == api::N { let to_load_now = to_load.clone(); let this = self.clone(); js.spawn(async move { this.load_repositories(to_load_now).await }); @@ -116,8 +120,7 @@ impl Scraper { self.data.set_last_id(Forge::Github, last_id).await?; - while let Some(res) = js.join_next().await { - let res = res.unwrap(); + for res in js.join_all().await { if let Err(e) = res { warn!("Failed scraping repo: {:?}", e); } @@ -130,6 +133,8 @@ impl Scraper { break; } + debug!("Loaded 100 repos in {}ms", start_loop.elapsed().as_millis()); + if let Some(time) = 
Duration::from_millis(250).checked_sub(start_loop.elapsed()) { sleep(time).await; } diff --git a/src/main.rs b/src/main.rs index b685cfe5eaf..cd32011e378 100644 --- a/src/main.rs +++ b/src/main.rs @@ -21,7 +21,7 @@ fn get_tokens_from_env() -> Vec { if let Some(ts) = env_tokens { let ts = ts.split(','); for t in ts { - tokens.push(t.to_string()) + tokens.push(t.to_string()); } } @@ -36,6 +36,8 @@ async fn main() -> color_eyre::Result<()> { .with(EnvFilter::from_default_env()) .init(); + // console_subscriber::init(); + let github_token = get_tokens_from_env(); let timeout = env::var("RUST_REPOS_TIMEOUT")