Split client into its own file to fix HTTP 429.
Get AO3 from single-page view. Allow adding multiple stories at
once. Start fixing panics in parsers.
whispersilk committed Oct 22, 2022
1 parent b1f2cde commit 5ffbfaa
Showing 12 changed files with 429 additions and 375 deletions.
9 changes: 5 additions & 4 deletions Cargo.lock

Some generated files are not rendered by default.

1 change: 1 addition & 0 deletions Cargo.toml
@@ -18,4 +18,5 @@ regex = "1.6.0"
reqwest = { version = "0.11", features = ["cookies", "json"] }
rusqlite = { version = "0.28.0", features = ["bundled-full"] }
select = "0.5"
serde = "1.0.146"
tokio = { version = "1", features = ["full"] }
6 changes: 3 additions & 3 deletions src/args.rs
@@ -9,10 +9,10 @@ pub(crate) struct Args {

#[derive(Debug, Subcommand)]
pub(crate) enum Commands {
/// Add a story to the archive.
/// Add one or more stories to the archive.
Add {
/// The URL of the story to add.
story: String,
/// The URLs of the story or stories to add.
stories: Vec<String>,
},

/// Check for updates to stories in the archive.
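For context, a minimal standalone sketch (not part of this commit) of why the switch to stories: Vec<String> allows adding multiple stories at once: with clap's derive API, a bare Vec<String> positional collects every trailing argument. The struct name and example URLs below are made up, and clap's derive feature is assumed to be enabled, as the Subcommand derive above suggests.

use clap::Parser;

// Hypothetical standalone parser mirroring the new `stories: Vec<String>` field.
#[derive(Debug, Parser)]
struct AddArgs {
    /// The URLs of the story or stories to add.
    stories: Vec<String>,
}

fn main() {
    // e.g. `add https://example.com/story/1 https://example.com/story/2`
    let args = AddArgs::parse();
    println!("{} stories to add", args.stories.len());
}
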
77 changes: 77 additions & 0 deletions src/client.rs
@@ -0,0 +1,77 @@
use reqwest::{Client, Response, StatusCode};
use serde::ser::Serialize;

use std::time::Duration;

use crate::error::ArchiveError;

static CLIENT: once_cell::sync::OnceCell<Client> = once_cell::sync::OnceCell::new();

pub async fn get(url: &str) -> Result<Response, ArchiveError> {
    let client: &Client =
        CLIENT.get_or_init(|| Client::builder().cookie_store(true).build().unwrap());
    let mut response = client.get(url).send().await?;
    loop {
        match response.status() {
            StatusCode::TOO_MANY_REQUESTS => {
                let base_url = &url[url.find("://").unwrap() + 3..];
                let base_url = &base_url[0..base_url.find("/").unwrap_or(base_url.len())];
                let time_to_wait: String = response.headers().get("retry-after").map_or_else(
                    || "60".to_owned(),
                    |v| {
                        v.to_str()
                            .map(|ok| ok.to_owned())
                            .unwrap_or("60".to_owned())
                    },
                );
                let time_to_wait = u64::from_str_radix(&time_to_wait, 10).expect(&format!(
                    "retry-after header {} is not a number",
                    time_to_wait
                ));
                println!(
                    "Too many requests to {}. Sleeping for {} seconds.",
                    base_url, time_to_wait
                );
                tokio::time::sleep(Duration::from_secs(time_to_wait)).await;
                response = client.get(url).send().await?;
            }
            _ => break Ok(response),
        }
    }
}

pub async fn get_with_query<T: Serialize + ?Sized>(
    url: &str,
    query: &T,
) -> Result<Response, ArchiveError> {
    let client: &Client =
        CLIENT.get_or_init(|| Client::builder().cookie_store(true).build().unwrap());
    let mut response = client.get(url).query(query).send().await?;
    loop {
        match response.status() {
            StatusCode::TOO_MANY_REQUESTS => {
                let base_url = &url[url.find("://").unwrap() + 3..];
                let base_url = &base_url[0..base_url.find("/").unwrap_or(base_url.len())];
                let time_to_wait: String = response.headers().get("retry-after").map_or_else(
                    || "60".to_owned(),
                    |v| {
                        v.to_str()
                            .map(|ok| ok.to_owned())
                            .unwrap_or("60".to_owned())
                    },
                );
                let time_to_wait = u64::from_str_radix(&time_to_wait, 10).expect(&format!(
                    "retry-after header {} is not a number",
                    time_to_wait
                ));
                println!(
                    "Too many requests to {}. Sleeping for {} seconds.",
                    base_url, time_to_wait
                );
                tokio::time::sleep(Duration::from_secs(time_to_wait)).await;
                response = client.get(url).query(query).send().await?;
            }
            _ => break Ok(response),
        }
    }
}
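For illustration, a short sketch (not part of this commit) of how another module in the crate might use this shared client instead of constructing its own reqwest::Client. The helper name and its use of response.text() are assumptions; only client::get and the ArchiveError conversions come from the code above.

use crate::client;
use crate::error::ArchiveError;

// Hypothetical caller: fetch a page body through the shared client, which
// already sleeps and retries when the server answers 429 Too Many Requests.
async fn fetch_page_text(url: &str) -> Result<String, ArchiveError> {
    let response = client::get(url).await?;
    // The ? conversion from reqwest::Error works the same way as in the functions above.
    let body = response.text().await?;
    Ok(body)
}
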
7 changes: 7 additions & 0 deletions src/error.rs
@@ -4,6 +4,8 @@ use std::{error::Error, fmt};
pub enum ArchiveError {
Internal(String),
BadSource(String),
NoIdInSource(String, String),
PageError(String),
StoryNotExists(String),
Io(std::io::Error),
Request(reqwest::Error),
@@ -16,6 +18,11 @@ impl fmt::Display for ArchiveError {
match *self {
Self::Internal(ref s) => write!(f, "Internal error: {}", s),
Self::BadSource(ref s) => write!(f, "Could not convert URL {} to a story source", s),
Self::NoIdInSource(ref url, ref name) => write!(
f,
"Url {url} maps to source {name} and must contain a story ID, but does not"
),
Self::PageError(ref s) => write!(f, "{}", s),
Self::StoryNotExists(ref s) => write!(
f,
"Story {} does not exist in the archive. Try adding it first.",
49 changes: 36 additions & 13 deletions src/main.rs
@@ -8,6 +8,7 @@ use self::error::ArchiveError;
use self::structs::{Content, StorySource, SOURCES_LIST};

mod args;
mod client;
mod error;
mod parser;
mod sql;
@@ -19,7 +20,7 @@ async fn main() -> Result<(), ArchiveError> {
let conn = Connection::open("/home/daniel/Documents/Code/fic_archive/test_db.db")?;

match args.command {
Commands::Add { story } => add_story(story, &conn).await?,
Commands::Add { stories } => add_stories(stories, &conn).await?,
Commands::Update {
story,
force_refresh,
@@ -72,18 +73,40 @@ async fn main() -> Result<(), ArchiveError> {
Ok(())
}

async fn add_story(story: String, conn: &Connection) -> Result<(), ArchiveError> {
let source = StorySource::from_url(story.as_str())?;
if sql::story_exists_with_id(conn, story.as_str())? {
println!("Story already exists in the archive. Updating...");
update_archive(Some(story), false, conn).await
async fn add_stories(stories: Vec<String>, conn: &Connection) -> Result<(), ArchiveError> {
let mut errors: Vec<ArchiveError> = Vec::new();
for story in stories.iter() {
match StorySource::from_url(&story) {
Ok(source) => match add_story(source, conn).await {
Ok(_) => (),
Err(err) => errors.push(err),
},
Err(err) => errors.push(err),
};
}
errors.into_iter().next().map(|e| Err(e)).unwrap_or(Ok(()))
}

async fn add_story(source: StorySource, conn: &Connection) -> Result<(), ArchiveError> {
let exists = sql::story_exists_with_id(conn, &source.to_id())?;
let url = source.to_url();
if exists {
let new_chapters = update_story(source, false, conn).await?;
println!(
"Updated story at {} with {} new chapters.",
url, new_chapters
);
} else {
let parser = source.parser();
let story = parser.get_story(source).await?;
let story = source.parser().get_story(source).await?;
sql::save_story(conn, &story)?;
println!("Saved {} ({} chapters)", story.name, story.num_chapters());
Ok(())
println!(
"Added story {} ({} chapter{})",
story.name,
story.num_chapters(),
if story.num_chapters() == 1 { "" } else { "s" }
);
}
Ok(())
}

async fn update_archive(
Expand All @@ -110,8 +133,7 @@ async fn update_story(
let existing_story = sql::get_story_by_id(conn, source.to_id().as_str())?
.ok_or_else(|| ArchiveError::StoryNotExists(source.to_url()))?;
let parser = source.parser();
let client = parser.get_client();
let new_skeleton = parser.get_skeleton(&client, source).await?;
let new_skeleton = parser.get_skeleton(source).await?;

// Get a list of existing chapters and a list of fetched chapters, then filter to only fetched chapters that aren't saved.
let mut existing_chapters: HashSet<String> =
@@ -133,7 +155,7 @@ async fn update_story(
// If there are any new chapters, fetch the story and save them.
let mut added_chapters = 0;
if !new_chapters.is_empty() {
let new_story = parser.fill_skeleton(&client, new_skeleton).await?;
let new_story = parser.fill_skeleton(new_skeleton).await?;
for chapter in new_chapters.into_iter() {
match new_story.find_chapter(chapter) {
Some(found) => {
@@ -166,6 +188,7 @@ async fn delete_story(search: String, conn: &Connection) -> Result<(), ArchiveError> {
match matches.len() {
0 => println!("No matching stories found. Please try another search."),
// 1 => sql::delete_story_by_id(matches[0])?,
1 => println!("Got one story back! Id: {}", matches[0]),
_ => todo!(),
}
Ok(())
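One line in add_stories above is easy to misread: errors.into_iter().next().map(|e| Err(e)).unwrap_or(Ok(())) means every URL is attempted, and afterwards the first collected error (if any) is returned. A small self-contained illustration of that pattern, using String in place of ArchiveError:

fn first_error_or_ok(errors: Vec<String>) -> Result<(), String> {
    // Same shape as the line in add_stories: Ok(()) for an empty list,
    // otherwise Err carrying the first element.
    errors.into_iter().next().map(Err).unwrap_or(Ok(()))
}

fn main() {
    assert_eq!(first_error_or_ok(vec![]), Ok(()));
    assert_eq!(
        first_error_or_ok(vec!["bad URL".to_owned(), "timed out".to_owned()]),
        Err("bad URL".to_owned())
    );
}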