From 64237048f3d54675510f79fbc7c600e4890ebca1 Mon Sep 17 00:00:00 2001 From: Whispersilk Date: Fri, 25 Nov 2022 18:07:19 -0500 Subject: [PATCH] Add XenForo support and begin TUI work. --- Cargo.lock | 79 ++++ Cargo.toml | 2 + src/args.rs | 4 +- src/client.rs | 9 +- src/error.rs | 8 + src/main.rs | 245 ++++++----- src/parser/ao3.rs | 284 +++++++++---- src/parser/katalepsis.rs | 25 +- src/parser/mod.rs | 1 + src/parser/royalroad.rs | 41 +- src/parser/xenforo.rs | 254 +++++++++++ src/sql.rs | 880 ++++++++++++++++++++++----------------- src/structs.rs | 221 +++++++--- src/tui/event.rs | 56 +++ src/tui/mod.rs | 41 ++ 15 files changed, 1483 insertions(+), 667 deletions(-) create mode 100644 src/parser/xenforo.rs create mode 100644 src/tui/event.rs create mode 100644 src/tui/mod.rs diff --git a/Cargo.lock b/Cargo.lock index 7c1f336..b034303 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -110,6 +110,12 @@ version = "1.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ec8a7b6a70fde80372154c65702f00a0f56f3e1c36abbc6c440484be248856db" +[[package]] +name = "cassowary" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "df8670b8c7b9dae1793364eafadf7239c40d669904660c5960d74cfd80b46a53" + [[package]] name = "cc" version = "1.0.73" @@ -279,6 +285,31 @@ dependencies = [ "once_cell", ] +[[package]] +name = "crossterm" +version = "0.25.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e64e6c0fbe2c17357405f7c758c1ef960fce08bdfb2c03d88d2a18d7e09c4b67" +dependencies = [ + "bitflags", + "crossterm_winapi", + "libc", + "mio", + "parking_lot", + "signal-hook", + "signal-hook-mio", + "winapi", +] + +[[package]] +name = "crossterm_winapi" +version = "0.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2ae1b35a484aa10e07fe0638d02301c5ad24de82d310ccbd2f3693da5f09bf1c" +dependencies = [ + "winapi", +] + [[package]] name = "csv" version = "1.1.6" @@ -344,6 
+375,7 @@ dependencies = [ "async-trait", "chrono", "clap", + "crossterm", "futures", "html2md", "once_cell", @@ -355,6 +387,7 @@ dependencies = [ "select", "serde", "tokio", + "tui", ] [[package]] @@ -1504,6 +1537,27 @@ dependencies = [ "serde", ] +[[package]] +name = "signal-hook" +version = "0.3.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a253b5e89e2698464fc26b545c9edceb338e18a89effeeecfea192c3025be29d" +dependencies = [ + "libc", + "signal-hook-registry", +] + +[[package]] +name = "signal-hook-mio" +version = "0.2.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "29ad2e15f37ec9a6cc544097b78a1ec90001e9f71b81338ca39f430adaca99af" +dependencies = [ + "libc", + "mio", + "signal-hook", +] + [[package]] name = "signal-hook-registry" version = "1.4.0" @@ -1773,6 +1827,19 @@ version = "0.2.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "59547bce71d9c38b83d9c0e92b6066c4253371f15005def0c30d9657f50c7642" +[[package]] +name = "tui" +version = "0.19.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ccdd26cbd674007e649a272da4475fb666d3aa0ad0531da7136db6fab0e5bad1" +dependencies = [ + "bitflags", + "cassowary", + "crossterm", + "unicode-segmentation", + "unicode-width", +] + [[package]] name = "unicode-bidi" version = "0.3.8" @@ -1794,6 +1861,18 @@ dependencies = [ "tinyvec", ] +[[package]] +name = "unicode-segmentation" +version = "1.10.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0fdbf052a0783de01e944a6ce7a8cb939e295b1e7be835a1112c3b9a7f047a5a" + +[[package]] +name = "unicode-width" +version = "0.1.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c0edd1e5b14653f783770bce4a4dabb4a5108a5370a5f5d8cfe8710c361f6c8b" + [[package]] name = "url" version = "2.2.2" diff --git a/Cargo.toml b/Cargo.toml index 20db962..a7f3f4a 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -9,6 +9,7 @@ 
edition = "2021" async-trait = "0.1.58" chrono = { version = "0.4", features = ["default", "serde"] } clap = { version = "4.0.9", features = ["derive"] } +crossterm = "0.25.0" futures = "0.3.0" html2md = "0.2.13" once_cell = "1.14" @@ -20,3 +21,4 @@ rusqlite = { version = "0.28.0", features = ["bundled-full"] } select = "0.5" serde = "1.0.146" tokio = { version = "1", features = ["full"] } +tui = "0.19.0" \ No newline at end of file diff --git a/src/args.rs b/src/args.rs index d54d487..c251196 100644 --- a/src/args.rs +++ b/src/args.rs @@ -3,8 +3,10 @@ use clap::{Parser, Subcommand}; #[derive(Parser, Debug)] #[command(author, version, about)] pub(crate) struct Args { + #[arg(global = true, short, long, default_value = "fic_archive.db")] + pub db: String, #[command(subcommand)] - pub command: Commands, + pub command: Option, } #[derive(Debug, Subcommand)] diff --git a/src/client.rs b/src/client.rs index 305d5cd..b4165e6 100644 --- a/src/client.rs +++ b/src/client.rs @@ -3,11 +3,11 @@ use serde::ser::Serialize; use std::time::Duration; -use crate::error::ArchiveError; +use crate::Result; static CLIENT: once_cell::sync::OnceCell = once_cell::sync::OnceCell::new(); -pub async fn get(url: &str) -> Result { +pub async fn get(url: &str) -> Result { let client: &Client = CLIENT.get_or_init(|| Client::builder().cookie_store(true).build().unwrap()); let mut response = client.get(url).send().await?; @@ -40,10 +40,7 @@ pub async fn get(url: &str) -> Result { } } -pub async fn get_with_query( - url: &str, - query: &T, -) -> Result { +pub async fn get_with_query(url: &str, query: &T) -> Result { let client: &Client = CLIENT.get_or_init(|| Client::builder().cookie_store(true).build().unwrap()); let mut response = client.get(url).query(query).send().await?; diff --git a/src/error.rs b/src/error.rs index 04e3a82..5384ce1 100644 --- a/src/error.rs +++ b/src/error.rs @@ -11,6 +11,7 @@ pub enum ArchiveError { Request(reqwest::Error), Database(rusqlite::Error), 
Parse(chrono::format::ParseError), + ParseInt(std::num::ParseIntError), } impl fmt::Display for ArchiveError { @@ -32,6 +33,7 @@ impl fmt::Display for ArchiveError { Self::Request(ref err) => err.fmt(f), Self::Database(ref err) => err.fmt(f), Self::Parse(ref err) => err.fmt(f), + Self::ParseInt(ref err) => err.fmt(f), } } } @@ -60,4 +62,10 @@ impl From for ArchiveError { } } +impl From for ArchiveError { + fn from(err: std::num::ParseIntError) -> ArchiveError { + Self::ParseInt(err) + } +} + impl Error for ArchiveError {} diff --git a/src/main.rs b/src/main.rs index 158adbc..e2b08c6 100644 --- a/src/main.rs +++ b/src/main.rs @@ -1,11 +1,12 @@ use clap::Parser; -use rusqlite::Connection; - +use futures::future::join_all; use std::collections::HashSet; -use self::args::{Args, Commands}; +use self::args::{Args, Commands::*}; use self::error::ArchiveError; +use self::sql::Database; use self::structs::{Content, StorySource, SOURCES_LIST}; +use self::tui::start_tui; mod args; mod client; @@ -13,71 +14,62 @@ mod error; mod parser; mod sql; mod structs; +mod tui; + +pub type Result = std::result::Result; #[tokio::main] -async fn main() -> Result<(), ArchiveError> { +async fn main() -> Result<()> { let args = Args::parse(); - let conn = Connection::open("/home/daniel/Documents/Code/fic_archive/test_db.db")?; + let db = Database::new(&args.db)?; match args.command { - Commands::Add { stories } => add_stories(stories, &conn).await?, - Commands::Update { - story, - force_refresh, - } => update_archive(story, force_refresh, &conn).await?, - Commands::Delete { search } => delete_story(search, &conn).await?, - Commands::Export { .. } => { - todo!() - } - Commands::List { .. 
} => list_stories(&conn).await?, - Commands::ListSources => { - let len = - SOURCES_LIST.into_iter().fold(0, |acc, i| acc + i.len()) + SOURCES_LIST.len() - 1; - println!( + Some(sub) => match sub { + Add { stories } => add_stories(stories, &db).await?, + Update { + story, + force_refresh, + } => { + update_archive( + match story { + Some(s) => Some(StorySource::from_url(&s)?), + None => None, + }, + force_refresh, + &db, + ) + .await? + } + Delete { search } => delete_story(search, &db).await?, + Export { .. } => { + todo!() + } + List { .. } => list_stories(&db).await?, + ListSources => println!( "{}", - SOURCES_LIST - .into_iter() - .fold(String::with_capacity(len), |mut acc, i| { - acc.push_str(i); - if acc.len() < acc.capacity() { + SOURCES_LIST.into_iter().rev().enumerate().rev().fold( + String::new(), + |mut acc, (idx, source)| { + acc.push_str(source); + if idx > 0 { acc.push('\n'); } acc - }) - ); - } + } + ) + ), + }, + None => start_tui(args).await?, } - // let source = StorySource::from_url("https://www.royalroad.com/fiction/39408/beware-of-chicken"); - // let source = StorySource::from_url("https://www.fanfiction.net/s/3676590"); - // let source = StorySource::from_url("https://katalepsis.net"); - // let source = StorySource::from_url("https://www.royalroad.com/fiction/59450/bioshifter"); - // let source = StorySource::from_url("https://www.royalroad.com/fiction/40373/vigor-mortis"); - // let source = StorySource::from_url("https://archiveofourown.org/works/35394595"); - - // let existing_story = sql::get_story_by_id(&conn, &source.to_id())?; - - // let parser: &dyn Parser = match source { - // StorySource::AO3(_) => &AO3Parser {}, - // StorySource::Katalepsis => &KatalepsisParser {}, - // StorySource::RoyalRoad(_) => &RoyalRoadParser {}, - // }; - // let story = parser.get_story(&runtime, &TextFormat::Markdown, source)?; - - // if existing_story.is_none() { - // sql::save_story(&conn, &story)?; - // println!("Saved story!"); - // } else { - // 
println!("Not saving story because it already exists!"); - // } Ok(()) } -async fn add_stories(stories: Vec, conn: &Connection) -> Result<(), ArchiveError> { +async fn add_stories(stories: Vec, db: &Database) -> Result<()> { let mut errors: Vec = Vec::new(); for story in stories.iter() { match StorySource::from_url(&story) { - Ok(source) => match add_story(source, conn).await { + Ok(source) => match add_story(source, db).await { Ok(_) => (), Err(err) => errors.push(err), }, @@ -87,18 +79,18 @@ async fn add_stories(stories: Vec, conn: &Connection) -> Result<(), Arch errors.into_iter().next().map(|e| Err(e)).unwrap_or(Ok(())) } -async fn add_story(source: StorySource, conn: &Connection) -> Result<(), ArchiveError> { - let exists = sql::story_exists_with_id(conn, &source.to_id())?; +async fn add_story(source: StorySource, db: &Database) -> Result<()> { + let exists = db.story_exists_with_id(&source.to_id())?; let url = source.to_url(); if exists { - let new_chapters = update_story(source, false, conn).await?; + let new_chapters = update_story(source, false, db).await?; println!( "Updated story at {} with {} new chapters.", url, new_chapters ); } else { let story = source.parser().get_story(source).await?; - sql::save_story(conn, &story)?; + db.save_story(&story)?; println!( "Added story {} ({} chapter{})", story.name, @@ -110,68 +102,97 @@ async fn add_story(source: StorySource, conn: &Connection) -> Result<(), Archive } async fn update_archive( - story: Option, + story: Option, force_refresh: bool, - conn: &Connection, -) -> Result<(), ArchiveError> { - if story.is_some() { - let url = story.unwrap(); - let source = StorySource::from_url(url.as_str())?; - let result = update_story(source, force_refresh, conn).await?; - println!("Updated story at {} with {} new chapters.", url, result); - Ok(()) - } else { - todo!() + db: &Database, +) -> Result<()> { + match story { + Some(source) => { + let url = source.to_url(); + let result = update_story(source, force_refresh, 
db).await?; + println!( + "{}pdated story at {} with {} new chapters.", + if force_refresh { "Force-u" } else { "U" }, + url, + result + ); + Ok(()) + } + None => { + let stories = db.get_all_stories()?; + let story_count = stories.len(); + let (new_chaps, failed) = join_all( + stories + .into_iter() + .map(|s| update_story(s.source, force_refresh, db)), + ) + .await + .into_iter() + .fold((0, 0), |acc, x| match x { + Ok(num) => (acc.0 + num, acc.1), + Err(_) => (acc.0, acc.1 + 1), + }); + println!( + "{}pdated archive. Got {} new chapters from {} stories. Failed to update {} stories.", + if force_refresh { "Force-u" } else { "U" }, + new_chaps, + story_count - failed, + failed, + ); + Ok(()) + } } } -async fn update_story( - source: StorySource, - force_refresh: bool, - conn: &Connection, -) -> Result { - let existing_story = sql::get_story_by_id(conn, source.to_id().as_str())? - .ok_or_else(|| ArchiveError::StoryNotExists(source.to_url()))?; +async fn update_story(source: StorySource, force_refresh: bool, db: &Database) -> Result { let parser = source.parser(); - let new_skeleton = parser.get_skeleton(source).await?; - - // Get a list of existing chapters and a list of fetched chapters, then filter to only fetched chapters that aren't saved. - let mut existing_chapters: HashSet = - HashSet::with_capacity(existing_story.chapters.len()); - existing_story - .chapters - .iter() - .for_each(|chap| flatten_content(&mut existing_chapters, chap)); - let mut new_chapters: HashSet = HashSet::with_capacity(new_skeleton.chapters.len()); - new_skeleton - .chapters - .iter() - .for_each(|chap| flatten_content(&mut new_chapters, chap)); - let new_chapters: Vec = new_chapters - .into_iter() - .filter(|c| !existing_chapters.contains(c)) - .collect(); - - // If there are any new chapters, fetch the story and save them. 
- let mut added_chapters = 0; - if !new_chapters.is_empty() { - let new_story = parser.fill_skeleton(new_skeleton).await?; - for chapter in new_chapters.into_iter() { - match new_story.find_chapter(chapter) { - Some(found) => { - sql::save_content( - conn, - found.chapter, - new_story.source.to_id().as_str(), - found.parent.map(|content| content.id()), - )?; - added_chapters += 1; + if force_refresh { + let story = parser.get_story(source).await?; + db.save_story(&story)?; + Ok(story.num_chapters()) + } else { + let existing_story = db + .get_story_by_id(source.to_id().as_str())? + .ok_or_else(|| ArchiveError::StoryNotExists(source.to_url()))?; + let new_skeleton = parser.get_skeleton(source).await?; + + // Get a list of existing chapters and a list of fetched chapters, then filter to only fetched chapters that aren't saved. + let mut existing_chapters: HashSet = + HashSet::with_capacity(existing_story.chapters.len()); + existing_story + .chapters + .iter() + .for_each(|chap| flatten_content(&mut existing_chapters, chap)); + let mut new_chapters: HashSet = HashSet::with_capacity(new_skeleton.chapters.len()); + new_skeleton + .chapters + .iter() + .for_each(|chap| flatten_content(&mut new_chapters, chap)); + let new_chapters: Vec = new_chapters + .into_iter() + .filter(|c| !existing_chapters.contains(c)) + .collect(); + + // If there are any new chapters, fetch the story and save them. 
+ let mut added_chapters = 0; + if !new_chapters.is_empty() { + let new_story = parser.fill_skeleton(new_skeleton).await?; + for chapter in new_chapters.into_iter() { + match new_story.find_chapter(chapter) { + Some(found) => { + db.save_content( + found.chapter, + new_story.source.to_id().as_str(), + found.parent.map(|content| content.id()), + )?; + added_chapters += 1; + } + None => unreachable!(), } - None => unreachable!(), } } + Ok(added_chapters) } - Ok(added_chapters) } fn flatten_content(set: &mut HashSet, content: &Content) { @@ -183,8 +204,8 @@ fn flatten_content(set: &mut HashSet, content: &Content) { } } -async fn delete_story(search: String, conn: &Connection) -> Result<(), ArchiveError> { - let matches = sql::fuzzy_get_story(conn, search.as_str())?; +async fn delete_story(search: String, db: &Database) -> Result<()> { + let matches = db.fuzzy_get_story(search.as_str())?; match matches.len() { 0 => println!("No matching stories found. Please try another search."), // 1 => sql::delete_story_by_id(matches[0])?, @@ -194,8 +215,8 @@ async fn delete_story(search: String, conn: &Connection) -> Result<(), ArchiveEr Ok(()) } -async fn list_stories(conn: &Connection) -> Result<(), ArchiveError> { - let stories = sql::get_all_stories(conn)?; +async fn list_stories(db: &Database) -> Result<()> { + let stories = db.get_all_stories()?; stories.into_iter().for_each(|ls| { println!( "\"{}\" by {} ({} chapter{})", diff --git a/src/parser/ao3.rs b/src/parser/ao3.rs index f51ccec..c3467ce 100644 --- a/src/parser/ao3.rs +++ b/src/parser/ao3.rs @@ -2,10 +2,12 @@ use async_trait::async_trait; use chrono::{ naive::NaiveDate, offset::{FixedOffset, Local, TimeZone}, + DateTime, }; use regex::Regex; use select::{ document::Document, + node::Node, predicate::{self, Predicate}, }; @@ -13,7 +15,8 @@ use crate::{ client::get_with_query, error::ArchiveError, parser::Parser, - structs::{Author, Chapter, ChapterText, Content, Story, StorySource}, + structs::{Author, AuthorList, 
Chapter, ChapterText, Completed, Content, Story, StorySource}, + Result, }; static CHAPTER_REGEX: (&str, once_cell::sync::OnceCell) = @@ -23,7 +26,7 @@ pub(crate) struct AO3Parser; #[async_trait] impl Parser for AO3Parser { - async fn get_skeleton(&self, source: StorySource) -> Result { + async fn get_skeleton(&self, source: StorySource) -> Result { let main_page = get_with_query( &source.to_url(), &[("view_adult", "true"), ("view_full_work", "true")], @@ -48,14 +51,15 @@ impl Parser for AO3Parser { "AO3: Could not find title (.title.heading) for story at {}", source.to_url(), )))? - .text(); + .text() + .trim() + .to_owned(); let author = main_page .find(predicate::Attr("rel", "author").and(predicate::Attr("href", ()))) .next() .ok_or(ArchiveError::PageError(format!( - "AO3: Could not find author ([rel=\"author\"]) for {} at {}", - name, + "AO3: Could not find author ([rel=\"author\" href]) for story at {}", source.to_url(), )))?; let author_url = author @@ -82,114 +86,201 @@ impl Parser for AO3Parser { .map(|n| n.children().map(|elem| elem.inner_html()).collect()); let url = source.to_url(); let tags = get_tags(&main_page); + let completed = get_completed(&main_page, &source); let chapters = main_page - .find(predicate::Attr("id", "chapters").child(predicate::Class("chapter"))) - .map(|chapter| { - let title_h3 = chapter - .descendants() - .find(|n| n.is(predicate::Class("title"))) - .expect("Chapter should have title."); - let href = title_h3 - .children() - .find_map(|n| n.attr("href")) - .expect("Chapter should have link."); - let name = title_h3.text(); - let mut name_pieces = name.splitn(2, ":"); - let (chapter_num, chapter_name) = (name_pieces.next(), name_pieces.next()); - let name = chapter_name - .or(chapter_num) - .expect("Chapter should have a name or number") - .trim() - .to_owned(); - let chap_id = CHAPTER_REGEX - .1 - .get_or_init(|| Regex::new(CHAPTER_REGEX.0).unwrap()) - .captures(href) - .unwrap() - .get(1) - .expect("Chapter url must contain 
id") - .as_str(); - - let posted_on = navigate - .find(predicate::Attr("href", href)) + .find(predicate::Attr("id", "chapters")) + .next() + .ok_or(ArchiveError::PageError(format!( + "AO3: Could not find chapter section ([id=\"chapters\"]) for story at {}", + source.to_url() + )))?; + let mut children = chapters + .children() + .filter(|c| c.is(predicate::Class("chapter"))) + .peekable(); + let chapters = if children.peek().is_some() { + children + .map(|chapter| { + let url = get_chapter_url(&chapter, &source)?; + let id = get_chapter_id(&chapter, &source)?; + let name = get_chapter_name(&chapter, &source)?; + let date_posted = get_chapter_date_posted(&navigate, &url, &source)?; + let text = get_chapter_text(&chapter, &url)?; + Ok(Content::Chapter(Chapter { + id, + name, + description: None, + text: ChapterText::Hydrated(text), + url: format!("https://archiveofourown.org{}", url), + date_posted, + author: None, + })) + }) + .collect() + } else { + vec![{ + let posted_on = main_page + .find(predicate::Name("dd").and(predicate::Class("published"))) .next() - .expect("Navigation page should have a link with this chapter's URL") - .parent() - .unwrap() - .children() - .find_map(|c| { - if c.is(predicate::Class("datetime")) { - Some(c.text()) - } else { - None - } - }) - .expect("Navigation page should have a datetime span for this chapter"); - let posted_on = posted_on.trim(); - let timezone = FixedOffset::west(Local::now().offset().utc_minus_local()); - let date_posted = timezone - .from_local_datetime( - &NaiveDate::parse_from_str(&posted_on[1..posted_on.len() - 1], "%F") - .expect("Could not parse datestring to date") - .and_hms(3, 0, 0), - ) - .earliest() - .expect("Could not turn naive to full date"); - - let top_notes = chapter - .children() - .find(|c| c.is(predicate::Attr("id", "notes"))); - let bottom_notes = chapter - .children() - .find(|c| c.is(predicate::Class("end").and(predicate::Class("notes")))); - let chapter_text = chapter.children().find(|c| { - 
c.is(predicate::Class("userstuff").and(predicate::Attr("role", "article"))) - }); - - let chapter_text = format!( - "{}{}{}", - top_notes.map(|n| n.inner_html()).unwrap_or_default(), - chapter_text - .expect("Chapter has no text area") - .children() - .filter(|node| !node.is(predicate::Attr("id", "work"))) - .map(|node| node.html()) - .collect::(), - bottom_notes.map(|n| n.inner_html()).unwrap_or_default() - ); - - Content::Chapter(Chapter { - id: format!("{}:{}", source.to_id(), chap_id), - name, + .ok_or(ArchiveError::PageError(format!( + "AO3: Could not find published date (dd.published) for story at {}", + source.to_url() + )))? + .text(); + let date_posted = date_string_to_datetime(posted_on)?; + let text = get_chapter_text(&chapters, &url)?; + Ok(Content::Chapter(Chapter { + id: format!("{}:", source.to_id()), + name: name.clone(), description: None, - text: ChapterText::Hydrated(chapter_text), - url: format!("https://archiveofourown.org{}", href), + text: ChapterText::Hydrated(text), + url: source.to_url(), date_posted, - }) - }) + author: None, + })) + }] + }; + + if chapters.iter().find(|c| c.is_err()).is_some() { + return Err(chapters + .into_iter() + .find(|c| c.is_err()) + .unwrap() + .unwrap_err()); + } + + let chapters = chapters + .into_iter() + .map(|c| c.expect("If there was an error we would have returned already.")) .collect(); Ok(Story { name: name.trim().to_owned(), - author, + authors: AuthorList::new(author), description: description.map(|d: String| d.trim().to_owned()), url, tags, chapters, source, + completed, }) } - async fn fill_skeleton(&self, skeleton: Story) -> Result { + async fn fill_skeleton(&self, skeleton: Story) -> Result { Ok(skeleton) } - async fn get_story(&self, source: StorySource) -> Result { + async fn get_story(&self, source: StorySource) -> Result { self.get_skeleton(source).await } } +fn get_chapter_id(chapter: &Node, source: &StorySource) -> Result { + let href = get_chapter_url(chapter, source)?; + + Ok(CHAPTER_REGEX 
+ .1 + .get_or_init(|| Regex::new(CHAPTER_REGEX.0).unwrap()) + .captures(&href) + .unwrap() + .get(1) + .ok_or(ArchiveError::PageError(format!( + "AO3: Could not find chapter id in chapter link {} for story at {}", + href, + source.to_url() + )))? + .as_str() + .to_owned()) +} + +fn get_chapter_name(chapter: &Node, source: &StorySource) -> Result { + let full_title = chapter + .descendants() + .find(|n| n.is(predicate::Class("title"))) + .ok_or(ArchiveError::PageError(format!( + "AO3: Could not find chapter name (.title) for a chapter in story at {}", + source.to_url() + )))? + .text() + .trim() + .to_owned(); + Ok(full_title.splitn(2, ':').nth(1).or(full_title.splitn(2, ':').next()).ok_or(ArchiveError::PageError(format!("Expected chapter title to look like \"Chapter \" or \"Chapter : \" but got {} for story at {}", full_title, source.to_url())))?.to_string()) +} + +fn get_chapter_url(chapter: &Node, source: &StorySource) -> Result { + Ok(chapter + .descendants() + .find(|n| n.is(predicate::Class("title").child(predicate::Attr("href", ())))) + .ok_or(ArchiveError::PageError(format!( + "AO3: Could not find chapter link (.title [href]) for a chapter in story at {}", + source.to_url() + )))? + .attr("href") + .expect("Node should have href guaranteed by above is()") + .to_owned()) +} + +fn get_chapter_date_posted( + navigate: &Document, + href: &str, + source: &StorySource, +) -> Result> { + let posted_on = navigate + .find(predicate::Attr("href", href)) + .next() + .ok_or(ArchiveError::PageError(format!("AO3: Could not find a link which matches \"{}\" on the navigation page for story at {}/navigate", href, source.to_url())))? 
+ .parent() + .expect("Found node is not root") + .children() + .find_map(|c| { + if c.is(predicate::Class("datetime")) { + Some(c.text()) + } else { + None + } + }) + .ok_or(ArchiveError::PageError(format!("AO3: Could not find a datetime span for the link matching \"{}\" on the navigation page for story at {}/nagivate", href, source.to_url())))?; + date_string_to_datetime(posted_on) +} + +fn date_string_to_datetime(date: String) -> Result> { + let posted_on = date.replace('(', "").replace(')', ""); + let date_posted = posted_on.trim(); + let timezone = FixedOffset::west(Local::now().offset().utc_minus_local()); + Ok(timezone + .from_local_datetime(&NaiveDate::parse_from_str(date_posted, "%F")?.and_hms(3, 0, 0)) + .earliest() + .ok_or(ArchiveError::PageError(format!( + "AO3: Could not convert date string {} to a date", + date + )))?) +} + +fn get_chapter_text(chapter: &Node, chapter_url: &String) -> Result { + let top_notes = chapter + .children() + .find(|c| c.is(predicate::Attr("id", "notes"))); + let bottom_notes = chapter + .children() + .find(|c| c.is(predicate::Class("end").and(predicate::Class("notes")))); + let chapter_text = chapter + .children() + .find(|c| c.is(predicate::Class("userstuff"))); + + Ok(format!( + "{}{}{}", + top_notes.map(|n| n.inner_html()).unwrap_or_default(), + chapter_text + .ok_or(ArchiveError::PageError(format!("AO3: Can't find text area ([id=\"chapters\"] > .userstuff) for chapter with URL {}", chapter_url)))? + .children() + .filter(|node| !node.is(predicate::Attr("id", "work"))) + .map(|node| node.html()) + .collect::(), + bottom_notes.map(|n| n.inner_html()).unwrap_or_default() + )) +} + /// TODO Support series listings and collections at some point? 
fn get_tags(document: &Document) -> Vec { document @@ -228,3 +319,18 @@ fn get_tags(document: &Document) -> Vec { }) .collect() } + +fn get_completed(document: &Document, source: &StorySource) -> Completed { + document.find( + predicate::Class("stats").child(predicate::Name("dt").and(predicate::Class("status")))) + .next() + .map(|node| match node.text().trim().to_lowercase().as_ref() { + "updated:" => Completed::Incomplete, + "completed:" => Completed::Complete, + _ => { + println!("Encountered unexpected value {} in story status tag (.stats > dt.status) for story at {}", node.text().trim().to_lowercase(), source.to_url()); + Completed::Unknown + }, + }) + .unwrap_or(Completed::Complete) // If there is no "status" stat it's a oneshot and thus complete. +} diff --git a/src/parser/katalepsis.rs b/src/parser/katalepsis.rs index d161d7e..60f41f1 100644 --- a/src/parser/katalepsis.rs +++ b/src/parser/katalepsis.rs @@ -12,16 +12,18 @@ use std::iter; use crate::{ client::get, - error::ArchiveError, parser::Parser, - structs::{Author, Chapter, ChapterText, Content, Section, Story, StorySource}, + structs::{ + Author, AuthorList, Chapter, ChapterText, Completed, Content, Section, Story, StorySource, + }, + Result, }; pub(crate) struct KatalepsisParser; #[async_trait] impl Parser for KatalepsisParser { - async fn get_skeleton(&self, source: StorySource) -> Result { + async fn get_skeleton(&self, source: StorySource) -> Result { let main_page = get(&source.to_url()).await?.text().await?; let main_page = Document::from_read(main_page.as_bytes())?; @@ -93,6 +95,7 @@ impl Parser for KatalepsisParser { .expect("Chapter tag should have an href") .to_owned(), date_posted: FixedOffset::east(0).datetime_from_str("0", "%s").unwrap(), + author: None, }) }) .collect(); @@ -102,27 +105,29 @@ impl Parser for KatalepsisParser { description: None, chapters, url: None, + author: None, }) }) .collect(); Ok(Story { name, - author, + authors: AuthorList::new(author), description, url, tags, 
chapters, source, + completed: Completed::Incomplete, }) } - async fn fill_skeleton(&self, mut skeleton: Story) -> Result { + async fn fill_skeleton(&self, mut skeleton: Story) -> Result { let mut chapters: Vec<&mut Chapter> = Vec::with_capacity(skeleton.num_chapters()); for content in skeleton.chapters.iter_mut() { match content { - Content::Section(sec) => chapters_from_section(sec, &mut chapters), - Content::Chapter(chap) => chapters.push(chap), + Content::Section(ref mut sec) => chapters_from_section(sec, &mut chapters), + Content::Chapter(ref mut chap) => chapters.push(chap), } } @@ -231,7 +236,7 @@ impl Parser for KatalepsisParser { } } - async fn get_story(&self, source: StorySource) -> Result { + async fn get_story(&self, source: StorySource) -> Result { let story = self.get_skeleton(source).await?; self.fill_skeleton(story).await } @@ -240,8 +245,8 @@ impl Parser for KatalepsisParser { fn chapters_from_section<'a>(section: &'a mut Section, vec: &mut Vec<&'a mut Chapter>) { for content in section.chapters.iter_mut() { match content { - Content::Section(sec) => chapters_from_section(sec, vec), - Content::Chapter(chap) => vec.push(chap), + Content::Section(ref mut sec) => chapters_from_section(sec, vec), + Content::Chapter(ref mut chap) => vec.push(chap), } } } diff --git a/src/parser/mod.rs b/src/parser/mod.rs index 0dada4c..c231afa 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -10,6 +10,7 @@ use crate::{ pub mod ao3; pub mod katalepsis; pub mod royalroad; +pub mod xenforo; #[async_trait] pub trait Parser { diff --git a/src/parser/royalroad.rs b/src/parser/royalroad.rs index 749ce3f..4e7c504 100644 --- a/src/parser/royalroad.rs +++ b/src/parser/royalroad.rs @@ -8,7 +8,8 @@ use crate::{ client::get, error::ArchiveError, parser::Parser, - structs::{Author, Chapter, ChapterText, Content, Story, StorySource}, + structs::{Author, AuthorList, Chapter, ChapterText, Completed, Content, Story, StorySource}, + Result, }; static CHAPTER_REGEX: (&str, 
once_cell::sync::OnceCell) = @@ -18,7 +19,7 @@ pub(crate) struct RoyalRoadParser; #[async_trait] impl Parser for RoyalRoadParser { - async fn get_skeleton(&self, source: StorySource) -> Result { + async fn get_skeleton(&self, source: StorySource) -> Result { let main_page = get(&source.to_url()).await?.text().await?; let main_page = Document::from_read(main_page.as_bytes())?; let chapters = main_page @@ -95,6 +96,7 @@ impl Parser for RoyalRoadParser { text: ChapterText::Dehydrated, url, date_posted, + author: None, }) }) .collect(); @@ -130,19 +132,21 @@ impl Parser for RoyalRoadParser { .find(predicate::Class("tags").child(predicate::Name("a"))) .map(|elem| elem.text()) .collect(); + let completed = get_completed(&main_page, &source)?; Ok(Story { name: title, - author, + authors: AuthorList::new(author), description: Some(description), url: source.to_url(), tags, chapters, source, + completed, }) } - async fn fill_skeleton(&self, mut skeleton: Story) -> Result { + async fn fill_skeleton(&self, mut skeleton: Story) -> Result { let hydrate = skeleton .chapters .iter_mut() @@ -156,10 +160,7 @@ impl Parser for RoyalRoadParser { }); let results = join_all(hydrate).await; - if results - .iter() - .any(|res: &Result<(_, _), ArchiveError>| res.is_err()) - { + if results.iter().any(|res: &Result<(_, _)>| res.is_err()) { return Err(ArchiveError::Internal("Oopsie!".to_owned())); } @@ -181,8 +182,30 @@ impl Parser for RoyalRoadParser { Ok(skeleton) } - async fn get_story(&self, source: StorySource) -> Result { + async fn get_story(&self, source: StorySource) -> Result { let story = self.get_skeleton(source).await?; self.fill_skeleton(story).await } } + +fn get_completed(document: &Document, source: &StorySource) -> Result { + Ok(document + .find(predicate::Class("fiction-info")) + .next() + .ok_or(ArchiveError::PageError(format!( + "Royalroad: couldn't find story info panel (.fiction-info) for story at {}", + source.to_url() + )))? 
+ .descendants() + .find(|d| { + d.is(predicate::Class("label")) && { + d.text().trim() == "ONGOING" || d.text().trim() == "COMPLETED" + } + }) + .map(|d| match d.text().trim() { + "COMPLETED" => Completed::Complete, + "ONGOING" => Completed::Incomplete, + _ => unreachable!(), + }) + .unwrap_or(Completed::Unknown)) +} diff --git a/src/parser/xenforo.rs b/src/parser/xenforo.rs new file mode 100644 index 0000000..b95cb68 --- /dev/null +++ b/src/parser/xenforo.rs @@ -0,0 +1,254 @@ +use async_trait::async_trait; +use chrono::DateTime; +use futures::future::join_all; +use regex::Regex; +use select::{ + document::Document, + predicate::{self, Predicate}, +}; + +use crate::{ + client::get, + error::ArchiveError, + parser::Parser, + structs::{Author, AuthorList, Chapter, ChapterText, Completed, Content, Story, StorySource}, + Result, +}; + +static CHAPTER_REGEX: (&str, once_cell::sync::OnceCell) = + (r"#post-(\d+)", once_cell::sync::OnceCell::new()); +static AUTHOR_REGEX: (&str, once_cell::sync::OnceCell) = + (r"/members/(?:.+\.)?(\d+)", once_cell::sync::OnceCell::new()); + +pub(crate) struct XenforoParser; + +#[async_trait] +impl Parser for XenforoParser { + async fn get_skeleton(&self, source: StorySource) -> Result { + let main_page = get(&format!("{}/threadmarks", source.to_url())) + .await? + .text() + .await?; + let document = Document::from_read(main_page.as_bytes())?; + + let name = document + .find(predicate::Class("threadmarkListingHeader-name")) + + .next() + .ok_or(ArchiveError::PageError(format!( + "Xenforo: Could not find title (.threadmarkListingHeader-name) for story at {}/threadmarks", + source.to_url() + )))? + .children() + .filter(|c| c.name().is_none()) // Text nodes have None for name() + .next() + .ok_or(ArchiveError::PageError(format!("Xenforo: Could not find text in title (.threadmarkListingHeader-name) for story at {}/threadmarks", source. to_url())))? 
+ .text() + .replace(" - Threadmarks", "") + .trim() + .to_owned(); + let authors: Vec> = document.find(predicate::Class("username")) + .map(|node| { + let author_url = node + .attr("href") + .ok_or( + ArchiveError::PageError( + format!( + "Xenforo: Could not find user profile link (.username[href]) for user {} in story at {}/threadmarks", + node.text().trim().to_owned(), + source.to_url())))?; + let author_id = AUTHOR_REGEX + .1 + .get_or_init(|| Regex::new(AUTHOR_REGEX.0).unwrap()) + .captures(author_url) + .unwrap() + .get(1) + .ok_or(ArchiveError::PageError(format!( + "Xenforo: Could not find author id in author link {} for story at {}/threadmarks", + author_url, + source.to_url() + )))? + .as_str(); + Ok(Author { + id: format!("{}:{}", source.prefix(), author_id), + name: node.text().trim().to_owned(), + }) + }) + .collect(); + if authors.iter().find(|res| res.is_err()).is_some() { + return Err(authors + .into_iter() + .find(|res| res.is_err()) + .unwrap() + .unwrap_err()); + } + let authors: Vec = authors.into_iter().map(|res| res.unwrap()).collect(); + + let description = None; + + let url = source.to_url(); + + let tags = Vec::new(); + + let completed = document + .find(predicate::Class("pairs--rows")) + .find(|node| { + node.children() + .find(|c| { + c.is(predicate::Name("dt")) + && c.text().trim().to_lowercase() == "index progress" + }) + .is_some() + }) + .map(|node| { + match node + .children() + .find(|c| c.is(predicate::Name("dd"))) + .map(|c| c.text()) + .unwrap_or("not found".to_owned()) + .as_ref() + { + "Complete" => Completed::Complete, + "Ongoing" => Completed::Incomplete, + _ => Completed::Unknown, + } + }) + .unwrap_or(Completed::Unknown); + + let chapters: Vec> = document.find(predicate::Class("structItem--threadmark")) + .map(|node| { + let chapter_info = node.descendants().find(|node| node.is(predicate::Class("structItem-title"))).ok_or( + ArchiveError::PageError(format!("Xenforo: Could not find threadmark title container 
(.structItem-title) for a threadmark for story at {}/threadmarks", source.to_url())))? + .descendants() + .find(|node| node.is(predicate::Name("a").and(predicate::Attr("href", ())))) + .ok_or(ArchiveError::PageError(format!("Xenforo: Could not find threadmark link (.structItem-title a) for a threadmark for story at {}/threadmarks", source.to_url())))?; + let chapter_url = chapter_info.attr("href").expect("Should not fail due to filter above."); + let chapter_id = CHAPTER_REGEX + .1 + .get_or_init(|| Regex::new(CHAPTER_REGEX.0).unwrap()) + .captures(chapter_url) + .unwrap() + .get(1) + .ok_or(ArchiveError::PageError(format!( + "Xenforo: Could not find chapter id in chapter link {} for story at {}/threadmarks", + chapter_url, + source.to_url() + )))? + .as_str() + .to_owned(); + let chapter_url = format!("{}/posts/{}", source.to_base_url(), chapter_id); + let chapter_title = chapter_info.text().trim().to_string(); + + let time_string = node.descendants().find(|node| node.is(predicate::Name("time").and(predicate::Attr("datetime", ())))).ok_or( + ArchiveError::PageError(format!("Xenforo: Could not find threadmark date posted (structItem--threadmark time[datetime]) for a threadmark for story at {}/threadmarks", source.to_url())))?.attr("datetime").expect("Should not fail due to filter above."); + let date_posted = DateTime::parse_from_str(time_string, "%FT%T%z").unwrap_or_else(|_| { + panic!( + "Chapter posted-on date ({}) did not conform to rfc3339", + time_string + ) + }); + + let author_name = node.attr("data-content-author").ok_or(ArchiveError::PageError(format!("Xenforo: Could not find author name (structItem--threadmark.data-content-author for a threadmark for story at {}/threadmarks", source.to_url())))?; + + Ok(Content::Chapter(Chapter { + id: format!("{}:{}", source.to_id(), chapter_id), + name: chapter_title, + description: None, + text: ChapterText::Dehydrated, + url: chapter_url, + date_posted, + author: Some(authors.iter().find(|a| a.name == 
author_name).ok_or(ArchiveError::PageError(format!("Xenforo: Could not find an author (.username) matching {} for story at {}/threadmarks", author_name, source.to_url())))?.clone()), + })) + }) + .collect(); + if chapters.iter().find(|r| r.is_err()).is_some() { + return Err(chapters + .into_iter() + .find(Result::is_err) + .unwrap() + .unwrap_err()); + } + + Ok(Story { + name, + authors: AuthorList::from_list(authors), + description, + url, + tags, + chapters: chapters.into_iter().map(Result::unwrap).collect(), + source, + completed, + }) + } + + async fn fill_skeleton(&self, mut skeleton: Story) -> Result { + let page_list: Vec = { + let first_page = get(format!("{}/reader", skeleton.source.to_url()).as_ref()) + .await? + .text() + .await?; + let first_page = Document::from_read(first_page.as_bytes())?; + + let last_page = first_page.find(predicate::Class("pageNav-main")).next() + .map(|node| match node.descendants() + .filter(|d| d.is(predicate::Name("a").and(predicate::Attr("href", ())))) + .last() + { + Some(last_page) => usize::from_str_radix(&last_page.text(), 10).map_err(ArchiveError::from), + None => Err(ArchiveError::PageError(format!( + "Xenforo: Could not find pageNav (.pageNav-main a[href]) for story at {}/reader", + skeleton.source.to_url() + ))), + }) + .unwrap_or(Ok(1))?; + (1..=last_page) + .map(|num| format!("{}/reader/page-{}", skeleton.source.to_url(), num)) + .collect() + }; + let page_list = page_list + .into_iter() + .map(|p| async move { Ok(get(p.as_ref()).await?.text().await?) }); + let pages = join_all(page_list).await; + let pages = extract_error(pages)? 
+ .into_iter() + .map(|text| Document::from_read(text.as_bytes()).map_err(ArchiveError::from)) + .collect(); + let pages = extract_error(pages)?; + let maybe_text: Vec> = skeleton + .chapters + .iter_mut() + .filter_map(|content| match content { + Content::Section(_) => None, + Content::Chapter(chap) => Some(chap), + }) + .map(|chap| { + let chapter_id = chap.chapter_id(); + let selector = format!("js-post-{}", chapter_id); + let elem = pages.iter().find(|page| page.find(predicate::Attr("id", selector.as_ref())).next().is_some()).ok_or( + ArchiveError::PageError(format!("Xenforo: could not find a post for chapter with id {chapter_id} (.js-post-{chapter_id}) on any page for story at {}/reader", skeleton.source.to_url())))?; + let content = elem.find(predicate::Attr("id", selector.as_ref())).next().unwrap().descendants().find(|d| d.is(predicate::Class("bbWrapper"))).ok_or( + ArchiveError::PageError(format!("Xenforo: could not find text content for post with id {chapter_id} (.js-post-{chapter_id} .bbWrapper) on any page for story at {}/reader", skeleton.source.to_url())))?; + Ok((chap, ChapterText::Hydrated(content.inner_html()))) + }) + .collect(); + + let maybe_text = extract_error(maybe_text)?; + maybe_text.into_iter().for_each(|(chap, text)| { + chap.text = text; + }); + + Ok(skeleton) + } + + async fn get_story(&self, source: StorySource) -> Result { + let story = self.get_skeleton(source).await?; + self.fill_skeleton(story).await + } +} +fn extract_error(list: Vec>) -> Result> { + if list.iter().find(|i| i.is_err()).is_some() { + Err(list.into_iter().find(Result::is_err).unwrap().unwrap_err()) + } else { + Ok(list.into_iter().map(Result::unwrap).collect()) + } +} diff --git a/src/sql.rs b/src/sql.rs index 710f782..070d1af 100644 --- a/src/sql.rs +++ b/src/sql.rs @@ -1,431 +1,463 @@ use chrono::DateTime; +use once_cell::sync::OnceCell; use rayon::prelude::ParallelSliceMut; -use rusqlite::{types::Type, Connection, Error, Result, Row}; +use rusqlite::{types::Type, 
Connection, Error, OptionalExtension, Row}; use std::ops::{Deref, DerefMut}; +use std::path::Path; use std::sync::Mutex; use crate::error::ArchiveError; use crate::structs::{ - Author, Chapter, ChapterText, Content, ListedStory, Section, Story, StorySource, + Author, AuthorList, Chapter, ChapterText, Completed, Content, ListedStory, Section, Story, + StorySource, }; +use crate::Result; -static TABLES_CREATED: once_cell::sync::OnceCell> = once_cell::sync::OnceCell::new(); +static AUTHOR_LIST_SEPARATOR: char = ';'; -pub fn create_tables(conn: &Connection) -> Result<(), Error> { - let mut lock = TABLES_CREATED - .get_or_init(|| Mutex::new(false)) - .lock() - .unwrap(); - if !lock.deref() { - conn.execute( - "CREATE TABLE IF NOT EXISTS authors ( - id TEXT PRIMARY KEY, - name TEXT NOT NULL - )", - (), - ) - .unwrap(); - conn.execute( - "CREATE TABLE IF NOT EXISTS stories ( - id TEXT NOT NULL PRIMARY KEY, - name TEXT NOT NULL, - description TEXT, - url TEXT NOT NULL, - author_id TEXT NOT NULL, - FOREIGN KEY (author_id) REFERENCES authors(id) - )", - (), - ) - .unwrap(); - conn.execute( - "CREATE TABLE IF NOT EXISTS sections ( - id TEXT PRIMARY KEY, - name TEXT NOT NULL, - description TEXT, - url TEXT, - story_id TEXT NOT NULL, - parent_id TEXT, - FOREIGN KEY (story_id) REFERENCES stories(id) - )", - (), - ) - .unwrap(); - conn.execute( - "CREATE TABLE IF NOT EXISTS chapters ( - id TEXT PRIMARY KEY, - name TEXT NOT NULL, - description TEXT, - text TEXT NOT NULL, - url TEXT NOT NULL, - date_posted TEXT NOT NULL, - story_id TEXT NOT NULL, - section_id TEXT, - FOREIGN KEY (story_id) REFERENCES stories(id), - FOREIGN KEY (section_id) REFERENCES sections(id) - )", - (), - ) - .unwrap(); - conn.execute( - "CREATE TABLE IF NOT EXISTS tags ( - id TEXT PRIMARY KEY, - name TEXT NOT NULL - )", - (), - ) - .unwrap(); - conn.execute( - "CREATE TABLE IF NOT EXISTS tag_uses ( - tag_id TEXT NOT NULL, - story_id TEXT NOT NULL, - FOREIGN KEY (tag_id) REFERENCES tags(id), - FOREIGN KEY 
(story_id) REFERENCES stories(id) - )", - (), - ) - .unwrap(); - *lock.deref_mut() = true; - } - Ok(()) -} - -pub fn get_all_stories(conn: &Connection) -> Result, ArchiveError> { - create_tables(conn).unwrap(); - let mut failed_stories = 0; - let mut stmt = conn - .prepare( - "SELECT - stories.name, - authors.name, - COUNT(chapters.id) AS chapter_count - FROM stories - INNER JOIN authors ON stories.author_id = authors.id - INNER JOIN chapters ON stories.id = chapters.story_id - GROUP BY stories.id", - ) - .unwrap(); - let stories: Vec = stmt - .query_map([], |row| { - Ok(ListedStory { - name: row.get(0).unwrap(), - author: row.get(1).unwrap(), - chapter_count: row.get(2).unwrap(), - }) - }) - .unwrap() - .filter_map(|listed| match listed { - Ok(story) => Some(story), - Err(_) => { - failed_stories += 1; - None - } - }) - .collect(); - println!( - "Got {} stories. Failed to get {failed_stories} stories.", - stories.len() - ); +static DB_INITIALIZED: OnceCell> = OnceCell::new(); - Ok(stories) +pub struct Database { + conn: Connection, } -pub fn story_exists_with_id(conn: &Connection, id: &str) -> Result { - create_tables(conn).unwrap(); - let mut stmt = conn - .prepare("SELECT COUNT(*) FROM stories WHERE id = :id") - .unwrap(); - let story_exists = stmt - .query_row(&[(":id", id)], |row| match row.get(0) { - Ok(0) => Ok(None), - Ok(1) => Ok(Some(())), - _ => Ok(None), - }) - .unwrap_or(None); - Ok(story_exists.is_some()) -} +impl Database { + pub fn new(path: &str) -> Result { + let file_exists = Path::new(path).try_exists()?; + if !file_exists { + println!("Database file at {} does not exist. 
Creating...", path); + } + let this = Self { + conn: Connection::open(path)?, + }; + this.init()?; + Ok(this) + } -pub fn fuzzy_get_story(conn: &Connection, search: &str) -> Result, ArchiveError> { - create_tables(conn).unwrap(); - let mut stmt = conn - .prepare( - "SELECT stories.id - FROM stories INNER JOIN authors ON stories.author_id = authors.id - WHERE - stories.name LIKE %:search% - OR stories.id = :search - OR authors.name LIKE %:search%", - ) - .unwrap(); - let matches = stmt - .query_map(&[(":search", search)], |row| Ok(row.get(0).unwrap())) - .unwrap() - .filter_map(|id| id.ok()) - .collect(); - Ok(matches) -} + fn init(&self) -> Result<()> { + let mut lock = DB_INITIALIZED + .get_or_init(|| Mutex::new(false)) + .lock() + .unwrap(); + if !lock.deref() { + init_db(&self.conn)?; + *lock.deref_mut() = true; + } + Ok(()) + } -pub fn get_story_by_id(conn: &Connection, id: &str) -> Result, ArchiveError> { - if !story_exists_with_id(conn, id).unwrap() { - Ok(None) - } else { - let mut stmt = conn.prepare( - "SELECT id, name, description, url, parent_id FROM sections WHERE story_id = :story_id", - ).unwrap(); - let mut sections: Vec<(Option, Section)> = stmt - .query_map(&[(":story_id", id)], |row| { - Ok(( - // ID of parent section, if one exists - match is_null(row, 4) { - true => None, - false => Some(row.get(4)?), - }, - Section { - id: row.get(0).unwrap(), - name: row.get(1).unwrap(), - description: match is_null(row, 2) { - true => None, - false => Some(row.get(2)?), - }, - chapters: Vec::new(), - url: match is_null(row, 3) { - true => None, - false => Some(row.get(3)?), - }, - }, - )) + pub fn get_all_stories(&self) -> Result> { + let conn = &self.conn; + let mut failed_stories = 0; + let mut stmt = conn + .prepare( + "SELECT + stories.name, + authors.name, + stories.completed, + stories.url, + COUNT(chapters.id) AS chapter_count + FROM stories + INNER JOIN authors ON stories.author_id = authors.id + INNER JOIN chapters ON stories.id = chapters.story_id + 
GROUP BY stories.id", + ) + .unwrap(); + let stories: Vec = stmt + .query_map([], |row| { + Ok(ListedStory { + name: row.get(0)?, + author: row.get(1)?, + completed: Completed::from_string(row.get::(2)?.as_ref()), + source: StorySource::from_url(row.get::(3)?.as_ref()) + .expect("URLs in database should be valid for sources"), + chapter_count: row.get(4)?, + }) }) .unwrap() - .map(|sec| sec.unwrap()) + .filter_map(|listed| match listed { + Ok(story) => Some(story), + Err(_) => { + failed_stories += 1; + None + } + }) .collect(); + println!( + "Got {} stories. Failed to get {failed_stories} stories.", + stories.len() + ); + + Ok(stories) + } - stmt = conn + pub fn story_exists_with_id(&self, id: &str) -> Result { + let conn = &self.conn; + let mut stmt = conn + .prepare("SELECT COUNT(*) FROM stories WHERE id = :id") + .unwrap(); + let story_exists = stmt + .query_row(&[(":id", id)], |row| match row.get(0) { + Ok(0) => Ok(None), + Ok(1) => Ok(Some(())), + _ => Ok(None), + }) + .unwrap_or(None); + Ok(story_exists.is_some()) + } + + pub fn fuzzy_get_story(&self, search: &str) -> Result> { + let conn = &self.conn; + let mut stmt = conn .prepare( - "SELECT id, name, description, text, url, date_posted, section_id - FROM chapters - WHERE story_id = :story_id", + "SELECT stories.id + FROM stories INNER JOIN authors ON stories.author_id = authors.id + WHERE + stories.name LIKE %:search% + OR stories.id = :search + OR authors.name LIKE %:search%", ) .unwrap(); - let mut chapters: Vec<(Option, Chapter)> = stmt - .query_map(&[(":story_id", id)], |row| { - Ok(( - // ID of parent section, if one exists - match is_null(row, 6) { - true => None, - false => Some(row.get(6)?), - }, - Chapter { - id: row.get(0).unwrap(), - name: row.get(1).unwrap(), - description: match is_null(row, 2) { + let matches = stmt + .query_map(&[(":search", search)], |row| Ok(row.get(0).unwrap())) + .unwrap() + .filter_map(|id| id.ok()) + .collect(); + Ok(matches) + } + + pub fn get_story_by_id(&self, id: 
&str) -> Result> { + let conn = &self.conn; + if !self.story_exists_with_id(id).unwrap() { + Ok(None) + } else { + let mut stmt = conn + .prepare( + "SELECT id, name, description, url, parent_id + FROM sections + WHERE story_id = :story_id", + ) + .unwrap(); + let mut sections: Vec<(Option, Section)> = stmt + .query_map(&[(":story_id", id)], |row| { + Ok(( + // ID of parent section, if one exists + match is_null(row, 4) { true => None, - false => Some(row.get(2)?), + false => Some(row.get(4)?), }, - text: ChapterText::Hydrated(row.get(3).unwrap()), - url: row.get(4).unwrap(), - date_posted: DateTime::parse_from_rfc3339( - row.get::(5).unwrap().as_str(), - ) - .unwrap_or_else(|_| { - panic!( - "Chapter posted-on date ({:?}) did not conform to rfc3339", - row.get::(5) + Section { + id: row.get(0).unwrap(), + name: row.get(1).unwrap(), + description: match is_null(row, 2) { + true => None, + false => Some(row.get(2)?), + }, + chapters: Vec::new(), + url: match is_null(row, 3) { + true => None, + false => Some(row.get(3)?), + }, + author: None, + }, + )) + }) + .unwrap() + .map(|sec| sec.unwrap()) + .collect(); + + stmt = conn + .prepare( + "SELECT id, name, description, text, url, date_posted, section_id + FROM chapters + WHERE story_id = :story_id", + ) + .unwrap(); + let mut chapters: Vec<(Option, Chapter)> = stmt + .query_map(&[(":story_id", id)], |row| { + Ok(( + // ID of parent section, if one exists + match is_null(row, 6) { + true => None, + false => Some(row.get(6)?), + }, + Chapter { + id: row.get(0).unwrap(), + name: row.get(1).unwrap(), + description: match is_null(row, 2) { + true => None, + false => Some(row.get(2)?), + }, + text: ChapterText::Hydrated(row.get(3).unwrap()), + url: row.get(4).unwrap(), + date_posted: DateTime::parse_from_rfc3339( + row.get::(5).unwrap().as_str(), ) - }), - }, - )) - }) - .unwrap() - .map(|chap| chap.unwrap()) - .collect(); + .unwrap_or_else(|_| { + panic!( + "Chapter posted-on date ({:?}) did not conform to rfc3339", + 
row.get::(5) + ) + }), + author: None, + }, + )) + }) + .unwrap() + .map(|chap| chap.unwrap()) + .collect(); - if !chapters.is_empty() { - for idx in (0..chapters.len() - 1).rev() { - if chapters[idx].0.is_some() { - let (parent_id, chapter) = chapters.remove(idx); - let parent_id = parent_id.unwrap(); - if let Some((_, section)) = sections.iter_mut().find(|(_, s)| s.id == parent_id) - { - section.chapters.push(Content::Chapter(chapter)); - } else { - panic!( - "Chapter {} has section_id {} that does not match any section", - chapter.id, parent_id - ); + if !chapters.is_empty() { + for idx in (0..chapters.len() - 1).rev() { + if chapters[idx].0.is_some() { + let (parent_id, chapter) = chapters.remove(idx); + let parent_id = parent_id.unwrap(); + if let Some((_, section)) = + sections.iter_mut().find(|(_, s)| s.id == parent_id) + { + section.chapters.push(Content::Chapter(chapter)); + } else { + panic!( + "Chapter {} has section_id {} that does not match any section", + chapter.id, parent_id + ); + } } } } - } - if !sections.is_empty() { - for idx in (0..sections.len() - 1).rev() { - sections[idx] - .1 - .chapters - .par_sort_unstable_by(|a, b| a.id().cmp(b.id())); - if sections[idx].0.is_some() { - let (parent_id, section) = sections.remove(idx); - let parent_id = parent_id.unwrap(); - if let Some((_, parent)) = sections.iter_mut().find(|(_, s)| s.id == parent_id) - { - parent.chapters.push(Content::Section(section)); + if !sections.is_empty() { + for idx in (0..sections.len() - 1).rev() { + sections[idx] + .1 + .chapters + .par_sort_unstable_by(|a, b| a.id().cmp(b.id())); + if sections[idx].0.is_some() { + let (parent_id, section) = sections.remove(idx); + let parent_id = parent_id.unwrap(); + if let Some((_, parent)) = + sections.iter_mut().find(|(_, s)| s.id == parent_id) + { + parent.chapters.push(Content::Section(section)); + } } } } - } - let mut story_chapters: Vec = sections - .into_iter() - .map(|(_, section)| Content::Section(section)) - .chain( - 
chapters - .into_iter() - .map(|(_, chapter)| Content::Chapter(chapter)), - ) - .collect(); - story_chapters.par_sort_unstable_by(|a, b| a.id().cmp(b.id())); + let mut story_chapters: Vec = sections + .into_iter() + .map(|(_, section)| Content::Section(section)) + .chain( + chapters + .into_iter() + .map(|(_, chapter)| Content::Chapter(chapter)), + ) + .collect(); + story_chapters.par_sort_unstable_by(|a, b| a.id().cmp(b.id())); - stmt = conn - .prepare( - "SELECT tags.name - FROM tag_uses INNER JOIN tags - ON tags.id = tag_uses.tag_id - WHERE tag_uses.story_id = :story_id", - ) - .unwrap(); - let story_tags: Vec = stmt - .query_map(&[(":story_id", id)], |row| row.get::(0)) - .unwrap() - .filter(|res| res.is_ok()) - .map(|res| res.unwrap()) - .collect(); + stmt = conn + .prepare( + "SELECT tags.name + FROM tag_uses INNER JOIN tags + ON tags.id = tag_uses.tag_id + WHERE tag_uses.story_id = :story_id", + ) + .unwrap(); + let story_tags: Vec = stmt + .query_map(&[(":story_id", id)], |row| row.get::(0)) + .unwrap() + .filter(|res| res.is_ok()) + .map(|res| res.unwrap()) + .collect(); - stmt = conn.prepare( - "SELECT stories.name, stories.description, stories.url, stories.author_id, authors.name - FROM stories INNER JOIN authors - ON stories.author_id = authors.id - WHERE stories.id = :story_id", - ).unwrap(); - let mut story = stmt - .query_row(&[(":story_id", id)], |row| { - let source = StorySource::from_url(row.get::(2).unwrap().as_str()) - .map_err(|e| { - rusqlite::Error::FromSqlConversionFailure( - 2, - rusqlite::types::Type::Text, - Box::new(e), - ) + stmt = conn + .prepare("SELECT author_id FROM stories WHERE id = :id") + .unwrap(); + let author_ids = stmt.query_row(&[(":id", id)], |row| { + Ok(row + .get::(0)? 
+ .replace(AUTHOR_LIST_SEPARATOR, ", ")) + })?; + stmt = conn + .prepare("SELECT id, name FROM authors WHERE id IN (?)") + .unwrap(); + let authors: Vec = stmt + .query_map([author_ids], |row| { + Ok(Author { + id: row.get(0)?, + name: row.get(1)?, }) - .unwrap(); - Ok(( - row.get::(2).unwrap(), - Story { - name: row.get(0).unwrap(), - description: row.get(1).unwrap(), - url: row.get(2).unwrap(), - author: Author { - id: row.get(3).unwrap(), - name: row.get(4).unwrap(), + })? + .filter(|res| res.is_ok()) + .map(|res| res.unwrap()) + .collect(); + + stmt = conn + .prepare( + "SELECT stories.name, stories.description, stories.url, stories.completed FROM stories WHERE id = :id", + ) + .unwrap(); + let mut story = stmt + .query_row(&[(":id", id)], |row| { + let source = StorySource::from_url(row.get::(2)?.as_str()) + .map_err(|e| { + rusqlite::Error::FromSqlConversionFailure( + 2, + rusqlite::types::Type::Text, + Box::new(e), + ) + }) + .unwrap(); + Ok(( + row.get::(2)?, + Story { + name: row.get(0)?, + description: row.get(1)?, + url: row.get(2)?, + authors: AuthorList::from_list(authors), + chapters: story_chapters, + tags: story_tags, + source, + completed: Completed::from_string( + row.get::(3)?.as_ref(), + ), }, - chapters: story_chapters, - tags: story_tags, - source, - }, - )) - }) - .map_err(ArchiveError::from) - .unwrap(); - story.1.source = StorySource::from_url(story.0.as_str()).unwrap(); - Ok(Some(story.1)) + )) + }) + .map_err(ArchiveError::from) + .unwrap(); + story.1.source = StorySource::from_url(story.0.as_str()).unwrap(); + Ok(Some(story.1)) + } } -} -pub fn save_story(conn: &Connection, story: &Story) -> Result<(), ArchiveError> { - create_tables(conn).unwrap(); - conn.execute( - "INSERT OR IGNORE INTO authors (id, name) VALUES (?1, ?2)", - (&story.author.id, &story.author.name), - ) - .unwrap(); - conn.execute( - "INSERT INTO stories (id, name, description, url, author_id) VALUES (?1, ?2, ?3, ?4, ?5)", - ( - &story.source.to_id(), - &story.name, - 
&story.description, - &story.url, - &story.author.id, - ), - ) - .unwrap(); - for content in story.chapters.iter().as_ref() { - save_content(conn, content, &story.source.to_id(), None).unwrap(); - } - for tag in story.tags.iter().as_ref() { - let tag_id = tag.to_lowercase(); - conn.execute( - "INSERT OR IGNORE INTO tags (id, name) VALUES (?1, ?2)", - (&tag_id, &tag), - ) - .unwrap(); + pub fn save_story(&self, story: &Story) -> Result<()> { + let conn = &self.conn; + for author in story.authors.authors() { + conn.execute( + "INSERT OR IGNORE INTO authors (id, name) VALUES (?1, ?2)", + (&author.id, &author.name), + ) + .unwrap(); + } + conn.execute( - "INSERT OR IGNORE INTO tag_uses (tag_id, story_id) VALUES (?1, ?2)", - (&tag_id, &story.source.to_id()), + "INSERT INTO stories (id, name, description, url, completed) VALUES (?1, ?2, ?3, ?4, ?5)", + ( + &story.source.to_id(), + &story.name, + &story.description, + &story.url, + // &story.authors.authors().iter().enumerate().fold( + // String::new(), + // |mut acc, (idx, author)| { + // acc.push_str(&author.id); + // if idx < story.authors.len() - 1 { + // acc.push(AUTHOR_LIST_SEPARATOR); + // }; + // acc + // }, + // ), + &story.authors.authors().iter().next().unwrap().id, + &story.completed.to_string(), + ), ) .unwrap(); + for content in story.chapters.iter().as_ref() { + self.save_content(content, &story.source.to_id(), None) + .unwrap(); + } + for tag in story.tags.iter().as_ref() { + let tag_id = tag.to_lowercase(); + conn.execute( + "INSERT OR IGNORE INTO tags (id, name) VALUES (?1, ?2)", + (&tag_id, &tag), + ) + .unwrap(); + conn.execute( + "INSERT OR IGNORE INTO tag_uses (tag_id, story_id) VALUES (?1, ?2)", + (&tag_id, &story.source.to_id()), + ) + .unwrap(); + } + Ok(()) } - Ok(()) -} -pub fn save_content( - conn: &Connection, - content: &Content, - story_id: &str, - parent_id: Option<&str>, -) -> Result<(), ArchiveError> { - match content { - Content::Section(Section { - id, - name, - description, - chapters, 
- url, - }) => { - conn.execute("INSERT INTO sections (id, name, description, url, story_id, parent_id) VALUES (?1, ?2, ?3, ?4, ?5, ?6)", - ( - id, - name, - description, - url, - story_id, - parent_id - ) - ).unwrap(); - for inner in chapters.iter() { - save_content(conn, inner, story_id, Some(id)).unwrap(); + pub fn save_content( + &self, + content: &Content, + story_id: &str, + parent_id: Option<&str>, + ) -> Result<()> { + let conn = &self.conn; + match content { + Content::Section(Section { + id, + name, + description, + chapters, + url, + author, + }) => { + conn.execute("INSERT INTO sections (id, name, description, url, story_id, parent_id) VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7)", + ( + id, + name, + description, + url, + story_id, + parent_id, + author.as_ref().map(|a| &a.id) + ) + ).unwrap(); + for inner in chapters.iter() { + self.save_content(inner, story_id, Some(id)).unwrap(); + } + } + Content::Chapter(Chapter { + id, + name, + description, + text, + url, + date_posted, + author, + }) => { + conn.execute("INSERT INTO chapters (id, name, description, text, url, date_posted, story_id, section_id, author_id) VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7, ?8, ?9)", + ( + id, + name, + description, + text.as_str(), + url, + &date_posted.to_rfc3339(), + story_id, + parent_id, + author.as_ref().map(|a| &a.id) + ) + ).expect(format!("Failed to add chapter with values\nid: {}\nname: {}\nurl: {}\ndate_posted: {}\nstory_id: {}\nsection_id {}", id, name, url, date_posted.to_rfc3339(), story_id, "NULL").as_str()); } } - Content::Chapter(Chapter { - id, - name, - description, - text, - url, - date_posted, - }) => { - conn.execute("INSERT INTO chapters (id, name, description, text, url, date_posted, story_id, section_id) VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7, ?8)", - ( - id, - name, - description, - text.as_str(), - url, - &date_posted.to_rfc3339(), - story_id, - parent_id - ) - ).expect(format!("Failed to add chapter with values\nid: {}\nname: {}\nurl: {}\ndate_posted: 
{}\nstory_id: {}\nsection_id {}", id, name, url, date_posted.to_rfc3339(), story_id, "NULL").as_str()); - } + Ok(()) + } + + pub fn add_valid_site(&self, url: &str, matches: &str) -> Result<()> { + let conn = &self.conn; + conn.execute( + "INSERT OR IGNORE INTO valid_sites (site_url, matches_parser) VALUES (?1, ?2)", + (url, matches), + )?; + Ok(()) + } + + pub fn get_parser_for_site(&self, url: &str) -> Result<Option<String>> { + let conn = &self.conn; + let mut stmt = conn + .prepare("SELECT matches_parser FROM valid_sites WHERE site_url = :url") + .unwrap(); + stmt.query_row(&[(":url", url)], |row| Ok(row.get::<usize, String>(0)?)) + .optional() + .map_err(|e| e.into()) } - Ok(()) } fn is_null(row: &Row, column: usize) -> bool { @@ -434,3 +466,89 @@ fn is_null(row: &Row, column: usize) -> bool { Err(Error::InvalidColumnType(_, _, Type::Null)) ) } + +fn init_db(conn: &Connection) -> Result<()> { + conn.execute( + "CREATE TABLE IF NOT EXISTS authors ( + id TEXT PRIMARY KEY, + name TEXT NOT NULL + )", + (), + )?; + + conn.execute( + "CREATE TABLE IF NOT EXISTS stories ( + id TEXT NOT NULL PRIMARY KEY, + name TEXT NOT NULL, + description TEXT, + url TEXT NOT NULL, + completed TEXT NOT NULL + )", + (), + )?; + conn.execute( + "CREATE TABLE IF NOT EXISTS story_authors ( + story_id TEXT NOT NULL, + author_id TEXT NOT NULL, + FOREIGN KEY (story_id) REFERENCES stories(id), + FOREIGN KEY (author_id) REFERENCES authors(id) + )", + (), + )?; + + conn.execute( + "CREATE TABLE IF NOT EXISTS sections ( + id TEXT PRIMARY KEY, + name TEXT NOT NULL, + description TEXT, + url TEXT, + story_id TEXT NOT NULL, + parent_id TEXT, + author_id TEXT, + FOREIGN KEY (story_id) REFERENCES stories(id) + FOREIGN KEY (author_id) REFERENCES authors(id) + )", + (), + )?; + + conn.execute( + "CREATE TABLE IF NOT EXISTS sections ( + id TEXT PRIMARY KEY, + name TEXT NOT NULL, + description TEXT, + url TEXT, + story_id TEXT NOT NULL, + parent_id TEXT, + author_id TEXT, + FOREIGN KEY (story_id) REFERENCES stories(id) + FOREIGN KEY 
(author_id) REFERENCES authors(id) + )", + (), + )?; + + conn.execute( + "CREATE TABLE IF NOT EXISTS tags ( + id TEXT PRIMARY KEY, + name TEXT NOT NULL + )", + (), + )?; + conn.execute( + "CREATE TABLE IF NOT EXISTS tag_uses ( + tag_id TEXT NOT NULL, + story_id TEXT NOT NULL, + FOREIGN KEY (tag_id) REFERENCES tags(id), + FOREIGN KEY (story_id) REFERENCES stories(id) + )", + (), + )?; + + conn.execute( + "CREATE TABLE IF NOT EXISTS valid_sites ( + site_url TEXT PRIMARY KEY, + matches_parser TEXT NOT NULL + )", + (), + )?; + Ok(()) +} diff --git a/src/structs.rs b/src/structs.rs index ccd4a05..e37fcac 100644 --- a/src/structs.rs +++ b/src/structs.rs @@ -4,18 +4,45 @@ use regex::Regex; use crate::error::ArchiveError; use crate::parser::{ - ao3::AO3Parser, katalepsis::KatalepsisParser, royalroad::RoyalRoadParser, Parser, + ao3::AO3Parser, katalepsis::KatalepsisParser, royalroad::RoyalRoadParser, + xenforo::XenforoParser, Parser, }; +use crate::Result; + +#[derive(Debug, Clone)] +pub enum Completed { + Complete, + Incomplete, + Unknown, +} + +impl Completed { + pub fn to_string(&self) -> String { + match self { + Self::Complete => "COMPLETE".to_owned(), + Self::Incomplete => "INCOMPLETE".to_owned(), + Self::Unknown => "UNKNOWN".to_owned(), + } + } + pub fn from_string(s: &str) -> Self { + match s { + "COMPLETE" => Self::Complete, + "INCOMPLETE" => Self::Incomplete, + _ => Self::Unknown, + } + } +} #[derive(Debug, Clone)] pub struct Story { pub name: String, - pub author: Author, + pub authors: AuthorList, pub description: Option<String>, pub url: String, pub tags: Vec<String>, pub chapters: Vec<Content>, pub source: StorySource, + pub completed: Completed, } impl Story { @@ -27,23 +54,18 @@ impl Story { } pub fn find_chapter(&self, id: String) -> Option<FindChapter> { - let mut found = None; - for content in self.chapters.iter() { - if found.is_some() { - break; - } else if content.id() == id.as_str() { - found = Some(FindChapter { - chapter: content, + self.chapters.iter().find_map(|con| { + if con.id() == 
&id { + Some(FindChapter { + chapter: con, parent: None, - }); - } else if let Content::Section(s) = content { - found = s.find_chapter(&id).map(|f| FindChapter { - chapter: f.chapter, - parent: Some(content), + }) + } else if let Content::Section(_) = con { + con.find_child(&id) + } else { + None } - } - found + }) } } @@ -52,6 +74,8 @@ pub struct ListedStory { pub name: String, pub author: String, pub chapter_count: usize, + pub source: StorySource, + pub completed: Completed, } pub struct FindChapter<'a> { @@ -72,6 +96,24 @@ impl Content { Self::Section(s) => &s.id, } } + + pub fn find_child(&self, id: &str) -> Option<FindChapter> { + match self { + Self::Chapter(_) => None, + Self::Section(s) => s.chapters.iter().find_map(|con| { + if con.id() == id { + Some(FindChapter { + chapter: con, + parent: Some(self), + }) + } else if let Content::Section(_) = con { + con.find_child(&id) + } else { + None + } + }), + } + } } #[derive(Debug, Clone)] @@ -81,6 +123,7 @@ pub struct Section { pub description: Option<String>, pub chapters: Vec<Content>, pub url: Option<String>, + pub author: Option<Author>, } impl Section { @@ -90,26 +133,6 @@ impl Section { Content::Chapter(_) => acc + 1, }) } - - pub fn find_chapter(&self, id: &String) -> Option<FindChapter> { - let mut found = None; - for content in self.chapters.iter() { - if found.is_some() { - break; - } else if content.id() == id.as_str() { - found = Some(FindChapter { - chapter: content, - parent: None, - }); - } else if let Content::Section(s) = content { - found = s.find_chapter(id).map(|f| FindChapter { - chapter: f.chapter, - parent: Some(content), - }); - } - } - found - } } #[derive(Debug, Clone)] @@ -120,6 +143,16 @@ pub struct Chapter { pub text: ChapterText, pub url: String, pub date_posted: DateTime<Utc>, + pub author: Option<Author>, +} + +impl Chapter { + pub fn chapter_id(&self) -> String { + match self.id.rfind(":") { + Some(idx) => self.id[idx + 1..].to_string(), + None => String::new(), + } + } } #[derive(Debug, Clone)] @@ -152,40 +185,74 @@ impl Author { } } 
+#[derive(Debug, Clone)] +pub struct AuthorList { + authors: Vec<Author>, +} + +impl AuthorList { + pub fn new(author: Author) -> AuthorList { + let mut authors = Vec::with_capacity(1); + authors.push(author); + AuthorList { authors } + } + + pub fn from_list<T: Into<Vec<Author>>>(authors: T) -> AuthorList { + let authors = authors.into(); + assert!(authors.len() > 0); // TODO: This will panic if 0 authors are passed in + AuthorList { authors } + } + + pub fn authors(&self) -> &Vec<Author> { + &self.authors + } + + pub fn authors_mut(&mut self) -> &mut Vec<Author> { + &mut self.authors + } + + pub fn len(&self) -> usize { + self.authors.len() + } +} + #[derive(Debug, Clone)] pub enum StorySource { AO3(String), Katalepsis, RoyalRoad(String), + SpaceBattles(String), + SufficientVelocity(String), } -pub static SOURCES_LIST: [&str; 3] = [ +pub static SOURCES_LIST: [&str; 5] = [ "Archive of Our Own: https://archiveofourown.org/works/", "Katalepsis: https://katalepsis.net", "RoyalRoad: https://www.royalroad.com/fiction/", + "SpaceBattles: https://forums.spacebattles.com/threads/thread_name.", + "SufficientVelocity: https://forums.sufficientvelocity.com/threads/thread_name.", + // "XenForo: https:///threads/thread_name.", ]; static REGEXES: OnceCell<Vec<(&'static str, Regex)>> = OnceCell::new(); +#[rustfmt::skip] +fn init_regexes() -> Vec<(&'static str, Regex)> { + vec![ + ("ao3", r"^https://archiveofourown.org/works/(?P<id>\d+)/?.*"), + ("ffnet", r"^https?://(?:www)?\.fanfiction\.net/s/(?P<id>\d+)/?.*"), + ("katalepsis", r"^https?://katalepsis\.net/?.*"), + ("rr", r"^https?://(?:www)?\.royalroad\.com/fiction/(?P<id>\d+)/?.*"), + ("sb", r"^https?://forums\.spacebattles\.com/threads/([^.]+\.)?(?P<id>\d+)/?.*"), + ("sv", r"^https?://forums\.sufficientvelocity\.com/threads/([^.]+\.)?(?P<id>\d+)/?.*"), + ] + .into_iter() + .map(|(src, reg_src)| (src, Regex::new(reg_src).unwrap())) + .collect() +} impl StorySource { - pub fn from_url(url: &str) -> Result<StorySource, ArchiveError> { - let regex_map = REGEXES.get_or_init(|| { - vec![ - ("ao3", 
r"^https://archiveofourown.org/works/(?P<id>\d+)/?.*"), - ( - "ffnet", - r"^https?://(?:www)?\.fanfiction\.net/s/(?P<id>\d+)/?.*", - ), - ("katalepsis", r"^https?://katalepsis\.net/?.*"), - ( - "rr", - r"^https?://(?:www)?\.royalroad\.com/fiction/(?P<id>\d+)/?.*", - ), - ] - .into_iter() - .map(|(src, reg_src)| (src, Regex::new(reg_src).unwrap())) - .collect() - }); + pub fn from_url(url: &str) -> Result<StorySource> { + let regex_map = REGEXES.get_or_init(init_regexes); match regex_map.iter().find(|(_, regex)| regex.is_match(url)) { Some((name, regex)) => { let id = regex.captures(url).unwrap().name("id"); @@ -201,6 +268,16 @@ impl StorySource { .as_str() .to_owned(), ), + "sb" => Self::SpaceBattles( + id.ok_or(ArchiveError::NoIdInSource(url.to_owned(), name.to_string()))? + .as_str() + .to_owned(), + ), + "sv" => Self::SufficientVelocity( + id.ok_or(ArchiveError::NoIdInSource(url.to_owned(), name.to_string()))? + .as_str() + .to_owned(), + ), _ => panic!("URL matched source {name}, which has not been fully implemented"), }) } @@ -210,9 +287,22 @@ impl StorySource { pub fn to_id(&self) -> String { match self { - Self::AO3(ref id) => format!("ao3:{}", id), - Self::Katalepsis => "katalepsis".to_owned(), - Self::RoyalRoad(ref id) => format!("rr:{}", id), + Self::AO3(id) => format!("{}:{}", self.prefix(), id), + Self::Katalepsis => self.prefix().to_owned(), + Self::RoyalRoad(id) => format!("{}:{}", self.prefix(), id), + Self::SpaceBattles(id) => format!("{}:{}", self.prefix(), id), + Self::SufficientVelocity(id) => format!("{}:{}", self.prefix(), id), + } + } + + #[inline(always)] + pub fn prefix(&self) -> &str { + match self { + Self::AO3(_) => "ao3", + Self::Katalepsis => "katalepsis", + Self::RoyalRoad(_) => "rr", + Self::SpaceBattles(_) => "sb", + Self::SufficientVelocity(_) => "sv", } } @@ -223,14 +313,27 @@ impl StorySource { } Self::Katalepsis => "https://katalepsis.net".to_owned(), Self::RoyalRoad(id) => format!("https://www.royalroad.com/fiction/{}", id), + Self::SpaceBattles(id) => 
format!("https://forums.spacebattles.com/threads/{}", id), + Self::SufficientVelocity(id) => { + format!("https://forums.sufficientvelocity.com/threads/{}", id) + } } } + pub fn to_base_url(&self) -> String { + let url = self.to_url(); + let start = url.find("://").map(|pos| pos + 3).unwrap_or(0); + let end = url[start..].find("/").unwrap_or(url[start..].len()); + url[0..end + start].to_owned() + } + pub fn parser(&self) -> Box<dyn Parser> { match self { Self::AO3(_) => Box::new(AO3Parser {}), Self::Katalepsis => Box::new(KatalepsisParser {}), Self::RoyalRoad(_) => Box::new(RoyalRoadParser {}), + Self::SpaceBattles(_) => Box::new(XenforoParser {}), + Self::SufficientVelocity(_) => Box::new(XenforoParser {}), } } } diff --git a/src/tui/event.rs b/src/tui/event.rs new file mode 100644 index 0000000..2a73c43 --- /dev/null +++ b/src/tui/event.rs @@ -0,0 +1,56 @@ +use crossterm::event::{poll, read, Event as TermEvent, KeyCode, KeyEvent, KeyModifiers}; + +use std::sync::mpsc; +use std::thread; +use std::time::Duration; + +pub enum Event<I> { + Input(I), + Quit, + Tick, +} + +pub struct Events { + recv: mpsc::Receiver<Event<KeyEvent>>, + _input_handle: thread::JoinHandle<()>, +} + +impl Events { + pub fn new() -> Events { + let (sender, reciever) = mpsc::channel(); + let _input_handle = { + let sender = sender.clone(); + thread::spawn(move || loop { + let event = match poll(Duration::from_millis(3000)) + .expect("Docs say this will never return Err") + { + true => match read() { + Ok(TermEvent::Key(event)) => match (event.code, event.modifiers) { + (KeyCode::Char('q'), KeyModifiers::NONE) + | (KeyCode::Char('c'), KeyModifiers::CONTROL) => Event::Quit, + _ => Event::Input(event), + }, + Ok(_) => Event::Tick, + Err(_) => Event::Tick, + }, + false => Event::Tick, + }; + sender.send(event).unwrap(); + }) + }; + Events { + recv: reciever, + _input_handle, + } + } + + pub fn next(&self) -> Event<KeyEvent> { + match self.recv.recv() { + Ok(e) => e, + Err(_) => { + println!("Events channel disconnected. 
Exiting."); + Event::Quit + } + } + } +} diff --git a/src/tui/mod.rs b/src/tui/mod.rs new file mode 100644 index 0000000..bebab58 --- /dev/null +++ b/src/tui/mod.rs @@ -0,0 +1,41 @@ +use crossterm::{ + event::{poll, read, DisableMouseCapture, EnableMouseCapture, Event}, + execute, + terminal::{disable_raw_mode, enable_raw_mode, EnterAlternateScreen, LeaveAlternateScreen}, +}; +use tui::{backend::CrosstermBackend, Terminal}; + +use std::io::stdout; + +mod event; + +use crate::Args; +use crate::Result; + +pub(crate) async fn start_tui(_args: Args) -> Result<()> { + enable_raw_mode()?; + let mut stdout = stdout(); + execute!(stdout, EnterAlternateScreen, EnableMouseCapture)?; + let backend = CrosstermBackend::new(stdout); + let mut terminal = Terminal::new(backend)?; + + let events = event::Events::new(); + + loop { + match events.next() { + event::Event::Input(input) => match input { + _ => continue, + }, + event::Event::Quit => break, + event::Event::Tick => continue, + }; + } + disable_raw_mode()?; + execute!( + terminal.backend_mut(), + LeaveAlternateScreen, + DisableMouseCapture + )?; + terminal.show_cursor(); + Ok(()) +}