From a06868e4cd77ab449fd723db4cbf18a38df5f85b Mon Sep 17 00:00:00 2001 From: Kyler Chin <7539174+kylerchin@users.noreply.github.com> Date: Wed, 3 Apr 2024 04:39:59 -0700 Subject: [PATCH] Check also if trips.txt exists --- src/birch/server.rs | 40 ++++++------ .../gtfs_handlers/shape_colour_calculator.rs | 56 ++++++++--------- .../shapes_into_postgres.rs | 63 +++++++++---------- src/maple/gtfs_process.rs | 27 ++++---- src/maple/main.rs | 47 ++++++++------ 5 files changed, 118 insertions(+), 115 deletions(-) diff --git a/src/birch/server.rs b/src/birch/server.rs index 883591d8..fabb173d 100644 --- a/src/birch/server.rs +++ b/src/birch/server.rs @@ -2,6 +2,7 @@ use actix_web::dev::Service; use actix_web::middleware::DefaultHeaders; use actix_web::{get, middleware, web, App, HttpRequest, HttpResponse, HttpServer, Responder}; use bb8::Pool; +use cached::proc_macro::once; use catenary::postgis_to_diesel::diesel_multi_polygon_to_geo; use catenary::postgres_tools::{make_async_pool, CatenaryPostgresPool}; use diesel::query_dsl::methods::FilterDsl; @@ -19,7 +20,6 @@ use rstar::RTree; use serde::Deserialize; use serde_derive::Serialize; use serde_json::to_string; -use cached::proc_macro::once; use serde_json::{json, to_string_pretty}; use sqlx::postgres::{PgPoolOptions, PgRow}; use sqlx::{FromRow, Row}; @@ -585,28 +585,34 @@ struct ChateauToSend { } #[actix_web::get("/getchateaus")] -async fn chateaus(pool: web::Data>, req: HttpRequest, chateau_cache: web::Data) -> impl Responder { - +async fn chateaus( + pool: web::Data>, + req: HttpRequest, + chateau_cache: web::Data, +) -> impl Responder { let chateau_lock = chateau_cache.read().unwrap(); let chateau_as_ref = chateau_lock.as_ref(); let cloned_chateau_data = match chateau_as_ref { Some(chateau_as_ref) => Some(chateau_as_ref.clone()), - None => None + None => None, }; std::mem::drop(chateau_as_ref); std::mem::drop(chateau_lock); if let Some(cloned_chateau_data) = cloned_chateau_data { - if cloned_chateau_data.last_updated_time_ms > SystemTime::now() - .duration_since(SystemTime::UNIX_EPOCH) - .unwrap() - .as_millis() as u64 - 3_600_000 { + if cloned_chateau_data.last_updated_time_ms + > SystemTime::now() + .duration_since(SystemTime::UNIX_EPOCH) + .unwrap() + .as_millis() as u64 + - 3_600_000 + { return HttpResponse::Ok() - .insert_header(("Content-Type", "application/json")) - .insert_header(("Cache-Control", "max-age=86400")) - .body(cloned_chateau_data.chateau_geojson); + .insert_header(("Content-Type", "application/json")) + .insert_header(("Cache-Control", "max-age=86400")) + .body(cloned_chateau_data.chateau_geojson); } } @@ -709,15 +715,13 @@ async fn chateaus(pool: web::Data>, req: HttpRequest, let mut chateau_lock = chateau_cache.write().unwrap(); let mut chateau_mut_ref = chateau_lock.as_mut(); - chateau_mut_ref = Some( - &mut ChateauCache { - chateau_geojson: serialized.clone(), - last_updated_time_ms: SystemTime::now() + chateau_mut_ref = Some(&mut ChateauCache { + chateau_geojson: serialized.clone(), + last_updated_time_ms: SystemTime::now() .duration_since(SystemTime::UNIX_EPOCH) .unwrap() - .as_millis() as u64 - } - ); + .as_millis() as u64, + }); std::mem::drop(chateau_lock); diff --git a/src/maple/gtfs_handlers/shape_colour_calculator.rs b/src/maple/gtfs_handlers/shape_colour_calculator.rs index 0b2c002e..9230d765 100644 --- a/src/maple/gtfs_handlers/shape_colour_calculator.rs +++ b/src/maple/gtfs_handlers/shape_colour_calculator.rs @@ -1,26 +1,21 @@ use crate::gtfs_handlers::colour_correction; use rgb::RGB; -use std::collections::{HashSet,HashMap}; +use std::collections::{HashMap, HashSet}; pub struct ShapeToColourResponse { pub shape_to_color_lookup: HashMap>, pub shape_to_text_color_lookup: HashMap>, pub shape_id_to_route_ids_lookup: HashMap>, - pub route_ids_to_shape_ids: HashMap> + pub route_ids_to_shape_ids: HashMap>, } -pub fn shape_to_colour( - feed_id: &str, - gtfs: >fs_structures::Gtfs, -) -> ShapeToColourResponse { +pub fn shape_to_colour(feed_id: &str, gtfs: >fs_structures::Gtfs) -> ShapeToColourResponse { let mut shape_to_color_lookup: HashMap> = HashMap::new(); let mut shape_to_text_color_lookup: HashMap> = HashMap::new(); let mut shape_id_to_route_ids_lookup: HashMap> = HashMap::new(); let mut route_ids_to_shape_ids: HashMap> = HashMap::new(); for (trip_id, trip) in >fs.trips { - - if let Some(shape_id) = &trip.shape_id { if let Some(route) = gtfs.routes.get(&trip.route_id) { if !shape_to_color_lookup.contains_key(shape_id) { @@ -33,38 +28,37 @@ pub fn shape_to_colour( ); shape_to_color_lookup.insert(trip.shape_id.as_ref().unwrap().to_owned(), color); - shape_to_text_color_lookup - .insert(shape_id.clone(), route.text_color); + shape_to_text_color_lookup.insert(shape_id.clone(), route.text_color); } //assign route id to this shape id - shape_id_to_route_ids_lookup.entry(shape_id.clone()) - .and_modify(|existing_route_ids| { - //if it does not contain the route id already - if !existing_route_ids.contains(&route.id) { - existing_route_ids.insert(route.id.clone()); - } - }).or_insert(HashSet::from_iter([route.id.clone()])); - - //assign shape to a route id - route_ids_to_shape_ids.entry(route.id.clone()) - .and_modify(|existing_shape_ids| { - if !existing_shape_ids.contains(shape_id) { - existing_shape_ids.insert(shape_id.clone()); - } - }).or_insert( - HashSet::from_iter([shape_id.clone()]) - ); + shape_id_to_route_ids_lookup + .entry(shape_id.clone()) + .and_modify(|existing_route_ids| { + //if it does not contain the route id already + if !existing_route_ids.contains(&route.id) { + existing_route_ids.insert(route.id.clone()); + } + }) + .or_insert(HashSet::from_iter([route.id.clone()])); + //assign shape to a route id + route_ids_to_shape_ids + .entry(route.id.clone()) + .and_modify(|existing_shape_ids| { + if !existing_shape_ids.contains(shape_id) { + existing_shape_ids.insert(shape_id.clone()); + } + }) + .or_insert(HashSet::from_iter([shape_id.clone()])); } - } - + } } ShapeToColourResponse { shape_to_color_lookup, shape_to_text_color_lookup, shape_id_to_route_ids_lookup, - route_ids_to_shape_ids - } + route_ids_to_shape_ids, + } } diff --git a/src/maple/gtfs_ingestion_sequence/shapes_into_postgres.rs b/src/maple/gtfs_ingestion_sequence/shapes_into_postgres.rs index 2cc66165..158fbe09 100644 --- a/src/maple/gtfs_ingestion_sequence/shapes_into_postgres.rs +++ b/src/maple/gtfs_ingestion_sequence/shapes_into_postgres.rs @@ -41,13 +41,11 @@ pub async fn shapes_into_postgres( //backround colour to use let route = match route_ids { - Some(route_ids) => { - match route_ids.iter().nth(0) { - Some(route_id) => gtfs.routes.get(route_id), - None => None, - } + Some(route_ids) => match route_ids.iter().nth(0) { + Some(route_id) => gtfs.routes.get(route_id), + None => None, }, - None => None + None => None, }; let bg_color = match shape_to_color_lookup.get(shape_id) { @@ -104,31 +102,32 @@ pub async fn shapes_into_postgres( //creates a text label for the shape to be displayed with on the map //todo! change this with i18n let route_label: String = match route_ids { - Some(route_ids) => - route_ids - .iter() - .map(|route_id| { - let route = gtfs.routes.get(route_id); - match route { - Some(route) => match route.short_name.is_some() { - true => route.short_name.to_owned(), - false => match route.long_name.is_some() { - true => route.long_name.to_owned(), - false => None, + Some(route_ids) => route_ids + .iter() + .map(|route_id| { + let route = gtfs.routes.get(route_id); + match route { + Some(route) => match route.short_name.is_some() { + true => route.short_name.to_owned(), + false => match route.long_name.is_some() { + true => route.long_name.to_owned(), + false => None, + }, }, - }, - _ => None, - } - }) - .filter(|route_label| route_label.is_some()) - .map(|route_label| rename_route_string(route_label.as_ref().unwrap().to_owned())) - .collect::>() - .join(",") - .as_str() - .replace("Orange County", "OC") - .replace("Inland Empire", "IE") - .to_string(), - None => String::from("") + _ => None, + } + }) + .filter(|route_label| route_label.is_some()) + .map(|route_label| { + rename_route_string(route_label.as_ref().unwrap().to_owned()) + }) + .collect::>() + .join(",") + .as_str() + .replace("Orange County", "OC") + .replace("Inland Empire", "IE") + .to_string(), + None => String::from(""), }; //run insertion @@ -151,7 +150,7 @@ pub async fn shapes_into_postgres( .map(|route_id| Some(route_id.to_string())) .collect(), ), - None => None + None => None, }, route_type: route_type_number, route_label: Some(route_label.clone()), @@ -181,7 +180,7 @@ pub async fn shapes_into_postgres( .map(|route_id| Some(route_id.to_string())) .collect(), ), - None => None + None => None, }, route_type: route_type_number, route_label: Some(route_label), diff --git a/src/maple/gtfs_process.rs b/src/maple/gtfs_process.rs index 428eb4cb..986d10c9 100644 --- a/src/maple/gtfs_process.rs +++ b/src/maple/gtfs_process.rs @@ -5,6 +5,7 @@ use crate::gtfs_handlers::colour_correction::fix_foreground_colour_rgb_feed; // Removal of the attribution is not allowed, as covered under the AGPL license use crate::gtfs_handlers::gtfs_to_int::availability_to_int; use crate::gtfs_handlers::shape_colour_calculator::shape_to_colour; +use crate::gtfs_handlers::shape_colour_calculator::ShapeToColourResponse; use crate::gtfs_handlers::stops_associated_items::*; use crate::gtfs_ingestion_sequence::shapes_into_postgres::shapes_into_postgres; use crate::gtfs_ingestion_sequence::stops_into_postgres::stops_into_postgres; @@ -17,7 +18,6 @@ use catenary::schema::gtfs::stoptimes::continuous_drop_off; use chrono::NaiveDate; use diesel::ExpressionMethods; use diesel_async::RunQueryDsl; -use crate::gtfs_handlers::shape_colour_calculator::ShapeToColourResponse; use geo::polygon; use gtfs_structures::ContinuousPickupDropOff; use gtfs_structures::FeedInfo; @@ -119,11 +119,11 @@ pub async fn gtfs_process_feed( shape_to_color_lookup, shape_to_text_color_lookup, shape_id_to_route_ids_lookup, - route_ids_to_shape_ids + route_ids_to_shape_ids, } = shape_to_colour(&feed_id, >fs); //insert agencies - let mut agency_id_already_done: HashSet> = HashSet::new(); + let mut agency_id_already_done: HashSet> = HashSet::new(); for agency in >fs.agencies { use catenary::schema::gtfs::agencies::dsl::agencies; @@ -144,7 +144,7 @@ pub async fn gtfs_process_feed( agency_phone: agency.phone.clone(), agency_timezone: agency.timezone.clone(), }; - + diesel::insert_into(agencies) .values(agency_row) .execute(conn) @@ -167,7 +167,7 @@ pub async fn gtfs_process_feed( Arc::clone(&arc_conn_pool), &chateau_id, &attempt_id, - &shape_id_to_route_ids_lookup + &shape_id_to_route_ids_lookup, ) .await?; @@ -324,11 +324,10 @@ pub async fn gtfs_process_feed( for stop_chunk in stop_times_pg.chunks(80) { diesel::insert_into(stoptimes) - .values(stop_chunk) - .execute(conn) - .await?; + .values(stop_chunk) + .execute(conn) + .await?; } - } //insert stops @@ -398,12 +397,12 @@ pub async fn gtfs_process_feed( }) .collect(); - for routes_chunk in routes_pg.chunks(100) { + for routes_chunk in routes_pg.chunks(100) { diesel::insert_into(catenary::schema::gtfs::routes::dsl::routes) - .values(routes_chunk) - .execute(conn) - .await?; - } + .values(routes_chunk) + .execute(conn) + .await?; + } //calculate concave hull let hull = crate::gtfs_handlers::hull_from_gtfs::hull_from_gtfs(>fs); diff --git a/src/maple/main.rs b/src/maple/main.rs index 978d0fbb..7cc7a8f7 100644 --- a/src/maple/main.rs +++ b/src/maple/main.rs @@ -298,33 +298,40 @@ async fn run_ingest() -> Result<(), Box> { unzip_feeds.len() ); - let check_for_stops_ids = unzip_feeds.into_iter().filter(|x| x.1 == true) - .map(|(feed_id, _)| feed_id) - .map(|feed_id| { - - let path_str = format!("gtfs_uncompressed/{}/stops.txt", &feed_id); + let check_for_stops_ids = unzip_feeds + .into_iter() + .filter(|x| x.1 == true) + .map(|(feed_id, _)| feed_id) + .map(|feed_id| { + let stop_path_str = format!("gtfs_uncompressed/{}/stops.txt", &feed_id); - let has_stop_table = - std::path::Path::new(&path_str).exists(); + let has_stop_table = std::path::Path::new(&stop_path_str).exists(); - (feed_id, has_stop_table) - } - ).collect::>(); + let trip_path_str = format!("gtfs_uncompressed/{}/trips.txt", &feed_id); + + let has_trip_table = std::path::Path::new(&trip_path_str).exists(); + + (feed_id, has_stop_table && has_trip_table) + }) + .collect::>(); let feeds_with_stop_table_len = check_for_stops_ids - .iter() - .map(|x| x.1 == true) - .collect::>() - .len(); + .iter() + .map(|x| x.1 == true) + .collect::>() + .len(); let feeds_without_stop_table_len = check_for_stops_ids - .iter() - .map(|x| x.1 == false) - .collect::>() - .len(); + .iter() + .map(|x| x.1 == false) + .collect::>() + .len(); + + println!( + "{} deleted because does not contain stops.txt {} remaining", + feeds_without_stop_table_len, feeds_with_stop_table_len + ); - println!("{} deleted because does not contain stops.txt {} remaining",feeds_without_stop_table_len, feeds_with_stop_table_len); - // todo! perform additional checks to ensure feed is not a zip bomb let attempt_ids: HashMap = {