Skip to content

Commit

Permalink
Compression algorithm v1 for stop times
Browse files Browse the repository at this point in the history
  • Loading branch information
kylerchin committed Apr 6, 2024
1 parent d391ccd commit 7194d85
Show file tree
Hide file tree
Showing 18 changed files with 325 additions and 90 deletions.
3 changes: 2 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -59,4 +59,5 @@ Session.vim
rustc-ice*.txt
rustc-ice-*

.env
.env
testing-gtfs
4 changes: 4 additions & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -127,3 +127,7 @@ path = "src/spruce/main.rs"
[[bin]]
name = "pg_tests"
path = "src/pg_tests/main.rs"

[[bin]]
name = "test_maple_syrup"
path = "src/maple_syrup/test.rs"
25 changes: 25 additions & 0 deletions migrations/2024-04-06-053500_timetable-compression-v1/down.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
-- This file should undo anything in `up.sql`
-- Recreates the gtfs.stoptimes table that up.sql drops.
-- Only the schema is recreated; this migration does not restore any rows.
CREATE TABLE gtfs.stoptimes (
onestop_feed_id text NOT NULL,
attempt_id text NOT NULL,
trip_id text NOT NULL,
stop_sequence int NOT NULL,
-- NOTE(review): arrival/departure times are typed as OID rather than an
-- integer or interval type; presumably used as an unsigned 32-bit value — confirm.
arrival_time OID,
departure_time OID,
stop_id text NOT NULL,
stop_headsign text,
stop_headsign_translations jsonb,
pickup_type smallint NOT NULL,
drop_off_type smallint NOT NULL,
shape_dist_traveled float4,
-- true is 1, false is 0
timepoint bool NOT NULL,
continuous_pickup smallint NOT NULL,
continuous_drop_off smallint NOT NULL,
-- point GEOMETRY(POINT, 4326),
route_id text NOT NULL,
chateau text NOT NULL,
-- A row is uniquely identified by feed, attempt, trip and stop sequence.
PRIMARY KEY (onestop_feed_id, attempt_id, trip_id, stop_sequence)
);

-- Index on the chateau column for queries that filter by it.
CREATE INDEX stoptimes_chateau_idx ON gtfs.stoptimes (chateau);
3 changes: 3 additions & 0 deletions migrations/2024-04-06-053500_timetable-compression-v1/up.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
-- Drop the gtfs.stoptimes table and, via CASCADE, any objects that depend on it.
DROP TABLE IF EXISTS gtfs.stoptimes CASCADE;

2 changes: 1 addition & 1 deletion rust-toolchain.toml
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
[toolchain]
channel = "nightly"
channel = "stable"
78 changes: 78 additions & 0 deletions src/enum_to_int.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,78 @@
use gtfs_structures::BikesAllowedType;
use gtfs_structures::ContinuousPickupDropOff;
use gtfs_structures::LocationType;
use gtfs_structures::RouteType;
use gtfs_structures::TimepointType;

/// Converts a [`LocationType`] into its `i16` code.
///
/// Named variants map to fixed codes (`StopPoint` = 0, `StopArea` = 1,
/// `StationEntrance` = 2, `GenericNode` = 3, `BoardingArea` = 4), while
/// `Unknown` passes its wrapped value through unchanged.
pub fn location_type_conversion(input: &LocationType) -> i16 {
    match *input {
        // Unrecognised values keep whatever number they were parsed with.
        LocationType::Unknown(raw) => raw,
        LocationType::BoardingArea => 4,
        LocationType::GenericNode => 3,
        LocationType::StationEntrance => 2,
        LocationType::StopArea => 1,
        LocationType::StopPoint => 0,
    }
}

/// Converts a [`RouteType`] into its `i16` code.
///
/// The basic modes map to 0 through 7 (`Tramway` .. `Funicular`), `Coach`,
/// `Air` and `Taxi` map to 200, 1100 and 1500 respectively, and `Other`
/// passes its wrapped value through unchanged.
pub fn route_type_to_int(input: &RouteType) -> i16 {
    match *input {
        // Any other route type carries its own numeric code.
        RouteType::Other(code) => code,
        // Codes outside the basic 0-7 range.
        RouteType::Taxi => 1500,
        RouteType::Air => 1100,
        RouteType::Coach => 200,
        // Basic modes.
        RouteType::Funicular => 7,
        RouteType::Gondola => 6,
        RouteType::CableCar => 5,
        RouteType::Ferry => 4,
        RouteType::Bus => 3,
        RouteType::Rail => 2,
        RouteType::Subway => 1,
        RouteType::Tramway => 0,
    }
}

/// Converts a `gtfs_structures::Availability` into its `i16` code.
///
/// `InformationNotAvailable` is 0, `Available` is 1 and `NotAvailable` is 2;
/// `Unknown` passes its wrapped value through unchanged.
pub fn availability_to_int(input: &gtfs_structures::Availability) -> i16 {
    match *input {
        gtfs_structures::Availability::InformationNotAvailable => 0,
        gtfs_structures::Availability::Available => 1,
        gtfs_structures::Availability::NotAvailable => 2,
        // Unrecognised values keep whatever number they were parsed with.
        gtfs_structures::Availability::Unknown(raw) => raw,
    }
}

/// Converts a [`TimepointType`] into a boolean flag.
///
/// Returns `true` for `Exact` and `false` for `Approximate`.
pub fn timepoint_to_bool(timepoint: &TimepointType) -> bool {
    // Kept as an exhaustive match so a new variant forces a decision here.
    match *timepoint {
        TimepointType::Approximate => false,
        TimepointType::Exact => true,
    }
}

/// Converts a `gtfs_structures::PickupDropOffType` into its `i16` code.
///
/// `Regular` is 0, `NotAvailable` is 1, `ArrangeByPhone` is 2 and
/// `CoordinateWithDriver` is 3; `Unknown` passes its wrapped value through
/// unchanged.
pub fn pickup_dropoff_to_i16(x: &gtfs_structures::PickupDropOffType) -> i16 {
    match *x {
        // Unrecognised values keep whatever number they were parsed with.
        gtfs_structures::PickupDropOffType::Unknown(code) => code,
        gtfs_structures::PickupDropOffType::CoordinateWithDriver => 3,
        gtfs_structures::PickupDropOffType::ArrangeByPhone => 2,
        gtfs_structures::PickupDropOffType::NotAvailable => 1,
        gtfs_structures::PickupDropOffType::Regular => 0,
    }
}

/// Converts a [`ContinuousPickupDropOff`] into its `i16` code.
///
/// `Continuous` is 0, `NotAvailable` is 1, `ArrangeByPhone` is 2 and
/// `CoordinateWithDriver` is 3; `Unknown` passes its wrapped value through
/// unchanged.
pub fn continuous_pickup_drop_off_to_i16(x: &ContinuousPickupDropOff) -> i16 {
    match *x {
        // Unrecognised values keep whatever number they were parsed with.
        ContinuousPickupDropOff::Unknown(code) => code,
        ContinuousPickupDropOff::CoordinateWithDriver => 3,
        ContinuousPickupDropOff::ArrangeByPhone => 2,
        ContinuousPickupDropOff::NotAvailable => 1,
        ContinuousPickupDropOff::Continuous => 0,
    }
}

/// Converts a [`BikesAllowedType`] into its `i16` code.
///
/// `NoBikeInfo` is 0, `AtLeastOneBike` is 1 and `NoBikesAllowed` is 2;
/// `Unknown` passes its wrapped value through unchanged.
pub fn bikes_allowed_to_int(bikes_allowed: &BikesAllowedType) -> i16 {
    match *bikes_allowed {
        // Unrecognised values keep whatever number they were parsed with.
        BikesAllowedType::Unknown(raw) => raw,
        BikesAllowedType::NoBikesAllowed => 2,
        BikesAllowedType::AtLeastOneBike => 1,
        BikesAllowedType::NoBikeInfo => 0,
    }
}
2 changes: 2 additions & 0 deletions src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,9 @@ extern crate diesel_derive_newtype;
pub mod agency_secret;
pub mod aspen;
pub mod custom_pg_types;
pub mod enum_to_int;
pub mod gtfs_rt_handlers;
pub mod maple_syrup;
pub mod models;
pub mod postgis_to_diesel;
pub mod postgres_tools;
Expand Down
30 changes: 0 additions & 30 deletions src/maple/gtfs_handlers/enum_to_int.rs

This file was deleted.

39 changes: 0 additions & 39 deletions src/maple/gtfs_handlers/gtfs_to_int.rs

This file was deleted.

2 changes: 0 additions & 2 deletions src/maple/gtfs_handlers/mod.rs
Original file line number Diff line number Diff line change
@@ -1,8 +1,6 @@
pub mod colour_correction;
pub mod convex_hull;
pub mod enum_to_int;
pub mod flatten;
pub mod gtfs_to_int;
pub mod hull_from_gtfs;
pub mod rename_route_labels;
pub mod shape_colour_calculator;
Expand Down
17 changes: 12 additions & 5 deletions src/maple/gtfs_handlers/shape_colour_calculator.rs
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ pub fn shape_to_colour(feed_id: &str, gtfs: &gtfs_structures::Gtfs) -> ShapeToCo
//metrolink colours are all bonked because trips don't have shape ids in them
if (feed_id == "f-9qh-metrolinktrains") {
for (shape_id, shape) in &gtfs.shapes {
let cleanedline = shape_id.to_owned().replace("in","").replace("out","");
let cleanedline = shape_id.to_owned().replace("in", "").replace("out", "");

let value = match cleanedline.as_str() {
"91" => "91 Line",
Expand All @@ -39,10 +39,14 @@ pub fn shape_to_colour(feed_id: &str, gtfs: &gtfs_structures::Gtfs) -> ShapeToCo
})
.or_insert(HashSet::from_iter([shape_id.clone()]));

shape_id_to_route_ids_lookup.insert(shape_id.clone(), HashSet::from_iter([value.to_string()]));
shape_id_to_route_ids_lookup
.insert(shape_id.clone(), HashSet::from_iter([value.to_string()]));

if let Some(route) = gtfs.routes.get(&value.to_string()) {
println!("Route data found for shape {} and route id {}", shape_id, value);
println!(
"Route data found for shape {} and route id {}",
shape_id, value
);
let color = colour_correction::fix_background_colour_rgb_feed_route(
feed_id,
route.color,
Expand All @@ -52,9 +56,12 @@ pub fn shape_to_colour(feed_id: &str, gtfs: &gtfs_structures::Gtfs) -> ShapeToCo
shape_to_color_lookup.insert(shape_id.clone(), color);
shape_to_text_color_lookup.insert(shape_id.clone(), route.text_color);
} else {
eprintln!("Could not find the route data for shape {} and route id {}", shape_id, value);
eprintln!(
"Could not find the route data for shape {} and route id {}",
shape_id, value
);
}
}
}
}

for (trip_id, trip) in &gtfs.trips {
Expand Down
2 changes: 1 addition & 1 deletion src/maple/gtfs_handlers/stops_associated_items.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
use crate::gtfs_handlers::enum_to_int::route_type_to_int;
use catenary::enum_to_int::route_type_to_int;
use std::collections::{HashMap, HashSet};

pub fn make_hashmap_stops_to_route_types_and_ids(
Expand Down
2 changes: 1 addition & 1 deletion src/maple/gtfs_ingestion_sequence/shapes_into_postgres.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ use std::error::Error;
use std::sync::Arc;

use crate::gtfs_handlers::colour_correction;
use crate::gtfs_handlers::enum_to_int::route_type_to_int;
use catenary::enum_to_int::route_type_to_int;
use crate::gtfs_handlers::rename_route_labels::*;
use catenary::postgres_tools::CatenaryConn;
use catenary::postgres_tools::CatenaryPostgresPool;
Expand Down
5 changes: 3 additions & 2 deletions src/maple/gtfs_ingestion_sequence/stops_into_postgres.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ use diesel_async::AsyncConnection;
use diesel_async::RunQueryDsl;
use std::collections::{HashMap, HashSet};
use std::sync::Arc;
use catenary::enum_to_int::*;
use titlecase::titlecase;

pub async fn stops_into_postgres(
Expand Down Expand Up @@ -42,7 +43,7 @@ pub async fn stops_into_postgres(
code: stop.code.clone(),
gtfs_desc: stop.description.clone(),
gtfs_desc_translations: None,
location_type: crate::gtfs_handlers::gtfs_to_int::location_type_conversion(
location_type: location_type_conversion(
&stop.location_type,
),
children_ids: match stop_id_to_children_ids.get(&stop.id) {
Expand All @@ -68,7 +69,7 @@ pub async fn stops_into_postgres(
timezone: stop.timezone.clone(),
level_id: stop.level_id.clone(),
station_feature: false,
wheelchair_boarding: crate::gtfs_handlers::gtfs_to_int::availability_to_int(
wheelchair_boarding: availability_to_int(
&stop.wheelchair_boarding,
),
primary_route_type: match stop_ids_to_route_types.get(&stop.id) {
Expand Down
11 changes: 3 additions & 8 deletions src/maple/gtfs_process.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,13 +3,13 @@ use crate::gtfs_handlers::colour_correction::fix_foreground_colour_rgb;
use crate::gtfs_handlers::colour_correction::fix_foreground_colour_rgb_feed;
// Initial version 3 of ingest written by Kyler Chin
// Removal of the attribution is not allowed, as covered under the AGPL license
use crate::gtfs_handlers::gtfs_to_int::availability_to_int;
use crate::gtfs_handlers::shape_colour_calculator::shape_to_colour;
use crate::gtfs_handlers::shape_colour_calculator::ShapeToColourResponse;
use crate::gtfs_handlers::stops_associated_items::*;
use crate::gtfs_ingestion_sequence::shapes_into_postgres::shapes_into_postgres;
use crate::gtfs_ingestion_sequence::stops_into_postgres::stops_into_postgres;
use crate::DownloadedFeedsInformation;
use catenary::enum_to_int::*;
use catenary::models::Route as RoutePgModel;
use catenary::postgres_tools::CatenaryConn;
use catenary::postgres_tools::CatenaryPostgresPool;
Expand Down Expand Up @@ -235,12 +235,7 @@ pub async fn gtfs_process_feed(
}),
None => None,
},

Check warning

Code scanning / clippy

manual implementation of Option::map Warning

manual implementation of Option::map

Check warning

Code scanning / clippy

manual implementation of Option::map Warning

manual implementation of Option::map
bikes_allowed: match trip.bikes_allowed {
BikesAllowedType::NoBikeInfo => 0,
BikesAllowedType::AtLeastOneBike => 1,
BikesAllowedType::NoBikesAllowed => 2,
BikesAllowedType::Unknown(unknown) => unknown,
},
bikes_allowed: bikes_allowed_to_int(&trip.bikes_allowed),
block_id: trip.block_id.clone(),
shape_id: trip.shape_id.clone(),
wheelchair_accessible: availability_to_int(&trip.wheelchair_accessible),
Expand Down Expand Up @@ -374,7 +369,7 @@ pub async fn gtfs_process_feed(
long_name_translations: None,
gtfs_desc: route.desc.clone(),
gtfs_desc_translations: None,
route_type: crate::gtfs_handlers::gtfs_to_int::route_type_to_int(&route.route_type),
route_type: route_type_to_int(&route.route_type),
url: route.url.clone(),
url_translations: None,
shapes_list: match route_ids_to_shape_ids.get(&route_id.clone()) {
Expand Down
3 changes: 3 additions & 0 deletions src/maple_syrup/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
# GTFS compression algorithm

The goal is to compress the current 197 GB of stop times into a series of transfer patterns and trip patterns.
Loading

0 comments on commit 7194d85

Please sign in to comment.