diff --git a/combine_gtfs_feeds/cli/__init__.py b/combine_gtfs_feeds/cli/__init__.py index 0152999..6089ed3 100644 --- a/combine_gtfs_feeds/cli/__init__.py +++ b/combine_gtfs_feeds/cli/__init__.py @@ -1,3 +1,3 @@ from .cli import CLI from . import run -from .gtfs_schema import (GTFS_Schema) +from .gtfs_schema import GTFS_Schema diff --git a/combine_gtfs_feeds/cli/gtfs_schema.py b/combine_gtfs_feeds/cli/gtfs_schema.py index ac2fbd9..9e83464 100644 --- a/combine_gtfs_feeds/cli/gtfs_schema.py +++ b/combine_gtfs_feeds/cli/gtfs_schema.py @@ -7,7 +7,6 @@ class GTFS_Schema(object): - class Agency(pa.SchemaModel): agency_id: Series[str] = pa.Field(coerce=True) agency_name: Series[str] = pa.Field(coerce=True) @@ -27,10 +26,14 @@ class Stops(pa.SchemaModel): stop_lon: Series[float64] = pa.Field(coerce=True, nullable=True) zone_id: Series[str] = pa.Field(coerce=True, nullable=True) stop_url: Optional[Series[str]] = pa.Field(coerce=True, nullable=True) - location_type: Optional[Series[pd.Int64Dtype]] = pa.Field(coerce=True, nullable=True, isin=[0, 1, 2, 3, 4]) + location_type: Optional[Series[pd.Int64Dtype]] = pa.Field( + coerce=True, nullable=True, isin=[0, 1, 2, 3, 4] + ) parent_station: Optional[Series[str]] = pa.Field(coerce=True, nullable=True) stop_timezone: Optional[Series[str]] = pa.Field(coerce=True, nullable=True) - wheelchair_boarding: Optional[Series[pd.Int64Dtype]] = pa.Field(coerce=True, nullable=True, isin=[0, 1, 2]) + wheelchair_boarding: Optional[Series[pd.Int64Dtype]] = pa.Field( + coerce=True, nullable=True, isin=[0, 1, 2] + ) level_id: Optional[Series[str]] = pa.Field(coerce=True, nullable=True) platform_code: Optional[Series[str]] = pa.Field(coerce=True, nullable=True) @@ -40,16 +43,19 @@ class Routes(pa.SchemaModel): route_short_name: Optional[Series[str]] = pa.Field(coerce=True, nullable=True) route_long_name: Optional[Series[str]] = pa.Field(coerce=True, nullable=True) route_desc: Optional[Series[str]] = pa.Field(coerce=True, nullable=True) - route_type: Series[int] = pa.Field( - isin=[0, 1, 2, 3, 4, 5, 6, 7, 11, 12]) + route_type: Series[int] = pa.Field(isin=[0, 1, 2, 3, 4, 5, 6, 7, 11, 12]) route_url: Optional[Series[str]] = pa.Field(coerce=True, nullable=True) route_color: Optional[Series[str]] = pa.Field(coerce=True, nullable=True) route_text_color: Optional[Series[str]] = pa.Field(coerce=True, nullable=True) - route_sort_order: Optional[Series[pd.Int64Dtype]] = pa.Field(coerce=True, nullable=True) - continuous_pickup: Optional[Series[pd.Int64Dtype]] = pa.Field(coerce=True, nullable=True, isin=[0, 1, 2, 3]) - continuous_drop_off: Optional[Series[pd.Int64Dtype]] = pa.Field(coerce=True, nullable=True, isin=[0, 1, 2, 3]) - - + route_sort_order: Optional[Series[pd.Int64Dtype]] = pa.Field( + coerce=True, nullable=True + ) + continuous_pickup: Optional[Series[pd.Int64Dtype]] = pa.Field( + coerce=True, nullable=True, isin=[0, 1, 2, 3] + ) + continuous_drop_off: Optional[Series[pd.Int64Dtype]] = pa.Field( + coerce=True, nullable=True, isin=[0, 1, 2, 3] + ) class Trips(pa.SchemaModel): route_id: Series[str] = pa.Field(coerce=True) @@ -57,12 +63,17 @@ class Trips(pa.SchemaModel): trip_id: Series[str] = pa.Field(coerce=True) trip_headsign: Optional[Series[str]] = pa.Field(coerce=True, nullable=True) trip_short_name: Optional[Series[str]] = pa.Field(coerce=True, nullable=True) - direction_id: Optional[Series[pd.Int64Dtype]] = pa.Field(coerce=True, nullable=True, isin=[0, 1]) + direction_id: Optional[Series[pd.Int64Dtype]] = pa.Field( + coerce=True, nullable=True, isin=[0, 1] + ) block_id: Optional[Series[str]] = pa.Field(coerce=True, nullable=True) shape_id: Optional[Series[str]] = pa.Field(coerce=True, nullable=True) - wheelchair_accessible: Optional[Series[pd.Int64Dtype]] = pa.Field(coerce=True, nullable=True, isin=[0, 1, 2]) - bikes_allowed: Optional[Series[pd.Int64Dtype]] = pa.Field(coerce=True, nullable=True, isin=[0, 1, 2]) - + wheelchair_accessible: Optional[Series[pd.Int64Dtype]] = pa.Field( + coerce=True, nullable=True, isin=[0, 1, 2] + ) + bikes_allowed: Optional[Series[pd.Int64Dtype]] = pa.Field( + coerce=True, nullable=True, isin=[0, 1, 2] + ) class Stop_Times(pa.SchemaModel): trip_id: Series[str] = pa.Field(coerce=True) @@ -71,12 +82,24 @@ class Stop_Times(pa.SchemaModel): stop_id: Series[str] = pa.Field(coerce=True) stop_sequence: Series[int] = pa.Field(coerce=True) stop_headsign: Optional[Series[str]] = pa.Field(coerce=True, nullable=True) - pickup_type: Optional[Series[pd.Int64Dtype]] = pa.Field(coerce=True, nullable=True, isin=[0, 1, 2, 3]) - drop_off_type: Optional[Series[pd.Int64Dtype]] = pa.Field(coerce=True, nullable=True, isin=[0, 1, 2, 3]) - continuous_pickup: Optional[Series[pd.Int64Dtype]] = pa.Field(coerce=True, nullable=True, isin=[0, 1, 2, 3]) - continuous_drop_off: Optional[Series[pd.Int64Dtype]] = pa.Field(coerce=True, nullable=True, isin=[0, 1, 2, 3]) - shape_dist_traveled: Optional[Series[float64]] = pa.Field(coerce=True, nullable=True, ge=0) - timepoint: Optional[Series[pd.Int64Dtype]] = pa.Field(coerce=True, nullable=True, isin=[0, 1]) + pickup_type: Optional[Series[pd.Int64Dtype]] = pa.Field( + coerce=True, nullable=True, isin=[0, 1, 2, 3] + ) + drop_off_type: Optional[Series[pd.Int64Dtype]] = pa.Field( + coerce=True, nullable=True, isin=[0, 1, 2, 3] + ) + continuous_pickup: Optional[Series[pd.Int64Dtype]] = pa.Field( + coerce=True, nullable=True, isin=[0, 1, 2, 3] + ) + continuous_drop_off: Optional[Series[pd.Int64Dtype]] = pa.Field( + coerce=True, nullable=True, isin=[0, 1, 2, 3] + ) + shape_dist_traveled: Optional[Series[float64]] = pa.Field( + coerce=True, nullable=True, ge=0 + ) + timepoint: Optional[Series[pd.Int64Dtype]] = pa.Field( + coerce=True, nullable=True, isin=[0, 1] + ) class Calendar(pa.SchemaModel): service_id: Series[str] = pa.Field(coerce=True) @@ -100,8 +123,10 @@ class Shapes(pa.SchemaModel): shape_pt_lat: Series[float64] = pa.Field(coerce=True) shape_pt_lon: Series[float64] = pa.Field(coerce=True) shape_pt_sequence: Series[int] = pa.Field(coerce=True) - shape_dist_traveled: Optional[Series[float64]] = pa.Field(coerce=True, nullable=True) - + shape_dist_traveled: Optional[Series[float64]] = pa.Field( + coerce=True, nullable=True + ) + agency_columns = list(Agency.__annotations__.keys()) stops_columns = list(Stops.__annotations__.keys()) routes_columns = list(Routes.__annotations__.keys()) @@ -109,4 +134,4 @@ class Shapes(pa.SchemaModel): stop_times_columns = list(Stop_Times.__annotations__.keys()) calendar_columns = list(Calendar.__annotations__.keys()) calendar_dates_columns = list(Calendar_Dates.__annotations__.keys()) - shapes_columns = list(Shapes.__annotations__.keys()) \ No newline at end of file + shapes_columns = list(Shapes.__annotations__.keys()) diff --git a/combine_gtfs_feeds/cli/run.py b/combine_gtfs_feeds/cli/run.py index ec241fe..801d7fa 100644 --- a/combine_gtfs_feeds/cli/run.py +++ b/combine_gtfs_feeds/cli/run.py @@ -15,34 +15,55 @@ class Combined_GTFS(object): file_list = ["agency", "trips", "stop_times", "stops", "routes", "shapes"] def __init__(self, df_dict): - #self.agency_df = df_dict["agency"] + # self.agency_df = df_dict["agency"] self.agency_df = GTFS_Schema.Agency.validate(df_dict["agency"]) - self.agency_df = self.agency_df[[col for col in GTFS_Schema.agency_columns if col in self.agency_df.columns]] + self.agency_df = self.agency_df[ + [col for col in GTFS_Schema.agency_columns if col in self.agency_df.columns] + ] - #self.routes_df = df_dict["routes"] + # self.routes_df = df_dict["routes"] self.routes_df = GTFS_Schema.Routes.validate(df_dict["routes"]) - self.routes_df = self.routes_df[[col for col in GTFS_Schema.routes_columns if col in self.routes_df.columns]] + self.routes_df = self.routes_df[ + [col for col in GTFS_Schema.routes_columns if col in self.routes_df.columns] + ] - #self.stops_df = df_dict["stops"] + # self.stops_df = df_dict["stops"] self.stops_df = GTFS_Schema.Stops.validate(df_dict["stops"]) - self.stops_df = self.stops_df[[col for col in GTFS_Schema.stops_columns if col in self.stops_df.columns]] + self.stops_df = self.stops_df[ + [col for col in GTFS_Schema.stops_columns if col in self.stops_df.columns] + ] - #self.stop_times_df = df_dict["stop_times"] + # self.stop_times_df = df_dict["stop_times"] self.stop_times_df = GTFS_Schema.Stop_Times.validate(df_dict["stop_times"]) - self.stop_times_df = self.stop_times_df[[col for col in GTFS_Schema.stop_times_columns if col in self.stop_times_df.columns]] + self.stop_times_df = self.stop_times_df[ + [ + col + for col in GTFS_Schema.stop_times_columns + if col in self.stop_times_df.columns + ] + ] - #self.shapes_df = df_dict["shapes"] + # self.shapes_df = df_dict["shapes"] self.shapes_df = GTFS_Schema.Shapes.validate(df_dict["shapes"]) - self.shapes_df = self.shapes_df[[col for col in GTFS_Schema.shapes_columns if col in self.shapes_df.columns]] + self.shapes_df = self.shapes_df[ + [col for col in GTFS_Schema.shapes_columns if col in self.shapes_df.columns] + ] - #self.trips_df = df_dict["trips"] + # self.trips_df = df_dict["trips"] self.trips_df = GTFS_Schema.Trips.validate(df_dict["trips"]) - self.trips_df = self.trips_df[[col for col in GTFS_Schema.trips_columns if col in self.trips_df.columns]] + self.trips_df = self.trips_df[ + [col for col in GTFS_Schema.trips_columns if col in self.trips_df.columns] + ] - #self.calendar_df = df_dict["calendar"] + # self.calendar_df = df_dict["calendar"] self.calendar_df = GTFS_Schema.Calendar.validate(df_dict["calendar"]) - self.calendar_df = self.calendar_df[[col for col in GTFS_Schema.calendar_columns if col in self.calendar_df.columns]] - + self.calendar_df = self.calendar_df[ + [ + col + for col in GTFS_Schema.calendar_columns + if col in self.calendar_df.columns + ] + ] def export_feed(self, dir): dir = Path(dir) @@ -116,8 +137,10 @@ def create_id(df, feed, id_column): Changes id_column by prepending each value with the feed parameter. """ - #df[id_column] = feed + "_" + df[id_column].astype(str) - df[id_column] = np.where(~df[id_column].isnull(), feed + "_" + df[id_column].astype(str), '') + # df[id_column] = feed + "_" + df[id_column].astype(str) + df[id_column] = np.where( + ~df[id_column].isnull(), feed + "_" + df[id_column].astype(str), "" + ) return df @@ -439,9 +462,8 @@ def combine(gtfs_dir, output_dir, service_date, logger): routes = read_gtfs(full_path, "routes.txt", zipped) shapes = read_gtfs(full_path, "shapes.txt", zipped) agency = read_gtfs(full_path, "agency.txt", zipped) - if 'agency_id' not in routes.columns: - routes['agency_id'] = agency['agency_id'][0] - + if "agency_id" not in routes.columns: + routes["agency_id"] = agency["agency_id"][0] # create new IDs trips = create_id(trips, feed, "trip_id")