Skip to content

Commit

Permalink
Formatted code.
Browse files: browse the repository at this point in the history
  • Loading branch information
stefancoe committed Aug 30, 2022
1 parent 39c49b7 commit 1a758f2
Show file tree
Hide file tree
Showing 3 changed files with 91 additions and 44 deletions.
2 changes: 1 addition & 1 deletion combine_gtfs_feeds/cli/__init__.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
from .cli import CLI
from . import run
from .gtfs_schema import (GTFS_Schema)
from .gtfs_schema import GTFS_Schema
71 changes: 48 additions & 23 deletions combine_gtfs_feeds/cli/gtfs_schema.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,6 @@


class GTFS_Schema(object):

class Agency(pa.SchemaModel):
agency_id: Series[str] = pa.Field(coerce=True)
agency_name: Series[str] = pa.Field(coerce=True)
Expand All @@ -27,10 +26,14 @@ class Stops(pa.SchemaModel):
stop_lon: Series[float64] = pa.Field(coerce=True, nullable=True)
zone_id: Series[str] = pa.Field(coerce=True, nullable=True)
stop_url: Optional[Series[str]] = pa.Field(coerce=True, nullable=True)
location_type: Optional[Series[pd.Int64Dtype]] = pa.Field(coerce=True, nullable=True, isin=[0, 1, 2, 3, 4])
location_type: Optional[Series[pd.Int64Dtype]] = pa.Field(
coerce=True, nullable=True, isin=[0, 1, 2, 3, 4]
)
parent_station: Optional[Series[str]] = pa.Field(coerce=True, nullable=True)
stop_timezone: Optional[Series[str]] = pa.Field(coerce=True, nullable=True)
wheelchair_boarding: Optional[Series[pd.Int64Dtype]] = pa.Field(coerce=True, nullable=True, isin=[0, 1, 2])
wheelchair_boarding: Optional[Series[pd.Int64Dtype]] = pa.Field(
coerce=True, nullable=True, isin=[0, 1, 2]
)
level_id: Optional[Series[str]] = pa.Field(coerce=True, nullable=True)
platform_code: Optional[Series[str]] = pa.Field(coerce=True, nullable=True)

Expand All @@ -40,29 +43,37 @@ class Routes(pa.SchemaModel):
route_short_name: Optional[Series[str]] = pa.Field(coerce=True, nullable=True)
route_long_name: Optional[Series[str]] = pa.Field(coerce=True, nullable=True)
route_desc: Optional[Series[str]] = pa.Field(coerce=True, nullable=True)
route_type: Series[int] = pa.Field(
isin=[0, 1, 2, 3, 4, 5, 6, 7, 11, 12])
route_type: Series[int] = pa.Field(isin=[0, 1, 2, 3, 4, 5, 6, 7, 11, 12])
route_url: Optional[Series[str]] = pa.Field(coerce=True, nullable=True)
route_color: Optional[Series[str]] = pa.Field(coerce=True, nullable=True)
route_text_color: Optional[Series[str]] = pa.Field(coerce=True, nullable=True)
route_sort_order: Optional[Series[pd.Int64Dtype]] = pa.Field(coerce=True, nullable=True)
continuous_pickup: Optional[Series[pd.Int64Dtype]] = pa.Field(coerce=True, nullable=True, isin=[0, 1, 2, 3])
continuous_drop_off: Optional[Series[pd.Int64Dtype]] = pa.Field(coerce=True, nullable=True, isin=[0, 1, 2, 3])


route_sort_order: Optional[Series[pd.Int64Dtype]] = pa.Field(
coerce=True, nullable=True
)
continuous_pickup: Optional[Series[pd.Int64Dtype]] = pa.Field(
coerce=True, nullable=True, isin=[0, 1, 2, 3]
)
continuous_drop_off: Optional[Series[pd.Int64Dtype]] = pa.Field(
coerce=True, nullable=True, isin=[0, 1, 2, 3]
)

class Trips(pa.SchemaModel):
route_id: Series[str] = pa.Field(coerce=True)
service_id: Series[str] = pa.Field(coerce=True)
trip_id: Series[str] = pa.Field(coerce=True)
trip_headsign: Optional[Series[str]] = pa.Field(coerce=True, nullable=True)
trip_short_name: Optional[Series[str]] = pa.Field(coerce=True, nullable=True)
direction_id: Optional[Series[pd.Int64Dtype]] = pa.Field(coerce=True, nullable=True, isin=[0, 1])
direction_id: Optional[Series[pd.Int64Dtype]] = pa.Field(
coerce=True, nullable=True, isin=[0, 1]
)
block_id: Optional[Series[str]] = pa.Field(coerce=True, nullable=True)
shape_id: Optional[Series[str]] = pa.Field(coerce=True, nullable=True)
wheelchair_accessible: Optional[Series[pd.Int64Dtype]] = pa.Field(coerce=True, nullable=True, isin=[0, 1, 2])
bikes_allowed: Optional[Series[pd.Int64Dtype]] = pa.Field(coerce=True, nullable=True, isin=[0, 1, 2])

wheelchair_accessible: Optional[Series[pd.Int64Dtype]] = pa.Field(
coerce=True, nullable=True, isin=[0, 1, 2]
)
bikes_allowed: Optional[Series[pd.Int64Dtype]] = pa.Field(
coerce=True, nullable=True, isin=[0, 1, 2]
)

class Stop_Times(pa.SchemaModel):
trip_id: Series[str] = pa.Field(coerce=True)
Expand All @@ -71,12 +82,24 @@ class Stop_Times(pa.SchemaModel):
stop_id: Series[str] = pa.Field(coerce=True)
stop_sequence: Series[int] = pa.Field(coerce=True)
stop_headsign: Optional[Series[str]] = pa.Field(coerce=True, nullable=True)
pickup_type: Optional[Series[pd.Int64Dtype]] = pa.Field(coerce=True, nullable=True, isin=[0, 1, 2, 3])
drop_off_type: Optional[Series[pd.Int64Dtype]] = pa.Field(coerce=True, nullable=True, isin=[0, 1, 2, 3])
continuous_pickup: Optional[Series[pd.Int64Dtype]] = pa.Field(coerce=True, nullable=True, isin=[0, 1, 2, 3])
continuous_drop_off: Optional[Series[pd.Int64Dtype]] = pa.Field(coerce=True, nullable=True, isin=[0, 1, 2, 3])
shape_dist_traveled: Optional[Series[float64]] = pa.Field(coerce=True, nullable=True, ge=0)
timepoint: Optional[Series[pd.Int64Dtype]] = pa.Field(coerce=True, nullable=True, isin=[0, 1])
pickup_type: Optional[Series[pd.Int64Dtype]] = pa.Field(
coerce=True, nullable=True, isin=[0, 1, 2, 3]
)
drop_off_type: Optional[Series[pd.Int64Dtype]] = pa.Field(
coerce=True, nullable=True, isin=[0, 1, 2, 3]
)
continuous_pickup: Optional[Series[pd.Int64Dtype]] = pa.Field(
coerce=True, nullable=True, isin=[0, 1, 2, 3]
)
continuous_drop_off: Optional[Series[pd.Int64Dtype]] = pa.Field(
coerce=True, nullable=True, isin=[0, 1, 2, 3]
)
shape_dist_traveled: Optional[Series[float64]] = pa.Field(
coerce=True, nullable=True, ge=0
)
timepoint: Optional[Series[pd.Int64Dtype]] = pa.Field(
coerce=True, nullable=True, isin=[0, 1]
)

class Calendar(pa.SchemaModel):
service_id: Series[str] = pa.Field(coerce=True)
Expand All @@ -100,13 +123,15 @@ class Shapes(pa.SchemaModel):
shape_pt_lat: Series[float64] = pa.Field(coerce=True)
shape_pt_lon: Series[float64] = pa.Field(coerce=True)
shape_pt_sequence: Series[int] = pa.Field(coerce=True)
shape_dist_traveled: Optional[Series[float64]] = pa.Field(coerce=True, nullable=True)

shape_dist_traveled: Optional[Series[float64]] = pa.Field(
coerce=True, nullable=True
)

agency_columns = list(Agency.__annotations__.keys())
stops_columns = list(Stops.__annotations__.keys())
routes_columns = list(Routes.__annotations__.keys())
trips_columns = list(Trips.__annotations__.keys())
stop_times_columns = list(Stop_Times.__annotations__.keys())
calendar_columns = list(Calendar.__annotations__.keys())
calendar_dates_columns = list(Calendar_Dates.__annotations__.keys())
shapes_columns = list(Shapes.__annotations__.keys())
shapes_columns = list(Shapes.__annotations__.keys())
62 changes: 42 additions & 20 deletions combine_gtfs_feeds/cli/run.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,34 +15,55 @@ class Combined_GTFS(object):
file_list = ["agency", "trips", "stop_times", "stops", "routes", "shapes"]

def __init__(self, df_dict):
#self.agency_df = df_dict["agency"]
# self.agency_df = df_dict["agency"]
self.agency_df = GTFS_Schema.Agency.validate(df_dict["agency"])
self.agency_df = self.agency_df[[col for col in GTFS_Schema.agency_columns if col in self.agency_df.columns]]
self.agency_df = self.agency_df[
[col for col in GTFS_Schema.agency_columns if col in self.agency_df.columns]
]

#self.routes_df = df_dict["routes"]
# self.routes_df = df_dict["routes"]
self.routes_df = GTFS_Schema.Routes.validate(df_dict["routes"])
self.routes_df = self.routes_df[[col for col in GTFS_Schema.routes_columns if col in self.routes_df.columns]]
self.routes_df = self.routes_df[
[col for col in GTFS_Schema.routes_columns if col in self.routes_df.columns]
]

#self.stops_df = df_dict["stops"]
# self.stops_df = df_dict["stops"]
self.stops_df = GTFS_Schema.Stops.validate(df_dict["stops"])
self.stops_df = self.stops_df[[col for col in GTFS_Schema.stops_columns if col in self.stops_df.columns]]
self.stops_df = self.stops_df[
[col for col in GTFS_Schema.stops_columns if col in self.stops_df.columns]
]

#self.stop_times_df = df_dict["stop_times"]
# self.stop_times_df = df_dict["stop_times"]
self.stop_times_df = GTFS_Schema.Stop_Times.validate(df_dict["stop_times"])
self.stop_times_df = self.stop_times_df[[col for col in GTFS_Schema.stop_times_columns if col in self.stop_times_df.columns]]
self.stop_times_df = self.stop_times_df[
[
col
for col in GTFS_Schema.stop_times_columns
if col in self.stop_times_df.columns
]
]

#self.shapes_df = df_dict["shapes"]
# self.shapes_df = df_dict["shapes"]
self.shapes_df = GTFS_Schema.Shapes.validate(df_dict["shapes"])
self.shapes_df = self.shapes_df[[col for col in GTFS_Schema.shapes_columns if col in self.shapes_df.columns]]
self.shapes_df = self.shapes_df[
[col for col in GTFS_Schema.shapes_columns if col in self.shapes_df.columns]
]

#self.trips_df = df_dict["trips"]
# self.trips_df = df_dict["trips"]
self.trips_df = GTFS_Schema.Trips.validate(df_dict["trips"])
self.trips_df = self.trips_df[[col for col in GTFS_Schema.trips_columns if col in self.trips_df.columns]]
self.trips_df = self.trips_df[
[col for col in GTFS_Schema.trips_columns if col in self.trips_df.columns]
]

#self.calendar_df = df_dict["calendar"]
# self.calendar_df = df_dict["calendar"]
self.calendar_df = GTFS_Schema.Calendar.validate(df_dict["calendar"])
self.calendar_df = self.calendar_df[[col for col in GTFS_Schema.calendar_columns if col in self.calendar_df.columns]]

self.calendar_df = self.calendar_df[
[
col
for col in GTFS_Schema.calendar_columns
if col in self.calendar_df.columns
]
]

def export_feed(self, dir):
dir = Path(dir)
Expand Down Expand Up @@ -116,8 +137,10 @@ def create_id(df, feed, id_column):
Changes id_column by prepending each value with
the feed parameter.
"""
#df[id_column] = feed + "_" + df[id_column].astype(str)
df[id_column] = np.where(~df[id_column].isnull(), feed + "_" + df[id_column].astype(str), '')
# df[id_column] = feed + "_" + df[id_column].astype(str)
df[id_column] = np.where(
~df[id_column].isnull(), feed + "_" + df[id_column].astype(str), ""
)
return df


Expand Down Expand Up @@ -439,9 +462,8 @@ def combine(gtfs_dir, output_dir, service_date, logger):
routes = read_gtfs(full_path, "routes.txt", zipped)
shapes = read_gtfs(full_path, "shapes.txt", zipped)
agency = read_gtfs(full_path, "agency.txt", zipped)
if 'agency_id' not in routes.columns:
routes['agency_id'] = agency['agency_id'][0]

if "agency_id" not in routes.columns:
routes["agency_id"] = agency["agency_id"][0]

# create new IDs
trips = create_id(trips, feed, "trip_id")
Expand Down

0 comments on commit 1a758f2

Please sign in to comment.