-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathbasics.py
34 lines (24 loc) · 1.28 KB
/
basics.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
def read(location, **read_csv_kwargs) -> pd.DataFrame:
    """Load a comma-separated text file into a pandas DataFrame.

    Args:
        location: Path, URL, or file-like object accepted by ``pd.read_csv``.
        **read_csv_kwargs: Optional keyword arguments forwarded unchanged to
            ``pd.read_csv`` (for example ``dtype=str`` to keep identifier
            columns as text). Omitting them gives the default parsing.

    Returns:
        The DataFrame produced by ``pd.read_csv``.
    """
    return pd.read_csv(location, **read_csv_kwargs)
# Load each feed table and discard the columns that are not used below.
agency = read('./data/google_feed/agency.txt').drop(
    columns=['agency_url', 'agency_timezone', 'agency_phone', 'agency_lang']
)
routes = read('./data/google_feed/routes.txt').drop(
    columns=['route_short_name', 'route_desc', 'route_url', 'route_color', 'route_text_color']
)
trips = read('./data/google_feed/trips.txt').drop(
    columns=['trip_headsign', 'block_id']
)
stopTimes = read('./data/google_feed/stop_times.txt').drop(
    columns=['stop_headsign']
)
stops = read('./data/google_feed/stops.txt').drop(
    columns=['stop_desc', 'zone_id', 'stop_url']
)

# Join the tables into a single frame, following the key chain
# agency_id -> route_id -> trip_id -> stop_id (default inner joins).
df = (
    agency
    .merge(routes, on='agency_id')
    .merge(trips, on='route_id')
    .merge(stopTimes, on='trip_id')
    .merge(stops, on='stop_id')
)

# Ad-hoc row-count summaries, left disabled:
#print(df.groupby("agency_name").count().loc[:,['route_id']].sort_values(by='route_id'))
#print(df.groupby("stop_name").count().loc[:,['route_id']].sort_values(by='route_id'))

# Order rows by route, service, trip, direction and stop sequence; every key ascending.
df = df.sort_values(
    by=['route_id', 'service_id', 'trip_id', 'direction_id', 'stop_sequence'],
    ascending=True,
)