diff --git a/Project-Documentation/BusAID-Hotspot-Analysis/build_busaid_datasets.ipynb b/Project-Documentation/BusAID-Hotspot-Analysis/build_busaid_datasets.ipynb index c0bf045..02d8ee3 100644 --- a/Project-Documentation/BusAID-Hotspot-Analysis/build_busaid_datasets.ipynb +++ b/Project-Documentation/BusAID-Hotspot-Analysis/build_busaid_datasets.ipynb @@ -2,7 +2,7 @@ "cells": [ { "cell_type": "code", - "execution_count": 132, + "execution_count": 100, "metadata": {}, "outputs": [], "source": [ @@ -32,7 +32,7 @@ }, { "cell_type": "code", - "execution_count": 133, + "execution_count": 101, "metadata": {}, "outputs": [], "source": [ @@ -42,7 +42,7 @@ }, { "cell_type": "code", - "execution_count": 134, + "execution_count": 102, "metadata": {}, "outputs": [], "source": [ @@ -52,7 +52,7 @@ }, { "cell_type": "code", - "execution_count": 135, + "execution_count": 103, "metadata": {}, "outputs": [], "source": [ @@ -70,7 +70,7 @@ }, { "cell_type": "code", - "execution_count": 136, + "execution_count": 104, "metadata": {}, "outputs": [], "source": [ @@ -120,7 +120,7 @@ }, { "cell_type": "code", - "execution_count": 137, + "execution_count": 105, "metadata": {}, "outputs": [], "source": [ @@ -155,7 +155,7 @@ }, { "cell_type": "code", - "execution_count": 138, + "execution_count": 106, "metadata": {}, "outputs": [], "source": [ @@ -215,7 +215,7 @@ }, { "cell_type": "code", - "execution_count": 139, + "execution_count": 107, "metadata": {}, "outputs": [], "source": [ @@ -232,7 +232,7 @@ }, { "cell_type": "code", - "execution_count": 140, + "execution_count": 108, "metadata": {}, "outputs": [], "source": [ @@ -253,7 +253,7 @@ }, { "cell_type": "code", - "execution_count": 141, + "execution_count": 109, "metadata": {}, "outputs": [], "source": [ @@ -265,7 +265,7 @@ }, { "cell_type": "code", - "execution_count": 142, + "execution_count": 110, "metadata": {}, "outputs": [ { @@ -284,7 +284,7 @@ }, { "cell_type": "code", - "execution_count": 143, + "execution_count": 111, "metadata": {}, "outputs": [ { @@ -303,7 +303,7 @@ }, { "cell_type": "code", - "execution_count": 144, + "execution_count": 112, "metadata": {}, "outputs": [], "source": [ @@ -315,7 +315,7 @@ }, { "cell_type": "code", - "execution_count": 145, + "execution_count": 113, "metadata": {}, "outputs": [], "source": [ @@ -332,18 +332,18 @@ }, { "cell_type": "code", - "execution_count": 146, + "execution_count": 114, "metadata": {}, "outputs": [], "source": [ "# create point and line gdfs\n", - "point_gdf = read_kml_by_geom_type(os.path.join(dir, \"Spatial Data\", \"January2024\"), \"Point\")\n", - "line_gdf = read_kml_by_geom_type(os.path.join(dir, \"Spatial Data\", \"January2024\"), \"LineString\")" + "point_gdf = read_kml_by_geom_type(os.path.join(dir, \"Spatial Data\", \"January_25_2024\"), \"Point\")\n", + "line_gdf = read_kml_by_geom_type(os.path.join(dir, \"Spatial Data\", \"January_25_2024\"), \"LineString\")" ] }, { "cell_type": "code", - "execution_count": 147, + "execution_count": 115, "metadata": {}, "outputs": [], "source": [ @@ -362,7 +362,7 @@ }, { "cell_type": "code", - "execution_count": 148, + "execution_count": 116, "metadata": {}, "outputs": [], "source": [ @@ -372,7 +372,7 @@ }, { "cell_type": "code", - "execution_count": 149, + "execution_count": 117, "metadata": {}, "outputs": [], "source": [ @@ -383,7 +383,7 @@ }, { "cell_type": "code", - "execution_count": 150, + "execution_count": 118, "metadata": {}, "outputs": [], "source": [ @@ -393,7 +393,7 @@ }, { "cell_type": "code", - "execution_count": 151, + "execution_count": 119, "metadata": {}, "outputs": [], "source": [ @@ -404,7 +404,7 @@ }, { "cell_type": "code", - "execution_count": 152, + "execution_count": 120, "metadata": {}, "outputs": [], "source": [ @@ -447,7 +447,7 @@ }, { "cell_type": "code", - "execution_count": 153, + "execution_count": 121, "metadata": {}, "outputs": [], "source": [ @@ -456,7 +456,7 @@ }, { "cell_type": "code", - "execution_count": 154, + "execution_count": 122, "metadata": {}, "outputs": [], "source": [ @@ -512,7 +512,7 @@ " \"UC:56\": \"AC:56\",\n", " \"UC:97\": \"AC:97\",\n", " \"UC:210\": \"AC:210\",\n", - " # \"SC:500\": None,\n", + " \"SC:500\": \"SC:Rapid 500\",\n", "}\n", "hs_route_expode_df[\"route_id\"] = hs_route_expode_df[\"route_id\"].replace(route_dict)" ] @@ -526,7 +526,7 @@ }, { "cell_type": "code", - "execution_count": 155, + "execution_count": 123, "metadata": {}, "outputs": [], "source": [ @@ -546,7 +546,7 @@ }, { "cell_type": "code", - "execution_count": 156, + "execution_count": 124, "metadata": {}, "outputs": [ { @@ -577,7 +577,7 @@ }, { "cell_type": "code", - "execution_count": 157, + "execution_count": 125, "metadata": {}, "outputs": [], "source": [ @@ -618,7 +618,7 @@ }, { "cell_type": "code", - "execution_count": 158, + "execution_count": 126, "metadata": {}, "outputs": [ { @@ -627,7 +627,7 @@ "[]" ] }, - "execution_count": 158, + "execution_count": 126, "metadata": {}, "output_type": "execute_result" } @@ -664,7 +664,7 @@ }, { "cell_type": "code", - "execution_count": 159, + "execution_count": 127, "metadata": {}, "outputs": [], "source": [ @@ -687,7 +687,7 @@ }, { "cell_type": "code", - "execution_count": 160, + "execution_count": 128, "metadata": {}, "outputs": [], "source": [ @@ -696,6 +696,7 @@ " \"route_id\",\n", " \"agency_id\",\n", " \"direction_id\",\n", + " \"trip_id\",\n", " \"route_type\",\n", " \"agency_name\",\n", " \"route_name\",\n", @@ -713,7 +714,7 @@ }, { "cell_type": "code", - "execution_count": 161, + "execution_count": 129, "metadata": {}, "outputs": [], "source": [ @@ -739,7 +740,7 @@ }, { "cell_type": "code", - "execution_count": 162, + "execution_count": 130, "metadata": {}, "outputs": [], "source": [ @@ -756,20 +757,20 @@ }, { "cell_type": "code", - "execution_count": 163, + "execution_count": 131, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "_merge\n", - "both 400348\n", - "right_only 8\n", + "both 402137\n", + "right_only 6\n", "left_only 0\n", "Name: count, dtype: int64" ] }, - "execution_count": 163, + "execution_count": 131, "metadata": {}, "output_type": "execute_result" } @@ -780,17 +781,17 @@ }, { "cell_type": "code", - "execution_count": 164, + "execution_count": 132, "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "array(['SO:46', 'GG:72', 'SR:95', nan, 'FS:microtransit', 'SO:95',\n", - " 'SC:500', 'ST:NVTA29'], dtype=object)" + "array(['SO:46', 'GG:72', 'SR:95', nan, 'FS:microtransit', 'SO:95'],\n", + " dtype=object)" ] }, - "execution_count": 164, + "execution_count": 132, "metadata": {}, "output_type": "execute_result" } @@ -801,7 +802,7 @@ }, { "cell_type": "code", - "execution_count": 165, + "execution_count": 133, "metadata": {}, "outputs": [], "source": [ @@ -809,6 +810,7 @@ "sub_cols = [\n", " \"hotspot_id\",\n", " \"route_id\",\n", + " \"trip_id\",\n", " \"direction_id\",\n", " \"agency_id_gtfs\",\n", " \"route_type\",\n", @@ -819,7 +821,8 @@ " \"agency_url\",\n", " \"route\",\n", "]\n", - "hs_route_dedup = hs_route_gdf.query(\"_merge == 'both'\").drop_duplicates(subset=sub_cols)" + "hs_route_dedup = hs_route_gdf.query(\"_merge == 'both'\").drop_duplicates(subset=sub_cols)\n", + "# hs_route_gdf.duplicated(subset=sub_cols).value_counts()" ] }, { @@ -831,7 +834,7 @@ }, { "cell_type": "code", - "execution_count": 166, + "execution_count": 134, "metadata": {}, "outputs": [], "source": [ @@ -847,7 +850,7 @@ }, { "cell_type": "code", - "execution_count": 167, + "execution_count": 135, "metadata": {}, "outputs": [], "source": [ @@ -861,7 +864,7 @@ }, { "cell_type": "code", - "execution_count": 168, + "execution_count": 136, "metadata": {}, "outputs": [ { @@ -888,7 +891,7 @@ }, { "cell_type": "code", - "execution_count": 169, + "execution_count": 137, "metadata": {}, "outputs": [ { @@ -915,7 +918,7 @@ }, { "cell_type": "code", - "execution_count": 170, + "execution_count": 138, "metadata": {}, "outputs": [], "source": [ @@ -929,7 +932,7 @@ }, { "cell_type": "code", - "execution_count": 171, + "execution_count": 139, "metadata": {}, "outputs": [ { @@ -963,7 +966,7 @@ }, { "cell_type": "code", - "execution_count": 172, + "execution_count": 140, "metadata": {}, "outputs": [], "source": [ @@ -998,7 +1001,7 @@ }, { "cell_type": "code", - "execution_count": 173, + "execution_count": 141, "metadata": {}, "outputs": [], "source": [ @@ -1015,7 +1018,7 @@ }, { "cell_type": "code", - "execution_count": 174, + "execution_count": 142, "metadata": {}, "outputs": [], "source": [ @@ -1027,7 +1030,7 @@ }, { "cell_type": "code", - "execution_count": 175, + "execution_count": 143, "metadata": {}, "outputs": [], "source": [ @@ -1037,7 +1040,7 @@ }, { "cell_type": "code", - "execution_count": 176, + "execution_count": 144, "metadata": {}, "outputs": [], "source": [ @@ -1052,7 +1055,7 @@ }, { "cell_type": "code", - "execution_count": 177, + "execution_count": 145, "metadata": {}, "outputs": [], "source": [ @@ -1062,7 +1065,7 @@ }, { "cell_type": "code", - "execution_count": 178, + "execution_count": 146, "metadata": {}, "outputs": [], "source": [ @@ -1083,7 +1086,7 @@ }, { "cell_type": "code", - "execution_count": 179, + "execution_count": 147, "metadata": {}, "outputs": [], "source": [ @@ -1098,7 +1101,7 @@ }, { "cell_type": "code", - "execution_count": 180, + "execution_count": 148, "metadata": {}, "outputs": [], "source": [ @@ -1112,7 +1115,7 @@ }, { "cell_type": "code", - "execution_count": 181, + "execution_count": 149, "metadata": {}, "outputs": [], "source": [ @@ -1135,7 +1138,7 @@ }, { "cell_type": "code", - "execution_count": 182, + "execution_count": 150, "metadata": {}, "outputs": [], "source": [ @@ -1157,7 +1160,7 @@ }, { "cell_type": "code", - "execution_count": 183, + "execution_count": 151, "metadata": {}, "outputs": [], "source": [ @@ -1176,7 +1179,7 @@ }, { "cell_type": "code", - "execution_count": 184, + "execution_count": 152, "metadata": {}, "outputs": [], "source": [ @@ -1201,7 +1204,7 @@ }, { "cell_type": "code", - "execution_count": 185, + "execution_count": 153, "metadata": {}, "outputs": [], "source": [ @@ -1220,7 +1223,7 @@ }, { "cell_type": "code", - "execution_count": 186, + "execution_count": 154, "metadata": {}, "outputs": [], "source": [ @@ -1234,7 +1237,7 @@ }, { "cell_type": "code", - "execution_count": 187, + "execution_count": 155, "metadata": {}, "outputs": [], "source": [ @@ -1246,7 +1249,7 @@ }, { "cell_type": "code", - "execution_count": 188, + "execution_count": 156, "metadata": {}, "outputs": [], "source": [ @@ -1258,7 +1261,7 @@ }, { "cell_type": "code", - "execution_count": 189, + "execution_count": 157, "metadata": {}, "outputs": [], "source": [ @@ -1275,7 +1278,7 @@ }, { "cell_type": "code", - "execution_count": 190, + "execution_count": 158, "metadata": {}, "outputs": [], "source": [ @@ -1289,7 +1292,7 @@ }, { "cell_type": "code", - "execution_count": 191, + "execution_count": 159, "metadata": {}, "outputs": [], "source": [ @@ -1312,7 +1315,7 @@ }, { "cell_type": "code", - "execution_count": 192, + "execution_count": 160, "metadata": {}, "outputs": [], "source": [ @@ -1322,7 +1325,7 @@ }, { "cell_type": "code", - "execution_count": 193, + "execution_count": 161, "metadata": {}, "outputs": [], "source": [ @@ -1340,7 +1343,7 @@ }, { "cell_type": "code", - "execution_count": 194, + "execution_count": 162, "metadata": {}, "outputs": [], "source": [ @@ -1389,7 +1392,7 @@ }, { "cell_type": "code", - "execution_count": 195, + "execution_count": 163, "metadata": {}, "outputs": [], "source": [ @@ -1438,7 +1441,7 @@ }, { "cell_type": "code", - "execution_count": 196, + "execution_count": 164, "metadata": {}, "outputs": [], "source": [ @@ -1446,6 +1449,13 @@ "with pd.ExcelWriter(excel_path, mode=\"a\", if_sheet_exists=\"replace\") as writer:\n", " hs_demo_summary.to_excel(writer, sheet_name=\"hotspot_demographic_summary\", index=False)" ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] } ], "metadata": {