def get_file_contents(filename):
    """Return the whitespace-stripped text of ``filename``.

    The file is assumed to hold a single line (for example an API key).

    Parameters
    ----------
    filename : str
        Path of the file to read.

    Returns
    -------
    str or None
        The file's contents with leading/trailing whitespace removed, or
        ``None`` (after printing a notice) when the file does not exist.
    """
    try:
        # Explicit encoding so the result does not depend on the platform locale.
        with open(filename, "r", encoding="utf-8") as f:
            return f.read().strip()
    except FileNotFoundError:
        print("'%s' file not found" % filename)
        # Make the "missing file" result explicit rather than falling off the end.
        return None
"execution_count": 5, - "metadata": {}, - "outputs": [], - "source": [ - "def flag_condition_calc(row, df_share_column, standard_deviation):\n", - " cond = (\n", - " df_share_column.mean().round(decimals=2)\n", - " + (standard_deviation * df_share_column.std().round(decimals=2))\n", - " ).round(decimals=2)\n", - " if row > cond:\n", - " return 1\n", - " else:\n", - " return 0" - ] - }, - { - "cell_type": "code", - "execution_count": 6, - "metadata": {}, - "outputs": [], - "source": [ - "def flag_mult_columns(dataframe, dictionary, standard_deviation):\n", - " for key, value in dictionary.items():\n", - " dataframe[value] = dataframe[key].apply(\n", - " lambda row: flag_condition_calc(row, dataframe[key], standard_deviation)\n", - " )" - ] - }, - { - "cell_type": "code", - "execution_count": 7, - "metadata": {}, - "outputs": [], - "source": [ - "def set_epc_class(df):\n", - " if df[\"epc50p_1ha\"] == 1:\n", - " return \"Highest\"\n", - " elif df[\"epc50p_1\"] == 1:\n", - " return \"Higher\"\n", - " elif df[\"epc50p_1_2\"] == 1:\n", - " return \"High\"\n", - " else:\n", - " return \"NA\"" - ] - }, - { - "cell_type": "code", - "execution_count": 8, - "metadata": {}, - "outputs": [], - "source": [ - "def pull_acs_5_year_est_data(\n", - " census_api_key,\n", - " acs_year=2019,\n", - " tbl_prof_type=\"Detailed\",\n", - " table_id=None,\n", - " select_table_vars=None,\n", - " drop_anno_cols=True,\n", - " drop_margin_cols=True,\n", - "):\n", - " \"\"\"\n", - " Pull American Community Survey (ACS) 5 year estimate data. 
def pull_acs_5_year_est_data(
    census_api_key,
    acs_year=2019,
    tbl_prof_type="Detailed",
    table_id=None,
    select_table_vars=None,
    drop_anno_cols=True,
    drop_margin_cols=True,
):
    """
    Pull American Community Survey (ACS) 5 year estimate data at tract level.

    Data can be pulled for an entire table (``table_id``) or for select table
    variables (``select_table_vars``); one of the two must be provided. The
    query is limited to state FIPS 06 and a hard-coded list of nine county
    FIPS codes.

    Parameters
    -------------------
    census_api_key (String):
        Your secret census api key.

    acs_year (Integer):
        Year for acs estimates, default is 2019.

    tbl_prof_type (String):
        Table or profile type. These include the following types: Detailed, Subject, Data, or Comparison.

    table_id (String):
        ACS table id. Example 'B01001'. Takes precedence over select_table_vars.

    select_table_vars (List):
        provide a list of ACS table variables as strings. Example: ['B01001_001E','B01001_002E']

    drop_anno_cols (Boolean):
        Drops annotation of margin of error and annotation of estimate
        columns (names ending in "MA" / "EA").

    drop_margin_cols (Boolean):
        Drops margin of error columns (names ending in "M").

    Returns
    -------------------
    pandas DataFrame with the requested variables cast to numeric, a
    ``fipco`` column (county code) and a ``tract_geoid`` column
    (state + county + tract codes concatenated).

    Raises
    -------------------
    ValueError
        If ``tbl_prof_type`` is not one of the supported types, or if neither
        ``table_id`` nor ``select_table_vars`` is provided.
    requests.HTTPError
        If the Census API responds with an error status.

    Author: Joshua Croff
    Variable Reference: https://www.census.gov/data/developers/data-sets/acs-5year.html
    """
    # Validate arguments before doing any work so mistakes fail fast with a
    # clear message instead of surfacing later as an unrelated error.
    endpoints = {
        "Detailed": "acs/acs5",
        "Subject": "acs/acs5/subject",
        "Data": "acs/acs5/profile",
        "Comparison": "acs/acs5/cprofile",
    }
    if tbl_prof_type not in endpoints:
        raise ValueError(
            "Please provide the following table types: Detailed, Subject, Data, or Comparison"
        )
    if not table_id and not select_table_vars:
        raise ValueError("Provide either table_id or a list of select_table_vars")

    import requests
    import pandas as pd

    if table_id:
        var = f"group({table_id})"
    else:
        var = ",".join(select_table_vars)

    counties = "001,013,041,055,075,081,085,095,097"
    state = "06"
    base_url = f"https://api.census.gov/data/{acs_year}/{endpoints[tbl_prof_type]}?"

    # set query params
    query_params = {
        "get": var,
        "for": "tract:*",
        "in": [
            f"county:{counties}",
            f"state:{state}",
        ],
        "key": census_api_key,
    }
    rq = requests.get(base_url, params=query_params)
    rq.raise_for_status()
    data = rq.json()
    # First element of the payload is the header row, the rest are records.
    acs_df = pd.DataFrame(data[1:], columns=data[0])

    # Drop annotation / margin columns BEFORE casting: a non-numeric
    # annotation value would otherwise abort the numeric conversion below.
    if drop_anno_cols:
        is_anno = acs_df.columns.str.endswith("EA") | acs_df.columns.str.endswith("MA")
        acs_df = acs_df.loc[:, ~is_anno].copy()

    if drop_margin_cols:
        acs_df = acs_df.loc[:, ~acs_df.columns.str.endswith("M")].copy()

    # Cast numeric columns to numeric types
    if table_id:
        str_cols = ["GEO_ID", "NAME", "state", "county", "tract"]
    else:
        str_cols = ["state", "county", "tract"]
    num_cols = [col for col in acs_df.columns if col not in str_cols]
    acs_df[num_cols] = acs_df[num_cols].apply(pd.to_numeric)

    # add tract id column
    acs_df["tract_geoid"] = acs_df["state"] + acs_df["county"] + acs_df["tract"]

    # rename columns
    acs_df = acs_df.rename(columns={"county": "fipco"})

    # drop redundant columns
    if table_id:
        acs_df = acs_df.drop(columns=["GEO_ID", "NAME", "state", "tract"])
    else:
        acs_df = acs_df.drop(columns=["state", "tract"])

    return acs_df
def pull_census_tracts_geodata(year=2020, cartographic=False):
    """
    Pulls Census Tracts from TIGERweb REST API and returns Geopandas GeoDataframe.

    The query is limited to state FIPS 06 and a hard-coded list of nine county
    FIPS codes. Only the GEOID attribute is requested; it is returned in a
    column named ``tract_geoid``.

    How to choose vintage: https://www2.census.gov/geo/pdfs/maps-data/data/tiger/How_do_I_choose_TIGER_vintage.pdf

    Parameters
    -------------------
    year (int):
        the TIGER vintage. Default is 2020.
        list of valid years: [2012,2015,2016,2017,2018,2019,2020,2021,2022]

    cartographic (bool):
        If the cartographic parameter is set to true, a generalized version of tracts is returned
        with water areas clipped.

    Returns
    -------------------
    GeoDataFrame (crs EPSG:4326) with ``tract_geoid`` and ``geometry`` columns,
    or None (after printing a message) when ``year`` is not a valid vintage.

    Raises
    -------------------
    requests.HTTPError
        If TIGERweb responds with an error status.

    Author: Joshua Croff
    Source: https://tigerweb.geo.census.gov/tigerwebmain/TIGERweb_restmapservice.html
    """
    valid_years = [2012, 2015, 2016, 2017, 2018, 2019, 2020, 2021, 2022]
    if year not in valid_years:
        print("Error- vintage not available. Please see docstring for valid years")
        return None

    import geopandas as gpd
    import requests

    # Pick the service path once, keyed on the same truthiness test throughout,
    # so the service name and the URL layout can never disagree.
    if cartographic:
        if year == 2020:
            map_service = f"Generalized_TAB{year}"
        else:
            map_service = f"Generalized_ACS{year}"
        service_path = [map_service, "Tracts_Blocks", "MapServer", "3"]
    else:
        if year == 2020:
            map_service = f"tigerWMS_Census{year}"
            layer_id = "6"
        elif year < 2020:
            map_service = f"tigerWMS_ACS{year}"
            layer_id = "8"
        else:
            map_service = f"tigerWMS_ACS{year}"
            layer_id = "6"
        service_path = ["TIGERweb", map_service, "MapServer", layer_id]

    state = "06"
    counties = "('001','013','041','055','075','081','085','095','097')"
    where_str = f"where=STATE='{state}'+AND+COUNTY+IN{counties}"
    query_args = [where_str, "outFields=GEOID&f=geojson"]

    url = "/".join(
        [
            "https://tigerweb.geo.census.gov",
            "arcgis",
            "rest",
            "services",
            *service_path,
            "query?{}".format("&".join(query_args)),
        ]
    )
    r = requests.get(url)
    r.raise_for_status()
    geog_json = r.json()
    geog_gdf = gpd.GeoDataFrame.from_features(geog_json["features"], crs="EPSG:4326")

    # rename GEOID column to tract_geoid
    geog_gdf = geog_gdf.rename(columns={"GEOID": "tract_geoid"})
    return geog_gdf
def overwrite_published_feature_layer(f_layer_id, geojson_path, client):
    """Replace the data behind an already-published feature layer.

    Parameters:
    -----------
    f_layer_id : str
        id of the content item whose feature layer is overwritten
    geojson_path : str
        path to the geojson file that supplies the new data
    client : authenticated arcgis client
        the item is looked up through ``client.content.get``;
        authentication example below:
            from arcgis.gis import GIS
            password = os.environ.get("AGOL_CONTENT_PASSWORD")
            gis = GIS(url="https://mtc.maps.arcgis.com/home/", username="content_MTC", password=password)
    """
    from arcgis.features import FeatureLayerCollection

    # Look up the hosted item, wrap it as a collection, and overwrite via its manager.
    hosted_item = client.content.get(f_layer_id)
    FeatureLayerCollection.fromitem(hosted_item).manager.overwrite(geojson_path)

    print(f"Overwrote hosted feature layer with id: {f_layer_id}")
def publish_geojson_to_agol(
    geojson_path,
    layer_name,
    layer_snippet,
    tags,
    client,
    folder=None,
    overwrite=False,
    f_layer_id=None,
):
    """Publish a geojson to ArcGIS Online, or overwrite an existing layer with it.

    Parameters:
    -----------
    geojson_path : str
        path to the geojson file
    layer_name : str
        name of the layer (used as the item title)
    layer_snippet : str
        layer snippet
    tags : str
        tags as a comma separated string (e.g. "tag1, tag2, tag3")
    client : authenticated arcgis client
        authentication example below:
            from arcgis.gis import GIS
            password = os.environ.get("AGOL_CONTENT_PASSWORD")
            gis = GIS(url="https://mtc.maps.arcgis.com/home/", username="content_MTC", password=password)
    folder : str
        name of the folder to publish to (optional)
    overwrite : bool
        if True, overwrite existing layer instead of publishing a new one
    f_layer_id : str
        if overwrite is True, provide the id of the feature layer to overwrite

    Raises:
    -------
    ValueError
        if overwrite is True but no f_layer_id is given
    """
    if overwrite:
        # Fail early with a clear message rather than passing None on to the
        # content lookup.
        if not f_layer_id:
            raise ValueError("f_layer_id is required when overwrite is True")
        overwrite_published_feature_layer(f_layer_id, geojson_path, client)
    else:
        # upload the geojson as a new content item
        item_prop = {
            "type": "GeoJson",
            "title": layer_name,
            "tags": tags,
            "snippet": layer_snippet,
            "overwrite": True,
        }
        item = client.content.add(item_properties=item_prop, data=geojson_path, folder=folder)

        # publish the item
        published_item = item.publish(file_type="geojson")

        print(f"Published {layer_name} to ArcGIS Online as {published_item.id}")
# --- Query ACS API -----------------------------------------------------------
# pull american community survey tabular data
acs_df = pull_acs_5_year_est_data(
    census_api_key=api_key, acs_year=2021, tbl_prof_type="Detailed", select_table_vars=acs_vars_lst
)

# pull american community survey geographic data
acs_gdf = pull_census_tracts_geodata(year=2021, cartographic=True)

# --- Rename columns for consistency with prior epcs --------------------------
cols = {
    "fipco": "county_fip",
    "B03002_001E": "tot_pop_poc",
    "B01001_001E": "tot_pop_se",
    "C17002_001E": "tot_pop_po",
    "C18108_001E": "tot_pop_ci",
    "B08201_001E": "tot_hh",
    "B11004_001E": "tot_fam",
    "B16005_001E": "tot_pop_ov",
    "B25070_010E": "pop_hus_re",
    "B08201_002E": "pop_zvhhs",
}
acs_df.rename(columns=cols, inplace=True)

# --- Calculate epc populations -----------------------------------------------
# calculate poc population (total population - not hispanic or latino white alone)
acs_df["pop_poc"] = acs_df["tot_pop_poc"] - acs_df["B03002_003E"]

# calculate senior population
acs_df["pop_over75"] = (
    acs_df["B01001_023E"]
    + acs_df["B01001_024E"]
    + acs_df["B01001_025E"]
    + acs_df["B01001_047E"]
    + acs_df["B01001_048E"]
    + acs_df["B01001_049E"]
)

# calculate single parent family population (male householder, no spouse present + female householder, no spouse present)
acs_df["pop_spfam"] = acs_df["B11004_010E"] + acs_df["B11004_016E"]

# calculate limited english proficiency population (primarily speaks a language other than English at home and speaks English less than "very well" or "not at all")
acs_df["pop_lep"] = (
    acs_df["B16005_007E"]
    + acs_df["B16005_008E"]
    + acs_df["B16005_012E"]
    + acs_df["B16005_013E"]
    + acs_df["B16005_017E"]
    + acs_df["B16005_018E"]
    + acs_df["B16005_022E"]
    + acs_df["B16005_023E"]
    + acs_df["B16005_029E"]
    + acs_df["B16005_030E"]
    + acs_df["B16005_034E"]
    + acs_df["B16005_035E"]
    + acs_df["B16005_039E"]
    + acs_df["B16005_040E"]
    + acs_df["B16005_044E"]
    + acs_df["B16005_045E"]
)

# calculate population below 200% of poverty (total population - population above 200% of poverty)
acs_df["pop_below2"] = acs_df["tot_pop_po"] - acs_df["C17002_008E"]

# calculate population with a disability (total civilian non-institutionalized population - population with no disability)
acs_df["pop_disabi"] = acs_df["tot_pop_ci"] - (
    acs_df["C18108_005E"] + acs_df["C18108_009E"] + acs_df["C18108_013E"]
)

# --- Calculate epc shares ----------------------------------------------------
# A zero denominator yields a share of 0 rather than inf/NaN.
acs_df["pct_poc"] = np.where(
    acs_df["tot_pop_poc"] == 0, 0, (acs_df["pop_poc"] / acs_df["tot_pop_poc"])
)
acs_df["pct_over75"] = np.where(
    acs_df["tot_pop_se"] == 0, 0, (acs_df["pop_over75"] / acs_df["tot_pop_se"])
)
acs_df["pct_spfam"] = np.where(acs_df["tot_fam"] == 0, 0, (acs_df["pop_spfam"] / acs_df["tot_fam"]))
acs_df["pct_lep"] = np.where(
    acs_df["tot_pop_ov"] == 0, 0, (acs_df["pop_lep"] / acs_df["tot_pop_ov"])
)
acs_df["pct_below2"] = np.where(
    acs_df["tot_pop_po"] == 0, 0, (acs_df["pop_below2"] / acs_df["tot_pop_po"])
)
acs_df["pct_disab"] = np.where(
    acs_df["tot_pop_ci"] == 0, 0, (acs_df["pop_disabi"] / acs_df["tot_pop_ci"])
)
acs_df["pct_zvhhs"] = np.where(acs_df["tot_hh"] == 0, 0, (acs_df["pop_zvhhs"] / acs_df["tot_hh"]))
acs_df["pct_hus_re"] = np.where(acs_df["tot_hh"] == 0, 0, (acs_df["pop_hus_re"] / acs_df["tot_hh"]))

# --- Flag epcs and epc levels (high, higher, highest) ------------------------
# Each tier follows the same rule: a tract qualifies when it is flagged for
# low income AND either people of color or at least 3 of the remaining factors.

# Flag halfsd columns and count factors
cols_dict_halfsd = {
    "pct_over75": "over75_1_2",
    "pct_poc": "poc_1_2",
    "pct_spfam": "spfam_1_2",
    "pct_disab": "disab_1_2",
    "pct_lep": "lep_1_2",
    "pct_below2": "below2_1_2",
    "pct_zvhhs": "zvhh_1_2",
    "pct_hus_re": "hus_re_1_2",
}

flag_mult_columns(acs_df, cols_dict_halfsd, 0.5)

halfsd_cols_list = [
    "below2_1_2",
    "poc_1_2",
    "spfam_1_2",
    "disab_1_2",
    "lep_1_2",
    "over75_1_2",
    "zvhh_1_2",
    "hus_re_1_2",
]
acs_df["count_1_2"] = acs_df[halfsd_cols_list].sum(axis=1)

# Flag halfsd epc
halfsd_remain = ["spfam_1_2", "disab_1_2", "lep_1_2", "over75_1_2", "zvhh_1_2", "hus_re_1_2"]
halfsd_cond = ((acs_df["poc_1_2"] == 1) & (acs_df["below2_1_2"] == 1)) | (
    (acs_df["below2_1_2"] == 1) & (acs_df[halfsd_remain].sum(axis=1) >= 3)
)
acs_df["epc50p_1_2"] = np.where(halfsd_cond, 1, 0)

# Flag onesd columns
cols_dict_onesd = {
    "pct_over75": "over75_1",
    "pct_poc": "poc_1",
    "pct_spfam": "spfam_1",
    "pct_disab": "disab_1",
    "pct_lep": "lep_1",
    "pct_below2": "below2_1",
    "pct_zvhhs": "zvhh_1",
    "pct_hus_re": "hus_re_1",
}

flag_mult_columns(acs_df, cols_dict_onesd, 1)

onesd_cols_list = [
    "below2_1",
    "poc_1",
    "spfam_1",
    "disab_1",
    "lep_1",
    "over75_1",
    "zvhh_1",
    "hus_re_1",
]
acs_df["count_1"] = acs_df[onesd_cols_list].sum(axis=1)

# Flag onesd epc
onesd_remain = ["spfam_1", "disab_1", "lep_1", "over75_1", "zvhh_1", "hus_re_1"]
onesd_cond = ((acs_df["poc_1"] == 1) & (acs_df["below2_1"] == 1)) | (
    (acs_df["below2_1"] == 1) & (acs_df[onesd_remain].sum(axis=1) >= 3)
)
acs_df["epc50p_1"] = np.where(onesd_cond, 1, 0)

# Flag onehalfsd columns
cols_dict_onehalfsd = {
    "pct_over75": "over75_1ha",
    "pct_poc": "poc_1ha",
    "pct_spfam": "spfam_1ha",
    "pct_disab": "disab_1ha",
    "pct_lep": "lep_1ha",
    "pct_below2": "below2_1ha",
    "pct_zvhhs": "zvhh_1ha",
    "pct_hus_re": "hus_re_1ha",
}

flag_mult_columns(acs_df, cols_dict_onehalfsd, 1.5)

onehalfsd_cols_list = [
    "below2_1ha",
    "poc_1ha",
    "spfam_1ha",
    "disab_1ha",
    "lep_1ha",
    "over75_1ha",
    "zvhh_1ha",
    "hus_re_1ha",
]
acs_df["count_1ha"] = acs_df[onehalfsd_cols_list].sum(axis=1)

# Flag onehalfsd epc
onehalfsd_remain = ["spfam_1ha", "disab_1ha", "lep_1ha", "over75_1ha", "zvhh_1ha", "hus_re_1ha"]
onehalfsd_cond = ((acs_df["poc_1ha"] == 1) & (acs_df["below2_1ha"] == 1)) | (
    (acs_df["below2_1ha"] == 1) & (acs_df[onehalfsd_remain].sum(axis=1) >= 3)
)
acs_df["epc50p_1ha"] = np.where(onehalfsd_cond, 1, 0)

# --- Flag 2050 epcs ----------------------------------------------------------
# A tract is an EPC when it qualifies at any tier. All three terms are compared
# to 1 explicitly so the OR combines booleans only.
acs_df["epc_2050p"] = np.where(
    (acs_df["epc50p_1ha"] == 1) | (acs_df["epc50p_1"] == 1) | (acs_df["epc50p_1_2"] == 1), 1, 0
)

# --- Create epc classes ------------------------------------------------------
acs_df["epc_class"] = acs_df.apply(set_epc_class, axis=1)
# Number of EPC tracts in each class. The aggregation is named by string:
# passing the builtin `sum` emits a pandas FutureWarning.
acs_df.groupby("epc_class")["epc_2050p"].agg("sum")

# --- Compare previous EPCs (disabled) ----------------------------------------
# census_vintage_crosswalk = pd.read_csv(
#     "https://www2.census.gov/geo/docs/maps-data/data/rel2020/tract/tab20_tract20_tract10_natl.txt",
#     sep="|",
#     dtype=str,
# )

# census_vintage_crosswalk.rename(
#     columns={"GEOID_TRACT_20": "tract_geoid20", "GEOID_TRACT_10": "tract_geoid10"}, inplace=True
# )

# pba50_epc_df = pull_geotable_agol(
#     base_url="https://services3.arcgis.com/i2dkYWmb4wHvYPda/arcgis/rest/services/communities_of_concern_2020_acs2018/FeatureServer/0",
#     client=gis,
#     reproject_to_analysis_crs=False,
# )
- "# pba50_epc_df.rename(columns={\"geoid\": \"tract_geoid10\"}, inplace=True)" - ] - }, - { - "cell_type": "code", - "execution_count": 35, - "metadata": {}, - "outputs": [], - "source": [ - "# pba50_epc_df[\"epc_2050\"].sum()" - ] - }, - { - "cell_type": "code", - "execution_count": 36, - "metadata": {}, - "outputs": [], - "source": [ - "# pba50_epc_cross = pd.merge(\n", - "# pba50_epc_df[[\"tract_geoid10\", \"epc_2050\", \"geometry\"]],\n", - "# census_vintage_crosswalk[[\"tract_geoid20\", \"tract_geoid10\"]],\n", - "# on=\"tract_geoid10\",\n", - "# how=\"left\",\n", - "# )" - ] - }, - { - "cell_type": "code", - "execution_count": 37, - "metadata": {}, - "outputs": [], - "source": [ - "# acs_df.rename(columns={\"tract_geoid\": \"tract_geoid20\"}, inplace=True)" - ] - }, - { - "cell_type": "code", - "execution_count": 38, - "metadata": {}, - "outputs": [], - "source": [ - "# # update acs_df with epc_2050 values\n", - "# acs_df[\"epc_2050\"] = acs_df[\"tract_geoid20\"].map(\n", - "# pba50_epc_cross.sort_values(by=\"epc_2050\", ascending=False)\n", - "# .groupby(\"tract_geoid20\")[\"epc_2050\"]\n", - "# .first()\n", - "# )" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Sum pba2050 and pba2040 epcs for comparison" - ] - }, - { - "cell_type": "code", - "execution_count": 39, - "metadata": {}, - "outputs": [], - "source": [ - "# acs_df[[\"epc_2050\", \"epc_2050p\"]].sum()" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Calculate epc gains and losses by tract for pba50 and pba2050 +" - ] - }, - { - "cell_type": "code", - "execution_count": 40, - "metadata": {}, - "outputs": [], - "source": [ - "# acs_df.loc[acs_df[\"epc_2050\"].isnull(), \"epc_2050\"] = 0" - ] - }, - { - "cell_type": "code", - "execution_count": 41, - "metadata": {}, - "outputs": [], - "source": [ - "# acs_df[\"c2050_2050p\"] = acs_df[\"epc_2050p\"] - acs_df[\"epc_2050\"]" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - 
# --- Calculate regional statistics -------------------------------------------
# Mean and standard deviation of every factor share, one row per factor.
epc_region_stats = (
    acs_df.agg(
        {
            "pct_over75": ["mean", "std"],
            "pct_poc": ["mean", "std"],
            "pct_lep": ["mean", "std"],
            "pct_spfam": ["mean", "std"],
            "pct_below2": ["mean", "std"],
            "pct_disab": ["mean", "std"],
            "pct_zvhhs": ["mean", "std"],
            "pct_hus_re": ["mean", "std"],
        }
    )
    .transpose()
    .reset_index()
)

epc_region_stats.rename(columns={"index": "factors"}, inplace=True)

epc_factors = {
    "pct_over75": "Seniors 75 Years and Over",
    "pct_poc": "People of Color",
    "pct_lep": "Limited English Proficiency",
    "pct_spfam": "Single Parent Families",
    "pct_below2": "Low-Income (<200% Federal Poverty Level-FPL)",
    "pct_disab": "People with Disability",
    "pct_zvhhs": "Zero-Vehicle Household",
    "pct_hus_re": "Rent-Burdened",
}
# Assign the result back instead of calling an inplace method on the selected
# column: under pandas Copy-on-Write the inplace form does not reliably update
# the parent frame.
epc_region_stats["factors"] = epc_region_stats["factors"].replace(epc_factors)

epc_region_stats["mean"] = epc_region_stats["mean"].round(decimals=2)
epc_region_stats["std"] = epc_region_stats["std"].round(decimals=2)

# --- Create fields for .5, 1, and 1.5 sd from mean ---------------------------
# Built from the already-rounded mean/std, matching how the flag thresholds
# are derived.
epc_region_stats["plus_half_sd"] = (
    epc_region_stats["mean"] + (0.5 * epc_region_stats["std"])
).round(decimals=2)
epc_region_stats["plus_one_sd"] = (epc_region_stats["mean"] + epc_region_stats["std"]).round(
    decimals=2
)
epc_region_stats["plus_one_half_sd"] = (
    epc_region_stats["mean"] + (1.5 * epc_region_stats["std"])
).round(decimals=2)
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
factorsmeanstdplus_half_sdplus_one_sdplus_one_half_sd
0Seniors 75 Years and Over0.070.050.100.120.15
1People of Color0.600.230.720.830.94
2Limited English Proficiency0.070.080.110.150.19
3Single Parent Families0.120.090.160.210.26
4Low-Income (<200% Federal Poverty Level-FPL)0.190.130.260.320.38
5People with Disability0.100.050.120.150.18
6Zero-Vehicle Household0.090.140.160.230.30
7Rent-Burdened0.100.080.140.180.22
\n", - "
" - ], - "text/plain": [ - " factors mean std plus_half_sd \\\n", - "0 Seniors 75 Years and Over 0.07 0.05 0.10 \n", - "1 People of Color 0.60 0.23 0.72 \n", - "2 Limited English Proficiency 0.07 0.08 0.11 \n", - "3 Single Parent Families 0.12 0.09 0.16 \n", - "4 Low-Income (<200% Federal Poverty Level-FPL) 0.19 0.13 0.26 \n", - "5 People with Disability 0.10 0.05 0.12 \n", - "6 Zero-Vehicle Household 0.09 0.14 0.16 \n", - "7 Rent-Burdened 0.10 0.08 0.14 \n", - "\n", - " plus_one_sd plus_one_half_sd \n", - "0 0.12 0.15 \n", - "1 0.83 0.94 \n", - "2 0.15 0.19 \n", - "3 0.21 0.26 \n", - "4 0.32 0.38 \n", - "5 0.15 0.18 \n", - "6 0.23 0.30 \n", - "7 0.18 0.22 " - ] - }, - "execution_count": 47, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "epc_region_stats" - ] - }, - { - "cell_type": "code", - "execution_count": 48, - "metadata": {}, - "outputs": [], - "source": [ - "epc_region_stats.to_csv(\"Data/epc_regional_stats_ACS2021.csv\", index=False)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Join census tracts geo to epc df" - ] - }, - { - "cell_type": "code", - "execution_count": 49, - "metadata": {}, - "outputs": [], - "source": [ - "acs_df.rename(columns={\"tract_geoid20\": \"tract_geoid\"}, inplace=True)" - ] - }, - { - "cell_type": "code", - "execution_count": 50, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "(1772, 86)" - ] - }, - "execution_count": 50, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "acs_df.shape" - ] - }, - { - "cell_type": "code", - "execution_count": 51, - "metadata": {}, - "outputs": [], - "source": [ - "epc_gdf = pd.merge(acs_gdf, acs_df, on=\"tract_geoid\", how=\"inner\")" - ] - }, - { - "cell_type": "code", - "execution_count": 52, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "(1765, 87)" - ] - }, - "execution_count": 52, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - 
"epc_gdf.shape" - ] - }, - { - "cell_type": "code", - "execution_count": 53, - "metadata": {}, - "outputs": [], - "source": [ - "final_cols = [\n", - " \"tract_geoid\",\n", - " \"county_fip\",\n", - " \"tot_pop_poc\",\n", - " \"tot_pop_se\",\n", - " \"tot_pop_po\",\n", - " \"tot_pop_ci\",\n", - " \"tot_pop_ov\",\n", - " \"tot_hh\",\n", - " \"tot_fam\",\n", - " \"pop_poc\",\n", - " \"pop_over75\",\n", - " \"pop_spfam\",\n", - " \"pop_lep\",\n", - " \"pop_below2\",\n", - " \"pop_disabi\",\n", - " \"pop_hus_re\",\n", - " \"pop_zvhhs\",\n", - " \"pct_poc\",\n", - " \"pct_over75\",\n", - " \"pct_spfam\",\n", - " \"pct_lep\",\n", - " \"pct_below2\",\n", - " \"pct_disab\",\n", - " \"pct_hus_re\",\n", - " \"pct_zvhhs\",\n", - " \"poc_1_2\",\n", - " \"over75_1_2\",\n", - " \"spfam_1_2\",\n", - " \"lep_1_2\",\n", - " \"disab_1_2\",\n", - " \"below2_1_2\",\n", - " \"hus_re_1_2\",\n", - " \"zvhh_1_2\",\n", - " \"epc_2050p\",\n", - " \"epc_class\",\n", - " # \"epc_2050\",\n", - " # \"c2050_2050p\",\n", - " \"geometry\",\n", - "]\n", - "epc_path = \"Data/epc_acs2021.geojson\"\n", - "epc_gdf[final_cols].to_file(epc_path, driver=\"GeoJSON\")" - ] - }, - { - "cell_type": "code", - "execution_count": 54, - "metadata": {}, - "outputs": [], - "source": [ - "# output tabular data\n", - "final_cols.remove(\"geometry\")\n", - "acs_df[final_cols].to_csv(\"Data/epc_acs2021.csv\", index=False)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Publish to arcgis online" - ] - }, - { - "cell_type": "code", - "execution_count": 55, - "metadata": {}, - "outputs": [], - "source": [ - "# publish_geojson_to_agol(\n", - "# geojson_path=epc_path,\n", - "# layer_name=\"DRAFT Equity Priority Communities - Plan Bay Area 2050 Plus (ACS 2022)\",\n", - "# layer_snippet=\"\"\"This dataset represents tract information related to Equity Priority Communities \n", - "# for Plan Bay Area 2050 Plus. 
The dataset was developed using American Community Survey 2018-2022 data for eight variables considered.\"\"\",\n", - "# tags=\"bay area, equity, policy, planning, environmental justice, acs, american community survey, epc, community of concern\",\n", - "# client=gis,\n", - "# folder=\"plan_policy\",\n", - "# # overwrite=True,\n", - "# # f_layer_id=\"\",\n", - "# )" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Create Field Map Dictionary to Rename Feature Class Alias" - ] - }, - { - "cell_type": "code", - "execution_count": 56, - "metadata": {}, - "outputs": [], - "source": [ - "# field_metadata = pd.read_csv(\"Data/EPC_Schema_pba2050p.csv\")" - ] - }, - { - "cell_type": "code", - "execution_count": 57, - "metadata": {}, - "outputs": [], - "source": [ - "# field_metadata.head(5)" - ] - }, - { - "cell_type": "code", - "execution_count": 58, - "metadata": {}, - "outputs": [], - "source": [ - "# dict(zip(field_metadata[\"Field Name\"], field_metadata[\"Alias\"]))" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.10.6" - } - }, - "nbformat": 4, - "nbformat_minor": 2 -}