diff --git a/example_notebooks/regional_runoff_calculations.ipynb b/example_notebooks/regional_runoff_calculations.ipynb index 754bc8f..ee19e69 100644 --- a/example_notebooks/regional_runoff_calculations.ipynb +++ b/example_notebooks/regional_runoff_calculations.ipynb @@ -55,15 +55,17 @@ "source": [ "#| tbl-cap: The top rows of the intersection table created by the hyswap geospatial data assembly repository\n", "# This example initially reads in the entire huc_basin_intersection table.\n", - "intersection_table_full = pd.read_csv('example_data/huc_props_tbl_conus.csv', \n", - " converters = {0:str,1:str,2:str}\n", - " )\n", + "intersection_table_full = pd.read_csv(\n", + " filepath_or_buffer = 'example_data/huc_props_tbl_conus.csv', \n", + " converters = {0:str,1:str,2:str}\n", + " )\n", "\n", "# drop first col that is the index from the csv. \n", - "intersection_table_full.drop(columns=intersection_table_full.columns[0],\n", - " axis=1,\n", - " inplace=True\n", - ")\n", + "intersection_table_full.drop(\n", + " columns = intersection_table_full.columns[0],\n", + " axis = 1,\n", + " inplace = True\n", + " )\n", "# view\n", "intersection_table_full.head()" ] @@ -124,13 +126,14 @@ "outputs": [], "source": [ "# Pull basin site ids from selected huc8 geom using the hyswap runoff identify sites from geom intersection function\n", - "basins_overlap_single_huc = hyswap.runoff.identify_sites_from_geom_intersection(geom_id = single_huc,\n", - " geom_intersection_df = intersection_table_full,\n", - " geom_id_col='huc_id',\n", - " site_col='da_site_no',\n", - " prop_geom_in_basin_col='prop_huc_in_basin',\n", - " prop_basin_in_geom_col='prop_basin_in_huc'\n", - " )\n", + "basins_overlap_single_huc = hyswap.runoff.identify_sites_from_geom_intersection(\n", + " geom_id = single_huc,\n", + " geom_intersection_df = intersection_table_full,\n", + " geom_id_col = 'huc_id',\n", + " site_col = 'da_site_no',\n", + " prop_geom_in_basin_col = 'prop_huc_in_basin',\n", + " prop_basin_in_geom_col = 'prop_basin_in_huc'\n", + " )\n", "\n", "# output should be long list of sites ids. These are all the site_ids within the selected huc8 polygon\n", "num_sites = len(basins_overlap_single_huc)\n", @@ -151,21 +154,24 @@ "metadata": {}, "outputs": [], "source": [ - "def query_nwis_runoff_data(sites,\n", + "def query_nwis_runoff_data(\n", + " sites,\n", " start_date,\n", " end_date):\n", " print(\"Pulling site streamflow data and converting it to runoff. This may take some time...\")\n", " # first, pull site info \n", - " info_df, _ = dataretrieval.nwis.get_info(sites=sites)\n", + " info_df, _ = dataretrieval.nwis.get_info(sites = sites)\n", " # convert drainage area from sq mi to sq km\n", " info_df['da'] = info_df['drain_area_va'] * 2.58998811\n", " # info_df = info_df[['da', 'site_no']]\n", " # get streamflow data between start and end date\n", " dv_df = dataretrieval.nwis.get_record(\n", - " sites=sites, parameterCd='00060',\n", - " start=start_date, end=end_date,\n", - " multi_index=False,\n", - " service='dv'\n", + " sites = sites, \n", + " parameterCd = '00060',\n", + " start = start_date, \n", + " end = end_date,\n", + " multi_index = False,\n", + " service = 'dv'\n", " )\n", " df_nwis_ro_data = pd.DataFrame()\n", "\n", @@ -177,9 +183,12 @@ " # Loop through sites retrieved from nwis and estimate\n", " # runoff using hyswap function\n", " for site in siteids:\n", - " ro_df = dv_df[dv_df['site_no']==site]\n", - " da = info_df.loc[info_df['site_no']==site]['da']\n", - " ro_df = hyswap.runoff.streamflow_to_runoff(ro_df, '00060_Mean', da, frequency='daily')\n", + " ro_df = dv_df[dv_df['site_no'] == site]\n", + " da = info_df.loc[info_df['site_no'] == site]['da']\n", + " ro_df = hyswap.runoff.streamflow_to_runoff(df = ro_df, \n", + " data_col = '00060_Mean', \n", + " drainage_area = da, \n", + " frequency = 'daily')\n", " df_nwis_ro_data = pd.concat([df_nwis_ro_data, ro_df])\n", " # print proportion of sites with data\n", " prop = len(siteids)/len(sites)\n", @@ -204,9 +213,11 @@ "metadata": {}, "outputs": [], "source": [ - "df_nwis_ro_data = query_nwis_runoff_data(basins_overlap_single_huc,\n", - "start_date = start_date,\n", - "end_date = end_date)\n", + "df_nwis_ro_data = query_nwis_runoff_data(\n", + " sites = basins_overlap_single_huc,\n", + " start_date = start_date,\n", + " end_date = end_date\n", + " )\n", "display(df_nwis_ro_data[df_nwis_ro_data['site_no'] == '06078500'])" ] }, @@ -231,25 +242,25 @@ "# create df to hold estimate results\n", "results = pd.DataFrame()\n", " \n", - " # loop through each water year\n", + "# loop through each water year\n", "for year, df_year in df_nwis_ro_data.groupby(df_nwis_ro_data.water_year):\n", " df_nwis_ro_data_sub = df_nwis_ro_data[df_nwis_ro_data['water_year'] == year]\n", "\n", - " # estimate runoff for each water year of data\n", + " # estimate runoff for each water year of data\n", " single_huc_runoff = hyswap.calculate_geometric_runoff(\n", - " geom_id=single_huc,\n", - " runoff_df=df_nwis_ro_data_sub,\n", - " geom_intersection_df=intersection_table_full,\n", - " site_col='da_site_no',\n", - " geom_id_col='huc_id',\n", - " prop_geom_in_basin_col='prop_huc_in_basin',\n", - " prop_basin_in_geom_col='prop_basin_in_huc',\n", - " percentage=False,\n", - " clip_downstream_basins=False,\n", - " full_overlap_threshold=0.98\n", + " geom_id = single_huc,\n", + " runoff_df = df_nwis_ro_data_sub,\n", + " geom_intersection_df = intersection_table_full,\n", + " site_col = 'da_site_no',\n", + " geom_id_col = 'huc_id',\n", + " prop_geom_in_basin_col = 'prop_huc_in_basin',\n", + " prop_basin_in_geom_col = 'prop_basin_in_huc',\n", + " percentage = False,\n", + " clip_downstream_basins = False,\n", + " full_overlap_threshold = 0.98\n", " )\n", " \n", - " # concatenate with previous years runoffs\n", + " # concatenate with previous years runoffs\n", " results = pd.concat([results, single_huc_runoff])" ] }, @@ -269,10 +280,11 @@ "#| fig-cap: A runoff hydrograph for huc 17010209 using the `hyswap.plot_hydrograph` function\n", "hyswap.plot_hydrograph(\n", " results,\n", - " data_col='estimated_runoff',\n", - " title=f'Estimated Runoff for HUC {single_huc}',\n", - " yscale='linear',\n", - " ylab='Runoff (mm)')" + " data_col = 'estimated_runoff',\n", + " title = f'Estimated Runoff for HUC {single_huc}',\n", + " yscale = 'linear',\n", + " ylab = 'Runoff (mm)'\n", + ")" ] }, { @@ -296,10 +308,10 @@ " sites = hyswap.runoff.identify_sites_from_geom_intersection(\n", " geom_id = huc,\n", " geom_intersection_df = intersection_table_full,\n", - " geom_id_col='huc_id',\n", - " site_col='da_site_no',\n", - " prop_geom_in_basin_col='prop_huc_in_basin',\n", - " prop_basin_in_geom_col='prop_basin_in_huc'\n", + " geom_id_col = 'huc_id',\n", + " site_col = 'da_site_no',\n", + " prop_geom_in_basin_col = 'prop_huc_in_basin',\n", + " prop_basin_in_geom_col = 'prop_basin_in_huc'\n", " )\n", " \n", " list_site_ids.append(sites)\n", @@ -315,8 +327,11 @@ "outputs": [], "source": [ "# grab basin data using the all_site_ids list\n", - "df_nwis_ro_data = query_nwis_runoff_data(all_site_ids, start_date=start_date,\n", - "end_date=end_date)" + "df_nwis_ro_data = query_nwis_runoff_data(\n", + " sites = all_site_ids, \n", + " start_date = start_date,\n", + " end_date = end_date\n", + " )" ] }, { @@ -341,14 +356,14 @@ " multiple_huc_runoff = hyswap.runoff.calculate_multiple_geometric_runoff(\n", " geom_id_list = huc_shapes['huc8'],\n", " runoff_df = df_nwis_ro_data,\n", - " geom_intersection_df=intersection_table_full,\n", - " site_col='da_site_no',\n", - " geom_id_col='huc_id',\n", - " prop_geom_in_basin_col= 'prop_huc_in_basin',\n", - " prop_basin_in_geom_col='prop_basin_in_huc',\n", - " percentage=False,\n", - " clip_downstream_basins=True,\n", - " full_overlap_threshold=0.98\n", + " geom_intersection_df = intersection_table_full,\n", + " site_col = 'da_site_no',\n", + " geom_id_col = 'huc_id',\n", + " prop_geom_in_basin_col = 'prop_huc_in_basin',\n", + " prop_basin_in_geom_col = 'prop_basin_in_huc',\n", + " percentage = False,\n", + " clip_downstream_basins = True,\n", + " full_overlap_threshold = 0.98\n", " )\n", " results = pd.concat([results, multiple_huc_runoff])" ] @@ -368,10 +383,12 @@ "source": [ "#| fig-cap: A multiple runoff hydrograph for all hucs pulled from `pynhd` in this example\n", "# Plot \n", - "results_plot = results.pivot_table(index=results.index, columns='geom_id', values='estimated_runoff')\n", + "results_plot = results.pivot_table(index = results.index, \n", + " columns = 'geom_id', \n", + " values = 'estimated_runoff')\n", "ax = results_plot.iloc[:, 10:14].plot()\n", "ax.set_ylabel(\"Runoff (mm)\")\n", - "ax.legend(title='HUC ID')\n", + "ax.legend(title = 'HUC ID')\n", "plt.show()" ] }, @@ -395,9 +412,9 @@ "\n", "for huc_id in huc_ids:\n", " results_sub = results[results['geom_id'] == huc_id]\n", - " results_sub.sort_index(inplace=True)\n", - " percentiles = hyswap.calculate_variable_percentile_thresholds_by_day(results_sub,\n", - " data_column_name='estimated_runoff')\n", + " results_sub.sort_index(inplace = True)\n", + " percentiles = hyswap.calculate_variable_percentile_thresholds_by_day(df = results_sub, \n", + " data_column_name = 'estimated_runoff')\n", " percentile_results[huc_id] = percentiles" ] }, @@ -418,19 +435,21 @@ "start_date = '2023-10-01'\n", "end_date = '2023-10-01'\n", "# grab basin data using the all_site_ids list\n", - "df_nwis_ro_data_new = query_nwis_runoff_data(all_site_ids, start_date=start_date, end_date=end_date)\n", + "df_nwis_ro_data_new = query_nwis_runoff_data(sites = all_site_ids, \n", + " start_date = start_date, \n", + " end_date = end_date)\n", "\n", "multiple_huc_runoff_new = hyswap.runoff.calculate_multiple_geometric_runoff(\n", " geom_id_list = huc_shapes['huc8'],\n", " runoff_df = df_nwis_ro_data_new,\n", - " geom_intersection_df=intersection_table_full,\n", - " site_col='da_site_no',\n", - " geom_id_col='huc_id',\n", - " prop_geom_in_basin_col= 'prop_huc_in_basin',\n", - " prop_basin_in_geom_col='prop_basin_in_huc',\n", - " percentage=False,\n", - " clip_downstream_basins=True,\n", - " full_overlap_threshold=0.98\n", + " geom_intersection_df = intersection_table_full,\n", + " site_col = 'da_site_no',\n", + " geom_id_col = 'huc_id',\n", + " prop_geom_in_basin_col = 'prop_huc_in_basin',\n", + " prop_basin_in_geom_col = 'prop_basin_in_huc',\n", + " percentage = False,\n", + " clip_downstream_basins = True,\n", + " full_overlap_threshold = 0.98\n", " )" ] }, @@ -451,12 +470,16 @@ "# the previously calculated percentile threshold levels\n", "huc_perc_df = pd.DataFrame()\n", "\n", - "for huc, huc_df in multiple_huc_runoff_new.groupby('geom_id', group_keys=False):\n", - " huc_perc_df = pd.concat([huc_perc_df, hyswap.percentiles.calculate_multiple_variable_percentiles_from_values(\n", - " huc_df,'estimated_runoff', percentile_results[huc])])\n", + "for huc, huc_df in multiple_huc_runoff_new.groupby('geom_id', group_keys = False):\n", + " percentile_addition = hyswap.percentiles.calculate_multiple_variable_percentiles_from_values(df = huc_df,\n", + " data_column_name = 'estimated_runoff', \n", + " percentile_df = percentile_results[huc])\n", + " huc_perc_df = pd.concat([huc_perc_df, percentile_addition])\n", "# categorize streamflow by the estimated streamflow percentiles\n", - "huc_perc_df = hyswap.utils.categorize_flows(huc_perc_df, 'est_pct', schema_name=\"NWD\")\n", - "huc_perc_df = huc_perc_df.reset_index(level='datetime')" + "huc_perc_df = hyswap.utils.categorize_flows(df = huc_perc_df, \n", + " percentile_col = 'est_pct', \n", + " schema_name = \"NWD\")\n", + "huc_perc_df = huc_perc_df.reset_index(level = 'datetime')" ] }, { @@ -474,7 +497,9 @@ "source": [ "#| fig-cap: 'HUC08 colors correspond to the runoff percentiles for 10-01-2023 flow based on 5 years of estimated runoff data at the HUC08 scale, derived from gage basin flow data.'\n", "# merge percentile information to geodataframe with polygon geometry\n", - "huc_shapes_percs = huc_shapes.merge(huc_perc_df.set_index('geom_id'), left_on='huc8', right_index=True)\n", + "huc_shapes_percs = huc_shapes.merge(huc_perc_df.set_index('geom_id'), \n", + " left_on = 'huc8', \n", + " right_index = True)\n", "# set up gdf format for plotting - ex. get rid of datetimes/timestamps\n", "huc_shapes_percs['Date'] = huc_shapes_percs['datetime'].dt.strftime('%Y-%m-%d %H:%M')\n", "schema = hyswap.utils.retrieve_schema('NWD')\n", @@ -488,20 +513,21 @@ "huc_shapes_percs.loc[huc_shapes_percs['est_pct'].isna(), 'flow_cat'] = 'Not Ranked'\n", "flow_cond_cmap = flow_cond_cmap + ['#d3d3d3'] # light grey\n", "# renaming columns with user friendly names for map\n", - "huc_shapes_percs = huc_shapes_percs.drop(['loaddate', 'datetime'], axis=1)\n", - "huc_shapes_percs = huc_shapes_percs.rename(columns={'estimated_runoff':'Runoff (mm/day)',\n", - " 'est_pct':'Estimated Percentile',\n", - " 'huc8':'HUC08 ID',\n", - " 'name':'HUC08 Name',\n", - " 'flow_cat':'Streamflow Category'})\n", + "huc_shapes_percs = huc_shapes_percs.drop(['loaddate', 'datetime'], axis = 1)\n", + "huc_shapes_percs = huc_shapes_percs.rename(columns = {'estimated_runoff' : 'Runoff (mm/day)',\n", + " 'est_pct' : 'Estimated Percentile',\n", + " 'huc8' : 'HUC08 ID',\n", + " 'name' : 'HUC08 Name',\n", + " 'flow_cat' : 'Streamflow Category'})\n", "# Create map of runoff \n", "huc_shapes_percs.explore(\n", - " column=\"Streamflow Category\",\n", - " cmap=flow_cond_cmap,\n", - " tooltip=[\"HUC08 ID\", 'HUC08 Name',\"Streamflow Category\", \"Runoff (mm/day)\", \"Estimated Percentile\", \"Date\"],\n", - " tiles=\"CartoDB Positron\",\n", - " marker_kwds=dict(radius=5),\n", - " legend_kwds=dict(caption='Daily Mean Runoff Category'))" + " column = \"Streamflow Category\",\n", + " cmap = flow_cond_cmap,\n", + " tooltip = [\"HUC08 ID\", 'HUC08 Name',\"Streamflow Category\", \"Runoff (mm/day)\", \"Estimated Percentile\", \"Date\"],\n", + " tiles = \"CartoDB Positron\",\n", + " marker_kwds = dict(radius = 5),\n", + " legend_kwds = dict(caption = 'Daily Mean Runoff Category')\n", + " )" ] } ], diff --git a/example_notebooks/single_gage_analysis.ipynb b/example_notebooks/single_gage_analysis.ipynb index 6c88866..c213784 100644 --- a/example_notebooks/single_gage_analysis.ipynb +++ b/example_notebooks/single_gage_analysis.ipynb @@ -41,7 +41,7 @@ "## Download streamflow data from USGS NWIS for an example site \n", "For demonstration purposes, retrieving data for gage 04288000 - MAD RIVER AT MORETOWN, VT.\n", "\n", - "Users can identify streamflow gage locations and site ID numbers through the [USGS National Water Dashboard](https://dashboard.waterdata.usgs.gov/app/nwd/en/)" + "Users can identify streamflow gage locations and site ID numbers through the [USGS National Water Dashboard](https://dashboard.waterdata.usgs.gov/app/nwd/en/)." ] }, { @@ -51,8 +51,11 @@ "outputs": [], "source": [ "StaID = '04288000'\n", - "flow_data = nwis.get_record(sites=StaID, parameterCd='00060', start='1900-01-01', service='dv')\n", - "station_name = nwis.get_record(sites=StaID, service='site').loc[0, 'station_nm']" + "flow_data = nwis.get_record(sites = StaID, \n", + " parameterCd = '00060', \n", + " start = '1900-01-01', \n", + " service = 'dv')\n", + "station_name = nwis.get_record(sites = StaID, service = 'site').loc[0, 'station_nm']" ] }, { @@ -66,7 +69,8 @@ " flow_data['00060_Mean'] = flow_data['00060_Mean'].replace(-999999, np.nan)\n", "\n", " # create a filtered version of data of only USGS approved data (quality-assured data that may be published and used in statistical analsysis)\n", - " approved_flow_data = hyswap.utils.filter_approved_data(flow_data, '00060_Mean_cd')\n", + " approved_flow_data = hyswap.utils.filter_approved_data(data = flow_data, \n", + " filter_column = '00060_Mean_cd')\n", "else:\n", " print('No standard discharge data column found for site ' + StaID + ', suggest using a different site')" ] @@ -98,7 +102,7 @@ "plot_start = \"2010-10-01\"\n", "plot_end = \"2011-09-30\"\n", "\n", - "fig, ax = plt.subplots(figsize=(9,4))\n", + "fig, ax = plt.subplots(figsize = (9,4))\n", "ax = hyswap.plot_hydrograph(flow_data, \n", " data_col = \"00060_Mean\", \n", " start_date = plot_start,\n", @@ -112,7 +116,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "Next, try changing the plot to not have log scale on the y-axis" + "Next, try changing the plot to not have log scale on the y-axis." ] }, { @@ -122,12 +126,12 @@ "outputs": [], "source": [ "#| fig-cap: Sample HySwap hydrograph plot\n", - "fig, ax = plt.subplots(figsize=(9,4))\n", + "fig, ax = plt.subplots(figsize = (9,4))\n", "hyswap.plot_hydrograph(flow_data, \n", " data_col = \"00060_Mean\", \n", - " start_date=plot_start,\n", - " end_date=plot_end,\n", - " title=f\"Hydrograph for {StaID} - {station_name}\",\n", + " start_date = plot_start,\n", + " end_date = plot_end,\n", + " title = f\"Hydrograph for {StaID} - {station_name}\",\n", " yscale = 'linear',\n", " ax = ax)\n", "plt.show()" @@ -137,7 +141,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "Now test doing some plot customizations including axis labels, colors, plot style" + "Now test doing some plot customizations including axis labels, colors, plot style." ] }, { @@ -148,18 +152,18 @@ "source": [ "#| fig-cap: Sample HySwap hydrograph plot with customized appearance\n", "plot_start = \"2021-10-01\"\n", - "fig, ax = plt.subplots(figsize=(12,4))\n", + "fig, ax = plt.subplots(figsize = (12,4))\n", "ax = hyswap.plot_hydrograph(flow_data, \n", " data_col = \"00060_Mean\", \n", - " start_date=plot_start,\n", - " title=f\"Hydrograph for {StaID} - {station_name}\",\n", + " start_date = plot_start,\n", + " title = f\"Hydrograph for {StaID} - {station_name}\",\n", " yscale = 'linear',\n", " ylab = 'Streamflow (cfs)',\n", " xlab = '',\n", " color = '#360919',\n", " ax = ax)\n", - "ax.grid(which = \"major\", axis=\"y\", lw = 1.5)\n", - "ax.grid(which = \"minor\", axis=\"y\", linestyle = \"dashed\")\n", + "ax.grid(which = \"major\", axis = \"y\", lw = 1.5)\n", + "ax.grid(which = \"minor\", axis = \"y\", linestyle = \"dashed\")\n", "ax.minorticks_on()\n", "plt.show()" ] @@ -169,7 +173,7 @@ "metadata": {}, "source": [ "## Analyze the long-term record of streamflow\n", - "Review summary statistics and flow records using raster hydrographs" + "Review summary statistics and flow records using raster hydrographs." ] }, { @@ -179,7 +183,7 @@ "outputs": [], "source": [ "# calcualte summary statistics on only approved data (quality-assured data that may be published and used in statistical analsysis)\n", - "summary_stats = hyswap.calculate_summary_statistics(approved_flow_data)" + "summary_stats = hyswap.calculate_summary_statistics(df = approved_flow_data)" ] }, { @@ -196,8 +200,8 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "To view long-term patterns and trends in streamflow, raster hydrographs can be plotted using `hyswap`\n", - "Here, let's utilize a raster hydrograph to see the historical flow at the site from the pst 50 years (1973-2023)" + "To view long-term patterns and trends in streamflow, raster hydrographs can be plotted using `hyswap`.\n", + "Here, let's utilize a raster hydrograph to see the historical flow at the site from the pst 50 years (1973-2023)." ] }, { @@ -207,7 +211,11 @@ "outputs": [], "source": [ "# format the data\n", - "df_formatted = hyswap.rasterhydrograph.format_data(flow_data, '00060_Mean', year_type=\"calendar\", begin_year=1973, end_year=2023)" + "df_formatted = hyswap.rasterhydrograph.format_data(df = flow_data, \n", + " data_column_name = '00060_Mean', \n", + " year_type = \"calendar\", \n", + " begin_year = 1973, \n", + " end_year = 2023)" ] }, { @@ -218,11 +226,11 @@ "source": [ "#| fig-cap: Sample raster hydrograph showing past 50 years of streamflow\n", "# make plot\n", - "fig, ax = plt.subplots(figsize=(8,10))\n", + "fig, ax = plt.subplots(figsize = (8,10))\n", "ax = hyswap.plots.plot_raster_hydrograph(\n", " df_formatted, \n", - " ax=ax,\n", - " title=f\"Raster Hydrograph for {StaID} - {station_name}\")\n", + " ax = ax,\n", + " title = f\"Raster Hydrograph for {StaID} - {station_name}\")\n", "plt.show()" ] }, @@ -240,10 +248,10 @@ "outputs": [], "source": [ "# re-format data\n", - "df_formatted = hyswap.rasterhydrograph.format_data(flow_data, \n", - " '00060_Mean', \n", - " year_type=\"water\", \n", - " clip_leap_day=True)" + "df_formatted = hyswap.rasterhydrograph.format_data(df = flow_data, \n", + " data_column_name= '00060_Mean', \n", + " year_type = \"water\", \n", + " clip_leap_day = True)" ] }, { @@ -254,13 +262,13 @@ "source": [ "#| fig-cap: Sample raster hydrograph showing all daily streamflow records from a site\n", "# make plot\n", - "fig = plt.figure(figsize=(8,18))\n", + "fig = plt.figure(figsize = (8,18))\n", "ax = fig.add_subplot()\n", "ax = hyswap.plots.plot_raster_hydrograph(\n", " df_formatted, \n", - " ax=ax,\n", - " title=f\"Raster Hydrograph for {StaID} - {station_name}\",\n", - " cmap='gist_ncar')\n", + " ax = ax,\n", + " title = f\"Raster Hydrograph for {StaID} - {station_name}\",\n", + " cmap = 'gist_ncar')\n", "plt.show()" ] }, @@ -269,7 +277,7 @@ "metadata": {}, "source": [ "## Analyze annual streamflow through cumulative flow and flow duration curves \n", - "Use calculations of streamflow percentiles (variable by day) to analyze annual patterns in streamflow\n", + "Use calculations of streamflow percentiles (variable by day) to analyze annual patterns in streamflow.\n", "\n", "Let's look at the last four years (2020-2023) of cumulative streamflow by plotting a cumulative hydrographs of different water years along with the envelope that 90% of annual cumulative streamflow is expected to be within, and the years with the minimum and maximum annual cumulative streamflows." ] @@ -283,18 +291,18 @@ "#| fig-cap: Sample cumulative hydrograph showing multiple water years of cumulative streamflow\n", "years_to_plot = [2020, 2021, 2022, 2023]\n", "# plot the cumulative streamflow hydrograph\n", - "fig, ax = plt.subplots(figsize=(10, 5))\n", + "fig, ax = plt.subplots(figsize = (10, 5))\n", "ax = hyswap.plots.plot_cumulative_hydrograph(\n", - " flow_data,\n", - " data_column_name='00060_Mean',\n", - " target_years=years_to_plot,\n", - " ax=ax, \n", - " title=f'Cumulative Streamflow Hydrograph for {StaID} - {station_name}',\n", + " df = flow_data,\n", + " data_column_name = '00060_Mean',\n", + " target_years = years_to_plot,\n", + " ax = ax, \n", + " title = f'Cumulative Streamflow Hydrograph for {StaID} - {station_name}',\n", " envelope_pct = [5,95],\n", - " max_year=True, \n", - " min_year=True,\n", + " max_year = True, \n", + " min_year = True,\n", " year_type = 'water',\n", - " alpha=0.2)\n", + " alpha = 0.2)\n", "plt.show()" ] }, @@ -302,7 +310,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "Next, look at the flow duration curve for a site" + "Next, look at the flow duration curve for a site." ] }, { @@ -316,7 +324,9 @@ "values = np.linspace(flow_data['00060_Mean'].min(), flow_data['00060_Mean'].max(), 1000)\n", "# calculate exceedance probabilities from the daily mean streamflow values using only approved data\n", "exceedance_probabilities = hyswap.exceedance.calculate_exceedance_probability_from_values_multiple(\n", - " values, approved_flow_data['00060_Mean'])" + " values = values, \n", + " values_to_compare = approved_flow_data['00060_Mean']\n", + " )" ] }, { @@ -327,12 +337,12 @@ "source": [ "#| fig-cap: Example flow duration curve for a single gage\n", "# plot the flow duration curve\n", - "fig, ax = plt.subplots(figsize=(8, 6))\n", + "fig, ax = plt.subplots(figsize = (8, 6))\n", "ax = hyswap.plots.plot_flow_duration_curve(\n", - " values, \n", - " exceedance_probabilities, \n", - " ax=ax,\n", - " title=f'Flow Duration Curve for {StaID} - {station_name}')\n", + " values = values, \n", + " exceedance_probabilities = exceedance_probabilities, \n", + " ax = ax,\n", + " title = f'Flow Duration Curve for {StaID} - {station_name}')\n", "# show the plot\n", "plt.tight_layout()\n", "plt.show()" @@ -352,8 +362,10 @@ "outputs": [], "source": [ "# create data for streamflow duration hydrograph from approved flow data\n", - "percentiles_by_day = hyswap.percentiles.calculate_variable_percentile_thresholds_by_day(approved_flow_data, \n", - " \"00060_Mean\")" + "percentiles_by_day = hyswap.percentiles.calculate_variable_percentile_thresholds_by_day(\n", + " df = approved_flow_data, \n", + " data_column_name = \"00060_Mean\"\n", + " )" ] }, { @@ -385,12 +397,12 @@ "df_year = df_indexed[df_indexed['index_year'] == water_year_to_plot].copy()\n", "# plot data\n", "ax = hyswap.plots.plot_duration_hydrograph(\n", - " percentiles_by_day,\n", - " df_year,\n", - " \"00060_Mean\",\n", - " ax=ax,\n", - " data_label=f\"Water Year {water_year_to_plot}\",\n", - " title=f\"Streamflow Duration Hydrograph for {StaID} - {station_name}\"\n", + " percentiles_by_day = percentiles_by_day,\n", + " df = df_year,\n", + " data_col = \"00060_Mean\",\n", + " ax = ax,\n", + " data_label = f\"Water Year {water_year_to_plot}\",\n", + " title = f\"Streamflow Duration Hydrograph for {StaID} - {station_name}\"\n", ")\n", "plt.tight_layout()\n", "plt.show()" diff --git a/example_notebooks/single_gage_n_day_flows_analysis.ipynb b/example_notebooks/single_gage_n_day_flows_analysis.ipynb index 716ca98..a13165b 100644 --- a/example_notebooks/single_gage_n_day_flows_analysis.ipynb +++ b/example_notebooks/single_gage_n_day_flows_analysis.ipynb @@ -51,8 +51,11 @@ "outputs": [], "source": [ "StaID = '04286000'\n", - "flow_data = nwis.get_record(sites=StaID, parameterCd='00060', start='1900-01-01', service='dv')\n", - "station_name = nwis.get_record(sites=StaID, service='site').loc[0, 'station_nm']" + "flow_data = nwis.get_record(sites = StaID, \n", + " parameterCd = '00060', \n", + " start = '1900-01-01', \n", + " service = 'dv')\n", + "station_name = nwis.get_record(sites = StaID, service = 'site').loc[0, 'station_nm']" ] }, { @@ -111,14 +114,14 @@ "plot_start = \"2021-10-01\"\n", "plot_end = \"2023-09-30\"\n", "# make plot\n", - "fig, ax = plt.subplots(figsize=(10,4))\n", - "ax = hyswap.plot_hydrograph(flow_data_nday, \n", - " data_col = \"00060_Mean\", \n", - " start_date=plot_start,\n", - " end_date=plot_end,\n", - " title=f'{n}-Day Average Streamflow Hydrograph for {StaID} - {station_name}',\n", - " ylab=f\"{n}-Day Average Streamflow (cfs)\",\n", - " ax = ax)\n", + "fig, ax = plt.subplots(figsize = (10,4))\n", + "ax = hyswap.plot_hydrograph(df = flow_data_nday, \n", + " data_col = \"00060_Mean\", \n", + " start_date = plot_start, \n", + " end_date = plot_end, \n", + " title = f'{n}-Day Average Streamflow Hydrograph for {StaID} - {station_name}', \n", + " ylab = f\"{n}-Day Average Streamflow (cfs)\", \n", + " ax = ax)\n", "plt.show()" ] }, @@ -137,11 +140,11 @@ "source": [ "# try a raster hydrograph to see historical n-day flows\n", "# format the data from the last 50 years of records (1972-2023)\n", - "df_formatted = hyswap.rasterhydrograph.format_data(flow_data, \n", - " '00060_Mean', \n", - " year_type=\"calendar\", \n", - " begin_year=1973, \n", - " end_year=2023, \n", + "df_formatted = hyswap.rasterhydrograph.format_data(df = flow_data, \n", + " data_column_name = '00060_Mean', \n", + " year_type = \"calendar\", \n", + " begin_year = 1973, \n", + " end_year = 2023, \n", " data_type = \"7-day\")" ] }, @@ -154,13 +157,13 @@ "#| fig-cap: Sample raster hydrograph showing past 50 years of 7-day average streamflow\n", "\n", "# make plot\n", - "fig = plt.figure(figsize=(8,12))\n", + "fig = plt.figure(figsize = (8,12))\n", "ax = fig.add_subplot()\n", "ax = hyswap.plots.plot_raster_hydrograph(\n", - " df_formatted, \n", - " ax=ax,\n", - " title=f\"7-Day Average Raster Hydrograph for {StaID} - {station_name}\",\n", - " cbarlab='7-Day Average Streamflow, cubic feet per second')\n", + " df_formatted = df_formatted, \n", + " ax = ax,\n", + " title = f\"7-Day Average Raster Hydrograph for {StaID} - {station_name}\",\n", + " cbarlab = '7-Day Average Streamflow, cubic feet per second')\n", "plt.show()" ] }, @@ -180,15 +183,17 @@ "outputs": [], "source": [ "# get year/doy information\n", - "df_indexed = hyswap.utils.define_year_doy_columns(flow_data,\n", - " year_type='water',\n", - " clip_leap_day=True)\n", + "df_indexed = hyswap.utils.define_year_doy_columns(df_in = flow_data, \n", + " year_type = 'water', \n", + " clip_leap_day = True)\n", "year_to_plot = 2023\n", "# filter down to data from year to plot\n", "df_year = df_indexed[df_indexed['index_year'] == year_to_plot].copy()\n", "# calculate variable percentile thresholds (using only approved data)\n", "percentiles_by_day = hyswap.percentiles.calculate_variable_percentile_thresholds_by_day(\n", - " approved_flow_data, \"00060_Mean\")" + " df = approved_flow_data, \n", + " data_column_name = \"00060_Mean\"\n", + " )" ] }, { @@ -199,16 +204,16 @@ "source": [ "#| fig-cap: Example streamflow duration hydrograph with daily streamflow for the 2023 Water Year\n", "# plot daily percentiles\n", - "fig, ax = plt.subplots(figsize=(12, 8))\n", + "fig, ax = plt.subplots(figsize = (12, 8))\n", "ax = hyswap.plots.plot_duration_hydrograph(\n", - " percentiles_by_day,\n", - " df_year,\n", - " \"00060_Mean\",\n", - " ax=ax,\n", - " data_label=f\"Water Year {year_to_plot}\",\n", - " title=f\"Streamflow Duration Hydrograph for {StaID} - {station_name}\",\n", - " xlab=\"\",\n", - " ylab=\"Daily Streamflow (cfs)\"\n", + " percentiles_by_day = percentiles_by_day,\n", + " df = df_year,\n", + " data_col = \"00060_Mean\",\n", + " ax = ax,\n", + " data_label = f\"Water Year {year_to_plot}\",\n", + " title = f\"Streamflow Duration Hydrograph for {StaID} - {station_name}\",\n", + " xlab = \"\",\n", + " ylab = \"Daily Streamflow (cfs)\"\n", ")\n", "plt.show()" ] @@ -221,19 +226,23 @@ "source": [ "#| fig-cap: Example streamflow duration hydrograph with 7-day average streamflow for the 2023 Water Year\n", "percentiles_by_nday = hyswap.percentiles.calculate_variable_percentile_thresholds_by_day(\n", - " approved_flow_data, \"00060_Mean\", data_type='7-day')\n", + " df = approved_flow_data, \n", + " data_column_name = \"00060_Mean\", \n", + " data_type = '7-day')\n", "n = 7\n", "# plot 7-day avg percentiles\n", "fig, ax = plt.subplots(figsize=(12, 8))\n", "ax = hyswap.plots.plot_duration_hydrograph(\n", - " percentiles_by_nday,\n", - " hyswap.utils.rolling_average(df_year, \"00060_Mean\", n),\n", - " \"00060_Mean\",\n", - " ax=ax,\n", - " data_label=f\"Water Year {year_to_plot}\",\n", - " title=f\"{n}-Day Avg. Streamflow Duration Hydrograph for {StaID} - {station_name}\",\n", - " xlab=\"\",\n", - " ylab=f\"{n}-Day Avg. Streamflow (cfs)\"\n", + " percentiles_by_day = percentiles_by_nday,\n", + " df = hyswap.utils.rolling_average(df = df_year, \n", + " data_column_name = \"00060_Mean\", \n", + " data_type = n),\n", + " data_col = \"00060_Mean\",\n", + " ax = ax,\n", + " data_label = f\"Water Year {year_to_plot}\",\n", + " title = f\"{n}-Day Avg. Streamflow Duration Hydrograph for {StaID} - {station_name}\",\n", + " xlab = \"\",\n", + " ylab = f\"{n}-Day Avg. Streamflow (cfs)\"\n", ")\n", "plt.show()" ] @@ -246,19 +255,23 @@ "source": [ "#| fig-cap: Example streamflow duration hydrograph with 28-day average streamflow for the 2023 Water Year\n", "percentiles_by_nday = hyswap.percentiles.calculate_variable_percentile_thresholds_by_day(\n", - " approved_flow_data, \"00060_Mean\", data_type='28-day')\n", + " df = approved_flow_data, \n", + " data_column_name = \"00060_Mean\", \n", + " data_type = '28-day')\n", "n = 28\n", "# plot 28-day average percentiles\n", - "fig, ax = plt.subplots(figsize=(12, 8))\n", + "fig, ax = plt.subplots(figsize = (12, 8))\n", "ax = hyswap.plots.plot_duration_hydrograph(\n", - " percentiles_by_nday,\n", - " hyswap.utils.rolling_average(df_year, \"00060_Mean\", n),\n", - " \"00060_Mean\",\n", - " ax=ax,\n", - " data_label=f\"Water Year {year_to_plot}\",\n", - " title=f\"{n}-Day Avg. Streamflow Duration Hydrograph for {StaID} - {station_name}\",\n", - " xlab=\"\",\n", - " ylab=f\"{n}-Day Avg. Streamflow (cfs)\"\n", + " percentiles_by_day = percentiles_by_nday,\n", + " df = hyswap.utils.rolling_average(df = df_year, \n", + " data_column_name = \"00060_Mean\", \n", + " data_type = n),\n", + " data_col = \"00060_Mean\",\n", + " ax = ax,\n", + " data_label = f\"Water Year {year_to_plot}\",\n", + " title = f\"{n}-Day Avg. Streamflow Duration Hydrograph for {StaID} - {station_name}\",\n", + " xlab = \"\",\n", + " ylab = f\"{n}-Day Avg. Streamflow (cfs)\"\n", ")\n", "plt.show()" ] diff --git a/example_notebooks/site_conditions_examples.ipynb b/example_notebooks/site_conditions_examples.ipynb index d8c89c9..93a1e70 100644 --- a/example_notebooks/site_conditions_examples.ipynb +++ b/example_notebooks/site_conditions_examples.ipynb @@ -102,7 +102,7 @@ " gage_df['Date'] = gage_df['datetime'].dt.strftime('%Y-%m-%d')\n", " gage_df = gage_df.drop('datetime', axis=1)\n", " # create colormap for map from hyswap schema\n", - " schema = hyswap.utils.retrieve_schema(map_schema)\n", + " schema = hyswap.utils.retrieve_schema(schema_name = map_schema)\n", " flow_cond_cmap = schema['colors']\n", " if 'low_color' in schema:\n", " flow_cond_cmap = [schema['low_color']] + flow_cond_cmap\n", @@ -158,7 +158,10 @@ "#| tbl-cap: List of streamgage sites active within the last week\n", "state = 'VT'\n", "# Query NWIS for what streamgage sites were active within the last week\n", - "active_nwis_sites, _ = nwis.what_sites(stateCd=state, parameterCd='00060', period=\"P1W\", siteType='ST')\n", + "active_nwis_sites, _ = nwis.what_sites(stateCd = state, \n", + " parameterCd = '00060', \n", + " period = \"P1W\", \n", + " siteType = 'ST')\n", "# Find gages present in both of the above queries to get a list of gages that were\n", "# recently active that also have records from more than 10 years ago\n", "sites = active_nwis_sites#[active_nwis_sites['site_no'].isin(nwis_sites['site_no'])]\n", @@ -182,8 +185,12 @@ "# create a python dictionary of dataframes by site id number\n", "flow_data = {}\n", "\n", - "for StaID in tqdm(sites['site_no'], disable=disable_tqdm, desc=\"Downloading NWIS Flow Data for Sites\"):\n", - " flow_data[StaID] = nwis.get_record(sites=StaID, parameterCd='00060', start=\"1900-01-01\", end=\"2023-10-01\", service='dv')" + "for StaID in tqdm(sites['site_no'], disable = disable_tqdm, desc = \"Downloading NWIS Flow Data for Sites\"):\n", + " flow_data[StaID] = nwis.get_record(sites = StaID, \n", + " parameterCd = '00060', \n", + " start = \"1900-01-01\", \n", + " end = \"2023-10-01\", \n", + " service = 'dv')" ] }, { @@ -203,7 +210,7 @@ "# Define what percentile levels (thresholds) that we want to calculate.\n", "# Intervals of 5 or less are recommended to have sufficient levels to interpolate between in later calculations. \n", "# Note that 0 and 100 percentile levels are ignored. Refer to min and max values returned instead\n", - "percentile_levels = np.concatenate((np.array([1]), np.arange(5,96,5), np.array([99])), axis=0)\n", + "percentile_levels = np.concatenate((np.array([1]), np.arange(5,96,5), np.array([99])), axis = 0)\n", "print(percentile_levels)" ] }, @@ -214,12 +221,16 @@ "outputs": [], "source": [ "percentile_values = {}\n", - "for StaID in tqdm(sites['site_no'], disable=disable_tqdm, desc=\"Processing Sites\"):\n", + "for StaID in tqdm(sites['site_no'], disable = disable_tqdm, desc = \"Processing Sites\"):\n", " if '00060_Mean' in flow_data[StaID].columns:\n", " # Filter data as only approved data in NWIS should be used to calculate statistics\n", - " df = hyswap.utils.filter_approved_data(flow_data[StaID], '00060_Mean_cd')\n", + " df = hyswap.utils.filter_approved_data(data = flow_data[StaID], \n", + " filter_column = '00060_Mean_cd')\n", " percentile_values[StaID] = hyswap.percentiles.calculate_variable_percentile_thresholds_by_day(\n", - " df, '00060_Mean', percentiles=percentile_levels)\n", + " df = df, \n", + " data_column_name = '00060_Mean', \n", + " percentiles = percentile_levels\n", + " )\n", " else:\n", " print('No standard discharge data column found for site ' + StaID + ', skipping')" ] @@ -251,9 +262,14 @@ "metadata": {}, "outputs": [], "source": [ - "yesterday = datetime.strftime(datetime.now(tz=ZoneInfo(\"US/Eastern\")) - timedelta(1), '%Y-%m-%d')\n", - "recent_dvs = nwis.get_record(sites=sites['site_no'].tolist(), parameterCd='00060', start=yesterday, end=yesterday, service='dv')\n", - "recent_dvs = qaqc_nwis_data(recent_dvs, '00060_Mean')" + "yesterday = datetime.strftime(datetime.now(tz = ZoneInfo(\"US/Eastern\")) - timedelta(1), '%Y-%m-%d')\n", + "recent_dvs = nwis.get_record(sites = sites['site_no'].tolist(), \n", + " parameterCd = '00060', \n", + " start = yesterday, \n", + " end = yesterday, \n", + " service = 'dv')\n", + "recent_dvs = qaqc_nwis_data(df = recent_dvs, \n", + " data_col = '00060_Mean')" ] }, { @@ -272,17 +288,22 @@ "source": [ "# estimate percentiles\n", "df = pd.DataFrame()\n", - "for StaID, site_df in recent_dvs.groupby(level=\"site_no\", group_keys=False):\n", + "for StaID, site_df in recent_dvs.groupby(level = \"site_no\", group_keys = False):\n", " if StaID in list(percentile_values.keys()):\n", " if not percentile_values[StaID].isnull().all().all():\n", " percentiles = hyswap.percentiles.calculate_multiple_variable_percentiles_from_values(\n", - " site_df,'00060_Mean', percentile_values[StaID])\n", + " df = site_df,\n", + " data_column_name = '00060_Mean', \n", + " percentile_df = percentile_values[StaID]\n", + " )\n", " df = pd.concat([df, percentiles])\n", "# categorize streamflow by the estimated streamflow percentiles\n", - "df = hyswap.utils.categorize_flows(df, 'est_pct', schema_name=\"NWD\")\n", - "df = df.reset_index(level='datetime')\n", + "df = hyswap.utils.categorize_flows(df = df, \n", + " percentile_col = 'est_pct', \n", + " schema_name = \"NWD\")\n", + "df = df.reset_index(level = 'datetime')\n", "# Prep Data for mapping by joining site information and flow data \n", - "gage_df = pd.merge(sites, df, how=\"right\", on=\"site_no\")" + "gage_df = pd.merge(sites, df, how = \"right\", on = \"site_no\")" ] }, { @@ -299,7 +320,10 @@ "outputs": [], "source": [ "#| fig-cap: Map showing most recent daily mean streamflow and corresponding flow conditions\n", - "map = create_gage_condition_map(gage_df, '00060_Mean', 'NWD', 'Current Daily Mean')\n", + "map = create_gage_condition_map(gage_df = gage_df, \n", + " flow_data_col = '00060_Mean', \n", + " map_schema = 'NWD', \n", + " streamflow_data_type = 'Current Daily Mean')\n", "display(map)" ] }, @@ -319,7 +343,10 @@ "#| fig-cap: Map showing most recent daily mean streamflow and corresponding flow conditions using a brown-blue schema\n", "\n", "# Prep Data for mapping by joining site information and flow data \n", - "map = create_gage_condition_map(gage_df, '00060_Mean', 'WaterWatch_BrownBlue', 'Current Daily Mean')\n", + "map = create_gage_condition_map(gage_df = gage_df, \n", + " flow_data_col = '00060_Mean', \n", + " map_schema = 'WaterWatch_BrownBlue', \n", + " streamflow_data_type = 'Current Daily Mean')\n", "display(map)" ] }, @@ -339,8 +366,8 @@ "metadata": {}, "outputs": [], "source": [ - "recent_ivs = nwis.get_record(sites=sites['site_no'].tolist(), parameterCd='00060', service='iv')\n", - "recent_ivs = qaqc_nwis_data(recent_ivs, '00060')" + "recent_ivs = nwis.get_record(sites=sites['site_no'].tolist(), parameterCd = '00060', service = 'iv')\n", + "recent_ivs = qaqc_nwis_data(df = recent_ivs, data_col = '00060')" ] }, { @@ -359,18 +386,23 @@ "source": [ "# estimate percentiles\n", "df = pd.DataFrame()\n", - "for StaID, site_df in recent_ivs.groupby(level=\"site_no\", group_keys=False):\n", + "for StaID, site_df in recent_ivs.groupby(level = \"site_no\", group_keys = False):\n", " if StaID in list(percentile_values.keys()):\n", " if not percentile_values[StaID].isnull().all().all():\n", " percentiles = hyswap.percentiles.calculate_multiple_variable_percentiles_from_values(\n", - " site_df,'00060', percentile_values[StaID])\n", + " df = site_df,\n", + " data_column_name = '00060', \n", + " percentile_df = percentile_values[StaID]\n", + " )\n", " df = pd.concat([df, percentiles])\n", "# categorize streamflow by the estimated streamflow percentiles\n", - "df = hyswap.utils.categorize_flows(df, 'est_pct', schema_name=\"NWD\")\n", - "df = df.tz_convert(tz='US/Eastern')\n", - "df = df.reset_index(level='datetime')\n", + "df = hyswap.utils.categorize_flows(df = df, \n", + " percentile_col = 'est_pct', \n", + " schema_name = \"NWD\")\n", + "df = df.tz_convert(tz = 'US/Eastern')\n", + "df = df.reset_index(level = 'datetime')\n", "# Prep Data for mapping by joining site information and flow data \n", - "gage_df = pd.merge(sites, df, how=\"right\", on=\"site_no\")" + "gage_df = pd.merge(sites, df, how = \"right\", on = \"site_no\")" ] }, { @@ -388,7 +420,10 @@ "source": [ "#| fig-cap: Map showing most real-time streamflow conditions\n", "\n", - "map = create_gage_condition_map(gage_df, '00060', 'NWD', 'Real-Time Instantaneous')\n", + "map = create_gage_condition_map(gage_df = gage_df, \n", + " flow_data_col = '00060', \n", + " map_schema = 'NWD', \n", + " streamflow_data_type = 'Real-Time Instantaneous')\n", "display(map)" ] }, @@ -409,16 +444,20 @@ "outputs": [], "source": [ "past_dvs = nwis.get_record(\n", - " sites=sites['site_no'].tolist(), \n", - " parameterCd='00060',\n", - " start=datetime.strftime(datetime.now(tz=ZoneInfo(\"US/Eastern\")) - timedelta(7), '%Y-%m-%d'),\n", - " end=yesterday,\n", - " service='dv'\n", + " sites = sites['site_no'].tolist(), \n", + " parameterCd = '00060',\n", + " start = datetime.strftime(datetime.now(tz = ZoneInfo(\"US/Eastern\")) - timedelta(7), '%Y-%m-%d'),\n", + " end = yesterday,\n", + " service = 'dv'\n", ")\n", - "past_dvs = qaqc_nwis_data(past_dvs, '00060_Mean')\n", + "past_dvs = qaqc_nwis_data(df = past_dvs, \n", + " data_col = '00060_Mean')\n", "past_dvs_7d = past_dvs.copy()\n", - "for StaID, new_df in past_dvs.groupby(level=0):\n", - " past_dvs_7d.loc[StaID] = hyswap.utils.rolling_average(new_df, '00060_Mean', 7).round(2)\n", + "for StaID, new_df in past_dvs.groupby(level = 0):\n", + " past_dvs_7d.loc[StaID] = hyswap.utils.rolling_average(\n", + " df = new_df,\n", + " data_column_name = '00060_Mean', \n", + " data_type = 7).round(2)\n", "\n", "past_dvs_7d = past_dvs_7d.dropna()" ] @@ -439,7 +478,10 @@ "flow_data_7d = {}\n", "for StaID in tqdm(sites['site_no'], disable=disable_tqdm):\n", " if '00060_Mean' in flow_data[StaID].columns:\n", - " flow_data_7d[StaID] = hyswap.utils.rolling_average(flow_data[StaID], '00060_Mean', 7).round(2)\n", + " flow_data_7d[StaID] = hyswap.utils.rolling_average(\n", + " df = flow_data[StaID], \n", + " data_column_name = '00060_Mean', \n", + " data_type = 7).round(2)\n", " else:\n", " print('No standard discharge data column found for site ' + StaID + ', skipping')" ] @@ -451,10 +493,12 @@ "outputs": [], "source": [ "percentile_values_7d = {}\n", - "for StaID in tqdm(sites['site_no'], disable=disable_tqdm, desc=\"Processing\"):\n", + "for StaID in tqdm(sites['site_no'], disable = disable_tqdm, desc = \"Processing\"):\n", " if '00060_Mean' in flow_data[StaID].columns:\n", " # Filter data as only approved data in NWIS should be used to calculate statistics\n", - " df = hyswap.utils.filter_approved_data(flow_data_7d[StaID], '00060_Mean_cd')\n", + " df = hyswap.utils.filter_approved_data(\n", + " data = flow_data_7d[StaID], \n", + " filter_column = '00060_Mean_cd')\n", " percentile_values_7d[StaID] = hyswap.percentiles.calculate_variable_percentile_thresholds_by_day(\n", " df, '00060_Mean', percentiles=percentile_levels)\n", " else:\n", @@ -477,19 +521,24 @@ "source": [ "# estimate percentiles\n", "df = pd.DataFrame()\n", - "for StaID, site_df in past_dvs_7d.groupby(level=\"site_no\", group_keys=False):\n", + "for StaID, site_df in past_dvs_7d.groupby(level = \"site_no\", group_keys = False):\n", " if StaID in list(percentile_values_7d.keys()):\n", " if not percentile_values[StaID].isnull().all().all():\n", " percentiles = hyswap.percentiles.calculate_multiple_variable_percentiles_from_values(\n", - " site_df,'00060_Mean', percentile_values[StaID])\n", + " df = site_df,\n", + " data_column_name = '00060_Mean', \n", + " percentile_df = percentile_values[StaID]\n", + " )\n", " df = pd.concat([df, percentiles])\n", "# categorize streamflow by the estimated streamflow percentiles\n", - "df = hyswap.utils.categorize_flows(df, 'est_pct', schema_name=\"NWD\")\n", + "df = hyswap.utils.categorize_flows(df = df, \n", + " percentile_col = 'est_pct', \n", + " schema_name = \"NWD\")\n", "# keep only most recent 7-day average flow for plotting\n", "df = df[df.index.get_level_values('datetime') == yesterday]\n", "df = df.reset_index(level='datetime')\n", "# Prep Data for mapping by joining site information and flow data \n", - "gage_df = pd.merge(sites, df, how=\"right\", on=\"site_no\")" + "gage_df = pd.merge(sites, df, how = \"right\", on = \"site_no\")" ] }, { @@ -507,7 +556,10 @@ "source": [ "#| fig-cap: Map showing most recent 7-day average streamflow and corresponding flow conditions\n", "\n", - "map = create_gage_condition_map(gage_df, '00060_Mean', 'NWD', 'Current 7-Day Average')\n", + "map = create_gage_condition_map(gage_df = gage_df, \n", + " flow_data_col = '00060_Mean', \n", + " map_schema = 'NWD', \n", + " streamflow_data_type = 'Current 7-Day Average')\n", "display(map)" ] }, @@ -529,12 +581,13 @@ "source": [ "past_day = \"2023-05-30\"\n", "\n", - "past_dvs = nwis.get_record(sites=sites['site_no'].tolist(),\n", - " parameterCd='00060',\n", - " start=past_day,\n", - " end=past_day,\n", - " service='dv')\n", - "past_dvs = qaqc_nwis_data(past_dvs, '00060_Mean')" + "past_dvs = nwis.get_record(sites = sites['site_no'].tolist(),\n", + " parameterCd = '00060',\n", + " start = past_day,\n", + " end = past_day,\n", + " service = 'dv')\n", + "past_dvs = qaqc_nwis_data(df = past_dvs, \n", + " data_col = '00060_Mean')" ] }, { @@ -553,17 +606,22 @@ "# Calculate estimated streamflow percentile for the new data by interpolating against\n", "# the previously calculated percentile threshold levels\n", "df = pd.DataFrame()\n", - "for StaID, site_df in past_dvs.groupby(level=\"site_no\", group_keys=False):\n", + "for StaID, site_df in past_dvs.groupby(level = \"site_no\", group_keys = False):\n", " if StaID in list(percentile_values.keys()):\n", " if not percentile_values[StaID].isnull().all().all():\n", " percentiles = hyswap.percentiles.calculate_multiple_variable_percentiles_from_values(\n", - " site_df,'00060_Mean', percentile_values[StaID])\n", + " df = site_df,\n", + " data_column_name = '00060_Mean', \n", + " percentile_df = percentile_values[StaID]\n", + " )\n", " df = pd.concat([df, percentiles])\n", "# categorize streamflow by the estimated streamflow percentiles\n", - "df = hyswap.utils.categorize_flows(df, 'est_pct', schema_name=\"WaterWatch_Drought\")\n", - "df = df.reset_index(level='datetime')\n", + "df = hyswap.utils.categorize_flows(df = df, \n", + " percentile_col = 'est_pct', \n", + " schema_name = \"WaterWatch_Drought\")\n", + "df = df.reset_index(level = 'datetime')\n", "# Prep Data for mapping by joining site information and flow data \n", - "gage_df = pd.merge(sites, df, how=\"right\", on=\"site_no\")" + "gage_df = pd.merge(sites, df, how = \"right\", on = \"site_no\")" ] }, { @@ -580,7 +638,10 @@ "outputs": [], "source": [ "#| fig-cap: Map showing historical daily mean streamflow and corresponding flow conditions using a drought categorization schema\n", - "map = create_gage_condition_map(gage_df, '00060_Mean', 'WaterWatch_Drought', 'Daily Mean')\n", + "map = create_gage_condition_map(gage_df = gage_df, \n", + " flow_data_col = '00060_Mean', \n", + " map_schema = 'WaterWatch_Drought', \n", + " streamflow_data_type = 'Daily Mean')\n", "display(map)" ] }, @@ -603,12 +664,13 @@ "source": [ "past_day = \"2023-07-10\"\n", "\n", - "past_dvs = nwis.get_record(sites=sites['site_no'].tolist(),\n", - " parameterCd='00060',\n", - " start=past_day,\n", - " end=past_day,\n", - " service='dv')\n", - "past_dvs = qaqc_nwis_data(past_dvs, '00060_Mean')" + "past_dvs = nwis.get_record(sites = sites['site_no'].tolist(),\n", + " parameterCd = '00060',\n", + " start = past_day,\n", + " end = past_day,\n", + " service = 'dv')\n", + "past_dvs = qaqc_nwis_data(df = past_dvs, \n", + " data_col = '00060_Mean')" ] }, { @@ -622,10 +684,12 @@ "for StaID in tqdm(sites['site_no'], disable=disable_tqdm):\n", " if '00060_Mean' in flow_data[StaID].columns:\n", " # Filter data as only approved data in NWIS should be used to calculate statistics\n", - " df = hyswap.utils.filter_approved_data(flow_data[StaID], '00060_Mean_cd')\n", + " df = hyswap.utils.filter_approved_data(data = flow_data[StaID], \n", + " filter_column = '00060_Mean_cd')\n", " if not df.empty:\n", " fixed_percentile_values[StaID] = hyswap.percentiles.calculate_fixed_percentile_thresholds(\n", - " df['00060_Mean'], percentiles=percentile_levels)\n", + " data = df['00060_Mean'], \n", + " percentiles = percentile_levels)\n", " else:\n", " print(StaID + ' has no approved data, skipping')\n", " else:\n", @@ -649,12 +713,15 @@ "for StaID in past_dvs.index.get_level_values(0):\n", " if StaID in list(fixed_percentile_values.keys()):\n", " past_dvs.at[(StaID, past_day), 'est_pct'] = hyswap.percentiles.calculate_fixed_percentile_from_value(\n", - " past_dvs.at[(StaID, past_day), '00060_Mean'], fixed_percentile_values[StaID])\n", + " value = past_dvs.at[(StaID, past_day), '00060_Mean'], \n", + " percentile_df = fixed_percentile_values[StaID])\n", "# categorize streamflow by the estimated streamflow percentiles\n", - "df = hyswap.utils.categorize_flows(past_dvs, 'est_pct', schema_name=\"WaterWatch_Flood\")\n", - "df = df.reset_index(level='datetime')\n", + "df = hyswap.utils.categorize_flows(df = past_dvs, \n", + " percentile_col = 'est_pct', \n", + " schema_name = \"WaterWatch_Flood\")\n", + "df = df.reset_index(level = 'datetime')\n", "# Prep Data for mapping by joining site information and flow data \n", - "gage_df = pd.merge(sites, df, how=\"right\", on=\"site_no\")" + "gage_df = pd.merge(sites, df, how = \"right\", on = \"site_no\")" ] }, { @@ -671,7 +738,10 @@ "outputs": [], "source": [ "#| fig-cap: Map showing historical daily mean streamflow and corresponding flow conditions using a high-flow categorization schema\n", - "map = create_gage_condition_map(gage_df, '00060_Mean', 'WaterWatch_Flood', 'Daily Mean')\n", + "map = create_gage_condition_map(gage_df = gage_df, \n", + " flow_data_col = '00060_Mean', \n", + " map_schema = 'WaterWatch_Flood', \n", + " streamflow_data_type = 'Daily Mean')\n", "display(map)" ] } diff --git a/regional_runoff_calculations.ipynb b/regional_runoff_calculations.ipynb deleted file mode 100644 index ee19e69..0000000 --- a/regional_runoff_calculations.ipynb +++ /dev/null @@ -1,558 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": { - "vscode": { - "languageId": "raw" - } - }, - "source": [ - "# Using hyswap to estimate runoff for one and multiple HUC08s" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "This notebook demonstrates how to use the [hyswap](https://doi-usgs.github.io/hyswap/) python package to estimate runoff over 10 water years (2013-2023) for a set of hydrologic units using streamflow measured at gages that overlap with the hydrologic units of interest. \n", - "\n", - "This example notebook relies on use of the [dataretrieval](https://github.com/DOI-USGS/dataRetrieval) package for downloading streamflow information from USGS NWIS.\n", - "\n", - "Hydrologic units will be referred to as \"hucs\" and the drainage area captured by a streamflow gage will be referred to as a \"basin\". " - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "import dataretrieval\n", - "import hyswap\n", - "import pandas as pd\n", - "import matplotlib.pyplot as plt\n", - "import matplotlib.patches\n", - "from pynhd import WaterData\n", - "import warnings\n", - "warnings.filterwarnings('ignore') # ignore warnings from dataretrieval" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Read in huc-basin intersection table\n", - "\n", - "The huc-basin intersection dataset is created using shapefiles for hucs and site drainage basins, and is the output of the [hyswap geospatial data assembly repository](https://code.usgs.gov/water/computational-tools/surface-water-work/hyswap-geospatial-data-assembly) . For each huc-basin intersection, it contains the proportion of the huc's area in the basin, and the proportion of the basin's area in the huc. You can find this file in the 'example_data' folder within the 'example_notebooks' folder in the `hyswap` repository. " - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "#| tbl-cap: The top rows of the intersection table created by the hyswap geospatial data assembly repository\n", - "# This example initially reads in the entire huc_basin_intersection table.\n", - "intersection_table_full = pd.read_csv(\n", - " filepath_or_buffer = 'example_data/huc_props_tbl_conus.csv', \n", - " converters = {0:str,1:str,2:str}\n", - " )\n", - "\n", - "# drop first col that is the index from the csv. \n", - "intersection_table_full.drop(\n", - " columns = intersection_table_full.columns[0],\n", - " axis = 1,\n", - " inplace = True\n", - " )\n", - "# view\n", - "intersection_table_full.head()" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Select watershed to estimate runoff\n", - "This example focuses on a bounding box of HUC08's in Montana and Idaho. We will first filter to one of the hucs to show how to estimate runoff for a single huc, but later on we will estimate runoff for all hucs in the dataset. We will also establish the start and end dates over which we'd like to estimate runoff." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "huc_shapes = WaterData('huc08').bybox([-115.065380, 45.947037, -112.692334, 47.572536])" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "#| tbl-cap: The first few rows of the geopandas dataframe from `pynhd`\n", - "huc_shapes.head()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "#| fig-cap: 'A map of the single HUC08, 17010209'\n", - "single_huc = huc_shapes['huc8'][9]\n", - "\n", - "start_date = '2012-10-01'\n", - "end_date = '2023-09-30'\n", - "\n", - "huc_shapes[huc_shapes['huc8'] == single_huc].explore()" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "If we wanted to estimate runoff for one huc, we would first identify which basins intersect the huc using the `identify_sites_from_geom_intersection` function. From this list of basins, we would then download their corresponding gage streamflow data. " - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# Pull basin site ids from selected huc8 geom using the hyswap runoff identify sites from geom intersection function\n", - "basins_overlap_single_huc = hyswap.runoff.identify_sites_from_geom_intersection(\n", - " geom_id = single_huc,\n", - " geom_intersection_df = intersection_table_full,\n", - " geom_id_col = 'huc_id',\n", - " site_col = 'da_site_no',\n", - " prop_geom_in_basin_col = 'prop_huc_in_basin',\n", - " prop_basin_in_geom_col = 'prop_basin_in_huc'\n", - " )\n", - "\n", - "# output should be long list of sites ids. These are all the site_ids within the selected huc8 polygon\n", - "num_sites = len(basins_overlap_single_huc)\n", - "print(f'There are {num_sites} gaged basins that overlap this huc.')" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Custom function for grabbing gage flow data from `dataretrieval` and converting it to runoff \n", - "To estimate runoff for a huc, we need to create a dictionary of runoff data, where each key corresponds to a site id for a basin, and the item is a dataframe with a datetime index and runoff values for that site in a 'runoff' column. The function below leverages `dataretrieval` to pull streamflow data for a set of input sites over a specified date range, and then uses `hyswap`'s `streamflow_to_runoff` function in the `runoff` module to estimate area-based runoff using site drainage areas. It produces runoff values in millimeters per day. Note that this function can take some time, depending upon the size/number of queries. Additionally, it is not unusual for many sites to return no data." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "def query_nwis_runoff_data(\n", - " sites,\n", - " start_date,\n", - " end_date):\n", - " print(\"Pulling site streamflow data and converting it to runoff. This may take some time...\")\n", - " # first, pull site info \n", - " info_df, _ = dataretrieval.nwis.get_info(sites = sites)\n", - " # convert drainage area from sq mi to sq km\n", - " info_df['da'] = info_df['drain_area_va'] * 2.58998811\n", - " # info_df = info_df[['da', 'site_no']]\n", - " # get streamflow data between start and end date\n", - " dv_df = dataretrieval.nwis.get_record(\n", - " sites = sites, \n", - " parameterCd = '00060',\n", - " start = start_date, \n", - " end = end_date,\n", - " multi_index = False,\n", - " service = 'dv'\n", - " )\n", - " df_nwis_ro_data = pd.DataFrame()\n", - "\n", - " if not dv_df.empty:\n", - " # get site ids from dv_df and create empty\n", - " # df to hold site runoff data\n", - " siteids = dv_df['site_no'].unique().tolist()\n", - "\n", - " # Loop through sites retrieved from nwis and estimate\n", - " # runoff using hyswap function\n", - " for site in siteids:\n", - " ro_df = dv_df[dv_df['site_no'] == site]\n", - " da = info_df.loc[info_df['site_no'] == site]['da']\n", - " ro_df = hyswap.runoff.streamflow_to_runoff(df = ro_df, \n", - " data_col = '00060_Mean', \n", - " drainage_area = da, \n", - " frequency = 'daily')\n", - " df_nwis_ro_data = pd.concat([df_nwis_ro_data, ro_df])\n", - " # print proportion of sites with data\n", - " prop = len(siteids)/len(sites)\n", - " print(f'\\nProp of successful nwis queries from list of sites:\\n {prop}')\n", - " else:\n", - " print(f\"No site data available, returning empty dataframe\")\n", - " \n", - " return(df_nwis_ro_data)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Download gage data\n", - "Let's try using `query_nwis_runoff_data` to download streamflow data and convert it to runoff (in mm/day) from 2013-2023 for our selected huc '07090001'. Note that not all gage sites listed in `basins_overlap_single_huc` will necessarily have data for the date range specified." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "df_nwis_ro_data = query_nwis_runoff_data(\n", - " sites = basins_overlap_single_huc,\n", - " start_date = start_date,\n", - " end_date = end_date\n", - " )\n", - "display(df_nwis_ro_data[df_nwis_ro_data['site_no'] == '06078500'])" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Estimate runoff for the huc\n", - "With daily basin runoff data in hand, we are ready to estimate runoff for the huc in mm/day. Please reference `hyswap`'s Calculations page to understand how runoff is estimated using this function. Briefly, this function identifies basins contained within the huc and basins that contain the huc that have runoff data for the entire time period in the dataset. It then estimates a weighted mean runoff value for each day using applicable and available basins. If `clip_downstream_basins` is set to True, the function only uses the smallest basin that contains the huc and disregards downstream gages that represent larger basins that contain the huc. This example will loop through each water year and estimate runoff using basins with complete records for each water year." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# get water year in dataset so can loop by water year to estimate runoff\n", - "df_nwis_ro_data['water_year'] = df_nwis_ro_data.index.year\n", - "df_nwis_ro_data['water_year'][df_nwis_ro_data.index.month >= 10] = df_nwis_ro_data['water_year'] + 1\n", - " \n", - "# create df to hold estimate results\n", - "results = pd.DataFrame()\n", - " \n", - "# loop through each water year\n", - "for year, df_year in df_nwis_ro_data.groupby(df_nwis_ro_data.water_year):\n", - " df_nwis_ro_data_sub = df_nwis_ro_data[df_nwis_ro_data['water_year'] == year]\n", - "\n", - " # estimate runoff for each water year of data\n", - " single_huc_runoff = hyswap.calculate_geometric_runoff(\n", - " geom_id = single_huc,\n", - " runoff_df = df_nwis_ro_data_sub,\n", - " geom_intersection_df = intersection_table_full,\n", - " site_col = 'da_site_no',\n", - " geom_id_col = 'huc_id',\n", - " prop_geom_in_basin_col = 'prop_huc_in_basin',\n", - " prop_basin_in_geom_col = 'prop_basin_in_huc',\n", - " percentage = False,\n", - " clip_downstream_basins = False,\n", - " full_overlap_threshold = 0.98\n", - " )\n", - " \n", - " # concatenate with previous years runoffs\n", - " results = pd.concat([results, single_huc_runoff])" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Let's take a quick look at the estimated runoff." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "#| fig-cap: A runoff hydrograph for huc 17010209 using the `hyswap.plot_hydrograph` function\n", - "hyswap.plot_hydrograph(\n", - " results,\n", - " data_col = 'estimated_runoff',\n", - " title = f'Estimated Runoff for HUC {single_huc}',\n", - " yscale = 'linear',\n", - " ylab = 'Runoff (mm)'\n", - ")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Estimate runoff for all hucs\n", - "Now, we will estimate runoff for all of the hucs in the region selected. First, we'll identify the sites with drainage basins that intersect the seven hucs. Next, we'll need to use our custom function to download flow data and estimate runoff for each basin. Finally, we can leverage `hyswap`'s `calculate_multiple_geometric_runoff`, which loops through a list of hucs, finds their intersecting gage basins, and estimates runoff for each day in the dataset. Note that by setting the `clip_downstream_basins` argument to True, the function is only considering basins within each HUC08 and the smallest basin containing each HUC08 in the runoff estimate." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "list_site_ids = []\n", - "\n", - "# loop through to get all sites (identify_sites_from_geom_intersection() cannot accept multiple geom_ids) \n", - "for huc in huc_shapes['huc8']:\n", - " sites = hyswap.runoff.identify_sites_from_geom_intersection(\n", - " geom_id = huc,\n", - " geom_intersection_df = intersection_table_full,\n", - " geom_id_col = 'huc_id',\n", - " site_col = 'da_site_no',\n", - " prop_geom_in_basin_col = 'prop_huc_in_basin',\n", - " prop_basin_in_geom_col = 'prop_basin_in_huc'\n", - " )\n", - " \n", - " list_site_ids.append(sites)\n", - "\n", - "# join list of lists, as for loop above separates out the list of dfs \n", - "all_site_ids = sum(list_site_ids,[])" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# grab basin data using the all_site_ids list\n", - "df_nwis_ro_data = query_nwis_runoff_data(\n", - " sites = all_site_ids, \n", - " start_date = start_date,\n", - " end_date = end_date\n", - " )" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "%%capture\n", - "# get water year in dataset so can loop by water year to estimate runoff\n", - "df_nwis_ro_data['water_year'] = df_nwis_ro_data.index.year\n", - "df_nwis_ro_data['water_year'][df_nwis_ro_data.index.month >= 10] = df_nwis_ro_data['water_year'] + 1\n", - " \n", - "# create df to hold estimate results\n", - "results = pd.DataFrame()\n", - " \n", - "# loop through each water year\n", - "for year, df_year in df_nwis_ro_data.groupby(df_nwis_ro_data.water_year):\n", - " # subset data to water year\n", - " df_nwis_ro_data_sub = df_nwis_ro_data[df_nwis_ro_data['water_year'] == year]\n", - " \n", - " multiple_huc_runoff = hyswap.runoff.calculate_multiple_geometric_runoff(\n", - " geom_id_list = huc_shapes['huc8'],\n", - " runoff_df = df_nwis_ro_data,\n", - " geom_intersection_df = intersection_table_full,\n", - " site_col = 'da_site_no',\n", - " geom_id_col = 'huc_id',\n", - " prop_geom_in_basin_col = 'prop_huc_in_basin',\n", - " prop_basin_in_geom_col = 'prop_basin_in_huc',\n", - " percentage = False,\n", - " clip_downstream_basins = True,\n", - " full_overlap_threshold = 0.98\n", - " )\n", - " results = pd.concat([results, multiple_huc_runoff])" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Like our single huc runoff calculation, we can take a look at the estimated runoff across multiple hucs. Because there are 16 HUCs in this dataset, we will only plot a subset below using the `pivot_table` function from `pandas`." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "#| fig-cap: A multiple runoff hydrograph for all hucs pulled from `pynhd` in this example\n", - "# Plot \n", - "results_plot = results.pivot_table(index = results.index, \n", - " columns = 'geom_id', \n", - " values = 'estimated_runoff')\n", - "ax = results_plot.iloc[:, 10:14].plot()\n", - "ax.set_ylabel(\"Runoff (mm)\")\n", - "ax.legend(title = 'HUC ID')\n", - "plt.show()" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Mapping the results of the runoff calculation\n", - "In this step of the analysis, we will estimate variable percentiles by day for each huc, and then use those percentiles to estimate a percentile for a new estimated runoff value. We will then plot these percentiles in a map of the hucs. " - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "huc_ids = results['geom_id'].unique()\n", - "\n", - "percentile_results = {}\n", - "\n", - "for huc_id in huc_ids:\n", - " results_sub = results[results['geom_id'] == huc_id]\n", - " results_sub.sort_index(inplace = True)\n", - " percentiles = hyswap.calculate_variable_percentile_thresholds_by_day(df = results_sub, \n", - " data_column_name = 'estimated_runoff')\n", - " percentile_results[huc_id] = percentiles" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Now, we will pull a new set of gage streamflow data for the first day of the next water year, and estimate runoff." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "%%capture\n", - "start_date = '2023-10-01'\n", - "end_date = '2023-10-01'\n", - "# grab basin data using the all_site_ids list\n", - "df_nwis_ro_data_new = query_nwis_runoff_data(sites = all_site_ids, \n", - " start_date = start_date, \n", - " end_date = end_date)\n", - "\n", - "multiple_huc_runoff_new = hyswap.runoff.calculate_multiple_geometric_runoff(\n", - " geom_id_list = huc_shapes['huc8'],\n", - " runoff_df = df_nwis_ro_data_new,\n", - " geom_intersection_df = intersection_table_full,\n", - " site_col = 'da_site_no',\n", - " geom_id_col = 'huc_id',\n", - " prop_geom_in_basin_col = 'prop_huc_in_basin',\n", - " prop_basin_in_geom_col = 'prop_basin_in_huc',\n", - " percentage = False,\n", - " clip_downstream_basins = True,\n", - " full_overlap_threshold = 0.98\n", - " )" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "With runoff in hand, we can then calculate the percentile for this date for each HUC08, based on the percentiles we calculated for the previous 5-year period." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# Calculate estimated streamflow percentile for the new data by interpolating against\n", - "# the previously calculated percentile threshold levels\n", - "huc_perc_df = pd.DataFrame()\n", - "\n", - "for huc, huc_df in multiple_huc_runoff_new.groupby('geom_id', group_keys = False):\n", - " percentile_addition = hyswap.percentiles.calculate_multiple_variable_percentiles_from_values(df = huc_df,\n", - " data_column_name = 'estimated_runoff', \n", - " percentile_df = percentile_results[huc])\n", - " huc_perc_df = pd.concat([huc_perc_df, percentile_addition])\n", - "# categorize streamflow by the estimated streamflow percentiles\n", - "huc_perc_df = hyswap.utils.categorize_flows(df = huc_perc_df, \n", - " percentile_col = 'est_pct', \n", - " schema_name = \"NWD\")\n", - "huc_perc_df = huc_perc_df.reset_index(level = 'datetime')" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "And finally, create an interactive map showing HUC08 runoff percentiles for October 1, 2023." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "#| fig-cap: 'HUC08 colors correspond to the runoff percentiles for 10-01-2023 flow based on 5 years of estimated runoff data at the HUC08 scale, derived from gage basin flow data.'\n", - "# merge percentile information to geodataframe with polygon geometry\n", - "huc_shapes_percs = huc_shapes.merge(huc_perc_df.set_index('geom_id'), \n", - " left_on = 'huc8', \n", - " right_index = True)\n", - "# set up gdf format for plotting - ex. get rid of datetimes/timestamps\n", - "huc_shapes_percs['Date'] = huc_shapes_percs['datetime'].dt.strftime('%Y-%m-%d %H:%M')\n", - "schema = hyswap.utils.retrieve_schema('NWD')\n", - "flow_cond_cmap = schema['colors']\n", - "if 'low_color' in schema:\n", - " flow_cond_cmap = [schema['low_color']] + flow_cond_cmap\n", - "if 'high_color' in schema:\n", - " flow_cond_cmap = flow_cond_cmap + [schema['high_color']]\n", - "# set NA values to \"Not Ranked\" category\n", - "huc_shapes_percs['flow_cat'] = huc_shapes_percs['flow_cat'].cat.add_categories('Not Ranked')\n", - "huc_shapes_percs.loc[huc_shapes_percs['est_pct'].isna(), 'flow_cat'] = 'Not Ranked'\n", - "flow_cond_cmap = flow_cond_cmap + ['#d3d3d3'] # light grey\n", - "# renaming columns with user friendly names for map\n", - "huc_shapes_percs = huc_shapes_percs.drop(['loaddate', 'datetime'], axis = 1)\n", - "huc_shapes_percs = huc_shapes_percs.rename(columns = {'estimated_runoff' : 'Runoff (mm/day)',\n", - " 'est_pct' : 'Estimated Percentile',\n", - " 'huc8' : 'HUC08 ID',\n", - " 'name' : 'HUC08 Name',\n", - " 'flow_cat' : 'Streamflow Category'})\n", - "# Create map of runoff \n", - "huc_shapes_percs.explore(\n", - " column = \"Streamflow Category\",\n", - " cmap = flow_cond_cmap,\n", - " tooltip = [\"HUC08 ID\", 'HUC08 Name',\"Streamflow Category\", \"Runoff (mm/day)\", \"Estimated Percentile\", \"Date\"],\n", - " tiles = \"CartoDB Positron\",\n", - " marker_kwds = dict(radius = 5),\n", - " legend_kwds = dict(caption = 'Daily Mean Runoff Category')\n", - " )" - ] - } - ], - "metadata": { - "jupytext": { - "formats": "ipynb,qmd" - }, - "kernelspec": { - "display_name": "Python 3 (ipykernel)", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.11.5" - } - }, - "nbformat": 4, - "nbformat_minor": 4 -} diff --git a/single_gage_analysis.ipynb b/single_gage_analysis.ipynb deleted file mode 100644 index c213784..0000000 --- a/single_gage_analysis.ipynb +++ /dev/null @@ -1,436 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": { - "vscode": { - "languageId": "raw" - } - }, - "source": [ - "# Analysis of Streamflow Conditions and Historical Streamflows at a Single Gage" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "This notebook provides a demonstration of the use of [hywsap](https://doi-usgs.github.io/hyswap/) python package for analyzing streamflow conditions at a single streamgage including plotting of flow duration curves, duration hydrographs, and cumulative streamflow. \n", - "\n", - "This example notebook relies on use of the [dataretrieval](https://github.com/DOI-USGS/dataRetrieval) package for downloading streamflow information from USGS NWIS\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "from dataretrieval import nwis\n", - "import hyswap\n", - "import matplotlib.pyplot as plt\n", - "import numpy as np\n", - "import warnings\n", - "warnings.filterwarnings('ignore') # ignore warnings from dataretrieval" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Download streamflow data from USGS NWIS for an example site \n", - "For demonstration purposes, retrieving data for gage 04288000 - MAD RIVER AT MORETOWN, VT.\n", - "\n", - "Users can identify streamflow gage locations and site ID numbers through the [USGS National Water Dashboard](https://dashboard.waterdata.usgs.gov/app/nwd/en/)." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "StaID = '04288000'\n", - "flow_data = nwis.get_record(sites = StaID, \n", - " parameterCd = '00060', \n", - " start = '1900-01-01', \n", - " service = 'dv')\n", - "station_name = nwis.get_record(sites = StaID, service = 'site').loc[0, 'station_nm']" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "if '00060_Mean' in flow_data.columns:\n", - " # set preliminary, non-valid observations (-999999) to NaN\n", - " flow_data['00060_Mean'] = flow_data['00060_Mean'].replace(-999999, np.nan)\n", - "\n", - " # create a filtered version of data of only USGS approved data (quality-assured data that may be published and used in statistical analsysis)\n", - " approved_flow_data = hyswap.utils.filter_approved_data(data = flow_data, \n", - " filter_column = '00060_Mean_cd')\n", - "else:\n", - " print('No standard discharge data column found for site ' + StaID + ', suggest using a different site')" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# view a sample of data records\n", - "flow_data.head()" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Perform simple data check and exploration by plotting hydrographs of select water years" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "#| fig-cap: Sample HySwap hydrograph plot\n", - "plot_start = \"2010-10-01\"\n", - "plot_end = \"2011-09-30\"\n", - "\n", - "fig, ax = plt.subplots(figsize = (9,4))\n", - "ax = hyswap.plot_hydrograph(flow_data, \n", - " data_col = \"00060_Mean\", \n", - " start_date = plot_start,\n", - " end_date = plot_end,\n", - " title = f\"Hydrograph for {StaID} - {station_name}\",\n", - " ax = ax)\n", - "plt.show()" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Next, try changing the plot to not have log scale on the y-axis." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "#| fig-cap: Sample HySwap hydrograph plot\n", - "fig, ax = plt.subplots(figsize = (9,4))\n", - "hyswap.plot_hydrograph(flow_data, \n", - " data_col = \"00060_Mean\", \n", - " start_date = plot_start,\n", - " end_date = plot_end,\n", - " title = f\"Hydrograph for {StaID} - {station_name}\",\n", - " yscale = 'linear',\n", - " ax = ax)\n", - "plt.show()" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Now test doing some plot customizations including axis labels, colors, plot style." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "#| fig-cap: Sample HySwap hydrograph plot with customized appearance\n", - "plot_start = \"2021-10-01\"\n", - "fig, ax = plt.subplots(figsize = (12,4))\n", - "ax = hyswap.plot_hydrograph(flow_data, \n", - " data_col = \"00060_Mean\", \n", - " start_date = plot_start,\n", - " title = f\"Hydrograph for {StaID} - {station_name}\",\n", - " yscale = 'linear',\n", - " ylab = 'Streamflow (cfs)',\n", - " xlab = '',\n", - " color = '#360919',\n", - " ax = ax)\n", - "ax.grid(which = \"major\", axis = \"y\", lw = 1.5)\n", - "ax.grid(which = \"minor\", axis = \"y\", linestyle = \"dashed\")\n", - "ax.minorticks_on()\n", - "plt.show()" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Analyze the long-term record of streamflow\n", - "Review summary statistics and flow records using raster hydrographs." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# calcualte summary statistics on only approved data (quality-assured data that may be published and used in statistical analsysis)\n", - "summary_stats = hyswap.calculate_summary_statistics(df = approved_flow_data)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "#| tbl-cap: Example table of summary statistics\n", - "summary_stats" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "To view long-term patterns and trends in streamflow, raster hydrographs can be plotted using `hyswap`.\n", - "Here, let's utilize a raster hydrograph to see the historical flow at the site from the pst 50 years (1973-2023)." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# format the data\n", - "df_formatted = hyswap.rasterhydrograph.format_data(df = flow_data, \n", - " data_column_name = '00060_Mean', \n", - " year_type = \"calendar\", \n", - " begin_year = 1973, \n", - " end_year = 2023)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "#| fig-cap: Sample raster hydrograph showing past 50 years of streamflow\n", - "# make plot\n", - "fig, ax = plt.subplots(figsize = (8,10))\n", - "ax = hyswap.plots.plot_raster_hydrograph(\n", - " df_formatted, \n", - " ax = ax,\n", - " title = f\"Raster Hydrograph for {StaID} - {station_name}\")\n", - "plt.show()" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Next, try customizing the raster hydrograph by changing to water years, using a different colormap, removing the leap day, and showing all years that have data." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# re-format data\n", - "df_formatted = hyswap.rasterhydrograph.format_data(df = flow_data, \n", - " data_column_name= '00060_Mean', \n", - " year_type = \"water\", \n", - " clip_leap_day = True)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "#| fig-cap: Sample raster hydrograph showing all daily streamflow records from a site\n", - "# make plot\n", - "fig = plt.figure(figsize = (8,18))\n", - "ax = fig.add_subplot()\n", - "ax = hyswap.plots.plot_raster_hydrograph(\n", - " df_formatted, \n", - " ax = ax,\n", - " title = f\"Raster Hydrograph for {StaID} - {station_name}\",\n", - " cmap = 'gist_ncar')\n", - "plt.show()" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Analyze annual streamflow through cumulative flow and flow duration curves \n", - "Use calculations of streamflow percentiles (variable by day) to analyze annual patterns in streamflow.\n", - "\n", - "Let's look at the last four years (2020-2023) of cumulative streamflow by plotting a cumulative hydrographs of different water years along with the envelope that 90% of annual cumulative streamflow is expected to be within, and the years with the minimum and maximum annual cumulative streamflows." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "#| fig-cap: Sample cumulative hydrograph showing multiple water years of cumulative streamflow\n", - "years_to_plot = [2020, 2021, 2022, 2023]\n", - "# plot the cumulative streamflow hydrograph\n", - "fig, ax = plt.subplots(figsize = (10, 5))\n", - "ax = hyswap.plots.plot_cumulative_hydrograph(\n", - " df = flow_data,\n", - " data_column_name = '00060_Mean',\n", - " target_years = years_to_plot,\n", - " ax = ax, \n", - " title = f'Cumulative Streamflow Hydrograph for {StaID} - {station_name}',\n", - " envelope_pct = [5,95],\n", - " max_year = True, \n", - " min_year = True,\n", - " year_type = 'water',\n", - " alpha = 0.2)\n", - "plt.show()" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Next, look at the flow duration curve for a site." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# create data for flow duration curve\n", - "# generate 1,000 evenly spaced values between the min and max flow to create a smooth line\n", - "values = np.linspace(flow_data['00060_Mean'].min(), flow_data['00060_Mean'].max(), 1000)\n", - "# calculate exceedance probabilities from the daily mean streamflow values using only approved data\n", - "exceedance_probabilities = hyswap.exceedance.calculate_exceedance_probability_from_values_multiple(\n", - " values = values, \n", - " values_to_compare = approved_flow_data['00060_Mean']\n", - " )" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "#| fig-cap: Example flow duration curve for a single gage\n", - "# plot the flow duration curve\n", - "fig, ax = plt.subplots(figsize = (8, 6))\n", - "ax = hyswap.plots.plot_flow_duration_curve(\n", - " values = values, \n", - " exceedance_probabilities = exceedance_probabilities, \n", - " ax = ax,\n", - " title = f'Flow Duration Curve for {StaID} - {station_name}')\n", - "# show the plot\n", - "plt.tight_layout()\n", - "plt.show()" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Next, look at the seasonality and variability of streamflow at a single site by plotting a streamflow duration hydrograph." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# create data for streamflow duration hydrograph from approved flow data\n", - "percentiles_by_day = hyswap.percentiles.calculate_variable_percentile_thresholds_by_day(\n", - " df = approved_flow_data, \n", - " data_column_name = \"00060_Mean\"\n", - " )" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "#| tbl-cap: Calculated streamflow percentile thresholds for a streamgage\n", - "# View a sample of this data\n", - "display(percentiles_by_day.head())" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "#| fig-cap: Example Streamflow Duration Hydrograph for a Streamgage\n", - "# plotting percentiles by day with line shade between\n", - "fig, ax = plt.subplots(figsize=(10, 6))\n", - "water_year_to_plot = 2022\n", - "# get year/doy information\n", - "df_indexed = hyswap.utils.define_year_doy_columns(flow_data,\n", - " year_type='water',\n", - " clip_leap_day=True)\n", - "# filter down to data from selected year\n", - "df_year = df_indexed[df_indexed['index_year'] == water_year_to_plot].copy()\n", - "# plot data\n", - "ax = hyswap.plots.plot_duration_hydrograph(\n", - " percentiles_by_day = percentiles_by_day,\n", - " df = df_year,\n", - " data_col = \"00060_Mean\",\n", - " ax = ax,\n", - " data_label = f\"Water Year {water_year_to_plot}\",\n", - " title = f\"Streamflow Duration Hydrograph for {StaID} - {station_name}\"\n", - ")\n", - "plt.tight_layout()\n", - "plt.show()" - ] - } - ], - "metadata": { - "jupytext": { - "formats": "ipynb,qmd" - }, - "kernelspec": { - "display_name": "Python 3 (ipykernel)", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.11.5" - } - }, - "nbformat": 4, - "nbformat_minor": 4 -} diff --git a/single_gage_n_day_flows_analysis.ipynb b/single_gage_n_day_flows_analysis.ipynb deleted file mode 100644 index a13165b..0000000 --- a/single_gage_n_day_flows_analysis.ipynb +++ /dev/null @@ -1,304 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": { - "vscode": { - "languageId": "raw" - } - }, - "source": [ - "# Analysis of N-Day Streamflows for a Single Gage" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Demonstration of use of [hywsap](https://doi-usgs.github.io/hyswap/) python package for analyzing different n-day flows for a single streamgage. \n", - "\n", - "This example notebook relies on use of the [dataretrieval](https://github.com/DOI-USGS/dataRetrieval) package for downloading streamflow information from USGS NWIS" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "import dataretrieval as nwis\n", - "import hyswap\n", - "import matplotlib.pyplot as plt\n", - "import numpy as np\n", - "import warnings\n", - "warnings.filterwarnings('ignore') # ignore warnings from dataretrieval" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Download streamflow data from USGS NWIS for an example site \n", - "For demonstration purposes, use gage 04286000 - WINOOSKI RIVER AT MONTPELIER, VT\n", - "\n", - "Users can identify streamflow gage locations and site ID numbers through the [USGS National Water Dashboard](https://dashboard.waterdata.usgs.gov/app/nwd/en/)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "StaID = '04286000'\n", - "flow_data = nwis.get_record(sites = StaID, \n", - " parameterCd = '00060', \n", - " start = '1900-01-01', \n", - " service = 'dv')\n", - "station_name = nwis.get_record(sites = StaID, service = 'site').loc[0, 'station_nm']" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "if '00060_Mean' in flow_data.columns:\n", - " # set preliminary, non-valid observations (-999999) to NaN\n", - " flow_data['00060_Mean'] = flow_data['00060_Mean'].replace(-999999, np.nan)\n", - "\n", - " # create a filtered version of data of only USGS approved data (quality-assured data that may be published and used in statistical analsysis)\n", - " approved_flow_data = hyswap.utils.filter_approved_data(flow_data, '00060_Mean_cd')\n", - "else:\n", - " print('No standard discharge data column found for site ' + StaID + ', suggest using a different site')" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# summary statistics for the approved data (quality-assured data that may be published and used in statistical analsysis)\n", - "summary_stats = hyswap.calculate_summary_statistics(approved_flow_data)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "#| tbl-cap: Example table of summary statistics\n", - "summary_stats" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Check data and plot simple hydrograph of 7-day average flows" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "#| fig-cap: Sample HySwap hydrograph plot for 7-Day Average Streamflow\n", - "# calculate n-Day average streamflow\n", - "n = 7 \n", - "flow_data_nday = hyswap.utils.rolling_average(flow_data, '00060_Mean', n)\n", - "plot_start = \"2021-10-01\"\n", - "plot_end = \"2023-09-30\"\n", - "# make plot\n", - "fig, ax = plt.subplots(figsize = (10,4))\n", - "ax = hyswap.plot_hydrograph(df = flow_data_nday, \n", - " data_col = \"00060_Mean\", \n", - " start_date = plot_start, \n", - " end_date = plot_end, \n", - " title = f'{n}-Day Average Streamflow Hydrograph for {StaID} - {station_name}', \n", - " ylab = f\"{n}-Day Average Streamflow (cfs)\", \n", - " ax = ax)\n", - "plt.show()" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Look at long-term record of n-day streamflow as raster hydrograph" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# try a raster hydrograph to see historical n-day flows\n", - "# format the data from the last 50 years of records (1972-2023)\n", - "df_formatted = hyswap.rasterhydrograph.format_data(df = flow_data, \n", - " data_column_name = '00060_Mean', \n", - " year_type = \"calendar\", \n", - " begin_year = 1973, \n", - " end_year = 2023, \n", - " data_type = \"7-day\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "#| fig-cap: Sample raster hydrograph showing past 50 years of 7-day average streamflow\n", - "\n", - "# make plot\n", - "fig = plt.figure(figsize = (8,12))\n", - "ax = fig.add_subplot()\n", - "ax = hyswap.plots.plot_raster_hydrograph(\n", - " df_formatted = df_formatted, \n", - " ax = ax,\n", - " title = f\"7-Day Average Raster Hydrograph for {StaID} - {station_name}\",\n", - " cbarlab = '7-Day Average Streamflow, cubic feet per second')\n", - "plt.show()" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Calculate streamflow percentiles for n-day flows\n", - "This example focuses on how the year 2023 compares to other years on record for this site. \n", - "First, we will view how daily streamflow in 2023 compares to other years, and then compare 7-day and 28-day rolling means." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# get year/doy information\n", - "df_indexed = hyswap.utils.define_year_doy_columns(df_in = flow_data, \n", - " year_type = 'water', \n", - " clip_leap_day = True)\n", - "year_to_plot = 2023\n", - "# filter down to data from year to plot\n", - "df_year = df_indexed[df_indexed['index_year'] == year_to_plot].copy()\n", - "# calculate variable percentile thresholds (using only approved data)\n", - "percentiles_by_day = hyswap.percentiles.calculate_variable_percentile_thresholds_by_day(\n", - " df = approved_flow_data, \n", - " data_column_name = \"00060_Mean\"\n", - " )" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "#| fig-cap: Example streamflow duration hydrograph with daily streamflow for the 2023 Water Year\n", - "# plot daily percentiles\n", - "fig, ax = plt.subplots(figsize = (12, 8))\n", - "ax = hyswap.plots.plot_duration_hydrograph(\n", - " percentiles_by_day = percentiles_by_day,\n", - " df = df_year,\n", - " data_col = \"00060_Mean\",\n", - " ax = ax,\n", - " data_label = f\"Water Year {year_to_plot}\",\n", - " title = f\"Streamflow Duration Hydrograph for {StaID} - {station_name}\",\n", - " xlab = \"\",\n", - " ylab = \"Daily Streamflow (cfs)\"\n", - ")\n", - "plt.show()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "#| fig-cap: Example streamflow duration hydrograph with 7-day average streamflow for the 2023 Water Year\n", - "percentiles_by_nday = hyswap.percentiles.calculate_variable_percentile_thresholds_by_day(\n", - " df = approved_flow_data, \n", - " data_column_name = \"00060_Mean\", \n", - " data_type = '7-day')\n", - "n = 7\n", - "# plot 7-day avg percentiles\n", - "fig, ax = plt.subplots(figsize=(12, 8))\n", - "ax = hyswap.plots.plot_duration_hydrograph(\n", - " percentiles_by_day = percentiles_by_nday,\n", - " df = hyswap.utils.rolling_average(df = df_year, \n", - " data_column_name = \"00060_Mean\", \n", - " data_type = n),\n", - " data_col = \"00060_Mean\",\n", - " ax = ax,\n", - " data_label = f\"Water Year {year_to_plot}\",\n", - " title = f\"{n}-Day Avg. Streamflow Duration Hydrograph for {StaID} - {station_name}\",\n", - " xlab = \"\",\n", - " ylab = f\"{n}-Day Avg. Streamflow (cfs)\"\n", - ")\n", - "plt.show()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "#| fig-cap: Example streamflow duration hydrograph with 28-day average streamflow for the 2023 Water Year\n", - "percentiles_by_nday = hyswap.percentiles.calculate_variable_percentile_thresholds_by_day(\n", - " df = approved_flow_data, \n", - " data_column_name = \"00060_Mean\", \n", - " data_type = '28-day')\n", - "n = 28\n", - "# plot 28-day average percentiles\n", - "fig, ax = plt.subplots(figsize = (12, 8))\n", - "ax = hyswap.plots.plot_duration_hydrograph(\n", - " percentiles_by_day = percentiles_by_nday,\n", - " df = hyswap.utils.rolling_average(df = df_year, \n", - " data_column_name = \"00060_Mean\", \n", - " data_type = n),\n", - " data_col = \"00060_Mean\",\n", - " ax = ax,\n", - " data_label = f\"Water Year {year_to_plot}\",\n", - " title = f\"{n}-Day Avg. Streamflow Duration Hydrograph for {StaID} - {station_name}\",\n", - " xlab = \"\",\n", - " ylab = f\"{n}-Day Avg. Streamflow (cfs)\"\n", - ")\n", - "plt.show()" - ] - } - ], - "metadata": { - "jupytext": { - "formats": "ipynb,qmd" - }, - "kernelspec": { - "display_name": "Python 3 (ipykernel)", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.11.5" - } - }, - "nbformat": 4, - "nbformat_minor": 4 -} diff --git a/site_conditions_examples.ipynb b/site_conditions_examples.ipynb deleted file mode 100644 index 93a1e70..0000000 --- a/site_conditions_examples.ipynb +++ /dev/null @@ -1,773 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": { - "vscode": { - "languageId": "raw" - } - }, - "source": [ - "# Visualization of Streamflow Conditions at Streamgages" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "This notebook provides a demonstration of the use of [hyswap](https://doi-usgs.github.io/hyswap/) python package for calculating streamflow percentiles and then visualizing streamflow conditions at multiple streamflow gages. \n", - "\n", - "This example notebook relies on use of the [dataretrieval](https://github.com/DOI-USGS/dataRetrieval) package for downloading streamflow information from USGS NWIS as well as the [geopandas](https://geopandas.org/) package for mapping functionality.\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# Run commented lines below to install geopandas and mapping dependencies from within the notebook\n", - "#import sys\n", - "#!{sys.executable} -m pip install geopandas folium mapclassify" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "from dataretrieval import nwis\n", - "import hyswap\n", - "import numpy as np\n", - "import pandas as pd\n", - "from datetime import datetime, timedelta\n", - "from zoneinfo import ZoneInfo\n", - "\n", - "from tqdm import tqdm # used for progress bar indicators\n", - "import geopandas # has dependencies of folium and mapclassify to create maps within this notebook\n", - "import warnings\n", - "warnings.filterwarnings('ignore') # ignore warnings from dataretrieval" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "**NOTE:** The `tqdm` package is used in for-loops in this notebook to show a data download progress bar, which may be informative to the user. The specification below (`disable_tdqm`) determines whether this progress bar is displayed when the notebook renders. It is set to `True` when rendering the notebook in the `hyswap` GitHub documentation site. To see the progress bars in this notebook, set `disable_tqdm=False`." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "disable_tqdm=True" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Define Helper Functions\n", - "The `hyswap` package provides functionality for calculating non-interpretive streamflow statistics but does not provide functionality for correcting invalid data or geospatial capabilities for mapping. Here we setup some simple helper functions we can re-use throughout the notebook to QAQC data and create maps." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# Data QAQC function for provisional NWIS data\n", - "def qaqc_nwis_data(df, data_col):\n", - " #replace invalid -999999 values with NA\n", - " df[data_col] = df[data_col].replace(-999999, np.nan)\n", - " # add any additional QAQC steps needed\n", - " return df" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "def create_gage_condition_map(gage_df, flow_data_col, map_schema, streamflow_data_type):\n", - " # Format date and set to str type for use in map tooltips\n", - " if flow_data_col == '00060':\n", - " gage_df['Date'] = gage_df['datetime'].dt.strftime('%Y-%m-%d %H:%M')\n", - " elif flow_data_col == '00060_Mean':\n", - " gage_df['Date'] = gage_df['datetime'].dt.strftime('%Y-%m-%d')\n", - " gage_df = gage_df.drop('datetime', axis=1)\n", - " # create colormap for map from hyswap schema\n", - " schema = hyswap.utils.retrieve_schema(schema_name = map_schema)\n", - " flow_cond_cmap = schema['colors']\n", - " if 'low_color' in schema:\n", - " flow_cond_cmap = [schema['low_color']] + flow_cond_cmap\n", - " if 'high_color' in schema:\n", - " flow_cond_cmap = flow_cond_cmap + [schema['high_color']]\n", - " # if creating a drought map, set handling of non-drought flows\n", - " if map_schema in ['WaterWatch_Drought', 'NIDIS_Drought']:\n", - " gage_df['flow_cat'] = gage_df['flow_cat'].cat.add_categories('Other')\n", - " gage_df.loc[gage_df['flow_cat'].isnull(), 'flow_cat'] = 'Other'\n", - " flow_cond_cmap = flow_cond_cmap + ['#e3e0ca'] # light taupe\n", - " # set NA values to \"Not Ranked\" category\n", - " gage_df['flow_cat'] = gage_df['flow_cat'].cat.add_categories('Not Ranked')\n", - " gage_df.loc[gage_df['est_pct'].isna(), 'flow_cat'] = 'Not Ranked'\n", - " flow_cond_cmap = flow_cond_cmap + ['#d3d3d3'] # light grey\n", - " # renaming columns with user friendly names for map\n", - " gage_df = gage_df.rename(columns={flow_data_col:'Discharge (cfs)',\n", - " 'est_pct':'Estimated Percentile',\n", - " 'site_no':'USGS Gage ID',\n", - " 'station_nm':'Streamgage Name',\n", - " 'flow_cat':'Streamflow Category'})\n", - " # convert dataframe to geopandas GeoDataFrame\n", - " gage_df = geopandas.GeoDataFrame(gage_df, \n", - " geometry=geopandas.points_from_xy(gage_df.dec_long_va,\n", - " gage_df.dec_lat_va), \n", - " crs=\"EPSG:4326\").to_crs(\"EPSG:5070\")\n", - " # Create map\n", - " m = gage_df.explore(column=\"Streamflow Category\",\n", - " cmap=flow_cond_cmap,\n", - " tooltip=[\"USGS Gage ID\", \"Streamgage Name\", \"Streamflow Category\", \"Discharge (cfs)\", \"Estimated Percentile\", \"Date\"],\n", - " tiles=\"CartoDB Positron\",\n", - " marker_kwds=dict(radius=5),\n", - " legend_kwds=dict(caption=streamflow_data_type + '
Streamflow Category'))\n", - " return m #returns a folium map object" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Data Downloading and Processing\n", - "Utilize an example state to select streamgages for generating various flow condition maps. Certain past days selected in the notebook are relevant to using the state of Vermont (VT) as an example, but the notebook can be run for any state.\n", - "\n", - "### Find all stream sites active in the last year within a State\n", - "Limit the search to streamgages that have also been operational prior to approximately 10 years ago as a minimum of 10 years of flow records should be available for calculating streamflow percentiles." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "#| tbl-cap: List of streamgage sites active within the last week\n", - "state = 'VT'\n", - "# Query NWIS for what streamgage sites were active within the last week\n", - "active_nwis_sites, _ = nwis.what_sites(stateCd = state, \n", - " parameterCd = '00060', \n", - " period = \"P1W\", \n", - " siteType = 'ST')\n", - "# Find gages present in both of the above queries to get a list of gages that were\n", - "# recently active that also have records from more than 10 years ago\n", - "sites = active_nwis_sites#[active_nwis_sites['site_no'].isin(nwis_sites['site_no'])]\n", - "display(sites)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Retrieve Streamflow Data from NWIS\n", - "For the sites identified above, download all historical daily streamflow data (1900 through 2023 Water Years). " - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# create a python dictionary of dataframes by site id number\n", - "flow_data = {}\n", - "\n", - "for StaID in tqdm(sites['site_no'], disable = disable_tqdm, desc = \"Downloading NWIS Flow Data for Sites\"):\n", - " flow_data[StaID] = nwis.get_record(sites = StaID, \n", - " parameterCd = '00060', \n", - " start = \"1900-01-01\", \n", - " end = \"2023-10-01\", \n", - " service = 'dv')" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Calculate Variable Streamflow Percentile Thresholds\n", - "For the sites identified above, calculate streamflow percentile thresholds at 0, 1, 5, 10, ... , 90, 95, 99, 100 percentiles" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# Define what percentile levels (thresholds) that we want to calculate.\n", - "# Intervals of 5 or less are recommended to have sufficient levels to interpolate between in later calculations. \n", - "# Note that 0 and 100 percentile levels are ignored. Refer to min and max values returned instead\n", - "percentile_levels = np.concatenate((np.array([1]), np.arange(5,96,5), np.array([99])), axis = 0)\n", - "print(percentile_levels)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "percentile_values = {}\n", - "for StaID in tqdm(sites['site_no'], disable = disable_tqdm, desc = \"Processing Sites\"):\n", - " if '00060_Mean' in flow_data[StaID].columns:\n", - " # Filter data as only approved data in NWIS should be used to calculate statistics\n", - " df = hyswap.utils.filter_approved_data(data = flow_data[StaID], \n", - " filter_column = '00060_Mean_cd')\n", - " percentile_values[StaID] = hyswap.percentiles.calculate_variable_percentile_thresholds_by_day(\n", - " df = df, \n", - " data_column_name = '00060_Mean', \n", - " percentiles = percentile_levels\n", - " )\n", - " else:\n", - " print('No standard discharge data column found for site ' + StaID + ', skipping')" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "#| tbl-cap: Sample of calcualted variable streamflow percentile thresholds for first site in list\n", - "# View percentile thresholds for the first site\n", - "display(percentile_values[list(percentile_values.keys())[0]].head())" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Create a Current Flow Conditions Map for Daily Mean Streamflow\n", - "\n", - "### Retrieve most recent (yesterday) daily mean streamflow\n", - "Download data from NWIS and calculate corresponding streamflow percentile for the most recent daily mean discharge" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "yesterday = datetime.strftime(datetime.now(tz = ZoneInfo(\"US/Eastern\")) - timedelta(1), '%Y-%m-%d')\n", - "recent_dvs = nwis.get_record(sites = sites['site_no'].tolist(), \n", - " parameterCd = '00060', \n", - " start = yesterday, \n", - " end = yesterday, \n", - " service = 'dv')\n", - "recent_dvs = qaqc_nwis_data(df = recent_dvs, \n", - " data_col = '00060_Mean')" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Categorize streamflow based on calculated percentile values\n", - "Calculate estimated streamflow percentile for the new data by interpolating against the previously calculated percentile threshold levels." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# estimate percentiles\n", - "df = pd.DataFrame()\n", - "for StaID, site_df in recent_dvs.groupby(level = \"site_no\", group_keys = False):\n", - " if StaID in list(percentile_values.keys()):\n", - " if not percentile_values[StaID].isnull().all().all():\n", - " percentiles = hyswap.percentiles.calculate_multiple_variable_percentiles_from_values(\n", - " df = site_df,\n", - " data_column_name = '00060_Mean', \n", - " percentile_df = percentile_values[StaID]\n", - " )\n", - " df = pd.concat([df, percentiles])\n", - "# categorize streamflow by the estimated streamflow percentiles\n", - "df = hyswap.utils.categorize_flows(df = df, \n", - " percentile_col = 'est_pct', \n", - " schema_name = \"NWD\")\n", - "df = df.reset_index(level = 'datetime')\n", - "# Prep Data for mapping by joining site information and flow data \n", - "gage_df = pd.merge(sites, df, how = \"right\", on = \"site_no\")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Create Map of Streamflow Conditions" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "#| fig-cap: Map showing most recent daily mean streamflow and corresponding flow conditions\n", - "map = create_gage_condition_map(gage_df = gage_df, \n", - " flow_data_col = '00060_Mean', \n", - " map_schema = 'NWD', \n", - " streamflow_data_type = 'Current Daily Mean')\n", - "display(map)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Create Map of Streamflow Conditions using Alternative Categorization Schema" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "#| fig-cap: Map showing most recent daily mean streamflow and corresponding flow conditions using a brown-blue schema\n", - "\n", - "# Prep Data for mapping by joining site information and flow data \n", - "map = create_gage_condition_map(gage_df = gage_df, \n", - " flow_data_col = '00060_Mean', \n", - " map_schema = 'WaterWatch_BrownBlue', \n", - " streamflow_data_type = 'Current Daily Mean')\n", - "display(map)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Create a \"Real-Time\" Flow Conditions Map for Instantaneous Streamflow\n", - "\n", - "### Retrieve most recent instantaneous streamflow records\n", - "Download data from NWIS and calculate corresponding streamflow percentile for the most recent instantaneous discharge measurement" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "recent_ivs = nwis.get_record(sites=sites['site_no'].tolist(), parameterCd = '00060', service = 'iv')\n", - "recent_ivs = qaqc_nwis_data(df = recent_ivs, data_col = '00060')" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Categorize streamflow based on calculated percentile values\n", - "Calculate estimated streamflow percentile for the new instantaneous data by interpolating against the previously calculated percentile threshold levels from daily streamflow records." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# estimate percentiles\n", - "df = pd.DataFrame()\n", - "for StaID, site_df in recent_ivs.groupby(level = \"site_no\", group_keys = False):\n", - " if StaID in list(percentile_values.keys()):\n", - " if not percentile_values[StaID].isnull().all().all():\n", - " percentiles = hyswap.percentiles.calculate_multiple_variable_percentiles_from_values(\n", - " df = site_df,\n", - " data_column_name = '00060', \n", - " percentile_df = percentile_values[StaID]\n", - " )\n", - " df = pd.concat([df, percentiles])\n", - "# categorize streamflow by the estimated streamflow percentiles\n", - "df = hyswap.utils.categorize_flows(df = df, \n", - " percentile_col = 'est_pct', \n", - " schema_name = \"NWD\")\n", - "df = df.tz_convert(tz = 'US/Eastern')\n", - "df = df.reset_index(level = 'datetime')\n", - "# Prep Data for mapping by joining site information and flow data \n", - "gage_df = pd.merge(sites, df, how = \"right\", on = \"site_no\")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Create Map of Real-Time Streamflow Conditions" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "#| fig-cap: Map showing most real-time streamflow conditions\n", - "\n", - "map = create_gage_condition_map(gage_df = gage_df, \n", - " flow_data_col = '00060', \n", - " map_schema = 'NWD', \n", - " streamflow_data_type = 'Real-Time Instantaneous')\n", - "display(map)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Create a Current Flow Conditions Map for n-Day Daily Streamflow\n", - "\n", - "### Retrieve daily streamflow records for past 7 days\n", - "Download data from NWIS and calculate corresponding streamflow percentiles for each day" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "past_dvs = nwis.get_record(\n", - " sites = sites['site_no'].tolist(), \n", - " parameterCd = '00060',\n", - " start = datetime.strftime(datetime.now(tz = ZoneInfo(\"US/Eastern\")) - timedelta(7), '%Y-%m-%d'),\n", - " end = yesterday,\n", - " service = 'dv'\n", - ")\n", - "past_dvs = qaqc_nwis_data(df = past_dvs, \n", - " data_col = '00060_Mean')\n", - "past_dvs_7d = past_dvs.copy()\n", - "for StaID, new_df in past_dvs.groupby(level = 0):\n", - " past_dvs_7d.loc[StaID] = hyswap.utils.rolling_average(\n", - " df = new_df,\n", - " data_column_name = '00060_Mean', \n", - " data_type = 7).round(2)\n", - "\n", - "past_dvs_7d = past_dvs_7d.dropna()" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Calculate 7-day average streamflow and corresponding variable percentile thresholds" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "flow_data_7d = {}\n", - "for StaID in tqdm(sites['site_no'], disable=disable_tqdm):\n", - " if '00060_Mean' in flow_data[StaID].columns:\n", - " flow_data_7d[StaID] = hyswap.utils.rolling_average(\n", - " df = flow_data[StaID], \n", - " data_column_name = '00060_Mean', \n", - " data_type = 7).round(2)\n", - " else:\n", - " print('No standard discharge data column found for site ' + StaID + ', skipping')" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "percentile_values_7d = {}\n", - "for StaID in tqdm(sites['site_no'], disable = disable_tqdm, desc = \"Processing\"):\n", - " if '00060_Mean' in flow_data[StaID].columns:\n", - " # Filter data as only approved data in NWIS should be used to calculate statistics\n", - " df = hyswap.utils.filter_approved_data(\n", - " data = flow_data_7d[StaID], \n", - " filter_column = '00060_Mean_cd')\n", - " percentile_values_7d[StaID] = hyswap.percentiles.calculate_variable_percentile_thresholds_by_day(\n", - " df, '00060_Mean', percentiles=percentile_levels)\n", - " else:\n", - " print('No standard discharge data column found for site ' + StaID + ', skipping')" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Categorize streamflow based on calculated percentile values\n", - "Calculate estimated streamflow percentile for the new data by interpolating against the previously calculated percentile threshold levels." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# estimate percentiles\n", - "df = pd.DataFrame()\n", - "for StaID, site_df in past_dvs_7d.groupby(level = \"site_no\", group_keys = False):\n", - " if StaID in list(percentile_values_7d.keys()):\n", - " if not percentile_values[StaID].isnull().all().all():\n", - " percentiles = hyswap.percentiles.calculate_multiple_variable_percentiles_from_values(\n", - " df = site_df,\n", - " data_column_name = '00060_Mean', \n", - " percentile_df = percentile_values[StaID]\n", - " )\n", - " df = pd.concat([df, percentiles])\n", - "# categorize streamflow by the estimated streamflow percentiles\n", - "df = hyswap.utils.categorize_flows(df = df, \n", - " percentile_col = 'est_pct', \n", - " schema_name = \"NWD\")\n", - "# keep only most recent 7-day average flow for plotting\n", - "df = df[df.index.get_level_values('datetime') == yesterday]\n", - "df = df.reset_index(level='datetime')\n", - "# Prep Data for mapping by joining site information and flow data \n", - "gage_df = pd.merge(sites, df, how = \"right\", on = \"site_no\")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Create Map of 7-Day Average Streamflow Conditions" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "#| fig-cap: Map showing most recent 7-day average streamflow and corresponding flow conditions\n", - "\n", - "map = create_gage_condition_map(gage_df = gage_df, \n", - " flow_data_col = '00060_Mean', \n", - " map_schema = 'NWD', \n", - " streamflow_data_type = 'Current 7-Day Average')\n", - "display(map)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Create a Drought Conditions Map for a Previous Day's Streamflow\n", - "\n", - "### Retrieve daily streamflow records from a past day\n", - "Download data from NWIS and calculate corresponding streamflow percentiles for the given day's streamflow" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "past_day = \"2023-05-30\"\n", - "\n", - "past_dvs = nwis.get_record(sites = sites['site_no'].tolist(),\n", - " parameterCd = '00060',\n", - " start = past_day,\n", - " end = past_day,\n", - " service = 'dv')\n", - "past_dvs = qaqc_nwis_data(df = past_dvs, \n", - " data_col = '00060_Mean')" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Categorize streamflow based on calculated percentile values" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# Calculate estimated streamflow percentile for the new data by interpolating against\n", - "# the previously calculated percentile threshold levels\n", - "df = pd.DataFrame()\n", - "for StaID, site_df in past_dvs.groupby(level = \"site_no\", group_keys = False):\n", - " if StaID in list(percentile_values.keys()):\n", - " if not percentile_values[StaID].isnull().all().all():\n", - " percentiles = hyswap.percentiles.calculate_multiple_variable_percentiles_from_values(\n", - " df = site_df,\n", - " data_column_name = '00060_Mean', \n", - " percentile_df = percentile_values[StaID]\n", - " )\n", - " df = pd.concat([df, percentiles])\n", - "# categorize streamflow by the estimated streamflow percentiles\n", - "df = hyswap.utils.categorize_flows(df = df, \n", - " percentile_col = 'est_pct', \n", - " schema_name = \"WaterWatch_Drought\")\n", - "df = df.reset_index(level = 'datetime')\n", - "# Prep Data for mapping by joining site information and flow data \n", - "gage_df = pd.merge(sites, df, how = \"right\", on = \"site_no\")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Create Map of Streamflow Drought Conditions" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "#| fig-cap: Map showing historical daily mean streamflow and corresponding flow conditions using a drought categorization schema\n", - "map = create_gage_condition_map(gage_df = gage_df, \n", - " flow_data_col = '00060_Mean', \n", - " map_schema = 'WaterWatch_Drought', \n", - " streamflow_data_type = 'Daily Mean')\n", - "display(map)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Create a Flood Conditions Map for a past Day's Streamflow\n", - "This example uses fixed percentiles that are not calculated by day of year, but instead across all days of the year together. Flow categories are therefore relative to absolute streamflow levels rather than what is normal for that day of the year.\n", - "\n", - "### Retrieve daily streamflow records from a past day\n", - "Download data from NWIS and calculate corresponding fixed streamflow percentiles for the given day's streamflow" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "past_day = \"2023-07-10\"\n", - "\n", - "past_dvs = nwis.get_record(sites = sites['site_no'].tolist(),\n", - " parameterCd = '00060',\n", - " start = past_day,\n", - " end = past_day,\n", - " service = 'dv')\n", - "past_dvs = qaqc_nwis_data(df = past_dvs, \n", - " data_col = '00060_Mean')" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "fixed_percentile_values = {}\n", - "\n", - "for StaID in tqdm(sites['site_no'], disable=disable_tqdm):\n", - " if '00060_Mean' in flow_data[StaID].columns:\n", - " # Filter data as only approved data in NWIS should be used to calculate statistics\n", - " df = hyswap.utils.filter_approved_data(data = flow_data[StaID], \n", - " filter_column = '00060_Mean_cd')\n", - " if not df.empty:\n", - " fixed_percentile_values[StaID] = hyswap.percentiles.calculate_fixed_percentile_thresholds(\n", - " data = df['00060_Mean'], \n", - " percentiles = percentile_levels)\n", - " else:\n", - " print(StaID + ' has no approved data, skipping')\n", - " else:\n", - " print(StaID + ' does not have standard discharge data column, skipping')" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Categorize streamflow based on calculated percentile values" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# estimate percentiles\n", - "for StaID in past_dvs.index.get_level_values(0):\n", - " if StaID in list(fixed_percentile_values.keys()):\n", - " past_dvs.at[(StaID, past_day), 'est_pct'] = hyswap.percentiles.calculate_fixed_percentile_from_value(\n", - " value = past_dvs.at[(StaID, past_day), '00060_Mean'], \n", - " percentile_df = fixed_percentile_values[StaID])\n", - "# categorize streamflow by the estimated streamflow percentiles\n", - "df = hyswap.utils.categorize_flows(df = past_dvs, \n", - " percentile_col = 'est_pct', \n", - " schema_name = \"WaterWatch_Flood\")\n", - "df = df.reset_index(level = 'datetime')\n", - "# Prep Data for mapping by joining site information and flow data \n", - "gage_df = pd.merge(sites, df, how = \"right\", on = \"site_no\")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Create Map of Streamflow High-Flow Conditions" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "#| fig-cap: Map showing historical daily mean streamflow and corresponding flow conditions using a high-flow categorization schema\n", - "map = create_gage_condition_map(gage_df = gage_df, \n", - " flow_data_col = '00060_Mean', \n", - " map_schema = 'WaterWatch_Flood', \n", - " streamflow_data_type = 'Daily Mean')\n", - "display(map)" - ] - } - ], - "metadata": { - "jupytext": { - "formats": "ipynb,qmd" - }, - "kernelspec": { - "display_name": "Python 3 (ipykernel)", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.11.5" - } - }, - "nbformat": 4, - "nbformat_minor": 4 -}