diff --git a/docs/source/notebooks/AWSDataNotebook.ipynb b/docs/source/notebooks/AWSDataNotebook.ipynb index 0406652..82e7825 100644 --- a/docs/source/notebooks/AWSDataNotebook.ipynb +++ b/docs/source/notebooks/AWSDataNotebook.ipynb @@ -2428,7 +2428,7 @@ ], "source": [ "# FLNS collection\n", - "col_flns = ldcpy.collect_datasets(\"cam-fv\", [\"FLNS\"], [ds_flns, aws_flns], [\"original\", \"lossy\"])\n" + "col_flns = ldcpy.collect_datasets(\"cam-fv\", [\"FLNS\"], [ds_flns, aws_flns], [\"original\", \"lossy\"])" ] }, { @@ -2789,7 +2789,9 @@ ], "source": [ "# print statistics about 'original', 'lossy', and diff between the two datasets for TMQ at time slice 365\n", - "ldcpy.compare_stats(col_tmq.isel(time=365), \"TMQ\", [\"original\", \"lossy\"], aggregate_dims = [\"lat\", \"lon\"])" + "ldcpy.compare_stats(\n", + " col_tmq.isel(time=365), \"TMQ\", [\"original\", \"lossy\"], aggregate_dims=[\"lat\", \"lon\"]\n", + ")" ] }, { diff --git a/docs/source/notebooks/MetricsNotebook.ipynb b/docs/source/notebooks/MetricsNotebook.ipynb index af92e16..7e3d81a 100644 --- a/docs/source/notebooks/MetricsNotebook.ipynb +++ b/docs/source/notebooks/MetricsNotebook.ipynb @@ -417,7 +417,9 @@ ], "source": [ "data = ts_col.isel(time=0)\n", - "ldcpy.compare_stats(data, \"TS\", [\"orig\", \"zfp1.0\", \"zfp1e-1\", \"zfp1e-3\"], aggregate_dims=[\"lat\", \"lon\"])" + "ldcpy.compare_stats(\n", + " data, \"TS\", [\"orig\", \"zfp1.0\", \"zfp1e-1\", \"zfp1e-3\"], aggregate_dims=[\"lat\", \"lon\"]\n", + ")" ] }, { @@ -733,7 +735,13 @@ "source": [ "# now using weighted averages\n", "data = ts_col.isel(time=0)\n", - "ldcpy.compare_stats(data, \"TS\", [\"orig\", \"zfp1.0\", \"zfp1e-1\", \"zfp1e-3\"], weighted=True, aggregate_dims=[\"lat\", \"lon\"])" + "ldcpy.compare_stats(\n", + " data,\n", + " \"TS\",\n", + " [\"orig\", \"zfp1.0\", \"zfp1e-1\", \"zfp1e-3\"],\n", + " weighted=True,\n", + " aggregate_dims=[\"lat\", \"lon\"],\n", + ")" ] }, { diff --git a/ldcpy/util.py b/ldcpy/util.py index e56093d..a0380a9 100644 --- a/ldcpy/util.py +++ b/ldcpy/util.py @@ -106,7 +106,7 @@ def collect_datasets( weighted = False else: weighted = True - + # preprocess_vars is here for working on jupyter hub... def preprocess_vars(ds, varnames): return ds[varnames] @@ -137,7 +137,7 @@ def preprocess_vars(ds, varnames): full_ds.attrs['data_type'] = data_type full_ds.attrs['file_size'] = None full_ds.attrs['weighted'] = weighted - + # file sizes? if file_sizes is not None: file_size_dict = {} @@ -154,7 +154,7 @@ def preprocess_vars(ds, varnames): new_ds[i].attrs['data_type'] = data_type new_ds[i].attrs['set_name'] = label new_ds[i].attrs['weighted'] = weighted - + # d = xr.combine_by_coords(new_ds) d = xr.concat(new_ds, 'collection') full_ds[v] = d @@ -237,7 +237,7 @@ def preprocess_vars(ds): return ds[varnames] - #check the weights + # check the weights tmp_ds = xr.open_dataset(list_of_files[0]) if data_type == 'cam-fv' and weights is True: weights_name = 'gw' @@ -256,7 +256,7 @@ def preprocess_vars(ds): weighted = False else: weighted = True - + full_ds = xr.open_mfdataset( list_of_files, concat_dim='collection', @@ -286,7 +286,7 @@ def preprocess_vars(ds): full_ds.attrs['data_type'] = data_type full_ds.attrs['file_size'] = file_size_dict full_ds.attrs['weighted'] = weighted - + for v in varnames[:-1]: new_ds = [] i = 0 @@ -295,7 +295,7 @@ def preprocess_vars(ds): new_ds[i].attrs['data_type'] = data_type new_ds[i].attrs['set_name'] = label new_ds[i].attrs['weighted'] = weighted - + # d = xr.combine_by_coords(new_ds) d = xr.concat(new_ds, 'collection') full_ds[v] = d @@ -345,13 +345,15 @@ def compare_stats( da = ds[varname] data_type = ds.attrs['data_type'] attr_weighted = ds.attrs['weighted'] - + # no weights for wrf if data_type == 'cam-fv': if weighted: if not attr_weighted: - print('Warning - this data does not contain weights, so averages will be unweighted.') - weighted = False + print( + 'Warning - this data does not contain weights, so averages will be unweighted.' + ) + weighted = False if data_type == 'wrf': weighted = False @@ -723,7 +725,7 @@ def check_metrics( num_fail = 0 # Pearson less than pcc_tol means fail pcc = diff_calcs.get_diff_calc('pearson_correlation_coefficient') - #print(type(pcc)) + # print(type(pcc)) if pcc < pcc_tol: print(' *FAILED pearson correlation coefficient test...(pcc = {0:.5f}'.format(pcc), ')')