Merge branch 'develop' into feature/floris_tuning

NREL · Sep 12, 2023 · c5dd9bb · c5dd9bb
2 parents 8adae25 + 7c08af9
commit c5dd9bb
Show file tree

Hide file tree

Showing 6 changed files with 427 additions and 113 deletions.
diff --git a/examples_artificial_data/03_energy_ratio/05_wake_steering_example.py b/examples_artificial_data/03_energy_ratio/05_wake_steering_example.py
@@ -10,20 +10,16 @@
 # License for the specific language governing permissions and limitations under
 # the License.
 
-import numpy as np
+
 import matplotlib.pyplot as plt
-import os
+import numpy as np
 import pandas as pd
 import seaborn as sns
 
-from floris import tools as wfct
-from floris.utilities import wrap_360
-
 from flasc.energy_ratio import energy_ratio as er
 from flasc.energy_ratio.energy_ratio_input import EnergyRatioInput
-# from flasc import floris_tools as fsatools
-from flasc.visualization import plot_layout_with_waking_directions, plot_binned_mean_and_ci
 from flasc.utilities_examples import load_floris_artificial as load_floris
+from flasc.visualization import plot_binned_mean_and_ci, plot_layout_with_waking_directions
 
 
 if __name__ == "__main__":
@@ -129,21 +125,13 @@
 
     # Make a color palette that visually links the nominal and noisy data sets together
     color_palette = sns.color_palette("Paired",4)[::-1]
-    # color_palette = ['r','g','b','k']
-
-    # Initialize the energy ratio suite object and add each dataframe
-    # separately. 
 
+    # Initialize the energy ratio input object
     er_in = EnergyRatioInput(
         [df_baseline, df_wakesteering, df_baseline_noisy, df_wakesteering_noisy], 
         ["Baseline", "WakeSteering", "Baseline (Noisy)", "WakeSteering (Noisy)"]
     )
 
-    # fsc.add_df(df_baseline, 'Baseline', color_palette[0])
-    # fsc.add_df(df_wakesteering, 'WakeSteering', color_palette[1])
-    # fsc.add_df(df_baseline_noisy, 'Baseline (Noisy)', color_palette[2])
-    # fsc.add_df(df_wakesteering_noisy, 'WakeSteering (Noisy)', color_palette[3])
-
     # Calculate and plot the energy ratio for the downstream turbine [2]
     # With respect to reference turbine [0]
     # datasets with uncertainty quantification using 50 bootstrap samples
@@ -159,16 +147,10 @@
         percentiles=[5., 95.],
         uplift_pairs=[("Baseline", "WakeSteering"), 
                       ("Baseline (Noisy)", "WakeSteering (Noisy)")],
-        uplift_names=["Clean", "Noisy"]
+        uplift_names=["Clean", "Noisy"],
+        weight_by='min'
     )
-    # fsc.get_energy_ratios(
-    #     test_turbines=[2],
-    #     wd_step=2.0,
-    #     ws_step=1.0,
-    #     N=10,
-    #     percentiles=[5., 95.],
-    #     verbose=False
-    # )
+
     er_out.plot_energy_ratios(
         color_dict={"Baseline":"blue", 
                     "WakeSteering":"green", 

diff --git a/flasc/energy_ratio/energy_ratio.py b/flasc/energy_ratio/energy_ratio.py
@@ -12,7 +12,17 @@
 
 from flasc.energy_ratio.energy_ratio_output import EnergyRatioOutput
 from flasc.energy_ratio.energy_ratio_input import EnergyRatioInput
-from flasc.energy_ratio.energy_ratio_utilities import add_ws_bin, add_wd, add_wd_bin, add_power_ref, add_power_test, add_reflected_rows
+from flasc.energy_ratio.energy_ratio_utilities import (
+    add_ws_bin,
+    add_wd,
+    add_wd_bin,
+    add_power_ref,
+    add_power_test,
+    add_reflected_rows,
+    check_compute_energy_ratio_inputs,
+    filter_all_nulls,
+    filter_any_nulls
+)
 
 
 # Internal version, returns a polars dataframe
@@ -29,9 +39,11 @@ def _compute_energy_ratio_single(df_,
                          ws_min = 0.0,
                          ws_max = 50.0,
                          bin_cols_in = ['wd_bin','ws_bin'],
+                         weight_by = 'min', #min, sum
                          wd_bin_overlap_radius = 0.,
                          uplift_pairs = [],
-                         uplift_names = []
+                         uplift_names = [],
+                         remove_all_nulls = False
                          ):
 
     """
@@ -51,14 +63,20 @@ def _compute_energy_ratio_single(df_,
         ws_min (float): The minimum wind speed to use.
         ws_max (float): The maximum wind speed to use.
         bin_cols_in (list[str]): A list of column names to use for the wind speed and wind direction bins.
+        weight_by (str): How to weight the energy ratio; options are 'min' or 'sum'. 'min' means
+            the minimum count across the dataframes is used to weight the energy ratio. 'sum' means the sum of the counts
+            across the dataframes is used to weight the energy ratio. Defaults to 'min'.
         wd_bin_overlap_radius (float): The distance in degrees one wd bin overlaps into the next, must be 
             less or equal to half the value of wd_step
         uplift_pairs: (list[tuple]): List of pairs of df_names to compute uplifts for. Each element 
             of the list should be a tuple (or list) of length 2, where the first element will be the 
             base case in the uplift calculation and the second element will be the test case in the 
             uplift calculation. If None, no uplifts are computed.
         uplift_names: (list[str]): Names for the uplift columns, following the order of the 
-            pairs specified in uplift_pairs. If None, will default to "uplift_df_name1_df_name2"
+            pairs specified in uplift_pairs. If None, will default to "uplift_df_name1_df_name2".
+        remove_all_nulls: (bool): Construct reference and test by strictly requiring all data to be
+            available. If False, a minimum of one data point from ref_cols, test_cols, wd_cols, and ws_cols
+            must be available to compute the bin. Defaults to False.
 
     Returns:
         pl.DataFrame: A dataframe containing the energy ratio for each wind direction bin
@@ -67,23 +85,25 @@ def _compute_energy_ratio_single(df_,
     # Identify the number of dataframes
     num_df = len(df_names)
 
-    # Filter df_ that all the columns are not null
-    print(ref_cols + test_cols + ws_cols + wd_cols)
-    df_ = df_.filter(pl.all_horizontal(pl.col(ref_cols + test_cols + ws_cols + wd_cols).is_not_null()))
+    # Filter df_ to remove null values
+    null_filter = filter_all_nulls if remove_all_nulls else filter_any_nulls
+    df_ = null_filter(df_, ref_cols, test_cols, ws_cols, wd_cols)
+    if len(df_) == 0:
+        raise RuntimeError("After removing nulls, no data remains for computation.")
 
     # If wd_bin_overlap_radius is not zero, add reflected rows
     if wd_bin_overlap_radius > 0.:
 
         # Need to obtain the wd column now rather than during binning
-        df_ = add_wd(df_, wd_cols)
+        df_ = add_wd(df_, wd_cols, remove_all_nulls)
 
         # Add reflected rows
         edges = np.arange(wd_min, wd_max + wd_step, wd_step)
         df_ = add_reflected_rows(df_, edges, wd_bin_overlap_radius)
 
     # Assign the wd/ws bins
-    df_ = add_ws_bin(df_, ws_cols, ws_step, ws_min, ws_max)
-    df_ = add_wd_bin(df_, wd_cols, wd_step, wd_min, wd_max)
+    df_ = add_ws_bin(df_, ws_cols, ws_step, ws_min, ws_max, remove_all_nulls=remove_all_nulls)
+    df_ = add_wd_bin(df_, wd_cols, wd_step, wd_min, wd_max, remove_all_nulls=remove_all_nulls)
 
 
 
@@ -102,13 +122,15 @@ def _compute_energy_ratio_single(df_,
         .agg([pl.mean("pow_ref"), pl.mean("pow_test"),pl.count()]) 
         .with_columns(
             [
-                pl.col('count').min().over(bin_cols_without_df_name).alias('count_min')#, # Find the min across df_name
+                # Get the weighting by counts
+                pl.col('count').min().over(bin_cols_without_df_name).alias('count_weight') if weight_by == 'min' else
+                pl.col('count').sum().over(bin_cols_without_df_name).alias('count_weight')
             ]
         )
         .with_columns(
             [
-                pl.col('pow_ref').mul(pl.col('count_min')).alias('ref_energy'), # Compute the reference energy
-                pl.col('pow_test').mul(pl.col('count_min')).alias('test_energy'), # Compute the test energy
+                pl.col('pow_ref').mul(pl.col('count_weight')).alias('ref_energy'), # Compute the reference energy
+                pl.col('pow_test').mul(pl.col('count_weight')).alias('test_energy'), # Compute the test energy
             ]
         )
         .groupby(['wd_bin','df_name'], maintain_order=True)
@@ -146,11 +168,13 @@ def _compute_energy_ratio_bootstrap(er_in,
                          ws_min = 0.0,
                          ws_max = 50.0,
                          bin_cols_in = ['wd_bin','ws_bin'],
+                         weight_by = 'min', #min, sum
                          wd_bin_overlap_radius = 0.,
                          uplift_pairs = [],
                          uplift_names = [],
                          N = 1,
-                         percentiles=[5., 95.]
+                         percentiles=[5., 95.],
+                         remove_all_nulls=False,
                          ):
 
     """
@@ -169,6 +193,9 @@ def _compute_energy_ratio_bootstrap(er_in,
         ws_min (float): The minimum wind speed to use.
         ws_max (float): The maximum wind speed to use.
         bin_cols_in (list[str]): A list of column names to use for the wind speed and wind direction bins.
+        weight_by (str): How to weight the energy ratio; options are 'min' or 'sum'. 'min' means
+            the minimum count across the dataframes is used to weight the energy ratio. 'sum' means the sum of the counts
+            across the dataframes is used to weight the energy ratio. Defaults to 'min'.
         wd_bin_overlap_radius (float): The distance in degrees one wd bin overlaps into the next, must be 
             less or equal to half the value of wd_step
         uplift_pairs: (list[tuple]): List of pairs of df_names to compute uplifts for. Each element 
@@ -178,6 +205,12 @@ def _compute_energy_ratio_bootstrap(er_in,
         uplift_names: (list[str]): Names for the uplift columns, following the order of the 
             pairs specified in uplift_pairs. If None, will default to "uplift_df_name1_df_name2"
         N (int): The number of bootstrap samples to use.
+        percentiles: (list or None): percentiles to use when returning energy ratio bounds. 
+            If specified as None with N > 1 (bootstrapping), defaults to [5, 95].
+        remove_all_nulls: (bool): Construct reference and test by strictly requiring all data to be
+            available. If False, a minimum of one data point from ref_cols, test_cols, wd_cols, and ws_cols
+            must be available to compute the bin. Defaults to False.
+
 
     Returns:
         pl.DataFrame: A dataframe containing the energy ratio between the two sets of turbines.
@@ -199,9 +232,11 @@ def _compute_energy_ratio_bootstrap(er_in,
                         ws_min,
                         ws_max,
                         bin_cols_in,
+                        weight_by,
                         wd_bin_overlap_radius,
                         uplift_pairs,
-                        uplift_names
+                        uplift_names,
+                        remove_all_nulls
                         ) for i in range(N)])
 
     bound_names = er_in.df_names + uplift_names
@@ -231,11 +266,13 @@ def compute_energy_ratio(er_in: EnergyRatioInput,
                          ws_min = 0.0,
                          ws_max = 50.0,
                          bin_cols_in = ['wd_bin','ws_bin'],
+                         weight_by = 'min', #min or sum
                          wd_bin_overlap_radius = 0.,
                          uplift_pairs = None,
                          uplift_names = None,
                          N = 1,
-                         percentiles = None
+                         percentiles = None,
+                         remove_all_nulls = False
                          )-> EnergyRatioOutput:
 
     """
@@ -257,6 +294,9 @@ def compute_energy_ratio(er_in: EnergyRatioInput,
         ws_min (float): The minimum wind speed to use.
         ws_max (float): The maximum wind speed to use.
         bin_cols_in (list[str]): A list of column names to use for the wind speed and wind direction bins.
+        weight_by (str): How to weight the energy ratio; options are 'min' or 'sum'. 'min' means
+            the minimum count across the dataframes is used to weight the energy ratio. 'sum' means the sum of the counts
+            across the dataframes is used to weight the energy ratio. Defaults to 'min'.
         wd_bin_overlap_radius (float): The distance in degrees one wd bin overlaps into the next, must be 
             less or equal to half the value of wd_step
         uplift_pairs: (list[tuple]): List of pairs of df_names to compute uplifts for. Each element 
@@ -268,6 +308,9 @@ def compute_energy_ratio(er_in: EnergyRatioInput,
         N (int): The number of bootstrap samples to use.
         percentiles: (list or None): percentiles to use when returning energy ratio bounds. 
             If specified as None with N > 1 (bootstrapping), defaults to [5, 95].
+        remove_all_nulls: (bool): Construct reference and test by strictly requiring all data to be
+            available. If False, a minimum of one data point from ref_cols, test_cols, wd_cols, and ws_cols
+            must be available to compute the bin. Defaults to False.
 
     Returns:
         EnergyRatioOutput: An EnergyRatioOutput object containing the energy ratio between the two sets of turbines.
@@ -277,55 +320,31 @@ def compute_energy_ratio(er_in: EnergyRatioInput,
     # Get the polars dataframe from within the er_in
     df_ = er_in.get_df()
 
-    # Check that the inputs are valid
-    # If use_predefined_ref is True, df_ must have a column named 'pow_ref'
-    if use_predefined_ref:
-        if 'pow_ref' not in df_.columns:
-            raise ValueError('df_ must have a column named pow_ref when use_predefined_ref is True')
-        # If ref_turbines supplied, warn user that it will be ignored
-        if ref_turbines is not None:
-            warnings.warn('ref_turbines will be ignored when use_predefined_ref is True')
-    else:
-        # ref_turbine must be supplied
-        if ref_turbines is None:
-            raise ValueError('ref_turbines must be supplied when use_predefined_ref is False')
-
-    # If use_predefined_ws is True, df_ must have a column named 'ws'
-    if use_predefined_ws:
-        if 'ws' not in df_.columns:
-            raise ValueError('df_ must have a column named ws when use_predefined_ws is True')
-        # If ws_turbines supplied, warn user that it will be ignored
-        if ws_turbines is not None:
-            warnings.warn('ws_turbines will be ignored when use_predefined_ws is True')
-    else:
-        # ws_turbine must be supplied
-        if ws_turbines is None:
-            raise ValueError('ws_turbines must be supplied when use_predefined_ws is False')
-
-    # If use_predefined_wd is True, df_ must have a column named 'wd'
-    if use_predefined_wd:
-        if 'wd' not in df_.columns:
-            raise ValueError('df_ must have a column named wd when use_predefined_wd is True')
-        # If wd_turbines supplied, warn user that it will be ignored
-        if wd_turbines is not None:
-            warnings.warn('wd_turbines will be ignored when use_predefined_wd is True')
-    else:
-        # wd_turbine must be supplied
-        if wd_turbines is None:
-            raise ValueError('wd_turbines must be supplied when use_predefined_wd is False')
-
-
-    # Confirm that test_turbines is a list of ints or a numpy array of ints
-    if not isinstance(test_turbines, list) and not isinstance(test_turbines, np.ndarray):
-        raise ValueError('test_turbines must be a list or numpy array of ints')
-
-    # Confirm that test_turbines is not empty  
-    if len(test_turbines) == 0:
-        raise ValueError('test_turbines cannot be empty')
-
-    # Confirm that wd_bin_overlap_radius is less than or equal to wd_step/2
-    if wd_bin_overlap_radius > wd_step/2:
-        raise ValueError('wd_bin_overlap_radius must be less than or equal to wd_step/2')
+    # Check that inputs are valid
+    check_compute_energy_ratio_inputs(
+        df_,
+        ref_turbines,
+        test_turbines,
+        wd_turbines,
+        ws_turbines,
+        use_predefined_ref,
+        use_predefined_wd,
+        use_predefined_ws,
+        wd_step,
+        wd_min,
+        wd_max,
+        ws_step,
+        ws_min,
+        ws_max,
+        bin_cols_in,
+        weight_by,
+        wd_bin_overlap_radius,
+        uplift_pairs,
+        uplift_names,
+        N,
+        percentiles,
+        remove_all_nulls
+    )
 
      # Set up the column names for the reference and test power
     if not use_predefined_ref:
@@ -380,9 +399,11 @@ def compute_energy_ratio(er_in: EnergyRatioInput,
                         ws_min,
                         ws_max,
                         bin_cols_in,
+                        weight_by,
                         wd_bin_overlap_radius,
                         uplift_pairs,
-                        uplift_names
+                        uplift_names,
+                        remove_all_nulls
                     )
     else:
         if percentiles is None:
@@ -403,6 +424,7 @@ def compute_energy_ratio(er_in: EnergyRatioInput,
                             ws_min,
                             ws_max,
                             bin_cols_in,
+                            weight_by,
                             wd_bin_overlap_radius,
                             uplift_pairs,
                             uplift_names,
@@ -427,6 +449,7 @@ def compute_energy_ratio(er_in: EnergyRatioInput,
                                 ws_min,
                                 ws_max,
                                 bin_cols_in,
+                                weight_by,
                                 wd_bin_overlap_radius,
                                 N)
 

diff --git a/flasc/energy_ratio/energy_ratio_input.py b/flasc/energy_ratio/energy_ratio_input.py
@@ -4,7 +4,6 @@
 
 from typing import Optional, Dict, List, Any, Tuple, Union
 
-from flasc.energy_ratio.energy_ratio_utilities import add_ws_bin, add_wd_bin
 from flasc.dataframe_operations.dataframe_manipulations import df_reduce_precision