diff --git a/flasc/energy_ratio/__init__.py b/flasc/energy_ratio/__init__.py index d30c1e6f..30ebde4a 100644 --- a/flasc/energy_ratio/__init__.py +++ b/flasc/energy_ratio/__init__.py @@ -13,5 +13,6 @@ energy_ratio_gain, energy_ratio_suite, energy_ratio_visualization, - energy_ratio_wd_bias_estimation + energy_ratio_wd_bias_estimation, + energy_ratio_polars ) \ No newline at end of file diff --git a/flasc/energy_ratio/demo_capabilities.ipynb b/flasc/energy_ratio/demo_capabilities.ipynb new file mode 100644 index 00000000..1b74d29f --- /dev/null +++ b/flasc/energy_ratio/demo_capabilities.ipynb @@ -0,0 +1,1064 @@ +{ + "cells": [ + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Demo polars capabilities and interactively develop it" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "\n", + "import numpy as np\n", + "import matplotlib.pyplot as plt\n", + "from pathlib import Path\n", + "import pandas as pd\n", + "import polars as pl\n", + "import seaborn as sns\n", + "\n", + "from floris import tools as wfct\n", + "from floris.utilities import wrap_360\n", + "\n", + "\n", + "from flasc.energy_ratio import energy_ratio_suite\n", + "from flasc.energy_ratio import energy_ratio_polars as erp\n", + "\n", + "from flasc.visualization import plot_layout_with_waking_directions, plot_binned_mean_and_ci\n", + "\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "N = 2" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Use FLORIS to generate a wake steering data set" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "/Users/pfleming/Projects/FLORIS/flasc/flasc/energy_ratio/../examples/floris_input_artificial/gch.yaml\n" + ] + }, + { + "data": { + 
"text/plain": [ + "" + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "\n", + "\n", + "file_path = Path.cwd()\n", + "fi_path = file_path / \"../examples/floris_input_artificial/gch.yaml\"\n", + "print(fi_path)\n", + "fi = wfct.floris_interface.FlorisInterface(fi_path)\n", + "fi.reinitialize(layout_x = [0, 0, 5*126], layout_y = [5*126, 0, 0])\n", + "\n", + "# # Show the wind farm\n", + "plot_layout_with_waking_directions(fi)\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Num Points 16000\n" + ] + }, + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "# Create a time history of points where the wind speed and wind direction step different combinations\n", + "ws_points = np.arange(5.0,10.0,0.25)\n", + "wd_points = np.arange(250.0, 290.0, 0.25,)\n", + "num_points_per_combination = 5 # How many \"seconds\" per combination\n", + "\n", + "# I know this is dumb but will come back, can't quite work out the numpy version\n", + "ws_array = []\n", + "wd_array = []\n", + "for ws in ws_points:\n", + " for wd in wd_points:\n", + " for i in range(num_points_per_combination):\n", + " ws_array.append(ws)\n", + " wd_array.append(wd)\n", + "t = np.arange(len(ws_array))\n", + "\n", + "print(f'Num Points {len(t)}')\n", + "\n", + "fig, axarr = plt.subplots(2,1,sharex=True)\n", + "axarr[0].plot(t, ws_array,label='Wind Speed')\n", + "axarr[0].set_ylabel('m/s')\n", + "axarr[0].legend()\n", + "axarr[0].grid(True)\n", + "axarr[1].plot(t, wd_array,label='Wind Direction')\n", + "axarr[1].set_ylabel('deg')\n", + "axarr[1].legend()\n", + "axarr[1].grid(True)" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [], + "source": [ + "# Compute the power of the second turbine for two cases\n", + "# Baseline: The front turbine is aligned to the wind\n", + "# WakeSteering: The front turbine is yawed 25 deg\n", + "fi.reinitialize(wind_speeds=ws_array, wind_directions=wd_array, time_series=True)\n", + "fi.calculate_wake()\n", + "power_baseline_ref = fi.get_turbine_powers().squeeze()[:,0].flatten() / 1000.\n", + "power_baseline_control = fi.get_turbine_powers().squeeze()[:,1].flatten() / 1000.\n", + "power_baseline_downstream = fi.get_turbine_powers().squeeze()[:,2].flatten() / 1000.\n", + "\n", + "yaw_angles = np.zeros([len(t),1,3]) * 25\n", + "yaw_angles[:,:,1] = 25 # Set control turbine yaw angles to 25 deg\n", + "fi.calculate_wake(yaw_angles=yaw_angles)\n", + "power_wakesteering_ref = 
fi.get_turbine_powers().squeeze()[:,0].flatten() / 1000.\n", + "power_wakesteering_control = fi.get_turbine_powers().squeeze()[:,1].flatten() /1000.\n", + "power_wakesteering_downstream = fi.get_turbine_powers().squeeze()[:,2].flatten() /1000." + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Pandas version (for time)" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [], + "source": [ + "# Build up the data frames needed for energy ratio suite\n", + "df_baseline_pd = pd.DataFrame({\n", + " 'wd':wd_array,\n", + " 'ws':ws_array,\n", + " 'pow_ref':power_baseline_ref,\n", + " 'pow_000':power_baseline_ref, \n", + " 'pow_001':power_baseline_control,\n", + " 'pow_002':power_baseline_downstream\n", + "})\n", + "\n", + "df_wakesteering_pd = pd.DataFrame({\n", + " 'wd':wd_array,\n", + " 'ws':ws_array,\n", + " 'pow_ref':power_wakesteering_ref,\n", + " 'pow_000':power_wakesteering_ref, \n", + " 'pow_001':power_wakesteering_control,\n", + " 'pow_002':power_wakesteering_downstream\n", + "})\n", + "\n", + "# Create alternative versions of each of the above dataframes where the wd/ws are perturbed by noise\n", + "df_baseline_noisy_pd = pd.DataFrame({\n", + " 'wd':wd_array + np.random.randn(len(wd_array))*5,\n", + " 'ws':ws_array + np.random.randn(len(ws_array)),\n", + " 'pow_ref':power_baseline_ref,\n", + " 'pow_000':power_baseline_ref, \n", + " 'pow_001':power_baseline_control,\n", + " 'pow_002':power_baseline_downstream\n", + "})\n", + "\n", + "df_wakesteering_noisy_pd = pd.DataFrame({\n", + " 'wd':wd_array + np.random.randn(len(wd_array))*5,\n", + " 'ws':ws_array + np.random.randn(len(ws_array)),\n", + " 'pow_ref':power_wakesteering_ref,\n", + " 'pow_000':power_wakesteering_ref, \n", + " 'pow_001':power_wakesteering_control,\n", + " 'pow_002':power_wakesteering_downstream\n", + "})\n", + "\n", + "color_palette = sns.color_palette(\"Paired\",4)[::-1]" + ] + }, + { + "cell_type": "code", + 
"execution_count": 7, + "metadata": {}, + "outputs": [], + "source": [ + "# Initialize the energy ratio suite object and add each dataframe\n", + "# separately. \n", + "fsc = energy_ratio_suite.energy_ratio_suite()\n", + "# fsc.add_df(df_baseline_pd, 'Baseline', color_palette[0])\n", + "# fsc.add_df(df_wakesteering_pd, 'WakeSteering', color_palette[1])\n", + "fsc.add_df(df_baseline_noisy_pd, 'Baseline (Noisy)', color_palette[2])\n", + "fsc.add_df(df_wakesteering_noisy_pd, 'WakeSteering (Noisy)', color_palette[3])" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Dataframes differ in wd and ws. Rebalancing.\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/Users/pfleming/opt/anaconda3/envs/floris/lib/python3.10/site-packages/numpy/lib/nanfunctions.py:1559: RuntimeWarning: All-NaN slice encountered\n", + " r, k = function_base._ureduce(a,\n", + "/Users/pfleming/opt/anaconda3/envs/floris/lib/python3.10/site-packages/numpy/lib/nanfunctions.py:1559: RuntimeWarning: All-NaN slice encountered\n", + " r, k = function_base._ureduce(a,\n", + "/Users/pfleming/opt/anaconda3/envs/floris/lib/python3.10/site-packages/numpy/lib/nanfunctions.py:1559: RuntimeWarning: All-NaN slice encountered\n", + " r, k = function_base._ureduce(a,\n", + "/Users/pfleming/opt/anaconda3/envs/floris/lib/python3.10/site-packages/numpy/lib/nanfunctions.py:1559: RuntimeWarning: All-NaN slice encountered\n", + " r, k = function_base._ureduce(a,\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "done\n", + "Dataframes differ in wd and ws. 
Rebalancing.\n" + ] + }, + { + "data": { + "text/plain": [ + "[{'name': 'WakeSteering (Noisy)/Baseline (Noisy)',\n", + " 'color': (0.12156862745098039, 0.47058823529411764, 0.7058823529411765),\n", + " 'er_results': baseline baseline_lb baseline_ub wd_bin bin_count\n", + " 0 0.993021 0.991214 0.992926 237.0 4\n", + " 1 0.994481 0.994541 0.995625 239.0 11\n", + " 2 0.988364 0.988472 0.990429 241.0 34\n", + " 3 0.990310 0.990412 0.992252 243.0 59\n", + " 4 0.986453 0.981036 0.986168 245.0 127\n", + " 5 0.981644 0.981765 0.983944 247.0 220\n", + " 6 0.969089 0.969207 0.971333 249.0 306\n", + " 7 0.974457 0.974508 0.975429 251.0 443\n", + " 8 0.954166 0.953161 0.954113 253.0 593\n", + " 9 0.929688 0.929786 0.931557 255.0 641\n", + " 10 0.923411 0.924082 0.936161 257.0 761\n", + " 11 0.916194 0.892904 0.914968 259.0 769\n", + " 12 0.908919 0.910919 0.946925 261.0 793\n", + " 13 0.962032 0.962271 0.966579 263.0 760\n", + " 14 1.007537 0.964714 1.005283 265.0 835\n", + " 15 1.149424 1.150844 1.176417 267.0 748\n", + " 16 1.295031 1.297871 1.348999 269.0 762\n", + " 17 1.344629 1.274221 1.340923 271.0 763\n", + " 18 1.457927 1.458811 1.474730 273.0 800\n", + " 19 1.363313 1.364282 1.381737 275.0 751\n", + " 20 1.311677 1.314399 1.363382 277.0 776\n", + " 21 1.201737 1.162489 1.199671 279.0 789\n", + " 22 1.129018 1.121481 1.128621 281.0 763\n", + " 23 1.108013 1.107674 1.107995 283.0 710\n", + " 24 1.058602 1.049012 1.058098 285.0 659\n", + " 25 1.038914 1.034224 1.038668 287.0 551\n", + " 26 1.023737 1.023991 1.028571 289.0 460\n", + " 27 1.010827 1.010936 1.012893 291.0 325\n", + " 28 1.008223 1.008413 1.011834 293.0 206\n", + " 29 1.005245 1.005332 1.006900 295.0 121\n", + " 30 1.011489 1.011803 1.017442 297.0 53\n", + " 31 1.012907 1.003173 1.012395 299.0 20\n", + " 32 1.001096 1.000699 1.001075 301.0 5\n", + " 33 1.000172 1.000101 1.000168 303.0 3,\n", + " 'df_freq': ws_bin wd_bin ws_bin_edges wd_bin_edges freq\n", + " 0 6.5 233.0 [6.0, 7.0) [232.0, 234.0) 0\n", + " 1 
9.5 233.0 [9.0, 10.0) [232.0, 234.0) 0\n", + " 2 10.5 235.0 [10.0, 11.0) [234.0, 236.0) 0\n", + " 3 5.5 237.0 [5.0, 6.0) [236.0, 238.0) 1\n", + " 4 7.5 237.0 [7.0, 8.0) [236.0, 238.0) 0\n", + " .. ... ... ... ... ...\n", + " 335 2.5 297.0 [2.0, 3.0) [296.0, 298.0) 0\n", + " 336 8.5 299.0 [8.0, 9.0) [298.0, 300.0) 0\n", + " 337 9.5 301.0 [9.0, 10.0) [300.0, 302.0) 0\n", + " 338 10.5 301.0 [10.0, 11.0) [300.0, 302.0) 0\n", + " 339 5.5 303.0 [5.0, 6.0) [302.0, 304.0) 0\n", + " \n", + " [340 rows x 5 columns],\n", + " 'er_test_turbines': [2],\n", + " 'er_wd_step': 2.0,\n", + " 'er_ws_step': 1.0,\n", + " 'er_wd_bin_width': 2.0,\n", + " 'er_bootstrap_N': 2}]" + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "\n", + "\n", + "# Print out the energy ratio\n", + "# fsc.print_dfs()\n", + "\n", + "\n", + "# Calculate and plot the energy ratio for the downstream turbine [2]\n", + "# With respect to reference turbine [0]\n", + "# datasets with uncertainty quantification using 50 bootstrap samples\n", + "fsc.get_energy_ratios(\n", + " test_turbines=[2],\n", + " wd_step=2.0,\n", + " ws_step=1.0,\n", + " N=N,\n", + " percentiles=[5., 95.],\n", + " verbose=False,\n", + " num_blocks=10\n", + ")\n", + "print('done')\n", + "# fsc.plot_energy_ratios(superimpose=True)\n", + "\n", + "fsc.get_energy_ratios_gain(\n", + " test_turbines=[2],\n", + " wd_step=2.0,\n", + " ws_step=1.0,\n", + " N=N,\n", + " percentiles=[5., 95.],\n", + " verbose=False,\n", + " num_blocks=10\n", + ")" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Polars implementation" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [], + "source": [ + "# # Build the dataframes\n", + "# # Build up the data frames needed for energy ratio suite\n", + "# df_baseline = pd.DataFrame({\n", + "# 'wd_000':wd_array,\n", + "# 'ws_000':ws_array,\n", + "# # 
'pow_ref':power_baseline_ref,\n", + "# 'pow_000':power_baseline_ref, \n", + "# 'pow_001':power_baseline_control,\n", + "# 'pow_002':power_baseline_downstream\n", + "# })\n", + "\n", + "# df_wakesteering = pd.DataFrame({\n", + "# 'wd_000':wd_array,\n", + "# 'ws_000':ws_array,\n", + "# # 'pow_ref':power_wakesteering_ref,\n", + "# 'pow_000':power_wakesteering_ref, \n", + "# 'pow_001':power_wakesteering_control,\n", + "# 'pow_002':power_wakesteering_downstream\n", + "# })\n", + "\n", + "# # Create alternative versions of each of the above dataframes where the wd/ws are perturbed by noise\n", + "# df_baseline_noisy = pd.DataFrame({\n", + "# 'wd_000':df_baseline_noisy_pd.wd.values,# wd_array + np.random.randn(len(wd_array))*5,\n", + "# 'ws_000':df_baseline_noisy_pd.ws.values,# ws_array + np.random.randn(len(ws_array)),\n", + "# # 'pow_ref':power_baseline_ref,\n", + "# 'pow_000':power_baseline_ref, \n", + "# 'pow_001':power_baseline_control,\n", + "# 'pow_002':power_baseline_downstream\n", + "# })\n", + "\n", + "# df_wakesteering_noisy = pl.DataFrame({\n", + "# 'wd_000':df_wakesteering_noisy_pd.wd.values,# wd_array + np.random.randn(len(wd_array))*5,\n", + "# 'ws_000':df_wakesteering_noisy_pd.ws.values,#ws_array + np.random.randn(len(ws_array)),\n", + "# # 'pow_ref':power_wakesteering_ref,\n", + "# 'pow_000':power_wakesteering_ref, \n", + "# 'pow_001':power_wakesteering_control,\n", + "# 'pow_002':power_wakesteering_downstream\n", + "# })" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "shape: (32_000, 8)
wdwspow_refpow_000pow_001pow_002df_nameblock
f64f64f64f64f64f64stri64
246.2981134.525955383.695142383.695142383.695142383.588139"baseline"0
251.7289275.36309383.695142383.695142383.695142383.588139"baseline"0
254.2108195.610068383.695142383.695142383.695142383.588139"baseline"0
244.1505453.934049383.695142383.695142383.695142383.588139"baseline"0
252.8023994.285775383.695142383.695142383.695142383.588139"baseline"0
251.5398817.4119383.695142383.695142383.695142383.561983"baseline"0
259.6482764.711346383.695142383.695142383.695142383.561983"baseline"0
254.7765194.026199383.695142383.695142383.695142383.561983"baseline"0
258.4587365.951735383.695142383.695142383.695142383.561983"baseline"0
249.7148554.737944383.695142383.695142383.695142383.561983"baseline"0
258.6655515.674499383.695142383.695142383.695142383.529944"baseline"0
257.0476215.11975383.695142383.695142383.695142383.529944"baseline"0
292.36212910.2366093063.4467363063.4467362547.6563853063.440125"wakesteering"9
285.16445711.1661873063.4467363063.4467362547.6563853063.440125"wakesteering"9
295.5962749.104363063.4467363063.4467362547.6563853063.441878"wakesteering"9
297.7518111.2263383063.4467363063.4467362547.6563853063.441878"wakesteering"9
285.869410.3785453063.4467363063.4467362547.6563853063.441878"wakesteering"9
295.95339210.4461323063.4467363063.4467362547.6563853063.441878"wakesteering"9
288.8381499.4008783063.4467363063.4467362547.6563853063.441878"wakesteering"9
293.20595210.8687173063.4467363063.4467362547.6563853063.443181"wakesteering"9
296.6732579.3469843063.4467363063.4467362547.6563853063.443181"wakesteering"9
286.38269510.8019143063.4467363063.4467362547.6563853063.443181"wakesteering"9
284.8069878.1595853063.4467363063.4467362547.6563853063.443181"wakesteering"9
283.7917259.5220043063.4467363063.4467362547.6563853063.443181"wakesteering"9
" + ], + "text/plain": [ + "shape: (32_000, 8)\n", + "┌────────────┬───────────┬─────────────┬────────────┬────────────┬────────────┬────────────┬───────┐\n", + "│ wd ┆ ws ┆ pow_ref ┆ pow_000 ┆ pow_001 ┆ pow_002 ┆ df_name ┆ block │\n", + "│ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- │\n", + "│ f64 ┆ f64 ┆ f64 ┆ f64 ┆ f64 ┆ f64 ┆ str ┆ i64 │\n", + "╞════════════╪═══════════╪═════════════╪════════════╪════════════╪════════════╪════════════╪═══════╡\n", + "│ 246.298113 ┆ 4.525955 ┆ 383.695142 ┆ 383.695142 ┆ 383.695142 ┆ 383.588139 ┆ baseline ┆ 0 │\n", + "│ 251.728927 ┆ 5.36309 ┆ 383.695142 ┆ 383.695142 ┆ 383.695142 ┆ 383.588139 ┆ baseline ┆ 0 │\n", + "│ 254.210819 ┆ 5.610068 ┆ 383.695142 ┆ 383.695142 ┆ 383.695142 ┆ 383.588139 ┆ baseline ┆ 0 │\n", + "│ 244.150545 ┆ 3.934049 ┆ 383.695142 ┆ 383.695142 ┆ 383.695142 ┆ 383.588139 ┆ baseline ┆ 0 │\n", + "│ … ┆ … ┆ … ┆ … ┆ … ┆ … ┆ … ┆ … │\n", + "│ 296.673257 ┆ 9.346984 ┆ 3063.446736 ┆ 3063.44673 ┆ 2547.65638 ┆ 3063.44318 ┆ wakesteeri ┆ 9 │\n", + "│ ┆ ┆ ┆ 6 ┆ 5 ┆ 1 ┆ ng ┆ │\n", + "│ 286.382695 ┆ 10.801914 ┆ 3063.446736 ┆ 3063.44673 ┆ 2547.65638 ┆ 3063.44318 ┆ wakesteeri ┆ 9 │\n", + "│ ┆ ┆ ┆ 6 ┆ 5 ┆ 1 ┆ ng ┆ │\n", + "│ 284.806987 ┆ 8.159585 ┆ 3063.446736 ┆ 3063.44673 ┆ 2547.65638 ┆ 3063.44318 ┆ wakesteeri ┆ 9 │\n", + "│ ┆ ┆ ┆ 6 ┆ 5 ┆ 1 ┆ ng ┆ │\n", + "│ 283.791725 ┆ 9.522004 ┆ 3063.446736 ┆ 3063.44673 ┆ 2547.65638 ┆ 3063.44318 ┆ wakesteeri ┆ 9 │\n", + "│ ┆ ┆ ┆ 6 ┆ 5 ┆ 1 ┆ ng ┆ │\n", + "└────────────┴───────────┴─────────────┴────────────┴────────────┴────────────┴────────────┴───────┘" + ] + }, + "execution_count": 10, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "\n", + "\n", + "df_energy = erp.get_energy_table([df_baseline_noisy_pd, df_wakesteering_noisy_pd], ['baseline', 'wakesteering'])\n", + "df_energy" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "['wd', 'ws', 'pow_ref', 'pow_000', 'pow_001', 'pow_002', 
'df_name', 'block']" + ] + }, + "execution_count": 11, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df_energy.columns" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [], + "source": [ + "# erp.compute_energy_ratio(df_energy, [0], [2], df_names=['Baseline', 'WakeSteering'])\n", + "\n", + "ero = erp.compute_energy_ratio(df_energy,\n", + " ['baseline', 'wakesteering'],\n", + " test_turbines=[2],\n", + " use_predefined_ref=True,\n", + " use_predefined_wd=True,\n", + " use_predefined_ws=True,\n", + " N=N)" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "shape: (38, 12)
wd_binbaselinewakesteeringupliftbaseline_ubwakesteering_ubuplift_ubbaseline_lbwakesteering_lbuplift_lbcount_baselinecount_wakesteering
f64f64f64f64f64f64f64f64f64f64u32u32
233.00.999801nullnull0.999801nullnull0.999782nullnull2null
235.00.999893nullnull0.999893nullnull0.999893nullnull1null
237.00.9981070.980529-1.7611420.9981070.980529-1.6923850.9974090.980529-1.76114264
239.00.9992780.995383-0.3898290.9992780.995383-0.3898290.999070.994991-0.4082021114
241.00.9991230.986547-1.2587510.9991230.986547-1.2587510.9990530.980091-1.8979393437
243.00.9960550.986839-0.9253040.9962040.986839-0.9253040.9960550.986731-0.9509257259
245.00.9913950.977964-1.3547320.9913950.978022-1.2529850.9904310.977964-1.354732127150
247.00.9917690.973564-1.8355990.9921390.974896-1.7379030.9917690.973564-1.835599220265
249.00.9865780.956335-3.0654070.9865780.960221-2.5987680.9858410.956335-3.065407346306
251.00.9736970.948914-2.5452560.9747430.95026-2.5116570.9736970.948914-2.545256498443
253.00.9522130.908568-4.5835410.9522130.91254-3.9342290.9499120.908568-4.583541616593
255.00.9368170.871192-7.0050130.9378710.871192-7.0050130.9368170.869496-7.290522641650
285.00.9306580.9849675.8356140.9352060.9852325.8356140.9306580.9849675.349238685659
287.00.9546420.9918233.8947930.9555240.9918233.8947930.9546420.9909573.708271567551
289.00.9738160.9969342.3738760.9738160.9969342.4785270.9723340.9964332.373876460471
291.00.9869610.997651.0829640.9871790.9983061.1271640.9869610.997651.082964325378
293.00.9903130.9984570.8223440.9915380.9992320.8223440.9903130.9984570.776028211206
295.00.994410.9997710.5390730.994410.9998420.572280.9941530.9997710.539073138121
297.00.9885760.9999351.148970.9888950.9999351.148970.9885760.9999331.1162495359
299.00.9895410.9992160.9776490.9895410.9992590.982620.9895350.9992160.9776492020
301.00.9989640.9999590.0995250.9990780.9999590.0995250.9989640.9999590.088127515
303.00.9998530.9999930.0139590.9998530.9999930.0139590.9998530.9999910.0137734
305.00.999773nullnull0.999773nullnull0.999773nullnull2null
307.00.998718nullnull0.998718nullnull0.99841nullnull2null
" + ], + "text/plain": [ + "shape: (38, 12)\n", + "┌────────┬──────────┬────────────┬───────────┬───┬────────────┬───────────┬────────────┬────────────┐\n", + "│ wd_bin ┆ baseline ┆ wakesteeri ┆ uplift ┆ … ┆ wakesteeri ┆ uplift_lb ┆ count_base ┆ count_wake │\n", + "│ --- ┆ --- ┆ ng ┆ --- ┆ ┆ ng_lb ┆ --- ┆ line ┆ steering │\n", + "│ f64 ┆ f64 ┆ --- ┆ f64 ┆ ┆ --- ┆ f64 ┆ --- ┆ --- │\n", + "│ ┆ ┆ f64 ┆ ┆ ┆ f64 ┆ ┆ u32 ┆ u32 │\n", + "╞════════╪══════════╪════════════╪═══════════╪═══╪════════════╪═══════════╪════════════╪════════════╡\n", + "│ 233.0 ┆ 0.999801 ┆ null ┆ null ┆ … ┆ null ┆ null ┆ 2 ┆ null │\n", + "│ 235.0 ┆ 0.999893 ┆ null ┆ null ┆ … ┆ null ┆ null ┆ 1 ┆ null │\n", + "│ 237.0 ┆ 0.998107 ┆ 0.980529 ┆ -1.761142 ┆ … ┆ 0.980529 ┆ -1.761142 ┆ 6 ┆ 4 │\n", + "│ 239.0 ┆ 0.999278 ┆ 0.995383 ┆ -0.389829 ┆ … ┆ 0.994991 ┆ -0.408202 ┆ 11 ┆ 14 │\n", + "│ … ┆ … ┆ … ┆ … ┆ … ┆ … ┆ … ┆ … ┆ … │\n", + "│ 301.0 ┆ 0.998964 ┆ 0.999959 ┆ 0.099525 ┆ … ┆ 0.999959 ┆ 0.088127 ┆ 5 ┆ 15 │\n", + "│ 303.0 ┆ 0.999853 ┆ 0.999993 ┆ 0.013959 ┆ … ┆ 0.999991 ┆ 0.01377 ┆ 3 ┆ 4 │\n", + "│ 305.0 ┆ 0.999773 ┆ null ┆ null ┆ … ┆ null ┆ null ┆ 2 ┆ null │\n", + "│ 307.0 ┆ 0.998718 ┆ null ┆ null ┆ … ┆ null ┆ null ┆ 2 ┆ null │\n", + "└────────┴──────────┴────────────┴───────────┴───┴────────────┴───────────┴────────────┴────────────┘" + ] + }, + "execution_count": 13, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "ero.df_result" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": {}, + "outputs": [], + "source": [ + "_, df_freq_plot = ero._compute_df_freq()" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
wd_binws_bindf_namecount
0255.05.5baseline111
1243.05.5baseline11
2243.04.5baseline4
3247.06.5baseline40
4251.06.5baseline86
...............
625299.09.5wakesteering1
626279.011.5wakesteering12
627243.010.5wakesteering5
628249.011.5wakesteering5
629243.011.5wakesteering1
\n", + "

630 rows × 4 columns

\n", + "
" + ], + "text/plain": [ + " wd_bin ws_bin df_name count\n", + "0 255.0 5.5 baseline 111\n", + "1 243.0 5.5 baseline 11\n", + "2 243.0 4.5 baseline 4\n", + "3 247.0 6.5 baseline 40\n", + "4 251.0 6.5 baseline 86\n", + ".. ... ... ... ...\n", + "625 299.0 9.5 wakesteering 1\n", + "626 279.0 11.5 wakesteering 12\n", + "627 243.0 10.5 wakesteering 5\n", + "628 249.0 11.5 wakesteering 5\n", + "629 243.0 11.5 wakesteering 1\n", + "\n", + "[630 rows x 4 columns]" + ] + }, + "execution_count": 15, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df_freq_plot" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/Users/pfleming/Projects/FLORIS/flasc/flasc/energy_ratio/energy_ratio_polars.py:1160: UserWarning: *c* argument looks like a single numeric RGB or RGBA sequence, which should be avoided as value-mapping will have precedence in case its length matches with *x* & *y*. Please use the *color* keyword-argument or provide a 2D array with a single row if you intend to specify the same RGB or RGBA value for all points.\n", + " ax.scatter(df_unbinned[\"wd_bin\"], df_unbinned[\"ws_bin\"], c=color_dict[label],alpha=0.25, s=1)\n" + ] + }, + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "ero.plot()" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "ero.plot_uplift()" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "ero.plot(polar_plot=True, show_wind_speed_distrubution=False)" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Compare the energy ratio plots" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Compare the mean values" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 12, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "df_erb = ero.df_result\n", + "\n", + "axarr = fsc.plot_energy_ratios(superimpose=True)\n", + "\n", + "ax = axarr[0]\n", + "ax.plot(df_erb['wd_bin'], df_erb['baseline'], color='k',label='POLARS result (baseline)',ls='--')\n", + "ax.plot(df_erb['wd_bin'], df_erb['wakesteering'], color='k',label='POLARS result (wake steering)',ls='-')\n", + "\n", + "ax.legend()" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Compare uncertainty bounds" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "[]" + ] + }, + "execution_count": 13, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "fig ,ax = plt.subplots()\n", + "\n", + "# Plot pandas results with uncertainty\n", + "df_pandas_base = fsc.df_list[0]['er_results']\n", + "ax.plot(df_pandas_base['wd_bin'], df_pandas_base['baseline_lb'],'-', color='k', label='PANDAS lower bound')\n", + "ax.plot(df_pandas_base['wd_bin'], df_pandas_base['baseline_ub'],'--', color='k', label='PANDAS upper bound')\n", + "\n", + "ax.plot(df_erb['wd_bin'], df_erb['baseline_lb'],':', color='r', label='POLARS lower bound')\n", + "ax.plot(df_erb['wd_bin'], df_erb['baseline_ub'],':', color='orange', label='POLARS upper bound')\n" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Compare the gain values" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 14, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "fig ,ax = plt.subplots()\n", + "\n", + "# NOTE THAT PANDAS VERSION IS RATIO WHILE POLARS IS PERCENT CHANGE\n", + "\n", + "# Plot pandas results with uncertainty\n", + "df_pandas_base = fsc.df_list_gains[0]['er_results']\n", + "ax.plot(df_pandas_base['wd_bin'], df_pandas_base['baseline'],'-', color='k', label='PANDAS')\n", + "ax.fill_between(df_pandas_base['wd_bin'], df_pandas_base['baseline_lb'], df_pandas_base['baseline_ub'], color='k', alpha=0.2)\n", + "\n", + "ax.plot(df_erb['wd_bin'], df_erb['uplift'] * .01 + 1.0,'--', color='r', label='POLARS')\n", + "\n", + "ax.plot(df_erb['wd_bin'], df_erb['uplift_lb'] * .01 + 1.0,':', color='r', label='POLARS lower bound')\n", + "ax.plot(df_erb['wd_bin'], df_erb['uplift_ub'] * .01 + 1.0,':', color='orange', label='POLARS upper bound')\n", + "\n", + "ax.grid(True)\n", + "ax.axhline(1, color='k')\n" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Test out simley approach to calculating energy ratio uplift in regions" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "shape: (1, 3)
delta_energybase_test_energyuplift
f64f64f64
732234.6340549.6320e67.602095
" + ], + "text/plain": [ + "shape: (1, 3)\n", + "┌───────────────┬──────────────────┬──────────┐\n", + "│ delta_energy ┆ base_test_energy ┆ uplift │\n", + "│ --- ┆ --- ┆ --- │\n", + "│ f64 ┆ f64 ┆ f64 │\n", + "╞═══════════════╪══════════════════╪══════════╡\n", + "│ 732234.634054 ┆ 9.6320e6 ┆ 7.602095 │\n", + "└───────────────┴──────────────────┴──────────┘" + ] + }, + "execution_count": 15, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "\n", + "ero = erp.compute_uplift_in_region(df_energy,\n", + " ['baseline', 'wakesteering'],\n", + " test_turbines=[2],\n", + " use_predefined_ref=True,\n", + " use_predefined_wd=True,\n", + " use_predefined_ws=True)\n", + "\n", + "ero.df_result" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "shape: (1, 9)
delta_energy_expdelta_energy_ubdelta_energy_lbbase_test_energy_expbase_test_energy_ubbase_test_energy_lbuplift_expuplift_ubuplift_lb
f64f64f64f64f64f64f64f64f64
732234.634054931394.533294581022.7371359.6320e61.2662e77.5244e67.6020958.1043937.34586
" + ], + "text/plain": [ + "shape: (1, 9)\n", + "┌────────────┬────────────┬────────────┬────────────┬───┬────────────┬──────────┬─────────┬─────────┐\n", + "│ delta_ener ┆ delta_ener ┆ delta_ener ┆ base_test_ ┆ … ┆ base_test_ ┆ uplift_e ┆ uplift_ ┆ uplift_ │\n", + "│ gy_exp ┆ gy_ub ┆ gy_lb ┆ energy_exp ┆ ┆ energy_lb ┆ xp ┆ ub ┆ lb │\n", + "│ --- ┆ --- ┆ --- ┆ --- ┆ ┆ --- ┆ --- ┆ --- ┆ --- │\n", + "│ f64 ┆ f64 ┆ f64 ┆ f64 ┆ ┆ f64 ┆ f64 ┆ f64 ┆ f64 │\n", + "╞════════════╪════════════╪════════════╪════════════╪═══╪════════════╪══════════╪═════════╪═════════╡\n", + "│ 732234.634 ┆ 931394.533 ┆ 581022.737 ┆ 9.6320e6 ┆ … ┆ 7.5244e6 ┆ 7.602095 ┆ 8.10439 ┆ 7.34586 │\n", + "│ 054 ┆ 294 ┆ 135 ┆ ┆ ┆ ┆ ┆ 3 ┆ │\n", + "└────────────┴────────────┴────────────┴────────────┴───┴────────────┴──────────┴─────────┴─────────┘" + ] + }, + "execution_count": 19, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "\n", + "ero = erp.compute_uplift_in_region(df_energy,\n", + " ['baseline', 'wakesteering'],\n", + " test_turbines=[2],\n", + " use_predefined_ref=True,\n", + " use_predefined_wd=True,\n", + " use_predefined_ws=True,\n", + " N=20)\n", + "\n", + "ero.df_result" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "env_scada", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.4" + }, + "orig_nbformat": 4 + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/flasc/energy_ratio/energy_ratio_polars.py b/flasc/energy_ratio/energy_ratio_polars.py new file mode 100644 index 00000000..fc81e02a --- /dev/null +++ b/flasc/energy_ratio/energy_ratio_polars.py @@ -0,0 +1,1176 @@ +# This is a work in progress as we try to synthesize ideas from the +# table based methods and energy ratios back into one thing, 
def cut(col_name, edges):
    """
    Build a polars expression that maps a column onto bin-midpoint labels.

    Parameters:
        col_name (str): The name of the column to bin.
        edges (array-like): Monotonically increasing bin edges. A value is
            assigned to the first bin whose right edge is strictly greater
            than the value, so bins are closed on the left and open on the
            right.

    Returns:
        expression: A polars when/then expression. Each value is replaced by
            the midpoint of its bin; values below the first edge or at/above
            the last edge become null.
    """
    column = pl.col(col_name)

    # One label per bin: left edge plus half of the bin width
    midpoints = edges[:-1] + np.diff(edges) / 2.0

    # Anything below the first edge is out of range
    binned = pl.when(column < edges[0]).then(None)

    # Walk the right-hand edges in order; the first one exceeding the value wins
    for idx, midpoint in enumerate(midpoints):
        binned = binned.when(column < edges[idx + 1]).then(midpoint)

    # Values at or beyond the last edge fall through to null
    return binned.otherwise(None)
def bin_column(df_, col_name, bin_col_name, edges):
    """
    Bins the values in the specified column of a Polars DataFrame according to the given edges.

    Parameters:
        df_ (pl.DataFrame): The Polars DataFrame containing the column to bin.
        col_name (str): The name of the column to bin.
        bin_col_name (str): The name to give the new column containing the bin labels.
        edges (array-like): The edges of the bins. Values will be placed into the bin
                            whose left edge is the largest edge less than or equal to
                            the value, and whose right edge is the smallest edge
                            greater than the value.

    Returns:
        pl.DataFrame: A new Polars DataFrame with an additional column containing the bin labels.
    """
    return df_.with_columns(
        cut(col_name=col_name, edges=edges).alias(bin_col_name)
    )


def add_ws_bin(df_, ws_cols, ws_step=1.0, ws_min=-0.5, ws_max=50.0):
    """
    Add the ws_bin column to a dataframe, given which columns to average over
    and the step sizes to use

    Parameters:
        df_ (pl.DataFrame): The Polars DataFrame containing the column to bin.
        ws_cols (list[str]): The names of the columns to average across.
        ws_step (float): Step size for binning
        ws_min (float): Minimum wind speed (inclusive)
        ws_max (float): Maximum wind speed (exclusive)

    Returns:
        pl.DataFrame: A new Polars DataFrame with an additional ws_bin column
            holding the midpoint of the wind speed bin of each remaining row.
            Rows whose mean wind speed is null or outside [ws_min, ws_max)
            are removed.
    """

    edges = np.arange(ws_min, ws_max + ws_step, ws_step)

    df_with_mean_ws = (
        df_.with_columns(
            # Initially ws_bin is just the row-wise mean of the ws columns
            ws_bin=pl.concat_list(ws_cols).list.mean()
        )
        .filter(
            pl.all(pl.col(ws_cols).is_not_null())  # Select for all bin cols present
        )
        .filter(
            # The lower bound is inclusive so that a value sitting exactly on
            # ws_min lands in the first bin, matching the left-closed bins
            # produced by cut()
            (pl.col('ws_bin') >= ws_min)
            & (pl.col('ws_bin') < ws_max)
            & (pl.col('ws_bin').is_not_null())
        )
    )

    # Replace the mean wind speed by the midpoint of the bin it falls in
    return bin_column(df_with_mean_ws, 'ws_bin', 'ws_bin', edges)
def add_wd_bin(df_, wd_cols, wd_step=2.0, wd_min=0.0, wd_max=360.0):
    """
    Add the wd_bin column to a dataframe, given which columns to average over
    and the step sizes to use

    Parameters:
        df_ (pl.DataFrame): The Polars DataFrame containing the column to bin.
        wd_cols (list[str]): The names of the columns to average across.
        wd_step (float): Step size for binning
        wd_min (float): Minimum wind direction (inclusive)
        wd_max (float): Maximum wind direction (exclusive)

    Returns:
        pl.DataFrame: A new Polars DataFrame with an additional wd_bin column
            holding the midpoint of the wind direction bin of each remaining
            row. Rows whose mean direction is null or outside
            [wd_min, wd_max) are removed.
    """

    edges = np.arange(wd_min, wd_max + wd_step, wd_step)

    # Gather up intermediate column names and final column names
    wd_cols_cos = [c + '_cos' for c in wd_cols]
    wd_cols_sin = [c + '_sin' for c in wd_cols]
    cols_to_return = df_.columns
    if 'wd_bin' not in cols_to_return:
        cols_to_return = cols_to_return + ['wd_bin']

    df_with_mean_wd = (
        df_.filter(
            pl.all(pl.col(wd_cols).is_not_null())  # Select for all bin cols present
        )
        # Add the cosine and sine of every direction column (degrees -> radians)
        .with_columns(
            [
                pl.col(wd_cols).mul(np.pi/180).cos().suffix('_cos'),
                pl.col(wd_cols).mul(np.pi/180).sin().suffix('_sin'),
            ]
        )
    )
    df_with_mean_wd = (
        df_with_mean_wd
        .with_columns(
            [
                pl.concat_list(wd_cols_cos).list.mean().alias('cos_mean'),
                pl.concat_list(wd_cols_sin).list.mean().alias('sin_mean'),
            ]
        )
        .with_columns(
            # Circular mean: angle of the mean unit vector, wrapped to [0, 360)
            wd_bin = np.mod(pl.reduce(np.arctan2, [pl.col('sin_mean'), pl.col('cos_mean')])
                            .mul(180/np.pi), 360.0)
        )
        .filter(
            # Filter the mean wind direction. The lower bound is inclusive:
            # with the default wd_min of 0.0 a strict comparison would throw
            # away every row whose mean direction is exactly 0 degrees.
            (pl.col('wd_bin') >= wd_min)
            & (pl.col('wd_bin') < wd_max)
            & (pl.col('wd_bin').is_not_null())
        )
        .select(cols_to_return)  # Select for just the columns we want to return
    )

    # Replace the mean wind direction by the midpoint of the bin it falls in
    return bin_column(df_with_mean_wd, 'wd_bin', 'wd_bin', edges)


def add_power_test(df_, test_cols):
    """
    Add a pow_test column holding the row-wise mean of the test power columns.

    Parameters:
        df_ (pl.DataFrame): The Polars DataFrame to extend.
        test_cols (list[str]): The names of the power columns to average across.

    Returns:
        pl.DataFrame: A new Polars DataFrame with an additional pow_test column
    """
    return df_.with_columns(
        pow_test = pl.concat_list(test_cols).list.mean()
    )


def add_power_ref(df_, ref_cols):
    """
    Add a pow_ref column holding the row-wise mean of the reference power columns.

    Parameters:
        df_ (pl.DataFrame): The Polars DataFrame to extend.
        ref_cols (list[str]): The names of the power columns to average across.

    Returns:
        pl.DataFrame: A new Polars DataFrame with an additional pow_ref column
    """
    return df_.with_columns(
        pow_ref = pl.concat_list(ref_cols).list.mean()
    )
def generate_block_list(N, num_blocks=10):
    """Generate an np.array of length N where each element is an integer between 0 and num_blocks-1
    with each value repeating (approximately) N/num_blocks times.

    Block i covers the indices from i*N//num_blocks up to, but not including,
    (i+1)*N//num_blocks, so block sizes differ by at most one element when N
    is not a multiple of num_blocks.

    Args:
        N (int): Length of the array to generate. Python and numpy integers
            are both accepted.
        num_blocks (int): Number of blocks to generate

    Returns:
        np.ndarray: Integer array of length N with non-decreasing block ids.

    Raises:
        ValueError: If N or num_blocks is not an integer, is not positive, or
            if num_blocks exceeds N.
    """

    # Test that N and num_blocks are integers (numpy integer scalars included)
    integer_types = (int, np.integer)
    if not isinstance(N, integer_types) or not isinstance(num_blocks, integer_types):
        raise ValueError('N and num_blocks must be integers')
    if N <= 0 or num_blocks <= 0:
        raise ValueError('N and num_blocks must be greater than 0')

    # Num blocks must be less than or equal to N
    if num_blocks > N:
        raise ValueError('num_blocks must be less than or equal to N')

    # Work with plain Python ints so the boundary arithmetic cannot overflow
    # a narrow numpy integer type
    N, num_blocks = int(N), int(num_blocks)

    # boundaries[i] is the first index of block i; the last entry equals N
    boundaries = np.arange(num_blocks + 1) * N // num_blocks
    return np.repeat(np.arange(num_blocks), np.diff(boundaries))
def get_energy_table(
        df_list_in,
        df_names=None,
        num_blocks=10,):
    """
    Given a list of PANDAS dataframes, return a single
    POLARS dataframe with a column
    indicating which dataframe the row came from as well as a block
    list to use in bootstrapping.

    Parameters:
        df_list_in (list): A list of PANDAS dataframes to combine.
        df_names (list): A list of names to give to the dataframes. If None,
            the dataframes will be named df_0, df_1, etc.
        num_blocks (int): The number of blocks to add to the block column for later bootstrapping.

    Returns:
        pl.DataFrame: A new Polars DataFrame with additional df_name and block columns

    Raises:
        ValueError: If fewer names than dataframes are supplied.
    """

    # Convert to polars
    df_list = [pl.from_pandas(df) for df in df_list_in]

    if df_names is None:
        df_names = ['df_'+str(i) for i in range(len(df_list))]

    # Every dataframe needs a name; fail early with a clear message rather
    # than part-way through the loop
    if len(df_names) < len(df_list):
        raise ValueError('df_names must contain a name for every dataframe in df_list_in')

    # Add a name column and a block column to each dataframe
    labelled = [
        frame.with_columns([
            pl.lit(name).alias('df_name'),
            pl.Series(generate_block_list(frame.shape[0], num_blocks=num_blocks)).alias('block'),
        ])
        for frame, name in zip(df_list, df_names)
    ]

    return pl.concat(labelled)


def resample_energy_table(df_e_, i):
    """Use the block column of an energy table to resample the data.

    Args:
        df_e_ (pl.DataFrame): An energy table with a block column
        i (int): Index of the bootstrap iteration. Iteration 0 returns the
            table unchanged so that the first sample is always the nominal
            (non-resampled) result.

    Returns:
        pl.DataFrame: A new energy table with (approximately)
            the same number of rows as the original
    """

    if i == 0:  # code to return as is
        return df_e_

    num_blocks = df_e_['block'].max() + 1

    # Generate a random np.array, num_blocks long, where each element is
    # an integer between 0 and num_blocks-1 (blocks are drawn with replacement)
    block_list = np.random.randint(0, num_blocks, num_blocks)

    # The inner join repeats the rows of a block once per time it was drawn
    return pl.DataFrame(
        {
            'block': block_list
        }
    ).join(df_e_, how='inner', on='block')
# Internal version, returns a polars dataframe
def _compute_energy_ratio_single(df_,
                                 df_names,
                                 ref_cols,
                                 test_cols,
                                 wd_cols,
                                 ws_cols,
                                 wd_step = 2.0,
                                 wd_min = 0.0,
                                 wd_max = 360.0,
                                 ws_step = 1.0,
                                 ws_min = 0.0,
                                 ws_max = 50.0,
                                 bin_cols_in = ['wd_bin','ws_bin'],
                                 ):

    """
    Compute the energy ratio of the test turbines over the reference turbines
    per wind direction bin, for every named dataframe in the energy table.

    Args:
        df_ (pl.DataFrame): An energy table containing a df_name column.
        df_names (list): The dataframe names present in the df_name column.
        ref_cols (list[str]): A list of columns to use as the reference turbines
        test_cols (list[str]): A list of columns to use as the test turbines
        wd_cols (list[str]): A list of columns to derive the wind directions from
        ws_cols (list[str]): A list of columns to derive the wind speeds from
        wd_step (float): The width of the wind direction bins.
        wd_min (float): The minimum wind direction to use.
        wd_max (float): The maximum wind direction to use.
        ws_step (float): The width of the wind speed bins.
        ws_min (float): The minimum wind speed to use.
        ws_max (float): The maximum wind speed to use.
        bin_cols_in (list[str]): A list of column names to bin over before energies are summed.

    Returns:
        pl.DataFrame: One row per wind direction bin with one energy ratio column
            per name in df_names, an uplift column when exactly two names are
            given, and one count_<name> column per name.
    """

    # Drop rows where any of the involved columns is missing
    required_cols = ref_cols + test_cols + ws_cols + wd_cols
    table = df_.filter(pl.all(pl.col(required_cols).is_not_null()))

    # Assign the wd/ws bins
    table = add_ws_bin(table, ws_cols, ws_step, ws_min, ws_max)
    table = add_wd_bin(table, wd_cols, wd_step, wd_min, wd_max)

    # Assign the reference and test power columns
    table = add_power_ref(table, ref_cols)
    table = add_power_test(table, test_cols)

    bins_only = [c for c in bin_cols_in if c != 'df_name']
    bins_and_name = bins_only + ['df_name']

    # Stage 1: mean power per (bin, df_name), weighted by the smallest count
    # found for that bin across the dataframes
    per_bin = (
        table
        .filter(pl.all(pl.col(bins_and_name).is_not_null()))  # Select for all bin cols present
        .groupby(bins_and_name, maintain_order=True)
        .agg([pl.mean("pow_ref"), pl.mean("pow_test"),pl.count()])
        .with_columns(
            [
                pl.col('count').min().over(bins_only).alias('count_min')  # Find the min across df_name
            ]
        )
        .with_columns(
            [
                pl.col('pow_ref').mul(pl.col('count_min')).alias('ref_energy'),  # Compute the reference energy
                pl.col('pow_test').mul(pl.col('count_min')).alias('test_energy'),  # Compute the test energy
            ]
        )
    )

    # Stage 2: sum the energies over everything but wind direction and form the ratio
    per_wd = (
        per_bin
        .groupby(['wd_bin','df_name'], maintain_order=True)
        .agg([pl.sum("ref_energy"), pl.sum("test_energy"),pl.sum("count")])
        .with_columns(
            energy_ratio = pl.col('test_energy') / pl.col('ref_energy')
        )
    )

    # Stage 3: one column per dataframe name
    ratio_renames = {f'energy_ratio_df_name_{n}' : n for n in df_names}
    count_renames = {f'count_df_name_{n}' : f'count_{n}' for n in df_names}
    wide = (
        per_wd
        .pivot(values=['energy_ratio','count'], columns='df_name', index='wd_bin',aggregate_function='first')
        .rename(ratio_renames)
        .rename(count_renames)
        .sort('wd_bin')
    )

    # Enforce a column order; with exactly two dataframes an uplift column
    # (percent change of the second relative to the first) sits after the ratios
    ordered_cols = ['wd_bin'] + df_names
    if len(df_names) == 2:
        first, second = df_names[0], df_names[1]
        wide = wide.with_columns(
            uplift = 100 * (pl.col(second) - pl.col(first)) / pl.col(first)
        )
        ordered_cols = ordered_cols + ['uplift']
    ordered_cols = ordered_cols + [f'count_{n}' for n in df_names]

    return wide.select(ordered_cols)
# Bootstrap function wraps the _compute_energy_ratio function
def _compute_energy_ratio_bootstrap(df_,
                                    df_names,
                                    ref_cols,
                                    test_cols,
                                    wd_cols,
                                    ws_cols,
                                    wd_step = 2.0,
                                    wd_min = 0.0,
                                    wd_max = 360.0,
                                    ws_step = 1.0,
                                    ws_min = 0.0,
                                    ws_max = 50.0,
                                    bin_cols_in = ['wd_bin','ws_bin'],
                                    N = 1,
                                    ):

    """
    Compute the energy ratio with bootstrapped upper and lower bounds.

    Args:
        df_ (pl.DataFrame): An energy table containing df_name and block columns.
        df_names (list): The dataframe names present in the df_name column.
        ref_cols (list[str]): A list of columns to use as the reference turbines
        test_cols (list[str]): A list of columns to use as the test turbines
        wd_cols (list[str]): A list of columns to derive the wind directions from
        ws_cols (list[str]): A list of columns to derive the wind speeds from
        wd_step (float): The width of the wind direction bins.
        wd_min (float): The minimum wind direction to use.
        wd_max (float): The maximum wind direction to use.
        ws_step (float): The width of the wind speed bins.
        ws_min (float): The minimum wind speed to use.
        ws_max (float): The maximum wind speed to use.
        bin_cols_in (list[str]): A list of column names to bin over before energies are summed.
        N (int): The number of bootstrap samples to use.

    Returns:
        pl.DataFrame: One row per wind direction bin. For every energy ratio
            (and the uplift, if present) the first sample's value is reported
            together with the 95% (_ub) and 5% (_lb) quantiles over all samples.

    """

    def _one_sample(i):
        # Sample 0 uses the table as is; later samples use resampled blocks
        return _compute_energy_ratio_single(resample_energy_table(df_, i),
                                            df_names,
                                            ref_cols,
                                            test_cols,
                                            wd_cols,
                                            ws_cols,
                                            wd_step,
                                            wd_min,
                                            wd_max,
                                            ws_step,
                                            ws_min,
                                            ws_max,
                                            bin_cols_in,
                                            )

    # Run the function N times and concatenate the results to compute statistics
    df_concat = pl.concat([_one_sample(i) for i in range(N)])

    # Uplift only exists when two dataframes are compared
    stat_cols = df_names + ['uplift'] if 'uplift' in df_concat.columns else df_names

    aggregations = [pl.first(n) for n in stat_cols]
    aggregations = aggregations + [pl.quantile(n, 0.95).alias(n + "_ub") for n in stat_cols]
    aggregations = aggregations + [pl.quantile(n, 0.05).alias(n + "_lb") for n in stat_cols]
    aggregations = aggregations + [pl.first(f'count_{n}') for n in df_names]

    grouped = df_concat.groupby(['wd_bin'], maintain_order=True).agg(aggregations)
    return grouped.sort('wd_bin')
def compute_energy_ratio(df_,
                         df_names,
                         ref_turbines=None,
                         test_turbines= None,
                         wd_turbines=None,
                         ws_turbines=None,
                         use_predefined_ref = False,
                         use_predefined_wd = False,
                         use_predefined_ws = False,
                         wd_step = 2.0,
                         wd_min = 0.0,
                         wd_max = 360.0,
                         ws_step = 1.0,
                         ws_min = 0.0,
                         ws_max = 50.0,
                         bin_cols_in = ['wd_bin','ws_bin'],
                         N = 1,
                         ):

    """
    Compute the energy ratio between two sets of turbines, optionally with bootstrapping

    Args:
        df_ (pl.DataFrame): A dataframe containing the data to use in the calculation.
        df_names (list): A list of names to give to the dataframes.
        ref_turbines (list[int]): A list of turbine numbers to use as the reference.
        test_turbines (list[int]): A list of turbine numbers to use as the test.
        ws_turbines (list[int]): A list of turbine numbers to use for the wind speeds
        wd_turbines (list[int]): A list of turbine numbers to use for the wind directions
        use_predefined_ref (bool): If True, use the pow_ref column of df_ as the reference power.
        use_predefined_ws (bool): If True, use the ws column of df_ as the wind speed.
        use_predefined_wd (bool): If True, use the wd column of df_ as the wind direction.
        wd_step (float): The width of the wind direction bins.
        wd_min (float): The minimum wind direction to use.
        wd_max (float): The maximum wind direction to use.
        ws_step (float): The width of the wind speed bins.
        ws_min (float): The minimum wind speed to use.
        ws_max (float): The maximum wind speed to use.
        bin_cols_in (list[str]): A list of column names to use for the wind speed and wind direction bins.
        N (int): The number of bootstrap samples to use. N=1 disables bootstrapping.

    Returns:
        EnergyRatioResult: Result object wrapping the energy ratio dataframe
            together with the inputs used to compute it.

    Raises:
        ValueError: If a required column or turbine list is missing or empty,
            if test_turbines is not a list/array, or if N is smaller than 1.

    """

    # Check that the inputs are valid and derive the column names.
    # Each entry: (label, use predefined column?, predefined column name,
    #              turbine list, prefix of the per-turbine columns)
    sources = (
        ('ref', use_predefined_ref, 'pow_ref', ref_turbines, 'pow'),
        ('ws', use_predefined_ws, 'ws', ws_turbines, 'ws'),
        ('wd', use_predefined_wd, 'wd', wd_turbines, 'wd'),
    )
    resolved_cols = {}
    for label, use_predefined, predefined_col, turbines, prefix in sources:
        if use_predefined:
            # The predefined column must exist in df_
            if predefined_col not in df_.columns:
                raise ValueError(f'df_ must have a column named {predefined_col} when use_predefined_{label} is True')
            # If turbines were supplied as well, warn user that they will be ignored
            if turbines is not None:
                warnings.warn(f'{label}_turbines will be ignored when use_predefined_{label} is True')
            resolved_cols[label] = [predefined_col]
        else:
            # Without a predefined column the turbines must be supplied
            if turbines is None:
                raise ValueError(f'{label}_turbines must be supplied when use_predefined_{label} is False')
            # An empty selection cannot be averaged over
            if len(turbines) == 0:
                raise ValueError(f'{label}_turbines cannot be empty')
            resolved_cols[label] = [f'{prefix}_{i:03d}' for i in turbines]

    # Confirm that test_turbines is a list of ints or a numpy array of ints
    if not isinstance(test_turbines, (list, np.ndarray)):
        raise ValueError('test_turbines must be a list or numpy array of ints')

    # Confirm that test_turbines is not empty
    if len(test_turbines) == 0:
        raise ValueError('test_turbines cannot be empty')

    # At least one (non-resampled) evaluation is needed
    if N < 1:
        raise ValueError('N must be greater than or equal to 1')

    ref_cols = resolved_cols['ref']
    ws_cols = resolved_cols['ws']
    wd_cols = resolved_cols['wd']

    # Convert the numbered arrays to appropriate column names
    test_cols = [f'pow_{i:03d}' for i in test_turbines]

    # Copy so the result object never shares the default list of this function
    bin_cols_in = list(bin_cols_in)

    shared_args = (df_,
                   df_names,
                   ref_cols,
                   test_cols,
                   wd_cols,
                   ws_cols,
                   wd_step,
                   wd_min,
                   wd_max,
                   ws_step,
                   ws_min,
                   ws_max,
                   bin_cols_in)

    # If N=1, don't use bootstrapping
    if N == 1:
        df_res = _compute_energy_ratio_single(*shared_args)
    else:
        df_res = _compute_energy_ratio_bootstrap(*shared_args, N)

    # Return the results as an EnergyRatioResult object
    return EnergyRatioResult(df_res,
                             df_names,
                             df_,
                             ref_cols,
                             test_cols,
                             wd_cols,
                             ws_cols,
                             wd_step,
                             wd_min,
                             wd_max,
                             ws_step,
                             ws_min,
                             ws_max,
                             bin_cols_in,
                             N)
# Use method of Eric Simley's slide 2
def _compute_uplift_in_region_single(df_,
                                     df_names,
                                     ref_cols,
                                     test_cols,
                                     wd_cols,
                                     ws_cols,
                                     wd_step = 2.0,
                                     wd_min = 0.0,
                                     wd_max = 360.0,
                                     ws_step = 1.0,
                                     ws_min = 0.0,
                                     ws_max = 50.0,
                                     bin_cols_in = ['wd_bin','ws_bin']
                                     ):

    """
    Compute the energy uplift between two dataframes using method of Eric Simley's slide 2

    Args:
        df_ (pl.DataFrame): An energy table containing a df_name column.
        df_names (list): The two dataframe names; the first is the baseline.
        ref_cols (list[str]): A list of columns to use as the reference turbines
        test_cols (list[str]): A list of columns to use as the test turbines
        wd_cols (list[str]): A list of columns to derive the wind directions from
        ws_cols (list[str]): A list of columns to derive the wind speeds from
        wd_step (float): The width of the wind direction bins.
        wd_min (float): The minimum wind direction to use.
        wd_max (float): The maximum wind direction to use.
        ws_step (float): The width of the wind speed bins.
        ws_min (float): The minimum wind speed to use.
        ws_max (float): The maximum wind speed to use.
        bin_cols_in (list[str]): A list of column names to use for the wind speed and wind direction bins.

    Returns:
        pl.DataFrame: A single-row dataframe with the columns delta_energy,
            base_test_energy and uplift (in percent).
    """

    # Number of hours in a 365-day year, used to scale the summed quantities
    hours_per_year = 8760

    # Filter df_ that all the columns are not null
    df_ = df_.filter(pl.all(pl.col(ref_cols + test_cols + ws_cols + wd_cols).is_not_null()))

    # Assign the wd/ws bins
    df_ = add_ws_bin(df_, ws_cols, ws_step, ws_min, ws_max)
    df_ = add_wd_bin(df_, wd_cols, wd_step, wd_min, wd_max)

    # Assign the reference and test power columns
    df_ = add_power_ref(df_, ref_cols)
    df_ = add_power_test(df_, test_cols)

    bin_cols_without_df_name = [c for c in bin_cols_in if c != 'df_name']
    bin_cols_with_df_name = bin_cols_without_df_name + ['df_name']

    base_name, test_name = df_names[0], df_names[1]

    df_ = (df_.with_columns(
                power_ratio = pl.col('pow_test') / pl.col('pow_ref'))
            .filter(pl.all(pl.col(bin_cols_with_df_name).is_not_null()))  # Select for all bin cols present
            .groupby(bin_cols_with_df_name, maintain_order=True)
            .agg([pl.mean("pow_ref"), pl.mean("power_ratio"),pl.count()])
            .with_columns(
                [
                    pl.col('count').min().over(bin_cols_without_df_name).alias('count_min'),  # Find the min across df_name
                    pl.col('pow_ref').mul(pl.col('power_ratio')).alias('pow_test'),  # Compute the test power
                ]
            )
            # Pivot on the same columns that were grouped on, so that a custom
            # bin_cols_in is honoured instead of assuming wd_bin/ws_bin
            .pivot(values=['power_ratio','pow_test','pow_ref','count_min'], columns='df_name', index=bin_cols_without_df_name,aggregate_function='first')
            .drop_nulls()  # Keep only bins that are populated in both dataframes
            .with_columns(
                # Normalized frequency of every bin
                f_norm = pl.col(f'count_min_df_name_{base_name}') / pl.col(f'count_min_df_name_{base_name}').sum()
            )
            .with_columns(
                delta_power_ratio = pl.col(f'power_ratio_df_name_{test_name}') - pl.col(f'power_ratio_df_name_{base_name}'),
                pow_ref_both_cases = pl.concat_list([f'pow_ref_df_name_{n}' for n in df_names]).list.mean()
            )
            .with_columns(
                delta_energy = pl.col('delta_power_ratio') * pl.col('f_norm') * pl.col('pow_ref_both_cases'),
                base_test_energy = pl.col(f'pow_test_df_name_{base_name}') * pl.col('f_norm')
            )

        )

    # Sum over all bins once and reuse the totals
    delta_energy_total = df_['delta_energy'].sum()
    base_test_energy_total = df_['base_test_energy'].sum()

    return pl.DataFrame({'delta_energy':hours_per_year * delta_energy_total,
                         'base_test_energy':hours_per_year * base_test_energy_total,
                         'uplift':100 * delta_energy_total / base_test_energy_total})
def _compute_uplift_in_region_bootstrap(df_,
                                        df_names,
                                        ref_cols,
                                        test_cols,
                                        wd_cols,
                                        ws_cols,
                                        wd_step = 2.0,
                                        wd_min = 0.0,
                                        wd_max = 360.0,
                                        ws_step = 1.0,
                                        ws_min = 0.0,
                                        ws_max = 50.0,
                                        bin_cols_in = ['wd_bin','ws_bin'],
                                        N = 20,
                                        ):

    """
    Compute the uplift in a region using bootstrap resampling

    Args:
        df_ (pl.DataFrame): An energy table containing df_name and block columns.
        df_names (list): The two dataframe names; the first is the baseline.
        ref_cols (list[str]): A list of columns to use as the reference turbines
        test_cols (list[str]): A list of columns to use as the test turbines
        wd_cols (list[str]): A list of columns to derive the wind directions from
        ws_cols (list[str]): A list of columns to derive the wind speeds from
        wd_step (float): The width of the wind direction bins.
        wd_min (float): The minimum wind direction to use.
        wd_max (float): The maximum wind direction to use.
        ws_step (float): The width of the wind speed bins.
        ws_min (float): The minimum wind speed to use.
        ws_max (float): The maximum wind speed to use.
        bin_cols_in (list[str]): A list of column names to use for the wind speed and wind direction bins.
        N (int): The number of bootstrap samples to use.

    Returns:
        pl.DataFrame: A single-row dataframe. For each of delta_energy,
            base_test_energy and uplift it holds the value of the first
            sample (_exp) and the 95% (_ub) and 5% (_lb) quantiles over all samples.
    """

    def _one_sample(i):
        # Sample 0 uses the table as is; later samples use resampled blocks
        return _compute_uplift_in_region_single(resample_energy_table(df_, i),
                                                df_names,
                                                ref_cols,
                                                test_cols,
                                                wd_cols,
                                                ws_cols,
                                                wd_step,
                                                wd_min,
                                                wd_max,
                                                ws_step,
                                                ws_min,
                                                ws_max,
                                                bin_cols_in,
                                                )

    df_concat = pl.concat([_one_sample(i) for i in range(N)])

    # Expected value and bounds for every metric, in a fixed column order
    summary = {}
    for metric in ('delta_energy', 'base_test_energy', 'uplift'):
        samples = df_concat[metric]
        summary[metric + '_exp'] = samples[0]
        summary[metric + '_ub'] = samples.quantile(0.95)
        summary[metric + '_lb'] = samples.quantile(0.05)

    return pl.DataFrame(summary)
def compute_uplift_in_region(df_,
                             df_names,
                             ref_turbines=None,
                             test_turbines= None,
                             wd_turbines=None,
                             ws_turbines=None,
                             use_predefined_ref = False,
                             use_predefined_wd = False,
                             use_predefined_ws = False,
                             wd_step = 2.0,
                             wd_min = 0.0,
                             wd_max = 360.0,
                             ws_step = 1.0,
                             ws_min = 0.0,
                             ws_max = 50.0,
                             bin_cols_in = ['wd_bin','ws_bin'],
                             N = 1,
                             ):

    """
    Compute the energy uplift of the second dataframe relative to the first
    over the selected wind speed / wind direction region, optionally with
    bootstrapping

    Args:
        df_ (pl.DataFrame): A dataframe containing the data to use in the calculation.
        df_names (list): A list with the names of exactly two dataframes; the first is the baseline.
        ref_turbines (list[int]): A list of turbine numbers to use as the reference.
        test_turbines (list[int]): A list of turbine numbers to use as the test.
        ws_turbines (list[int]): A list of turbine numbers to use for the wind speeds
        wd_turbines (list[int]): A list of turbine numbers to use for the wind directions
        use_predefined_ref (bool): If True, use the pow_ref column of df_ as the reference power.
        use_predefined_ws (bool): If True, use the ws column of df_ as the wind speed.
        use_predefined_wd (bool): If True, use the wd column of df_ as the wind direction.
        wd_step (float): The width of the wind direction bins.
        wd_min (float): The minimum wind direction to use.
        wd_max (float): The maximum wind direction to use.
        ws_step (float): The width of the wind speed bins.
        ws_min (float): The minimum wind speed to use.
        ws_max (float): The maximum wind speed to use.
        bin_cols_in (list[str]): A list of column names to use for the wind speed and wind direction bins.
        N (int): The number of bootstrap samples to use. N=1 disables bootstrapping.

    Returns:
        EnergyRatioResult: Result object wrapping the uplift dataframe
            together with the inputs used to compute it.

    Raises:
        ValueError: If a required column or turbine list is missing or empty,
            if test_turbines is not a list/array, if df_names does not hold
            exactly two names, or if N is smaller than 1.

    """

    # Check that the inputs are valid and derive the column names.
    # Each entry: (label, use predefined column?, predefined column name,
    #              turbine list, prefix of the per-turbine columns)
    sources = (
        ('ref', use_predefined_ref, 'pow_ref', ref_turbines, 'pow'),
        ('ws', use_predefined_ws, 'ws', ws_turbines, 'ws'),
        ('wd', use_predefined_wd, 'wd', wd_turbines, 'wd'),
    )
    resolved_cols = {}
    for label, use_predefined, predefined_col, turbines, prefix in sources:
        if use_predefined:
            # The predefined column must exist in df_
            if predefined_col not in df_.columns:
                raise ValueError(f'df_ must have a column named {predefined_col} when use_predefined_{label} is True')
            # If turbines were supplied as well, warn user that they will be ignored
            if turbines is not None:
                warnings.warn(f'{label}_turbines will be ignored when use_predefined_{label} is True')
            resolved_cols[label] = [predefined_col]
        else:
            # Without a predefined column the turbines must be supplied
            if turbines is None:
                raise ValueError(f'{label}_turbines must be supplied when use_predefined_{label} is False')
            # An empty selection cannot be averaged over
            if len(turbines) == 0:
                raise ValueError(f'{label}_turbines cannot be empty')
            resolved_cols[label] = [f'{prefix}_{i:03d}' for i in turbines]

    # Confirm that test_turbines is a list of ints or a numpy array of ints
    if not isinstance(test_turbines, (list, np.ndarray)):
        raise ValueError('test_turbines must be a list or numpy array of ints')

    # Confirm that test_turbines is not empty
    if len(test_turbines) == 0:
        raise ValueError('test_turbines cannot be empty')

    # The uplift is defined between exactly two dataframes
    if len(df_names) != 2:
        raise ValueError('Number of dataframes must be 2')

    # At least one (non-resampled) evaluation is needed
    if N < 1:
        raise ValueError('N must be greater than or equal to 1')

    ref_cols = resolved_cols['ref']
    ws_cols = resolved_cols['ws']
    wd_cols = resolved_cols['wd']

    # Convert the numbered arrays to appropriate column names
    test_cols = [f'pow_{i:03d}' for i in test_turbines]

    # Copy so the result object never shares the default list of this function
    bin_cols_in = list(bin_cols_in)

    shared_args = (df_,
                   df_names,
                   ref_cols,
                   test_cols,
                   wd_cols,
                   ws_cols,
                   wd_step,
                   wd_min,
                   wd_max,
                   ws_step,
                   ws_min,
                   ws_max,
                   bin_cols_in)

    # If N=1, don't use bootstrapping
    if N == 1:
        df_res = _compute_uplift_in_region_single(*shared_args)
    else:
        df_res = _compute_uplift_in_region_bootstrap(*shared_args, N)

    # Return the results as an EnergyRatioResult object
    return EnergyRatioResult(df_res,
                             df_names,
                             df_,
                             ref_cols,
                             test_cols,
                             wd_cols,
                             ws_cols,
                             wd_step,
                             wd_min,
                             wd_max,
                             ws_step,
                             ws_min,
                             ws_max,
                             bin_cols_in,
                             N)
bin_cols_in, + N) + + + +class EnergyRatioResult: + """ This class is used to store the results of the energy ratio calculations + and provide convenient methods for plotting and saving the results. + """ + def __init__(self, + df_result, + df_names, + energy_table, + ref_cols, + test_cols, + wd_cols, + ws_cols, + wd_step, + wd_min, + wd_max, + ws_step, + ws_min, + ws_max, + bin_cols_in, + N + ): + + self.df_result = df_result + self.df_names = df_names + self.energy_table = energy_table + self.num_df = len(df_names) + self.ref_cols = ref_cols + self.test_cols = test_cols + self.wd_cols = wd_cols + self.ws_cols = ws_cols + self.wd_step = wd_step + self.wd_min = wd_min + self.wd_max = wd_max + self.ws_step = ws_step + self.ws_min = ws_min + self.ws_max = ws_max + self.bin_cols_in = bin_cols_in + self.N = N + + # self.df_freq = self._compute_df_freq() + + def _compute_df_freq(self): + """ Compute the of ws/wd as previously computed but not presently + computed with the energy calculation. 
""" + + # Temporary copy of energy table + df_ = self.energy_table.clone() + + # Filter df_ that all the columns are not null + df_ = df_.filter(pl.all(pl.col(self.ref_cols + self.test_cols + self.ws_cols + self.wd_cols).is_not_null())) + + # Assign the wd/ws bins + df_ = add_ws_bin(df_, self.ws_cols, self.ws_step, self.ws_min, self.ws_max) + df_ = add_wd_bin(df_, self.wd_cols, self.wd_step, self.wd_min, self.wd_max) + + # Get the bin count by wd, ws and df_name + df_group = df_.groupby(['wd_bin','ws_bin','df_name']).count() + + return df_.to_pandas(), df_group.to_pandas() + + def plot(self, + df_names_subset = None, + labels = None, + color_dict = None, + axarr = None, + polar_plot=False, + show_wind_speed_distrubution=True, + ): + + # Only allow showing the wind speed distribution if polar_plot is False + if polar_plot and show_wind_speed_distrubution: + raise ValueError('show_wind_speed_distrubution cannot be True if polar_plot is True') + + # If df_names_subset is None, plot all the dataframes + if df_names_subset is None: + df_names_subset = self.df_names + + # If df_names_subset is not a list, convert it to a list + if not isinstance(df_names_subset, list): + df_names_subset = [df_names_subset] + + # Total number of energy ratios to plot + N = len(df_names_subset) + + # If labels is None, use the dataframe names + if labels is None: + labels = df_names_subset + + # If labels is not a list, convert it to a list + if not isinstance(labels, list): + labels = [labels] + + # Confirm that the length of labels is the same as the length of df_names_subset + if len(labels) != N: + raise ValueError('Length of labels must be the same as the length of df_names_subset') + + # Generate the default colors using the seaborn color palette + default_colors = sns.color_palette('colorblind', N) + + # If color_dict is None, use the default colors + if color_dict is None: + color_dict = {labels[i]: default_colors[i] for i in range(N)} + + # If color_dict is not a dictionary, raise 
an error + if not isinstance(color_dict, dict): + raise ValueError('color_dict must be a dictionary') + + # Make sure the keys of color_dict are in df_names_subset + if not all([label in df_names_subset for label in color_dict.keys()]): + raise ValueError('color_dict keys must be in df_names_subset') + + if axarr is None: + if polar_plot: + _, axarr = plt.subplots(nrows=1, ncols=2, figsize=(10, 5), subplot_kw={'projection': 'polar'}) + else: + if show_wind_speed_distrubution: + num_rows = 3 # Add rows to show wind speed distribution + else: + num_rows = 2 + _, axarr = plt.subplots(nrows=num_rows, ncols=1, sharex=True, figsize=(10, 5)) + + # Set the bar width using self.wd_step + bar_width = (0.7 / N) * self.wd_step + if polar_plot: + bar_width = bar_width * np.pi / 180.0 + + # For plotting, get a pandas dataframe + df = self.df_result.to_pandas() + + # Get x-axis values + x = np.array(df["wd_bin"], dtype=float) + + # Add NaNs to avoid connecting plots over gaps + dwd = np.min(x[1::] - x[0:-1]) + jumps = np.where(np.diff(x) > dwd * 1.50)[0] + if len(jumps) > 0: + df = pd.concat( + [ + df, + pd.DataFrame( + { + "wd_bin": x[jumps] + dwd / 2.0, + "N_bin": [0] * len(jumps), + } + ) + ], + axis=0, + ignore_index=False, + ) + df = df.iloc[np.argsort(df["wd_bin"])].reset_index(drop=True) + x = np.array(df["wd_bin"], dtype=float) + + # Plot horizontal black line at 1. 
+ xlims = np.linspace(np.min(x) - 4.0, np.max(x) + 4.0, 1000) + + if polar_plot: + x = (90.0 - x) * np.pi / 180.0 # Convert to radians + xlims = (90.0 - xlims) * np.pi / 180.0 # Convert to radians + + # Plot the horizontal line at 1 + axarr[0].plot(xlims, np.ones_like(xlims), color="black") + + # Plot the energy ratios + for df_name, label in zip(df_names_subset, labels): + + axarr[0].plot(x, df[df_name], "-o", markersize=3.0, label=label, color=color_dict[label]) + + # If data includes upper and lower bounds plot them + if df_name + "_ub" in df.columns: + + axarr[0].fill_between( + x, + df[df_name + "_lb"], + df[df_name + "_ub"], + alpha=0.25, + color=color_dict[label], + ) + + # Format the energy ratio plot + axarr[0].legend() + axarr[0].grid(visible=True, which="major", axis="both", color="gray") + axarr[0].grid(visible=True, which="minor", axis="both", color="lightgray") + axarr[0].minorticks_on() + # axarr[0].set_grid(True) + + # Plot the bin counts + df_unbinned, df_freq = self._compute_df_freq() + df_freq_sum_all_ws = df_freq.groupby(["wd_bin","df_name"]).sum().reset_index() + + + for i, (df_name, label) in enumerate(zip(df_names_subset, labels)): + df_sub = df_freq_sum_all_ws[df_freq_sum_all_ws["df_name"] == df_name] + + x = np.array(df_sub["wd_bin"], dtype=float) + if polar_plot: + x = (90.0 - x) * np.pi / 180.0 # Convert to radians + axarr[1].bar(x - (i - N / 2) * bar_width, df_sub["count"], width=bar_width, label = label, color=color_dict[label]) + + axarr[1].legend() + + # Get the bins + wd_bins = np.array(df_freq["wd_bin"].unique(), dtype=float) + ws_bins = np.array(df_freq["ws_bin"].unique(), dtype=float) + num_wd_bins = len(wd_bins) + num_ws_bins = len(ws_bins) + + if show_wind_speed_distrubution: + # Plot the wind speed distribution in df_freq as a heat map with wd on the x-axis and ws on the y-axis + + ax = axarr[2] + for df_name, label in zip(df_names_subset, labels): + df_sub = df_freq[df_freq["df_name"] == df_name] + 
ax.scatter(df_unbinned["wd_bin"], df_unbinned["ws_bin"], c=color_dict[label],alpha=0.25, s=1) + + + def plot_uplift(self, + axarr = None, + polar_plot=False, + show_wind_speed_distrubution=True, + ): + self.plot( + df_names_subset = 'uplift', + labels = ['uplift'], + color_dict = {'uplift':'k'}, + axarr = axarr, + polar_plot=polar_plot, + show_wind_speed_distrubution=show_wind_speed_distrubution, + ) + diff --git a/flasc/energy_ratio/energy_ratio_visualization.py b/flasc/energy_ratio/energy_ratio_visualization.py index 9bb4c747..2887430e 100644 --- a/flasc/energy_ratio/energy_ratio_visualization.py +++ b/flasc/energy_ratio/energy_ratio_visualization.py @@ -265,402 +265,3 @@ def plot( plt.tight_layout() return axarr - -# def table_analysis( -# df_list, -# fout_xlsx, -# hide_bin_count_columns=False, -# hide_ws_ti_columns=False, -# hide_pow_columns=False, -# hide_unbalanced_cols=True, -# fi=None -# ): -# # Save some useful info -# header_row = 2 -# first_data_row = header_row + 1 -# first_data_col = 1 - -# # Extract variables -# ws_step = df_list[0]["er_ws_step"] -# wd_bin_width = df_list[0]["er_wd_bin_width"] - -# # Extract relevant details -# name_list = [df["name"] for df in df_list] -# df_per_wd_bin_list = [ -# d["er_results_info_dict"]["df_per_wd_bin"]for d in df_list -# ] -# df_per_ws_bin_list = [ -# d["er_results_info_dict"]["df_per_ws_bin"] for d in df_list -# ] -# test_turbines = np.array(df_list[0]["er_test_turbines"], dtype=int) - -# # Append column names with data label for df_per_wd_bin -# for ii, df in enumerate(df_per_wd_bin_list): -# name = name_list[ii] -# df.columns = ["{:s}_{:s}".format(c, name) for c in df.columns] - -# # Append column names with data label for df_per_ws_bin -# for ii, df in enumerate(df_per_ws_bin_list): -# name = name_list[ii] -# df = df.reset_index(drop=False).set_index(["wd_bin", "ws_bin"]) -# df.columns = ["{:s}_{:s}".format(c, name) for c in df.columns] -# df_per_ws_bin_list[ii] = df - -# # Concatenate information from 
different dataframes -# df_per_wd_bin = pd.concat(df_per_wd_bin_list, axis=1) -# df_per_ws_bin = pd.concat(df_per_ws_bin_list, axis=1) - -# # Merge df_per_wd_bin information into df_per_ws_bin -# df_per_ws_bin = df_per_ws_bin.reset_index(drop=False).set_index("wd_bin") -# df_per_wd_bin["ws_bin"] = 999999.9 # very high number -# df_merged = pd.concat([df_per_ws_bin, df_per_wd_bin]) -# df_merged = df_merged.sort_values(by=["wd_bin", "ws_bin"]) -# df_merged = df_merged.reset_index(drop=False) - -# wd_intervals = [pd.Interval(a, b, "left") for a, b in zip( -# df_merged["wd_bin"] - wd_bin_width / 2.0, -# df_merged["wd_bin"] + wd_bin_width / 2.0 -# ) -# ] -# ws_intervals = [pd.Interval(a, b, "left") for a, b in zip( -# df_merged["ws_bin"] - ws_step / 2.0, -# df_merged["ws_bin"] + ws_step / 2.0 -# ) -# ] - -# df_table = pd.DataFrame( -# { -# "wd_bin": wd_intervals, -# "ws_bin": ws_intervals, -# } -# ) - -# # Overwrite placeholder large numbers with new interval -# total_ids = [i.right >= 999999.9 for i in ws_intervals] -# df_table.loc[total_ids, "ws_bin"] = "TOTALS" -# df_merged.loc[total_ids, "ws_bin"] = "TOTALS" - -# # Add bin counts for the dataframes -# cols = ["bin_count_{:s}".format(n) for n in name_list] -# for c in cols: -# df_table[c] = df_merged[c].fillna(0).astype(int) - -# # Add balanced bin count per ws and in total -# df_table["bin_count_balanced"] = df_table[cols].min(axis=1).astype(int) -# df_table.loc[total_ids, "bin_count_balanced"] = int(0) -# Ntot = df_table.groupby(["wd_bin"])["bin_count_balanced"].sum() -# df_table.loc[total_ids, "bin_count_balanced"] = np.array(Ntot, dtype=int) - -# df_merged["bin_count_balanced_tot"] = df_table.loc[total_ids, "bin_count_balanced"] -# df_merged["bin_count_balanced_tot"] = df_merged["bin_count_balanced_tot"].bfill().astype(int) - -# # add ws_mean and ti_mean for all dataframes -# for col in ["ws_mean", "ti_mean"]: -# for n in name_list: -# c = "{:s}_{:s}".format(col, n) -# if c in df_merged.columns: -# df_table[c] = 
df_merged[c] - -# # Add reference power and energy -# bin_totals = np.array(df_merged["bin_count_balanced_tot"]) -# for n in name_list: -# pow_mean = df_merged["pow_ref_mean_{:s}".format(n)] -# energy_unbal = df_merged["energy_ref_unbalanced_{:s}".format(n)] -# energy_bal_norm = df_merged["energy_ref_balanced_norm_{:s}".format(n)] -# energy_bal = bin_totals * energy_bal_norm - -# df_table["ref_pow_{:s}".format(n)] = pow_mean -# df_table["ref_energy_unbalanced_{:s}".format(n)] = energy_unbal -# df_table["ref_energy_balanced_{:s}".format(n)] = energy_bal - -# # Fill empty entries with 0.0 for energy -# df_table["ref_energy_unbalanced_{:s}".format(n)] = ( -# df_table["ref_energy_unbalanced_{:s}".format(n)].fillna(0.0) -# ) -# df_table["ref_energy_balanced_{:s}".format(n)] = ( -# df_table["ref_energy_balanced_{:s}".format(n)].fillna(0.0) -# ) - -# # Add empty column/spacer -# df_table["___"] = None - -# # Add test power and energy -# bin_totals = np.array(df_merged["bin_count_balanced_tot"]) -# for n in name_list: -# pow_mean = df_merged["pow_test_mean_{:s}".format(n)] -# energy_unbal = df_merged["energy_test_unbalanced_{:s}".format(n)] -# energy_bal_norm = df_merged["energy_test_balanced_norm_{:s}".format(n)] -# energy_bal = bin_totals * energy_bal_norm -# energy_ratio = df_merged["energy_ratio_unbalanced_{:s}".format(n)] -# energy_ratio_bal = df_merged["energy_ratio_balanced_{:s}".format(n)] - -# df_table["test_pow_{:s}".format(n)] = pow_mean -# df_table["test_energy_unbalanced_{:s}".format(n)] = energy_unbal -# df_table["test_energy_balanced_{:s}".format(n)] = energy_bal -# df_table["energy_ratio_unbalanced_{:s}".format(n)] = energy_ratio -# df_table["energy_ratio_balanced_{:s}".format(n)] = energy_ratio_bal - -# # Fill empty entries with 0.0 for energy -# df_table["test_energy_unbalanced_{:s}".format(n)] = ( -# df_table["test_energy_unbalanced_{:s}".format(n)].fillna(0.0) -# ) -# df_table["test_energy_balanced_{:s}".format(n)] = ( -# 
df_table["test_energy_balanced_{:s}".format(n)].fillna(0.0) -# ) - -# # Define change in unbalanced and balanced energy ratios -# bl = df_table["energy_ratio_unbalanced_{:s}".format(name_list[0])] -# bl_bal = df_table["energy_ratio_balanced_{:s}".format(name_list[0])] - -# for n in name_list[1::]: -# df_table["change_energy_ratio_unbalanced_{:s}".format(n)] = ( -# (df_table["energy_ratio_unbalanced_{:s}".format(n)] - bl) / bl -# ) -# df_table["change_energy_ratio_balanced_{:s}".format(n)] = ( -# (df_table["energy_ratio_balanced_{:s}".format(n)] - bl_bal) -# / bl_bal -# ) - -# # Add empty column/spacer -# df_table["___0"] = None - -# # Add empty rows in df_table after each wd_bin -# df_empty = pd.DataFrame([None]) -# df_array = [] -# splits = np.where(total_ids)[0] -# splits = np.hstack([0, splits + 1]) # Add zero - -# for ii in range(len(splits) - 1): -# lb = splits[ii] -# ub = splits[ii+1] -# df_array.append(df_table[lb:ub]) -# df_array.append(df_empty) - -# df_table_spaced = pd.concat(df_array, axis=0, ignore_index=True) -# df_table_spaced = df_table_spaced[df_table.columns] -# df_table = df_table_spaced - -# # Write out the dataframe with xslxwriter -# writer = pd.ExcelWriter(fout_xlsx, engine="xlsxwriter") -# df_table.to_excel( -# writer, -# index=False, -# sheet_name="results", -# startcol=first_data_col, -# startrow=header_row, -# ) -# workbook = writer.book -# worksheet = writer.sheets["results"] - -# # FORMATTING - -# # Format large numbers to 2 decimal -# fmt_rate = workbook.add_format({"num_format": "0.00", "bold": False}) -# cols = df_table.columns -# change_list = [ -# i -# for i in range(len(cols)) -# if ("_mean_" in cols[i]) or ("_pow_" in cols[i]) or ( -# ("_energy_" in cols[i]) and not ("energy_ratio_" in cols[i]) -# ) -# ] -# for c in change_list: -# worksheet.set_column( -# c + first_data_col, c + first_data_col, 10, fmt_rate -# ) - -# # Format energy ratios to 3 decimal -# fmt_rate = workbook.add_format({"num_format": "0.000", "bold": False}) -# 
cols = df_table.columns -# change_list = [i for i in range(len(cols)) if "energy_ratio" in cols[i]] -# for c in change_list: -# worksheet.set_column( -# c + first_data_col, c + first_data_col, 10, fmt_rate -# ) - -# # Format change and TI into a percentage -# fmt_rate = workbook.add_format({"num_format": "%0.0", "bold": False}) -# cols = df_table.columns -# change_list = [ -# i -# for i in range(len(cols)) -# if ("change" in cols[i]) or ("ti_" in cols[i]) -# ] -# for c in change_list: -# worksheet.set_column( -# c + first_data_col, c + first_data_col, 10, fmt_rate -# ) - -# # # Make "totals" rows bold -# # bold_format = workbook.add_format({'bold': True}) -# # total_ids = np.where(df_table["ws_bin"] == "TOTALS")[0] -# # for ri in total_ids: -# # worksheet.set_row(first_data_row + ri, 15, bold_format) - -# # Make the seperator columns very narrow and black -# fmt_black = workbook.add_format({"fg_color": "#000000"}) -# change_list = [i for i in range(len(cols)) if "___" in cols[i]] -# for c in change_list: -# worksheet.set_column( -# c + first_data_col, c + first_data_col, 1, fmt_black -# ) - -# # Add data bars to the bins counts -# change_list = [i for i in range(len(cols)) if "bin" in cols[i]] -# for c in change_list: -# worksheet.conditional_format( -# first_data_row, -# c + first_data_col, -# df_table.shape[0] + first_data_row, -# c + first_data_col, -# {"type": "data_bar", "max_value": 100}, -# ) - -# # Add color to the change columns -# change_list = [i for i in range(len(cols)) if "change" in cols[i]] - -# for c in change_list: -# worksheet.conditional_format( -# first_data_row, -# c + first_data_col, -# df_table.shape[0] + first_data_row, -# c + first_data_col, -# { -# "type": "3_color_scale", -# "min_value": -1.0, -# "min_type": "num", -# "max_value": 1.0, -# "mid_value": 0.0, -# "mid_type": "num", -# "min_color": "#FF0000", -# "mid_color": "#FFFFFF", -# "max_color": "#00FF00", -# "max_type": "num", -# }, -# ) - -# # Add color to energy ratios -# change_list 
= [ -# i -# for i in range(len(cols)) -# if ("er_" in cols[i]) and not ("change" in cols[i]) -# ] -# for c in change_list: -# worksheet.conditional_format( -# first_data_row, -# c + first_data_col, -# df_table.shape[0] + first_data_row, -# c + first_data_col, -# { -# "type": "3_color_scale", -# "min_value": 0.25, -# "min_type": "num", -# "max_value": 2.0, -# "mid_value": 1.0, -# "mid_type": "num", -# "min_color": "#0000FF", -# "mid_color": "#FFFFFF", -# "max_color": "#00FF00", -# "max_type": "num", -# }, -# ) - -# # Header -# # Adding formats for header row. -# fmt_header = workbook.add_format( -# { -# "bold": True, -# "text_wrap": True, -# "valign": "top", -# "fg_color": "#5DADE2", -# "font_color": "#FFFFFF", -# "border": 1, -# } -# ) -# for col, value in enumerate(df_table.columns.values): -# worksheet.write(header_row, col + first_data_col, value, fmt_header) - -# # If a floris model is provided, use it to make layout images -# if fi is not None: -# # Make that first colum wide -# worksheet.set_column("A:A", 30) - -# # Create image folder -# img_folder = os.path.join(os.path.dirname(fout_xlsx), "xlsx_images") -# if not os.path.exists(img_folder): -# os.makedirs(img_folder, exist_ok=True) - -# # For each bin were checking, make image of wake scenarios -# sort_df = df_table[["wd_bin", "ws_bin"]].copy() -# sort_df = sort_df.sort_values(["wd_bin", "ws_bin"]).dropna() -# for wdb in sort_df.wd_bin.unique(): -# row_top_ws_bin = sort_df.index[wdb == sort_df["wd_bin"]][0] -# wd_arrow = wdb.mid # Put arrow in middle of bin -# fig, ax = plt.subplots(figsize=(2, 2)) -# fi.reinitialize_flow_field( -# wind_direction=wd_arrow, wind_speed=8.0 -# ) -# fi.calculate_wake() -# hor_plane = fi.get_hor_plane() -# hor_plane = wfct.cut_plane.change_resolution( -# hor_plane, -# resolution=(200, 200), -# ) -# wfct.visualization.visualize_cut_plane(hor_plane, ax=ax) -# im_name = os.path.join(img_folder, "wd_%03d.png" % wd_arrow) -# fig.savefig(im_name, bbox_inches="tight") - -# # Insert 
the figure -# worksheet.insert_image(first_data_row + row_top_ws_bin, 0, im_name) - -# # Make the first row bigger -# worksheet.set_row(0, 120) - -# # Get a list of blank columns indicating turbine starts -# blank_cols = [i for i in range(len(cols)) if "___" in cols[i]] - -# # Plot the layout on top -# fig, ax = plt.subplots(figsize=(3, 2)) -# fi.vis_layout(ax=ax) -# xt = np.array(fi.layout_x) -# yt = np.array(fi.layout_y) -# ax.plot(xt[test_turbines], yt[test_turbines], "mo", ms=25) -# im_name = os.path.join(img_folder, "layout.png") -# fig.savefig(im_name, bbox_inches="tight") -# worksheet.insert_image(0, blank_cols[0] + 1, im_name) - -# # Hide columns if necessary -# if hide_bin_count_columns: -# cols = [i for i, c in enumerate(df_table.columns) if "bin_count" in c] -# for ii in cols: -# worksheet.set_column(ii + 1, ii + 1, None, None, {'hidden': 1}) - -# if hide_ws_ti_columns: -# cols = [ -# i for i, c in enumerate(df_table.columns) if ( -# ("ws_mean_" in c) or ("ws_std_" in c) or -# ("ti_mean_" in c) or ("ti_std_" in c) -# ) -# ] -# for ii in cols: -# worksheet.set_column(ii + 1, ii + 1, None, None, {'hidden': 1}) - -# if hide_pow_columns: -# cols = [ -# i for i, c in enumerate(df_table.columns) if ( -# ("ref_pow_" in c) or ("test_pow_" in c) -# ) -# ] -# for ii in cols: -# worksheet.set_column(ii + 1, ii + 1, None, None, {'hidden': 1}) - -# if hide_unbalanced_cols: -# cols = [i for i, c in enumerate(df_table.columns) if "unbalance" in c] -# for ii in cols: -# worksheet.set_column(ii + 1, ii + 1, None, None, {'hidden': 1}) - -# # Freeze the panes -# worksheet.freeze_panes(first_data_row, first_data_col) - -# writer.save() -# print("File successfully written to {:s}.".format(fout_xlsx)) diff --git a/flasc/raw_data_handling/sqldatabase_management.py b/flasc/raw_data_handling/sqldatabase_management.py index b017f96f..bd468260 100644 --- a/flasc/raw_data_handling/sqldatabase_management.py +++ b/flasc/raw_data_handling/sqldatabase_management.py @@ -11,9 +11,10 @@ # 
the License. -import os import numpy as np import pandas as pd +import polars as pl +from pathlib import Path from time import perf_counter as timerpc import datetime @@ -27,6 +28,7 @@ import sqlalchemy as sqlalch + class sql_database_manager: # Private methods @@ -44,45 +46,88 @@ def _create_sql_engine(self, password): name = self.db_name usn = self.username address = "%s:%d" % (self.host, self.port) + self.url = "%s://%s:%s@%s/%s" % (dr, usn, password, address, name) self.engine = sqlalch.create_engine( - url="%s://%s:%s@%s/%s" % (dr, usn, password, address, name) + url= self.url ) + self.inspector = sqlalch.inspect(self.engine) self.print_properties() def _get_table_names(self): - return self.engine.table_names() + return self.inspector.get_table_names() + + def _does_table_exist(self, table_name): + return table_name in self._get_table_names() def _get_column_names(self, table_name): - df = pd.read_sql_query( - "SELECT * FROM " + table_name + " WHERE false;", self.engine + columns = self.inspector.get_columns(table_name) + return [c['name'] for c in columns] + + def _create_table_from_df(self, table_name, df): + + print(f'Creating Table: {table_name} with {df.shape[1]} columns') + + # Convert to pandas for upload + df_pandas = df.to_pandas() + df_pandas = df_pandas.iloc[:10] + + df_pandas.to_sql( + table_name, + self.engine, + index=False, + method="multi" + ) + + # Make time unique and an index to speed queries + query = 'CREATE UNIQUE INDEX idx_time_%s ON %s (time);' % (table_name, table_name) + print('Setting time to unique index') + with self.engine.connect() as con: + rs = con.execute(sqlalch.text(query)) + print(f'...RESULT: {rs}') + con.commit() # commit the transaction + + def _remove_duplicated_time(self, table_name, df): + + start_time = df.select(pl.min("time"))[0, 0] + end_time = df.select(pl.max("time"))[0, 0] + original_size = df.shape[0] + + print(f'Checking for time entries already in {table_name} between {start_time} and {end_time}') + 
time_in_db = self.get_data(table_name, + ['time'], + start_time=start_time, + end_time=end_time, + end_inclusive=True ) - return list(df.columns) + + df = df.join(time_in_db, on='time',how="anti") + new_size = df.shape[0] + if new_size < original_size: + print(f'...Dataframe size reduced from {original_size} to {new_size} by time values already in {table_name}') + return df + def _get_first_time_entry(self, table_name): - tn = table_name - column_names = self._get_column_names(tn) - if 'time' in column_names: - df_time = pd.read_sql_query( - sql="SELECT time FROM %s ORDER BY time asc LIMIT 1" % tn, - con=self.engine - ) - if df_time.shape[0] > 0: - return df_time["time"][0] - return None + # Get the table corresponding to the table name + table = sqlalch.Table(table_name, sqlalch.MetaData(), autoload_with=self.engine) + + stmt = sqlalch.select(table.c.time).order_by(table.c.time.asc()).limit(1) + with self.engine.begin() as conn: + result = conn.execute(stmt) + for row in result: + return row[0] def _get_last_time_entry(self, table_name): - tn = table_name - column_names = self._get_column_names(tn) - if 'time' in column_names: - df_time = pd.read_sql_query( - sql="SELECT time FROM %s ORDER BY time desc LIMIT 1" % tn, - con=self.engine - ) - if df_time.shape[0] > 0: - return df_time["time"][0] + # Get the table corresponding to the table name + table = sqlalch.Table(table_name, sqlalch.MetaData(), autoload_with=self.engine) - return None + stmt = sqlalch.select(table.c.time).order_by(table.c.time.desc()).limit(1) + print(stmt) + with self.engine.begin() as conn: + result = conn.execute(stmt) + for row in result: + return row[0] # General info functions from data def print_properties(self): @@ -103,7 +148,11 @@ def print_properties(self): def launch_gui(self, turbine_names=None, sort_columns=False): root = tk.Tk() - sql_db_explorer_gui(master=root, dbc=self, turbine_names=turbine_names, sort_columns=sort_columns) + sql_db_explorer_gui(master=root, + dbc=self, + 
turbine_names=turbine_names, + sort_columns=sort_columns + ) root.mainloop() def get_column_names(self, table_name): @@ -113,8 +162,8 @@ def batch_get_data(self, table_name, columns=None, start_time=None, end_time=None, fn_out=None, no_rows_per_file=10000): if fn_out is None: fn_out = table_name + ".ftr" - if not (fn_out[-4::] == ".ftr"): - fn_out = fn_out + ".ftr" + if not (fn_out.suffix == ".ftr"): + fn_out = fn_out.with_suffix(".ftr") # Ensure 'time' in database column_names = self._get_column_names(table_name=table_name) @@ -122,13 +171,16 @@ def batch_get_data(self, table_name, columns=None, start_time=None, raise KeyError("Cannot find 'time' column in database table.") # Get time column from database + print("Getting time column from database...") time_in_db = self.get_data(table_name=table_name, columns=['time'], start_time=start_time, end_time=end_time) - time_in_db = list(time_in_db['time']) + time_in_db = list(time_in_db.select("time").to_numpy().flatten()) + print("...finished, N.o. entries: %d." % len(time_in_db)) splits = np.arange(0, len(time_in_db) - 1, no_rows_per_file, dtype=int) splits = np.append(splits, len(time_in_db) - 1) splits = np.unique(splits) + print(f"Splitting {len(time_in_db)} entries data into {len(splits)} subsets of {no_rows_per_file}.") for ii in range(len(splits) - 1): print("Downloading subset %d out of %d." % (ii, len(splits) - 1)) @@ -138,12 +190,17 @@ def batch_get_data(self, table_name, columns=None, start_time=None, start_time=time_in_db[splits[ii]], end_time=time_in_db[splits[ii+1]] ) - fn_out_ii = fn_out + '.%d' % ii - print("Saving file to %s." 
% fn_out_ii) - df.to_feather(fn_out_ii) + fn_out_ii = fn_out.with_suffix(".ftr.%03d" % ii) + print("Saving file to %s" % fn_out_ii) + df.write_ipc(fn_out_ii) def get_data( - self, table_name, columns=None, start_time=None, end_time=None + self, + table_name, + columns=None, + start_time=None, + end_time=None, + end_inclusive=False, ): # Get the data from tables if columns is None: @@ -156,22 +213,33 @@ def get_data( query_string += " WHERE time >= '" + str(start_time) + "'" if (start_time is not None) and (end_time is not None): - query_string += " AND time < '" + str(end_time) + "'" + if end_inclusive: + query_string += " AND time <= '" + str(end_time) + "'" + else: + query_string += " AND time < '" + str(end_time) + "'" elif (start_time is None) and (end_time is not None): - query_string += " WHERE time < '" + str(end_time) + "'" + if end_inclusive: + query_string += " WHERE time <= '" + str(end_time) + "'" + else: + query_string += " WHERE time < '" + str(end_time) + "'" - query_string += " ORDER BY time;" - df = pd.read_sql_query(query_string, self.engine) + query_string += " ORDER BY time" + + df = pl.read_database(query_string,self.url) # Drop a column called index if "index" in df.columns: - df = df.drop(["index"], axis=1) + df = df.drop("index") - # Make sure time column is in datetime format - df["time"] = pd.to_datetime(df.time) + # Confirm that the time column is in datetime format + if "time" in df.columns: + if not (df.schema["time"] == pl.Datetime): + df = df.with_columns(pl.col("time").cast(pl.Datetime)) return df + + #TODO: This is a fresh redo check it works def send_data( self, table_name, @@ -181,81 +249,135 @@ def send_data( df_chunk_size=2000, sql_chunk_size=50 ): - table_name = table_name.lower() - table_names = [t.lower() for t in self._get_table_names()] - - if (if_exists == "append"): - print("Warning: risk of adding duplicate rows using 'append'.") - print("You are suggested to use 'append_new' instead.") - - if (if_exists == "append_new") 
and (table_name in table_names): - if len(unique_cols) > 1: - raise NotImplementedError("Not yet implemented.") - - col = unique_cols[0] - idx_in_db = self.get_data(table_name=table_name, columns=[col])[ - col - ] - - # Check if values in SQL database are unique - if not idx_in_db.is_unique: - raise IndexError( - "Column '%s' is not unique in the SQL database." % col - ) + + # Make a local copy + df_ = df.clone() + + # Check if table exists + if not self._does_table_exist(table_name): + + print(f'{table_name} does not yet exist') + + # Create the table + self._create_table_from_df(table_name, df_) + + # Check for times already in database + df_ = self._remove_duplicated_time(table_name, df_) + + # Check if df_ is now + if df_.shape[0] == 0: + print('Dataframe is empty') + return + + # Write to database + print(f'Inserting {df_.shape[0]} rows into {table_name} in chunks of {df_chunk_size}') + time_start_total = timerpc() + + # Parition into chunks + df_list = (df_.with_row_count('id') + .with_columns(pl.col('id').apply(lambda i: int(i/df_chunk_size))) + .partition_by('id') + ) - idx_in_df = set(df[col]) - idx_in_db = set(idx_in_db) - idx_to_add = np.sort(list(idx_in_df - idx_in_db)) - print( - "{:d} entries already exist in SQL database.".format( - len(idx_in_df) - len(idx_to_add) - ) + num_par = len(df_list) + for df_par_idx, df_par in enumerate(df_list): + print(f'...inserting chunk {df_par_idx} of {num_par}') + + df_par.drop('id').write_database( + table_name, + self.url, + if_exists='append' ) - - print("Adding {:d} new entries...".format(len(idx_to_add))) - df_subset = df.set_index('time').loc[idx_to_add].reset_index( - drop=False) - - else: - df_subset = df - - if (if_exists == "append_new"): - if_exists = "append" - - # Upload data - N = df_subset.shape[0] - if N < 1: - print("Skipping data upload. Dataframe is empty.") - else: - print("Attempting to insert %d rows into table '%s'." 
- % (df_subset.shape[0], table_name)) - df_chunks_id = np.arange(0, df_subset.shape[0], df_chunk_size) - df_chunks_id = np.append(df_chunks_id, df_subset.shape[0]) - df_chunks_id = np.unique(df_chunks_id) - - time_start_total = timerpc() - for i in range(len(df_chunks_id)-1): - Nl = df_chunks_id[i] - Nu = df_chunks_id[i+1] - print("Inserting rows %d to %d." % (Nl, Nu)) - time_start_i = timerpc() - df_sub = df_subset[Nl:Nu] - df_sub.to_sql( - table_name, - self.engine, - if_exists=if_exists, - index=False, - method="multi", - chunksize=sql_chunk_size, - ) - time_i = timerpc() - time_start_i - total_time = timerpc() - time_start_total - est_time_left = (total_time / Nu) * (N - Nu) - eta = datetime.datetime.now() + td(seconds=est_time_left) - eta = eta.strftime("%a, %d %b %Y %H:%M:%S") - print("Data insertion took %.1f s. ETA: %s." % (time_i, eta)) - - + total_time = timerpc() - time_start_total + print(f'...Finished in {total_time}') + + + # #TODO: UPDATE TO POLARS + # #TODO: Paul note (may 31 2023), POLARS API not up to PANDAS so using PANDAS here + # def send_data( + # self, + # table_name, + # df, + # if_exists="append_new", + # unique_cols=["time"], + # df_chunk_size=2000, + # sql_chunk_size=50 + # ): + # table_name = table_name.lower() + # table_names = [t.lower() for t in self._get_table_names()] + + # if (if_exists == "append"): + # print("Warning: risk of adding duplicate rows using 'append'.") + # print("You are suggested to use 'append_new' instead.") + + # if (if_exists == "append_new") and (table_name in table_names): + # if len(unique_cols) > 1: + # raise NotImplementedError("Not yet implemented.") + + # col = unique_cols[0] + # idx_in_db = self.get_data(table_name=table_name, columns=[col])[ + # col + # ] + + # # Check if values in SQL database are unique + # if not idx_in_db.is_unique: + # raise IndexError( + # "Column '%s' is not unique in the SQL database." 
% col + # ) + + # idx_in_df = set(df[col]) + # idx_in_db = set(idx_in_db) + # idx_to_add = np.sort(list(idx_in_df - idx_in_db)) + # print( + # "{:d} entries already exist in SQL database.".format( + # len(idx_in_df) - len(idx_to_add) + # ) + # ) + + # print("Adding {:d} new entries...".format(len(idx_to_add))) + # df_subset = df.set_index('time').loc[idx_to_add].reset_index( + # drop=False) + + # else: + # df_subset = df + + # if (if_exists == "append_new"): + # if_exists = "append" + + # # Upload data + # N = df_subset.shape[0] + # if N < 1: + # print("Skipping data upload. Dataframe is empty.") + # else: + # print("Attempting to insert %d rows into table '%s'." + # % (df_subset.shape[0], table_name)) + # df_chunks_id = np.arange(0, df_subset.shape[0], df_chunk_size) + # df_chunks_id = np.append(df_chunks_id, df_subset.shape[0]) + # df_chunks_id = np.unique(df_chunks_id) + + # time_start_total = timerpc() + # for i in range(len(df_chunks_id)-1): + # Nl = df_chunks_id[i] + # Nu = df_chunks_id[i+1] + # print("Inserting rows %d to %d." % (Nl, Nu)) + # time_start_i = timerpc() + # df_sub = df_subset[Nl:Nu] + # df_sub.to_sql( + # table_name, + # self.engine, + # if_exists=if_exists, + # index=False, + # method="multi", + # chunksize=sql_chunk_size, + # ) + # time_i = timerpc() - time_start_i + # total_time = timerpc() - time_start_total + # est_time_left = (total_time / Nu) * (N - Nu) + # eta = datetime.datetime.now() + td(seconds=est_time_left) + # eta = eta.strftime("%a, %d %b %Y %H:%M:%S") + # print("Data insertion took %.1f s. ETA: %s." 
% (time_i, eta)) + +#TODO: UPDATE TO POLARS class sql_db_explorer_gui: def __init__(self, master, dbc, turbine_names = None, sort_columns=False): @@ -264,7 +386,7 @@ def __init__(self, master, dbc, turbine_names = None, sort_columns=False): self.master = master # Get basic database properties - self.df = pd.DataFrame() + self.df = pl.DataFrame() table_names = dbc._get_table_names() min_table_dates = [ dbc._get_first_time_entry(table_name=t) for t in table_names @@ -453,7 +575,7 @@ def load_data(self): start_time=start_time, end_time=end_time, ) - df = df.set_index("time", drop=True) + # df = df.set_index("time", drop=True) if df.shape[0] <= 0: print( @@ -463,29 +585,40 @@ def load_data(self): else: print("...Imported data successfully.") - old_col_names = list(df.columns) + old_col_names = [c for c in list(df.columns) if not c=='time'] new_col_names = [ chr(97 + tables_selected[ii]).upper() + "_%s" % c - for c in df.columns + for c in old_col_names ] col_mapping = dict(zip(old_col_names, new_col_names)) - df = df.rename(columns=col_mapping) + df = df.rename(col_mapping) # If specific turbine names are supplied apply them here if self.turbine_names is not None: columns = df.columns for t in range(len(self.turbine_names)): columns = [c.replace('%03d' % t,self.turbine_names[t]) for c in columns] - df.columns = columns + # df.columns = columns + df = df.rename(dict(zip(df.columns,columns))) df_array.append(df) # Merge dataframes - self.df = pd.concat(df_array, axis=1).reset_index(drop=False) + # self.df = pl.concat(df_array, axis=1)# .reset_index(drop=False) + df_merge = df_array[0] + + if len(df_array) > 1: + for df_ in df_array[1:]: + df_merge = df_merge.join(df_, on='time',how='outer') + + #Save it now + self.df = df_merge # If sorting the columns do it now if self.sort_columns: - self.df = self.df[sorted(self.df.columns)] + # self.df = self.df[sorted(self.df.columns)] + self.df = self.df.select(sorted(self.df.columns)) + self.update_channel_cols() 
self.create_figures() @@ -533,7 +666,7 @@ def update_plot(self, channel_no): ax = self.axes[channel_no] ax.clear() for c in self.channel_selection[channel_no]: - ax.plot(self.df.time, np.array(self.df[c].values), label=c) + ax.plot(self.df['time'], np.array(self.df[c]), label=c) ax.legend() ax.grid(True) @@ -572,186 +705,3 @@ def ci_select(self, channel_no, evt=None): channels = [self.df.columns[idx] for idx in indices] self.channel_selection[channel_no] = channels self.update_plot(channel_no=channel_no) - - -# def get_timestamp_of_last_downloaded_datafile(filelist): -# time_latest = None -# for fi in filelist: -# df = pd.read_feather(fi) -# time_array = df['time'] -# if not all(np.isnan(time_array)): -# tmp_time_max = np.max(df['time']) -# if time_latest is None: -# time_latest = tmp_time_max -# else: -# if tmp_time_max > time_latest: -# time_latest = tmp_time_max - -# return time_latest - - -# # # # # # # # # # # # # # # # # # # # # # # # # # -# # # # RAW DATA READING FUNCTIONS -# # # # # # # # # # # # # # # # # # # # # # # # # # -# def find_files_to_add_to_sqldb(sqldb_engine, files_paths, filenames_table): -# """This function is used to figure out which files are already -# uploaded to the SQL database, and which files still need to be -# uploaded. - -# Args: -# sqldb_engine ([SQL engine]): SQL Engine from sqlalchemy.create_engine -# used to access the SQL database of interest. This is used to call -# which files have previously been uploaded. -# files_paths ([list, str]): List of strings or a single string containing -# the path to the raw data files. One example is: -# files_paths = ['/home/user/data/windfarm1/year1/*.csv', -# '/home/user/data/windfarm1/year2/*.csv', -# '/home/user/data/windfarm1/year3/*.csv',] -# filenames_table ([str]): SQL table name containing the filenames of -# the previously uploaded data files. 
- -# Returns: -# files ([list]): List of files that are not yet in the SQL database -# """ -# # Convert files_paths to a list -# if isinstance(files_paths, str): -# files_paths = [files_paths] - -# # Figure out which files exists on local system -# files = [] -# for fpath in files_paths: -# fl = glob.glob(fpath) -# if len(fl) <= 0: -# print('No files found in directory %s.' % fpath) -# else: -# files.extend(fl) - -# # Figure out which files have already been uploaded to sql db -# query_string = "select * from " + filenames_table + ";" -# df = pd.read_sql_query(query_string, sqldb_engine) - -# # # Check for the files not in the database -# files = [f for f in files -# if os.path.basename(f) not in df['filename'].values] - -# # Sort the file list according to ascending name -# files = sorted(files, reverse=False) - -# return files - - -# # # # # # # # # # # # # # # # # # # # # # # # # # -# # # # DATA UPLOAD FUNCTIONS -# # # # # # # # # # # # # # # # # # # # # # # # # # - - -# def omit_last_rows_by_buffer(df, omit_buffer): -# if not 'time' in df.columns: -# df = df.reset_index(drop=False) - -# num_rows = df.shape[0] -# df = df[df['time'] < max(df['time']) - omit_buffer] -# print('Omitting last %d rows (%s s) as a buffer for future files.' 
-# % (num_rows-df.shape[0], omit_buffer)) -# return df - - -# def remove_duplicates_with_sqldb(df, sql_engine, table_name): -# min_time = df.time.min() -# max_time = df.time.max() - -# time_query = ( -# "select time from "+ table_name + -# " where time BETWEEN '%s' and '%s';" % (min_time, max_time) -# ) - -# df_time = pd.read_sql_query(time_query, sql_engine) -# df_time["time"] = pd.to_datetime(df_time.time) -# print("......Before duplicate removal there are %d rows" % df.shape[0]) -# df = df[~df.time.isin(df_time.time)] -# print("......After duplicate removal there are %d rows" % df.shape[0]) - -# # Check for self duplicates in to-be-uploaded dataset -# print("......Before SELF duplicate removal there are %d rows" % df.shape[0]) -# if "turbid" in df.columns: -# df = df.drop_duplicates(subset=["time", "turbid"], keep="first") -# else: -# df = df.drop_duplicates(subset=["time"], keep="first") -# print("......After SELF duplicate removal there are %d rows" % df.shape[0]) - -# # Drop null times in to-be-uploaded dataset -# print( -# "......Before null time/turbid duplicate removal there are %d rows" -# % df.shape[0] -# ) -# df = df.dropna(subset=["time"]) -# print("......After null time duplicate removal there are %d rows" % df.shape[0]) - -# return df - - -# def batch_download_data_from_sql(dbc, destination_path, table_name): -# print("Batch downloading data from table %s." % table_name) - -# # Check if output directory exists, if not, create -# if not os.path.exists(destination_path): -# os.makedirs(destination_path) - -# # Check current start and end time of database -# db_end_time = get_last_time_entry_sqldb(dbc.engine, table_name) -# db_end_time = db_end_time + datetime.timedelta(minutes=10) - -# # Check for past files and continue download or start a fresh download -# files_result = fsio.browse_downloaded_datafiles(destination_path, -# table_name=table_name) -# print('A total of %d existing files found.' 
% len(files_result)) - -# # Next timestamp is going to be next first of the month -# latest_timestamp = get_timestamp_of_last_downloaded_datafile(files_result) -# if latest_timestamp is None: -# db_start_time = get_first_time_entry_sqldb(dbc.engine, table_name) -# db_start_time = db_start_time - datetime.timedelta(minutes=10) -# current_timestamp = db_start_time -# elif latest_timestamp.month == 12: -# current_timestamp = pd.to_datetime('%s-01-01' % -# str(latest_timestamp.year+1)) -# else: -# current_timestamp = pd.to_datetime("%s-%s-01" % ( -# str(latest_timestamp.year), str(latest_timestamp.month+1))) - -# print('Continuing import from timestep: ', current_timestamp) -# while current_timestamp <= db_end_time: -# print('Importing data for ' + -# str(current_timestamp.strftime("%B")) + -# ', ' + str(current_timestamp.year) + '.') -# if current_timestamp.month == 12: -# next_timestamp = current_timestamp.replace( -# year=current_timestamp.year+1, month=1, -# day=1, hour=0, minute=0, second=0) -# else: -# next_timestamp = current_timestamp.replace( -# month=current_timestamp.month+1, -# day=1, hour=0, minute=0, second=0) - -# df = dbc.get_table_data_from_db_wide( -# table_name=table_name, -# start_time=current_timestamp, -# end_time=next_timestamp -# ) - -# # Drop NaN rows -# df = dfm.df_drop_nan_rows(df) - -# # Save dataset as a .ftr file -# fout = os.path.join(destination_path, "%s_%s.ftr" % -# (current_timestamp.strftime("%Y-%m"), table_name)) - -# df = df.reset_index(drop=('time' in df.columns)) -# df.to_feather(fout) - -# print('Data for ' + table_name + -# ' saved to .ftr files for ' + -# str(current_timestamp.strftime("%B")) + -# ', ' + str(current_timestamp.year) + '.') - -# current_timestamp = next_timestamp diff --git a/flasc/timing_tests/energy_ratio_timing.py b/flasc/timing_tests/energy_ratio_timing.py index 00d7a01c..22e2ffd8 100644 --- a/flasc/timing_tests/energy_ratio_timing.py +++ b/flasc/timing_tests/energy_ratio_timing.py @@ -25,15 +25,15 @@ import 
pandas as pd from flasc.energy_ratio import energy_ratio_suite +from flasc.energy_ratio import energy_ratio_polars as erp N_ITERATIONS = 5 - def load_data_and_prep_data(): # Load dataframe with artificial SCADA data root_dir = os.path.dirname(os.path.abspath(__file__)) ftr_path = os.path.join( - root_dir, '..','examples_artificial_data', 'raw_data_processing', 'postprocessed', 'df_scada_data_600s_filtered_and_northing_calibrated.ftr' + root_dir, '..','..','examples_artificial_data', 'raw_data_processing', 'postprocessed', 'df_scada_data_600s_filtered_and_northing_calibrated.ftr' ) if not os.path.exists(ftr_path): @@ -62,33 +62,41 @@ def time_energy_ratio_with_bootstrapping(): # Load the data df = load_data_and_prep_data() - # Load an energy ratio suite from FLASC - s = energy_ratio_suite.energy_ratio_suite(verbose=False) - - # Add dataframe to energy suite - s.add_df(df, 'data') + # Build the polars energy table object + # Specify num_blocks = num_rows to implement normal bootstrapping + df_energy = erp.get_energy_table([df],['baseline'],num_blocks=df.shape[0]) # For forward consistency, define the bins by the edges ws_edges = np.arange(5,25,1.) wd_edges = np.arange(0,360,2.) 
- # Create bins - ws_bins = [(ws_edges[i], ws_edges[i+1]) for i in range(len(ws_edges)-1)] - wd_bins = [(wd_edges[i], wd_edges[i+1]) for i in range(len(wd_edges)-1)] + # Get what new polars needs from this + ws_max = np.max(ws_edges) + ws_min = np.min(ws_edges) + ws_step = ws_edges[1] - ws_edges[0] + wd_max = np.max(wd_edges) + wd_min = np.min(wd_edges) + wd_step = wd_edges[1] - wd_edges[0] - # Run this calculation N_ITERATIONS times and take the average time - + # # Run this calculation N_ITERATIONS times and take the average time time_results = np.zeros(N_ITERATIONS) for i in range(N_ITERATIONS): start_time = time.time() - er = s.get_energy_ratios( - test_turbines=1, - ws_bins=ws_bins, - wd_bins=wd_bins, - N=N, - percentiles=[5.0, 95.0], - verbose=False - ) + + df_erb = erp.compute_energy_ratio( + df_energy, + ['baseline'], + test_turbines=[1], + use_predefined_ref=True, + use_predefined_wd=True, + use_predefined_ws=True, + ws_max=ws_max, + ws_min=ws_min, + ws_step=ws_step, + wd_max=wd_max, + wd_min=wd_min, + wd_step=wd_step, + N=N) end_time = time.time() time_results[i] = end_time - start_time @@ -98,8 +106,6 @@ def time_energy_ratio_with_bootstrapping(): - - if __name__=="__main__": warnings.filterwarnings('ignore') diff --git a/requirements.txt b/requirements.txt index d3b67ba6..192b5d05 100644 --- a/requirements.txt +++ b/requirements.txt @@ -13,3 +13,4 @@ sqlalchemy streamlit tkcalendar seaborn +polars \ No newline at end of file diff --git a/setup.py b/setup.py index bdb6ecf2..587f7ce5 100644 --- a/setup.py +++ b/setup.py @@ -26,7 +26,8 @@ 'sqlalchemy', 'streamlit', 'tkcalendar', - 'seaborn' + 'seaborn', + 'polars' ] ROOT = Path(__file__).parent diff --git a/tests/energy_ratio_test.py b/tests/energy_ratio_test.py index 02301b1c..f70336dc 100644 --- a/tests/energy_ratio_test.py +++ b/tests/energy_ratio_test.py @@ -9,6 +9,7 @@ from flasc.dataframe_operations import dataframe_manipulations as dfm from flasc import floris_tools as ftools from 
flasc.examples.models import load_floris_artificial as load_floris +from flasc.energy_ratio import energy_ratio_polars as erp def load_data(): @@ -151,3 +152,44 @@ def test_energy_ratio_regression(self): self.assertEqual(out.loc[3, "bin_count"], 34) self.assertEqual(out.loc[4, "bin_count"], 38) self.assertEqual(out.loc[5, "bin_count"], 6) + + def test_energy_ratio_regression_polars(self): + # Load data and FLORIS model + fi, _ = load_floris() + df = load_data() + df = dfm.set_wd_by_all_turbines(df) + df_upstream = ftools.get_upstream_turbs_floris(fi) + df = dfm.set_ws_by_upstream_turbines(df, df_upstream) + df = dfm.set_pow_ref_by_turbines(df, turbine_numbers=[0, 6]) + + wd_step=2.0 + ws_step=1.0 + # wd_bin_width=3.0, + + df_energy = erp.get_energy_table([df],['baseline']) + + ero = erp.compute_energy_ratio( + df_energy, + ['baseline'], + test_turbines=[1], + use_predefined_ref=True, + use_predefined_wd=True, + use_predefined_ws=True, + wd_max=360.0, + wd_min=wd_step/2.0, + wd_step=wd_step, + ws_max=30.0, + ws_min=ws_step/2.0, + ws_step=ws_step, + ) + + # Get the underlying polars data frame + df_erb = ero.df_result + + self.assertAlmostEqual(df_erb['baseline'].item(1), 0.8087321721301793, places=4) + self.assertAlmostEqual(df_erb['baseline'].item(2), 0.903263, places=4) + self.assertAlmostEqual(df_erb['baseline'].item(3), 0.930883, places=4) + + self.assertEqual(df_erb['count_baseline'].item(1), 25) + self.assertEqual(df_erb['count_baseline'].item(2), 27) + self.assertEqual(df_erb['count_baseline'].item(3), 22) \ No newline at end of file