From 96955caefb250cbae0a14ece9f348bbcdda3a0de Mon Sep 17 00:00:00 2001 From: jtsherba Date: Wed, 28 Dec 2016 12:49:47 -0800 Subject: [PATCH] include group_by and percentile as options within queries --- .idea/misc.xml | 12 +----- .idea/modules.xml | 7 +--- .idea/ssim-api.iml | 11 +++++ README.md | 2 +- query_ssim_summary.py | 4 +- query_ssim_tabular.py | 37 ++++++++--------- requirements.txt | 2 +- ssim_api/all_dictionaries.py | 11 +++-- ssim_api/ssim_general_functions.py | 66 +++++++++++++++++++++++++++++- ssim_api/ssim_query_functions.py | 48 ++++++++++++++++------ 10 files changed, 143 insertions(+), 57 deletions(-) create mode 100644 .idea/ssim-api.iml diff --git a/.idea/misc.xml b/.idea/misc.xml index 469c713..06a3561 100644 --- a/.idea/misc.xml +++ b/.idea/misc.xml @@ -1,14 +1,4 @@ - - - - - - - - - - - + \ No newline at end of file diff --git a/.idea/modules.xml b/.idea/modules.xml index 936224a..d7f41d3 100644 --- a/.idea/modules.xml +++ b/.idea/modules.xml @@ -2,12 +2,7 @@ -<<<<<<< HEAD - -======= ->>>>>>> aab518b5ca5d7be6656057570fc0f3b9ab18917d - - + \ No newline at end of file diff --git a/.idea/ssim-api.iml b/.idea/ssim-api.iml new file mode 100644 index 0000000..6711606 --- /dev/null +++ b/.idea/ssim-api.iml @@ -0,0 +1,11 @@ + + + + + + + + + + \ No newline at end of file diff --git a/README.md b/README.md index 6223a28..fa9f139 100644 --- a/README.md +++ b/README.md @@ -14,7 +14,7 @@ SETUP - activate virtual environment: \path\to\virtualenv name\Scripts\activate 3. Install package - - install requirements: pip install pandas==0.18.0 + - install requirements: pip install pandas==0.18.1 - install ssim_api with pip: pip install git+https://github.com/usgs/ssim-api.git@v0.0.2 ABOUT THE CODE diff --git a/query_ssim_summary.py b/query_ssim_summary.py index 7b7c7c0..84cf0c0 100644 --- a/query_ssim_summary.py +++ b/query_ssim_summary.py @@ -10,7 +10,7 @@ # define query vals project_id = (7096,) -scenario_id = (6368, 6370) +scenario_id = (6370) # run project_summary() # Returns: @@ -23,7 +23,7 @@ # ls[5] = strata # ls[6] = secondary_strata -ProjectSummary = project_summary(sqlite_file, project=project_id) +#ProjectSummary = project_summary(sqlite_file, project=project_id) print("Project summary query finished") # run query_projects() diff --git a/query_ssim_tabular.py b/query_ssim_tabular.py index 1329a37..be42b77 100644 --- a/query_ssim_tabular.py +++ b/query_ssim_tabular.py @@ -6,55 +6,52 @@ from ssim_api.ssim_query_functions import db_query_stateclass, db_query_transitiongroup, db_query_stock from ssim_api.ssim_postprocessing_functions import aggregate_over, calculate_percentile, df_to_csv import time +import sqlite3 # ********************************************************** # Hawaii LandCarbon Assessment.ssim # ********************************************************** # path to .ssim database -sqlite_file = r"/path/to/db.ssim" - +#sqlite_file = r"/path/to/db.ssim" +sqlite_file=r"/path/to/.ssim" #csvout csv_out = r"/path/to/fileout.csv" # define query vals project_id = (7096,) -scenario_id = (6368, 6370) -state_label_x = ("Forest","Grassland") -stratum = ("Dry","Wet","Mesic") -secondary_stratum = ("Hawai'i", "Maiu", "O'ahu") +scenario_id = (6370,) +state_label_x = ("Forest","") +stratum = ("Dry",) +secondary_stratum = ("Hawai'i",) timestep =(2015,) - +group_by=("Timestep","Iteration","StockType") # start timer start = time.time() +percentile = ("Iteration", 95) +transition_group = ("test") # run query and return pandas dataframe # run db_query_stateclass() # returns: # dataframe with stateclass query results -StateClassOutput = db_query_stateclass(sqlite_file, state_label_x=state_label_x, stratum=stratum,project_id=project_id, scenario_id = scenario_id, timestep=timestep) +StateClassOutput = db_query_stateclass(sqlite_file, project_id=project_id, scenario_id = scenario_id, state_label_x=None, stratum=stratum, secondary_stratum=None, group_by=group_by, percentile=None) print("Stateclass query finished") + # run db_query_transitiongroup(() # returns: # dataframe with transitiongroup query results -TransitionGroupOutput = db_query_transitiongroup(sqlite_file, project_id=project_id, timestep=timestep) +TransitionGroupOutput = db_query_transitiongroup(sqlite_file, project_id=project_id, scenario_id = scenario_id, transition_group=None, stratum=None, secondary_stratum=None, group_by=group_by, percentile=percentile) print("Transitiongroup query finished") + # run db_query_stock() # returns: # dataframe with stock query results -StockOutput = db_query_stock(sqlite_file, scenario_id=scenario_id, stratum= stratum, secondary_stratum=secondary_stratum) -print("Stock query finished") - -#define variables to aggregate over and return aggregated values -aggregate_by_columns = ["AgeMin", "AgeMax", 'SecondaryStratum'] -column = "Amount" -StateClassOutput = aggregate_over(StateClassOutput, aggregate_by_columns, column) +StockOutput = db_query_stock(sqlite_file, scenario_id=scenario_id, stratum= None, secondary_stratum=None, group_by=group_by, percentile=percentile) +#print("Stock query finished") -# define percentile variables and return percentile values -percentile = ["Iteration", 0.05, 0.95] -StateClassOutput = calculate_percentile(StateClassOutput, percentile, column) # export df to csv -df_to_csv(StateClassOutput, csv_out) +#df_to_csv(StateClassOutput, csv_out) # end timer end = time.time() diff --git a/requirements.txt b/requirements.txt index 8416d4b..3c00aa8 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,5 +1,5 @@ numpy==1.11.0 -pandas==0.18.0 +pandas==0.18.1 python-dateutil==2.5.2 pytz==2016.3 six==1.10.0 diff --git a/ssim_api/all_dictionaries.py b/ssim_api/all_dictionaries.py index df3a7c9..d051801 100644 --- a/ssim_api/all_dictionaries.py +++ b/ssim_api/all_dictionaries.py @@ -3,9 +3,9 @@ # Dictionary to store SQL queries # ********************************************************** query_dictionary = { -'OutputStratumState_query': '''SELECT SSim_Scenario.ProjectID, STSim_OutputStratumState.ScenarioID, STSim_OutputStratumState.Iteration, STSim_OutputStratumState.Timestep, STSim_Stratum.Name AS Stratum, STSim_SecondaryStratum.Name AS SecondaryStratum, STSim_StateLabelX.Name AS StateLabelX, STSim_StateLabelY.Name AS StateLabelY, STSim_OutputStratumState.AgeMin, STSim_OutputStratumState.AgeMax, STSim_OutputStratumState.Amount FROM ((((STSim_OutputStratumState INNER JOIN STSim_Stratum ON STSim_OutputStratumState.StratumID = STSim_Stratum.StratumID) INNER JOIN STSim_SecondaryStratum ON STSim_OutputStratumState.SecondaryStratumID = STSim_SecondaryStratum.SecondaryStratumID) INNER JOIN STSim_StateClass ON STSim_OutputStratumState.StateClassID = STSim_StateClass.StateClassID) INNER JOIN STSim_StateLabelX ON (STSim_StateClass.StateLabelXID = STSim_StateLabelX.StateLabelXID) AND (STSim_StateClass.ProjectID = STSim_StateLabelX.ProjectID)) INNER JOIN STSim_StateLabelY ON (STSim_StateClass.StateLabelYID = STSim_StateLabelY.StateLabelYID) AND (STSim_StateClass.ProjectID = STSim_StateLabelY.ProjectID) INNER JOIN SSim_Scenario ON (STSim_OutputStratumState.ScenarioID = SSim_Scenario.ScenarioID)''', -'OutputStratumTransition_query': '''SELECT SSim_Scenario.ProjectID, STSim_OutputStratumTransition.ScenarioID, STSim_OutputStratumTransition.Iteration, STSim_OutputStratumTransition.Timestep, STSim_Stratum.Name AS Stratum, STSim_SecondaryStratum.Name AS SecondaryStratum, STSim_TransitionGroup.Name AS TransitionGroup, STSim_OutputStratumTransition.AgeMin, STSim_OutputStratumTransition.AgeMax, STSim_OutputStratumTransition.Amount FROM (((STSim_OutputStratumTransition INNER JOIN STSim_Stratum ON STSim_OutputStratumTransition.StratumID = STSim_Stratum.StratumID) INNER JOIN STSim_SecondaryStratum ON STSim_OutputStratumTransition.SecondaryStratumID = STSim_SecondaryStratum.SecondaryStratumID) INNER JOIN STSim_TransitionGroup ON STSim_OutputStratumTransition.TransitionGroupID = STSim_TransitionGroup.TransitionGroupID) INNER JOIN SSim_Scenario ON STSim_OutputStratumTransition.ScenarioID = SSim_Scenario.ScenarioID ''', -'OutputStock_query':'''SELECT SSim_Scenario.ProjectID, SF_OutputStock.ScenarioID, SF_OutputStock.Iteration, SF_OutputStock.Timestep, STSim_Stratum.Name AS Stratum, STSim_SecondaryStratum.Name AS SecondaryStratum, STSim_StateClass.Name AS StateClass, SF_StockType.Name AS StockType, SF_OutputStock.Amount FROM ((((SF_OutputStock INNER JOIN STSim_Stratum ON SF_OutputStock.StratumID = STSim_Stratum.StratumID) INNER JOIN STSim_SecondaryStratum ON SF_OutputStock.SecondaryStratumID = STSim_SecondaryStratum.SecondaryStratumID) INNER JOIN STSim_StateClass ON SF_OutputStock.StateClassID = STSim_StateClass.StateClassID) INNER JOIN SF_StockType ON SF_OutputStock.StockTypeID = SF_StockType.StockTypeID) INNER JOIN SSim_Scenario ON SF_OutputStock.ScenarioID = SSim_Scenario.ScenarioID''', +'OutputStratumState_query_from':''' FROM ((((STSim_OutputStratumState INNER JOIN STSim_Stratum ON STSim_OutputStratumState.StratumID = STSim_Stratum.StratumID) INNER JOIN STSim_SecondaryStratum ON STSim_OutputStratumState.SecondaryStratumID = STSim_SecondaryStratum.SecondaryStratumID) INNER JOIN STSim_StateClass ON STSim_OutputStratumState.StateClassID = STSim_StateClass.StateClassID) INNER JOIN STSim_StateLabelX ON (STSim_StateClass.StateLabelXID = STSim_StateLabelX.StateLabelXID) AND (STSim_StateClass.ProjectID = STSim_StateLabelX.ProjectID)) INNER JOIN STSim_StateLabelY ON (STSim_StateClass.StateLabelYID = STSim_StateLabelY.StateLabelYID) AND (STSim_StateClass.ProjectID = STSim_StateLabelY.ProjectID) INNER JOIN SSim_Scenario ON (STSim_OutputStratumState.ScenarioID = SSim_Scenario.ScenarioID)''', +'OutputStratumTransition_query_from': ''' FROM (((STSim_OutputStratumTransition INNER JOIN STSim_Stratum ON STSim_OutputStratumTransition.StratumID = STSim_Stratum.StratumID) INNER JOIN STSim_SecondaryStratum ON STSim_OutputStratumTransition.SecondaryStratumID = STSim_SecondaryStratum.SecondaryStratumID) INNER JOIN STSim_TransitionGroup ON STSim_OutputStratumTransition.TransitionGroupID = STSim_TransitionGroup.TransitionGroupID) INNER JOIN SSim_Scenario ON STSim_OutputStratumTransition.ScenarioID = SSim_Scenario.ScenarioID''', +'OutputStock_query_from':''' FROM ((((SF_OutputStock INNER JOIN STSim_Stratum ON SF_OutputStock.StratumID = STSim_Stratum.StratumID) INNER JOIN STSim_SecondaryStratum ON SF_OutputStock.SecondaryStratumID = STSim_SecondaryStratum.SecondaryStratumID) INNER JOIN STSim_StateClass ON SF_OutputStock.StateClassID = STSim_StateClass.StateClassID) INNER JOIN SF_StockType ON SF_OutputStock.StockTypeID = SF_StockType.StockTypeID) INNER JOIN SSim_Scenario ON SF_OutputStock.ScenarioID = SSim_Scenario.ScenarioID''', 'Scenario_Names': '''SELECT SSim_Scenario.Name, SSim_Scenario.ScenarioID, SSim_Scenario.RunStatus, STSim_RunControl.MaximumIteration, STSim_RunControl.MaximumTimestep, STSim_RunControl.MinimumIteration, STSim_RunControl.MinimumTimestep, STSim_RunControl.IsSpatial FROM (SSim_Scenario INNER JOIN STSim_RunControl ON SSim_Scenario.ScenarioID = STSim_RunControl.ScenarioID) ''', 'Project_Ids': '''SELECT Name, ProjectID FROM SSim_Project''', 'Scenario_Names_All': '''SELECT Name, ScenarioID FROM SSim_Scenario''', @@ -22,3 +22,8 @@ +select_dic = { +"OutputStratumState_select_dic":{'IDProject': 'SSim_Scenario.ProjectID AS IDProject', 'IDScenario': 'STSim_OutputStratumState.ScenarioID AS IDScenario', 'Iteration': 'STSim_OutputStratumState.Iteration AS Iteration', 'Timestep': 'STSim_OutputStratumState.Timestep AS Timestep', 'Stratum': 'STSim_Stratum.Name AS Stratum', 'SecondaryStratum': 'STSim_SecondaryStratum.Name AS SecondaryStratum', 'StateLabelX': 'STSim_StateLabelX.Name AS StateLabelX', 'StateLabelY': 'STSim_StateLabelY.Name AS StateLabelY', 'AgeMin': 'STSim_OutputStratumState.AgeMin AS AgeMin', 'AgeMax': 'STSim_OutputStratumState.AgeMax AS AgeMax', 'Amount': 'STSim_OutputStratumState.Amount AS Amount'}, +"OutputStratumTransition_select_dic":{'IDProject': 'SSim_Scenario.ProjectID AS IDProject', 'IDScenario':'STSim_OutputStratumTransition.ScenarioID AS IDScenario', 'Iteration': 'STSim_OutputStratumTransition.Iteration AS Iteration', 'Timestep': 'STSim_OutputStratumTransition.Timestep AS Timestep', 'Stratum': 'STSim_Stratum.Name AS Stratum', 'SecondaryStratum': 'STSim_SecondaryStratum.Name AS SecondaryStratum', 'TransitionGroup': 'STSim_TransitionGroup.Name AS TransitionGroup', 'AgeMin': 'STSim_OutputStratumTransition.AgeMin AS AgeMin', 'AgeMax': 'STSim_OutputStratumTransition.AgeMax AS AgeMax', 'Amount': 'STSim_OutputStratumTransition.Amount AS Amount'}, +"OutputStock_query_select_dic": {'IDProject': 'SSim_Scenario.ProjectID AS IDProject', 'IDScenario': 'SF_OutputStock.ScenarioID AS IDScenario', 'Iteration': 'SF_OutputStock.Iteration AS Iteration', 'Timestep':'SF_OutputStock.Timestep AS Timestep', 'Stratum': 'STSim_Stratum.Name AS Stratum', 'SecondaryStratum': 'STSim_SecondaryStratum.Name AS SecondaryStratum', 'StateClass': 'STSim_StateClass.Name AS StateClass', 'StockType':'SF_StockType.Name AS StockType', 'Amount': 'SF_OutputStock.Amount AS Amount'}, +} diff --git a/ssim_api/ssim_general_functions.py b/ssim_api/ssim_general_functions.py index 434b126..aeeeebb 100644 --- a/ssim_api/ssim_general_functions.py +++ b/ssim_api/ssim_general_functions.py @@ -8,7 +8,30 @@ # ********************************************************** import sqlite3, sys, os.path import pandas as pd -from ssim_api.all_dictionaries import query_dictionary +import numpy as np +from ssim_api.all_dictionaries import query_dictionary, select_dic + +class pc: + """ + percentile aggregate class + """ + def __init__(self): + self.s = [] + self.pval = 0 + + def step(self, value, percent_val): + + self.s.append(value) + self.pval=percent_val + + def finalize(self): + if len(self.s)==0: + return None + else: + a = np.array(self.s) + p = np.percentile(a, self.pval) + return p + def apply_query(sqlite_connection, query_sql, all_params=None): # Creates connected to the database, executes query, and returns pandas dataframe with results @@ -21,6 +44,7 @@ def apply_query(sqlite_connection, query_sql, all_params=None): # sql statement with either WHERE or AND appended to the end # conn = sqlite3.connect(sqlite_connection) + conn.create_aggregate("pc", 2, pc) c = conn.cursor() if len(all_params) > 0: c.execute(query_sql, all_params) @@ -82,6 +106,46 @@ def update_query_string(all_params, query_sql, query_column, variable, variable_ return query_sql, all_params +def update_group_by_query(query_sql, selection_params, group_by, variable_name): + # Appends WHERE or AND to Sql statement depending on whether parameter is the first parameter in the WHERE statement + # + # Args: + # all_params: list of all parameters that will be queried with WHERE statement + # squery_sql: current sql statement + # + # Returns: + # sql statement with either WHERE or AND appended to the end + # + raise_type_error(group_by, variable_name=variable_name) + + query_sql += " GROUP BY " + ", ".join(group_by) + + for header in selection_params.keys(): + if header not in group_by: + del selection_params[header] + + query_select = "SELECT " + ", ".join(selection_params.values()) + ", SUM(Amount) AS sum" + + return query_sql, query_select, selection_params + +def update_percentile_query(query_sql, selection_params, query_select, percentile, variable_name): + # Appends WHERE or AND to Sql statement depending on whether parameter is the first parameter in the WHERE statement + # + # Args: + # all_params: list of all parameters that will be queried with WHERE statement + # squery_sql: current sql statement + # + # Returns: + # sql statement with either WHERE or AND appended to the end + # + raise_type_error(percentile, variable_name=variable_name) + + del selection_params[percentile[0]] + query_sql += ") GROUP BY " + ", ".join(selection_params.keys()) + query_select = "select " + ", ".join(selection_params.keys()) + ", pc(sum, %d), pc(sum, 50), pc(sum, %d) from (" % (100-percentile[1], percentile[1]) + query_select + + return query_sql, query_select + # ********************************************************** # General database functions diff --git a/ssim_api/ssim_query_functions.py b/ssim_api/ssim_query_functions.py index d1b5c11..27d48a7 100644 --- a/ssim_api/ssim_query_functions.py +++ b/ssim_api/ssim_query_functions.py @@ -7,15 +7,12 @@ # ********************************************************** import sqlite3, sys, os.path -import pandas as pd -from ssim_api.all_dictionaries import query_dictionary from ssim_api.ssim_general_functions import * # ********************************************************** # Query database functions # ********************************************************** - -def db_query_stateclass(sqlite_connection, project_id=None, scenario_id=None, iteration=None, timestep=None, stratum=None, secondary_stratum=None, state_label_x=None, state_label_y=None): +def db_query_stateclass(sqlite_connection, project_id=None, scenario_id=None, iteration=None, timestep=None, stratum=None, secondary_stratum=None, state_label_x=None, state_label_y=None, group_by=None, percentile=None): # Function for querying the STSim_OutputStratumState table in the database # # Args: @@ -32,8 +29,11 @@ def db_query_stateclass(sqlite_connection, project_id=None, scenario_id=None, it # Dataframe with the results of the query # try: - query_name = "OutputStratumState_query" - query_sql = query_dictionary[query_name] + #query_select = query_dictionary["OutputStratumState_query_select"] + query_from = query_dictionary["OutputStratumState_query_from"] + selection_params = select_dic["OutputStratumState_select_dic"] + query_select = "SELECT " + ", ".join(selection_params.values()) + query_sql = "" all_params = () if project_id: query_sql, all_params = update_query_string(all_params, query_sql, "SSim_Scenario.ProjectID", project_id, "project_id") @@ -51,7 +51,15 @@ def db_query_stateclass(sqlite_connection, project_id=None, scenario_id=None, it query_sql, all_params = update_query_string(all_params, query_sql, "STSim_StateLabelX.Name", state_label_x, "state_label_x") if state_label_y: query_sql, all_params = update_query_string(all_params, query_sql, "STSim_StateLabelY.Name", state_label_y, "state_label_y") + if group_by: + query_sql, query_select, selection_params = update_group_by_query(query_sql, selection_params, group_by, "group_by") + if percentile: + query_sql, query_select = update_percentile_query(query_sql, selection_params, query_select, percentile, "percentile") + + query_sql = query_select + query_from + query_sql + print(query_sql) + print(all_params) df = apply_query(sqlite_connection, query_sql, all_params) raise_error_empty_df(df) return df @@ -60,7 +68,7 @@ def db_query_stateclass(sqlite_connection, project_id=None, scenario_id=None, it print("Error:", explanation) -def db_query_transitiongroup(sqlite_connection, project_id=None, scenario_id=None, iteration=None, timestep=None, stratum=None, secondary_stratum=None, transition_group=None): +def db_query_transitiongroup(sqlite_connection, project_id=None, scenario_id=None, iteration=None, timestep=None, stratum=None, secondary_stratum=None, transition_group=None, group_by=None, percentile=None): # Function for querying the STSim_OutputStratumState table in the database # # Args: @@ -77,8 +85,10 @@ def db_query_transitiongroup(sqlite_connection, project_id=None, scenario_id=Non # Dataframe with the results of the query # try: - query_name = "OutputStratumTransition_query" - query_sql = query_dictionary[query_name] + query_from = query_dictionary["OutputStratumTransition_query_from"] + selection_params = select_dic["OutputStratumTransition_select_dic"] + query_select = "SELECT " + ", ".join(selection_params.values()) + query_sql = "" all_params = () if project_id: query_sql, all_params = update_query_string(all_params, query_sql, "SSim_Scenario.ProjectID", project_id, "project_id") @@ -94,6 +104,12 @@ def db_query_transitiongroup(sqlite_connection, project_id=None, scenario_id=Non query_sql, all_params = update_query_string(all_params, query_sql, "STSim_SecondaryStratum.Name", secondary_stratum, "secondary_stratum") if transition_group: query_sql, all_params = update_query_string(all_params, query_sql, "STSim_TransitionGroup.Name", transition_group, "transition_group") + if group_by: + query_sql, query_select, selection_params = update_group_by_query(query_sql, selection_params, group_by, "group_by") + if percentile: + query_sql, query_select = update_percentile_query(query_sql, selection_params, query_select, percentile, "percentile") + + query_sql = query_select + query_from + query_sql df = apply_query(sqlite_connection, query_sql, all_params) raise_error_empty_df(df) @@ -102,7 +118,7 @@ def db_query_transitiongroup(sqlite_connection, project_id=None, scenario_id=Non except Exception as explanation: print("Error:", explanation) -def db_query_stock(sqlite_connection, project_id=None, scenario_id=None, iteration=None, timestep=None, stratum=None, secondary_stratum=None, stateclass=None, stock_type=None): +def db_query_stock(sqlite_connection, project_id=None, scenario_id=None, iteration=None, timestep=None, stratum=None, secondary_stratum=None, stateclass=None, stock_type=None, group_by=None, percentile=None): # Function for querying the STSim_OutputStratumState table in the database # # Args: @@ -118,8 +134,10 @@ def db_query_stock(sqlite_connection, project_id=None, scenario_id=None, iterati # Returns: # Dataframe with the results of the query # - query_name = "OutputStock_query" - query_sql = query_dictionary[query_name] + query_from = query_dictionary["OutputStock_query_from"] + selection_params = select_dic["OutputStock_query_select_dic"] + query_select = "SELECT " + ", ".join(selection_params.values()) + query_sql = "" all_params = () if project_id: query_sql, all_params = update_query_string(all_params, query_sql, "SSim_Scenario.ProjectID", project_id, "project_id") @@ -137,6 +155,12 @@ def db_query_stock(sqlite_connection, project_id=None, scenario_id=None, iterati query_sql, all_params = update_query_string(all_params, query_sql, "STSim_StateClass.Name", stateclass, "stateclass") if stock_type: query_sql, all_params = update_query_string(all_params, query_sql, "SF_StockType.Name", stock_type, "stock_type") + if group_by: + query_sql, query_select, selection_params = update_group_by_query(query_sql, selection_params, group_by, "group_by") + if percentile: + query_sql, query_select = update_percentile_query(query_sql, selection_params, query_select, percentile, "percentile") + + query_sql = query_select + query_from + query_sql df = apply_query(sqlite_connection, query_sql, all_params) raise_error_empty_df(df)