Skip to content

Commit

Permalink
include group_by and percentile as options within queries
Browse files Browse the repository at this point in the history
  • Loading branch information
jsherba-usgs committed Dec 28, 2016
1 parent 45de74c commit 96955ca
Show file tree
Hide file tree
Showing 10 changed files with 143 additions and 57 deletions.
12 changes: 1 addition & 11 deletions .idea/misc.xml

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

7 changes: 1 addition & 6 deletions .idea/modules.xml

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

11 changes: 11 additions & 0 deletions .idea/ssim-api.iml

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ SETUP
- activate virtual environment: \path\to\virtualenv name\Scripts\activate

3. Install package
- install requirements: pip install pandas==0.18.0
- install requirements: pip install pandas==0.18.1
- install ssim_api with pip: pip install git+https://github.com/usgs/[email protected]

ABOUT THE CODE
Expand Down
4 changes: 2 additions & 2 deletions query_ssim_summary.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@

# define query vals
project_id = (7096,)
scenario_id = (6368, 6370)
# The trailing comma makes this a one-element tuple, consistent with project_id;
# without it the parentheses are ignored and the value is a plain int.
scenario_id = (6370,)

# run project_summary()
# Returns:
Expand All @@ -23,7 +23,7 @@
# ls[5] = strata
# ls[6] = secondary_strata

ProjectSummary = project_summary(sqlite_file, project=project_id)
#ProjectSummary = project_summary(sqlite_file, project=project_id)
print("Project summary query finished")

# run query_projects()
Expand Down
37 changes: 17 additions & 20 deletions query_ssim_tabular.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,55 +6,52 @@
from ssim_api.ssim_query_functions import db_query_stateclass, db_query_transitiongroup, db_query_stock
from ssim_api.ssim_postprocessing_functions import aggregate_over, calculate_percentile, df_to_csv
import time
import sqlite3

# **********************************************************
# Hawaii LandCarbon Assessment.ssim
# **********************************************************
# path to .ssim database
sqlite_file = r"/path/to/db.ssim"

#sqlite_file = r"/path/to/db.ssim"
sqlite_file=r"/path/to/.ssim"
#csvout
csv_out = r"/path/to/fileout.csv"

# define query vals
project_id = (7096,)
scenario_id = (6368, 6370)
state_label_x = ("Forest","Grassland")
stratum = ("Dry","Wet","Mesic")
secondary_stratum = ("Hawai'i", "Maiu", "O'ahu")
scenario_id = (6370,)
state_label_x = ("Forest","")
stratum = ("Dry",)
secondary_stratum = ("Hawai'i",)
timestep =(2015,)

group_by=("Timestep","Iteration","StockType")
# start timer
start = time.time()
percentile = ("Iteration", 95)
# The trailing comma makes this a one-element tuple like the other query values;
# without it the parentheses are ignored and the value is a plain str.
transition_group = ("test",)

# run query and return pandas dataframe
# run db_query_stateclass()
# returns:
# dataframe with stateclass query results
StateClassOutput = db_query_stateclass(sqlite_file, state_label_x=state_label_x, stratum=stratum,project_id=project_id, scenario_id = scenario_id, timestep=timestep)
StateClassOutput = db_query_stateclass(sqlite_file, project_id=project_id, scenario_id = scenario_id, state_label_x=None, stratum=stratum, secondary_stratum=None, group_by=group_by, percentile=None)
print("Stateclass query finished")

# run db_query_transitiongroup()
# returns:
# dataframe with transitiongroup query results
TransitionGroupOutput = db_query_transitiongroup(sqlite_file, project_id=project_id, timestep=timestep)
TransitionGroupOutput = db_query_transitiongroup(sqlite_file, project_id=project_id, scenario_id = scenario_id, transition_group=None, stratum=None, secondary_stratum=None, group_by=group_by, percentile=percentile)
print("Transitiongroup query finished")

# run db_query_stock()
# returns:
# dataframe with stock query results
StockOutput = db_query_stock(sqlite_file, scenario_id=scenario_id, stratum= stratum, secondary_stratum=secondary_stratum)
print("Stock query finished")

#define variables to aggregate over and return aggregated values
aggregate_by_columns = ["AgeMin", "AgeMax", 'SecondaryStratum']
column = "Amount"
StateClassOutput = aggregate_over(StateClassOutput, aggregate_by_columns, column)
StockOutput = db_query_stock(sqlite_file, scenario_id=scenario_id, stratum= None, secondary_stratum=None, group_by=group_by, percentile=percentile)
#print("Stock query finished")

# define percentile variables and return percentile values
percentile = ["Iteration", 0.05, 0.95]
StateClassOutput = calculate_percentile(StateClassOutput, percentile, column)

# export df to csv
df_to_csv(StateClassOutput, csv_out)
#df_to_csv(StateClassOutput, csv_out)

# end timer
end = time.time()
Expand Down
2 changes: 1 addition & 1 deletion requirements.txt
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
numpy==1.11.0
pandas==0.18.0
pandas==0.18.1
python-dateutil==2.5.2
pytz==2016.3
six==1.10.0
11 changes: 8 additions & 3 deletions ssim_api/all_dictionaries.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,9 +3,9 @@
# Dictionary to store SQL queries
# **********************************************************
query_dictionary = {
'OutputStratumState_query': '''SELECT SSim_Scenario.ProjectID, STSim_OutputStratumState.ScenarioID, STSim_OutputStratumState.Iteration, STSim_OutputStratumState.Timestep, STSim_Stratum.Name AS Stratum, STSim_SecondaryStratum.Name AS SecondaryStratum, STSim_StateLabelX.Name AS StateLabelX, STSim_StateLabelY.Name AS StateLabelY, STSim_OutputStratumState.AgeMin, STSim_OutputStratumState.AgeMax, STSim_OutputStratumState.Amount FROM ((((STSim_OutputStratumState INNER JOIN STSim_Stratum ON STSim_OutputStratumState.StratumID = STSim_Stratum.StratumID) INNER JOIN STSim_SecondaryStratum ON STSim_OutputStratumState.SecondaryStratumID = STSim_SecondaryStratum.SecondaryStratumID) INNER JOIN STSim_StateClass ON STSim_OutputStratumState.StateClassID = STSim_StateClass.StateClassID) INNER JOIN STSim_StateLabelX ON (STSim_StateClass.StateLabelXID = STSim_StateLabelX.StateLabelXID) AND (STSim_StateClass.ProjectID = STSim_StateLabelX.ProjectID)) INNER JOIN STSim_StateLabelY ON (STSim_StateClass.StateLabelYID = STSim_StateLabelY.StateLabelYID) AND (STSim_StateClass.ProjectID = STSim_StateLabelY.ProjectID) INNER JOIN SSim_Scenario ON (STSim_OutputStratumState.ScenarioID = SSim_Scenario.ScenarioID)''',
'OutputStratumTransition_query': '''SELECT SSim_Scenario.ProjectID, STSim_OutputStratumTransition.ScenarioID, STSim_OutputStratumTransition.Iteration, STSim_OutputStratumTransition.Timestep, STSim_Stratum.Name AS Stratum, STSim_SecondaryStratum.Name AS SecondaryStratum, STSim_TransitionGroup.Name AS TransitionGroup, STSim_OutputStratumTransition.AgeMin, STSim_OutputStratumTransition.AgeMax, STSim_OutputStratumTransition.Amount FROM (((STSim_OutputStratumTransition INNER JOIN STSim_Stratum ON STSim_OutputStratumTransition.StratumID = STSim_Stratum.StratumID) INNER JOIN STSim_SecondaryStratum ON STSim_OutputStratumTransition.SecondaryStratumID = STSim_SecondaryStratum.SecondaryStratumID) INNER JOIN STSim_TransitionGroup ON STSim_OutputStratumTransition.TransitionGroupID = STSim_TransitionGroup.TransitionGroupID) INNER JOIN SSim_Scenario ON STSim_OutputStratumTransition.ScenarioID = SSim_Scenario.ScenarioID ''',
'OutputStock_query':'''SELECT SSim_Scenario.ProjectID, SF_OutputStock.ScenarioID, SF_OutputStock.Iteration, SF_OutputStock.Timestep, STSim_Stratum.Name AS Stratum, STSim_SecondaryStratum.Name AS SecondaryStratum, STSim_StateClass.Name AS StateClass, SF_StockType.Name AS StockType, SF_OutputStock.Amount FROM ((((SF_OutputStock INNER JOIN STSim_Stratum ON SF_OutputStock.StratumID = STSim_Stratum.StratumID) INNER JOIN STSim_SecondaryStratum ON SF_OutputStock.SecondaryStratumID = STSim_SecondaryStratum.SecondaryStratumID) INNER JOIN STSim_StateClass ON SF_OutputStock.StateClassID = STSim_StateClass.StateClassID) INNER JOIN SF_StockType ON SF_OutputStock.StockTypeID = SF_StockType.StockTypeID) INNER JOIN SSim_Scenario ON SF_OutputStock.ScenarioID = SSim_Scenario.ScenarioID''',
'OutputStratumState_query_from':''' FROM ((((STSim_OutputStratumState INNER JOIN STSim_Stratum ON STSim_OutputStratumState.StratumID = STSim_Stratum.StratumID) INNER JOIN STSim_SecondaryStratum ON STSim_OutputStratumState.SecondaryStratumID = STSim_SecondaryStratum.SecondaryStratumID) INNER JOIN STSim_StateClass ON STSim_OutputStratumState.StateClassID = STSim_StateClass.StateClassID) INNER JOIN STSim_StateLabelX ON (STSim_StateClass.StateLabelXID = STSim_StateLabelX.StateLabelXID) AND (STSim_StateClass.ProjectID = STSim_StateLabelX.ProjectID)) INNER JOIN STSim_StateLabelY ON (STSim_StateClass.StateLabelYID = STSim_StateLabelY.StateLabelYID) AND (STSim_StateClass.ProjectID = STSim_StateLabelY.ProjectID) INNER JOIN SSim_Scenario ON (STSim_OutputStratumState.ScenarioID = SSim_Scenario.ScenarioID)''',
'OutputStratumTransition_query_from': ''' FROM (((STSim_OutputStratumTransition INNER JOIN STSim_Stratum ON STSim_OutputStratumTransition.StratumID = STSim_Stratum.StratumID) INNER JOIN STSim_SecondaryStratum ON STSim_OutputStratumTransition.SecondaryStratumID = STSim_SecondaryStratum.SecondaryStratumID) INNER JOIN STSim_TransitionGroup ON STSim_OutputStratumTransition.TransitionGroupID = STSim_TransitionGroup.TransitionGroupID) INNER JOIN SSim_Scenario ON STSim_OutputStratumTransition.ScenarioID = SSim_Scenario.ScenarioID''',
'OutputStock_query_from':''' FROM ((((SF_OutputStock INNER JOIN STSim_Stratum ON SF_OutputStock.StratumID = STSim_Stratum.StratumID) INNER JOIN STSim_SecondaryStratum ON SF_OutputStock.SecondaryStratumID = STSim_SecondaryStratum.SecondaryStratumID) INNER JOIN STSim_StateClass ON SF_OutputStock.StateClassID = STSim_StateClass.StateClassID) INNER JOIN SF_StockType ON SF_OutputStock.StockTypeID = SF_StockType.StockTypeID) INNER JOIN SSim_Scenario ON SF_OutputStock.ScenarioID = SSim_Scenario.ScenarioID''',
'Scenario_Names': '''SELECT SSim_Scenario.Name, SSim_Scenario.ScenarioID, SSim_Scenario.RunStatus, STSim_RunControl.MaximumIteration, STSim_RunControl.MaximumTimestep, STSim_RunControl.MinimumIteration, STSim_RunControl.MinimumTimestep, STSim_RunControl.IsSpatial FROM (SSim_Scenario INNER JOIN STSim_RunControl ON SSim_Scenario.ScenarioID = STSim_RunControl.ScenarioID) ''',
'Project_Ids': '''SELECT Name, ProjectID FROM SSim_Project''',
'Scenario_Names_All': '''SELECT Name, ScenarioID FROM SSim_Scenario''',
Expand All @@ -22,3 +22,8 @@



# Dictionary to store the SELECT expressions of each output query
# Each outer key names one inner dictionary; each inner dictionary maps an output
# column header to its "<table>.<column> AS <header>" select expression.
# NOTE(review): the stock entry is keyed "OutputStock_query_select_dic" while the other
# two keys omit "_query" - confirm callers look it up under this exact key.
select_dic = {
"OutputStratumState_select_dic":{'IDProject': 'SSim_Scenario.ProjectID AS IDProject', 'IDScenario': 'STSim_OutputStratumState.ScenarioID AS IDScenario', 'Iteration': 'STSim_OutputStratumState.Iteration AS Iteration', 'Timestep': 'STSim_OutputStratumState.Timestep AS Timestep', 'Stratum': 'STSim_Stratum.Name AS Stratum', 'SecondaryStratum': 'STSim_SecondaryStratum.Name AS SecondaryStratum', 'StateLabelX': 'STSim_StateLabelX.Name AS StateLabelX', 'StateLabelY': 'STSim_StateLabelY.Name AS StateLabelY', 'AgeMin': 'STSim_OutputStratumState.AgeMin AS AgeMin', 'AgeMax': 'STSim_OutputStratumState.AgeMax AS AgeMax', 'Amount': 'STSim_OutputStratumState.Amount AS Amount'},
"OutputStratumTransition_select_dic":{'IDProject': 'SSim_Scenario.ProjectID AS IDProject', 'IDScenario':'STSim_OutputStratumTransition.ScenarioID AS IDScenario', 'Iteration': 'STSim_OutputStratumTransition.Iteration AS Iteration', 'Timestep': 'STSim_OutputStratumTransition.Timestep AS Timestep', 'Stratum': 'STSim_Stratum.Name AS Stratum', 'SecondaryStratum': 'STSim_SecondaryStratum.Name AS SecondaryStratum', 'TransitionGroup': 'STSim_TransitionGroup.Name AS TransitionGroup', 'AgeMin': 'STSim_OutputStratumTransition.AgeMin AS AgeMin', 'AgeMax': 'STSim_OutputStratumTransition.AgeMax AS AgeMax', 'Amount': 'STSim_OutputStratumTransition.Amount AS Amount'},
"OutputStock_query_select_dic": {'IDProject': 'SSim_Scenario.ProjectID AS IDProject', 'IDScenario': 'SF_OutputStock.ScenarioID AS IDScenario', 'Iteration': 'SF_OutputStock.Iteration AS Iteration', 'Timestep':'SF_OutputStock.Timestep AS Timestep', 'Stratum': 'STSim_Stratum.Name AS Stratum', 'SecondaryStratum': 'STSim_SecondaryStratum.Name AS SecondaryStratum', 'StateClass': 'STSim_StateClass.Name AS StateClass', 'StockType':'SF_StockType.Name AS StockType', 'Amount': 'SF_OutputStock.Amount AS Amount'},
}
66 changes: 65 additions & 1 deletion ssim_api/ssim_general_functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,30 @@
# **********************************************************
import sqlite3, sys, os.path
import pandas as pd
from ssim_api.all_dictionaries import query_dictionary
import numpy as np
from ssim_api.all_dictionaries import query_dictionary, select_dic

class pc:
    """Percentile aggregate for SQLite, called in SQL as ``pc(value, percent)``.

    Registered on a connection with ``create_aggregate("pc", 2, pc)``: ``step``
    receives the two SQL arguments for each row and ``finalize`` produces the
    aggregate result.
    """

    def __init__(self):
        # Values collected so far.
        self.s = []
        # Percentile to compute, on the 0-100 scale used by np.percentile.
        self.pval = 0

    def step(self, value, percent_val):
        """Record one value and the requested percentile.

        Args:
            value: value to include; None (SQL NULL) is skipped, because a
                None in the collected values makes np.percentile fail.
            percent_val: percentile to compute; the most recent call wins.
        """
        self.pval = percent_val
        if value is not None:
            self.s.append(value)

    def finalize(self):
        """Return the requested percentile as a float, or None if no values were collected."""
        if not self.s:
            return None
        # Convert the numpy scalar to a plain Python float before returning it.
        return float(np.percentile(np.array(self.s), self.pval))


def apply_query(sqlite_connection, query_sql, all_params=None):
# Creates connected to the database, executes query, and returns pandas dataframe with results
Expand All @@ -21,6 +44,7 @@ def apply_query(sqlite_connection, query_sql, all_params=None):
# sql statement with either WHERE or AND appended to the end
#
conn = sqlite3.connect(sqlite_connection)
conn.create_aggregate("pc", 2, pc)
c = conn.cursor()
if len(all_params) > 0:
c.execute(query_sql, all_params)
Expand Down Expand Up @@ -82,6 +106,46 @@ def update_query_string(all_params, query_sql, query_column, variable, variable_
return query_sql, all_params


def update_group_by_query(query_sql, selection_params, group_by, variable_name):
    # Appends a GROUP BY clause to the Sql statement and builds the matching SELECT clause
    #
    # Args:
    #   query_sql: current sql statement
    #   selection_params: dictionary mapping column headers to their select expressions;
    #                     headers not listed in group_by are removed from it IN PLACE
    #   group_by: column headers to group by
    #   variable_name: name passed on to raise_type_error together with group_by
    #
    # Returns:
    #   sql statement with GROUP BY appended,
    #   SELECT clause listing the remaining expressions plus "SUM(Amount) AS sum",
    #   and the reduced selection_params dictionary
    #
    raise_type_error(group_by, variable_name=variable_name)

    query_sql += " GROUP BY " + ", ".join(group_by)

    # Iterate over a snapshot of the keys: deleting entries while iterating the
    # dictionary itself raises "dictionary changed size during iteration" on Python 3.
    for header in list(selection_params):
        if header not in group_by:
            del selection_params[header]

    query_select = "SELECT " + ", ".join(selection_params.values()) + ", SUM(Amount) AS sum"

    return query_sql, query_select, selection_params

def update_percentile_query(query_sql, selection_params, query_select, percentile, variable_name):
    # Wraps the grouped query in an outer query that applies the pc aggregate to "sum"
    #
    # Args:
    #   query_sql: current sql statement; it is closed with ")" so it ends the inner query
    #   selection_params: dictionary keyed by the column headers of the inner query;
    #                     the percentile column is removed from it IN PLACE
    #   query_select: SELECT clause of the inner query
    #   percentile: sequence of (column header to take the percentile over, percentile value)
    #   variable_name: name passed on to raise_type_error together with percentile
    #
    # Returns:
    #   sql statement closed and grouped by the remaining headers,
    #   and the outer select clause (remaining headers plus pc(sum, 100 - p), pc(sum, 50),
    #   pc(sum, p)) followed by the inner SELECT clause
    #
    # Raises:
    #   KeyError if the percentile column is not a key of selection_params
    #
    raise_type_error(percentile, variable_name=variable_name)

    percentile_column = percentile[0]
    if percentile_column not in selection_params:
        raise KeyError("percentile column %r is not among the selected columns" % (percentile_column,))
    del selection_params[percentile_column]

    remaining = ", ".join(selection_params.keys())

    # %g keeps fractional percentiles such as 97.5 (which %d would truncate to 97)
    # while integers still render as before, e.g. 95 -> "95". It still rejects
    # non-numeric values with a TypeError, so no arbitrary text reaches the SQL.
    pc_terms = "pc(sum, %g), pc(sum, 50), pc(sum, %g) from (" % (100 - percentile[1], percentile[1])

    if remaining:
        query_sql += ") GROUP BY " + remaining
        query_select = "select " + remaining + ", " + pc_terms + query_select
    else:
        # No grouping columns are left: take the percentile over all rows of the
        # inner query rather than emitting "select , ..." and a dangling GROUP BY.
        query_sql += ")"
        query_select = "select " + pc_terms + query_select

    return query_sql, query_select


# **********************************************************
# General database functions
Expand Down
Loading

0 comments on commit 96955ca

Please sign in to comment.