From 411247c0d3330e02768a36e023c0f046e7797f3b Mon Sep 17 00:00:00 2001
From: jdebacker
Date: Wed, 28 Feb 2024 23:13:06 -0500
Subject: [PATCH 1/5] update s3 bucket info

---
 cs-config/cs_config/functions.py |  4 ++-
 cs-config/cs_config/helpers.py   | 42 +++++++++++++++++++++++---------
 2 files changed, 34 insertions(+), 12 deletions(-)

diff --git a/cs-config/cs_config/functions.py b/cs-config/cs_config/functions.py
index 9a1ac8c2..8389f60b 100644
--- a/cs-config/cs_config/functions.py
+++ b/cs-config/cs_config/functions.py
@@ -23,6 +23,8 @@
 AWS_ACCESS_KEY_ID = os.environ.get("AWS_ACCESS_KEY_ID", "")
 AWS_SECRET_ACCESS_KEY = os.environ.get("AWS_SECRET_ACCESS_KEY", "")
+PUF_S3_FILE_LOCATION = os.environ.get(
+    "PUF_S3_LOCATION", "s3://ospc-data-files/puf.20210720.csv.gz")
 CUR_DIR = os.path.dirname(os.path.realpath(__file__))
 
 # Get Tax-Calculator default parameters
@@ -172,7 +174,7 @@ def run_model(meta_param_dict, adjustment):
     meta_params = MetaParams()
     meta_params.adjust(meta_param_dict)
     if meta_params.data_source == "PUF":
-        data = retrieve_puf(AWS_ACCESS_KEY_ID, AWS_SECRET_ACCESS_KEY)
+        data = retrieve_puf(PUF_S3_FILE_LOCATION, AWS_ACCESS_KEY_ID, AWS_SECRET_ACCESS_KEY)
         # set name of cached baseline file in case use below
         cached_pickle = "TxFuncEst_baseline_PUF.pkl"
     else:
diff --git a/cs-config/cs_config/helpers.py b/cs-config/cs_config/helpers.py
index b9bf36fe..14f69263 100644
--- a/cs-config/cs_config/helpers.py
+++ b/cs-config/cs_config/helpers.py
@@ -7,10 +7,20 @@
 except ImportError:
     boto3 = None
 import gzip
+import os
 import pandas as pd
 from taxcalc import Policy
 from collections import defaultdict
+from pathlib import Path
+try:
+    from s3fs import S3FileSystem
+except ImportError as ie:
+    S3FileSystem = None
 
+AWS_ACCESS_KEY_ID = os.environ.get("AWS_ACCESS_KEY_ID", None)
+AWS_SECRET_ACCESS_KEY = os.environ.get("AWS_SECRET_ACCESS_KEY", None)
+PUF_S3_FILE_LOCATION = os.environ.get(
+    "PUF_S3_LOCATION", "s3://ospc-data-files/puf.20210720.csv.gz")
 TC_LAST_YEAR = Policy.LAST_BUDGET_YEAR
 
 POLICY_SCHEMA = {
@@ -73,20 +83,30 @@
 }
 
 
-def retrieve_puf(aws_access_key_id, aws_secret_access_key):
+def retrieve_puf(puf_s3_file_location=PUF_S3_FILE_LOCATION, aws_access_key_id=AWS_ACCESS_KEY_ID, aws_secret_access_key=AWS_SECRET_ACCESS_KEY):
     """
     Function for retrieving the PUF from the OSPC S3 bucket
     """
-    has_credentials = aws_access_key_id and aws_secret_access_key
-    if has_credentials and boto3 is not None:
-        client = boto3.client(
-            "s3",
-            aws_access_key_id=aws_access_key_id,
-            aws_secret_access_key=aws_secret_access_key,
-        )
-        obj = client.get_object(Bucket="ospc-data-files", Key="puf.csv.gz")
-        gz = gzip.GzipFile(fileobj=obj["Body"])
-        puf_df = pd.read_csv(gz)
+    s3_reader_installed = S3FileSystem is not None
+    has_credentials = (
+        aws_access_key_id is not None and aws_secret_access_key is not None
+    )
+    if puf_s3_file_location and has_credentials and s3_reader_installed:
+        print("Reading puf from S3 bucket.", puf_s3_file_location)
+        fs = S3FileSystem(key=AWS_ACCESS_KEY_ID, secret=AWS_SECRET_ACCESS_KEY,)
+        with fs.open(PUF_S3_FILE_NAME) as f:
+            # Skips over header from top of file.
+            puf_df = pd.read_csv(f, compression="gzip")
         return puf_df
+    elif Path("puf.csv.gz").exists():
+        print("Reading puf from puf.csv.gz.")
+        return pd.read_csv("puf.csv.gz", compression="gzip")
+    elif Path("puf.csv").exists():
+        print("Reading puf from puf.csv.")
+        return pd.read_csv("puf.csv")
     else:
+        warnings.warn(
+            f"PUF file not available (has_credentials={has_credentials}, "
+            f"s3_reader_installed={s3_reader_installed})"
+        )
         return None

From 493dfd49e92b2cb201307db1fe782639fc63f62a Mon Sep 17 00:00:00 2001
From: jdebacker
Date: Wed, 28 Feb 2024 23:17:52 -0500
Subject: [PATCH 2/5] format

---
 cs-config/cs_config/functions.py |  7 +++++--
 cs-config/cs_config/helpers.py   | 15 ++++++++++++---
 2 files changed, 17 insertions(+), 5 deletions(-)

diff --git a/cs-config/cs_config/functions.py b/cs-config/cs_config/functions.py
index a748c95f..eec847f0 100644
--- a/cs-config/cs_config/functions.py
+++ b/cs-config/cs_config/functions.py
@@ -24,7 +24,8 @@
 AWS_ACCESS_KEY_ID = os.environ.get("AWS_ACCESS_KEY_ID", "")
 AWS_SECRET_ACCESS_KEY = os.environ.get("AWS_SECRET_ACCESS_KEY", "")
 PUF_S3_FILE_LOCATION = os.environ.get(
-    "PUF_S3_LOCATION", "s3://ospc-data-files/puf.20210720.csv.gz")
+    "PUF_S3_LOCATION", "s3://ospc-data-files/puf.20210720.csv.gz"
+)
 CUR_DIR = os.path.dirname(os.path.realpath(__file__))
 
 # Get Tax-Calculator default parameters
@@ -174,7 +175,9 @@ def run_model(meta_param_dict, adjustment):
     meta_params = MetaParams()
     meta_params.adjust(meta_param_dict)
     if meta_params.data_source == "PUF":
-        data = retrieve_puf(PUF_S3_FILE_LOCATION, AWS_ACCESS_KEY_ID, AWS_SECRET_ACCESS_KEY)
+        data = retrieve_puf(
+            PUF_S3_FILE_LOCATION, AWS_ACCESS_KEY_ID, AWS_SECRET_ACCESS_KEY
+        )
         # set name of cached baseline file in case use below
         cached_pickle = "TxFuncEst_baseline_PUF.pkl"
     else:
diff --git a/cs-config/cs_config/helpers.py b/cs-config/cs_config/helpers.py
index 14f69263..7416702e 100644
--- a/cs-config/cs_config/helpers.py
+++ b/cs-config/cs_config/helpers.py
@@ -12,6 +12,7 @@
 from taxcalc import Policy
 from collections import defaultdict
 from pathlib import Path
+
 try:
     from s3fs import S3FileSystem
 except ImportError as ie:
@@ -20,7 +21,8 @@
 AWS_ACCESS_KEY_ID = os.environ.get("AWS_ACCESS_KEY_ID", None)
 AWS_SECRET_ACCESS_KEY = os.environ.get("AWS_SECRET_ACCESS_KEY", None)
 PUF_S3_FILE_LOCATION = os.environ.get(
-    "PUF_S3_LOCATION", "s3://ospc-data-files/puf.20210720.csv.gz")
+    "PUF_S3_LOCATION", "s3://ospc-data-files/puf.20210720.csv.gz"
+)
 TC_LAST_YEAR = Policy.LAST_BUDGET_YEAR
 
 POLICY_SCHEMA = {
@@ -83,7 +85,11 @@
 }
 
 
-def retrieve_puf(puf_s3_file_location=PUF_S3_FILE_LOCATION, aws_access_key_id=AWS_ACCESS_KEY_ID, aws_secret_access_key=AWS_SECRET_ACCESS_KEY):
+def retrieve_puf(
+    puf_s3_file_location=PUF_S3_FILE_LOCATION,
+    aws_access_key_id=AWS_ACCESS_KEY_ID,
+    aws_secret_access_key=AWS_SECRET_ACCESS_KEY,
+):
     """
     Function for retrieving the PUF from the OSPC S3 bucket
     """
@@ -93,7 +99,10 @@
     )
     if puf_s3_file_location and has_credentials and s3_reader_installed:
         print("Reading puf from S3 bucket.", puf_s3_file_location)
-        fs = S3FileSystem(key=AWS_ACCESS_KEY_ID, secret=AWS_SECRET_ACCESS_KEY,)
+        fs = S3FileSystem(
+            key=AWS_ACCESS_KEY_ID,
+            secret=AWS_SECRET_ACCESS_KEY,
+        )
         with fs.open(PUF_S3_FILE_NAME) as f:
             # Skips over header from top of file.
             puf_df = pd.read_csv(f, compression="gzip")
         return puf_df

From dc3e9648b1e045367e19a0498953c59d5744c74f Mon Sep 17 00:00:00 2001
From: jdebacker
Date: Wed, 28 Feb 2024 23:32:40 -0500
Subject: [PATCH 3/5] update install.sh

---
 cs-config/cs_config/helpers.py |  1 +
 cs-config/install.sh           | 12 +++---------
 2 files changed, 4 insertions(+), 9 deletions(-)

diff --git a/cs-config/cs_config/helpers.py b/cs-config/cs_config/helpers.py
index 7416702e..941c5093 100644
--- a/cs-config/cs_config/helpers.py
+++ b/cs-config/cs_config/helpers.py
@@ -12,6 +12,7 @@
 from taxcalc import Policy
 from collections import defaultdict
 from pathlib import Path
+import warnings
 
 try:
     from s3fs import S3FileSystem
diff --git a/cs-config/install.sh b/cs-config/install.sh
index 205fdcbc..86a77c80 100644
--- a/cs-config/install.sh
+++ b/cs-config/install.sh
@@ -1,12 +1,6 @@
 # bash commands for installing your package
-git clone -b master --depth 1 https://github.com/PSLmodels/OG-USA
-cd OG-USA
-
-# Explicitly add channels for looking up dependencies outside of
-# taxcalc and paramtools. If the channels are not specified like this,
-# the tests fail due to not being able to converge on a solution.
-conda config --add channels PSLmodels
-conda config --add channels conda-forge
-conda install scipy mkl dask matplotlib PSLmodels::taxcalc conda-forge::paramtools
+# install packages needed by CS, but not in ogusa-dev env
+pip install s3fs
+# install OG-USA from source
 
 pip install -e .

From a000cf6083e7e6f1f791f3de3b6aa8a21456b5e8 Mon Sep 17 00:00:00 2001
From: jdebacker
Date: Wed, 28 Feb 2024 23:41:14 -0500
Subject: [PATCH 4/5] fix typo for global var

---
 cs-config/cs_config/helpers.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/cs-config/cs_config/helpers.py b/cs-config/cs_config/helpers.py
index 941c5093..e6986c50 100644
--- a/cs-config/cs_config/helpers.py
+++ b/cs-config/cs_config/helpers.py
@@ -104,7 +104,7 @@ def retrieve_puf(
             key=AWS_ACCESS_KEY_ID,
             secret=AWS_SECRET_ACCESS_KEY,
         )
-        with fs.open(PUF_S3_FILE_NAME) as f:
+        with fs.open(PUF_S3_FILE_LOCATION) as f:
             # Skips over header from top of file.
             puf_df = pd.read_csv(f, compression="gzip")
         return puf_df

From c916947a5acefda14f1dd3fff28442e0a400833b Mon Sep 17 00:00:00 2001
From: jdebacker
Date: Thu, 29 Feb 2024 19:26:30 -0500
Subject: [PATCH 5/5] make argument to solve for demographics

---
 ogusa/calibrate.py | 70 +++++++++++++++++++++++++++-------------------
 1 file changed, 41 insertions(+), 29 deletions(-)

diff --git a/ogusa/calibrate.py b/ogusa/calibrate.py
index 7a4ef0fe..8c42c5d5 100644
--- a/ogusa/calibrate.py
+++ b/ogusa/calibrate.py
@@ -17,6 +17,7 @@ def __init__(
         estimate_tax_functions=False,
         estimate_beta=False,
         estimate_chi_n=False,
+        estimate_pop=False,
         tax_func_path=None,
         iit_reform={},
         guid="",
@@ -27,6 +28,7 @@
         self.estimate_tax_functions = estimate_tax_functions
         self.estimate_beta = estimate_beta
         self.estimate_chi_n = estimate_chi_n
+        self.estimate_pop = estimate_pop
         if estimate_tax_functions:
             if tax_func_path is not None:
                 run_micro = False
@@ -42,7 +44,7 @@
                 run_micro=run_micro,
                 tax_func_path=tax_func_path,
             )
-        if estimate_beta:
+        if self.estimate_beta:
             self.beta_j = estimate_beta_j.beta_estimate(self)
         # if estimate_chi_n:
         #     chi_n = self.get_chi_n()
@@ -57,35 +59,44 @@
         self.zeta = bequest_transmission.get_bequest_matrix(p.J, p.lambdas)
 
         # demographics
-        self.demographic_params = demographics.get_pop_objs(
-            p.E,
-            p.S,
-            p.T,
-            0,
-            99,
-            initial_data_year=p.start_year - 1,
-            final_data_year=p.start_year,
-        )
+        if estimate_pop:
+            self.demographic_params = demographics.get_pop_objs(
+                p.E,
+                p.S,
+                p.T,
+                0,
+                99,
+                initial_data_year=p.start_year - 1,
+                final_data_year=p.start_year,
+            )
 
-        # demographics for 80 period lives (needed for getting e below)
-        demog80 = demographics.get_pop_objs(
-            20,
-            80,
-            p.T,
-            0,
-            99,
-            initial_data_year=p.start_year - 1,
-            final_data_year=p.start_year,
-        )
+            # demographics for 80 period lives (needed for getting e below)
+            demog80 = demographics.get_pop_objs(
+                20,
+                80,
+                p.T,
+                0,
+                99,
+                initial_data_year=p.start_year - 1,
+                final_data_year=p.start_year,
+            )
 
-        # earnings profiles
-        self.e = income.get_e_interp(
-            p.S,
-            self.demographic_params["omega_SS"],
-            demog80["omega_SS"],
-            p.lambdas,
-            plot=False,
-        )
+            # earnings profiles
+            self.e = income.get_e_interp(
+                p.S,
+                self.demographic_params["omega_SS"],
+                demog80["omega_SS"],
+                p.lambdas,
+                plot=False,
+            )
+        else:
+            self.e = income.get_e_interp(
+                p.S,
+                p.omega_SS,
+                p.omega_SS,
+                p.lambdas,
+                plot=False,
+            )
 
     # Tax Functions
     def get_tax_function_parameters(
@@ -334,6 +345,7 @@ def get_dict(self):
         dict["zeta"] = self.zeta
         dict.update(self.macro_params)
         dict["e"] = self.e
-        dict.update(self.demographic_params)
+        if self.estimate_pop:
+            dict.update(self.demographic_params)
 
         return dict
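
Usage sketch (not part of the patches above): the snippet below illustrates how the changed pieces are meant to be exercised. `PUF_S3_LOCATION`, `retrieve_puf`, `estimate_pop`, and `get_dict` come from the diffs; the module path `cs_config.helpers`, the class name `Calibration`, and the use of an OG-Core `Specifications` object for `p` are assumptions about the surrounding code, not something the patch series itself shows.

    import os

    # Point the helper at a gzipped PUF file on S3 (the default in helpers.py
    # is the OSPC bucket).  Without AWS credentials or s3fs installed, the
    # function falls back to a local puf.csv.gz / puf.csv, else returns None.
    os.environ["PUF_S3_LOCATION"] = "s3://ospc-data-files/puf.20210720.csv.gz"

    # Import after setting the env var: the module-level defaults are read at
    # import time.  Assumed module path for the cs-config helpers.
    from cs_config.helpers import retrieve_puf

    puf_df = retrieve_puf()

    # Demographics are now re-estimated only when estimate_pop=True; otherwise
    # the omega_SS already stored on the parameters object is reused when
    # interpolating earnings profiles.
    from ogusa.calibrate import Calibration  # assumed class name
    from ogcore import Specifications  # assumed source of the parameters object

    p = Specifications()
    c = Calibration(p, estimate_pop=True)
    calib_dict = c.get_dict()  # demographic params included only if estimate_pop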