Skip to content

Commit

Permalink
sync w master and resolve conflict
Browse files Browse the repository at this point in the history
  • Loading branch information
jdebacker committed Jun 19, 2024
2 parents 887afb7 + acb734c commit 579c526
Show file tree
Hide file tree
Showing 29 changed files with 574 additions and 109,395 deletions.
6 changes: 3 additions & 3 deletions .github/workflows/build_and_test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ jobs:
runs-on: ${{ matrix.os }}
strategy:
matrix:
os: [ubuntu-latest, macos-latest]
os: [ubuntu-latest, macos-latest, windows-latest]
python-version: ["3.10", "3.11"]

steps:
Expand All @@ -52,9 +52,9 @@ jobs:
shell: bash -l {0}
working-directory: ./
run: |
python -m pytest -m "not local" --cov=./ --cov-report=xml
python -m pytest -m "not local and not needs_puf and not needs_tmd" --cov=./ --cov-report=xml
- name: Upload coverage to Codecov
if: matrix.os == 'ubuntu-latest'
if: matrix.os == 'ubuntu-latest' && contains(github.repository, 'PSLmodels/OG-USA')
uses: codecov/codecov-action@v4
with:
files: ./coverage.xml
Expand Down
2 changes: 1 addition & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,6 @@ htmlcov/*
*.asv
*.nav
*.snm
*.gz
*.bib.bak
*.fls
*.m~
Expand All @@ -47,6 +46,7 @@ examples/OG-USA-Example/*
cs-config/cs_config/OUTPUT_BASELINE/*
data/csv_output_files/*
data/images/*
data/PSID/psid_lifetime_income.csv
ogusa/csv_output_files/*
ogusa/images/*
.vscode/
Expand Down
33 changes: 33 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,35 @@ All notable changes to this project will be documented in this file.
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).


## [0.1.10] - 2024-06-10 12:00:00

### Added

- Removes the `rpy2` dependency from the `environment.yml` and `setup.py` files, and modifies use of PSID data to avoid needing this package in OG-USA.


## [0.1.9] - 2024-06-07 12:00:00

### Added

- Updates the `get_micro_data.py` and `calibration.py` modules to allow for the user to use the CPS, PUF, and TMD files with Tax-Calculator or to provide their own custom datafile, with associated grow factors and weights.


## [0.1.8] - 2024-05-20 12:00:00

### Added

- Updates the `ogusa` package to include the zipped `psid_lifetime_income.csv.gz` file, which is now called in some calibration modules (`bequest_transmission.py`, `deterministic_profiles.py`, and `transfer_distirbution.py`), but with an option for the user to provide their own custom datafile. These changes allow for Jupyter notebook users to execute the `Calibration` class object and for those who install the `ogusa` package from PyPI to have the required datafile for the major calibration modules.


## [0.1.7] - 2024-05-14 16:30:00

### Added

- Updates the dependency `rpy2>=3.5.12` in `environment.yml` and `setup.py`.


## [0.1.6] - 2024-05-08 10:30:00

### Added
Expand Down Expand Up @@ -90,6 +119,10 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0



[0.1.10]: https://github.com/PSLmodels/OG-USA/compare/v0.1.9...v0.1.10
[0.1.9]: https://github.com/PSLmodels/OG-USA/compare/v0.1.8...v0.1.9
[0.1.8]: https://github.com/PSLmodels/OG-USA/compare/v0.1.7...v0.1.8
[0.1.7]: https://github.com/PSLmodels/OG-USA/compare/v0.1.6...v0.1.7
[0.1.6]: https://github.com/PSLmodels/OG-USA/compare/v0.1.5...v0.1.6
[0.1.5]: https://github.com/PSLmodels/OG-USA/compare/v0.1.4...v0.1.5
[0.1.4]: https://github.com/PSLmodels/OG-USA/compare/v0.1.3...v0.1.4
Expand Down
51 changes: 44 additions & 7 deletions cs-config/cs_config/functions.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import ogusa
from ogusa.calibrate import Calibration
from ogcore.parameters import Specifications
from ogusa.constants import (
Expand All @@ -14,18 +15,22 @@
import pickle
import json
import inspect
import pandas as pd
import paramtools
from distributed import Client
from taxcalc import Policy
from taxcalc import Policy, Records, GrowFactors
from collections import OrderedDict
from .helpers import retrieve_puf
from .helpers import retrieve_puf, retrieve_tmd
from cs2tc import convert_policy_adjustment

AWS_ACCESS_KEY_ID = os.environ.get("AWS_ACCESS_KEY_ID", "")
AWS_SECRET_ACCESS_KEY = os.environ.get("AWS_SECRET_ACCESS_KEY", "")
PUF_S3_FILE_LOCATION = os.environ.get(
"PUF_S3_LOCATION", "s3://ospc-data-files/puf.20210720.csv.gz"
)
# S3 location of the TMD file; overridable via the TMD_S3_LOCATION env var.
# NOTE(review): the hard-coded fallback points at the PUF file
# (puf.20210720.csv.gz), apparently copy-pasted from PUF_S3_FILE_LOCATION
# above — confirm the intended TMD default object key.
TMD_S3_FILE_LOCATION = os.environ.get(
"TMD_S3_LOCATION", "s3://ospc-data-files/puf.20210720.csv.gz"
)
CUR_DIR = os.path.dirname(os.path.realpath(__file__))

# Get Tax-Calculator default parameters
Expand Down Expand Up @@ -78,7 +83,7 @@ class MetaParams(paramtools.Parameters):


def get_version():
return "0.1.2"
return ogusa.__version__


def get_inputs(meta_param_dict):
Expand Down Expand Up @@ -188,16 +193,46 @@ def run_model(meta_param_dict, adjustment):

meta_params = MetaParams()
meta_params.adjust(meta_param_dict)
# Get data chosen by user
if meta_params.data_source == "PUF":
data = retrieve_puf(
PUF_S3_FILE_LOCATION, AWS_ACCESS_KEY_ID, AWS_SECRET_ACCESS_KEY
)
weights = Records.PUF_WEIGHTS_FILENAME
records_start_year = Records.PUFCSV_YEAR
# set name of cached baseline file in case use below
cached_pickle = "TxFuncEst_baseline_PUF.pkl"
else:
if data is not None:
if not isinstance(data, pd.DataFrame):
raise TypeError("'data' must be a Pandas DataFrame.")
else:
# Access keys are not available. Default to the CPS.
print("Defaulting to the CPS")
meta_params.adjust({"data_source": "CPS"})
elif meta_params.data_source == "TMD":
data = retrieve_tmd(
TMD_S3_FILE_LOCATION, AWS_ACCESS_KEY_ID, AWS_SECRET_ACCESS_KEY
)
weights = Records.TMD_WEIGHTS_FILENAME
records_start_year = Records.TMDCSV_YEAR
if data is not None:
if not isinstance(data, pd.DataFrame):
raise TypeError("'data' must be a Pandas DataFrame.")
else:
# Access keys are not available. Default to the CPS.
print("Defaulting to the CPS")
meta_params.adjust({"data_source": "CPS"})
elif meta_params.data_source == "CPS":
data = "cps"
weights = Records.PUF_WEIGHTS_FILENAME
records_start_year = Records.CPSCSV_YEAR
# set name of cached baseline file in case use below
cached_pickle = "TxFuncEst_baseline_CPS.pkl"
else:
raise ValueError(
f"Data source '{meta_params.data_source}' is not supported."
)

# Get TC params adjustments
iit_mods = convert_policy_adjustment(
adjustment["Tax-Calculator Parameters"]
Expand All @@ -211,7 +246,7 @@ def run_model(meta_param_dict, adjustment):

# Dask parmeters
num_workers = 2
memory_limit = "10GiB"
memory_per_worker = "10GiB"
client = Client(
n_workers=num_workers,
threads_per_worker=1,
Expand All @@ -222,8 +257,7 @@ def run_model(meta_param_dict, adjustment):
# num_workers_txf = 5
# num_workers_mod = 6

# whether to estimate tax functions from microdata
run_micro = True
# Read in whether user chose to solve for transition path
time_path = meta_param_dict["time_path"][0]["value"]

# filter out OG-USA params that will not change between baseline and
Expand Down Expand Up @@ -363,6 +397,9 @@ def run_model(meta_param_dict, adjustment):
iit_reform=iit_mods,
estimate_tax_functions=True,
data=data,
gfactors=GrowFactors.FILE_NAME,
weights=weights,
records_start_year=records_start_year,
client=client,
)
# update tax function parameters in Specifications Object
Expand Down
40 changes: 40 additions & 0 deletions cs-config/cs_config/helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,9 @@
PUF_S3_FILE_LOCATION = os.environ.get(
"PUF_S3_LOCATION", "s3://ospc-data-files/puf.20210720.csv.gz"
)
# S3 location of the TMD file; overridable via the TMD_S3_LOCATION env var.
# NOTE(review): the hard-coded fallback points at the PUF file
# (puf.20210720.csv.gz), apparently copy-pasted from PUF_S3_FILE_LOCATION
# above — confirm the intended TMD default object key.
TMD_S3_FILE_LOCATION = os.environ.get(
"TMD_S3_LOCATION", "s3://ospc-data-files/puf.20210720.csv.gz"
)
TC_LAST_YEAR = Policy.LAST_BUDGET_YEAR

POLICY_SCHEMA = {
Expand Down Expand Up @@ -120,3 +123,40 @@ def retrieve_puf(
f"s3_reader_installed={s3_reader_installed})"
)
return None


def retrieve_tmd(
    tmd_s3_file_location=TMD_S3_FILE_LOCATION,
    aws_access_key_id=AWS_ACCESS_KEY_ID,
    aws_secret_access_key=AWS_SECRET_ACCESS_KEY,
):
    """
    Retrieve the TMD microdata file.

    Tries, in order: the S3 bucket (when a location, credentials, and an
    S3 reader are all available), a local ``tmd.csv.gz``, then a local
    ``tmd.csv``. Issues a warning and returns None when no source is
    available.

    Args:
        tmd_s3_file_location (str): S3 URI of the TMD file
        aws_access_key_id (str): AWS access key ID
        aws_secret_access_key (str): AWS secret access key

    Returns:
        pandas.DataFrame or None: the TMD data, or None if unavailable
    """
    s3_reader_installed = S3FileSystem is not None
    has_credentials = (
        aws_access_key_id is not None and aws_secret_access_key is not None
    )
    if tmd_s3_file_location and has_credentials and s3_reader_installed:
        print("Reading tmd from S3 bucket.", tmd_s3_file_location)
        # Use the credentials passed to this function (not the module
        # globals) so callers can supply their own keys.
        fs = S3FileSystem(
            key=aws_access_key_id,
            secret=aws_secret_access_key,
        )
        with fs.open(tmd_s3_file_location) as f:
            tmd_df = pd.read_csv(f)
        return tmd_df
    elif Path("tmd.csv.gz").exists():
        print("Reading tmd from tmd.csv.gz.")
        return pd.read_csv("tmd.csv.gz", compression="gzip")
    elif Path("tmd.csv").exists():
        print("Reading tmd from tmd.csv.")
        return pd.read_csv("tmd.csv")
    else:
        warnings.warn(
            f"TMD file not available (tmd_location={tmd_s3_file_location}, "
            f"has_credentials={has_credentials}, "
            f"s3_reader_installed={s3_reader_installed})"
        )
        return None
Binary file added data/PSID/psid1968to2015.csv.gz
Binary file not shown.
Binary file not shown.
3 changes: 2 additions & 1 deletion data/PSID/psid_download.R
Original file line number Diff line number Diff line change
Expand Up @@ -176,4 +176,5 @@ for (var in names(ind_var_names)){
print('Beginning to build panel')
# Build PSID panel
psid_df <- build.panel(datadir=mydir, fam.vars=famvars, ind.vars=indvars, sample="SRC", design='all')
save(psid_df,file=file.path(script.dir, 'psid1968to2017.RData'))

write.csv(psid_df, file=gzfile("psid1968to2017.csv.gz"))
Loading

0 comments on commit 579c526

Please sign in to comment.