Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Data upload prototype [Not to be merged] #930

Open
wants to merge 3 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
17 changes: 17 additions & 0 deletions distributed/api/celery_tasks.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import json
import os
from io import BytesIO

from celery import Celery

Expand Down Expand Up @@ -129,3 +130,19 @@ def btax_async(user_mods, start_year):
results['btax_version'] = binfo['version']
json_res = json.dumps(results)
return json_res


@celery_app.task(name='api.celery_tasks.file_upload_test')
def file_upload_test(data, compression):
    """Summarize an uploaded CSV payload with ``DataFrame.describe()``.

    Args:
        data: Raw bytes of the uploaded CSV file, optionally compressed.
        compression: Compression codec of ``data``, forwarded to
            ``pandas.read_csv`` (for example ``'gzip'``).  ``None`` and
            ``'infer'`` mean "not stated": the payload is then treated as
            gzip when it starts with the gzip magic number and as plain
            text otherwise.

    Returns:
        A JSON string with an empty ``'outputs'`` list and one
        ``'aggr_outputs'`` entry that carries the summary statistics both
        as downloadable CSV text and as a renderable HTML table.
    """
    import pandas as pd

    # pandas only infers compression from a path's extension, never from an
    # in-memory buffer, so detect gzip here.  This also keeps gzip uploads
    # working for callers that relied on the previously hard-coded codec.
    if compression in (None, 'infer'):
        compression = 'gzip' if data[:2] == b'\x1f\x8b' else None

    # Honor the caller-supplied codec instead of always assuming gzip.
    df = pd.read_csv(BytesIO(data), compression=compression)
    desc = df.describe()
    formatted = {'outputs': [], 'aggr_outputs': []}
    formatted['aggr_outputs'].append({
        'tags': {'default': 'default'},
        'title': 'desc',
        'downloadable': [{'filename': 'desc' + '.csv',
                          'text': desc.to_csv()}],
        'renderable': desc.to_html()
    })
    return json.dumps(formatted)
18 changes: 17 additions & 1 deletion distributed/api/endpoints.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
dropq_task_async,
dropq_task_small_async,
taxbrain_elast_async,
btax_async)
btax_async, file_upload_test)

bp = Blueprint('endpoints', __name__)

Expand Down Expand Up @@ -48,6 +48,17 @@ def endpoint(task):
return json.dumps(data)


def file_test_endpoint(task):
    """Queue ``task`` with keyword arguments decoded from the request body.

    The request body is decoded with msgpack; the first element of the
    decoded payload is passed to ``task.apply_async`` as its ``kwargs``,
    using the msgpack serializer.

    Returns:
        A JSON string with ``'job_id'`` (the string form of the async
        result) and ``'qlength'`` (``client.llen(queue_name) + 1``).
    """
    print('file test endpoint')
    payload = msgpack.loads(request.get_data(), encoding='utf8',
                            use_list=True)
    # Only the first element of the decoded payload carries the kwargs.
    async_result = task.apply_async(kwargs=payload[0], serializer='msgpack')
    # The queue length is read after submission and incremented by one.
    queue_size = client.llen(queue_name) + 1
    return json.dumps({'job_id': str(async_result), 'qlength': queue_size})


@bp.route("/dropq_start_job", methods=['POST'])
def dropq_endpoint_full():
return aggr_endpoint(dropq_task_async, taxbrain_postprocess)
Expand All @@ -68,6 +79,11 @@ def elastic_endpoint():
return aggr_endpoint(taxbrain_elast_async, taxbrain_elast_postprocess)


@bp.route("/file_upload_test", methods=['POST'])
def file_upload_test_endpoint():
    """Queue the ``file_upload_test`` task for the current request.

    Returns whatever ``file_test_endpoint`` returns for that task.
    """
    task = file_upload_test
    return file_test_endpoint(task)


@bp.route("/dropq_get_result", methods=['GET'])
def dropq_results():
job_id = request.args.get('job_id', '')
Expand Down
20 changes: 20 additions & 0 deletions templates/fileuploadex/file.html
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
{% extends 'taxbrain/input_form_section.html' %}

{% block input_section_shortname %}payroll-taxes{% endblock %}
{% block input_section_title %}Payroll Taxes{% endblock %}

{% load flatblocks %}

{% block content %}
<div class="inputs-block-header">
<h1>Data File: A CSV file to describe</h1>
{% block section_warnings %}
{% endblock %}
</div>

<p><label for="id_datafile">Upload a data file:</label> </p>
<p><input id="id_datafile" name="datafile" type="file" /></p>

{% endblock %}

{% block provide_if_no_continue %}{% endblock %}
176 changes: 176 additions & 0 deletions templates/fileuploadex/input.html
Original file line number Diff line number Diff line change
@@ -0,0 +1,176 @@
{% extends 'taxbrain/input_base.html' %}

{% load staticfiles %}

{% load flatblocks %}

{% block content %}
<div class="wrapper">
<div id="affix-offset">

<nav class="logobar" role="navigation">
<a class="logobar-brand" href="/">
<img alt="TaxBrain: A disruptive open source platform for exploring tax policy impact" src="{% static 'images/logo-no-icon.png' %}">
</a>
</nav>



<section class="taxbrain-hero">
<div class="taxbrain-hero-block">
<div class="logo">
<h2 class="sr-only">TaxBrain</h2>
<img src="{% static 'images/taxbrain/logo-taxbrain-altbeta.png' %}" alt="TaxBrain" class="img-responsive">
</div>
<p class="lead">A platform for accessing open-source tax models.</p>
<a href="#get-started" class="btn btn-primary btn-explore">Start Exploring</a>
<div class="what-is">
<a class="collapsed" data-toggle="collapse" href="#taxbrainInfo" aria-expanded="false" aria-controls="taxbrainInfo">What is TaxBrain?</a>
</div>
</div>
</section>

<section class="taxbrain-info collapse" id="taxbrainInfo">
<div class="container">
<div class="taxbrain-history">
<h2>About TaxBrain</h2>
<p>TaxBrain is an interface to <a href = "//www.github.com/open-source-economics" target="_blank">open source economic models</a> for tax policy analysis. <a href = '//www.github.com/opensourcepolicycenter/webapp-public' target="_blank">The code</a> for the TaxBrain webapp interface is itself open source. </p>

<ul>
<li> <strong>Step 1.</strong> Create a policy reform by modifying tax law parameters such as rates and deductions, adjust the economic baseline, and request the static result. You can do so with the graphical user interface below or by uploading a policy reform file <a href = "//www.ospc.org/taxbrain/file" target="_blank">from this page</a>.</li>
<li><strong>Step 2.</strong> Review your static output carefully. Ask questions.</li>
<li><strong>Step 3.</strong> Choose a dynamic modeling approach. Because different approaches generally lead to different estimates, you may want to compare several approaches.</li>
<li><strong>Step 4.</strong> Adjust economic assumptions and request the dynamic analysis.</li>
<li><strong>Step 5.</strong> Review your dynamic output carefully. Ask questions.</li>
<li><strong>Step 6.</strong> Share your results! The link to every results page is static and will never change. Send them around.</li>
</ul>

<p> Throughout this process, if you have a question about how to use TaxBrain or interpret the results, if you want to make a suggestion for making the interface or underlying models better, or if you discover a bug, please send a message to our mailing list, which you can join at <a href = '//list.ospc.org/mailman/listinfo/users_list.ospc.org' target="_blank">list.ospc.org/mailman/listinfo/users_list.ospc.org</a>.</p>

<p><strong>Disclaimer</strong></p>

<p>Proper use of this tool and description of that use is ultimately your responsibility. If you plan on publishing your results, I highly recommend that you confirm with the community that you are using the tools properly and interpreting the results correctly before you publish them. If you have a compelling reason not to leave a public note on the mailing list, email me at [email protected].</p>

<p>Results will change as the underlying models improve. A fundamental reason for adopting open source methods in this project is to let people from all backgrounds contribute to the models that our society uses to assess economic policy; when community-contributed improvements are incorporated, the models will produce different results.</p>

<p>Neither the Open Source Policy Center nor the American Enterprise Institute maintain institutional positions, and the results from models accessible via the TaxBrain interface should not be attributed directly to OSPC or AEI. A suggested acknowledgement is, "We thank AEI for making TaxBrain available, but we bear sole responsibility for the use of the models and any conclusions drawn."</p>

<p><strong> - Matt Jensen, managing director and founder of the Open Source Policy Center </strong></p>

<hr>

<h1>Static modeling (Step 1)</h1>

<p>Static tax analysis entails computing individuals' tax changes under the assumption that behavior does not change in response to tax policy. Static analyses are useful for understanding the mechanistic effects of tax policy changes, and they form the basis to which behavior is applied for dynamic analyses.</p>

<p>TaxBrain's static modeling capabilities rely on several open source economic models and other packages:</p>
<ul>
<li> <a href = "//www.github.com/open-source-economics/tax-calculator" target="_blank">Tax-Calculator</a> computes federal individual income taxes and Federal Insurance Contributions Act (FICA) taxes for a sample of tax filing units in years beginning with 2013.</li>
<li><a href = '//www.github.com/open-source-economics/taxdata' target="_blank">TaxData</a> creates a microdataset that closely reproduces the multivariate distribution of income, deduction and credit items in 2009, extrapolated to 2015-2026 levels in accordance with Congressional Budget Office forecasts available in spring 2016. It is intended to match similar but confidential data used by the Congressional Joint Committee on Taxation. The underlying dataset must be purchased from the Statistics of Income division of the Internal Revenue Service. Additional information on non-filers is taken from the March 2013 Current Population Survey. </li>
<li> DropQ implements a disclosure avoidance algorithm initially proposed by the U.S. Census Bureau to protect confidential data from differencing attacks. The code is undergoing security review before it is open sourced.</li>
<li> <a href = '//www.github.com/opensourcepolicycenter/webapp-public' target="_blank">TaxBrain</a> itself is an open source project. The underlying models are deployed to TaxBrain using conda, a free and open source package management system supported by <a href = '//www.continuum.io' target="_blank">Continuum Analytics</a></li>
</ul>

<p><strong>Transparency and Replicability</strong></p>

<p>In addition to relying on open source models, we are devoted to making it easy for reviewers to understand the models even if they can't understand the source code or don't have access to the underlying data. Toward that end we produce several additional reports to enhance transparency, peer review, collaboration and scientific advancement.</p>

<p>Note that these reports currently rely on the latest versions of tax-calculator and TaxData, which might not correspond perfectly to TaxBrain.</p>

<ul>
<li><a href = "https://github.com/open-source-economics/Tax-Calculator/blob/master/taxcalc/comparison/reform_results.txt" target = "_blank">Federal income tax and FICA liability deltas for example reforms</a></li>
<li>Dummy datasets and associated Tax-Calculator results (Coming soon)</li>
<li><a href = "https://github.com/open-source-economics/Tax-Calculator/blob/master/taxcalc/comparison/variable_stats_summary.csv">Basic summary statistics for variables used in Tax-Calculator and major intermediate results (2013-2026)</a> Currently only the weighted mean is available; more statistics will be added shortly.</li>
<li><a href = "https://github.com/open-source-economics/Tax-Calculator/blob/master/taxcalc/comparison/correlation.csv" target= "_blank">Correlation matrix for the same set of variables</a> 2016 available now, other years coming soon.</li>
<li>Extrapolation related: <br>
<a href = "https://github.com/open-source-economics/taxdata/blob/master/Stage%20II/Stage_I_factors.csv" target= "_blank">Blow-up factors</a><br>
<a href = "https://github.com/open-source-economics/taxdata/blob/master/Stage%20II/Stage_II_targets.csv" target= "_blank">Aggregate and distributional targets</a></li>
<li>Equations and coefficients for imputations (Coming soon)</li>
</ul>

<p><strong>Accuracy notes</strong></p>

<p>The Python code that performs the tax calculations has been validated in a
number of ways. First, Tax-Calculator results for a number of tax filing
units have been compared to hand calculations performed using IRS tax
forms. Second, Tax-Calculator results for a large sample of tax filing units
have been compared to results for the same sample generated by a
<a href = '//www.nber.org/taxcalc' target="_blank">detailed SAS program</a> developed by Dan
Feenberg and Ina Shapiro of NBER. Third, a subset of input variables has been used to compare the results of Tax-Calculator to <a href = '//users.nber.org/~taxsim' target="_blank">Internet TAXSIM</a> as well as against the Policy Simulation Group's <a href = '//www.polsim.com/inctax-caps.html' target="_blank">PENSIM tax module</a></p>

<p> Bugs aside, the results from TaxBrain might differ in comparison to those produced by Congress or the Administration for other reasons. Modeling requires many assumptions, and neither Congress nor the executive branch publicize all of their assumptions. For example, the distribution of wages in <a href = '//www.github.com/open-source-economics/taxdata' target="_blank">TaxData</a> is assumed to stay the same in real terms for all years after the last year we have available data (2013). We know that Congress assumes this distribution changes over time, but it doesn't publish by how much. These assumptions are all flexible in <a href = '//www.github.com/open-source-economics/taxdata' target="_blank">TaxData</a>, so please conduct sensitivity analyses. Other assumptions can be made flexible in TaxBrain based on user requests.</p>

<p><strong>
Core Maintainers (static modeling)*:
<ul style = "list-style-type:none">
<li>- <a href = "https://github.com/talumbau" target = "_blank" >T.J. Alumbaugh, Continuum Analytics</a></li>
<li>- <a href = "http://www.nber.org/people/daniel_feenberg" target = "_blank" >Dan Feenberg, National Bureau of Economic Research</a></li>
<li>- <a href = "http://www.polsim.com/MRH_vita.pdf" target = "_blank" >Martin Holmer, Policy Simulation Group</a></li>
<li>- <a href = "https://www.aei.org/scholar/matthew-h-jensen/" target = "_blank" >Matt Jensen, American Enterprise Institute</a></li>
<li>- <a href = "http://quantria.com/#team" target = "_blank" >John O'Hare, Quantria Strategies</a></li>
<li>- <a href = "https://github.com/Amy-Xu" target = "_blank" >Amy Xu, American Enterprise Institute</a></li>
</ul>
</strong></p>
<p> These members have "write access" to one or both of the core static modeling repositories, Tax-Calculator and TaxData, and work as a team to determine which open source contributions are accepted.</p>
</div>
<div class="taxbrain-build">
<h2>PolicyBrain Code Build</h2>
<p><a href="https://github.com/OpenSourcePolicyCenter/webapp-public/tree/v{{webapp_version}}">Version {{ webapp_version }} - GitHub</a></p>
</div>
<div class="taxbrain-build">
<h2>Tax-Calculator Code Build</h2>
<p><a href="https://github.com/open-source-economics/Tax-Calculator/tree/{{taxcalc_version}}">Version {{ taxcalc_version }} - GitHub</a></p>
</div>
</div>
</section>
</div>
<form class="inputs-form" method="post" action="/upload/" enctype="multipart/form-data">
{% csrf_token %}
<input type="hidden" name="has_errors" value="{{ has_errors }}" />
<input type="hidden" name="start_year" value="{{ start_year }}" />
<input type="hidden" name="form_id" value="{{ form_id }}" />
<div class="container">
<div class="row">
<div class="col-xs-3">
<div class="inputs-sidebar" data-spy="affix" data-offset-top="435" data-offset-bottom="245">
<div class="sidebar-button">
<a href="#" ></a>
<input id="tax-submit" class="btn btn-secondary btn-block btn-animate" type="submit" name="full_calc" value="Show me the results!">
</div>
<div class="sidebar-button">
<a href="#" ></a>
<button id="tax-submit-quick" class="btn btn-tertiary btn-block" type="submit" value="quick_calc" name="quick_calc">
Quick Calculation! <br> <span style="font-size:smaller;">(1 year; small sample)</span>
</button>
</div>

</div> <!-- sidebar -->
</div>
<div class="col-xs-9">
<div class="inputs-main">

{% for error in errors %}
<div class="alert alert-danger text-center alert-dismissible" role="alert">
<button type="button" class="close" data-dismiss="alert" aria-label="Close">
<span aria-hidden="true">&times;</span>
</button>
{{ error }}
</div>
{% endfor %}

<div class="inputs-block inputs-block-intro">
<div class="inputs-block-header">
<h1 id="get-started">Get Started</h1>
</div>
</div>

{% include 'fileuploadex/file.html' %}
</div> <!-- main -->
</div>
</div>
</div>
</form> <!-- form -->
<div class="push"></div>
</div> <!-- /wrapper -->

{% endblock %}
12 changes: 8 additions & 4 deletions webapp/apps/core/compute.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ def remote_submit_job(
data,
timeout=TIMEOUT_IN_SECONDS,
headers=None):
print(theurl, data)
# print(theurl, data)
if headers is not None:
response = requests.post(theurl,
data=data,
Expand Down Expand Up @@ -56,13 +56,17 @@ def submit_elastic_calculation(self, data):
url_template = "http://{hn}/elastic_gdp_start_job"
return self.submit(data, url_template)

def submit_file_upload_test(self, data):
    """Submit ``data`` to the ``file_upload_test`` endpoint.

    Delegates to ``self.submit`` with the URL template for that endpoint
    and returns its result unchanged.
    """
    return self.submit(data, "http://{hn}/file_upload_test")

def submit(self,
data_list,
url_template,
increment_counter=True,
use_wnc_offset=True):
print("hostnames: ", WORKER_HN)
print("submitting data: ", data_list)
# print("submitting data: ", data_list)
queue_length = 0
submitted = False
attempts = 0
Expand All @@ -80,7 +84,7 @@ def submit(self,
job_id = response_d['job_id']
queue_length = response_d['qlength']
else:
print("FAILED: ", data_list, WORKER_HN)
print("FAILED: ", WORKER_HN)
attempts += 1
except Timeout:
print("Couldn't submit to: ", WORKER_HN)
Expand Down Expand Up @@ -124,7 +128,7 @@ def _get_results_base(self, job_id, job_failure=False):
msg = 'PROBLEM WITH RESPONSE. TEXT RECEIVED: {}'
raise ValueError(msg)
else:
raise # TODO
raise IOError()

def get_results(self, job_id, job_failure=False):
if job_failure:
Expand Down
7 changes: 6 additions & 1 deletion webapp/apps/core/views.py
Original file line number Diff line number Diff line change
Expand Up @@ -92,7 +92,12 @@ def dispatch(self, request, *args, **kwargs):
return self.fail()

if job_ready == 'YES':
results = self.dropq_compute.get_results(job_id)
try:
results = self.dropq_compute.get_results(job_id)
except Exception as e:
self.object.error_text = str(e)
self.object.save()
return self.fail()
self.object.outputs = results['outputs']
self.object.aggr_outputs = results['aggr_outputs']
self.object.creation_date = timezone.now()
Expand Down
Empty file.
3 changes: 3 additions & 0 deletions webapp/apps/fileuploadex/admin.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
from django.contrib import admin

# Register your models here.
5 changes: 5 additions & 0 deletions webapp/apps/fileuploadex/apps.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
from django.apps import AppConfig


class FileuploadexConfig(AppConfig):
    """Django application configuration for the ``fileuploadex`` app."""

    # NOTE(review): this module sits under webapp/apps/fileuploadex; confirm
    # that 'fileuploadex' (rather than a dotted package path) is the name the
    # project's settings expect.
    name = 'fileuploadex'
Loading