diff --git a/.github/workflows/generate_FBS.yml b/.github/workflows/generate_FBS.yml
new file mode 100644
index 000000000..8e7abdccf
--- /dev/null
+++ b/.github/workflows/generate_FBS.yml
@@ -0,0 +1,50 @@
+# This workflow will generate all FlowBySector files and store as artifact
+
+name: Generate FBS
+
+on:
+ pull_request:
+ branches: [master]
+ types: [opened, reopened, ready_for_review] # excludes synchronize to avoid redundant trigger from commits on PRs
+ workflow_dispatch: # also allow manual trigger, for testing purposes
+
+jobs:
+ build:
+ runs-on: ubuntu-latest
+ strategy:
+ fail-fast: false
+
+ steps:
+ - uses: actions/checkout@v2
+ - name: Set up Python
+ uses: actions/setup-python@v2
+ with:
+ python-version: "3.10"
+
+ - name: Update pip & install testing pkgs
+ run: |
+ python -VV
+ python -m pip install --upgrade pip setuptools wheel
+ pip install pytest
+
+ # install package & dependencies
+ - name: Install package and dependencies
+ run: |
+ pip install .
+
+ - name: Generate FBS
+ run: |
+ pytest -m generate_fbs
+
+ - name: Upload files
+ if: always()
+ uses: actions/upload-artifact@v3
+ with:
+ # Artifact name
+ name: FlowBySector
+ # A file, directory or wildcard pattern that describes what to upload
+ path: | # uses local user data dir for ubuntu
+ ~/.local/share/flowsa/FlowBySector/*
+ ~/.local/share/stewi/Log/*
+ if-no-files-found: warn # 'error' or 'ignore' are also available, defaults to `warn`
+ # retention-days: 5 # cannot exceed the retention limit set by the repository, organization, or enterprise.
diff --git a/.github/workflows/python-app.yml b/.github/workflows/python-app.yml
index ef43e1de1..fc805c820 100644
--- a/.github/workflows/python-app.yml
+++ b/.github/workflows/python-app.yml
@@ -61,4 +61,4 @@ jobs:
- name: Test with pytest
run: |
- pytest --doctest-modules
+ pytest --doctest-modules -m "not generate_fbs"
diff --git a/.github/workflows/test_methods.yml b/.github/workflows/test_methods.yml
index 43cc4e982..98062adec 100644
--- a/.github/workflows/test_methods.yml
+++ b/.github/workflows/test_methods.yml
@@ -34,7 +34,7 @@ jobs:
- name: Test FBA config
run: |
- python flowsa/test_FBA_urls.py
+ python flowsa/test_methods.py
- name: Compare FBS with remote
id: FBS
@@ -50,6 +50,9 @@ jobs:
# Artifact name
name: FBS diff files
# A file, directory or wildcard patter that describes what to upload
- path: ${{ env.LD_LIBRARY_PATH }}/python3.10/site-packages/flowsa/data/fbs_diff/*_diff.csv
+ path: |
+ ${{ env.LD_LIBRARY_PATH }}/python3.10/site-packages/flowsa/data/fbs_diff/*_diff.csv
+ ~/.local/share/flowsa/FlowBySector/*
+ ~/.local/share/flowsa/Log/*
if-no-files-found: warn # 'warn' or 'ignore' are also available, defaults to `warn`
# retention-days: 5 # cannot exceed the retention limit set by the repository, organization, or enterprise.
diff --git a/LICENSE b/LICENSE
index 670154e35..8a545bcfc 100644
--- a/LICENSE
+++ b/LICENSE
@@ -1,116 +1,21 @@
-CC0 1.0 Universal
-
-Statement of Purpose
-
-The laws of most jurisdictions throughout the world automatically confer
-exclusive Copyright and Related Rights (defined below) upon the creator and
-subsequent owner(s) (each and all, an "owner") of an original work of
-authorship and/or a database (each, a "Work").
-
-Certain owners wish to permanently relinquish those rights to a Work for the
-purpose of contributing to a commons of creative, cultural and scientific
-works ("Commons") that the public can reliably and without fear of later
-claims of infringement build upon, modify, incorporate in other works, reuse
-and redistribute as freely as possible in any form whatsoever and for any
-purposes, including without limitation commercial purposes. These owners may
-contribute to the Commons to promote the ideal of a free culture and the
-further production of creative, cultural and scientific works, or to gain
-reputation or greater distribution for their Work in part through the use and
-efforts of others.
-
-For these and/or other purposes and motivations, and without any expectation
-of additional consideration or compensation, the person associating CC0 with a
-Work (the "Affirmer"), to the extent that he or she is an owner of Copyright
-and Related Rights in the Work, voluntarily elects to apply CC0 to the Work
-and publicly distribute the Work under its terms, with knowledge of his or her
-Copyright and Related Rights in the Work and the meaning and intended legal
-effect of CC0 on those rights.
-
-1. Copyright and Related Rights. A Work made available under CC0 may be
-protected by copyright and related or neighboring rights ("Copyright and
-Related Rights"). Copyright and Related Rights include, but are not limited
-to, the following:
-
- i. the right to reproduce, adapt, distribute, perform, display, communicate,
- and translate a Work;
-
- ii. moral rights retained by the original author(s) and/or performer(s);
-
- iii. publicity and privacy rights pertaining to a person's image or likeness
- depicted in a Work;
-
- iv. rights protecting against unfair competition in regards to a Work,
- subject to the limitations in paragraph 4(a), below;
-
- v. rights protecting the extraction, dissemination, use and reuse of data in
- a Work;
-
- vi. database rights (such as those arising under Directive 96/9/EC of the
- European Parliament and of the Council of 11 March 1996 on the legal
- protection of databases, and under any national implementation thereof,
- including any amended or successor version of such directive); and
-
- vii. other similar, equivalent or corresponding rights throughout the world
- based on applicable law or treaty, and any national implementations thereof.
-
-2. Waiver. To the greatest extent permitted by, but not in contravention of,
-applicable law, Affirmer hereby overtly, fully, permanently, irrevocably and
-unconditionally waives, abandons, and surrenders all of Affirmer's Copyright
-and Related Rights and associated claims and causes of action, whether now
-known or unknown (including existing as well as future claims and causes of
-action), in the Work (i) in all territories worldwide, (ii) for the maximum
-duration provided by applicable law or treaty (including future time
-extensions), (iii) in any current or future medium and for any number of
-copies, and (iv) for any purpose whatsoever, including without limitation
-commercial, advertising or promotional purposes (the "Waiver"). Affirmer makes
-the Waiver for the benefit of each member of the public at large and to the
-detriment of Affirmer's heirs and successors, fully intending that such Waiver
-shall not be subject to revocation, rescission, cancellation, termination, or
-any other legal or equitable action to disrupt the quiet enjoyment of the Work
-by the public as contemplated by Affirmer's express Statement of Purpose.
-
-3. Public License Fallback. Should any part of the Waiver for any reason be
-judged legally invalid or ineffective under applicable law, then the Waiver
-shall be preserved to the maximum extent permitted taking into account
-Affirmer's express Statement of Purpose. In addition, to the extent the Waiver
-is so judged Affirmer hereby grants to each affected person a royalty-free,
-non transferable, non sublicensable, non exclusive, irrevocable and
-unconditional license to exercise Affirmer's Copyright and Related Rights in
-the Work (i) in all territories worldwide, (ii) for the maximum duration
-provided by applicable law or treaty (including future time extensions), (iii)
-in any current or future medium and for any number of copies, and (iv) for any
-purpose whatsoever, including without limitation commercial, advertising or
-promotional purposes (the "License"). The License shall be deemed effective as
-of the date CC0 was applied by Affirmer to the Work. Should any part of the
-License for any reason be judged legally invalid or ineffective under
-applicable law, such partial invalidity or ineffectiveness shall not
-invalidate the remainder of the License, and in such case Affirmer hereby
-affirms that he or she will not (i) exercise any of his or her remaining
-Copyright and Related Rights in the Work or (ii) assert any associated claims
-and causes of action with respect to the Work, in either case contrary to
-Affirmer's express Statement of Purpose.
-
-4. Limitations and Disclaimers.
-
- a. No trademark or patent rights held by Affirmer are waived, abandoned,
- surrendered, licensed or otherwise affected by this document.
-
- b. Affirmer offers the Work as-is and makes no representations or warranties
- of any kind concerning the Work, express, implied, statutory or otherwise,
- including without limitation warranties of title, merchantability, fitness
- for a particular purpose, non infringement, or the absence of latent or
- other defects, accuracy, or the present or absence of errors, whether or not
- discoverable, all to the greatest extent permissible under applicable law.
-
- c. Affirmer disclaims responsibility for clearing rights of other persons
- that may apply to the Work or any use thereof, including without limitation
- any person's Copyright and Related Rights in the Work. Further, Affirmer
- disclaims responsibility for obtaining any necessary consents, permissions
- or other rights required for any use of the Work.
-
- d. Affirmer understands and acknowledges that Creative Commons is not a
- party to this document and has no duty or obligation with respect to this
- CC0 or use of the Work.
-
-For more information, please see
-
+MIT License
+
+Copyright (c) 2022 U.S. Environmental Protection Agency
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
diff --git a/README.md b/README.md
index e76070665..4addebd13 100644
--- a/README.md
+++ b/README.md
@@ -1,27 +1,44 @@
# flowsa
-`flowsa` is a data processing library that attributes resource use, waste, emissions, and loss to economic sectors. `flowsa` aggregates, combines, and allocates data from a variety of sources. The sources can be found in the [GitHub wiki](https://github.com/USEPA/flowsa/wiki/Available-Data#flow-by-activity-datasets) under "Flow-By-Activity Datasets".
-
-`flowsa` helps support [USEEIO](https://www.epa.gov/land-research/us-environmentally-extended-input-output-useeio-technical-content) as part of the [USEEIO modeling](https://www.epa.gov/land-research/us-environmentally-extended-input-output-useeio-models) framework. The USEEIO models estimate potential impacts of goods and services in the US economy. The [Flow-By-Sector datasets](https://github.com/USEPA/flowsa/wiki/Available-Data#flow-by-sector-datasets) created in FLOWSA are the environmental inputs to [`useeior`](https://github.com/USEPA/useeior).
+`flowsa` is a data processing library attributing resources (environmental,
+monetary, and human), wastes, emissions, and losses to sectors, typically
+[NAICS codes](https://www.census.gov/naics/). `flowsa` aggregates, combines,
+and allocates data from a variety of sources. The sources can be found in the
+[GitHub wiki](https://github.com/USEPA/flowsa/wiki/Available-Data#flow-by-activity-datasets)
+under "Flow-By-Activity Datasets".
+
+`flowsa` helps support
+[USEEIO](https://www.epa.gov/land-research/us-environmentally-extended-input-output-useeio-technical-content)
+as part of the [USEEIO modeling](https://www.epa.gov/land-research/us-environmentally-extended-input-output-useeio-models)
+framework. The USEEIO models estimate potential impacts of goods and
+services in the US economy. The
+[Flow-By-Sector datasets](https://github.com/USEPA/flowsa/wiki/Available-Data#flow-by-sector-datasets)
+created in FLOWSA are the environmental inputs to
+[`useeior`](https://github.com/USEPA/useeior).
## Usage
-### Flow-By-Activity Datasets
-Flow-By-Activity datasets are formatted tables from a variety of sources. They are largely unchanged from the original data source, with the exception of formatting.
+### Flow-By-Activity (FBA) Datasets
+Flow-By-Activity datasets are formatted tables from a variety of sources.
+They are largely unchanged from the original data source, with the
+exception of formatting. A list of available FBA datasets can be found in
+the [Wiki](https://github.com/USEPA/flowsa/wiki/Available-Data#flow-by-activity-datasets).
`import flowsa` \
`flowsa.seeAvailableFlowByModels('FBA')` \
`flowsa.getFlowByActivity(datasource="USDA_CoA_Cropland", year=2017)`
-### Flow-By-Sector Datasets
-Flow-By-Sector datasets are tables of environmental and other data attributed to [sectors](https://www.census.gov/naics/).
+### Flow-By-Sector (FBS) Datasets
+Flow-By-Sector datasets are tables of environmental and other data
+attributed to [sectors](https://www.census.gov/naics/). A list of available
+FBS datasets can be found in the [Wiki](https://github.com/USEPA/flowsa/wiki/Available-Data#flow-by-sector-datasets).
`import flowsa` \
`flowsa.seeAvailableFlowByModels('FBS')` \
`flowsa.getFlowBySector('Water_national_2015_m1')`
## Installation
-`pip install git+https://github.com/USEPA/flowsa.git@v1.1#egg=flowsa`
+`pip install git+https://github.com/USEPA/flowsa.git@vX.X.X#egg=flowsa`
-where v1.1 can be replaced with the version you wish to install under
+where vX.X.X can be replaced with the version you wish to install under
[Releases](https://github.com/USEPA/flowsa/releases).
### Additional Information on Installation, Examples, Detailed Documentation
@@ -29,10 +46,13 @@ For more information on `flowsa` see the [wiki](https://github.com/USEPA/flowsa/
## Disclaimer
-The United States Environmental Protection Agency (EPA) GitHub project code is provided on an "as is" basis
-and the user assumes responsibility for its use. EPA has relinquished control of the information and no longer
-has responsibility to protect the integrity , confidentiality, or availability of the information. Any
-reference to specific commercial products, processes, or services by service mark, trademark, manufacturer,
-or otherwise, does not constitute or imply their endorsement, recommendation or favoring by EPA. The EPA seal
-and logo shall not be used in any manner to imply endorsement of any commercial product or activity by EPA or
+The United States Environmental Protection Agency (EPA) GitHub project code
+is provided on an "as is" basis and the user assumes responsibility for its
+use. EPA has relinquished control of the information and no longer has
+responsibility to protect the integrity, confidentiality, or availability
+of the information. Any reference to specific commercial products,
+processes, or services by service mark, trademark, manufacturer, or
+otherwise, does not constitute or imply their endorsement, recommendation
+or favoring by EPA. The EPA seal and logo shall not be used in any manner
+to imply endorsement of any commercial product or activity by EPA or
the United States Government.
diff --git a/flowsa/README.md b/flowsa/README.md
index 4ba903a31..0cb770597 100644
--- a/flowsa/README.md
+++ b/flowsa/README.md
@@ -2,18 +2,24 @@
Python scripts used to generate Flow-By-Activity (FBA)
and Flow-By-Sector (FBS) datasets
-1. _"_init_.py"_
+1. _"\_\_init\_\_.py"_
2. _"allocation.py"_
3. _"bibliography.py"_
4. _"common.py"_
5. _"dataclean.py"_
-6. _"fbs_allocation.py"_
-7. _"flowbyactivity.py"_
-8. _"flowbyfunctions.py"_
-9. _"flowbysector.py"_
-10. _"literature_values.py"_
-11. _"metadata.py"_
-12. _"sectormapping.py"_
-13. _"settings.py"_
-14. _"test_examples.py"_
-15. _"validation.py"_
+6. _"datavisualization.py"_
+7. _"fbs_allocation.py"_
+8. _"flowbyactivity.py"_
+9. _"flowbyfunctions.py"_
+10. _"flowbysector.py"_
+11. _"flowsa_yaml.py"_
+12. _"literature_values.py"_
+13. _"location.py"_
+14. _"metadata.py"_
+15. _"schema.py"_
+16. _"sectormapping.py"_
+17. _"settings.py"_
+18. _"test_examples.py"_
+19. _"test_FBS_against_remote.py"_
+20. _"test_methods.py"_
+21. _"validation.py"_
diff --git a/flowsa/__init__.py b/flowsa/__init__.py
index db6d1768f..bf0789cba 100644
--- a/flowsa/__init__.py
+++ b/flowsa/__init__.py
@@ -41,6 +41,10 @@ def getFlowByActivity(datasource, year, flowclass=None, geographic_level=None,
year=int(year),
download_ok=download_FBA_if_missing
)
+
+ if len(fba) ==0:
+ raise flowsa.exceptions.FBANotAvailableError(
+ message=f"Error generating {datasource} for {str(year)}")
if flowclass is not None:
fba = fba.query('Class == @flowclass')
# if geographic level specified, only load rows in geo level
diff --git a/flowsa/allocation.py b/flowsa/allocation.py
index ae9f2dcf6..cdd01b9c3 100644
--- a/flowsa/allocation.py
+++ b/flowsa/allocation.py
@@ -6,9 +6,9 @@
Methods of allocating datasets
"""
import pandas as pd
-from flowsa.settings import log
-from flowsa.common import fbs_activity_fields, sector_level_key, load_crosswalk
-from flowsa.settings import vLogDetailed
+from flowsa.common import fbs_activity_fields, sector_level_key, \
+ load_crosswalk, check_activities_sector_like
+from flowsa.settings import log, vLogDetailed
from flowsa.dataclean import replace_NoneType_with_empty_cells, \
replace_strings_with_NoneType
from flowsa.flowbyfunctions import sector_aggregation, \
@@ -143,7 +143,8 @@ def proportional_allocation(df, attr):
# calculate ratio
allocation_df.loc[:, 'FlowAmountRatio'] = \
allocation_df['FlowAmount'] / allocation_df['Denominator']
- allocation_df = allocation_df.drop(columns=['Denominator']).reset_index()
+ allocation_df = allocation_df.drop(columns=['Denominator']).reset_index(
+ drop=True)
# add nonetypes
allocation_df = replace_strings_with_NoneType(allocation_df)
@@ -219,17 +220,21 @@ def proportional_allocation_by_location_and_activity(df_load, sectorcolumn):
return allocation_df
-def equally_allocate_parent_to_child_naics(df_load, method):
+def equally_allocate_parent_to_child_naics(
+ df_load, method, overwritetargetsectorlevel=None):
"""
Determine rows of data that will be lost if subset data at
target sector level.
Equally allocate parent NAICS to child NAICS where child NAICS missing
:param df_load: df, FBS format
- :param target_sector_level: str, target NAICS level for FBS output
+ :param overwritetargetsectorlevel: str, optional, specify what sector
+ level to allocate to
:return: df, with all child NAICS at target sector level
"""
# determine which sector level to use, use the least aggregated level
sector_level = method.get('target_sector_level')
+ if overwritetargetsectorlevel is not None:
+ sector_level = overwritetargetsectorlevel
# if secondary sector levels are identified, set the sector level to the
# least aggregated
sector_level_list = [sector_level]
@@ -244,12 +249,23 @@ def equally_allocate_parent_to_child_naics(df_load, method):
# exclude nonsectors
df = replace_NoneType_with_empty_cells(df_load)
+ # determine if activities are sector-like, if aggregating a df with a
+ # 'SourceName'
+ sector_like_activities = check_activities_sector_like(df_load)
+
+ # if activities are source like, drop from df,
+ # add back in as copies of sector columns columns to keep
+ if sector_like_activities:
+ # subset df
+ df_cols = [e for e in df.columns if e not in
+ ('ActivityProducedBy', 'ActivityConsumedBy')]
+ df = df[df_cols]
+
rows_lost = pd.DataFrame()
for i in range(2, sector_level_key[sector_level]):
- dfm = subset_and_merge_df_by_sector_lengths(df_load, i, i+1)
-
+ dfm = subset_and_merge_df_by_sector_lengths(df, i, i+1)
# extract the rows that are not disaggregated to more
- # specific naics
+ # specific sectors
rl = dfm.query('_merge=="left_only"').drop(
columns=['_merge', 'SPB_tmp', 'SCB_tmp'])
rl_list = rl[['SectorProducedBy', 'SectorConsumedBy']]\
@@ -269,11 +285,11 @@ def equally_allocate_parent_to_child_naics(df_load, method):
# merge df & conditionally replace sector produced/consumed columns
# merge dfs assigning sector length
sectype_list = ['Produced', 'Consumed']
- for s in sectype_list:
- rl = rl.merge(cw, how='left', left_on=[f'Sector{s}By'],
+ for sec in sectype_list:
+ rl = rl.merge(cw, how='left', left_on=[f'Sector{sec}By'],
right_on=nlength).rename(
- columns={'sector_count': f'{s}Count'})
- rl[f'Sector{s}By'] = rl[sector_level]
+ columns={'sector_count': f'{sec}Count'})
+ rl[f'Sector{sec}By'] = rl[sector_level]
rl = rl.drop(columns=[sector_level, nlength])
# create one sector count column, using max value
@@ -287,7 +303,7 @@ def equally_allocate_parent_to_child_naics(df_load, method):
# append to df
if len(rl) != 0:
- vLogDetailed.warning('Data found at %s digit NAICS not '
+ vLogDetailed.warning('Data found at %s digit sectors not '
'represented in current data subset: '
'{}'.format(' '.join(map(str, rl_list))),
str(i))
@@ -297,9 +313,18 @@ def equally_allocate_parent_to_child_naics(df_load, method):
vLogDetailed.info('Allocating FlowAmounts equally to '
'each %s associated with the sectors previously '
'dropped', sector_level)
+ # if activities are source-like, set col values as copies
+ # of the sector columns
+ if sector_like_activities:
+ rows_lost = rows_lost.assign(ActivityProducedBy=
+ rows_lost['SectorProducedBy'])
+ rows_lost = rows_lost.assign(ActivityConsumedBy=
+ rows_lost['SectorConsumedBy'])
+ # reindex columns
+ rows_lost = rows_lost.reindex(df_load.columns, axis=1)
# add rows of missing data to the fbs sector subset
- df_w_lost_data = pd.concat([df, rows_lost], ignore_index=True, sort=True)
+ df_w_lost_data = pd.concat([df_load, rows_lost], ignore_index=True)
df_w_lost_data = replace_strings_with_NoneType(df_w_lost_data)
return df_w_lost_data
@@ -311,21 +336,54 @@ def equal_allocation(fba_load):
Function only works if all mapped sectors are the same length
:param fba_load: df, FBA with activity columns mapped to sectors
+ :param sector_level: string ('NAICS_X') used when assigning columns of
+ sector levels if there are ambiguous sectors (e.g., household and
+ government sectors)
:return: df, with FlowAmount equally allocated to all mapped sectors
"""
+ from flowsa.flowbyfunctions import assign_columns_of_sector_levels
+
+ # first check that all sector lengths are the same
+ dfc = assign_columns_of_sector_levels(fba_load)
+ # if duplicated rows, keep assignment to most specific sectors because
+ # data should already be at final assignment lengths if equally
+ # allocating and because not manipulating the loaded dataset, but rather
+ # just checking that all sector lengths match for an activity
+ duplicate_cols = [e for e in dfc.columns if e not in [
+ 'SectorProducedByLength', 'SectorConsumedByLength']]
+ duplicate_df = dfc[dfc.duplicated(duplicate_cols)]
+ if len(duplicate_df) > 0:
+ log.info('Dropping rows duplicated due to assigning sector lengths '
+ 'for ambiguous sectors. Keeping sector length assignments '
+ 'to most specific sectors.')
+ dfc = dfc[dfc.duplicated(duplicate_cols, keep='first')]
+
+ # Before equally allocating, check that each activity is being allocated
+ # to sectors of the same length
+ dfsub = dfc[['ActivityProducedBy', 'ActivityConsumedBy',
+ 'SectorProducedByLength',
+ 'SectorConsumedByLength']].drop_duplicates()
+ df_dup = dfsub[dfsub.duplicated(['ActivityProducedBy', 'ActivityConsumedBy'])]
+ if len(df_dup) > 1:
+ log.error('Cannot equally allocate because sector lengths vary. All '
+ 'sectors must be the same sector level.')
+
# create groupby cols by which to determine allocation
fba_cols = fba_load.select_dtypes([object]).columns.to_list()
groupcols = [e for e in fba_cols if e not in
['SectorProducedBy', 'SectorConsumedBy', 'Description']]
# create counts of rows
df_count = fba_load.groupby(
- groupcols, as_index=False, dropna=False).size().astype(str)
- df_count = replace_strings_with_NoneType(df_count)
+ groupcols, as_index=False, dropna=False).size()
+ df_count = replace_NoneType_with_empty_cells(df_count)
- # merge dfs
- dfm = fba_load.merge(df_count, how='left')
+ # merge dfs, replace cells with empty strings to ensure merge occurs
+ # correctly
+ fba = replace_NoneType_with_empty_cells(fba_load)
+ dfm = fba.merge(df_count, how='outer', on=groupcols)
# calc new flowamounts
- dfm['FlowAmount'] = dfm['FlowAmount'] / dfm['size'].astype(int)
+ dfm['FlowAmount'] = dfm['FlowAmount'] / dfm['size']
dfm = dfm.drop(columns='size')
+ dfm = replace_strings_with_NoneType(dfm)
return dfm
diff --git a/flowsa/bibliography.py b/flowsa/bibliography.py
index 358362aa6..21687e8fd 100644
--- a/flowsa/bibliography.py
+++ b/flowsa/bibliography.py
@@ -148,9 +148,9 @@ def generate_fbs_bibliography(methodname):
f"{str(source[1])}",
'author': config['author'],
'year': str(source[1]),
- 'url': config['source_url'],
+ 'url': config['tool_meta']['source_url'],
'urldate': bib_date,
- 'ID': config['bib_id'] + '_' + str(source[1]),
+ 'ID': config['tool_meta']['bib_id'] + '_' + str(source[1]),
'ENTRYTYPE': 'misc'
}]
# append each entry to a list of BibDatabase entries
diff --git a/flowsa/common.py b/flowsa/common.py
index 73ceca7d6..3e41d849e 100644
--- a/flowsa/common.py
+++ b/flowsa/common.py
@@ -12,6 +12,7 @@
from dotenv import load_dotenv
from esupy.processed_data_mgmt import create_paths_if_missing
import flowsa.flowsa_yaml as flowsa_yaml
+import flowsa.exceptions
from flowsa.schema import flow_by_activity_fields, flow_by_sector_fields, \
flow_by_sector_collapsed_fields, flow_by_activity_mapped_fields, \
flow_by_activity_wsec_fields, flow_by_activity_mapped_wsec_fields, \
@@ -51,8 +52,7 @@ def load_api_key(api_source):
load_dotenv(f'{MODULEPATH}API_Keys.env', verbose=True)
key = os.getenv(api_source)
if key is None:
- log.error(f"Key file {api_source} not found. See github wiki for help "
- "https://github.com/USEPA/flowsa/wiki/Using-FLOWSA#api-keys")
+ raise flowsa.exceptions.APIError(api_source=api_source)
return key
@@ -131,9 +131,9 @@ def load_yaml_dict(filename, flowbytype=None, filepath=None):
try:
with open(yaml_path, 'r', encoding='utf-8') as f:
config = flowsa_yaml.load(f, filepath)
- except IOError:
- log.error(f'{flowbytype} method file not found')
- raise
+ except FileNotFoundError:
+ raise flowsa.exceptions.FlowsaMethodNotFoundError(
+ method_type=flowbytype, method=filename)
return config
@@ -324,20 +324,32 @@ def return_true_source_catalog_name(sourcename):
return sourcename
-def check_activities_sector_like(sourcename_load):
+def check_activities_sector_like(df_load, sourcename=None):
"""
Check if the activities in a df are sector-like,
if cannot find the sourcename in the source catalog, drop extensions on the
source name
+ :param df_load: df, df to determine if activities are sector-like
+ :param sourcename: str, optional, can identify sourcename to use
"""
- sourcename = return_true_source_catalog_name(sourcename_load)
+ # identify sourcename
+ if sourcename is not None:
+ s = sourcename
+ else:
+ if 'SourceName' in df_load.columns:
+ s = pd.unique(df_load['SourceName'])[0]
+ elif 'MetaSources' in df_load.columns:
+ s = pd.unique(df_load['MetaSources'])[0]
+
+ sourcename = return_true_source_catalog_name(s)
try:
sectorLike = load_yaml_dict('source_catalog')[sourcename][
'sector-like_activities']
except KeyError:
- log.error(f'%s or %s not found in {datapath}source_catalog.yaml',
- sourcename_load, sourcename)
+ log.info(f'%s not found in {datapath}source_catalog.yaml, assuming '
+ f'activities are not sector-like', sourcename)
+ sectorLike = False
return sectorLike
diff --git a/flowsa/data/README.md b/flowsa/data/README.md
index 5f7c1981b..debacda02 100644
--- a/flowsa/data/README.md
+++ b/flowsa/data/README.md
@@ -4,16 +4,20 @@ Flow-By-Activity.
## Term descriptions
- _class_: list, classes such as "Water" found in the Flow-By-Activity
-- _sector-like_activities_: 'True' or 'False', “sector-like activities” are True when the Flow-By-Activity
- “ActivityProducedBy” and “ActivityConsumedBy” columns are already NAICS based. For example, all BLS QCEW
- data for employment and establishments are published by NAICS codes. We deem these “sector-like” because we
- then implement checks to determine if the activities are published in the identified NAICS year in the
- Flow-By-Sector and if not, we have a crosswalk to map the sectors/NAICS to NAICS year.
-- _activity_schema_: 'None' if 'sector-like_activities' is False, otherwise the year of the sector data
+- _sector-like_activities_: 'True' or 'False', “sector-like activities” are
+ True when the Flow-By-Activity “ActivityProducedBy” and
+ “ActivityConsumedBy” columns are already NAICS based. For example, all
+ BLS QCEW data for employment and establishments are published by NAICS
+ codes. We deem these “sector-like” because we then implement checks to
+ determine if the activities are published in the identified NAICS year in the
+ Flow-By-Sector and if not, we have a crosswalk to map the sectors/NAICS
+ to NAICS year.
+- _activity_schema_: 'None' if 'sector-like_activities' is False, otherwise
+ the year of the sector data
(ex. NAICS_2012_Code)
-- _sector_aggregation_level_: 'aggregated' or 'disaggregated'. Some
- dataset crosswalks contain every level of relevant sectors (ex. NAICS
- for 2-6 digits), that is they are fully disaggregated. Other datasets only
- contain information for the highest relevant sector level, in which case,
- the dataset is marked as showing aggregated sectors only
- (ex. USGS_WU_Coef crosswalk)
\ No newline at end of file
+- _sector_aggregation_level_: 'aggregated' or 'disaggregated'. Some dataset
+ crosswalks contain every level of relevant sectors (ex. NAICS for 2-6
+ digits), that is they are fully disaggregated. Other datasets only
+ contain information for the highest relevant sector level, in which case,
+ the dataset is marked as showing aggregated sectors only (e.g.,
+ USGS_WU_Coef crosswalk).
\ No newline at end of file
diff --git a/flowsa/data/activitytosectormapping/NAICS_Crosswalk_BEA_2012_Summary.csv b/flowsa/data/activitytosectormapping/NAICS_Crosswalk_BEA_2012_Summary.csv
new file mode 100644
index 000000000..9179537c3
--- /dev/null
+++ b/flowsa/data/activitytosectormapping/NAICS_Crosswalk_BEA_2012_Summary.csv
@@ -0,0 +1,1117 @@
+ActivitySourceName,Activity,SectorSourceName,Sector,SectorType
+BEA_2012_Summary_Code,111CA,NAICS_2012_Code,111110,
+BEA_2012_Summary_Code,111CA,NAICS_2012_Code,111120,
+BEA_2012_Summary_Code,111CA,NAICS_2012_Code,111130,
+BEA_2012_Summary_Code,111CA,NAICS_2012_Code,111140,
+BEA_2012_Summary_Code,111CA,NAICS_2012_Code,111150,
+BEA_2012_Summary_Code,111CA,NAICS_2012_Code,111160,
+BEA_2012_Summary_Code,111CA,NAICS_2012_Code,111191,
+BEA_2012_Summary_Code,111CA,NAICS_2012_Code,111199,
+BEA_2012_Summary_Code,111CA,NAICS_2012_Code,111211,
+BEA_2012_Summary_Code,111CA,NAICS_2012_Code,111219,
+BEA_2012_Summary_Code,111CA,NAICS_2012_Code,111310,
+BEA_2012_Summary_Code,111CA,NAICS_2012_Code,111320,
+BEA_2012_Summary_Code,111CA,NAICS_2012_Code,111331,
+BEA_2012_Summary_Code,111CA,NAICS_2012_Code,111332,
+BEA_2012_Summary_Code,111CA,NAICS_2012_Code,111333,
+BEA_2012_Summary_Code,111CA,NAICS_2012_Code,111334,
+BEA_2012_Summary_Code,111CA,NAICS_2012_Code,111335,
+BEA_2012_Summary_Code,111CA,NAICS_2012_Code,111336,
+BEA_2012_Summary_Code,111CA,NAICS_2012_Code,111339,
+BEA_2012_Summary_Code,111CA,NAICS_2012_Code,111411,
+BEA_2012_Summary_Code,111CA,NAICS_2012_Code,111419,
+BEA_2012_Summary_Code,111CA,NAICS_2012_Code,111421,
+BEA_2012_Summary_Code,111CA,NAICS_2012_Code,111422,
+BEA_2012_Summary_Code,111CA,NAICS_2012_Code,111910,
+BEA_2012_Summary_Code,111CA,NAICS_2012_Code,111920,
+BEA_2012_Summary_Code,111CA,NAICS_2012_Code,111930,
+BEA_2012_Summary_Code,111CA,NAICS_2012_Code,111940,
+BEA_2012_Summary_Code,111CA,NAICS_2012_Code,111991,
+BEA_2012_Summary_Code,111CA,NAICS_2012_Code,111992,
+BEA_2012_Summary_Code,111CA,NAICS_2012_Code,111998,
+BEA_2012_Summary_Code,111CA,NAICS_2012_Code,112111,
+BEA_2012_Summary_Code,111CA,NAICS_2012_Code,112112,
+BEA_2012_Summary_Code,111CA,NAICS_2012_Code,112120,
+BEA_2012_Summary_Code,111CA,NAICS_2012_Code,112130,
+BEA_2012_Summary_Code,111CA,NAICS_2012_Code,112210,
+BEA_2012_Summary_Code,111CA,NAICS_2012_Code,112310,
+BEA_2012_Summary_Code,111CA,NAICS_2012_Code,112320,
+BEA_2012_Summary_Code,111CA,NAICS_2012_Code,112330,
+BEA_2012_Summary_Code,111CA,NAICS_2012_Code,112340,
+BEA_2012_Summary_Code,111CA,NAICS_2012_Code,112390,
+BEA_2012_Summary_Code,111CA,NAICS_2012_Code,112410,
+BEA_2012_Summary_Code,111CA,NAICS_2012_Code,112420,
+BEA_2012_Summary_Code,111CA,NAICS_2012_Code,112511,
+BEA_2012_Summary_Code,111CA,NAICS_2012_Code,112512,
+BEA_2012_Summary_Code,111CA,NAICS_2012_Code,112519,
+BEA_2012_Summary_Code,111CA,NAICS_2012_Code,112910,
+BEA_2012_Summary_Code,111CA,NAICS_2012_Code,112920,
+BEA_2012_Summary_Code,111CA,NAICS_2012_Code,112930,
+BEA_2012_Summary_Code,111CA,NAICS_2012_Code,112990,
+BEA_2012_Summary_Code,113FF,NAICS_2012_Code,113110,
+BEA_2012_Summary_Code,113FF,NAICS_2012_Code,113210,
+BEA_2012_Summary_Code,113FF,NAICS_2012_Code,113310,
+BEA_2012_Summary_Code,113FF,NAICS_2012_Code,114111,
+BEA_2012_Summary_Code,113FF,NAICS_2012_Code,114112,
+BEA_2012_Summary_Code,113FF,NAICS_2012_Code,114119,
+BEA_2012_Summary_Code,113FF,NAICS_2012_Code,114210,
+BEA_2012_Summary_Code,113FF,NAICS_2012_Code,115111,
+BEA_2012_Summary_Code,113FF,NAICS_2012_Code,115112,
+BEA_2012_Summary_Code,113FF,NAICS_2012_Code,115113,
+BEA_2012_Summary_Code,113FF,NAICS_2012_Code,115114,
+BEA_2012_Summary_Code,113FF,NAICS_2012_Code,115115,
+BEA_2012_Summary_Code,113FF,NAICS_2012_Code,115116,
+BEA_2012_Summary_Code,113FF,NAICS_2012_Code,115210,
+BEA_2012_Summary_Code,113FF,NAICS_2012_Code,115310,
+BEA_2012_Summary_Code,211,NAICS_2012_Code,211111,
+BEA_2012_Summary_Code,211,NAICS_2012_Code,211112,
+BEA_2012_Summary_Code,212,NAICS_2012_Code,212111,
+BEA_2012_Summary_Code,212,NAICS_2012_Code,212112,
+BEA_2012_Summary_Code,212,NAICS_2012_Code,212113,
+BEA_2012_Summary_Code,212,NAICS_2012_Code,212210,
+BEA_2012_Summary_Code,212,NAICS_2012_Code,212221,
+BEA_2012_Summary_Code,212,NAICS_2012_Code,212222,
+BEA_2012_Summary_Code,212,NAICS_2012_Code,212231,
+BEA_2012_Summary_Code,212,NAICS_2012_Code,212234,
+BEA_2012_Summary_Code,212,NAICS_2012_Code,212291,
+BEA_2012_Summary_Code,212,NAICS_2012_Code,212299,
+BEA_2012_Summary_Code,212,NAICS_2012_Code,212311,
+BEA_2012_Summary_Code,212,NAICS_2012_Code,212312,
+BEA_2012_Summary_Code,212,NAICS_2012_Code,212313,
+BEA_2012_Summary_Code,212,NAICS_2012_Code,212319,
+BEA_2012_Summary_Code,212,NAICS_2012_Code,212321,
+BEA_2012_Summary_Code,212,NAICS_2012_Code,212322,
+BEA_2012_Summary_Code,212,NAICS_2012_Code,212324,
+BEA_2012_Summary_Code,212,NAICS_2012_Code,212325,
+BEA_2012_Summary_Code,212,NAICS_2012_Code,212391,
+BEA_2012_Summary_Code,212,NAICS_2012_Code,212392,
+BEA_2012_Summary_Code,212,NAICS_2012_Code,212393,
+BEA_2012_Summary_Code,212,NAICS_2012_Code,212399,
+BEA_2012_Summary_Code,213,NAICS_2012_Code,213111,
+BEA_2012_Summary_Code,213,NAICS_2012_Code,213112,
+BEA_2012_Summary_Code,213,NAICS_2012_Code,213113,
+BEA_2012_Summary_Code,213,NAICS_2012_Code,213114,
+BEA_2012_Summary_Code,213,NAICS_2012_Code,213115,
+BEA_2012_Summary_Code,22,NAICS_2012_Code,221111,
+BEA_2012_Summary_Code,22,NAICS_2012_Code,221112,
+BEA_2012_Summary_Code,22,NAICS_2012_Code,221113,
+BEA_2012_Summary_Code,22,NAICS_2012_Code,221114,
+BEA_2012_Summary_Code,22,NAICS_2012_Code,221115,
+BEA_2012_Summary_Code,22,NAICS_2012_Code,221116,
+BEA_2012_Summary_Code,22,NAICS_2012_Code,221117,
+BEA_2012_Summary_Code,22,NAICS_2012_Code,221118,
+BEA_2012_Summary_Code,22,NAICS_2012_Code,221121,
+BEA_2012_Summary_Code,22,NAICS_2012_Code,221122,
+BEA_2012_Summary_Code,22,NAICS_2012_Code,221210,
+BEA_2012_Summary_Code,22,NAICS_2012_Code,221310,
+BEA_2012_Summary_Code,22,NAICS_2012_Code,221320,
+BEA_2012_Summary_Code,22,NAICS_2012_Code,221330,
+BEA_2012_Summary_Code,23,NAICS_2012_Code,236115,
+BEA_2012_Summary_Code,23,NAICS_2012_Code,236116,
+BEA_2012_Summary_Code,23,NAICS_2012_Code,236117,
+BEA_2012_Summary_Code,23,NAICS_2012_Code,236118,
+BEA_2012_Summary_Code,23,NAICS_2012_Code,236210,
+BEA_2012_Summary_Code,23,NAICS_2012_Code,236220,
+BEA_2012_Summary_Code,23,NAICS_2012_Code,237110,
+BEA_2012_Summary_Code,23,NAICS_2012_Code,237120,
+BEA_2012_Summary_Code,23,NAICS_2012_Code,237130,
+BEA_2012_Summary_Code,23,NAICS_2012_Code,237210,
+BEA_2012_Summary_Code,23,NAICS_2012_Code,237310,
+BEA_2012_Summary_Code,23,NAICS_2012_Code,237990,
+BEA_2012_Summary_Code,23,NAICS_2012_Code,238110,
+BEA_2012_Summary_Code,23,NAICS_2012_Code,238120,
+BEA_2012_Summary_Code,23,NAICS_2012_Code,238130,
+BEA_2012_Summary_Code,23,NAICS_2012_Code,238140,
+BEA_2012_Summary_Code,23,NAICS_2012_Code,238150,
+BEA_2012_Summary_Code,23,NAICS_2012_Code,238160,
+BEA_2012_Summary_Code,23,NAICS_2012_Code,238170,
+BEA_2012_Summary_Code,23,NAICS_2012_Code,238190,
+BEA_2012_Summary_Code,23,NAICS_2012_Code,238210,
+BEA_2012_Summary_Code,23,NAICS_2012_Code,238220,
+BEA_2012_Summary_Code,23,NAICS_2012_Code,238290,
+BEA_2012_Summary_Code,23,NAICS_2012_Code,238310,
+BEA_2012_Summary_Code,23,NAICS_2012_Code,238320,
+BEA_2012_Summary_Code,23,NAICS_2012_Code,238330,
+BEA_2012_Summary_Code,23,NAICS_2012_Code,238340,
+BEA_2012_Summary_Code,23,NAICS_2012_Code,238350,
+BEA_2012_Summary_Code,23,NAICS_2012_Code,238390,
+BEA_2012_Summary_Code,23,NAICS_2012_Code,238910,
+BEA_2012_Summary_Code,23,NAICS_2012_Code,238990,
+BEA_2012_Summary_Code,311FT,NAICS_2012_Code,311111,
+BEA_2012_Summary_Code,311FT,NAICS_2012_Code,311119,
+BEA_2012_Summary_Code,311FT,NAICS_2012_Code,311211,
+BEA_2012_Summary_Code,311FT,NAICS_2012_Code,311212,
+BEA_2012_Summary_Code,311FT,NAICS_2012_Code,311213,
+BEA_2012_Summary_Code,311FT,NAICS_2012_Code,311221,
+BEA_2012_Summary_Code,311FT,NAICS_2012_Code,311224,
+BEA_2012_Summary_Code,311FT,NAICS_2012_Code,311225,
+BEA_2012_Summary_Code,311FT,NAICS_2012_Code,311230,
+BEA_2012_Summary_Code,311FT,NAICS_2012_Code,311313,
+BEA_2012_Summary_Code,311FT,NAICS_2012_Code,311314,
+BEA_2012_Summary_Code,311FT,NAICS_2012_Code,311340,
+BEA_2012_Summary_Code,311FT,NAICS_2012_Code,311351,
+BEA_2012_Summary_Code,311FT,NAICS_2012_Code,311352,
+BEA_2012_Summary_Code,311FT,NAICS_2012_Code,311411,
+BEA_2012_Summary_Code,311FT,NAICS_2012_Code,311412,
+BEA_2012_Summary_Code,311FT,NAICS_2012_Code,311421,
+BEA_2012_Summary_Code,311FT,NAICS_2012_Code,311422,
+BEA_2012_Summary_Code,311FT,NAICS_2012_Code,311423,
+BEA_2012_Summary_Code,311FT,NAICS_2012_Code,311511,
+BEA_2012_Summary_Code,311FT,NAICS_2012_Code,311512,
+BEA_2012_Summary_Code,311FT,NAICS_2012_Code,311513,
+BEA_2012_Summary_Code,311FT,NAICS_2012_Code,311514,
+BEA_2012_Summary_Code,311FT,NAICS_2012_Code,311520,
+BEA_2012_Summary_Code,311FT,NAICS_2012_Code,311611,
+BEA_2012_Summary_Code,311FT,NAICS_2012_Code,311612,
+BEA_2012_Summary_Code,311FT,NAICS_2012_Code,311613,
+BEA_2012_Summary_Code,311FT,NAICS_2012_Code,311615,
+BEA_2012_Summary_Code,311FT,NAICS_2012_Code,311710,
+BEA_2012_Summary_Code,311FT,NAICS_2012_Code,311811,
+BEA_2012_Summary_Code,311FT,NAICS_2012_Code,311812,
+BEA_2012_Summary_Code,311FT,NAICS_2012_Code,311813,
+BEA_2012_Summary_Code,311FT,NAICS_2012_Code,311821,
+BEA_2012_Summary_Code,311FT,NAICS_2012_Code,311824,
+BEA_2012_Summary_Code,311FT,NAICS_2012_Code,311830,
+BEA_2012_Summary_Code,311FT,NAICS_2012_Code,311911,
+BEA_2012_Summary_Code,311FT,NAICS_2012_Code,311919,
+BEA_2012_Summary_Code,311FT,NAICS_2012_Code,311920,
+BEA_2012_Summary_Code,311FT,NAICS_2012_Code,311930,
+BEA_2012_Summary_Code,311FT,NAICS_2012_Code,311941,
+BEA_2012_Summary_Code,311FT,NAICS_2012_Code,311942,
+BEA_2012_Summary_Code,311FT,NAICS_2012_Code,311991,
+BEA_2012_Summary_Code,311FT,NAICS_2012_Code,311999,
+BEA_2012_Summary_Code,311FT,NAICS_2012_Code,312111,
+BEA_2012_Summary_Code,311FT,NAICS_2012_Code,312112,
+BEA_2012_Summary_Code,311FT,NAICS_2012_Code,312113,
+BEA_2012_Summary_Code,311FT,NAICS_2012_Code,312120,
+BEA_2012_Summary_Code,311FT,NAICS_2012_Code,312130,
+BEA_2012_Summary_Code,311FT,NAICS_2012_Code,312140,
+BEA_2012_Summary_Code,311FT,NAICS_2012_Code,312230,
+BEA_2012_Summary_Code,313TT,NAICS_2012_Code,313110,
+BEA_2012_Summary_Code,313TT,NAICS_2012_Code,313210,
+BEA_2012_Summary_Code,313TT,NAICS_2012_Code,313220,
+BEA_2012_Summary_Code,313TT,NAICS_2012_Code,313230,
+BEA_2012_Summary_Code,313TT,NAICS_2012_Code,313240,
+BEA_2012_Summary_Code,313TT,NAICS_2012_Code,313310,
+BEA_2012_Summary_Code,313TT,NAICS_2012_Code,313320,
+BEA_2012_Summary_Code,313TT,NAICS_2012_Code,314110,
+BEA_2012_Summary_Code,313TT,NAICS_2012_Code,314120,
+BEA_2012_Summary_Code,313TT,NAICS_2012_Code,314910,
+BEA_2012_Summary_Code,313TT,NAICS_2012_Code,314994,
+BEA_2012_Summary_Code,313TT,NAICS_2012_Code,314999,
+BEA_2012_Summary_Code,315AL,NAICS_2012_Code,315110,
+BEA_2012_Summary_Code,315AL,NAICS_2012_Code,315190,
+BEA_2012_Summary_Code,315AL,NAICS_2012_Code,315210,
+BEA_2012_Summary_Code,315AL,NAICS_2012_Code,315220,
+BEA_2012_Summary_Code,315AL,NAICS_2012_Code,315240,
+BEA_2012_Summary_Code,315AL,NAICS_2012_Code,315280,
+BEA_2012_Summary_Code,315AL,NAICS_2012_Code,315990,
+BEA_2012_Summary_Code,315AL,NAICS_2012_Code,316110,
+BEA_2012_Summary_Code,315AL,NAICS_2012_Code,316210,
+BEA_2012_Summary_Code,315AL,NAICS_2012_Code,316992,
+BEA_2012_Summary_Code,315AL,NAICS_2012_Code,316998,
+BEA_2012_Summary_Code,321,NAICS_2012_Code,321113,
+BEA_2012_Summary_Code,321,NAICS_2012_Code,321114,
+BEA_2012_Summary_Code,321,NAICS_2012_Code,321211,
+BEA_2012_Summary_Code,321,NAICS_2012_Code,321212,
+BEA_2012_Summary_Code,321,NAICS_2012_Code,321213,
+BEA_2012_Summary_Code,321,NAICS_2012_Code,321214,
+BEA_2012_Summary_Code,321,NAICS_2012_Code,321219,
+BEA_2012_Summary_Code,321,NAICS_2012_Code,321911,
+BEA_2012_Summary_Code,321,NAICS_2012_Code,321912,
+BEA_2012_Summary_Code,321,NAICS_2012_Code,321918,
+BEA_2012_Summary_Code,321,NAICS_2012_Code,321920,
+BEA_2012_Summary_Code,321,NAICS_2012_Code,321991,
+BEA_2012_Summary_Code,321,NAICS_2012_Code,321992,
+BEA_2012_Summary_Code,321,NAICS_2012_Code,321999,
+BEA_2012_Summary_Code,322,NAICS_2012_Code,322110,
+BEA_2012_Summary_Code,322,NAICS_2012_Code,322121,
+BEA_2012_Summary_Code,322,NAICS_2012_Code,322122,
+BEA_2012_Summary_Code,322,NAICS_2012_Code,322130,
+BEA_2012_Summary_Code,322,NAICS_2012_Code,322211,
+BEA_2012_Summary_Code,322,NAICS_2012_Code,322212,
+BEA_2012_Summary_Code,322,NAICS_2012_Code,322219,
+BEA_2012_Summary_Code,322,NAICS_2012_Code,322220,
+BEA_2012_Summary_Code,322,NAICS_2012_Code,322230,
+BEA_2012_Summary_Code,322,NAICS_2012_Code,322291,
+BEA_2012_Summary_Code,322,NAICS_2012_Code,322299,
+BEA_2012_Summary_Code,323,NAICS_2012_Code,323111,
+BEA_2012_Summary_Code,323,NAICS_2012_Code,323113,
+BEA_2012_Summary_Code,323,NAICS_2012_Code,323117,
+BEA_2012_Summary_Code,323,NAICS_2012_Code,323120,
+BEA_2012_Summary_Code,324,NAICS_2012_Code,324110,
+BEA_2012_Summary_Code,324,NAICS_2012_Code,324121,
+BEA_2012_Summary_Code,324,NAICS_2012_Code,324122,
+BEA_2012_Summary_Code,324,NAICS_2012_Code,324191,
+BEA_2012_Summary_Code,324,NAICS_2012_Code,324199,
+BEA_2012_Summary_Code,325,NAICS_2012_Code,325110,
+BEA_2012_Summary_Code,325,NAICS_2012_Code,325120,
+BEA_2012_Summary_Code,325,NAICS_2012_Code,325130,
+BEA_2012_Summary_Code,325,NAICS_2012_Code,325180,
+BEA_2012_Summary_Code,325,NAICS_2012_Code,325193,
+BEA_2012_Summary_Code,325,NAICS_2012_Code,325194,
+BEA_2012_Summary_Code,325,NAICS_2012_Code,325199,
+BEA_2012_Summary_Code,325,NAICS_2012_Code,325211,
+BEA_2012_Summary_Code,325,NAICS_2012_Code,325212,
+BEA_2012_Summary_Code,325,NAICS_2012_Code,325220,
+BEA_2012_Summary_Code,325,NAICS_2012_Code,325311,
+BEA_2012_Summary_Code,325,NAICS_2012_Code,325312,
+BEA_2012_Summary_Code,325,NAICS_2012_Code,325314,
+BEA_2012_Summary_Code,325,NAICS_2012_Code,325320,
+BEA_2012_Summary_Code,325,NAICS_2012_Code,325411,
+BEA_2012_Summary_Code,325,NAICS_2012_Code,325412,
+BEA_2012_Summary_Code,325,NAICS_2012_Code,325413,
+BEA_2012_Summary_Code,325,NAICS_2012_Code,325414,
+BEA_2012_Summary_Code,325,NAICS_2012_Code,325510,
+BEA_2012_Summary_Code,325,NAICS_2012_Code,325520,
+BEA_2012_Summary_Code,325,NAICS_2012_Code,325611,
+BEA_2012_Summary_Code,325,NAICS_2012_Code,325612,
+BEA_2012_Summary_Code,325,NAICS_2012_Code,325613,
+BEA_2012_Summary_Code,325,NAICS_2012_Code,325620,
+BEA_2012_Summary_Code,325,NAICS_2012_Code,325910,
+BEA_2012_Summary_Code,325,NAICS_2012_Code,325920,
+BEA_2012_Summary_Code,325,NAICS_2012_Code,325991,
+BEA_2012_Summary_Code,325,NAICS_2012_Code,325992,
+BEA_2012_Summary_Code,325,NAICS_2012_Code,325998,
+BEA_2012_Summary_Code,326,NAICS_2012_Code,326111,
+BEA_2012_Summary_Code,326,NAICS_2012_Code,326112,
+BEA_2012_Summary_Code,326,NAICS_2012_Code,326113,
+BEA_2012_Summary_Code,326,NAICS_2012_Code,326121,
+BEA_2012_Summary_Code,326,NAICS_2012_Code,326122,
+BEA_2012_Summary_Code,326,NAICS_2012_Code,326130,
+BEA_2012_Summary_Code,326,NAICS_2012_Code,326140,
+BEA_2012_Summary_Code,326,NAICS_2012_Code,326150,
+BEA_2012_Summary_Code,326,NAICS_2012_Code,326160,
+BEA_2012_Summary_Code,326,NAICS_2012_Code,326191,
+BEA_2012_Summary_Code,326,NAICS_2012_Code,326199,
+BEA_2012_Summary_Code,326,NAICS_2012_Code,326211,
+BEA_2012_Summary_Code,326,NAICS_2012_Code,326212,
+BEA_2012_Summary_Code,326,NAICS_2012_Code,326220,
+BEA_2012_Summary_Code,326,NAICS_2012_Code,326291,
+BEA_2012_Summary_Code,326,NAICS_2012_Code,326299,
+BEA_2012_Summary_Code,327,NAICS_2012_Code,327110,
+BEA_2012_Summary_Code,327,NAICS_2012_Code,327120,
+BEA_2012_Summary_Code,327,NAICS_2012_Code,327211,
+BEA_2012_Summary_Code,327,NAICS_2012_Code,327212,
+BEA_2012_Summary_Code,327,NAICS_2012_Code,327213,
+BEA_2012_Summary_Code,327,NAICS_2012_Code,327215,
+BEA_2012_Summary_Code,327,NAICS_2012_Code,327310,
+BEA_2012_Summary_Code,327,NAICS_2012_Code,327320,
+BEA_2012_Summary_Code,327,NAICS_2012_Code,327331,
+BEA_2012_Summary_Code,327,NAICS_2012_Code,327332,
+BEA_2012_Summary_Code,327,NAICS_2012_Code,327390,
+BEA_2012_Summary_Code,327,NAICS_2012_Code,327410,
+BEA_2012_Summary_Code,327,NAICS_2012_Code,327420,
+BEA_2012_Summary_Code,327,NAICS_2012_Code,327910,
+BEA_2012_Summary_Code,327,NAICS_2012_Code,327991,
+BEA_2012_Summary_Code,327,NAICS_2012_Code,327992,
+BEA_2012_Summary_Code,327,NAICS_2012_Code,327993,
+BEA_2012_Summary_Code,327,NAICS_2012_Code,327999,
+BEA_2012_Summary_Code,331,NAICS_2012_Code,331110,
+BEA_2012_Summary_Code,331,NAICS_2012_Code,331210,
+BEA_2012_Summary_Code,331,NAICS_2012_Code,331221,
+BEA_2012_Summary_Code,331,NAICS_2012_Code,331222,
+BEA_2012_Summary_Code,331,NAICS_2012_Code,331313,
+BEA_2012_Summary_Code,331,NAICS_2012_Code,331314,
+BEA_2012_Summary_Code,331,NAICS_2012_Code,331315,
+BEA_2012_Summary_Code,331,NAICS_2012_Code,331318,
+BEA_2012_Summary_Code,331,NAICS_2012_Code,331410,
+BEA_2012_Summary_Code,331,NAICS_2012_Code,331420,
+BEA_2012_Summary_Code,331,NAICS_2012_Code,331491,
+BEA_2012_Summary_Code,331,NAICS_2012_Code,331492,
+BEA_2012_Summary_Code,331,NAICS_2012_Code,331511,
+BEA_2012_Summary_Code,331,NAICS_2012_Code,331512,
+BEA_2012_Summary_Code,331,NAICS_2012_Code,331513,
+BEA_2012_Summary_Code,331,NAICS_2012_Code,331523,
+BEA_2012_Summary_Code,331,NAICS_2012_Code,331524,
+BEA_2012_Summary_Code,331,NAICS_2012_Code,331529,
+BEA_2012_Summary_Code,332,NAICS_2012_Code,332111,
+BEA_2012_Summary_Code,332,NAICS_2012_Code,332112,
+BEA_2012_Summary_Code,332,NAICS_2012_Code,332114,
+BEA_2012_Summary_Code,332,NAICS_2012_Code,332117,
+BEA_2012_Summary_Code,332,NAICS_2012_Code,332119,
+BEA_2012_Summary_Code,332,NAICS_2012_Code,332215,
+BEA_2012_Summary_Code,332,NAICS_2012_Code,332216,
+BEA_2012_Summary_Code,332,NAICS_2012_Code,332311,
+BEA_2012_Summary_Code,332,NAICS_2012_Code,332312,
+BEA_2012_Summary_Code,332,NAICS_2012_Code,332313,
+BEA_2012_Summary_Code,332,NAICS_2012_Code,332321,
+BEA_2012_Summary_Code,332,NAICS_2012_Code,332322,
+BEA_2012_Summary_Code,332,NAICS_2012_Code,332323,
+BEA_2012_Summary_Code,332,NAICS_2012_Code,332410,
+BEA_2012_Summary_Code,332,NAICS_2012_Code,332420,
+BEA_2012_Summary_Code,332,NAICS_2012_Code,332431,
+BEA_2012_Summary_Code,332,NAICS_2012_Code,332439,
+BEA_2012_Summary_Code,332,NAICS_2012_Code,332510,
+BEA_2012_Summary_Code,332,NAICS_2012_Code,332613,
+BEA_2012_Summary_Code,332,NAICS_2012_Code,332618,
+BEA_2012_Summary_Code,332,NAICS_2012_Code,332710,
+BEA_2012_Summary_Code,332,NAICS_2012_Code,332721,
+BEA_2012_Summary_Code,332,NAICS_2012_Code,332722,
+BEA_2012_Summary_Code,332,NAICS_2012_Code,332811,
+BEA_2012_Summary_Code,332,NAICS_2012_Code,332812,
+BEA_2012_Summary_Code,332,NAICS_2012_Code,332813,
+BEA_2012_Summary_Code,332,NAICS_2012_Code,332911,
+BEA_2012_Summary_Code,332,NAICS_2012_Code,332912,
+BEA_2012_Summary_Code,332,NAICS_2012_Code,332913,
+BEA_2012_Summary_Code,332,NAICS_2012_Code,332919,
+BEA_2012_Summary_Code,332,NAICS_2012_Code,332991,
+BEA_2012_Summary_Code,332,NAICS_2012_Code,332992,
+BEA_2012_Summary_Code,332,NAICS_2012_Code,332993,
+BEA_2012_Summary_Code,332,NAICS_2012_Code,332994,
+BEA_2012_Summary_Code,332,NAICS_2012_Code,332996,
+BEA_2012_Summary_Code,332,NAICS_2012_Code,332999,
+BEA_2012_Summary_Code,333,NAICS_2012_Code,333111,
+BEA_2012_Summary_Code,333,NAICS_2012_Code,333112,
+BEA_2012_Summary_Code,333,NAICS_2012_Code,333120,
+BEA_2012_Summary_Code,333,NAICS_2012_Code,333131,
+BEA_2012_Summary_Code,333,NAICS_2012_Code,333132,
+BEA_2012_Summary_Code,333,NAICS_2012_Code,333241,
+BEA_2012_Summary_Code,333,NAICS_2012_Code,333242,
+BEA_2012_Summary_Code,333,NAICS_2012_Code,333243,
+BEA_2012_Summary_Code,333,NAICS_2012_Code,333244,
+BEA_2012_Summary_Code,333,NAICS_2012_Code,333249,
+BEA_2012_Summary_Code,333,NAICS_2012_Code,333314,
+BEA_2012_Summary_Code,333,NAICS_2012_Code,333316,
+BEA_2012_Summary_Code,333,NAICS_2012_Code,333318,
+BEA_2012_Summary_Code,333,NAICS_2012_Code,333413,
+BEA_2012_Summary_Code,333,NAICS_2012_Code,333414,
+BEA_2012_Summary_Code,333,NAICS_2012_Code,333415,
+BEA_2012_Summary_Code,333,NAICS_2012_Code,333511,
+BEA_2012_Summary_Code,333,NAICS_2012_Code,333514,
+BEA_2012_Summary_Code,333,NAICS_2012_Code,333515,
+BEA_2012_Summary_Code,333,NAICS_2012_Code,333517,
+BEA_2012_Summary_Code,333,NAICS_2012_Code,333519,
+BEA_2012_Summary_Code,333,NAICS_2012_Code,333611,
+BEA_2012_Summary_Code,333,NAICS_2012_Code,333612,
+BEA_2012_Summary_Code,333,NAICS_2012_Code,333613,
+BEA_2012_Summary_Code,333,NAICS_2012_Code,333618,
+BEA_2012_Summary_Code,333,NAICS_2012_Code,333911,
+BEA_2012_Summary_Code,333,NAICS_2012_Code,333912,
+BEA_2012_Summary_Code,333,NAICS_2012_Code,333913,
+BEA_2012_Summary_Code,333,NAICS_2012_Code,333921,
+BEA_2012_Summary_Code,333,NAICS_2012_Code,333922,
+BEA_2012_Summary_Code,333,NAICS_2012_Code,333923,
+BEA_2012_Summary_Code,333,NAICS_2012_Code,333924,
+BEA_2012_Summary_Code,333,NAICS_2012_Code,333991,
+BEA_2012_Summary_Code,333,NAICS_2012_Code,333992,
+BEA_2012_Summary_Code,333,NAICS_2012_Code,333993,
+BEA_2012_Summary_Code,333,NAICS_2012_Code,333994,
+BEA_2012_Summary_Code,333,NAICS_2012_Code,333995,
+BEA_2012_Summary_Code,333,NAICS_2012_Code,333996,
+BEA_2012_Summary_Code,333,NAICS_2012_Code,333997,
+BEA_2012_Summary_Code,333,NAICS_2012_Code,333999,
+BEA_2012_Summary_Code,334,NAICS_2012_Code,334111,
+BEA_2012_Summary_Code,334,NAICS_2012_Code,334112,
+BEA_2012_Summary_Code,334,NAICS_2012_Code,334118,
+BEA_2012_Summary_Code,334,NAICS_2012_Code,334210,
+BEA_2012_Summary_Code,334,NAICS_2012_Code,334220,
+BEA_2012_Summary_Code,334,NAICS_2012_Code,334290,
+BEA_2012_Summary_Code,334,NAICS_2012_Code,334310,
+BEA_2012_Summary_Code,334,NAICS_2012_Code,334412,
+BEA_2012_Summary_Code,334,NAICS_2012_Code,334413,
+BEA_2012_Summary_Code,334,NAICS_2012_Code,334416,
+BEA_2012_Summary_Code,334,NAICS_2012_Code,334417,
+BEA_2012_Summary_Code,334,NAICS_2012_Code,334418,
+BEA_2012_Summary_Code,334,NAICS_2012_Code,334419,
+BEA_2012_Summary_Code,334,NAICS_2012_Code,334510,
+BEA_2012_Summary_Code,334,NAICS_2012_Code,334511,
+BEA_2012_Summary_Code,334,NAICS_2012_Code,334512,
+BEA_2012_Summary_Code,334,NAICS_2012_Code,334513,
+BEA_2012_Summary_Code,334,NAICS_2012_Code,334514,
+BEA_2012_Summary_Code,334,NAICS_2012_Code,334515,
+BEA_2012_Summary_Code,334,NAICS_2012_Code,334516,
+BEA_2012_Summary_Code,334,NAICS_2012_Code,334517,
+BEA_2012_Summary_Code,334,NAICS_2012_Code,334519,
+BEA_2012_Summary_Code,334,NAICS_2012_Code,334613,
+BEA_2012_Summary_Code,334,NAICS_2012_Code,334614,
+BEA_2012_Summary_Code,335,NAICS_2012_Code,335110,
+BEA_2012_Summary_Code,335,NAICS_2012_Code,335121,
+BEA_2012_Summary_Code,335,NAICS_2012_Code,335122,
+BEA_2012_Summary_Code,335,NAICS_2012_Code,335129,
+BEA_2012_Summary_Code,335,NAICS_2012_Code,335210,
+BEA_2012_Summary_Code,335,NAICS_2012_Code,335221,
+BEA_2012_Summary_Code,335,NAICS_2012_Code,335222,
+BEA_2012_Summary_Code,335,NAICS_2012_Code,335224,
+BEA_2012_Summary_Code,335,NAICS_2012_Code,335228,
+BEA_2012_Summary_Code,335,NAICS_2012_Code,335311,
+BEA_2012_Summary_Code,335,NAICS_2012_Code,335312,
+BEA_2012_Summary_Code,335,NAICS_2012_Code,335313,
+BEA_2012_Summary_Code,335,NAICS_2012_Code,335314,
+BEA_2012_Summary_Code,335,NAICS_2012_Code,335911,
+BEA_2012_Summary_Code,335,NAICS_2012_Code,335912,
+BEA_2012_Summary_Code,335,NAICS_2012_Code,335921,
+BEA_2012_Summary_Code,335,NAICS_2012_Code,335929,
+BEA_2012_Summary_Code,335,NAICS_2012_Code,335931,
+BEA_2012_Summary_Code,335,NAICS_2012_Code,335932,
+BEA_2012_Summary_Code,335,NAICS_2012_Code,335991,
+BEA_2012_Summary_Code,335,NAICS_2012_Code,335999,
+BEA_2012_Summary_Code,3361MV,NAICS_2012_Code,336111,
+BEA_2012_Summary_Code,3361MV,NAICS_2012_Code,336112,
+BEA_2012_Summary_Code,3361MV,NAICS_2012_Code,336120,
+BEA_2012_Summary_Code,3361MV,NAICS_2012_Code,336211,
+BEA_2012_Summary_Code,3361MV,NAICS_2012_Code,336212,
+BEA_2012_Summary_Code,3361MV,NAICS_2012_Code,336213,
+BEA_2012_Summary_Code,3361MV,NAICS_2012_Code,336214,
+BEA_2012_Summary_Code,3361MV,NAICS_2012_Code,336310,
+BEA_2012_Summary_Code,3361MV,NAICS_2012_Code,336320,
+BEA_2012_Summary_Code,3361MV,NAICS_2012_Code,336330,
+BEA_2012_Summary_Code,3361MV,NAICS_2012_Code,336340,
+BEA_2012_Summary_Code,3361MV,NAICS_2012_Code,336350,
+BEA_2012_Summary_Code,3361MV,NAICS_2012_Code,336360,
+BEA_2012_Summary_Code,3361MV,NAICS_2012_Code,336370,
+BEA_2012_Summary_Code,3361MV,NAICS_2012_Code,336390,
+BEA_2012_Summary_Code,3364OT,NAICS_2012_Code,336411,
+BEA_2012_Summary_Code,3364OT,NAICS_2012_Code,336412,
+BEA_2012_Summary_Code,3364OT,NAICS_2012_Code,336413,
+BEA_2012_Summary_Code,3364OT,NAICS_2012_Code,336414,
+BEA_2012_Summary_Code,3364OT,NAICS_2012_Code,336415,
+BEA_2012_Summary_Code,3364OT,NAICS_2012_Code,336419,
+BEA_2012_Summary_Code,3364OT,NAICS_2012_Code,336510,
+BEA_2012_Summary_Code,3364OT,NAICS_2012_Code,336611,
+BEA_2012_Summary_Code,3364OT,NAICS_2012_Code,336612,
+BEA_2012_Summary_Code,3364OT,NAICS_2012_Code,336991,
+BEA_2012_Summary_Code,3364OT,NAICS_2012_Code,336992,
+BEA_2012_Summary_Code,3364OT,NAICS_2012_Code,336999,
+BEA_2012_Summary_Code,337,NAICS_2012_Code,337110,
+BEA_2012_Summary_Code,337,NAICS_2012_Code,337121,
+BEA_2012_Summary_Code,337,NAICS_2012_Code,337122,
+BEA_2012_Summary_Code,337,NAICS_2012_Code,337124,
+BEA_2012_Summary_Code,337,NAICS_2012_Code,337125,
+BEA_2012_Summary_Code,337,NAICS_2012_Code,337127,
+BEA_2012_Summary_Code,337,NAICS_2012_Code,337211,
+BEA_2012_Summary_Code,337,NAICS_2012_Code,337212,
+BEA_2012_Summary_Code,337,NAICS_2012_Code,337214,
+BEA_2012_Summary_Code,337,NAICS_2012_Code,337215,
+BEA_2012_Summary_Code,337,NAICS_2012_Code,337910,
+BEA_2012_Summary_Code,337,NAICS_2012_Code,337920,
+BEA_2012_Summary_Code,339,NAICS_2012_Code,339112,
+BEA_2012_Summary_Code,339,NAICS_2012_Code,339113,
+BEA_2012_Summary_Code,339,NAICS_2012_Code,339114,
+BEA_2012_Summary_Code,339,NAICS_2012_Code,339115,
+BEA_2012_Summary_Code,339,NAICS_2012_Code,339116,
+BEA_2012_Summary_Code,339,NAICS_2012_Code,339910,
+BEA_2012_Summary_Code,339,NAICS_2012_Code,339920,
+BEA_2012_Summary_Code,339,NAICS_2012_Code,339930,
+BEA_2012_Summary_Code,339,NAICS_2012_Code,339940,
+BEA_2012_Summary_Code,339,NAICS_2012_Code,339950,
+BEA_2012_Summary_Code,339,NAICS_2012_Code,339991,
+BEA_2012_Summary_Code,339,NAICS_2012_Code,339992,
+BEA_2012_Summary_Code,339,NAICS_2012_Code,339993,
+BEA_2012_Summary_Code,339,NAICS_2012_Code,339994,
+BEA_2012_Summary_Code,339,NAICS_2012_Code,339995,
+BEA_2012_Summary_Code,339,NAICS_2012_Code,339999,
+BEA_2012_Summary_Code,42,NAICS_2012_Code,423110,
+BEA_2012_Summary_Code,42,NAICS_2012_Code,423120,
+BEA_2012_Summary_Code,42,NAICS_2012_Code,423130,
+BEA_2012_Summary_Code,42,NAICS_2012_Code,423140,
+BEA_2012_Summary_Code,42,NAICS_2012_Code,423210,
+BEA_2012_Summary_Code,42,NAICS_2012_Code,423220,
+BEA_2012_Summary_Code,42,NAICS_2012_Code,423310,
+BEA_2012_Summary_Code,42,NAICS_2012_Code,423320,
+BEA_2012_Summary_Code,42,NAICS_2012_Code,423330,
+BEA_2012_Summary_Code,42,NAICS_2012_Code,423390,
+BEA_2012_Summary_Code,42,NAICS_2012_Code,423410,
+BEA_2012_Summary_Code,42,NAICS_2012_Code,423420,
+BEA_2012_Summary_Code,42,NAICS_2012_Code,423430,
+BEA_2012_Summary_Code,42,NAICS_2012_Code,423440,
+BEA_2012_Summary_Code,42,NAICS_2012_Code,423450,
+BEA_2012_Summary_Code,42,NAICS_2012_Code,423460,
+BEA_2012_Summary_Code,42,NAICS_2012_Code,423490,
+BEA_2012_Summary_Code,42,NAICS_2012_Code,423510,
+BEA_2012_Summary_Code,42,NAICS_2012_Code,423520,
+BEA_2012_Summary_Code,42,NAICS_2012_Code,423610,
+BEA_2012_Summary_Code,42,NAICS_2012_Code,423620,
+BEA_2012_Summary_Code,42,NAICS_2012_Code,423690,
+BEA_2012_Summary_Code,42,NAICS_2012_Code,423710,
+BEA_2012_Summary_Code,42,NAICS_2012_Code,423720,
+BEA_2012_Summary_Code,42,NAICS_2012_Code,423730,
+BEA_2012_Summary_Code,42,NAICS_2012_Code,423740,
+BEA_2012_Summary_Code,42,NAICS_2012_Code,423810,
+BEA_2012_Summary_Code,42,NAICS_2012_Code,423820,
+BEA_2012_Summary_Code,42,NAICS_2012_Code,423830,
+BEA_2012_Summary_Code,42,NAICS_2012_Code,423840,
+BEA_2012_Summary_Code,42,NAICS_2012_Code,423850,
+BEA_2012_Summary_Code,42,NAICS_2012_Code,423860,
+BEA_2012_Summary_Code,42,NAICS_2012_Code,423910,
+BEA_2012_Summary_Code,42,NAICS_2012_Code,423920,
+BEA_2012_Summary_Code,42,NAICS_2012_Code,423930,
+BEA_2012_Summary_Code,42,NAICS_2012_Code,423940,
+BEA_2012_Summary_Code,42,NAICS_2012_Code,423990,
+BEA_2012_Summary_Code,42,NAICS_2012_Code,424110,
+BEA_2012_Summary_Code,42,NAICS_2012_Code,424120,
+BEA_2012_Summary_Code,42,NAICS_2012_Code,424130,
+BEA_2012_Summary_Code,42,NAICS_2012_Code,424210,
+BEA_2012_Summary_Code,42,NAICS_2012_Code,424310,
+BEA_2012_Summary_Code,42,NAICS_2012_Code,424320,
+BEA_2012_Summary_Code,42,NAICS_2012_Code,424330,
+BEA_2012_Summary_Code,42,NAICS_2012_Code,424340,
+BEA_2012_Summary_Code,42,NAICS_2012_Code,424410,
+BEA_2012_Summary_Code,42,NAICS_2012_Code,424420,
+BEA_2012_Summary_Code,42,NAICS_2012_Code,424430,
+BEA_2012_Summary_Code,42,NAICS_2012_Code,424440,
+BEA_2012_Summary_Code,42,NAICS_2012_Code,424450,
+BEA_2012_Summary_Code,42,NAICS_2012_Code,424460,
+BEA_2012_Summary_Code,42,NAICS_2012_Code,424470,
+BEA_2012_Summary_Code,42,NAICS_2012_Code,424480,
+BEA_2012_Summary_Code,42,NAICS_2012_Code,424490,
+BEA_2012_Summary_Code,42,NAICS_2012_Code,424510,
+BEA_2012_Summary_Code,42,NAICS_2012_Code,424520,
+BEA_2012_Summary_Code,42,NAICS_2012_Code,424590,
+BEA_2012_Summary_Code,42,NAICS_2012_Code,424610,
+BEA_2012_Summary_Code,42,NAICS_2012_Code,424690,
+BEA_2012_Summary_Code,42,NAICS_2012_Code,424710,
+BEA_2012_Summary_Code,42,NAICS_2012_Code,424720,
+BEA_2012_Summary_Code,42,NAICS_2012_Code,424810,
+BEA_2012_Summary_Code,42,NAICS_2012_Code,424820,
+BEA_2012_Summary_Code,42,NAICS_2012_Code,424910,
+BEA_2012_Summary_Code,42,NAICS_2012_Code,424920,
+BEA_2012_Summary_Code,42,NAICS_2012_Code,424930,
+BEA_2012_Summary_Code,42,NAICS_2012_Code,424940,
+BEA_2012_Summary_Code,42,NAICS_2012_Code,424950,
+BEA_2012_Summary_Code,42,NAICS_2012_Code,424990,
+BEA_2012_Summary_Code,42,NAICS_2012_Code,425110,
+BEA_2012_Summary_Code,42,NAICS_2012_Code,425120,
+BEA_2012_Summary_Code,441,NAICS_2012_Code,441110,
+BEA_2012_Summary_Code,441,NAICS_2012_Code,441120,
+BEA_2012_Summary_Code,441,NAICS_2012_Code,441210,
+BEA_2012_Summary_Code,441,NAICS_2012_Code,441222,
+BEA_2012_Summary_Code,441,NAICS_2012_Code,441228,
+BEA_2012_Summary_Code,441,NAICS_2012_Code,441310,
+BEA_2012_Summary_Code,441,NAICS_2012_Code,441320,
+BEA_2012_Summary_Code,4A0,NAICS_2012_Code,442110,
+BEA_2012_Summary_Code,4A0,NAICS_2012_Code,442210,
+BEA_2012_Summary_Code,4A0,NAICS_2012_Code,442291,
+BEA_2012_Summary_Code,4A0,NAICS_2012_Code,442299,
+BEA_2012_Summary_Code,4A0,NAICS_2012_Code,443141,
+BEA_2012_Summary_Code,4A0,NAICS_2012_Code,443142,
+BEA_2012_Summary_Code,4A0,NAICS_2012_Code,444110,
+BEA_2012_Summary_Code,4A0,NAICS_2012_Code,444120,
+BEA_2012_Summary_Code,4A0,NAICS_2012_Code,444130,
+BEA_2012_Summary_Code,4A0,NAICS_2012_Code,444190,
+BEA_2012_Summary_Code,4A0,NAICS_2012_Code,444210,
+BEA_2012_Summary_Code,4A0,NAICS_2012_Code,444220,
+BEA_2012_Summary_Code,445,NAICS_2012_Code,445110,
+BEA_2012_Summary_Code,445,NAICS_2012_Code,445120,
+BEA_2012_Summary_Code,445,NAICS_2012_Code,445210,
+BEA_2012_Summary_Code,445,NAICS_2012_Code,445220,
+BEA_2012_Summary_Code,445,NAICS_2012_Code,445230,
+BEA_2012_Summary_Code,445,NAICS_2012_Code,445291,
+BEA_2012_Summary_Code,445,NAICS_2012_Code,445292,
+BEA_2012_Summary_Code,445,NAICS_2012_Code,445299,
+BEA_2012_Summary_Code,445,NAICS_2012_Code,445310,
+BEA_2012_Summary_Code,4A0,NAICS_2012_Code,446110,
+BEA_2012_Summary_Code,4A0,NAICS_2012_Code,446120,
+BEA_2012_Summary_Code,4A0,NAICS_2012_Code,446130,
+BEA_2012_Summary_Code,4A0,NAICS_2012_Code,446191,
+BEA_2012_Summary_Code,4A0,NAICS_2012_Code,446199,
+BEA_2012_Summary_Code,4A0,NAICS_2012_Code,447110,
+BEA_2012_Summary_Code,4A0,NAICS_2012_Code,447190,
+BEA_2012_Summary_Code,4A0,NAICS_2012_Code,448110,
+BEA_2012_Summary_Code,4A0,NAICS_2012_Code,448120,
+BEA_2012_Summary_Code,4A0,NAICS_2012_Code,448130,
+BEA_2012_Summary_Code,4A0,NAICS_2012_Code,448140,
+BEA_2012_Summary_Code,4A0,NAICS_2012_Code,448150,
+BEA_2012_Summary_Code,4A0,NAICS_2012_Code,448190,
+BEA_2012_Summary_Code,4A0,NAICS_2012_Code,448210,
+BEA_2012_Summary_Code,4A0,NAICS_2012_Code,448310,
+BEA_2012_Summary_Code,4A0,NAICS_2012_Code,448320,
+BEA_2012_Summary_Code,4A0,NAICS_2012_Code,451110,
+BEA_2012_Summary_Code,4A0,NAICS_2012_Code,451120,
+BEA_2012_Summary_Code,4A0,NAICS_2012_Code,451130,
+BEA_2012_Summary_Code,4A0,NAICS_2012_Code,451140,
+BEA_2012_Summary_Code,4A0,NAICS_2012_Code,451211,
+BEA_2012_Summary_Code,4A0,NAICS_2012_Code,451212,
+BEA_2012_Summary_Code,452,NAICS_2012_Code,452111,
+BEA_2012_Summary_Code,452,NAICS_2012_Code,452112,
+BEA_2012_Summary_Code,452,NAICS_2012_Code,452910,
+BEA_2012_Summary_Code,452,NAICS_2012_Code,452990,
+BEA_2012_Summary_Code,4A0,NAICS_2012_Code,453110,
+BEA_2012_Summary_Code,4A0,NAICS_2012_Code,453210,
+BEA_2012_Summary_Code,4A0,NAICS_2012_Code,453220,
+BEA_2012_Summary_Code,4A0,NAICS_2012_Code,453310,
+BEA_2012_Summary_Code,4A0,NAICS_2012_Code,453910,
+BEA_2012_Summary_Code,4A0,NAICS_2012_Code,453920,
+BEA_2012_Summary_Code,4A0,NAICS_2012_Code,453930,
+BEA_2012_Summary_Code,4A0,NAICS_2012_Code,453991,
+BEA_2012_Summary_Code,4A0,NAICS_2012_Code,453998,
+BEA_2012_Summary_Code,4A0,NAICS_2012_Code,454111,
+BEA_2012_Summary_Code,4A0,NAICS_2012_Code,454112,
+BEA_2012_Summary_Code,4A0,NAICS_2012_Code,454113,
+BEA_2012_Summary_Code,4A0,NAICS_2012_Code,454210,
+BEA_2012_Summary_Code,4A0,NAICS_2012_Code,454310,
+BEA_2012_Summary_Code,4A0,NAICS_2012_Code,454390,
+BEA_2012_Summary_Code,481,NAICS_2012_Code,481111,
+BEA_2012_Summary_Code,481,NAICS_2012_Code,481112,
+BEA_2012_Summary_Code,481,NAICS_2012_Code,481211,
+BEA_2012_Summary_Code,481,NAICS_2012_Code,481212,
+BEA_2012_Summary_Code,481,NAICS_2012_Code,481219,
+BEA_2012_Summary_Code,482,NAICS_2012_Code,482111,
+BEA_2012_Summary_Code,482,NAICS_2012_Code,482112,
+BEA_2012_Summary_Code,483,NAICS_2012_Code,483111,
+BEA_2012_Summary_Code,483,NAICS_2012_Code,483112,
+BEA_2012_Summary_Code,483,NAICS_2012_Code,483113,
+BEA_2012_Summary_Code,483,NAICS_2012_Code,483114,
+BEA_2012_Summary_Code,483,NAICS_2012_Code,483211,
+BEA_2012_Summary_Code,483,NAICS_2012_Code,483212,
+BEA_2012_Summary_Code,484,NAICS_2012_Code,484110,
+BEA_2012_Summary_Code,484,NAICS_2012_Code,484121,
+BEA_2012_Summary_Code,484,NAICS_2012_Code,484122,
+BEA_2012_Summary_Code,484,NAICS_2012_Code,484210,
+BEA_2012_Summary_Code,484,NAICS_2012_Code,484220,
+BEA_2012_Summary_Code,484,NAICS_2012_Code,484230,
+BEA_2012_Summary_Code,485,NAICS_2012_Code,485111,
+BEA_2012_Summary_Code,485,NAICS_2012_Code,485112,
+BEA_2012_Summary_Code,485,NAICS_2012_Code,485113,
+BEA_2012_Summary_Code,485,NAICS_2012_Code,485119,
+BEA_2012_Summary_Code,485,NAICS_2012_Code,485210,
+BEA_2012_Summary_Code,485,NAICS_2012_Code,485310,
+BEA_2012_Summary_Code,485,NAICS_2012_Code,485320,
+BEA_2012_Summary_Code,485,NAICS_2012_Code,485410,
+BEA_2012_Summary_Code,485,NAICS_2012_Code,485510,
+BEA_2012_Summary_Code,485,NAICS_2012_Code,485991,
+BEA_2012_Summary_Code,485,NAICS_2012_Code,485999,
+BEA_2012_Summary_Code,486,NAICS_2012_Code,486110,
+BEA_2012_Summary_Code,486,NAICS_2012_Code,486210,
+BEA_2012_Summary_Code,486,NAICS_2012_Code,486910,
+BEA_2012_Summary_Code,486,NAICS_2012_Code,486990,
+BEA_2012_Summary_Code,487OS,NAICS_2012_Code,487110,
+BEA_2012_Summary_Code,487OS,NAICS_2012_Code,487210,
+BEA_2012_Summary_Code,487OS,NAICS_2012_Code,487990,
+BEA_2012_Summary_Code,487OS,NAICS_2012_Code,488111,
+BEA_2012_Summary_Code,487OS,NAICS_2012_Code,488119,
+BEA_2012_Summary_Code,487OS,NAICS_2012_Code,488190,
+BEA_2012_Summary_Code,487OS,NAICS_2012_Code,488210,
+BEA_2012_Summary_Code,487OS,NAICS_2012_Code,488310,
+BEA_2012_Summary_Code,487OS,NAICS_2012_Code,488320,
+BEA_2012_Summary_Code,487OS,NAICS_2012_Code,488330,
+BEA_2012_Summary_Code,487OS,NAICS_2012_Code,488390,
+BEA_2012_Summary_Code,487OS,NAICS_2012_Code,488410,
+BEA_2012_Summary_Code,487OS,NAICS_2012_Code,488490,
+BEA_2012_Summary_Code,487OS,NAICS_2012_Code,488510,
+BEA_2012_Summary_Code,487OS,NAICS_2012_Code,488991,
+BEA_2012_Summary_Code,487OS,NAICS_2012_Code,488999,
+BEA_2012_Summary_Code,GFE,NAICS_2012_Code,491110,
+BEA_2012_Summary_Code,487OS,NAICS_2012_Code,492110,
+BEA_2012_Summary_Code,487OS,NAICS_2012_Code,492210,
+BEA_2012_Summary_Code,493,NAICS_2012_Code,493110,
+BEA_2012_Summary_Code,493,NAICS_2012_Code,493120,
+BEA_2012_Summary_Code,493,NAICS_2012_Code,493130,
+BEA_2012_Summary_Code,493,NAICS_2012_Code,493190,
+BEA_2012_Summary_Code,511,NAICS_2012_Code,511110,
+BEA_2012_Summary_Code,511,NAICS_2012_Code,511120,
+BEA_2012_Summary_Code,511,NAICS_2012_Code,511130,
+BEA_2012_Summary_Code,511,NAICS_2012_Code,511140,
+BEA_2012_Summary_Code,511,NAICS_2012_Code,511191,
+BEA_2012_Summary_Code,511,NAICS_2012_Code,511199,
+BEA_2012_Summary_Code,511,NAICS_2012_Code,511210,
+BEA_2012_Summary_Code,512,NAICS_2012_Code,512110,
+BEA_2012_Summary_Code,512,NAICS_2012_Code,512120,
+BEA_2012_Summary_Code,512,NAICS_2012_Code,512131,
+BEA_2012_Summary_Code,512,NAICS_2012_Code,512132,
+BEA_2012_Summary_Code,512,NAICS_2012_Code,512191,
+BEA_2012_Summary_Code,512,NAICS_2012_Code,512199,
+BEA_2012_Summary_Code,512,NAICS_2012_Code,512210,
+BEA_2012_Summary_Code,512,NAICS_2012_Code,512220,
+BEA_2012_Summary_Code,512,NAICS_2012_Code,512230,
+BEA_2012_Summary_Code,512,NAICS_2012_Code,512240,
+BEA_2012_Summary_Code,512,NAICS_2012_Code,512290,
+BEA_2012_Summary_Code,513,NAICS_2012_Code,515111,
+BEA_2012_Summary_Code,513,NAICS_2012_Code,515112,
+BEA_2012_Summary_Code,513,NAICS_2012_Code,515120,
+BEA_2012_Summary_Code,513,NAICS_2012_Code,515210,
+BEA_2012_Summary_Code,513,NAICS_2012_Code,517110,
+BEA_2012_Summary_Code,513,NAICS_2012_Code,517210,
+BEA_2012_Summary_Code,513,NAICS_2012_Code,517410,
+BEA_2012_Summary_Code,513,NAICS_2012_Code,517911,
+BEA_2012_Summary_Code,513,NAICS_2012_Code,517919,
+BEA_2012_Summary_Code,514,NAICS_2012_Code,518210,
+BEA_2012_Summary_Code,514,NAICS_2012_Code,519110,
+BEA_2012_Summary_Code,514,NAICS_2012_Code,519120,
+BEA_2012_Summary_Code,514,NAICS_2012_Code,519130,
+BEA_2012_Summary_Code,514,NAICS_2012_Code,519190,
+BEA_2012_Summary_Code,521CI,NAICS_2012_Code,521110,
+BEA_2012_Summary_Code,521CI,NAICS_2012_Code,522110,
+BEA_2012_Summary_Code,521CI,NAICS_2012_Code,522120,
+BEA_2012_Summary_Code,521CI,NAICS_2012_Code,522130,
+BEA_2012_Summary_Code,521CI,NAICS_2012_Code,522190,
+BEA_2012_Summary_Code,521CI,NAICS_2012_Code,522210,
+BEA_2012_Summary_Code,521CI,NAICS_2012_Code,522220,
+BEA_2012_Summary_Code,521CI,NAICS_2012_Code,522291,
+BEA_2012_Summary_Code,521CI,NAICS_2012_Code,522292,
+BEA_2012_Summary_Code,521CI,NAICS_2012_Code,522293,
+BEA_2012_Summary_Code,521CI,NAICS_2012_Code,522294,
+BEA_2012_Summary_Code,521CI,NAICS_2012_Code,522298,
+BEA_2012_Summary_Code,521CI,NAICS_2012_Code,522310,
+BEA_2012_Summary_Code,521CI,NAICS_2012_Code,522320,
+BEA_2012_Summary_Code,521CI,NAICS_2012_Code,522390,
+BEA_2012_Summary_Code,523,NAICS_2012_Code,523110,
+BEA_2012_Summary_Code,523,NAICS_2012_Code,523120,
+BEA_2012_Summary_Code,523,NAICS_2012_Code,523130,
+BEA_2012_Summary_Code,523,NAICS_2012_Code,523140,
+BEA_2012_Summary_Code,523,NAICS_2012_Code,523210,
+BEA_2012_Summary_Code,523,NAICS_2012_Code,523910,
+BEA_2012_Summary_Code,523,NAICS_2012_Code,523920,
+BEA_2012_Summary_Code,523,NAICS_2012_Code,523930,
+BEA_2012_Summary_Code,523,NAICS_2012_Code,523991,
+BEA_2012_Summary_Code,523,NAICS_2012_Code,523999,
+BEA_2012_Summary_Code,524,NAICS_2012_Code,524113,
+BEA_2012_Summary_Code,524,NAICS_2012_Code,524114,
+BEA_2012_Summary_Code,524,NAICS_2012_Code,524126,
+BEA_2012_Summary_Code,524,NAICS_2012_Code,524127,
+BEA_2012_Summary_Code,524,NAICS_2012_Code,524128,
+BEA_2012_Summary_Code,524,NAICS_2012_Code,524130,
+BEA_2012_Summary_Code,524,NAICS_2012_Code,524210,
+BEA_2012_Summary_Code,524,NAICS_2012_Code,524291,
+BEA_2012_Summary_Code,524,NAICS_2012_Code,524292,
+BEA_2012_Summary_Code,524,NAICS_2012_Code,524298,
+BEA_2012_Summary_Code,525,NAICS_2012_Code,525110,
+BEA_2012_Summary_Code,525,NAICS_2012_Code,525120,
+BEA_2012_Summary_Code,525,NAICS_2012_Code,525190,
+BEA_2012_Summary_Code,525,NAICS_2012_Code,525910,
+BEA_2012_Summary_Code,525,NAICS_2012_Code,525920,
+BEA_2012_Summary_Code,525,NAICS_2012_Code,525990,
+BEA_2012_Summary_Code,HS,NAICS_2012_Code,531110,
+BEA_2012_Summary_Code,ORE,NAICS_2012_Code,531110,
+BEA_2012_Summary_Code,HS,NAICS_2012_Code,531120,
+BEA_2012_Summary_Code,ORE,NAICS_2012_Code,531120,
+BEA_2012_Summary_Code,HS,NAICS_2012_Code,531130,
+BEA_2012_Summary_Code,ORE,NAICS_2012_Code,531130,
+BEA_2012_Summary_Code,HS,NAICS_2012_Code,531190,
+BEA_2012_Summary_Code,ORE,NAICS_2012_Code,531190,
+BEA_2012_Summary_Code,HS,NAICS_2012_Code,531210,
+BEA_2012_Summary_Code,ORE,NAICS_2012_Code,531210,
+BEA_2012_Summary_Code,ORE,NAICS_2012_Code,531311,
+BEA_2012_Summary_Code,HS,NAICS_2012_Code,531311,
+BEA_2012_Summary_Code,HS,NAICS_2012_Code,531312,
+BEA_2012_Summary_Code,ORE,NAICS_2012_Code,531312,
+BEA_2012_Summary_Code,HS,NAICS_2012_Code,531320,
+BEA_2012_Summary_Code,ORE,NAICS_2012_Code,531320,
+BEA_2012_Summary_Code,HS,NAICS_2012_Code,531390,
+BEA_2012_Summary_Code,ORE,NAICS_2012_Code,531390,
+BEA_2012_Summary_Code,532RL,NAICS_2012_Code,532111,
+BEA_2012_Summary_Code,HS,NAICS_2012_Code,532111,
+BEA_2012_Summary_Code,HS,NAICS_2012_Code,532112,
+BEA_2012_Summary_Code,532RL,NAICS_2012_Code,532112,
+BEA_2012_Summary_Code,HS,NAICS_2012_Code,532120,
+BEA_2012_Summary_Code,532RL,NAICS_2012_Code,532120,
+BEA_2012_Summary_Code,HS,NAICS_2012_Code,532210,
+BEA_2012_Summary_Code,532RL,NAICS_2012_Code,532210,
+BEA_2012_Summary_Code,HS,NAICS_2012_Code,532220,
+BEA_2012_Summary_Code,532RL,NAICS_2012_Code,532220,
+BEA_2012_Summary_Code,532RL,NAICS_2012_Code,532230,
+BEA_2012_Summary_Code,HS,NAICS_2012_Code,532230,
+BEA_2012_Summary_Code,HS,NAICS_2012_Code,532291,
+BEA_2012_Summary_Code,532RL,NAICS_2012_Code,532291,
+BEA_2012_Summary_Code,HS,NAICS_2012_Code,532292,
+BEA_2012_Summary_Code,532RL,NAICS_2012_Code,532292,
+BEA_2012_Summary_Code,HS,NAICS_2012_Code,532299,
+BEA_2012_Summary_Code,532RL,NAICS_2012_Code,532299,
+BEA_2012_Summary_Code,532RL,NAICS_2012_Code,532310,
+BEA_2012_Summary_Code,HS,NAICS_2012_Code,532310,
+BEA_2012_Summary_Code,HS,NAICS_2012_Code,532411,
+BEA_2012_Summary_Code,532RL,NAICS_2012_Code,532411,
+BEA_2012_Summary_Code,HS,NAICS_2012_Code,532412,
+BEA_2012_Summary_Code,532RL,NAICS_2012_Code,532412,
+BEA_2012_Summary_Code,HS,NAICS_2012_Code,532420,
+BEA_2012_Summary_Code,532RL,NAICS_2012_Code,532420,
+BEA_2012_Summary_Code,HS,NAICS_2012_Code,532490,
+BEA_2012_Summary_Code,532RL,NAICS_2012_Code,532490,
+BEA_2012_Summary_Code,532RL,NAICS_2012_Code,533110,
+BEA_2012_Summary_Code,HS,NAICS_2012_Code,533110,
+BEA_2012_Summary_Code,5411,NAICS_2012_Code,541110,
+BEA_2012_Summary_Code,5411,NAICS_2012_Code,541120,
+BEA_2012_Summary_Code,5411,NAICS_2012_Code,541191,
+BEA_2012_Summary_Code,5411,NAICS_2012_Code,541199,
+BEA_2012_Summary_Code,5412OP,NAICS_2012_Code,541211,
+BEA_2012_Summary_Code,5412OP,NAICS_2012_Code,541213,
+BEA_2012_Summary_Code,5412OP,NAICS_2012_Code,541214,
+BEA_2012_Summary_Code,5412OP,NAICS_2012_Code,541219,
+BEA_2012_Summary_Code,5412OP,NAICS_2012_Code,541310,
+BEA_2012_Summary_Code,5412OP,NAICS_2012_Code,541320,
+BEA_2012_Summary_Code,5412OP,NAICS_2012_Code,541330,
+BEA_2012_Summary_Code,5412OP,NAICS_2012_Code,541340,
+BEA_2012_Summary_Code,5412OP,NAICS_2012_Code,541350,
+BEA_2012_Summary_Code,5412OP,NAICS_2012_Code,541360,
+BEA_2012_Summary_Code,5412OP,NAICS_2012_Code,541370,
+BEA_2012_Summary_Code,5412OP,NAICS_2012_Code,541380,
+BEA_2012_Summary_Code,5412OP,NAICS_2012_Code,541410,
+BEA_2012_Summary_Code,5412OP,NAICS_2012_Code,541420,
+BEA_2012_Summary_Code,5412OP,NAICS_2012_Code,541430,
+BEA_2012_Summary_Code,5412OP,NAICS_2012_Code,541490,
+BEA_2012_Summary_Code,5415,NAICS_2012_Code,541511,
+BEA_2012_Summary_Code,5415,NAICS_2012_Code,541512,
+BEA_2012_Summary_Code,5415,NAICS_2012_Code,541513,
+BEA_2012_Summary_Code,5415,NAICS_2012_Code,541519,
+BEA_2012_Summary_Code,5412OP,NAICS_2012_Code,541611,
+BEA_2012_Summary_Code,5412OP,NAICS_2012_Code,541612,
+BEA_2012_Summary_Code,5412OP,NAICS_2012_Code,541613,
+BEA_2012_Summary_Code,5412OP,NAICS_2012_Code,541614,
+BEA_2012_Summary_Code,5412OP,NAICS_2012_Code,541618,
+BEA_2012_Summary_Code,5412OP,NAICS_2012_Code,541620,
+BEA_2012_Summary_Code,5412OP,NAICS_2012_Code,541690,
+BEA_2012_Summary_Code,5412OP,NAICS_2012_Code,541711,
+BEA_2012_Summary_Code,5412OP,NAICS_2012_Code,541712,
+BEA_2012_Summary_Code,5412OP,NAICS_2012_Code,541720,
+BEA_2012_Summary_Code,5412OP,NAICS_2012_Code,541810,
+BEA_2012_Summary_Code,5412OP,NAICS_2012_Code,541820,
+BEA_2012_Summary_Code,5412OP,NAICS_2012_Code,541830,
+BEA_2012_Summary_Code,5412OP,NAICS_2012_Code,541840,
+BEA_2012_Summary_Code,5412OP,NAICS_2012_Code,541850,
+BEA_2012_Summary_Code,5412OP,NAICS_2012_Code,541860,
+BEA_2012_Summary_Code,5412OP,NAICS_2012_Code,541870,
+BEA_2012_Summary_Code,5412OP,NAICS_2012_Code,541890,
+BEA_2012_Summary_Code,5412OP,NAICS_2012_Code,541910,
+BEA_2012_Summary_Code,5412OP,NAICS_2012_Code,541921,
+BEA_2012_Summary_Code,5412OP,NAICS_2012_Code,541922,
+BEA_2012_Summary_Code,5412OP,NAICS_2012_Code,541930,
+BEA_2012_Summary_Code,5412OP,NAICS_2012_Code,541940,
+BEA_2012_Summary_Code,5412OP,NAICS_2012_Code,541990,
+BEA_2012_Summary_Code,55,NAICS_2012_Code,551111,
+BEA_2012_Summary_Code,55,NAICS_2012_Code,551112,
+BEA_2012_Summary_Code,55,NAICS_2012_Code,551114,
+BEA_2012_Summary_Code,561,NAICS_2012_Code,561110,
+BEA_2012_Summary_Code,561,NAICS_2012_Code,561210,
+BEA_2012_Summary_Code,561,NAICS_2012_Code,561311,
+BEA_2012_Summary_Code,561,NAICS_2012_Code,561312,
+BEA_2012_Summary_Code,561,NAICS_2012_Code,561320,
+BEA_2012_Summary_Code,561,NAICS_2012_Code,561330,
+BEA_2012_Summary_Code,561,NAICS_2012_Code,561410,
+BEA_2012_Summary_Code,561,NAICS_2012_Code,561421,
+BEA_2012_Summary_Code,561,NAICS_2012_Code,561422,
+BEA_2012_Summary_Code,561,NAICS_2012_Code,561431,
+BEA_2012_Summary_Code,561,NAICS_2012_Code,561439,
+BEA_2012_Summary_Code,561,NAICS_2012_Code,561440,
+BEA_2012_Summary_Code,561,NAICS_2012_Code,561450,
+BEA_2012_Summary_Code,561,NAICS_2012_Code,561491,
+BEA_2012_Summary_Code,561,NAICS_2012_Code,561492,
+BEA_2012_Summary_Code,561,NAICS_2012_Code,561499,
+BEA_2012_Summary_Code,561,NAICS_2012_Code,561510,
+BEA_2012_Summary_Code,561,NAICS_2012_Code,561520,
+BEA_2012_Summary_Code,561,NAICS_2012_Code,561591,
+BEA_2012_Summary_Code,561,NAICS_2012_Code,561599,
+BEA_2012_Summary_Code,561,NAICS_2012_Code,561611,
+BEA_2012_Summary_Code,561,NAICS_2012_Code,561612,
+BEA_2012_Summary_Code,561,NAICS_2012_Code,561613,
+BEA_2012_Summary_Code,561,NAICS_2012_Code,561621,
+BEA_2012_Summary_Code,561,NAICS_2012_Code,561622,
+BEA_2012_Summary_Code,561,NAICS_2012_Code,561710,
+BEA_2012_Summary_Code,561,NAICS_2012_Code,561720,
+BEA_2012_Summary_Code,561,NAICS_2012_Code,561730,
+BEA_2012_Summary_Code,561,NAICS_2012_Code,561740,
+BEA_2012_Summary_Code,561,NAICS_2012_Code,561790,
+BEA_2012_Summary_Code,561,NAICS_2012_Code,561910,
+BEA_2012_Summary_Code,561,NAICS_2012_Code,561920,
+BEA_2012_Summary_Code,561,NAICS_2012_Code,561990,
+BEA_2012_Summary_Code,562,NAICS_2012_Code,562111,
+BEA_2012_Summary_Code,562,NAICS_2012_Code,562112,
+BEA_2012_Summary_Code,562,NAICS_2012_Code,562119,
+BEA_2012_Summary_Code,562,NAICS_2012_Code,562211,
+BEA_2012_Summary_Code,562,NAICS_2012_Code,562212,
+BEA_2012_Summary_Code,562,NAICS_2012_Code,562213,
+BEA_2012_Summary_Code,562,NAICS_2012_Code,562219,
+BEA_2012_Summary_Code,562,NAICS_2012_Code,562910,
+BEA_2012_Summary_Code,562,NAICS_2012_Code,562920,
+BEA_2012_Summary_Code,562,NAICS_2012_Code,562991,
+BEA_2012_Summary_Code,562,NAICS_2012_Code,562998,
+BEA_2012_Summary_Code,61,NAICS_2012_Code,611110,
+BEA_2012_Summary_Code,61,NAICS_2012_Code,611210,
+BEA_2012_Summary_Code,61,NAICS_2012_Code,611310,
+BEA_2012_Summary_Code,61,NAICS_2012_Code,611410,
+BEA_2012_Summary_Code,61,NAICS_2012_Code,611420,
+BEA_2012_Summary_Code,61,NAICS_2012_Code,611430,
+BEA_2012_Summary_Code,61,NAICS_2012_Code,611511,
+BEA_2012_Summary_Code,61,NAICS_2012_Code,611512,
+BEA_2012_Summary_Code,61,NAICS_2012_Code,611513,
+BEA_2012_Summary_Code,61,NAICS_2012_Code,611519,
+BEA_2012_Summary_Code,61,NAICS_2012_Code,611610,
+BEA_2012_Summary_Code,61,NAICS_2012_Code,611620,
+BEA_2012_Summary_Code,61,NAICS_2012_Code,611630,
+BEA_2012_Summary_Code,61,NAICS_2012_Code,611691,
+BEA_2012_Summary_Code,61,NAICS_2012_Code,611692,
+BEA_2012_Summary_Code,61,NAICS_2012_Code,611699,
+BEA_2012_Summary_Code,61,NAICS_2012_Code,611710,
+BEA_2012_Summary_Code,621,NAICS_2012_Code,621111,
+BEA_2012_Summary_Code,621,NAICS_2012_Code,621112,
+BEA_2012_Summary_Code,621,NAICS_2012_Code,621210,
+BEA_2012_Summary_Code,621,NAICS_2012_Code,621310,
+BEA_2012_Summary_Code,621,NAICS_2012_Code,621320,
+BEA_2012_Summary_Code,621,NAICS_2012_Code,621330,
+BEA_2012_Summary_Code,621,NAICS_2012_Code,621340,
+BEA_2012_Summary_Code,621,NAICS_2012_Code,621391,
+BEA_2012_Summary_Code,621,NAICS_2012_Code,621399,
+BEA_2012_Summary_Code,621,NAICS_2012_Code,621410,
+BEA_2012_Summary_Code,621,NAICS_2012_Code,621420,
+BEA_2012_Summary_Code,621,NAICS_2012_Code,621491,
+BEA_2012_Summary_Code,621,NAICS_2012_Code,621492,
+BEA_2012_Summary_Code,621,NAICS_2012_Code,621493,
+BEA_2012_Summary_Code,621,NAICS_2012_Code,621498,
+BEA_2012_Summary_Code,621,NAICS_2012_Code,621511,
+BEA_2012_Summary_Code,621,NAICS_2012_Code,621512,
+BEA_2012_Summary_Code,621,NAICS_2012_Code,621610,
+BEA_2012_Summary_Code,621,NAICS_2012_Code,621910,
+BEA_2012_Summary_Code,621,NAICS_2012_Code,621991,
+BEA_2012_Summary_Code,621,NAICS_2012_Code,621999,
+BEA_2012_Summary_Code,622,NAICS_2012_Code,622110,
+BEA_2012_Summary_Code,622,NAICS_2012_Code,622210,
+BEA_2012_Summary_Code,622,NAICS_2012_Code,622310,
+BEA_2012_Summary_Code,623,NAICS_2012_Code,623110,
+BEA_2012_Summary_Code,623,NAICS_2012_Code,623210,
+BEA_2012_Summary_Code,623,NAICS_2012_Code,623220,
+BEA_2012_Summary_Code,623,NAICS_2012_Code,623311,
+BEA_2012_Summary_Code,623,NAICS_2012_Code,623312,
+BEA_2012_Summary_Code,623,NAICS_2012_Code,623990,
+BEA_2012_Summary_Code,624,NAICS_2012_Code,624110,
+BEA_2012_Summary_Code,624,NAICS_2012_Code,624120,
+BEA_2012_Summary_Code,624,NAICS_2012_Code,624190,
+BEA_2012_Summary_Code,624,NAICS_2012_Code,624210,
+BEA_2012_Summary_Code,624,NAICS_2012_Code,624221,
+BEA_2012_Summary_Code,624,NAICS_2012_Code,624229,
+BEA_2012_Summary_Code,624,NAICS_2012_Code,624230,
+BEA_2012_Summary_Code,624,NAICS_2012_Code,624310,
+BEA_2012_Summary_Code,624,NAICS_2012_Code,624410,
+BEA_2012_Summary_Code,711AS,NAICS_2012_Code,711110,
+BEA_2012_Summary_Code,711AS,NAICS_2012_Code,711120,
+BEA_2012_Summary_Code,711AS,NAICS_2012_Code,711130,
+BEA_2012_Summary_Code,711AS,NAICS_2012_Code,711190,
+BEA_2012_Summary_Code,711AS,NAICS_2012_Code,711211,
+BEA_2012_Summary_Code,711AS,NAICS_2012_Code,711212,
+BEA_2012_Summary_Code,711AS,NAICS_2012_Code,711219,
+BEA_2012_Summary_Code,711AS,NAICS_2012_Code,711310,
+BEA_2012_Summary_Code,711AS,NAICS_2012_Code,711320,
+BEA_2012_Summary_Code,711AS,NAICS_2012_Code,711410,
+BEA_2012_Summary_Code,711AS,NAICS_2012_Code,711510,
+BEA_2012_Summary_Code,711AS,NAICS_2012_Code,712110,
+BEA_2012_Summary_Code,711AS,NAICS_2012_Code,712120,
+BEA_2012_Summary_Code,711AS,NAICS_2012_Code,712130,
+BEA_2012_Summary_Code,711AS,NAICS_2012_Code,712190,
+BEA_2012_Summary_Code,713,NAICS_2012_Code,713110,
+BEA_2012_Summary_Code,713,NAICS_2012_Code,713120,
+BEA_2012_Summary_Code,713,NAICS_2012_Code,713210,
+BEA_2012_Summary_Code,713,NAICS_2012_Code,713290,
+BEA_2012_Summary_Code,713,NAICS_2012_Code,713910,
+BEA_2012_Summary_Code,713,NAICS_2012_Code,713920,
+BEA_2012_Summary_Code,713,NAICS_2012_Code,713930,
+BEA_2012_Summary_Code,713,NAICS_2012_Code,713940,
+BEA_2012_Summary_Code,713,NAICS_2012_Code,713950,
+BEA_2012_Summary_Code,713,NAICS_2012_Code,713990,
+BEA_2012_Summary_Code,721,NAICS_2012_Code,721110,
+BEA_2012_Summary_Code,721,NAICS_2012_Code,721120,
+BEA_2012_Summary_Code,721,NAICS_2012_Code,721191,
+BEA_2012_Summary_Code,721,NAICS_2012_Code,721199,
+BEA_2012_Summary_Code,721,NAICS_2012_Code,721211,
+BEA_2012_Summary_Code,721,NAICS_2012_Code,721214,
+BEA_2012_Summary_Code,721,NAICS_2012_Code,721310,
+BEA_2012_Summary_Code,722,NAICS_2012_Code,722310,
+BEA_2012_Summary_Code,722,NAICS_2012_Code,722320,
+BEA_2012_Summary_Code,722,NAICS_2012_Code,722330,
+BEA_2012_Summary_Code,722,NAICS_2012_Code,722410,
+BEA_2012_Summary_Code,722,NAICS_2012_Code,722511,
+BEA_2012_Summary_Code,722,NAICS_2012_Code,722513,
+BEA_2012_Summary_Code,722,NAICS_2012_Code,722514,
+BEA_2012_Summary_Code,722,NAICS_2012_Code,722515,
+BEA_2012_Summary_Code,81,NAICS_2012_Code,811111,
+BEA_2012_Summary_Code,81,NAICS_2012_Code,811112,
+BEA_2012_Summary_Code,81,NAICS_2012_Code,811113,
+BEA_2012_Summary_Code,81,NAICS_2012_Code,811118,
+BEA_2012_Summary_Code,81,NAICS_2012_Code,811121,
+BEA_2012_Summary_Code,81,NAICS_2012_Code,811122,
+BEA_2012_Summary_Code,81,NAICS_2012_Code,811191,
+BEA_2012_Summary_Code,81,NAICS_2012_Code,811192,
+BEA_2012_Summary_Code,81,NAICS_2012_Code,811198,
+BEA_2012_Summary_Code,81,NAICS_2012_Code,811211,
+BEA_2012_Summary_Code,81,NAICS_2012_Code,811212,
+BEA_2012_Summary_Code,81,NAICS_2012_Code,811213,
+BEA_2012_Summary_Code,81,NAICS_2012_Code,811219,
+BEA_2012_Summary_Code,81,NAICS_2012_Code,811310,
+BEA_2012_Summary_Code,81,NAICS_2012_Code,811411,
+BEA_2012_Summary_Code,81,NAICS_2012_Code,811412,
+BEA_2012_Summary_Code,81,NAICS_2012_Code,811420,
+BEA_2012_Summary_Code,81,NAICS_2012_Code,811430,
+BEA_2012_Summary_Code,81,NAICS_2012_Code,811490,
+BEA_2012_Summary_Code,81,NAICS_2012_Code,812111,
+BEA_2012_Summary_Code,81,NAICS_2012_Code,812112,
+BEA_2012_Summary_Code,81,NAICS_2012_Code,812113,
+BEA_2012_Summary_Code,81,NAICS_2012_Code,812191,
+BEA_2012_Summary_Code,81,NAICS_2012_Code,812199,
+BEA_2012_Summary_Code,81,NAICS_2012_Code,812210,
+BEA_2012_Summary_Code,81,NAICS_2012_Code,812220,
+BEA_2012_Summary_Code,81,NAICS_2012_Code,812310,
+BEA_2012_Summary_Code,81,NAICS_2012_Code,812320,
+BEA_2012_Summary_Code,81,NAICS_2012_Code,812331,
+BEA_2012_Summary_Code,81,NAICS_2012_Code,812332,
+BEA_2012_Summary_Code,81,NAICS_2012_Code,812910,
+BEA_2012_Summary_Code,81,NAICS_2012_Code,812921,
+BEA_2012_Summary_Code,81,NAICS_2012_Code,812922,
+BEA_2012_Summary_Code,81,NAICS_2012_Code,812930,
+BEA_2012_Summary_Code,81,NAICS_2012_Code,812990,
+BEA_2012_Summary_Code,81,NAICS_2012_Code,813110,
+BEA_2012_Summary_Code,81,NAICS_2012_Code,813211,
+BEA_2012_Summary_Code,81,NAICS_2012_Code,813212,
+BEA_2012_Summary_Code,81,NAICS_2012_Code,813219,
+BEA_2012_Summary_Code,81,NAICS_2012_Code,813311,
+BEA_2012_Summary_Code,81,NAICS_2012_Code,813312,
+BEA_2012_Summary_Code,81,NAICS_2012_Code,813319,
+BEA_2012_Summary_Code,81,NAICS_2012_Code,813410,
+BEA_2012_Summary_Code,81,NAICS_2012_Code,813910,
+BEA_2012_Summary_Code,81,NAICS_2012_Code,813920,
+BEA_2012_Summary_Code,81,NAICS_2012_Code,813930,
+BEA_2012_Summary_Code,81,NAICS_2012_Code,813940,
+BEA_2012_Summary_Code,81,NAICS_2012_Code,813990,
+BEA_2012_Summary_Code,81,NAICS_2012_Code,814110,
+BEA_2012_Summary_Code,GSLG,NAICS_2012_Code,921110,
+BEA_2012_Summary_Code,GFGN,NAICS_2012_Code,921110,
+BEA_2012_Summary_Code,GSLG,NAICS_2012_Code,921120,
+BEA_2012_Summary_Code,GFGN,NAICS_2012_Code,921120,
+BEA_2012_Summary_Code,GFGN,NAICS_2012_Code,921130,
+BEA_2012_Summary_Code,GSLG,NAICS_2012_Code,921130,
+BEA_2012_Summary_Code,GSLG,NAICS_2012_Code,921140,
+BEA_2012_Summary_Code,GFGN,NAICS_2012_Code,921140,
+BEA_2012_Summary_Code,GSLG,NAICS_2012_Code,921150,
+BEA_2012_Summary_Code,GSLG,NAICS_2012_Code,921190,
+BEA_2012_Summary_Code,GFGN,NAICS_2012_Code,921190,
+BEA_2012_Summary_Code,GSLG,NAICS_2012_Code,922110,
+BEA_2012_Summary_Code,GFGN,NAICS_2012_Code,922110,
+BEA_2012_Summary_Code,GSLG,NAICS_2012_Code,922120,
+BEA_2012_Summary_Code,GFGN,NAICS_2012_Code,922120,
+BEA_2012_Summary_Code,GSLG,NAICS_2012_Code,922130,
+BEA_2012_Summary_Code,GFGN,NAICS_2012_Code,922130,
+BEA_2012_Summary_Code,GSLG,NAICS_2012_Code,922140,
+BEA_2012_Summary_Code,GFGN,NAICS_2012_Code,922140,
+BEA_2012_Summary_Code,GSLG,NAICS_2012_Code,922150,
+BEA_2012_Summary_Code,GFGN,NAICS_2012_Code,922150,
+BEA_2012_Summary_Code,GFGN,NAICS_2012_Code,922160,
+BEA_2012_Summary_Code,GSLG,NAICS_2012_Code,922160,
+BEA_2012_Summary_Code,GSLG,NAICS_2012_Code,922190,
+BEA_2012_Summary_Code,GFGN,NAICS_2012_Code,922190,
+BEA_2012_Summary_Code,GSLG,NAICS_2012_Code,923110,
+BEA_2012_Summary_Code,GFGN,NAICS_2012_Code,923110,
+BEA_2012_Summary_Code,GSLG,NAICS_2012_Code,923120,
+BEA_2012_Summary_Code,GFGN,NAICS_2012_Code,923120,
+BEA_2012_Summary_Code,GSLG,NAICS_2012_Code,923130,
+BEA_2012_Summary_Code,GFGN,NAICS_2012_Code,923130,
+BEA_2012_Summary_Code,GFGN,NAICS_2012_Code,923140,
+BEA_2012_Summary_Code,GSLG,NAICS_2012_Code,924110,
+BEA_2012_Summary_Code,GFGN,NAICS_2012_Code,924110,
+BEA_2012_Summary_Code,GSLG,NAICS_2012_Code,924120,
+BEA_2012_Summary_Code,GFGN,NAICS_2012_Code,924120,
+BEA_2012_Summary_Code,GSLG,NAICS_2012_Code,925110,
+BEA_2012_Summary_Code,GFGN,NAICS_2012_Code,925110,
+BEA_2012_Summary_Code,GFGN,NAICS_2012_Code,925120,
+BEA_2012_Summary_Code,GSLG,NAICS_2012_Code,925120,
+BEA_2012_Summary_Code,GSLG,NAICS_2012_Code,926110,
+BEA_2012_Summary_Code,GFGN,NAICS_2012_Code,926110,
+BEA_2012_Summary_Code,GSLG,NAICS_2012_Code,926120,
+BEA_2012_Summary_Code,GSLG,NAICS_2012_Code,926130,
+BEA_2012_Summary_Code,GFGN,NAICS_2012_Code,926130,
+BEA_2012_Summary_Code,GSLG,NAICS_2012_Code,926140,
+BEA_2012_Summary_Code,GFGN,NAICS_2012_Code,926140,
+BEA_2012_Summary_Code,GSLG,NAICS_2012_Code,926150,
+BEA_2012_Summary_Code,GFGN,NAICS_2012_Code,926150,
+BEA_2012_Summary_Code,GFE,NAICS_2012_Code,927110,
+BEA_2012_Summary_Code,GFGD,NAICS_2012_Code,928110,
+BEA_2012_Summary_Code,GFGN,NAICS_2012_Code,928120,
+BEA_2012_Summary_Code,F010,NAICS_2012_Code,F01000,
+BEA_2012_Summary_Code,GFE,NAICS_2012_Code,S00101,
+BEA_2012_Summary_Code,GSLE,NAICS_2012_Code,S00201,
+BEA_2012_Summary_Code,GSLE,NAICS_2012_Code,S00202,
diff --git a/flowsa/data/activitytosectormapping/README.md b/flowsa/data/activitytosectormapping/README.md
index 70b678694..5d22b18dc 100644
--- a/flowsa/data/activitytosectormapping/README.md
+++ b/flowsa/data/activitytosectormapping/README.md
@@ -4,17 +4,18 @@ sectors. These files are not required for datasets where activities are already
NAICS-like.
Each csv contains columns for:
-1. _ActivitySourceName_: Activity Source Name must match the file name, although the name
- can be missing extensions. For example, the Activity Source Name can be
- "EPA_GHGI" rather than "EPA_GHGI_T_2_4", as the function that looks for the
- file names will strip "_XXX" from the file name until the file is found
+1. _ActivitySourceName_: Activity Source Name must match the file name,
+ although the name can be missing extensions. For example, the Activity
+ Source Name can be "EPA_GHGI" rather than "EPA_GHGI_T_2_4", as the
+ function that looks for the file names will strip "_XXX" from the file
+ name until the file is found
2. _Activity_: Any activities that should be mapped to a sector
-3. _SectorSourceName_ Specify the sector year being mapped to (e.g. NAICS_2012_Code)
-4. _Sector_: The 2- to 6-digit NAICS code that the activity relates to. Can map to
- multiple NAICS codes of varying lengths. Optional: If necessary a user can
- map to their own non-official NAICS codes. If mapped to non-official NAICS,
- the NAICS crosswalk must be recreated in the
+3. _SectorSourceName_: Specify the sector year being mapped to (e.g.
+ NAICS_2012_Code)
+4. _Sector_: The 2- to 7-digit NAICS code that the activity relates to. Can
+ map to multiple NAICS codes of varying lengths. Optional: If necessary a
+ user can map to their own non-official NAICS codes. If mapped to
+ non-official NAICS, the NAICS crosswalk must be recreated in the
[scripts directory](https://github.com/USEPA/flowsa/blob/master/scripts/update_NAICS_crosswalk.py)
5. _SectorType_: "I" for industry, "C" for commodity
6. _Notes_: (optional) Any additional relevant information
-7.
\ No newline at end of file
diff --git a/flowsa/data/process_adjustments/README.md b/flowsa/data/process_adjustments/README.md
index 31de68107..102a80495 100644
--- a/flowsa/data/process_adjustments/README.md
+++ b/flowsa/data/process_adjustments/README.md
@@ -1,8 +1,11 @@
# Process Adjustments
-Process adjustments allow for adjustments to the `SectorProducedBy` field for data obtained from stewicombo.
-Records that are from the `source_naics` AND the `source_process` are reassigned to the `target_naics` indicated in the process adjustment file.
+Process adjustments allow for adjustments to the `SectorProducedBy` field
+for data obtained from stewicombo. Records that are from the `source_naics`
+AND the `source_process` are reassigned to the `target_naics` indicated in
+the process adjustment file.
-Adjustments are indicated by identifying one or more named files in the `reassign_process_to_sectors` FBS parameter.
+Adjustments are indicated by identifying one or more named files in the
+`reassign_process_to_sectors` FBS parameter.
## Available Adjustments
diff --git a/flowsa/data/source_catalog.yaml b/flowsa/data/source_catalog.yaml
index 40696897f..1fde6f036 100644
--- a/flowsa/data/source_catalog.yaml
+++ b/flowsa/data/source_catalog.yaml
@@ -131,6 +131,18 @@ EPA_GHGI:
sector-like_activities: False
activity_schema:
sector_aggregation_level: "aggregated"
+EPA_StateGHGI:
+ class:
+ - Chemicals
+ sector-like_activities: False
+ activity_schema:
+ sector_aggregation_level: "aggregated"
+EPA_SIT:
+ class:
+ - Chemicals
+ sector-like_activities: False
+ activity_schema:
+ sector_aggregation_level: "aggregated"
EPA_NEI_Nonpoint:
data_format: FBA
class:
@@ -182,6 +194,12 @@ StatCan_LFS:
sector-like_activities: False
activity_schema:
sector_aggregation_level: "aggregated"
+stateio:
+ class:
+ - Money
+ sector-like_activities: False  # update to true once alternate activity_schema in place
+ # activity_schema: BEA_2012_Summary_Code
+ sector_aggregation_level: "disaggregated"
USDA_CoA_Cropland:
data_format: FBA
class:
diff --git a/flowsa/data_source_scripts/BEA.py b/flowsa/data_source_scripts/BEA.py
index f19757870..7add8178b 100644
--- a/flowsa/data_source_scripts/BEA.py
+++ b/flowsa/data_source_scripts/BEA.py
@@ -62,28 +62,8 @@ def bea_use_detail_br_parse(*, year, **_):
f'_Detail_Use_PRO_BeforeRedef.csv'
df_raw = pd.read_csv(csv_load)
- # first column is the commodity being consumed
- df = df_raw.rename(columns={'Unnamed: 0': 'ActivityProducedBy'})
-
- # use "melt" fxn to convert colummns into rows
- df = df.melt(id_vars=["ActivityProducedBy"],
- var_name="ActivityConsumedBy",
- value_name="FlowAmount")
-
- df['Year'] = str(year)
- # hardcode data
- df['FlowName'] = "USD" + str(year)
- df["Class"] = "Money"
- df["FlowType"] = "TECHNOSPHERE_FLOW"
- df['Description'] = 'BEA_2012_Detail_Code'
+ df = bea_detail_parse(df_raw, year)
df["SourceName"] = "BEA_Use_Detail_PRO_BeforeRedef"
- df["Location"] = US_FIPS
- df['LocationSystem'] = "FIPS_2015"
- # original unit in million USD
- df['FlowAmount'] = df['FlowAmount'] * 1000000
- df["Unit"] = "USD"
- df['DataReliability'] = 5 # tmp
- df['DataCollection'] = 5 # tmp
return df
@@ -96,10 +76,17 @@ def bea_make_detail_br_parse(*, year, **_):
flowbyactivity specifications
"""
# Read directly into a pandas df
- df_raw = pd.read_csv(externaldatapath + "BEA_" + str(year) +
- "_Detail_Make_BeforeRedef.csv")
+ csv_load = f'{externaldatapath}BEA_{str(year)}' \
+ f'_Detail_Make_BeforeRedef.csv'
+ df_raw = pd.read_csv(csv_load)
+
+ df = bea_detail_parse(df_raw, year)
+ df["SourceName"] = "BEA_Make_Detail_BeforeRedef"
- # first column is the industry
+ return df
+
+
+def bea_detail_parse(df_raw, year):
df = df_raw.rename(columns={'Unnamed: 0': 'ActivityProducedBy'})
# use "melt" fxn to convert colummns into rows
@@ -109,11 +96,10 @@ def bea_make_detail_br_parse(*, year, **_):
df['Year'] = str(year)
# hardcode data
- df['FlowName'] = "USD" + str(year)
+ df['FlowName'] = f"USD{str(year)}"
df["Class"] = "Money"
df["FlowType"] = "TECHNOSPHERE_FLOW"
df['Description'] = 'BEA_2012_Detail_Code'
- df["SourceName"] = "BEA_Make_Detail_BeforeRedef"
df["Location"] = US_FIPS
df['LocationSystem'] = "FIPS_2015"
# original unit in million USD
@@ -121,7 +107,6 @@ def bea_make_detail_br_parse(*, year, **_):
df["Unit"] = "USD"
df['DataReliability'] = 5 # tmp
df['DataCollection'] = 5 # tmp
-
return df
@@ -187,18 +172,27 @@ def subset_and_allocate_BEA_table(df, attr, **_):
"""
Temporary function to mimic use of 2nd helper allocation dataset
"""
+
df = subset_BEA_table(df, attr)
v = {'geoscale_to_use': 'national'}
method2 = {'target_sector_source': 'NAICS_2012_Code'}
+
+ import importlib
+ fxn = getattr(importlib.import_module(
+ 'flowsa.data_source_scripts.BLS_QCEW'),
+ "bls_clean_allocation_fba_w_sec")
+
attr2 = {"helper_source": "BLS_QCEW",
"helper_method": "proportional",
"helper_source_class": "Employment",
"helper_source_year": 2012,
- "helper_flow": ["Number of employees"],
+ "helper_flow": ["Number of employees, Federal Government",
+ "Number of employees, State Government",
+ "Number of employees, Local Government",
+ "Number of employees, Private"],
"helper_from_scale": "national",
"allocation_from_scale": "national",
- "clean_helper_fba": "clean_bls_qcew_fba",
- "clean_helper_fba_wsec": "bls_clean_allocation_fba_w_sec"}
+ "clean_helper_fba_wsec": fxn}
df2 = allocation_helper(df, attr2, method2, v, False)
# Drop remaining rows with no sectors e.g. T001 and other final demands
df2 = df2.dropna(subset=['SectorConsumedBy']).reset_index(drop=True)
diff --git a/flowsa/data_source_scripts/BLS_QCEW.py b/flowsa/data_source_scripts/BLS_QCEW.py
index 7a964ce75..aa7aa7b3f 100644
--- a/flowsa/data_source_scripts/BLS_QCEW.py
+++ b/flowsa/data_source_scripts/BLS_QCEW.py
@@ -17,13 +17,8 @@
import pandas as pd
import numpy as np
from flowsa.location import US_FIPS
-from flowsa.common import fba_default_grouping_fields
-from flowsa.schema import flow_by_activity_wsec_fields, \
- flow_by_activity_mapped_wsec_fields
from flowsa.flowbyfunctions import assign_fips_location_system, \
- aggregator
-from flowsa.dataclean import add_missing_flow_by_fields, \
- replace_strings_with_NoneType
+ aggregator, equally_allocate_suppressed_parent_to_child_naics
def BLS_QCEW_URL_helper(*, build_url, year, **_):
@@ -92,16 +87,19 @@ def bls_qcew_parse(*, df_list, year, **_):
df.loc[df['area_fips'] == 'US000', 'area_fips'] = US_FIPS
# set datatypes
float_cols = [col for col in df.columns if col not in
- ['area_fips', 'industry_code', 'year']]
+ ['area_fips', 'own_code', 'industry_code', 'year']]
for col in float_cols:
df[col] = df[col].astype('float')
# Keep owner_code = 1, 2, 3, 5
- df = df[df.own_code.isin([1, 2, 3, 5])]
- # Aggregate annual_avg_estabs and annual_avg_emplvl by area_fips,
- # industry_code, year, flag
- df = df.groupby(['area_fips', 'industry_code', 'year'])[[
- 'annual_avg_estabs', 'annual_avg_emplvl',
- 'total_annual_wages']].sum().reset_index()
+ df = df[df.own_code.isin(['1', '2', '3', '5'])]
+ # replace ownership code with text defined by bls
+ # https://www.bls.gov/cew/classifications/ownerships/ownership-titles.htm
+ replace_dict = {'1': 'Federal Government',
+ '2': 'State Government',
+ '3': 'Local Government',
+ '5': 'Private'}
+ for key in replace_dict.keys():
+ df['own_code'] = df['own_code'].replace(key, replace_dict[key])
# Rename fields
df = df.rename(columns={'area_fips': 'Location',
'industry_code': 'ActivityProducedBy',
@@ -112,140 +110,47 @@ def bls_qcew_parse(*, df_list, year, **_):
# Reformat FIPs to 5-digit
df['Location'] = df['Location'].apply('{:0>5}'.format)
# use "melt" fxn to convert colummns into rows
- df = df.melt(id_vars=["Location", "ActivityProducedBy", "Year"],
- var_name="FlowName",
- value_name="FlowAmount")
+ df2 = df.melt(id_vars=["Location", "ActivityProducedBy", "Year",
+ 'own_code'],
+ var_name="FlowName",
+ value_name="FlowAmount")
# specify unit based on flowname
- df['Unit'] = np.where(df["FlowName"] == 'Annual payroll', "USD", "p")
+ df2['Unit'] = np.where(df2["FlowName"] == 'Annual payroll', "USD", "p")
# specify class
- df.loc[df['FlowName'] == 'Number of employees', 'Class'] = 'Employment'
- df.loc[df['FlowName'] == 'Number of establishments', 'Class'] = 'Other'
- df.loc[df['FlowName'] == 'Annual payroll', 'Class'] = 'Money'
+ df2.loc[df2['FlowName'] == 'Number of employees', 'Class'] = 'Employment'
+ df2.loc[df2['FlowName'] == 'Number of establishments', 'Class'] = 'Other'
+ df2.loc[df2['FlowName'] == 'Annual payroll', 'Class'] = 'Money'
+ # update flow name
+ df2['FlowName'] = df2['FlowName'] + ', ' + df2['own_code']
+ df2 = df2.drop(columns='own_code')
# add location system based on year of data
- df = assign_fips_location_system(df, year)
+ df2 = assign_fips_location_system(df2, year)
# add hard code data
- df['SourceName'] = 'BLS_QCEW'
+ df2['SourceName'] = 'BLS_QCEW'
# Add tmp DQ scores
- df['DataReliability'] = 5
- df['DataCollection'] = 5
- df['Compartment'] = None
- df['FlowType'] = "ELEMENTARY_FLOW"
+ df2['DataReliability'] = 5
+ df2['DataCollection'] = 5
+ df2['Compartment'] = None
+ df2['FlowType'] = "ELEMENTARY_FLOW"
- return df
+ return df2
-def clean_bls_qcew_fba_for_employment_sat_table(fba_df, **kwargs):
+def clean_bls_qcew_fba_for_employment_sat_table(fba, **_):
"""
When creating the employment satellite table for use in useeior,
- modify the flow name to match prior methodology for mapping/impact factors
+ modify the flow name to match prior methodology for mapping/impact factors.
+ clean_fba_df_fxn
- :param fba_df: df, flowbyactivity
- :param kwargs: dictionary, can include attr, a dictionary of parameters
- in the FBA method yaml
+ :param fba: df, flowbyactivity
:return: df, flowbyactivity, with modified flow names
"""
- fba_df = clean_bls_qcew_fba(fba_df, **kwargs)
-
# rename flowname value
for c in ['FlowName', 'Flowable']:
- fba_df[c] = fba_df[c].replace({'Number of employees': 'Jobs'})
-
- return fba_df
-
-
-def clean_bls_qcew_fba(fba_df, **kwargs):
- """
- Function to clean BLS QCEW data when FBA is not used for employment
- satellite table
- :param fba_df: df, FBA format
- :param kwargs: dictionary, can include attr, a dictionary of parameters
- in the FBA method yaml
- :return: df, modified BLS QCEW data
- """
-
- fba_df = fba_df.reset_index(drop=True)
- # aggregate data to NAICS 2 digits, if 2 digit value is missing
- fba_df = replace_missing_2_digit_sector_values(fba_df)
- # drop rows of data where sectors are provided in ranges
- fba_df = remove_2_digit_sector_ranges(fba_df)
-
- return fba_df
-
-
-def replace_missing_2_digit_sector_values(df):
- """
- In the 2015 (and possibly other dfs, there are instances of values
- at the 3 digit NAICS level, while the 2 digit NAICS is reported as 0.
- The 0 values are replaced with summed 3 digit NAICS
- :param df: df, BLS QCEW data in FBA format
- :return: df, BLS QCEW data with 2-digit NAICS sector FlowAmounts
- """
-
- # check for 2 digit 0 values
- df_missing = df[(df['ActivityProducedBy'].apply(
- lambda x: len(x) == 2)) & (df['FlowAmount'] == 0)]
- # create list of location/activityproduced by combos
- missing_sectors = df_missing[[
- 'Location', 'ActivityProducedBy']].drop_duplicates().values.tolist()
-
- # subset the df to 3 naics where flow amount is not 0 and
- # that would sum to the missing 2 digit naics
- df_subset = df[df['ActivityProducedBy'].apply(
- lambda x: len(x) == 3) & (df['FlowAmount'] != 0)]
- new_sectors_list = []
- for q, r in missing_sectors:
- c1 = df_subset['Location'] == q
- c2 = df_subset['ActivityProducedBy'].apply(lambda x: x[0:2] == r)
- # subset data
- new_sectors_list.append(df_subset[c1 & c2])
- if len(new_sectors_list) != 0:
- new_sectors = pd.concat(
- new_sectors_list, sort=False, ignore_index=True)
-
- # drop last digit of naics and aggregate
- new_sectors.loc[:, 'ActivityProducedBy'] = \
- new_sectors['ActivityProducedBy'].apply(lambda x: x[0:2])
- new_sectors = aggregator(new_sectors, fba_default_grouping_fields)
-
- # drop the old location/activity columns in the bls df and
- # add new sector values
- new_sectors_list = \
- new_sectors[['Location', 'ActivityProducedBy'
- ]].drop_duplicates().values.tolist()
-
- # rows to drop
- rows_list = []
- for q, r in new_sectors_list:
- c1 = df['Location'] == q
- c2 = df['ActivityProducedBy'].apply(lambda x: x == r)
- # subset data
- rows_list.append(df[(c1 & c2)])
- rows_to_drop = pd.concat(rows_list, ignore_index=True)
- # drop rows from df
- modified_df = pd.merge(df, rows_to_drop, indicator=True,
- how='outer').query('_merge=="left_only"'
- ).drop('_merge', axis=1)
- # add new rows
- modified_df = modified_df.append(new_sectors, sort=False)
- return modified_df
- else:
- return df
-
-
-def remove_2_digit_sector_ranges(fba_df):
- """
- BLS publishes activity ranges of '31-33', 44-45', '48-49...
- drop these ranges.
- The individual 2 digit naics are summed later.
- :param fba_df: df, BLS QCEW in FBA format
- :return: df, no sector ranges
- """
+ fba[c] = fba[c].str.replace('Number of employees', 'Jobs')
- df = fba_df[
- ~fba_df['ActivityProducedBy'].str.contains('-')].reset_index(drop=True)
-
- return df
+ return fba
def bls_clean_allocation_fba_w_sec(df_w_sec, **kwargs):
@@ -256,26 +161,18 @@ def bls_clean_allocation_fba_w_sec(df_w_sec, **kwargs):
dictionary of FBA method yaml parameters
:return: df, BLS QCEW FBA with estimated suppressed data
"""
- df_w_sec = df_w_sec.reset_index(drop=True)
- df2 = add_missing_flow_by_fields(
- df_w_sec, flow_by_activity_wsec_fields).reset_index(drop=True)
- df3 = replace_strings_with_NoneType(df2)
-
- return df3
-
-
-def bls_clean_allocation_fba_w_sec_sat_table(df_w_sec, **kwargs):
- """
- clean up bls df with sectors by estimating suppresed data
- :param df_w_sec: df, FBA format BLS QCEW data
- :param kwargs: additional arguments can include 'attr', a
- dictionary of FBA method yaml parameters
- :return: df, BLS QCEW FBA with estimated suppressed data
- """
- df_w_sec = df_w_sec.reset_index(drop=True)
- df2 = add_missing_flow_by_fields(df_w_sec,
- flow_by_activity_mapped_wsec_fields
- ).reset_index(drop=True)
- df3 = replace_strings_with_NoneType(df2)
-
- return df3
+ groupcols = list(df_w_sec.select_dtypes(include=['object', 'int']).columns)
+    # estimate suppressed data
+ df = equally_allocate_suppressed_parent_to_child_naics(
+ df_w_sec, kwargs['method'], 'SectorProducedBy', groupcols)
+
+ # for purposes of allocation, we do not need to differentiate between
+ # federal government, state government, local government, or private
+ # sectors. So after estimating the suppressed data (above), modify the
+ # flow names and aggregate data
+ col_list = [e for e in df_w_sec.columns if e in ['FlowName', 'Flowable']]
+ for c in col_list:
+ df[c] = df[c].str.split(',').str[0]
+ df2 = aggregator(df, groupcols)
+
+ return df2
diff --git a/flowsa/data_source_scripts/Blackhurst_IO.py b/flowsa/data_source_scripts/Blackhurst_IO.py
index b67c0d6cb..84a1e1ff0 100644
--- a/flowsa/data_source_scripts/Blackhurst_IO.py
+++ b/flowsa/data_source_scripts/Blackhurst_IO.py
@@ -17,7 +17,6 @@
from flowsa.allocation import \
proportional_allocation_by_location_and_activity
from flowsa.sectormapping import add_sectors_to_flowbyactivity
-from flowsa.data_source_scripts.BLS_QCEW import clean_bls_qcew_fba
from flowsa.validation import compare_df_units
@@ -127,9 +126,6 @@ def convert_blackhurst_data_to_kg_per_employee(
flowclass='Employment', geographic_level='national',
download_FBA_if_missing=kwargs['download_FBA_if_missing'])
- # clean df
- bls = clean_bls_qcew_fba(bls, attr=attr)
-
# assign naics to allocation dataset
bls_wsec = add_sectors_to_flowbyactivity(
bls, sectorsourcename=method['target_sector_source'])
diff --git a/flowsa/data_source_scripts/CalRecycle_WasteCharacterization.py b/flowsa/data_source_scripts/CalRecycle_WasteCharacterization.py
index 54b4c56b8..fa4f73f84 100644
--- a/flowsa/data_source_scripts/CalRecycle_WasteCharacterization.py
+++ b/flowsa/data_source_scripts/CalRecycle_WasteCharacterization.py
@@ -17,10 +17,9 @@
load_fba_w_standardized_units, \
aggregate_and_subset_for_target_sectors
from flowsa.settings import externaldatapath
-from flowsa.data_source_scripts.BLS_QCEW import clean_bls_qcew_fba
from flowsa.sectormapping import get_fba_allocation_subset, \
add_sectors_to_flowbyactivity
-from flowsa.dataclean import replace_strings_with_NoneType
+from flowsa.dataclean import replace_strings_with_NoneType, standardize_units
def produced_by(entry):
@@ -110,16 +109,17 @@ def calR_parse(*, year, **_):
return output
-def keep_generated_quantity(fba, **kwargs):
+def keep_generated_quantity(fba, **_):
"""
Function to clean CalRecycles FBA to remove quantities not
assigned as Generated
:param fba: df, FBA format
- :param kwargs: dictionary, can include attr, a dictionary of parameters in
- the FBA method yaml
:return: df, modified CalRecycles FBA
"""
- fba = fba[fba['Description'] == 'Generated']
+ fba = fba[fba['Description'] == 'Generated'].reset_index(drop=True)
+ # if no mapping performed, still update units
+ if 'tons' in fba['Unit'].values:
+ fba = standardize_units(fba)
return fba
@@ -133,9 +133,10 @@ def apply_tons_per_employee_per_year_to_states(fbs, method, **_):
year=fbs['Year'].unique()[0],
flowclass='Employment',
geographic_level='state')
- bls = bls[bls['FlowName'] == 'Number of employees']
- # clean df
- bls = clean_bls_qcew_fba(bls)
+ bls = bls[bls['FlowName'].isin(["Number of employees, Federal Government",
+ "Number of employees, State Government",
+ "Number of employees, Local Government",
+ "Number of employees, Private"])]
bls = add_sectors_to_flowbyactivity(bls)
# Subset BLS dataset
@@ -146,6 +147,10 @@ def apply_tons_per_employee_per_year_to_states(fbs, method, **_):
# Calculate tons per employee per year per material and sector in CA
bls_CA = bls[bls['Location'] == '06000'] # California
+ # aggregate all employment prior to generating tpepy
+ bls_CA = (bls_CA.groupby(['Location','Year','SectorProducedBy'])
+ .agg({'Employees':'sum'})
+ .reset_index())
tpepy = fbs.merge(bls_CA, how='inner')
tpepy['TPEPY'] = np.divide(tpepy['FlowAmount'], tpepy['Employees'],
out=np.zeros_like(tpepy['Employees']),
diff --git a/flowsa/data_source_scripts/Census_CBP.py b/flowsa/data_source_scripts/Census_CBP.py
index c805a76ab..16f06f052 100644
--- a/flowsa/data_source_scripts/Census_CBP.py
+++ b/flowsa/data_source_scripts/Census_CBP.py
@@ -34,11 +34,7 @@ def Census_CBP_URL_helper(*, build_url, year, **_):
# This is only for years 2010 and 2011. This is done because the State
# query that gets all counties returns too many results and errors out.
if year in ['2010', '2011']:
- if year == '2011':
- fips_year = '2010'
- else:
- fips_year = '2010'
- county_fips_df = get_county_FIPS(fips_year)
+ county_fips_df = get_county_FIPS('2010')
county_fips = county_fips_df.FIPS
for d in county_fips:
url = build_url
@@ -82,16 +78,15 @@ def Census_CBP_URL_helper(*, build_url, year, **_):
urls_census.append(url)
else:
FIPS_2 = get_all_state_FIPS_2()['FIPS_2']
- for c in FIPS_2:
+ for state in FIPS_2:
url = build_url
- url = url.replace("__stateFIPS__", c)
+ url = url.replace("__stateFIPS__", state)
# specified NAICS code year depends on year of data
- if year in ['2017']:
+ if year in ['2017', '2018', '2019', '2020']:
url = url.replace("__NAICS__", "NAICS2017")
- url = url.replace("__countyFIPS__", "*")
- if year in ['2012', '2013', '2014', '2015', '2016']:
+ elif year in ['2012', '2013', '2014', '2015', '2016']:
url = url.replace("__NAICS__", "NAICS2012")
- url = url.replace("__countyFIPS__", "*")
+ url = url.replace("__countyFIPS__", "*")
urls_census.append(url)
return urls_census
@@ -152,6 +147,10 @@ def census_cbp_parse(*, df_list, year, **_):
value_name="FlowAmount")
# specify unit based on flowname
df['Unit'] = np.where(df["FlowName"] == 'Annual payroll', "USD", "p")
+ # Payroll in units of thousand USD
+ df['FlowAmount'] = np.where(df["FlowName"] == 'Annual payroll',
+ df['FlowAmount'] * 1000,
+ df['FlowAmount'])
# specify class
df.loc[df['FlowName'] == 'Number of employees', 'Class'] = 'Employment'
df.loc[df['FlowName'] == 'Number of establishments', 'Class'] = 'Other'
diff --git a/flowsa/data_source_scripts/EIA_CBECS_Land.py b/flowsa/data_source_scripts/EIA_CBECS_Land.py
index 610d28389..dba2a5536 100644
--- a/flowsa/data_source_scripts/EIA_CBECS_Land.py
+++ b/flowsa/data_source_scripts/EIA_CBECS_Land.py
@@ -233,15 +233,15 @@ def standardize_eia_cbecs_land_activity_names(df, column_to_standardize):
return df
-def cbecs_land_fba_cleanup(fba_load):
+def cbecs_land_fba_cleanup(fba, **_):
"""
Clean up the land fba for use in allocation
- :param fba_load: df, eia cbecs land flowbyactivity format
+ :param fba: df, eia cbecs land flowbyactivity format
:return: df, flowbyactivity with modified values
"""
# estimate floor space using number of floors
- fba = calculate_floorspace_based_on_number_of_floors(fba_load)
+ fba = calculate_floorspace_based_on_number_of_floors(fba)
# calculate the land area in addition to building footprint
fba1 = calculate_total_facility_land_area(fba)
diff --git a/flowsa/data_source_scripts/EIA_MECS.py b/flowsa/data_source_scripts/EIA_MECS.py
index 7e9f6f790..40086e9d2 100644
--- a/flowsa/data_source_scripts/EIA_MECS.py
+++ b/flowsa/data_source_scripts/EIA_MECS.py
@@ -436,7 +436,7 @@ def eia_mecs_energy_clean_allocation_fba_w_sec(
return df2
-def mecs_land_fba_cleanup(fba):
+def mecs_land_fba_cleanup(fba, **_):
"""
Modify the EIA MECS Land FBA
:param fba: df, EIA MECS Land FBA format
@@ -452,7 +452,7 @@ def mecs_land_fba_cleanup(fba):
return fba
-def mecs_land_fba_cleanup_for_land_2012_fbs(fba):
+def mecs_land_fba_cleanup_for_land_2012_fbs(fba, **_):
"""
The 'land_national_2012' FlowBySector uses MECS 2014 data, set
MECS year to 2012
@@ -460,7 +460,7 @@ def mecs_land_fba_cleanup_for_land_2012_fbs(fba):
:return: df, EIA MECS Land FBA modified
"""
- fba = mecs_land_fba_cleanup(fba)
+ fba = mecs_land_fba_cleanup(fba=fba)
# reset the EIA MECS Land year from 2014 to 2012 to match
# the USDA ERS MLU year
diff --git a/flowsa/data_source_scripts/EPA_CDDPath.py b/flowsa/data_source_scripts/EPA_CDDPath.py
index 33d2339bc..8ef581457 100644
--- a/flowsa/data_source_scripts/EPA_CDDPath.py
+++ b/flowsa/data_source_scripts/EPA_CDDPath.py
@@ -14,6 +14,7 @@
from flowsa.location import US_FIPS
from flowsa.settings import externaldatapath
from flowsa.flowbyfunctions import assign_fips_location_system
+from flowsa.dataclean import standardize_units
# Read pdf into list of DataFrame
@@ -95,17 +96,21 @@ def combine_cdd_path(*, resp, **_):
return df
-def assign_wood_to_engineering(df):
+def assign_wood_to_engineering(fba, **_):
"""clean_fba_df_fxn that reclassifies Wood from 'Other' to
'Other - Wood' so that its mapping can be adjusted to only use
237990/Heavy engineering NAICS according to method in Meyer et al. 2020
- :param df: df, FBA of CDDPath
+ :param fba: df, FBA of CDDPath
:return: df, CDDPath FBA with wood reassigned
"""
# Update wood to a new activity for improved mapping
- df.loc[((df.FlowName == 'Wood') &
- (df.ActivityProducedBy == 'Other')),
+ fba.loc[((fba.FlowName == 'Wood') &
+ (fba.ActivityProducedBy == 'Other')),
'ActivityProducedBy'] = 'Other - Wood'
- return df
+ # if no mapping performed, still update units
+ if 'short tons' in fba['Unit'].values:
+ fba = standardize_units(fba)
+
+ return fba
diff --git a/flowsa/data_source_scripts/EPA_GHGI.py b/flowsa/data_source_scripts/EPA_GHGI.py
index 18d4d0966..ab02079f4 100644
--- a/flowsa/data_source_scripts/EPA_GHGI.py
+++ b/flowsa/data_source_scripts/EPA_GHGI.py
@@ -15,8 +15,10 @@
from flowsa.dataclean import replace_NoneType_with_empty_cells
from flowsa.settings import log, externaldatapath
from flowsa.schema import flow_by_activity_fields
+from flowsa.common import load_yaml_dict
from flowsa.data_source_scripts import EIA_MECS
+
SECTOR_DICT = {'Res.': 'Residential',
'Comm.': 'Commercial',
'Ind.': 'Industrial',
@@ -421,9 +423,9 @@ def ghg_parse(*, df_list, year, config, **_):
source_No_activity = ["3-22", "3-22b"]
# Handle tables with 1 parent level category
source_activity_1 = ["3-7", "3-8", "3-9", "3-10", "3-14", "3-15",
- "5-18", "5-19", "A-76", "A-77", "A-103"]
+ "5-18", "5-19", "A-76", "A-77"]
# Tables with sub categories
- source_activity_2 = ["3-38", "3-63"]
+ source_activity_2 = ["3-38", "3-63", "A-103"]
if table_name in multi_chem_names:
bool_apb = False
@@ -520,7 +522,9 @@ def ghg_parse(*, df_list, year, config, **_):
flow_name_list = ["Explorationb", "Production", "Processing",
"Transmission and Storage", "Distribution",
"Crude Oil Transportation", "Refining",
- "Exploration"]
+ "Exploration", "Mobile AC",
+ "Refrigerated Transport",
+ "Comfort Cooling for Trains and Buses"]
for index, row in df.iterrows():
apb_value = row["ActivityProducedBy"]
start_activity = row["FlowName"]
@@ -546,7 +550,7 @@ def ghg_parse(*, df_list, year, config, **_):
df.loc[index, 'ActivityProducedBy'
] = f"{apb_txt} {apbe_value}"
if "Total" == apb_value or "Total " == apb_value:
- df = df.drop(index)
+ df = df.drop(index)
elif table_name == "A-79":
fuel_name = ""
@@ -593,7 +597,7 @@ def ghg_parse(*, df_list, year, config, **_):
text_split = apb_value.split("(")
df.loc[index, 'ActivityProducedBy'] = text_split[0]
- elif table_name in ["A-101", "A-103"]:
+ elif table_name in ["A-101"]:
for index, row in df.iterrows():
apb_value = strip_char(row["ActivityProducedBy"])
df.loc[index, 'ActivityProducedBy'] = apb_value
@@ -633,18 +637,35 @@ def get_manufacturing_energy_ratios(year):
'Natural Gas': 'Natural Gas',
}
- # TODO make this year dynamic
+ def closest_value(input_list, input_value):
+ difference = lambda input_list : abs(input_list - input_value)
+ return min(input_list, key=difference)
+
+ mecs_year = closest_value(load_yaml_dict('EIA_MECS_Energy',
+ flowbytype='FBA').get('years'),
+ year)
+
# Filter MECS for total national energy consumption for manufacturing sectors
mecs = load_fba_w_standardized_units(datasource='EIA_MECS_Energy',
- year=year,
+ year=mecs_year,
flowclass='Energy')
mecs = mecs.loc[(mecs['ActivityConsumedBy'] == '31-33') &
(mecs['Location'] == '00000')].reset_index(drop=True)
mecs = EIA_MECS.mecs_energy_fba_cleanup(mecs, None)
- # TODO dynamically change the table imported here based on year
- ghgi = load_fba_w_standardized_units(datasource='EPA_GHGI_T_A_14',
- year=2016,
+ # Identify the GHGI table that matches EIA_MECS
+ for t, v in (load_yaml_dict('EPA_GHGI', 'FBA')
+ .get('Annex').get('Annex').items()):
+ if ((v.get('class') == 'Energy')
+ & ('Energy Consumption Data' in v.get('desc'))
+ & (v.get('year') == str(mecs_year))):
+ table = f"EPA_GHGI_T_{t.replace('-', '_')}"
+ break
+ else:
+ log.error('unable to identify corresponding GHGI table')
+
+ ghgi = load_fba_w_standardized_units(datasource=table,
+ year=mecs_year,
flowclass='Energy')
ghgi = ghgi[ghgi['ActivityConsumedBy']=='Industrial'].reset_index(drop=True)
@@ -659,7 +680,7 @@ def get_manufacturing_energy_ratios(year):
return pct_dict
-def allocate_industrial_combustion(df):
+def allocate_industrial_combustion(fba, source_dict, **_):
"""
Split industrial combustion emissions into two buckets to be further allocated.
@@ -667,9 +688,7 @@ def allocate_industrial_combustion(df):
EIA MECS relative to EPA GHGI. Create new activities to distinguish those
which use EIA MECS as allocation source and those that use alternate source.
"""
- # TODO make this year dynamic
- year = 2014
- pct_dict = get_manufacturing_energy_ratios(year)
+ pct_dict = get_manufacturing_energy_ratios(source_dict.get('year'))
# activities reflect flows in A_14 and 3_8 and 3_9
activities_to_split = {'Industrial Other Coal Industrial': 'Coal',
@@ -678,29 +697,30 @@ def allocate_industrial_combustion(df):
'Natural gas industrial': 'Natural Gas'}
for activity, fuel in activities_to_split.items():
- df_subset = df.loc[df['ActivityProducedBy'] == activity].reset_index(drop=True)
+ df_subset = fba.loc[fba['ActivityProducedBy'] == activity].reset_index(drop=True)
if len(df_subset) == 0:
continue
df_subset['FlowAmount'] = df_subset['FlowAmount'] * pct_dict[fuel]
df_subset['ActivityProducedBy'] = f"{activity} - Manufacturing"
- df.loc[df['ActivityProducedBy'] == activity,
- 'FlowAmount'] = df['FlowAmount'] * (1-pct_dict[fuel])
- df = pd.concat([df, df_subset], ignore_index=True)
+ fba.loc[fba['ActivityProducedBy'] == activity,
+ 'FlowAmount'] = fba['FlowAmount'] * (1-pct_dict[fuel])
+ fba = pd.concat([fba, df_subset], ignore_index=True)
- return df
+ return fba
-def split_HFCs_by_type(df):
- """Speciates HFCs and PFCs for all activities based on T_4_99."""
+def split_HFCs_by_type(fba, **_):
+ """Speciates HFCs and PFCs for all activities based on T_4_99.
+ clean_fba_before_mapping_df_fxn"""
splits = load_fba_w_standardized_units(datasource='EPA_GHGI_T_4_99',
- year=df['Year'][0])
+ year=fba['Year'][0])
splits['pct'] = splits['FlowAmount'] / splits['FlowAmount'].sum()
splits = splits[['FlowName', 'pct']]
- speciated_df = df.apply(lambda x: [p * x['FlowAmount'] for p in splits['pct']],
+ speciated_df = fba.apply(lambda x: [p * x['FlowAmount'] for p in splits['pct']],
axis=1, result_type='expand')
speciated_df.columns = splits['FlowName']
- speciated_df = pd.concat([df, speciated_df], axis=1)
+ speciated_df = pd.concat([fba, speciated_df], axis=1)
speciated_df = speciated_df.melt(id_vars=flow_by_activity_fields.keys(),
var_name='Flow')
speciated_df['FlowName'] = speciated_df['Flow']
@@ -780,20 +800,20 @@ def split_HFC_foams(df):
return df
-def clean_HFC_fba(df):
+def clean_HFC_fba(fba, **_):
"""Adjust HFC emissions for improved parsing.
clean_fba_before_mapping_df_fxn used in EPA_GHGI_T_4_101."""
- df = subtract_HFC_transport_emissions(df)
+ df = subtract_HFC_transport_emissions(fba)
df = allocate_HFC_to_residential(df)
df = split_HFC_foams(df)
df = split_HFCs_by_type(df)
return df
-def remove_HFC_kt(df):
+def remove_HFC_kt(fba, **_):
"""Remove records of emissions in kt, data are also provided in MMT CO2e.
clean_fba_before_mapping_df_fxn used in EPA_GHGI_T_4_50."""
- return df.loc[df['Unit'] != 'kt']
+ return fba.loc[fba['Unit'] != 'kt']
def adjust_transport_activities(df, **_):
diff --git a/flowsa/data_source_scripts/EPA_NEI.py b/flowsa/data_source_scripts/EPA_NEI.py
index dbddf0f23..3fe4e99d4 100644
--- a/flowsa/data_source_scripts/EPA_NEI.py
+++ b/flowsa/data_source_scripts/EPA_NEI.py
@@ -155,7 +155,7 @@ def epa_nei_nonpoint_parse(*, df_list, source, year, config, **_):
return df
-def clean_NEI_fba(fba):
+def clean_NEI_fba(fba, **_):
"""
Clean up the NEI FBA for use in FBS creation
:param fba: df, FBA format
@@ -181,14 +181,14 @@ def clean_NEI_fba(fba):
return fba
-def clean_NEI_fba_no_pesticides(fba):
+def clean_NEI_fba_no_pesticides(fba, **_):
"""
Clean up the NEI FBA with no pesicides for use in FBS creation
:param fba: df, FBA format
:return: df, modified FBA
"""
fba = drop_pesticides(fba)
- fba = clean_NEI_fba(fba)
+ fba = clean_NEI_fba(fba=fba)
return fba
@@ -217,10 +217,10 @@ def drop_GHGs(df, *_):
:return: df
"""""
flowlist = [
- 'Carbon Dioxide',
+ 'Carbon Dioxide', 'Carbon dioxide',
'Methane',
- 'Nitrous Oxide',
- 'Sulfur Hexafluoride',
+ 'Nitrous Oxide', 'Nitrous oxide',
+ 'Sulfur Hexafluoride', 'Sulfur hexafluoride',
]
flow_var = 'Flowable' if 'Flowable' in df.columns else 'FlowName'
return df.query(f'{flow_var} not in @flowlist')
diff --git a/flowsa/data_source_scripts/EPA_SIT.py b/flowsa/data_source_scripts/EPA_SIT.py
new file mode 100644
index 000000000..969b60bbd
--- /dev/null
+++ b/flowsa/data_source_scripts/EPA_SIT.py
@@ -0,0 +1,121 @@
+# EPA_SIT.py (flowsa)
+# !/usr/bin/env python3
+# coding=utf-8
+"""
+Loads EPA State Inventory Tool (SIT) data for state specified from external
+data directory. Parses EPA SIT data to flowbyactivity format.
+"""
+
+import pandas as pd
+import os
+from flowsa.settings import externaldatapath, log
+from flowsa.flowbyfunctions import assign_fips_location_system
+from flowsa.location import apply_county_FIPS
+
+def epa_sit_parse(*, source, year, config, **_):
+
+ state = config['state']
+ filepath = f"{externaldatapath}/SIT_data/{state}/{config['file']}"
+ # dictionary containing Excel sheet-specific information
+ sheet_dict = config['sheet_dict']
+ # initialize the dataframe
+ df0 = pd.DataFrame()
+
+ if not os.path.exists(filepath):
+ raise FileNotFoundError(f'SIT file not found in {filepath}')
+
+ # for each sheet in the Excel file containing data...
+ for sheet, sheet_dict in config.get('sheet_dict').items():
+ sheetname = sheet_dict.get('sheetname', sheet)
+ tablename = sheet_dict.get('tablename')
+ if tablename:
+ sheetandtable = f'{sheetname}, {tablename}'
+ else:
+ sheetandtable = sheetname
+ tablename = sheet_dict.get('tablename', sheetname)
+ log.debug(f'Loading data from: {sheetname}...')
+ # read in data from Excel sheet
+ df = pd.read_excel(filepath,
+ sheet_name = sheetname,
+ header=sheet_dict.get('header', 2),
+ skiprows=range(sheet_dict.get('skiprowstart', 0),
+ sheet_dict.get('skiprowend', 0)),
+ usecols="B:AG",
+ nrows=sheet_dict.get('nrows'))
+ df.columns = df.columns.map(str)
+ df['ActivityProducedBy'] = df.iloc[:,0]
+
+ # for each row in the data table...
+ # ...emissions categories will be renamed with the format
+ # 'sheet name, emissions category'
+ # ...emissions subcategories will be renamed with the format
+ # 'sheet name, emissions category, emissions subcategory'
+ for ind in df.index:
+ current_header = df['ActivityProducedBy'][ind].strip()
+ # for level 1 headers...
+ if current_header in sheet_dict.get('headers'):
+ active_header = current_header
+ if sheet_dict.get('subgroup') == 'activitybyflow':
+ df.loc[ind, 'FlowName'] = active_header
+ elif sheet_dict.get('subgroup') == 'flow':
+ df.loc[ind, 'FlowName'] = 'Total N2O and CH4 Emissions'
+ df.loc[ind,'ActivityProducedBy'] = (
+ f'{sheetandtable}, {active_header}')
+ # for level 2 headers...
+ elif current_header not in sheet_dict.get('subsubheaders',''):
+ active_subheader = df['ActivityProducedBy'][ind].strip()
+ if sheet_dict.get('subgroup') == 'flow':
+ df.loc[ind, 'FlowName'] = active_subheader
+ df.loc[ind,'ActivityProducedBy'] = (
+ f'{sheetandtable}, {active_header}')
+ elif sheet_dict.get('subgroup') == 'activitybyflow':
+ df.loc[ind, 'FlowName'] = active_header
+ df.loc[ind,'ActivityProducedBy'] = (
+ f'{sheetandtable}, {active_subheader}')
+ else:
+ df.loc[ind,'ActivityProducedBy'] = (
+ f'{sheetandtable}, {active_header}, '
+ f'{active_subheader}')
+ # for level 3 headers (only occur in IndirectCO2 and Agriculture tabs)...
+ else:
+ subsubheader = df['ActivityProducedBy'][ind].strip()
+ df.loc[ind,'ActivityProducedBy'] = (
+ f'{sheetandtable}, {active_header}, '
+ f'{active_subheader}, {subsubheader}')
+
+ # drop all columns except the desired emissions year and the
+ # emissions activity source
+ df = df.filter([year, 'ActivityProducedBy', 'FlowName'])
+ # rename columns
+ df = df.rename(columns={year: 'FlowAmount'})
+ # add sheet-specific hardcoded data
+ if 'subgroup' not in sheet_dict:
+ df['FlowName'] = sheet_dict.get('flow')
+ df['Unit'] = sheet_dict.get('unit')
+ df['Description'] = sheetname
+
+ # concatenate dataframe from each sheet with existing master dataframe
+ df0 = pd.concat([df0, df])
+
+ # add general hardcoded data
+ df0['Class'] = 'Chemicals'
+ df0['SourceName'] = source
+ df0['FlowType'] = "ELEMENTARY_FLOW"
+ df0['Compartment'] = 'air'
+ df0['Year'] = year
+ df0['DataReliability'] = 5
+ df0['DataCollection'] = 5
+
+ # add state FIPS code
+ df0['State'] = state
+ df0['County'] = ''
+ df0 = apply_county_FIPS(df0, year='2015', source_state_abbrev=True)
+ # add FIPS location system
+ df0 = assign_fips_location_system(df0, '2015')
+
+ return df0
+
+if __name__ == '__main__':
+ import flowsa
+ flowsa.flowbyactivity.main(source='EPA_SIT', year='2017')
+ fba = flowsa.getFlowByActivity('EPA_SIT', '2017')
diff --git a/flowsa/data_source_scripts/EPA_StateGHGI.py b/flowsa/data_source_scripts/EPA_StateGHGI.py
new file mode 100644
index 000000000..8f59134f8
--- /dev/null
+++ b/flowsa/data_source_scripts/EPA_StateGHGI.py
@@ -0,0 +1,99 @@
+# EPA_StateGHGI.py (flowsa)
+# !/usr/bin/env python3
+# coding=utf-8
+"""
+Inventory of US GHGs from EPA disaggregated to States
+"""
+import json
+import pandas as pd
+from flowsa.settings import externaldatapath
+from flowsa.location import apply_county_FIPS
+from flowsa.flowbyfunctions import assign_fips_location_system
+import flowsa.exceptions
+
+
+def epa_state_ghgi_parse(*, source, year, config, **_):
+
+ try:
+ with open(externaldatapath + config.get('file')) as f:
+ data = json.load(f)
+ except FileNotFoundError:
+ raise FileNotFoundError('State GHGI data not yet available for '
+ 'external users')
+
+ data_df = pd.DataFrame(data)
+ activity_cols = ['SECTOR', 'SOURCE', 'SUBSOURCE', 'FUEL_TYPE',
+ 'SUB_REFERENCE', 'SECSUB_REFERENCE']
+
+ states = data_df[['STATE']].drop_duplicates()
+ flows = data_df[['GHG_NAME']].drop_duplicates()
+
+ df = data_df.melt(id_vars = activity_cols + ['STATE'] + ['GHG_NAME'],
+ value_vars=f'EMISSION_{year}',
+ var_name = 'Year',
+ value_name = 'FlowAmount')
+ df['Year'] = year
+ df['Unit'] = 'MMT CO2e' # TODO confirm units
+ df['FlowType'] = 'ELEMENTARY_FLOW'
+ df['SourceName'] = source
+ df['Class'] = 'Chemicals'
+ df['Compartment'] = 'air'
+
+ df.rename(columns={'STATE': 'State',
+ 'GHG_NAME': 'FlowName'},
+ inplace=True)
+
+ df['ActivityProducedBy'] = (df[activity_cols]
+ .apply(lambda row: ' - '.join(
+ row.values.astype(str)), axis=1))
+ df['ActivityProducedBy'] = (df['ActivityProducedBy']
+ .str.replace(' - None', ''))
+ df.drop(columns=activity_cols, inplace=True)
+ activities = df[['ActivityProducedBy']].drop_duplicates()
+
+ df['County'] = ''
+ df = apply_county_FIPS(df)
+ df = assign_fips_location_system(df, '2015')
+ df.drop(columns=['County'], inplace=True)
+
+ return df
+
+
+def remove_select_states(fba, source_dict, **_):
+ """
+ clean_fba_df_fxn to remove selected states so they can be added
+ from alternate sources. State abbreviations must be passed as list
+ in method parameter 'state_list'
+
+ :param fba: df
+ :param source_dict: dictionary of source methods includes 'state_list'
+ key of states to remove
+ """
+ state_list = source_dict.get('state_list')
+ state_df = pd.DataFrame(state_list, columns=['State'])
+ state_df['County'] =''
+ state_df = apply_county_FIPS(state_df)
+ df_subset = fba[~fba['Location'].isin(state_df['Location'])]
+ return df_subset
+
+
+def tag_biogenic_activities(fba, source_dict, **_):
+ """
+ clean_fba_before_mapping_df_fxn to tag emissions from passed activities
+    as biogenic. Activities passed as list in parameter 'activity_list'.
+ """
+ a_list = source_dict.get('activity_list')
+ if a_list is None:
+ raise flowsa.exceptions.FBSMethodConstructionError(
+ message="Activities to tag must be passed in FBS parameter "
+ "'activity_list'")
+ fba.loc[fba['ActivityProducedBy'].isin(a_list),
+ 'FlowName'] = fba['FlowName'] + ' - biogenic'
+
+ return fba
+
+
+if __name__ == '__main__':
+ import flowsa
+ flowsa.flowbyactivity.main(source='EPA_StateGHGI', year='2017')
+ fba = flowsa.getFlowByActivity('EPA_StateGHGI', '2017')
diff --git a/flowsa/data_source_scripts/README.md b/flowsa/data_source_scripts/README.md
index 64e6bc452..4837492e2 100644
--- a/flowsa/data_source_scripts/README.md
+++ b/flowsa/data_source_scripts/README.md
@@ -1,85 +1,27 @@
-# Datapull Descriptions
-Descriptions of the type of data pulled from each data source and the information in the
-FlowByActivity parquet files.
-
-## BLS_QCEW
-US Bureau of Labor Statistics, Quarterly Census of Employment and Wages
-
-## Census_CBP
-US Census Bureau, County Business Patterns
-
-## Census_PEP_Population
-US Census Bureau, Population Estimates Program, Population
-
-## EIA_CBECS_Land
-US Energy Information Administration, Commercial Buildings Energy Consumption Survey, Land
-
-## EIA_CBECS_Water
-US Energy Information Administration, Commercial Buildings Energy Consumption Survey, Water
-
-## [EIA_MECS_Energy](https://www.eia.gov/consumption/manufacturing/)
-US Energy Information Administration, Manufacturing Energy Consumption Survey
-- Energy (Tables 2.1, 2.2, 3.1, 3.2)
-- National and regional (4 Census regions)
-- Flows in energy units (MJ) and physical units (varies), represents duplicate data
-- Fuel consumption Class: Energy
-- Nonfuel consumption (feedstock) Class: Other
-
-## [EPA_NEI](https://www.epa.gov/air-emissions-inventories/national-emissions-inventory-nei)
-Environmental Protection Agency National Emissions Inventory
-- Nonpoint, Nonroad, Onroad emissions
-- County level
-
-## NOAA_FisheryLandings
-National Oceanic and Atmospheric Administration, Fishery Landings
-
-## [StatCan_IWS_MI]('https://www150.statcan.gc.ca/t1/tbl1/en/tv.action?pid=3810003701')
-Statistics Canada, Industrial Water Survey, Manufacturing Industries
-- Table: 38-10-0037-01 (formerly CANSIM 153-0047)
-
-## [USDA_CoA_Cropland]('https://www.nass.usda.gov/AgCensus/')
-US Department of Agriculture, Census of Agriculture, Cropland
-- National, state, county levels
-- Total cropland and pastureland
-- Harvested cropland and pastureland
-- Harvested, irrigated cropland and pastureland
-
-## [USDA_CoA_Livestock]('https://www.nass.usda.gov/AgCensus/')
-US Department of Agriculture, Census of Agriculture, Livestock
-- Tables 12, 15-17, 19, 27, 27-30, 32 (2017 report @ national level)
-- National, state, county levels
-- Livestock inventory for animal types
-
-## [USDA_ERS_FIWS]('https://www.ers.usda.gov/data-products/farm-income-and-wealth-statistics/data-files-us-and-state-level-farm-income-and-wealth-statistics/')
-US Department of Agriculture, Economic Research Service, Farm Income and Wealth Statistics
-- National, state level
-- Cash Receipts by commodity for US crops and animals
-
-## [USDA_ERS_MLU]('https://www.ers.usda.gov/data-products/major-land-uses/')
-US Department of Agriculture, Economic Research Service, Major Land Use
-- National level
-- Major uses of public/private land for 15 land use categories in Thousand Acres
-
-## [USDA_IWMS]('https://www.nass.usda.gov/Publications/AgCensus/2017/Online_Resources/Farm_and_Ranch_Irrigation_Survey/index.php')
-US Department of Agriculture, Irrigation and Water Management Survey
-- Table 36: Field Water Distribution for Selected Crops Harvested in the Open and Irrigated
- Pastureland: 2018 and 2013 (2018 report)
-- National, State level
-- Water Application rates in Average acre-feet applied per acre by crop type
-
-## [USGS_NWIS_WU]('https://waterdata.usgs.gov/nwis')
-US Geological Survey, National Water Information System, Water Use
-- National, State, County level water withdrawals for the US, originally in million gallons per day
-- Water withdrawals for ground/surface/total water, fresh/saline/total water
-- Withdrawals for Aquaculture, Public Supply, Domestic Deliveries, Livestock, Irrigation (Crop and Golf),
- Thermoelectric Power, Industrial, Mining
-
-## [USGS_WU_Coef]('https://pubs.er.usgs.gov/publication/sir20095041')
-US Geological Survey, Water Use Coefficients
-- Source: Lovelace, John K., 2009, Method for estimating water withdrawals for livestock in the
- United States, 2005: U.S. Geological Survey Scientific Investigations Report 2009–5041, 7 p.
-- Table 1
-- Livestock water use originally provided in gallons/animal/day for 9 animal types based on 2005 USGS NWIS WU
-- Ground and surface water associated with livestock watering, feedlots, dairy operations, and other on-farm needs.
- Water for drinking, cooling, sanitation, waste disposal, and other needs.
-
+# Data Source Scripts
+The Python files in the `data_source_scripts` folder include functions
+specific to each Flow-By-Activity (FBA) dataset. These functions are used to
+help load, call, and parse the FBAs. These files can also contain functions
+used in Flow-By-Sector generation.
+
+The functions in these files are called on in FBA and FBS method yamls
+using the tag `!script_function:PythonFileName FunctionName`
+where _PythonFileName_ is the name of the Python file (e.g.,
+"BLS_QCEW.py") and _FunctionName_ is the name of the function
+(e.g., "bls_clean_allocation_fba_w_sec").
+
+```
+target_sector_level: NAICS_6
+target_sector_source: NAICS_2012_Code
+target_geoscale: national
+source_names:
+ "BLS_QCEW":
+ data_format: 'FBA'
+ class: Employment
+ geoscale_to_use: national
+ source_fba_load_scale: national
+ year: 2017
+ clean_fba_df_fxn: !script_function:BLS_QCEW clean_bls_qcew_fba_for_employment_sat_table
+ clean_fba_w_sec_df_fxn: !script_function:BLS_QCEW bls_clean_allocation_fba_w_sec
+ activity_sets:
+```
diff --git a/flowsa/data_source_scripts/USDA_ACUP.py b/flowsa/data_source_scripts/USDA_ACUP.py
index 24b591a55..3f0bf9629 100644
--- a/flowsa/data_source_scripts/USDA_ACUP.py
+++ b/flowsa/data_source_scripts/USDA_ACUP.py
@@ -33,13 +33,9 @@ def acup_url_helper(*, build_url, config, **_):
state_abbrevs = abbrev_us_state
state_abbrevs = {k: v for (k, v) in state_abbrevs.items() if k != "DC"}
- for x in config['domain_levels']:
- for y in state_abbrevs:
- url = build_url
- url = url.replace("__domainLevel__", x)
- url = url.replace("__stateAlpha__", y)
- url = url.replace(" ", "%20")
- urls.append(url)
+ url = build_url
+ url = url.replace(" ", "%20")
+ urls.append(url)
return urls
@@ -52,12 +48,7 @@ def acup_call(*, resp, **_):
:return: pandas dataframe of original source data
"""
response_json = json.loads(resp.text)
- # not all states have data, so return empty df if does not exist
- try:
- df = pd.DataFrame(data=response_json["data"])
- except KeyError:
- log.info('No data exists for state')
- df = []
+ df = pd.DataFrame(data=response_json["data"])
return df
diff --git a/flowsa/data_source_scripts/USDA_CoA_Cropland.py b/flowsa/data_source_scripts/USDA_CoA_Cropland.py
index 45f41e18a..535320025 100644
--- a/flowsa/data_source_scripts/USDA_CoA_Cropland.py
+++ b/flowsa/data_source_scripts/USDA_CoA_Cropland.py
@@ -9,17 +9,16 @@
import json
import numpy as np
import pandas as pd
-from flowsa.location import US_FIPS, abbrev_us_state
-from flowsa.common import WITHDRAWN_KEYWORD, \
- fbs_default_grouping_fields, fbs_fill_na_dict, \
- fba_wsec_default_grouping_fields
-from flowsa.schema import flow_by_sector_fields
-from flowsa.flowbyfunctions import assign_fips_location_system, sector_aggregation, \
- sector_disaggregation, sector_ratios, \
- load_fba_w_standardized_units, equally_allocate_suppressed_parent_to_child_naics
-from flowsa.allocation import allocate_by_sector, equally_allocate_parent_to_child_naics
+from flowsa.allocation import allocate_by_sector, \
+ equally_allocate_parent_to_child_naics, equal_allocation
+from flowsa.common import WITHDRAWN_KEYWORD, fba_wsec_default_grouping_fields
from flowsa.dataclean import replace_NoneType_with_empty_cells, \
- replace_strings_with_NoneType, clean_df
+ replace_strings_with_NoneType
+from flowsa.flowbyfunctions import assign_fips_location_system, \
+ sector_aggregation, sector_disaggregation, sector_ratios, \
+ load_fba_w_standardized_units, \
+ equally_allocate_suppressed_parent_to_child_naics
+from flowsa.location import US_FIPS, abbrev_us_state
from flowsa.sectormapping import add_sectors_to_flowbyactivity
from flowsa.validation import compare_df_units
@@ -188,6 +187,13 @@ def coa_cropland_parse(*, df_list, year, **_):
", ALL PRODUCTION PRACTICES", "", regex=True)
df.loc[:, 'FlowName'] = df['FlowName'].str.replace(
", IN THE OPEN", "", regex=True)
+    # want to include "harvested" in the flowname when "harvested" is
+ # included in the class_desc
+ df['FlowName'] = np.where(df['class_desc'].str.contains(', HARVESTED'),
+ df['FlowName'] + " HARVESTED", df['FlowName'])
+ # reorder
+ df['FlowName'] = np.where(df['FlowName'] == 'AREA, IRRIGATED HARVESTED',
+ 'AREA HARVESTED, IRRIGATED', df['FlowName'])
# combine column information to create activity
# information, and create two new columns for activities
df['Activity'] = df['commodity_desc'] + ', ' + df['class_desc'] + ', ' + \
@@ -241,42 +247,27 @@ def coa_cropland_parse(*, df_list, year, **_):
return df
-def coa_irrigated_cropland_fba_cleanup(fba, **kwargs):
- """
- When using irrigated cropland, aggregate sectors to cropland and total
- ag land. Doing this because published values for irrigated harvested
- cropland do not include the water use for vegetables, woody crops, berries.
- :param fba: df, COA FBA format
- :return: df, COA with dropped rows based on ActivityConsumedBy column
- """
-
- fba =\
- fba[~fba['ActivityConsumedBy'].isin(['AG LAND',
- 'AG LAND, CROPLAND, HARVESTED']
- )].reset_index(drop=True)
-
- return fba
-
-
def coa_nonirrigated_cropland_fba_cleanup(fba, **kwargs):
"""
- When using irrigated cropland, aggregate sectors to cropland and total
- ag land. Doing this because published values for irrigated harvested
- cropland do not include the water use for vegetables, woody crops, berries.
+ Cleanup coa cropland data for nonirrigated crops
:param fba: df, COA when using non-irrigated data
:return: df, COA nonirrigated data, modified
"""
- # drop rows of data that contain certain strings
- fba = fba[~fba['ActivityConsumedBy'].isin(
- ['AG LAND', 'AG LAND, CROPLAND, HARVESTED'])]
-
# when include 'area harvested' and 'area in production' in
# single dataframe, which is necessary to include woody crops,
# 'vegetable totals' are double counted
fba = fba[~((fba['FlowName'] == 'AREA IN PRODUCTION') &
(fba['ActivityConsumedBy'] == 'VEGETABLE TOTALS'))]
+ # When using a mix of flow names, drop activities for ag land (naics 11)
+    # and ag land, cropland, harvested (naics 111), because published values
+ # for harvested cropland do not include data for vegetables, woody crops,
+ # berries. Values for sectors 11 and 111 will be aggregated from the
+ # dataframe later
+ fba = fba[~fba['ActivityConsumedBy'].isin(
+ ['AG LAND', 'AG LAND, CROPLAND, HARVESTED'])].reset_index(drop=True)
+
return fba
@@ -294,7 +285,6 @@ def disaggregate_coa_cropland_to_6_digit_naics(
# define the activity and sector columns to base modifications on
# these definitions will vary dependent on class type
- activity_col = 'ActivityConsumedBy'
sector_col = 'SectorConsumedBy'
# drop rows without assigned sectors
@@ -302,8 +292,7 @@ def disaggregate_coa_cropland_to_6_digit_naics(
~fba_w_sector[sector_col].isna()].reset_index(drop=True)
# modify the flowamounts related to the 6 naics 'orchards' are mapped to
- fba_w_sector = modify_orchard_flowamounts(
- fba_w_sector, activity_column=activity_col)
+ fba_w_sector = equal_allocation(fba_w_sector)
# use ratios of usda 'land in farms' to determine animal use of
# pasturelands at 6 digit naics
@@ -312,38 +301,55 @@ def disaggregate_coa_cropland_to_6_digit_naics(
sector_column=sector_col,
download_FBA_if_missing=kwargs['download_FBA_if_missing'])
- # use ratios of usda 'harvested cropland' to determine missing 6 digit naics
- fba_w_sector = disaggregate_cropland(fba_w_sector, attr,
- method, year=attr['allocation_source_year'],
- sector_column=sector_col,
- download_FBA_if_missing=kwargs['download_FBA_if_missing'])
+ # use ratios of usda 'harvested cropland' to determine missing 6 digit
+ # naics
+ fba_w_sector = disaggregate_cropland(
+ fba_w_sector, attr, method, year=attr['allocation_source_year'],
+ sector_column=sector_col, download_FBA_if_missing=kwargs[
+ 'download_FBA_if_missing'])
return fba_w_sector
def disaggregate_coa_cropland_to_6_digit_naics_for_water_withdrawal(
- fba_w_sector, attr, method, **kwargs):
+ fba_w_sector_load, attr, method, **kwargs):
"""
Disaggregate usda coa cropland to naics 6
- :param fba_w_sector: df, CoA cropland data, FBA format with sector columns
+ :param fba_w_sector_load: df, CoA cropland data, FBA format with sector
+ columns
:param attr: dictionary, attribute data from method yaml for activity set
:param method: dictionary, FBS method yaml
- :param kwargs: dictionary, arguments that might be required for other functions.
- Currently includes data source name.
+ :param kwargs: dictionary, arguments that might be required for other
+ functions. Currently includes data source name.
:return: df, CoA cropland with disaggregated NAICS sectors
"""
# define the activity and sector columns to base modifications on
# these definitions will vary dependent on class type
- activity_col = 'ActivityConsumedBy'
sector_col = 'SectorConsumedBy'
# drop rows without assigned sectors
- fba_w_sector = fba_w_sector[~fba_w_sector[sector_col].isna()].reset_index(drop=True)
+ fba_w_sector = fba_w_sector_load[~fba_w_sector_load[sector_col].isna()]\
+ .reset_index(drop=True)
# modify the flowamounts related to the 6 naics 'orchards' are mapped to
- fba_w_sector = modify_orchard_flowamounts(
- fba_w_sector, activity_column=activity_col)
+ fba_w_sector = equal_allocation(fba_w_sector)
+
+ # todo: add back in once suppression fxn modified to accept non-naics
+ # like activities and mixed level final naics (naics6 and naics7)
+ # then estimate any suppressed data by equally allocating parent to
+ # child naics
+ # groupcols = list(fba_w_sector3.select_dtypes(
+ # include=['object', 'int']).columns)
+ # fba_w_sector = equally_allocate_suppressed_parent_to_child_naics(
+ # fba_w_sector, method, 'SectorConsumedBy', groupcols)
+
+ # When using irrigated cropland, aggregate sectors to cropland and total
+ # ag land. Doing this because published values for irrigated harvested
+ # cropland do not include the water use for vegetables, woody crops,
+ # berries.
+ fba_w_sector = fba_w_sector[~fba_w_sector['ActivityConsumedBy'].isin(
+ ['AG LAND', 'AG LAND, CROPLAND, HARVESTED'])].reset_index(drop=True)
# use ratios of usda 'land in farms' to determine animal use of
# pasturelands at 6 digit naics
@@ -362,24 +368,6 @@ def disaggregate_coa_cropland_to_6_digit_naics_for_water_withdrawal(
return fba_w_sector
-def modify_orchard_flowamounts(fba, activity_column):
- """
- In the CoA cropland crosswalk, the activity 'orchards' is mapped
- to eight 6-digit naics. Therefore, after mapping,
- divide the orchard flow amount by 8.
- :param fba: A FlowByActiivty df mapped to sectors
- :param activity_column: The activity column to base FlowAmount
- modifications on (ActivityProducedBy or ActivityConsumedBy)
- :return: df, CoA cropland data with modified FlowAmounts
- """
-
- # divide the Orchards data allocated to NAICS by 6 to avoid double counting
- fba.loc[fba[activity_column] == 'ORCHARDS',
- 'FlowAmount'] = fba['FlowAmount'] / 8
-
- return fba
-
-
def disaggregate_pastureland(fba_w_sector, attr, method, year,
sector_column, download_FBA_if_missing, **kwargs):
"""
@@ -422,14 +410,18 @@ def disaggregate_pastureland(fba_w_sector, attr, method, year,
df_f = df_f[~df_f['ActivityConsumedBy'].str.contains('&')]
if 'parameter_drop' in kwargs:
# drop aquaculture because pastureland not used for aquaculture
- df_f = df_f[~df_f['ActivityConsumedBy'].isin(
- kwargs['parameter_drop'])]
+ # drop any activities at a more aggregated sector level because
+ # will need to be reaggregated after dropping a parameter to
+ # accurately calculate the allocation ratios
+ drop_list = [sub[ : -1] for sub in kwargs['parameter_drop']]
+ drop_list = drop_list + kwargs['parameter_drop']
+ df_f = df_f[~df_f['ActivityConsumedBy'].isin(drop_list)]
# create sector columns
df_f = add_sectors_to_flowbyactivity(
df_f, sectorsourcename=method['target_sector_source'])
# estimate suppressed data by equal allocation
df_f = equally_allocate_suppressed_parent_to_child_naics(
- df_f, 'SectorConsumedBy', fba_wsec_default_grouping_fields)
+ df_f, method, 'SectorConsumedBy', fba_wsec_default_grouping_fields)
# create proportional ratios
group_cols = [e for e in fba_wsec_default_grouping_fields if
e not in ('ActivityProducedBy', 'ActivityConsumedBy')]
@@ -458,8 +450,7 @@ def disaggregate_pastureland(fba_w_sector, attr, method, year,
# original fba_w_sector
fba_w_sector = fba_w_sector[fba_w_sector[sector_column].apply(
lambda x: x[0:3]) != '112'].reset_index(drop=True)
- fba_w_sector = pd.concat([fba_w_sector, df],
- sort=True).reset_index(drop=True)
+ fba_w_sector = pd.concat([fba_w_sector, df]).reset_index(drop=True)
# fill empty cells with NoneType
fba_w_sector = replace_strings_with_NoneType(fba_w_sector)
@@ -512,17 +503,12 @@ def disaggregate_cropland(fba_w_sector, attr, method, year,
naics, sectorsourcename=method['target_sector_source'])
# estimate suppressed data by equally allocating parent to child naics
naics = equally_allocate_suppressed_parent_to_child_naics(
- naics, 'SectorConsumedBy', fba_wsec_default_grouping_fields)
- # add missing fbs fields
- naics = clean_df(naics, flow_by_sector_fields, fbs_fill_na_dict)
+ naics, method, 'SectorConsumedBy', fba_wsec_default_grouping_fields)
# aggregate sectors to create any missing naics levels
- group_cols = fbs_default_grouping_fields
naics2 = sector_aggregation(naics)
# add missing naics5/6 when only one naics5/6 associated with a naics4
naics3 = sector_disaggregation(naics2)
- # drop rows where FlowAmount 0
- naics3 = naics3.loc[naics3['FlowAmount'] != 0]
# create ratios
naics4 = sector_ratios(naics3, sector_column)
# create temporary sector column to match the two dfs on
@@ -577,7 +563,7 @@ def disaggregate_cropland(fba_w_sector, attr, method, year,
# tmp drop Nonetypes
df_subset = replace_NoneType_with_empty_cells(df_subset)
# add new rows of data to crop df
- crop = pd.concat([crop, df_subset], sort=True).reset_index(drop=True)
+ crop = pd.concat([crop, df_subset]).reset_index(drop=True)
# clean up df
crop = crop.drop(columns=['Location_tmp'])
@@ -590,7 +576,7 @@ def disaggregate_cropland(fba_w_sector, attr, method, year,
fba_w_sector.loc[fba_w_sector[sector_column].apply(
lambda x: x[0:3]) == '112'].reset_index(drop=True)
# concat crop and pasture
- fba_w_sector = pd.concat([pasture, crop], sort=True).reset_index(drop=True)
+ fba_w_sector = pd.concat([pasture, crop]).reset_index(drop=True)
# fill empty cells with NoneType
fba_w_sector = replace_strings_with_NoneType(fba_w_sector)
diff --git a/flowsa/data_source_scripts/USDA_CoA_Cropland_NAICS.py b/flowsa/data_source_scripts/USDA_CoA_Cropland_NAICS.py
index 01a55c693..004147140 100644
--- a/flowsa/data_source_scripts/USDA_CoA_Cropland_NAICS.py
+++ b/flowsa/data_source_scripts/USDA_CoA_Cropland_NAICS.py
@@ -164,6 +164,8 @@ def coa_cropland_naics_fba_wsec_cleanup(fba_w_sector, **kwargs):
:return: df, flowbyactivity with modified values
"""
+ method = kwargs.get('method')
df = equally_allocate_suppressed_parent_to_child_naics(
- fba_w_sector, 'SectorConsumedBy', fba_wsec_default_grouping_fields)
+ fba_w_sector, method, 'SectorConsumedBy',
+ fba_wsec_default_grouping_fields)
return df
diff --git a/flowsa/data_source_scripts/USDA_ERS_MLU.py b/flowsa/data_source_scripts/USDA_ERS_MLU.py
index aed702aac..9f5b1a868 100644
--- a/flowsa/data_source_scripts/USDA_ERS_MLU.py
+++ b/flowsa/data_source_scripts/USDA_ERS_MLU.py
@@ -13,7 +13,7 @@
import numpy as np
from flowsa.location import get_all_state_FIPS_2, US_FIPS
from flowsa.settings import vLogDetailed
-from flowsa.flowbyfunctions import assign_fips_location_system
+from flowsa.flowbyfunctions import assign_fips_location_system, aggregator
from flowsa.common import load_crosswalk
from flowsa.literature_values import \
get_area_of_rural_land_occupied_by_houses_2013, \
@@ -195,7 +195,13 @@ def allocate_usda_ers_mlu_land_in_urban_areas(df, attr, fbs_list):
[df_residential, df_openspace, df_airport, df_railroad, df_highway2],
ignore_index=True, sort=False).reset_index(drop=True)
- return allocated_urban_areas_df
+    # aggregate because multiple rows of household data exist due to residential
+ # land area and highway fee shares
+ groupcols = list(df.select_dtypes(include=['object', 'int']).columns)
+ allocated_urban_areas_df_2 = aggregator(allocated_urban_areas_df,
+ groupcols)
+
+ return allocated_urban_areas_df_2
def allocate_usda_ers_mlu_land_in_rural_transportation_areas(
diff --git a/flowsa/data_source_scripts/USGS_MYB.py b/flowsa/data_source_scripts/USGS_MYB.py
index 05a063814..66fa6880b 100644
--- a/flowsa/data_source_scripts/USGS_MYB.py
+++ b/flowsa/data_source_scripts/USGS_MYB.py
@@ -1754,6 +1754,11 @@ def usgs_iodine_call(*, resp, year, **_):
df_data.columns = ["Production", "space_1", "year_1", "space_2",
"year_2", "space_3", "year_3", "space_4",
"year_4", "space_5", "year_5"]
+ elif len(df_data. columns) == 13:
+ df_data.columns = ["Production", "unit", "space_1", "year_1", "space_2",
+ "year_2", "space_3", "year_3", "space_4",
+ "year_4", "space_5", "year_5", "space_6"]
+
col_to_use = ["Production"]
col_to_use.append(usgs_myb_year(YEARS_COVERED['iodine'], year))
@@ -1819,7 +1824,7 @@ def usgs_iron_ore_call(*, resp, year, **_):
:return: pandas dataframe of original source data
"""
df_raw_data = pd.io.excel.read_excel(io.BytesIO(resp.content),
- sheet_name='T1 ')
+ sheet_name='T1')
df_data = pd.DataFrame(df_raw_data.loc[7:25]).reindex()
df_data = df_data.reset_index()
del df_data["index"]
@@ -1972,15 +1977,14 @@ def usgs_lead_url_helper(*, year, **_):
format
"""
if int(year) < 2013:
- build_url = ('https://prd-wret.s3.us-west-2.amazonaws.com/assets/'
+ build_url = ('https://d9-wret.s3.us-west-2.amazonaws.com/assets/'
'palladium/production/atoms/files/myb1-2016-lead.xls')
elif int(year) < 2014:
- build_url = ('https://prd-wret.s3.us-west-2.amazonaws.com/assets/'
+ build_url = ('https://d9-wret.s3.us-west-2.amazonaws.com/assets/'
'palladium/production/atoms/files/myb1-2017-lead.xls')
else:
- build_url = ('https://s3-us-west-2.amazonaws.com/prd-wret/assets/'
- 'palladium/production/atoms/files/myb1-2018-lead-adv'
- '.xlsx')
+ build_url = ('https://d9-wret.s3.us-west-2.amazonaws.com/assets/'
+ 'palladium/production/s3fs-public/media/files/myb1-2018-lead-advrel.xlsx')
url = build_url
return [url]
@@ -3109,7 +3113,15 @@ def usgs_platinum_call(*, resp, year, **_):
"year_1", "space_2", "year_2", "space_3",
"year_3", "space_4", "year_4", "space_5",
"year_5"]
-
+ elif len(df_data_1. columns) == 12:
+ df_data_1.columns = ["Production", "Units", "space_1",
+ "year_1", "space_2", "year_2", "space_3",
+ "year_3", "space_4", "year_4", "space_5",
+ "year_5"]
+ df_data_2.columns = ["Production", "Units", "space_1",
+ "year_1", "space_2", "year_2", "space_3",
+ "year_3", "space_4", "year_4", "space_5",
+ "year_5"]
col_to_use = ["Production"]
col_to_use.append(usgs_myb_year(YEARS_COVERED['platinum'], year))
for col in df_data_1.columns:
@@ -3823,8 +3835,8 @@ def soda_call(*, resp, year, **_):
"""
col_to_use = ["Production", "NAICS code", "End use", "year_5", "total"]
-
- if str(year) in YEARS_COVERED['sodaash_T4']:
+ years_covered = YEARS_COVERED['sodaash_t4']
+ if str(year) in years_covered:
df_raw_data = pd.io.excel.read_excel(io.BytesIO(resp.content),
sheet_name='T4')
df_data_one = pd.DataFrame(df_raw_data.loc[7:25]).reindex()
@@ -3855,7 +3867,7 @@ def soda_call(*, resp, year, **_):
"year_2", "space_3", "year_3", "space_4",
"year_4", "space_5", "year_5"]
- if str(year) in YEARS_COVERED['sodaash_T4']:
+ if str(year) in years_covered:
for col in df_data_one.columns:
if col not in col_to_use:
del df_data_one[col]
@@ -3864,7 +3876,7 @@ def soda_call(*, resp, year, **_):
if col not in col_to_use:
del df_data_two[col]
- if str(year) in YEARS_COVERED['sodaash_T4']:
+ if str(year) in years_covered:
frames = [df_data_one, df_data_two]
else:
frames = [df_data_two]
@@ -4467,6 +4479,10 @@ def usgs_vermiculite_call(*, resp, year, **_):
df_data_one.columns = ["Production", "Unit", "space_2", "year_1",
"space_3", "year_2", "space_4", "year_3",
"space_5", "year_4", "space_6", "year_5"]
+ elif len(df_data_one. columns) == 13:
+ df_data_one.columns = ["Production", "Unit", "space_2", "year_1",
+ "space_3", "year_2", "space_4", "year_3",
+ "space_5", "year_4", "space_6", "year_5", "space_7"]
col_to_use = ["Production"]
col_to_use.append(usgs_myb_year(YEARS_COVERED['vermiculite'], year))
diff --git a/flowsa/data_source_scripts/USGS_NWIS_WU.py b/flowsa/data_source_scripts/USGS_NWIS_WU.py
index 1bb1ee170..5fcf66f36 100644
--- a/flowsa/data_source_scripts/USGS_NWIS_WU.py
+++ b/flowsa/data_source_scripts/USGS_NWIS_WU.py
@@ -12,7 +12,7 @@
from flowsa.location import abbrev_us_state, US_FIPS
from flowsa.common import fba_activity_fields, capitalize_first_letter
from flowsa.settings import vLogDetailed
-from flowsa.flowbyfunctions import assign_fips_location_system
+from flowsa.flowbyfunctions import assign_fips_location_system, aggregator
from flowsa.validation import compare_df_units, \
calculate_flowamount_diff_between_dfs
@@ -354,10 +354,10 @@ def standardize_usgs_nwis_names(flowbyactivity_df):
return flowbyactivity_df
-def usgs_fba_data_cleanup(df):
+def usgs_fba_data_cleanup(fba, **_):
"""
Clean up the dataframe to prepare for flowbysector. Used in flowbysector.py
- :param df: df, FBA format
+ :param fba: df, FBA format
:return: df, modified FBA
"""
@@ -368,9 +368,9 @@ def usgs_fba_data_cleanup(df):
vLogDetailed.info('Removing all rows for Commercial Data because does not '
'exist for all states and causes issues as information '
'on Public Supply deliveries.')
- dfa = df[~df['Description'].str.lower().str.contains(
+ dfa = fba[~fba['Description'].str.lower().str.contains(
'commercial|closed-loop cooling|once-through')]
- calculate_flowamount_diff_between_dfs(df, dfa)
+ calculate_flowamount_diff_between_dfs(fba, dfa)
# calculated NET PUBLIC SUPPLY by subtracting out deliveries to domestic
vLogDetailed.info('Modify the public supply values to generate '
'NET public supply by subtracting out deliveries '
@@ -542,6 +542,50 @@ def check_golf_and_crop_irrigation_totals(df_load):
# drop national data
df = df_load[df_load['Location'] != '00000']
+ df_m2 = subset_and_merge_irrigation_types(df)
+
+ df_m3 = df_m2[df_m2['Diff'] > 0].reset_index(drop=True)
+
+ # rename irrigation to irrigation crop and append rows to df
+ df_m3.loc[df_m3['ActivityProducedBy'] ==
+ 'Irrigation', 'ActivityProducedBy'] = 'Irrigation Crop'
+ df_m3.loc[df_m3['ActivityConsumedBy'] ==
+ 'Irrigation', 'ActivityConsumedBy'] = 'Irrigation Crop'
+ df_m3['Description'] = df_m3['Description'].str.replace(
+ 'Irrigation, Total', 'Irrigation, Crop').str.replace(
+ 'withdrawals', 'withdrawals for crops').str.replace(
+ 'use', 'use for crops')
+ df_m3 = df_m3.drop(columns=['Golf_Amount', 'Golf_APB', 'Golf_ACB',
+ 'Crop_Amount', 'Crop_APB',
+ 'Crop_ACB', 'subset_sum', 'FlowAmount',
+ 'Crop_Description'])
+ df_m3 = df_m3.rename(columns={'Diff': 'FlowAmount'})
+
+ if len(df_m3) != 0:
+ df_w_missing_crop = pd.concat([df_load, df_m3], ignore_index=True)
+
+ group_cols = list(df.select_dtypes(include=['object', 'int']).columns)
+ df_w_missing_crop = aggregator(df_w_missing_crop, group_cols,
+ retain_zeros=True)
+
+ # validate results - the differences should all be 0
+ df_check = subset_and_merge_irrigation_types(df_w_missing_crop)
+ df_check = df_check[df_check['Location'] != US_FIPS].reset_index(
+ drop=True)
+ df_check['Diff'] = df_check['Diff'].apply(lambda x: round(x, 2))
+ df_check2 = df_check[df_check['Diff'] != 0]
+ if len(df_check2) > 0:
+ vLogDetailed.info('The golf and crop irrigation do not add up to '
+ 'total irrigation.')
+ else:
+ vLogDetailed.info('The golf and crop irrigation add up to total '
+ 'irrigation.')
+ return df_w_missing_crop
+ else:
+ return df_load
+
+
+def subset_and_merge_irrigation_types(df):
# subset into golf, crop, and total irrigation (and non irrigation)
df_i = df[(df[fba_activity_fields[0]] == 'Irrigation') |
(df[fba_activity_fields[1]] == 'Irrigation')]
@@ -572,7 +616,7 @@ def check_golf_and_crop_irrigation_totals(df_load):
df_m2 = pd.merge(df_m,
df_c[['FlowName', 'FlowAmount', 'ActivityProducedBy',
'ActivityConsumedBy', 'Compartment',
- 'Location', 'Year']],
+ 'Location', 'Year', 'Description']],
how='outer',
right_on=['FlowName', 'Compartment', 'Location', 'Year'],
left_on=['FlowName', 'Compartment', 'Location', 'Year'])
@@ -581,29 +625,17 @@ def check_golf_and_crop_irrigation_totals(df_load):
"ActivityConsumedBy_x": "ActivityConsumedBy",
"FlowAmount_y": "Crop_Amount",
"ActivityProducedBy_y": "Crop_APB",
- "ActivityConsumedBy_y": "Crop_ACB"})
+ "ActivityConsumedBy_y": "Crop_ACB",
+ "Description_x": 'Description',
+ "Description_y": "Crop_Description"})
# fill na and sum crop and golf
- # df_m2 = df_m2.fillna(0)
+ for col in df_m2:
+ if df_m2[col].dtype in ("int", "float"):
+ df_m2[col] = df_m2[col].fillna(0)
df_m2['subset_sum'] = df_m2['Crop_Amount'] + df_m2['Golf_Amount']
df_m2['Diff'] = df_m2['FlowAmount'] - df_m2['subset_sum']
- df_m3 = df_m2[df_m2['Diff'] >= 0.000001].reset_index(drop=True)
-
- # rename irrigation to irrgation crop and append rows to df
- df_m3.loc[df_m3['ActivityProducedBy'] ==
- 'Irrigation', 'ActivityProducedBy'] = 'Irrigation Crop'
- df_m3.loc[df_m3['ActivityConsumedBy'] ==
- 'Irrigation', 'ActivityConsumedBy'] = 'Irrigation Crop'
- df_m3 = df_m3.drop(columns=['Golf_Amount', 'Golf_APB', 'Golf_ACB',
- 'Crop_Amount', 'Crop_APB',
- 'Crop_ACB', 'subset_sum', 'Diff'])
-
- if len(df_m3) != 0:
- df_w_missing_crop = pd.concat([df_load, df_m3], sort=True,
- ignore_index=True)
- return df_w_missing_crop
- else:
- return df_load
+ return df_m2
def usgs_fba_w_sectors_data_cleanup(df_wsec, attr, **kwargs):
@@ -680,7 +712,7 @@ def modify_sector_length(df_wsec):
df2 = df2.drop(columns=["LengthToModify", 'TargetLength'])
- df = pd.concat([df1, df2], sort=True)
+ df = pd.concat([df1, df2])
return df
else:
return df1
diff --git a/flowsa/data_source_scripts/stateio.py b/flowsa/data_source_scripts/stateio.py
new file mode 100644
index 000000000..d3aa77771
--- /dev/null
+++ b/flowsa/data_source_scripts/stateio.py
@@ -0,0 +1,80 @@
+# stateio.py (flowsa)
+# !/usr/bin/env python3
+# coding=utf-8
+"""
+Supporting functions for accessing files from stateior via data commons.
+https://github.com/USEPA/stateior
+"""
+
+import os
+import pandas as pd
+
+from esupy.processed_data_mgmt import download_from_remote, Paths,\
+ load_preprocessed_output
+from flowsa.metadata import set_fb_meta
+from flowsa.location import us_state_abbrev, apply_county_FIPS
+from flowsa.flowbyfunctions import assign_fips_location_system
+
+
+def parse_statior(*, source, year, config, **_):
+ """parse_response_fxn for stateio make and use tables"""
+ # Prepare meta for downloading stateior datasets
+ name = config.get('datatype')
+ fname = f"{name}_{year}"
+ meta = set_fb_meta(fname, "")
+ meta.tool = 'stateio'
+ meta.ext = 'rds'
+ stateio_paths = Paths()
+ stateio_paths.local_path = os.path.realpath(stateio_paths.local_path +
+ "/stateio")
+ # Download and load the latest version from remote
+ download_from_remote(meta, stateio_paths)
+ states = load_preprocessed_output(meta, stateio_paths)
+
+ data_dict = {}
+
+ # uses rpy2
+ # this .rds is stored as a list of named dataframes by state
+ for state in us_state_abbrev.keys():
+ df = states.rx2(state)
+ df2 = df.melt(ignore_index=False, value_name = 'FlowAmount',
+ var_name = 'ActivityConsumedBy')
+ df2['ActivityProducedBy'] = df2.index
+ if source == 'stateio_Make_Summary':
+ # Adjust the index by removing the state: STATE.SECTOR
+ df2['ActivityProducedBy'] = df2[
+ 'ActivityProducedBy'].str.split(".", expand=True)[1]
+ df2.reset_index(drop=True, inplace=True)
+ df2['State'] = state
+ data_dict[state] = df2
+
+ fba = pd.concat(data_dict, ignore_index=True)
+ fba.dropna(subset=['FlowAmount'], inplace=True)
+
+ # Gross Output
+ if 'GO' in source and 'ActivityConsumedBy' in fba.columns:
+ fba = fba.drop(columns=['ActivityConsumedBy'])
+
+ # Assign location
+ fba['County'] = ''
+ fba = apply_county_FIPS(fba)
+ fba = assign_fips_location_system(fba, '2015')
+ fba.drop(columns=['County'], inplace=True)
+
+ # Hardcoded data
+ fba['Year'] = year
+ fba['SourceName'] = source
+ fba['Class'] = 'Money'
+ fba['Unit'] = "USD"
+ fba['FlowName'] = f"USD{year}"
+ fba["FlowType"] = "TECHNOSPHERE_FLOW"
+ fba['DataReliability'] = 5 # tmp
+ fba['DataCollection'] = 5 # tmp
+ return fba
+
+
+if __name__ == "__main__":
+ import flowsa
+ source = 'stateio_Industry_GO'
+ flowsa.flowbyactivity.main(year=2017, source=source)
+ fba = flowsa.getFlowByActivity(source, 2017)
diff --git a/flowsa/data_source_scripts/stewiFBS.py b/flowsa/data_source_scripts/stewiFBS.py
index 06b23ee69..77f0053c3 100644
--- a/flowsa/data_source_scripts/stewiFBS.py
+++ b/flowsa/data_source_scripts/stewiFBS.py
@@ -15,6 +15,7 @@
import os
import pandas as pd
from esupy.dqi import get_weighted_average
+from esupy.processed_data_mgmt import read_source_metadata
from flowsa.allocation import equally_allocate_parent_to_child_naics
from flowsa.flowbyfunctions import assign_fips_location_system,\
aggregate_and_subset_for_target_sectors
@@ -24,6 +25,12 @@
from flowsa.schema import flow_by_sector_fields
from flowsa.settings import log, process_adjustmentpath
from flowsa.validation import replace_naics_w_naics_from_another_year
+import stewicombo
+import stewi
+from stewicombo.overlaphandler import remove_default_flow_overlaps
+from stewicombo.globals import addChemicalMatches, compile_metadata,\
+ set_stewicombo_meta
+import facilitymatcher
def stewicombo_to_sector(yaml_load, method, fbsconfigpath=None):
@@ -43,8 +50,6 @@ def stewicombo_to_sector(yaml_load, method, fbsconfigpath=None):
:param fbsconfigpath, str, optional path to an FBS method outside flowsa repo
:return: df, FBS format
"""
- import stewicombo
-
inventory_name = yaml_load.get('local_inventory_name')
df = None
@@ -102,8 +107,6 @@ def stewi_to_sector(yaml_load, method, *_):
:param method: dictionary, FBS method
:return: df, FBS format
"""
- import stewi
-
# determine if fxns specified in FBS method yaml
functions = yaml_load.get('functions', [])
@@ -147,10 +150,6 @@ def reassign_process_to_sectors(df, year, file_list, fbsconfigpath):
:param fbsconfigpath, str, optional path to an FBS method outside flowsa repo
:return: df
"""
- import stewi
- from stewicombo.overlaphandler import remove_default_flow_overlaps
- from stewicombo.globals import addChemicalMatches
-
df_adj = pd.DataFrame()
for file in file_list:
fpath = f"{process_adjustmentpath}{file}.csv"
@@ -218,7 +217,6 @@ def extract_facility_data(inventory_dict):
{'NEI':'2017', 'TRI':'2017'})
:return: df
"""
- import stewi
facilities_list = []
# load facility data from stewi output directory, keeping only the
# facility IDs, and geographic information
@@ -248,7 +246,6 @@ def obtain_NAICS_from_facility_matcher(inventory_list):
:param inventory_list: a list of inventories (e.g., ['NEI', 'TRI'])
:return: df
"""
- import facilitymatcher
# Access NAICS From facility matcher and assign based on FRS_ID
all_NAICS = \
facilitymatcher.get_FRS_NAICSInfo_for_facility_list(
@@ -372,10 +369,15 @@ def add_stewi_metadata(inventory_dict):
{'NEI':'2017', 'TRI':'2017'})
:return: combined dictionary of metadata from each inventory
"""
- from stewicombo.globals import compile_metadata
return compile_metadata(inventory_dict)
+def add_stewicombo_metadata(inventory_name):
+ """Access locally stored stewicombo metadata by filename"""
+ return read_source_metadata(stewicombo.globals.paths,
+ set_stewicombo_meta(inventory_name))
+
+
if __name__ == "__main__":
import flowsa
flowsa.flowbysector.main(method='CRHW_state_2017')
diff --git a/flowsa/dataclean.py b/flowsa/dataclean.py
index 477e55c6d..8a07161b6 100644
--- a/flowsa/dataclean.py
+++ b/flowsa/dataclean.py
@@ -210,13 +210,15 @@ def standardize_units(df):
np.where(df['Unit'] == 'million Cubic metres/year', 'kg', df['Unit'])
# Convert mass units (LB or TON) to kg
- df.loc[:, 'FlowAmount'] = np.where(df['Unit'] == 'TON',
+ df.loc[:, 'FlowAmount'] = np.where(df['Unit'].isin(['TON', 'tons',
+ 'short tons']),
df['FlowAmount'] * ton_to_kg,
df['FlowAmount'])
df.loc[:, 'FlowAmount'] = np.where(df['Unit'] == 'LB',
df['FlowAmount'] * lb_to_kg,
df['FlowAmount'])
- df.loc[:, 'Unit'] = np.where(df['Unit'].isin(['TON', 'LB']),
+ df.loc[:, 'Unit'] = np.where(df['Unit'].isin(['TON', 'tons',
+ 'short tons', 'LB']),
'kg', df['Unit'])
return df
diff --git a/flowsa/exceptions.py b/flowsa/exceptions.py
new file mode 100644
index 000000000..5d922fe92
--- /dev/null
+++ b/flowsa/exceptions.py
@@ -0,0 +1,46 @@
+# exceptions.py (flowsa)
+# !/usr/bin/env python3
+# coding=utf-8
+
+"""Defines custom exceptions for flowsa"""
+
+
+class FBANotAvailableError(Exception):
+ def __init__(self, method=None, year=None, message=None):
+ if message is None:
+ message = ("FBA not available for requested year")
+ if method:
+ message = message.replace("FBA", method)
+ if year:
+ message = message.replace("requested year", str(year))
+ self.message = message
+ super().__init__(self.message)
+
+
+class FlowsaMethodNotFoundError(FileNotFoundError):
+ def __init__(self, method_type=None, method=None):
+ message = (f"{method_type} method file not found")
+ if method:
+ message = " ".join((message, f"for {method}"))
+ self.message = message
+ super().__init__(self.message)
+
+
+class APIError(Exception):
+ def __init__(self, api_source):
+ message = (f"Key file {api_source} not found. See github wiki for help "
+ "https://github.com/USEPA/flowsa/wiki/Using-FLOWSA#api-keys")
+ self.message = message
+ super().__init__(self.message)
+
+
+class FBSMethodConstructionError(Exception):
+ """Errors in FBS methods which result in incompatible models"""
+ def __init__(self, message=None, error_type=None):
+ if message is None:
+ message = ("Error in method construction.")
+ if error_type == 'fxn_call':
+ message = ("Calling functions in method files must be preceded "
+ "by '!script_function:'")
+ self.message = message
+ super().__init__(self.message)
diff --git a/flowsa/fbs_allocation.py b/flowsa/fbs_allocation.py
index 4b6a2946d..ae402e879 100644
--- a/flowsa/fbs_allocation.py
+++ b/flowsa/fbs_allocation.py
@@ -7,6 +7,7 @@
import numpy as np
import pandas as pd
+import flowsa
from flowsa.common import fba_activity_fields, fbs_activity_fields, \
fba_mapped_wsec_default_grouping_fields, fba_wsec_default_grouping_fields, \
check_activities_sector_like, return_bea_codes_used_as_naics
@@ -17,7 +18,7 @@
check_if_location_systems_match
from flowsa.flowbyfunctions import collapse_activity_fields, \
sector_aggregation, sector_disaggregation, subset_df_by_geoscale, \
- load_fba_w_standardized_units
+ load_fba_w_standardized_units, aggregator
from flowsa.allocation import allocate_by_sector, proportional_allocation_by_location_and_activity, \
equally_allocate_parent_to_child_naics, equal_allocation
from flowsa.sectormapping import get_fba_allocation_subset, add_sectors_to_flowbyactivity
@@ -37,7 +38,7 @@ def direct_allocation_method(fbs, k, names, method):
log.info('Directly assigning activities to sectors')
# for each activity, if activities are not sector like,
# check that there is no data loss
- if check_activities_sector_like(k) is False:
+ if check_activities_sector_like(fbs) is False:
activity_list = []
n_allocated = []
for n in names:
@@ -50,14 +51,12 @@ def direct_allocation_method(fbs, k, names, method):
fbs_subset = \
fbs[(fbs[fba_activity_fields[0]] == n) |
(fbs[fba_activity_fields[1]] == n)].reset_index(drop=True)
- # check if an Activity maps to more than one sector,
- # if so, equally allocate
- fbs_subset = equal_allocation(fbs_subset)
- fbs_subset = equally_allocate_parent_to_child_naics(
- fbs_subset, method)
activity_list.append(fbs_subset)
n_allocated.append(n)
fbs = pd.concat(activity_list, ignore_index=True)
+ # check if an Activity maps to more than one sector,
+ # if so, equally allocate
+ fbs = equal_allocation(fbs)
return fbs
@@ -191,20 +190,20 @@ def dataset_allocation_method(flow_subset_mapped, attr, names, method,
# subset fba allocation table to the values in the activity
# list, based on overlapping sectors
- flow_subset_mapped = flow_subset_mapped.loc[
+ flow_subset_mapped2 = flow_subset_mapped.loc[
(flow_subset_mapped[fbs_activity_fields[0]].isin(sector_list)) |
(flow_subset_mapped[fbs_activity_fields[1]].isin(sector_list))]
# check if fba and allocation dfs have the same LocationSystem
log.info("Checking if flowbyactivity and allocation "
"dataframes use the same location systems")
- check_if_location_systems_match(flow_subset_mapped, flow_allocation)
+ check_if_location_systems_match(flow_subset_mapped2, flow_allocation)
# merge fba df w/flow allocation dataset
log.info("Merge %s and subset of %s", k, attr['allocation_source'])
for i, j in activity_fields.items():
# check units
- compare_df_units(flow_subset_mapped, flow_allocation)
+ compare_df_units(flow_subset_mapped2, flow_allocation)
# create list of columns to merge on
if 'allocation_merge_columns' in attr:
fa_cols = \
@@ -219,21 +218,21 @@ def dataset_allocation_method(flow_subset_mapped, attr, names, method,
fa_cols = ['Location', 'Sector', 'FlowAmountRatio', 'FBA_Activity']
l_cols = ['Location', j[1]["flowbysector"], j[0]["flowbyactivity"]]
r_cols = ['Location', 'Sector', 'FBA_Activity']
- flow_subset_mapped = \
- flow_subset_mapped.merge(flow_allocation[fa_cols], left_on=l_cols,
+ flow_subset_mapped2 = \
+ flow_subset_mapped2.merge(flow_allocation[fa_cols], left_on=l_cols,
right_on=r_cols, how='left')
# merge the flowamount columns
- flow_subset_mapped.loc[:, 'FlowAmountRatio'] =\
- flow_subset_mapped['FlowAmountRatio_x'].fillna(
- flow_subset_mapped['FlowAmountRatio_y'])
+ flow_subset_mapped2.loc[:, 'FlowAmountRatio'] =\
+ flow_subset_mapped2['FlowAmountRatio_x'].fillna(
+ flow_subset_mapped2['FlowAmountRatio_y'])
# fill null rows with 0 because no allocation info
- flow_subset_mapped['FlowAmountRatio'] = \
- flow_subset_mapped['FlowAmountRatio'].fillna(0)
+ flow_subset_mapped2['FlowAmountRatio'] = \
+ flow_subset_mapped2['FlowAmountRatio'].fillna(0)
# drop rows where there is no allocation data
- fbs = flow_subset_mapped.dropna(
- subset=['Sector_x', 'Sector_y'], how='all').reset_index()
+ fbs = flow_subset_mapped2.dropna(
+ subset=['Sector_x', 'Sector_y'], how='all').reset_index(drop=True)
# calculate flow amounts for each sector
log.info("Calculating new flow amounts using flow ratios")
@@ -243,8 +242,22 @@ def dataset_allocation_method(flow_subset_mapped, attr, names, method,
log.info("Cleaning up new flow by sector")
fbs = fbs.drop(columns=['Sector_x', 'FlowAmountRatio_x', 'Sector_y',
'FlowAmountRatio_y', 'FlowAmountRatio',
- 'FBA_Activity_x', 'FBA_Activity_y'])
- return fbs
+ 'FBA_Activity_x', 'FBA_Activity_y',
+ 'disaggregate_flag', 'Description'],
+ errors='ignore')
+
+ # if activities are source like, reset activity columns
+ sector_like_activities = check_activities_sector_like(flow_subset_mapped)
+ if sector_like_activities:
+ fbs = fbs.assign(ActivityProducedBy = fbs['SectorProducedBy'],
+ ActivityConsumedBy = fbs['SectorConsumedBy'])
+
+ group_cols = list(fbs.select_dtypes(include=['object', 'int']).columns)
+ fbs2 = aggregator(fbs, group_cols)
+
+ fbs3 = sector_aggregation(fbs2)
+
+ return fbs3
def allocation_helper(df_w_sector, attr, method, v, download_FBA_if_missing):
@@ -465,6 +478,7 @@ def load_map_clean_fba(method, attr, fba_sourcename, df_year, flowclass,
'clean_allocation_fba_w_sec'
:return: df, fba format
"""
+ from flowsa.sectormapping import get_activitytosector_mapping
# dictionary to load/standardize fba
kwargs_dict = {}
if 'download_FBA_if_missing' in kwargs:
@@ -482,15 +496,6 @@ def load_map_clean_fba(method, attr, fba_sourcename, df_year, flowclass,
**kwargs_dict
)
- # check if allocation data exists at specified geoscale to use
- log.info("Checking if allocation data exists at the %s level",
- geoscale_from)
- check_if_data_exists_at_geoscale(fba, geoscale_from)
-
- # aggregate geographically to the scale of the flowbyactivty source,
- # if necessary
- fba = subset_df_by_geoscale(fba, geoscale_from, geoscale_to)
-
# subset based on yaml settings
if 'flowname_subset' in kwargs:
if kwargs['flowname_subset'] != 'None':
@@ -499,17 +504,44 @@ def load_map_clean_fba(method, attr, fba_sourcename, df_year, flowclass,
if kwargs['compartment_subset'] != 'None':
fba = \
fba.loc[fba['Compartment'].isin(kwargs['compartment_subset'])]
+ fba = fba.reset_index(drop=True)
+
+ if len(fba) == 0:
+ raise flowsa.exceptions.FBSMethodConstructionError(
+ message='Allocation dataset is length 0; check flow or '
+ 'compartment subset for errors')
+
+ # load relevant activities if activities are not naics-like
+ try:
+ sm = get_activitytosector_mapping(
+ fba_sourcename, fbsconfigpath=fbsconfigpath)
+ sm_list = sm['Activity'].drop_duplicates().values.tolist()
+ # subset fba data by activities listed in the sector crosswalk
+ fba = fba[(fba[fba_activity_fields[0]].isin(sm_list)) |
+ (fba[fba_activity_fields[1]].isin(sm_list)
+ )].reset_index(drop=True)
+ except FileNotFoundError:
+ pass
+
+ # check if allocation data exists at specified geoscale to use
+ log.info("Checking if allocation data exists at the %s level",
+ geoscale_from)
+ check_if_data_exists_at_geoscale(fba, geoscale_from)
+
+ # aggregate geographically to the scale of the flowbyactivity source,
+ # if necessary
+ fba2 = subset_df_by_geoscale(fba, geoscale_from, geoscale_to)
# cleanup the fba allocation df, if necessary
if 'clean_fba' in kwargs:
log.info("Cleaning %s", fba_sourcename)
- fba = kwargs["clean_fba"](
- fba,
+ fba2 = kwargs["clean_fba"](
+ fba2,
attr=attr,
download_FBA_if_missing=kwargs['download_FBA_if_missing']
)
# reset index
- fba = fba.reset_index(drop=True)
+ fba2 = fba2.reset_index(drop=True)
# assign sector to allocation dataset
activity_to_sector_mapping = attr.get('activity_to_sector_mapping')
@@ -521,7 +553,7 @@ def load_map_clean_fba(method, attr, fba_sourcename, df_year, flowclass,
overwrite_sectorlevel = 'aggregated'
else:
overwrite_sectorlevel = None
- fba_wsec = add_sectors_to_flowbyactivity(fba, sectorsourcename=method[
+ fba_wsec = add_sectors_to_flowbyactivity(fba2, sectorsourcename=method[
'target_sector_source'],
activity_to_sector_mapping=activity_to_sector_mapping,
fbsconfigpath=fbsconfigpath, overwrite_sectorlevel=overwrite_sectorlevel)
diff --git a/flowsa/flowbyactivity.py b/flowsa/flowbyactivity.py
index 4ca83760d..adce16020 100644
--- a/flowsa/flowbyactivity.py
+++ b/flowsa/flowbyactivity.py
@@ -11,11 +11,12 @@
import argparse
import pandas as pd
from urllib import parse
+import flowsa
from esupy.processed_data_mgmt import write_df_to_file
from esupy.remote import make_url_request
-from flowsa.common import log, load_api_key, sourceconfigpath, \
+from flowsa.common import load_api_key, sourceconfigpath, \
load_yaml_dict, rename_log_file, get_flowsa_base_name
-from flowsa.settings import paths
+from flowsa.settings import paths, log
from flowsa.metadata import set_fb_meta, write_metadata
from flowsa.flowbyfunctions import fba_fill_na_dict
from flowsa.schema import flow_by_activity_fields
@@ -74,11 +75,14 @@ def assemble_urls_for_query(*, source, year, config):
userAPIKey = load_api_key(config['api_name']) # (common.py fxn)
build_url = build_url.replace("__apiKey__", userAPIKey)
- if "url_replace_fxn" in config:
- # dynamically import and call on function
- urls = config["url_replace_fxn"](build_url=build_url, source=source,
- year=year, config=config)
+ fxn = config.get("url_replace_fxn")
+ if callable(fxn):
+ urls = fxn(build_url=build_url, source=source,
+ year=year, config=config)
return urls
+ elif fxn:
+ raise flowsa.exceptions.FBSMethodConstructionError(
+ error_type='fxn_call')
else:
return [build_url]
@@ -108,11 +112,13 @@ def call_urls(*, url_list, source, year, config):
resp = make_url_request(url,
set_cookies=set_cookies,
confirm_gdrive=confirm_gdrive)
- if "call_response_fxn" in config:
- # dynamically import and call on function
- df = config["call_response_fxn"](resp=resp, source=source,
- year=year, config=config,
- url=url)
+ fxn = config.get("call_response_fxn")
+ if callable(fxn):
+ df = fxn(resp=resp, source=source, year=year,
+ config=config, url=url)
+ elif fxn:
+ raise flowsa.exceptions.FBSMethodConstructionError(
+ error_type='fxn_call')
if isinstance(df, pd.DataFrame):
data_frames_list.append(df)
elif isinstance(df, list):
@@ -131,10 +137,15 @@ def parse_data(*, df_list, source, year, config):
:param config: dictionary, FBA yaml
:return: df, single df formatted to FBA
"""
- if "parse_response_fxn" in config:
- # dynamically import and call on function
- df = config["parse_response_fxn"](df_list=df_list, source=source,
- year=year, config=config)
+
+ fxn = config.get("parse_response_fxn")
+ if callable(fxn):
+ df = fxn(df_list=df_list, source=source, year=year, config=config)
+ elif fxn:
+ raise flowsa.exceptions.FBSMethodConstructionError(
+ error_type='fxn_call')
+ # else:
+ # Handle parse_response_fxn = None
return df
@@ -187,7 +198,7 @@ def main(**kwargs):
# filename if run into error
try:
config = load_yaml_dict(source, flowbytype='FBA')
- except (UnboundLocalError, FileNotFoundError):
+ except FileNotFoundError:
log.info(f'Could not find Flow-By-Activity config file for {source}')
source = get_flowsa_base_name(sourceconfigpath, source, "yaml")
log.info(f'Generating FBA for {source}')
diff --git a/flowsa/flowbyfunctions.py b/flowsa/flowbyfunctions.py
index bd23f6ea0..d0ecd2ec4 100644
--- a/flowsa/flowbyfunctions.py
+++ b/flowsa/flowbyfunctions.py
@@ -9,20 +9,19 @@
import numpy as np
from esupy.dqi import get_weighted_average
import flowsa
-from flowsa.common import fbs_activity_fields, \
- load_crosswalk, fbs_fill_na_dict, \
+from flowsa.common import fbs_activity_fields, sector_level_key, \
+ load_crosswalk, fbs_fill_na_dict, check_activities_sector_like, \
fbs_collapsed_default_grouping_fields, fbs_collapsed_fill_na_dict, \
fba_activity_fields, fba_default_grouping_fields, \
load_sector_length_cw_melt, fba_fill_na_dict, \
- get_flowsa_base_name, fba_mapped_default_grouping_fields, \
- check_activities_sector_like
+ fba_mapped_default_grouping_fields
+from flowsa.dataclean import clean_df, replace_strings_with_NoneType, \
+ replace_NoneType_with_empty_cells, standardize_units
from flowsa.location import US_FIPS, get_state_FIPS, \
get_county_FIPS, update_geoscale, fips_number_key
from flowsa.schema import flow_by_activity_fields, flow_by_sector_fields, \
flow_by_sector_collapsed_fields, flow_by_activity_mapped_fields
-from flowsa.settings import datasourcescriptspath, log
-from flowsa.dataclean import clean_df, replace_strings_with_NoneType, \
- replace_NoneType_with_empty_cells, standardize_units
+from flowsa.settings import log, vLogDetailed, vLog
def create_geoscale_list(df, geoscale, year='2015'):
@@ -40,9 +39,9 @@ def create_geoscale_list(df, geoscale, year='2015'):
if geoscale == "national":
fips.append(US_FIPS)
elif df['LocationSystem'].str.contains('FIPS').any():
- # all_FIPS = read_stored_FIPS()
if geoscale == "state":
state_FIPS = get_state_FIPS(year)
+ state_FIPS = state_FIPS[state_FIPS['FIPS'] != '72000']
fips = list(state_FIPS['FIPS'])
elif geoscale == "county":
county_FIPS = get_county_FIPS(year)
@@ -64,8 +63,9 @@ def filter_by_geoscale(df, geoscale):
df = df[df['Location'].isin(fips)].reset_index(drop=True)
if len(df) == 0:
- log.error("No flows found in the flow dataset at the %s scale",
- geoscale)
+ raise flowsa.exceptions.FBSMethodConstructionError(
+ message="No flows found in the flow dataset at "
+ f"the {geoscale} scale")
else:
return df
@@ -92,13 +92,16 @@ def agg_by_geoscale(df, from_scale, to_scale, groupbycols):
return fba_agg
-def aggregator(df, groupbycols):
+def aggregator(df, groupbycols, retain_zeros=True):
"""
Aggregates flowbyactivity or flowbysector 'FlowAmount' column in df and
generate weighted average values based on FlowAmount values for numeric
columns
:param df: df, Either flowbyactivity or flowbysector
:param groupbycols: list, Either flowbyactivity or flowbysector columns
+    :param retain_zeros: bool, default True, if set to True, all rows that
+ have a FlowAmount = 0 will be returned in df. If False, those rows will
+ be dropped
:return: df, with aggregated columns
"""
@@ -108,7 +111,8 @@ def aggregator(df, groupbycols):
df = replace_NoneType_with_empty_cells(df)
# drop columns with flowamount = 0
- df = df[df['FlowAmount'] != 0]
+ if retain_zeros is False:
+ df = df[df['FlowAmount'] != 0]
# list of column headers, that if exist in df, should be
# aggregated using the weighted avg fxn
@@ -163,14 +167,13 @@ def sector_ratios(df, sectorcolumn):
# appends missing naics levels to df
sec_ratios = []
for i in range(length, 3, -1):
- # subset df to sectors with length = i and length = i + 1
- df_subset = df.loc[df[sectorcolumn].apply(lambda x: len(x) == i)]
+ # subset df to sectors with length = i
+ df_subset = subset_df_by_sector_lengths(df, [i])
# create column for sector grouping
- df_subset = df_subset.assign(
- Sector_group=df_subset[sectorcolumn].apply(lambda x: x[0:i-1]))
+ df_subset = assign_sector_match_column(df_subset, sectorcolumn, i, i-1)
# subset df to create denominator
- df_denom = df_subset[['FlowAmount', 'Location', 'Sector_group']]
- df_denom = df_denom.groupby(['Location', 'Sector_group'],
+ df_denom = df_subset[['FlowAmount', 'Location', 'sector_group']]
+ df_denom = df_denom.groupby(['Location', 'sector_group'],
as_index=False).agg({"FlowAmount": sum})
df_denom = df_denom.rename(columns={"FlowAmount": "Denominator"})
# merge the denominator column with fba_w_sector df
@@ -179,10 +182,10 @@ def sector_ratios(df, sectorcolumn):
ratio_df.loc[:, 'FlowAmountRatio'] = \
ratio_df['FlowAmount'] / ratio_df['Denominator']
ratio_df = ratio_df.drop(
- columns=['Denominator', 'Sector_group']).reset_index()
+ columns=['Denominator', 'sector_group'])
sec_ratios.append(ratio_df)
# concat list of dataframes (info on each page)
- df_w_ratios = pd.concat(sec_ratios, sort=True).reset_index(drop=True)
+ df_w_ratios = pd.concat(sec_ratios, ignore_index=True)
return df_w_ratios
@@ -200,12 +203,9 @@ def sector_aggregation(df_load):
# determine grouping columns - based on datatype
group_cols = list(df.select_dtypes(include=['object', 'int']).columns)
- # determine if activities are sector-like,
- # if aggregating a df with a 'SourceName'
- sector_like_activities = False
- if 'SourceName' in df_load.columns:
- s = pd.unique(df_load['SourceName'])[0]
- sector_like_activities = check_activities_sector_like(s)
+ # determine if activities are sector-like, if aggregating a df with a
+ # 'SourceName'
+ sector_like_activities = check_activities_sector_like(df_load)
# if activities are sector like, drop columns while running ag then
# add back in
@@ -263,7 +263,8 @@ def sector_aggregation(df_load):
drop=True)
# if activities are source-like, set col values as
# copies of the sector columns
- if sector_like_activities & ('FlowAmount' in df.columns):
+ if sector_like_activities & ('FlowAmount' in df.columns) & \
+ ('ActivityProducedBy' in df_load.columns):
df = df.assign(ActivityProducedBy=df['SectorProducedBy'])
df = df.assign(ActivityConsumedBy=df['SectorConsumedBy'])
@@ -286,15 +287,13 @@ def sector_disaggregation(df_load):
# determine if activities are sector-like, if aggregating
# a df with a 'SourceName'
- sector_like_activities = False
- if 'SourceName' in df_load.columns:
- s = pd.unique(df_load['SourceName'])[0]
- sector_like_activities = check_activities_sector_like(s)
+ sector_like_activities = check_activities_sector_like(df_load)
# if activities are sector like, drop columns while running disag then
# add back in
if sector_like_activities:
- df = df.drop(columns=['ActivityProducedBy', 'ActivityConsumedBy'])
+ df = df.drop(columns=['ActivityProducedBy', 'ActivityConsumedBy'],
+ errors='ignore')
df = df.reset_index(drop=True)
# load naics 2 to naics 6 crosswalk
@@ -313,9 +312,7 @@ def sector_disaggregation(df_load):
sector_add = 'NAICS_' + str(i + 1)
# subset the df by naics length
- cw = cw_load[[sector_merge, sector_add]]
- # first drop all duplicates
- cw = cw.drop_duplicates()
+ cw = cw_load[[sector_merge, sector_add]].drop_duplicates()
# only keep the rows where there is only one value in sector_add for
# a value in sector_merge
cw = cw.drop_duplicates(subset=[sector_merge], keep=False).reset_index(
@@ -369,6 +366,33 @@ def assign_fips_location_system(df, year_of_data):
return df
+def return_primary_sector_column(df_load):
+ """
+ Determine sector column with values
+    :param df_load: df with two sector columns
+ :return: string, primary sector column
+ """
+ # determine the df_w_sector column to merge on
+ if 'Sector' in df_load.columns:
+ primary_sec_column = 'Sector'
+ else:
+ df = replace_strings_with_NoneType(df_load)
+ sec_consumed_list = \
+ df['SectorConsumedBy'].drop_duplicates().values.tolist()
+ sec_produced_list = \
+ df['SectorProducedBy'].drop_duplicates().values.tolist()
+ # if a sector field column is not all 'none', that is the column to
+ # merge
+ if all(v is None for v in sec_consumed_list):
+ primary_sec_column = 'SectorProducedBy'
+ elif all(v is None for v in sec_produced_list):
+ primary_sec_column = 'SectorConsumedBy'
+ else:
+ log.error('There are values in both SectorProducedBy and '
+ 'SectorConsumedBy, cannot isolate Sector column')
+ return primary_sec_column
+
+
def collapse_fbs_sectors(fbs):
"""
Collapses the Sector Produced/Consumed into a single column named "Sector"
@@ -589,25 +613,48 @@ def dataframe_difference(df1, df2, which=None):
def equally_allocate_suppressed_parent_to_child_naics(
- df_load, sector_column, groupcols):
+ df_load, method, sector_column, groupcols,
+ equally_allocate_parent_to_child=True):
"""
Estimate data suppression, by equally allocating parent NAICS
values to child NAICS
:param df_load: df with sector columns
+ :param method: dictionary, FBS method yaml
:param sector_column: str, column to estimate suppressed data for
:param groupcols: list, columns to group df by
+ :param equally_allocate_parent_to_child: default True, if True will
+ first equally allocate parent to child sectors if the child sector is
+ missing
:return: df, with estimated suppressed data
"""
+ from flowsa.allocation import equally_allocate_parent_to_child_naics
+ from flowsa.validation import \
+ compare_child_to_parent_sectors_flowamounts, \
+ compare_summation_at_sector_lengths_between_two_dfs
+
+ vLogDetailed.info('Estimating suppressed data by equally allocating '
+ 'parent to child sectors.')
df = sector_disaggregation(df_load)
+
+ # equally allocate parent to child naics where child naics are not
+ # included in the dataset. This step is necessary to accurately
+ # calculate the flow that has already been allocated. Must allocate to
+ # NAICS_6 for suppressed data function to work correctly.
+ if equally_allocate_parent_to_child:
+ vLogDetailed.info('Before estimating suppressed data, equally '
+ 'allocate parent sectors to child sectors.')
+ df = equally_allocate_parent_to_child_naics(
+ df, method, overwritetargetsectorlevel='NAICS_6')
+
df = replace_NoneType_with_empty_cells(df)
df = df[df[sector_column] != '']
# determine if activities are sector-like,
# if aggregating a df with a 'SourceName'
- sector_like_activities = False
- if 'SourceName' in df_load.columns:
- s = pd.unique(df_load['SourceName'])[0]
- sector_like_activities = check_activities_sector_like(s)
+ sector_like_activities = check_activities_sector_like(df_load)
+ if sector_like_activities is False:
+ log.error('Function is not written to estimate suppressed data when '
+ 'activities are not NAICS-like.')
# if activities are source like, drop from df,
# add back in as copies of sector columns columns to keep
@@ -623,7 +670,9 @@ def equally_allocate_suppressed_parent_to_child_naics(
# load naics 2 to naics 6 crosswalk
cw_load = load_crosswalk('sector_length')
- cw_melt = cw_load.melt(
+ # only keep official naics
+ cw = cw_load.drop(columns=['NAICS_7']).drop_duplicates()
+ cw_melt = pd.melt(cw,
id_vars=["NAICS_6"], var_name="NAICS_Length",
value_name="NAICS_Match").drop(
columns=['NAICS_Length']).drop_duplicates()
@@ -637,10 +686,15 @@ def equally_allocate_suppressed_parent_to_child_naics(
new_naics[sector_column] = new_naics['NAICS_6'].copy()
new_naics = new_naics.drop(columns=['NAICS_6', 'NAICS_Match'])
+ # if a parent and child naics are both suppressed, can get situations
+ # where a naics6 code is duplicated because both the parent and child
+ # will match with the naics6. Therefore, drop duplicates
+ new_naics2 = new_naics.drop_duplicates()
+
# merge the new naics with the existing df, if data already
# existed for a NAICS6, keep the original
dfm = pd.merge(
- new_naics[groupcols], df, how='left', on=groupcols,
+ new_naics2[groupcols], df, how='left', on=groupcols,
indicator=True).query('_merge=="left_only"').drop('_merge', axis=1)
dfm = replace_NoneType_with_empty_cells(dfm)
dfm = dfm.fillna(0)
@@ -648,54 +702,148 @@ def equally_allocate_suppressed_parent_to_child_naics(
# add length column and subset the data
# subtract out existing data at NAICS6 from total data
# at a length where no suppressed data
- df = df.assign(secLength=df[sector_column].apply(lambda x: len(x)))
-
- # add column for each state of sector length where
- # there are no missing values
- df_sup = df_sup.assign(
- secLength=df_sup[sector_column].apply(lambda x: len(x)))
- df_sup2 = (df_sup.groupby(
- ['FlowName', 'Compartment', 'Location'])['secLength'].agg(
- lambda x: x.min()-1).reset_index(name='secLengthsup'))
-
- # merge the dfs and sub out the last sector lengths with
- # all data for each state drop states that don't have suppressed dat
- df1 = df.merge(df_sup2)
-
- df2 = df1[df1['secLength'] == 6].reset_index(drop=True)
- # determine sector to merge on
- df2.loc[:, 'mergeSec'] = df2.apply(
- lambda x: x[sector_column][:x['secLengthsup']], axis=1)
-
- sum_cols = [e for e in fba_default_grouping_fields if e not in
- ['ActivityConsumedBy', 'ActivityProducedBy']]
- sum_cols.append('mergeSec')
- df2 = df2.assign(
- FlowAlloc=df2.groupby(sum_cols)['FlowAmount'].transform('sum'))
- # rename columns for the merge and define merge cols
- df2 = df2.rename(columns={sector_column: 'NewNAICS',
- 'mergeSec': sector_column})
- # keep flows with 0 flow
- df3 = df2[df2['FlowAmount'] == 0].reset_index(drop=True)
- m_cols = groupcols + ['NewNAICS', 'FlowAlloc']
- # merge the two dfs
- dfe = df1.merge(df3[m_cols])
- # add count column used to divide the unallocated flows
- dfe = dfe.assign(
- secCount=dfe.groupby(groupcols)['NewNAICS'].transform('count'))
- dfe = dfe.assign(
- newFlow=(dfe['FlowAmount'] - dfe['FlowAlloc']) / dfe['secCount'])
- # reassign values and drop columns
- dfe = dfe.assign(FlowAmount=dfe['newFlow'])
- dfe[sector_column] = dfe['NewNAICS'].copy()
- dfe = dfe.drop(columns=['NewNAICS', 'FlowAlloc', 'secCount', 'newFlow'])
-
- # new df with estimated naics6
- dfn = pd.concat([df, dfe], ignore_index=True)
- dfn2 = dfn[dfn['FlowAmount'] != 0].reset_index(drop=True)
- dfn2 = dfn2.drop(columns=['secLength'])
-
- dff = sector_aggregation(dfn2)
+ drop_col = 'SectorConsumedByLength'
+ if sector_column == 'SectorConsumedBy':
+ drop_col = 'SectorProducedByLength'
+ df = assign_columns_of_sector_levels(df).rename(
+ columns={f'{sector_column}Length': 'SectorLength'}).drop(columns=[
+ drop_col])
+ # df with non-suppressed data only
+ dfns = df[df['FlowAmount'] != 0].reset_index(drop=True)
+
+ df_sup2 = pd.DataFrame()
+ cw_load = load_crosswalk('sector_length')
+ df_sup = df_sup.assign(SectorMatchFlow=np.nan)
+ merge_cols = list(df_sup.select_dtypes(
+ include=['object', 'int']).columns)
+ # also drop sector and description cols
+ merge_cols = [c for c in merge_cols
+ if c not in ['SectorConsumedBy', 'SectorProducedBy',
+ 'Description']]
+ # subset the df by length i
+ dfs = subset_df_by_sector_lengths(df_sup, [6])
+
+ counter = 1
+ while dfs.isnull().values.any() and 6-counter > 1:
+ # subset the crosswalk by i and i-1
+ cw = cw_load[[f'NAICS_6',
+ f'NAICS_{6-counter}']].drop_duplicates()
+ # merge df with the cw to determine which sector to look for in
+ # non-suppressed data
+ for s in ['Produced', 'Consumed']:
+ dfs = dfs.merge(cw, how='left', left_on=f'Sector{s}By',
+ right_on=f'NAICS_6').drop(
+ columns=f'NAICS_6').rename(
+ columns={f'NAICS_{6-counter}': f'Sector{s}Match'})
+ dfs[f'Sector{s}Match'] = dfs[f'Sector{s}Match'].fillna('')
+ # merge with non suppressed data
+ dfs = dfs.merge(dfns, how='left',
+ left_on=merge_cols + ['SectorProducedMatch',
+ 'SectorConsumedMatch'],
+ right_on=merge_cols + ['SectorProducedBy',
+ 'SectorConsumedBy'])
+ dfs['SectorMatchFlow'].fillna(dfs['FlowAmount_y'], inplace=True)
+ # drop all columns from the non suppressed data
+ dfs = dfs[dfs.columns[~dfs.columns.str.endswith('_y')]]
+ dfs.columns = dfs.columns.str.replace('_x', '')
+ # subset the df into rows assigned a new value and those not
+ dfs_assigned = dfs[~dfs['SectorMatchFlow'].isnull()]
+ dfs = dfs[dfs['SectorMatchFlow'].isnull()].drop(
+ columns=['SectorProducedMatch', 'SectorConsumedMatch',
+ 'SectorLength']).reset_index(drop=True)
+ df_sup2 = pd.concat([df_sup2, dfs_assigned], ignore_index=True)
+ counter = counter + 1
+
+ # merge in the df where calculated how much flow has already been
+ # allocated to NAICS6
+ mergecols = [e for e in groupcols if e not in
+ ['SectorProducedBy', 'SectorConsumedBy']]
+ mergecols = mergecols + ['SectorProducedMatch', 'SectorConsumedMatch']
+ meltcols = mergecols + ['sector_allocated']
+
+ if len(df_sup2) > 0:
+ for ii in range(5, 1, -1):
+ # subset the df by length i
+ dfs = df_sup2[df_sup2['SectorLength'] == ii]
+
+ dfns_sub = dfns[dfns['SectorLength'] == 6].reset_index(drop=True)
+ for s in ['Produced', 'Consumed']:
+ dfns_sub = assign_sector_match_column(
+ dfns_sub, f'Sector{s}By', 6, ii).rename(
+ columns={'sector_group': f'Sector{s}Match'})
+ dfns_sub = dfns_sub.fillna('')
+ dfsum = dfns_sub.groupby(mergecols, as_index=False).agg(
+ {"FlowAmount": sum}).rename(columns={
+ "FlowAmount": 'sector_allocated'})
+
+ df_sup3 = dfs.merge(dfsum[meltcols], on=mergecols, how='left')
+ df_sup3['sector_allocated'] = df_sup3['sector_allocated'].fillna(0)
+ # calc the remaining flow that can be allocated
+ df_sup3['FlowRemainder'] = df_sup3['SectorMatchFlow'] - \
+ df_sup3['sector_allocated']
+ # Due to rounding, there can be slight differences in data at
+ # sector levels, which can result in some minor negative values.
+ # If the percent of FlowRemainder is less than the assigned
+ # tolerance for negative numbers, or if the flowremainder is
+ # -1, reset the number to 0. If it is greater, issue a warning.
+ percenttolerance = 1
+ flowtolerance = -1
+ df_sup3 = df_sup3.assign(PercentOfAllocated=
+ (abs(df_sup3['FlowRemainder']) / df_sup3[
+ 'SectorMatchFlow']) * 100)
+ df_sup3['FlowRemainder'] = np.where(
+ (df_sup3["FlowRemainder"] < 0) &
+ (df_sup3['PercentOfAllocated'] < percenttolerance), 0,
+ df_sup3['FlowRemainder'])
+ df_sup3['FlowRemainder'] = np.where(
+ df_sup3["FlowRemainder"].between(flowtolerance, 0), 0,
+ df_sup3['FlowRemainder'])
+
+ # check for negative values
+ negv = df_sup3[df_sup3['FlowRemainder'] < 0]
+ if len(negv) > 0:
+ col_subset = [e for e in negv.columns if e in
+ ['Class', 'SourceName', 'FlowName',
+ 'Flowable', 'FlowAmount', 'Unit',
+ 'Compartment', 'Context', 'Location', 'Year',
+ 'SectorProducedBy', 'SectorConsumedBy',
+ 'SectorMatchFlow', 'SectorProducedMatch',
+ 'SectorConsumedMatch', 'sector_allocated',
+ 'FlowRemainder']]
+ negv = negv[col_subset].reset_index(drop=True)
+ vLog.info(
+ 'There are negative values when allocating suppressed '
+ 'parent data to child sector. The values are more than '
+ '%s%% of the total parent sector with a negative flow '
+ 'amount being allocated more than %s. Resetting flow '
+ 'values to be allocated to 0. See validation log for '
+ 'details.', str(percenttolerance), str(flowtolerance))
+ vLogDetailed.info('Values where flow remainders are '
+ 'negative, resetting to 0: '
+ '\n {}'.format(negv.to_string()))
+ df_sup3['FlowRemainder'] = np.where(df_sup3["FlowRemainder"] < 0,
+ 0, df_sup3['FlowRemainder'])
+ df_sup3 = df_sup3.drop(columns=[
+ 'SectorMatchFlow', 'sector_allocated', 'PercentOfAllocated'])
+ # add count column used to divide the unallocated flows
+ sector_column_match = sector_column.replace('By', 'Match')
+ df_sup3 = df_sup3.assign(secCount=df_sup3.groupby(mergecols)[
+ sector_column_match].transform('count'))
+ df_sup3 = df_sup3.assign(newFlow=df_sup3['FlowRemainder'] /
+ df_sup3['secCount'])
+ # reassign values and drop columns
+ df_sup3 = df_sup3.assign(FlowAmount=df_sup3['newFlow'])
+ df_sup3 = df_sup3.drop(columns=['SectorProducedMatch',
+ 'SectorConsumedMatch',
+ 'FlowRemainder', 'secCount',
+ 'newFlow'])
+ # reset SectorLength
+ df_sup3['SectorLength'] = 6
+ # add to the df with no suppressed data
+ dfns = pd.concat([dfns, df_sup3], ignore_index=True)
+
+ dfns = dfns.drop(columns=['SectorLength'])
+ dff = sector_aggregation(dfns)
# if activities are source-like, set col values as copies
# of the sector columns
@@ -705,8 +853,15 @@ def equally_allocate_suppressed_parent_to_child_naics(
# reindex columns
dff = dff.reindex(df_load.columns, axis=1)
+ vLogDetailed.info('Checking results of allocating suppressed parent to '
+ 'child sectors. ')
+ compare_summation_at_sector_lengths_between_two_dfs(df_load, dff)
+ compare_child_to_parent_sectors_flowamounts(dff)
+ # todo: add third check comparing smallest child naics (6) to largest (2)
+
# replace null values
dff = replace_strings_with_NoneType(dff).reset_index(drop=True)
+
return dff
@@ -825,7 +980,7 @@ def subset_df_by_sector_list(df_load, sector_list):
df['SectorConsumedBy'].isin(sector_list)
) | (
df['SectorProducedBy'].isin(sector_list) &
- df['SectorConsumedBy'].isin(sector_list))]
+ df['SectorConsumedBy'].isin(sector_list))].reset_index(drop=True)
return df
@@ -840,6 +995,7 @@ def subset_and_merge_df_by_sector_lengths(df, length1, length2):
drop=True)
# df where either sector column is length or both columns are
+ df = df.reset_index(drop=True)
df1 = subset_df_by_sector_lengths(df, [length1])
# second dataframe where length is length2
df2 = subset_df_by_sector_lengths(df, [length2])
@@ -873,6 +1029,139 @@ def subset_and_merge_df_by_sector_lengths(df, length1, length2):
return dfm
+def assign_columns_of_sector_levels(df_load):
+ """
+ Add additional column capturing the sector level in the two columns
+ :param df_load: df with at least on sector column
+ :param ambiguous_sector_assignment: if there are sectors that can be
+ assigned to multiple sector lengths (e.g., for government or household
+ sectors), option to specify which sector assignment to keep.
+ :return: df with new column for sector length
+ """
+ df = replace_NoneType_with_empty_cells(df_load)
+ # load cw with column of sector levels
+ cw = load_sector_length_cw_melt()
+ # merge df assigning sector lengths
+ for s in ['Produced', 'Consumed']:
+ df = df.merge(cw, how='left', left_on=f'Sector{s}By',
+ right_on='Sector').drop(columns=['Sector']).rename(
+ columns={'SectorLength': f'Sector{s}ByLength'})
+ df[f'Sector{s}ByLength'] = df[f'Sector{s}ByLength'].fillna(0)
+
+ # There are cases where non-traditional sectors (non naics) have
+ # multiple naics assignments. If there is a non-zero value in the other
+ # sector length column, keep that row because sector lengths must always
+ # match.
+ # subset df into two dfs, one where one sector column length has a zero
+ # value and the second where both sector length columns have non-zero
+ # values
+ df1 = df[(df['SectorProducedByLength'] == 0) |
+ (df['SectorConsumedByLength'] == 0)]
+
+ df2 = df[(df['SectorProducedByLength'] != 0) &
+ (df['SectorConsumedByLength'] != 0)]
+ # only keep rows where the values are equal
+ df2e = df2[df2['SectorProducedByLength'] == df2['SectorConsumedByLength']]
+
+ # concat dfs
+ dfc = pd.concat([df1, df2e], ignore_index=True)
+
+ # check for duplicates. Rows might be duplicated if a sector is the same
+ # for multiple sector lengths
+ duplicate_cols = [e for e in dfc.columns if e not in [
+ 'SectorProducedByLength', 'SectorConsumedByLength']]
+ duplicate_df = dfc[dfc.duplicated(subset=duplicate_cols,
+ keep=False)].reset_index(drop=True)
+
+ if len(duplicate_df) > 0:
+ log.warning('There are duplicate rows caused by ambiguous sectors.')
+
+ dfc = dfc.sort_values(['SectorProducedByLength',
+ 'SectorConsumedByLength']).reset_index(drop=True)
+ return dfc
+
+
+def assign_columns_of_sector_levels_without_ambiguous_sectors(
+ df_load, ambiguous_sector_assignment=None):
+
+ dfc = assign_columns_of_sector_levels(df_load)
+
+ # check for duplicates. Rows might be duplicated if a sector is the same
+ # for multiple sector lengths
+ duplicate_cols = [e for e in dfc.columns if e not in [
+ 'SectorProducedByLength', 'SectorConsumedByLength']]
+ duplicate_df = dfc[dfc.duplicated(subset=duplicate_cols,
+ keep=False)].reset_index(drop=True)
+
+ if (len(duplicate_df) > 0) % (ambiguous_sector_assignment is not None):
+ log.info('Retaining data for %s and dropping remaining '
+ 'rows. See validation log for data dropped',
+ ambiguous_sector_assignment)
+ # first drop all data in the duplicate_df from dfc
+ dfs1 = pd.concat([dfc, duplicate_df]).drop_duplicates(keep=False)
+ # drop sector length cols, drop duplicates, aggregate df to ensure
+ # keep the intended data, and then reassign column sectors,
+ # formatted this way because would like to avoid sector aggreggation
+ # on large dfs
+ dfs2 = duplicate_df.drop(
+ columns=['SectorProducedByLength',
+ 'SectorConsumedByLength']).drop_duplicates()
+ dfs2 = sector_aggregation(dfs2)
+ dfs2 = assign_columns_of_sector_levels(dfs2)
+ # then in the duplicate df, only keep the rows that match the
+ # parameter indicated in the function call
+ sectorlength = sector_level_key[ambiguous_sector_assignment]
+ dfs2 = dfs2[
+ ((dfs2['SectorProducedByLength'] == sectorlength) &
+ (dfs2['SectorConsumedByLength'] == 0))
+ |
+ ((dfs2['SectorProducedByLength'] == 0) &
+ (dfs2['SectorConsumedByLength'] == sectorlength))
+ |
+ ((dfs2['SectorProducedByLength'] == sectorlength) &
+ (dfs2['SectorConsumedByLength'] == sectorlength))
+ ].reset_index(drop=True)
+ if len(dfs2) == 0:
+ log.warning('Data is lost from dataframe because none of the '
+ 'ambiguous sectors match %s',
+ ambiguous_sector_assignment)
+ # merge the two dfs
+ dfc = pd.concat([dfs1, dfs2])
+ # print out what data was dropped
+ df_dropped = pd.merge(
+ duplicate_df, dfs2, how='left', indicator=True).query(
+ '_merge=="left_only"').drop('_merge', axis=1)
+ df_dropped = df_dropped[
+ ['SectorProducedBy', 'SectorConsumedBy',
+ 'SectorProducedByLength', 'SectorConsumedByLength'
+ ]].drop_duplicates().reset_index(drop=True)
+ vLogDetailed.info('After assigning a column of sector lengths, '
+ 'dropped data with the following sector '
+ 'assignments due to ambiguous sector lengths '
+ '%s: \n {}'.format(df_dropped.to_string()))
+ dfc = dfc.sort_values(['SectorProducedByLength',
+ 'SectorConsumedByLength']).reset_index(drop=True)
+ return dfc
+
+
+def assign_sector_match_column(df_load, sectorcolumn, sectorlength,
+ sectorlengthmatch):
+
+ sector = 'NAICS_' + str(sectorlength)
+ sector_add = 'NAICS_' + str(sectorlengthmatch)
+
+ cw_load = load_crosswalk("sector_length")
+ cw = cw_load[[sector, sector_add]].drop_duplicates().reset_index(
+ drop=True)
+
+ df = df_load.merge(cw, how='left', left_on=sectorcolumn,
+ right_on=sector
+ ).rename(columns={sector_add: 'sector_group'}
+ ).drop(columns=sector)
+
+ return df
+
+
def aggregate_and_subset_for_target_sectors(df, method):
"""Helper function to create data at aggregated NAICS prior to
subsetting based on the target_sector_list. Designed for use when
@@ -882,7 +1171,8 @@ def aggregate_and_subset_for_target_sectors(df, method):
# return sector level specified in method yaml
# load the crosswalk linking sector lengths
secondary_sector_level = method.get('target_subset_sector_level')
- sector_list = get_sector_list(method['target_sector_level'],
+ sector_list = get_sector_list(
+ method['target_sector_level'],
secondary_sector_level_dict=secondary_sector_level)
# subset df to get NAICS at the target level
diff --git a/flowsa/flowbysector.py b/flowsa/flowbysector.py
index a1938c623..be6084d0b 100644
--- a/flowsa/flowbysector.py
+++ b/flowsa/flowbysector.py
@@ -25,31 +25,30 @@
import os
from esupy.processed_data_mgmt import write_df_to_file
import flowsa
-from flowsa.location import fips_number_key, merge_urb_cnty_pct
-from flowsa.common import load_yaml_dict, check_activities_sector_like, \
- str2bool, fba_activity_fields, rename_log_file, \
- fbs_activity_fields, fba_fill_na_dict, fbs_fill_na_dict, \
+from flowsa.allocation import equally_allocate_parent_to_child_naics
+from flowsa.common import check_activities_sector_like, str2bool, \
+ fba_activity_fields, rename_log_file, fba_fill_na_dict, fbs_fill_na_dict, \
fbs_default_grouping_fields, fbs_grouping_fields_w_activities, \
- logoutputpath, load_yaml_dict, datapath
-from flowsa.schema import flow_by_activity_fields, flow_by_sector_fields, \
- flow_by_sector_fields_w_activity
-from flowsa.settings import log, vLog, \
- flowbysectoractivitysetspath, paths
-from flowsa.metadata import set_fb_meta, write_metadata
+ logoutputpath, load_yaml_dict
+from flowsa.dataclean import clean_df, harmonize_FBS_columns, \
+ reset_fbs_dq_scores
from flowsa.fbs_allocation import direct_allocation_method, \
function_allocation_method, dataset_allocation_method
-from flowsa.sectormapping import add_sectors_to_flowbyactivity, \
- map_fbs_flows, get_sector_list
from flowsa.flowbyfunctions import agg_by_geoscale, sector_aggregation, \
aggregator, subset_df_by_geoscale, sector_disaggregation, \
update_geoscale, subset_df_by_sector_list
-from flowsa.dataclean import clean_df, harmonize_FBS_columns, \
- reset_fbs_dq_scores
+from flowsa.location import fips_number_key, merge_urb_cnty_pct
+from flowsa.metadata import set_fb_meta, write_metadata
+from flowsa.schema import flow_by_activity_fields, flow_by_sector_fields, \
+ flow_by_sector_fields_w_activity
+from flowsa.sectormapping import add_sectors_to_flowbyactivity, \
+ map_fbs_flows, get_sector_list
+from flowsa.settings import log, vLog, flowbysectoractivitysetspath, paths
from flowsa.validation import compare_activity_to_sector_flowamounts, \
compare_fba_geo_subset_and_fbs_output_totals, compare_geographic_totals,\
- replace_naics_w_naics_from_another_year, \
- calculate_flowamount_diff_between_dfs, check_for_negative_flowamounts
-from flowsa.allocation import equally_allocate_parent_to_child_naics
+ replace_naics_w_naics_from_another_year, check_for_negative_flowamounts, \
+ compare_child_to_parent_sectors_flowamounts, \
+ check_if_data_exists_at_geoscale, calculate_flowamount_diff_between_dfs
def parse_args():
@@ -108,12 +107,16 @@ def load_source_dataframe(method, sourcename, source_dict,
flows_df = flowsa.getFlowBySector(sourcename)
elif source_dict['data_format'] == 'FBS_outside_flowsa':
vLog.info("Retrieving flowbysector for datasource %s", sourcename)
- flows_df = source_dict["FBS_datapull_fxn"](source_dict,
- method,
- fbsconfigpath)
+ fxn = source_dict.get("FBS_datapull_fxn")
+ if callable(fxn):
+ flows_df = fxn(source_dict, method, fbsconfigpath)
+ elif fxn:
+ raise flowsa.exceptions.FBSMethodConstructionError(
+ error_type='fxn_call')
else:
- vLog.error("Data format not specified in method "
- "file for datasource %s", sourcename)
+ raise flowsa.exceptions.FBSMethodConstructionError(
+ message="Data format not specified in method "
+ f"file for {sourcename}")
return flows_df
@@ -133,12 +136,17 @@ def main(**kwargs):
fbsconfigpath = kwargs.get('fbsconfigpath')
download_FBA_if_missing = kwargs.get('download_FBAs_if_missing')
# assign arguments
- vLog.info("Initiating flowbysector creation for %s", method_name)
+ vLog.info(f"Initiating flowbysector creation for {method_name}")
# call on method
method = load_yaml_dict(method_name, flowbytype='FBS',
filepath=fbsconfigpath)
# create dictionary of data and allocation datasets
- fb = method['source_names']
+ try:
+ fb = method['source_names']
+ except KeyError:
+ log.error("parameter 'source_names' not found in method. "
+ f"FBS for {method_name} can not be generated.")
+ return
# Create empty list for storing fbs files
fbs_list = []
for k, v in fb.items():
@@ -158,18 +166,27 @@ def main(**kwargs):
flows = merge_urb_cnty_pct(flows)
# clean up fba before mapping, if specified in yaml
- if "clean_fba_before_mapping_df_fxn" in v:
- vLog.info("Cleaning up %s FlowByActivity", k)
- flows = v["clean_fba_before_mapping_df_fxn"](flows)
+ fxn = v.get("clean_fba_before_mapping_df_fxn")
+ if callable(fxn):
+ vLog.info(f"Cleaning up {k} FlowByActivity")
+ flows = fxn(fba=flows, source_dict=v)
+ elif fxn:
+ raise flowsa.exceptions.FBSMethodConstructionError(
+ error_type='fxn_call')
# map flows to federal flow list or material flow list
- flows_mapped, mapping_files = \
- map_fbs_flows(flows, k, v, keep_fba_columns=True)
+ flows_mapped, mapping_files = (map_fbs_flows(
+ flows, k, v, keep_fba_columns=True,
+ keep_unmapped_rows=v.get("keep_unmapped_rows", False)))
# clean up fba, if specified in yaml
- if "clean_fba_df_fxn" in v:
- vLog.info("Cleaning up %s FlowByActivity", k)
- flows_mapped = v["clean_fba_df_fxn"](flows_mapped)
+ fxn = v.get("clean_fba_df_fxn")
+ if callable(fxn):
+ vLog.info(f"Cleaning up {k} FlowByActivity")
+ flows_mapped = fxn(fba=flows_mapped, source_dict=v)
+ elif fxn:
+ raise flowsa.exceptions.FBSMethodConstructionError(
+ error_type='fxn_call')
# master list of activity names read in from data source
ml_act = []
@@ -191,7 +208,7 @@ def main(**kwargs):
)].reset_index(drop=True)
ml_act.extend(names)
- vLog.info("Preparing to handle %s in %s", aset, k)
+ vLog.info(f"Preparing to handle {aset} in {k}")
# subset fba data by activity
flows_subset = flows_mapped[
(flows_mapped[fba_activity_fields[0]].isin(names)) |
@@ -209,7 +226,7 @@ def main(**kwargs):
log.warning(f"all flow data for {aset} is 0")
continue
# if activities are sector-like, check sectors are valid
- if check_activities_sector_like(k):
+ if check_activities_sector_like(flows_subset):
flows_subset2 = replace_naics_w_naics_from_another_year(
flows_subset, method['target_sector_source'])
@@ -248,6 +265,10 @@ def main(**kwargs):
attr=attr,
method=method
)
+ # check for activities at geoscale - return any missing
+ # locations for an activity
+ check_if_data_exists_at_geoscale(flows_subset_geo,
+ attr['allocation_from_scale'])
# rename SourceName to MetaSources and drop columns
flows_mapped_wsec = flows_subset_wsec.\
@@ -277,7 +298,7 @@ def main(**kwargs):
# define grouping columns dependent on sectors
# being activity-like or not
- if check_activities_sector_like(k) is False:
+ if check_activities_sector_like(fbs) is False:
groupingcols = fbs_grouping_fields_w_activities
groupingdict = flow_by_sector_fields_w_activity
else:
@@ -316,9 +337,12 @@ def main(**kwargs):
fbs_agg_2 = equally_allocate_parent_to_child_naics(
fbs_agg, method)
+ # compare child sectors to parent sectors flow amounts
+ compare_child_to_parent_sectors_flowamounts(fbs)
+
# compare flowbysector with flowbyactivity
compare_activity_to_sector_flowamounts(
- flows_mapped_wsec, fbs_agg_2, aset, k, method)
+ flows_mapped_wsec, fbs_agg_2, aset, method)
# return sector level specified in method yaml
# load the crosswalk linking sector lengths
@@ -343,15 +367,19 @@ def main(**kwargs):
flows_subset_geo, fbs_sector_subset, aset, k, v, attr,
method)
- log.info("Completed flowbysector for %s", aset)
+ log.info(f"Completed flowbysector for {aset}")
fbs_list.append(fbs_sector_subset)
else:
- if 'clean_fbs_df_fxn' in v:
- flows = v["clean_fbs_df_fxn"](flows, method)
+ fxn = v.get("clean_fbs_df_fxn")
+ if callable(fxn):
+ flows = fxn(flows, method)
+ elif fxn:
+ raise flowsa.exceptions.FBSMethodConstructionError(
+ error_type='fxn_call')
flows = update_geoscale(flows, method['target_geoscale'])
# if the loaded flow dt is already in FBS format,
# append directly to list of FBS
- log.info("Append %s to FBS list", k)
+ log.info(f"Append {k} to FBS list")
# ensure correct field datatypes and add any missing fields
flows = clean_df(flows, flow_by_sector_fields, fbs_fill_na_dict)
fbs_list.append(flows)
@@ -385,7 +413,7 @@ def main(**kwargs):
# rename the log file saved to local directory
rename_log_file(method_name, meta)
log.info('See the Validation log for detailed assessment of '
- 'model results in %s', logoutputpath)
+ f'model results in {logoutputpath}')
if __name__ == '__main__':
diff --git a/flowsa/location.py b/flowsa/location.py
index 354d6241f..f8bd73f89 100644
--- a/flowsa/location.py
+++ b/flowsa/location.py
@@ -91,7 +91,7 @@ def apply_county_FIPS(df, year='2015', source_state_abbrev=True):
"""
# If using 2 letter abbrevations, map to state names
if source_state_abbrev:
- df['State'] = df['State'].map(abbrev_us_state)
+ df['State'] = df['State'].map(abbrev_us_state).fillna(df['State'])
df['State'] = df.apply(lambda x: clean_str_and_capitalize(x.State),
axis=1)
df['County'] = df.apply(lambda x: clean_str_and_capitalize(x.County),
diff --git a/flowsa/metadata.py b/flowsa/metadata.py
index eb1d8734f..7b9b7279d 100644
--- a/flowsa/metadata.py
+++ b/flowsa/metadata.py
@@ -90,7 +90,8 @@ def return_fbs_method_data(source_name, config):
:param config: dictionary, configuration/method file
:return: meta object
"""
- from flowsa.data_source_scripts.stewiFBS import add_stewi_metadata
+ from flowsa.data_source_scripts.stewiFBS import add_stewi_metadata,\
+ add_stewicombo_metadata
# load the yaml that lists what additional fbas are
# used in creating the fbs
@@ -108,8 +109,12 @@ def return_fbs_method_data(source_name, config):
for k, v in fb.items():
if k == 'stewiFBS':
# get stewi metadata
- meta['primary_source_meta'][k] = \
- add_stewi_metadata(v['inventory_dict'])
+ if v.get('local_inventory_name'):
+ meta['primary_source_meta'][k] = add_stewicombo_metadata(
+ v.get('local_inventory_name'))
+ else:
+ meta['primary_source_meta'][k] = add_stewi_metadata(
+ v['inventory_dict'])
continue
if v['data_format'] in ('FBS', 'FBS_outside_flowsa'):
meta['primary_source_meta'][k] = \
diff --git a/flowsa/methods/flowbyactivitymethods/CalRecycle_WasteCharacterization.yaml b/flowsa/methods/flowbyactivitymethods/CalRecycle_WasteCharacterization.yaml
index 7426dd04c..41d04478c 100644
--- a/flowsa/methods/flowbyactivitymethods/CalRecycle_WasteCharacterization.yaml
+++ b/flowsa/methods/flowbyactivitymethods/CalRecycle_WasteCharacterization.yaml
@@ -1,6 +1,6 @@
author: California Commercial
source_name: CalRecycle_WasteCharacterization
-source_url: ' '
+source_url: 'https://www2.calrecycle.ca.gov/WasteCharacterization/PubExtracts/2014/GenSummary.pdf'
original_data_download_date: ' '
bib_id: CalRec_WasteChar
api_name: None
diff --git a/flowsa/methods/flowbyactivitymethods/Census_CBP.yaml b/flowsa/methods/flowbyactivitymethods/Census_CBP.yaml
index f7a82d684..16629ee9b 100644
--- a/flowsa/methods/flowbyactivitymethods/Census_CBP.yaml
+++ b/flowsa/methods/flowbyactivitymethods/Census_CBP.yaml
@@ -9,7 +9,7 @@ url:
base_url: http://api.census.gov/data/
api_path: __year__/cbp? # __year__ is used to trigger a replace function to substitute in year
url_params:
- get: __NAICS__,ESTAB #,EMP,PAYANN #replace __NAICS__ based on year of data. >=2017 is NAICS2017, 2012-2016 is NAICS2012
+ get: __NAICS__,ESTAB,EMP,PAYANN #replace __NAICS__ based on year of data. >=2017 is NAICS2017, 2012-2016 is NAICS2012
for: county:__countyFIPS__ #retrieves every county
in: state:__stateFIPS__ #requests a state 2-digit FIPS code that has to be supplied dynamically
key: __apiKey__ # make the __apiKey__ part of the url and substitute in individual api key
@@ -26,3 +26,6 @@ years:
- 2015
- 2016
- 2017
+- 2018
+- 2019
+- 2020
diff --git a/flowsa/methods/flowbyactivitymethods/EIA_MECS_Energy.yaml b/flowsa/methods/flowbyactivitymethods/EIA_MECS_Energy.yaml
index 71ba02f9e..dd7e7ca36 100644
--- a/flowsa/methods/flowbyactivitymethods/EIA_MECS_Energy.yaml
+++ b/flowsa/methods/flowbyactivitymethods/EIA_MECS_Energy.yaml
@@ -35,17 +35,17 @@ table_dict:
- Coke and Breeze | million short tons
- Other | trillion Btu
regions:
- Total United States : [14,97]
- Northeast Region : [100,181]
- Midwest Region : [184,265]
- South Region : [268,349]
- West Region : [352,433]
+ Total United States : [14,98]
+ Northeast Region : [101,183]
+ Midwest Region : [186,268]
+ South Region : [271,353]
+ West Region : [356,438]
rse_regions:
- Total United States : [12,95]
- Northeast Region : [98,179]
- Midwest Region : [182,263]
- South Region : [266,347]
- West Region : [350,431]
+ Total United States : [12,96]
+ Northeast Region : [99,181]
+ Midwest Region : [184,266]
+ South Region : [269,351]
+ West Region : [354,436]
data_type: 'nonfuel consumption'
Table 2.2:
col_names:
@@ -60,17 +60,17 @@ table_dict:
- Coke and Breeze
- Other
regions:
- Total United States : [14,97]
- Northeast Region : [100,181]
- Midwest Region : [184,265]
- South Region : [268,349]
- West Region : [352,433]
+ Total United States : [14,98]
+ Northeast Region : [101,183]
+ Midwest Region : [186,268]
+ South Region : [271,353]
+ West Region : [356,438]
rse_regions:
- Total United States : [12,95]
- Northeast Region : [98,179]
- Midwest Region : [182,263]
- South Region : [266,347]
- West Region : [350,431]
+ Total United States : [12,96]
+ Northeast Region : [99,181]
+ Midwest Region : [184,266]
+ South Region : [269,351]
+ West Region : [354,436]
data_type: 'nonfuel consumption'
Table 3.1:
col_names:
@@ -86,17 +86,17 @@ table_dict:
- Coke and Breeze | million short tons
- Other | trillion Btu
regions:
- Total United States : [14,95]
- Northeast Region : [98,179]
- Midwest Region : [182,263]
- South Region : [266,347]
- West Region : [350,431]
+ Total United States : [14,96]
+ Northeast Region : [99,181]
+ Midwest Region : [184,266]
+ South Region : [269,351]
+ West Region : [354,436]
rse_regions:
- Total United States : [12,93]
- Northeast Region : [96,177]
- Midwest Region : [180,261]
- South Region : [264,345]
- West Region : [348,429]
+ Total United States : [12,94]
+ Northeast Region : [97,179]
+ Midwest Region : [182,264]
+ South Region : [267,349]
+ West Region : [352,434]
data_type: 'fuel consumption'
Table 3.2:
col_names:
@@ -112,17 +112,17 @@ table_dict:
- Coke and Breeze
- Other
regions:
- Total United States : [14,95]
- Northeast Region : [98,179]
- Midwest Region : [182,263]
- South Region : [266,347]
- West Region : [350,431]
+ Total United States : [14,96]
+ Northeast Region : [99,181]
+ Midwest Region : [184,266]
+ South Region : [269,351]
+ West Region : [354,436]
rse_regions:
- Total United States : [12,93]
- Northeast Region : [96,177]
- Midwest Region : [180,261]
- South Region : [264,345]
- West Region : [348,429]
+ Total United States : [12,94]
+ Northeast Region : [97,179]
+ Midwest Region : [182,264]
+ South Region : [267,349]
+ West Region : [352,434]
data_type: 'fuel consumption'
'2014':
diff --git a/flowsa/methods/flowbyactivitymethods/EPA_SIT.yaml b/flowsa/methods/flowbyactivitymethods/EPA_SIT.yaml
new file mode 100644
index 000000000..36a3f2fee
--- /dev/null
+++ b/flowsa/methods/flowbyactivitymethods/EPA_SIT.yaml
@@ -0,0 +1,231 @@
+author: US Environmental Protection Agency
+source_name: State Inventory Tool
+source_url: 'https://www.epa.gov/statelocalenergy/state-inventory-and-projection-tool'
+bib_id: EPA_SIT
+format: xlsm # macro-enabled spreadsheet
+url: None
+parse_response_fxn: !script_function:EPA_SIT epa_sit_parse
+state: 'ME'
+file: 'Synthesis Tool.xlsm'
+years:
+- 2017
+- 2016
+- 2015
+- 2014
+- 2013
+- 2012
+- 2011
+- 2010
+- 2009
+- 2008
+- 2007
+- 2006
+- 2005
+- 2004
+- 2003
+- 2002
+- 2001
+- 2000
+- 1999
+- 1998
+- 1997
+- 1996
+- 1995
+- 1994
+- 1993
+- 1992
+- 1991
+- 1990
+
+sheet_dict:
+
+ 'CO2FFC':
+ nrows: 32
+ unit: MMTCO2e
+ flow: CO2
+ headers:
+ - Residential
+ - Commercial
+ - Industrial
+ - Transportation
+ - Electric Utilities
+ - International Bunker Fuels
+ - TOTAL
+
+ 'IndirectCO2':
+ nrows: 45
+ unit: MMTCO2e
+ flow: CO2
+ headers:
+ - Residential
+ - Commercial
+ - Industrial
+ - Transportation
+ - TOTAL
+ subsubheaders:
+ - Conventional Boiler Use
+ - CHP and/or Cogeneration Process
+ - Process Heating
+ - Process Cooling and Refrigeration
+ - Machine Drive
+ - Electro-Chemical Processes
+ - Other Process use
+ - Facility HVAC
+ - Facility Lighting
+ - Other Facility Support
+ - Onsite Transportation
+ - Other Nonprocess Use
+
+ 'Stationary':
+ nrows: 15
+ unit: MMTCO2e
+ headers:
+ - Residential
+ - Commercial
+ - Industrial
+ - Electric Utilities
+ - TOTAL
+ skiprowstart: 13
+ skiprowend: 15
+ subgroup: flow
+
+ 'Mobile Combustion CH4':
+ sheetname: Mobile Combustion
+ header: 55
+ nrows: 21
+ flow: CH4
+ unit: MTCO2e
+ headers:
+ - Gasoline Highway
+ - Diesel Highway
+ - Non-Highway
+ - Alternative Fuel Vehicles
+ - Total
+
+ 'Mobile Combustion N2O':
+ sheetname: Mobile Combustion
+ header: 82
+ nrows: 21
+ flow: N2O
+ unit: MTCO2e
+ headers:
+ - Gasoline Highway
+ - Diesel Highway
+ - Non-Highway
+ - Alternative Fuel Vehicles
+ - Total
+
+ 'Coal':
+ header: 3
+ nrows: 5
+ unit: MTCO2e
+ flow: CH4
+ headers:
+ - Coal Mining
+ - Abandoned Coal Mines
+
+ 'Gas and Oil':
+ header: 17
+ nrows: 5
+ unit: MT
+ flow: CH4
+ headers:
+ - Natural Gas
+ - Oil
+
+ 'Natural Gas Flaring':
+ sheetname: 'Gas and Oil'
+ header: 24
+ nrows: 1
+ unit: MMT
+ flow: CO2
+ headers:
+ - Natural Gas Flaring
+
+ 'IP':
+ nrows: 20
+ unit: MTCO2e
+ headers:
+ - Carbon Dioxide Emissions
+ - Nitrous Oxide Emissions
+ - HFC, PFC, SF6 and NF3 Emissions
+ - Total Emissions
+ subgroup: 'activitybyflow'
+
+ 'Agriculture':
+ sheetname: Agriculture
+ header: 26
+ nrows: 12
+ unit: MMT
+ headers:
+ - Carbon Dioxide
+ - Methane
+ - Nitrous Oxide
+ subgroup: 'activitybyflow'
+
+ 'Agricultural Soil Management':
+ sheetname: Agriculture
+ tablename: Agricultural Soil Management
+ header: 41
+ nrows: 13
+ unit: MT
+ flow: N2O
+ headers:
+ - Direct
+ - Indirect
+ - TOTAL
+ subsubheaders:
+ - Fertilizer Runoff/Leached
+ - Manure Runoff/Leached
+
+ 'Land-Use Change and Forest Emissions and Sequestration':
+ sheetname: Forest Management
+ header: 8
+ nrows: 19
+ unit: MMTCO2E
+ flow: CO2E
+ headers:
+ - Forest Carbon Flux
+ - Urban Trees
+ - Landfilled Yard Trimmings and Food Scraps
+ - Forest Fires
+ - N2O from Settlement Soils
+ - Agricultural Soil Carbon Flux
+
+ 'Emissions from Landfills':
+ sheetname: Waste
+ tablename: Emissions from Landfills
+ header: 13
+ nrows: 9
+ unit: MTCO2E
+ flow: CH4
+ headers:
+ - Potential CH4
+ - CH4 Avoided
+ - Oxidation at MSW Landfills
+ - Oxidation at Industrial Landfills
+ - Total CH4 Emissions
+
+ 'Waste Combustion':
+ sheetname: Waste
+ tablename: Emissions from Waste Combustion
+ header: 25
+ nrows: 7
+ unit: MTCO2e
+ headers:
+ - CO2
+ - N2O
+ - CH4
+ - Total CO2, N2O, CH4 Emissions
+ subgroup: 'activitybyflow'
+
+ 'Wastewater':
+ header: 3
+ nrows: 8
+ unit: MMTCO2e
+ headers:
+ - Municipal CH4
+ - Municipal N2O
+ - Industrial CH4
+ - Total Emissions
+ subgroup: 'activitybyflow'
\ No newline at end of file
diff --git a/flowsa/methods/flowbyactivitymethods/EPA_StateGHGI.yaml b/flowsa/methods/flowbyactivitymethods/EPA_StateGHGI.yaml
new file mode 100644
index 000000000..026c671b4
--- /dev/null
+++ b/flowsa/methods/flowbyactivitymethods/EPA_StateGHGI.yaml
@@ -0,0 +1,39 @@
+author: US Environmental Protection Agency
+source_name: 'State Greenhouse Gas Inventories'
+source_url: 'https://www.epa.gov/ghgemissions/state-ghg-emissions-and-removals'
+bib_id: ''
+format: json
+url: None
+parse_response_fxn: !script_function:EPA_StateGHGI epa_state_ghgi_parse
+file: "W_INV_FACTS_ACTIVE_GHG_CO2E.json"
+years:
+- 2019
+- 2018
+- 2017
+- 2016
+- 2015
+- 2014
+- 2013
+- 2012
+- 2011
+- 2010
+- 2009
+- 2008
+- 2007
+- 2006
+- 2005
+- 2004
+- 2003
+- 2002
+- 2001
+- 2000
+- 1999
+- 1998
+- 1997
+- 1996
+- 1995
+- 1994
+- 1993
+- 1992
+- 1991
+- 1990
diff --git a/flowsa/methods/flowbyactivitymethods/README.md b/flowsa/methods/flowbyactivitymethods/README.md
index 5f6e8adaa..f431fdfc8 100644
--- a/flowsa/methods/flowbyactivitymethods/README.md
+++ b/flowsa/methods/flowbyactivitymethods/README.md
@@ -1,7 +1,6 @@
# Data source configuration
-Standard source configuration files provide information needed for pulling data sources
-
-They are stored in YAML format using a .yaml extension
+Standard source configuration files provide information needed for pulling
+data sources. They are stored in YAML format using a .yaml extension
```
#Source configuration format
@@ -20,19 +19,26 @@ url
# A set of url parameters for query string, specific to data set
year_param: name of year parameter
key_param: name of key parameter
-url_replace_fxn: name of the source specific function that replaces the dynamic values in the URL
-call_response_fxn: name of the source specific function that specifies how data should be loaded
-parse_response_fxn: name of the source specific function that parses and formats the dataframe
+url_replace_fxn: name of the source specific function that replaces the
+dynamic values in the URL
+call_response_fxn: name of the source specific function that specifies how
+data should be loaded
+parse_response_fxn: name of the source specific function that parses and
+formats the dataframe
years:
#years of data as separate lines like - 2015
* can add additional yaml dictionary items specific to calling on a data set
```
To declare a value that needs to be dynamically replaced, surround
-a variable name in double underscores like \__foo__ so that a string
+a variable name in double underscores like `__foo__` so that a string
function will do a dynamic replacement
-Based on [YAML v1.1 schema](https://yaml.org/spec/1.1/)
-
-Use [YAMLlint](http://www.yamllint.com/) to assure the file is valid YAML
+Specify the functions to use in the FBA creation using the tag
+`!script_function:PythonFileName FunctionName`
+where _PythonFileName_ is the name of the Python file (e.g.,
+"Census_PEP_Population.py") and _FunctionName_ is the name of the function
+(e.g., "Census_pop_URL_helper").
+Based on [YAML v1.1 schema](https://yaml.org/spec/1.1/). Use
+[YAMLlint](http://www.yamllint.com/) to assure the file is valid YAML.
diff --git a/flowsa/methods/flowbyactivitymethods/USDA_ACUP_Fertilizer.yaml b/flowsa/methods/flowbyactivitymethods/USDA_ACUP_Fertilizer.yaml
index 9bda5ab89..2d3c70e1e 100644
--- a/flowsa/methods/flowbyactivitymethods/USDA_ACUP_Fertilizer.yaml
+++ b/flowsa/methods/flowbyactivitymethods/USDA_ACUP_Fertilizer.yaml
@@ -14,11 +14,8 @@ url:
source_desc: SURVEY
sector_desc: ENVIRONMENTAL
unit_desc: LB
- # __domainLevel__ triggers a replace function
- domain_desc: __domainLevel__
+ domain_desc: FERTILIZER
agg_level_desc: STATE
- # dynamically substitute the 50 state acronyms.
- state_alpha: __stateAlpha__
# __year__ is used to trigger a replace function to substitute in year
year: __year__
key_param: key
@@ -31,5 +28,3 @@ years:
- 2017
- 2018
- 2020
-domain_levels:
-- FERTILIZER
diff --git a/flowsa/methods/flowbyactivitymethods/USDA_ACUP_Pesticide.yaml b/flowsa/methods/flowbyactivitymethods/USDA_ACUP_Pesticide.yaml
index af253cfd8..91b055188 100644
--- a/flowsa/methods/flowbyactivitymethods/USDA_ACUP_Pesticide.yaml
+++ b/flowsa/methods/flowbyactivitymethods/USDA_ACUP_Pesticide.yaml
@@ -14,11 +14,8 @@ url:
source_desc: SURVEY
sector_desc: ENVIRONMENTAL
unit_desc: LB
- # __domainLevel__ triggers a replace function
- domain_desc: __domainLevel__
+ domain_desc: CHEMICAL,%20FUNGICIDE&CHEMICAL,%20HERBICIDE&CHEMICAL,%20INCECTICIDE&CHEMICAL,%20OTHER
agg_level_desc: STATE
- # dynamically substitute the 50 state acronyms.
- state_alpha: __stateAlpha__
# __year__ is used to trigger a replace function to substitute in year
year: __year__
key_param: key
@@ -31,8 +28,3 @@ years:
- 2017
- 2018
- 2020
-domain_levels:
-- CHEMICAL, FUNGICIDE
-- CHEMICAL, HERBICIDE
-- CHEMICAL, INCECTICIDE
-- CHEMICAL, OTHER
diff --git a/flowsa/methods/flowbyactivitymethods/USGS_MYB_Asbestos.yaml b/flowsa/methods/flowbyactivitymethods/USGS_MYB_Asbestos.yaml
index c49c19c22..dc6b833c5 100644
--- a/flowsa/methods/flowbyactivitymethods/USGS_MYB_Asbestos.yaml
+++ b/flowsa/methods/flowbyactivitymethods/USGS_MYB_Asbestos.yaml
@@ -1,12 +1,12 @@
# Downloads and parses CSV from USGS Asbestos Statistics and Information on an annual basis.
---
source_name: USGS Mineral Yearbook Asbestos
-citable_url: https://prd-wret.s3.us-west-2.amazonaws.com/assets/palladium/production/atoms/files/myb1-2018-asbes-adv.xlsx
+citable_url: https://d9-wret.s3.us-west-2.amazonaws.com/assets/palladium/production/atoms/files/myb1-2018-asbes.xls
date_generated: '2021-03-26'
years_available: 2014-2018
format: xls
url:
- base_url: https://prd-wret.s3.us-west-2.amazonaws.com/assets/palladium/production/atoms/files/myb1-2018-asbes-adv.xlsx
+ base_url: https://d9-wret.s3.us-west-2.amazonaws.com/assets/palladium/production/atoms/files/myb1-2018-asbes.xls
url_replace_fxn: !script_function:USGS_MYB usgs_myb_url_helper
call_response_fxn: !script_function:USGS_MYB usgs_asbestos_call
parse_response_fxn: !script_function:USGS_MYB usgs_asbestos_parse
diff --git a/flowsa/methods/flowbyactivitymethods/USGS_MYB_Barite.yaml b/flowsa/methods/flowbyactivitymethods/USGS_MYB_Barite.yaml
index d5a4e9ec8..8bf04f183 100644
--- a/flowsa/methods/flowbyactivitymethods/USGS_MYB_Barite.yaml
+++ b/flowsa/methods/flowbyactivitymethods/USGS_MYB_Barite.yaml
@@ -1,7 +1,7 @@
# Downloads and parses CSV from USGS Barite Statistics and Information on an annual basis.
---
source_name: USGS Mineral Yearbook Barite
-citable_url: https://prd-wret.s3.us-west-2.amazonaws.com/assets/palladium/production/atoms/files/myb1-2018-barit-adv.xlsx
+citable_url: https://pubs.usgs.gov/myb/vol1/2018/myb1-2018-barite.xlsx
date_generated: '2021-03-19'
years_available: 2014-2018
format: xls
diff --git a/flowsa/methods/flowbyactivitymethods/USGS_MYB_Bauxite.yaml b/flowsa/methods/flowbyactivitymethods/USGS_MYB_Bauxite.yaml
index 7ec6ba873..87c937166 100644
--- a/flowsa/methods/flowbyactivitymethods/USGS_MYB_Bauxite.yaml
+++ b/flowsa/methods/flowbyactivitymethods/USGS_MYB_Bauxite.yaml
@@ -1,12 +1,12 @@
# Downloads and parses CSV from USGS Bauxite Statistics and Information on an annual basis.
---
source_name: USGS Mineral Yearbook Bauxite
-citable_url: https://prd-wret.s3.us-west-2.amazonaws.com/assets/palladium/production/atoms/files/myb1-2017-bauxi.xls
+citable_url: https://d9-wret.s3.us-west-2.amazonaws.com/assets/palladium/production/atoms/files/myb1-2017-bauxi.xls
date_generated: '2021-03-19'
years_available: 2013-2017
format: xls
url:
- base_url: https://prd-wret.s3.us-west-2.amazonaws.com/assets/palladium/production/atoms/files/myb1-2017-bauxi.xls
+ base_url: https://d9-wret.s3.us-west-2.amazonaws.com/assets/palladium/production/atoms/files/myb1-2017-bauxi.xls
url_replace_fxn: !script_function:USGS_MYB usgs_myb_url_helper
call_response_fxn: !script_function:USGS_MYB usgs_bauxite_call
parse_response_fxn: !script_function:USGS_MYB usgs_bauxite_parse
diff --git a/flowsa/methods/flowbyactivitymethods/USGS_MYB_Beryllium.yaml b/flowsa/methods/flowbyactivitymethods/USGS_MYB_Beryllium.yaml
index daa3da374..a114fea13 100644
--- a/flowsa/methods/flowbyactivitymethods/USGS_MYB_Beryllium.yaml
+++ b/flowsa/methods/flowbyactivitymethods/USGS_MYB_Beryllium.yaml
@@ -1,12 +1,12 @@
# Downloads and parses CSV from USGS Beryllium Statistics and Information on an annual basis.
---
source_name: USGS Mineral Yearbook Beryllium
-citable_url: https://prd-wret.s3.us-west-2.amazonaws.com/assets/palladium/production/atoms/files/myb1-2018-beryl-adv.xlsx
+citable_url: https://d9-wret.s3.us-west-2.amazonaws.com/assets/palladium/production/atoms/files/myb1-2018-beryl.xls
date_generated: '2021-03-26'
years_available: 2014-2018
format: xls
url:
- base_url: https://prd-wret.s3.us-west-2.amazonaws.com/assets/palladium/production/atoms/files/myb1-2018-beryl-adv.xlsx
+ base_url: https://d9-wret.s3.us-west-2.amazonaws.com/assets/palladium/production/atoms/files/myb1-2018-beryl.xls
url_replace_fxn: !script_function:USGS_MYB usgs_myb_url_helper
call_response_fxn: !script_function:USGS_MYB usgs_beryllium_call
parse_response_fxn: !script_function:USGS_MYB usgs_beryllium_parse
diff --git a/flowsa/methods/flowbyactivitymethods/USGS_MYB_Boron.yaml b/flowsa/methods/flowbyactivitymethods/USGS_MYB_Boron.yaml
index e66c1f77f..288c064da 100644
--- a/flowsa/methods/flowbyactivitymethods/USGS_MYB_Boron.yaml
+++ b/flowsa/methods/flowbyactivitymethods/USGS_MYB_Boron.yaml
@@ -1,12 +1,12 @@
# Downloads and parses CSV from USGS Boron Statistics and Information on an annual basis.
---
source_name: USGS Mineral Yearbook Boron
-citable_url: https://prd-wret.s3-us-west-2.amazonaws.com/assets/palladium/production/atoms/files/myb1-2018-boron-adv.xlsx
+citable_url: https://d9-wret.s3-us-west-2.amazonaws.com/assets/palladium/production/atoms/files/myb1-2018-boron-2.xls
date_generated: '2021-03-26'
years_available: 2014-2018
format: xls
url:
- base_url: https://prd-wret.s3-us-west-2.amazonaws.com/assets/palladium/production/atoms/files/myb1-2018-boron-adv.xlsx
+ base_url: https://d9-wret.s3-us-west-2.amazonaws.com/assets/palladium/production/atoms/files/myb1-2018-boron-2.xls
url_replace_fxn: !script_function:USGS_MYB usgs_myb_url_helper
call_response_fxn: !script_function:USGS_MYB usgs_boron_call
parse_response_fxn: !script_function:USGS_MYB usgs_boron_parse
diff --git a/flowsa/methods/flowbyactivitymethods/USGS_MYB_Chromium.yaml b/flowsa/methods/flowbyactivitymethods/USGS_MYB_Chromium.yaml
index 758f5c259..e21ed8b82 100644
--- a/flowsa/methods/flowbyactivitymethods/USGS_MYB_Chromium.yaml
+++ b/flowsa/methods/flowbyactivitymethods/USGS_MYB_Chromium.yaml
@@ -1,12 +1,12 @@
# Downloads and parses CSV from USGS Chromium Statistics and Information on an annual basis.
---
source_name: USGS Mineral Yearbook Chromium
-citable_url: https://prd-wret.s3.us-west-2.amazonaws.com/assets/palladium/production/atoms/files/myb1-2018-chrom-adv.xlsx
+citable_url: https://d9-wret.s3.us-west-2.amazonaws.com/assets/palladium/production/atoms/files/myb1-2018-chrom.xls
date_generated: '2021-03-19'
years_available: 2014-2018
format: xls
url:
- base_url: https://prd-wret.s3.us-west-2.amazonaws.com/assets/palladium/production/atoms/files/myb1-2018-chrom-adv.xlsx
+ base_url: https://d9-wret.s3.us-west-2.amazonaws.com/assets/palladium/production/atoms/files/myb1-2018-chrom.xls
url_replace_fxn: !script_function:USGS_MYB usgs_myb_url_helper
call_response_fxn: !script_function:USGS_MYB usgs_chromium_call
parse_response_fxn: !script_function:USGS_MYB usgs_chromium_parse
diff --git a/flowsa/methods/flowbyactivitymethods/USGS_MYB_Clay.yaml b/flowsa/methods/flowbyactivitymethods/USGS_MYB_Clay.yaml
index 4522fd5f7..d4d1f677e 100644
--- a/flowsa/methods/flowbyactivitymethods/USGS_MYB_Clay.yaml
+++ b/flowsa/methods/flowbyactivitymethods/USGS_MYB_Clay.yaml
@@ -1,12 +1,12 @@
# Downloads and parses CSV from USGS Clay Statistics and Information on an annual basis.
---
source_name: USGS Mineral Yearbook Clay
-citable_url: https://prd-wret.s3-us-west-2.amazonaws.com/assets/palladium/production/atoms/files/myb1-2016-clays.xls
+citable_url: https://d9-wret.s3-us-west-2.amazonaws.com/assets/palladium/production/atoms/files/myb1-2016-clays.xls
date_generated: '2021-03-19'
years_available: 2015-2016
format: xls
url:
- base_url: https://prd-wret.s3-us-west-2.amazonaws.com/assets/palladium/production/atoms/files/myb1-2016-clays.xls
+ base_url: https://d9-wret.s3-us-west-2.amazonaws.com/assets/palladium/production/atoms/files/myb1-2016-clays.xls
url_replace_fxn: !script_function:USGS_MYB usgs_myb_url_helper
call_response_fxn: !script_function:USGS_MYB usgs_clay_call
parse_response_fxn: !script_function:USGS_MYB usgs_clay_parse
diff --git a/flowsa/methods/flowbyactivitymethods/USGS_MYB_Cobalt.yaml b/flowsa/methods/flowbyactivitymethods/USGS_MYB_Cobalt.yaml
index 5fea8fda6..447f855fc 100644
--- a/flowsa/methods/flowbyactivitymethods/USGS_MYB_Cobalt.yaml
+++ b/flowsa/methods/flowbyactivitymethods/USGS_MYB_Cobalt.yaml
@@ -1,12 +1,12 @@
# Downloads and parses CSV from USGS Cobalt Statistics and Information on an annual basis.
---
source_name: USGS Mineral Yearbook Cobalt
-citable_url: https://prd-wret.s3.us-west-2.amazonaws.com/assets/palladium/production/atoms/files/myb1-2017-cobal-adv.xlsx
+citable_url: https://d9-wret.s3.us-west-2.amazonaws.com/assets/palladium/production/atoms/files/myb1-2017-cobal.xls
date_generated: '2021-03-26'
years_available: 2013-2017
format: xls
url:
- base_url: https://prd-wret.s3.us-west-2.amazonaws.com/assets/palladium/production/atoms/files/myb1-2017-cobal-adv.xlsx
+ base_url: https://d9-wret.s3.us-west-2.amazonaws.com/assets/palladium/production/atoms/files/myb1-2017-cobal.xls
url_replace_fxn: !script_function:USGS_MYB usgs_myb_url_helper
call_response_fxn: !script_function:USGS_MYB usgs_cobalt_call
parse_response_fxn: !script_function:USGS_MYB usgs_cobalt_parse
diff --git a/flowsa/methods/flowbyactivitymethods/USGS_MYB_Diatomite.yaml b/flowsa/methods/flowbyactivitymethods/USGS_MYB_Diatomite.yaml
index a63634bff..5b41f3df4 100644
--- a/flowsa/methods/flowbyactivitymethods/USGS_MYB_Diatomite.yaml
+++ b/flowsa/methods/flowbyactivitymethods/USGS_MYB_Diatomite.yaml
@@ -1,12 +1,12 @@
# Downloads and parses CSV from USGS Diatomite Statistics and Information on an annual basis.
---
source_name: USGS Mineral Yearbook Diatomite
-citable_url: https://prd-wret.s3-us-west-2.amazonaws.com/assets/palladium/production/atoms/files/myb1-2018-diato-adv.xlsx
+citable_url: https://d9-wret.s3.us-west-2.amazonaws.com/assets/palladium/production/atoms/files/myb1-2018-diato.xls
date_generated: '2021-03-26'
years_available: 2014-2018
format: xls
url:
- base_url: https://prd-wret.s3-us-west-2.amazonaws.com/assets/palladium/production/atoms/files/myb1-2018-diato-adv.xlsx
+ base_url: https://d9-wret.s3.us-west-2.amazonaws.com/assets/palladium/production/atoms/files/myb1-2018-diato.xls
url_replace_fxn: !script_function:USGS_MYB usgs_myb_url_helper
call_response_fxn: !script_function:USGS_MYB usgs_diatomite_call
parse_response_fxn: !script_function:USGS_MYB usgs_diatomite_parse
diff --git a/flowsa/methods/flowbyactivitymethods/USGS_MYB_Feldspar.yaml b/flowsa/methods/flowbyactivitymethods/USGS_MYB_Feldspar.yaml
index 7058223ca..8f70e3d07 100644
--- a/flowsa/methods/flowbyactivitymethods/USGS_MYB_Feldspar.yaml
+++ b/flowsa/methods/flowbyactivitymethods/USGS_MYB_Feldspar.yaml
@@ -1,12 +1,12 @@
# Downloads and parses CSV from USGS Feldspar Statistics and Information on an annual basis.
---
source_name: USGS Mineral Yearbook Feldspar
-citable_url: https://prd-wret.s3.us-west-2.amazonaws.com/assets/palladium/production/atoms/files/myb1-2017-felds.xls
+citable_url: https://d9-wret.s3.us-west-2.amazonaws.com/assets/palladium/production/atoms/files/myb1-2017-felds.xls
date_generated: '2021-03-26'
years_available: 2013-2017
format: xls
url:
- base_url: https://prd-wret.s3.us-west-2.amazonaws.com/assets/palladium/production/atoms/files/myb1-2017-felds.xls
+ base_url: https://d9-wret.s3.us-west-2.amazonaws.com/assets/palladium/production/atoms/files/myb1-2017-felds.xls
url_replace_fxn: !script_function:USGS_MYB usgs_myb_url_helper
call_response_fxn: !script_function:USGS_MYB usgs_feldspar_call
parse_response_fxn: !script_function:USGS_MYB usgs_feldspar_parse
diff --git a/flowsa/methods/flowbyactivitymethods/USGS_MYB_Fluorspar.yaml b/flowsa/methods/flowbyactivitymethods/USGS_MYB_Fluorspar.yaml
index 6abffac09..3ac0ba5dd 100644
--- a/flowsa/methods/flowbyactivitymethods/USGS_MYB_Fluorspar.yaml
+++ b/flowsa/methods/flowbyactivitymethods/USGS_MYB_Fluorspar.yaml
@@ -1,12 +1,12 @@
# Downloads and parses CSV from USGS Fluorspar Statistics and Information on an annual basis.
---
source_name: USGS Mineral Yearbook Fluorspar
-citable_url: https://prd-wret.s3.us-west-2.amazonaws.com/assets/palladium/production/atoms/files/myb1-2017-fluor.xls
+citable_url: https://d9-wret.s3.us-west-2.amazonaws.com/assets/palladium/production/atoms/files/myb1-2017-fluor.xls
date_generated: '2021-03-19'
years_available: 2013-2017
format: xls
url:
- base_url: https://prd-wret.s3.us-west-2.amazonaws.com/assets/palladium/production/atoms/files/myb1-2017-fluor.xls
+ base_url: https://d9-wret.s3.us-west-2.amazonaws.com/assets/palladium/production/atoms/files/myb1-2017-fluor.xls
url_replace_fxn: !script_function:USGS_MYB usgs_myb_url_helper
call_response_fxn: !script_function:USGS_MYB usgs_fluorspar_call
parse_response_fxn: !script_function:USGS_MYB usgs_fluorspar_parse
diff --git a/flowsa/methods/flowbyactivitymethods/USGS_MYB_Gallium.yaml b/flowsa/methods/flowbyactivitymethods/USGS_MYB_Gallium.yaml
index 26cbd5aac..6948bd335 100644
--- a/flowsa/methods/flowbyactivitymethods/USGS_MYB_Gallium.yaml
+++ b/flowsa/methods/flowbyactivitymethods/USGS_MYB_Gallium.yaml
@@ -1,7 +1,7 @@
# Downloads and parses CSV from USGS Gallium Statistics and Information on an annual basis.
---
source_name: USGS Mineral Yearbook Gallium
-citable_url: https://prd-wret.s3.us-west-2.amazonaws.com/assets/palladium/production/atoms/files/myb1-2018-galli-adv.xlsx
+citable_url: https://pubs.usgs.gov/myb/vol1/2018/myb1-2018-gallium.xlsx
date_generated: '2021-03-26'
years_available: 2014-2018
format: xls
diff --git a/flowsa/methods/flowbyactivitymethods/USGS_MYB_Garnet.yaml b/flowsa/methods/flowbyactivitymethods/USGS_MYB_Garnet.yaml
index d89efca50..72518ef21 100644
--- a/flowsa/methods/flowbyactivitymethods/USGS_MYB_Garnet.yaml
+++ b/flowsa/methods/flowbyactivitymethods/USGS_MYB_Garnet.yaml
@@ -1,12 +1,12 @@
# Downloads and parses CSV from USGS Garnet Statistics and Information on an annual basis.
---
source_name: USGS Mineral Yearbook Garnet
-citable_url: https://prd-wret.s3-us-west-2.amazonaws.com/assets/palladium/production/atoms/files/myb1-2018-garne-adv.xlsx
+citable_url: https://d9-wret.s3.us-west-2.amazonaws.com/assets/palladium/production/atoms/files/myb1-2018-garne.xls
date_generated: '2021-03-26'
years_available: 2014-2018
format: xls
url:
- base_url: https://prd-wret.s3-us-west-2.amazonaws.com/assets/palladium/production/atoms/files/myb1-2018-garne-adv.xlsx
+ base_url: https://d9-wret.s3.us-west-2.amazonaws.com/assets/palladium/production/atoms/files/myb1-2018-garne.xls
url_replace_fxn: !script_function:USGS_MYB usgs_myb_url_helper
call_response_fxn: !script_function:USGS_MYB usgs_garnet_call
parse_response_fxn: !script_function:USGS_MYB usgs_garnet_parse
diff --git a/flowsa/methods/flowbyactivitymethods/USGS_MYB_Gold.yaml b/flowsa/methods/flowbyactivitymethods/USGS_MYB_Gold.yaml
index 657b5f726..1d76db821 100644
--- a/flowsa/methods/flowbyactivitymethods/USGS_MYB_Gold.yaml
+++ b/flowsa/methods/flowbyactivitymethods/USGS_MYB_Gold.yaml
@@ -1,12 +1,12 @@
# Downloads and parses CSV from USGS Gold Statistics and Information on an annual basis.
---
source_name: USGS Mineral Yearbook Gold
-citable_url: https://prd-wret.s3.us-west-2.amazonaws.com/assets/palladium/production/atoms/files/myb1-2017-gold.xls
+citable_url: https://d9-wret.s3.us-west-2.amazonaws.com/assets/palladium/production/atoms/files/myb1-2017-gold.xls
date_generated: '2021-03-26'
years_available: 2013-2017
format: xls
url:
- base_url: https://prd-wret.s3.us-west-2.amazonaws.com/assets/palladium/production/atoms/files/myb1-2017-gold.xls
+ base_url: https://d9-wret.s3.us-west-2.amazonaws.com/assets/palladium/production/atoms/files/myb1-2017-gold.xls
url_replace_fxn: !script_function:USGS_MYB usgs_myb_url_helper
call_response_fxn: !script_function:USGS_MYB usgs_gold_call
parse_response_fxn: !script_function:USGS_MYB usgs_gold_parse
diff --git a/flowsa/methods/flowbyactivitymethods/USGS_MYB_Graphite.yaml b/flowsa/methods/flowbyactivitymethods/USGS_MYB_Graphite.yaml
index 17ed7360a..ce1dbd78c 100644
--- a/flowsa/methods/flowbyactivitymethods/USGS_MYB_Graphite.yaml
+++ b/flowsa/methods/flowbyactivitymethods/USGS_MYB_Graphite.yaml
@@ -1,12 +1,12 @@
# Downloads and parses CSV from USGS Graphite Statistics and Information on an annual basis.
---
source_name: USGS Mineral Yearbook Graphite
-citable_url: https://prd-wret.s3.us-west-2.amazonaws.com/assets/palladium/production/atoms/files/myb1-2017-graph.xls
+citable_url: https://d9-wret.s3.us-west-2.amazonaws.com/assets/palladium/production/atoms/files/myb1-2017-graph.xls
date_generated: '2021-03-19'
years_available: 2013-2017
format: xls
url:
- base_url: https://prd-wret.s3.us-west-2.amazonaws.com/assets/palladium/production/atoms/files/myb1-2017-graph.xls
+ base_url: https://d9-wret.s3.us-west-2.amazonaws.com/assets/palladium/production/atoms/files/myb1-2017-graph.xls
url_replace_fxn: !script_function:USGS_MYB usgs_myb_url_helper
call_response_fxn: !script_function:USGS_MYB usgs_graphite_call
parse_response_fxn: !script_function:USGS_MYB usgs_graphite_parse
diff --git a/flowsa/methods/flowbyactivitymethods/USGS_MYB_Gypsum.yaml b/flowsa/methods/flowbyactivitymethods/USGS_MYB_Gypsum.yaml
index 7f1146cfe..6d8c3f9e2 100644
--- a/flowsa/methods/flowbyactivitymethods/USGS_MYB_Gypsum.yaml
+++ b/flowsa/methods/flowbyactivitymethods/USGS_MYB_Gypsum.yaml
@@ -1,12 +1,12 @@
# Downloads and parses CSV from USGS Gypsum Statistics and Information on an annual basis.
---
source_name: USGS Mineral Yearbook Gypsum
-citable_url: https://prd-wret.s3.us-west-2.amazonaws.com/assets/palladium/production/atoms/files/myb1-2018-gypsu-adv.xlsx
+citable_url: https://pubs.usgs.gov/myb/vol1/2018/myb1-2018-gypsum.xlsx
date_generated: '2021-03-26'
years_available: 2014-2018
format: xls
url:
- base_url: https://prd-wret.s3.us-west-2.amazonaws.com/assets/palladium/production/atoms/files/myb1-2018-gypsu-adv.xlsx
+ base_url: https://pubs.usgs.gov/myb/vol1/2018/myb1-2018-gypsum.xlsx
url_replace_fxn: !script_function:USGS_MYB usgs_myb_url_helper
call_response_fxn: !script_function:USGS_MYB usgs_gypsum_call
parse_response_fxn: !script_function:USGS_MYB usgs_gypsum_parse
diff --git a/flowsa/methods/flowbyactivitymethods/USGS_MYB_Iodine.yaml b/flowsa/methods/flowbyactivitymethods/USGS_MYB_Iodine.yaml
index 2c942e46b..408700d1c 100644
--- a/flowsa/methods/flowbyactivitymethods/USGS_MYB_Iodine.yaml
+++ b/flowsa/methods/flowbyactivitymethods/USGS_MYB_Iodine.yaml
@@ -1,12 +1,12 @@
# Downloads and parses CSV from USGS Iodine Statistics and Information on an annual basis.
---
source_name: USGS Mineral Yearbook Iodine
-citable_url: https://prd-wret.s3.us-west-2.amazonaws.com/assets/palladium/production/atoms/files/myb1-2018-iodin-adv.xlsx
+citable_url: https://d9-wret.s3.us-west-2.amazonaws.com/assets/palladium/production/atoms/files/myb1-2018-iodin.xls
date_generated: '2021-03-19'
years_available: 2014-2018
format: xls
url:
- base_url: https://prd-wret.s3.us-west-2.amazonaws.com/assets/palladium/production/atoms/files/myb1-2018-iodin-adv.xlsx
+ base_url: https://d9-wret.s3.us-west-2.amazonaws.com/assets/palladium/production/atoms/files/myb1-2018-iodin.xls
url_replace_fxn: !script_function:USGS_MYB usgs_myb_url_helper
call_response_fxn: !script_function:USGS_MYB usgs_iodine_call
parse_response_fxn: !script_function:USGS_MYB usgs_iodine_parse
diff --git a/flowsa/methods/flowbyactivitymethods/USGS_MYB_IronOre.yaml b/flowsa/methods/flowbyactivitymethods/USGS_MYB_IronOre.yaml
index 1dae5acee..70b1e41f2 100644
--- a/flowsa/methods/flowbyactivitymethods/USGS_MYB_IronOre.yaml
+++ b/flowsa/methods/flowbyactivitymethods/USGS_MYB_IronOre.yaml
@@ -1,12 +1,12 @@
# Downloads and parses CSV from USGS IronOre Statistics and Information on an annual basis.
---
source_name: USGS Mineral Yearbook Iron Ore
-citable_url: https://prd-wret.s3.us-west-2.amazonaws.com/assets/palladium/production/atoms/files/myb1-2018-feore-adv.xlsx
+citable_url: https://pubs.usgs.gov/myb/vol1/2018/myb1-2018-iron-ore.xlsx
date_generated: '2021-03-19'
years_available: 2014-2018
format: xls
url:
- base_url: https://prd-wret.s3.us-west-2.amazonaws.com/assets/palladium/production/atoms/files/myb1-2018-feore-adv.xlsx
+ base_url: https://pubs.usgs.gov/myb/vol1/2018/myb1-2018-iron-ore.xlsx
url_replace_fxn: !script_function:USGS_MYB usgs_myb_url_helper
call_response_fxn: !script_function:USGS_MYB usgs_iron_ore_call
parse_response_fxn: !script_function:USGS_MYB usgs_iron_ore_parse
diff --git a/flowsa/methods/flowbyactivitymethods/USGS_MYB_Kyanite.yaml b/flowsa/methods/flowbyactivitymethods/USGS_MYB_Kyanite.yaml
index 3565ee645..fe168ce4c 100644
--- a/flowsa/methods/flowbyactivitymethods/USGS_MYB_Kyanite.yaml
+++ b/flowsa/methods/flowbyactivitymethods/USGS_MYB_Kyanite.yaml
@@ -1,12 +1,12 @@
# Downloads and parses CSV from USGS Kyanite Statistics and Information on an annual basis.
---
source_name: USGS Mineral Yearbook Kyanite
-citable_url: https://prd-wret.s3.us-west-2.amazonaws.com/assets/palladium/production/atoms/files/myb1-2018-kyani-adv.xlsx
+citable_url: https://pubs.usgs.gov/myb/vol1/2018/myb1-2018-kyanite.xlsx
date_generated: '2021-03-19'
years_available: 2014-2018
format: xls
url:
- base_url: https://prd-wret.s3.us-west-2.amazonaws.com/assets/palladium/production/atoms/files/myb1-2018-kyani-adv.xlsx
+ base_url: https://pubs.usgs.gov/myb/vol1/2018/myb1-2018-kyanite.xlsx
url_replace_fxn: !script_function:USGS_MYB usgs_myb_url_helper
call_response_fxn: !script_function:USGS_MYB usgs_kyanite_call
parse_response_fxn: !script_function:USGS_MYB usgs_kyanite_parse
diff --git a/flowsa/methods/flowbyactivitymethods/USGS_MYB_Lead.yaml b/flowsa/methods/flowbyactivitymethods/USGS_MYB_Lead.yaml
index c9038d8cc..8b6b53622 100644
--- a/flowsa/methods/flowbyactivitymethods/USGS_MYB_Lead.yaml
+++ b/flowsa/methods/flowbyactivitymethods/USGS_MYB_Lead.yaml
@@ -1,12 +1,12 @@
# Downloads and parses CSV from USGS Lead Statistics and Information on an annual basis.
---
source_name: USGS Mineral Yearbook Lead
-citable_url: https://s3-us-west-2.amazonaws.com/prd-wret/assets/palladium/production/atoms/files/myb1-2016-lead.xls
+citable_url: https://d9-wret.s3.us-west-2.amazonaws.com/assets/palladium/production/s3fs-public/media/files/myb1-2018-lead-advrel.xlsx
date_generated: '2021-03-26'
years_available: 2012-2016
format: xlsx
url:
- base_url: https://s3-us-west-2.amazonaws.com/prd-wret/assets/palladium/production/atoms/files/myb1-2016-lead.xls
+ base_url: https://d9-wret.s3.us-west-2.amazonaws.com/assets/palladium/production/s3fs-public/media/files/myb1-2018-lead-advrel.xlsx
url_replace_fxn: !script_function:USGS_MYB usgs_lead_url_helper
call_response_fxn: !script_function:USGS_MYB usgs_lead_call
parse_response_fxn: !script_function:USGS_MYB usgs_lead_parse
diff --git a/flowsa/methods/flowbyactivitymethods/USGS_MYB_Lime.yaml b/flowsa/methods/flowbyactivitymethods/USGS_MYB_Lime.yaml
index 6fc2f0516..3380dca38 100644
--- a/flowsa/methods/flowbyactivitymethods/USGS_MYB_Lime.yaml
+++ b/flowsa/methods/flowbyactivitymethods/USGS_MYB_Lime.yaml
@@ -1,12 +1,12 @@
# Downloads and parses CSV from USGS Lime Statistics and Information on an annual basis.
---
source_name: USGS Mineral Yearbook Lime
-citable_url: https://prd-wret.s3.us-west-2.amazonaws.com/assets/palladium/production/atoms/files/myb1-2018-lime-adv.xlsx
+citable_url: https://d9-wret.s3.us-west-2.amazonaws.com/assets/palladium/production/atoms/files/myb1-2018-lime.xls
date_generated: '2021-03-26'
years_available: 2014-2018
format: xls
url:
- base_url: https://prd-wret.s3.us-west-2.amazonaws.com/assets/palladium/production/atoms/files/myb1-2018-lime-adv.xlsx
+ base_url: https://d9-wret.s3.us-west-2.amazonaws.com/assets/palladium/production/atoms/files/myb1-2018-lime.xls
url_replace_fxn: !script_function:USGS_MYB usgs_myb_url_helper
call_response_fxn: !script_function:USGS_MYB usgs_lime_call
parse_response_fxn: !script_function:USGS_MYB usgs_lime_parse
diff --git a/flowsa/methods/flowbyactivitymethods/USGS_MYB_Lithium.yaml b/flowsa/methods/flowbyactivitymethods/USGS_MYB_Lithium.yaml
index fe9f1eaf3..0ffb016f5 100644
--- a/flowsa/methods/flowbyactivitymethods/USGS_MYB_Lithium.yaml
+++ b/flowsa/methods/flowbyactivitymethods/USGS_MYB_Lithium.yaml
@@ -1,12 +1,12 @@
# Downloads and parses CSV from USGS Lithium Statistics and Information on an annual basis.
---
source_name: USGS Mineral Yearbook Lithium
-citable_url: https://prd-wret.s3.us-west-2.amazonaws.com/assets/palladium/production/atoms/files/myb1-2017-lithi.xls
+citable_url: https://d9-wret.s3.us-west-2.amazonaws.com/assets/palladium/production/atoms/files/myb1-2017-lithi.xls
date_generated: '2021-03-26'
years_available: 2013-2017
format: xls
url:
- base_url: https://prd-wret.s3.us-west-2.amazonaws.com/assets/palladium/production/atoms/files/myb1-2017-lithi.xls
+ base_url: https://d9-wret.s3.us-west-2.amazonaws.com/assets/palladium/production/atoms/files/myb1-2017-lithi.xls
url_replace_fxn: !script_function:USGS_MYB usgs_myb_url_helper
call_response_fxn: !script_function:USGS_MYB usgs_lithium_call
parse_response_fxn: !script_function:USGS_MYB usgs_lithium_parse
diff --git a/flowsa/methods/flowbyactivitymethods/USGS_MYB_Magnesium.yaml b/flowsa/methods/flowbyactivitymethods/USGS_MYB_Magnesium.yaml
index 840ee8ca1..2d535da32 100644
--- a/flowsa/methods/flowbyactivitymethods/USGS_MYB_Magnesium.yaml
+++ b/flowsa/methods/flowbyactivitymethods/USGS_MYB_Magnesium.yaml
@@ -1,12 +1,12 @@
# Downloads and parses CSV from USGS Magnesium Statistics and Information on an annual basis.
---
source_name: USGS Mineral Yearbook Magnesium
-citable_url: https://prd-wret.s3.us-west-2.amazonaws.com/assets/palladium/production/atoms/files/myb1-2017-mgmet.xls
+citable_url: https://d9-wret.s3.us-west-2.amazonaws.com/assets/palladium/production/atoms/files/myb1-2017-mgmet.xls
date_generated: '2021-03-26'
years_available: 2013-2017
format: xls
url:
- base_url: https://prd-wret.s3.us-west-2.amazonaws.com/assets/palladium/production/atoms/files/myb1-2017-mgmet.xls
+ base_url: https://d9-wret.s3.us-west-2.amazonaws.com/assets/palladium/production/atoms/files/myb1-2017-mgmet.xls
url_replace_fxn: !script_function:USGS_MYB usgs_myb_url_helper
call_response_fxn: !script_function:USGS_MYB usgs_magnesium_call
parse_response_fxn: !script_function:USGS_MYB usgs_magnesium_parse
diff --git a/flowsa/methods/flowbyactivitymethods/USGS_MYB_Manganese.yaml b/flowsa/methods/flowbyactivitymethods/USGS_MYB_Manganese.yaml
index 316948c20..0bbe251f1 100644
--- a/flowsa/methods/flowbyactivitymethods/USGS_MYB_Manganese.yaml
+++ b/flowsa/methods/flowbyactivitymethods/USGS_MYB_Manganese.yaml
@@ -1,12 +1,12 @@
# Downloads and parses CSV from USGS Manganese Statistics and Information on an annual basis.
---
source_name: USGS Mineral Yearbook Manganese
-citable_url: https://prd-wret.s3.us-west-2.amazonaws.com/assets/palladium/production/atoms/files/myb1-2016-manga-adv.xlsx
+citable_url: https://d9-wret.s3.us-west-2.amazonaws.com/assets/palladium/production/atoms/files/myb1-2016-manga.xls
date_generated: '2021-03-26'
years_available: 2012-2016
format: xls
url:
- base_url: https://prd-wret.s3.us-west-2.amazonaws.com/assets/palladium/production/atoms/files/myb1-2016-manga-adv.xlsx
+ base_url: https://d9-wret.s3.us-west-2.amazonaws.com/assets/palladium/production/atoms/files/myb1-2016-manga.xls
url_replace_fxn: !script_function:USGS_MYB usgs_myb_url_helper
call_response_fxn: !script_function:USGS_MYB usgs_manganese_call
parse_response_fxn: !script_function:USGS_MYB usgs_manganese_parse
diff --git a/flowsa/methods/flowbyactivitymethods/USGS_MYB_ManufacturedAbrasive.yaml b/flowsa/methods/flowbyactivitymethods/USGS_MYB_ManufacturedAbrasive.yaml
index 0fdea89cf..e2b554d50 100644
--- a/flowsa/methods/flowbyactivitymethods/USGS_MYB_ManufacturedAbrasive.yaml
+++ b/flowsa/methods/flowbyactivitymethods/USGS_MYB_ManufacturedAbrasive.yaml
@@ -1,12 +1,12 @@
# Downloads and parses CSV from USGS ManufacturedAbrasive Statistics and Information on an annual basis.
---
source_name: USGS Mineral Yearbook Manufactured Abrasive
-citable_url: https://s3-us-west-2.amazonaws.com/prd-wret/assets/palladium/production/atoms/files/myb1-2017-abras.xls
+citable_url: https://d9-wret.s3.us-west-2.amazonaws.com/assets/palladium/production/atoms/files/myb1-2017-abras.xls
date_generated: '2021-03-26'
years_available: 2017-2018
format: xlsx
url:
- base_url: https://s3-us-west-2.amazonaws.com/prd-wret/assets/palladium/production/atoms/files/myb1-2017-abras.xls
+ base_url: https://d9-wret.s3.us-west-2.amazonaws.com/assets/palladium/production/atoms/files/myb1-2017-abras.xls
url_replace_fxn: !script_function:USGS_MYB usgs_myb_url_helper
call_response_fxn: !script_function:USGS_MYB usgs_ma_call
parse_response_fxn: !script_function:USGS_MYB usgs_ma_parse
diff --git a/flowsa/methods/flowbyactivitymethods/USGS_MYB_Mica.yaml b/flowsa/methods/flowbyactivitymethods/USGS_MYB_Mica.yaml
index 1f6ab57a3..c5ca9b78b 100644
--- a/flowsa/methods/flowbyactivitymethods/USGS_MYB_Mica.yaml
+++ b/flowsa/methods/flowbyactivitymethods/USGS_MYB_Mica.yaml
@@ -1,7 +1,7 @@
# Downloads and parses CSV from USGS Mica Statistics and Information on an annual basis.
---
source_name: USGS Mineral Yearbook Mica
-citable_url: https://pubs.usgs.gov/myb/vol1/2018/myb1-2018-mica.xls
+citable_url: https://pubs.usgs.gov/myb/vol1/2018/myb1-2018-mica.xlsx
date_generated: '2021-03-19'
years_available: 2014-2018
format: xls
diff --git a/flowsa/methods/flowbyactivitymethods/USGS_MYB_Molybdenum.yaml b/flowsa/methods/flowbyactivitymethods/USGS_MYB_Molybdenum.yaml
index ad640ffc7..7ece684c5 100644
--- a/flowsa/methods/flowbyactivitymethods/USGS_MYB_Molybdenum.yaml
+++ b/flowsa/methods/flowbyactivitymethods/USGS_MYB_Molybdenum.yaml
@@ -1,12 +1,12 @@
# Downloads and parses CSV from USGS Molybdenum Statistics and Information on an annual basis.
---
source_name: USGS Mineral Yearbook Molybdenum
-citable_url: https://prd-wret.s3.us-west-2.amazonaws.com/assets/palladium/production/atoms/files/myb1-2018-molyb-adv.xlsx
+citable_url: https://d9-wret.s3.us-west-2.amazonaws.com/assets/palladium/production/atoms/files/myb1-2018-moly.xls
date_generated: '2021-03-19'
years_available: 2014-2018
format: xls
url:
- base_url: https://prd-wret.s3.us-west-2.amazonaws.com/assets/palladium/production/atoms/files/myb1-2018-molyb-adv.xlsx
+ base_url: https://d9-wret.s3.us-west-2.amazonaws.com/assets/palladium/production/atoms/files/myb1-2018-moly.xls
url_replace_fxn: !script_function:USGS_MYB usgs_myb_url_helper
call_response_fxn: !script_function:USGS_MYB usgs_molybdenum_call
parse_response_fxn: !script_function:USGS_MYB usgs_molybdenum_parse
diff --git a/flowsa/methods/flowbyactivitymethods/USGS_MYB_Nickel.yaml b/flowsa/methods/flowbyactivitymethods/USGS_MYB_Nickel.yaml
index b2f28ee42..c36d32097 100644
--- a/flowsa/methods/flowbyactivitymethods/USGS_MYB_Nickel.yaml
+++ b/flowsa/methods/flowbyactivitymethods/USGS_MYB_Nickel.yaml
@@ -1,12 +1,12 @@
# Downloads and parses CSV from USGS Nickel Statistics and Information on an annual basis.
---
source_name: USGS Mineral Yearbook Nickel
-citable_url: https://prd-wret.s3.us-west-2.amazonaws.com/assets/palladium/production/atoms/files/myb1-2016-nickel.xls
+citable_url: https://d9-wret.s3.us-west-2.amazonaws.com/assets/palladium/production/atoms/files/myb1-2016-nickel.xls
date_generated: '2021-03-26'
years_available: 2012-2016
format: xls
url:
- base_url: https://prd-wret.s3.us-west-2.amazonaws.com/assets/palladium/production/atoms/files/myb1-2016-nickel.xls
+ base_url: https://d9-wret.s3.us-west-2.amazonaws.com/assets/palladium/production/atoms/files/myb1-2016-nickel.xls
url_replace_fxn: !script_function:USGS_MYB usgs_myb_url_helper
call_response_fxn: !script_function:USGS_MYB usgs_nickel_call
parse_response_fxn: !script_function:USGS_MYB usgs_nickel_parse
diff --git a/flowsa/methods/flowbyactivitymethods/USGS_MYB_Niobium.yaml b/flowsa/methods/flowbyactivitymethods/USGS_MYB_Niobium.yaml
index 446509a1d..81a613c1f 100644
--- a/flowsa/methods/flowbyactivitymethods/USGS_MYB_Niobium.yaml
+++ b/flowsa/methods/flowbyactivitymethods/USGS_MYB_Niobium.yaml
@@ -1,7 +1,7 @@
# Downloads and parses CSV from USGS Niobium Statistics and Information on an annual basis.
---
source_name: USGS Mineral Yearbook Niobium
-citable_url: https://pubs.usgs.gov/myb/vol1/2018/myb1-2018-niobium.xls
+citable_url: https://pubs.usgs.gov/myb/vol1/2018/myb1-2018-niobium.xlsx
date_generated: '2021-03-19'
years_available: 2014-2018
format: xls
diff --git a/flowsa/methods/flowbyactivitymethods/USGS_MYB_Peat.yaml b/flowsa/methods/flowbyactivitymethods/USGS_MYB_Peat.yaml
index 5752883ca..8095a4bb6 100644
--- a/flowsa/methods/flowbyactivitymethods/USGS_MYB_Peat.yaml
+++ b/flowsa/methods/flowbyactivitymethods/USGS_MYB_Peat.yaml
@@ -1,12 +1,12 @@
# Downloads and parses CSV from USGS Peat Statistics and Information on an annual basis.
---
source_name: USGS Mineral Yearbook Peat
-citable_url: https://prd-wret.s3-us-west-2.amazonaws.com/assets/palladium/production/atoms/files/myb1-2018-peat-advrel.xlsx
+citable_url: https://d9-wret.s3.us-west-2.amazonaws.com/assets/palladium/production/atoms/files/myb1-2018-peat.xls
date_generated: '2021-03-26'
years_available: 2014-2018
format: xls
url:
- base_url: https://prd-wret.s3-us-west-2.amazonaws.com/assets/palladium/production/atoms/files/myb1-2018-peat-advrel.xlsx
+ base_url: https://d9-wret.s3.us-west-2.amazonaws.com/assets/palladium/production/atoms/files/myb1-2018-peat.xls
url_replace_fxn: !script_function:USGS_MYB usgs_myb_url_helper
call_response_fxn: !script_function:USGS_MYB usgs_peat_call
parse_response_fxn: !script_function:USGS_MYB usgs_peat_parse
diff --git a/flowsa/methods/flowbyactivitymethods/USGS_MYB_Perlite.yaml b/flowsa/methods/flowbyactivitymethods/USGS_MYB_Perlite.yaml
index 89d057a91..42877791b 100644
--- a/flowsa/methods/flowbyactivitymethods/USGS_MYB_Perlite.yaml
+++ b/flowsa/methods/flowbyactivitymethods/USGS_MYB_Perlite.yaml
@@ -1,12 +1,12 @@
# Downloads and parses CSV from USGS Perlite Statistics and Information on an annual basis.
---
source_name: USGS Mineral Yearbook Perlite
-citable_url: https://prd-wret.s3.us-west-2.amazonaws.com/assets/palladium/production/atoms/files/myb1-2017-perli.xls
+citable_url: https://d9-wret.s3.us-west-2.amazonaws.com/assets/palladium/production/atoms/files/myb1-2017-perli.xls
date_generated: '2021-03-19'
years_available: 2013-2017
format: xls
url:
- base_url: https://prd-wret.s3.us-west-2.amazonaws.com/assets/palladium/production/atoms/files/myb1-2017-perli.xls
+ base_url: https://d9-wret.s3.us-west-2.amazonaws.com/assets/palladium/production/atoms/files/myb1-2017-perli.xls
url_replace_fxn: !script_function:USGS_MYB usgs_myb_url_helper
call_response_fxn: !script_function:USGS_MYB usgs_perlite_call
parse_response_fxn: !script_function:USGS_MYB usgs_perlite_parse
diff --git a/flowsa/methods/flowbyactivitymethods/USGS_MYB_Phosphate.yaml b/flowsa/methods/flowbyactivitymethods/USGS_MYB_Phosphate.yaml
index 6eb8b9155..d4b7326c1 100644
--- a/flowsa/methods/flowbyactivitymethods/USGS_MYB_Phosphate.yaml
+++ b/flowsa/methods/flowbyactivitymethods/USGS_MYB_Phosphate.yaml
@@ -1,12 +1,12 @@
# Downloads and parses CSV from USGS Phosphate Statistics and Information on an annual basis.
---
source_name: USGS Mineral Yearbook Phosphate
-citable_url: https://prd-wret.s3.us-west-2.amazonaws.com/assets/palladium/production/atoms/files/myb1-2018-phosp.xls
+citable_url: https://d9-wret.s3.us-west-2.amazonaws.com/assets/palladium/production/atoms/files/myb1-2018-phosp.xls
date_generated: '2021-03-26'
years_available: 2014-2018
format: xls
url:
- base_url: https://prd-wret.s3.us-west-2.amazonaws.com/assets/palladium/production/atoms/files/myb1-2018-phosp.xls
+ base_url: https://d9-wret.s3.us-west-2.amazonaws.com/assets/palladium/production/atoms/files/myb1-2018-phosp.xls
url_replace_fxn: !script_function:USGS_MYB usgs_myb_url_helper
call_response_fxn: !script_function:USGS_MYB usgs_phosphate_call
parse_response_fxn: !script_function:USGS_MYB usgs_phosphate_parse
diff --git a/flowsa/methods/flowbyactivitymethods/USGS_MYB_Platinum.yaml b/flowsa/methods/flowbyactivitymethods/USGS_MYB_Platinum.yaml
index b69b37032..f97dcf74d 100644
--- a/flowsa/methods/flowbyactivitymethods/USGS_MYB_Platinum.yaml
+++ b/flowsa/methods/flowbyactivitymethods/USGS_MYB_Platinum.yaml
@@ -1,12 +1,12 @@
# Downloads and parses CSV from USGS Platinum Statistics and Information on an annual basis.
---
source_name: USGS Mineral Yearbook Platinum
-citable_url: https://prd-wret.s3.us-west-2.amazonaws.com/assets/palladium/production/atoms/files/myb1-2018-plati-adv.xlsx
+citable_url: https://d9-wret.s3.us-west-2.amazonaws.com/assets/palladium/production/atoms/files/myb1-2018-plati.xls
date_generated: '2021-03-19'
years_available: 2014-2018
format: xls
url:
- base_url: https://prd-wret.s3.us-west-2.amazonaws.com/assets/palladium/production/atoms/files/myb1-2018-plati-adv.xlsx
+ base_url: https://d9-wret.s3.us-west-2.amazonaws.com/assets/palladium/production/atoms/files/myb1-2018-plati.xls
url_replace_fxn: !script_function:USGS_MYB usgs_myb_url_helper
call_response_fxn: !script_function:USGS_MYB usgs_platinum_call
parse_response_fxn: !script_function:USGS_MYB usgs_platinum_parse
diff --git a/flowsa/methods/flowbyactivitymethods/USGS_MYB_Potash.yaml b/flowsa/methods/flowbyactivitymethods/USGS_MYB_Potash.yaml
index 1564ff5bd..516e92c0d 100644
--- a/flowsa/methods/flowbyactivitymethods/USGS_MYB_Potash.yaml
+++ b/flowsa/methods/flowbyactivitymethods/USGS_MYB_Potash.yaml
@@ -1,12 +1,12 @@
# Downloads and parses CSV from USGS Potash Statistics and Information on an annual basis.
---
source_name: USGS Mineral Yearbook Potash
-citable_url: https://prd-wret.s3-us-west-2.amazonaws.com/assets/palladium/production/atoms/files/myb1-2018-potas-adv.xlsx
+citable_url: https://d9-wret.s3.us-west-2.amazonaws.com/assets/palladium/production/atoms/files/myb1-2018-potas.xls
date_generated: '2021-03-26'
years_available: 2014-2018
format: xls
url:
- base_url: https://prd-wret.s3-us-west-2.amazonaws.com/assets/palladium/production/atoms/files/myb1-2018-potas-adv.xlsx
+  base_url: https://d9-wret.s3.us-west-2.amazonaws.com/assets/palladium/production/atoms/files/myb1-2018-potas.xls
url_replace_fxn: !script_function:USGS_MYB usgs_myb_url_helper
call_response_fxn: !script_function:USGS_MYB usgs_potash_call
parse_response_fxn: !script_function:USGS_MYB usgs_potash_parse
diff --git a/flowsa/methods/flowbyactivitymethods/USGS_MYB_Pumice.yaml b/flowsa/methods/flowbyactivitymethods/USGS_MYB_Pumice.yaml
index 8b72b4565..93eba528c 100644
--- a/flowsa/methods/flowbyactivitymethods/USGS_MYB_Pumice.yaml
+++ b/flowsa/methods/flowbyactivitymethods/USGS_MYB_Pumice.yaml
@@ -1,12 +1,12 @@
# Downloads and parses CSV from USGS Pumice Statistics and Information on an annual basis.
---
source_name: USGS Mineral Yearbook Pumice
-citable_url: https://prd-wret.s3-us-west-2.amazonaws.com/assets/palladium/production/atoms/files/myb1-2018-pumic-adv.xlsx
+citable_url: https://d9-wret.s3.us-west-2.amazonaws.com/assets/palladium/production/atoms/files/myb1-2018-pumic.xls
date_generated: '2021-03-26'
years_available: 2014-2018
format: xls
url:
- base_url: https://prd-wret.s3-us-west-2.amazonaws.com/assets/palladium/production/atoms/files/myb1-2018-pumic-adv.xlsx
+ base_url: https://d9-wret.s3.us-west-2.amazonaws.com/assets/palladium/production/atoms/files/myb1-2018-pumic.xls
url_replace_fxn: !script_function:USGS_MYB usgs_myb_url_helper
call_response_fxn: !script_function:USGS_MYB usgs_pumice_call
parse_response_fxn: !script_function:USGS_MYB usgs_pumice_parse
diff --git a/flowsa/methods/flowbyactivitymethods/USGS_MYB_Rhenium.yaml b/flowsa/methods/flowbyactivitymethods/USGS_MYB_Rhenium.yaml
index 732ada2a5..1f9815225 100644
--- a/flowsa/methods/flowbyactivitymethods/USGS_MYB_Rhenium.yaml
+++ b/flowsa/methods/flowbyactivitymethods/USGS_MYB_Rhenium.yaml
@@ -1,12 +1,12 @@
# Downloads and parses CSV from USGS Rhenium Statistics and Information on an annual basis.
---
source_name: USGS Mineral Yearbook Rhenium
-citable_url: https://prd-wret.s3.us-west-2.amazonaws.com/assets/palladium/production/atoms/files/myb1-2018-rheni-adv.xlsx
+citable_url: https://d9-wret.s3.us-west-2.amazonaws.com/assets/palladium/production/atoms/files/myb1-2018-rheni.xls
date_generated: '2021-03-26'
years_available: 2014-2018
format: xls
url:
- base_url: https://prd-wret.s3.us-west-2.amazonaws.com/assets/palladium/production/atoms/files/myb1-2018-rheni-adv.xlsx
+ base_url: https://d9-wret.s3.us-west-2.amazonaws.com/assets/palladium/production/atoms/files/myb1-2018-rheni.xls
url_replace_fxn: !script_function:USGS_MYB usgs_myb_url_helper
call_response_fxn: !script_function:USGS_MYB usgs_rhenium_call
parse_response_fxn: !script_function:USGS_MYB usgs_rhenium_parse
diff --git a/flowsa/methods/flowbyactivitymethods/USGS_MYB_Salt.yaml b/flowsa/methods/flowbyactivitymethods/USGS_MYB_Salt.yaml
index 10cd30e27..d602b9222 100644
--- a/flowsa/methods/flowbyactivitymethods/USGS_MYB_Salt.yaml
+++ b/flowsa/methods/flowbyactivitymethods/USGS_MYB_Salt.yaml
@@ -1,12 +1,12 @@
# Downloads and parses CSV from USGS Salt Statistics and Information on an annual basis.
---
source_name: USGS Mineral Yearbook Salt
-citable_url: https://prd-wret.s3.us-west-2.amazonaws.com/assets/palladium/production/atoms/files/myb1-2017-salt-adv.xlsx
+citable_url: https://d9-wret.s3.us-west-2.amazonaws.com/assets/palladium/production/atoms/files/myb1-2017-salt.xlsx
date_generated: '2021-03-26'
years_available: 2013-2017
format: xls
url:
- base_url: https://prd-wret.s3.us-west-2.amazonaws.com/assets/palladium/production/atoms/files/myb1-2017-salt-adv.xlsx
+ base_url: https://d9-wret.s3.us-west-2.amazonaws.com/assets/palladium/production/atoms/files/myb1-2017-salt.xlsx
url_replace_fxn: !script_function:USGS_MYB usgs_myb_url_helper
call_response_fxn: !script_function:USGS_MYB usgs_salt_call
parse_response_fxn: !script_function:USGS_MYB usgs_salt_parse
diff --git a/flowsa/methods/flowbyactivitymethods/USGS_MYB_SandGravelConstruction.yaml b/flowsa/methods/flowbyactivitymethods/USGS_MYB_SandGravelConstruction.yaml
index debd1359d..113c94639 100644
--- a/flowsa/methods/flowbyactivitymethods/USGS_MYB_SandGravelConstruction.yaml
+++ b/flowsa/methods/flowbyactivitymethods/USGS_MYB_SandGravelConstruction.yaml
@@ -1,12 +1,12 @@
# Downloads and parses CSV from USGS SandGravelConstruction Statistics and Information on an annual basis.
---
source_name: USGS Mineral Yearbook Sand Gravel Construction
-citable_url: https://prd-wret.s3.us-west-2.amazonaws.com/assets/palladium/production/atoms/files/myb1-2017-sandc.xls
+citable_url: https://d9-wret.s3.us-west-2.amazonaws.com/assets/palladium/production/atoms/files/myb1-2017-sandc.xls
date_generated: '2021-03-19'
years_available: 2014-2017
format: xls
url:
- base_url: https://prd-wret.s3.us-west-2.amazonaws.com/assets/palladium/production/atoms/files/myb1-2017-sandc.xls
+ base_url: https://d9-wret.s3.us-west-2.amazonaws.com/assets/palladium/production/atoms/files/myb1-2017-sandc.xls
url_replace_fxn: !script_function:USGS_MYB usgs_myb_url_helper
call_response_fxn: !script_function:USGS_MYB usgs_sgc_call
parse_response_fxn: !script_function:USGS_MYB usgs_sgc_parse
diff --git a/flowsa/methods/flowbyactivitymethods/USGS_MYB_SandGravelIndustrial.yaml b/flowsa/methods/flowbyactivitymethods/USGS_MYB_SandGravelIndustrial.yaml
index 610e34ea0..814c5257b 100644
--- a/flowsa/methods/flowbyactivitymethods/USGS_MYB_SandGravelIndustrial.yaml
+++ b/flowsa/methods/flowbyactivitymethods/USGS_MYB_SandGravelIndustrial.yaml
@@ -1,12 +1,12 @@
# Downloads and parses CSV from USGS SandGravelIndustrial Statistics and Information on an annual basis.
---
source_name: USGS Mineral Yearbook Sand Gravel Industrial
-citable_url: https://prd-wret.s3.us-west-2.amazonaws.com/assets/palladium/production/atoms/files/myb1-2018-silic-adv.xlsx
+citable_url: https://pubs.usgs.gov/myb/vol1/2018/myb1-2018-silica.xlsx
date_generated: '2021-03-26'
years_available: 2014-2018
format: xls
url:
- base_url: https://prd-wret.s3.us-west-2.amazonaws.com/assets/palladium/production/atoms/files/myb1-2018-silic-adv.xlsx
+ base_url: https://pubs.usgs.gov/myb/vol1/2018/myb1-2018-silica.xlsx
url_replace_fxn: !script_function:USGS_MYB usgs_myb_url_helper
call_response_fxn: !script_function:USGS_MYB usgs_sgi_call
parse_response_fxn: !script_function:USGS_MYB usgs_sgi_parse
diff --git a/flowsa/methods/flowbyactivitymethods/USGS_MYB_Silver.yaml b/flowsa/methods/flowbyactivitymethods/USGS_MYB_Silver.yaml
index 44c5bb6dc..3d5f7b34d 100644
--- a/flowsa/methods/flowbyactivitymethods/USGS_MYB_Silver.yaml
+++ b/flowsa/methods/flowbyactivitymethods/USGS_MYB_Silver.yaml
@@ -1,12 +1,12 @@
# Downloads and parses CSV from USGS Silver Statistics and Information on an annual basis.
---
source_name: USGS Mineral Yearbook Silver
-citable_url: https://prd-wret.s3-us-west-2.amazonaws.com/assets/palladium/production/atoms/files/myb1-2016-silve.xls
+citable_url: https://d9-wret.s3.us-west-2.amazonaws.com/assets/palladium/production/atoms/files/myb1-2016-silve.xls
date_generated: '2021-03-19'
years_available: 2012-2016
format: xls
url:
- base_url: https://prd-wret.s3-us-west-2.amazonaws.com/assets/palladium/production/atoms/files/myb1-2016-silve.xls
+  base_url: https://d9-wret.s3.us-west-2.amazonaws.com/assets/palladium/production/atoms/files/myb1-2016-silve.xls
url_replace_fxn: !script_function:USGS_MYB usgs_myb_url_helper
call_response_fxn: !script_function:USGS_MYB usgs_silver_call
parse_response_fxn: !script_function:USGS_MYB usgs_silver_parse
diff --git a/flowsa/methods/flowbyactivitymethods/USGS_MYB_SodaAsh.yaml b/flowsa/methods/flowbyactivitymethods/USGS_MYB_SodaAsh.yaml
index 607456e31..cf87a714c 100644
--- a/flowsa/methods/flowbyactivitymethods/USGS_MYB_SodaAsh.yaml
+++ b/flowsa/methods/flowbyactivitymethods/USGS_MYB_SodaAsh.yaml
@@ -6,7 +6,7 @@ bib_id: USGS_MYB_SA
# api_key_required: false
format: xlsx
url:
- base_url: https://s3-us-west-2.amazonaws.com/prd-wret/assets/palladium/production/__url_text__/myb1-__year__-sodaa.__format__
+ base_url: https://d9-wret.s3.us-west-2.amazonaws.com/assets/palladium/production/__url_text__/myb1-__year__-sodaa.__format__
url_replace_fxn: !script_function:USGS_MYB soda_url_helper
call_response_fxn: !script_function:USGS_MYB soda_call
parse_response_fxn: !script_function:USGS_MYB soda_parse
@@ -37,3 +37,4 @@ formats:
'2015': xlsx
'2016': xlsx
'2017': xls
+
diff --git a/flowsa/methods/flowbyactivitymethods/USGS_MYB_StoneCrushed.yaml b/flowsa/methods/flowbyactivitymethods/USGS_MYB_StoneCrushed.yaml
index e8a6d9ef7..2d1c40534 100644
--- a/flowsa/methods/flowbyactivitymethods/USGS_MYB_StoneCrushed.yaml
+++ b/flowsa/methods/flowbyactivitymethods/USGS_MYB_StoneCrushed.yaml
@@ -1,18 +1,18 @@
# Downloads and parses CSV from USGS StoneCrushed Statistics and Information on an annual basis.
---
source_name: USGS Mineral Yearbook Stone Crushed
-citable_url: https://prd-wret.s3.us-west-2.amazonaws.com/assets/palladium/production/atoms/files/myb1-2017-stonc-adv.xlsx
+citable_url: https://pubs.usgs.gov/myb/vol1/2018/myb1-2018-stone-crushed.xlsx
date_generated: '2021-03-26'
-years_available: 2013-2017
+years_available: 2014-2018
format: xls
url:
- base_url: https://prd-wret.s3.us-west-2.amazonaws.com/assets/palladium/production/atoms/files/myb1-2017-stonc-adv.xlsx
+  base_url: https://pubs.usgs.gov/myb/vol1/2018/myb1-2018-stone-crushed.xlsx
url_replace_fxn: !script_function:USGS_MYB usgs_myb_url_helper
call_response_fxn: !script_function:USGS_MYB usgs_stonecr_call
parse_response_fxn: !script_function:USGS_MYB usgs_stonecr_parse
years:
-- 2013
- 2014
- 2015
- 2016
- 2017
+- 2018
diff --git a/flowsa/methods/flowbyactivitymethods/USGS_MYB_StoneDimension.yaml b/flowsa/methods/flowbyactivitymethods/USGS_MYB_StoneDimension.yaml
index d6504c700..d0f3de6ff 100644
--- a/flowsa/methods/flowbyactivitymethods/USGS_MYB_StoneDimension.yaml
+++ b/flowsa/methods/flowbyactivitymethods/USGS_MYB_StoneDimension.yaml
@@ -1,12 +1,12 @@
# Downloads and parses CSV from USGS StoneDimension Statistics and Information on an annual basis.
---
source_name: USGS Mineral Yearbook Stone Dimension
-citable_url: https://prd-wret.s3.us-west-2.amazonaws.com/assets/palladium/production/atoms/files/myb1-2017-stond.xls
+citable_url: https://d9-wret.s3.us-west-2.amazonaws.com/assets/palladium/production/atoms/files/myb1-2017-stond.xls
date_generated: '2021-03-19'
years_available: 2013-2017
format: xls
url:
- base_url: https://prd-wret.s3.us-west-2.amazonaws.com/assets/palladium/production/atoms/files/myb1-2017-stond.xls
+ base_url: https://d9-wret.s3.us-west-2.amazonaws.com/assets/palladium/production/atoms/files/myb1-2017-stond.xls
url_replace_fxn: !script_function:USGS_MYB usgs_myb_url_helper
call_response_fxn: !script_function:USGS_MYB usgs_stonedis_call
parse_response_fxn: !script_function:USGS_MYB usgs_stonedis_parse
diff --git a/flowsa/methods/flowbyactivitymethods/USGS_MYB_Strontium.yaml b/flowsa/methods/flowbyactivitymethods/USGS_MYB_Strontium.yaml
index 331deed38..0ded944d2 100644
--- a/flowsa/methods/flowbyactivitymethods/USGS_MYB_Strontium.yaml
+++ b/flowsa/methods/flowbyactivitymethods/USGS_MYB_Strontium.yaml
@@ -1,12 +1,12 @@
# Downloads and parses CSV from USGS Strontium Statistics and Information on an annual basis.
---
source_name: USGS Mineral Yearbook Strontium
-citable_url: https://prd-wret.s3-us-west-2.amazonaws.com/assets/palladium/production/atoms/files/myb1-2018-stron-adv.xlsx
+citable_url: https://d9-wret.s3.us-west-2.amazonaws.com/assets/palladium/production/atoms/files/myb1-2018-stron.xls
date_generated: '2021-03-26'
years_available: 2014-2018
format: xls
url:
- base_url: https://prd-wret.s3-us-west-2.amazonaws.com/assets/palladium/production/atoms/files/myb1-2018-stron-adv.xlsx
+ base_url: https://d9-wret.s3.us-west-2.amazonaws.com/assets/palladium/production/atoms/files/myb1-2018-stron.xls
url_replace_fxn: !script_function:USGS_MYB usgs_myb_url_helper
call_response_fxn: !script_function:USGS_MYB usgs_strontium_call
parse_response_fxn: !script_function:USGS_MYB usgs_strontium_parse
diff --git a/flowsa/methods/flowbyactivitymethods/USGS_MYB_Talc.yaml b/flowsa/methods/flowbyactivitymethods/USGS_MYB_Talc.yaml
index a377e3224..278f2fc4a 100644
--- a/flowsa/methods/flowbyactivitymethods/USGS_MYB_Talc.yaml
+++ b/flowsa/methods/flowbyactivitymethods/USGS_MYB_Talc.yaml
@@ -1,12 +1,12 @@
# Downloads and parses CSV from USGS Talc Statistics and Information on an annual basis.
---
source_name: USGS Mineral Yearbook Talc
-citable_url: https://prd-wret.s3.us-west-2.amazonaws.com/assets/palladium/production/atoms/files/myb1-2017-talc.xls
+citable_url: https://d9-wret.s3.us-west-2.amazonaws.com/assets/palladium/production/atoms/files/myb1-2017-talc.xls
date_generated: '2021-03-19'
years_available: 2013-2017
format: xls
url:
- base_url: https://prd-wret.s3.us-west-2.amazonaws.com/assets/palladium/production/atoms/files/myb1-2017-talc.xls
+ base_url: https://d9-wret.s3.us-west-2.amazonaws.com/assets/palladium/production/atoms/files/myb1-2017-talc.xls
url_replace_fxn: !script_function:USGS_MYB usgs_myb_url_helper
call_response_fxn: !script_function:USGS_MYB usgs_talc_call
parse_response_fxn: !script_function:USGS_MYB usgs_talc_parse
diff --git a/flowsa/methods/flowbyactivitymethods/USGS_MYB_Titanium.yaml b/flowsa/methods/flowbyactivitymethods/USGS_MYB_Titanium.yaml
index 6b9c53c2a..2d535eb9c 100644
--- a/flowsa/methods/flowbyactivitymethods/USGS_MYB_Titanium.yaml
+++ b/flowsa/methods/flowbyactivitymethods/USGS_MYB_Titanium.yaml
@@ -1,12 +1,12 @@
# Downloads and parses CSV from USGS Titanium Statistics and Information on an annual basis.
---
source_name: USGS Mineral Yearbook Titanium
-citable_url: https://prd-wret.s3.us-west-2.amazonaws.com/assets/palladium/production/atoms/files/myb1-2017-titan.xls
+citable_url: https://d9-wret.s3.us-west-2.amazonaws.com/assets/palladium/production/atoms/files/myb1-2017-titan.xls
date_generated: '2021-03-19'
years_available: 2013-2017
format: xls
url:
- base_url: https://prd-wret.s3.us-west-2.amazonaws.com/assets/palladium/production/atoms/files/myb1-2017-titan.xls
+ base_url: https://d9-wret.s3.us-west-2.amazonaws.com/assets/palladium/production/atoms/files/myb1-2017-titan.xls
url_replace_fxn: !script_function:USGS_MYB usgs_myb_url_helper
call_response_fxn: !script_function:USGS_MYB usgs_titanium_call
parse_response_fxn: !script_function:USGS_MYB usgs_titanium_parse
diff --git a/flowsa/methods/flowbyactivitymethods/USGS_MYB_Tungsten.yaml b/flowsa/methods/flowbyactivitymethods/USGS_MYB_Tungsten.yaml
index 71d238966..aceeae3cf 100644
--- a/flowsa/methods/flowbyactivitymethods/USGS_MYB_Tungsten.yaml
+++ b/flowsa/methods/flowbyactivitymethods/USGS_MYB_Tungsten.yaml
@@ -1,12 +1,12 @@
# Downloads and parses CSV from USGS Tungsten Statistics and Information on an annual basis.
---
source_name: USGS Mineral Yearbook Tungsten
-citable_url: https://prd-wret.s3.us-west-2.amazonaws.com/assets/palladium/production/atoms/files/myb1-2017-tungs.xls
+citable_url: https://d9-wret.s3.us-west-2.amazonaws.com/assets/palladium/production/atoms/files/myb1-2017-tungs.xls
date_generated: '2021-03-19'
years_available: 2013-2017
format: xls
url:
- base_url: https://prd-wret.s3.us-west-2.amazonaws.com/assets/palladium/production/atoms/files/myb1-2017-tungs.xls
+ base_url: https://d9-wret.s3.us-west-2.amazonaws.com/assets/palladium/production/atoms/files/myb1-2017-tungs.xls
url_replace_fxn: !script_function:USGS_MYB usgs_myb_url_helper
call_response_fxn: !script_function:USGS_MYB usgs_tungsten_call
parse_response_fxn: !script_function:USGS_MYB usgs_tungsten_parse
diff --git a/flowsa/methods/flowbyactivitymethods/USGS_MYB_Vermiculite.yaml b/flowsa/methods/flowbyactivitymethods/USGS_MYB_Vermiculite.yaml
index 6177028ae..00e322023 100644
--- a/flowsa/methods/flowbyactivitymethods/USGS_MYB_Vermiculite.yaml
+++ b/flowsa/methods/flowbyactivitymethods/USGS_MYB_Vermiculite.yaml
@@ -1,12 +1,12 @@
# Downloads and parses CSV from USGS Vermiculite Statistics and Information on an annual basis.
---
source_name: USGS Mineral Yearbook Vermiculite
-citable_url: https://prd-wret.s3.us-west-2.amazonaws.com/assets/palladium/production/atoms/files/myb1-2018-vermi-adv.xlsx
+citable_url: https://pubs.usgs.gov/myb/vol1/2018/myb1-2018-vermiculite.xlsx
date_generated: '2021-03-19'
years_available: 2014-2018
format: xls
url:
- base_url: https://prd-wret.s3.us-west-2.amazonaws.com/assets/palladium/production/atoms/files/myb1-2018-vermi-adv.xlsx
+ base_url: https://pubs.usgs.gov/myb/vol1/2018/myb1-2018-vermiculite.xlsx
url_replace_fxn: !script_function:USGS_MYB usgs_myb_url_helper
call_response_fxn: !script_function:USGS_MYB usgs_vermiculite_call
parse_response_fxn: !script_function:USGS_MYB usgs_vermiculite_parse
diff --git a/flowsa/methods/flowbyactivitymethods/USGS_MYB_Zeolites.yaml b/flowsa/methods/flowbyactivitymethods/USGS_MYB_Zeolites.yaml
index 8273d804a..6e5fe1607 100644
--- a/flowsa/methods/flowbyactivitymethods/USGS_MYB_Zeolites.yaml
+++ b/flowsa/methods/flowbyactivitymethods/USGS_MYB_Zeolites.yaml
@@ -1,7 +1,7 @@
# Downloads and parses CSV from USGS Zeolites Statistics and Information on an annual basis.
---
source_name: USGS Mineral Yearbook Zeolites
-citable_url: https://prd-wret.s3.us-west-2.amazonaws.com/assets/palladium/production/atoms/files/myb-2018--zeoli-adv.xlsx
+citable_url: https://pubs.usgs.gov/myb/vol1/2018/myb1-2018-zeolites.xlsx
date_generated: '2021-03-19'
years_available: 2014-2018
format: xls
diff --git a/flowsa/methods/flowbyactivitymethods/USGS_MYB_Zinc.yaml b/flowsa/methods/flowbyactivitymethods/USGS_MYB_Zinc.yaml
index 19dff638c..918b3fda3 100644
--- a/flowsa/methods/flowbyactivitymethods/USGS_MYB_Zinc.yaml
+++ b/flowsa/methods/flowbyactivitymethods/USGS_MYB_Zinc.yaml
@@ -1,12 +1,12 @@
# Downloads and parses CSV from USGS Zinc Statistics and Information on an annual basis.
---
source_name: USGS Mineral Yearbook Zinc
-citable_url: https://prd-wret.s3.us-west-2.amazonaws.com/assets/palladium/production/atoms/files/myb1-2017-zinc.xls
+citable_url: https://d9-wret.s3.us-west-2.amazonaws.com/assets/palladium/production/atoms/files/myb1-2017-zinc.xls
date_generated: '2021-03-26'
years_available: 2013-2017
format: xls
url:
- base_url: https://prd-wret.s3.us-west-2.amazonaws.com/assets/palladium/production/atoms/files/myb1-2017-zinc.xls
+ base_url: https://d9-wret.s3.us-west-2.amazonaws.com/assets/palladium/production/atoms/files/myb1-2017-zinc.xls
url_replace_fxn: !script_function:USGS_MYB usgs_myb_url_helper
call_response_fxn: !script_function:USGS_MYB usgs_zinc_call
parse_response_fxn: !script_function:USGS_MYB usgs_zinc_parse
diff --git a/flowsa/methods/flowbyactivitymethods/USGS_MYB_Zirconium.yaml b/flowsa/methods/flowbyactivitymethods/USGS_MYB_Zirconium.yaml
index 834b4f6ad..a9e32d857 100644
--- a/flowsa/methods/flowbyactivitymethods/USGS_MYB_Zirconium.yaml
+++ b/flowsa/methods/flowbyactivitymethods/USGS_MYB_Zirconium.yaml
@@ -1,12 +1,12 @@
# Downloads and parses CSV from USGS Zirconium Statistics and Information on an annual basis.
---
source_name: USGS Mineral Yearbook Zirconium
-citable_url: https://prd-wret.s3.us-west-2.amazonaws.com/assets/palladium/production/atoms/files/myb1-2017-zirco.xls
+citable_url: https://d9-wret.s3.us-west-2.amazonaws.com/assets/palladium/production/atoms/files/myb1-2017-zirco.xls
date_generated: '2021-03-26'
years_available: 2013-2017
format: xls
url:
- base_url: https://prd-wret.s3.us-west-2.amazonaws.com/assets/palladium/production/atoms/files/myb1-2017-zirco.xls
+ base_url: https://d9-wret.s3.us-west-2.amazonaws.com/assets/palladium/production/atoms/files/myb1-2017-zirco.xls
url_replace_fxn: !script_function:USGS_MYB usgs_myb_url_helper
call_response_fxn: !script_function:USGS_MYB usgs_zirconium_call
parse_response_fxn: !script_function:USGS_MYB usgs_zirconium_parse
diff --git a/flowsa/methods/flowbyactivitymethods/USGS_NWIS_WU.yaml b/flowsa/methods/flowbyactivitymethods/USGS_NWIS_WU.yaml
index fe1e75fee..9d63e3e9c 100644
--- a/flowsa/methods/flowbyactivitymethods/USGS_NWIS_WU.yaml
+++ b/flowsa/methods/flowbyactivitymethods/USGS_NWIS_WU.yaml
@@ -15,9 +15,9 @@ url:
wu_county: ALL # only for the county level
wu_category: ALL
key_param: None
-url_replace_fxn: usgs_URL_helper
-call_response_fxn: usgs_call
-parse_response_fxn: usgs_parse
+url_replace_fxn: !script_function:USGS_NWIS_WU usgs_URL_helper
+call_response_fxn: !script_function:USGS_NWIS_WU usgs_call
+parse_response_fxn: !script_function:USGS_NWIS_WU usgs_parse
years:
- 2010
- 2015
diff --git a/flowsa/methods/flowbyactivitymethods/USGS_WU_Coef.yaml b/flowsa/methods/flowbyactivitymethods/USGS_WU_Coef.yaml
index 548162692..ca5b8c02a 100644
--- a/flowsa/methods/flowbyactivitymethods/USGS_WU_Coef.yaml
+++ b/flowsa/methods/flowbyactivitymethods/USGS_WU_Coef.yaml
@@ -8,6 +8,6 @@ api_name: None
api_key_required: false
format: csv # comma delineated data
url: None
-parse_response_fxn: usgs_coef_parse
+parse_response_fxn: !script_function:USGS_WU_Coef usgs_coef_parse
years:
- 2005
diff --git a/flowsa/methods/flowbyactivitymethods/stateio_Common.yaml b/flowsa/methods/flowbyactivitymethods/stateio_Common.yaml
new file mode 100644
index 000000000..9a61c425e
--- /dev/null
+++ b/flowsa/methods/flowbyactivitymethods/stateio_Common.yaml
@@ -0,0 +1,18 @@
+author: Li et al.
+# source_name: stateio_Make_Summary # Update for each dataset
+source_url: https://github.com/USEPA/stateior
+original_data_download_date:
+# bib_id: # Update for each dataset
+api_name: None
+api_key_required: False
+format: rds # R data format
+url: None # Uses esupy data retrieval
+parse_response_fxn: !script_function:stateio parse_statior
+years:
+- 2012
+- 2013
+- 2014
+- 2015
+- 2016
+- 2017
+# datatype: 'State_Summary_Make' # Update for each dataset
diff --git a/flowsa/methods/flowbyactivitymethods/stateio_Industry_GO.yaml b/flowsa/methods/flowbyactivitymethods/stateio_Industry_GO.yaml
new file mode 100644
index 000000000..6445af195
--- /dev/null
+++ b/flowsa/methods/flowbyactivitymethods/stateio_Industry_GO.yaml
@@ -0,0 +1,4 @@
+!include:stateio_Common.yaml
+source_name: stateio_Industry_GO
+bib_id:
+datatype: 'State_Summary_IndustryOutput'
diff --git a/flowsa/methods/flowbyactivitymethods/stateio_Make_Summary.yaml b/flowsa/methods/flowbyactivitymethods/stateio_Make_Summary.yaml
new file mode 100644
index 000000000..b8b8c62fb
--- /dev/null
+++ b/flowsa/methods/flowbyactivitymethods/stateio_Make_Summary.yaml
@@ -0,0 +1,4 @@
+!include:stateio_Common.yaml
+source_name: stateio_Make_Summary
+bib_id:
+datatype: 'State_Summary_Make'
diff --git a/flowsa/methods/flowbyactivitymethods/stateio_Use_Summary.yaml b/flowsa/methods/flowbyactivitymethods/stateio_Use_Summary.yaml
new file mode 100644
index 000000000..da1f61c89
--- /dev/null
+++ b/flowsa/methods/flowbyactivitymethods/stateio_Use_Summary.yaml
@@ -0,0 +1,4 @@
+!include:stateio_Common.yaml
+source_name: stateio_Use_Summary
+bib_id:
+datatype: 'State_Summary_Use'
diff --git a/flowsa/methods/flowbysectoractivitysets/BLS_QCEW_asets.csv b/flowsa/methods/flowbysectoractivitysets/BLS_QCEW_asets.csv
index 45465aace..061c8b201 100644
--- a/flowsa/methods/flowbysectoractivitysets/BLS_QCEW_asets.csv
+++ b/flowsa/methods/flowbysectoractivitysets/BLS_QCEW_asets.csv
@@ -146,6 +146,9 @@ qcew,115310,
qcew,21,
qcew,211,
qcew,2111,
+qcew,21111,
+qcew,211111,
+qcew,211112,
qcew,21112,
qcew,211120,
qcew,21113,
@@ -164,6 +167,8 @@ qcew,212221,
qcew,212222,
qcew,21223,
qcew,212230,
+qcew,212231,
+qcew,212234,
qcew,21229,
qcew,212291,
qcew,212299,
@@ -203,6 +208,7 @@ qcew,221115,
qcew,221116,
qcew,221117,
qcew,221118,
+qcew,221119,
qcew,22112,
qcew,221121,
qcew,221122,
@@ -321,14 +327,22 @@ qcew,311212,
qcew,311213,
qcew,31122,
qcew,311221,
+qcew,311222,
+qcew,311223,
qcew,311224,
qcew,311225,
qcew,31123,
qcew,311230,
qcew,3113,
qcew,31131,
+qcew,311311,
+qcew,311312,
qcew,311313,
qcew,311314,
+qcew,31132,
+qcew,311320,
+qcew,31133,
+qcew,311330,
qcew,31134,
qcew,311340,
qcew,31135,
@@ -359,6 +373,8 @@ qcew,311615,
qcew,3117,
qcew,31171,
qcew,311710,
+qcew,311711,
+qcew,311712,
qcew,3118,
qcew,31181,
qcew,311811,
@@ -366,6 +382,8 @@ qcew,311812,
qcew,311813,
qcew,31182,
qcew,311821,
+qcew,311822,
+qcew,311823,
qcew,311824,
qcew,31183,
qcew,311830,
@@ -396,24 +414,38 @@ qcew,312130,
qcew,31214,
qcew,312140,
qcew,3122,
+qcew,31221,
+qcew,312210,
+qcew,31222,
+qcew,312221,
+qcew,312229,
qcew,31223,
qcew,312230,
qcew,313,
qcew,3131,
qcew,31311,
qcew,313110,
+qcew,313111,
+qcew,313112,
+qcew,313113,
qcew,3132,
qcew,31321,
qcew,313210,
qcew,31322,
qcew,313220,
+qcew,313221,
+qcew,313222,
qcew,31323,
qcew,313230,
qcew,31324,
qcew,313240,
+qcew,313241,
+qcew,313249,
qcew,3133,
qcew,31331,
qcew,313310,
+qcew,313311,
+qcew,313312,
qcew,31332,
qcew,313320,
qcew,314,
@@ -422,30 +454,62 @@ qcew,31411,
qcew,314110,
qcew,31412,
qcew,314120,
+qcew,314121,
+qcew,314129,
qcew,3149,
qcew,31491,
qcew,314910,
+qcew,314911,
+qcew,314912,
qcew,31499,
+qcew,314991,
+qcew,314992,
qcew,314994,
qcew,314999,
qcew,315,
qcew,3151,
qcew,31511,
qcew,315110,
+qcew,315111,
+qcew,315119,
qcew,31519,
qcew,315190,
+qcew,315191,
+qcew,315192,
qcew,3152,
qcew,31521,
qcew,315210,
+qcew,315211,
+qcew,315212,
qcew,31522,
qcew,315220,
+qcew,315221,
+qcew,315222,
+qcew,315223,
+qcew,315224,
+qcew,315225,
+qcew,315228,
+qcew,31523,
+qcew,315231,
+qcew,315232,
+qcew,315233,
+qcew,315234,
+qcew,315239,
qcew,31524,
qcew,315240,
qcew,31528,
qcew,315280,
+qcew,31529,
+qcew,315291,
+qcew,315292,
+qcew,315299,
qcew,3159,
qcew,31599,
qcew,315990,
+qcew,315991,
+qcew,315992,
+qcew,315993,
+qcew,315999,
qcew,316,
qcew,3161,
qcew,31611,
@@ -453,10 +517,18 @@ qcew,316110,
qcew,3162,
qcew,31621,
qcew,316210,
+qcew,316211,
+qcew,316212,
+qcew,316213,
+qcew,316214,
+qcew,316219,
qcew,3169,
qcew,31699,
+qcew,316991,
qcew,316992,
+qcew,316993,
qcew,316998,
+qcew,316999,
qcew,321,
qcew,3211,
qcew,32111,
@@ -493,22 +565,43 @@ qcew,3222,
qcew,32221,
qcew,322211,
qcew,322212,
+qcew,322213,
+qcew,322214,
+qcew,322215,
qcew,322219,
qcew,32222,
qcew,322220,
+qcew,322221,
+qcew,322222,
+qcew,322223,
+qcew,322224,
+qcew,322225,
+qcew,322226,
qcew,32223,
qcew,322230,
+qcew,322231,
+qcew,322232,
+qcew,322233,
qcew,32229,
qcew,322291,
qcew,322299,
qcew,323,
qcew,3231,
qcew,32311,
+qcew,323110,
qcew,323111,
+qcew,323112,
qcew,323113,
+qcew,323114,
+qcew,323115,
+qcew,323116,
qcew,323117,
+qcew,323118,
+qcew,323119,
qcew,32312,
qcew,323120,
+qcew,323121,
+qcew,323122,
qcew,324,
qcew,3241,
qcew,32411,
@@ -527,9 +620,16 @@ qcew,32512,
qcew,325120,
qcew,32513,
qcew,325130,
+qcew,325131,
+qcew,325132,
qcew,32518,
qcew,325180,
+qcew,325181,
+qcew,325182,
+qcew,325188,
qcew,32519,
+qcew,325191,
+qcew,325192,
qcew,325193,
qcew,325194,
qcew,325199,
@@ -539,6 +639,8 @@ qcew,325211,
qcew,325212,
qcew,32522,
qcew,325220,
+qcew,325221,
+qcew,325222,
qcew,3253,
qcew,32531,
qcew,325311,
@@ -592,6 +694,7 @@ qcew,32616,
qcew,326160,
qcew,32619,
qcew,326191,
+qcew,326192,
qcew,326199,
qcew,3262,
qcew,32621,
@@ -606,8 +709,16 @@ qcew,327,
qcew,3271,
qcew,32711,
qcew,327110,
+qcew,327111,
+qcew,327112,
+qcew,327113,
qcew,32712,
qcew,327120,
+qcew,327121,
+qcew,327122,
+qcew,327123,
+qcew,327124,
+qcew,327125,
qcew,3272,
qcew,32721,
qcew,327211,
@@ -641,6 +752,8 @@ qcew,331,
qcew,3311,
qcew,33111,
qcew,331110,
+qcew,331111,
+qcew,331112,
qcew,3312,
qcew,33121,
qcew,331210,
@@ -649,15 +762,24 @@ qcew,331221,
qcew,331222,
qcew,3313,
qcew,33131,
+qcew,331311,
+qcew,331312,
qcew,331313,
qcew,331314,
qcew,331315,
+qcew,331316,
qcew,331318,
+qcew,331319,
qcew,3314,
qcew,33141,
qcew,331410,
+qcew,331411,
+qcew,331419,
qcew,33142,
qcew,331420,
+qcew,331421,
+qcew,331422,
+qcew,331423,
qcew,33149,
qcew,331491,
qcew,331492,
@@ -667,8 +789,12 @@ qcew,331511,
qcew,331512,
qcew,331513,
qcew,33152,
+qcew,331521,
+qcew,331522,
qcew,331523,
qcew,331524,
+qcew,331525,
+qcew,331528,
qcew,331529,
qcew,332,
qcew,3321,
@@ -676,10 +802,16 @@ qcew,33211,
qcew,332111,
qcew,332112,
qcew,332114,
+qcew,332115,
+qcew,332116,
qcew,332117,
qcew,332119,
qcew,3322,
qcew,33221,
+qcew,332211,
+qcew,332212,
+qcew,332213,
+qcew,332214,
qcew,332215,
qcew,332216,
qcew,3323,
@@ -704,6 +836,8 @@ qcew,33251,
qcew,332510,
qcew,3326,
qcew,33261,
+qcew,332611,
+qcew,332612,
qcew,332613,
qcew,332618,
qcew,3327,
@@ -728,7 +862,10 @@ qcew,332991,
qcew,332992,
qcew,332993,
qcew,332994,
+qcew,332995,
qcew,332996,
+qcew,332997,
+qcew,332998,
qcew,332999,
qcew,333,
qcew,3331,
@@ -741,28 +878,50 @@ qcew,33313,
qcew,333131,
qcew,333132,
qcew,3332,
+qcew,33321,
+qcew,333210,
+qcew,33322,
+qcew,333220,
qcew,33324,
qcew,333241,
qcew,333242,
qcew,333243,
qcew,333244,
qcew,333249,
+qcew,33329,
+qcew,333291,
+qcew,333292,
+qcew,333293,
+qcew,333294,
+qcew,333295,
+qcew,333298,
qcew,3333,
qcew,33331,
+qcew,333311,
+qcew,333312,
+qcew,333313,
qcew,333314,
+qcew,333315,
qcew,333316,
qcew,333318,
+qcew,333319,
qcew,3334,
qcew,33341,
+qcew,333411,
+qcew,333412,
qcew,333413,
qcew,333414,
qcew,333415,
qcew,3335,
qcew,33351,
qcew,333511,
+qcew,333512,
+qcew,333513,
qcew,333514,
qcew,333515,
+qcew,333516,
qcew,333517,
+qcew,333518,
qcew,333519,
qcew,3336,
qcew,33361,
@@ -772,7 +931,9 @@ qcew,333613,
qcew,333618,
qcew,3339,
qcew,33391,
+qcew,333911,
qcew,333912,
+qcew,333913,
qcew,333914,
qcew,33392,
qcew,333921,
@@ -793,7 +954,9 @@ qcew,3341,
qcew,33411,
qcew,334111,
qcew,334112,
+qcew,334113,
qcew,334118,
+qcew,334119,
qcew,3342,
qcew,33421,
qcew,334210,
@@ -806,8 +969,11 @@ qcew,33431,
qcew,334310,
qcew,3344,
qcew,33441,
+qcew,334411,
qcew,334412,
qcew,334413,
+qcew,334414,
+qcew,334415,
qcew,334416,
qcew,334417,
qcew,334418,
@@ -822,9 +988,12 @@ qcew,334514,
qcew,334515,
qcew,334516,
qcew,334517,
+qcew,334518,
qcew,334519,
qcew,3346,
qcew,33461,
+qcew,334611,
+qcew,334612,
qcew,334613,
qcew,334614,
qcew,335,
@@ -838,8 +1007,14 @@ qcew,335129,
qcew,3352,
qcew,33521,
qcew,335210,
+qcew,335211,
+qcew,335212,
qcew,33522,
qcew,335220,
+qcew,335221,
+qcew,335222,
+qcew,335224,
+qcew,335228,
qcew,3353,
qcew,33531,
qcew,335311,
@@ -875,8 +1050,12 @@ qcew,336214,
qcew,3363,
qcew,33631,
qcew,336310,
+qcew,336311,
+qcew,336312,
qcew,33632,
qcew,336320,
+qcew,336321,
+qcew,336322,
qcew,33633,
qcew,336330,
qcew,33634,
@@ -889,6 +1068,8 @@ qcew,33637,
qcew,336370,
qcew,33639,
qcew,336390,
+qcew,336391,
+qcew,336399,
qcew,3364,
qcew,33641,
qcew,336411,
@@ -919,6 +1100,7 @@ qcew,337122,
qcew,337124,
qcew,337125,
qcew,337127,
+qcew,337129,
qcew,3372,
qcew,33721,
qcew,337211,
@@ -933,6 +1115,7 @@ qcew,337920,
qcew,339,
qcew,3391,
qcew,33911,
+qcew,339111,
qcew,339112,
qcew,339113,
qcew,339114,
@@ -941,12 +1124,22 @@ qcew,339116,
qcew,3399,
qcew,33991,
qcew,339910,
+qcew,339911,
+qcew,339912,
+qcew,339913,
+qcew,339914,
qcew,33992,
qcew,339920,
qcew,33993,
qcew,339930,
+qcew,339931,
+qcew,339932,
qcew,33994,
qcew,339940,
+qcew,339941,
+qcew,339942,
+qcew,339943,
+qcew,339944,
qcew,33995,
qcew,339950,
qcew,33999,
@@ -1132,8 +1325,10 @@ qcew,4412,
qcew,44121,
qcew,441210,
qcew,44122,
+qcew,441221,
qcew,441222,
qcew,441228,
+qcew,441229,
qcew,4413,
qcew,44131,
qcew,441310,
@@ -1151,6 +1346,13 @@ qcew,442291,
qcew,442299,
qcew,443,
qcew,4431,
+qcew,44311,
+qcew,443111,
+qcew,443112,
+qcew,44312,
+qcew,443120,
+qcew,44313,
+qcew,443130,
qcew,44314,
qcew,443141,
qcew,443142,
@@ -1242,7 +1444,13 @@ qcew,4512,
qcew,45121,
qcew,451211,
qcew,451212,
+qcew,45122,
+qcew,451220,
qcew,452,
+qcew,4521,
+qcew,45211,
+qcew,452111,
+qcew,452112,
qcew,4522,
qcew,45221,
qcew,452210,
@@ -1250,6 +1458,11 @@ qcew,4523,
qcew,45231,
qcew,452311,
qcew,452319,
+qcew,4529,
+qcew,45291,
+qcew,452910,
+qcew,45299,
+qcew,452990,
qcew,453,
qcew,4531,
qcew,45311,
@@ -1276,12 +1489,18 @@ qcew,454,
qcew,4541,
qcew,45411,
qcew,454110,
+qcew,454111,
+qcew,454112,
+qcew,454113,
qcew,4542,
qcew,45421,
qcew,454210,
qcew,4543,
qcew,45431,
qcew,454310,
+qcew,454311,
+qcew,454312,
+qcew,454319,
qcew,45439,
qcew,454390,
qcew,48-49,
@@ -1454,6 +1673,10 @@ qcew,51219,
qcew,512191,
qcew,512199,
qcew,5122,
+qcew,51221,
+qcew,512210,
+qcew,51222,
+qcew,512220,
qcew,51223,
qcew,512230,
qcew,51224,
@@ -1472,19 +1695,40 @@ qcew,515120,
qcew,5152,
qcew,51521,
qcew,515210,
+qcew,516,
+qcew,5161,
+qcew,51611,
+qcew,516110,
qcew,517,
+qcew,5171,
+qcew,51711,
+qcew,517110,
+qcew,5172,
+qcew,51721,
+qcew,517210,
+qcew,517211,
+qcew,517212,
qcew,5173,
qcew,51731,
+qcew,517310,
qcew,517311,
qcew,517312,
qcew,5174,
qcew,51741,
qcew,517410,
+qcew,5175,
+qcew,51751,
+qcew,517510,
qcew,5179,
qcew,51791,
+qcew,517910,
qcew,517911,
qcew,517919,
qcew,518,
+qcew,5181,
+qcew,51811,
+qcew,518111,
+qcew,518112,
qcew,5182,
qcew,51821,
qcew,518210,
@@ -1585,6 +1829,8 @@ qcew,52591,
qcew,525910,
qcew,52592,
qcew,525920,
+qcew,52593,
+qcew,525930,
qcew,52599,
qcew,525990,
qcew,53,
@@ -1619,12 +1865,20 @@ qcew,532120,
qcew,5322,
qcew,53221,
qcew,532210,
+qcew,53222,
+qcew,532220,
+qcew,53223,
+qcew,532230,
qcew,53228,
qcew,532281,
qcew,532282,
qcew,532283,
qcew,532284,
qcew,532289,
+qcew,53229,
+qcew,532291,
+qcew,532292,
+qcew,532299,
qcew,5323,
qcew,53231,
qcew,532310,
@@ -1699,6 +1953,9 @@ qcew,54169,
qcew,541690,
qcew,5417,
qcew,54171,
+qcew,541710,
+qcew,541711,
+qcew,541712,
qcew,541713,
qcew,541714,
qcew,541715,
@@ -1750,6 +2007,7 @@ qcew,56121,
qcew,561210,
qcew,5613,
qcew,56131,
+qcew,561310,
qcew,561311,
qcew,561312,
qcew,56132,
@@ -2036,6 +2294,14 @@ qcew,7213,
qcew,72131,
qcew,721310,
qcew,722,
+qcew,7221,
+qcew,72211,
+qcew,722110,
+qcew,7222,
+qcew,72221,
+qcew,722211,
+qcew,722212,
+qcew,722213,
qcew,7223,
qcew,72231,
qcew,722310,
diff --git a/flowsa/methods/flowbysectoractivitysets/README.md b/flowsa/methods/flowbysectoractivitysets/README.md
index 2fe666863..ba230f4d9 100644
--- a/flowsa/methods/flowbysectoractivitysets/README.md
+++ b/flowsa/methods/flowbysectoractivitysets/README.md
@@ -1,5 +1,27 @@
# Flow-By-Sector Activity Sets
-Flow-By-Sector (FBS) activity sets are an optional method of assigning Flow-By-Actiivty (FBA)
-activities to an activity set defined in an FBS method yaml. Activity set csv files are generally
-created in the [scripts directory](https://github.com/USEPA/flowsa/tree/master/scripts/FlowBySector_Activity_Sets).
-These csvs are not required, but are recommended when an FBA has a large number of activities.
+Flow-By-Sector (FBS) activity sets are an optional method of assigning
+Flow-By-Activity (FBA) activities to an activity set defined in an FBS
+method yaml. Activity set csv files are generally created in the
+[scripts directory](https://github.com/USEPA/flowsa/tree/master/scripts/FlowBySector_Activity_Sets).
+These csvs are not required, but are recommended when an FBA has a large
+number of activities.
+
+The CSVs are called on in the FBS yaml under the `names:` parameter, using
+the tag `!from_index:CSVName.csv ActivitySetColumnSubset`, where
+_CSVName.csv_ is the name of the activity set file and
+_ActivitySetColumnSubset_ is the value in the "activity_set" column to call
+on. See the example below.
+
+```yaml
+"EPA_NEI_Onroad":
+ data_format: 'FBA'
+ class: Chemicals
+ geoscale_to_use: national
+ year: 2017
+ activity_to_sector_mapping: 'SCC'
+ clean_fba_df_fxn: !script_function:EPA_NEI clean_NEI_fba
+ fedefl_mapping: 'NEI'
+ activity_sets:
+ direct_allocation:
+ names: !from_index:NEI_Onroad_2017_asets.csv direct_allocation
+```
diff --git a/flowsa/methods/flowbysectormethods/CAP_HAP_national_2017.yaml b/flowsa/methods/flowbysectormethods/CAP_HAP_national_2017.yaml
index 60bc0c962..e2736d756 100644
--- a/flowsa/methods/flowbysectormethods/CAP_HAP_national_2017.yaml
+++ b/flowsa/methods/flowbysectormethods/CAP_HAP_national_2017.yaml
@@ -22,7 +22,6 @@ _allocation_types:
year: 2014
geoscale: national
flows: None # Verify what this does
- clean_fba_df_fxn: !script_function:BLS_QCEW clean_bls_qcew_fba
clean_fba_wsec_df_fxn: !script_function:BLS_QCEW bls_clean_allocation_fba_w_sec
_use_allocation: &use_allocation
@@ -43,9 +42,11 @@ _allocation_types:
helper_source_class: "Employment"
helper_source_year: 2012
helper_flow:
- - "Number of employees"
+ - "Number of employees, Federal Government"
+ - "Number of employees, State Government"
+ - "Number of employees, Local Government"
+ - "Number of employees, Private"
helper_from_scale: national
- clean_helper_fba: !script_function:BLS_QCEW clean_bls_qcew_fba
clean_helper_fba_wsec: !script_function:BLS_QCEW bls_clean_allocation_fba_w_sec
@@ -76,11 +77,11 @@ source_names:
- AREA HARVESTED
allocation_compartment: None
geoscale: state
- clean_fba_df_fxn: !script_function:USDA_CoA_Cropland coa_irrigated_cropland_fba_cleanup
clean_fba_w_sec_df_fxn: !script_function:USDA_CoA_Cropland disaggregate_coa_cropland_to_6_digit_naics
# OR
attribution_method: function
attribution function: !script_function:USDA_CoA_Cropland disaggregate_coa_cropland_to_6_digit_naics # wrapped in an appropriate aggregation
+
industry_combustion_coal: #only allocating to 3digits
<<: *mecs_allocation
names: !from_index:NEI_Nonpoint_2017_asets.csv industry_combustion_coal
@@ -153,9 +154,11 @@ source_names:
helper_source_class: "Employment"
helper_source_year: 2012
helper_flow:
- - "Number of employees"
+ - "Number of employees, Federal Government"
+ - "Number of employees, State Government"
+ - "Number of employees, Local Government"
+ - "Number of employees, Private"
helper_from_scale: national
- clean_helper_fba: !script_function:BLS_QCEW clean_bls_qcew_fba
clean_helper_fba_wsec: !script_function:BLS_QCEW bls_clean_allocation_fba_w_sec
pesticides: # pesticides
<<: *use_allocation
@@ -199,7 +202,7 @@ source_names:
clean_parameter: {"324110": 'ActivityProducedBy'}
"stewiFBS":
data_format: 'FBS_outside_flowsa'
- FBS_datapull_fxn: stewicombo_to_sector
+ FBS_datapull_fxn: !script_function:stewiFBS stewicombo_to_sector
inventory_dict: {"NEI":"2017", "TRI":"2017"}
local_inventory_name: 'NEI_TRI_air_2017'
clean_fbs_df_fxn: !script_function:EPA_NEI drop_GHGs
diff --git a/flowsa/methods/flowbysectormethods/CNHWC_national_2014.yaml b/flowsa/methods/flowbysectormethods/CNHWC_national_2014.yaml
index b2135215e..f5eaec62e 100644
--- a/flowsa/methods/flowbysectormethods/CNHWC_national_2014.yaml
+++ b/flowsa/methods/flowbysectormethods/CNHWC_national_2014.yaml
@@ -29,7 +29,9 @@ source_names:
helper_source_class: "Employment"
helper_source_year: 2014
helper_flow:
- - "Number of employees"
+ - "Number of employees, Federal Government"
+ - "Number of employees, State Government"
+ - "Number of employees, Local Government"
+ - "Number of employees, Private"
helper_from_scale: national
- clean_helper_fba: clean_bls_qcew_fba
- clean_helper_fba_wsec: bls_clean_allocation_fba_w_sec
+ clean_helper_fba_wsec: !script_function:BLS_QCEW bls_clean_allocation_fba_w_sec
diff --git a/flowsa/methods/flowbysectormethods/CNHW_CA_2014.yaml b/flowsa/methods/flowbysectormethods/CNHW_CA_2014.yaml
index 7c5c9b4b6..defd8d804 100644
--- a/flowsa/methods/flowbysectormethods/CNHW_CA_2014.yaml
+++ b/flowsa/methods/flowbysectormethods/CNHW_CA_2014.yaml
@@ -33,11 +33,13 @@ source_names:
allocation_source_class: "Employment"
allocation_source_year: 2014
allocation_flow:
- - "Number of employees"
+ - "Number of employees, Federal Government"
+ - "Number of employees, State Government"
+ - "Number of employees, Local Government"
+ - "Number of employees, Private"
allocation_compartment: None
allocation_from_scale: state
allocation_fba_load_scale: state
- clean_allocation_fba: !script_function:BLS_QCEW clean_bls_qcew_fba
clean_allocation_fba_w_sec: !script_function:BLS_QCEW bls_clean_allocation_fba_w_sec
activity_set_2:
names:
diff --git a/flowsa/methods/flowbysectormethods/CNHW_state_2014.yaml b/flowsa/methods/flowbysectormethods/CNHW_state_2014.yaml
index fcb9a45e1..ebd231ca8 100644
--- a/flowsa/methods/flowbysectormethods/CNHW_state_2014.yaml
+++ b/flowsa/methods/flowbysectormethods/CNHW_state_2014.yaml
@@ -1,4 +1,4 @@
-!include:BEA_summary_target.yaml
+!include:USEEIO_summary_target.yaml
# target_sector_level: NAICS_6
# target_sector_source: NAICS_2012_Code
target_geoscale: state
diff --git a/flowsa/methods/flowbysectormethods/Employment_national_2017.yaml b/flowsa/methods/flowbysectormethods/Employment_national_2017.yaml
index b31332cd4..ef023ec74 100644
--- a/flowsa/methods/flowbysectormethods/Employment_national_2017.yaml
+++ b/flowsa/methods/flowbysectormethods/Employment_national_2017.yaml
@@ -13,7 +13,7 @@ source_names:
source_fba_load_scale: national
year: 2017
clean_fba_df_fxn: !script_function:BLS_QCEW clean_bls_qcew_fba_for_employment_sat_table
- clean_fba_w_sec_df_fxn: !script_function:BLS_QCEW bls_clean_allocation_fba_w_sec_sat_table
+ clean_fba_w_sec_df_fxn: !script_function:BLS_QCEW bls_clean_allocation_fba_w_sec
activity_sets:
qcew:
names: !from_index:BLS_QCEW_asets.csv qcew
diff --git a/flowsa/methods/flowbysectormethods/Employment_state_2012.yaml b/flowsa/methods/flowbysectormethods/Employment_state_2012.yaml
new file mode 100644
index 000000000..51bdc62e3
--- /dev/null
+++ b/flowsa/methods/flowbysectormethods/Employment_state_2012.yaml
@@ -0,0 +1,22 @@
+target_sector_level: NAICS_6
+target_sector_source: NAICS_2012_Code
+target_geoscale: state
+source_names:
+ "BLS_QCEW":
+ data_format: 'FBA'
+ class: Employment
+ geoscale_to_use: state
+ source_fba_load_scale: state
+ year: 2012
+ clean_fba_df_fxn: !script_function:BLS_QCEW clean_bls_qcew_fba_for_employment_sat_table
+ clean_fba_w_sec_df_fxn: !script_function:BLS_QCEW bls_clean_allocation_fba_w_sec
+ activity_sets:
+ qcew:
+ names: !from_index:BLS_QCEW_asets.csv qcew
+ allocation_method: direct
+ allocation_source: None
+ allocation_source_class: None
+ allocation_source_year: None
+ allocation_flow: None
+ allocation_compartment: None
+ allocation_from_scale: state
diff --git a/flowsa/methods/flowbysectormethods/Employment_state_2013.yaml b/flowsa/methods/flowbysectormethods/Employment_state_2013.yaml
new file mode 100644
index 000000000..f155da27b
--- /dev/null
+++ b/flowsa/methods/flowbysectormethods/Employment_state_2013.yaml
@@ -0,0 +1,23 @@
+target_sector_level: NAICS_6
+target_sector_source: NAICS_2012_Code
+target_geoscale: state
+source_names:
+ "BLS_QCEW":
+ data_format: 'FBA'
+ class: Employment
+ geoscale_to_use: state
+ source_fba_load_scale: state
+ year: 2013
+ clean_fba_df_fxn: !script_function:BLS_QCEW clean_bls_qcew_fba_for_employment_sat_table
+ clean_fba_w_sec_df_fxn: !script_function:BLS_QCEW bls_clean_allocation_fba_w_sec
+ activity_set_file: 'BLS_QCEW_asets.csv'
+ activity_sets:
+ qcew:
+ names: !from_index:BLS_QCEW_asets.csv qcew
+ allocation_method: direct
+ allocation_source: None
+ allocation_source_class: None
+ allocation_source_year: None
+ allocation_flow: None
+ allocation_compartment: None
+ allocation_from_scale: state
diff --git a/flowsa/methods/flowbysectormethods/Employment_state_2014.yaml b/flowsa/methods/flowbysectormethods/Employment_state_2014.yaml
new file mode 100644
index 000000000..7039c9c27
--- /dev/null
+++ b/flowsa/methods/flowbysectormethods/Employment_state_2014.yaml
@@ -0,0 +1,23 @@
+target_sector_level: NAICS_6
+target_sector_source: NAICS_2012_Code
+target_geoscale: state
+source_names:
+ "BLS_QCEW":
+ data_format: 'FBA'
+ class: Employment
+ geoscale_to_use: state
+ source_fba_load_scale: state
+ year: 2014
+ clean_fba_df_fxn: !script_function:BLS_QCEW clean_bls_qcew_fba_for_employment_sat_table
+ clean_fba_w_sec_df_fxn: !script_function:BLS_QCEW bls_clean_allocation_fba_w_sec
+ activity_set_file: 'BLS_QCEW_asets.csv'
+ activity_sets:
+ qcew:
+ names: !from_index:BLS_QCEW_asets.csv qcew
+ allocation_method: direct
+ allocation_source: None
+ allocation_source_class: None
+ allocation_source_year: None
+ allocation_flow: None
+ allocation_compartment: None
+ allocation_from_scale: state
diff --git a/flowsa/methods/flowbysectormethods/Employment_state_2015.yaml b/flowsa/methods/flowbysectormethods/Employment_state_2015.yaml
new file mode 100644
index 000000000..3834d1844
--- /dev/null
+++ b/flowsa/methods/flowbysectormethods/Employment_state_2015.yaml
@@ -0,0 +1,23 @@
+target_sector_level: NAICS_6
+target_sector_source: NAICS_2012_Code
+target_geoscale: state
+source_names:
+ "BLS_QCEW":
+ data_format: 'FBA'
+ class: Employment
+ geoscale_to_use: state
+ source_fba_load_scale: state
+ year: 2015
+ clean_fba_df_fxn: !script_function:BLS_QCEW clean_bls_qcew_fba_for_employment_sat_table
+ clean_fba_w_sec_df_fxn: !script_function:BLS_QCEW bls_clean_allocation_fba_w_sec
+ activity_set_file: 'BLS_QCEW_asets.csv'
+ activity_sets:
+ qcew:
+ names: !from_index:BLS_QCEW_asets.csv qcew
+ allocation_method: direct
+ allocation_source: None
+ allocation_source_class: None
+ allocation_source_year: None
+ allocation_flow: None
+ allocation_compartment: None
+ allocation_from_scale: state
diff --git a/flowsa/methods/flowbysectormethods/Employment_state_2016.yaml b/flowsa/methods/flowbysectormethods/Employment_state_2016.yaml
new file mode 100644
index 000000000..9864d6dff
--- /dev/null
+++ b/flowsa/methods/flowbysectormethods/Employment_state_2016.yaml
@@ -0,0 +1,23 @@
+target_sector_level: NAICS_6
+target_sector_source: NAICS_2012_Code
+target_geoscale: state
+source_names:
+ "BLS_QCEW":
+ data_format: 'FBA'
+ class: Employment
+ geoscale_to_use: state
+ source_fba_load_scale: state
+ year: 2016
+ clean_fba_df_fxn: !script_function:BLS_QCEW clean_bls_qcew_fba_for_employment_sat_table
+ clean_fba_w_sec_df_fxn: !script_function:BLS_QCEW bls_clean_allocation_fba_w_sec
+ activity_set_file: 'BLS_QCEW_asets.csv'
+ activity_sets:
+ qcew:
+ names: !from_index:BLS_QCEW_asets.csv qcew
+ allocation_method: direct
+ allocation_source: None
+ allocation_source_class: None
+ allocation_source_year: None
+ allocation_flow: None
+ allocation_compartment: None
+ allocation_from_scale: state
diff --git a/flowsa/methods/flowbysectormethods/Employment_state_2017.yaml b/flowsa/methods/flowbysectormethods/Employment_state_2017.yaml
new file mode 100644
index 000000000..96e34e569
--- /dev/null
+++ b/flowsa/methods/flowbysectormethods/Employment_state_2017.yaml
@@ -0,0 +1,23 @@
+target_sector_level: NAICS_6
+target_sector_source: NAICS_2012_Code
+target_geoscale: state
+source_names:
+ "BLS_QCEW":
+ data_format: 'FBA'
+ class: Employment
+ geoscale_to_use: state
+ source_fba_load_scale: state
+ year: 2017
+ clean_fba_df_fxn: !script_function:BLS_QCEW clean_bls_qcew_fba_for_employment_sat_table
+ clean_fba_w_sec_df_fxn: !script_function:BLS_QCEW bls_clean_allocation_fba_w_sec
+ activity_set_file: 'BLS_QCEW_asets.csv'
+ activity_sets:
+ qcew:
+ names: !from_index:BLS_QCEW_asets.csv qcew
+ allocation_method: direct
+ allocation_source: None
+ allocation_source_class: None
+ allocation_source_year: None
+ allocation_flow: None
+ allocation_compartment: None
+ allocation_from_scale: state
diff --git a/flowsa/methods/flowbysectormethods/Land_national_2012.yaml b/flowsa/methods/flowbysectormethods/Land_national_2012.yaml
index 2e05ba900..1f93d1ce8 100644
--- a/flowsa/methods/flowbysectormethods/Land_national_2012.yaml
+++ b/flowsa/methods/flowbysectormethods/Land_national_2012.yaml
@@ -1,5 +1,7 @@
%YAML 1.2
-# 'EIA_CBECS_Land' and 'EIA_MECS_Land' must be listed prior to 'USDA_ERS_MLU' for FBS creation
+# 'EIA_CBECS_Land' and 'EIA_MECS_Land' must be listed prior to
+# 'USDA_ERS_MLU' for FBS creation because the results of the two allocated
+# datasets are used within USDA_ERS_MLU sector attribution
---
target_sector_level: NAICS_6
@@ -21,11 +23,13 @@ source_names:
allocation_source_class: "Employment"
allocation_source_year: 2012
allocation_flow:
- - "Number of employees"
+ - "Number of employees, Federal Government"
+ - "Number of employees, State Government"
+ - "Number of employees, Local Government"
+ - "Number of employees, Private"
allocation_compartment: None
allocation_from_scale: national
allocation_fba_load_scale: national
- clean_allocation_fba: !script_function:BLS_QCEW clean_bls_qcew_fba
clean_allocation_fba_w_sec: !script_function:BLS_QCEW bls_clean_allocation_fba_w_sec
"EIA_MECS_Land": # manufacturing land use
data_format: 'FBA'
@@ -42,11 +46,13 @@ source_names:
allocation_source_class: "Employment"
allocation_source_year: 2014
allocation_flow:
- - "Number of employees"
+ - "Number of employees, Federal Government"
+ - "Number of employees, State Government"
+ - "Number of employees, Local Government"
+ - "Number of employees, Private"
allocation_compartment: None
allocation_from_scale: national
allocation_fba_load_scale: national
- clean_allocation_fba: !script_function:BLS_QCEW clean_bls_qcew_fba
clean_allocation_fba_w_sec: !script_function:BLS_QCEW bls_clean_allocation_fba_w_sec
"BLM_PLS":
data_format: 'FBA'
@@ -70,11 +76,13 @@ source_names:
allocation_source_class: "Employment"
allocation_source_year: 2012
allocation_flow:
- - "Number of employees"
+ - "Number of employees, Federal Government"
+ - "Number of employees, State Government"
+ - "Number of employees, Local Government"
+ - "Number of employees, Private"
allocation_compartment: None
allocation_from_scale: national
allocation_fba_load_scale: national
- clean_allocation_fba: !script_function:BLS_QCEW clean_bls_qcew_fba
clean_allocation_fba_w_sec: !script_function:BLS_QCEW bls_clean_allocation_fba_w_sec
"USDA_ERS_MLU":
data_format: 'FBA'
@@ -147,9 +155,10 @@ source_names:
- 'Land in rural transportation facilities'
allocation_method: allocation_function
allocation_source: !script_function:USDA_ERS_MLU allocate_usda_ers_mlu_land_in_rural_transportation_areas
- literature_sources: {"urban_land_use_for_airports": "2020",
- "urban_land_use_for_railroads": "2020",
- "transportation_sectors_based_on_FHA_fees": "1997"}
+ literature_sources: {
+ "urban_land_use_for_airports": "2020",
+ "urban_land_use_for_railroads": "2020",
+ "transportation_sectors_based_on_FHA_fees": "1997"}
allocation_source_class: None
allocation_source_year: None
allocation_flow: None
@@ -160,11 +169,12 @@ source_names:
- 'Land in urban areas'
allocation_method: allocation_function
allocation_source: !script_function:USDA_ERS_MLU allocate_usda_ers_mlu_land_in_urban_areas
- literature_sources: {"area_of_urban_land_occupied_by_houses_2013": "2017",
- "transportation_sectors_based_on_FHA_fees": "1997",
- "urban_land_use_for_airports": "2020",
- "urban_land_use_for_railroads": "2020",
- "open_space_fraction_of_urban_area": "2020"}
+ literature_sources: {
+ "area_of_urban_land_occupied_by_houses_2013": "2017",
+ "transportation_sectors_based_on_FHA_fees": "1997",
+ "urban_land_use_for_airports": "2020",
+ "urban_land_use_for_railroads": "2020",
+ "open_space_fraction_of_urban_area": "2020"}
allocation_source_class: None
allocation_source_year: None
allocation_flow: None
@@ -175,7 +185,8 @@ source_names:
- 'Other land'
allocation_method: allocation_function
allocation_source: !script_function:USDA_ERS_MLU allocate_usda_ers_mlu_other_land
- literature_sources: {"area_of_rural_land_occupied_by_houses_2013": "2017" }
+ literature_sources: {
+ "area_of_rural_land_occupied_by_houses_2013": "2017" }
allocation_source_class: None
allocation_source_year: None
allocation_flow: None
diff --git a/flowsa/methods/flowbysectormethods/README.md b/flowsa/methods/flowbysectormethods/README.md
index cda43cf9e..d1f3fa368 100644
--- a/flowsa/methods/flowbysectormethods/README.md
+++ b/flowsa/methods/flowbysectormethods/README.md
@@ -1,109 +1,139 @@
# FlowBySector method yaml term descriptions
-Description of parameters in flowbysectormethods yamls. All values are strings unless noted.
+Description of parameters in flowbysectormethods yamls. All values are
+strings unless noted.
## Terms
### Target FBS output specifications
-1. _target_sector_level_: specify desired sector aggregation
- (NAICS_2, NAICS_3, NAICS_4, NAICS_5, NAICS_6)
-2. _target_sector_source_: specify NAICS version 2007, 2012, 2017 (ex. NAICS_2012_Code).
- Recommend NAICS_2012_Code, as the majority of datasets use this version of NAICS
-3. _target_geoscale_: level of geographic aggregation in output parquet (national, state, or county)
-4. _download_if_missing_: (optional) Add and set to 'True' if you would like to download all required
- FBAs from Data Commons rather than generating FBAs locally.
+- _target_sector_level_: specify desired sector aggregation (NAICS_2,
+ NAICS_3, NAICS_4, NAICS_5, NAICS_6)
+- _target_sector_source_: specify NAICS version 2007, 2012, 2017 (ex.
+ NAICS_2012_Code). At this time, only NAICS_2012_Code is supported.
+- _target_geoscale_: level of geographic aggregation in output parquet
+ (national, state, or county)
+- _download_if_missing_: (optional) Add and set to 'True' if you would like
+ to download all required FBAs from Data Commons rather than generating
+ FBAs locally.
### Source specifications (in FBA format)
-1. _source_names_: The name of the FBS dataset or the FBA dataset requiring allocation to sectors
-2. _data_format_: 'FBA', 'FBS', 'FBS_outside_flowsa', loads a FlowByActivity or a FlowBySector
- parquet stored in flowsa, or calls on a specified function to load data from outside flowsa in FBS format
-3. _class_: a text string in 'Class' column of flowbyactivity (ex. Water), see class types in
- (source_catalog.yaml)[https://github.com/USEPA/flowsa/blob/master/flowsa/data/source_catalog.yaml]
-4. _geoscale_to_use_: the geoscale of the FBA set to use for sector allocation
+- _source_names_: The name of the FBS dataset or the FBA dataset requiring
+ allocation to sectors
+- _data_format_: 'FBA', 'FBS', 'FBS_outside_flowsa', loads a FlowByActivity
+ or a FlowBySector parquet stored in flowsa, or calls on a specified
+ function to load data from outside flowsa in FBS format
+- _class_: a text string in 'Class' column of flowbyactivity (ex. Water),
+ see class types in
+ [source_catalog.yaml](https://github.com/USEPA/flowsa/blob/master/flowsa/data/source_catalog.yaml)
+- _geoscale_to_use_: the geoscale of the FBA set to use for sector allocation
(national, state, or county)
-5. _year_: year of available dataset (ex. 2015)
-6. _activity_to_sector_mapping_: (optional) name of activity to sector mapping file, if not provided will use the source name
-7. _source_flows_: (list, optional, only usable with flowsa.flowby.FlowBySector.getFlowBySector()). Specifies the 'Flowable'(s) from the FBS to use.
+- _year_: year of available dataset (ex. 2015)
+- _activity_to_sector_mapping_: (optional) name of activity to sector
+ mapping file, if not provided will use the source name
+- _source_flows_: (list, optional, only usable with
+ flowsa.flowby.FlowBySector.getFlowBySector()). Specifies the 'Flowable'(s) from the FBS to use.
If not provided, all flows are used.
-8. _apply_urban_rural_: Assign flow quantities as urban or rural based on population density by FIPS.
-8. _clean_fba_before_mapping_df_fxn_: (optional) calls on function in the source.py file to clean up/modify
- the FBA data prior to mapping flows.
-10. _clean_fba_df_fxn_: (optional) calls on function in the source.py file to clean up/modify
- the FBA data prior to allocating data to sectors.
-11. _clean_fba_w_sec_df_fxn_: (optional) calls on function in the source.py file to clean up/modify the
- FBA dataframe, after sector columns are added but prior to allocating data to sectors.
-12. _fedefl_mapping_: (optional) name of mapping file in FEDEFL. If not supplied will use
- the source_names
-13. _mfl_mapping_: (optional, should not be used if fedefl_mapping is used) name of mapping file for Material Flow List.
+- _apply_urban_rural_: Assign flow quantities as urban or rural based on
+ population density by FIPS.
+- _clean_fba_before_mapping_df_fxn_: (optional) calls on function in the
+ source.py file to clean up/modify the FBA data prior to mapping flows.
+ Function is called using the `!script_function:` tag.
+- _clean_fba_df_fxn_: (optional) calls on function in the source.py file to
+ clean up/modify the FBA data prior to allocating data to sectors.
+ Function is called using the `!script_function:` tag.
+- _clean_fba_w_sec_df_fxn_: (optional) calls on function in the source.py
+ file to clean up/modify the FBA dataframe, after sector columns are added
+ but prior to allocating data to sectors. Function is called using
+ the `!script_function:` tag.
+- _fedefl_mapping_: (optional) name of mapping file in FEDEFL. If not
+ supplied will use the source_names
+- _mfl_mapping_: (optional, should not be used if fedefl_mapping is used)
+ name of mapping file for Material Flow List.
+- _keep_unmapped_rows_: (optional) default is False, if True will maintain any
+ flows not found in mapping files.
### Activity set specifications
-1. _activity_sets_: A subset of the FBA dataset and the method and allocation datasets used to create a FBS
-2. _names_: (list) specify the subset of the FBA to allocate based on values in the
- Activity Produced/Consumed By fields. To use an external activity set .csv file, use the tag `!from_index:file_name.csv`, then give the name (e.g. `activity_set_1`) of the activity set as found in the csv file.
-3. _source_flows_: (list, optional) specify the 'FlowName'(s) from the FBA to use.
- If not provided, all flows are used.
-4. _allocation_method_: currently written for 'direct', 'allocation_function',
- 'proportional', and 'proportional-flagged'. See descriptions below.
-5. _allocation_source_: The primary data source used to allocate main FBA for
+- _activity_sets_: A subset of the FBA dataset and the method and
+ allocation datasets used to create an FBS
+- _names_: (list) specify the subset of the FBA to allocate based on values in the
+ Activity Produced/Consumed By fields. To use an external activity set
+ .csv file, use the tag `!from_index:file_name.csv`, then give the name (e.g.,
+ `activity_set_1`) of the activity set as found in the csv file.
+- _source_flows_: (list, optional) specify the 'FlowName'(s) from the FBA
+ to use. If not provided, all flows are used.
+- _allocation_method_: currently written for 'direct',
+ 'allocation_function', 'proportional', and 'proportional-flagged'. See
+ descriptions below.
+- _allocation_source_: The primary data source used to allocate main FBA for
specified activity to sectors
-6. _literature_sources_: (optional)
-7. _activity_to_sector_mapping_: (optional) name of activity to sector mapping file, if not provided will use the source name
-8. _allocation_source_class_: specific 'FlowClass' found in the allocation source
- flowbyactivity parquet
-9. _allocation_source_year_: specific to the allocation datasets, use the year relevant
- to the main FBA dataframe
-10. _allocation_flow_: (list) the relevant 'FlowName' values, as found in the source
- flowbyactivity parquet. Use 'None' to capture all flows.
-11. _allocation_compartment_: (list) the relevant 'Compartment' values, as found in the source
- flowbyactivity parquet. Use 'None' to capture all compartments.
-12. _allocation_from_scale_: national, state, or county - dependent on allocation source,
- as not every level exits for sources
-13. _allocation_fba_load_scale_: (optional) Can indicate geographic level of FBA to load,
- helpful when an FBA ia large
-14. _clean_allocation_fba_: (optional) Function to clean up the allocation FBA, as defined in
- the source.py file
-15. _clean_allocation_fba_w_sec_: (optional) Function to clean up the allocation FBA, after
- allocation activities are assigned SectorProducedBy and SectorConsumedBy columns
-16. _allocation_map_to_flow_list_: (optional) If the allocation df and source df need to be matched
- on Context and/or Flowable, set to 'True'
-17. _helper_source_: (optional) secondary df for sector allocation
-18. _helper_method_: currently written for 'multiplication', 'proportional', and 'proportional-flagged'
-19. _helper_activity_to_sector_mapping_: (optional) name of activity to sector mapping file, if not provided will use the source name
-20. _helper_source_class_: specific 'FlowClass' found in the allocation source
- flowbyactivity parquet
-21. _helper_source_year_: specific to the allocation datasets, use the year relevant
- to the main FBA dataframe
-22. _helper_flow_: (list) the relevant 'FlowName' values, as found in the source
- flowbyactivity parquet
-23. _helper_from_scale_: national, state, or county - dependent on allocation source,
- as not every level exits for sources
-24. _clean_helper_fba_: (optional) Function to clean up the helper FBA, as defined in
- the source.py file
-25. _clean_helper_fba_wsec_: (optional) Function to clean up the helper FBA, after
- allocation activities are assigned SectorProducedBy and SectorConsumedBy columns
+- _literature_sources_: (optional) Specific functions that contain values
+ from literature used to modify FBA data.
+- _activity_to_sector_mapping_: (optional) name of activity to sector
+ mapping file, if not provided will use the source name
+- _allocation_source_class_: specific 'FlowClass' found in the allocation
+ source flowbyactivity parquet
+- _allocation_source_year_: specific to the allocation datasets, use the
+ year relevant to the main FBA dataframe
+- _allocation_flow_: (list) the relevant 'FlowName' values, as found in the
+ source flowbyactivity parquet. Use 'None' to capture all flows.
+- _allocation_compartment_: (list) the relevant 'Compartment' values, as
+ found in the source flowbyactivity parquet. Use 'None' to capture all
+ compartments.
+- _allocation_from_scale_: national, state, or county - dependent on
+ allocation source, as not every level exists for sources
+- _allocation_fba_load_scale_: (optional) Can indicate geographic level of
+ FBA to load, helpful when an FBA is large
+- _clean_allocation_fba_: (optional) Function to clean up the allocation
+ FBA, as defined in the source.py file. Function is called using
+ the `!script_function:` tag.
+- _clean_allocation_fba_w_sec_: (optional) Function to clean up the
+ allocation FBA, after allocation activities are assigned SectorProducedBy
+ and SectorConsumedBy columns. Function is called using
+ the `!script_function:` tag.
+- _allocation_map_to_flow_list_: (optional) If the allocation df and source
+ df need to be matched on Context and/or Flowable, set to 'True'
+- _helper_source_: (optional) secondary df for sector allocation
+- _helper_method_: currently written for 'multiplication', 'proportional',
+ and 'proportional-flagged'
+- _helper_activity_to_sector_mapping_: (optional) name of activity to
+ sector mapping file, if not provided will use the source name
+- _helper_source_class_: specific 'FlowClass' found in the allocation
+ source flowbyactivity parquet
+- _helper_source_year_: specific to the allocation datasets, use the year
+ relevant to the main FBA dataframe
+- _helper_flow_: (list) the relevant 'FlowName' values, as found in the
+ source flowbyactivity parquet
+- _helper_from_scale_: national, state, or county - dependent on allocation
+ source, as not every level exists for sources
+- _clean_helper_fba_: (optional) Function to clean up the helper FBA.
+ Function is called using the `!script_function:` tag.
+- _clean_helper_fba_wsec_: (optional) Function to clean up the helper FBA,
+ after allocation activities are assigned SectorProducedBy and
+ SectorConsumedBy columns. Function is called using
+ the `!script_function:` tag.
### Source specifications (in FBS format)
If source data format is specified as 'FBS':
-1. _source_names_: The name of the FBS dataset
-2. _data_format_: 'FBS', loads a FlowBySector
-3. _year_: year of available dataset (ex. 2015)
-4. _clean_fbs_df_fxn_: (optional) apply function to clean the FBS after it is accessed
+- _source_names_: The name of the FBS dataset
+- _data_format_: 'FBS', loads a FlowBySector
+- _year_: year of available dataset (ex. 2015)
+- _clean_fbs_df_fxn_: (optional) apply function to clean the FBS after it
+ is accessed. Function is called using the `!script_function:` tag.
### FBS_outside_flows specifications
-If source data_format is specified as 'FBS_outside_flowsa':
-1. _FBS_datapull_fxn_: name of the function to generate the FBS
-2. _parameters_: (list) parameters to pass into the function
-
-## Allocation Method Descriptions
-1. direct: Activities are directly assigned to sectors using the source to NAICS crosswalk
-2. allocation_function: Activities are assigned to sectors using a specified function
-3. proportional: Activities are proportionally allocated to sectors using specified allocation data source
-4. proportional-flagged: Activities that are flagged (assigned a value of '1') are proportionally allocated
- to sectors using a specified allocation data source. Activities that are not flagged
- (assigned a value of '0') are directly assigned to sectors.
+If source data_format is specified as `FBS_outside_flowsa`:
+- _FBS_datapull_fxn_: name of the function to generate the FBS. Function is
+ called using the `!script_function:` tag.
+- _parameters_: (list) parameters to pass into the function
-## Helper Method
-1. multiplication: Multiply the values in the allocation data source with values sharing the same sectors
- in the helper allocation data source
-2. proportional: Data in allocation source further allocated to sectors proportionally with the helper source
-3. proportional-flagged: Data in allocation source further allocated to sectors proportionally
- when flagged (assigned a value of '1') and directly assigned to sector when not flagged
- (assigned a value of '0')
+## Method Descriptions
+- allocation_function: Activities are assigned to sectors using a specified
+ function
+- direct: Activities are directly assigned to sectors using the source to
+ NAICS crosswalk
+- multiplication: Multiply the values in the allocation data source with
+ values sharing the same sectors in the helper allocation data source
+- proportional: Activities are proportionally allocated to sectors using
+ specified allocation data source
+- proportional-flagged: Activities that are flagged (assigned a value of
+ '1') are proportionally allocated to sectors using a specified allocation
+ data source. Activities that are not flagged (assigned a value of '0')
+ are directly assigned to sectors.
diff --git a/flowsa/methods/flowbysectormethods/USEEIO_summary_target.yaml b/flowsa/methods/flowbysectormethods/USEEIO_summary_target.yaml
new file mode 100644
index 000000000..b3a51eef3
--- /dev/null
+++ b/flowsa/methods/flowbysectormethods/USEEIO_summary_target.yaml
@@ -0,0 +1,13 @@
+# This file can be used to set up target NAICS for standard sectors
+# in a USEEIO summary level model. NAICS are targeted to enable 1:1
+# correspondence between NAICS and BEA Summary sectors
+
+# To use in an FBS method add
+# !include:USEEIO_summary_target.yaml
+# to the top of the method replacing the three parameters below
+
+target_sector_level: NAICS_3
+target_subset_sector_level: {NAICS_4: ['221', '336', '541']}
+# In USEEIO models 221 (Utilities) is disaggregated to 2211, 2212, and 2213
+# '336' and '541' carry over from the BEA summary sectors
+target_sector_source: NAICS_2012_Code
diff --git a/flowsa/methods/flowbysectormethods/Water_national_2010_m1.yaml b/flowsa/methods/flowbysectormethods/Water_national_2010_m1.yaml
index 0bb615d9b..979929fdd 100644
--- a/flowsa/methods/flowbysectormethods/Water_national_2010_m1.yaml
+++ b/flowsa/methods/flowbysectormethods/Water_national_2010_m1.yaml
@@ -37,10 +37,12 @@ source_names:
allocation_source_class: "Employment"
allocation_source_year: 2010
allocation_flow:
- - "Number of employees"
+ - "Number of employees, Federal Government"
+ - "Number of employees, State Government"
+ - "Number of employees, Local Government"
+ - "Number of employees, Private"
allocation_compartment: None
allocation_from_scale: national
- clean_allocation_fba: !script_function:BLS_QCEW clean_bls_qcew_fba
clean_allocation_fba_w_sec: !script_function:BLS_QCEW bls_clean_allocation_fba_w_sec
activity_set_3:
names:
@@ -57,7 +59,6 @@ source_names:
- 'AREA GROWN, IRRIGATED'
allocation_compartment: None
allocation_from_scale: state
- clean_allocation_fba: !script_function:USDA_CoA_Cropland coa_irrigated_cropland_fba_cleanup
clean_allocation_fba_w_sec: !script_function:USDA_CoA_Cropland disaggregate_coa_cropland_to_6_digit_naics_for_water_withdrawal
helper_source: "USDA_IWMS"
helper_method: multiplication
@@ -65,7 +66,7 @@ source_names:
helper_source_year: 2013
helper_flow: None
helper_from_scale: state
- clean_helper_fba_wsec: iwms_aggregation
+ clean_helper_fba_wsec: !script_function:USDA_IWMS iwms_aggregation
activity_set_4:
names:
- "Livestock"
diff --git a/flowsa/methods/flowbysectormethods/Water_national_2010_m2.yaml b/flowsa/methods/flowbysectormethods/Water_national_2010_m2.yaml
index 683ecb6af..e8b82ed05 100644
--- a/flowsa/methods/flowbysectormethods/Water_national_2010_m2.yaml
+++ b/flowsa/methods/flowbysectormethods/Water_national_2010_m2.yaml
@@ -45,9 +45,11 @@ source_names:
helper_source_class: "Employment"
helper_source_year: 2011
helper_flow:
- - "Number of employees"
+ - "Number of employees, Federal Government"
+ - "Number of employees, State Government"
+ - "Number of employees, Local Government"
+ - "Number of employees, Private"
helper_from_scale: national
- clean_helper_fba: !script_function:BLS_QCEW clean_bls_qcew_fba
clean_helper_fba_wsec: !script_function:BLS_QCEW bls_clean_allocation_fba_w_sec
activity_set_3:
names:
@@ -67,9 +69,11 @@ source_names:
helper_source_class: "Employment"
helper_source_year: 2010
helper_flow:
- - "Number of employees"
+ - "Number of employees, Federal Government"
+ - "Number of employees, State Government"
+ - "Number of employees, Local Government"
+ - "Number of employees, Private"
helper_from_scale: national
- clean_helper_fba: !script_function:BLS_QCEW clean_bls_qcew_fba
clean_helper_fba_wsec: !script_function:BLS_QCEW bls_clean_allocation_fba_w_sec
scale_helper_results: !script_function:Blackhurst_IO scale_blackhurst_results_to_usgs_values
activity_set_4:
diff --git a/flowsa/methods/flowbysectormethods/Water_national_2015_m1.yaml b/flowsa/methods/flowbysectormethods/Water_national_2015_m1.yaml
index 46c0aefcd..8efd8fc96 100644
--- a/flowsa/methods/flowbysectormethods/Water_national_2015_m1.yaml
+++ b/flowsa/methods/flowbysectormethods/Water_national_2015_m1.yaml
@@ -37,10 +37,12 @@ source_names:
allocation_source_class: "Employment"
allocation_source_year: 2015
allocation_flow:
- - "Number of employees"
+ - "Number of employees, Federal Government"
+ - "Number of employees, State Government"
+ - "Number of employees, Local Government"
+ - "Number of employees, Private"
allocation_compartment: None
allocation_from_scale: national
- clean_allocation_fba: !script_function:BLS_QCEW clean_bls_qcew_fba
clean_allocation_fba_w_sec: !script_function:BLS_QCEW bls_clean_allocation_fba_w_sec
activity_set_3:
names:
@@ -57,7 +59,6 @@ source_names:
- 'AREA GROWN, IRRIGATED'
allocation_compartment: None
allocation_from_scale: state
- clean_allocation_fba: !script_function:USDA_CoA_Cropland coa_irrigated_cropland_fba_cleanup
clean_allocation_fba_w_sec: !script_function:USDA_CoA_Cropland disaggregate_coa_cropland_to_6_digit_naics_for_water_withdrawal
helper_source: "USDA_IWMS"
helper_method: multiplication
diff --git a/flowsa/methods/flowbysectormethods/Water_national_2015_m2.yaml b/flowsa/methods/flowbysectormethods/Water_national_2015_m2.yaml
index 8cdc4fc7e..a97ece017 100644
--- a/flowsa/methods/flowbysectormethods/Water_national_2015_m2.yaml
+++ b/flowsa/methods/flowbysectormethods/Water_national_2015_m2.yaml
@@ -45,9 +45,11 @@ source_names:
helper_source_class: "Employment"
helper_source_year: 2015
helper_flow:
- - "Number of employees"
+ - "Number of employees, Federal Government"
+ - "Number of employees, State Government"
+ - "Number of employees, Local Government"
+ - "Number of employees, Private"
helper_from_scale: national
- clean_helper_fba: !script_function:BLS_QCEW clean_bls_qcew_fba
clean_helper_fba_wsec: !script_function:BLS_QCEW bls_clean_allocation_fba_w_sec
activity_set_3:
names:
@@ -67,9 +69,11 @@ source_names:
helper_source_class: "Employment"
helper_source_year: 2015
helper_flow:
- - "Number of employees"
+ - "Number of employees, Federal Government"
+ - "Number of employees, State Government"
+ - "Number of employees, Local Government"
+ - "Number of employees, Private"
helper_from_scale: national
- clean_helper_fba: !script_function:BLS_QCEW clean_bls_qcew_fba
clean_helper_fba_wsec: !script_function:BLS_QCEW bls_clean_allocation_fba_w_sec
scale_helper_results: !script_function:Blackhurst_IO scale_blackhurst_results_to_usgs_values
activity_set_4:
diff --git a/flowsa/methods/flowbysectormethods/Water_national_2015_m3.yaml b/flowsa/methods/flowbysectormethods/Water_national_2015_m3.yaml
index 0edd6de50..d5e785068 100644
--- a/flowsa/methods/flowbysectormethods/Water_national_2015_m3.yaml
+++ b/flowsa/methods/flowbysectormethods/Water_national_2015_m3.yaml
@@ -52,10 +52,12 @@ source_names:
allocation_source_class: "Employment"
allocation_source_year: 2015
allocation_flow:
- - "Number of employees"
+ - "Number of employees, Federal Government"
+ - "Number of employees, State Government"
+ - "Number of employees, Local Government"
+ - "Number of employees, Private"
allocation_compartment: None
allocation_from_scale: national
- clean_allocation_fba: !script_function:BLS_QCEW clean_bls_qcew_fba
clean_allocation_fba_w_sec: !script_function:BLS_QCEW bls_clean_allocation_fba_w_sec
activity_set_4:
names:
@@ -72,7 +74,6 @@ source_names:
- 'AREA GROWN, IRRIGATED'
allocation_compartment: None
allocation_from_scale: state
- clean_allocation_fba: !script_function:USDA_CoA_Cropland coa_irrigated_cropland_fba_cleanup
clean_allocation_fba_w_sec: !script_function:USDA_CoA_Cropland disaggregate_coa_cropland_to_6_digit_naics_for_water_withdrawal
helper_source: "USDA_IWMS"
helper_method: multiplication
diff --git a/flowsa/methods/flowbysectormethods/Water_state_2015_m1.yaml b/flowsa/methods/flowbysectormethods/Water_state_2015_m1.yaml
index db6166582..d78578630 100644
--- a/flowsa/methods/flowbysectormethods/Water_state_2015_m1.yaml
+++ b/flowsa/methods/flowbysectormethods/Water_state_2015_m1.yaml
@@ -1,5 +1,4 @@
-target_sector_level: NAICS_4
-target_sector_source: NAICS_2012_Code
+!include:USEEIO_summary_target.yaml
target_geoscale: state
source_names:
"USGS_NWIS_WU":
@@ -33,10 +32,12 @@ source_names:
allocation_source_class: "Employment"
allocation_source_year: 2015
allocation_flow:
- - "Number of employees"
+ - "Number of employees, Federal Government"
+ - "Number of employees, State Government"
+ - "Number of employees, Local Government"
+ - "Number of employees, Private"
allocation_compartment: None
allocation_from_scale: state
- clean_allocation_fba: !script_function:BLS_QCEW clean_bls_qcew_fba
clean_allocation_fba_w_sec: !script_function:BLS_QCEW bls_clean_allocation_fba_w_sec
activity_set_3:
names:
@@ -53,7 +54,6 @@ source_names:
- 'AREA GROWN, IRRIGATED'
allocation_compartment: None
allocation_from_scale: state
- clean_allocation_fba: !script_function:USDA_CoA_Cropland coa_irrigated_cropland_fba_cleanup
clean_allocation_fba_w_sec: !script_function:USDA_CoA_Cropland disaggregate_coa_cropland_to_6_digit_naics_for_water_withdrawal
helper_source: "USDA_IWMS"
helper_method: multiplication
diff --git a/flowsa/schema.py b/flowsa/schema.py
index 2961aadba..85873a83c 100644
--- a/flowsa/schema.py
+++ b/flowsa/schema.py
@@ -170,3 +170,7 @@
'ConsumedBy': [{'flowbyactivity': 'ActivityConsumedBy'},
{'flowbysector': 'SectorConsumedBy'}]
}
+
+dq_fields = ['MeasureofSpread', 'Spread', 'DistributionType', 'Min',
+ 'Max', 'DataReliability', 'DataCollection', 'TemporalCorrelation',
+ 'GeographicalCorrelation', 'TechnologicalCorrelation']
diff --git a/flowsa/sectormapping.py b/flowsa/sectormapping.py
index 71ab54509..c4e1353c1 100644
--- a/flowsa/sectormapping.py
+++ b/flowsa/sectormapping.py
@@ -8,10 +8,11 @@
import pandas as pd
import numpy as np
from esupy.mapping import apply_flow_mapping
+import flowsa
from flowsa.common import get_flowsa_base_name, \
return_true_source_catalog_name, check_activities_sector_like, \
load_yaml_dict, fba_activity_fields, SECTOR_SOURCE_NAME
-from flowsa.schema import activity_fields
+from flowsa.schema import activity_fields, dq_fields
from flowsa.settings import log
from flowsa.flowbyfunctions import fbs_activity_fields, load_crosswalk
from flowsa.validation import replace_naics_w_naics_from_another_year
@@ -230,7 +231,7 @@ def get_fba_allocation_subset(fba_allocation, source, activitynames,
if am == 'proportional-flagged':
subset_by_sector_cols = True
- if check_activities_sector_like(source) is False:
+ if check_activities_sector_like(fba_allocation, sourcename=source) is False:
# read in source crosswalk
df = get_activitytosector_mapping(
sourceconfig.get('activity_to_sector_mapping', source),
@@ -439,3 +440,98 @@ def get_sector_list(sector_level, secondary_sector_level_dict=None):
sector_list = sector_list + sector_add
return sector_list
+
+
+def map_to_BEA_sectors(fbs_load, region, io_level, year):
+ """
+ Map FBS sectors from NAICS to BEA, allocating by gross industry output.
+
+ :param fbs_load: df completed FlowBySector collapsed to single 'Sector'
+ :param region: str, 'state' or 'national'
+ :param io_level: str, 'summary' or 'detail'
+ :param year: year for industry output
+ """
+ from flowsa.sectormapping import get_activitytosector_mapping
+
+ bea = get_BEA_industry_output(region, io_level, year)
+
+ if io_level == 'summary':
+ mapping_col = 'BEA_2012_Summary_Code'
+ elif io_level == 'detail':
+ mapping_col = 'BEA_2012_Detail_Code'
+
+ # Prepare NAICS:BEA mapping file
+ mapping = (load_crosswalk('BEA')
+ .rename(columns={mapping_col: 'BEA',
+ 'NAICS_2012_Code': 'Sector'}))
+ mapping = (mapping.drop(
+ columns=mapping.columns.difference(['Sector','BEA']))
+ .drop_duplicates(ignore_index=True)
+ .dropna(subset=['Sector']))
+ mapping['Sector'] = mapping['Sector'].astype(str)
+
+ # Create allocation ratios where one to many NAICS:BEA
+ dup = mapping[mapping['Sector'].duplicated(keep=False)]
+ dup = dup.merge(bea, how='left', on='BEA')
+ dup['Allocation'] = dup['Output']/dup.groupby(
+ ['Sector','Location']).Output.transform('sum')
+
+ # Update and allocate to sectors
+ fbs = (fbs_load.merge(
+ mapping.drop_duplicates(subset='Sector', keep=False),
+ how='left',
+ on='Sector'))
+ fbs = fbs.merge(dup.drop(columns='Output'),
+ how='left', on=['Sector', 'Location'],
+ suffixes=(None, '_y'))
+ fbs['Allocation'] = fbs['Allocation'].fillna(1)
+ fbs['BEA'] = fbs['BEA'].fillna(fbs['BEA_y'])
+ fbs['FlowAmount'] = fbs['FlowAmount'] * fbs['Allocation']
+
+ fbs = (fbs.drop(columns=dq_fields +
+ ['Sector', 'SectorSourceName',
+ 'BEA_y', 'Allocation'], errors='ignore')
+ .rename(columns={'BEA':'Sector'}))
+
+ if (abs(1-(sum(fbs['FlowAmount']) /
+ sum(fbs_load['FlowAmount'])))) > 0.005:
+ log.warning('Data loss upon BEA mapping')
+
+ return fbs
+
+
+def get_BEA_industry_output(region, io_level, year):
+ """
+ Get FlowByActivity for industry output from state or national datasets
+ :param region: str, 'state' or 'national'
+ :param io_level: str, 'summary' or 'detail'
+ :param year: year for industry output
+ """
+ if region == 'state':
+ fba = 'stateio_Industry_GO'
+ if io_level == 'detail':
+ raise TypeError ('detail models not available for states')
+ elif region == 'national':
+ fba = 'BEA_GDP_GrossOutput'
+
+ # Get output by BEA sector
+ bea = flowsa.getFlowByActivity(fba, year)
+ bea = (
+ bea.drop(columns=bea.columns.difference(
+ ['FlowAmount','ActivityProducedBy','Location']))
+ .rename(columns={'FlowAmount':'Output',
+ 'ActivityProducedBy': 'BEA'}))
+
+    # If needed, aggregate from detail to summary
+ if region == 'national' and io_level == 'summary':
+ bea_mapping = (load_crosswalk('BEA')
+ [['BEA_2012_Detail_Code','BEA_2012_Summary_Code']]
+ .drop_duplicates()
+ .rename(columns={'BEA_2012_Detail_Code': 'BEA'}))
+ bea = (bea.merge(bea_mapping, how='left', on='BEA')
+ .drop(columns=['BEA'])
+ .rename(columns={'BEA_2012_Summary_Code': 'BEA'}))
+ bea = (bea.groupby(['BEA','Location']).agg({'Output': 'sum'})
+ .reset_index())
+
+ return bea
diff --git a/flowsa/test_examples.py b/flowsa/test_examples.py
index 9a6f4189f..97410a4a5 100644
--- a/flowsa/test_examples.py
+++ b/flowsa/test_examples.py
@@ -1,6 +1,7 @@
"""
Test functions work
"""
+import pytest
import flowsa
@@ -17,3 +18,18 @@ def test_get_flows_by_sector():
def test_write_bibliography():
flowsa.writeFlowBySectorBibliography('Water_national_2015_m1')
+
+@pytest.mark.generate_fbs
+def test_generate_fbs():
+ """Generate all FBS from methods in repo."""
+ for m in flowsa.seeAvailableFlowByModels("FBS", print_method=False):
+ if m not in ['BEA_summary_target',
+ 'USEEIO_summary_target',
+ 'Electricity_gen_emissions_national_2016']:
+ print("--------------------------------\n"
+ f"Method: {m}\n"
+ "--------------------------------")
+ flowsa.flowbysector.main(method=m, download_FBAs_if_missing=True)
+
+if __name__ == "__main__":
+ test_generate_fbs()
\ No newline at end of file
diff --git a/flowsa/test_methods.py b/flowsa/test_methods.py
index f8fd84b4d..092a1b48c 100644
--- a/flowsa/test_methods.py
+++ b/flowsa/test_methods.py
@@ -4,6 +4,7 @@
"""
import pytest
from flowsa import seeAvailableFlowByModels
+import flowsa.exceptions
from flowsa.flowbyactivity import load_yaml_dict, assemble_urls_for_query,\
call_urls
@@ -18,7 +19,6 @@ def test_FBA_urls():
year = max(config['years'])
if ((config.get('url', 'None') == 'None') or
- (config.get('api_key_required', False)) or
(m == 'EPA_EQUATES')):
continue
@@ -33,6 +33,9 @@ def test_FBA_urls():
config=config)
call_urls(url_list=urls, source=m, year=str(year),
config=config)
+ except flowsa.exceptions.APIError:
+ print('API Key required, skipping url')
+ continue
except Exception:
error_list.append(m)
if error_list:
diff --git a/flowsa/validation.py b/flowsa/validation.py
index c498a470d..01a2446e7 100644
--- a/flowsa/validation.py
+++ b/flowsa/validation.py
@@ -7,8 +7,10 @@
import pandas as pd
import numpy as np
+import flowsa
from flowsa.flowbyfunctions import aggregator, create_geoscale_list,\
- subset_df_by_geoscale, sector_aggregation
+ subset_df_by_geoscale, sector_aggregation, collapse_fbs_sectors,\
+ subset_df_by_sector_lengths
from flowsa.dataclean import replace_strings_with_NoneType, \
replace_NoneType_with_empty_cells
from flowsa.common import sector_level_key, \
@@ -59,47 +61,58 @@ def check_if_activities_match_sectors(fba):
return activities_missing_sectors
-def check_if_data_exists_at_geoscale(df, geoscale, activitynames='All'):
+def check_if_data_exists_at_geoscale(df_load, geoscale):
"""
Check if an activity or a sector exists at the specified geoscale
- :param df: flowbyactivity dataframe
- :param activitynames: Either an activity name (ex. 'Domestic')
- or a sector (ex. '1124')
+ :param df_load: df with activity columns
:param geoscale: national, state, or county
- :return: str, 'yes' or 'no'
"""
- # if any activity name is specified, check if activity data
- # exists at the specified geoscale
- activity_list = []
- if activitynames != 'All':
- if isinstance(activitynames, str):
- activity_list.append(activitynames)
- else:
- activity_list = activitynames
- # check for specified activity name
- df = df[(df[fba_activity_fields[0]].isin(activity_list)) |
- (df[fba_activity_fields[1]].isin(activity_list)
- )].reset_index(drop=True)
- else:
- activity_list.append('activities')
-
# filter by geoscale depends on Location System
- fips = create_geoscale_list(df, geoscale)
-
- df = df[df['Location'].isin(fips)]
-
- if len(df) == 0:
+ fips_list = create_geoscale_list(df_load, geoscale)
+ fips = pd.DataFrame(fips_list, columns=['FIPS'])
+
+ activities = df_load[['ActivityProducedBy', 'ActivityConsumedBy']]\
+ .drop_duplicates().reset_index(drop=True)
+ # add tmp column and merge
+ fips['tmp'] = 1
+ activities['tmp'] = 1
+ activities = activities.merge(fips, on='tmp').drop(columns='tmp')
+
+ # merge activities with df and determine which FIPS are missing for each
+ # activity
+ df = df_load[df_load['Location'].isin(fips_list)]
+ # if activities are defined, subset df
+ # df = df[df['']]
+
+ dfm = df.merge(activities,
+ left_on=['ActivityProducedBy', 'ActivityConsumedBy',
+ 'Location'],
+ right_on=['ActivityProducedBy', 'ActivityConsumedBy',
+ 'FIPS'],
+ how='outer')
+ # split into a df where states have data and a df where states do not have data
+ df1 = dfm[~dfm['FlowAmount'].isna()]
+ df2 = dfm[dfm['FlowAmount'].isna()]
+ df2 = df2[['ActivityProducedBy', 'ActivityConsumedBy',
+ 'FIPS']].reset_index(drop=True)
+
+ # define source name and year
+ sn = df_load['SourceName'][0]
+ y = df_load['Year'][0]
+
+ if len(df1) == 0:
vLog.info(
- "No flows found for %s at the %s scale",
- ', '.join(activity_list), geoscale)
- exists = "No"
- else:
- vLog.info("Flows found for %s at the %s scale",
- ', '.join(activity_list), geoscale)
- exists = "Yes"
-
- return exists
+ "No flows found for activities in %s %s at the %s scale",
+ sn, y, geoscale)
+ if len(df2) > 0:
+ # if len(df2) > 1:
+ df2 = df2.groupby(
+ ['ActivityProducedBy', 'ActivityConsumedBy'], dropna=False).agg(
+ lambda col: ','.join(col)).reset_index()
+ vLogDetailed.info("There are %s, activity combos that do not have "
+ "data in %s %s: \n {}".format(df2.to_string()),
+ geoscale, sn, y)
def check_if_data_exists_at_less_aggregated_geoscale(
@@ -176,15 +189,11 @@ def check_allocation_ratios(flow_alloc_df_load, activity_set, config, attr):
# if in the attr dictionary, merge columns are identified,
# the merge columns need to be accounted for in the grouping/checking of
# allocation ratios
+ subset_cols = ['FBA_Activity', 'Location', 'SectorLength', 'FlowAmountRatio']
+ groupcols = ['FBA_Activity', 'Location', 'SectorLength']
if 'allocation_merge_columns' in attr:
- subset_cols = ['FBA_Activity', 'Location', 'SectorLength',
- 'FlowAmountRatio'] + attr['allocation_merge_columns']
- groupcols = ['FBA_Activity', 'Location',
- 'SectorLength'] + attr['allocation_merge_columns']
- else:
- subset_cols = ['FBA_Activity', 'Location',
- 'SectorLength', 'FlowAmountRatio']
- groupcols = ['FBA_Activity', 'Location', 'SectorLength']
+ subset_cols = subset_cols + attr['allocation_merge_columns']
+ groupcols = groupcols + attr['allocation_merge_columns']
# create column of sector lengths
flow_alloc_df =\
@@ -335,7 +344,7 @@ def calculate_flowamount_diff_between_dfs(dfa_load, dfb_load):
def compare_activity_to_sector_flowamounts(fba_load, fbs_load,
- activity_set, source_name, config):
+ activity_set, config):
"""
Function to compare the loaded flowbyactivity with the final flowbysector
by activityname (if exists) to target sector level
@@ -343,12 +352,11 @@ def compare_activity_to_sector_flowamounts(fba_load, fbs_load,
:param fba_load: df, FBA loaded and mapped using FEDEFL
:param fbs_load: df, final FBS df
:param activity_set: str, activity set
- :param source_name: str, source name
:param config: dictionary, method yaml
:return: printout data differences between loaded FBA and FBS output,
save results as csv in local directory
"""
- if check_activities_sector_like(source_name):
+ if check_activities_sector_like(fba_load):
vLog.debug('Not comparing loaded FlowByActivity to FlowBySector '
'ratios for a dataset with sector-like activities because '
'if there are modifications to flowamounts for a sector, '
@@ -473,23 +481,19 @@ def compare_fba_geo_subset_and_fbs_output_totals(
# extract relevant geoscale data or aggregate existing data
fba = subset_df_by_geoscale(fba_load, from_scale,
method['target_geoscale'])
- if check_activities_sector_like(source_name):
+ if check_activities_sector_like(fba_load):
# if activities are sector-like, run sector aggregation and then
# subset df to only keep NAICS2
- fba = fba[['Class', 'FlowAmount', 'Unit', 'Context',
+ fba = fba[['Class', 'SourceName', 'FlowAmount', 'Unit', 'Context',
'ActivityProducedBy', 'ActivityConsumedBy', 'Location',
'LocationSystem']]
# rename the activity cols to sector cols for purposes of aggregation
fba = fba.rename(columns={'ActivityProducedBy': 'SectorProducedBy',
'ActivityConsumedBy': 'SectorConsumedBy'})
- group_cols_agg = ['Class', 'Context', 'Unit', 'Location',
- 'LocationSystem', 'SectorProducedBy',
- 'SectorConsumedBy']
fba = sector_aggregation(fba)
# subset fba to only include NAICS2
fba = replace_NoneType_with_empty_cells(fba)
- fba = fba[fba['SectorConsumedBy'].apply(lambda x: len(x) == 2) |
- fba['SectorProducedBy'].apply(lambda x: len(x) == 2)]
+ fba = subset_df_by_sector_lengths(fba, [2])
# subset/agg dfs
col_subset = ['Class', 'FlowAmount', 'Unit', 'Context',
'Location', 'LocationSystem']
@@ -511,11 +515,16 @@ def compare_fba_geo_subset_and_fbs_output_totals(
try:
# merge FBA and FBS totals
df_merge = fba_agg.merge(fbs_agg, how='left')
+ df_merge['FBS_amount'] = df_merge['FBS_amount'].fillna(0)
df_merge['FlowAmount_difference'] = \
df_merge['FBA_amount'] - df_merge['FBS_amount']
df_merge['Percent_difference'] = \
(df_merge['FlowAmount_difference']/df_merge['FBA_amount']) * 100
-
+ # cases where flow amount diff is 0 but because fba amount is 0,
+ # percent diff is null. Fill those cases with 0s
+ df_merge['Percent_difference'] = np.where(
+ (df_merge['FlowAmount_difference'] == 0) &
+ (df_merge['FBA_amount'] == 0), 0, df_merge['Percent_difference'])
# reorder
df_merge = df_merge[['Class', 'Context', 'Location', 'LocationSystem',
'FBA_amount', 'FBA_unit', 'FBS_amount',
@@ -583,44 +592,122 @@ def compare_fba_geo_subset_and_fbs_output_totals(
'for FlowByActivity and FlowBySector')
-def check_summation_at_sector_lengths(df):
+def compare_summation_at_sector_lengths_between_two_dfs(df1, df2):
"""
Check summed 'FlowAmount' values at each sector length
- :param df: df, requires Sector column
- :return: df, includes summed 'FlowAmount' values at each sector length
+ :param df1: df, first df of values with sector columns
+ :param df2: df, second df of values with sector columns
+ :return: df, comparison of sector summation results by region, with a
+ printout if the second df has flow amounts greater than the first df
"""
+ from flowsa.flowbyfunctions import assign_columns_of_sector_levels
- # columns to keep
- df_cols = [e for e in df.columns if e not in
- ('MeasureofSpread', 'Spread', 'DistributionType', 'Min',
- 'Max', 'DataReliability', 'DataCollection', 'FlowType',
- 'Compartment', 'Description', 'Activity')]
- # subset df
- df2 = df[df_cols]
-
- # rename columns and clean up df
- df2 = df2[~df2['Sector'].isnull()]
-
- df2 = df2.assign(SectorLength=len(df2['Sector']))
-
- # sum flowamounts by sector length
- denom_df = df2.copy()
- denom_df.loc[:, 'Denominator'] = denom_df.groupby(
- ['Location', 'SectorLength'])['FlowAmount'].transform('sum')
-
- summed_df = denom_df.drop(
- columns=['Sector', 'FlowAmount']).drop_duplicates().reset_index(
- drop=True)
+ agg_cols = ['Class', 'SourceName', 'FlowName', 'Unit', 'FlowType',
+ 'Compartment', 'Location', 'Year', 'SectorProducedByLength',
+ 'SectorConsumedByLength']
- # max value
- maxv = max(summed_df['Denominator'].apply(lambda x: x))
-
- # percent of total accounted for
- summed_df = summed_df.assign(percentOfTot=summed_df['Denominator']/maxv)
+ df_list = []
+ for df in [df1, df2]:
+ df = replace_NoneType_with_empty_cells(df)
+ df = assign_columns_of_sector_levels(df)
+ # sum flowamounts by sector length
+ dfsum = df.groupby(agg_cols).agg({'FlowAmount': 'sum'}).reset_index()
+ df_list.append(dfsum)
+
+ df_list[0] = df_list[0].rename(columns={'FlowAmount': 'df1'})
+ df_list[1] = df_list[1].rename(columns={'FlowAmount': 'df2'})
+ dfm = df_list[0].merge(df_list[1], how='outer')
+ dfm = dfm.fillna(0)
+ dfm['flowIncrease_df1_to_df2_perc'] = (dfm['df2'] - dfm['df1'])/dfm[
+ 'df1'] * 100
+ # dfm2 = dfm[dfm['flowIncrease_df1_to_df2'] != 0]
+ # drop cases where sector length is 0 because not included in naics cw
+ dfm2 = dfm[~((dfm['SectorProducedByLength'] == 0) & (dfm[
+ 'SectorConsumedByLength'] == 0))]
+ # sort df
+ dfm2 = dfm2.sort_values(['Location', 'SectorProducedByLength',
+ 'SectorConsumedByLength']).reset_index(drop=True)
+
+ dfm3 = dfm2[dfm2['flowIncrease_df1_to_df2_perc'] < 0]
+
+ if len(dfm3) > 0:
+ log.info('See validation log for cases where the second dataframe '
+ 'has flow amounts greater than the first dataframe at the '
+ 'same location/sector lengths.')
+ vLogDetailed.info('The second dataframe has flow amounts greater than '
+ 'the first dataframe at the same sector lengths: '
+ '\n {}'.format(dfm3.to_string()))
+ else:
+ vLogDetailed.info('The second dataframe does not have flow amounts '
+ 'greater than the first dataframe at any sector '
+ 'length')
- summed_df = summed_df.sort_values(['SectorLength']).reset_index(drop=True)
- return summed_df
+def compare_child_to_parent_sectors_flowamounts(df_load):
+ """
+ Sum child sectors up to one sector and compare to parent sector values
+ :param df_load: df, contains sector columns
+ :return: comparison of flow values
+ """
+ from flowsa.flowbyfunctions import return_primary_sector_column, \
+ assign_sector_match_column
+
+ merge_cols = [e for e in df_load.columns if e in [
+ 'Class', 'SourceName', 'MetaSources', 'FlowName', 'Unit',
+ 'FlowType', 'Flowable', 'ActivityProducedBy', 'ActivityConsumedBy',
+ 'Compartment', 'Context', 'Location', 'Year', 'Description']]
+ # determine if activities are sector-like
+ sector_like_activities = check_activities_sector_like(df_load)
+ # if activities are sector like, drop columns from merge group
+ if sector_like_activities:
+ merge_cols = [e for e in merge_cols if e not in (
+ 'ActivityProducedBy', 'ActivityConsumedBy')]
+
+ agg_cols = merge_cols + ['SectorProducedMatch', 'SectorConsumedMatch']
+ dfagg = pd.DataFrame()
+ for i in range(3, 7):
+ df = subset_df_by_sector_lengths(df_load, [i])
+ for s in ['Produced', 'Consumed']:
+ df = assign_sector_match_column(df, f'Sector{s}By', i, i-1).rename(
+ columns={'sector_group': f'Sector{s}Match'})
+ df = df.fillna('')
+ df2 = df.groupby(agg_cols).agg(
+ {'FlowAmount': 'sum'}).rename(columns={
+ 'FlowAmount': f'ChildNAICSSum'}).reset_index()
+ dfagg = pd.concat([dfagg, df2], ignore_index=True)
+
+ # merge new df with summed child naics to original df
+ drop_cols = [e for e in df_load.columns if e in
+ ['MeasureofSpread', 'Spread', 'DistributionType', 'Min',
+ 'Max', 'DataReliability', 'DataCollection', 'Description',
+ 'SectorProducedMatch', 'SectorConsumedMatch']]
+ dfm = df_load.merge(dfagg, how='left', left_on=merge_cols + [
+ 'SectorProducedBy', 'SectorConsumedBy'], right_on=agg_cols).drop(
+ columns=drop_cols)
+ dfm = dfm.assign(FlowDiff=dfm['ChildNAICSSum'] - dfm['FlowAmount'])
+ dfm['PercentDiff'] = (dfm['FlowDiff'] / dfm['FlowAmount']) * 100
+
+ cols_subset = [e for e in dfm.columns if e in [
+ 'Class', 'SourceName', 'MetaSources', 'Flowable', 'FlowName',
+ 'Unit', 'FlowType', 'ActivityProducedBy', 'ActivityConsumedBy',
+ 'Context', 'Location', 'Year', 'SectorProducedBy',
+ 'SectorConsumedBy', 'FlowAmount', 'ChildNAICSSum', 'PercentDiff']]
+ dfm = dfm[cols_subset]
+
+ # subset df where child sectors sum to be different than parent sectors
+ tolerance = 1
+ dfm2 = dfm[(dfm['PercentDiff'] > tolerance) |
+ (dfm['PercentDiff'] < - tolerance)].reset_index(drop=True)
+
+ if len(dfm2) > 0:
+ log.info('See validation log for cases where child sectors sum to be '
+ 'different than parent sectors by at least %s%%.', tolerance)
+ vLogDetailed.info('There are cases where child sectors sum to be '
+ 'different than parent sectors by at least %s%%: '
+ '\n {}'.format(dfm2.to_string()), tolerance)
+ else:
+ vLogDetailed.info('No child sectors sum to be different than parent '
+ 'sectors by at least %s%%.', tolerance)
def check_for_nonetypes_in_sector_col(df):
@@ -984,3 +1071,88 @@ def compare_df_units(df1_load, df2_load):
# if list is not empty, print warning that units are different
if list_comp:
log.info('Merging df with %s and df with %s units', df1, df2)
+
+
+def calculate_industry_coefficients(fbs_load, year,region,
+ io_level, impacts=False):
+ """
+ Generates sector coefficients (flow/$) for all sectors for all locations.
+
+ :param fbs_load: df, flow by sector output
+ :param year: year for industry output dataset
+ :param region: str, 'state' or 'national'
+ :param io_level: str, 'summary' or 'detail'
+ :param impacts: bool, True to apply and aggregate on impacts
+ False to compare flow/contexts
+ """
+ from flowsa.sectormapping import map_to_BEA_sectors,\
+ get_BEA_industry_output
+
+ fbs = collapse_fbs_sectors(fbs_load)
+
+ fbs = map_to_BEA_sectors(fbs, region, io_level, year)
+
+ inventory = not(impacts)
+ if impacts:
+ try:
+ import lciafmt
+ fbs_summary = (lciafmt.apply_lcia_method(fbs, 'TRACI2.1')
+ .rename(columns={'FlowAmount': 'InvAmount',
+ 'Impact': 'FlowAmount'}))
+ groupby_cols = ['Location', 'Sector',
+ 'Indicator', 'Indicator unit']
+ sort_by_cols = ['Indicator', 'Sector', 'Location']
+ except ImportError:
+ log.warning('lciafmt not installed')
+ inventory = True
+ except AttributeError:
+ log.warning('check lciafmt branch')
+ inventory = True
+
+ if inventory:
+ fbs_summary = fbs.copy()
+ groupby_cols = ['Location', 'Sector',
+ 'Flowable', 'Context', 'Unit']
+ sort_by_cols = ['Context', 'Flowable',
+ 'Sector', 'Location']
+
+ # Update location if needed prior to aggregation
+ if region == 'national':
+ fbs_summary["Location"] = US_FIPS
+
+ fbs_summary = (fbs_summary.groupby(groupby_cols)
+ .agg({'FlowAmount': 'sum'}).
+ reset_index())
+
+ bea = get_BEA_industry_output(region, io_level, year)
+
+ # Add sector output and assign coefficients
+ fbs_summary = fbs_summary.merge(bea.rename(
+ columns={'BEA': 'Sector'}), how = 'left',
+ on=['Sector','Location'])
+ fbs_summary['Coefficient'] = (fbs_summary['FlowAmount'] /
+ fbs_summary['Output'])
+ fbs_summary = fbs_summary.sort_values(by=sort_by_cols)
+
+ return fbs_summary
+
+
+if __name__ == "__main__":
+ df1 = calculate_industry_coefficients(
+ flowsa.getFlowBySector('Water_national_2015_m1'), 2015,
+ "national", "summary", False)
+ df2 = calculate_industry_coefficients(
+ flowsa.getFlowBySector('GRDREL_national_2017'), 2017,
+ "national", "summary", True)
+ df3 = calculate_industry_coefficients(
+ flowsa.getFlowBySector('GRDREL_national_2017'), 2017,
+ "national", "detail", True)
+ df4 = calculate_industry_coefficients(
+ flowsa.getFlowBySector('GRDREL_state_2017'), 2017,
+ "national", "detail", True)
+ try:
+ df5 = calculate_industry_coefficients(
+ flowsa.getFlowBySector('GRDREL_state_2017'), 2017,
+ "state", "detail", True)
+ except TypeError:
+ df5 = None
diff --git a/pytest.ini b/pytest.ini
index c3432220c..e4a7c9a1d 100644
--- a/pytest.ini
+++ b/pytest.ini
@@ -3,3 +3,5 @@ log_cli = True
log_cli_level = INFO
norecursedirs = build dist scripts examples
addopts = --doctest-modules
+markers =
+ generate_fbs: test function to generate all FBS
diff --git a/scripts/README.md b/scripts/FlowByActivity_Crosswalks/README.md
similarity index 100%
rename from scripts/README.md
rename to scripts/FlowByActivity_Crosswalks/README.md
diff --git a/scripts/common_scripts.py b/scripts/FlowByActivity_Crosswalks/common_scripts.py
similarity index 100%
rename from scripts/common_scripts.py
rename to scripts/FlowByActivity_Crosswalks/common_scripts.py
diff --git a/scripts/FlowByActivity_Crosswalks/write_Crosswalk_BEA_2012_Detail.py b/scripts/FlowByActivity_Crosswalks/write_Crosswalk_BEA_2012.py
similarity index 66%
rename from scripts/FlowByActivity_Crosswalks/write_Crosswalk_BEA_2012_Detail.py
rename to scripts/FlowByActivity_Crosswalks/write_Crosswalk_BEA_2012.py
index 0b81ce25a..292486b32 100644
--- a/scripts/FlowByActivity_Crosswalks/write_Crosswalk_BEA_2012_Detail.py
+++ b/scripts/FlowByActivity_Crosswalks/write_Crosswalk_BEA_2012.py
@@ -1,26 +1,26 @@
-# write_Crosswalk_BEA_2012_Detail.py (scripts)
+# write_Crosswalk_BEA_2012.py (scripts)
# !/usr/bin/env python3
# coding=utf-8
"""
-Create a crosswalk linking BEA to NAICS for 2012 Detail
+Create a crosswalk linking BEA to NAICS for 2012 for any level
"""
from flowsa.common import load_crosswalk
from flowsa.settings import datapath
-if __name__ == '__main__':
+def write_BEA_crosswalk(level='Detail'):
cw_load = load_crosswalk('BEA')
- cw = cw_load[['BEA_2012_Detail_Code',
+ cw = cw_load[[f'BEA_2012_{level}_Code',
'NAICS_2012_Code']].drop_duplicates().reset_index(drop=True)
# drop all rows with naics >6
cw = cw[cw['NAICS_2012_Code'].apply(lambda x: len(str(x)) == 6)].reset_index(drop=True)
df = cw.rename(columns={"NAICS_2012_Code": "Sector",
- "BEA_2012_Detail_Code":"Activity"})
+ f"BEA_2012_{level}_Code":"Activity"})
df['SectorSourceName'] = 'NAICS_2012_Code'
- df['ActivitySourceName'] = 'BEA_2012_Detail_Code'
+ df['ActivitySourceName'] = f'BEA_2012_{level}_Code'
df.dropna(subset=["Sector"], inplace=True)
# assign sector type
df['SectorType'] = None
@@ -32,4 +32,8 @@
df = df[['ActivitySourceName', 'Activity', 'SectorSourceName', 'Sector', 'SectorType']]
# save as csv
df.to_csv(datapath + "activitytosectormapping/" +
- "NAICS_Crosswalk_BEA_2012_Detail.csv", index=False)
+ f"NAICS_Crosswalk_BEA_2012_{level}.csv", index=False)
+
+if __name__ == '__main__':
+ write_BEA_crosswalk('Detail')
+ write_BEA_crosswalk('Summary')
\ No newline at end of file
diff --git a/scripts/FlowByActivity_Crosswalks/write_Crosswalk_BEA_Make_Table.py b/scripts/FlowByActivity_Crosswalks/write_Crosswalk_BEA_Make_Table.py
index a0ec4ee80..932f74662 100644
--- a/scripts/FlowByActivity_Crosswalks/write_Crosswalk_BEA_Make_Table.py
+++ b/scripts/FlowByActivity_Crosswalks/write_Crosswalk_BEA_Make_Table.py
@@ -9,7 +9,7 @@
import pandas as pd
from flowsa.common import load_crosswalk
from flowsa.settings import datapath
-from scripts.common_scripts import unique_activity_names, order_crosswalk
+from scripts.FlowByActivity_Crosswalks.common_scripts import unique_activity_names, order_crosswalk
def assign_naics(df_load):
diff --git a/scripts/FlowByActivity_Crosswalks/write_Crosswalk_BLM_PLS.py b/scripts/FlowByActivity_Crosswalks/write_Crosswalk_BLM_PLS.py
index 922088277..a75decc96 100644
--- a/scripts/FlowByActivity_Crosswalks/write_Crosswalk_BLM_PLS.py
+++ b/scripts/FlowByActivity_Crosswalks/write_Crosswalk_BLM_PLS.py
@@ -10,7 +10,7 @@
"""
import pandas as pd
from flowsa.settings import datapath
-from scripts.common_scripts import unique_activity_names, order_crosswalk
+from scripts.FlowByActivity_Crosswalks.common_scripts import unique_activity_names, order_crosswalk
def assign_naics(df):
diff --git a/scripts/FlowByActivity_Crosswalks/write_Crosswalk_Blackhurst_IO.py b/scripts/FlowByActivity_Crosswalks/write_Crosswalk_Blackhurst_IO.py
index 66abe6396..858c17e29 100644
--- a/scripts/FlowByActivity_Crosswalks/write_Crosswalk_Blackhurst_IO.py
+++ b/scripts/FlowByActivity_Crosswalks/write_Crosswalk_Blackhurst_IO.py
@@ -11,7 +11,7 @@
import pandas as pd
from flowsa.common import load_crosswalk
from flowsa.settings import datapath
-from scripts.common_scripts import unique_activity_names, order_crosswalk
+from scripts.FlowByActivity_Crosswalks.common_scripts import unique_activity_names, order_crosswalk
def assign_naics(df_load):
diff --git a/scripts/FlowByActivity_Crosswalks/write_Crosswalk_CalRecycle_WasteCharacterization.py b/scripts/FlowByActivity_Crosswalks/write_Crosswalk_CalRecycle_WasteCharacterization.py
index 462c4864d..faaec2d1e 100644
--- a/scripts/FlowByActivity_Crosswalks/write_Crosswalk_CalRecycle_WasteCharacterization.py
+++ b/scripts/FlowByActivity_Crosswalks/write_Crosswalk_CalRecycle_WasteCharacterization.py
@@ -7,7 +7,7 @@
"""
import pandas as pd
from flowsa.settings import datapath, externaldatapath
-from scripts.common_scripts import unique_activity_names, order_crosswalk
+from scripts.FlowByActivity_Crosswalks.common_scripts import unique_activity_names, order_crosswalk
from flowsa.data_source_scripts.CalRecycle_WasteCharacterization import produced_by
diff --git a/scripts/FlowByActivity_Crosswalks/write_Crosswalk_Census_CBP.py b/scripts/FlowByActivity_Crosswalks/write_Crosswalk_Census_CBP.py
index f640c49f3..0dcc36266 100644
--- a/scripts/FlowByActivity_Crosswalks/write_Crosswalk_Census_CBP.py
+++ b/scripts/FlowByActivity_Crosswalks/write_Crosswalk_Census_CBP.py
@@ -7,7 +7,7 @@
"""
import pandas as pd
from flowsa.settings import datapath
-from scripts.common_scripts import unique_activity_names, order_crosswalk
+from scripts.FlowByActivity_Crosswalks.common_scripts import unique_activity_names, order_crosswalk
if __name__ == '__main__':
diff --git a/scripts/FlowByActivity_Crosswalks/write_Crosswalk_Census_VIP.py b/scripts/FlowByActivity_Crosswalks/write_Crosswalk_Census_VIP.py
index b8d246af6..a6822f938 100644
--- a/scripts/FlowByActivity_Crosswalks/write_Crosswalk_Census_VIP.py
+++ b/scripts/FlowByActivity_Crosswalks/write_Crosswalk_Census_VIP.py
@@ -7,7 +7,7 @@
"""
import pandas as pd
from flowsa.settings import datapath, externaldatapath
-from scripts.common_scripts import unique_activity_names, order_crosswalk
+from scripts.FlowByActivity_Crosswalks.common_scripts import unique_activity_names, order_crosswalk
def assign_naics(df):
diff --git a/scripts/FlowByActivity_Crosswalks/write_Crosswalk_EPA_CDDPath.py b/scripts/FlowByActivity_Crosswalks/write_Crosswalk_EPA_CDDPath.py
index 3321042aa..f4f576af7 100644
--- a/scripts/FlowByActivity_Crosswalks/write_Crosswalk_EPA_CDDPath.py
+++ b/scripts/FlowByActivity_Crosswalks/write_Crosswalk_EPA_CDDPath.py
@@ -7,7 +7,7 @@
"""
import pandas as pd
from flowsa.settings import datapath, externaldatapath
-from scripts.common_scripts import unique_activity_names, order_crosswalk
+from scripts.FlowByActivity_Crosswalks.common_scripts import unique_activity_names, order_crosswalk
def assign_naics(df):
diff --git a/scripts/FlowByActivity_Crosswalks/write_Crosswalk_NETL_EIA_PlantWater.py b/scripts/FlowByActivity_Crosswalks/write_Crosswalk_NETL_EIA_PlantWater.py
index 27738fe97..0f4a52e22 100644
--- a/scripts/FlowByActivity_Crosswalks/write_Crosswalk_NETL_EIA_PlantWater.py
+++ b/scripts/FlowByActivity_Crosswalks/write_Crosswalk_NETL_EIA_PlantWater.py
@@ -15,7 +15,7 @@
"""
from flowsa.common import datapath
-from scripts.common_scripts import unique_activity_names, order_crosswalk
+from scripts.FlowByActivity_Crosswalks.common_scripts import unique_activity_names, order_crosswalk
def assign_naics(df):
diff --git a/scripts/FlowByActivity_Crosswalks/write_Crosswalk_StatCan_GDP.py b/scripts/FlowByActivity_Crosswalks/write_Crosswalk_StatCan_GDP.py
index b9a5d0ac0..c89e65d4a 100644
--- a/scripts/FlowByActivity_Crosswalks/write_Crosswalk_StatCan_GDP.py
+++ b/scripts/FlowByActivity_Crosswalks/write_Crosswalk_StatCan_GDP.py
@@ -8,7 +8,7 @@
"""
import pandas as pd
from flowsa.settings import datapath
-from scripts.common_scripts import unique_activity_names, order_crosswalk
+from scripts.FlowByActivity_Crosswalks.common_scripts import unique_activity_names, order_crosswalk
if __name__ == '__main__':
# select years to pull unique activity names
diff --git a/scripts/FlowByActivity_Crosswalks/write_Crosswalk_StatCan_IWS_MI.py b/scripts/FlowByActivity_Crosswalks/write_Crosswalk_StatCan_IWS_MI.py
index 55673ce4f..bebc6f816 100644
--- a/scripts/FlowByActivity_Crosswalks/write_Crosswalk_StatCan_IWS_MI.py
+++ b/scripts/FlowByActivity_Crosswalks/write_Crosswalk_StatCan_IWS_MI.py
@@ -8,7 +8,7 @@
"""
import pandas as pd
from flowsa.settings import datapath
-from scripts.common_scripts import unique_activity_names, order_crosswalk
+from scripts.FlowByActivity_Crosswalks.common_scripts import unique_activity_names, order_crosswalk
if __name__ == '__main__':
diff --git a/scripts/FlowByActivity_Crosswalks/write_Crosswalk_USDA_ACUP.py b/scripts/FlowByActivity_Crosswalks/write_Crosswalk_USDA_ACUP.py
index 977bafb9d..c14328f5a 100644
--- a/scripts/FlowByActivity_Crosswalks/write_Crosswalk_USDA_ACUP.py
+++ b/scripts/FlowByActivity_Crosswalks/write_Crosswalk_USDA_ACUP.py
@@ -10,7 +10,7 @@
"""
import pandas as pd
from flowsa.settings import datapath
-from scripts.common_scripts import unique_activity_names, order_crosswalk
+from scripts.FlowByActivity_Crosswalks.common_scripts import unique_activity_names, order_crosswalk
def assign_naics(df):
diff --git a/scripts/FlowByActivity_Crosswalks/write_Crosswalk_USDA_CoA_Cropland.py b/scripts/FlowByActivity_Crosswalks/write_Crosswalk_USDA_CoA_Cropland.py
index 735358723..a53f7c07d 100644
--- a/scripts/FlowByActivity_Crosswalks/write_Crosswalk_USDA_CoA_Cropland.py
+++ b/scripts/FlowByActivity_Crosswalks/write_Crosswalk_USDA_CoA_Cropland.py
@@ -15,7 +15,7 @@
"""
import pandas as pd
from flowsa.settings import datapath
-from scripts.common_scripts import unique_activity_names, order_crosswalk
+from scripts.FlowByActivity_Crosswalks.common_scripts import unique_activity_names, order_crosswalk
def assign_naics(df):
diff --git a/scripts/FlowByActivity_Crosswalks/write_Crosswalk_USDA_CoA_Livestock.py b/scripts/FlowByActivity_Crosswalks/write_Crosswalk_USDA_CoA_Livestock.py
index eb4a29fe0..b10bd1a49 100644
--- a/scripts/FlowByActivity_Crosswalks/write_Crosswalk_USDA_CoA_Livestock.py
+++ b/scripts/FlowByActivity_Crosswalks/write_Crosswalk_USDA_CoA_Livestock.py
@@ -15,7 +15,7 @@
import pandas as pd
from flowsa.settings import datapath
-from scripts.common_scripts import unique_activity_names, order_crosswalk
+from scripts.FlowByActivity_Crosswalks.common_scripts import unique_activity_names, order_crosswalk
def assign_naics(df):
diff --git a/scripts/FlowByActivity_Crosswalks/write_Crosswalk_USDA_ERS_FIWS.py b/scripts/FlowByActivity_Crosswalks/write_Crosswalk_USDA_ERS_FIWS.py
index 1d1553dd5..65ed16e3d 100644
--- a/scripts/FlowByActivity_Crosswalks/write_Crosswalk_USDA_ERS_FIWS.py
+++ b/scripts/FlowByActivity_Crosswalks/write_Crosswalk_USDA_ERS_FIWS.py
@@ -12,7 +12,7 @@
import pandas as pd
from flowsa.settings import datapath
-from scripts.common_scripts import unique_activity_names, order_crosswalk
+from scripts.FlowByActivity_Crosswalks.common_scripts import unique_activity_names, order_crosswalk
def assign_naics(df):
diff --git a/scripts/FlowByActivity_Crosswalks/write_Crosswalk_USDA_ERS_MLU.py b/scripts/FlowByActivity_Crosswalks/write_Crosswalk_USDA_ERS_MLU.py
index 6f3841837..8568ff838 100644
--- a/scripts/FlowByActivity_Crosswalks/write_Crosswalk_USDA_ERS_MLU.py
+++ b/scripts/FlowByActivity_Crosswalks/write_Crosswalk_USDA_ERS_MLU.py
@@ -11,7 +11,7 @@
import pandas as pd
from flowsa.settings import datapath
-from scripts.common_scripts import unique_activity_names, order_crosswalk
+from scripts.FlowByActivity_Crosswalks.common_scripts import unique_activity_names, order_crosswalk
def assign_naics(df):
diff --git a/scripts/FlowByActivity_Crosswalks/write_Crosswalk_USDA_IWMS.py b/scripts/FlowByActivity_Crosswalks/write_Crosswalk_USDA_IWMS.py
index c7c818520..e3a99adbf 100644
--- a/scripts/FlowByActivity_Crosswalks/write_Crosswalk_USDA_IWMS.py
+++ b/scripts/FlowByActivity_Crosswalks/write_Crosswalk_USDA_IWMS.py
@@ -15,7 +15,7 @@
"""
import pandas as pd
from flowsa.settings import datapath
-from scripts.common_scripts import unique_activity_names, order_crosswalk
+from scripts.FlowByActivity_Crosswalks.common_scripts import unique_activity_names, order_crosswalk
def assign_naics(df):
diff --git a/scripts/FlowByActivity_Crosswalks/write_Crosswalk_USGS_NWIS_WU.py b/scripts/FlowByActivity_Crosswalks/write_Crosswalk_USGS_NWIS_WU.py
index c157f238e..ec07ac8d5 100644
--- a/scripts/FlowByActivity_Crosswalks/write_Crosswalk_USGS_NWIS_WU.py
+++ b/scripts/FlowByActivity_Crosswalks/write_Crosswalk_USGS_NWIS_WU.py
@@ -10,7 +10,7 @@
"""
import pandas as pd
from flowsa.settings import datapath
-from scripts.common_scripts import unique_activity_names, order_crosswalk
+from scripts.FlowByActivity_Crosswalks.common_scripts import unique_activity_names, order_crosswalk
def assign_naics(df):
diff --git a/scripts/FlowByActivity_Crosswalks/write_Crosswalk_USGS_WU_Coef.py b/scripts/FlowByActivity_Crosswalks/write_Crosswalk_USGS_WU_Coef.py
index 9dd908187..faa73d23c 100644
--- a/scripts/FlowByActivity_Crosswalks/write_Crosswalk_USGS_WU_Coef.py
+++ b/scripts/FlowByActivity_Crosswalks/write_Crosswalk_USGS_WU_Coef.py
@@ -10,7 +10,7 @@
"""
import pandas as pd
from flowsa.settings import datapath
-from scripts.common_scripts import unique_activity_names, order_crosswalk
+from scripts.FlowByActivity_Crosswalks.common_scripts import unique_activity_names, order_crosswalk
def assign_naics(df):
diff --git a/scripts/FlowBySector_Activity_Sets/write_FBS_activity_set_BLS_QCEW.py b/scripts/FlowBySector_Activity_Sets/write_FBS_activity_set_BLS_QCEW.py
index cfa7bd6ae..9e9004c3a 100644
--- a/scripts/FlowBySector_Activity_Sets/write_FBS_activity_set_BLS_QCEW.py
+++ b/scripts/FlowBySector_Activity_Sets/write_FBS_activity_set_BLS_QCEW.py
@@ -2,30 +2,37 @@
# !/usr/bin/env python3
# coding=utf-8
"""
-Write the csv called on in flowbysectormethods yaml files for
-land use related to BLS QCEW employment data
+Create an activity set file for employment data. Script only needs to be
+run for additional years if there are new NAICS.
"""
+import pandas as pd
import flowsa
from flowsa.settings import flowbysectoractivitysetspath
datasource = 'BLS_QCEW'
-as_year = '2017'
+as_years = ['2002', '2010', '2011', '2012', '2013', '2014', '2015', '2016',
+ '2017']
if __name__ == '__main__':
- df_import = flowsa.getFlowByActivity(datasource, as_year)
- df = (df_import[['ActivityProducedBy']]
- .drop_duplicates()
- .reset_index(drop=True)
- .rename(columns={"ActivityProducedBy": "name"})
- .assign(activity_set='qcew',
- note=''))
+ # empty df
+ df2 = pd.DataFrame()
+ for y in as_years:
+ df_import = flowsa.getFlowByActivity(datasource, y)
- # reorder dataframe
- df = (df[['activity_set', 'name', 'note']]
- .sort_values(['activity_set', 'name'])
- .reset_index(drop=True))
-
- df.to_csv(f'{flowbysectoractivitysetspath}{datasource}_asets.csv',
- index=False)
+ df = (df_import[['ActivityProducedBy']]
+ .drop_duplicates()
+ .reset_index(drop=True)
+ .rename(columns={"ActivityProducedBy": "name"})
+ .assign(activity_set='qcew',
+ note=''))
+ # reorder dataframe
+ df = df[['activity_set', 'name', 'note']]
+ # concat
+ df2 = pd.concat([df2, df], ignore_index=True)
+ # drop duplicates and save df
+ df3 = df2.drop_duplicates()
+ df3 = df3.sort_values(['activity_set', 'name']).reset_index(drop=True)
+ df3.to_csv(f"{flowbysectoractivitysetspath}{datasource}_asets.csv",
+ index=False)
diff --git a/scripts/update_NAICS_crosswalk.py b/scripts/writeNAICScrosswalk.py
similarity index 99%
rename from scripts/update_NAICS_crosswalk.py
rename to scripts/writeNAICScrosswalk.py
index 5047a8506..15cebd5ce 100644
--- a/scripts/update_NAICS_crosswalk.py
+++ b/scripts/writeNAICScrosswalk.py
@@ -1,4 +1,4 @@
-# update_NAICS_crosswalk.py (scripts)
+# writeNAICScrosswalk.py
# !/usr/bin/env python3
# coding=utf-8
diff --git a/setup.py b/setup.py
index f3f4f06ff..62b0cde4f 100644
--- a/setup.py
+++ b/setup.py
@@ -6,7 +6,7 @@
setup(
name='flowsa',
- version='1.1',
+ version='1.2.1',
packages=find_packages(),
package_dir={'flowsa': 'flowsa'},
include_package_data=True,
@@ -32,14 +32,14 @@
'matplotlib>=3.4.3'
],
url='https://github.com/USEPA/FLOWSA',
- license='CC0',
+ license='MIT',
author='Catherine Birney, Ben Young, Wesley Ingwersen, Melissa Conner, Jacob Specht, Mo Li',
author_email='ingwersen.wesley@epa.gov',
classifiers=[
"Development Status :: 1 - Alpha",
"Environment :: IDE",
"Intended Audience :: Science/Research",
- "License :: CC0",
+ "License :: MIT",
"Programming Language :: Python :: 3.x",
"Topic :: Utilities",
],