diff --git a/.github/workflows/generate_FBS.yml b/.github/workflows/generate_FBS.yml new file mode 100644 index 000000000..8e7abdccf --- /dev/null +++ b/.github/workflows/generate_FBS.yml @@ -0,0 +1,50 @@ +# This workflow will generate all FlowBySector files and store as artifact + +name: Generate FBS + +on: + pull_request: + branches: [master] + types: [opened, reopened, ready_for_review] # excludes syncronize to avoid redundant trigger from commits on PRs + workflow_dispatch: # also allow manual trigger, for testing purposes + +jobs: + build: + runs-on: ubuntu-latest + strategy: + fail-fast: false + + steps: + - uses: actions/checkout@v2 + - name: Set up Python + uses: actions/setup-python@v2 + with: + python-version: "3.10" + + - name: Update pip & install testing pkgs + run: | + python -VV + python -m pip install --upgrade pip setuptools wheel + pip install pytest + + # install package & dependencies + - name: Install package and dependencies + run: | + pip install . + + - name: Generate FBS + run: | + pytest -m generate_fbs + + - name: Upload files + if: always() + uses: actions/upload-artifact@v3 + with: + # Artifact name + name: FlowBySector + # A file, directory or wildcard patter that describes what to upload + path: | # uses local user data dir for ubuntu + ~/.local/share/flowsa/FlowBySector/* + ~/.local/share/stewi/Log/* + if-no-files-found: warn # 'warn' or 'ignore' are also available, defaults to `warn` + # retention-days: 5 # cannot exceed the retention limit set by the repository, organization, or enterprise. 
diff --git a/.github/workflows/python-app.yml b/.github/workflows/python-app.yml index ef43e1de1..fc805c820 100644 --- a/.github/workflows/python-app.yml +++ b/.github/workflows/python-app.yml @@ -61,4 +61,4 @@ jobs: - name: Test with pytest run: | - pytest --doctest-modules + pytest --doctest-modules -m "not generate_fbs" diff --git a/.github/workflows/test_methods.yml b/.github/workflows/test_methods.yml index 43cc4e982..98062adec 100644 --- a/.github/workflows/test_methods.yml +++ b/.github/workflows/test_methods.yml @@ -34,7 +34,7 @@ jobs: - name: Test FBA config run: | - python flowsa/test_FBA_urls.py + python flowsa/test_methods.py - name: Compare FBS with remote id: FBS @@ -50,6 +50,9 @@ jobs: # Artifact name name: FBS diff files # A file, directory or wildcard patter that describes what to upload - path: ${{ env.LD_LIBRARY_PATH }}/python3.10/site-packages/flowsa/data/fbs_diff/*_diff.csv + path: | + ${{ env.LD_LIBRARY_PATH }}/python3.10/site-packages/flowsa/data/fbs_diff/*_diff.csv + ~/.local/share/flowsa/FlowBySector/* + ~/.local/share/flowsa/Log/* if-no-files-found: warn # 'warn' or 'ignore' are also available, defaults to `warn` # retention-days: 5 # cannot exceed the retention limit set by the repository, organization, or enterprise. diff --git a/LICENSE b/LICENSE index 670154e35..8a545bcfc 100644 --- a/LICENSE +++ b/LICENSE @@ -1,116 +1,21 @@ -CC0 1.0 Universal - -Statement of Purpose - -The laws of most jurisdictions throughout the world automatically confer -exclusive Copyright and Related Rights (defined below) upon the creator and -subsequent owner(s) (each and all, an "owner") of an original work of -authorship and/or a database (each, a "Work"). 
- -Certain owners wish to permanently relinquish those rights to a Work for the -purpose of contributing to a commons of creative, cultural and scientific -works ("Commons") that the public can reliably and without fear of later -claims of infringement build upon, modify, incorporate in other works, reuse -and redistribute as freely as possible in any form whatsoever and for any -purposes, including without limitation commercial purposes. These owners may -contribute to the Commons to promote the ideal of a free culture and the -further production of creative, cultural and scientific works, or to gain -reputation or greater distribution for their Work in part through the use and -efforts of others. - -For these and/or other purposes and motivations, and without any expectation -of additional consideration or compensation, the person associating CC0 with a -Work (the "Affirmer"), to the extent that he or she is an owner of Copyright -and Related Rights in the Work, voluntarily elects to apply CC0 to the Work -and publicly distribute the Work under its terms, with knowledge of his or her -Copyright and Related Rights in the Work and the meaning and intended legal -effect of CC0 on those rights. - -1. Copyright and Related Rights. A Work made available under CC0 may be -protected by copyright and related or neighboring rights ("Copyright and -Related Rights"). Copyright and Related Rights include, but are not limited -to, the following: - - i. the right to reproduce, adapt, distribute, perform, display, communicate, - and translate a Work; - - ii. moral rights retained by the original author(s) and/or performer(s); - - iii. publicity and privacy rights pertaining to a person's image or likeness - depicted in a Work; - - iv. rights protecting against unfair competition in regards to a Work, - subject to the limitations in paragraph 4(a), below; - - v. rights protecting the extraction, dissemination, use and reuse of data in - a Work; - - vi. 
database rights (such as those arising under Directive 96/9/EC of the - European Parliament and of the Council of 11 March 1996 on the legal - protection of databases, and under any national implementation thereof, - including any amended or successor version of such directive); and - - vii. other similar, equivalent or corresponding rights throughout the world - based on applicable law or treaty, and any national implementations thereof. - -2. Waiver. To the greatest extent permitted by, but not in contravention of, -applicable law, Affirmer hereby overtly, fully, permanently, irrevocably and -unconditionally waives, abandons, and surrenders all of Affirmer's Copyright -and Related Rights and associated claims and causes of action, whether now -known or unknown (including existing as well as future claims and causes of -action), in the Work (i) in all territories worldwide, (ii) for the maximum -duration provided by applicable law or treaty (including future time -extensions), (iii) in any current or future medium and for any number of -copies, and (iv) for any purpose whatsoever, including without limitation -commercial, advertising or promotional purposes (the "Waiver"). Affirmer makes -the Waiver for the benefit of each member of the public at large and to the -detriment of Affirmer's heirs and successors, fully intending that such Waiver -shall not be subject to revocation, rescission, cancellation, termination, or -any other legal or equitable action to disrupt the quiet enjoyment of the Work -by the public as contemplated by Affirmer's express Statement of Purpose. - -3. Public License Fallback. Should any part of the Waiver for any reason be -judged legally invalid or ineffective under applicable law, then the Waiver -shall be preserved to the maximum extent permitted taking into account -Affirmer's express Statement of Purpose. 
In addition, to the extent the Waiver -is so judged Affirmer hereby grants to each affected person a royalty-free, -non transferable, non sublicensable, non exclusive, irrevocable and -unconditional license to exercise Affirmer's Copyright and Related Rights in -the Work (i) in all territories worldwide, (ii) for the maximum duration -provided by applicable law or treaty (including future time extensions), (iii) -in any current or future medium and for any number of copies, and (iv) for any -purpose whatsoever, including without limitation commercial, advertising or -promotional purposes (the "License"). The License shall be deemed effective as -of the date CC0 was applied by Affirmer to the Work. Should any part of the -License for any reason be judged legally invalid or ineffective under -applicable law, such partial invalidity or ineffectiveness shall not -invalidate the remainder of the License, and in such case Affirmer hereby -affirms that he or she will not (i) exercise any of his or her remaining -Copyright and Related Rights in the Work or (ii) assert any associated claims -and causes of action with respect to the Work, in either case contrary to -Affirmer's express Statement of Purpose. - -4. Limitations and Disclaimers. - - a. No trademark or patent rights held by Affirmer are waived, abandoned, - surrendered, licensed or otherwise affected by this document. - - b. Affirmer offers the Work as-is and makes no representations or warranties - of any kind concerning the Work, express, implied, statutory or otherwise, - including without limitation warranties of title, merchantability, fitness - for a particular purpose, non infringement, or the absence of latent or - other defects, accuracy, or the present or absence of errors, whether or not - discoverable, all to the greatest extent permissible under applicable law. - - c. 
Affirmer disclaims responsibility for clearing rights of other persons - that may apply to the Work or any use thereof, including without limitation - any person's Copyright and Related Rights in the Work. Further, Affirmer - disclaims responsibility for obtaining any necessary consents, permissions - or other rights required for any use of the Work. - - d. Affirmer understands and acknowledges that Creative Commons is not a - party to this document and has no duty or obligation with respect to this - CC0 or use of the Work. - -For more information, please see - +MIT License + +Copyright (c) 2022 U.S. Environmental Protection Agency + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. diff --git a/README.md b/README.md index e76070665..4addebd13 100644 --- a/README.md +++ b/README.md @@ -1,27 +1,44 @@ # flowsa -`flowsa` is a data processing library that attributes resource use, waste, emissions, and loss to economic sectors. 
`flowsa` aggregates, combines, and allocates data from a variety of sources. The sources can be found in the [GitHub wiki](https://github.com/USEPA/flowsa/wiki/Available-Data#flow-by-activity-datasets) under "Flow-By-Activity Datasets". - -`flowsa` helps support [USEEIO](https://www.epa.gov/land-research/us-environmentally-extended-input-output-useeio-technical-content) as part of the [USEEIO modeling](https://www.epa.gov/land-research/us-environmentally-extended-input-output-useeio-models) framework. The USEEIO models estimate potential impacts of goods and services in the US economy. The [Flow-By-Sector datasets](https://github.com/USEPA/flowsa/wiki/Available-Data#flow-by-sector-datasets) created in FLOWSA are the environmental inputs to [`useeior`](https://github.com/USEPA/useeior). +`flowsa` is a data processing library attributing resources (environmental, +monetary, and human), wastes, emissions, and losses to sectors, typically +[NAICS codes](https://www.census.gov/naics/). `flowsa` aggregates, combines, +and allocates data from a variety of sources. The sources can be found in the +[GitHub wiki](https://github.com/USEPA/flowsa/wiki/Available-Data#flow-by-activity-datasets) +under "Flow-By-Activity Datasets". + +`flowsa` helps support +[USEEIO](https://www.epa.gov/land-research/us-environmentally-extended-input-output-useeio-technical-content) +as part of the [USEEIO modeling](https://www.epa.gov/land-research/us-environmentally-extended-input-output-useeio-models) +framework. The USEEIO models estimate potential impacts of goods and +services in the US economy. The +[Flow-By-Sector datasets](https://github.com/USEPA/flowsa/wiki/Available-Data#flow-by-sector-datasets) +created in FLOWSA are the environmental inputs to +[`useeior`](https://github.com/USEPA/useeior). ## Usage -### Flow-By-Activity Datasets -Flow-By-Activity datasets are formatted tables from a variety of sources. 
They are largely unchanged from the original data source, with the exception of formatting. +### Flow-By-Activity (FBA) Datasets +Flow-By-Activity datasets are formatted tables from a variety of sources. +They are largely unchanged from the original data source, with the +exception of formatting. A list of available FBA datasets can be found in +the [Wiki](https://github.com/USEPA/flowsa/wiki/Available-Data#flow-by-activity-datasets). `import flowsa` \ `flowsa.seeAvailableFlowByModels('FBA')` \ `flowsa.getFlowByActivity(datasource="USDA_CoA_Cropland", year=2017)` -### Flow-By-Sector Datasets -Flow-By-Sector datasets are tables of environmental and other data attributed to [sectors](https://www.census.gov/naics/). +### Flow-By-Sector (FBS) Datasets +Flow-By-Sector datasets are tables of environmental and other data +attributed to [sectors](https://www.census.gov/naics/). A list of available +FBS datasets can be found in the [Wiki](https://github.com/USEPA/flowsa/wiki/Available-Data#flow-by-sector-datasets). `import flowsa` \ `flowsa.seeAvailableFlowByModels('FBS')` \ `flowsa.getFlowBySector('Water_national_2015_m1')` ## Installation -`pip install git+https://github.com/USEPA/flowsa.git@v1.1#egg=flowsa` +`pip install git+https://github.com/USEPA/flowsa.git@vX.X.X#egg=flowsa` -where v1.1 can be replaced with the version you wish to install under +where vX.X.X can be replaced with the version you wish to install under [Releases](https://github.com/USEPA/flowsa/releases). ### Additional Information on Installation, Examples, Detailed Documentation @@ -29,10 +46,13 @@ For more information on `flowsa` see the [wiki](https://github.com/USEPA/flowsa/ ## Disclaimer -The United States Environmental Protection Agency (EPA) GitHub project code is provided on an "as is" basis -and the user assumes responsibility for its use. 
EPA has relinquished control of the information and no longer -has responsibility to protect the integrity , confidentiality, or availability of the information. Any -reference to specific commercial products, processes, or services by service mark, trademark, manufacturer, -or otherwise, does not constitute or imply their endorsement, recommendation or favoring by EPA. The EPA seal -and logo shall not be used in any manner to imply endorsement of any commercial product or activity by EPA or +The United States Environmental Protection Agency (EPA) GitHub project code +is provided on an "as is" basis and the user assumes responsibility for its +use. EPA has relinquished control of the information and no longer has +responsibility to protect the integrity, confidentiality, or availability +of the information. Any reference to specific commercial products, +processes, or services by service mark, trademark, manufacturer, or +otherwise, does not constitute or imply their endorsement, recommendation +or favoring by EPA. The EPA seal and logo shall not be used in any manner +to imply endorsement of any commercial product or activity by EPA or the United States Government. diff --git a/flowsa/README.md b/flowsa/README.md index 4ba903a31..0cb770597 100644 --- a/flowsa/README.md +++ b/flowsa/README.md @@ -2,18 +2,24 @@ Python scripts used to generate Flow-By-Activity (FBA) and Flow-By-Sector (FBS) datasets -1. _"_init_.py"_ +1. _"\_\_init\_\_.py"_ 2. _"allocation.py"_ 3. _"bibliography.py"_ 4. _"common.py"_ 5. _"dataclean.py"_ -6. _"fbs_allocation.py"_ -7. _"flowbyactivity.py"_ -8. _"flowbyfunctions.py"_ -9. _"flowbysector.py"_ -10. _"literature_values.py"_ -11. _"metadata.py"_ -12. _"sectormapping.py"_ -13. _"settings.py"_ -14. _"test_examples.py"_ -15. _"validation.py"_ +6. _"datavisualization.py"_ +7. _"fbs_allocation.py"_ +8. _"flowbyactivity.py"_ +9. _"flowbyfunctions.py"_ +10. _"flowbysector.py"_ +11. _"flowsa_yaml.py"_ +12. _"literature_values.py"_ +13. 
_"location.py"_ +14. _"metadata.py"_ +15. _"schema.py"_ +16. _"sectormapping.py"_ +17. _"settings.py"_ +18. _"test_examples.py"_ +19. _"test_FBS_against_remote.py"_ +20. _"test_methods.py"_ +21. _"validation.py"_ diff --git a/flowsa/__init__.py b/flowsa/__init__.py index db6d1768f..bf0789cba 100644 --- a/flowsa/__init__.py +++ b/flowsa/__init__.py @@ -41,6 +41,10 @@ def getFlowByActivity(datasource, year, flowclass=None, geographic_level=None, year=int(year), download_ok=download_FBA_if_missing ) + + if len(fba) ==0: + raise flowsa.exceptions.FBANotAvailableError( + message=f"Error generating {datasource} for {str(year)}") if flowclass is not None: fba = fba.query('Class == @flowclass') # if geographic level specified, only load rows in geo level diff --git a/flowsa/allocation.py b/flowsa/allocation.py index ae9f2dcf6..cdd01b9c3 100644 --- a/flowsa/allocation.py +++ b/flowsa/allocation.py @@ -6,9 +6,9 @@ Methods of allocating datasets """ import pandas as pd -from flowsa.settings import log -from flowsa.common import fbs_activity_fields, sector_level_key, load_crosswalk -from flowsa.settings import vLogDetailed +from flowsa.common import fbs_activity_fields, sector_level_key, \ + load_crosswalk, check_activities_sector_like +from flowsa.settings import log, vLogDetailed from flowsa.dataclean import replace_NoneType_with_empty_cells, \ replace_strings_with_NoneType from flowsa.flowbyfunctions import sector_aggregation, \ @@ -143,7 +143,8 @@ def proportional_allocation(df, attr): # calculate ratio allocation_df.loc[:, 'FlowAmountRatio'] = \ allocation_df['FlowAmount'] / allocation_df['Denominator'] - allocation_df = allocation_df.drop(columns=['Denominator']).reset_index() + allocation_df = allocation_df.drop(columns=['Denominator']).reset_index( + drop=True) # add nonetypes allocation_df = replace_strings_with_NoneType(allocation_df) @@ -219,17 +220,21 @@ def proportional_allocation_by_location_and_activity(df_load, sectorcolumn): return allocation_df -def 
equally_allocate_parent_to_child_naics(df_load, method): +def equally_allocate_parent_to_child_naics( + df_load, method, overwritetargetsectorlevel=None): """ Determine rows of data that will be lost if subset data at target sector level. Equally allocate parent NAICS to child NAICS where child NAICS missing :param df_load: df, FBS format - :param target_sector_level: str, target NAICS level for FBS output + :param overwritetargetsectorlevel: str, optional, specify what sector + level to allocate to :return: df, with all child NAICS at target sector level """ # determine which sector level to use, use the least aggregated level sector_level = method.get('target_sector_level') + if overwritetargetsectorlevel is not None: + sector_level = overwritetargetsectorlevel # if secondary sector levels are identified, set the sector level to the # least aggregated sector_level_list = [sector_level] @@ -244,12 +249,23 @@ def equally_allocate_parent_to_child_naics(df_load, method): # exclude nonsectors df = replace_NoneType_with_empty_cells(df_load) + # determine if activities are sector-like, if aggregating a df with a + # 'SourceName' + sector_like_activities = check_activities_sector_like(df_load) + + # if activities are source like, drop from df, + # add back in as copies of sector columns columns to keep + if sector_like_activities: + # subset df + df_cols = [e for e in df.columns if e not in + ('ActivityProducedBy', 'ActivityConsumedBy')] + df = df[df_cols] + rows_lost = pd.DataFrame() for i in range(2, sector_level_key[sector_level]): - dfm = subset_and_merge_df_by_sector_lengths(df_load, i, i+1) - + dfm = subset_and_merge_df_by_sector_lengths(df, i, i+1) # extract the rows that are not disaggregated to more - # specific naics + # specific sectors rl = dfm.query('_merge=="left_only"').drop( columns=['_merge', 'SPB_tmp', 'SCB_tmp']) rl_list = rl[['SectorProducedBy', 'SectorConsumedBy']]\ @@ -269,11 +285,11 @@ def equally_allocate_parent_to_child_naics(df_load, method): # 
merge df & conditionally replace sector produced/consumed columns # merge dfs assigning sector length sectype_list = ['Produced', 'Consumed'] - for s in sectype_list: - rl = rl.merge(cw, how='left', left_on=[f'Sector{s}By'], + for sec in sectype_list: + rl = rl.merge(cw, how='left', left_on=[f'Sector{sec}By'], right_on=nlength).rename( - columns={'sector_count': f'{s}Count'}) - rl[f'Sector{s}By'] = rl[sector_level] + columns={'sector_count': f'{sec}Count'}) + rl[f'Sector{sec}By'] = rl[sector_level] rl = rl.drop(columns=[sector_level, nlength]) # create one sector count column, using max value @@ -287,7 +303,7 @@ def equally_allocate_parent_to_child_naics(df_load, method): # append to df if len(rl) != 0: - vLogDetailed.warning('Data found at %s digit NAICS not ' + vLogDetailed.warning('Data found at %s digit sectors not ' 'represented in current data subset: ' '{}'.format(' '.join(map(str, rl_list))), str(i)) @@ -297,9 +313,18 @@ def equally_allocate_parent_to_child_naics(df_load, method): vLogDetailed.info('Allocating FlowAmounts equally to ' 'each %s associated with the sectors previously ' 'dropped', sector_level) + # if activities are source-like, set col values as copies + # of the sector columns + if sector_like_activities: + rows_lost = rows_lost.assign(ActivityProducedBy= + rows_lost['SectorProducedBy']) + rows_lost = rows_lost.assign(ActivityConsumedBy= + rows_lost['SectorConsumedBy']) + # reindex columns + rows_lost = rows_lost.reindex(df_load.columns, axis=1) # add rows of missing data to the fbs sector subset - df_w_lost_data = pd.concat([df, rows_lost], ignore_index=True, sort=True) + df_w_lost_data = pd.concat([df_load, rows_lost], ignore_index=True) df_w_lost_data = replace_strings_with_NoneType(df_w_lost_data) return df_w_lost_data @@ -311,21 +336,54 @@ def equal_allocation(fba_load): Function only works if all mapped sectors are the same length :param fba_load: df, FBA with activity columns mapped to sectors + :param sector_level: string ('NAICS_X') 
used when assigning columns of + sector levels if there are ambiguous sectors (e.g., household and + government sectors) :return: df, with FlowAmount equally allocated to all mapped sectors """ + from flowsa.flowbyfunctions import assign_columns_of_sector_levels + + # first check that all sector lengths are the same + dfc = assign_columns_of_sector_levels(fba_load) + # if duplicated rows, keep assignment to most specific sectors because + # data should already be at final assignment lengths if equally + # allocating and because not manipulating the loaded dataset, but rather + # just checking that all sector lengths match for an activity + duplicate_cols = [e for e in dfc.columns if e not in [ + 'SectorProducedByLength', 'SectorConsumedByLength']] + duplicate_df = dfc[dfc.duplicated(duplicate_cols)] + if len(duplicate_df) > 0: + log.info('Dropping rows duplicated due to assigning sector lengths ' + 'for ambiguous sectors. Keeping sector length assignments ' + 'to most specific sectors.') + dfc = dfc[dfc.duplicated(duplicate_cols, keep='first')] + + # Before equally allocating, check that each activity is being allocated + # to sectors of the same length + dfsub = dfc[['ActivityProducedBy', 'ActivityConsumedBy', + 'SectorProducedByLength', + 'SectorConsumedByLength']].drop_duplicates() + df_dup = dfsub[dfsub.duplicated(['ActivityProducedBy', 'ActivityConsumedBy'])] + if len(df_dup) > 1: + log.error('Cannot equally allocate because sector lengths vary. 
All ' + 'sectors must be the same sector level.') + # create groupby cols by which to determine allocation fba_cols = fba_load.select_dtypes([object]).columns.to_list() groupcols = [e for e in fba_cols if e not in ['SectorProducedBy', 'SectorConsumedBy', 'Description']] # create counts of rows df_count = fba_load.groupby( - groupcols, as_index=False, dropna=False).size().astype(str) - df_count = replace_strings_with_NoneType(df_count) + groupcols, as_index=False, dropna=False).size() + df_count = replace_NoneType_with_empty_cells(df_count) - # merge dfs - dfm = fba_load.merge(df_count, how='left') + # merge dfs, replace cells with empty strings to ensure merge occurs + # correctly + fba = replace_NoneType_with_empty_cells(fba_load) + dfm = fba.merge(df_count, how='outer', on=groupcols) # calc new flowamounts - dfm['FlowAmount'] = dfm['FlowAmount'] / dfm['size'].astype(int) + dfm['FlowAmount'] = dfm['FlowAmount'] / dfm['size'] dfm = dfm.drop(columns='size') + dfm = replace_strings_with_NoneType(dfm) return dfm diff --git a/flowsa/bibliography.py b/flowsa/bibliography.py index 358362aa6..21687e8fd 100644 --- a/flowsa/bibliography.py +++ b/flowsa/bibliography.py @@ -148,9 +148,9 @@ def generate_fbs_bibliography(methodname): f"{str(source[1])}", 'author': config['author'], 'year': str(source[1]), - 'url': config['source_url'], + 'url': config['tool_meta']['source_url'], 'urldate': bib_date, - 'ID': config['bib_id'] + '_' + str(source[1]), + 'ID': config['tool_meta']['bib_id'] + '_' + str(source[1]), 'ENTRYTYPE': 'misc' }] # append each entry to a list of BibDatabase entries diff --git a/flowsa/common.py b/flowsa/common.py index 73ceca7d6..3e41d849e 100644 --- a/flowsa/common.py +++ b/flowsa/common.py @@ -12,6 +12,7 @@ from dotenv import load_dotenv from esupy.processed_data_mgmt import create_paths_if_missing import flowsa.flowsa_yaml as flowsa_yaml +import flowsa.exceptions from flowsa.schema import flow_by_activity_fields, flow_by_sector_fields, \ 
flow_by_sector_collapsed_fields, flow_by_activity_mapped_fields, \ flow_by_activity_wsec_fields, flow_by_activity_mapped_wsec_fields, \ @@ -51,8 +52,7 @@ def load_api_key(api_source): load_dotenv(f'{MODULEPATH}API_Keys.env', verbose=True) key = os.getenv(api_source) if key is None: - log.error(f"Key file {api_source} not found. See github wiki for help " - "https://github.com/USEPA/flowsa/wiki/Using-FLOWSA#api-keys") + raise flowsa.exceptions.APIError(api_source=api_source) return key @@ -131,9 +131,9 @@ def load_yaml_dict(filename, flowbytype=None, filepath=None): try: with open(yaml_path, 'r', encoding='utf-8') as f: config = flowsa_yaml.load(f, filepath) - except IOError: - log.error(f'{flowbytype} method file not found') - raise + except FileNotFoundError: + raise flowsa.exceptions.FlowsaMethodNotFoundError( + method_type=flowbytype, method=filename) return config @@ -324,20 +324,32 @@ def return_true_source_catalog_name(sourcename): return sourcename -def check_activities_sector_like(sourcename_load): +def check_activities_sector_like(df_load, sourcename=None): """ Check if the activities in a df are sector-like, if cannot find the sourcename in the source catalog, drop extensions on the source name + :param df_load: df, df to determine if activities are sector-like + :param source: str, optionial, can identify sourcename to use """ - sourcename = return_true_source_catalog_name(sourcename_load) + # identify sourcename + if sourcename is not None: + s = sourcename + else: + if 'SourceName' in df_load.columns: + s = pd.unique(df_load['SourceName'])[0] + elif 'MetaSources' in df_load.columns: + s = pd.unique(df_load['MetaSources'])[0] + + sourcename = return_true_source_catalog_name(s) try: sectorLike = load_yaml_dict('source_catalog')[sourcename][ 'sector-like_activities'] except KeyError: - log.error(f'%s or %s not found in {datapath}source_catalog.yaml', - sourcename_load, sourcename) + log.info(f'%s not found in {datapath}source_catalog.yaml, assuming ' + 
f'activities are not sector-like', sourcename) + sectorLike = False return sectorLike diff --git a/flowsa/data/README.md b/flowsa/data/README.md index 5f7c1981b..debacda02 100644 --- a/flowsa/data/README.md +++ b/flowsa/data/README.md @@ -4,16 +4,20 @@ Flow-By-Activity. ## Term descriptions - _class_: list, classes such as "Water" found in the Flow-By-Activity -- _sector-like_activities_: 'True' or 'False', “sector-like activities” are True when the Flow-By-Activity - “ActivityProducedBy” and “ActivityConsumedBy” columns are already NAICS based. For example, all BLS QCEW - data for employment and establishments are published by NAICS codes. We deem these “sector-like” because we - then implement checks to determine if the activities are published in the identified NAICS year in the - Flow-By-Sector and if not, we have a crosswalk to map the sectors/NAICS to NAICS year. -- _activity_schema_: 'None' if 'sector-like_activities' is False, otherwise the year of the sector data +- _sector-like_activities_: 'True' or 'False', “sector-like activities” are + True when the Flow-By-Activity “ActivityProducedBy” and + “ActivityConsumedBy” columns are already NAICS based. For example, all + BLS QCEW data for employment and establishments are published by NAICS + codes. We deem these “sector-like” because we then implement checks to + determine if the activities are published in the identified NAICS year in the + Flow-By-Sector and if not, we have a crosswalk to map the sectors/NAICS + to NAICS year. +- _activity_schema_: 'None' if 'sector-like_activities' is False, otherwise + the year of the sector data (ex. NAICS_2012_Code) -- _sector_aggregation_level_: 'aggregated' or 'disaggregated'. Some - dataset crosswalks contain every level of relevant sectors (ex. NAICS - for 2-6 digits), that is they are fully disaggregated. 
Other datasets only - contain information for the highest relevant sector level, in which case, - the dataset is marked as showing aggregated sectors only - (ex. USGS_WU_Coef crosswalk) \ No newline at end of file +- _sector_aggregation_level_: 'aggregated' or 'disaggregated'. Some dataset + crosswalks contain every level of relevant sectors (ex. NAICS for 2-6 + digits), that is they are fully disaggregated. Other datasets only + contain information for the highest relevant sector level, in which case, + the dataset is marked as showing aggregated sectors only (e.g., + USGS_WU_Coef crosswalk). \ No newline at end of file diff --git a/flowsa/data/activitytosectormapping/NAICS_Crosswalk_BEA_2012_Summary.csv b/flowsa/data/activitytosectormapping/NAICS_Crosswalk_BEA_2012_Summary.csv new file mode 100644 index 000000000..9179537c3 --- /dev/null +++ b/flowsa/data/activitytosectormapping/NAICS_Crosswalk_BEA_2012_Summary.csv @@ -0,0 +1,1117 @@ +ActivitySourceName,Activity,SectorSourceName,Sector,SectorType +BEA_2012_Summary_Code,111CA,NAICS_2012_Code,111110, +BEA_2012_Summary_Code,111CA,NAICS_2012_Code,111120, +BEA_2012_Summary_Code,111CA,NAICS_2012_Code,111130, +BEA_2012_Summary_Code,111CA,NAICS_2012_Code,111140, +BEA_2012_Summary_Code,111CA,NAICS_2012_Code,111150, +BEA_2012_Summary_Code,111CA,NAICS_2012_Code,111160, +BEA_2012_Summary_Code,111CA,NAICS_2012_Code,111191, +BEA_2012_Summary_Code,111CA,NAICS_2012_Code,111199, +BEA_2012_Summary_Code,111CA,NAICS_2012_Code,111211, +BEA_2012_Summary_Code,111CA,NAICS_2012_Code,111219, +BEA_2012_Summary_Code,111CA,NAICS_2012_Code,111310, +BEA_2012_Summary_Code,111CA,NAICS_2012_Code,111320, +BEA_2012_Summary_Code,111CA,NAICS_2012_Code,111331, +BEA_2012_Summary_Code,111CA,NAICS_2012_Code,111332, +BEA_2012_Summary_Code,111CA,NAICS_2012_Code,111333, +BEA_2012_Summary_Code,111CA,NAICS_2012_Code,111334, +BEA_2012_Summary_Code,111CA,NAICS_2012_Code,111335, +BEA_2012_Summary_Code,111CA,NAICS_2012_Code,111336, 
+BEA_2012_Summary_Code,111CA,NAICS_2012_Code,111339, +BEA_2012_Summary_Code,111CA,NAICS_2012_Code,111411, +BEA_2012_Summary_Code,111CA,NAICS_2012_Code,111419, +BEA_2012_Summary_Code,111CA,NAICS_2012_Code,111421, +BEA_2012_Summary_Code,111CA,NAICS_2012_Code,111422, +BEA_2012_Summary_Code,111CA,NAICS_2012_Code,111910, +BEA_2012_Summary_Code,111CA,NAICS_2012_Code,111920, +BEA_2012_Summary_Code,111CA,NAICS_2012_Code,111930, +BEA_2012_Summary_Code,111CA,NAICS_2012_Code,111940, +BEA_2012_Summary_Code,111CA,NAICS_2012_Code,111991, +BEA_2012_Summary_Code,111CA,NAICS_2012_Code,111992, +BEA_2012_Summary_Code,111CA,NAICS_2012_Code,111998, +BEA_2012_Summary_Code,111CA,NAICS_2012_Code,112111, +BEA_2012_Summary_Code,111CA,NAICS_2012_Code,112112, +BEA_2012_Summary_Code,111CA,NAICS_2012_Code,112120, +BEA_2012_Summary_Code,111CA,NAICS_2012_Code,112130, +BEA_2012_Summary_Code,111CA,NAICS_2012_Code,112210, +BEA_2012_Summary_Code,111CA,NAICS_2012_Code,112310, +BEA_2012_Summary_Code,111CA,NAICS_2012_Code,112320, +BEA_2012_Summary_Code,111CA,NAICS_2012_Code,112330, +BEA_2012_Summary_Code,111CA,NAICS_2012_Code,112340, +BEA_2012_Summary_Code,111CA,NAICS_2012_Code,112390, +BEA_2012_Summary_Code,111CA,NAICS_2012_Code,112410, +BEA_2012_Summary_Code,111CA,NAICS_2012_Code,112420, +BEA_2012_Summary_Code,111CA,NAICS_2012_Code,112511, +BEA_2012_Summary_Code,111CA,NAICS_2012_Code,112512, +BEA_2012_Summary_Code,111CA,NAICS_2012_Code,112519, +BEA_2012_Summary_Code,111CA,NAICS_2012_Code,112910, +BEA_2012_Summary_Code,111CA,NAICS_2012_Code,112920, +BEA_2012_Summary_Code,111CA,NAICS_2012_Code,112930, +BEA_2012_Summary_Code,111CA,NAICS_2012_Code,112990, +BEA_2012_Summary_Code,113FF,NAICS_2012_Code,113110, +BEA_2012_Summary_Code,113FF,NAICS_2012_Code,113210, +BEA_2012_Summary_Code,113FF,NAICS_2012_Code,113310, +BEA_2012_Summary_Code,113FF,NAICS_2012_Code,114111, +BEA_2012_Summary_Code,113FF,NAICS_2012_Code,114112, +BEA_2012_Summary_Code,113FF,NAICS_2012_Code,114119, 
+BEA_2012_Summary_Code,113FF,NAICS_2012_Code,114210, +BEA_2012_Summary_Code,113FF,NAICS_2012_Code,115111, +BEA_2012_Summary_Code,113FF,NAICS_2012_Code,115112, +BEA_2012_Summary_Code,113FF,NAICS_2012_Code,115113, +BEA_2012_Summary_Code,113FF,NAICS_2012_Code,115114, +BEA_2012_Summary_Code,113FF,NAICS_2012_Code,115115, +BEA_2012_Summary_Code,113FF,NAICS_2012_Code,115116, +BEA_2012_Summary_Code,113FF,NAICS_2012_Code,115210, +BEA_2012_Summary_Code,113FF,NAICS_2012_Code,115310, +BEA_2012_Summary_Code,211,NAICS_2012_Code,211111, +BEA_2012_Summary_Code,211,NAICS_2012_Code,211112, +BEA_2012_Summary_Code,212,NAICS_2012_Code,212111, +BEA_2012_Summary_Code,212,NAICS_2012_Code,212112, +BEA_2012_Summary_Code,212,NAICS_2012_Code,212113, +BEA_2012_Summary_Code,212,NAICS_2012_Code,212210, +BEA_2012_Summary_Code,212,NAICS_2012_Code,212221, +BEA_2012_Summary_Code,212,NAICS_2012_Code,212222, +BEA_2012_Summary_Code,212,NAICS_2012_Code,212231, +BEA_2012_Summary_Code,212,NAICS_2012_Code,212234, +BEA_2012_Summary_Code,212,NAICS_2012_Code,212291, +BEA_2012_Summary_Code,212,NAICS_2012_Code,212299, +BEA_2012_Summary_Code,212,NAICS_2012_Code,212311, +BEA_2012_Summary_Code,212,NAICS_2012_Code,212312, +BEA_2012_Summary_Code,212,NAICS_2012_Code,212313, +BEA_2012_Summary_Code,212,NAICS_2012_Code,212319, +BEA_2012_Summary_Code,212,NAICS_2012_Code,212321, +BEA_2012_Summary_Code,212,NAICS_2012_Code,212322, +BEA_2012_Summary_Code,212,NAICS_2012_Code,212324, +BEA_2012_Summary_Code,212,NAICS_2012_Code,212325, +BEA_2012_Summary_Code,212,NAICS_2012_Code,212391, +BEA_2012_Summary_Code,212,NAICS_2012_Code,212392, +BEA_2012_Summary_Code,212,NAICS_2012_Code,212393, +BEA_2012_Summary_Code,212,NAICS_2012_Code,212399, +BEA_2012_Summary_Code,213,NAICS_2012_Code,213111, +BEA_2012_Summary_Code,213,NAICS_2012_Code,213112, +BEA_2012_Summary_Code,213,NAICS_2012_Code,213113, +BEA_2012_Summary_Code,213,NAICS_2012_Code,213114, +BEA_2012_Summary_Code,213,NAICS_2012_Code,213115, 
+BEA_2012_Summary_Code,22,NAICS_2012_Code,221111, +BEA_2012_Summary_Code,22,NAICS_2012_Code,221112, +BEA_2012_Summary_Code,22,NAICS_2012_Code,221113, +BEA_2012_Summary_Code,22,NAICS_2012_Code,221114, +BEA_2012_Summary_Code,22,NAICS_2012_Code,221115, +BEA_2012_Summary_Code,22,NAICS_2012_Code,221116, +BEA_2012_Summary_Code,22,NAICS_2012_Code,221117, +BEA_2012_Summary_Code,22,NAICS_2012_Code,221118, +BEA_2012_Summary_Code,22,NAICS_2012_Code,221121, +BEA_2012_Summary_Code,22,NAICS_2012_Code,221122, +BEA_2012_Summary_Code,22,NAICS_2012_Code,221210, +BEA_2012_Summary_Code,22,NAICS_2012_Code,221310, +BEA_2012_Summary_Code,22,NAICS_2012_Code,221320, +BEA_2012_Summary_Code,22,NAICS_2012_Code,221330, +BEA_2012_Summary_Code,23,NAICS_2012_Code,236115, +BEA_2012_Summary_Code,23,NAICS_2012_Code,236116, +BEA_2012_Summary_Code,23,NAICS_2012_Code,236117, +BEA_2012_Summary_Code,23,NAICS_2012_Code,236118, +BEA_2012_Summary_Code,23,NAICS_2012_Code,236210, +BEA_2012_Summary_Code,23,NAICS_2012_Code,236220, +BEA_2012_Summary_Code,23,NAICS_2012_Code,237110, +BEA_2012_Summary_Code,23,NAICS_2012_Code,237120, +BEA_2012_Summary_Code,23,NAICS_2012_Code,237130, +BEA_2012_Summary_Code,23,NAICS_2012_Code,237210, +BEA_2012_Summary_Code,23,NAICS_2012_Code,237310, +BEA_2012_Summary_Code,23,NAICS_2012_Code,237990, +BEA_2012_Summary_Code,23,NAICS_2012_Code,238110, +BEA_2012_Summary_Code,23,NAICS_2012_Code,238120, +BEA_2012_Summary_Code,23,NAICS_2012_Code,238130, +BEA_2012_Summary_Code,23,NAICS_2012_Code,238140, +BEA_2012_Summary_Code,23,NAICS_2012_Code,238150, +BEA_2012_Summary_Code,23,NAICS_2012_Code,238160, +BEA_2012_Summary_Code,23,NAICS_2012_Code,238170, +BEA_2012_Summary_Code,23,NAICS_2012_Code,238190, +BEA_2012_Summary_Code,23,NAICS_2012_Code,238210, +BEA_2012_Summary_Code,23,NAICS_2012_Code,238220, +BEA_2012_Summary_Code,23,NAICS_2012_Code,238290, +BEA_2012_Summary_Code,23,NAICS_2012_Code,238310, +BEA_2012_Summary_Code,23,NAICS_2012_Code,238320, +BEA_2012_Summary_Code,23,NAICS_2012_Code,238330, 
+BEA_2012_Summary_Code,23,NAICS_2012_Code,238340, +BEA_2012_Summary_Code,23,NAICS_2012_Code,238350, +BEA_2012_Summary_Code,23,NAICS_2012_Code,238390, +BEA_2012_Summary_Code,23,NAICS_2012_Code,238910, +BEA_2012_Summary_Code,23,NAICS_2012_Code,238990, +BEA_2012_Summary_Code,311FT,NAICS_2012_Code,311111, +BEA_2012_Summary_Code,311FT,NAICS_2012_Code,311119, +BEA_2012_Summary_Code,311FT,NAICS_2012_Code,311211, +BEA_2012_Summary_Code,311FT,NAICS_2012_Code,311212, +BEA_2012_Summary_Code,311FT,NAICS_2012_Code,311213, +BEA_2012_Summary_Code,311FT,NAICS_2012_Code,311221, +BEA_2012_Summary_Code,311FT,NAICS_2012_Code,311224, +BEA_2012_Summary_Code,311FT,NAICS_2012_Code,311225, +BEA_2012_Summary_Code,311FT,NAICS_2012_Code,311230, +BEA_2012_Summary_Code,311FT,NAICS_2012_Code,311313, +BEA_2012_Summary_Code,311FT,NAICS_2012_Code,311314, +BEA_2012_Summary_Code,311FT,NAICS_2012_Code,311340, +BEA_2012_Summary_Code,311FT,NAICS_2012_Code,311351, +BEA_2012_Summary_Code,311FT,NAICS_2012_Code,311352, +BEA_2012_Summary_Code,311FT,NAICS_2012_Code,311411, +BEA_2012_Summary_Code,311FT,NAICS_2012_Code,311412, +BEA_2012_Summary_Code,311FT,NAICS_2012_Code,311421, +BEA_2012_Summary_Code,311FT,NAICS_2012_Code,311422, +BEA_2012_Summary_Code,311FT,NAICS_2012_Code,311423, +BEA_2012_Summary_Code,311FT,NAICS_2012_Code,311511, +BEA_2012_Summary_Code,311FT,NAICS_2012_Code,311512, +BEA_2012_Summary_Code,311FT,NAICS_2012_Code,311513, +BEA_2012_Summary_Code,311FT,NAICS_2012_Code,311514, +BEA_2012_Summary_Code,311FT,NAICS_2012_Code,311520, +BEA_2012_Summary_Code,311FT,NAICS_2012_Code,311611, +BEA_2012_Summary_Code,311FT,NAICS_2012_Code,311612, +BEA_2012_Summary_Code,311FT,NAICS_2012_Code,311613, +BEA_2012_Summary_Code,311FT,NAICS_2012_Code,311615, +BEA_2012_Summary_Code,311FT,NAICS_2012_Code,311710, +BEA_2012_Summary_Code,311FT,NAICS_2012_Code,311811, +BEA_2012_Summary_Code,311FT,NAICS_2012_Code,311812, +BEA_2012_Summary_Code,311FT,NAICS_2012_Code,311813, +BEA_2012_Summary_Code,311FT,NAICS_2012_Code,311821, 
+BEA_2012_Summary_Code,311FT,NAICS_2012_Code,311824, +BEA_2012_Summary_Code,311FT,NAICS_2012_Code,311830, +BEA_2012_Summary_Code,311FT,NAICS_2012_Code,311911, +BEA_2012_Summary_Code,311FT,NAICS_2012_Code,311919, +BEA_2012_Summary_Code,311FT,NAICS_2012_Code,311920, +BEA_2012_Summary_Code,311FT,NAICS_2012_Code,311930, +BEA_2012_Summary_Code,311FT,NAICS_2012_Code,311941, +BEA_2012_Summary_Code,311FT,NAICS_2012_Code,311942, +BEA_2012_Summary_Code,311FT,NAICS_2012_Code,311991, +BEA_2012_Summary_Code,311FT,NAICS_2012_Code,311999, +BEA_2012_Summary_Code,311FT,NAICS_2012_Code,312111, +BEA_2012_Summary_Code,311FT,NAICS_2012_Code,312112, +BEA_2012_Summary_Code,311FT,NAICS_2012_Code,312113, +BEA_2012_Summary_Code,311FT,NAICS_2012_Code,312120, +BEA_2012_Summary_Code,311FT,NAICS_2012_Code,312130, +BEA_2012_Summary_Code,311FT,NAICS_2012_Code,312140, +BEA_2012_Summary_Code,311FT,NAICS_2012_Code,312230, +BEA_2012_Summary_Code,313TT,NAICS_2012_Code,313110, +BEA_2012_Summary_Code,313TT,NAICS_2012_Code,313210, +BEA_2012_Summary_Code,313TT,NAICS_2012_Code,313220, +BEA_2012_Summary_Code,313TT,NAICS_2012_Code,313230, +BEA_2012_Summary_Code,313TT,NAICS_2012_Code,313240, +BEA_2012_Summary_Code,313TT,NAICS_2012_Code,313310, +BEA_2012_Summary_Code,313TT,NAICS_2012_Code,313320, +BEA_2012_Summary_Code,313TT,NAICS_2012_Code,314110, +BEA_2012_Summary_Code,313TT,NAICS_2012_Code,314120, +BEA_2012_Summary_Code,313TT,NAICS_2012_Code,314910, +BEA_2012_Summary_Code,313TT,NAICS_2012_Code,314994, +BEA_2012_Summary_Code,313TT,NAICS_2012_Code,314999, +BEA_2012_Summary_Code,315AL,NAICS_2012_Code,315110, +BEA_2012_Summary_Code,315AL,NAICS_2012_Code,315190, +BEA_2012_Summary_Code,315AL,NAICS_2012_Code,315210, +BEA_2012_Summary_Code,315AL,NAICS_2012_Code,315220, +BEA_2012_Summary_Code,315AL,NAICS_2012_Code,315240, +BEA_2012_Summary_Code,315AL,NAICS_2012_Code,315280, +BEA_2012_Summary_Code,315AL,NAICS_2012_Code,315990, +BEA_2012_Summary_Code,315AL,NAICS_2012_Code,316110, 
+BEA_2012_Summary_Code,315AL,NAICS_2012_Code,316210, +BEA_2012_Summary_Code,315AL,NAICS_2012_Code,316992, +BEA_2012_Summary_Code,315AL,NAICS_2012_Code,316998, +BEA_2012_Summary_Code,321,NAICS_2012_Code,321113, +BEA_2012_Summary_Code,321,NAICS_2012_Code,321114, +BEA_2012_Summary_Code,321,NAICS_2012_Code,321211, +BEA_2012_Summary_Code,321,NAICS_2012_Code,321212, +BEA_2012_Summary_Code,321,NAICS_2012_Code,321213, +BEA_2012_Summary_Code,321,NAICS_2012_Code,321214, +BEA_2012_Summary_Code,321,NAICS_2012_Code,321219, +BEA_2012_Summary_Code,321,NAICS_2012_Code,321911, +BEA_2012_Summary_Code,321,NAICS_2012_Code,321912, +BEA_2012_Summary_Code,321,NAICS_2012_Code,321918, +BEA_2012_Summary_Code,321,NAICS_2012_Code,321920, +BEA_2012_Summary_Code,321,NAICS_2012_Code,321991, +BEA_2012_Summary_Code,321,NAICS_2012_Code,321992, +BEA_2012_Summary_Code,321,NAICS_2012_Code,321999, +BEA_2012_Summary_Code,322,NAICS_2012_Code,322110, +BEA_2012_Summary_Code,322,NAICS_2012_Code,322121, +BEA_2012_Summary_Code,322,NAICS_2012_Code,322122, +BEA_2012_Summary_Code,322,NAICS_2012_Code,322130, +BEA_2012_Summary_Code,322,NAICS_2012_Code,322211, +BEA_2012_Summary_Code,322,NAICS_2012_Code,322212, +BEA_2012_Summary_Code,322,NAICS_2012_Code,322219, +BEA_2012_Summary_Code,322,NAICS_2012_Code,322220, +BEA_2012_Summary_Code,322,NAICS_2012_Code,322230, +BEA_2012_Summary_Code,322,NAICS_2012_Code,322291, +BEA_2012_Summary_Code,322,NAICS_2012_Code,322299, +BEA_2012_Summary_Code,323,NAICS_2012_Code,323111, +BEA_2012_Summary_Code,323,NAICS_2012_Code,323113, +BEA_2012_Summary_Code,323,NAICS_2012_Code,323117, +BEA_2012_Summary_Code,323,NAICS_2012_Code,323120, +BEA_2012_Summary_Code,324,NAICS_2012_Code,324110, +BEA_2012_Summary_Code,324,NAICS_2012_Code,324121, +BEA_2012_Summary_Code,324,NAICS_2012_Code,324122, +BEA_2012_Summary_Code,324,NAICS_2012_Code,324191, +BEA_2012_Summary_Code,324,NAICS_2012_Code,324199, +BEA_2012_Summary_Code,325,NAICS_2012_Code,325110, +BEA_2012_Summary_Code,325,NAICS_2012_Code,325120, 
+BEA_2012_Summary_Code,325,NAICS_2012_Code,325130, +BEA_2012_Summary_Code,325,NAICS_2012_Code,325180, +BEA_2012_Summary_Code,325,NAICS_2012_Code,325193, +BEA_2012_Summary_Code,325,NAICS_2012_Code,325194, +BEA_2012_Summary_Code,325,NAICS_2012_Code,325199, +BEA_2012_Summary_Code,325,NAICS_2012_Code,325211, +BEA_2012_Summary_Code,325,NAICS_2012_Code,325212, +BEA_2012_Summary_Code,325,NAICS_2012_Code,325220, +BEA_2012_Summary_Code,325,NAICS_2012_Code,325311, +BEA_2012_Summary_Code,325,NAICS_2012_Code,325312, +BEA_2012_Summary_Code,325,NAICS_2012_Code,325314, +BEA_2012_Summary_Code,325,NAICS_2012_Code,325320, +BEA_2012_Summary_Code,325,NAICS_2012_Code,325411, +BEA_2012_Summary_Code,325,NAICS_2012_Code,325412, +BEA_2012_Summary_Code,325,NAICS_2012_Code,325413, +BEA_2012_Summary_Code,325,NAICS_2012_Code,325414, +BEA_2012_Summary_Code,325,NAICS_2012_Code,325510, +BEA_2012_Summary_Code,325,NAICS_2012_Code,325520, +BEA_2012_Summary_Code,325,NAICS_2012_Code,325611, +BEA_2012_Summary_Code,325,NAICS_2012_Code,325612, +BEA_2012_Summary_Code,325,NAICS_2012_Code,325613, +BEA_2012_Summary_Code,325,NAICS_2012_Code,325620, +BEA_2012_Summary_Code,325,NAICS_2012_Code,325910, +BEA_2012_Summary_Code,325,NAICS_2012_Code,325920, +BEA_2012_Summary_Code,325,NAICS_2012_Code,325991, +BEA_2012_Summary_Code,325,NAICS_2012_Code,325992, +BEA_2012_Summary_Code,325,NAICS_2012_Code,325998, +BEA_2012_Summary_Code,326,NAICS_2012_Code,326111, +BEA_2012_Summary_Code,326,NAICS_2012_Code,326112, +BEA_2012_Summary_Code,326,NAICS_2012_Code,326113, +BEA_2012_Summary_Code,326,NAICS_2012_Code,326121, +BEA_2012_Summary_Code,326,NAICS_2012_Code,326122, +BEA_2012_Summary_Code,326,NAICS_2012_Code,326130, +BEA_2012_Summary_Code,326,NAICS_2012_Code,326140, +BEA_2012_Summary_Code,326,NAICS_2012_Code,326150, +BEA_2012_Summary_Code,326,NAICS_2012_Code,326160, +BEA_2012_Summary_Code,326,NAICS_2012_Code,326191, +BEA_2012_Summary_Code,326,NAICS_2012_Code,326199, +BEA_2012_Summary_Code,326,NAICS_2012_Code,326211, 
+BEA_2012_Summary_Code,326,NAICS_2012_Code,326212, +BEA_2012_Summary_Code,326,NAICS_2012_Code,326220, +BEA_2012_Summary_Code,326,NAICS_2012_Code,326291, +BEA_2012_Summary_Code,326,NAICS_2012_Code,326299, +BEA_2012_Summary_Code,327,NAICS_2012_Code,327110, +BEA_2012_Summary_Code,327,NAICS_2012_Code,327120, +BEA_2012_Summary_Code,327,NAICS_2012_Code,327211, +BEA_2012_Summary_Code,327,NAICS_2012_Code,327212, +BEA_2012_Summary_Code,327,NAICS_2012_Code,327213, +BEA_2012_Summary_Code,327,NAICS_2012_Code,327215, +BEA_2012_Summary_Code,327,NAICS_2012_Code,327310, +BEA_2012_Summary_Code,327,NAICS_2012_Code,327320, +BEA_2012_Summary_Code,327,NAICS_2012_Code,327331, +BEA_2012_Summary_Code,327,NAICS_2012_Code,327332, +BEA_2012_Summary_Code,327,NAICS_2012_Code,327390, +BEA_2012_Summary_Code,327,NAICS_2012_Code,327410, +BEA_2012_Summary_Code,327,NAICS_2012_Code,327420, +BEA_2012_Summary_Code,327,NAICS_2012_Code,327910, +BEA_2012_Summary_Code,327,NAICS_2012_Code,327991, +BEA_2012_Summary_Code,327,NAICS_2012_Code,327992, +BEA_2012_Summary_Code,327,NAICS_2012_Code,327993, +BEA_2012_Summary_Code,327,NAICS_2012_Code,327999, +BEA_2012_Summary_Code,331,NAICS_2012_Code,331110, +BEA_2012_Summary_Code,331,NAICS_2012_Code,331210, +BEA_2012_Summary_Code,331,NAICS_2012_Code,331221, +BEA_2012_Summary_Code,331,NAICS_2012_Code,331222, +BEA_2012_Summary_Code,331,NAICS_2012_Code,331313, +BEA_2012_Summary_Code,331,NAICS_2012_Code,331314, +BEA_2012_Summary_Code,331,NAICS_2012_Code,331315, +BEA_2012_Summary_Code,331,NAICS_2012_Code,331318, +BEA_2012_Summary_Code,331,NAICS_2012_Code,331410, +BEA_2012_Summary_Code,331,NAICS_2012_Code,331420, +BEA_2012_Summary_Code,331,NAICS_2012_Code,331491, +BEA_2012_Summary_Code,331,NAICS_2012_Code,331492, +BEA_2012_Summary_Code,331,NAICS_2012_Code,331511, +BEA_2012_Summary_Code,331,NAICS_2012_Code,331512, +BEA_2012_Summary_Code,331,NAICS_2012_Code,331513, +BEA_2012_Summary_Code,331,NAICS_2012_Code,331523, +BEA_2012_Summary_Code,331,NAICS_2012_Code,331524, 
+BEA_2012_Summary_Code,331,NAICS_2012_Code,331529, +BEA_2012_Summary_Code,332,NAICS_2012_Code,332111, +BEA_2012_Summary_Code,332,NAICS_2012_Code,332112, +BEA_2012_Summary_Code,332,NAICS_2012_Code,332114, +BEA_2012_Summary_Code,332,NAICS_2012_Code,332117, +BEA_2012_Summary_Code,332,NAICS_2012_Code,332119, +BEA_2012_Summary_Code,332,NAICS_2012_Code,332215, +BEA_2012_Summary_Code,332,NAICS_2012_Code,332216, +BEA_2012_Summary_Code,332,NAICS_2012_Code,332311, +BEA_2012_Summary_Code,332,NAICS_2012_Code,332312, +BEA_2012_Summary_Code,332,NAICS_2012_Code,332313, +BEA_2012_Summary_Code,332,NAICS_2012_Code,332321, +BEA_2012_Summary_Code,332,NAICS_2012_Code,332322, +BEA_2012_Summary_Code,332,NAICS_2012_Code,332323, +BEA_2012_Summary_Code,332,NAICS_2012_Code,332410, +BEA_2012_Summary_Code,332,NAICS_2012_Code,332420, +BEA_2012_Summary_Code,332,NAICS_2012_Code,332431, +BEA_2012_Summary_Code,332,NAICS_2012_Code,332439, +BEA_2012_Summary_Code,332,NAICS_2012_Code,332510, +BEA_2012_Summary_Code,332,NAICS_2012_Code,332613, +BEA_2012_Summary_Code,332,NAICS_2012_Code,332618, +BEA_2012_Summary_Code,332,NAICS_2012_Code,332710, +BEA_2012_Summary_Code,332,NAICS_2012_Code,332721, +BEA_2012_Summary_Code,332,NAICS_2012_Code,332722, +BEA_2012_Summary_Code,332,NAICS_2012_Code,332811, +BEA_2012_Summary_Code,332,NAICS_2012_Code,332812, +BEA_2012_Summary_Code,332,NAICS_2012_Code,332813, +BEA_2012_Summary_Code,332,NAICS_2012_Code,332911, +BEA_2012_Summary_Code,332,NAICS_2012_Code,332912, +BEA_2012_Summary_Code,332,NAICS_2012_Code,332913, +BEA_2012_Summary_Code,332,NAICS_2012_Code,332919, +BEA_2012_Summary_Code,332,NAICS_2012_Code,332991, +BEA_2012_Summary_Code,332,NAICS_2012_Code,332992, +BEA_2012_Summary_Code,332,NAICS_2012_Code,332993, +BEA_2012_Summary_Code,332,NAICS_2012_Code,332994, +BEA_2012_Summary_Code,332,NAICS_2012_Code,332996, +BEA_2012_Summary_Code,332,NAICS_2012_Code,332999, +BEA_2012_Summary_Code,333,NAICS_2012_Code,333111, +BEA_2012_Summary_Code,333,NAICS_2012_Code,333112, 
+BEA_2012_Summary_Code,333,NAICS_2012_Code,333120, +BEA_2012_Summary_Code,333,NAICS_2012_Code,333131, +BEA_2012_Summary_Code,333,NAICS_2012_Code,333132, +BEA_2012_Summary_Code,333,NAICS_2012_Code,333241, +BEA_2012_Summary_Code,333,NAICS_2012_Code,333242, +BEA_2012_Summary_Code,333,NAICS_2012_Code,333243, +BEA_2012_Summary_Code,333,NAICS_2012_Code,333244, +BEA_2012_Summary_Code,333,NAICS_2012_Code,333249, +BEA_2012_Summary_Code,333,NAICS_2012_Code,333314, +BEA_2012_Summary_Code,333,NAICS_2012_Code,333316, +BEA_2012_Summary_Code,333,NAICS_2012_Code,333318, +BEA_2012_Summary_Code,333,NAICS_2012_Code,333413, +BEA_2012_Summary_Code,333,NAICS_2012_Code,333414, +BEA_2012_Summary_Code,333,NAICS_2012_Code,333415, +BEA_2012_Summary_Code,333,NAICS_2012_Code,333511, +BEA_2012_Summary_Code,333,NAICS_2012_Code,333514, +BEA_2012_Summary_Code,333,NAICS_2012_Code,333515, +BEA_2012_Summary_Code,333,NAICS_2012_Code,333517, +BEA_2012_Summary_Code,333,NAICS_2012_Code,333519, +BEA_2012_Summary_Code,333,NAICS_2012_Code,333611, +BEA_2012_Summary_Code,333,NAICS_2012_Code,333612, +BEA_2012_Summary_Code,333,NAICS_2012_Code,333613, +BEA_2012_Summary_Code,333,NAICS_2012_Code,333618, +BEA_2012_Summary_Code,333,NAICS_2012_Code,333911, +BEA_2012_Summary_Code,333,NAICS_2012_Code,333912, +BEA_2012_Summary_Code,333,NAICS_2012_Code,333913, +BEA_2012_Summary_Code,333,NAICS_2012_Code,333921, +BEA_2012_Summary_Code,333,NAICS_2012_Code,333922, +BEA_2012_Summary_Code,333,NAICS_2012_Code,333923, +BEA_2012_Summary_Code,333,NAICS_2012_Code,333924, +BEA_2012_Summary_Code,333,NAICS_2012_Code,333991, +BEA_2012_Summary_Code,333,NAICS_2012_Code,333992, +BEA_2012_Summary_Code,333,NAICS_2012_Code,333993, +BEA_2012_Summary_Code,333,NAICS_2012_Code,333994, +BEA_2012_Summary_Code,333,NAICS_2012_Code,333995, +BEA_2012_Summary_Code,333,NAICS_2012_Code,333996, +BEA_2012_Summary_Code,333,NAICS_2012_Code,333997, +BEA_2012_Summary_Code,333,NAICS_2012_Code,333999, +BEA_2012_Summary_Code,334,NAICS_2012_Code,334111, 
+BEA_2012_Summary_Code,334,NAICS_2012_Code,334112, +BEA_2012_Summary_Code,334,NAICS_2012_Code,334118, +BEA_2012_Summary_Code,334,NAICS_2012_Code,334210, +BEA_2012_Summary_Code,334,NAICS_2012_Code,334220, +BEA_2012_Summary_Code,334,NAICS_2012_Code,334290, +BEA_2012_Summary_Code,334,NAICS_2012_Code,334310, +BEA_2012_Summary_Code,334,NAICS_2012_Code,334412, +BEA_2012_Summary_Code,334,NAICS_2012_Code,334413, +BEA_2012_Summary_Code,334,NAICS_2012_Code,334416, +BEA_2012_Summary_Code,334,NAICS_2012_Code,334417, +BEA_2012_Summary_Code,334,NAICS_2012_Code,334418, +BEA_2012_Summary_Code,334,NAICS_2012_Code,334419, +BEA_2012_Summary_Code,334,NAICS_2012_Code,334510, +BEA_2012_Summary_Code,334,NAICS_2012_Code,334511, +BEA_2012_Summary_Code,334,NAICS_2012_Code,334512, +BEA_2012_Summary_Code,334,NAICS_2012_Code,334513, +BEA_2012_Summary_Code,334,NAICS_2012_Code,334514, +BEA_2012_Summary_Code,334,NAICS_2012_Code,334515, +BEA_2012_Summary_Code,334,NAICS_2012_Code,334516, +BEA_2012_Summary_Code,334,NAICS_2012_Code,334517, +BEA_2012_Summary_Code,334,NAICS_2012_Code,334519, +BEA_2012_Summary_Code,334,NAICS_2012_Code,334613, +BEA_2012_Summary_Code,334,NAICS_2012_Code,334614, +BEA_2012_Summary_Code,335,NAICS_2012_Code,335110, +BEA_2012_Summary_Code,335,NAICS_2012_Code,335121, +BEA_2012_Summary_Code,335,NAICS_2012_Code,335122, +BEA_2012_Summary_Code,335,NAICS_2012_Code,335129, +BEA_2012_Summary_Code,335,NAICS_2012_Code,335210, +BEA_2012_Summary_Code,335,NAICS_2012_Code,335221, +BEA_2012_Summary_Code,335,NAICS_2012_Code,335222, +BEA_2012_Summary_Code,335,NAICS_2012_Code,335224, +BEA_2012_Summary_Code,335,NAICS_2012_Code,335228, +BEA_2012_Summary_Code,335,NAICS_2012_Code,335311, +BEA_2012_Summary_Code,335,NAICS_2012_Code,335312, +BEA_2012_Summary_Code,335,NAICS_2012_Code,335313, +BEA_2012_Summary_Code,335,NAICS_2012_Code,335314, +BEA_2012_Summary_Code,335,NAICS_2012_Code,335911, +BEA_2012_Summary_Code,335,NAICS_2012_Code,335912, +BEA_2012_Summary_Code,335,NAICS_2012_Code,335921, 
+BEA_2012_Summary_Code,335,NAICS_2012_Code,335929, +BEA_2012_Summary_Code,335,NAICS_2012_Code,335931, +BEA_2012_Summary_Code,335,NAICS_2012_Code,335932, +BEA_2012_Summary_Code,335,NAICS_2012_Code,335991, +BEA_2012_Summary_Code,335,NAICS_2012_Code,335999, +BEA_2012_Summary_Code,3361MV,NAICS_2012_Code,336111, +BEA_2012_Summary_Code,3361MV,NAICS_2012_Code,336112, +BEA_2012_Summary_Code,3361MV,NAICS_2012_Code,336120, +BEA_2012_Summary_Code,3361MV,NAICS_2012_Code,336211, +BEA_2012_Summary_Code,3361MV,NAICS_2012_Code,336212, +BEA_2012_Summary_Code,3361MV,NAICS_2012_Code,336213, +BEA_2012_Summary_Code,3361MV,NAICS_2012_Code,336214, +BEA_2012_Summary_Code,3361MV,NAICS_2012_Code,336310, +BEA_2012_Summary_Code,3361MV,NAICS_2012_Code,336320, +BEA_2012_Summary_Code,3361MV,NAICS_2012_Code,336330, +BEA_2012_Summary_Code,3361MV,NAICS_2012_Code,336340, +BEA_2012_Summary_Code,3361MV,NAICS_2012_Code,336350, +BEA_2012_Summary_Code,3361MV,NAICS_2012_Code,336360, +BEA_2012_Summary_Code,3361MV,NAICS_2012_Code,336370, +BEA_2012_Summary_Code,3361MV,NAICS_2012_Code,336390, +BEA_2012_Summary_Code,3364OT,NAICS_2012_Code,336411, +BEA_2012_Summary_Code,3364OT,NAICS_2012_Code,336412, +BEA_2012_Summary_Code,3364OT,NAICS_2012_Code,336413, +BEA_2012_Summary_Code,3364OT,NAICS_2012_Code,336414, +BEA_2012_Summary_Code,3364OT,NAICS_2012_Code,336415, +BEA_2012_Summary_Code,3364OT,NAICS_2012_Code,336419, +BEA_2012_Summary_Code,3364OT,NAICS_2012_Code,336510, +BEA_2012_Summary_Code,3364OT,NAICS_2012_Code,336611, +BEA_2012_Summary_Code,3364OT,NAICS_2012_Code,336612, +BEA_2012_Summary_Code,3364OT,NAICS_2012_Code,336991, +BEA_2012_Summary_Code,3364OT,NAICS_2012_Code,336992, +BEA_2012_Summary_Code,3364OT,NAICS_2012_Code,336999, +BEA_2012_Summary_Code,337,NAICS_2012_Code,337110, +BEA_2012_Summary_Code,337,NAICS_2012_Code,337121, +BEA_2012_Summary_Code,337,NAICS_2012_Code,337122, +BEA_2012_Summary_Code,337,NAICS_2012_Code,337124, +BEA_2012_Summary_Code,337,NAICS_2012_Code,337125, 
+BEA_2012_Summary_Code,337,NAICS_2012_Code,337127, +BEA_2012_Summary_Code,337,NAICS_2012_Code,337211, +BEA_2012_Summary_Code,337,NAICS_2012_Code,337212, +BEA_2012_Summary_Code,337,NAICS_2012_Code,337214, +BEA_2012_Summary_Code,337,NAICS_2012_Code,337215, +BEA_2012_Summary_Code,337,NAICS_2012_Code,337910, +BEA_2012_Summary_Code,337,NAICS_2012_Code,337920, +BEA_2012_Summary_Code,339,NAICS_2012_Code,339112, +BEA_2012_Summary_Code,339,NAICS_2012_Code,339113, +BEA_2012_Summary_Code,339,NAICS_2012_Code,339114, +BEA_2012_Summary_Code,339,NAICS_2012_Code,339115, +BEA_2012_Summary_Code,339,NAICS_2012_Code,339116, +BEA_2012_Summary_Code,339,NAICS_2012_Code,339910, +BEA_2012_Summary_Code,339,NAICS_2012_Code,339920, +BEA_2012_Summary_Code,339,NAICS_2012_Code,339930, +BEA_2012_Summary_Code,339,NAICS_2012_Code,339940, +BEA_2012_Summary_Code,339,NAICS_2012_Code,339950, +BEA_2012_Summary_Code,339,NAICS_2012_Code,339991, +BEA_2012_Summary_Code,339,NAICS_2012_Code,339992, +BEA_2012_Summary_Code,339,NAICS_2012_Code,339993, +BEA_2012_Summary_Code,339,NAICS_2012_Code,339994, +BEA_2012_Summary_Code,339,NAICS_2012_Code,339995, +BEA_2012_Summary_Code,339,NAICS_2012_Code,339999, +BEA_2012_Summary_Code,42,NAICS_2012_Code,423110, +BEA_2012_Summary_Code,42,NAICS_2012_Code,423120, +BEA_2012_Summary_Code,42,NAICS_2012_Code,423130, +BEA_2012_Summary_Code,42,NAICS_2012_Code,423140, +BEA_2012_Summary_Code,42,NAICS_2012_Code,423210, +BEA_2012_Summary_Code,42,NAICS_2012_Code,423220, +BEA_2012_Summary_Code,42,NAICS_2012_Code,423310, +BEA_2012_Summary_Code,42,NAICS_2012_Code,423320, +BEA_2012_Summary_Code,42,NAICS_2012_Code,423330, +BEA_2012_Summary_Code,42,NAICS_2012_Code,423390, +BEA_2012_Summary_Code,42,NAICS_2012_Code,423410, +BEA_2012_Summary_Code,42,NAICS_2012_Code,423420, +BEA_2012_Summary_Code,42,NAICS_2012_Code,423430, +BEA_2012_Summary_Code,42,NAICS_2012_Code,423440, +BEA_2012_Summary_Code,42,NAICS_2012_Code,423450, +BEA_2012_Summary_Code,42,NAICS_2012_Code,423460, 
+BEA_2012_Summary_Code,42,NAICS_2012_Code,423490, +BEA_2012_Summary_Code,42,NAICS_2012_Code,423510, +BEA_2012_Summary_Code,42,NAICS_2012_Code,423520, +BEA_2012_Summary_Code,42,NAICS_2012_Code,423610, +BEA_2012_Summary_Code,42,NAICS_2012_Code,423620, +BEA_2012_Summary_Code,42,NAICS_2012_Code,423690, +BEA_2012_Summary_Code,42,NAICS_2012_Code,423710, +BEA_2012_Summary_Code,42,NAICS_2012_Code,423720, +BEA_2012_Summary_Code,42,NAICS_2012_Code,423730, +BEA_2012_Summary_Code,42,NAICS_2012_Code,423740, +BEA_2012_Summary_Code,42,NAICS_2012_Code,423810, +BEA_2012_Summary_Code,42,NAICS_2012_Code,423820, +BEA_2012_Summary_Code,42,NAICS_2012_Code,423830, +BEA_2012_Summary_Code,42,NAICS_2012_Code,423840, +BEA_2012_Summary_Code,42,NAICS_2012_Code,423850, +BEA_2012_Summary_Code,42,NAICS_2012_Code,423860, +BEA_2012_Summary_Code,42,NAICS_2012_Code,423910, +BEA_2012_Summary_Code,42,NAICS_2012_Code,423920, +BEA_2012_Summary_Code,42,NAICS_2012_Code,423930, +BEA_2012_Summary_Code,42,NAICS_2012_Code,423940, +BEA_2012_Summary_Code,42,NAICS_2012_Code,423990, +BEA_2012_Summary_Code,42,NAICS_2012_Code,424110, +BEA_2012_Summary_Code,42,NAICS_2012_Code,424120, +BEA_2012_Summary_Code,42,NAICS_2012_Code,424130, +BEA_2012_Summary_Code,42,NAICS_2012_Code,424210, +BEA_2012_Summary_Code,42,NAICS_2012_Code,424310, +BEA_2012_Summary_Code,42,NAICS_2012_Code,424320, +BEA_2012_Summary_Code,42,NAICS_2012_Code,424330, +BEA_2012_Summary_Code,42,NAICS_2012_Code,424340, +BEA_2012_Summary_Code,42,NAICS_2012_Code,424410, +BEA_2012_Summary_Code,42,NAICS_2012_Code,424420, +BEA_2012_Summary_Code,42,NAICS_2012_Code,424430, +BEA_2012_Summary_Code,42,NAICS_2012_Code,424440, +BEA_2012_Summary_Code,42,NAICS_2012_Code,424450, +BEA_2012_Summary_Code,42,NAICS_2012_Code,424460, +BEA_2012_Summary_Code,42,NAICS_2012_Code,424470, +BEA_2012_Summary_Code,42,NAICS_2012_Code,424480, +BEA_2012_Summary_Code,42,NAICS_2012_Code,424490, +BEA_2012_Summary_Code,42,NAICS_2012_Code,424510, +BEA_2012_Summary_Code,42,NAICS_2012_Code,424520, 
+BEA_2012_Summary_Code,42,NAICS_2012_Code,424590, +BEA_2012_Summary_Code,42,NAICS_2012_Code,424610, +BEA_2012_Summary_Code,42,NAICS_2012_Code,424690, +BEA_2012_Summary_Code,42,NAICS_2012_Code,424710, +BEA_2012_Summary_Code,42,NAICS_2012_Code,424720, +BEA_2012_Summary_Code,42,NAICS_2012_Code,424810, +BEA_2012_Summary_Code,42,NAICS_2012_Code,424820, +BEA_2012_Summary_Code,42,NAICS_2012_Code,424910, +BEA_2012_Summary_Code,42,NAICS_2012_Code,424920, +BEA_2012_Summary_Code,42,NAICS_2012_Code,424930, +BEA_2012_Summary_Code,42,NAICS_2012_Code,424940, +BEA_2012_Summary_Code,42,NAICS_2012_Code,424950, +BEA_2012_Summary_Code,42,NAICS_2012_Code,424990, +BEA_2012_Summary_Code,42,NAICS_2012_Code,425110, +BEA_2012_Summary_Code,42,NAICS_2012_Code,425120, +BEA_2012_Summary_Code,441,NAICS_2012_Code,441110, +BEA_2012_Summary_Code,441,NAICS_2012_Code,441120, +BEA_2012_Summary_Code,441,NAICS_2012_Code,441210, +BEA_2012_Summary_Code,441,NAICS_2012_Code,441222, +BEA_2012_Summary_Code,441,NAICS_2012_Code,441228, +BEA_2012_Summary_Code,441,NAICS_2012_Code,441310, +BEA_2012_Summary_Code,441,NAICS_2012_Code,441320, +BEA_2012_Summary_Code,4A0,NAICS_2012_Code,442110, +BEA_2012_Summary_Code,4A0,NAICS_2012_Code,442210, +BEA_2012_Summary_Code,4A0,NAICS_2012_Code,442291, +BEA_2012_Summary_Code,4A0,NAICS_2012_Code,442299, +BEA_2012_Summary_Code,4A0,NAICS_2012_Code,443141, +BEA_2012_Summary_Code,4A0,NAICS_2012_Code,443142, +BEA_2012_Summary_Code,4A0,NAICS_2012_Code,444110, +BEA_2012_Summary_Code,4A0,NAICS_2012_Code,444120, +BEA_2012_Summary_Code,4A0,NAICS_2012_Code,444130, +BEA_2012_Summary_Code,4A0,NAICS_2012_Code,444190, +BEA_2012_Summary_Code,4A0,NAICS_2012_Code,444210, +BEA_2012_Summary_Code,4A0,NAICS_2012_Code,444220, +BEA_2012_Summary_Code,445,NAICS_2012_Code,445110, +BEA_2012_Summary_Code,445,NAICS_2012_Code,445120, +BEA_2012_Summary_Code,445,NAICS_2012_Code,445210, +BEA_2012_Summary_Code,445,NAICS_2012_Code,445220, +BEA_2012_Summary_Code,445,NAICS_2012_Code,445230, 
+BEA_2012_Summary_Code,445,NAICS_2012_Code,445291, +BEA_2012_Summary_Code,445,NAICS_2012_Code,445292, +BEA_2012_Summary_Code,445,NAICS_2012_Code,445299, +BEA_2012_Summary_Code,445,NAICS_2012_Code,445310, +BEA_2012_Summary_Code,4A0,NAICS_2012_Code,446110, +BEA_2012_Summary_Code,4A0,NAICS_2012_Code,446120, +BEA_2012_Summary_Code,4A0,NAICS_2012_Code,446130, +BEA_2012_Summary_Code,4A0,NAICS_2012_Code,446191, +BEA_2012_Summary_Code,4A0,NAICS_2012_Code,446199, +BEA_2012_Summary_Code,4A0,NAICS_2012_Code,447110, +BEA_2012_Summary_Code,4A0,NAICS_2012_Code,447190, +BEA_2012_Summary_Code,4A0,NAICS_2012_Code,448110, +BEA_2012_Summary_Code,4A0,NAICS_2012_Code,448120, +BEA_2012_Summary_Code,4A0,NAICS_2012_Code,448130, +BEA_2012_Summary_Code,4A0,NAICS_2012_Code,448140, +BEA_2012_Summary_Code,4A0,NAICS_2012_Code,448150, +BEA_2012_Summary_Code,4A0,NAICS_2012_Code,448190, +BEA_2012_Summary_Code,4A0,NAICS_2012_Code,448210, +BEA_2012_Summary_Code,4A0,NAICS_2012_Code,448310, +BEA_2012_Summary_Code,4A0,NAICS_2012_Code,448320, +BEA_2012_Summary_Code,4A0,NAICS_2012_Code,451110, +BEA_2012_Summary_Code,4A0,NAICS_2012_Code,451120, +BEA_2012_Summary_Code,4A0,NAICS_2012_Code,451130, +BEA_2012_Summary_Code,4A0,NAICS_2012_Code,451140, +BEA_2012_Summary_Code,4A0,NAICS_2012_Code,451211, +BEA_2012_Summary_Code,4A0,NAICS_2012_Code,451212, +BEA_2012_Summary_Code,452,NAICS_2012_Code,452111, +BEA_2012_Summary_Code,452,NAICS_2012_Code,452112, +BEA_2012_Summary_Code,452,NAICS_2012_Code,452910, +BEA_2012_Summary_Code,452,NAICS_2012_Code,452990, +BEA_2012_Summary_Code,4A0,NAICS_2012_Code,453110, +BEA_2012_Summary_Code,4A0,NAICS_2012_Code,453210, +BEA_2012_Summary_Code,4A0,NAICS_2012_Code,453220, +BEA_2012_Summary_Code,4A0,NAICS_2012_Code,453310, +BEA_2012_Summary_Code,4A0,NAICS_2012_Code,453910, +BEA_2012_Summary_Code,4A0,NAICS_2012_Code,453920, +BEA_2012_Summary_Code,4A0,NAICS_2012_Code,453930, +BEA_2012_Summary_Code,4A0,NAICS_2012_Code,453991, +BEA_2012_Summary_Code,4A0,NAICS_2012_Code,453998, 
+BEA_2012_Summary_Code,4A0,NAICS_2012_Code,454111, +BEA_2012_Summary_Code,4A0,NAICS_2012_Code,454112, +BEA_2012_Summary_Code,4A0,NAICS_2012_Code,454113, +BEA_2012_Summary_Code,4A0,NAICS_2012_Code,454210, +BEA_2012_Summary_Code,4A0,NAICS_2012_Code,454310, +BEA_2012_Summary_Code,4A0,NAICS_2012_Code,454390, +BEA_2012_Summary_Code,481,NAICS_2012_Code,481111, +BEA_2012_Summary_Code,481,NAICS_2012_Code,481112, +BEA_2012_Summary_Code,481,NAICS_2012_Code,481211, +BEA_2012_Summary_Code,481,NAICS_2012_Code,481212, +BEA_2012_Summary_Code,481,NAICS_2012_Code,481219, +BEA_2012_Summary_Code,482,NAICS_2012_Code,482111, +BEA_2012_Summary_Code,482,NAICS_2012_Code,482112, +BEA_2012_Summary_Code,483,NAICS_2012_Code,483111, +BEA_2012_Summary_Code,483,NAICS_2012_Code,483112, +BEA_2012_Summary_Code,483,NAICS_2012_Code,483113, +BEA_2012_Summary_Code,483,NAICS_2012_Code,483114, +BEA_2012_Summary_Code,483,NAICS_2012_Code,483211, +BEA_2012_Summary_Code,483,NAICS_2012_Code,483212, +BEA_2012_Summary_Code,484,NAICS_2012_Code,484110, +BEA_2012_Summary_Code,484,NAICS_2012_Code,484121, +BEA_2012_Summary_Code,484,NAICS_2012_Code,484122, +BEA_2012_Summary_Code,484,NAICS_2012_Code,484210, +BEA_2012_Summary_Code,484,NAICS_2012_Code,484220, +BEA_2012_Summary_Code,484,NAICS_2012_Code,484230, +BEA_2012_Summary_Code,485,NAICS_2012_Code,485111, +BEA_2012_Summary_Code,485,NAICS_2012_Code,485112, +BEA_2012_Summary_Code,485,NAICS_2012_Code,485113, +BEA_2012_Summary_Code,485,NAICS_2012_Code,485119, +BEA_2012_Summary_Code,485,NAICS_2012_Code,485210, +BEA_2012_Summary_Code,485,NAICS_2012_Code,485310, +BEA_2012_Summary_Code,485,NAICS_2012_Code,485320, +BEA_2012_Summary_Code,485,NAICS_2012_Code,485410, +BEA_2012_Summary_Code,485,NAICS_2012_Code,485510, +BEA_2012_Summary_Code,485,NAICS_2012_Code,485991, +BEA_2012_Summary_Code,485,NAICS_2012_Code,485999, +BEA_2012_Summary_Code,486,NAICS_2012_Code,486110, +BEA_2012_Summary_Code,486,NAICS_2012_Code,486210, +BEA_2012_Summary_Code,486,NAICS_2012_Code,486910, 
+BEA_2012_Summary_Code,486,NAICS_2012_Code,486990, +BEA_2012_Summary_Code,487OS,NAICS_2012_Code,487110, +BEA_2012_Summary_Code,487OS,NAICS_2012_Code,487210, +BEA_2012_Summary_Code,487OS,NAICS_2012_Code,487990, +BEA_2012_Summary_Code,487OS,NAICS_2012_Code,488111, +BEA_2012_Summary_Code,487OS,NAICS_2012_Code,488119, +BEA_2012_Summary_Code,487OS,NAICS_2012_Code,488190, +BEA_2012_Summary_Code,487OS,NAICS_2012_Code,488210, +BEA_2012_Summary_Code,487OS,NAICS_2012_Code,488310, +BEA_2012_Summary_Code,487OS,NAICS_2012_Code,488320, +BEA_2012_Summary_Code,487OS,NAICS_2012_Code,488330, +BEA_2012_Summary_Code,487OS,NAICS_2012_Code,488390, +BEA_2012_Summary_Code,487OS,NAICS_2012_Code,488410, +BEA_2012_Summary_Code,487OS,NAICS_2012_Code,488490, +BEA_2012_Summary_Code,487OS,NAICS_2012_Code,488510, +BEA_2012_Summary_Code,487OS,NAICS_2012_Code,488991, +BEA_2012_Summary_Code,487OS,NAICS_2012_Code,488999, +BEA_2012_Summary_Code,GFE,NAICS_2012_Code,491110, +BEA_2012_Summary_Code,487OS,NAICS_2012_Code,492110, +BEA_2012_Summary_Code,487OS,NAICS_2012_Code,492210, +BEA_2012_Summary_Code,493,NAICS_2012_Code,493110, +BEA_2012_Summary_Code,493,NAICS_2012_Code,493120, +BEA_2012_Summary_Code,493,NAICS_2012_Code,493130, +BEA_2012_Summary_Code,493,NAICS_2012_Code,493190, +BEA_2012_Summary_Code,511,NAICS_2012_Code,511110, +BEA_2012_Summary_Code,511,NAICS_2012_Code,511120, +BEA_2012_Summary_Code,511,NAICS_2012_Code,511130, +BEA_2012_Summary_Code,511,NAICS_2012_Code,511140, +BEA_2012_Summary_Code,511,NAICS_2012_Code,511191, +BEA_2012_Summary_Code,511,NAICS_2012_Code,511199, +BEA_2012_Summary_Code,511,NAICS_2012_Code,511210, +BEA_2012_Summary_Code,512,NAICS_2012_Code,512110, +BEA_2012_Summary_Code,512,NAICS_2012_Code,512120, +BEA_2012_Summary_Code,512,NAICS_2012_Code,512131, +BEA_2012_Summary_Code,512,NAICS_2012_Code,512132, +BEA_2012_Summary_Code,512,NAICS_2012_Code,512191, +BEA_2012_Summary_Code,512,NAICS_2012_Code,512199, +BEA_2012_Summary_Code,512,NAICS_2012_Code,512210, 
+BEA_2012_Summary_Code,512,NAICS_2012_Code,512220, +BEA_2012_Summary_Code,512,NAICS_2012_Code,512230, +BEA_2012_Summary_Code,512,NAICS_2012_Code,512240, +BEA_2012_Summary_Code,512,NAICS_2012_Code,512290, +BEA_2012_Summary_Code,513,NAICS_2012_Code,515111, +BEA_2012_Summary_Code,513,NAICS_2012_Code,515112, +BEA_2012_Summary_Code,513,NAICS_2012_Code,515120, +BEA_2012_Summary_Code,513,NAICS_2012_Code,515210, +BEA_2012_Summary_Code,513,NAICS_2012_Code,517110, +BEA_2012_Summary_Code,513,NAICS_2012_Code,517210, +BEA_2012_Summary_Code,513,NAICS_2012_Code,517410, +BEA_2012_Summary_Code,513,NAICS_2012_Code,517911, +BEA_2012_Summary_Code,513,NAICS_2012_Code,517919, +BEA_2012_Summary_Code,514,NAICS_2012_Code,518210, +BEA_2012_Summary_Code,514,NAICS_2012_Code,519110, +BEA_2012_Summary_Code,514,NAICS_2012_Code,519120, +BEA_2012_Summary_Code,514,NAICS_2012_Code,519130, +BEA_2012_Summary_Code,514,NAICS_2012_Code,519190, +BEA_2012_Summary_Code,521CI,NAICS_2012_Code,521110, +BEA_2012_Summary_Code,521CI,NAICS_2012_Code,522110, +BEA_2012_Summary_Code,521CI,NAICS_2012_Code,522120, +BEA_2012_Summary_Code,521CI,NAICS_2012_Code,522130, +BEA_2012_Summary_Code,521CI,NAICS_2012_Code,522190, +BEA_2012_Summary_Code,521CI,NAICS_2012_Code,522210, +BEA_2012_Summary_Code,521CI,NAICS_2012_Code,522220, +BEA_2012_Summary_Code,521CI,NAICS_2012_Code,522291, +BEA_2012_Summary_Code,521CI,NAICS_2012_Code,522292, +BEA_2012_Summary_Code,521CI,NAICS_2012_Code,522293, +BEA_2012_Summary_Code,521CI,NAICS_2012_Code,522294, +BEA_2012_Summary_Code,521CI,NAICS_2012_Code,522298, +BEA_2012_Summary_Code,521CI,NAICS_2012_Code,522310, +BEA_2012_Summary_Code,521CI,NAICS_2012_Code,522320, +BEA_2012_Summary_Code,521CI,NAICS_2012_Code,522390, +BEA_2012_Summary_Code,523,NAICS_2012_Code,523110, +BEA_2012_Summary_Code,523,NAICS_2012_Code,523120, +BEA_2012_Summary_Code,523,NAICS_2012_Code,523130, +BEA_2012_Summary_Code,523,NAICS_2012_Code,523140, +BEA_2012_Summary_Code,523,NAICS_2012_Code,523210, 
+BEA_2012_Summary_Code,523,NAICS_2012_Code,523910, +BEA_2012_Summary_Code,523,NAICS_2012_Code,523920, +BEA_2012_Summary_Code,523,NAICS_2012_Code,523930, +BEA_2012_Summary_Code,523,NAICS_2012_Code,523991, +BEA_2012_Summary_Code,523,NAICS_2012_Code,523999, +BEA_2012_Summary_Code,524,NAICS_2012_Code,524113, +BEA_2012_Summary_Code,524,NAICS_2012_Code,524114, +BEA_2012_Summary_Code,524,NAICS_2012_Code,524126, +BEA_2012_Summary_Code,524,NAICS_2012_Code,524127, +BEA_2012_Summary_Code,524,NAICS_2012_Code,524128, +BEA_2012_Summary_Code,524,NAICS_2012_Code,524130, +BEA_2012_Summary_Code,524,NAICS_2012_Code,524210, +BEA_2012_Summary_Code,524,NAICS_2012_Code,524291, +BEA_2012_Summary_Code,524,NAICS_2012_Code,524292, +BEA_2012_Summary_Code,524,NAICS_2012_Code,524298, +BEA_2012_Summary_Code,525,NAICS_2012_Code,525110, +BEA_2012_Summary_Code,525,NAICS_2012_Code,525120, +BEA_2012_Summary_Code,525,NAICS_2012_Code,525190, +BEA_2012_Summary_Code,525,NAICS_2012_Code,525910, +BEA_2012_Summary_Code,525,NAICS_2012_Code,525920, +BEA_2012_Summary_Code,525,NAICS_2012_Code,525990, +BEA_2012_Summary_Code,HS,NAICS_2012_Code,531110, +BEA_2012_Summary_Code,ORE,NAICS_2012_Code,531110, +BEA_2012_Summary_Code,HS,NAICS_2012_Code,531120, +BEA_2012_Summary_Code,ORE,NAICS_2012_Code,531120, +BEA_2012_Summary_Code,HS,NAICS_2012_Code,531130, +BEA_2012_Summary_Code,ORE,NAICS_2012_Code,531130, +BEA_2012_Summary_Code,HS,NAICS_2012_Code,531190, +BEA_2012_Summary_Code,ORE,NAICS_2012_Code,531190, +BEA_2012_Summary_Code,HS,NAICS_2012_Code,531210, +BEA_2012_Summary_Code,ORE,NAICS_2012_Code,531210, +BEA_2012_Summary_Code,ORE,NAICS_2012_Code,531311, +BEA_2012_Summary_Code,HS,NAICS_2012_Code,531311, +BEA_2012_Summary_Code,HS,NAICS_2012_Code,531312, +BEA_2012_Summary_Code,ORE,NAICS_2012_Code,531312, +BEA_2012_Summary_Code,HS,NAICS_2012_Code,531320, +BEA_2012_Summary_Code,ORE,NAICS_2012_Code,531320, +BEA_2012_Summary_Code,HS,NAICS_2012_Code,531390, +BEA_2012_Summary_Code,ORE,NAICS_2012_Code,531390, 
+BEA_2012_Summary_Code,532RL,NAICS_2012_Code,532111, +BEA_2012_Summary_Code,HS,NAICS_2012_Code,532111, +BEA_2012_Summary_Code,HS,NAICS_2012_Code,532112, +BEA_2012_Summary_Code,532RL,NAICS_2012_Code,532112, +BEA_2012_Summary_Code,HS,NAICS_2012_Code,532120, +BEA_2012_Summary_Code,532RL,NAICS_2012_Code,532120, +BEA_2012_Summary_Code,HS,NAICS_2012_Code,532210, +BEA_2012_Summary_Code,532RL,NAICS_2012_Code,532210, +BEA_2012_Summary_Code,HS,NAICS_2012_Code,532220, +BEA_2012_Summary_Code,532RL,NAICS_2012_Code,532220, +BEA_2012_Summary_Code,532RL,NAICS_2012_Code,532230, +BEA_2012_Summary_Code,HS,NAICS_2012_Code,532230, +BEA_2012_Summary_Code,HS,NAICS_2012_Code,532291, +BEA_2012_Summary_Code,532RL,NAICS_2012_Code,532291, +BEA_2012_Summary_Code,HS,NAICS_2012_Code,532292, +BEA_2012_Summary_Code,532RL,NAICS_2012_Code,532292, +BEA_2012_Summary_Code,HS,NAICS_2012_Code,532299, +BEA_2012_Summary_Code,532RL,NAICS_2012_Code,532299, +BEA_2012_Summary_Code,532RL,NAICS_2012_Code,532310, +BEA_2012_Summary_Code,HS,NAICS_2012_Code,532310, +BEA_2012_Summary_Code,HS,NAICS_2012_Code,532411, +BEA_2012_Summary_Code,532RL,NAICS_2012_Code,532411, +BEA_2012_Summary_Code,HS,NAICS_2012_Code,532412, +BEA_2012_Summary_Code,532RL,NAICS_2012_Code,532412, +BEA_2012_Summary_Code,HS,NAICS_2012_Code,532420, +BEA_2012_Summary_Code,532RL,NAICS_2012_Code,532420, +BEA_2012_Summary_Code,HS,NAICS_2012_Code,532490, +BEA_2012_Summary_Code,532RL,NAICS_2012_Code,532490, +BEA_2012_Summary_Code,532RL,NAICS_2012_Code,533110, +BEA_2012_Summary_Code,HS,NAICS_2012_Code,533110, +BEA_2012_Summary_Code,5411,NAICS_2012_Code,541110, +BEA_2012_Summary_Code,5411,NAICS_2012_Code,541120, +BEA_2012_Summary_Code,5411,NAICS_2012_Code,541191, +BEA_2012_Summary_Code,5411,NAICS_2012_Code,541199, +BEA_2012_Summary_Code,5412OP,NAICS_2012_Code,541211, +BEA_2012_Summary_Code,5412OP,NAICS_2012_Code,541213, +BEA_2012_Summary_Code,5412OP,NAICS_2012_Code,541214, +BEA_2012_Summary_Code,5412OP,NAICS_2012_Code,541219, 
+BEA_2012_Summary_Code,5412OP,NAICS_2012_Code,541310, +BEA_2012_Summary_Code,5412OP,NAICS_2012_Code,541320, +BEA_2012_Summary_Code,5412OP,NAICS_2012_Code,541330, +BEA_2012_Summary_Code,5412OP,NAICS_2012_Code,541340, +BEA_2012_Summary_Code,5412OP,NAICS_2012_Code,541350, +BEA_2012_Summary_Code,5412OP,NAICS_2012_Code,541360, +BEA_2012_Summary_Code,5412OP,NAICS_2012_Code,541370, +BEA_2012_Summary_Code,5412OP,NAICS_2012_Code,541380, +BEA_2012_Summary_Code,5412OP,NAICS_2012_Code,541410, +BEA_2012_Summary_Code,5412OP,NAICS_2012_Code,541420, +BEA_2012_Summary_Code,5412OP,NAICS_2012_Code,541430, +BEA_2012_Summary_Code,5412OP,NAICS_2012_Code,541490, +BEA_2012_Summary_Code,5415,NAICS_2012_Code,541511, +BEA_2012_Summary_Code,5415,NAICS_2012_Code,541512, +BEA_2012_Summary_Code,5415,NAICS_2012_Code,541513, +BEA_2012_Summary_Code,5415,NAICS_2012_Code,541519, +BEA_2012_Summary_Code,5412OP,NAICS_2012_Code,541611, +BEA_2012_Summary_Code,5412OP,NAICS_2012_Code,541612, +BEA_2012_Summary_Code,5412OP,NAICS_2012_Code,541613, +BEA_2012_Summary_Code,5412OP,NAICS_2012_Code,541614, +BEA_2012_Summary_Code,5412OP,NAICS_2012_Code,541618, +BEA_2012_Summary_Code,5412OP,NAICS_2012_Code,541620, +BEA_2012_Summary_Code,5412OP,NAICS_2012_Code,541690, +BEA_2012_Summary_Code,5412OP,NAICS_2012_Code,541711, +BEA_2012_Summary_Code,5412OP,NAICS_2012_Code,541712, +BEA_2012_Summary_Code,5412OP,NAICS_2012_Code,541720, +BEA_2012_Summary_Code,5412OP,NAICS_2012_Code,541810, +BEA_2012_Summary_Code,5412OP,NAICS_2012_Code,541820, +BEA_2012_Summary_Code,5412OP,NAICS_2012_Code,541830, +BEA_2012_Summary_Code,5412OP,NAICS_2012_Code,541840, +BEA_2012_Summary_Code,5412OP,NAICS_2012_Code,541850, +BEA_2012_Summary_Code,5412OP,NAICS_2012_Code,541860, +BEA_2012_Summary_Code,5412OP,NAICS_2012_Code,541870, +BEA_2012_Summary_Code,5412OP,NAICS_2012_Code,541890, +BEA_2012_Summary_Code,5412OP,NAICS_2012_Code,541910, +BEA_2012_Summary_Code,5412OP,NAICS_2012_Code,541921, +BEA_2012_Summary_Code,5412OP,NAICS_2012_Code,541922, 
+BEA_2012_Summary_Code,5412OP,NAICS_2012_Code,541930, +BEA_2012_Summary_Code,5412OP,NAICS_2012_Code,541940, +BEA_2012_Summary_Code,5412OP,NAICS_2012_Code,541990, +BEA_2012_Summary_Code,55,NAICS_2012_Code,551111, +BEA_2012_Summary_Code,55,NAICS_2012_Code,551112, +BEA_2012_Summary_Code,55,NAICS_2012_Code,551114, +BEA_2012_Summary_Code,561,NAICS_2012_Code,561110, +BEA_2012_Summary_Code,561,NAICS_2012_Code,561210, +BEA_2012_Summary_Code,561,NAICS_2012_Code,561311, +BEA_2012_Summary_Code,561,NAICS_2012_Code,561312, +BEA_2012_Summary_Code,561,NAICS_2012_Code,561320, +BEA_2012_Summary_Code,561,NAICS_2012_Code,561330, +BEA_2012_Summary_Code,561,NAICS_2012_Code,561410, +BEA_2012_Summary_Code,561,NAICS_2012_Code,561421, +BEA_2012_Summary_Code,561,NAICS_2012_Code,561422, +BEA_2012_Summary_Code,561,NAICS_2012_Code,561431, +BEA_2012_Summary_Code,561,NAICS_2012_Code,561439, +BEA_2012_Summary_Code,561,NAICS_2012_Code,561440, +BEA_2012_Summary_Code,561,NAICS_2012_Code,561450, +BEA_2012_Summary_Code,561,NAICS_2012_Code,561491, +BEA_2012_Summary_Code,561,NAICS_2012_Code,561492, +BEA_2012_Summary_Code,561,NAICS_2012_Code,561499, +BEA_2012_Summary_Code,561,NAICS_2012_Code,561510, +BEA_2012_Summary_Code,561,NAICS_2012_Code,561520, +BEA_2012_Summary_Code,561,NAICS_2012_Code,561591, +BEA_2012_Summary_Code,561,NAICS_2012_Code,561599, +BEA_2012_Summary_Code,561,NAICS_2012_Code,561611, +BEA_2012_Summary_Code,561,NAICS_2012_Code,561612, +BEA_2012_Summary_Code,561,NAICS_2012_Code,561613, +BEA_2012_Summary_Code,561,NAICS_2012_Code,561621, +BEA_2012_Summary_Code,561,NAICS_2012_Code,561622, +BEA_2012_Summary_Code,561,NAICS_2012_Code,561710, +BEA_2012_Summary_Code,561,NAICS_2012_Code,561720, +BEA_2012_Summary_Code,561,NAICS_2012_Code,561730, +BEA_2012_Summary_Code,561,NAICS_2012_Code,561740, +BEA_2012_Summary_Code,561,NAICS_2012_Code,561790, +BEA_2012_Summary_Code,561,NAICS_2012_Code,561910, +BEA_2012_Summary_Code,561,NAICS_2012_Code,561920, +BEA_2012_Summary_Code,561,NAICS_2012_Code,561990, 
+BEA_2012_Summary_Code,562,NAICS_2012_Code,562111, +BEA_2012_Summary_Code,562,NAICS_2012_Code,562112, +BEA_2012_Summary_Code,562,NAICS_2012_Code,562119, +BEA_2012_Summary_Code,562,NAICS_2012_Code,562211, +BEA_2012_Summary_Code,562,NAICS_2012_Code,562212, +BEA_2012_Summary_Code,562,NAICS_2012_Code,562213, +BEA_2012_Summary_Code,562,NAICS_2012_Code,562219, +BEA_2012_Summary_Code,562,NAICS_2012_Code,562910, +BEA_2012_Summary_Code,562,NAICS_2012_Code,562920, +BEA_2012_Summary_Code,562,NAICS_2012_Code,562991, +BEA_2012_Summary_Code,562,NAICS_2012_Code,562998, +BEA_2012_Summary_Code,61,NAICS_2012_Code,611110, +BEA_2012_Summary_Code,61,NAICS_2012_Code,611210, +BEA_2012_Summary_Code,61,NAICS_2012_Code,611310, +BEA_2012_Summary_Code,61,NAICS_2012_Code,611410, +BEA_2012_Summary_Code,61,NAICS_2012_Code,611420, +BEA_2012_Summary_Code,61,NAICS_2012_Code,611430, +BEA_2012_Summary_Code,61,NAICS_2012_Code,611511, +BEA_2012_Summary_Code,61,NAICS_2012_Code,611512, +BEA_2012_Summary_Code,61,NAICS_2012_Code,611513, +BEA_2012_Summary_Code,61,NAICS_2012_Code,611519, +BEA_2012_Summary_Code,61,NAICS_2012_Code,611610, +BEA_2012_Summary_Code,61,NAICS_2012_Code,611620, +BEA_2012_Summary_Code,61,NAICS_2012_Code,611630, +BEA_2012_Summary_Code,61,NAICS_2012_Code,611691, +BEA_2012_Summary_Code,61,NAICS_2012_Code,611692, +BEA_2012_Summary_Code,61,NAICS_2012_Code,611699, +BEA_2012_Summary_Code,61,NAICS_2012_Code,611710, +BEA_2012_Summary_Code,621,NAICS_2012_Code,621111, +BEA_2012_Summary_Code,621,NAICS_2012_Code,621112, +BEA_2012_Summary_Code,621,NAICS_2012_Code,621210, +BEA_2012_Summary_Code,621,NAICS_2012_Code,621310, +BEA_2012_Summary_Code,621,NAICS_2012_Code,621320, +BEA_2012_Summary_Code,621,NAICS_2012_Code,621330, +BEA_2012_Summary_Code,621,NAICS_2012_Code,621340, +BEA_2012_Summary_Code,621,NAICS_2012_Code,621391, +BEA_2012_Summary_Code,621,NAICS_2012_Code,621399, +BEA_2012_Summary_Code,621,NAICS_2012_Code,621410, +BEA_2012_Summary_Code,621,NAICS_2012_Code,621420, 
+BEA_2012_Summary_Code,621,NAICS_2012_Code,621491, +BEA_2012_Summary_Code,621,NAICS_2012_Code,621492, +BEA_2012_Summary_Code,621,NAICS_2012_Code,621493, +BEA_2012_Summary_Code,621,NAICS_2012_Code,621498, +BEA_2012_Summary_Code,621,NAICS_2012_Code,621511, +BEA_2012_Summary_Code,621,NAICS_2012_Code,621512, +BEA_2012_Summary_Code,621,NAICS_2012_Code,621610, +BEA_2012_Summary_Code,621,NAICS_2012_Code,621910, +BEA_2012_Summary_Code,621,NAICS_2012_Code,621991, +BEA_2012_Summary_Code,621,NAICS_2012_Code,621999, +BEA_2012_Summary_Code,622,NAICS_2012_Code,622110, +BEA_2012_Summary_Code,622,NAICS_2012_Code,622210, +BEA_2012_Summary_Code,622,NAICS_2012_Code,622310, +BEA_2012_Summary_Code,623,NAICS_2012_Code,623110, +BEA_2012_Summary_Code,623,NAICS_2012_Code,623210, +BEA_2012_Summary_Code,623,NAICS_2012_Code,623220, +BEA_2012_Summary_Code,623,NAICS_2012_Code,623311, +BEA_2012_Summary_Code,623,NAICS_2012_Code,623312, +BEA_2012_Summary_Code,623,NAICS_2012_Code,623990, +BEA_2012_Summary_Code,624,NAICS_2012_Code,624110, +BEA_2012_Summary_Code,624,NAICS_2012_Code,624120, +BEA_2012_Summary_Code,624,NAICS_2012_Code,624190, +BEA_2012_Summary_Code,624,NAICS_2012_Code,624210, +BEA_2012_Summary_Code,624,NAICS_2012_Code,624221, +BEA_2012_Summary_Code,624,NAICS_2012_Code,624229, +BEA_2012_Summary_Code,624,NAICS_2012_Code,624230, +BEA_2012_Summary_Code,624,NAICS_2012_Code,624310, +BEA_2012_Summary_Code,624,NAICS_2012_Code,624410, +BEA_2012_Summary_Code,711AS,NAICS_2012_Code,711110, +BEA_2012_Summary_Code,711AS,NAICS_2012_Code,711120, +BEA_2012_Summary_Code,711AS,NAICS_2012_Code,711130, +BEA_2012_Summary_Code,711AS,NAICS_2012_Code,711190, +BEA_2012_Summary_Code,711AS,NAICS_2012_Code,711211, +BEA_2012_Summary_Code,711AS,NAICS_2012_Code,711212, +BEA_2012_Summary_Code,711AS,NAICS_2012_Code,711219, +BEA_2012_Summary_Code,711AS,NAICS_2012_Code,711310, +BEA_2012_Summary_Code,711AS,NAICS_2012_Code,711320, +BEA_2012_Summary_Code,711AS,NAICS_2012_Code,711410, 
+BEA_2012_Summary_Code,711AS,NAICS_2012_Code,711510, +BEA_2012_Summary_Code,711AS,NAICS_2012_Code,712110, +BEA_2012_Summary_Code,711AS,NAICS_2012_Code,712120, +BEA_2012_Summary_Code,711AS,NAICS_2012_Code,712130, +BEA_2012_Summary_Code,711AS,NAICS_2012_Code,712190, +BEA_2012_Summary_Code,713,NAICS_2012_Code,713110, +BEA_2012_Summary_Code,713,NAICS_2012_Code,713120, +BEA_2012_Summary_Code,713,NAICS_2012_Code,713210, +BEA_2012_Summary_Code,713,NAICS_2012_Code,713290, +BEA_2012_Summary_Code,713,NAICS_2012_Code,713910, +BEA_2012_Summary_Code,713,NAICS_2012_Code,713920, +BEA_2012_Summary_Code,713,NAICS_2012_Code,713930, +BEA_2012_Summary_Code,713,NAICS_2012_Code,713940, +BEA_2012_Summary_Code,713,NAICS_2012_Code,713950, +BEA_2012_Summary_Code,713,NAICS_2012_Code,713990, +BEA_2012_Summary_Code,721,NAICS_2012_Code,721110, +BEA_2012_Summary_Code,721,NAICS_2012_Code,721120, +BEA_2012_Summary_Code,721,NAICS_2012_Code,721191, +BEA_2012_Summary_Code,721,NAICS_2012_Code,721199, +BEA_2012_Summary_Code,721,NAICS_2012_Code,721211, +BEA_2012_Summary_Code,721,NAICS_2012_Code,721214, +BEA_2012_Summary_Code,721,NAICS_2012_Code,721310, +BEA_2012_Summary_Code,722,NAICS_2012_Code,722310, +BEA_2012_Summary_Code,722,NAICS_2012_Code,722320, +BEA_2012_Summary_Code,722,NAICS_2012_Code,722330, +BEA_2012_Summary_Code,722,NAICS_2012_Code,722410, +BEA_2012_Summary_Code,722,NAICS_2012_Code,722511, +BEA_2012_Summary_Code,722,NAICS_2012_Code,722513, +BEA_2012_Summary_Code,722,NAICS_2012_Code,722514, +BEA_2012_Summary_Code,722,NAICS_2012_Code,722515, +BEA_2012_Summary_Code,81,NAICS_2012_Code,811111, +BEA_2012_Summary_Code,81,NAICS_2012_Code,811112, +BEA_2012_Summary_Code,81,NAICS_2012_Code,811113, +BEA_2012_Summary_Code,81,NAICS_2012_Code,811118, +BEA_2012_Summary_Code,81,NAICS_2012_Code,811121, +BEA_2012_Summary_Code,81,NAICS_2012_Code,811122, +BEA_2012_Summary_Code,81,NAICS_2012_Code,811191, +BEA_2012_Summary_Code,81,NAICS_2012_Code,811192, +BEA_2012_Summary_Code,81,NAICS_2012_Code,811198, 
+BEA_2012_Summary_Code,81,NAICS_2012_Code,811211, +BEA_2012_Summary_Code,81,NAICS_2012_Code,811212, +BEA_2012_Summary_Code,81,NAICS_2012_Code,811213, +BEA_2012_Summary_Code,81,NAICS_2012_Code,811219, +BEA_2012_Summary_Code,81,NAICS_2012_Code,811310, +BEA_2012_Summary_Code,81,NAICS_2012_Code,811411, +BEA_2012_Summary_Code,81,NAICS_2012_Code,811412, +BEA_2012_Summary_Code,81,NAICS_2012_Code,811420, +BEA_2012_Summary_Code,81,NAICS_2012_Code,811430, +BEA_2012_Summary_Code,81,NAICS_2012_Code,811490, +BEA_2012_Summary_Code,81,NAICS_2012_Code,812111, +BEA_2012_Summary_Code,81,NAICS_2012_Code,812112, +BEA_2012_Summary_Code,81,NAICS_2012_Code,812113, +BEA_2012_Summary_Code,81,NAICS_2012_Code,812191, +BEA_2012_Summary_Code,81,NAICS_2012_Code,812199, +BEA_2012_Summary_Code,81,NAICS_2012_Code,812210, +BEA_2012_Summary_Code,81,NAICS_2012_Code,812220, +BEA_2012_Summary_Code,81,NAICS_2012_Code,812310, +BEA_2012_Summary_Code,81,NAICS_2012_Code,812320, +BEA_2012_Summary_Code,81,NAICS_2012_Code,812331, +BEA_2012_Summary_Code,81,NAICS_2012_Code,812332, +BEA_2012_Summary_Code,81,NAICS_2012_Code,812910, +BEA_2012_Summary_Code,81,NAICS_2012_Code,812921, +BEA_2012_Summary_Code,81,NAICS_2012_Code,812922, +BEA_2012_Summary_Code,81,NAICS_2012_Code,812930, +BEA_2012_Summary_Code,81,NAICS_2012_Code,812990, +BEA_2012_Summary_Code,81,NAICS_2012_Code,813110, +BEA_2012_Summary_Code,81,NAICS_2012_Code,813211, +BEA_2012_Summary_Code,81,NAICS_2012_Code,813212, +BEA_2012_Summary_Code,81,NAICS_2012_Code,813219, +BEA_2012_Summary_Code,81,NAICS_2012_Code,813311, +BEA_2012_Summary_Code,81,NAICS_2012_Code,813312, +BEA_2012_Summary_Code,81,NAICS_2012_Code,813319, +BEA_2012_Summary_Code,81,NAICS_2012_Code,813410, +BEA_2012_Summary_Code,81,NAICS_2012_Code,813910, +BEA_2012_Summary_Code,81,NAICS_2012_Code,813920, +BEA_2012_Summary_Code,81,NAICS_2012_Code,813930, +BEA_2012_Summary_Code,81,NAICS_2012_Code,813940, +BEA_2012_Summary_Code,81,NAICS_2012_Code,813990, +BEA_2012_Summary_Code,81,NAICS_2012_Code,814110, 
+BEA_2012_Summary_Code,GSLG,NAICS_2012_Code,921110, +BEA_2012_Summary_Code,GFGN,NAICS_2012_Code,921110, +BEA_2012_Summary_Code,GSLG,NAICS_2012_Code,921120, +BEA_2012_Summary_Code,GFGN,NAICS_2012_Code,921120, +BEA_2012_Summary_Code,GFGN,NAICS_2012_Code,921130, +BEA_2012_Summary_Code,GSLG,NAICS_2012_Code,921130, +BEA_2012_Summary_Code,GSLG,NAICS_2012_Code,921140, +BEA_2012_Summary_Code,GFGN,NAICS_2012_Code,921140, +BEA_2012_Summary_Code,GSLG,NAICS_2012_Code,921150, +BEA_2012_Summary_Code,GSLG,NAICS_2012_Code,921190, +BEA_2012_Summary_Code,GFGN,NAICS_2012_Code,921190, +BEA_2012_Summary_Code,GSLG,NAICS_2012_Code,922110, +BEA_2012_Summary_Code,GFGN,NAICS_2012_Code,922110, +BEA_2012_Summary_Code,GSLG,NAICS_2012_Code,922120, +BEA_2012_Summary_Code,GFGN,NAICS_2012_Code,922120, +BEA_2012_Summary_Code,GSLG,NAICS_2012_Code,922130, +BEA_2012_Summary_Code,GFGN,NAICS_2012_Code,922130, +BEA_2012_Summary_Code,GSLG,NAICS_2012_Code,922140, +BEA_2012_Summary_Code,GFGN,NAICS_2012_Code,922140, +BEA_2012_Summary_Code,GSLG,NAICS_2012_Code,922150, +BEA_2012_Summary_Code,GFGN,NAICS_2012_Code,922150, +BEA_2012_Summary_Code,GFGN,NAICS_2012_Code,922160, +BEA_2012_Summary_Code,GSLG,NAICS_2012_Code,922160, +BEA_2012_Summary_Code,GSLG,NAICS_2012_Code,922190, +BEA_2012_Summary_Code,GFGN,NAICS_2012_Code,922190, +BEA_2012_Summary_Code,GSLG,NAICS_2012_Code,923110, +BEA_2012_Summary_Code,GFGN,NAICS_2012_Code,923110, +BEA_2012_Summary_Code,GSLG,NAICS_2012_Code,923120, +BEA_2012_Summary_Code,GFGN,NAICS_2012_Code,923120, +BEA_2012_Summary_Code,GSLG,NAICS_2012_Code,923130, +BEA_2012_Summary_Code,GFGN,NAICS_2012_Code,923130, +BEA_2012_Summary_Code,GFGN,NAICS_2012_Code,923140, +BEA_2012_Summary_Code,GSLG,NAICS_2012_Code,924110, +BEA_2012_Summary_Code,GFGN,NAICS_2012_Code,924110, +BEA_2012_Summary_Code,GSLG,NAICS_2012_Code,924120, +BEA_2012_Summary_Code,GFGN,NAICS_2012_Code,924120, +BEA_2012_Summary_Code,GSLG,NAICS_2012_Code,925110, +BEA_2012_Summary_Code,GFGN,NAICS_2012_Code,925110, 
+BEA_2012_Summary_Code,GFGN,NAICS_2012_Code,925120, +BEA_2012_Summary_Code,GSLG,NAICS_2012_Code,925120, +BEA_2012_Summary_Code,GSLG,NAICS_2012_Code,926110, +BEA_2012_Summary_Code,GFGN,NAICS_2012_Code,926110, +BEA_2012_Summary_Code,GSLG,NAICS_2012_Code,926120, +BEA_2012_Summary_Code,GSLG,NAICS_2012_Code,926130, +BEA_2012_Summary_Code,GFGN,NAICS_2012_Code,926130, +BEA_2012_Summary_Code,GSLG,NAICS_2012_Code,926140, +BEA_2012_Summary_Code,GFGN,NAICS_2012_Code,926140, +BEA_2012_Summary_Code,GSLG,NAICS_2012_Code,926150, +BEA_2012_Summary_Code,GFGN,NAICS_2012_Code,926150, +BEA_2012_Summary_Code,GFE,NAICS_2012_Code,927110, +BEA_2012_Summary_Code,GFGD,NAICS_2012_Code,928110, +BEA_2012_Summary_Code,GFGN,NAICS_2012_Code,928120, +BEA_2012_Summary_Code,F010,NAICS_2012_Code,F01000, +BEA_2012_Summary_Code,GFE,NAICS_2012_Code,S00101, +BEA_2012_Summary_Code,GSLE,NAICS_2012_Code,S00201, +BEA_2012_Summary_Code,GSLE,NAICS_2012_Code,S00202, diff --git a/flowsa/data/activitytosectormapping/README.md b/flowsa/data/activitytosectormapping/README.md index 70b678694..5d22b18dc 100644 --- a/flowsa/data/activitytosectormapping/README.md +++ b/flowsa/data/activitytosectormapping/README.md @@ -4,17 +4,18 @@ sectors. These files are not required for datasets where activities are already NAICS-like. Each csv contains columns for: -1. _ActivitySourceName_: Activity Source Name must match the file name, although the name - can be missing extensions. For example, the Activity Source Name can be - "EPA_GHGI" rather than "EPA_GHGI_T_2_4", as the function that looks for the - file names will strip "_XXX" from the file name until the file is found +1. _ActivitySourceName_: Activity Source Name must match the file name, + although the name can be missing extensions. For example, the Activity + Source Name can be "EPA_GHGI" rather than "EPA_GHGI_T_2_4", as the + function that looks for the file names will strip "_XXX" from the file + name until the file is found 2. 
_Activity_: Any activities that should be mapped to a sector -3. _SectorSourceName_ Specify the sector year being mapped to (e.g. NAICS_2012_Code) -4. _Sector_: The 2- to 6-digit NAICS code that the activity relates to. Can map to - multiple NAICS codes of varying lengths. Optional: If necessary a user can - map to their own non-official NAICS codes. If mapped to non-official NAICS, - the NAICS crosswalk must be recreated in the +3. _SectorSourceName_ Specify the sector year being mapped to (e.g. + NAICS_2012_Code) +4. _Sector_: The 2- to 7-digit NAICS code that the activity relates to. Can + map to multiple NAICS codes of varying lengths. Optional: If necessary a + user can map to their own non-official NAICS codes. If mapped to + non-official NAICS, the NAICS crosswalk must be recreated in the [scripts directory](https://github.com/USEPA/flowsa/blob/master/scripts/update_NAICS_crosswalk.py) 5. _SectorType_: "I" for industry, "C" for commodity 6. _Notes_: (optional) Any additional relevant information -7. \ No newline at end of file diff --git a/flowsa/data/process_adjustments/README.md b/flowsa/data/process_adjustments/README.md index 31de68107..102a80495 100644 --- a/flowsa/data/process_adjustments/README.md +++ b/flowsa/data/process_adjustments/README.md @@ -1,8 +1,11 @@ # Process Adjustments -Process adjustments allow for adjustments to the `SectorProducedBy` field for data obtained from stewicombo. -Records that are from the `source_naics` AND the `source_process` are reassigned to the `target_naics` indicated in the process adjustment file. +Process adjustments allow for adjustments to the `SectorProducedBy` field +for data obtained from stewicombo. Records that are from the `source_naics` +AND the `source_process` are reassigned to the `target_naics` indicated in +the process adjustment file. -Adjustments are indicated by identifying one or more named files in the `reassign_process_to_sectors` FBS parameter. 
+Adjustments are indicated by identifying one or more named files in the +`reassign_process_to_sectors` FBS parameter. ## Available Adjustments diff --git a/flowsa/data/source_catalog.yaml b/flowsa/data/source_catalog.yaml index 40696897f..1fde6f036 100644 --- a/flowsa/data/source_catalog.yaml +++ b/flowsa/data/source_catalog.yaml @@ -131,6 +131,18 @@ EPA_GHGI: sector-like_activities: False activity_schema: sector_aggregation_level: "aggregated" +EPA_StateGHGI: + class: + - Chemicals + sector-like_activities: False + activity_schema: + sector_aggregation_level: "aggregated" +EPA_SIT: + class: + - Chemicals + sector-like_activities: False + activity_schema: + sector_aggregation_level: "aggregated" EPA_NEI_Nonpoint: data_format: FBA class: @@ -182,6 +194,12 @@ StatCan_LFS: sector-like_activities: False activity_schema: sector_aggregation_level: "aggregated" +stateio: + class: + - Money + sector-like_activities: False #update to true once alternate activity_schema in place + # activity_schema: BEA_2012_Summary_Code + sector_aggregation_level: "disaggregated" USDA_CoA_Cropland: data_format: FBA class: diff --git a/flowsa/data_source_scripts/BEA.py b/flowsa/data_source_scripts/BEA.py index f19757870..7add8178b 100644 --- a/flowsa/data_source_scripts/BEA.py +++ b/flowsa/data_source_scripts/BEA.py @@ -62,28 +62,8 @@ def bea_use_detail_br_parse(*, year, **_): f'_Detail_Use_PRO_BeforeRedef.csv' df_raw = pd.read_csv(csv_load) - # first column is the commodity being consumed - df = df_raw.rename(columns={'Unnamed: 0': 'ActivityProducedBy'}) - - # use "melt" fxn to convert colummns into rows - df = df.melt(id_vars=["ActivityProducedBy"], - var_name="ActivityConsumedBy", - value_name="FlowAmount") - - df['Year'] = str(year) - # hardcode data - df['FlowName'] = "USD" + str(year) - df["Class"] = "Money" - df["FlowType"] = "TECHNOSPHERE_FLOW" - df['Description'] = 'BEA_2012_Detail_Code' + df = bea_detail_parse(df_raw, year) df["SourceName"] = "BEA_Use_Detail_PRO_BeforeRedef" - 
df["Location"] = US_FIPS - df['LocationSystem'] = "FIPS_2015" - # original unit in million USD - df['FlowAmount'] = df['FlowAmount'] * 1000000 - df["Unit"] = "USD" - df['DataReliability'] = 5 # tmp - df['DataCollection'] = 5 # tmp return df @@ -96,10 +76,17 @@ def bea_make_detail_br_parse(*, year, **_): flowbyactivity specifications """ # Read directly into a pandas df - df_raw = pd.read_csv(externaldatapath + "BEA_" + str(year) + - "_Detail_Make_BeforeRedef.csv") + csv_load = f'{externaldatapath}BEA_{str(year)}' \ + f'_Detail_Make_BeforeRedef.csv' + df_raw = pd.read_csv(csv_load) + + df = bea_detail_parse(df_raw, year) + df["SourceName"] = "BEA_Make_Detail_BeforeRedef" - # first column is the industry + return df + + +def bea_detail_parse(df_raw, year): df = df_raw.rename(columns={'Unnamed: 0': 'ActivityProducedBy'}) # use "melt" fxn to convert colummns into rows @@ -109,11 +96,10 @@ def bea_make_detail_br_parse(*, year, **_): df['Year'] = str(year) # hardcode data - df['FlowName'] = "USD" + str(year) + df['FlowName'] = f"USD{str(year)}" df["Class"] = "Money" df["FlowType"] = "TECHNOSPHERE_FLOW" df['Description'] = 'BEA_2012_Detail_Code' - df["SourceName"] = "BEA_Make_Detail_BeforeRedef" df["Location"] = US_FIPS df['LocationSystem'] = "FIPS_2015" # original unit in million USD @@ -121,7 +107,6 @@ def bea_make_detail_br_parse(*, year, **_): df["Unit"] = "USD" df['DataReliability'] = 5 # tmp df['DataCollection'] = 5 # tmp - return df @@ -187,18 +172,27 @@ def subset_and_allocate_BEA_table(df, attr, **_): """ Temporary function to mimic use of 2nd helper allocation dataset """ + df = subset_BEA_table(df, attr) v = {'geoscale_to_use': 'national'} method2 = {'target_sector_source': 'NAICS_2012_Code'} + + import importlib + fxn = getattr(importlib.import_module( + 'flowsa.data_source_scripts.BLS_QCEW'), + "bls_clean_allocation_fba_w_sec") + attr2 = {"helper_source": "BLS_QCEW", "helper_method": "proportional", "helper_source_class": "Employment", "helper_source_year": 
2012, - "helper_flow": ["Number of employees"], + "helper_flow": ["Number of employees, Federal Government", + "Number of employees, State Government", + "Number of employees, Local Government", + "Number of employees, Private"], "helper_from_scale": "national", "allocation_from_scale": "national", - "clean_helper_fba": "clean_bls_qcew_fba", - "clean_helper_fba_wsec": "bls_clean_allocation_fba_w_sec"} + "clean_helper_fba_wsec": fxn} df2 = allocation_helper(df, attr2, method2, v, False) # Drop remaining rows with no sectors e.g. T001 and other final demands df2 = df2.dropna(subset=['SectorConsumedBy']).reset_index(drop=True) diff --git a/flowsa/data_source_scripts/BLS_QCEW.py b/flowsa/data_source_scripts/BLS_QCEW.py index 7a964ce75..aa7aa7b3f 100644 --- a/flowsa/data_source_scripts/BLS_QCEW.py +++ b/flowsa/data_source_scripts/BLS_QCEW.py @@ -17,13 +17,8 @@ import pandas as pd import numpy as np from flowsa.location import US_FIPS -from flowsa.common import fba_default_grouping_fields -from flowsa.schema import flow_by_activity_wsec_fields, \ - flow_by_activity_mapped_wsec_fields from flowsa.flowbyfunctions import assign_fips_location_system, \ - aggregator -from flowsa.dataclean import add_missing_flow_by_fields, \ - replace_strings_with_NoneType + aggregator, equally_allocate_suppressed_parent_to_child_naics def BLS_QCEW_URL_helper(*, build_url, year, **_): @@ -92,16 +87,19 @@ def bls_qcew_parse(*, df_list, year, **_): df.loc[df['area_fips'] == 'US000', 'area_fips'] = US_FIPS # set datatypes float_cols = [col for col in df.columns if col not in - ['area_fips', 'industry_code', 'year']] + ['area_fips', 'own_code', 'industry_code', 'year']] for col in float_cols: df[col] = df[col].astype('float') # Keep owner_code = 1, 2, 3, 5 - df = df[df.own_code.isin([1, 2, 3, 5])] - # Aggregate annual_avg_estabs and annual_avg_emplvl by area_fips, - # industry_code, year, flag - df = df.groupby(['area_fips', 'industry_code', 'year'])[[ - 'annual_avg_estabs', 'annual_avg_emplvl', 
- 'total_annual_wages']].sum().reset_index() + df = df[df.own_code.isin(['1', '2', '3', '5'])] + # replace ownership code with text defined by bls + # https://www.bls.gov/cew/classifications/ownerships/ownership-titles.htm + replace_dict = {'1': 'Federal Government', + '2': 'State Government', + '3': 'Local Government', + '5': 'Private'} + for key in replace_dict.keys(): + df['own_code'] = df['own_code'].replace(key, replace_dict[key]) # Rename fields df = df.rename(columns={'area_fips': 'Location', 'industry_code': 'ActivityProducedBy', @@ -112,140 +110,47 @@ def bls_qcew_parse(*, df_list, year, **_): # Reformat FIPs to 5-digit df['Location'] = df['Location'].apply('{:0>5}'.format) # use "melt" fxn to convert colummns into rows - df = df.melt(id_vars=["Location", "ActivityProducedBy", "Year"], - var_name="FlowName", - value_name="FlowAmount") + df2 = df.melt(id_vars=["Location", "ActivityProducedBy", "Year", + 'own_code'], + var_name="FlowName", + value_name="FlowAmount") # specify unit based on flowname - df['Unit'] = np.where(df["FlowName"] == 'Annual payroll', "USD", "p") + df2['Unit'] = np.where(df2["FlowName"] == 'Annual payroll', "USD", "p") # specify class - df.loc[df['FlowName'] == 'Number of employees', 'Class'] = 'Employment' - df.loc[df['FlowName'] == 'Number of establishments', 'Class'] = 'Other' - df.loc[df['FlowName'] == 'Annual payroll', 'Class'] = 'Money' + df2.loc[df2['FlowName'] == 'Number of employees', 'Class'] = 'Employment' + df2.loc[df2['FlowName'] == 'Number of establishments', 'Class'] = 'Other' + df2.loc[df2['FlowName'] == 'Annual payroll', 'Class'] = 'Money' + # update flow name + df2['FlowName'] = df2['FlowName'] + ', ' + df2['own_code'] + df2 = df2.drop(columns='own_code') # add location system based on year of data - df = assign_fips_location_system(df, year) + df2 = assign_fips_location_system(df2, year) # add hard code data - df['SourceName'] = 'BLS_QCEW' + df2['SourceName'] = 'BLS_QCEW' # Add tmp DQ scores - df['DataReliability'] = 
5 - df['DataCollection'] = 5 - df['Compartment'] = None - df['FlowType'] = "ELEMENTARY_FLOW" + df2['DataReliability'] = 5 + df2['DataCollection'] = 5 + df2['Compartment'] = None + df2['FlowType'] = "ELEMENTARY_FLOW" - return df + return df2 -def clean_bls_qcew_fba_for_employment_sat_table(fba_df, **kwargs): +def clean_bls_qcew_fba_for_employment_sat_table(fba, **_): """ When creating the employment satellite table for use in useeior, - modify the flow name to match prior methodology for mapping/impact factors + modify the flow name to match prior methodology for mapping/impact factors. + clean_fba_df_fxn - :param fba_df: df, flowbyactivity - :param kwargs: dictionary, can include attr, a dictionary of parameters - in the FBA method yaml + :param fba: df, flowbyactivity :return: df, flowbyactivity, with modified flow names """ - fba_df = clean_bls_qcew_fba(fba_df, **kwargs) - # rename flowname value for c in ['FlowName', 'Flowable']: - fba_df[c] = fba_df[c].replace({'Number of employees': 'Jobs'}) - - return fba_df - - -def clean_bls_qcew_fba(fba_df, **kwargs): - """ - Function to clean BLS QCEW data when FBA is not used for employment - satellite table - :param fba_df: df, FBA format - :param kwargs: dictionary, can include attr, a dictionary of parameters - in the FBA method yaml - :return: df, modified BLS QCEW data - """ - - fba_df = fba_df.reset_index(drop=True) - # aggregate data to NAICS 2 digits, if 2 digit value is missing - fba_df = replace_missing_2_digit_sector_values(fba_df) - # drop rows of data where sectors are provided in ranges - fba_df = remove_2_digit_sector_ranges(fba_df) - - return fba_df - - -def replace_missing_2_digit_sector_values(df): - """ - In the 2015 (and possibly other dfs, there are instances of values - at the 3 digit NAICS level, while the 2 digit NAICS is reported as 0. 
- The 0 values are replaced with summed 3 digit NAICS - :param df: df, BLS QCEW data in FBA format - :return: df, BLS QCEW data with 2-digit NAICS sector FlowAmounts - """ - - # check for 2 digit 0 values - df_missing = df[(df['ActivityProducedBy'].apply( - lambda x: len(x) == 2)) & (df['FlowAmount'] == 0)] - # create list of location/activityproduced by combos - missing_sectors = df_missing[[ - 'Location', 'ActivityProducedBy']].drop_duplicates().values.tolist() - - # subset the df to 3 naics where flow amount is not 0 and - # that would sum to the missing 2 digit naics - df_subset = df[df['ActivityProducedBy'].apply( - lambda x: len(x) == 3) & (df['FlowAmount'] != 0)] - new_sectors_list = [] - for q, r in missing_sectors: - c1 = df_subset['Location'] == q - c2 = df_subset['ActivityProducedBy'].apply(lambda x: x[0:2] == r) - # subset data - new_sectors_list.append(df_subset[c1 & c2]) - if len(new_sectors_list) != 0: - new_sectors = pd.concat( - new_sectors_list, sort=False, ignore_index=True) - - # drop last digit of naics and aggregate - new_sectors.loc[:, 'ActivityProducedBy'] = \ - new_sectors['ActivityProducedBy'].apply(lambda x: x[0:2]) - new_sectors = aggregator(new_sectors, fba_default_grouping_fields) - - # drop the old location/activity columns in the bls df and - # add new sector values - new_sectors_list = \ - new_sectors[['Location', 'ActivityProducedBy' - ]].drop_duplicates().values.tolist() - - # rows to drop - rows_list = [] - for q, r in new_sectors_list: - c1 = df['Location'] == q - c2 = df['ActivityProducedBy'].apply(lambda x: x == r) - # subset data - rows_list.append(df[(c1 & c2)]) - rows_to_drop = pd.concat(rows_list, ignore_index=True) - # drop rows from df - modified_df = pd.merge(df, rows_to_drop, indicator=True, - how='outer').query('_merge=="left_only"' - ).drop('_merge', axis=1) - # add new rows - modified_df = modified_df.append(new_sectors, sort=False) - return modified_df - else: - return df - - -def 
remove_2_digit_sector_ranges(fba_df): - """ - BLS publishes activity ranges of '31-33', 44-45', '48-49... - drop these ranges. - The individual 2 digit naics are summed later. - :param fba_df: df, BLS QCEW in FBA format - :return: df, no sector ranges - """ + fba[c] = fba[c].str.replace('Number of employees', 'Jobs') - df = fba_df[ - ~fba_df['ActivityProducedBy'].str.contains('-')].reset_index(drop=True) - - return df + return fba def bls_clean_allocation_fba_w_sec(df_w_sec, **kwargs): @@ -256,26 +161,18 @@ def bls_clean_allocation_fba_w_sec(df_w_sec, **kwargs): dictionary of FBA method yaml parameters :return: df, BLS QCEW FBA with estimated suppressed data """ - df_w_sec = df_w_sec.reset_index(drop=True) - df2 = add_missing_flow_by_fields( - df_w_sec, flow_by_activity_wsec_fields).reset_index(drop=True) - df3 = replace_strings_with_NoneType(df2) - - return df3 - - -def bls_clean_allocation_fba_w_sec_sat_table(df_w_sec, **kwargs): - """ - clean up bls df with sectors by estimating suppresed data - :param df_w_sec: df, FBA format BLS QCEW data - :param kwargs: additional arguments can include 'attr', a - dictionary of FBA method yaml parameters - :return: df, BLS QCEW FBA with estimated suppressed data - """ - df_w_sec = df_w_sec.reset_index(drop=True) - df2 = add_missing_flow_by_fields(df_w_sec, - flow_by_activity_mapped_wsec_fields - ).reset_index(drop=True) - df3 = replace_strings_with_NoneType(df2) - - return df3 + groupcols = list(df_w_sec.select_dtypes(include=['object', 'int']).columns) + # estimate supressed data + df = equally_allocate_suppressed_parent_to_child_naics( + df_w_sec, kwargs['method'], 'SectorProducedBy', groupcols) + + # for purposes of allocation, we do not need to differentiate between + # federal government, state government, local government, or private + # sectors. 
So after estimating the suppressed data (above), modify the + # flow names and aggregate data + col_list = [e for e in df_w_sec.columns if e in ['FlowName', 'Flowable']] + for c in col_list: + df[c] = df[c].str.split(',').str[0] + df2 = aggregator(df, groupcols) + + return df2 diff --git a/flowsa/data_source_scripts/Blackhurst_IO.py b/flowsa/data_source_scripts/Blackhurst_IO.py index b67c0d6cb..84a1e1ff0 100644 --- a/flowsa/data_source_scripts/Blackhurst_IO.py +++ b/flowsa/data_source_scripts/Blackhurst_IO.py @@ -17,7 +17,6 @@ from flowsa.allocation import \ proportional_allocation_by_location_and_activity from flowsa.sectormapping import add_sectors_to_flowbyactivity -from flowsa.data_source_scripts.BLS_QCEW import clean_bls_qcew_fba from flowsa.validation import compare_df_units @@ -127,9 +126,6 @@ def convert_blackhurst_data_to_kg_per_employee( flowclass='Employment', geographic_level='national', download_FBA_if_missing=kwargs['download_FBA_if_missing']) - # clean df - bls = clean_bls_qcew_fba(bls, attr=attr) - # assign naics to allocation dataset bls_wsec = add_sectors_to_flowbyactivity( bls, sectorsourcename=method['target_sector_source']) diff --git a/flowsa/data_source_scripts/CalRecycle_WasteCharacterization.py b/flowsa/data_source_scripts/CalRecycle_WasteCharacterization.py index 54b4c56b8..fa4f73f84 100644 --- a/flowsa/data_source_scripts/CalRecycle_WasteCharacterization.py +++ b/flowsa/data_source_scripts/CalRecycle_WasteCharacterization.py @@ -17,10 +17,9 @@ load_fba_w_standardized_units, \ aggregate_and_subset_for_target_sectors from flowsa.settings import externaldatapath -from flowsa.data_source_scripts.BLS_QCEW import clean_bls_qcew_fba from flowsa.sectormapping import get_fba_allocation_subset, \ add_sectors_to_flowbyactivity -from flowsa.dataclean import replace_strings_with_NoneType +from flowsa.dataclean import replace_strings_with_NoneType, standardize_units def produced_by(entry): @@ -110,16 +109,17 @@ def calR_parse(*, year, **_): return 
output -def keep_generated_quantity(fba, **kwargs): +def keep_generated_quantity(fba, **_): """ Function to clean CalRecycles FBA to remove quantities not assigned as Generated :param fba: df, FBA format - :param kwargs: dictionary, can include attr, a dictionary of parameters in - the FBA method yaml :return: df, modified CalRecycles FBA """ - fba = fba[fba['Description'] == 'Generated'] + fba = fba[fba['Description'] == 'Generated'].reset_index(drop=True) + # if no mapping performed, still update units + if 'tons' in fba['Unit'].values: + fba = standardize_units(fba) return fba @@ -133,9 +133,10 @@ def apply_tons_per_employee_per_year_to_states(fbs, method, **_): year=fbs['Year'].unique()[0], flowclass='Employment', geographic_level='state') - bls = bls[bls['FlowName'] == 'Number of employees'] - # clean df - bls = clean_bls_qcew_fba(bls) + bls = bls[bls['FlowName'].isin(["Number of employees, Federal Government", + "Number of employees, State Government", + "Number of employees, Local Government", + "Number of employees, Private"])] bls = add_sectors_to_flowbyactivity(bls) # Subset BLS dataset @@ -146,6 +147,10 @@ def apply_tons_per_employee_per_year_to_states(fbs, method, **_): # Calculate tons per employee per year per material and sector in CA bls_CA = bls[bls['Location'] == '06000'] # California + # aggregate all employment prior to generating tpepy + bls_CA = (bls_CA.groupby(['Location','Year','SectorProducedBy']) + .agg({'Employees':'sum'}) + .reset_index()) tpepy = fbs.merge(bls_CA, how='inner') tpepy['TPEPY'] = np.divide(tpepy['FlowAmount'], tpepy['Employees'], out=np.zeros_like(tpepy['Employees']), diff --git a/flowsa/data_source_scripts/Census_CBP.py b/flowsa/data_source_scripts/Census_CBP.py index c805a76ab..16f06f052 100644 --- a/flowsa/data_source_scripts/Census_CBP.py +++ b/flowsa/data_source_scripts/Census_CBP.py @@ -34,11 +34,7 @@ def Census_CBP_URL_helper(*, build_url, year, **_): # This is only for years 2010 and 2011. 
This is done because the State # query that gets all counties returns too many results and errors out. if year in ['2010', '2011']: - if year == '2011': - fips_year = '2010' - else: - fips_year = '2010' - county_fips_df = get_county_FIPS(fips_year) + county_fips_df = get_county_FIPS('2010') county_fips = county_fips_df.FIPS for d in county_fips: url = build_url @@ -82,16 +78,15 @@ def Census_CBP_URL_helper(*, build_url, year, **_): urls_census.append(url) else: FIPS_2 = get_all_state_FIPS_2()['FIPS_2'] - for c in FIPS_2: + for state in FIPS_2: url = build_url - url = url.replace("__stateFIPS__", c) + url = url.replace("__stateFIPS__", state) # specified NAICS code year depends on year of data - if year in ['2017']: + if year in ['2017', '2018', '2019', '2020']: url = url.replace("__NAICS__", "NAICS2017") - url = url.replace("__countyFIPS__", "*") - if year in ['2012', '2013', '2014', '2015', '2016']: + elif year in ['2012', '2013', '2014', '2015', '2016']: url = url.replace("__NAICS__", "NAICS2012") - url = url.replace("__countyFIPS__", "*") + url = url.replace("__countyFIPS__", "*") urls_census.append(url) return urls_census @@ -152,6 +147,10 @@ def census_cbp_parse(*, df_list, year, **_): value_name="FlowAmount") # specify unit based on flowname df['Unit'] = np.where(df["FlowName"] == 'Annual payroll', "USD", "p") + # Payroll in units of thousand USD + df['FlowAmount'] = np.where(df["FlowName"] == 'Annual payroll', + df['FlowAmount'] * 1000, + df['FlowAmount']) # specify class df.loc[df['FlowName'] == 'Number of employees', 'Class'] = 'Employment' df.loc[df['FlowName'] == 'Number of establishments', 'Class'] = 'Other' diff --git a/flowsa/data_source_scripts/EIA_CBECS_Land.py b/flowsa/data_source_scripts/EIA_CBECS_Land.py index 610d28389..dba2a5536 100644 --- a/flowsa/data_source_scripts/EIA_CBECS_Land.py +++ b/flowsa/data_source_scripts/EIA_CBECS_Land.py @@ -233,15 +233,15 @@ def standardize_eia_cbecs_land_activity_names(df, column_to_standardize): return df -def 
cbecs_land_fba_cleanup(fba_load): +def cbecs_land_fba_cleanup(fba, **_): """ Clean up the land fba for use in allocation - :param fba_load: df, eia cbecs land flowbyactivity format + :param fba: df, eia cbecs land flowbyactivity format :return: df, flowbyactivity with modified values """ # estimate floor space using number of floors - fba = calculate_floorspace_based_on_number_of_floors(fba_load) + fba = calculate_floorspace_based_on_number_of_floors(fba) # calculate the land area in addition to building footprint fba1 = calculate_total_facility_land_area(fba) diff --git a/flowsa/data_source_scripts/EIA_MECS.py b/flowsa/data_source_scripts/EIA_MECS.py index 7e9f6f790..40086e9d2 100644 --- a/flowsa/data_source_scripts/EIA_MECS.py +++ b/flowsa/data_source_scripts/EIA_MECS.py @@ -436,7 +436,7 @@ def eia_mecs_energy_clean_allocation_fba_w_sec( return df2 -def mecs_land_fba_cleanup(fba): +def mecs_land_fba_cleanup(fba, **_): """ Modify the EIA MECS Land FBA :param fba: df, EIA MECS Land FBA format @@ -452,7 +452,7 @@ def mecs_land_fba_cleanup(fba): return fba -def mecs_land_fba_cleanup_for_land_2012_fbs(fba): +def mecs_land_fba_cleanup_for_land_2012_fbs(fba, **_): """ The 'land_national_2012' FlowBySector uses MECS 2014 data, set MECS year to 2012 @@ -460,7 +460,7 @@ def mecs_land_fba_cleanup_for_land_2012_fbs(fba): :return: df, EIA MECS Land FBA modified """ - fba = mecs_land_fba_cleanup(fba) + fba = mecs_land_fba_cleanup(fba=fba) # reset the EIA MECS Land year from 2014 to 2012 to match # the USDA ERS MLU year diff --git a/flowsa/data_source_scripts/EPA_CDDPath.py b/flowsa/data_source_scripts/EPA_CDDPath.py index 33d2339bc..8ef581457 100644 --- a/flowsa/data_source_scripts/EPA_CDDPath.py +++ b/flowsa/data_source_scripts/EPA_CDDPath.py @@ -14,6 +14,7 @@ from flowsa.location import US_FIPS from flowsa.settings import externaldatapath from flowsa.flowbyfunctions import assign_fips_location_system +from flowsa.dataclean import standardize_units # Read pdf into list of 
DataFrame @@ -95,17 +96,21 @@ def combine_cdd_path(*, resp, **_): return df -def assign_wood_to_engineering(df): +def assign_wood_to_engineering(fba, **_): """clean_fba_df_fxn that reclassifies Wood from 'Other' to 'Other - Wood' so that its mapping can be adjusted to only use 237990/Heavy engineering NAICS according to method in Meyer et al. 2020 - :param df: df, FBA of CDDPath + :param fba: df, FBA of CDDPath :return: df, CDDPath FBA with wood reassigned """ # Update wood to a new activity for improved mapping - df.loc[((df.FlowName == 'Wood') & - (df.ActivityProducedBy == 'Other')), + fba.loc[((fba.FlowName == 'Wood') & + (fba.ActivityProducedBy == 'Other')), 'ActivityProducedBy'] = 'Other - Wood' - return df + # if no mapping performed, still update units + if 'short tons' in fba['Unit'].values: + fba = standardize_units(fba) + + return fba diff --git a/flowsa/data_source_scripts/EPA_GHGI.py b/flowsa/data_source_scripts/EPA_GHGI.py index 18d4d0966..ab02079f4 100644 --- a/flowsa/data_source_scripts/EPA_GHGI.py +++ b/flowsa/data_source_scripts/EPA_GHGI.py @@ -15,8 +15,10 @@ from flowsa.dataclean import replace_NoneType_with_empty_cells from flowsa.settings import log, externaldatapath from flowsa.schema import flow_by_activity_fields +from flowsa.common import load_yaml_dict from flowsa.data_source_scripts import EIA_MECS + SECTOR_DICT = {'Res.': 'Residential', 'Comm.': 'Commercial', 'Ind.': 'Industrial', @@ -421,9 +423,9 @@ def ghg_parse(*, df_list, year, config, **_): source_No_activity = ["3-22", "3-22b"] # Handle tables with 1 parent level category source_activity_1 = ["3-7", "3-8", "3-9", "3-10", "3-14", "3-15", - "5-18", "5-19", "A-76", "A-77", "A-103"] + "5-18", "5-19", "A-76", "A-77"] # Tables with sub categories - source_activity_2 = ["3-38", "3-63"] + source_activity_2 = ["3-38", "3-63", "A-103"] if table_name in multi_chem_names: bool_apb = False @@ -520,7 +522,9 @@ def ghg_parse(*, df_list, year, config, **_): flow_name_list = ["Explorationb", 
"Production", "Processing", "Transmission and Storage", "Distribution", "Crude Oil Transportation", "Refining", - "Exploration"] + "Exploration", "Mobile AC", + "Refrigerated Transport", + "Comfort Cooling for Trains and Buses"] for index, row in df.iterrows(): apb_value = row["ActivityProducedBy"] start_activity = row["FlowName"] @@ -546,7 +550,7 @@ def ghg_parse(*, df_list, year, config, **_): df.loc[index, 'ActivityProducedBy' ] = f"{apb_txt} {apbe_value}" if "Total" == apb_value or "Total " == apb_value: - df = df.drop(index) + df = df.drop(index) elif table_name == "A-79": fuel_name = "" @@ -593,7 +597,7 @@ def ghg_parse(*, df_list, year, config, **_): text_split = apb_value.split("(") df.loc[index, 'ActivityProducedBy'] = text_split[0] - elif table_name in ["A-101", "A-103"]: + elif table_name in ["A-101"]: for index, row in df.iterrows(): apb_value = strip_char(row["ActivityProducedBy"]) df.loc[index, 'ActivityProducedBy'] = apb_value @@ -633,18 +637,35 @@ def get_manufacturing_energy_ratios(year): 'Natural Gas': 'Natural Gas', } - # TODO make this year dynamic + def closest_value(input_list, input_value): + difference = lambda input_list : abs(input_list - input_value) + return min(input_list, key=difference) + + mecs_year = closest_value(load_yaml_dict('EIA_MECS_Energy', + flowbytype='FBA').get('years'), + year) + # Filter MECS for total national energy consumption for manufacturing sectors mecs = load_fba_w_standardized_units(datasource='EIA_MECS_Energy', - year=year, + year=mecs_year, flowclass='Energy') mecs = mecs.loc[(mecs['ActivityConsumedBy'] == '31-33') & (mecs['Location'] == '00000')].reset_index(drop=True) mecs = EIA_MECS.mecs_energy_fba_cleanup(mecs, None) - # TODO dynamically change the table imported here based on year - ghgi = load_fba_w_standardized_units(datasource='EPA_GHGI_T_A_14', - year=2016, + # Identify the GHGI table that matches EIA_MECS + for t, v in (load_yaml_dict('EPA_GHGI', 'FBA') + .get('Annex').get('Annex').items()): + if 
((v.get('class') == 'Energy') + & ('Energy Consumption Data' in v.get('desc')) + & (v.get('year') == str(mecs_year))): + table = f"EPA_GHGI_T_{t.replace('-', '_')}" + break + else: + log.error('unable to identify corresponding GHGI table') + + ghgi = load_fba_w_standardized_units(datasource=table, + year=mecs_year, flowclass='Energy') ghgi = ghgi[ghgi['ActivityConsumedBy']=='Industrial'].reset_index(drop=True) @@ -659,7 +680,7 @@ def get_manufacturing_energy_ratios(year): return pct_dict -def allocate_industrial_combustion(df): +def allocate_industrial_combustion(fba, source_dict, **_): """ Split industrial combustion emissions into two buckets to be further allocated. @@ -667,9 +688,7 @@ def allocate_industrial_combustion(df): EIA MECS relative to EPA GHGI. Create new activities to distinguish those which use EIA MECS as allocation source and those that use alternate source. """ - # TODO make this year dynamic - year = 2014 - pct_dict = get_manufacturing_energy_ratios(year) + pct_dict = get_manufacturing_energy_ratios(source_dict.get('year')) # activities reflect flows in A_14 and 3_8 and 3_9 activities_to_split = {'Industrial Other Coal Industrial': 'Coal', @@ -678,29 +697,30 @@ def allocate_industrial_combustion(df): 'Natural gas industrial': 'Natural Gas'} for activity, fuel in activities_to_split.items(): - df_subset = df.loc[df['ActivityProducedBy'] == activity].reset_index(drop=True) + df_subset = fba.loc[fba['ActivityProducedBy'] == activity].reset_index(drop=True) if len(df_subset) == 0: continue df_subset['FlowAmount'] = df_subset['FlowAmount'] * pct_dict[fuel] df_subset['ActivityProducedBy'] = f"{activity} - Manufacturing" - df.loc[df['ActivityProducedBy'] == activity, - 'FlowAmount'] = df['FlowAmount'] * (1-pct_dict[fuel]) - df = pd.concat([df, df_subset], ignore_index=True) + fba.loc[fba['ActivityProducedBy'] == activity, + 'FlowAmount'] = fba['FlowAmount'] * (1-pct_dict[fuel]) + fba = pd.concat([fba, df_subset], ignore_index=True) - return df + return 
fba -def split_HFCs_by_type(df): - """Speciates HFCs and PFCs for all activities based on T_4_99.""" +def split_HFCs_by_type(fba, **_): + """Speciates HFCs and PFCs for all activities based on T_4_99. + clean_fba_before_mapping_df_fxn""" splits = load_fba_w_standardized_units(datasource='EPA_GHGI_T_4_99', - year=df['Year'][0]) + year=fba['Year'][0]) splits['pct'] = splits['FlowAmount'] / splits['FlowAmount'].sum() splits = splits[['FlowName', 'pct']] - speciated_df = df.apply(lambda x: [p * x['FlowAmount'] for p in splits['pct']], + speciated_df = fba.apply(lambda x: [p * x['FlowAmount'] for p in splits['pct']], axis=1, result_type='expand') speciated_df.columns = splits['FlowName'] - speciated_df = pd.concat([df, speciated_df], axis=1) + speciated_df = pd.concat([fba, speciated_df], axis=1) speciated_df = speciated_df.melt(id_vars=flow_by_activity_fields.keys(), var_name='Flow') speciated_df['FlowName'] = speciated_df['Flow'] @@ -780,20 +800,20 @@ def split_HFC_foams(df): return df -def clean_HFC_fba(df): +def clean_HFC_fba(fba, **_): """Adjust HFC emissions for improved parsing. clean_fba_before_mapping_df_fxn used in EPA_GHGI_T_4_101.""" - df = subtract_HFC_transport_emissions(df) + df = subtract_HFC_transport_emissions(fba) df = allocate_HFC_to_residential(df) df = split_HFC_foams(df) df = split_HFCs_by_type(df) return df -def remove_HFC_kt(df): +def remove_HFC_kt(fba, **_): """Remove records of emissions in kt, data are also provided in MMT CO2e. 
clean_fba_before_mapping_df_fxn used in EPA_GHGI_T_4_50.""" - return df.loc[df['Unit'] != 'kt'] + return fba.loc[fba['Unit'] != 'kt'] def adjust_transport_activities(df, **_): diff --git a/flowsa/data_source_scripts/EPA_NEI.py b/flowsa/data_source_scripts/EPA_NEI.py index dbddf0f23..3fe4e99d4 100644 --- a/flowsa/data_source_scripts/EPA_NEI.py +++ b/flowsa/data_source_scripts/EPA_NEI.py @@ -155,7 +155,7 @@ def epa_nei_nonpoint_parse(*, df_list, source, year, config, **_): return df -def clean_NEI_fba(fba): +def clean_NEI_fba(fba, **_): """ Clean up the NEI FBA for use in FBS creation :param fba: df, FBA format @@ -181,14 +181,14 @@ def clean_NEI_fba(fba): return fba -def clean_NEI_fba_no_pesticides(fba): +def clean_NEI_fba_no_pesticides(fba, **_): """ Clean up the NEI FBA with no pesicides for use in FBS creation :param fba: df, FBA format :return: df, modified FBA """ fba = drop_pesticides(fba) - fba = clean_NEI_fba(fba) + fba = clean_NEI_fba(fba=fba) return fba @@ -217,10 +217,10 @@ def drop_GHGs(df, *_): :return: df """"" flowlist = [ - 'Carbon Dioxide', + 'Carbon Dioxide', 'Carbon dioxide', 'Methane', - 'Nitrous Oxide', - 'Sulfur Hexafluoride', + 'Nitrous Oxide', 'Nitrous oxide', + 'Sulfur Hexafluoride', 'Sulfur hexafluoride', ] flow_var = 'Flowable' if 'Flowable' in df.columns else 'FlowName' return df.query(f'{flow_var} not in @flowlist') diff --git a/flowsa/data_source_scripts/EPA_SIT.py b/flowsa/data_source_scripts/EPA_SIT.py new file mode 100644 index 000000000..969b60bbd --- /dev/null +++ b/flowsa/data_source_scripts/EPA_SIT.py @@ -0,0 +1,121 @@ +# EPA_SIT.py (flowsa) +# !/usr/bin/env python3 +# coding=utf-8 +""" +Loads EPA State Inventory Tool (SIT) data for state specified from external +data directory. Parses EPA SIT data to flowbyactivity format. 
+""" + +import pandas as pd +import os +from flowsa.settings import externaldatapath, log +from flowsa.flowbyfunctions import assign_fips_location_system +from flowsa.location import apply_county_FIPS + +def epa_sit_parse(*, source, year, config, **_): + + state = config['state'] + filepath = f"{externaldatapath}/SIT_data/{state}/{config['file']}" + # dictionary containing Excel sheet-specific information + sheet_dict = config['sheet_dict'] + # initialize the dataframe + df0 = pd.DataFrame() + + if not os.path.exists(filepath): + raise FileNotFoundError(f'SIT file not found in {filepath}') + + # for each sheet in the Excel file containing data... + for sheet, sheet_dict in config.get('sheet_dict').items(): + sheetname = sheet_dict.get('sheetname', sheet) + tablename = sheet_dict.get('tablename') + if tablename: + sheetandtable = f'{sheetname}, {tablename}' + else: + sheetandtable = sheetname + tablename = sheet_dict.get('tablename', sheetname) + log.debug(f'Loading data from: {sheetname}...') + # read in data from Excel sheet + df = pd.read_excel(filepath, + sheet_name = sheetname, + header=sheet_dict.get('header', 2), + skiprows=range(sheet_dict.get('skiprowstart', 0), + sheet_dict.get('skiprowend', 0)), + usecols="B:AG", + nrows=sheet_dict.get('nrows')) + df.columns = df.columns.map(str) + df['ActivityProducedBy'] = df.iloc[:,0] + + # for each row in the data table... + # ...emissions categories will be renamed with the format + # 'sheet name, emissions category' + # ...emissions subcategories will be renamed with the format + # 'sheet name, emissions category, emissions subcategory' + for ind in df.index: + current_header = df['ActivityProducedBy'][ind].strip() + # for level 1 headers... 
+ if current_header in sheet_dict.get('headers'): + active_header = current_header + if sheet_dict.get('subgroup') == 'activitybyflow': + df.loc[ind, 'FlowName'] = active_header + elif sheet_dict.get('subgroup') == 'flow': + df.loc[ind, 'FlowName'] = 'Total N2O and CH4 Emissions' + df.loc[ind,'ActivityProducedBy'] = ( + f'{sheetandtable}, {active_header}') + # for level 2 headers... + elif current_header not in sheet_dict.get('subsubheaders',''): + active_subheader = df['ActivityProducedBy'][ind].strip() + if sheet_dict.get('subgroup') == 'flow': + df.loc[ind, 'FlowName'] = active_subheader + df.loc[ind,'ActivityProducedBy'] = ( + f'{sheetandtable}, {active_header}') + elif sheet_dict.get('subgroup') == 'activitybyflow': + df.loc[ind, 'FlowName'] = active_header + df.loc[ind,'ActivityProducedBy'] = ( + f'{sheetandtable}, {active_subheader}') + else: + df.loc[ind,'ActivityProducedBy'] = ( + f'{sheetandtable}, {active_header}, ' + f'{active_subheader}') + # for level 3 headers (only occur in IndirectCO2 and Agriculture tabs)... 
+ else: + subsubheader = df['ActivityProducedBy'][ind].strip() + df.loc[ind,'ActivityProducedBy'] = ( + f'{sheetandtable}, {active_header}, ' + f'{active_subheader}, {subsubheader}') + + # drop all columns except the desired emissions year and the + # emissions activity source + df = df.filter([year, 'ActivityProducedBy', 'FlowName']) + # rename columns + df = df.rename(columns={year: 'FlowAmount'}) + # add sheet-specific hardcoded data + if 'subgroup' not in sheet_dict: + df['FlowName'] = sheet_dict.get('flow') + df['Unit'] = sheet_dict.get('unit') + df['Description'] = sheetname + + # concatenate dataframe from each sheet with existing master dataframe + df0 = pd.concat([df0, df]) + + # add general hardcoded data + df0['Class'] = 'Chemicals' + df0['SourceName'] = source + df0['FlowType'] = "ELEMENTARY_FLOW" + df0['Compartment'] = 'air' + df0['Year'] = year + df0['DataReliability'] = 5 + df0['DataCollection'] = 5 + + # add state FIPS code + df0['State'] = state + df0['County'] = '' + df0 = apply_county_FIPS(df0, year='2015', source_state_abbrev=True) + # add FIPS location system + df0 = assign_fips_location_system(df0, '2015') + + return df0 + +if __name__ == '__main__': + import flowsa + flowsa.flowbyactivity.main(source='EPA_SIT', year='2017') + fba = flowsa.getFlowByActivity('EPA_SIT', '2017') diff --git a/flowsa/data_source_scripts/EPA_StateGHGI.py b/flowsa/data_source_scripts/EPA_StateGHGI.py new file mode 100644 index 000000000..8f59134f8 --- /dev/null +++ b/flowsa/data_source_scripts/EPA_StateGHGI.py @@ -0,0 +1,99 @@ +# EPA_StateGHGI.py (flowsa) +# !/usr/bin/env python3 +# coding=utf-8 +""" +Inventory of US GHGs from EPA disaggregated to States +""" +import json +import pandas as pd +from flowsa.settings import externaldatapath +from flowsa.location import apply_county_FIPS +from flowsa.flowbyfunctions import assign_fips_location_system +import flowsa.exceptions + + +def epa_state_ghgi_parse(*, source, year, config, **_): + + try: + with 
open(externaldatapath + config.get('file')) as f: + data = json.load(f) + except FileNotFoundError: + raise FileNotFoundError('State GHGI data not yet available for ' + 'external users') + + data_df = pd.DataFrame(data) + activity_cols = ['SECTOR', 'SOURCE', 'SUBSOURCE', 'FUEL_TYPE', + 'SUB_REFERENCE', 'SECSUB_REFERENCE'] + + states = data_df[['STATE']].drop_duplicates() + flows = data_df[['GHG_NAME']].drop_duplicates() + + df = data_df.melt(id_vars = activity_cols + ['STATE'] + ['GHG_NAME'], + value_vars=f'EMISSION_{year}', + var_name = 'Year', + value_name = 'FlowAmount') + df['Year'] = year + df['Unit'] = 'MMT CO2e' # TODO confirm units + df['FlowType'] = 'ELEMENTARY_FLOW' + df['SourceName'] = source + df['Class'] = 'Chemicals' + df['Compartment'] = 'air' + + df.rename(columns={'STATE': 'State', + 'GHG_NAME': 'FlowName'}, + inplace=True) + + df['ActivityProducedBy'] = (df[activity_cols] + .apply(lambda row: ' - '.join( + row.values.astype(str)), axis=1)) + df['ActivityProducedBy'] = (df['ActivityProducedBy'] + .str.replace(' - None', '')) + df.drop(columns=activity_cols, inplace=True) + activities = df[['ActivityProducedBy']].drop_duplicates() + + df['County'] = '' + df = apply_county_FIPS(df) + df = assign_fips_location_system(df, '2015') + df.drop(columns=['County'], inplace=True) + + return df + + +def remove_select_states(fba, source_dict, **_): + """ + clean_fba_df_fxn to remove selected states so they can be added + from alternate sources. 
State abbreviations must be passed as list + in method parameter 'state_list' + + :param fba: df + :param source_dict: dictionary of source methods includes 'state_list' + key of states to remove + """ + state_list = source_dict.get('state_list') + state_df = pd.DataFrame(state_list, columns=['State']) + state_df['County'] ='' + state_df = apply_county_FIPS(state_df) + df_subset = fba[~fba['Location'].isin(state_df['Location'])] + return df_subset + + +def tag_biogenic_activities(fba, source_dict, **_): + """ + clean_fba_before_mapping_df_fxn to tag emissions from passed activities + as biogenic. Activities passed as list in paramter 'activity_list'. + """ + a_list = source_dict.get('activity_list') + if a_list is None: + raise flowsa.exceptions.FBSMethodConstructionError( + message="Activities to tag must be passed in FBS parameter " + "'activity_list'") + fba.loc[fba['ActivityProducedBy'].isin(a_list), + 'FlowName'] = fba['FlowName'] + ' - biogenic' + + return fba + + +if __name__ == '__main__': + import flowsa + flowsa.flowbyactivity.main(source='EPA_StateGHGI', year='2017') + fba = flowsa.getFlowByActivity('EPA_StateGHGI', '2017') diff --git a/flowsa/data_source_scripts/README.md b/flowsa/data_source_scripts/README.md index 64e6bc452..4837492e2 100644 --- a/flowsa/data_source_scripts/README.md +++ b/flowsa/data_source_scripts/README.md @@ -1,85 +1,27 @@ -# Datapull Descriptions -Descriptions of the type of data pulled from each data source and the information in the -FlowByActivity parquet files. 
- -## BLS_QCEW -US Bureau of Labor Statistics, Quarterly Census of Employment and Wages - -## Census_CBP -US Census Bureau, County Business Patterns - -## Census_PEP_Population -US Census Bureau, Population Estimates Program, Population - -## EIA_CBECS_Land -US Energy Information Administration, Commercial Buildings Energy Consumption Survey, Land - -## EIA_CBECS_Water -US Energy Information Administration, Commercial Buildings Energy Consumption Survey, Water - -## [EIA_MECS_Energy](https://www.eia.gov/consumption/manufacturing/) -US Energy Information Administration, Manufacturing Energy Consumption Survey -- Energy (Tables 2.1, 2.2, 3.1, 3.2) -- National and regional (4 Census regions) -- Flows in energy units (MJ) and physical units (varies), represents duplicate data -- Fuel consumption Class: Energy -- Nonfuel consumption (feedstock) Class: Other - -## [EPA_NEI](https://www.epa.gov/air-emissions-inventories/national-emissions-inventory-nei) -Environmental Protection Agency National Emissions Inventory -- Nonpoint, Nonroad, Onroad emissions -- County level - -## NOAA_FisheryLandings -National Oceanic and Atmospheric Administration, Fishery Landings - -## [StatCan_IWS_MI]('https://www150.statcan.gc.ca/t1/tbl1/en/tv.action?pid=3810003701') -Statistics Canada, Industrial Water Survey, Manufacturing Industries -- Table: 38-10-0037-01 (formerly CANSIM 153-0047) - -## [USDA_CoA_Cropland]('https://www.nass.usda.gov/AgCensus/') -US Department of Agriculture, Census of Agriculture, Cropland -- National, state, county levels -- Total cropland and pastureland -- Harvested cropland and pastureland -- Harvested, irrigated cropland and pastureland - -## [USDA_CoA_Livestock]('https://www.nass.usda.gov/AgCensus/') -US Department of Agriculture, Census of Agriculture, Livestock -- Tables 12, 15-17, 19, 27, 27-30, 32 (2017 report @ national level) -- National, state, county levels -- Livestock inventory for animal types - -## 
[USDA_ERS_FIWS]('https://www.ers.usda.gov/data-products/farm-income-and-wealth-statistics/data-files-us-and-state-level-farm-income-and-wealth-statistics/') -US Department of Agriculture, Economic Research Service, Farm Income and Wealth Statistics -- National, state level -- Cash Receipts by commodity for US crops and animals - -## [USDA_ERS_MLU]('https://www.ers.usda.gov/data-products/major-land-uses/') -US Department of Agriculture, Economic Research Service, Major Land Use -- National level -- Major uses of public/private land for 15 land use categories in Thousand Acres - -## [USDA_IWMS]('https://www.nass.usda.gov/Publications/AgCensus/2017/Online_Resources/Farm_and_Ranch_Irrigation_Survey/index.php') -US Department of Agriculture, Irrigation and Water Management Survey -- Table 36: Field Water Distribution for Selected Crops Harvested in the Open and Irrigated - Pastureland: 2018 and 2013 (2018 report) -- National, State level -- Water Application rates in Average acre-feet applied per acre by crop type - -## [USGS_NWIS_WU]('https://waterdata.usgs.gov/nwis') -US Geological Survey, National Water Information System, Water Use -- National, State, County level water withdrawals for the US, originally in million gallons per day -- Water withdrawals for ground/surface/total water, fresh/saline/total water -- Withdrawals for Aquaculture, Public Supply, Domestic Deliveries, Livestock, Irrigation (Crop and Golf), - Thermoelectric Power, Industrial, Mining - -## [USGS_WU_Coef]('https://pubs.er.usgs.gov/publication/sir20095041') -US Geological Survey, Water Use Coefficients -- Source: Lovelace, John K., 2009, Method for estimating water withdrawals for livestock in the - United States, 2005: U.S. Geological Survey Scientific Investigations Report 2009–5041, 7 p. 
-- Table 1 -- Livestock water use originally provided in gallons/animal/day for 9 animal types based on 2005 USGS NWIS WU -- Ground and surface water associated with livestock watering, feedlots, dairy operations, and other on-farm needs. - Water for drinking, cooling, sanitation, waste disposal, and other needs. - +# Data Source Scripts +The Python files in the `data_source_scripts` folder include functions +specific to each Flow-By-Activity (FBA) dataset. These functions are used to +help load, call, and parse the FBAs. These files can also contain functions +used in Flow-By-Sector generation. + +The functions in these files are called on in FBA and FBS method yamls +using the tag `!script_function:PythonFileName FunctionName` +where _PythonFileName_ is the name of the Python file (e.g., +"BLS_QCEW.py") and _FunctionName_ is the name of the function +(e.g., "bls_clean_allocation_fba_w_sec"). + +``` +target_sector_level: NAICS_6 +target_sector_source: NAICS_2012_Code +target_geoscale: national +source_names: + "BLS_QCEW": + data_format: 'FBA' + class: Employment + geoscale_to_use: national + source_fba_load_scale: national + year: 2017 + clean_fba_df_fxn: !script_function:BLS_QCEW clean_bls_qcew_fba_for_employment_sat_table + clean_fba_w_sec_df_fxn: !script_function:BLS_QCEW bls_clean_allocation_fba_w_sec + activity_sets: +``` diff --git a/flowsa/data_source_scripts/USDA_ACUP.py b/flowsa/data_source_scripts/USDA_ACUP.py index 24b591a55..3f0bf9629 100644 --- a/flowsa/data_source_scripts/USDA_ACUP.py +++ b/flowsa/data_source_scripts/USDA_ACUP.py @@ -33,13 +33,9 @@ def acup_url_helper(*, build_url, config, **_): state_abbrevs = abbrev_us_state state_abbrevs = {k: v for (k, v) in state_abbrevs.items() if k != "DC"} - for x in config['domain_levels']: - for y in state_abbrevs: - url = build_url - url = url.replace("__domainLevel__", x) - url = url.replace("__stateAlpha__", y) - url = url.replace(" ", "%20") - urls.append(url) + url = build_url + url = url.replace(" ", 
"%20") + urls.append(url) return urls @@ -52,12 +48,7 @@ def acup_call(*, resp, **_): :return: pandas dataframe of original source data """ response_json = json.loads(resp.text) - # not all states have data, so return empty df if does not exist - try: - df = pd.DataFrame(data=response_json["data"]) - except KeyError: - log.info('No data exists for state') - df = [] + df = pd.DataFrame(data=response_json["data"]) return df diff --git a/flowsa/data_source_scripts/USDA_CoA_Cropland.py b/flowsa/data_source_scripts/USDA_CoA_Cropland.py index 45f41e18a..535320025 100644 --- a/flowsa/data_source_scripts/USDA_CoA_Cropland.py +++ b/flowsa/data_source_scripts/USDA_CoA_Cropland.py @@ -9,17 +9,16 @@ import json import numpy as np import pandas as pd -from flowsa.location import US_FIPS, abbrev_us_state -from flowsa.common import WITHDRAWN_KEYWORD, \ - fbs_default_grouping_fields, fbs_fill_na_dict, \ - fba_wsec_default_grouping_fields -from flowsa.schema import flow_by_sector_fields -from flowsa.flowbyfunctions import assign_fips_location_system, sector_aggregation, \ - sector_disaggregation, sector_ratios, \ - load_fba_w_standardized_units, equally_allocate_suppressed_parent_to_child_naics -from flowsa.allocation import allocate_by_sector, equally_allocate_parent_to_child_naics +from flowsa.allocation import allocate_by_sector, \ + equally_allocate_parent_to_child_naics, equal_allocation +from flowsa.common import WITHDRAWN_KEYWORD, fba_wsec_default_grouping_fields from flowsa.dataclean import replace_NoneType_with_empty_cells, \ - replace_strings_with_NoneType, clean_df + replace_strings_with_NoneType +from flowsa.flowbyfunctions import assign_fips_location_system, \ + sector_aggregation, sector_disaggregation, sector_ratios, \ + load_fba_w_standardized_units, \ + equally_allocate_suppressed_parent_to_child_naics +from flowsa.location import US_FIPS, abbrev_us_state from flowsa.sectormapping import add_sectors_to_flowbyactivity from flowsa.validation import compare_df_units 
@@ -188,6 +187,13 @@ def coa_cropland_parse(*, df_list, year, **_): ", ALL PRODUCTION PRACTICES", "", regex=True) df.loc[:, 'FlowName'] = df['FlowName'].str.replace( ", IN THE OPEN", "", regex=True) + # want to included "harvested" in the flowname when "harvested" is + # included in the class_desc + df['FlowName'] = np.where(df['class_desc'].str.contains(', HARVESTED'), + df['FlowName'] + " HARVESTED", df['FlowName']) + # reorder + df['FlowName'] = np.where(df['FlowName'] == 'AREA, IRRIGATED HARVESTED', + 'AREA HARVESTED, IRRIGATED', df['FlowName']) # combine column information to create activity # information, and create two new columns for activities df['Activity'] = df['commodity_desc'] + ', ' + df['class_desc'] + ', ' + \ @@ -241,42 +247,27 @@ def coa_cropland_parse(*, df_list, year, **_): return df -def coa_irrigated_cropland_fba_cleanup(fba, **kwargs): - """ - When using irrigated cropland, aggregate sectors to cropland and total - ag land. Doing this because published values for irrigated harvested - cropland do not include the water use for vegetables, woody crops, berries. - :param fba: df, COA FBA format - :return: df, COA with dropped rows based on ActivityConsumedBy column - """ - - fba =\ - fba[~fba['ActivityConsumedBy'].isin(['AG LAND', - 'AG LAND, CROPLAND, HARVESTED'] - )].reset_index(drop=True) - - return fba - - def coa_nonirrigated_cropland_fba_cleanup(fba, **kwargs): """ - When using irrigated cropland, aggregate sectors to cropland and total - ag land. Doing this because published values for irrigated harvested - cropland do not include the water use for vegetables, woody crops, berries. 
+ Cleanup coa cropland data for nonirrigated crops :param fba: df, COA when using non-irrigated data :return: df, COA nonirrigated data, modified """ - # drop rows of data that contain certain strings - fba = fba[~fba['ActivityConsumedBy'].isin( - ['AG LAND', 'AG LAND, CROPLAND, HARVESTED'])] - # when include 'area harvested' and 'area in production' in # single dataframe, which is necessary to include woody crops, # 'vegetable totals' are double counted fba = fba[~((fba['FlowName'] == 'AREA IN PRODUCTION') & (fba['ActivityConsumedBy'] == 'VEGETABLE TOTALS'))] + # When using a mix of flow names, drop activities for ag land (naics 11) + # and ag land, cropland, harvested (naics 111),because published values + # for harvested cropland do not include data for vegetables, woody crops, + # berries. Values for sectors 11 and 111 will be aggregated from the + # dataframe later + fba = fba[~fba['ActivityConsumedBy'].isin( + ['AG LAND', 'AG LAND, CROPLAND, HARVESTED'])].reset_index(drop=True) + return fba @@ -294,7 +285,6 @@ def disaggregate_coa_cropland_to_6_digit_naics( # define the activity and sector columns to base modifications on # these definitions will vary dependent on class type - activity_col = 'ActivityConsumedBy' sector_col = 'SectorConsumedBy' # drop rows without assigned sectors @@ -302,8 +292,7 @@ def disaggregate_coa_cropland_to_6_digit_naics( ~fba_w_sector[sector_col].isna()].reset_index(drop=True) # modify the flowamounts related to the 6 naics 'orchards' are mapped to - fba_w_sector = modify_orchard_flowamounts( - fba_w_sector, activity_column=activity_col) + fba_w_sector = equal_allocation(fba_w_sector) # use ratios of usda 'land in farms' to determine animal use of # pasturelands at 6 digit naics @@ -312,38 +301,55 @@ def disaggregate_coa_cropland_to_6_digit_naics( sector_column=sector_col, download_FBA_if_missing=kwargs['download_FBA_if_missing']) - # use ratios of usda 'harvested cropland' to determine missing 6 digit naics - fba_w_sector = 
disaggregate_cropland(fba_w_sector, attr, - method, year=attr['allocation_source_year'], - sector_column=sector_col, - download_FBA_if_missing=kwargs['download_FBA_if_missing']) + # use ratios of usda 'harvested cropland' to determine missing 6 digit + # naics + fba_w_sector = disaggregate_cropland( + fba_w_sector, attr, method, year=attr['allocation_source_year'], + sector_column=sector_col, download_FBA_if_missing=kwargs[ + 'download_FBA_if_missing']) return fba_w_sector def disaggregate_coa_cropland_to_6_digit_naics_for_water_withdrawal( - fba_w_sector, attr, method, **kwargs): + fba_w_sector_load, attr, method, **kwargs): """ Disaggregate usda coa cropland to naics 6 - :param fba_w_sector: df, CoA cropland data, FBA format with sector columns + :param fba_w_sector_load: df, CoA cropland data, FBA format with sector + columns :param attr: dictionary, attribute data from method yaml for activity set :param method: dictionary, FBS method yaml - :param kwargs: dictionary, arguments that might be required for other functions. - Currently includes data source name. + :param kwargs: dictionary, arguments that might be required for other + functions. Currently includes data source name. 
:return: df, CoA cropland with disaggregated NAICS sectors """ # define the activity and sector columns to base modifications on # these definitions will vary dependent on class type - activity_col = 'ActivityConsumedBy' sector_col = 'SectorConsumedBy' # drop rows without assigned sectors - fba_w_sector = fba_w_sector[~fba_w_sector[sector_col].isna()].reset_index(drop=True) + fba_w_sector = fba_w_sector_load[~fba_w_sector_load[sector_col].isna()]\ + .reset_index(drop=True) # modify the flowamounts related to the 6 naics 'orchards' are mapped to - fba_w_sector = modify_orchard_flowamounts( - fba_w_sector, activity_column=activity_col) + fba_w_sector = equal_allocation(fba_w_sector) + + # todo: add back in once suppression fxn modified to accept non-naics + # like activities and mixed level final naics (naics6 and naics7) + # then estimate any suppressed data by equally allocating parent to + # child naics + # groupcols = list(fba_w_sector3.select_dtypes( + # include=['object', 'int']).columns) + # fba_w_sector = equally_allocate_suppressed_parent_to_child_naics( + # fba_w_sector, method, 'SectorConsumedBy', groupcols) + + # When using irrigated cropland, aggregate sectors to cropland and total + # ag land. Doing this because published values for irrigated harvested + # cropland do not include the water use for vegetables, woody crops, + # berries. + fba_w_sector = fba_w_sector[~fba_w_sector['ActivityConsumedBy'].isin( + ['AG LAND', 'AG LAND, CROPLAND, HARVESTED'])].reset_index(drop=True) # use ratios of usda 'land in farms' to determine animal use of # pasturelands at 6 digit naics @@ -362,24 +368,6 @@ def disaggregate_coa_cropland_to_6_digit_naics_for_water_withdrawal( return fba_w_sector -def modify_orchard_flowamounts(fba, activity_column): - """ - In the CoA cropland crosswalk, the activity 'orchards' is mapped - to eight 6-digit naics. Therefore, after mapping, - divide the orchard flow amount by 8. 
- :param fba: A FlowByActiivty df mapped to sectors - :param activity_column: The activity column to base FlowAmount - modifications on (ActivityProducedBy or ActivityConsumedBy) - :return: df, CoA cropland data with modified FlowAmounts - """ - - # divide the Orchards data allocated to NAICS by 6 to avoid double counting - fba.loc[fba[activity_column] == 'ORCHARDS', - 'FlowAmount'] = fba['FlowAmount'] / 8 - - return fba - - def disaggregate_pastureland(fba_w_sector, attr, method, year, sector_column, download_FBA_if_missing, **kwargs): """ @@ -422,14 +410,18 @@ def disaggregate_pastureland(fba_w_sector, attr, method, year, df_f = df_f[~df_f['ActivityConsumedBy'].str.contains('&')] if 'parameter_drop' in kwargs: # drop aquaculture because pastureland not used for aquaculture - df_f = df_f[~df_f['ActivityConsumedBy'].isin( - kwargs['parameter_drop'])] + # drop any activities at a more aggregated sector level because + # will need to be reaggregated after dropping a parameter to + # accurately calculate the allocation ratios + drop_list = [sub[ : -1] for sub in kwargs['parameter_drop']] + drop_list = drop_list + kwargs['parameter_drop'] + df_f = df_f[~df_f['ActivityConsumedBy'].isin(drop_list)] # create sector columns df_f = add_sectors_to_flowbyactivity( df_f, sectorsourcename=method['target_sector_source']) # estimate suppressed data by equal allocation df_f = equally_allocate_suppressed_parent_to_child_naics( - df_f, 'SectorConsumedBy', fba_wsec_default_grouping_fields) + df_f, method, 'SectorConsumedBy', fba_wsec_default_grouping_fields) # create proportional ratios group_cols = [e for e in fba_wsec_default_grouping_fields if e not in ('ActivityProducedBy', 'ActivityConsumedBy')] @@ -458,8 +450,7 @@ def disaggregate_pastureland(fba_w_sector, attr, method, year, # original fba_w_sector fba_w_sector = fba_w_sector[fba_w_sector[sector_column].apply( lambda x: x[0:3]) != '112'].reset_index(drop=True) - fba_w_sector = pd.concat([fba_w_sector, df], - 
sort=True).reset_index(drop=True) + fba_w_sector = pd.concat([fba_w_sector, df]).reset_index(drop=True) # fill empty cells with NoneType fba_w_sector = replace_strings_with_NoneType(fba_w_sector) @@ -512,17 +503,12 @@ def disaggregate_cropland(fba_w_sector, attr, method, year, naics, sectorsourcename=method['target_sector_source']) # estimate suppressed data by equally allocating parent to child naics naics = equally_allocate_suppressed_parent_to_child_naics( - naics, 'SectorConsumedBy', fba_wsec_default_grouping_fields) - # add missing fbs fields - naics = clean_df(naics, flow_by_sector_fields, fbs_fill_na_dict) + naics, method, 'SectorConsumedBy', fba_wsec_default_grouping_fields) # aggregate sectors to create any missing naics levels - group_cols = fbs_default_grouping_fields naics2 = sector_aggregation(naics) # add missing naics5/6 when only one naics5/6 associated with a naics4 naics3 = sector_disaggregation(naics2) - # drop rows where FlowAmount 0 - naics3 = naics3.loc[naics3['FlowAmount'] != 0] # create ratios naics4 = sector_ratios(naics3, sector_column) # create temporary sector column to match the two dfs on @@ -577,7 +563,7 @@ def disaggregate_cropland(fba_w_sector, attr, method, year, # tmp drop Nonetypes df_subset = replace_NoneType_with_empty_cells(df_subset) # add new rows of data to crop df - crop = pd.concat([crop, df_subset], sort=True).reset_index(drop=True) + crop = pd.concat([crop, df_subset]).reset_index(drop=True) # clean up df crop = crop.drop(columns=['Location_tmp']) @@ -590,7 +576,7 @@ def disaggregate_cropland(fba_w_sector, attr, method, year, fba_w_sector.loc[fba_w_sector[sector_column].apply( lambda x: x[0:3]) == '112'].reset_index(drop=True) # concat crop and pasture - fba_w_sector = pd.concat([pasture, crop], sort=True).reset_index(drop=True) + fba_w_sector = pd.concat([pasture, crop]).reset_index(drop=True) # fill empty cells with NoneType fba_w_sector = replace_strings_with_NoneType(fba_w_sector) diff --git 
a/flowsa/data_source_scripts/USDA_CoA_Cropland_NAICS.py b/flowsa/data_source_scripts/USDA_CoA_Cropland_NAICS.py index 01a55c693..004147140 100644 --- a/flowsa/data_source_scripts/USDA_CoA_Cropland_NAICS.py +++ b/flowsa/data_source_scripts/USDA_CoA_Cropland_NAICS.py @@ -164,6 +164,8 @@ def coa_cropland_naics_fba_wsec_cleanup(fba_w_sector, **kwargs): :return: df, flowbyactivity with modified values """ + method = kwargs.get('method') df = equally_allocate_suppressed_parent_to_child_naics( - fba_w_sector, 'SectorConsumedBy', fba_wsec_default_grouping_fields) + fba_w_sector, method, 'SectorConsumedBy', + fba_wsec_default_grouping_fields) return df diff --git a/flowsa/data_source_scripts/USDA_ERS_MLU.py b/flowsa/data_source_scripts/USDA_ERS_MLU.py index aed702aac..9f5b1a868 100644 --- a/flowsa/data_source_scripts/USDA_ERS_MLU.py +++ b/flowsa/data_source_scripts/USDA_ERS_MLU.py @@ -13,7 +13,7 @@ import numpy as np from flowsa.location import get_all_state_FIPS_2, US_FIPS from flowsa.settings import vLogDetailed -from flowsa.flowbyfunctions import assign_fips_location_system +from flowsa.flowbyfunctions import assign_fips_location_system, aggregator from flowsa.common import load_crosswalk from flowsa.literature_values import \ get_area_of_rural_land_occupied_by_houses_2013, \ @@ -195,7 +195,13 @@ def allocate_usda_ers_mlu_land_in_urban_areas(df, attr, fbs_list): [df_residential, df_openspace, df_airport, df_railroad, df_highway2], ignore_index=True, sort=False).reset_index(drop=True) - return allocated_urban_areas_df + # aggregate because multiple rows to household data due to residential + # land area and highway fee shares + groupcols = list(df.select_dtypes(include=['object', 'int']).columns) + allocated_urban_areas_df_2 = aggregator(allocated_urban_areas_df, + groupcols) + + return allocated_urban_areas_df_2 def allocate_usda_ers_mlu_land_in_rural_transportation_areas( diff --git a/flowsa/data_source_scripts/USGS_MYB.py b/flowsa/data_source_scripts/USGS_MYB.py index 
05a063814..66fa6880b 100644 --- a/flowsa/data_source_scripts/USGS_MYB.py +++ b/flowsa/data_source_scripts/USGS_MYB.py @@ -1754,6 +1754,11 @@ def usgs_iodine_call(*, resp, year, **_): df_data.columns = ["Production", "space_1", "year_1", "space_2", "year_2", "space_3", "year_3", "space_4", "year_4", "space_5", "year_5"] + elif len(df_data. columns) == 13: + df_data.columns = ["Production", "unit", "space_1", "year_1", "space_2", + "year_2", "space_3", "year_3", "space_4", + "year_4", "space_5", "year_5", "space_6"] + col_to_use = ["Production"] col_to_use.append(usgs_myb_year(YEARS_COVERED['iodine'], year)) @@ -1819,7 +1824,7 @@ def usgs_iron_ore_call(*, resp, year, **_): :return: pandas dataframe of original source data """ df_raw_data = pd.io.excel.read_excel(io.BytesIO(resp.content), - sheet_name='T1 ') + sheet_name='T1') df_data = pd.DataFrame(df_raw_data.loc[7:25]).reindex() df_data = df_data.reset_index() del df_data["index"] @@ -1972,15 +1977,14 @@ def usgs_lead_url_helper(*, year, **_): format """ if int(year) < 2013: - build_url = ('https://prd-wret.s3.us-west-2.amazonaws.com/assets/' + build_url = ('https://d9-wret.s3.us-west-2.amazonaws.com/assets/' 'palladium/production/atoms/files/myb1-2016-lead.xls') elif int(year) < 2014: - build_url = ('https://prd-wret.s3.us-west-2.amazonaws.com/assets/' + build_url = ('https://d9-wret.s3.us-west-2.amazonaws.com/assets/' 'palladium/production/atoms/files/myb1-2017-lead.xls') else: - build_url = ('https://s3-us-west-2.amazonaws.com/prd-wret/assets/' - 'palladium/production/atoms/files/myb1-2018-lead-adv' - '.xlsx') + build_url = ('https://d9-wret.s3.us-west-2.amazonaws.com/assets/' + 'palladium/production/s3fs-public/media/files/myb1-2018-lead-advrel.xlsx') url = build_url return [url] @@ -3109,7 +3113,15 @@ def usgs_platinum_call(*, resp, year, **_): "year_1", "space_2", "year_2", "space_3", "year_3", "space_4", "year_4", "space_5", "year_5"] - + elif len(df_data_1. 
columns) == 12: + df_data_1.columns = ["Production", "Units", "space_1", + "year_1", "space_2", "year_2", "space_3", + "year_3", "space_4", "year_4", "space_5", + "year_5"] + df_data_2.columns = ["Production", "Units", "space_1", + "year_1", "space_2", "year_2", "space_3", + "year_3", "space_4", "year_4", "space_5", + "year_5"] col_to_use = ["Production"] col_to_use.append(usgs_myb_year(YEARS_COVERED['platinum'], year)) for col in df_data_1.columns: @@ -3823,8 +3835,8 @@ def soda_call(*, resp, year, **_): """ col_to_use = ["Production", "NAICS code", "End use", "year_5", "total"] - - if str(year) in YEARS_COVERED['sodaash_T4']: + years_covered = YEARS_COVERED['sodaash_t4'] + if str(year) in years_covered: df_raw_data = pd.io.excel.read_excel(io.BytesIO(resp.content), sheet_name='T4') df_data_one = pd.DataFrame(df_raw_data.loc[7:25]).reindex() @@ -3855,7 +3867,7 @@ def soda_call(*, resp, year, **_): "year_2", "space_3", "year_3", "space_4", "year_4", "space_5", "year_5"] - if str(year) in YEARS_COVERED['sodaash_T4']: + if str(year) in years_covered: for col in df_data_one.columns: if col not in col_to_use: del df_data_one[col] @@ -3864,7 +3876,7 @@ def soda_call(*, resp, year, **_): if col not in col_to_use: del df_data_two[col] - if str(year) in YEARS_COVERED['sodaash_T4']: + if str(year) in years_covered: frames = [df_data_one, df_data_two] else: frames = [df_data_two] @@ -4467,6 +4479,10 @@ def usgs_vermiculite_call(*, resp, year, **_): df_data_one.columns = ["Production", "Unit", "space_2", "year_1", "space_3", "year_2", "space_4", "year_3", "space_5", "year_4", "space_6", "year_5"] + elif len(df_data_one. 
columns) == 13: + df_data_one.columns = ["Production", "Unit", "space_2", "year_1", + "space_3", "year_2", "space_4", "year_3", + "space_5", "year_4", "space_6", "year_5", "space_7"] col_to_use = ["Production"] col_to_use.append(usgs_myb_year(YEARS_COVERED['vermiculite'], year)) diff --git a/flowsa/data_source_scripts/USGS_NWIS_WU.py b/flowsa/data_source_scripts/USGS_NWIS_WU.py index 1bb1ee170..5fcf66f36 100644 --- a/flowsa/data_source_scripts/USGS_NWIS_WU.py +++ b/flowsa/data_source_scripts/USGS_NWIS_WU.py @@ -12,7 +12,7 @@ from flowsa.location import abbrev_us_state, US_FIPS from flowsa.common import fba_activity_fields, capitalize_first_letter from flowsa.settings import vLogDetailed -from flowsa.flowbyfunctions import assign_fips_location_system +from flowsa.flowbyfunctions import assign_fips_location_system, aggregator from flowsa.validation import compare_df_units, \ calculate_flowamount_diff_between_dfs @@ -354,10 +354,10 @@ def standardize_usgs_nwis_names(flowbyactivity_df): return flowbyactivity_df -def usgs_fba_data_cleanup(df): +def usgs_fba_data_cleanup(fba, **_): """ Clean up the dataframe to prepare for flowbysector. 
Used in flowbysector.py - :param df: df, FBA format + :param fba: df, FBA format :return: df, modified FBA """ @@ -368,9 +368,9 @@ def usgs_fba_data_cleanup(df): vLogDetailed.info('Removing all rows for Commercial Data because does not ' 'exist for all states and causes issues as information ' 'on Public Supply deliveries.') - dfa = df[~df['Description'].str.lower().str.contains( + dfa = fba[~fba['Description'].str.lower().str.contains( 'commercial|closed-loop cooling|once-through')] - calculate_flowamount_diff_between_dfs(df, dfa) + calculate_flowamount_diff_between_dfs(fba, dfa) # calculated NET PUBLIC SUPPLY by subtracting out deliveries to domestic vLogDetailed.info('Modify the public supply values to generate ' 'NET public supply by subtracting out deliveries ' @@ -542,6 +542,50 @@ def check_golf_and_crop_irrigation_totals(df_load): # drop national data df = df_load[df_load['Location'] != '00000'] + df_m2 = subset_and_merge_irrigation_types(df) + + df_m3 = df_m2[df_m2['Diff'] > 0].reset_index(drop=True) + + # rename irrigation to irrigation crop and append rows to df + df_m3.loc[df_m3['ActivityProducedBy'] == + 'Irrigation', 'ActivityProducedBy'] = 'Irrigation Crop' + df_m3.loc[df_m3['ActivityConsumedBy'] == + 'Irrigation', 'ActivityConsumedBy'] = 'Irrigation Crop' + df_m3['Description'] = df_m3['Description'].str.replace( + 'Irrigation, Total', 'Irrigation, Crop').str.replace( + 'withdrawals', 'withdrawals for crops').str.replace( + 'use', 'use for crops') + df_m3 = df_m3.drop(columns=['Golf_Amount', 'Golf_APB', 'Golf_ACB', + 'Crop_Amount', 'Crop_APB', + 'Crop_ACB', 'subset_sum', 'FlowAmount', + 'Crop_Description']) + df_m3 = df_m3.rename(columns={'Diff': 'FlowAmount'}) + + if len(df_m3) != 0: + df_w_missing_crop = pd.concat([df_load, df_m3], ignore_index=True) + + group_cols = list(df.select_dtypes(include=['object', 'int']).columns) + df_w_missing_crop = aggregator(df_w_missing_crop, group_cols, + retain_zeros=True) + + # validate results - the differences 
should all be 0 + df_check = subset_and_merge_irrigation_types(df_w_missing_crop) + df_check = df_check[df_check['Location'] != US_FIPS].reset_index( + drop=True) + df_check['Diff'] = df_check['Diff'].apply(lambda x: round(x, 2)) + df_check2 = df_check[df_check['Diff'] != 0] + if len(df_check2) > 0: + vLogDetailed.info('The golf and crop irrigation do not add up to ' + 'total irrigation.') + else: + vLogDetailed.info('The golf and crop irrigation add up to total ' + 'irrigation.') + return df_w_missing_crop + else: + return df_load + + +def subset_and_merge_irrigation_types(df): # subset into golf, crop, and total irrigation (and non irrigation) df_i = df[(df[fba_activity_fields[0]] == 'Irrigation') | (df[fba_activity_fields[1]] == 'Irrigation')] @@ -572,7 +616,7 @@ def check_golf_and_crop_irrigation_totals(df_load): df_m2 = pd.merge(df_m, df_c[['FlowName', 'FlowAmount', 'ActivityProducedBy', 'ActivityConsumedBy', 'Compartment', - 'Location', 'Year']], + 'Location', 'Year', 'Description']], how='outer', right_on=['FlowName', 'Compartment', 'Location', 'Year'], left_on=['FlowName', 'Compartment', 'Location', 'Year']) @@ -581,29 +625,17 @@ def check_golf_and_crop_irrigation_totals(df_load): "ActivityConsumedBy_x": "ActivityConsumedBy", "FlowAmount_y": "Crop_Amount", "ActivityProducedBy_y": "Crop_APB", - "ActivityConsumedBy_y": "Crop_ACB"}) + "ActivityConsumedBy_y": "Crop_ACB", + "Description_x": 'Description', + "Description_y": "Crop_Description"}) # fill na and sum crop and golf - # df_m2 = df_m2.fillna(0) + for col in df_m2: + if df_m2[col].dtype in ("int", "float"): + df_m2[col] = df_m2[col].fillna(0) df_m2['subset_sum'] = df_m2['Crop_Amount'] + df_m2['Golf_Amount'] df_m2['Diff'] = df_m2['FlowAmount'] - df_m2['subset_sum'] - df_m3 = df_m2[df_m2['Diff'] >= 0.000001].reset_index(drop=True) - - # rename irrigation to irrgation crop and append rows to df - df_m3.loc[df_m3['ActivityProducedBy'] == - 'Irrigation', 'ActivityProducedBy'] = 'Irrigation Crop' - 
df_m3.loc[df_m3['ActivityConsumedBy'] == - 'Irrigation', 'ActivityConsumedBy'] = 'Irrigation Crop' - df_m3 = df_m3.drop(columns=['Golf_Amount', 'Golf_APB', 'Golf_ACB', - 'Crop_Amount', 'Crop_APB', - 'Crop_ACB', 'subset_sum', 'Diff']) - - if len(df_m3) != 0: - df_w_missing_crop = pd.concat([df_load, df_m3], sort=True, - ignore_index=True) - return df_w_missing_crop - else: - return df_load + return df_m2 def usgs_fba_w_sectors_data_cleanup(df_wsec, attr, **kwargs): @@ -680,7 +712,7 @@ def modify_sector_length(df_wsec): df2 = df2.drop(columns=["LengthToModify", 'TargetLength']) - df = pd.concat([df1, df2], sort=True) + df = pd.concat([df1, df2]) return df else: return df1 diff --git a/flowsa/data_source_scripts/stateio.py b/flowsa/data_source_scripts/stateio.py new file mode 100644 index 000000000..d3aa77771 --- /dev/null +++ b/flowsa/data_source_scripts/stateio.py @@ -0,0 +1,80 @@ +# stateio.py (flowsa) +# !/usr/bin/env python3 +# coding=utf-8 +""" +Supporting functions for accessing files from stateior via data commons. 
+https://github.com/USEPA/stateior +""" + +import os +import pandas as pd + +from esupy.processed_data_mgmt import download_from_remote, Paths,\ + load_preprocessed_output +from flowsa.metadata import set_fb_meta +from flowsa.location import us_state_abbrev, apply_county_FIPS +from flowsa.flowbyfunctions import assign_fips_location_system + + +def parse_statior(*, source, year, config, **_): + """parse_response_fxn for stateio make and use tables""" + # Prepare meta for downloading stateior datasets + name = config.get('datatype') + fname = f"{name}_{year}" + meta = set_fb_meta(fname, "") + meta.tool = 'stateio' + meta.ext = 'rds' + stateio_paths = Paths() + stateio_paths.local_path = os.path.realpath(stateio_paths.local_path + + "/stateio") + # Download and load the latest version from remote + download_from_remote(meta, stateio_paths) + states = load_preprocessed_output(meta, stateio_paths) + + data_dict = {} + + # uses rpy2 + # this .rds is stored as a list of named dataframes by state + for state in us_state_abbrev.keys(): + df = states.rx2(state) + df2 = df.melt(ignore_index=False, value_name = 'FlowAmount', + var_name = 'ActivityConsumedBy') + df2['ActivityProducedBy'] = df2.index + if source == 'stateio_Make_Summary': + # Adjust the index by removing the state: STATE.SECTOR + df2['ActivityProducedBy'] = df2[ + 'ActivityProducedBy'].str.split(".", expand=True)[1] + df2.reset_index(drop=True, inplace=True) + df2['State'] = state + data_dict[state] = df2 + + fba = pd.concat(data_dict, ignore_index=True) + fba.dropna(subset=['FlowAmount'], inplace=True) + + # Gross Output + if 'GO' in source and 'ActivityConsumedBy' in fba.columns: + fba = fba.drop(columns=['ActivityConsumedBy']) + + # Assign location + fba['County'] = '' + fba = apply_county_FIPS(fba) + fba = assign_fips_location_system(fba, '2015') + fba.drop(columns=['County'], inplace=True) + + # Hardcoded data + fba['Year'] = year + fba['SourceName'] = source + fba['Class'] = 'Money' + fba['Unit'] = "USD" + 
fba['FlowName'] = f"USD{year}" + fba["FlowType"] = "TECHNOSPHERE_FLOW" + fba['DataReliability'] = 5 # tmp + fba['DataCollection'] = 5 # tmp + return fba + + +if __name__ == "__main__": + import flowsa + source = 'stateio_Industry_GO' + flowsa.flowbyactivity.main(year=2017, source=source) + fba = flowsa.getFlowByActivity(source, 2017) diff --git a/flowsa/data_source_scripts/stewiFBS.py b/flowsa/data_source_scripts/stewiFBS.py index 06b23ee69..77f0053c3 100644 --- a/flowsa/data_source_scripts/stewiFBS.py +++ b/flowsa/data_source_scripts/stewiFBS.py @@ -15,6 +15,7 @@ import os import pandas as pd from esupy.dqi import get_weighted_average +from esupy.processed_data_mgmt import read_source_metadata from flowsa.allocation import equally_allocate_parent_to_child_naics from flowsa.flowbyfunctions import assign_fips_location_system,\ aggregate_and_subset_for_target_sectors @@ -24,6 +25,12 @@ from flowsa.schema import flow_by_sector_fields from flowsa.settings import log, process_adjustmentpath from flowsa.validation import replace_naics_w_naics_from_another_year +import stewicombo +import stewi +from stewicombo.overlaphandler import remove_default_flow_overlaps +from stewicombo.globals import addChemicalMatches, compile_metadata,\ + set_stewicombo_meta +import facilitymatcher def stewicombo_to_sector(yaml_load, method, fbsconfigpath=None): @@ -43,8 +50,6 @@ def stewicombo_to_sector(yaml_load, method, fbsconfigpath=None): :param fbsconfigpath, str, optional path to an FBS method outside flowsa repo :return: df, FBS format """ - import stewicombo - inventory_name = yaml_load.get('local_inventory_name') df = None @@ -102,8 +107,6 @@ def stewi_to_sector(yaml_load, method, *_): :param method: dictionary, FBS method :return: df, FBS format """ - import stewi - # determine if fxns specified in FBS method yaml functions = yaml_load.get('functions', []) @@ -147,10 +150,6 @@ def reassign_process_to_sectors(df, year, file_list, fbsconfigpath): :param fbsconfigpath, str, optional path 
to an FBS method outside flowsa repo :return: df """ - import stewi - from stewicombo.overlaphandler import remove_default_flow_overlaps - from stewicombo.globals import addChemicalMatches - df_adj = pd.DataFrame() for file in file_list: fpath = f"{process_adjustmentpath}{file}.csv" @@ -218,7 +217,6 @@ def extract_facility_data(inventory_dict): {'NEI':'2017', 'TRI':'2017'}) :return: df """ - import stewi facilities_list = [] # load facility data from stewi output directory, keeping only the # facility IDs, and geographic information @@ -248,7 +246,6 @@ def obtain_NAICS_from_facility_matcher(inventory_list): :param inventory_list: a list of inventories (e.g., ['NEI', 'TRI']) :return: df """ - import facilitymatcher # Access NAICS From facility matcher and assign based on FRS_ID all_NAICS = \ facilitymatcher.get_FRS_NAICSInfo_for_facility_list( @@ -372,10 +369,15 @@ def add_stewi_metadata(inventory_dict): {'NEI':'2017', 'TRI':'2017'}) :return: combined dictionary of metadata from each inventory """ - from stewicombo.globals import compile_metadata return compile_metadata(inventory_dict) +def add_stewicombo_metadata(inventory_name): + """Access locally stored stewicombo metadata by filename""" + return read_source_metadata(stewicombo.globals.paths, + set_stewicombo_meta(inventory_name)) + + if __name__ == "__main__": import flowsa flowsa.flowbysector.main(method='CRHW_state_2017') diff --git a/flowsa/dataclean.py b/flowsa/dataclean.py index 477e55c6d..8a07161b6 100644 --- a/flowsa/dataclean.py +++ b/flowsa/dataclean.py @@ -210,13 +210,15 @@ def standardize_units(df): np.where(df['Unit'] == 'million Cubic metres/year', 'kg', df['Unit']) # Convert mass units (LB or TON) to kg - df.loc[:, 'FlowAmount'] = np.where(df['Unit'] == 'TON', + df.loc[:, 'FlowAmount'] = np.where(df['Unit'].isin(['TON', 'tons', + 'short tons']), df['FlowAmount'] * ton_to_kg, df['FlowAmount']) df.loc[:, 'FlowAmount'] = np.where(df['Unit'] == 'LB', df['FlowAmount'] * lb_to_kg, df['FlowAmount']) - 
df.loc[:, 'Unit'] = np.where(df['Unit'].isin(['TON', 'LB']), + df.loc[:, 'Unit'] = np.where(df['Unit'].isin(['TON', 'tons', + 'short tons', 'LB']), 'kg', df['Unit']) return df diff --git a/flowsa/exceptions.py b/flowsa/exceptions.py new file mode 100644 index 000000000..5d922fe92 --- /dev/null +++ b/flowsa/exceptions.py @@ -0,0 +1,46 @@ +# exceptions.py (flowsa) +# !/usr/bin/env python3 +# coding=utf-8 + +"""Defines custom exceptions for flowsa""" + + +class FBANotAvailableError(Exception): + def __init__(self, method=None, year=None, message=None): + if message is None: + message = ("FBA not available for requested year") + if method: + message = message.replace("FBA", method) + if year: + message = message.replace("requested year", str(year)) + self.message = message + super().__init__(self.message) + + +class FlowsaMethodNotFoundError(FileNotFoundError): + def __init__(self, method_type=None, method=None): + message = (f"{method_type} method file not found") + if method: + message = " ".join((message, f"for {method}")) + self.message = message + super().__init__(self.message) + + +class APIError(Exception): + def __init__(self, api_source): + message = (f"Key file {api_source} not found. 
See github wiki for help " + "https://github.com/USEPA/flowsa/wiki/Using-FLOWSA#api-keys") + self.message = message + super().__init__(self.message) + + +class FBSMethodConstructionError(Exception): + """Errors in FBS methods which result in incompatible models""" + def __init__(self, message=None, error_type=None): + if message is None: + message = ("Error in method construction.") + if error_type == 'fxn_call': + message = ("Calling functions in method files must be preceded " + "by '!script_function:'") + self.message = message + super().__init__(self.message) diff --git a/flowsa/fbs_allocation.py b/flowsa/fbs_allocation.py index 4b6a2946d..ae402e879 100644 --- a/flowsa/fbs_allocation.py +++ b/flowsa/fbs_allocation.py @@ -7,6 +7,7 @@ import numpy as np import pandas as pd +import flowsa from flowsa.common import fba_activity_fields, fbs_activity_fields, \ fba_mapped_wsec_default_grouping_fields, fba_wsec_default_grouping_fields, \ check_activities_sector_like, return_bea_codes_used_as_naics @@ -17,7 +18,7 @@ check_if_location_systems_match from flowsa.flowbyfunctions import collapse_activity_fields, \ sector_aggregation, sector_disaggregation, subset_df_by_geoscale, \ - load_fba_w_standardized_units + load_fba_w_standardized_units, aggregator from flowsa.allocation import allocate_by_sector, proportional_allocation_by_location_and_activity, \ equally_allocate_parent_to_child_naics, equal_allocation from flowsa.sectormapping import get_fba_allocation_subset, add_sectors_to_flowbyactivity @@ -37,7 +38,7 @@ def direct_allocation_method(fbs, k, names, method): log.info('Directly assigning activities to sectors') # for each activity, if activities are not sector like, # check that there is no data loss - if check_activities_sector_like(k) is False: + if check_activities_sector_like(fbs) is False: activity_list = [] n_allocated = [] for n in names: @@ -50,14 +51,12 @@ def direct_allocation_method(fbs, k, names, method): fbs_subset = \ fbs[(fbs[fba_activity_fields[0]] 
== n) | (fbs[fba_activity_fields[1]] == n)].reset_index(drop=True) - # check if an Activity maps to more than one sector, - # if so, equally allocate - fbs_subset = equal_allocation(fbs_subset) - fbs_subset = equally_allocate_parent_to_child_naics( - fbs_subset, method) activity_list.append(fbs_subset) n_allocated.append(n) fbs = pd.concat(activity_list, ignore_index=True) + # check if an Activity maps to more than one sector, + # if so, equally allocate + fbs = equal_allocation(fbs) return fbs @@ -191,20 +190,20 @@ def dataset_allocation_method(flow_subset_mapped, attr, names, method, # subset fba allocation table to the values in the activity # list, based on overlapping sectors - flow_subset_mapped = flow_subset_mapped.loc[ + flow_subset_mapped2 = flow_subset_mapped.loc[ (flow_subset_mapped[fbs_activity_fields[0]].isin(sector_list)) | (flow_subset_mapped[fbs_activity_fields[1]].isin(sector_list))] # check if fba and allocation dfs have the same LocationSystem log.info("Checking if flowbyactivity and allocation " "dataframes use the same location systems") - check_if_location_systems_match(flow_subset_mapped, flow_allocation) + check_if_location_systems_match(flow_subset_mapped2, flow_allocation) # merge fba df w/flow allocation dataset log.info("Merge %s and subset of %s", k, attr['allocation_source']) for i, j in activity_fields.items(): # check units - compare_df_units(flow_subset_mapped, flow_allocation) + compare_df_units(flow_subset_mapped2, flow_allocation) # create list of columns to merge on if 'allocation_merge_columns' in attr: fa_cols = \ @@ -219,21 +218,21 @@ def dataset_allocation_method(flow_subset_mapped, attr, names, method, fa_cols = ['Location', 'Sector', 'FlowAmountRatio', 'FBA_Activity'] l_cols = ['Location', j[1]["flowbysector"], j[0]["flowbyactivity"]] r_cols = ['Location', 'Sector', 'FBA_Activity'] - flow_subset_mapped = \ - flow_subset_mapped.merge(flow_allocation[fa_cols], left_on=l_cols, + flow_subset_mapped2 = \ + 
flow_subset_mapped2.merge(flow_allocation[fa_cols], left_on=l_cols, right_on=r_cols, how='left') # merge the flowamount columns - flow_subset_mapped.loc[:, 'FlowAmountRatio'] =\ - flow_subset_mapped['FlowAmountRatio_x'].fillna( - flow_subset_mapped['FlowAmountRatio_y']) + flow_subset_mapped2.loc[:, 'FlowAmountRatio'] =\ + flow_subset_mapped2['FlowAmountRatio_x'].fillna( + flow_subset_mapped2['FlowAmountRatio_y']) # fill null rows with 0 because no allocation info - flow_subset_mapped['FlowAmountRatio'] = \ - flow_subset_mapped['FlowAmountRatio'].fillna(0) + flow_subset_mapped2['FlowAmountRatio'] = \ + flow_subset_mapped2['FlowAmountRatio'].fillna(0) # drop rows where there is no allocation data - fbs = flow_subset_mapped.dropna( - subset=['Sector_x', 'Sector_y'], how='all').reset_index() + fbs = flow_subset_mapped2.dropna( + subset=['Sector_x', 'Sector_y'], how='all').reset_index(drop=True) # calculate flow amounts for each sector log.info("Calculating new flow amounts using flow ratios") @@ -243,8 +242,22 @@ def dataset_allocation_method(flow_subset_mapped, attr, names, method, log.info("Cleaning up new flow by sector") fbs = fbs.drop(columns=['Sector_x', 'FlowAmountRatio_x', 'Sector_y', 'FlowAmountRatio_y', 'FlowAmountRatio', - 'FBA_Activity_x', 'FBA_Activity_y']) - return fbs + 'FBA_Activity_x', 'FBA_Activity_y', + 'disaggregate_flag', 'Description'], + errors='ignore') + + # if activities are source like, reset activity columns + sector_like_activities = check_activities_sector_like(flow_subset_mapped) + if sector_like_activities: + fbs = fbs.assign(ActivityProducedBy = fbs['SectorProducedBy'], + ActivityConsumedBy = fbs['SectorConsumedBy']) + + group_cols = list(fbs.select_dtypes(include=['object', 'int']).columns) + fbs2 = aggregator(fbs, group_cols) + + fbs3 = sector_aggregation(fbs2) + + return fbs3 def allocation_helper(df_w_sector, attr, method, v, download_FBA_if_missing): @@ -465,6 +478,7 @@ def load_map_clean_fba(method, attr, fba_sourcename, df_year, 
flowclass, 'clean_allocation_fba_w_sec' :return: df, fba format """ + from flowsa.sectormapping import get_activitytosector_mapping # dictionary to load/standardize fba kwargs_dict = {} if 'download_FBA_if_missing' in kwargs: @@ -482,15 +496,6 @@ def load_map_clean_fba(method, attr, fba_sourcename, df_year, flowclass, **kwargs_dict ) - # check if allocation data exists at specified geoscale to use - log.info("Checking if allocation data exists at the %s level", - geoscale_from) - check_if_data_exists_at_geoscale(fba, geoscale_from) - - # aggregate geographically to the scale of the flowbyactivty source, - # if necessary - fba = subset_df_by_geoscale(fba, geoscale_from, geoscale_to) - # subset based on yaml settings if 'flowname_subset' in kwargs: if kwargs['flowname_subset'] != 'None': @@ -499,17 +504,44 @@ def load_map_clean_fba(method, attr, fba_sourcename, df_year, flowclass, if kwargs['compartment_subset'] != 'None': fba = \ fba.loc[fba['Compartment'].isin(kwargs['compartment_subset'])] + fba = fba.reset_index(drop=True) + + if len(fba) == 0: + raise flowsa.exceptions.FBSMethodConstructionError( + message='Allocation dataset is length 0; check flow or ' + 'compartment subset for errors') + + # load relevant activities if activities are not naics-like + try: + sm = get_activitytosector_mapping( + fba_sourcename, fbsconfigpath=fbsconfigpath) + sm_list = sm['Activity'].drop_duplicates().values.tolist() + # subset fba data by activities listed in the sector crosswalk + fba = fba[(fba[fba_activity_fields[0]].isin(sm_list)) | + (fba[fba_activity_fields[1]].isin(sm_list) + )].reset_index(drop=True) + except FileNotFoundError: + pass + + # check if allocation data exists at specified geoscale to use + log.info("Checking if allocation data exists at the %s level", + geoscale_from) + check_if_data_exists_at_geoscale(fba, geoscale_from) + + # aggregate geographically to the scale of the flowbyactivity source, + # if necessary + fba2 = subset_df_by_geoscale(fba, 
geoscale_from, geoscale_to) # cleanup the fba allocation df, if necessary if 'clean_fba' in kwargs: log.info("Cleaning %s", fba_sourcename) - fba = kwargs["clean_fba"]( - fba, + fba2 = kwargs["clean_fba"]( + fba2, attr=attr, download_FBA_if_missing=kwargs['download_FBA_if_missing'] ) # reset index - fba = fba.reset_index(drop=True) + fba2 = fba2.reset_index(drop=True) # assign sector to allocation dataset activity_to_sector_mapping = attr.get('activity_to_sector_mapping') @@ -521,7 +553,7 @@ def load_map_clean_fba(method, attr, fba_sourcename, df_year, flowclass, overwrite_sectorlevel = 'aggregated' else: overwrite_sectorlevel = None - fba_wsec = add_sectors_to_flowbyactivity(fba, sectorsourcename=method[ + fba_wsec = add_sectors_to_flowbyactivity(fba2, sectorsourcename=method[ 'target_sector_source'], activity_to_sector_mapping=activity_to_sector_mapping, fbsconfigpath=fbsconfigpath, overwrite_sectorlevel=overwrite_sectorlevel) diff --git a/flowsa/flowbyactivity.py b/flowsa/flowbyactivity.py index 4ca83760d..adce16020 100644 --- a/flowsa/flowbyactivity.py +++ b/flowsa/flowbyactivity.py @@ -11,11 +11,12 @@ import argparse import pandas as pd from urllib import parse +import flowsa from esupy.processed_data_mgmt import write_df_to_file from esupy.remote import make_url_request -from flowsa.common import log, load_api_key, sourceconfigpath, \ +from flowsa.common import load_api_key, sourceconfigpath, \ load_yaml_dict, rename_log_file, get_flowsa_base_name -from flowsa.settings import paths +from flowsa.settings import paths, log from flowsa.metadata import set_fb_meta, write_metadata from flowsa.flowbyfunctions import fba_fill_na_dict from flowsa.schema import flow_by_activity_fields @@ -74,11 +75,14 @@ def assemble_urls_for_query(*, source, year, config): userAPIKey = load_api_key(config['api_name']) # (common.py fxn) build_url = build_url.replace("__apiKey__", userAPIKey) - if "url_replace_fxn" in config: - # dynamically import and call on function - urls = 
config["url_replace_fxn"](build_url=build_url, source=source, - year=year, config=config) + fxn = config.get("url_replace_fxn") + if callable(fxn): + urls = fxn(build_url=build_url, source=source, + year=year, config=config) return urls + elif fxn: + raise flowsa.exceptions.FBSMethodConstructionError( + error_type='fxn_call') else: return [build_url] @@ -108,11 +112,13 @@ def call_urls(*, url_list, source, year, config): resp = make_url_request(url, set_cookies=set_cookies, confirm_gdrive=confirm_gdrive) - if "call_response_fxn" in config: - # dynamically import and call on function - df = config["call_response_fxn"](resp=resp, source=source, - year=year, config=config, - url=url) + fxn = config.get("call_response_fxn") + if callable(fxn): + df = fxn(resp=resp, source=source, year=year, + config=config, url=url) + elif fxn: + raise flowsa.exceptions.FBSMethodConstructionError( + error_type='fxn_call') if isinstance(df, pd.DataFrame): data_frames_list.append(df) elif isinstance(df, list): @@ -131,10 +137,15 @@ def parse_data(*, df_list, source, year, config): :param config: dictionary, FBA yaml :return: df, single df formatted to FBA """ - if "parse_response_fxn" in config: - # dynamically import and call on function - df = config["parse_response_fxn"](df_list=df_list, source=source, - year=year, config=config) + + fxn = config.get("parse_response_fxn") + if callable(fxn): + df = fxn(df_list=df_list, source=source, year=year, config=config) + elif fxn: + raise flowsa.exceptions.FBSMethodConstructionError( + error_type='fxn_call') + # else: + # Handle parse_response_fxn = None return df @@ -187,7 +198,7 @@ def main(**kwargs): # filename if run into error try: config = load_yaml_dict(source, flowbytype='FBA') - except (UnboundLocalError, FileNotFoundError): + except FileNotFoundError: log.info(f'Could not find Flow-By-Activity config file for {source}') source = get_flowsa_base_name(sourceconfigpath, source, "yaml") log.info(f'Generating FBA for {source}') diff --git 
a/flowsa/flowbyfunctions.py b/flowsa/flowbyfunctions.py index bd23f6ea0..d0ecd2ec4 100644 --- a/flowsa/flowbyfunctions.py +++ b/flowsa/flowbyfunctions.py @@ -9,20 +9,19 @@ import numpy as np from esupy.dqi import get_weighted_average import flowsa -from flowsa.common import fbs_activity_fields, \ - load_crosswalk, fbs_fill_na_dict, \ +from flowsa.common import fbs_activity_fields, sector_level_key, \ + load_crosswalk, fbs_fill_na_dict, check_activities_sector_like, \ fbs_collapsed_default_grouping_fields, fbs_collapsed_fill_na_dict, \ fba_activity_fields, fba_default_grouping_fields, \ load_sector_length_cw_melt, fba_fill_na_dict, \ - get_flowsa_base_name, fba_mapped_default_grouping_fields, \ - check_activities_sector_like + fba_mapped_default_grouping_fields +from flowsa.dataclean import clean_df, replace_strings_with_NoneType, \ + replace_NoneType_with_empty_cells, standardize_units from flowsa.location import US_FIPS, get_state_FIPS, \ get_county_FIPS, update_geoscale, fips_number_key from flowsa.schema import flow_by_activity_fields, flow_by_sector_fields, \ flow_by_sector_collapsed_fields, flow_by_activity_mapped_fields -from flowsa.settings import datasourcescriptspath, log -from flowsa.dataclean import clean_df, replace_strings_with_NoneType, \ - replace_NoneType_with_empty_cells, standardize_units +from flowsa.settings import log, vLogDetailed, vLog def create_geoscale_list(df, geoscale, year='2015'): @@ -40,9 +39,9 @@ def create_geoscale_list(df, geoscale, year='2015'): if geoscale == "national": fips.append(US_FIPS) elif df['LocationSystem'].str.contains('FIPS').any(): - # all_FIPS = read_stored_FIPS() if geoscale == "state": state_FIPS = get_state_FIPS(year) + state_FIPS = state_FIPS[state_FIPS['FIPS'] != '72000'] fips = list(state_FIPS['FIPS']) elif geoscale == "county": county_FIPS = get_county_FIPS(year) @@ -64,8 +63,9 @@ def filter_by_geoscale(df, geoscale): df = df[df['Location'].isin(fips)].reset_index(drop=True) if len(df) == 0: - log.error("No 
flows found in the flow dataset at the %s scale", - geoscale) + raise flowsa.exceptions.FBSMethodConstructionError( + message="No flows found in the flow dataset at " + f"the {geoscale} scale") else: return df @@ -92,13 +92,16 @@ def agg_by_geoscale(df, from_scale, to_scale, groupbycols): return fba_agg -def aggregator(df, groupbycols): +def aggregator(df, groupbycols, retain_zeros=True): """ Aggregates flowbyactivity or flowbysector 'FlowAmount' column in df and generate weighted average values based on FlowAmount values for numeric columns :param df: df, Either flowbyactivity or flowbysector :param groupbycols: list, Either flowbyactivity or flowbysector columns + :param retain_zeros, bool, default True, if set to True, all rows that + have a FlowAmount = 0 will be returned in df. If False, those rows will + be dropped :return: df, with aggregated columns """ @@ -108,7 +111,8 @@ def aggregator(df, groupbycols): df = replace_NoneType_with_empty_cells(df) # drop columns with flowamount = 0 - df = df[df['FlowAmount'] != 0] + if retain_zeros is False: + df = df[df['FlowAmount'] != 0] # list of column headers, that if exist in df, should be # aggregated using the weighted avg fxn @@ -163,14 +167,13 @@ def sector_ratios(df, sectorcolumn): # appends missing naics levels to df sec_ratios = [] for i in range(length, 3, -1): - # subset df to sectors with length = i and length = i + 1 - df_subset = df.loc[df[sectorcolumn].apply(lambda x: len(x) == i)] + # subset df to sectors with length = i + df_subset = subset_df_by_sector_lengths(df, [i]) # create column for sector grouping - df_subset = df_subset.assign( - Sector_group=df_subset[sectorcolumn].apply(lambda x: x[0:i-1])) + df_subset = assign_sector_match_column(df_subset, sectorcolumn, i, i-1) # subset df to create denominator - df_denom = df_subset[['FlowAmount', 'Location', 'Sector_group']] - df_denom = df_denom.groupby(['Location', 'Sector_group'], + df_denom = df_subset[['FlowAmount', 'Location', 'sector_group']] + 
df_denom = df_denom.groupby(['Location', 'sector_group'], as_index=False).agg({"FlowAmount": sum}) df_denom = df_denom.rename(columns={"FlowAmount": "Denominator"}) # merge the denominator column with fba_w_sector df @@ -179,10 +182,10 @@ def sector_ratios(df, sectorcolumn): ratio_df.loc[:, 'FlowAmountRatio'] = \ ratio_df['FlowAmount'] / ratio_df['Denominator'] ratio_df = ratio_df.drop( - columns=['Denominator', 'Sector_group']).reset_index() + columns=['Denominator', 'sector_group']) sec_ratios.append(ratio_df) # concat list of dataframes (info on each page) - df_w_ratios = pd.concat(sec_ratios, sort=True).reset_index(drop=True) + df_w_ratios = pd.concat(sec_ratios, ignore_index=True) return df_w_ratios @@ -200,12 +203,9 @@ def sector_aggregation(df_load): # determine grouping columns - based on datatype group_cols = list(df.select_dtypes(include=['object', 'int']).columns) - # determine if activities are sector-like, - # if aggregating a df with a 'SourceName' - sector_like_activities = False - if 'SourceName' in df_load.columns: - s = pd.unique(df_load['SourceName'])[0] - sector_like_activities = check_activities_sector_like(s) + # determine if activities are sector-like, if aggregating a df with a + # 'SourceName' + sector_like_activities = check_activities_sector_like(df_load) # if activities are sector like, drop columns while running ag then # add back in @@ -263,7 +263,8 @@ def sector_aggregation(df_load): drop=True) # if activities are source-like, set col values as # copies of the sector columns - if sector_like_activities & ('FlowAmount' in df.columns): + if sector_like_activities & ('FlowAmount' in df.columns) & \ + ('ActivityProducedBy' in df_load.columns): df = df.assign(ActivityProducedBy=df['SectorProducedBy']) df = df.assign(ActivityConsumedBy=df['SectorConsumedBy']) @@ -286,15 +287,13 @@ def sector_disaggregation(df_load): # determine if activities are sector-like, if aggregating # a df with a 'SourceName' - sector_like_activities = False - if 
'SourceName' in df_load.columns: - s = pd.unique(df_load['SourceName'])[0] - sector_like_activities = check_activities_sector_like(s) + sector_like_activities = check_activities_sector_like(df_load) # if activities are sector like, drop columns while running disag then # add back in if sector_like_activities: - df = df.drop(columns=['ActivityProducedBy', 'ActivityConsumedBy']) + df = df.drop(columns=['ActivityProducedBy', 'ActivityConsumedBy'], + errors='ignore') df = df.reset_index(drop=True) # load naics 2 to naics 6 crosswalk @@ -313,9 +312,7 @@ sector_add = 'NAICS_' + str(i + 1) # subset the df by naics length - cw = cw_load[[sector_merge, sector_add]] - # first drop all duplicates - cw = cw.drop_duplicates() + cw = cw_load[[sector_merge, sector_add]].drop_duplicates() # only keep the rows where there is only one value in sector_add for # a value in sector_merge cw = cw.drop_duplicates(subset=[sector_merge], keep=False).reset_index( @@ -369,6 +366,33 @@ return df + +def return_primary_sector_column(df_load): + """ + Determine sector column with values + :param df_load: df with two sector columns + :return: string, primary sector column + """ + # determine the df_w_sector column to merge on + if 'Sector' in df_load.columns: + primary_sec_column = 'Sector' + else: + df = replace_strings_with_NoneType(df_load) + sec_consumed_list = \ + df['SectorConsumedBy'].drop_duplicates().values.tolist() + sec_produced_list = \ + df['SectorProducedBy'].drop_duplicates().values.tolist() + # if a sector field column is not all 'none', that is the column to + # merge + if all(v is None for v in sec_consumed_list): + primary_sec_column = 'SectorProducedBy' + elif all(v is None for v in sec_produced_list): + primary_sec_column = 'SectorConsumedBy' + else: + log.error('There are values in both SectorProducedBy and ' + 'SectorConsumedBy, cannot isolate Sector column') + return primary_sec_column + + 
def collapse_fbs_sectors(fbs): """ Collapses the Sector Produced/Consumed into a single column named "Sector" @@ -589,25 +613,48 @@ def dataframe_difference(df1, df2, which=None): def equally_allocate_suppressed_parent_to_child_naics( - df_load, sector_column, groupcols): + df_load, method, sector_column, groupcols, + equally_allocate_parent_to_child=True): """ Estimate data suppression, by equally allocating parent NAICS values to child NAICS :param df_load: df with sector columns + :param method: dictionary, FBS method yaml :param sector_column: str, column to estimate suppressed data for :param groupcols: list, columns to group df by + :param equally_allocate_parent_to_child: default True, if True will + first equally allocate parent to child sectors if the child sector is + missing :return: df, with estimated suppressed data """ + from flowsa.allocation import equally_allocate_parent_to_child_naics + from flowsa.validation import \ + compare_child_to_parent_sectors_flowamounts, \ + compare_summation_at_sector_lengths_between_two_dfs + + vLogDetailed.info('Estimating suppressed data by equally allocating ' + 'parent to child sectors.') df = sector_disaggregation(df_load) + + # equally allocate parent to child naics where child naics are not + # included in the dataset. This step is necessary to accurately + # calculate the flow that has already been allocated. Must allocate to + # NAICS_6 for suppressed data function to work correctly. 
+ if equally_allocate_parent_to_child: + vLogDetailed.info('Before estimating suppressed data, equally ' + 'allocate parent sectors to child sectors.') + df = equally_allocate_parent_to_child_naics( + df, method, overwritetargetsectorlevel='NAICS_6') + df = replace_NoneType_with_empty_cells(df) df = df[df[sector_column] != ''] # determine if activities are sector-like, # if aggregating a df with a 'SourceName' - sector_like_activities = False - if 'SourceName' in df_load.columns: - s = pd.unique(df_load['SourceName'])[0] - sector_like_activities = check_activities_sector_like(s) + sector_like_activities = check_activities_sector_like(df_load) + if sector_like_activities is False: + log.error('Function is not written to estimate suppressed data when ' + 'activities are not NAICS-like.') # if activities are source like, drop from df, # add back in as copies of sector columns columns to keep @@ -623,7 +670,9 @@ def equally_allocate_suppressed_parent_to_child_naics( # load naics 2 to naics 6 crosswalk cw_load = load_crosswalk('sector_length') - cw_melt = cw_load.melt( + # only keep official naics + cw = cw_load.drop(columns=['NAICS_7']).drop_duplicates() + cw_melt = pd.melt(cw, id_vars=["NAICS_6"], var_name="NAICS_Length", value_name="NAICS_Match").drop( columns=['NAICS_Length']).drop_duplicates() @@ -637,10 +686,15 @@ def equally_allocate_suppressed_parent_to_child_naics( new_naics[sector_column] = new_naics['NAICS_6'].copy() new_naics = new_naics.drop(columns=['NAICS_6', 'NAICS_Match']) + # if a parent and child naics are both suppressed, can get situations + # where a naics6 code is duplicated because both the parent and child + # will match with the naics6. 
Therefore, drop duplicates + new_naics2 = new_naics.drop_duplicates() + # merge the new naics with the existing df, if data already # existed for a NAICS6, keep the original dfm = pd.merge( - new_naics[groupcols], df, how='left', on=groupcols, + new_naics2[groupcols], df, how='left', on=groupcols, indicator=True).query('_merge=="left_only"').drop('_merge', axis=1) dfm = replace_NoneType_with_empty_cells(dfm) dfm = dfm.fillna(0) @@ -648,54 +702,148 @@ def equally_allocate_suppressed_parent_to_child_naics( # add length column and subset the data # subtract out existing data at NAICS6 from total data # at a length where no suppressed data - df = df.assign(secLength=df[sector_column].apply(lambda x: len(x))) - - # add column for each state of sector length where - # there are no missing values - df_sup = df_sup.assign( - secLength=df_sup[sector_column].apply(lambda x: len(x))) - df_sup2 = (df_sup.groupby( - ['FlowName', 'Compartment', 'Location'])['secLength'].agg( - lambda x: x.min()-1).reset_index(name='secLengthsup')) - - # merge the dfs and sub out the last sector lengths with - # all data for each state drop states that don't have suppressed dat - df1 = df.merge(df_sup2) - - df2 = df1[df1['secLength'] == 6].reset_index(drop=True) - # determine sector to merge on - df2.loc[:, 'mergeSec'] = df2.apply( - lambda x: x[sector_column][:x['secLengthsup']], axis=1) - - sum_cols = [e for e in fba_default_grouping_fields if e not in - ['ActivityConsumedBy', 'ActivityProducedBy']] - sum_cols.append('mergeSec') - df2 = df2.assign( - FlowAlloc=df2.groupby(sum_cols)['FlowAmount'].transform('sum')) - # rename columns for the merge and define merge cols - df2 = df2.rename(columns={sector_column: 'NewNAICS', - 'mergeSec': sector_column}) - # keep flows with 0 flow - df3 = df2[df2['FlowAmount'] == 0].reset_index(drop=True) - m_cols = groupcols + ['NewNAICS', 'FlowAlloc'] - # merge the two dfs - dfe = df1.merge(df3[m_cols]) - # add count column used to divide the unallocated flows - 
dfe = dfe.assign( - secCount=dfe.groupby(groupcols)['NewNAICS'].transform('count')) - dfe = dfe.assign( - newFlow=(dfe['FlowAmount'] - dfe['FlowAlloc']) / dfe['secCount']) - # reassign values and drop columns - dfe = dfe.assign(FlowAmount=dfe['newFlow']) - dfe[sector_column] = dfe['NewNAICS'].copy() - dfe = dfe.drop(columns=['NewNAICS', 'FlowAlloc', 'secCount', 'newFlow']) - - # new df with estimated naics6 - dfn = pd.concat([df, dfe], ignore_index=True) - dfn2 = dfn[dfn['FlowAmount'] != 0].reset_index(drop=True) - dfn2 = dfn2.drop(columns=['secLength']) - - dff = sector_aggregation(dfn2) + drop_col = 'SectorConsumedByLength' + if sector_column == 'SectorConsumedBy': + drop_col = 'SectorProducedByLength' + df = assign_columns_of_sector_levels(df).rename( + columns={f'{sector_column}Length': 'SectorLength'}).drop(columns=[ + drop_col]) + # df with non-suppressed data only + dfns = df[df['FlowAmount'] != 0].reset_index(drop=True) + + df_sup2 = pd.DataFrame() + cw_load = load_crosswalk('sector_length') + df_sup = df_sup.assign(SectorMatchFlow=np.nan) + merge_cols = list(df_sup.select_dtypes( + include=['object', 'int']).columns) + # also drop sector and description cols + merge_cols = [c for c in merge_cols + if c not in ['SectorConsumedBy', 'SectorProducedBy', + 'Description']] + # subset the df by length i + dfs = subset_df_by_sector_lengths(df_sup, [6]) + + counter = 1 + while dfs.isnull().values.any() and 6-counter > 1: + # subset the crosswalk by i and i-1 + cw = cw_load[[f'NAICS_6', + f'NAICS_{6-counter}']].drop_duplicates() + # merge df with the cw to determine which sector to look for in + # non-suppressed data + for s in ['Produced', 'Consumed']: + dfs = dfs.merge(cw, how='left', left_on=f'Sector{s}By', + right_on=f'NAICS_6').drop( + columns=f'NAICS_6').rename( + columns={f'NAICS_{6-counter}': f'Sector{s}Match'}) + dfs[f'Sector{s}Match'] = dfs[f'Sector{s}Match'].fillna('') + # merge with non suppressed data + dfs = dfs.merge(dfns, how='left', + 
left_on=merge_cols + ['SectorProducedMatch', + 'SectorConsumedMatch'], + right_on=merge_cols + ['SectorProducedBy', + 'SectorConsumedBy']) + dfs['SectorMatchFlow'].fillna(dfs['FlowAmount_y'], inplace=True) + # drop all columns from the non suppressed data + dfs = dfs[dfs.columns[~dfs.columns.str.endswith('_y')]] + dfs.columns = dfs.columns.str.replace('_x', '') + # subset the df into rows assigned a new value and those not + dfs_assigned = dfs[~dfs['SectorMatchFlow'].isnull()] + dfs = dfs[dfs['SectorMatchFlow'].isnull()].drop( + columns=['SectorProducedMatch', 'SectorConsumedMatch', + 'SectorLength']).reset_index(drop=True) + df_sup2 = pd.concat([df_sup2, dfs_assigned], ignore_index=True) + counter = counter + 1 + + # merge in the df where calculated how much flow has already been + # allocated to NAICS6 + mergecols = [e for e in groupcols if e not in + ['SectorProducedBy', 'SectorConsumedBy']] + mergecols = mergecols + ['SectorProducedMatch', 'SectorConsumedMatch'] + meltcols = mergecols + ['sector_allocated'] + + if len(df_sup2) > 0: + for ii in range(5, 1, -1): + # subset the df by length i + dfs = df_sup2[df_sup2['SectorLength'] == ii] + + dfns_sub = dfns[dfns['SectorLength'] == 6].reset_index(drop=True) + for s in ['Produced', 'Consumed']: + dfns_sub = assign_sector_match_column( + dfns_sub, f'Sector{s}By', 6, ii).rename( + columns={'sector_group': f'Sector{s}Match'}) + dfns_sub = dfns_sub.fillna('') + dfsum = dfns_sub.groupby(mergecols, as_index=False).agg( + {"FlowAmount": sum}).rename(columns={ + "FlowAmount": 'sector_allocated'}) + + df_sup3 = dfs.merge(dfsum[meltcols], on=mergecols, how='left') + df_sup3['sector_allocated'] = df_sup3['sector_allocated'].fillna(0) + # calc the remaining flow that can be allocated + df_sup3['FlowRemainder'] = df_sup3['SectorMatchFlow'] - \ + df_sup3['sector_allocated'] + # Due to rounding, there can be slight differences in data at + # sector levels, which can result in some minor negative values. 
+ # If the percent of FlowRemainder is less than the assigned + # tolerance for negative numbers, or if the flowremainder is + # -1, reset the number to 0. If it is greater, issue a warning. + percenttolerance = 1 + flowtolerance = -1 + df_sup3 = df_sup3.assign(PercentOfAllocated= + (abs(df_sup3['FlowRemainder']) / df_sup3[ + 'SectorMatchFlow']) * 100) + df_sup3['FlowRemainder'] = np.where( + (df_sup3["FlowRemainder"] < 0) & + (df_sup3['PercentOfAllocated'] < percenttolerance), 0, + df_sup3['FlowRemainder']) + df_sup3['FlowRemainder'] = np.where( + df_sup3["FlowRemainder"].between(flowtolerance, 0), 0, + df_sup3['FlowRemainder']) + + # check for negative values + negv = df_sup3[df_sup3['FlowRemainder'] < 0] + if len(negv) > 0: + col_subset = [e for e in negv.columns if e in + ['Class', 'SourceName', 'FlowName', + 'Flowable', 'FlowAmount', 'Unit', + 'Compartment', 'Context', 'Location', 'Year', + 'SectorProducedBy', 'SectorConsumedBy', + 'SectorMatchFlow', 'SectorProducedMatch', + 'SectorConsumedMatch', 'sector_allocated', + 'FlowRemainder']] + negv = negv[col_subset].reset_index(drop=True) + vLog.info( + 'There are negative values when allocating suppressed ' + 'parent data to child sector. The values are more than ' + '%s%% of the total parent sector with a negative flow ' + 'amount being allocated more than %s. Resetting flow ' + 'values to be allocated to 0. 
See validation log for ' + 'details.', str(percenttolerance), str(flowtolerance)) + vLogDetailed.info('Values where flow remainders are ' + 'negative, resetting to 0: ' + '\n {}'.format(negv.to_string())) + df_sup3['FlowRemainder'] = np.where(df_sup3["FlowRemainder"] < 0, + 0, df_sup3['FlowRemainder']) + df_sup3 = df_sup3.drop(columns=[ + 'SectorMatchFlow', 'sector_allocated', 'PercentOfAllocated']) + # add count column used to divide the unallocated flows + sector_column_match = sector_column.replace('By', 'Match') + df_sup3 = df_sup3.assign(secCount=df_sup3.groupby(mergecols)[ + sector_column_match].transform('count')) + df_sup3 = df_sup3.assign(newFlow=df_sup3['FlowRemainder'] / + df_sup3['secCount']) + # reassign values and drop columns + df_sup3 = df_sup3.assign(FlowAmount=df_sup3['newFlow']) + df_sup3 = df_sup3.drop(columns=['SectorProducedMatch', + 'SectorConsumedMatch', + 'FlowRemainder', 'secCount', + 'newFlow']) + # reset SectorLength + df_sup3['SectorLength'] = 6 + # add to the df with no suppressed data + dfns = pd.concat([dfns, df_sup3], ignore_index=True) + + dfns = dfns.drop(columns=['SectorLength']) + dff = sector_aggregation(dfns) # if activities are source-like, set col values as copies # of the sector columns @@ -705,8 +853,15 @@ def equally_allocate_suppressed_parent_to_child_naics( # reindex columns dff = dff.reindex(df_load.columns, axis=1) + vLogDetailed.info('Checking results of allocating suppressed parent to ' + 'child sectors. 
') + compare_summation_at_sector_lengths_between_two_dfs(df_load, dff) + compare_child_to_parent_sectors_flowamounts(dff) + # todo: add third check comparing smallest child naics (6) to largest (2) + # replace null values dff = replace_strings_with_NoneType(dff).reset_index(drop=True) + return dff @@ -825,7 +980,7 @@ def subset_df_by_sector_list(df_load, sector_list): df['SectorConsumedBy'].isin(sector_list) ) | ( df['SectorProducedBy'].isin(sector_list) & - df['SectorConsumedBy'].isin(sector_list))] + df['SectorConsumedBy'].isin(sector_list))].reset_index(drop=True) return df @@ -840,6 +995,7 @@ def subset_and_merge_df_by_sector_lengths(df, length1, length2): drop=True) # df where either sector column is length or both columns are + df = df.reset_index(drop=True) df1 = subset_df_by_sector_lengths(df, [length1]) # second dataframe where length is length2 df2 = subset_df_by_sector_lengths(df, [length2]) @@ -873,6 +1029,139 @@ def subset_and_merge_df_by_sector_lengths(df, length1, length2): return dfm +def assign_columns_of_sector_levels(df_load): + """ + Add additional column capturing the sector level in the two columns + :param df_load: df with at least on sector column + :param ambiguous_sector_assignment: if there are sectors that can be + assigned to multiple sector lengths (e.g., for government or household + sectors), option to specify which sector assignment to keep. + :return: df with new column for sector length + """ + df = replace_NoneType_with_empty_cells(df_load) + # load cw with column of sector levels + cw = load_sector_length_cw_melt() + # merge df assigning sector lengths + for s in ['Produced', 'Consumed']: + df = df.merge(cw, how='left', left_on=f'Sector{s}By', + right_on='Sector').drop(columns=['Sector']).rename( + columns={'SectorLength': f'Sector{s}ByLength'}) + df[f'Sector{s}ByLength'] = df[f'Sector{s}ByLength'].fillna(0) + + # There are cases where non-traditional sectors (non naics) have + # multiple naics assignments. 
If there is a non-zero value in the other + # sector length column, keep that row because sector lengths must always + # match. + # subset df into two dfs, one where one sector column length has a zero + # value and the second where both sector length columns have non-zero + # values + df1 = df[(df['SectorProducedByLength'] == 0) | + (df['SectorConsumedByLength'] == 0)] + + df2 = df[(df['SectorProducedByLength'] != 0) & + (df['SectorConsumedByLength'] != 0)] + # only keep rows where the values are equal + df2e = df2[df2['SectorProducedByLength'] == df2['SectorConsumedByLength']] + + # concat dfs + dfc = pd.concat([df1, df2e], ignore_index=True) + + # check for duplicates. Rows might be duplicated if a sector is the same + # for multiple sector lengths + duplicate_cols = [e for e in dfc.columns if e not in [ + 'SectorProducedByLength', 'SectorConsumedByLength']] + duplicate_df = dfc[dfc.duplicated(subset=duplicate_cols, + keep=False)].reset_index(drop=True) + + if len(duplicate_df) > 0: + log.warning('There are duplicate rows caused by ambiguous sectors.') + + dfc = dfc.sort_values(['SectorProducedByLength', + 'SectorConsumedByLength']).reset_index(drop=True) + return dfc + + +def assign_columns_of_sector_levels_without_ambiguous_sectors( + df_load, ambiguous_sector_assignment=None): + + dfc = assign_columns_of_sector_levels(df_load) + + # check for duplicates. Rows might be duplicated if a sector is the same + # for multiple sector lengths + duplicate_cols = [e for e in dfc.columns if e not in [ + 'SectorProducedByLength', 'SectorConsumedByLength']] + duplicate_df = dfc[dfc.duplicated(subset=duplicate_cols, + keep=False)].reset_index(drop=True) + + if (len(duplicate_df) > 0) % (ambiguous_sector_assignment is not None): + log.info('Retaining data for %s and dropping remaining ' + 'rows. 
See validation log for data dropped', + ambiguous_sector_assignment) + # first drop all data in the duplicate_df from dfc + dfs1 = pd.concat([dfc, duplicate_df]).drop_duplicates(keep=False) + # drop sector length cols, drop duplicates, aggregate df to ensure + # keep the intended data, and then reassign column sectors, + # formatted this way because would like to avoid sector aggreggation + # on large dfs + dfs2 = duplicate_df.drop( + columns=['SectorProducedByLength', + 'SectorConsumedByLength']).drop_duplicates() + dfs2 = sector_aggregation(dfs2) + dfs2 = assign_columns_of_sector_levels(dfs2) + # then in the duplicate df, only keep the rows that match the + # parameter indicated in the function call + sectorlength = sector_level_key[ambiguous_sector_assignment] + dfs2 = dfs2[ + ((dfs2['SectorProducedByLength'] == sectorlength) & + (dfs2['SectorConsumedByLength'] == 0)) + | + ((dfs2['SectorProducedByLength'] == 0) & + (dfs2['SectorConsumedByLength'] == sectorlength)) + | + ((dfs2['SectorProducedByLength'] == sectorlength) & + (dfs2['SectorConsumedByLength'] == sectorlength)) + ].reset_index(drop=True) + if len(dfs2) == 0: + log.warning('Data is lost from dataframe because none of the ' + 'ambiguous sectors match %s', + ambiguous_sector_assignment) + # merge the two dfs + dfc = pd.concat([dfs1, dfs2]) + # print out what data was dropped + df_dropped = pd.merge( + duplicate_df, dfs2, how='left', indicator=True).query( + '_merge=="left_only"').drop('_merge', axis=1) + df_dropped = df_dropped[ + ['SectorProducedBy', 'SectorConsumedBy', + 'SectorProducedByLength', 'SectorConsumedByLength' + ]].drop_duplicates().reset_index(drop=True) + vLogDetailed.info('After assigning a column of sector lengths, ' + 'dropped data with the following sector ' + 'assignments due to ambiguous sector lengths ' + '%s: \n {}'.format(df_dropped.to_string())) + dfc = dfc.sort_values(['SectorProducedByLength', + 'SectorConsumedByLength']).reset_index(drop=True) + return dfc + + +def 
assign_sector_match_column(df_load, sectorcolumn, sectorlength, + sectorlengthmatch): + + sector = 'NAICS_' + str(sectorlength) + sector_add = 'NAICS_' + str(sectorlengthmatch) + + cw_load = load_crosswalk("sector_length") + cw = cw_load[[sector, sector_add]].drop_duplicates().reset_index( + drop=True) + + df = df_load.merge(cw, how='left', left_on=sectorcolumn, + right_on=sector + ).rename(columns={sector_add: 'sector_group'} + ).drop(columns=sector) + + return df + + def aggregate_and_subset_for_target_sectors(df, method): """Helper function to create data at aggregated NAICS prior to subsetting based on the target_sector_list. Designed for use when @@ -882,7 +1171,8 @@ def aggregate_and_subset_for_target_sectors(df, method): # return sector level specified in method yaml # load the crosswalk linking sector lengths secondary_sector_level = method.get('target_subset_sector_level') - sector_list = get_sector_list(method['target_sector_level'], + sector_list = get_sector_list( + method['target_sector_level'], secondary_sector_level_dict=secondary_sector_level) # subset df to get NAICS at the target level diff --git a/flowsa/flowbysector.py b/flowsa/flowbysector.py index a1938c623..be6084d0b 100644 --- a/flowsa/flowbysector.py +++ b/flowsa/flowbysector.py @@ -25,31 +25,30 @@ import os from esupy.processed_data_mgmt import write_df_to_file import flowsa -from flowsa.location import fips_number_key, merge_urb_cnty_pct -from flowsa.common import load_yaml_dict, check_activities_sector_like, \ - str2bool, fba_activity_fields, rename_log_file, \ - fbs_activity_fields, fba_fill_na_dict, fbs_fill_na_dict, \ +from flowsa.allocation import equally_allocate_parent_to_child_naics +from flowsa.common import check_activities_sector_like, str2bool, \ + fba_activity_fields, rename_log_file, fba_fill_na_dict, fbs_fill_na_dict, \ fbs_default_grouping_fields, fbs_grouping_fields_w_activities, \ - logoutputpath, load_yaml_dict, datapath -from flowsa.schema import 
flow_by_activity_fields, flow_by_sector_fields, \ - flow_by_sector_fields_w_activity -from flowsa.settings import log, vLog, \ - flowbysectoractivitysetspath, paths -from flowsa.metadata import set_fb_meta, write_metadata + logoutputpath, load_yaml_dict +from flowsa.dataclean import clean_df, harmonize_FBS_columns, \ + reset_fbs_dq_scores from flowsa.fbs_allocation import direct_allocation_method, \ function_allocation_method, dataset_allocation_method -from flowsa.sectormapping import add_sectors_to_flowbyactivity, \ - map_fbs_flows, get_sector_list from flowsa.flowbyfunctions import agg_by_geoscale, sector_aggregation, \ aggregator, subset_df_by_geoscale, sector_disaggregation, \ update_geoscale, subset_df_by_sector_list -from flowsa.dataclean import clean_df, harmonize_FBS_columns, \ - reset_fbs_dq_scores +from flowsa.location import fips_number_key, merge_urb_cnty_pct +from flowsa.metadata import set_fb_meta, write_metadata +from flowsa.schema import flow_by_activity_fields, flow_by_sector_fields, \ + flow_by_sector_fields_w_activity +from flowsa.sectormapping import add_sectors_to_flowbyactivity, \ + map_fbs_flows, get_sector_list +from flowsa.settings import log, vLog, flowbysectoractivitysetspath, paths from flowsa.validation import compare_activity_to_sector_flowamounts, \ compare_fba_geo_subset_and_fbs_output_totals, compare_geographic_totals,\ - replace_naics_w_naics_from_another_year, \ - calculate_flowamount_diff_between_dfs, check_for_negative_flowamounts -from flowsa.allocation import equally_allocate_parent_to_child_naics + replace_naics_w_naics_from_another_year, check_for_negative_flowamounts, \ + compare_child_to_parent_sectors_flowamounts, \ + check_if_data_exists_at_geoscale, calculate_flowamount_diff_between_dfs def parse_args(): @@ -108,12 +107,16 @@ def load_source_dataframe(method, sourcename, source_dict, flows_df = flowsa.getFlowBySector(sourcename) elif source_dict['data_format'] == 'FBS_outside_flowsa': vLog.info("Retrieving flowbysector 
for datasource %s", sourcename) - flows_df = source_dict["FBS_datapull_fxn"](source_dict, - method, - fbsconfigpath) + fxn = source_dict.get("FBS_datapull_fxn") + if callable(fxn): + flows_df = fxn(source_dict, method, fbsconfigpath) + elif fxn: + raise flowsa.exceptions.FBSMethodConstructionError( + error_type='fxn_call') else: - vLog.error("Data format not specified in method " - "file for datasource %s", sourcename) + raise flowsa.exceptions.FBSMethodConstructionError( + message="Data format not specified in method " + f"file for {sourcename}") return flows_df @@ -133,12 +136,17 @@ def main(**kwargs): fbsconfigpath = kwargs.get('fbsconfigpath') download_FBA_if_missing = kwargs.get('download_FBAs_if_missing') # assign arguments - vLog.info("Initiating flowbysector creation for %s", method_name) + vLog.info(f"Initiating flowbysector creation for {method_name}") # call on method method = load_yaml_dict(method_name, flowbytype='FBS', filepath=fbsconfigpath) # create dictionary of data and allocation datasets - fb = method['source_names'] + try: + fb = method['source_names'] + except KeyError: + log.error("parameter 'source_names' not found in method. 
" + f"FBS for {method_name} can not be generated.") + return # Create empty list for storing fbs files fbs_list = [] for k, v in fb.items(): @@ -158,18 +166,27 @@ def main(**kwargs): flows = merge_urb_cnty_pct(flows) # clean up fba before mapping, if specified in yaml - if "clean_fba_before_mapping_df_fxn" in v: - vLog.info("Cleaning up %s FlowByActivity", k) - flows = v["clean_fba_before_mapping_df_fxn"](flows) + fxn = v.get("clean_fba_before_mapping_df_fxn") + if callable(fxn): + vLog.info(f"Cleaning up {k} FlowByActivity") + flows = fxn(fba=flows, source_dict=v) + elif fxn: + raise flowsa.exceptions.FBSMethodConstructionError( + error_type='fxn_call') # map flows to federal flow list or material flow list - flows_mapped, mapping_files = \ - map_fbs_flows(flows, k, v, keep_fba_columns=True) + flows_mapped, mapping_files = (map_fbs_flows( + flows, k, v, keep_fba_columns=True, + keep_unmapped_rows=v.get("keep_unmapped_rows", False))) # clean up fba, if specified in yaml - if "clean_fba_df_fxn" in v: - vLog.info("Cleaning up %s FlowByActivity", k) - flows_mapped = v["clean_fba_df_fxn"](flows_mapped) + fxn = v.get("clean_fba_df_fxn") + if callable(fxn): + vLog.info(f"Cleaning up {k} FlowByActivity") + flows_mapped = fxn(fba=flows_mapped, source_dict=v) + elif fxn: + raise flowsa.exceptions.FBSMethodConstructionError( + error_type='fxn_call') # master list of activity names read in from data source ml_act = [] @@ -191,7 +208,7 @@ def main(**kwargs): )].reset_index(drop=True) ml_act.extend(names) - vLog.info("Preparing to handle %s in %s", aset, k) + vLog.info(f"Preparing to handle {aset} in {k}") # subset fba data by activity flows_subset = flows_mapped[ (flows_mapped[fba_activity_fields[0]].isin(names)) | @@ -209,7 +226,7 @@ def main(**kwargs): log.warning(f"all flow data for {aset} is 0") continue # if activities are sector-like, check sectors are valid - if check_activities_sector_like(k): + if check_activities_sector_like(flows_subset): flows_subset2 = 
replace_naics_w_naics_from_another_year( flows_subset, method['target_sector_source']) @@ -248,6 +265,10 @@ def main(**kwargs): attr=attr, method=method ) + # check for activities at geoscale - return any missing + # locations for an activity + check_if_data_exists_at_geoscale(flows_subset_geo, + attr['allocation_from_scale']) # rename SourceName to MetaSources and drop columns flows_mapped_wsec = flows_subset_wsec.\ @@ -277,7 +298,7 @@ def main(**kwargs): # define grouping columns dependent on sectors # being activity-like or not - if check_activities_sector_like(k) is False: + if check_activities_sector_like(fbs) is False: groupingcols = fbs_grouping_fields_w_activities groupingdict = flow_by_sector_fields_w_activity else: @@ -316,9 +337,12 @@ def main(**kwargs): fbs_agg_2 = equally_allocate_parent_to_child_naics( fbs_agg, method) + # compare child sectors to parent sectors flow amounts + compare_child_to_parent_sectors_flowamounts(fbs) + # compare flowbysector with flowbyactivity compare_activity_to_sector_flowamounts( - flows_mapped_wsec, fbs_agg_2, aset, k, method) + flows_mapped_wsec, fbs_agg_2, aset, method) # return sector level specified in method yaml # load the crosswalk linking sector lengths @@ -343,15 +367,19 @@ def main(**kwargs): flows_subset_geo, fbs_sector_subset, aset, k, v, attr, method) - log.info("Completed flowbysector for %s", aset) + log.info(f"Completed flowbysector for {aset}") fbs_list.append(fbs_sector_subset) else: - if 'clean_fbs_df_fxn' in v: - flows = v["clean_fbs_df_fxn"](flows, method) + fxn = v.get("clean_fbs_df_fxn") + if callable(fxn): + flows = fxn(flows, method) + elif fxn: + raise flowsa.exceptions.FBSMethodConstructionError( + error_type='fxn_call') flows = update_geoscale(flows, method['target_geoscale']) # if the loaded flow dt is already in FBS format, # append directly to list of FBS - log.info("Append %s to FBS list", k) + log.info(f"Append {k} to FBS list") # ensure correct field datatypes and add any missing fields 
flows = clean_df(flows, flow_by_sector_fields, fbs_fill_na_dict) fbs_list.append(flows) @@ -385,7 +413,7 @@ def main(**kwargs): # rename the log file saved to local directory rename_log_file(method_name, meta) log.info('See the Validation log for detailed assessment of ' - 'model results in %s', logoutputpath) + f'model results in {logoutputpath}') if __name__ == '__main__': diff --git a/flowsa/location.py b/flowsa/location.py index 354d6241f..f8bd73f89 100644 --- a/flowsa/location.py +++ b/flowsa/location.py @@ -91,7 +91,7 @@ def apply_county_FIPS(df, year='2015', source_state_abbrev=True): """ # If using 2 letter abbrevations, map to state names if source_state_abbrev: - df['State'] = df['State'].map(abbrev_us_state) + df['State'] = df['State'].map(abbrev_us_state).fillna(df['State']) df['State'] = df.apply(lambda x: clean_str_and_capitalize(x.State), axis=1) df['County'] = df.apply(lambda x: clean_str_and_capitalize(x.County), diff --git a/flowsa/metadata.py b/flowsa/metadata.py index eb1d8734f..7b9b7279d 100644 --- a/flowsa/metadata.py +++ b/flowsa/metadata.py @@ -90,7 +90,8 @@ def return_fbs_method_data(source_name, config): :param config: dictionary, configuration/method file :return: meta object """ - from flowsa.data_source_scripts.stewiFBS import add_stewi_metadata + from flowsa.data_source_scripts.stewiFBS import add_stewi_metadata,\ + add_stewicombo_metadata # load the yaml that lists what additional fbas are # used in creating the fbs @@ -108,8 +109,12 @@ def return_fbs_method_data(source_name, config): for k, v in fb.items(): if k == 'stewiFBS': # get stewi metadata - meta['primary_source_meta'][k] = \ - add_stewi_metadata(v['inventory_dict']) + if v.get('local_inventory_name'): + meta['primary_source_meta'][k] = add_stewicombo_metadata( + v.get('local_inventory_name')) + else: + meta['primary_source_meta'][k] = add_stewi_metadata( + v['inventory_dict']) continue if v['data_format'] in ('FBS', 'FBS_outside_flowsa'): meta['primary_source_meta'][k] = \ 
diff --git a/flowsa/methods/flowbyactivitymethods/CalRecycle_WasteCharacterization.yaml b/flowsa/methods/flowbyactivitymethods/CalRecycle_WasteCharacterization.yaml index 7426dd04c..41d04478c 100644 --- a/flowsa/methods/flowbyactivitymethods/CalRecycle_WasteCharacterization.yaml +++ b/flowsa/methods/flowbyactivitymethods/CalRecycle_WasteCharacterization.yaml @@ -1,6 +1,6 @@ author: California Commercial source_name: CalRecycle_WasteCharacterization -source_url: ' ' +source_url: 'https://www2.calrecycle.ca.gov/WasteCharacterization/PubExtracts/2014/GenSummary.pdf' original_data_download_date: ' ' bib_id: CalRec_WasteChar api_name: None diff --git a/flowsa/methods/flowbyactivitymethods/Census_CBP.yaml b/flowsa/methods/flowbyactivitymethods/Census_CBP.yaml index f7a82d684..16629ee9b 100644 --- a/flowsa/methods/flowbyactivitymethods/Census_CBP.yaml +++ b/flowsa/methods/flowbyactivitymethods/Census_CBP.yaml @@ -9,7 +9,7 @@ url: base_url: http://api.census.gov/data/ api_path: __year__/cbp? # __year__ is used to trigger a replace function to substitute in year url_params: - get: __NAICS__,ESTAB #,EMP,PAYANN #replace __NAICS__ based on year of data. >=2017 is NAICS2017, 2012-2016 is NAICS2012 + get: __NAICS__,ESTAB,EMP,PAYANN #replace __NAICS__ based on year of data. 
>=2017 is NAICS2017, 2012-2016 is NAICS2012 for: county:__countyFIPS__ #retrieves every county in: state:__stateFIPS__ #requests a state 2-digit FIPS code that has to be supplied dynamically key: __apiKey__ # make the __apiKey__ part of the url and substitute in individual api key @@ -26,3 +26,6 @@ years: - 2015 - 2016 - 2017 +- 2018 +- 2019 +- 2020 diff --git a/flowsa/methods/flowbyactivitymethods/EIA_MECS_Energy.yaml b/flowsa/methods/flowbyactivitymethods/EIA_MECS_Energy.yaml index 71ba02f9e..dd7e7ca36 100644 --- a/flowsa/methods/flowbyactivitymethods/EIA_MECS_Energy.yaml +++ b/flowsa/methods/flowbyactivitymethods/EIA_MECS_Energy.yaml @@ -35,17 +35,17 @@ table_dict: - Coke and Breeze | million short tons - Other | trillion Btu regions: - Total United States : [14,97] - Northeast Region : [100,181] - Midwest Region : [184,265] - South Region : [268,349] - West Region : [352,433] + Total United States : [14,98] + Northeast Region : [101,183] + Midwest Region : [186,268] + South Region : [271,353] + West Region : [356,438] rse_regions: - Total United States : [12,95] - Northeast Region : [98,179] - Midwest Region : [182,263] - South Region : [266,347] - West Region : [350,431] + Total United States : [12,96] + Northeast Region : [99,181] + Midwest Region : [184,266] + South Region : [269,351] + West Region : [354,436] data_type: 'nonfuel consumption' Table 2.2: col_names: @@ -60,17 +60,17 @@ table_dict: - Coke and Breeze - Other regions: - Total United States : [14,97] - Northeast Region : [100,181] - Midwest Region : [184,265] - South Region : [268,349] - West Region : [352,433] + Total United States : [14,98] + Northeast Region : [101,183] + Midwest Region : [186,268] + South Region : [271,353] + West Region : [356,438] rse_regions: - Total United States : [12,95] - Northeast Region : [98,179] - Midwest Region : [182,263] - South Region : [266,347] - West Region : [350,431] + Total United States : [12,96] + Northeast Region : [99,181] + Midwest Region : [184,266] 
+ South Region : [269,351] + West Region : [354,436] data_type: 'nonfuel consumption' Table 3.1: col_names: @@ -86,17 +86,17 @@ table_dict: - Coke and Breeze | million short tons - Other | trillion Btu regions: - Total United States : [14,95] - Northeast Region : [98,179] - Midwest Region : [182,263] - South Region : [266,347] - West Region : [350,431] + Total United States : [14,96] + Northeast Region : [99,181] + Midwest Region : [184,266] + South Region : [269,351] + West Region : [354,436] rse_regions: - Total United States : [12,93] - Northeast Region : [96,177] - Midwest Region : [180,261] - South Region : [264,345] - West Region : [348,429] + Total United States : [12,94] + Northeast Region : [97,179] + Midwest Region : [182,264] + South Region : [267,349] + West Region : [352,434] data_type: 'fuel consumption' Table 3.2: col_names: @@ -112,17 +112,17 @@ table_dict: - Coke and Breeze - Other regions: - Total United States : [14,95] - Northeast Region : [98,179] - Midwest Region : [182,263] - South Region : [266,347] - West Region : [350,431] + Total United States : [14,96] + Northeast Region : [99,181] + Midwest Region : [184,266] + South Region : [269,351] + West Region : [354,436] rse_regions: - Total United States : [12,93] - Northeast Region : [96,177] - Midwest Region : [180,261] - South Region : [264,345] - West Region : [348,429] + Total United States : [12,94] + Northeast Region : [97,179] + Midwest Region : [182,264] + South Region : [267,349] + West Region : [352,434] data_type: 'fuel consumption' '2014': diff --git a/flowsa/methods/flowbyactivitymethods/EPA_SIT.yaml b/flowsa/methods/flowbyactivitymethods/EPA_SIT.yaml new file mode 100644 index 000000000..36a3f2fee --- /dev/null +++ b/flowsa/methods/flowbyactivitymethods/EPA_SIT.yaml @@ -0,0 +1,231 @@ +author: US Environmental Protection Agency +source_name: State Inventory Tool +source_url: 'https://www.epa.gov/statelocalenergy/state-inventory-and-projection-tool' +bib_id: EPA_SIT +format: xlsm # 
macro-enabled spreadsheet +url: None +parse_response_fxn: !script_function:EPA_SIT epa_sit_parse +state: 'ME' +file: 'Synthesis Tool.xlsm' +years: +- 2017 +- 2016 +- 2015 +- 2014 +- 2013 +- 2012 +- 2011 +- 2010 +- 2009 +- 2008 +- 2007 +- 2006 +- 2005 +- 2004 +- 2003 +- 2002 +- 2001 +- 2000 +- 1999 +- 1998 +- 1997 +- 1996 +- 1995 +- 1994 +- 1993 +- 1992 +- 1991 +- 1990 + +sheet_dict: + + 'CO2FFC': + nrows: 32 + unit: MMTCO2e + flow: CO2 + headers: + - Residential + - Commercial + - Industrial + - Transportation + - Electric Utilities + - International Bunker Fuels + - TOTAL + + 'IndirectCO2': + nrows: 45 + unit: MMTCO2e + flow: CO2 + headers: + - Residential + - Commercial + - Industrial + - Transportation + - TOTAL + subsubheaders: + - Conventional Boiler Use + - CHP and/or Cogeneration Process + - Process Heating + - Process Cooling and Refrigeration + - Machine Drive + - Electro-Chemical Processes + - Other Process use + - Facility HVAC + - Facility Lighting + - Other Facility Support + - Onsite Transportation + - Other Nonprocess Use + + 'Stationary': + nrows: 15 + unit: MMTCO2e + headers: + - Residential + - Commercial + - Industrial + - Electric Utilities + - TOTAL + skiprowstart: 13 + skiprowend: 15 + subgroup: flow + + 'Mobile Combustion CH4': + sheetname: Mobile Combustion + header: 55 + nrows: 21 + flow: CH4 + unit: MTCO2e + headers: + - Gasoline Highway + - Diesel Highway + - Non-Highway + - Alternative Fuel Vehicles + - Total + + 'Mobile Combustion N2O': + sheetname: Mobile Combustion + header: 82 + nrows: 21 + flow: N2O + unit: MTCO2e + headers: + - Gasoline Highway + - Diesel Highway + - Non-Highway + - Alternative Fuel Vehicles + - Total + + 'Coal': + header: 3 + nrows: 5 + unit: MTCO2e + flow: CH4 + headers: + - Coal Mining + - Abandoned Coal Mines + + 'Gas and Oil': + header: 17 + nrows: 5 + unit: MT + flow: CH4 + headers: + - Natural Gas + - Oil + + 'Natural Gas Flaring': + sheetname: 'Gas and Oil' + header: 24 + nrows: 1 + unit: MMT + flow: CO2 + 
headers: + - Natural Gas Flaring + + 'IP': + nrows: 20 + unit: MTCO2e + headers: + - Carbon Dioxide Emissions + - Nitrous Oxide Emissions + - HFC, PFC, SF6 and NF3 Emissions + - Total Emissions + subgroup: 'activitybyflow' + + 'Agriculture': + sheetname: Agriculture + header: 26 + nrows: 12 + unit: MMT + headers: + - Carbon Dioxide + - Methane + - Nitrous Oxide + subgroup: 'activitybyflow' + + 'Agricultural Soil Management': + sheetname: Agriculture + tablename: Agricultural Soil Management + header: 41 + nrows: 13 + unit: MT + flow: N2O + headers: + - Direct + - Indirect + - TOTAL + subsubheaders: + - Fertilizer Runoff/Leached + - Manure Runoff/Leached + + 'Land-Use Change and Forest Emissions and Sequestration': + sheetname: Forest Management + header: 8 + nrows: 19 + unit: MMTCO2E + flow: CO2E + headers: + - Forest Carbon Flux + - Urban Trees + - Landfilled Yard Trimmings and Food Scraps + - Forest Fires + - N2O from Settlement Soils + - Agricultural Soil Carbon Flux + + 'Emissions from Landfills': + sheetname: Waste + tablename: Emissions from Landfills + header: 13 + nrows: 9 + unit: MTCO2E + flow: CH4 + headers: + - Potential CH4 + - CH4 Avoided + - Oxidation at MSW Landfills + - Oxidation at Industrial Landfills + - Total CH4 Emissions + + 'Waste Combustion': + sheetname: Waste + tablename: Emissions from Waste Combustion + header: 25 + nrows: 7 + unit: MTCO2e + headers: + - CO2 + - N2O + - CH4 + - Total CO2, N2O, CH4 Emissions + subgroup: 'activitybyflow' + + 'Wastewater': + header: 3 + nrows: 8 + unit: MMTCO2e + headers: + - Municipal CH4 + - Municipal N2O + - Industrial CH4 + - Total Emissions + subgroup: 'activitybyflow' \ No newline at end of file diff --git a/flowsa/methods/flowbyactivitymethods/EPA_StateGHGI.yaml b/flowsa/methods/flowbyactivitymethods/EPA_StateGHGI.yaml new file mode 100644 index 000000000..026c671b4 --- /dev/null +++ b/flowsa/methods/flowbyactivitymethods/EPA_StateGHGI.yaml @@ -0,0 +1,39 @@ +author: US Environmental Protection Agency 
+source_name: 'State Greenhouse Gas Inventories' +source_url: 'https://www.epa.gov/ghgemissions/state-ghg-emissions-and-removals' +bib_id: '' +format: json +url: None +parse_response_fxn: !script_function:EPA_StateGHGI epa_state_ghgi_parse +file: "W_INV_FACTS_ACTIVE_GHG_CO2E.json" +years: +- 2019 +- 2018 +- 2017 +- 2016 +- 2015 +- 2014 +- 2013 +- 2012 +- 2011 +- 2010 +- 2009 +- 2008 +- 2007 +- 2006 +- 2005 +- 2004 +- 2003 +- 2002 +- 2001 +- 2000 +- 1999 +- 1998 +- 1997 +- 1996 +- 1995 +- 1994 +- 1993 +- 1992 +- 1991 +- 1990 diff --git a/flowsa/methods/flowbyactivitymethods/README.md b/flowsa/methods/flowbyactivitymethods/README.md index 5f6e8adaa..f431fdfc8 100644 --- a/flowsa/methods/flowbyactivitymethods/README.md +++ b/flowsa/methods/flowbyactivitymethods/README.md @@ -1,7 +1,6 @@ # Data source configuration -Standard source configuration files provide information needed for pulling data sources - -They are stored in YAML format using a .yaml extension +Standard source configuration files provide information needed for pulling +data sources. 
They are stored in YAML format using a .yaml extension ``` #Source configuration format @@ -20,19 +19,26 @@ url # A set of url parameters for query string, specific to data set year_param: name of year parameter key_param: name of key parameter -url_replace_fxn: name of the source specific function that replaces the dynamic values in the URL -call_response_fxn: name of the source specific function that specifies how data should be loaded -parse_response_fxn: name of the source specific function that parses and formats the dataframe +url_replace_fxn: name of the source specific function that replaces the +dynamic values in the URL +call_response_fxn: name of the source specific function that specifies how +data should be loaded +parse_response_fxn: name of the source specific function that parses and +formats the dataframe years: #years of data as separate lines like - 2015 * can add additional yaml dictionary items specific to calling on a data set ``` To declare a value that needs to be dynamically replaced, surround -a variable name in double underscores like \__foo__ so that a string +a variable name in double underscores like `__foo__` so that a string function will do a dynamic replacement -Based on [YAML v1.1 schema](https://yaml.org/spec/1.1/) - -Use [YAMLlint](http://www.yamllint.com/) to assure the file is valid YAML +Specify the functions to use in the FBA creation using the tag +`!script_function:PythonFileName FunctionName` +where _PythonFileName_ is the name of the Python file (e.g., +"Census_PEP_Population.py") and _FunctionName_ is the name of the function +(e.g., "Census_pop_URL_helper"). +Based on [YAML v1.1 schema](https://yaml.org/spec/1.1/). Use +[YAMLlint](http://www.yamllint.com/) to assure the file is valid YAML. 
diff --git a/flowsa/methods/flowbyactivitymethods/USDA_ACUP_Fertilizer.yaml b/flowsa/methods/flowbyactivitymethods/USDA_ACUP_Fertilizer.yaml index 9bda5ab89..2d3c70e1e 100644 --- a/flowsa/methods/flowbyactivitymethods/USDA_ACUP_Fertilizer.yaml +++ b/flowsa/methods/flowbyactivitymethods/USDA_ACUP_Fertilizer.yaml @@ -14,11 +14,8 @@ url: source_desc: SURVEY sector_desc: ENVIRONMENTAL unit_desc: LB - # __domainLevel__ triggers a replace function - domain_desc: __domainLevel__ + domain_desc: FERTILIZER agg_level_desc: STATE - # dynamically substitute the 50 state acronyms. - state_alpha: __stateAlpha__ # __year__ is used to trigger a replace function to substitute in year year: __year__ key_param: key @@ -31,5 +28,3 @@ years: - 2017 - 2018 - 2020 -domain_levels: -- FERTILIZER diff --git a/flowsa/methods/flowbyactivitymethods/USDA_ACUP_Pesticide.yaml b/flowsa/methods/flowbyactivitymethods/USDA_ACUP_Pesticide.yaml index af253cfd8..91b055188 100644 --- a/flowsa/methods/flowbyactivitymethods/USDA_ACUP_Pesticide.yaml +++ b/flowsa/methods/flowbyactivitymethods/USDA_ACUP_Pesticide.yaml @@ -14,11 +14,8 @@ url: source_desc: SURVEY sector_desc: ENVIRONMENTAL unit_desc: LB - # __domainLevel__ triggers a replace function - domain_desc: __domainLevel__ + domain_desc: CHEMICAL,%20FUNGICIDE&CHEMICAL,%20HERBICIDE&CHEMICAL,%20INCECTICIDE&CHEMICAL,%20OTHER agg_level_desc: STATE - # dynamically substitute the 50 state acronyms. 
- state_alpha: __stateAlpha__ # __year__ is used to trigger a replace function to substitute in year year: __year__ key_param: key @@ -31,8 +28,3 @@ years: - 2017 - 2018 - 2020 -domain_levels: -- CHEMICAL, FUNGICIDE -- CHEMICAL, HERBICIDE -- CHEMICAL, INCECTICIDE -- CHEMICAL, OTHER diff --git a/flowsa/methods/flowbyactivitymethods/USGS_MYB_Asbestos.yaml b/flowsa/methods/flowbyactivitymethods/USGS_MYB_Asbestos.yaml index c49c19c22..dc6b833c5 100644 --- a/flowsa/methods/flowbyactivitymethods/USGS_MYB_Asbestos.yaml +++ b/flowsa/methods/flowbyactivitymethods/USGS_MYB_Asbestos.yaml @@ -1,12 +1,12 @@ # Downloads and parses CSV from USGS Asbestos Statistics and Information on an annual basis. --- source_name: USGS Mineral Yearbook Asbestos -citable_url: https://prd-wret.s3.us-west-2.amazonaws.com/assets/palladium/production/atoms/files/myb1-2018-asbes-adv.xlsx +citable_url: https://d9-wret.s3.us-west-2.amazonaws.com/assets/palladium/production/atoms/files/myb1-2018-asbes.xls date_generated: '2021-03-26' years_available: 2014-2018 format: xls url: - base_url: https://prd-wret.s3.us-west-2.amazonaws.com/assets/palladium/production/atoms/files/myb1-2018-asbes-adv.xlsx + base_url: https://d9-wret.s3.us-west-2.amazonaws.com/assets/palladium/production/atoms/files/myb1-2018-asbes.xls url_replace_fxn: !script_function:USGS_MYB usgs_myb_url_helper call_response_fxn: !script_function:USGS_MYB usgs_asbestos_call parse_response_fxn: !script_function:USGS_MYB usgs_asbestos_parse diff --git a/flowsa/methods/flowbyactivitymethods/USGS_MYB_Barite.yaml b/flowsa/methods/flowbyactivitymethods/USGS_MYB_Barite.yaml index d5a4e9ec8..8bf04f183 100644 --- a/flowsa/methods/flowbyactivitymethods/USGS_MYB_Barite.yaml +++ b/flowsa/methods/flowbyactivitymethods/USGS_MYB_Barite.yaml @@ -1,7 +1,7 @@ # Downloads and parses CSV from USGS Barite Statistics and Information on an annual basis. 
--- source_name: USGS Mineral Yearbook Barite -citable_url: https://prd-wret.s3.us-west-2.amazonaws.com/assets/palladium/production/atoms/files/myb1-2018-barit-adv.xlsx +citable_url: https://pubs.usgs.gov/myb/vol1/2018/myb1-2018-barite.xlsx date_generated: '2021-03-19' years_available: 2014-2018 format: xls diff --git a/flowsa/methods/flowbyactivitymethods/USGS_MYB_Bauxite.yaml b/flowsa/methods/flowbyactivitymethods/USGS_MYB_Bauxite.yaml index 7ec6ba873..87c937166 100644 --- a/flowsa/methods/flowbyactivitymethods/USGS_MYB_Bauxite.yaml +++ b/flowsa/methods/flowbyactivitymethods/USGS_MYB_Bauxite.yaml @@ -1,12 +1,12 @@ # Downloads and parses CSV from USGS Bauxite Statistics and Information on an annual basis. --- source_name: USGS Mineral Yearbook Bauxite -citable_url: https://prd-wret.s3.us-west-2.amazonaws.com/assets/palladium/production/atoms/files/myb1-2017-bauxi.xls +citable_url: https://d9-wret.s3.us-west-2.amazonaws.com/assets/palladium/production/atoms/files/myb1-2017-bauxi.xls date_generated: '2021-03-19' years_available: 2013-2017 format: xls url: - base_url: https://prd-wret.s3.us-west-2.amazonaws.com/assets/palladium/production/atoms/files/myb1-2017-bauxi.xls + base_url: https://d9-wret.s3.us-west-2.amazonaws.com/assets/palladium/production/atoms/files/myb1-2017-bauxi.xls url_replace_fxn: !script_function:USGS_MYB usgs_myb_url_helper call_response_fxn: !script_function:USGS_MYB usgs_bauxite_call parse_response_fxn: !script_function:USGS_MYB usgs_bauxite_parse diff --git a/flowsa/methods/flowbyactivitymethods/USGS_MYB_Beryllium.yaml b/flowsa/methods/flowbyactivitymethods/USGS_MYB_Beryllium.yaml index daa3da374..a114fea13 100644 --- a/flowsa/methods/flowbyactivitymethods/USGS_MYB_Beryllium.yaml +++ b/flowsa/methods/flowbyactivitymethods/USGS_MYB_Beryllium.yaml @@ -1,12 +1,12 @@ # Downloads and parses CSV from USGS Beryllium Statistics and Information on an annual basis. 
--- source_name: USGS Mineral Yearbook Beryllium -citable_url: https://prd-wret.s3.us-west-2.amazonaws.com/assets/palladium/production/atoms/files/myb1-2018-beryl-adv.xlsx +citable_url: https://d9-wret.s3.us-west-2.amazonaws.com/assets/palladium/production/atoms/files/myb1-2018-beryl.xls date_generated: '2021-03-26' years_available: 2014-2018 format: xls url: - base_url: https://prd-wret.s3.us-west-2.amazonaws.com/assets/palladium/production/atoms/files/myb1-2018-beryl-adv.xlsx + base_url: https://d9-wret.s3.us-west-2.amazonaws.com/assets/palladium/production/atoms/files/myb1-2018-beryl.xls url_replace_fxn: !script_function:USGS_MYB usgs_myb_url_helper call_response_fxn: !script_function:USGS_MYB usgs_beryllium_call parse_response_fxn: !script_function:USGS_MYB usgs_beryllium_parse diff --git a/flowsa/methods/flowbyactivitymethods/USGS_MYB_Boron.yaml b/flowsa/methods/flowbyactivitymethods/USGS_MYB_Boron.yaml index e66c1f77f..288c064da 100644 --- a/flowsa/methods/flowbyactivitymethods/USGS_MYB_Boron.yaml +++ b/flowsa/methods/flowbyactivitymethods/USGS_MYB_Boron.yaml @@ -1,12 +1,12 @@ # Downloads and parses CSV from USGS Boron Statistics and Information on an annual basis. 
--- source_name: USGS Mineral Yearbook Boron -citable_url: https://prd-wret.s3-us-west-2.amazonaws.com/assets/palladium/production/atoms/files/myb1-2018-boron-adv.xlsx +citable_url: https://d9-wret.s3-us-west-2.amazonaws.com/assets/palladium/production/atoms/files/myb1-2018-boron-2.xls date_generated: '2021-03-26' years_available: 2014-2018 format: xls url: - base_url: https://prd-wret.s3-us-west-2.amazonaws.com/assets/palladium/production/atoms/files/myb1-2018-boron-adv.xlsx + base_url: https://d9-wret.s3-us-west-2.amazonaws.com/assets/palladium/production/atoms/files/myb1-2018-boron-2.xls url_replace_fxn: !script_function:USGS_MYB usgs_myb_url_helper call_response_fxn: !script_function:USGS_MYB usgs_boron_call parse_response_fxn: !script_function:USGS_MYB usgs_boron_parse diff --git a/flowsa/methods/flowbyactivitymethods/USGS_MYB_Chromium.yaml b/flowsa/methods/flowbyactivitymethods/USGS_MYB_Chromium.yaml index 758f5c259..e21ed8b82 100644 --- a/flowsa/methods/flowbyactivitymethods/USGS_MYB_Chromium.yaml +++ b/flowsa/methods/flowbyactivitymethods/USGS_MYB_Chromium.yaml @@ -1,12 +1,12 @@ # Downloads and parses CSV from USGS Chromium Statistics and Information on an annual basis. 
--- source_name: USGS Mineral Yearbook Chromium -citable_url: https://prd-wret.s3.us-west-2.amazonaws.com/assets/palladium/production/atoms/files/myb1-2018-chrom-adv.xlsx +citable_url: https://d9-wret.s3.us-west-2.amazonaws.com/assets/palladium/production/atoms/files/myb1-2018-chrom.xls date_generated: '2021-03-19' years_available: 2014-2018 format: xls url: - base_url: https://prd-wret.s3.us-west-2.amazonaws.com/assets/palladium/production/atoms/files/myb1-2018-chrom-adv.xlsx + base_url: https://d9-wret.s3.us-west-2.amazonaws.com/assets/palladium/production/atoms/files/myb1-2018-chrom.xls url_replace_fxn: !script_function:USGS_MYB usgs_myb_url_helper call_response_fxn: !script_function:USGS_MYB usgs_chromium_call parse_response_fxn: !script_function:USGS_MYB usgs_chromium_parse diff --git a/flowsa/methods/flowbyactivitymethods/USGS_MYB_Clay.yaml b/flowsa/methods/flowbyactivitymethods/USGS_MYB_Clay.yaml index 4522fd5f7..d4d1f677e 100644 --- a/flowsa/methods/flowbyactivitymethods/USGS_MYB_Clay.yaml +++ b/flowsa/methods/flowbyactivitymethods/USGS_MYB_Clay.yaml @@ -1,12 +1,12 @@ # Downloads and parses CSV from USGS Clay Statistics and Information on an annual basis. 
--- source_name: USGS Mineral Yearbook Clay -citable_url: https://prd-wret.s3-us-west-2.amazonaws.com/assets/palladium/production/atoms/files/myb1-2016-clays.xls +citable_url: https://d9-wret.s3-us-west-2.amazonaws.com/assets/palladium/production/atoms/files/myb1-2016-clays.xls date_generated: '2021-03-19' years_available: 2015-2016 format: xls url: - base_url: https://prd-wret.s3-us-west-2.amazonaws.com/assets/palladium/production/atoms/files/myb1-2016-clays.xls + base_url: https://d9-wret.s3-us-west-2.amazonaws.com/assets/palladium/production/atoms/files/myb1-2016-clays.xls url_replace_fxn: !script_function:USGS_MYB usgs_myb_url_helper call_response_fxn: !script_function:USGS_MYB usgs_clay_call parse_response_fxn: !script_function:USGS_MYB usgs_clay_parse diff --git a/flowsa/methods/flowbyactivitymethods/USGS_MYB_Cobalt.yaml b/flowsa/methods/flowbyactivitymethods/USGS_MYB_Cobalt.yaml index 5fea8fda6..447f855fc 100644 --- a/flowsa/methods/flowbyactivitymethods/USGS_MYB_Cobalt.yaml +++ b/flowsa/methods/flowbyactivitymethods/USGS_MYB_Cobalt.yaml @@ -1,12 +1,12 @@ # Downloads and parses CSV from USGS Cobalt Statistics and Information on an annual basis. 
--- source_name: USGS Mineral Yearbook Cobalt -citable_url: https://prd-wret.s3.us-west-2.amazonaws.com/assets/palladium/production/atoms/files/myb1-2017-cobal-adv.xlsx +citable_url: https://d9-wret.s3.us-west-2.amazonaws.com/assets/palladium/production/atoms/files/myb1-2017-cobal.xls date_generated: '2021-03-26' years_available: 2013-2017 format: xls url: - base_url: https://prd-wret.s3.us-west-2.amazonaws.com/assets/palladium/production/atoms/files/myb1-2017-cobal-adv.xlsx + base_url: https://d9-wret.s3.us-west-2.amazonaws.com/assets/palladium/production/atoms/files/myb1-2017-cobal.xls url_replace_fxn: !script_function:USGS_MYB usgs_myb_url_helper call_response_fxn: !script_function:USGS_MYB usgs_cobalt_call parse_response_fxn: !script_function:USGS_MYB usgs_cobalt_parse diff --git a/flowsa/methods/flowbyactivitymethods/USGS_MYB_Diatomite.yaml b/flowsa/methods/flowbyactivitymethods/USGS_MYB_Diatomite.yaml index a63634bff..5b41f3df4 100644 --- a/flowsa/methods/flowbyactivitymethods/USGS_MYB_Diatomite.yaml +++ b/flowsa/methods/flowbyactivitymethods/USGS_MYB_Diatomite.yaml @@ -1,12 +1,12 @@ # Downloads and parses CSV from USGS Diatomite Statistics and Information on an annual basis. 
--- source_name: USGS Mineral Yearbook Diatomite -citable_url: https://prd-wret.s3-us-west-2.amazonaws.com/assets/palladium/production/atoms/files/myb1-2018-diato-adv.xlsx +citable_url: https://d9-wret.s3.us-west-2.amazonaws.com/assets/palladium/production/atoms/files/myb1-2018-diato.xls date_generated: '2021-03-26' years_available: 2014-2018 format: xls url: - base_url: https://prd-wret.s3-us-west-2.amazonaws.com/assets/palladium/production/atoms/files/myb1-2018-diato-adv.xlsx + base_url: https://d9-wret.s3.us-west-2.amazonaws.com/assets/palladium/production/atoms/files/myb1-2018-diato.xls url_replace_fxn: !script_function:USGS_MYB usgs_myb_url_helper call_response_fxn: !script_function:USGS_MYB usgs_diatomite_call parse_response_fxn: !script_function:USGS_MYB usgs_diatomite_parse diff --git a/flowsa/methods/flowbyactivitymethods/USGS_MYB_Feldspar.yaml b/flowsa/methods/flowbyactivitymethods/USGS_MYB_Feldspar.yaml index 7058223ca..8f70e3d07 100644 --- a/flowsa/methods/flowbyactivitymethods/USGS_MYB_Feldspar.yaml +++ b/flowsa/methods/flowbyactivitymethods/USGS_MYB_Feldspar.yaml @@ -1,12 +1,12 @@ # Downloads and parses CSV from USGS Feldspar Statistics and Information on an annual basis. 
--- source_name: USGS Mineral Yearbook Feldspar -citable_url: https://prd-wret.s3.us-west-2.amazonaws.com/assets/palladium/production/atoms/files/myb1-2017-felds.xls +citable_url: https://d9-wret.s3.us-west-2.amazonaws.com/assets/palladium/production/atoms/files/myb1-2017-felds.xls date_generated: '2021-03-26' years_available: 2013-2017 format: xls url: - base_url: https://prd-wret.s3.us-west-2.amazonaws.com/assets/palladium/production/atoms/files/myb1-2017-felds.xls + base_url: https://d9-wret.s3.us-west-2.amazonaws.com/assets/palladium/production/atoms/files/myb1-2017-felds.xls url_replace_fxn: !script_function:USGS_MYB usgs_myb_url_helper call_response_fxn: !script_function:USGS_MYB usgs_feldspar_call parse_response_fxn: !script_function:USGS_MYB usgs_feldspar_parse diff --git a/flowsa/methods/flowbyactivitymethods/USGS_MYB_Fluorspar.yaml b/flowsa/methods/flowbyactivitymethods/USGS_MYB_Fluorspar.yaml index 6abffac09..3ac0ba5dd 100644 --- a/flowsa/methods/flowbyactivitymethods/USGS_MYB_Fluorspar.yaml +++ b/flowsa/methods/flowbyactivitymethods/USGS_MYB_Fluorspar.yaml @@ -1,12 +1,12 @@ # Downloads and parses CSV from USGS Fluorspar Statistics and Information on an annual basis. 
--- source_name: USGS Mineral Yearbook Fluorspar -citable_url: https://prd-wret.s3.us-west-2.amazonaws.com/assets/palladium/production/atoms/files/myb1-2017-fluor.xls +citable_url: https://d9-wret.s3.us-west-2.amazonaws.com/assets/palladium/production/atoms/files/myb1-2017-fluor.xls date_generated: '2021-03-19' years_available: 2013-2017 format: xls url: - base_url: https://prd-wret.s3.us-west-2.amazonaws.com/assets/palladium/production/atoms/files/myb1-2017-fluor.xls + base_url: https://d9-wret.s3.us-west-2.amazonaws.com/assets/palladium/production/atoms/files/myb1-2017-fluor.xls url_replace_fxn: !script_function:USGS_MYB usgs_myb_url_helper call_response_fxn: !script_function:USGS_MYB usgs_fluorspar_call parse_response_fxn: !script_function:USGS_MYB usgs_fluorspar_parse diff --git a/flowsa/methods/flowbyactivitymethods/USGS_MYB_Gallium.yaml b/flowsa/methods/flowbyactivitymethods/USGS_MYB_Gallium.yaml index 26cbd5aac..6948bd335 100644 --- a/flowsa/methods/flowbyactivitymethods/USGS_MYB_Gallium.yaml +++ b/flowsa/methods/flowbyactivitymethods/USGS_MYB_Gallium.yaml @@ -1,7 +1,7 @@ # Downloads and parses CSV from USGS Gallium Statistics and Information on an annual basis. --- source_name: USGS Mineral Yearbook Gallium -citable_url: https://prd-wret.s3.us-west-2.amazonaws.com/assets/palladium/production/atoms/files/myb1-2018-galli-adv.xlsx +citable_url: https://pubs.usgs.gov/myb/vol1/2018/myb1-2018-gallium.xlsx date_generated: '2021-03-26' years_available: 2014-2018 format: xls diff --git a/flowsa/methods/flowbyactivitymethods/USGS_MYB_Garnet.yaml b/flowsa/methods/flowbyactivitymethods/USGS_MYB_Garnet.yaml index d89efca50..72518ef21 100644 --- a/flowsa/methods/flowbyactivitymethods/USGS_MYB_Garnet.yaml +++ b/flowsa/methods/flowbyactivitymethods/USGS_MYB_Garnet.yaml @@ -1,12 +1,12 @@ # Downloads and parses CSV from USGS Garnet Statistics and Information on an annual basis. 
--- source_name: USGS Mineral Yearbook Garnet -citable_url: https://prd-wret.s3-us-west-2.amazonaws.com/assets/palladium/production/atoms/files/myb1-2018-garne-adv.xlsx +citable_url: https://d9-wret.s3.us-west-2.amazonaws.com/assets/palladium/production/atoms/files/myb1-2018-garne.xls date_generated: '2021-03-26' years_available: 2014-2018 format: xls url: - base_url: https://prd-wret.s3-us-west-2.amazonaws.com/assets/palladium/production/atoms/files/myb1-2018-garne-adv.xlsx + base_url: https://d9-wret.s3.us-west-2.amazonaws.com/assets/palladium/production/atoms/files/myb1-2018-garne.xls url_replace_fxn: !script_function:USGS_MYB usgs_myb_url_helper call_response_fxn: !script_function:USGS_MYB usgs_garnet_call parse_response_fxn: !script_function:USGS_MYB usgs_garnet_parse diff --git a/flowsa/methods/flowbyactivitymethods/USGS_MYB_Gold.yaml b/flowsa/methods/flowbyactivitymethods/USGS_MYB_Gold.yaml index 657b5f726..1d76db821 100644 --- a/flowsa/methods/flowbyactivitymethods/USGS_MYB_Gold.yaml +++ b/flowsa/methods/flowbyactivitymethods/USGS_MYB_Gold.yaml @@ -1,12 +1,12 @@ # Downloads and parses CSV from USGS Gold Statistics and Information on an annual basis. 
--- source_name: USGS Mineral Yearbook Gold -citable_url: https://prd-wret.s3.us-west-2.amazonaws.com/assets/palladium/production/atoms/files/myb1-2017-gold.xls +citable_url: https://d9-wret.s3.us-west-2.amazonaws.com/assets/palladium/production/atoms/files/myb1-2017-gold.xls date_generated: '2021-03-26' years_available: 2013-2017 format: xls url: - base_url: https://prd-wret.s3.us-west-2.amazonaws.com/assets/palladium/production/atoms/files/myb1-2017-gold.xls + base_url: https://d9-wret.s3.us-west-2.amazonaws.com/assets/palladium/production/atoms/files/myb1-2017-gold.xls url_replace_fxn: !script_function:USGS_MYB usgs_myb_url_helper call_response_fxn: !script_function:USGS_MYB usgs_gold_call parse_response_fxn: !script_function:USGS_MYB usgs_gold_parse diff --git a/flowsa/methods/flowbyactivitymethods/USGS_MYB_Graphite.yaml b/flowsa/methods/flowbyactivitymethods/USGS_MYB_Graphite.yaml index 17ed7360a..ce1dbd78c 100644 --- a/flowsa/methods/flowbyactivitymethods/USGS_MYB_Graphite.yaml +++ b/flowsa/methods/flowbyactivitymethods/USGS_MYB_Graphite.yaml @@ -1,12 +1,12 @@ # Downloads and parses CSV from USGS Graphite Statistics and Information on an annual basis. 
--- source_name: USGS Mineral Yearbook Graphite -citable_url: https://prd-wret.s3.us-west-2.amazonaws.com/assets/palladium/production/atoms/files/myb1-2017-graph.xls +citable_url: https://d9-wret.s3.us-west-2.amazonaws.com/assets/palladium/production/atoms/files/myb1-2017-graph.xls date_generated: '2021-03-19' years_available: 2013-2017 format: xls url: - base_url: https://prd-wret.s3.us-west-2.amazonaws.com/assets/palladium/production/atoms/files/myb1-2017-graph.xls + base_url: https://d9-wret.s3.us-west-2.amazonaws.com/assets/palladium/production/atoms/files/myb1-2017-graph.xls url_replace_fxn: !script_function:USGS_MYB usgs_myb_url_helper call_response_fxn: !script_function:USGS_MYB usgs_graphite_call parse_response_fxn: !script_function:USGS_MYB usgs_graphite_parse diff --git a/flowsa/methods/flowbyactivitymethods/USGS_MYB_Gypsum.yaml b/flowsa/methods/flowbyactivitymethods/USGS_MYB_Gypsum.yaml index 7f1146cfe..6d8c3f9e2 100644 --- a/flowsa/methods/flowbyactivitymethods/USGS_MYB_Gypsum.yaml +++ b/flowsa/methods/flowbyactivitymethods/USGS_MYB_Gypsum.yaml @@ -1,12 +1,12 @@ # Downloads and parses CSV from USGS Gypsum Statistics and Information on an annual basis. 
--- source_name: USGS Mineral Yearbook Gypsum -citable_url: https://prd-wret.s3.us-west-2.amazonaws.com/assets/palladium/production/atoms/files/myb1-2018-gypsu-adv.xlsx +citable_url: https://pubs.usgs.gov/myb/vol1/2018/myb1-2018-gypsum.xlsx date_generated: '2021-03-26' years_available: 2014-2018 format: xls url: - base_url: https://prd-wret.s3.us-west-2.amazonaws.com/assets/palladium/production/atoms/files/myb1-2018-gypsu-adv.xlsx + base_url: https://pubs.usgs.gov/myb/vol1/2018/myb1-2018-gypsum.xlsx url_replace_fxn: !script_function:USGS_MYB usgs_myb_url_helper call_response_fxn: !script_function:USGS_MYB usgs_gypsum_call parse_response_fxn: !script_function:USGS_MYB usgs_gypsum_parse diff --git a/flowsa/methods/flowbyactivitymethods/USGS_MYB_Iodine.yaml b/flowsa/methods/flowbyactivitymethods/USGS_MYB_Iodine.yaml index 2c942e46b..408700d1c 100644 --- a/flowsa/methods/flowbyactivitymethods/USGS_MYB_Iodine.yaml +++ b/flowsa/methods/flowbyactivitymethods/USGS_MYB_Iodine.yaml @@ -1,12 +1,12 @@ # Downloads and parses CSV from USGS Iodine Statistics and Information on an annual basis. 
--- source_name: USGS Mineral Yearbook Iodine -citable_url: https://prd-wret.s3.us-west-2.amazonaws.com/assets/palladium/production/atoms/files/myb1-2018-iodin-adv.xlsx +citable_url: https://d9-wret.s3.us-west-2.amazonaws.com/assets/palladium/production/atoms/files/myb1-2018-iodin.xls date_generated: '2021-03-19' years_available: 2014-2018 format: xls url: - base_url: https://prd-wret.s3.us-west-2.amazonaws.com/assets/palladium/production/atoms/files/myb1-2018-iodin-adv.xlsx + base_url: https://d9-wret.s3.us-west-2.amazonaws.com/assets/palladium/production/atoms/files/myb1-2018-iodin.xls url_replace_fxn: !script_function:USGS_MYB usgs_myb_url_helper call_response_fxn: !script_function:USGS_MYB usgs_iodine_call parse_response_fxn: !script_function:USGS_MYB usgs_iodine_parse diff --git a/flowsa/methods/flowbyactivitymethods/USGS_MYB_IronOre.yaml b/flowsa/methods/flowbyactivitymethods/USGS_MYB_IronOre.yaml index 1dae5acee..70b1e41f2 100644 --- a/flowsa/methods/flowbyactivitymethods/USGS_MYB_IronOre.yaml +++ b/flowsa/methods/flowbyactivitymethods/USGS_MYB_IronOre.yaml @@ -1,12 +1,12 @@ # Downloads and parses CSV from USGS IronOre Statistics and Information on an annual basis. 
--- source_name: USGS Mineral Yearbook Iron Ore -citable_url: https://prd-wret.s3.us-west-2.amazonaws.com/assets/palladium/production/atoms/files/myb1-2018-feore-adv.xlsx +citable_url: https://pubs.usgs.gov/myb/vol1/2018/myb1-2018-iron-ore.xlsx date_generated: '2021-03-19' years_available: 2014-2018 format: xls url: - base_url: https://prd-wret.s3.us-west-2.amazonaws.com/assets/palladium/production/atoms/files/myb1-2018-feore-adv.xlsx + base_url: https://pubs.usgs.gov/myb/vol1/2018/myb1-2018-iron-ore.xlsx url_replace_fxn: !script_function:USGS_MYB usgs_myb_url_helper call_response_fxn: !script_function:USGS_MYB usgs_iron_ore_call parse_response_fxn: !script_function:USGS_MYB usgs_iron_ore_parse diff --git a/flowsa/methods/flowbyactivitymethods/USGS_MYB_Kyanite.yaml b/flowsa/methods/flowbyactivitymethods/USGS_MYB_Kyanite.yaml index 3565ee645..fe168ce4c 100644 --- a/flowsa/methods/flowbyactivitymethods/USGS_MYB_Kyanite.yaml +++ b/flowsa/methods/flowbyactivitymethods/USGS_MYB_Kyanite.yaml @@ -1,12 +1,12 @@ # Downloads and parses CSV from USGS Kyanite Statistics and Information on an annual basis. 
--- source_name: USGS Mineral Yearbook Kyanite -citable_url: https://prd-wret.s3.us-west-2.amazonaws.com/assets/palladium/production/atoms/files/myb1-2018-kyani-adv.xlsx +citable_url: https://pubs.usgs.gov/myb/vol1/2018/myb1-2018-kyanite.xlsx date_generated: '2021-03-19' years_available: 2014-2018 format: xls url: - base_url: https://prd-wret.s3.us-west-2.amazonaws.com/assets/palladium/production/atoms/files/myb1-2018-kyani-adv.xlsx + base_url: https://pubs.usgs.gov/myb/vol1/2018/myb1-2018-kyanite.xlsx url_replace_fxn: !script_function:USGS_MYB usgs_myb_url_helper call_response_fxn: !script_function:USGS_MYB usgs_kyanite_call parse_response_fxn: !script_function:USGS_MYB usgs_kyanite_parse diff --git a/flowsa/methods/flowbyactivitymethods/USGS_MYB_Lead.yaml b/flowsa/methods/flowbyactivitymethods/USGS_MYB_Lead.yaml index c9038d8cc..8b6b53622 100644 --- a/flowsa/methods/flowbyactivitymethods/USGS_MYB_Lead.yaml +++ b/flowsa/methods/flowbyactivitymethods/USGS_MYB_Lead.yaml @@ -1,12 +1,12 @@ # Downloads and parses CSV from USGS Lead Statistics and Information on an annual basis. 
--- source_name: USGS Mineral Yearbook Lead -citable_url: https://s3-us-west-2.amazonaws.com/prd-wret/assets/palladium/production/atoms/files/myb1-2016-lead.xls +citable_url: https://d9-wret.s3.us-west-2.amazonaws.com/assets/palladium/production/s3fs-public/media/files/myb1-2018-lead-advrel.xlsx date_generated: '2021-03-26' years_available: 2012-2016 format: xlsx url: - base_url: https://s3-us-west-2.amazonaws.com/prd-wret/assets/palladium/production/atoms/files/myb1-2016-lead.xls + base_url: https://d9-wret.s3.us-west-2.amazonaws.com/assets/palladium/production/s3fs-public/media/files/myb1-2018-lead-advrel.xlsx url_replace_fxn: !script_function:USGS_MYB usgs_lead_url_helper call_response_fxn: !script_function:USGS_MYB usgs_lead_call parse_response_fxn: !script_function:USGS_MYB usgs_lead_parse diff --git a/flowsa/methods/flowbyactivitymethods/USGS_MYB_Lime.yaml b/flowsa/methods/flowbyactivitymethods/USGS_MYB_Lime.yaml index 6fc2f0516..3380dca38 100644 --- a/flowsa/methods/flowbyactivitymethods/USGS_MYB_Lime.yaml +++ b/flowsa/methods/flowbyactivitymethods/USGS_MYB_Lime.yaml @@ -1,12 +1,12 @@ # Downloads and parses CSV from USGS Lime Statistics and Information on an annual basis. 
--- source_name: USGS Mineral Yearbook Lime -citable_url: https://prd-wret.s3.us-west-2.amazonaws.com/assets/palladium/production/atoms/files/myb1-2018-lime-adv.xlsx +citable_url: https://d9-wret.s3.us-west-2.amazonaws.com/assets/palladium/production/atoms/files/myb1-2018-lime.xls date_generated: '2021-03-26' years_available: 2014-2018 format: xls url: - base_url: https://prd-wret.s3.us-west-2.amazonaws.com/assets/palladium/production/atoms/files/myb1-2018-lime-adv.xlsx + base_url: https://d9-wret.s3.us-west-2.amazonaws.com/assets/palladium/production/atoms/files/myb1-2018-lime.xls url_replace_fxn: !script_function:USGS_MYB usgs_myb_url_helper call_response_fxn: !script_function:USGS_MYB usgs_lime_call parse_response_fxn: !script_function:USGS_MYB usgs_lime_parse diff --git a/flowsa/methods/flowbyactivitymethods/USGS_MYB_Lithium.yaml b/flowsa/methods/flowbyactivitymethods/USGS_MYB_Lithium.yaml index fe9f1eaf3..0ffb016f5 100644 --- a/flowsa/methods/flowbyactivitymethods/USGS_MYB_Lithium.yaml +++ b/flowsa/methods/flowbyactivitymethods/USGS_MYB_Lithium.yaml @@ -1,12 +1,12 @@ # Downloads and parses CSV from USGS Lithium Statistics and Information on an annual basis. 
--- source_name: USGS Mineral Yearbook Lithium -citable_url: https://prd-wret.s3.us-west-2.amazonaws.com/assets/palladium/production/atoms/files/myb1-2017-lithi.xls +citable_url: https://d9-wret.s3.us-west-2.amazonaws.com/assets/palladium/production/atoms/files/myb1-2017-lithi.xls date_generated: '2021-03-26' years_available: 2013-2017 format: xls url: - base_url: https://prd-wret.s3.us-west-2.amazonaws.com/assets/palladium/production/atoms/files/myb1-2017-lithi.xls + base_url: https://d9-wret.s3.us-west-2.amazonaws.com/assets/palladium/production/atoms/files/myb1-2017-lithi.xls url_replace_fxn: !script_function:USGS_MYB usgs_myb_url_helper call_response_fxn: !script_function:USGS_MYB usgs_lithium_call parse_response_fxn: !script_function:USGS_MYB usgs_lithium_parse diff --git a/flowsa/methods/flowbyactivitymethods/USGS_MYB_Magnesium.yaml b/flowsa/methods/flowbyactivitymethods/USGS_MYB_Magnesium.yaml index 840ee8ca1..2d535da32 100644 --- a/flowsa/methods/flowbyactivitymethods/USGS_MYB_Magnesium.yaml +++ b/flowsa/methods/flowbyactivitymethods/USGS_MYB_Magnesium.yaml @@ -1,12 +1,12 @@ # Downloads and parses CSV from USGS Magnesium Statistics and Information on an annual basis. 
--- source_name: USGS Mineral Yearbook Magnesium -citable_url: https://prd-wret.s3.us-west-2.amazonaws.com/assets/palladium/production/atoms/files/myb1-2017-mgmet.xls +citable_url: https://d9-wret.s3.us-west-2.amazonaws.com/assets/palladium/production/atoms/files/myb1-2017-mgmet.xls date_generated: '2021-03-26' years_available: 2013-2017 format: xls url: - base_url: https://prd-wret.s3.us-west-2.amazonaws.com/assets/palladium/production/atoms/files/myb1-2017-mgmet.xls + base_url: https://d9-wret.s3.us-west-2.amazonaws.com/assets/palladium/production/atoms/files/myb1-2017-mgmet.xls url_replace_fxn: !script_function:USGS_MYB usgs_myb_url_helper call_response_fxn: !script_function:USGS_MYB usgs_magnesium_call parse_response_fxn: !script_function:USGS_MYB usgs_magnesium_parse diff --git a/flowsa/methods/flowbyactivitymethods/USGS_MYB_Manganese.yaml b/flowsa/methods/flowbyactivitymethods/USGS_MYB_Manganese.yaml index 316948c20..0bbe251f1 100644 --- a/flowsa/methods/flowbyactivitymethods/USGS_MYB_Manganese.yaml +++ b/flowsa/methods/flowbyactivitymethods/USGS_MYB_Manganese.yaml @@ -1,12 +1,12 @@ # Downloads and parses CSV from USGS Manganese Statistics and Information on an annual basis. 
--- source_name: USGS Mineral Yearbook Manganese -citable_url: https://prd-wret.s3.us-west-2.amazonaws.com/assets/palladium/production/atoms/files/myb1-2016-manga-adv.xlsx +citable_url: https://d9-wret.s3.us-west-2.amazonaws.com/assets/palladium/production/atoms/files/myb1-2016-manga.xls date_generated: '2021-03-26' years_available: 2012-2016 format: xls url: - base_url: https://prd-wret.s3.us-west-2.amazonaws.com/assets/palladium/production/atoms/files/myb1-2016-manga-adv.xlsx + base_url: https://d9-wret.s3.us-west-2.amazonaws.com/assets/palladium/production/atoms/files/myb1-2016-manga.xls url_replace_fxn: !script_function:USGS_MYB usgs_myb_url_helper call_response_fxn: !script_function:USGS_MYB usgs_manganese_call parse_response_fxn: !script_function:USGS_MYB usgs_manganese_parse diff --git a/flowsa/methods/flowbyactivitymethods/USGS_MYB_ManufacturedAbrasive.yaml b/flowsa/methods/flowbyactivitymethods/USGS_MYB_ManufacturedAbrasive.yaml index 0fdea89cf..e2b554d50 100644 --- a/flowsa/methods/flowbyactivitymethods/USGS_MYB_ManufacturedAbrasive.yaml +++ b/flowsa/methods/flowbyactivitymethods/USGS_MYB_ManufacturedAbrasive.yaml @@ -1,12 +1,12 @@ # Downloads and parses CSV from USGS ManufacturedAbrasive Statistics and Information on an annual basis. 
--- source_name: USGS Mineral Yearbook Manufactured Abrasive -citable_url: https://s3-us-west-2.amazonaws.com/prd-wret/assets/palladium/production/atoms/files/myb1-2017-abras.xls +citable_url: https://d9-wret.s3.us-west-2.amazonaws.com/assets/palladium/production/atoms/files/myb1-2017-abras.xls date_generated: '2021-03-26' years_available: 2017-2018 format: xlsx url: - base_url: https://s3-us-west-2.amazonaws.com/prd-wret/assets/palladium/production/atoms/files/myb1-2017-abras.xls + base_url: https://d9-wret.s3.us-west-2.amazonaws.com/assets/palladium/production/atoms/files/myb1-2017-abras.xls url_replace_fxn: !script_function:USGS_MYB usgs_myb_url_helper call_response_fxn: !script_function:USGS_MYB usgs_ma_call parse_response_fxn: !script_function:USGS_MYB usgs_ma_parse diff --git a/flowsa/methods/flowbyactivitymethods/USGS_MYB_Mica.yaml b/flowsa/methods/flowbyactivitymethods/USGS_MYB_Mica.yaml index 1f6ab57a3..c5ca9b78b 100644 --- a/flowsa/methods/flowbyactivitymethods/USGS_MYB_Mica.yaml +++ b/flowsa/methods/flowbyactivitymethods/USGS_MYB_Mica.yaml @@ -1,7 +1,7 @@ # Downloads and parses CSV from USGS Mica Statistics and Information on an annual basis. --- source_name: USGS Mineral Yearbook Mica -citable_url: https://pubs.usgs.gov/myb/vol1/2018/myb1-2018-mica.xls +citable_url: https://pubs.usgs.gov/myb/vol1/2018/myb1-2018-mica.xlsx date_generated: '2021-03-19' years_available: 2014-2018 format: xls diff --git a/flowsa/methods/flowbyactivitymethods/USGS_MYB_Molybdenum.yaml b/flowsa/methods/flowbyactivitymethods/USGS_MYB_Molybdenum.yaml index ad640ffc7..7ece684c5 100644 --- a/flowsa/methods/flowbyactivitymethods/USGS_MYB_Molybdenum.yaml +++ b/flowsa/methods/flowbyactivitymethods/USGS_MYB_Molybdenum.yaml @@ -1,12 +1,12 @@ # Downloads and parses CSV from USGS Molybdenum Statistics and Information on an annual basis. 
--- source_name: USGS Mineral Yearbook Molybdenum -citable_url: https://prd-wret.s3.us-west-2.amazonaws.com/assets/palladium/production/atoms/files/myb1-2018-molyb-adv.xlsx +citable_url: https://d9-wret.s3.us-west-2.amazonaws.com/assets/palladium/production/atoms/files/myb1-2018-moly.xls date_generated: '2021-03-19' years_available: 2014-2018 format: xls url: - base_url: https://prd-wret.s3.us-west-2.amazonaws.com/assets/palladium/production/atoms/files/myb1-2018-molyb-adv.xlsx + base_url: https://d9-wret.s3.us-west-2.amazonaws.com/assets/palladium/production/atoms/files/myb1-2018-moly.xls url_replace_fxn: !script_function:USGS_MYB usgs_myb_url_helper call_response_fxn: !script_function:USGS_MYB usgs_molybdenum_call parse_response_fxn: !script_function:USGS_MYB usgs_molybdenum_parse diff --git a/flowsa/methods/flowbyactivitymethods/USGS_MYB_Nickel.yaml b/flowsa/methods/flowbyactivitymethods/USGS_MYB_Nickel.yaml index b2f28ee42..c36d32097 100644 --- a/flowsa/methods/flowbyactivitymethods/USGS_MYB_Nickel.yaml +++ b/flowsa/methods/flowbyactivitymethods/USGS_MYB_Nickel.yaml @@ -1,12 +1,12 @@ # Downloads and parses CSV from USGS Nickel Statistics and Information on an annual basis. 
--- source_name: USGS Mineral Yearbook Nickel -citable_url: https://prd-wret.s3.us-west-2.amazonaws.com/assets/palladium/production/atoms/files/myb1-2016-nickel.xls +citable_url: https://d9-wret.s3.us-west-2.amazonaws.com/assets/palladium/production/atoms/files/myb1-2016-nickel.xls date_generated: '2021-03-26' years_available: 2012-2016 format: xls url: - base_url: https://prd-wret.s3.us-west-2.amazonaws.com/assets/palladium/production/atoms/files/myb1-2016-nickel.xls + base_url: https://d9-wret.s3.us-west-2.amazonaws.com/assets/palladium/production/atoms/files/myb1-2016-nickel.xls url_replace_fxn: !script_function:USGS_MYB usgs_myb_url_helper call_response_fxn: !script_function:USGS_MYB usgs_nickel_call parse_response_fxn: !script_function:USGS_MYB usgs_nickel_parse diff --git a/flowsa/methods/flowbyactivitymethods/USGS_MYB_Niobium.yaml b/flowsa/methods/flowbyactivitymethods/USGS_MYB_Niobium.yaml index 446509a1d..81a613c1f 100644 --- a/flowsa/methods/flowbyactivitymethods/USGS_MYB_Niobium.yaml +++ b/flowsa/methods/flowbyactivitymethods/USGS_MYB_Niobium.yaml @@ -1,7 +1,7 @@ # Downloads and parses CSV from USGS Niobium Statistics and Information on an annual basis. --- source_name: USGS Mineral Yearbook Niobium -citable_url: https://pubs.usgs.gov/myb/vol1/2018/myb1-2018-niobium.xls +citable_url: https://pubs.usgs.gov/myb/vol1/2018/myb1-2018-niobium.xlsx date_generated: '2021-03-19' years_available: 2014-2018 format: xls diff --git a/flowsa/methods/flowbyactivitymethods/USGS_MYB_Peat.yaml b/flowsa/methods/flowbyactivitymethods/USGS_MYB_Peat.yaml index 5752883ca..8095a4bb6 100644 --- a/flowsa/methods/flowbyactivitymethods/USGS_MYB_Peat.yaml +++ b/flowsa/methods/flowbyactivitymethods/USGS_MYB_Peat.yaml @@ -1,12 +1,12 @@ # Downloads and parses CSV from USGS Peat Statistics and Information on an annual basis. 
--- source_name: USGS Mineral Yearbook Peat -citable_url: https://prd-wret.s3-us-west-2.amazonaws.com/assets/palladium/production/atoms/files/myb1-2018-peat-advrel.xlsx +citable_url: https://d9-wret.s3.us-west-2.amazonaws.com/assets/palladium/production/atoms/files/myb1-2018-peat.xls date_generated: '2021-03-26' years_available: 2014-2018 format: xls url: - base_url: https://prd-wret.s3-us-west-2.amazonaws.com/assets/palladium/production/atoms/files/myb1-2018-peat-advrel.xlsx + base_url: https://d9-wret.s3.us-west-2.amazonaws.com/assets/palladium/production/atoms/files/myb1-2018-peat.xls url_replace_fxn: !script_function:USGS_MYB usgs_myb_url_helper call_response_fxn: !script_function:USGS_MYB usgs_peat_call parse_response_fxn: !script_function:USGS_MYB usgs_peat_parse diff --git a/flowsa/methods/flowbyactivitymethods/USGS_MYB_Perlite.yaml b/flowsa/methods/flowbyactivitymethods/USGS_MYB_Perlite.yaml index 89d057a91..42877791b 100644 --- a/flowsa/methods/flowbyactivitymethods/USGS_MYB_Perlite.yaml +++ b/flowsa/methods/flowbyactivitymethods/USGS_MYB_Perlite.yaml @@ -1,12 +1,12 @@ # Downloads and parses CSV from USGS Perlite Statistics and Information on an annual basis. 
--- source_name: USGS Mineral Yearbook Perlite -citable_url: https://prd-wret.s3.us-west-2.amazonaws.com/assets/palladium/production/atoms/files/myb1-2017-perli.xls +citable_url: https://d9-wret.s3.us-west-2.amazonaws.com/assets/palladium/production/atoms/files/myb1-2017-perli.xls date_generated: '2021-03-19' years_available: 2013-2017 format: xls url: - base_url: https://prd-wret.s3.us-west-2.amazonaws.com/assets/palladium/production/atoms/files/myb1-2017-perli.xls + base_url: https://d9-wret.s3.us-west-2.amazonaws.com/assets/palladium/production/atoms/files/myb1-2017-perli.xls url_replace_fxn: !script_function:USGS_MYB usgs_myb_url_helper call_response_fxn: !script_function:USGS_MYB usgs_perlite_call parse_response_fxn: !script_function:USGS_MYB usgs_perlite_parse diff --git a/flowsa/methods/flowbyactivitymethods/USGS_MYB_Phosphate.yaml b/flowsa/methods/flowbyactivitymethods/USGS_MYB_Phosphate.yaml index 6eb8b9155..d4b7326c1 100644 --- a/flowsa/methods/flowbyactivitymethods/USGS_MYB_Phosphate.yaml +++ b/flowsa/methods/flowbyactivitymethods/USGS_MYB_Phosphate.yaml @@ -1,12 +1,12 @@ # Downloads and parses CSV from USGS Phosphate Statistics and Information on an annual basis. 
--- source_name: USGS Mineral Yearbook Phosphate -citable_url: https://prd-wret.s3.us-west-2.amazonaws.com/assets/palladium/production/atoms/files/myb1-2018-phosp.xls +citable_url: https://d9-wret.s3.us-west-2.amazonaws.com/assets/palladium/production/atoms/files/myb1-2018-phosp.xls date_generated: '2021-03-26' years_available: 2014-2018 format: xls url: - base_url: https://prd-wret.s3.us-west-2.amazonaws.com/assets/palladium/production/atoms/files/myb1-2018-phosp.xls + base_url: https://d9-wret.s3.us-west-2.amazonaws.com/assets/palladium/production/atoms/files/myb1-2018-phosp.xls url_replace_fxn: !script_function:USGS_MYB usgs_myb_url_helper call_response_fxn: !script_function:USGS_MYB usgs_phosphate_call parse_response_fxn: !script_function:USGS_MYB usgs_phosphate_parse diff --git a/flowsa/methods/flowbyactivitymethods/USGS_MYB_Platinum.yaml b/flowsa/methods/flowbyactivitymethods/USGS_MYB_Platinum.yaml index b69b37032..f97dcf74d 100644 --- a/flowsa/methods/flowbyactivitymethods/USGS_MYB_Platinum.yaml +++ b/flowsa/methods/flowbyactivitymethods/USGS_MYB_Platinum.yaml @@ -1,12 +1,12 @@ # Downloads and parses CSV from USGS Platinum Statistics and Information on an annual basis. 
--- source_name: USGS Mineral Yearbook Platinum -citable_url: https://prd-wret.s3.us-west-2.amazonaws.com/assets/palladium/production/atoms/files/myb1-2018-plati-adv.xlsx +citable_url: https://d9-wret.s3.us-west-2.amazonaws.com/assets/palladium/production/atoms/files/myb1-2018-plati.xls date_generated: '2021-03-19' years_available: 2014-2018 format: xls url: - base_url: https://prd-wret.s3.us-west-2.amazonaws.com/assets/palladium/production/atoms/files/myb1-2018-plati-adv.xlsx + base_url: https://d9-wret.s3.us-west-2.amazonaws.com/assets/palladium/production/atoms/files/myb1-2018-plati.xls url_replace_fxn: !script_function:USGS_MYB usgs_myb_url_helper call_response_fxn: !script_function:USGS_MYB usgs_platinum_call parse_response_fxn: !script_function:USGS_MYB usgs_platinum_parse diff --git a/flowsa/methods/flowbyactivitymethods/USGS_MYB_Potash.yaml b/flowsa/methods/flowbyactivitymethods/USGS_MYB_Potash.yaml index 1564ff5bd..516e92c0d 100644 --- a/flowsa/methods/flowbyactivitymethods/USGS_MYB_Potash.yaml +++ b/flowsa/methods/flowbyactivitymethods/USGS_MYB_Potash.yaml @@ -1,12 +1,12 @@ # Downloads and parses CSV from USGS Potash Statistics and Information on an annual basis. 
--- source_name: USGS Mineral Yearbook Potash -citable_url: https://prd-wret.s3-us-west-2.amazonaws.com/assets/palladium/production/atoms/files/myb1-2018-potas-adv.xlsx +citable_url: https://d9-wret.s3.us-west-2.amazonaws.com/assets/palladium/production/atoms/files/myb1-2018-potas.xls date_generated: '2021-03-26' years_available: 2014-2018 format: xls url: - base_url: https://prd-wret.s3-us-west-2.amazonaws.com/assets/palladium/production/atoms/files/myb1-2018-potas-adv.xlsx + base_url: https://d9-wret.s3.us-west-2.amazonaws.com/assets/palladium/production/atoms/files/myb1-2018-potas.xls url_replace_fxn: !script_function:USGS_MYB usgs_myb_url_helper call_response_fxn: !script_function:USGS_MYB usgs_potash_call parse_response_fxn: !script_function:USGS_MYB usgs_potash_parse diff --git a/flowsa/methods/flowbyactivitymethods/USGS_MYB_Pumice.yaml b/flowsa/methods/flowbyactivitymethods/USGS_MYB_Pumice.yaml index 8b72b4565..93eba528c 100644 --- a/flowsa/methods/flowbyactivitymethods/USGS_MYB_Pumice.yaml +++ b/flowsa/methods/flowbyactivitymethods/USGS_MYB_Pumice.yaml @@ -1,12 +1,12 @@ # Downloads and parses CSV from USGS Pumice Statistics and Information on an annual basis. 
--- source_name: USGS Mineral Yearbook Pumice -citable_url: https://prd-wret.s3-us-west-2.amazonaws.com/assets/palladium/production/atoms/files/myb1-2018-pumic-adv.xlsx +citable_url: https://d9-wret.s3.us-west-2.amazonaws.com/assets/palladium/production/atoms/files/myb1-2018-pumic.xls date_generated: '2021-03-26' years_available: 2014-2018 format: xls url: - base_url: https://prd-wret.s3-us-west-2.amazonaws.com/assets/palladium/production/atoms/files/myb1-2018-pumic-adv.xlsx + base_url: https://d9-wret.s3.us-west-2.amazonaws.com/assets/palladium/production/atoms/files/myb1-2018-pumic.xls url_replace_fxn: !script_function:USGS_MYB usgs_myb_url_helper call_response_fxn: !script_function:USGS_MYB usgs_pumice_call parse_response_fxn: !script_function:USGS_MYB usgs_pumice_parse diff --git a/flowsa/methods/flowbyactivitymethods/USGS_MYB_Rhenium.yaml b/flowsa/methods/flowbyactivitymethods/USGS_MYB_Rhenium.yaml index 732ada2a5..1f9815225 100644 --- a/flowsa/methods/flowbyactivitymethods/USGS_MYB_Rhenium.yaml +++ b/flowsa/methods/flowbyactivitymethods/USGS_MYB_Rhenium.yaml @@ -1,12 +1,12 @@ # Downloads and parses CSV from USGS Rhenium Statistics and Information on an annual basis. 
--- source_name: USGS Mineral Yearbook Rhenium -citable_url: https://prd-wret.s3.us-west-2.amazonaws.com/assets/palladium/production/atoms/files/myb1-2018-rheni-adv.xlsx +citable_url: https://d9-wret.s3.us-west-2.amazonaws.com/assets/palladium/production/atoms/files/myb1-2018-rheni.xls date_generated: '2021-03-26' years_available: 2014-2018 format: xls url: - base_url: https://prd-wret.s3.us-west-2.amazonaws.com/assets/palladium/production/atoms/files/myb1-2018-rheni-adv.xlsx + base_url: https://d9-wret.s3.us-west-2.amazonaws.com/assets/palladium/production/atoms/files/myb1-2018-rheni.xls url_replace_fxn: !script_function:USGS_MYB usgs_myb_url_helper call_response_fxn: !script_function:USGS_MYB usgs_rhenium_call parse_response_fxn: !script_function:USGS_MYB usgs_rhenium_parse diff --git a/flowsa/methods/flowbyactivitymethods/USGS_MYB_Salt.yaml b/flowsa/methods/flowbyactivitymethods/USGS_MYB_Salt.yaml index 10cd30e27..d602b9222 100644 --- a/flowsa/methods/flowbyactivitymethods/USGS_MYB_Salt.yaml +++ b/flowsa/methods/flowbyactivitymethods/USGS_MYB_Salt.yaml @@ -1,12 +1,12 @@ # Downloads and parses CSV from USGS Salt Statistics and Information on an annual basis. 
--- source_name: USGS Mineral Yearbook Salt -citable_url: https://prd-wret.s3.us-west-2.amazonaws.com/assets/palladium/production/atoms/files/myb1-2017-salt-adv.xlsx +citable_url: https://d9-wret.s3.us-west-2.amazonaws.com/assets/palladium/production/atoms/files/myb1-2017-salt.xlsx date_generated: '2021-03-26' years_available: 2013-2017 format: xls url: - base_url: https://prd-wret.s3.us-west-2.amazonaws.com/assets/palladium/production/atoms/files/myb1-2017-salt-adv.xlsx + base_url: https://d9-wret.s3.us-west-2.amazonaws.com/assets/palladium/production/atoms/files/myb1-2017-salt.xlsx url_replace_fxn: !script_function:USGS_MYB usgs_myb_url_helper call_response_fxn: !script_function:USGS_MYB usgs_salt_call parse_response_fxn: !script_function:USGS_MYB usgs_salt_parse diff --git a/flowsa/methods/flowbyactivitymethods/USGS_MYB_SandGravelConstruction.yaml b/flowsa/methods/flowbyactivitymethods/USGS_MYB_SandGravelConstruction.yaml index debd1359d..113c94639 100644 --- a/flowsa/methods/flowbyactivitymethods/USGS_MYB_SandGravelConstruction.yaml +++ b/flowsa/methods/flowbyactivitymethods/USGS_MYB_SandGravelConstruction.yaml @@ -1,12 +1,12 @@ # Downloads and parses CSV from USGS SandGravelConstruction Statistics and Information on an annual basis. 
--- source_name: USGS Mineral Yearbook Sand Gravel Construction -citable_url: https://prd-wret.s3.us-west-2.amazonaws.com/assets/palladium/production/atoms/files/myb1-2017-sandc.xls +citable_url: https://d9-wret.s3.us-west-2.amazonaws.com/assets/palladium/production/atoms/files/myb1-2017-sandc.xls date_generated: '2021-03-19' years_available: 2014-2017 format: xls url: - base_url: https://prd-wret.s3.us-west-2.amazonaws.com/assets/palladium/production/atoms/files/myb1-2017-sandc.xls + base_url: https://d9-wret.s3.us-west-2.amazonaws.com/assets/palladium/production/atoms/files/myb1-2017-sandc.xls url_replace_fxn: !script_function:USGS_MYB usgs_myb_url_helper call_response_fxn: !script_function:USGS_MYB usgs_sgc_call parse_response_fxn: !script_function:USGS_MYB usgs_sgc_parse diff --git a/flowsa/methods/flowbyactivitymethods/USGS_MYB_SandGravelIndustrial.yaml b/flowsa/methods/flowbyactivitymethods/USGS_MYB_SandGravelIndustrial.yaml index 610e34ea0..814c5257b 100644 --- a/flowsa/methods/flowbyactivitymethods/USGS_MYB_SandGravelIndustrial.yaml +++ b/flowsa/methods/flowbyactivitymethods/USGS_MYB_SandGravelIndustrial.yaml @@ -1,12 +1,12 @@ # Downloads and parses CSV from USGS SandGravelIndustrial Statistics and Information on an annual basis. 
--- source_name: USGS Mineral Yearbook Sand Gravel Industrial -citable_url: https://prd-wret.s3.us-west-2.amazonaws.com/assets/palladium/production/atoms/files/myb1-2018-silic-adv.xlsx +citable_url: https://pubs.usgs.gov/myb/vol1/2018/myb1-2018-silica.xlsx date_generated: '2021-03-26' years_available: 2014-2018 format: xls url: - base_url: https://prd-wret.s3.us-west-2.amazonaws.com/assets/palladium/production/atoms/files/myb1-2018-silic-adv.xlsx + base_url: https://pubs.usgs.gov/myb/vol1/2018/myb1-2018-silica.xlsx url_replace_fxn: !script_function:USGS_MYB usgs_myb_url_helper call_response_fxn: !script_function:USGS_MYB usgs_sgi_call parse_response_fxn: !script_function:USGS_MYB usgs_sgi_parse diff --git a/flowsa/methods/flowbyactivitymethods/USGS_MYB_Silver.yaml b/flowsa/methods/flowbyactivitymethods/USGS_MYB_Silver.yaml index 44c5bb6dc..3d5f7b34d 100644 --- a/flowsa/methods/flowbyactivitymethods/USGS_MYB_Silver.yaml +++ b/flowsa/methods/flowbyactivitymethods/USGS_MYB_Silver.yaml @@ -1,12 +1,12 @@ # Downloads and parses CSV from USGS Silver Statistics and Information on an annual basis. 
--- source_name: USGS Mineral Yearbook Silver -citable_url: https://prd-wret.s3-us-west-2.amazonaws.com/assets/palladium/production/atoms/files/myb1-2016-silve.xls +citable_url: https://d9-wret.s3-us-west-2.amazonaws.com/assets/palladium/production/atoms/files/myb1-2016-silve.xls date_generated: '2021-03-19' years_available: 2012-2016 format: xls url: - base_url: https://prd-wret.s3-us-west-2.amazonaws.com/assets/palladium/production/atoms/files/myb1-2016-silve.xls + base_url: https://d9-wret.s3-us-west-2.amazonaws.com/assets/palladium/production/atoms/files/myb1-2016-silve.xls url_replace_fxn: !script_function:USGS_MYB usgs_myb_url_helper call_response_fxn: !script_function:USGS_MYB usgs_silver_call parse_response_fxn: !script_function:USGS_MYB usgs_silver_parse diff --git a/flowsa/methods/flowbyactivitymethods/USGS_MYB_SodaAsh.yaml b/flowsa/methods/flowbyactivitymethods/USGS_MYB_SodaAsh.yaml index 607456e31..cf87a714c 100644 --- a/flowsa/methods/flowbyactivitymethods/USGS_MYB_SodaAsh.yaml +++ b/flowsa/methods/flowbyactivitymethods/USGS_MYB_SodaAsh.yaml @@ -6,7 +6,7 @@ bib_id: USGS_MYB_SA # api_key_required: false format: xlsx url: - base_url: https://s3-us-west-2.amazonaws.com/prd-wret/assets/palladium/production/__url_text__/myb1-__year__-sodaa.__format__ + base_url: https://d9-wret.s3.us-west-2.amazonaws.com/assets/palladium/production/__url_text__/myb1-__year__-sodaa.__format__ url_replace_fxn: !script_function:USGS_MYB soda_url_helper call_response_fxn: !script_function:USGS_MYB soda_call parse_response_fxn: !script_function:USGS_MYB soda_parse @@ -37,3 +37,4 @@ formats: '2015': xlsx '2016': xlsx '2017': xls + diff --git a/flowsa/methods/flowbyactivitymethods/USGS_MYB_StoneCrushed.yaml b/flowsa/methods/flowbyactivitymethods/USGS_MYB_StoneCrushed.yaml index e8a6d9ef7..2d1c40534 100644 --- a/flowsa/methods/flowbyactivitymethods/USGS_MYB_StoneCrushed.yaml +++ b/flowsa/methods/flowbyactivitymethods/USGS_MYB_StoneCrushed.yaml @@ -1,18 +1,18 @@ # Downloads and 
parses CSV from USGS StoneCrushed Statistics and Information on an annual basis. --- source_name: USGS Mineral Yearbook Stone Crushed -citable_url: https://prd-wret.s3.us-west-2.amazonaws.com/assets/palladium/production/atoms/files/myb1-2017-stonc-adv.xlsx +citable_url: https://pubs.usgs.gov/myb/vol1/2018/myb1-2018-stone-crushed.xlsx date_generated: '2021-03-26' -years_available: 2013-2017 +years_available: 2014-2018 format: xls url: - base_url: https://prd-wret.s3.us-west-2.amazonaws.com/assets/palladium/production/atoms/files/myb1-2017-stonc-adv.xlsx + base_url: https://pubs.usgs.gov/myb/vol1/2018/myb1-2018-stone-crushed.xlsx url_replace_fxn: !script_function:USGS_MYB usgs_myb_url_helper call_response_fxn: !script_function:USGS_MYB usgs_stonecr_call parse_response_fxn: !script_function:USGS_MYB usgs_stonecr_parse years: -- 2013 - 2014 - 2015 - 2016 - 2017 +- 2018 diff --git a/flowsa/methods/flowbyactivitymethods/USGS_MYB_StoneDimension.yaml b/flowsa/methods/flowbyactivitymethods/USGS_MYB_StoneDimension.yaml index d6504c700..d0f3de6ff 100644 --- a/flowsa/methods/flowbyactivitymethods/USGS_MYB_StoneDimension.yaml +++ b/flowsa/methods/flowbyactivitymethods/USGS_MYB_StoneDimension.yaml @@ -1,12 +1,12 @@ # Downloads and parses CSV from USGS StoneDimension Statistics and Information on an annual basis. 
--- source_name: USGS Mineral Yearbook Stone Dimension -citable_url: https://prd-wret.s3.us-west-2.amazonaws.com/assets/palladium/production/atoms/files/myb1-2017-stond.xls +citable_url: https://d9-wret.s3.us-west-2.amazonaws.com/assets/palladium/production/atoms/files/myb1-2017-stond.xls date_generated: '2021-03-19' years_available: 2013-2017 format: xls url: - base_url: https://prd-wret.s3.us-west-2.amazonaws.com/assets/palladium/production/atoms/files/myb1-2017-stond.xls + base_url: https://d9-wret.s3.us-west-2.amazonaws.com/assets/palladium/production/atoms/files/myb1-2017-stond.xls url_replace_fxn: !script_function:USGS_MYB usgs_myb_url_helper call_response_fxn: !script_function:USGS_MYB usgs_stonedis_call parse_response_fxn: !script_function:USGS_MYB usgs_stonedis_parse diff --git a/flowsa/methods/flowbyactivitymethods/USGS_MYB_Strontium.yaml b/flowsa/methods/flowbyactivitymethods/USGS_MYB_Strontium.yaml index 331deed38..0ded944d2 100644 --- a/flowsa/methods/flowbyactivitymethods/USGS_MYB_Strontium.yaml +++ b/flowsa/methods/flowbyactivitymethods/USGS_MYB_Strontium.yaml @@ -1,12 +1,12 @@ # Downloads and parses CSV from USGS Strontium Statistics and Information on an annual basis. 
--- source_name: USGS Mineral Yearbook Strontium -citable_url: https://prd-wret.s3-us-west-2.amazonaws.com/assets/palladium/production/atoms/files/myb1-2018-stron-adv.xlsx +citable_url: https://d9-wret.s3.us-west-2.amazonaws.com/assets/palladium/production/atoms/files/myb1-2018-stron.xls date_generated: '2021-03-26' years_available: 2014-2018 format: xls url: - base_url: https://prd-wret.s3-us-west-2.amazonaws.com/assets/palladium/production/atoms/files/myb1-2018-stron-adv.xlsx + base_url: https://d9-wret.s3.us-west-2.amazonaws.com/assets/palladium/production/atoms/files/myb1-2018-stron.xls url_replace_fxn: !script_function:USGS_MYB usgs_myb_url_helper call_response_fxn: !script_function:USGS_MYB usgs_strontium_call parse_response_fxn: !script_function:USGS_MYB usgs_strontium_parse diff --git a/flowsa/methods/flowbyactivitymethods/USGS_MYB_Talc.yaml b/flowsa/methods/flowbyactivitymethods/USGS_MYB_Talc.yaml index a377e3224..278f2fc4a 100644 --- a/flowsa/methods/flowbyactivitymethods/USGS_MYB_Talc.yaml +++ b/flowsa/methods/flowbyactivitymethods/USGS_MYB_Talc.yaml @@ -1,12 +1,12 @@ # Downloads and parses CSV from USGS Talc Statistics and Information on an annual basis. 
--- source_name: USGS Mineral Yearbook Talc -citable_url: https://prd-wret.s3.us-west-2.amazonaws.com/assets/palladium/production/atoms/files/myb1-2017-talc.xls +citable_url: https://d9-wret.s3.us-west-2.amazonaws.com/assets/palladium/production/atoms/files/myb1-2017-talc.xls date_generated: '2021-03-19' years_available: 2013-2017 format: xls url: - base_url: https://prd-wret.s3.us-west-2.amazonaws.com/assets/palladium/production/atoms/files/myb1-2017-talc.xls + base_url: https://d9-wret.s3.us-west-2.amazonaws.com/assets/palladium/production/atoms/files/myb1-2017-talc.xls url_replace_fxn: !script_function:USGS_MYB usgs_myb_url_helper call_response_fxn: !script_function:USGS_MYB usgs_talc_call parse_response_fxn: !script_function:USGS_MYB usgs_talc_parse diff --git a/flowsa/methods/flowbyactivitymethods/USGS_MYB_Titanium.yaml b/flowsa/methods/flowbyactivitymethods/USGS_MYB_Titanium.yaml index 6b9c53c2a..2d535eb9c 100644 --- a/flowsa/methods/flowbyactivitymethods/USGS_MYB_Titanium.yaml +++ b/flowsa/methods/flowbyactivitymethods/USGS_MYB_Titanium.yaml @@ -1,12 +1,12 @@ # Downloads and parses CSV from USGS Titanium Statistics and Information on an annual basis. 
--- source_name: USGS Mineral Yearbook Titanium -citable_url: https://prd-wret.s3.us-west-2.amazonaws.com/assets/palladium/production/atoms/files/myb1-2017-titan.xls +citable_url: https://d9-wret.s3.us-west-2.amazonaws.com/assets/palladium/production/atoms/files/myb1-2017-titan.xls date_generated: '2021-03-19' years_available: 2013-2017 format: xls url: - base_url: https://prd-wret.s3.us-west-2.amazonaws.com/assets/palladium/production/atoms/files/myb1-2017-titan.xls + base_url: https://d9-wret.s3.us-west-2.amazonaws.com/assets/palladium/production/atoms/files/myb1-2017-titan.xls url_replace_fxn: !script_function:USGS_MYB usgs_myb_url_helper call_response_fxn: !script_function:USGS_MYB usgs_titanium_call parse_response_fxn: !script_function:USGS_MYB usgs_titanium_parse diff --git a/flowsa/methods/flowbyactivitymethods/USGS_MYB_Tungsten.yaml b/flowsa/methods/flowbyactivitymethods/USGS_MYB_Tungsten.yaml index 71d238966..aceeae3cf 100644 --- a/flowsa/methods/flowbyactivitymethods/USGS_MYB_Tungsten.yaml +++ b/flowsa/methods/flowbyactivitymethods/USGS_MYB_Tungsten.yaml @@ -1,12 +1,12 @@ # Downloads and parses CSV from USGS Tungsten Statistics and Information on an annual basis. 
--- source_name: USGS Mineral Yearbook Tungsten -citable_url: https://prd-wret.s3.us-west-2.amazonaws.com/assets/palladium/production/atoms/files/myb1-2017-tungs.xls +citable_url: https://d9-wret.s3.us-west-2.amazonaws.com/assets/palladium/production/atoms/files/myb1-2017-tungs.xls date_generated: '2021-03-19' years_available: 2013-2017 format: xls url: - base_url: https://prd-wret.s3.us-west-2.amazonaws.com/assets/palladium/production/atoms/files/myb1-2017-tungs.xls + base_url: https://d9-wret.s3.us-west-2.amazonaws.com/assets/palladium/production/atoms/files/myb1-2017-tungs.xls url_replace_fxn: !script_function:USGS_MYB usgs_myb_url_helper call_response_fxn: !script_function:USGS_MYB usgs_tungsten_call parse_response_fxn: !script_function:USGS_MYB usgs_tungsten_parse diff --git a/flowsa/methods/flowbyactivitymethods/USGS_MYB_Vermiculite.yaml b/flowsa/methods/flowbyactivitymethods/USGS_MYB_Vermiculite.yaml index 6177028ae..00e322023 100644 --- a/flowsa/methods/flowbyactivitymethods/USGS_MYB_Vermiculite.yaml +++ b/flowsa/methods/flowbyactivitymethods/USGS_MYB_Vermiculite.yaml @@ -1,12 +1,12 @@ # Downloads and parses CSV from USGS Vermiculite Statistics and Information on an annual basis. 
--- source_name: USGS Mineral Yearbook Vermiculite -citable_url: https://prd-wret.s3.us-west-2.amazonaws.com/assets/palladium/production/atoms/files/myb1-2018-vermi-adv.xlsx +citable_url: https://pubs.usgs.gov/myb/vol1/2018/myb1-2018-vermiculite.xlsx date_generated: '2021-03-19' years_available: 2014-2018 format: xls url: - base_url: https://prd-wret.s3.us-west-2.amazonaws.com/assets/palladium/production/atoms/files/myb1-2018-vermi-adv.xlsx + base_url: https://pubs.usgs.gov/myb/vol1/2018/myb1-2018-vermiculite.xlsx url_replace_fxn: !script_function:USGS_MYB usgs_myb_url_helper call_response_fxn: !script_function:USGS_MYB usgs_vermiculite_call parse_response_fxn: !script_function:USGS_MYB usgs_vermiculite_parse diff --git a/flowsa/methods/flowbyactivitymethods/USGS_MYB_Zeolites.yaml b/flowsa/methods/flowbyactivitymethods/USGS_MYB_Zeolites.yaml index 8273d804a..6e5fe1607 100644 --- a/flowsa/methods/flowbyactivitymethods/USGS_MYB_Zeolites.yaml +++ b/flowsa/methods/flowbyactivitymethods/USGS_MYB_Zeolites.yaml @@ -1,7 +1,7 @@ # Downloads and parses CSV from USGS Zeolites Statistics and Information on an annual basis. --- source_name: USGS Mineral Yearbook Zeolites -citable_url: https://prd-wret.s3.us-west-2.amazonaws.com/assets/palladium/production/atoms/files/myb-2018--zeoli-adv.xlsx +citable_url: https://pubs.usgs.gov/myb/vol1/2018/myb1-2018-zeolites.xlsx date_generated: '2021-03-19' years_available: 2014-2018 format: xls diff --git a/flowsa/methods/flowbyactivitymethods/USGS_MYB_Zinc.yaml b/flowsa/methods/flowbyactivitymethods/USGS_MYB_Zinc.yaml index 19dff638c..918b3fda3 100644 --- a/flowsa/methods/flowbyactivitymethods/USGS_MYB_Zinc.yaml +++ b/flowsa/methods/flowbyactivitymethods/USGS_MYB_Zinc.yaml @@ -1,12 +1,12 @@ # Downloads and parses CSV from USGS Zinc Statistics and Information on an annual basis. 
--- source_name: USGS Mineral Yearbook Zinc -citable_url: https://prd-wret.s3.us-west-2.amazonaws.com/assets/palladium/production/atoms/files/myb1-2017-zinc.xls +citable_url: https://d9-wret.s3.us-west-2.amazonaws.com/assets/palladium/production/atoms/files/myb1-2017-zinc.xls date_generated: '2021-03-26' years_available: 2013-2017 format: xls url: - base_url: https://prd-wret.s3.us-west-2.amazonaws.com/assets/palladium/production/atoms/files/myb1-2017-zinc.xls + base_url: https://d9-wret.s3.us-west-2.amazonaws.com/assets/palladium/production/atoms/files/myb1-2017-zinc.xls url_replace_fxn: !script_function:USGS_MYB usgs_myb_url_helper call_response_fxn: !script_function:USGS_MYB usgs_zinc_call parse_response_fxn: !script_function:USGS_MYB usgs_zinc_parse diff --git a/flowsa/methods/flowbyactivitymethods/USGS_MYB_Zirconium.yaml b/flowsa/methods/flowbyactivitymethods/USGS_MYB_Zirconium.yaml index 834b4f6ad..a9e32d857 100644 --- a/flowsa/methods/flowbyactivitymethods/USGS_MYB_Zirconium.yaml +++ b/flowsa/methods/flowbyactivitymethods/USGS_MYB_Zirconium.yaml @@ -1,12 +1,12 @@ # Downloads and parses CSV from USGS Zirconium Statistics and Information on an annual basis. 
--- source_name: USGS Mineral Yearbook Zirconium -citable_url: https://prd-wret.s3.us-west-2.amazonaws.com/assets/palladium/production/atoms/files/myb1-2017-zirco.xls +citable_url: https://d9-wret.s3.us-west-2.amazonaws.com/assets/palladium/production/atoms/files/myb1-2017-zirco.xls date_generated: '2021-03-26' years_available: 2013-2017 format: xls url: - base_url: https://prd-wret.s3.us-west-2.amazonaws.com/assets/palladium/production/atoms/files/myb1-2017-zirco.xls + base_url: https://d9-wret.s3.us-west-2.amazonaws.com/assets/palladium/production/atoms/files/myb1-2017-zirco.xls url_replace_fxn: !script_function:USGS_MYB usgs_myb_url_helper call_response_fxn: !script_function:USGS_MYB usgs_zirconium_call parse_response_fxn: !script_function:USGS_MYB usgs_zirconium_parse diff --git a/flowsa/methods/flowbyactivitymethods/USGS_NWIS_WU.yaml b/flowsa/methods/flowbyactivitymethods/USGS_NWIS_WU.yaml index fe1e75fee..9d63e3e9c 100644 --- a/flowsa/methods/flowbyactivitymethods/USGS_NWIS_WU.yaml +++ b/flowsa/methods/flowbyactivitymethods/USGS_NWIS_WU.yaml @@ -15,9 +15,9 @@ url: wu_county: ALL # only for the county level wu_category: ALL key_param: None -url_replace_fxn: usgs_URL_helper -call_response_fxn: usgs_call -parse_response_fxn: usgs_parse +url_replace_fxn: !script_function:USGS_NWIS_WU usgs_URL_helper +call_response_fxn: !script_function:USGS_NWIS_WU usgs_call +parse_response_fxn: !script_function:USGS_NWIS_WU usgs_parse years: - 2010 - 2015 diff --git a/flowsa/methods/flowbyactivitymethods/USGS_WU_Coef.yaml b/flowsa/methods/flowbyactivitymethods/USGS_WU_Coef.yaml index 548162692..ca5b8c02a 100644 --- a/flowsa/methods/flowbyactivitymethods/USGS_WU_Coef.yaml +++ b/flowsa/methods/flowbyactivitymethods/USGS_WU_Coef.yaml @@ -8,6 +8,6 @@ api_name: None api_key_required: false format: csv # comma delineated data url: None -parse_response_fxn: usgs_coef_parse +parse_response_fxn: !script_function:USGS_WU_Coef usgs_coef_parse years: - 2005 diff --git 
a/flowsa/methods/flowbyactivitymethods/stateio_Common.yaml b/flowsa/methods/flowbyactivitymethods/stateio_Common.yaml new file mode 100644 index 000000000..9a61c425e --- /dev/null +++ b/flowsa/methods/flowbyactivitymethods/stateio_Common.yaml @@ -0,0 +1,18 @@ +author: Li et al. +# source_name: stateio_Make_Summary # Update for each dataset +source_url: https://github.com/USEPA/stateior +original_data_download_date: +# bib_id: # Update for each dataset +api_name: None +api_key_required: False +format: rds # R data format +url: None # Uses esupy data retrieval +parse_response_fxn: !script_function:stateio parse_statior +years: +- 2012 +- 2013 +- 2014 +- 2015 +- 2016 +- 2017 +# datatype: 'State_Summary_Make' # Update for each dataset diff --git a/flowsa/methods/flowbyactivitymethods/stateio_Industry_GO.yaml b/flowsa/methods/flowbyactivitymethods/stateio_Industry_GO.yaml new file mode 100644 index 000000000..6445af195 --- /dev/null +++ b/flowsa/methods/flowbyactivitymethods/stateio_Industry_GO.yaml @@ -0,0 +1,4 @@ +!include:stateio_Common.yaml +source_name: stateio_Industry_GO +bib_id: +datatype: 'State_Summary_IndustryOutput' diff --git a/flowsa/methods/flowbyactivitymethods/stateio_Make_Summary.yaml b/flowsa/methods/flowbyactivitymethods/stateio_Make_Summary.yaml new file mode 100644 index 000000000..b8b8c62fb --- /dev/null +++ b/flowsa/methods/flowbyactivitymethods/stateio_Make_Summary.yaml @@ -0,0 +1,4 @@ +!include:stateio_Common.yaml +source_name: stateio_Make_Summary +bib_id: +datatype: 'State_Summary_Make' diff --git a/flowsa/methods/flowbyactivitymethods/stateio_Use_Summary.yaml b/flowsa/methods/flowbyactivitymethods/stateio_Use_Summary.yaml new file mode 100644 index 000000000..da1f61c89 --- /dev/null +++ b/flowsa/methods/flowbyactivitymethods/stateio_Use_Summary.yaml @@ -0,0 +1,4 @@ +!include:stateio_Common.yaml +source_name: stateio_Use_Summary +bib_id: +datatype: 'State_Summary_Use' diff --git a/flowsa/methods/flowbysectoractivitysets/BLS_QCEW_asets.csv 
b/flowsa/methods/flowbysectoractivitysets/BLS_QCEW_asets.csv index 45465aace..061c8b201 100644 --- a/flowsa/methods/flowbysectoractivitysets/BLS_QCEW_asets.csv +++ b/flowsa/methods/flowbysectoractivitysets/BLS_QCEW_asets.csv @@ -146,6 +146,9 @@ qcew,115310, qcew,21, qcew,211, qcew,2111, +qcew,21111, +qcew,211111, +qcew,211112, qcew,21112, qcew,211120, qcew,21113, @@ -164,6 +167,8 @@ qcew,212221, qcew,212222, qcew,21223, qcew,212230, +qcew,212231, +qcew,212234, qcew,21229, qcew,212291, qcew,212299, @@ -203,6 +208,7 @@ qcew,221115, qcew,221116, qcew,221117, qcew,221118, +qcew,221119, qcew,22112, qcew,221121, qcew,221122, @@ -321,14 +327,22 @@ qcew,311212, qcew,311213, qcew,31122, qcew,311221, +qcew,311222, +qcew,311223, qcew,311224, qcew,311225, qcew,31123, qcew,311230, qcew,3113, qcew,31131, +qcew,311311, +qcew,311312, qcew,311313, qcew,311314, +qcew,31132, +qcew,311320, +qcew,31133, +qcew,311330, qcew,31134, qcew,311340, qcew,31135, @@ -359,6 +373,8 @@ qcew,311615, qcew,3117, qcew,31171, qcew,311710, +qcew,311711, +qcew,311712, qcew,3118, qcew,31181, qcew,311811, @@ -366,6 +382,8 @@ qcew,311812, qcew,311813, qcew,31182, qcew,311821, +qcew,311822, +qcew,311823, qcew,311824, qcew,31183, qcew,311830, @@ -396,24 +414,38 @@ qcew,312130, qcew,31214, qcew,312140, qcew,3122, +qcew,31221, +qcew,312210, +qcew,31222, +qcew,312221, +qcew,312229, qcew,31223, qcew,312230, qcew,313, qcew,3131, qcew,31311, qcew,313110, +qcew,313111, +qcew,313112, +qcew,313113, qcew,3132, qcew,31321, qcew,313210, qcew,31322, qcew,313220, +qcew,313221, +qcew,313222, qcew,31323, qcew,313230, qcew,31324, qcew,313240, +qcew,313241, +qcew,313249, qcew,3133, qcew,31331, qcew,313310, +qcew,313311, +qcew,313312, qcew,31332, qcew,313320, qcew,314, @@ -422,30 +454,62 @@ qcew,31411, qcew,314110, qcew,31412, qcew,314120, +qcew,314121, +qcew,314129, qcew,3149, qcew,31491, qcew,314910, +qcew,314911, +qcew,314912, qcew,31499, +qcew,314991, +qcew,314992, qcew,314994, qcew,314999, qcew,315, qcew,3151, qcew,31511, 
qcew,315110, +qcew,315111, +qcew,315119, qcew,31519, qcew,315190, +qcew,315191, +qcew,315192, qcew,3152, qcew,31521, qcew,315210, +qcew,315211, +qcew,315212, qcew,31522, qcew,315220, +qcew,315221, +qcew,315222, +qcew,315223, +qcew,315224, +qcew,315225, +qcew,315228, +qcew,31523, +qcew,315231, +qcew,315232, +qcew,315233, +qcew,315234, +qcew,315239, qcew,31524, qcew,315240, qcew,31528, qcew,315280, +qcew,31529, +qcew,315291, +qcew,315292, +qcew,315299, qcew,3159, qcew,31599, qcew,315990, +qcew,315991, +qcew,315992, +qcew,315993, +qcew,315999, qcew,316, qcew,3161, qcew,31611, @@ -453,10 +517,18 @@ qcew,316110, qcew,3162, qcew,31621, qcew,316210, +qcew,316211, +qcew,316212, +qcew,316213, +qcew,316214, +qcew,316219, qcew,3169, qcew,31699, +qcew,316991, qcew,316992, +qcew,316993, qcew,316998, +qcew,316999, qcew,321, qcew,3211, qcew,32111, @@ -493,22 +565,43 @@ qcew,3222, qcew,32221, qcew,322211, qcew,322212, +qcew,322213, +qcew,322214, +qcew,322215, qcew,322219, qcew,32222, qcew,322220, +qcew,322221, +qcew,322222, +qcew,322223, +qcew,322224, +qcew,322225, +qcew,322226, qcew,32223, qcew,322230, +qcew,322231, +qcew,322232, +qcew,322233, qcew,32229, qcew,322291, qcew,322299, qcew,323, qcew,3231, qcew,32311, +qcew,323110, qcew,323111, +qcew,323112, qcew,323113, +qcew,323114, +qcew,323115, +qcew,323116, qcew,323117, +qcew,323118, +qcew,323119, qcew,32312, qcew,323120, +qcew,323121, +qcew,323122, qcew,324, qcew,3241, qcew,32411, @@ -527,9 +620,16 @@ qcew,32512, qcew,325120, qcew,32513, qcew,325130, +qcew,325131, +qcew,325132, qcew,32518, qcew,325180, +qcew,325181, +qcew,325182, +qcew,325188, qcew,32519, +qcew,325191, +qcew,325192, qcew,325193, qcew,325194, qcew,325199, @@ -539,6 +639,8 @@ qcew,325211, qcew,325212, qcew,32522, qcew,325220, +qcew,325221, +qcew,325222, qcew,3253, qcew,32531, qcew,325311, @@ -592,6 +694,7 @@ qcew,32616, qcew,326160, qcew,32619, qcew,326191, +qcew,326192, qcew,326199, qcew,3262, qcew,32621, @@ -606,8 +709,16 @@ qcew,327, qcew,3271, qcew,32711, 
qcew,327110, +qcew,327111, +qcew,327112, +qcew,327113, qcew,32712, qcew,327120, +qcew,327121, +qcew,327122, +qcew,327123, +qcew,327124, +qcew,327125, qcew,3272, qcew,32721, qcew,327211, @@ -641,6 +752,8 @@ qcew,331, qcew,3311, qcew,33111, qcew,331110, +qcew,331111, +qcew,331112, qcew,3312, qcew,33121, qcew,331210, @@ -649,15 +762,24 @@ qcew,331221, qcew,331222, qcew,3313, qcew,33131, +qcew,331311, +qcew,331312, qcew,331313, qcew,331314, qcew,331315, +qcew,331316, qcew,331318, +qcew,331319, qcew,3314, qcew,33141, qcew,331410, +qcew,331411, +qcew,331419, qcew,33142, qcew,331420, +qcew,331421, +qcew,331422, +qcew,331423, qcew,33149, qcew,331491, qcew,331492, @@ -667,8 +789,12 @@ qcew,331511, qcew,331512, qcew,331513, qcew,33152, +qcew,331521, +qcew,331522, qcew,331523, qcew,331524, +qcew,331525, +qcew,331528, qcew,331529, qcew,332, qcew,3321, @@ -676,10 +802,16 @@ qcew,33211, qcew,332111, qcew,332112, qcew,332114, +qcew,332115, +qcew,332116, qcew,332117, qcew,332119, qcew,3322, qcew,33221, +qcew,332211, +qcew,332212, +qcew,332213, +qcew,332214, qcew,332215, qcew,332216, qcew,3323, @@ -704,6 +836,8 @@ qcew,33251, qcew,332510, qcew,3326, qcew,33261, +qcew,332611, +qcew,332612, qcew,332613, qcew,332618, qcew,3327, @@ -728,7 +862,10 @@ qcew,332991, qcew,332992, qcew,332993, qcew,332994, +qcew,332995, qcew,332996, +qcew,332997, +qcew,332998, qcew,332999, qcew,333, qcew,3331, @@ -741,28 +878,50 @@ qcew,33313, qcew,333131, qcew,333132, qcew,3332, +qcew,33321, +qcew,333210, +qcew,33322, +qcew,333220, qcew,33324, qcew,333241, qcew,333242, qcew,333243, qcew,333244, qcew,333249, +qcew,33329, +qcew,333291, +qcew,333292, +qcew,333293, +qcew,333294, +qcew,333295, +qcew,333298, qcew,3333, qcew,33331, +qcew,333311, +qcew,333312, +qcew,333313, qcew,333314, +qcew,333315, qcew,333316, qcew,333318, +qcew,333319, qcew,3334, qcew,33341, +qcew,333411, +qcew,333412, qcew,333413, qcew,333414, qcew,333415, qcew,3335, qcew,33351, qcew,333511, +qcew,333512, +qcew,333513, qcew,333514, 
qcew,333515, +qcew,333516, qcew,333517, +qcew,333518, qcew,333519, qcew,3336, qcew,33361, @@ -772,7 +931,9 @@ qcew,333613, qcew,333618, qcew,3339, qcew,33391, +qcew,333911, qcew,333912, +qcew,333913, qcew,333914, qcew,33392, qcew,333921, @@ -793,7 +954,9 @@ qcew,3341, qcew,33411, qcew,334111, qcew,334112, +qcew,334113, qcew,334118, +qcew,334119, qcew,3342, qcew,33421, qcew,334210, @@ -806,8 +969,11 @@ qcew,33431, qcew,334310, qcew,3344, qcew,33441, +qcew,334411, qcew,334412, qcew,334413, +qcew,334414, +qcew,334415, qcew,334416, qcew,334417, qcew,334418, @@ -822,9 +988,12 @@ qcew,334514, qcew,334515, qcew,334516, qcew,334517, +qcew,334518, qcew,334519, qcew,3346, qcew,33461, +qcew,334611, +qcew,334612, qcew,334613, qcew,334614, qcew,335, @@ -838,8 +1007,14 @@ qcew,335129, qcew,3352, qcew,33521, qcew,335210, +qcew,335211, +qcew,335212, qcew,33522, qcew,335220, +qcew,335221, +qcew,335222, +qcew,335224, +qcew,335228, qcew,3353, qcew,33531, qcew,335311, @@ -875,8 +1050,12 @@ qcew,336214, qcew,3363, qcew,33631, qcew,336310, +qcew,336311, +qcew,336312, qcew,33632, qcew,336320, +qcew,336321, +qcew,336322, qcew,33633, qcew,336330, qcew,33634, @@ -889,6 +1068,8 @@ qcew,33637, qcew,336370, qcew,33639, qcew,336390, +qcew,336391, +qcew,336399, qcew,3364, qcew,33641, qcew,336411, @@ -919,6 +1100,7 @@ qcew,337122, qcew,337124, qcew,337125, qcew,337127, +qcew,337129, qcew,3372, qcew,33721, qcew,337211, @@ -933,6 +1115,7 @@ qcew,337920, qcew,339, qcew,3391, qcew,33911, +qcew,339111, qcew,339112, qcew,339113, qcew,339114, @@ -941,12 +1124,22 @@ qcew,339116, qcew,3399, qcew,33991, qcew,339910, +qcew,339911, +qcew,339912, +qcew,339913, +qcew,339914, qcew,33992, qcew,339920, qcew,33993, qcew,339930, +qcew,339931, +qcew,339932, qcew,33994, qcew,339940, +qcew,339941, +qcew,339942, +qcew,339943, +qcew,339944, qcew,33995, qcew,339950, qcew,33999, @@ -1132,8 +1325,10 @@ qcew,4412, qcew,44121, qcew,441210, qcew,44122, +qcew,441221, qcew,441222, qcew,441228, +qcew,441229, qcew,4413, 
qcew,44131, qcew,441310, @@ -1151,6 +1346,13 @@ qcew,442291, qcew,442299, qcew,443, qcew,4431, +qcew,44311, +qcew,443111, +qcew,443112, +qcew,44312, +qcew,443120, +qcew,44313, +qcew,443130, qcew,44314, qcew,443141, qcew,443142, @@ -1242,7 +1444,13 @@ qcew,4512, qcew,45121, qcew,451211, qcew,451212, +qcew,45122, +qcew,451220, qcew,452, +qcew,4521, +qcew,45211, +qcew,452111, +qcew,452112, qcew,4522, qcew,45221, qcew,452210, @@ -1250,6 +1458,11 @@ qcew,4523, qcew,45231, qcew,452311, qcew,452319, +qcew,4529, +qcew,45291, +qcew,452910, +qcew,45299, +qcew,452990, qcew,453, qcew,4531, qcew,45311, @@ -1276,12 +1489,18 @@ qcew,454, qcew,4541, qcew,45411, qcew,454110, +qcew,454111, +qcew,454112, +qcew,454113, qcew,4542, qcew,45421, qcew,454210, qcew,4543, qcew,45431, qcew,454310, +qcew,454311, +qcew,454312, +qcew,454319, qcew,45439, qcew,454390, qcew,48-49, @@ -1454,6 +1673,10 @@ qcew,51219, qcew,512191, qcew,512199, qcew,5122, +qcew,51221, +qcew,512210, +qcew,51222, +qcew,512220, qcew,51223, qcew,512230, qcew,51224, @@ -1472,19 +1695,40 @@ qcew,515120, qcew,5152, qcew,51521, qcew,515210, +qcew,516, +qcew,5161, +qcew,51611, +qcew,516110, qcew,517, +qcew,5171, +qcew,51711, +qcew,517110, +qcew,5172, +qcew,51721, +qcew,517210, +qcew,517211, +qcew,517212, qcew,5173, qcew,51731, +qcew,517310, qcew,517311, qcew,517312, qcew,5174, qcew,51741, qcew,517410, +qcew,5175, +qcew,51751, +qcew,517510, qcew,5179, qcew,51791, +qcew,517910, qcew,517911, qcew,517919, qcew,518, +qcew,5181, +qcew,51811, +qcew,518111, +qcew,518112, qcew,5182, qcew,51821, qcew,518210, @@ -1585,6 +1829,8 @@ qcew,52591, qcew,525910, qcew,52592, qcew,525920, +qcew,52593, +qcew,525930, qcew,52599, qcew,525990, qcew,53, @@ -1619,12 +1865,20 @@ qcew,532120, qcew,5322, qcew,53221, qcew,532210, +qcew,53222, +qcew,532220, +qcew,53223, +qcew,532230, qcew,53228, qcew,532281, qcew,532282, qcew,532283, qcew,532284, qcew,532289, +qcew,53229, +qcew,532291, +qcew,532292, +qcew,532299, qcew,5323, qcew,53231, qcew,532310, @@ 
-1699,6 +1953,9 @@ qcew,54169, qcew,541690, qcew,5417, qcew,54171, +qcew,541710, +qcew,541711, +qcew,541712, qcew,541713, qcew,541714, qcew,541715, @@ -1750,6 +2007,7 @@ qcew,56121, qcew,561210, qcew,5613, qcew,56131, +qcew,561310, qcew,561311, qcew,561312, qcew,56132, @@ -2036,6 +2294,14 @@ qcew,7213, qcew,72131, qcew,721310, qcew,722, +qcew,7221, +qcew,72211, +qcew,722110, +qcew,7222, +qcew,72221, +qcew,722211, +qcew,722212, +qcew,722213, qcew,7223, qcew,72231, qcew,722310, diff --git a/flowsa/methods/flowbysectoractivitysets/README.md b/flowsa/methods/flowbysectoractivitysets/README.md index 2fe666863..ba230f4d9 100644 --- a/flowsa/methods/flowbysectoractivitysets/README.md +++ b/flowsa/methods/flowbysectoractivitysets/README.md @@ -1,5 +1,27 @@ # Flow-By-Sector Activity Sets -Flow-By-Sector (FBS) activity sets are an optional method of assigning Flow-By-Actiivty (FBA) -activities to an activity set defined in an FBS method yaml. Activity set csv files are generally -created in the [scripts directory](https://github.com/USEPA/flowsa/tree/master/scripts/FlowBySector_Activity_Sets). -These csvs are not required, but are recommended when an FBA has a large number of activities. +Flow-By-Sector (FBS) activity sets are an optional method of assigning +Flow-By-Actiivty (FBA) activities to an activity set defined in an FBS +method yaml. Activity set csv files are generally created in the +[scripts directory](https://github.com/USEPA/flowsa/tree/master/scripts/FlowBySector_Activity_Sets). +These csvs are not required, but are recommended when an FBA has a large +number of activities. + +The CSVs are called on in the FBS yaml under the `name:` parameter, using +the tag `!from_index:CSVName.csv ActivitySetColumnSubset`. Where +_CSVName.csv_ is the name of the activity set file and +_ActivitySetColumnSubset_ is the value in the "activity_set" column to call +on. See the example below. 
+ +``` +"EPA_NEI_Onroad": + data_format: 'FBA' + class: Chemicals + geoscale_to_use: national + year: 2017 + activity_to_sector_mapping: 'SCC' + clean_fba_df_fxn: !script_function:EPA_NEI clean_NEI_fba + fedefl_mapping: 'NEI' + activity_sets: + direct_allocation: + names: !from_index:NEI_Onroad_2017_asets.csv direct_allocation +``` diff --git a/flowsa/methods/flowbysectormethods/CAP_HAP_national_2017.yaml b/flowsa/methods/flowbysectormethods/CAP_HAP_national_2017.yaml index 60bc0c962..e2736d756 100644 --- a/flowsa/methods/flowbysectormethods/CAP_HAP_national_2017.yaml +++ b/flowsa/methods/flowbysectormethods/CAP_HAP_national_2017.yaml @@ -22,7 +22,6 @@ _allocation_types: year: 2014 geoscale: national flows: None # Verify what this does - clean_fba_df_fxn: !script_function:BLS_QCEW clean_bls_qcew_fba clean_fba_wsec_df_fxn: !script_function:BLS_QCEW bls_clean_allocation_fba_w_sec _use_allocation: &use_allocation @@ -43,9 +42,11 @@ _allocation_types: helper_source_class: "Employment" helper_source_year: 2012 helper_flow: - - "Number of employees" + - "Number of employees, Federal Government" + - "Number of employees, State Government" + - "Number of employees, Local Government" + - "Number of employees, Private" helper_from_scale: national - clean_helper_fba: !script_function:BLS_QCEW clean_bls_qcew_fba clean_helper_fba_wsec: !script_function:BLS_QCEW bls_clean_allocation_fba_w_sec @@ -76,11 +77,11 @@ source_names: - AREA HARVESTED allocation_compartment: None geoscale: state - clean_fba_df_fxn: !script_function:USDA_CoA_Cropland coa_irrigated_cropland_fba_cleanup clean_fba_w_sec_df_fxn: !script_function:USDA_CoA_Cropland disaggregate_coa_cropland_to_6_digit_naics # OR attribution_method: function attribution function: !script_function:USDA_CoA_Cropland disaggregate_coa_cropland_to_6_digit_naics # wrapped in an appropriate aggregation + industry_combustion_coal: #only allocating to 3digits <<: *mecs_allocation names: !from_index:NEI_Nonpoint_2017_asets.csv 
industry_combustion_coal @@ -153,9 +154,11 @@ source_names: helper_source_class: "Employment" helper_source_year: 2012 helper_flow: - - "Number of employees" + - "Number of employees, Federal Government" + - "Number of employees, State Government" + - "Number of employees, Local Government" + - "Number of employees, Private" helper_from_scale: national - clean_helper_fba: !script_function:BLS_QCEW clean_bls_qcew_fba clean_helper_fba_wsec: !script_function:BLS_QCEW bls_clean_allocation_fba_w_sec pesticides: # pesticides <<: *use_allocation @@ -199,7 +202,7 @@ source_names: clean_parameter: {"324110": 'ActivityProducedBy'} "stewiFBS": data_format: 'FBS_outside_flowsa' - FBS_datapull_fxn: stewicombo_to_sector + FBS_datapull_fxn: !script_function:stewiFBS stewicombo_to_sector inventory_dict: {"NEI":"2017", "TRI":"2017"} local_inventory_name: 'NEI_TRI_air_2017' clean_fbs_df_fxn: !script_function:EPA_NEI drop_GHGs diff --git a/flowsa/methods/flowbysectormethods/CNHWC_national_2014.yaml b/flowsa/methods/flowbysectormethods/CNHWC_national_2014.yaml index b2135215e..f5eaec62e 100644 --- a/flowsa/methods/flowbysectormethods/CNHWC_national_2014.yaml +++ b/flowsa/methods/flowbysectormethods/CNHWC_national_2014.yaml @@ -29,7 +29,9 @@ source_names: helper_source_class: "Employment" helper_source_year: 2014 helper_flow: - - "Number of employees" + - "Number of employees, Federal Government" + - "Number of employees, State Government" + - "Number of employees, Local Government" + - "Number of employees, Private" helper_from_scale: national - clean_helper_fba: clean_bls_qcew_fba - clean_helper_fba_wsec: bls_clean_allocation_fba_w_sec + clean_helper_fba_wsec: !script_function:BLS_QCEW bls_clean_allocation_fba_w_sec diff --git a/flowsa/methods/flowbysectormethods/CNHW_CA_2014.yaml b/flowsa/methods/flowbysectormethods/CNHW_CA_2014.yaml index 7c5c9b4b6..defd8d804 100644 --- a/flowsa/methods/flowbysectormethods/CNHW_CA_2014.yaml +++ b/flowsa/methods/flowbysectormethods/CNHW_CA_2014.yaml 
@@ -33,11 +33,13 @@ source_names: allocation_source_class: "Employment" allocation_source_year: 2014 allocation_flow: - - "Number of employees" + - "Number of employees, Federal Government" + - "Number of employees, State Government" + - "Number of employees, Local Government" + - "Number of employees, Private" allocation_compartment: None allocation_from_scale: state allocation_fba_load_scale: state - clean_allocation_fba: !script_function:BLS_QCEW clean_bls_qcew_fba clean_allocation_fba_w_sec: !script_function:BLS_QCEW bls_clean_allocation_fba_w_sec activity_set_2: names: diff --git a/flowsa/methods/flowbysectormethods/CNHW_state_2014.yaml b/flowsa/methods/flowbysectormethods/CNHW_state_2014.yaml index fcb9a45e1..ebd231ca8 100644 --- a/flowsa/methods/flowbysectormethods/CNHW_state_2014.yaml +++ b/flowsa/methods/flowbysectormethods/CNHW_state_2014.yaml @@ -1,4 +1,4 @@ -!include:BEA_summary_target.yaml +!include:USEEIO_summary_target.yaml # target_sector_level: NAICS_6 # target_sector_source: NAICS_2012_Code target_geoscale: state diff --git a/flowsa/methods/flowbysectormethods/Employment_national_2017.yaml b/flowsa/methods/flowbysectormethods/Employment_national_2017.yaml index b31332cd4..ef023ec74 100644 --- a/flowsa/methods/flowbysectormethods/Employment_national_2017.yaml +++ b/flowsa/methods/flowbysectormethods/Employment_national_2017.yaml @@ -13,7 +13,7 @@ source_names: source_fba_load_scale: national year: 2017 clean_fba_df_fxn: !script_function:BLS_QCEW clean_bls_qcew_fba_for_employment_sat_table - clean_fba_w_sec_df_fxn: !script_function:BLS_QCEW bls_clean_allocation_fba_w_sec_sat_table + clean_fba_w_sec_df_fxn: !script_function:BLS_QCEW bls_clean_allocation_fba_w_sec activity_sets: qcew: names: !from_index:BLS_QCEW_asets.csv qcew diff --git a/flowsa/methods/flowbysectormethods/Employment_state_2012.yaml b/flowsa/methods/flowbysectormethods/Employment_state_2012.yaml new file mode 100644 index 000000000..51bdc62e3 --- /dev/null +++ 
b/flowsa/methods/flowbysectormethods/Employment_state_2012.yaml @@ -0,0 +1,22 @@ +target_sector_level: NAICS_6 +target_sector_source: NAICS_2012_Code +target_geoscale: state +source_names: + "BLS_QCEW": + data_format: 'FBA' + class: Employment + geoscale_to_use: state + source_fba_load_scale: state + year: 2012 + clean_fba_df_fxn: !script_function:BLS_QCEW clean_bls_qcew_fba_for_employment_sat_table + clean_fba_w_sec_df_fxn: !script_function:BLS_QCEW bls_clean_allocation_fba_w_sec + activity_sets: + qcew: + names: !from_index:BLS_QCEW_asets.csv qcew + allocation_method: direct + allocation_source: None + allocation_source_class: None + allocation_source_year: None + allocation_flow: None + allocation_compartment: None + allocation_from_scale: state diff --git a/flowsa/methods/flowbysectormethods/Employment_state_2013.yaml b/flowsa/methods/flowbysectormethods/Employment_state_2013.yaml new file mode 100644 index 000000000..f155da27b --- /dev/null +++ b/flowsa/methods/flowbysectormethods/Employment_state_2013.yaml @@ -0,0 +1,23 @@ +target_sector_level: NAICS_6 +target_sector_source: NAICS_2012_Code +target_geoscale: state +source_names: + "BLS_QCEW": + data_format: 'FBA' + class: Employment + geoscale_to_use: state + source_fba_load_scale: state + year: 2013 + clean_fba_df_fxn: !script_function:BLS_QCEW clean_bls_qcew_fba_for_employment_sat_table + clean_fba_w_sec_df_fxn: !script_function:BLS_QCEW bls_clean_allocation_fba_w_sec + activity_set_file: 'BLS_QCEW_asets.csv' + activity_sets: + qcew: + names: !from_index:BLS_QCEW_asets.csv qcew + allocation_method: direct + allocation_source: None + allocation_source_class: None + allocation_source_year: None + allocation_flow: None + allocation_compartment: None + allocation_from_scale: state diff --git a/flowsa/methods/flowbysectormethods/Employment_state_2014.yaml b/flowsa/methods/flowbysectormethods/Employment_state_2014.yaml new file mode 100644 index 000000000..7039c9c27 --- /dev/null +++ 
b/flowsa/methods/flowbysectormethods/Employment_state_2014.yaml @@ -0,0 +1,23 @@ +target_sector_level: NAICS_6 +target_sector_source: NAICS_2012_Code +target_geoscale: state +source_names: + "BLS_QCEW": + data_format: 'FBA' + class: Employment + geoscale_to_use: state + source_fba_load_scale: state + year: 2014 + clean_fba_df_fxn: !script_function:BLS_QCEW clean_bls_qcew_fba_for_employment_sat_table + clean_fba_w_sec_df_fxn: !script_function:BLS_QCEW bls_clean_allocation_fba_w_sec + activity_set_file: 'BLS_QCEW_asets.csv' + activity_sets: + qcew: + names: !from_index:BLS_QCEW_asets.csv qcew + allocation_method: direct + allocation_source: None + allocation_source_class: None + allocation_source_year: None + allocation_flow: None + allocation_compartment: None + allocation_from_scale: state diff --git a/flowsa/methods/flowbysectormethods/Employment_state_2015.yaml b/flowsa/methods/flowbysectormethods/Employment_state_2015.yaml new file mode 100644 index 000000000..3834d1844 --- /dev/null +++ b/flowsa/methods/flowbysectormethods/Employment_state_2015.yaml @@ -0,0 +1,23 @@ +target_sector_level: NAICS_6 +target_sector_source: NAICS_2012_Code +target_geoscale: state +source_names: + "BLS_QCEW": + data_format: 'FBA' + class: Employment + geoscale_to_use: state + source_fba_load_scale: state + year: 2015 + clean_fba_df_fxn: !script_function:BLS_QCEW clean_bls_qcew_fba_for_employment_sat_table + clean_fba_w_sec_df_fxn: !script_function:BLS_QCEW bls_clean_allocation_fba_w_sec + activity_set_file: 'BLS_QCEW_asets.csv' + activity_sets: + qcew: + names: !from_index:BLS_QCEW_asets.csv qcew + allocation_method: direct + allocation_source: None + allocation_source_class: None + allocation_source_year: None + allocation_flow: None + allocation_compartment: None + allocation_from_scale: state diff --git a/flowsa/methods/flowbysectormethods/Employment_state_2016.yaml b/flowsa/methods/flowbysectormethods/Employment_state_2016.yaml new file mode 100644 index 000000000..9864d6dff --- 
/dev/null +++ b/flowsa/methods/flowbysectormethods/Employment_state_2016.yaml @@ -0,0 +1,23 @@ +target_sector_level: NAICS_6 +target_sector_source: NAICS_2012_Code +target_geoscale: state +source_names: + "BLS_QCEW": + data_format: 'FBA' + class: Employment + geoscale_to_use: state + source_fba_load_scale: state + year: 2016 + clean_fba_df_fxn: !script_function:BLS_QCEW clean_bls_qcew_fba_for_employment_sat_table + clean_fba_w_sec_df_fxn: !script_function:BLS_QCEW bls_clean_allocation_fba_w_sec + activity_set_file: 'BLS_QCEW_asets.csv' + activity_sets: + qcew: + names: !from_index:BLS_QCEW_asets.csv qcew + allocation_method: direct + allocation_source: None + allocation_source_class: None + allocation_source_year: None + allocation_flow: None + allocation_compartment: None + allocation_from_scale: state diff --git a/flowsa/methods/flowbysectormethods/Employment_state_2017.yaml b/flowsa/methods/flowbysectormethods/Employment_state_2017.yaml new file mode 100644 index 000000000..96e34e569 --- /dev/null +++ b/flowsa/methods/flowbysectormethods/Employment_state_2017.yaml @@ -0,0 +1,23 @@ +target_sector_level: NAICS_6 +target_sector_source: NAICS_2012_Code +target_geoscale: state +source_names: + "BLS_QCEW": + data_format: 'FBA' + class: Employment + geoscale_to_use: state + source_fba_load_scale: state + year: 2017 + clean_fba_df_fxn: !script_function:BLS_QCEW clean_bls_qcew_fba_for_employment_sat_table + clean_fba_w_sec_df_fxn: !script_function:BLS_QCEW bls_clean_allocation_fba_w_sec + activity_set_file: 'BLS_QCEW_asets.csv' + activity_sets: + qcew: + names: !from_index:BLS_QCEW_asets.csv qcew + allocation_method: direct + allocation_source: None + allocation_source_class: None + allocation_source_year: None + allocation_flow: None + allocation_compartment: None + allocation_from_scale: state diff --git a/flowsa/methods/flowbysectormethods/Land_national_2012.yaml b/flowsa/methods/flowbysectormethods/Land_national_2012.yaml index 2e05ba900..1f93d1ce8 100644 --- 
a/flowsa/methods/flowbysectormethods/Land_national_2012.yaml +++ b/flowsa/methods/flowbysectormethods/Land_national_2012.yaml @@ -1,5 +1,7 @@ %YAML 1.2 -# 'EIA_CBECS_Land' and 'EIA_MECS_Land' must be listed prior to 'USDA_ERS_MLU' for FBS creation +# 'EIA_CBECS_Land' and 'EIA_MECS_Land' must be listed prior to +# 'USDA_ERS_MLU' for FBS creation because the results of the two allocated +# datasets are used within USDA_ERS_MLU sector attribution --- target_sector_level: NAICS_6 @@ -21,11 +23,13 @@ source_names: allocation_source_class: "Employment" allocation_source_year: 2012 allocation_flow: - - "Number of employees" + - "Number of employees, Federal Government" + - "Number of employees, State Government" + - "Number of employees, Local Government" + - "Number of employees, Private" allocation_compartment: None allocation_from_scale: national allocation_fba_load_scale: national - clean_allocation_fba: !script_function:BLS_QCEW clean_bls_qcew_fba clean_allocation_fba_w_sec: !script_function:BLS_QCEW bls_clean_allocation_fba_w_sec "EIA_MECS_Land": # manufacturing land use data_format: 'FBA' @@ -42,11 +46,13 @@ source_names: allocation_source_class: "Employment" allocation_source_year: 2014 allocation_flow: - - "Number of employees" + - "Number of employees, Federal Government" + - "Number of employees, State Government" + - "Number of employees, Local Government" + - "Number of employees, Private" allocation_compartment: None allocation_from_scale: national allocation_fba_load_scale: national - clean_allocation_fba: !script_function:BLS_QCEW clean_bls_qcew_fba clean_allocation_fba_w_sec: !script_function:BLS_QCEW bls_clean_allocation_fba_w_sec "BLM_PLS": data_format: 'FBA' @@ -70,11 +76,13 @@ source_names: allocation_source_class: "Employment" allocation_source_year: 2012 allocation_flow: - - "Number of employees" + - "Number of employees, Federal Government" + - "Number of employees, State Government" + - "Number of employees, Local Government" + - "Number of 
employees, Private" allocation_compartment: None allocation_from_scale: national allocation_fba_load_scale: national - clean_allocation_fba: !script_function:BLS_QCEW clean_bls_qcew_fba clean_allocation_fba_w_sec: !script_function:BLS_QCEW bls_clean_allocation_fba_w_sec "USDA_ERS_MLU": data_format: 'FBA' @@ -147,9 +155,10 @@ source_names: - 'Land in rural transportation facilities' allocation_method: allocation_function allocation_source: !script_function:USDA_ERS_MLU allocate_usda_ers_mlu_land_in_rural_transportation_areas - literature_sources: {"urban_land_use_for_airports": "2020", - "urban_land_use_for_railroads": "2020", - "transportation_sectors_based_on_FHA_fees": "1997"} + literature_sources: { + "urban_land_use_for_airports": "2020", + "urban_land_use_for_railroads": "2020", + "transportation_sectors_based_on_FHA_fees": "1997"} allocation_source_class: None allocation_source_year: None allocation_flow: None @@ -160,11 +169,12 @@ source_names: - 'Land in urban areas' allocation_method: allocation_function allocation_source: !script_function:USDA_ERS_MLU allocate_usda_ers_mlu_land_in_urban_areas - literature_sources: {"area_of_urban_land_occupied_by_houses_2013": "2017", - "transportation_sectors_based_on_FHA_fees": "1997", - "urban_land_use_for_airports": "2020", - "urban_land_use_for_railroads": "2020", - "open_space_fraction_of_urban_area": "2020"} + literature_sources: { + "area_of_urban_land_occupied_by_houses_2013": "2017", + "transportation_sectors_based_on_FHA_fees": "1997", + "urban_land_use_for_airports": "2020", + "urban_land_use_for_railroads": "2020", + "open_space_fraction_of_urban_area": "2020"} allocation_source_class: None allocation_source_year: None allocation_flow: None @@ -175,7 +185,8 @@ source_names: - 'Other land' allocation_method: allocation_function allocation_source: !script_function:USDA_ERS_MLU allocate_usda_ers_mlu_other_land - literature_sources: {"area_of_rural_land_occupied_by_houses_2013": "2017" } + literature_sources: { + 
"area_of_rural_land_occupied_by_houses_2013": "2017" } allocation_source_class: None allocation_source_year: None allocation_flow: None diff --git a/flowsa/methods/flowbysectormethods/README.md b/flowsa/methods/flowbysectormethods/README.md index cda43cf9e..d1f3fa368 100644 --- a/flowsa/methods/flowbysectormethods/README.md +++ b/flowsa/methods/flowbysectormethods/README.md @@ -1,109 +1,139 @@ # FlowBySector method yaml term descriptions -Description of parameters in flowbysectormethods yamls. All values are strings unless noted. +Description of parameters in flowbysectormethods yamls. All values are +strings unless noted. ## Terms ### Target FBS output specifications -1. _target_sector_level_: specify desired sector aggregation - (NAICS_2, NAICS_3, NAICS_4, NAICS_5, NAICS_6) -2. _target_sector_source_: specify NAICS version 2007, 2012, 2017 (ex. NAICS_2012_Code). - Recommend NAICS_2012_Code, as the majority of datasets use this version of NAICS -3. _target_geoscale_: level of geographic aggregation in output parquet (national, state, or county) -4. _download_if_missing_: (optional) Add and set to 'True' if you would like to download all required - FBAs from Data Commons rather than generating FBAs locally. +- _target_sector_level_: specify desired sector aggregation (NAICS_2, + NAICS_3, NAICS_4, NAICS_5, NAICS_6) +- _target_sector_source_: specify NAICS version 2007, 2012, 2017 (ex. + NAICS_2012_Code). At this time, only NAICS_2012_Code is supported. +- _target_geoscale_: level of geographic aggregation in output parquet + (national, state, or county) +- _download_if_missing_: (optional) Add and set to 'True' if you would like + to download all required FBAs from Data Commons rather than generating + FBAs locally. ### Source specifications (in FBA format) -1. _source_names_: The name of the FBS dataset or the FBA dataset requiring allocation to sectors -2. 
_data_format_: 'FBA', 'FBS', 'FBS_outside_flowsa', loads a FlowByActivity or a FlowBySector - parquet stored in flowsa, or calls on a specified function to load data from outside flowsa in FBS format -3. _class_: a text string in 'Class' column of flowbyactivity (ex. Water), see class types in - (source_catalog.yaml)[https://github.com/USEPA/flowsa/blob/master/flowsa/data/source_catalog.yaml] -4. _geoscale_to_use_: the geoscale of the FBA set to use for sector allocation +- _source_names_: The name of the FBS dataset or the FBA dataset requiring + allocation to sectors +- _data_format_: 'FBA', 'FBS', 'FBS_outside_flowsa', loads a FlowByActivity + or a FlowBySector parquet stored in flowsa, or calls on a specified + function to load data from outside flowsa in FBS format +- _class_: a text string in 'Class' column of flowbyactivity (ex. Water), + see class types in + [source_catalog.yaml](https://github.com/USEPA/flowsa/blob/master/flowsa/data/source_catalog.yaml) +- _geoscale_to_use_: the geoscale of the FBA set to use for sector allocation (national, state, or county) -5. _year_: year of available dataset (ex. 2015) -6. _activity_to_sector_mapping_: (optional) name of activity to sector mapping file, if not provided will use the source name -7. _source_flows_: (list, optional, only usable with flowsa.flowby.FlowBySector.getFlowBySector()). Specifies the 'Flowable'(s) from the FBS to use. +- _year_: year of available dataset (ex. 2015) +- _activity_to_sector_mapping_: (optional) name of activity to sector + mapping file, if not provided will use the source name +- _source_flows_: (list, optional, only usable with flowsa.flowby.FlowBySector + .getFlowBySector()). Specifies the 'Flowable'(s) from the FBS to use. If not provided, all flows are used. -8. _apply_urban_rural_: Assign flow quantities as urban or rural based on population density by FIPS. -8. 
_clean_fba_before_mapping_df_fxn_: (optional) calls on function in the source.py file to clean up/modify - the FBA data prior to mapping flows. -10. _clean_fba_df_fxn_: (optional) calls on function in the source.py file to clean up/modify - the FBA data prior to allocating data to sectors. -11. _clean_fba_w_sec_df_fxn_: (optional) calls on function in the source.py file to clean up/modify the - FBA dataframe, after sector columns are added but prior to allocating data to sectors. -12. _fedefl_mapping_: (optional) name of mapping file in FEDEFL. If not supplied will use - the source_names -13. _mfl_mapping_: (optional, should not be used if fedefl_mapping is used) name of mapping file for Material Flow List. +- _apply_urban_rural_: Assign flow quantities as urban or rural based on + population density by FIPS. +- _clean_fba_before_mapping_df_fxn_: (optional) calls on function in the + source.py file to clean up/modify the FBA data prior to mapping flows. + Function is called using the `!script_function:` tag. +- _clean_fba_df_fxn_: (optional) calls on function in the source.py file to + clean up/modify the FBA data prior to allocating data to sectors. + Function is called using the `!script_function:` tag. +- _clean_fba_w_sec_df_fxn_: (optional) calls on function in the source.py + file to clean up/modify the FBA dataframe, after sector columns are added + but prior to allocating data to sectors. Function is called using + the`!script_function:` tag. +- _fedefl_mapping_: (optional) name of mapping file in FEDEFL. If not + supplied will use the source_names +- _mfl_mapping_: (optional, should not be used if fedefl_mapping is used) + name of mapping file for Material Flow List. +- _keep_unmapped_rows_: (optional) default is False, if True will maintain any + flows not found in mapping files. ### Activity set specifications -1. _activity_sets_: A subset of the FBA dataset and the method and allocation datasets used to create a FBS -2. 
_names_: (list) specify the subset of the FBA to allocate based on values in the - Activity Produced/Consumed By fields. To use an external activity set .csv file, use the tag `!from_index:file_name.csv`, then give the name (e.g. `activity_set_1`) of the activity set as found in the csv file. -3. _source_flows_: (list, optional) specify the 'FlowName'(s) from the FBA to use. - If not provided, all flows are used. -4. _allocation_method_: currently written for 'direct', 'allocation_function', - 'proportional', and 'proportional-flagged'. See descriptions below. -5. _allocation_source_: The primary data source used to allocate main FBA for +- _activity_sets_: A subset of the FBA dataset and the method and + allocation datasets used to create an FBS +- _names_: (list) specify the subset of the FBA to allocate based on values in the + Activity Produced/Consumed By fields. To use an external activity set . + csv file, use the tag `!from_index:file_name.csv`, then give the name (e.g., + `activity_set_1`) of the activity set as found in the csv file. +- _source_flows_: (list, optional) specify the 'FlowName'(s) from the FBA + to use. If not provided, all flows are used. +- _allocation_method_: currently written for 'direct', + 'allocation_function', 'proportional', and 'proportional-flagged'. See + descriptions below. +- _allocation_source_: The primary data source used to allocate main FBA for specified activity to sectors -6. _literature_sources_: (optional) -7. _activity_to_sector_mapping_: (optional) name of activity to sector mapping file, if not provided will use the source name -8. _allocation_source_class_: specific 'FlowClass' found in the allocation source - flowbyactivity parquet -9. _allocation_source_year_: specific to the allocation datasets, use the year relevant - to the main FBA dataframe -10. _allocation_flow_: (list) the relevant 'FlowName' values, as found in the source - flowbyactivity parquet. Use 'None' to capture all flows. -11. 
_allocation_compartment_: (list) the relevant 'Compartment' values, as found in the source - flowbyactivity parquet. Use 'None' to capture all compartments. -12. _allocation_from_scale_: national, state, or county - dependent on allocation source, - as not every level exits for sources -13. _allocation_fba_load_scale_: (optional) Can indicate geographic level of FBA to load, - helpful when an FBA ia large -14. _clean_allocation_fba_: (optional) Function to clean up the allocation FBA, as defined in - the source.py file -15. _clean_allocation_fba_w_sec_: (optional) Function to clean up the allocation FBA, after - allocation activities are assigned SectorProducedBy and SectorConsumedBy columns -16. _allocation_map_to_flow_list_: (optional) If the allocation df and source df need to be matched - on Context and/or Flowable, set to 'True' -17. _helper_source_: (optional) secondary df for sector allocation -18. _helper_method_: currently written for 'multiplication', 'proportional', and 'proportional-flagged' -19. _helper_activity_to_sector_mapping_: (optional) name of activity to sector mapping file, if not provided will use the source name -20. _helper_source_class_: specific 'FlowClass' found in the allocation source - flowbyactivity parquet -21. _helper_source_year_: specific to the allocation datasets, use the year relevant - to the main FBA dataframe -22. _helper_flow_: (list) the relevant 'FlowName' values, as found in the source - flowbyactivity parquet -23. _helper_from_scale_: national, state, or county - dependent on allocation source, - as not every level exits for sources -24. _clean_helper_fba_: (optional) Function to clean up the helper FBA, as defined in - the source.py file -25. _clean_helper_fba_wsec_: (optional) Function to clean up the helper FBA, after - allocation activities are assigned SectorProducedBy and SectorConsumedBy columns +- _literature_sources_: (optional) Specific functions that contain values + from literature used to modify FBA data. 
+- _activity_to_sector_mapping_: (optional) name of activity to sector + mapping file, if not provided will use the source name +- _allocation_source_class_: specific 'FlowClass' found in the allocation + source flowbyactivity parquet +- _allocation_source_year_: specific to the allocation datasets, use the + year relevant to the main FBA dataframe +- _allocation_flow_: (list) the relevant 'FlowName' values, as found in the + source flowbyactivity parquet. Use 'None' to capture all flows. +- _allocation_compartment_: (list) the relevant 'Compartment' values, as + found in the source flowbyactivity parquet. Use 'None' to capture all + compartments. +- _allocation_from_scale_: national, state, or county - dependent on + allocation source, as not every level exits for sources +- _allocation_fba_load_scale_: (optional) Can indicate geographic level of + FBA to load, helpful when an FBA ia large +- _clean_allocation_fba_: (optional) Function to clean up the allocation + FBA, as defined in the source.py file. Function is called using + the`!script_function:` tag. +- _clean_allocation_fba_w_sec_: (optional) Function to clean up the + allocation FBA, after allocation activities are assigned SectorProducedBy + and SectorConsumedBy columns. Function is called using + the`!script_function:` tag. 
+- _allocation_map_to_flow_list_: (optional) If the allocation df and source + df need to be matched on Context and/or Flowable, set to 'True' +- _helper_source_: (optional) secondary df for sector allocation +- _helper_method_: currently written for 'multiplication', 'proportional', + and 'proportional-flagged' +- _helper_activity_to_sector_mapping_: (optional) name of activity to + sector mapping file, if not provided will use the source name +- _helper_source_class_: specific 'FlowClass' found in the allocation + source flowbyactivity parquet +- _helper_source_year_: specific to the allocation datasets, use the year + relevant to the main FBA dataframe +- _helper_flow_: (list) the relevant 'FlowName' values, as found in the + source flowbyactivity parquet +- _helper_from_scale_: national, state, or county - dependent on allocation + source, as not every level exists for sources +- _clean_helper_fba_: (optional) Function to clean up the helper FBA. + Function is called using the `!script_function:` tag. +- _clean_helper_fba_wsec_: (optional) Function to clean up the helper FBA, + after allocation activities are assigned SectorProducedBy and + SectorConsumedBy columns. Function is called using + the `!script_function:` tag. ### Source specifications (in FBS format) If source data format is specified as 'FBS': -1. _source_names_: The name of the FBS dataset -2. _data_format_: 'FBS', loads a FlowBySector -3. _year_: year of available dataset (ex. 2015) -4. _clean_fbs_df_fxn_: (optional) apply function to clean the FBS after it is accessed +- _source_names_: The name of the FBS dataset +- _data_format_: 'FBS', loads a FlowBySector +- _year_: year of available dataset (ex. 2015) +- _clean_fbs_df_fxn_: (optional) apply function to clean the FBS after it + is accessed. Function is called using the `!script_function:` tag. ### FBS_outside_flows specifications -If source data_format is specified as 'FBS_outside_flowsa': -1. 
_FBS_datapull_fxn_: name of the function to generate the FBS -2. _parameters_: (list) parameters to pass into the function - -## Allocation Method Descriptions -1. direct: Activities are directly assigned to sectors using the source to NAICS crosswalk -2. allocation_function: Activities are assigned to sectors using a specified function -3. proportional: Activities are proportionally allocated to sectors using specified allocation data source -4. proportional-flagged: Activities that are flagged (assigned a value of '1') are proportionally allocated - to sectors using a specified allocation data source. Activities that are not flagged - (assigned a value of '0') are directly assigned to sectors. +If source data_format is specified as `FBS_outside_flowsa`: +- _FBS_datapull_fxn_: name of the function to generate the FBS. Function is + called using the `!script_function:` tag. +- _parameters_: (list) parameters to pass into the function -## Helper Method -1. multiplication: Multiply the values in the allocation data source with values sharing the same sectors - in the helper allocation data source -2. proportional: Data in allocation source further allocated to sectors proportionally with the helper source -3. 
proportional-flagged: Data in allocation source further allocated to sectors proportionally - when flagged (assigned a value of '1') and directly assigned to sector when not flagged - (assigned a value of '0') +## Method Descriptions +- allocation_function: Activities are assigned to sectors using a specified + function +- direct: Activities are directly assigned to sectors using the source to + NAICS crosswalk +- multiplication: Multiply the values in the allocation data source with + values sharing the same sectors in the helper allocation data source +- proportional: Activities are proportionally allocated to sectors using + specified allocation data source +- proportional-flagged: Activities that are flagged (assigned a value of + '1') are proportionally allocated to sectors using a specified allocation + data source. Activities that are not flagged (assigned a value of '0') + are directly assigned to sectors. diff --git a/flowsa/methods/flowbysectormethods/USEEIO_summary_target.yaml b/flowsa/methods/flowbysectormethods/USEEIO_summary_target.yaml new file mode 100644 index 000000000..b3a51eef3 --- /dev/null +++ b/flowsa/methods/flowbysectormethods/USEEIO_summary_target.yaml @@ -0,0 +1,13 @@ +# This file can be used to set up target NAICS for standard sectors +# in a USEEIO summary level model. 
NAICS are targeted to enable 1:1 +# correspondence between NAICS and BEA Summary sectors + +# To use in a FBS method add +# !include:USEEIO_summary_target.yaml +# to the top of the method replacing the three parameters below + +target_sector_level: NAICS_3 +target_subset_sector_level: {NAICS_4: ['221', '336', '541']} +# In USEEIO models 221 (Utilities) is disaggregated to 2211, 2212, and 2213 +# '336' and '541' carry over from the BEA summary sectors +target_sector_source: NAICS_2012_Code diff --git a/flowsa/methods/flowbysectormethods/Water_national_2010_m1.yaml b/flowsa/methods/flowbysectormethods/Water_national_2010_m1.yaml index 0bb615d9b..979929fdd 100644 --- a/flowsa/methods/flowbysectormethods/Water_national_2010_m1.yaml +++ b/flowsa/methods/flowbysectormethods/Water_national_2010_m1.yaml @@ -37,10 +37,12 @@ source_names: allocation_source_class: "Employment" allocation_source_year: 2010 allocation_flow: - - "Number of employees" + - "Number of employees, Federal Government" + - "Number of employees, State Government" + - "Number of employees, Local Government" + - "Number of employees, Private" allocation_compartment: None allocation_from_scale: national - clean_allocation_fba: !script_function:BLS_QCEW clean_bls_qcew_fba clean_allocation_fba_w_sec: !script_function:BLS_QCEW bls_clean_allocation_fba_w_sec activity_set_3: names: @@ -57,7 +59,6 @@ source_names: - 'AREA GROWN, IRRIGATED' allocation_compartment: None allocation_from_scale: state - clean_allocation_fba: !script_function:USDA_CoA_Cropland coa_irrigated_cropland_fba_cleanup clean_allocation_fba_w_sec: !script_function:USDA_CoA_Cropland disaggregate_coa_cropland_to_6_digit_naics_for_water_withdrawal helper_source: "USDA_IWMS" helper_method: multiplication @@ -65,7 +66,7 @@ source_names: helper_source_year: 2013 helper_flow: None helper_from_scale: state - clean_helper_fba_wsec: iwms_aggregation + clean_helper_fba_wsec: !script_function:USDA_IWMS iwms_aggregation activity_set_4: names: - "Livestock" 
diff --git a/flowsa/methods/flowbysectormethods/Water_national_2010_m2.yaml b/flowsa/methods/flowbysectormethods/Water_national_2010_m2.yaml index 683ecb6af..e8b82ed05 100644 --- a/flowsa/methods/flowbysectormethods/Water_national_2010_m2.yaml +++ b/flowsa/methods/flowbysectormethods/Water_national_2010_m2.yaml @@ -45,9 +45,11 @@ source_names: helper_source_class: "Employment" helper_source_year: 2011 helper_flow: - - "Number of employees" + - "Number of employees, Federal Government" + - "Number of employees, State Government" + - "Number of employees, Local Government" + - "Number of employees, Private" helper_from_scale: national - clean_helper_fba: !script_function:BLS_QCEW clean_bls_qcew_fba clean_helper_fba_wsec: !script_function:BLS_QCEW bls_clean_allocation_fba_w_sec activity_set_3: names: @@ -67,9 +69,11 @@ source_names: helper_source_class: "Employment" helper_source_year: 2010 helper_flow: - - "Number of employees" + - "Number of employees, Federal Government" + - "Number of employees, State Government" + - "Number of employees, Local Government" + - "Number of employees, Private" helper_from_scale: national - clean_helper_fba: !script_function:BLS_QCEW clean_bls_qcew_fba clean_helper_fba_wsec: !script_function:BLS_QCEW bls_clean_allocation_fba_w_sec scale_helper_results: !script_function:Blackhurst_IO scale_blackhurst_results_to_usgs_values activity_set_4: diff --git a/flowsa/methods/flowbysectormethods/Water_national_2015_m1.yaml b/flowsa/methods/flowbysectormethods/Water_national_2015_m1.yaml index 46c0aefcd..8efd8fc96 100644 --- a/flowsa/methods/flowbysectormethods/Water_national_2015_m1.yaml +++ b/flowsa/methods/flowbysectormethods/Water_national_2015_m1.yaml @@ -37,10 +37,12 @@ source_names: allocation_source_class: "Employment" allocation_source_year: 2015 allocation_flow: - - "Number of employees" + - "Number of employees, Federal Government" + - "Number of employees, State Government" + - "Number of employees, Local Government" + - "Number of 
employees, Private" allocation_compartment: None allocation_from_scale: national - clean_allocation_fba: !script_function:BLS_QCEW clean_bls_qcew_fba clean_allocation_fba_w_sec: !script_function:BLS_QCEW bls_clean_allocation_fba_w_sec activity_set_3: names: @@ -57,7 +59,6 @@ source_names: - 'AREA GROWN, IRRIGATED' allocation_compartment: None allocation_from_scale: state - clean_allocation_fba: !script_function:USDA_CoA_Cropland coa_irrigated_cropland_fba_cleanup clean_allocation_fba_w_sec: !script_function:USDA_CoA_Cropland disaggregate_coa_cropland_to_6_digit_naics_for_water_withdrawal helper_source: "USDA_IWMS" helper_method: multiplication diff --git a/flowsa/methods/flowbysectormethods/Water_national_2015_m2.yaml b/flowsa/methods/flowbysectormethods/Water_national_2015_m2.yaml index 8cdc4fc7e..a97ece017 100644 --- a/flowsa/methods/flowbysectormethods/Water_national_2015_m2.yaml +++ b/flowsa/methods/flowbysectormethods/Water_national_2015_m2.yaml @@ -45,9 +45,11 @@ source_names: helper_source_class: "Employment" helper_source_year: 2015 helper_flow: - - "Number of employees" + - "Number of employees, Federal Government" + - "Number of employees, State Government" + - "Number of employees, Local Government" + - "Number of employees, Private" helper_from_scale: national - clean_helper_fba: !script_function:BLS_QCEW clean_bls_qcew_fba clean_helper_fba_wsec: !script_function:BLS_QCEW bls_clean_allocation_fba_w_sec activity_set_3: names: @@ -67,9 +69,11 @@ source_names: helper_source_class: "Employment" helper_source_year: 2015 helper_flow: - - "Number of employees" + - "Number of employees, Federal Government" + - "Number of employees, State Government" + - "Number of employees, Local Government" + - "Number of employees, Private" helper_from_scale: national - clean_helper_fba: !script_function:BLS_QCEW clean_bls_qcew_fba clean_helper_fba_wsec: !script_function:BLS_QCEW bls_clean_allocation_fba_w_sec scale_helper_results: !script_function:Blackhurst_IO 
scale_blackhurst_results_to_usgs_values activity_set_4: diff --git a/flowsa/methods/flowbysectormethods/Water_national_2015_m3.yaml b/flowsa/methods/flowbysectormethods/Water_national_2015_m3.yaml index 0edd6de50..d5e785068 100644 --- a/flowsa/methods/flowbysectormethods/Water_national_2015_m3.yaml +++ b/flowsa/methods/flowbysectormethods/Water_national_2015_m3.yaml @@ -52,10 +52,12 @@ source_names: allocation_source_class: "Employment" allocation_source_year: 2015 allocation_flow: - - "Number of employees" + - "Number of employees, Federal Government" + - "Number of employees, State Government" + - "Number of employees, Local Government" + - "Number of employees, Private" allocation_compartment: None allocation_from_scale: national - clean_allocation_fba: !script_function:BLS_QCEW clean_bls_qcew_fba clean_allocation_fba_w_sec: !script_function:BLS_QCEW bls_clean_allocation_fba_w_sec activity_set_4: names: @@ -72,7 +74,6 @@ source_names: - 'AREA GROWN, IRRIGATED' allocation_compartment: None allocation_from_scale: state - clean_allocation_fba: !script_function:USDA_CoA_Cropland coa_irrigated_cropland_fba_cleanup clean_allocation_fba_w_sec: !script_function:USDA_CoA_Cropland disaggregate_coa_cropland_to_6_digit_naics_for_water_withdrawal helper_source: "USDA_IWMS" helper_method: multiplication diff --git a/flowsa/methods/flowbysectormethods/Water_state_2015_m1.yaml b/flowsa/methods/flowbysectormethods/Water_state_2015_m1.yaml index db6166582..d78578630 100644 --- a/flowsa/methods/flowbysectormethods/Water_state_2015_m1.yaml +++ b/flowsa/methods/flowbysectormethods/Water_state_2015_m1.yaml @@ -1,5 +1,4 @@ -target_sector_level: NAICS_4 -target_sector_source: NAICS_2012_Code +!include:USEEIO_summary_target.yaml target_geoscale: state source_names: "USGS_NWIS_WU": @@ -33,10 +32,12 @@ source_names: allocation_source_class: "Employment" allocation_source_year: 2015 allocation_flow: - - "Number of employees" + - "Number of employees, Federal Government" + - "Number of 
employees, State Government" + - "Number of employees, Local Government" + - "Number of employees, Private" allocation_compartment: None allocation_from_scale: state - clean_allocation_fba: !script_function:BLS_QCEW clean_bls_qcew_fba clean_allocation_fba_w_sec: !script_function:BLS_QCEW bls_clean_allocation_fba_w_sec activity_set_3: names: @@ -53,7 +54,6 @@ source_names: - 'AREA GROWN, IRRIGATED' allocation_compartment: None allocation_from_scale: state - clean_allocation_fba: !script_function:USDA_CoA_Cropland coa_irrigated_cropland_fba_cleanup clean_allocation_fba_w_sec: !script_function:USDA_CoA_Cropland disaggregate_coa_cropland_to_6_digit_naics_for_water_withdrawal helper_source: "USDA_IWMS" helper_method: multiplication diff --git a/flowsa/schema.py b/flowsa/schema.py index 2961aadba..85873a83c 100644 --- a/flowsa/schema.py +++ b/flowsa/schema.py @@ -170,3 +170,7 @@ 'ConsumedBy': [{'flowbyactivity': 'ActivityConsumedBy'}, {'flowbysector': 'SectorConsumedBy'}] } + +dq_fields = ['MeasureofSpread', 'Spread', 'DistributionType', 'Min', + 'Max', 'DataReliability', 'DataCollection', 'TemporalCorrelation', + 'GeographicalCorrelation', 'TechnologicalCorrelation'] diff --git a/flowsa/sectormapping.py b/flowsa/sectormapping.py index 71ab54509..c4e1353c1 100644 --- a/flowsa/sectormapping.py +++ b/flowsa/sectormapping.py @@ -8,10 +8,11 @@ import pandas as pd import numpy as np from esupy.mapping import apply_flow_mapping +import flowsa from flowsa.common import get_flowsa_base_name, \ return_true_source_catalog_name, check_activities_sector_like, \ load_yaml_dict, fba_activity_fields, SECTOR_SOURCE_NAME -from flowsa.schema import activity_fields +from flowsa.schema import activity_fields, dq_fields from flowsa.settings import log from flowsa.flowbyfunctions import fbs_activity_fields, load_crosswalk from flowsa.validation import replace_naics_w_naics_from_another_year @@ -230,7 +231,7 @@ def get_fba_allocation_subset(fba_allocation, source, activitynames, if am == 
'proportional-flagged': subset_by_sector_cols = True - if check_activities_sector_like(source) is False: + if check_activities_sector_like(fba_allocation, sourcename=source) is False: # read in source crosswalk df = get_activitytosector_mapping( sourceconfig.get('activity_to_sector_mapping', source), @@ -439,3 +440,98 @@ def get_sector_list(sector_level, secondary_sector_level_dict=None): sector_list = sector_list + sector_add return sector_list + + +def map_to_BEA_sectors(fbs_load, region, io_level, year): + """ + Map FBS sectors from NAICS to BEA, allocating by gross industry output. + + :param fbs_load: df completed FlowBySector collapsed to single 'Sector' + :param region: str, 'state' or 'national' + :param io_level: str, 'summary' or 'detail' + :param year: year for industry output + """ + from flowsa.sectormapping import get_activitytosector_mapping + + bea = get_BEA_industry_output(region, io_level, year) + + if io_level == 'summary': + mapping_col = 'BEA_2012_Summary_Code' + elif io_level == 'detail': + mapping_col = 'BEA_2012_Detail_Code' + + # Prepare NAICS:BEA mapping file + mapping = (load_crosswalk('BEA') + .rename(columns={mapping_col: 'BEA', + 'NAICS_2012_Code': 'Sector'})) + mapping = (mapping.drop( + columns=mapping.columns.difference(['Sector','BEA'])) + .drop_duplicates(ignore_index=True) + .dropna(subset=['Sector'])) + mapping['Sector'] = mapping['Sector'].astype(str) + + # Create allocation ratios where one to many NAICS:BEA + dup = mapping[mapping['Sector'].duplicated(keep=False)] + dup = dup.merge(bea, how='left', on='BEA') + dup['Allocation'] = dup['Output']/dup.groupby( + ['Sector','Location']).Output.transform('sum') + + # Update and allocate to sectors + fbs = (fbs_load.merge( + mapping.drop_duplicates(subset='Sector', keep=False), + how='left', + on='Sector')) + fbs = fbs.merge(dup.drop(columns='Output'), + how='left', on=['Sector', 'Location'], + suffixes=(None, '_y')) + fbs['Allocation'] = fbs['Allocation'].fillna(1) + fbs['BEA'] = 
fbs['BEA'].fillna(fbs['BEA_y']) + fbs['FlowAmount'] = fbs['FlowAmount'] * fbs['Allocation'] + + fbs = (fbs.drop(columns=dq_fields + + ['Sector', 'SectorSourceName', + 'BEA_y', 'Allocation'], errors='ignore') + .rename(columns={'BEA':'Sector'})) + + if (abs(1-(sum(fbs['FlowAmount']) / + sum(fbs_load['FlowAmount'])))) > 0.005: + log.warning('Data loss upon BEA mapping') + + return fbs + + +def get_BEA_industry_output(region, io_level, year): + """ + Get FlowByActivity for industry output from state or national datasets + :param region: str, 'state' or 'national' + :param io_level: str, 'summary' or 'detail' + :param year: year for industry output + """ + if region == 'state': + fba = 'stateio_Industry_GO' + if io_level == 'detail': + raise TypeError ('detail models not available for states') + elif region == 'national': + fba = 'BEA_GDP_GrossOutput' + + # Get output by BEA sector + bea = flowsa.getFlowByActivity(fba, year) + bea = ( + bea.drop(columns=bea.columns.difference( + ['FlowAmount','ActivityProducedBy','Location'])) + .rename(columns={'FlowAmount':'Output', + 'ActivityProducedBy': 'BEA'})) + + # If needed, aggregate from detail to summary + if region == 'national' and io_level == 'summary': + bea_mapping = (load_crosswalk('BEA') + [['BEA_2012_Detail_Code','BEA_2012_Summary_Code']] + .drop_duplicates() + .rename(columns={'BEA_2012_Detail_Code': 'BEA'})) + bea = (bea.merge(bea_mapping, how='left', on='BEA') + .drop(columns=['BEA']) + .rename(columns={'BEA_2012_Summary_Code': 'BEA'})) + bea = (bea.groupby(['BEA','Location']).agg({'Output': 'sum'}) + .reset_index()) + + return bea diff --git a/flowsa/test_examples.py b/flowsa/test_examples.py index 9a6f4189f..97410a4a5 100644 --- a/flowsa/test_examples.py +++ b/flowsa/test_examples.py @@ -1,6 +1,7 @@ """ Test functions work """ +import pytest import flowsa @@ -17,3 +18,18 @@ def test_get_flows_by_sector(): def test_write_bibliography(): flowsa.writeFlowBySectorBibliography('Water_national_2015_m1') + 
+@pytest.mark.generate_fbs +def test_generate_fbs(): + """Generate all FBS from methods in repo.""" + for m in flowsa.seeAvailableFlowByModels("FBS", print_method=False): + if m not in ['BEA_summary_target', + 'USEEIO_summary_target', + 'Electricity_gen_emissions_national_2016']: + print("--------------------------------\n" + f"Method: {m}\n" + "--------------------------------") + flowsa.flowbysector.main(method=m, download_FBAs_if_missing=True) + +if __name__ == "__main__": + test_generate_fbs() \ No newline at end of file diff --git a/flowsa/test_methods.py b/flowsa/test_methods.py index f8fd84b4d..092a1b48c 100644 --- a/flowsa/test_methods.py +++ b/flowsa/test_methods.py @@ -4,6 +4,7 @@ """ import pytest from flowsa import seeAvailableFlowByModels +import flowsa.exceptions from flowsa.flowbyactivity import load_yaml_dict, assemble_urls_for_query,\ call_urls @@ -18,7 +19,6 @@ def test_FBA_urls(): year = max(config['years']) if ((config.get('url', 'None') == 'None') or - (config.get('api_key_required', False)) or (m == 'EPA_EQUATES')): continue @@ -33,6 +33,9 @@ def test_FBA_urls(): config=config) call_urls(url_list=urls, source=m, year=str(year), config=config) + except flowsa.exceptions.APIError: + print('API Key required, skipping url') + continue except Exception: error_list.append(m) if error_list: diff --git a/flowsa/validation.py b/flowsa/validation.py index c498a470d..01a2446e7 100644 --- a/flowsa/validation.py +++ b/flowsa/validation.py @@ -7,8 +7,10 @@ import pandas as pd import numpy as np +import flowsa from flowsa.flowbyfunctions import aggregator, create_geoscale_list,\ - subset_df_by_geoscale, sector_aggregation + subset_df_by_geoscale, sector_aggregation, collapse_fbs_sectors,\ + subset_df_by_sector_lengths from flowsa.dataclean import replace_strings_with_NoneType, \ replace_NoneType_with_empty_cells from flowsa.common import sector_level_key, \ @@ -59,47 +61,58 @@ def check_if_activities_match_sectors(fba): return activities_missing_sectors -def 
check_if_data_exists_at_geoscale(df, geoscale, activitynames='All'): +def check_if_data_exists_at_geoscale(df_load, geoscale): """ Check if an activity or a sector exists at the specified geoscale - :param df: flowbyactivity dataframe - :param activitynames: Either an activity name (ex. 'Domestic') - or a sector (ex. '1124') + :param df_load: df with activity columns :param geoscale: national, state, or county - :return: str, 'yes' or 'no' """ - # if any activity name is specified, check if activity data - # exists at the specified geoscale - activity_list = [] - if activitynames != 'All': - if isinstance(activitynames, str): - activity_list.append(activitynames) - else: - activity_list = activitynames - # check for specified activity name - df = df[(df[fba_activity_fields[0]].isin(activity_list)) | - (df[fba_activity_fields[1]].isin(activity_list) - )].reset_index(drop=True) - else: - activity_list.append('activities') - # filter by geoscale depends on Location System - fips = create_geoscale_list(df, geoscale) - - df = df[df['Location'].isin(fips)] - - if len(df) == 0: + fips_list = create_geoscale_list(df_load, geoscale) + fips = pd.DataFrame(fips_list, columns=['FIPS']) + + activities = df_load[['ActivityProducedBy', 'ActivityConsumedBy']]\ + .drop_duplicates().reset_index(drop=True) + # add tmp column and merge + fips['tmp'] = 1 + activities['tmp'] = 1 + activities = activities.merge(fips, on='tmp').drop(columns='tmp') + + # merge activities with df and determine which FIPS are missing for each + # activity + df = df_load[df_load['Location'].isin(fips_list)] + # if activities are defined, subset df + # df = df[df['']] + + dfm = df.merge(activities, + left_on=['ActivityProducedBy', 'ActivityConsumedBy', + 'Location'], + right_on=['ActivityProducedBy', 'ActivityConsumedBy', + 'FIPS'], + how='outer') + # subset into df where values for state and where states do not have data + df1 = dfm[~dfm['FlowAmount'].isna()] + df2 = dfm[dfm['FlowAmount'].isna()] + df2 = 
df2[['ActivityProducedBy', 'ActivityConsumedBy', + 'FIPS']].reset_index(drop=True) + + # define source name and year + sn = df_load['SourceName'][0] + y = df_load['Year'][0] + + if len(df1) == 0: vLog.info( - "No flows found for %s at the %s scale", - ', '.join(activity_list), geoscale) - exists = "No" - else: - vLog.info("Flows found for %s at the %s scale", - ', '.join(activity_list), geoscale) - exists = "Yes" - - return exists + "No flows found for activities in %s %s at the %s scale", + sn, y, geoscale) + if len(df2) > 0: + # if len(df2) > 1: + df2 = df2.groupby( + ['ActivityProducedBy', 'ActivityConsumedBy'], dropna=False).agg( + lambda col: ','.join(col)).reset_index() + vLogDetailed.info("There are %s, activity combos that do not have " + "data in %s %s: \n {}".format(df2.to_string()), + geoscale, sn, y) def check_if_data_exists_at_less_aggregated_geoscale( @@ -176,15 +189,11 @@ def check_allocation_ratios(flow_alloc_df_load, activity_set, config, attr): # if in the attr dictionary, merge columns are identified, # the merge columns need to be accounted for in the grouping/checking of # allocation ratios + subset_cols = ['FBA_Activity', 'Location', 'SectorLength', 'FlowAmountRatio'] + groupcols = ['FBA_Activity', 'Location', 'SectorLength'] if 'allocation_merge_columns' in attr: - subset_cols = ['FBA_Activity', 'Location', 'SectorLength', - 'FlowAmountRatio'] + attr['allocation_merge_columns'] - groupcols = ['FBA_Activity', 'Location', - 'SectorLength'] + attr['allocation_merge_columns'] - else: - subset_cols = ['FBA_Activity', 'Location', - 'SectorLength', 'FlowAmountRatio'] - groupcols = ['FBA_Activity', 'Location', 'SectorLength'] + subset_cols = subset_cols + attr['allocation_merge_columns'] + groupcols = groupcols + attr['allocation_merge_columns'] # create column of sector lengths flow_alloc_df =\ @@ -335,7 +344,7 @@ def calculate_flowamount_diff_between_dfs(dfa_load, dfb_load): def compare_activity_to_sector_flowamounts(fba_load, fbs_load, - 
activity_set, source_name, config): + activity_set, config): """ Function to compare the loaded flowbyactivity with the final flowbysector by activityname (if exists) to target sector level @@ -343,12 +352,11 @@ def compare_activity_to_sector_flowamounts(fba_load, fbs_load, :param fba_load: df, FBA loaded and mapped using FEDEFL :param fbs_load: df, final FBS df :param activity_set: str, activity set - :param source_name: str, source name :param config: dictionary, method yaml :return: printout data differences between loaded FBA and FBS output, save results as csv in local directory """ - if check_activities_sector_like(source_name): + if check_activities_sector_like(fba_load): vLog.debug('Not comparing loaded FlowByActivity to FlowBySector ' 'ratios for a dataset with sector-like activities because ' 'if there are modifications to flowamounts for a sector, ' @@ -473,23 +481,19 @@ def compare_fba_geo_subset_and_fbs_output_totals( # extract relevant geoscale data or aggregate existing data fba = subset_df_by_geoscale(fba_load, from_scale, method['target_geoscale']) - if check_activities_sector_like(source_name): + if check_activities_sector_like(fba_load): # if activities are sector-like, run sector aggregation and then # subset df to only keep NAICS2 - fba = fba[['Class', 'FlowAmount', 'Unit', 'Context', + fba = fba[['Class', 'SourceName', 'FlowAmount', 'Unit', 'Context', 'ActivityProducedBy', 'ActivityConsumedBy', 'Location', 'LocationSystem']] # rename the activity cols to sector cols for purposes of aggregation fba = fba.rename(columns={'ActivityProducedBy': 'SectorProducedBy', 'ActivityConsumedBy': 'SectorConsumedBy'}) - group_cols_agg = ['Class', 'Context', 'Unit', 'Location', - 'LocationSystem', 'SectorProducedBy', - 'SectorConsumedBy'] fba = sector_aggregation(fba) # subset fba to only include NAICS2 fba = replace_NoneType_with_empty_cells(fba) - fba = fba[fba['SectorConsumedBy'].apply(lambda x: len(x) == 2) | - fba['SectorProducedBy'].apply(lambda x: 
len(x) == 2)] + fba = subset_df_by_sector_lengths(fba, [2]) # subset/agg dfs col_subset = ['Class', 'FlowAmount', 'Unit', 'Context', 'Location', 'LocationSystem'] @@ -511,11 +515,16 @@ def compare_fba_geo_subset_and_fbs_output_totals( try: # merge FBA and FBS totals df_merge = fba_agg.merge(fbs_agg, how='left') + df_merge['FBS_amount'] = df_merge['FBS_amount'].fillna(0) df_merge['FlowAmount_difference'] = \ df_merge['FBA_amount'] - df_merge['FBS_amount'] df_merge['Percent_difference'] = \ (df_merge['FlowAmount_difference']/df_merge['FBA_amount']) * 100 - + # cases where flow amount diff is 0 but because fba amount is 0, + # percent diff is null. Fill those cases with 0s + df_merge['Percent_difference'] = np.where( + (df_merge['FlowAmount_difference'] == 0) & + (df_merge['FBA_amount'] == 0), 0, df_merge['Percent_difference']) # reorder df_merge = df_merge[['Class', 'Context', 'Location', 'LocationSystem', 'FBA_amount', 'FBA_unit', 'FBS_amount', @@ -583,44 +592,122 @@ def compare_fba_geo_subset_and_fbs_output_totals( 'for FlowByActivity and FlowBySector') -def check_summation_at_sector_lengths(df): +def compare_summation_at_sector_lengths_between_two_dfs(df1, df2): """ Check summed 'FlowAmount' values at each sector length - :param df: df, requires Sector column - :return: df, includes summed 'FlowAmount' values at each sector length + :param df1: df, first df of values with sector columns + :param df2: df, second df of values with sector columns + :return: df, comparison of sector summation results by region and + printout if any child naics sum greater than parent naics """ + from flowsa.flowbyfunctions import assign_columns_of_sector_levels - # columns to keep - df_cols = [e for e in df.columns if e not in - ('MeasureofSpread', 'Spread', 'DistributionType', 'Min', - 'Max', 'DataReliability', 'DataCollection', 'FlowType', - 'Compartment', 'Description', 'Activity')] - # subset df - df2 = df[df_cols] - - # rename columns and clean up df - df2 = 
df2[~df2['Sector'].isnull()] - - df2 = df2.assign(SectorLength=len(df2['Sector'])) - - # sum flowamounts by sector length - denom_df = df2.copy() - denom_df.loc[:, 'Denominator'] = denom_df.groupby( - ['Location', 'SectorLength'])['FlowAmount'].transform('sum') - - summed_df = denom_df.drop( - columns=['Sector', 'FlowAmount']).drop_duplicates().reset_index( - drop=True) + agg_cols = ['Class', 'SourceName', 'FlowName', 'Unit', 'FlowType', + 'Compartment', 'Location', 'Year', 'SectorProducedByLength', + 'SectorConsumedByLength'] - # max value - maxv = max(summed_df['Denominator'].apply(lambda x: x)) - - # percent of total accounted for - summed_df = summed_df.assign(percentOfTot=summed_df['Denominator']/maxv) + df_list = [] + for df in [df1, df2]: + df = replace_NoneType_with_empty_cells(df) + df = assign_columns_of_sector_levels(df) + # sum flowamounts by sector length + dfsum = df.groupby(agg_cols).agg({'FlowAmount': 'sum'}).reset_index() + df_list.append(dfsum) + + df_list[0] = df_list[0].rename(columns={'FlowAmount': 'df1'}) + df_list[1] = df_list[1].rename(columns={'FlowAmount': 'df2'}) + dfm = df_list[0].merge(df_list[1], how='outer') + dfm = dfm.fillna(0) + dfm['flowIncrease_df1_to_df2_perc'] = (dfm['df2'] - dfm['df1'])/dfm[ + 'df1'] * 100 + # dfm2 = dfm[dfm['flowIncrease_df1_to_df2'] != 0] + # drop cases where sector length is 0 because not included in naics cw + dfm2 = dfm[~((dfm['SectorProducedByLength'] == 0) & (dfm[ + 'SectorConsumedByLength'] == 0))] + # sort df + dfm2 = dfm2.sort_values(['Location', 'SectorProducedByLength', + 'SectorConsumedByLength']).reset_index(drop=True) + + dfm3 = dfm2[dfm2['flowIncrease_df1_to_df2_perc'] < 0] + + if len(dfm3) > 0: + log.info('See validation log for cases where the second dataframe ' + 'has flow amounts greater than the first dataframe at the ' + 'same location/sector lengths.') + vLogDetailed.info('The second dataframe has flow amounts greater than ' + 'the first dataframe at the same sector lengths: ' + '\n 
{}'.format(dfm3.to_string())) + else: + vLogDetailed.info('The second dataframe does not have flow amounts ' + 'greater than the first dataframe at any sector ' + 'length') - summed_df = summed_df.sort_values(['SectorLength']).reset_index(drop=True) - return summed_df +def compare_child_to_parent_sectors_flowamounts(df_load): + """ + Sum child sectors up to one sector and compare to parent sector values + :param df_load: df, contains sector columns + :return: comparison of flow values + """ + from flowsa.flowbyfunctions import return_primary_sector_column, \ + assign_sector_match_column + + merge_cols = [e for e in df_load.columns if e in [ + 'Class', 'SourceName', 'MetaSources', 'FlowName', 'Unit', + 'FlowType', 'Flowable', 'ActivityProducedBy', 'ActivityConsumedBy', + 'Compartment', 'Context', 'Location', 'Year', 'Description']] + # determine if activities are sector-like + sector_like_activities = check_activities_sector_like(df_load) + # if activities are sector like, drop columns from merge group + if sector_like_activities: + merge_cols = [e for e in merge_cols if e not in ( + 'ActivityProducedBy', 'ActivityConsumedBy')] + + agg_cols = merge_cols + ['SectorProducedMatch', 'SectorConsumedMatch'] + dfagg = pd.DataFrame() + for i in range(3, 7): + df = subset_df_by_sector_lengths(df_load, [i]) + for s in ['Produced', 'Consumed']: + df = assign_sector_match_column(df, f'Sector{s}By', i, i-1).rename( + columns={'sector_group': f'Sector{s}Match'}) + df = df.fillna('') + df2 = df.groupby(agg_cols).agg( + {'FlowAmount': 'sum'}).rename(columns={ + 'FlowAmount': f'ChildNAICSSum'}).reset_index() + dfagg = pd.concat([dfagg, df2], ignore_index=True) + + # merge new df with summed child naics to original df + drop_cols = [e for e in df_load.columns if e in + ['MeasureofSpread', 'Spread', 'DistributionType', 'Min', + 'Max', 'DataReliability', 'DataCollection', 'Description', + 'SectorProducedMatch', 'SectorConsumedMatch']] + dfm = df_load.merge(dfagg, how='left', 
left_on=merge_cols + [ + 'SectorProducedBy', 'SectorConsumedBy'], right_on=agg_cols).drop( + columns=drop_cols) + dfm = dfm.assign(FlowDiff=dfm['ChildNAICSSum'] - dfm['FlowAmount']) + dfm['PercentDiff'] = (dfm['FlowDiff'] / dfm['FlowAmount']) * 100 + + cols_subset = [e for e in dfm.columns if e in [ + 'Class', 'SourceName', 'MetaSources', 'Flowable', 'FlowName', + 'Unit', 'FlowType', 'ActivityProducedBy', 'ActivityConsumedBy', + 'Context', 'Location', 'Year', 'SectorProducedBy', + 'SectorConsumedBy', 'FlowAmount', 'ChildNAICSSum', 'PercentDiff']] + dfm = dfm[cols_subset] + + # subset df where child sectors sum to be greater than parent sectors + tolerance = 1 + dfm2 = dfm[(dfm['PercentDiff'] > tolerance) | + (dfm['PercentDiff'] < - tolerance)].reset_index(drop=True) + + if len(dfm2) > 0: + log.info('See validation log for cases where child sectors sum to be ' + 'different than parent sectors by at least %s%%.', tolerance) + vLogDetailed.info('There are cases where child sectors sum to be ' + 'different than parent sectors by at least %s%%: ' + '\n {}'.format(dfm2.to_string()), tolerance) + else: + vLogDetailed.info('No child sectors sum to be different than parent ' + 'sectors by at least %s%%.', tolerance) def check_for_nonetypes_in_sector_col(df): @@ -984,3 +1071,88 @@ def compare_df_units(df1_load, df2_load): # if list is not empty, print warning that units are different if list_comp: log.info('Merging df with %s and df with %s units', df1, df2) + + +def calculate_industry_coefficients(fbs_load, year,region, + io_level, impacts=False): + """ + Generates sector coefficients (flow/$) for all sectors for all locations. 
+ + :param fbs_load: flow by sector method + :param year: year for industry output dataset + :param region: str, 'state' or 'national' + :param io_level: str, 'summary' or 'detail' + :param impacts: bool, True to apply and aggregate on impacts + False to compare flow/contexts + """ + from flowsa.sectormapping import map_to_BEA_sectors,\ + get_BEA_industry_output + + fbs = collapse_fbs_sectors(fbs_load) + + fbs = map_to_BEA_sectors(fbs, region, io_level, year) + + inventory = not(impacts) + if impacts: + try: + import lciafmt + fbs_summary = (lciafmt.apply_lcia_method(fbs, 'TRACI2.1') + .rename(columns={'FlowAmount': 'InvAmount', + 'Impact': 'FlowAmount'})) + groupby_cols = ['Location', 'Sector', + 'Indicator', 'Indicator unit'] + sort_by_cols = ['Indicator', 'Sector', 'Location'] + except ImportError: + log.warning('lciafmt not installed') + inventory = True + except AttributeError: + log.warning('check lciafmt branch') + inventory = True + + if inventory: + fbs_summary = fbs.copy() + groupby_cols = ['Location', 'Sector', + 'Flowable', 'Context', 'Unit'] + sort_by_cols = ['Context', 'Flowable', + 'Sector', 'Location'] + + # Update location if needed prior to aggregation + if region == 'national': + fbs_summary["Location"] = US_FIPS + + fbs_summary = (fbs_summary.groupby(groupby_cols) + .agg({'FlowAmount': 'sum'}). 
+ reset_index()) + + bea = get_BEA_industry_output(region, io_level, year) + + # Add sector output and assign coefficients + fbs_summary = fbs_summary.merge(bea.rename( + columns={'BEA': 'Sector'}), how = 'left', + on=['Sector','Location']) + fbs_summary['Coefficient'] = (fbs_summary['FlowAmount'] / + fbs_summary['Output']) + fbs_summary = fbs_summary.sort_values(by=sort_by_cols) + + return fbs_summary + + +if __name__ == "__main__": + df1 = calculate_industry_coefficients( + flowsa.getFlowBySector('Water_national_2015_m1'), 2015, + "national", "summary", False) + df2 = calculate_industry_coefficients( + flowsa.getFlowBySector('GRDREL_national_2017'), 2017, + "national", "summary", True) + df3 = calculate_industry_coefficients( + flowsa.getFlowBySector('GRDREL_national_2017'), 2017, + "national", "detail", True) + df4 = calculate_industry_coefficients( + flowsa.getFlowBySector('GRDREL_state_2017'), 2017, + "national", "detail", True) + try: + df5 = calculate_industry_coefficients( + flowsa.getFlowBySector('GRDREL_state_2017'), 2017, + "state", "detail", True) + except TypeError: + df5 = None diff --git a/pytest.ini b/pytest.ini index c3432220c..e4a7c9a1d 100644 --- a/pytest.ini +++ b/pytest.ini @@ -3,3 +3,5 @@ log_cli = True log_cli_level = INFO norecursedirs = build dist scripts examples addopts = --doctest-modules +markers = + generate_fbs: test function to generate all FBS diff --git a/scripts/README.md b/scripts/FlowByActivity_Crosswalks/README.md similarity index 100% rename from scripts/README.md rename to scripts/FlowByActivity_Crosswalks/README.md diff --git a/scripts/common_scripts.py b/scripts/FlowByActivity_Crosswalks/common_scripts.py similarity index 100% rename from scripts/common_scripts.py rename to scripts/FlowByActivity_Crosswalks/common_scripts.py diff --git a/scripts/FlowByActivity_Crosswalks/write_Crosswalk_BEA_2012_Detail.py b/scripts/FlowByActivity_Crosswalks/write_Crosswalk_BEA_2012.py similarity index 66% rename from 
scripts/FlowByActivity_Crosswalks/write_Crosswalk_BEA_2012_Detail.py rename to scripts/FlowByActivity_Crosswalks/write_Crosswalk_BEA_2012.py index 0b81ce25a..292486b32 100644 --- a/scripts/FlowByActivity_Crosswalks/write_Crosswalk_BEA_2012_Detail.py +++ b/scripts/FlowByActivity_Crosswalks/write_Crosswalk_BEA_2012.py @@ -1,26 +1,26 @@ -# write_Crosswalk_BEA_2012_Detail.py (scripts) +# write_Crosswalk_BEA_2012.py (scripts) # !/usr/bin/env python3 # coding=utf-8 """ -Create a crosswalk linking BEA to NAICS for 2012 Detail +Create a crosswalk linking BEA to NAICS for 2012 for any level """ from flowsa.common import load_crosswalk from flowsa.settings import datapath -if __name__ == '__main__': +def write_BEA_crosswalk(level='Detail'): cw_load = load_crosswalk('BEA') - cw = cw_load[['BEA_2012_Detail_Code', + cw = cw_load[[f'BEA_2012_{level}_Code', 'NAICS_2012_Code']].drop_duplicates().reset_index(drop=True) # drop all rows with naics >6 cw = cw[cw['NAICS_2012_Code'].apply(lambda x: len(str(x)) == 6)].reset_index(drop=True) df = cw.rename(columns={"NAICS_2012_Code": "Sector", - "BEA_2012_Detail_Code":"Activity"}) + f"BEA_2012_{level}_Code":"Activity"}) df['SectorSourceName'] = 'NAICS_2012_Code' - df['ActivitySourceName'] = 'BEA_2012_Detail_Code' + df['ActivitySourceName'] = f'BEA_2012_{level}_Code' df.dropna(subset=["Sector"], inplace=True) # assign sector type df['SectorType'] = None @@ -32,4 +32,8 @@ df = df[['ActivitySourceName', 'Activity', 'SectorSourceName', 'Sector', 'SectorType']] # save as csv df.to_csv(datapath + "activitytosectormapping/" + - "NAICS_Crosswalk_BEA_2012_Detail.csv", index=False) + f"NAICS_Crosswalk_BEA_2012_{level}.csv", index=False) + +if __name__ == '__main__': + write_BEA_crosswalk('Detail') + write_BEA_crosswalk('Summary') \ No newline at end of file diff --git a/scripts/FlowByActivity_Crosswalks/write_Crosswalk_BEA_Make_Table.py b/scripts/FlowByActivity_Crosswalks/write_Crosswalk_BEA_Make_Table.py index a0ec4ee80..932f74662 100644 --- 
a/scripts/FlowByActivity_Crosswalks/write_Crosswalk_BEA_Make_Table.py +++ b/scripts/FlowByActivity_Crosswalks/write_Crosswalk_BEA_Make_Table.py @@ -9,7 +9,7 @@ import pandas as pd from flowsa.common import load_crosswalk from flowsa.settings import datapath -from scripts.common_scripts import unique_activity_names, order_crosswalk +from scripts.FlowByActivity_Crosswalks.common_scripts import unique_activity_names, order_crosswalk def assign_naics(df_load): diff --git a/scripts/FlowByActivity_Crosswalks/write_Crosswalk_BLM_PLS.py b/scripts/FlowByActivity_Crosswalks/write_Crosswalk_BLM_PLS.py index 922088277..a75decc96 100644 --- a/scripts/FlowByActivity_Crosswalks/write_Crosswalk_BLM_PLS.py +++ b/scripts/FlowByActivity_Crosswalks/write_Crosswalk_BLM_PLS.py @@ -10,7 +10,7 @@ """ import pandas as pd from flowsa.settings import datapath -from scripts.common_scripts import unique_activity_names, order_crosswalk +from scripts.FlowByActivity_Crosswalks.common_scripts import unique_activity_names, order_crosswalk def assign_naics(df): diff --git a/scripts/FlowByActivity_Crosswalks/write_Crosswalk_Blackhurst_IO.py b/scripts/FlowByActivity_Crosswalks/write_Crosswalk_Blackhurst_IO.py index 66abe6396..858c17e29 100644 --- a/scripts/FlowByActivity_Crosswalks/write_Crosswalk_Blackhurst_IO.py +++ b/scripts/FlowByActivity_Crosswalks/write_Crosswalk_Blackhurst_IO.py @@ -11,7 +11,7 @@ import pandas as pd from flowsa.common import load_crosswalk from flowsa.settings import datapath -from scripts.common_scripts import unique_activity_names, order_crosswalk +from scripts.FlowByActivity_Crosswalks.common_scripts import unique_activity_names, order_crosswalk def assign_naics(df_load): diff --git a/scripts/FlowByActivity_Crosswalks/write_Crosswalk_CalRecycle_WasteCharacterization.py b/scripts/FlowByActivity_Crosswalks/write_Crosswalk_CalRecycle_WasteCharacterization.py index 462c4864d..faaec2d1e 100644 --- 
a/scripts/FlowByActivity_Crosswalks/write_Crosswalk_CalRecycle_WasteCharacterization.py +++ b/scripts/FlowByActivity_Crosswalks/write_Crosswalk_CalRecycle_WasteCharacterization.py @@ -7,7 +7,7 @@ """ import pandas as pd from flowsa.settings import datapath, externaldatapath -from scripts.common_scripts import unique_activity_names, order_crosswalk +from scripts.FlowByActivity_Crosswalks.common_scripts import unique_activity_names, order_crosswalk from flowsa.data_source_scripts.CalRecycle_WasteCharacterization import produced_by diff --git a/scripts/FlowByActivity_Crosswalks/write_Crosswalk_Census_CBP.py b/scripts/FlowByActivity_Crosswalks/write_Crosswalk_Census_CBP.py index f640c49f3..0dcc36266 100644 --- a/scripts/FlowByActivity_Crosswalks/write_Crosswalk_Census_CBP.py +++ b/scripts/FlowByActivity_Crosswalks/write_Crosswalk_Census_CBP.py @@ -7,7 +7,7 @@ """ import pandas as pd from flowsa.settings import datapath -from scripts.common_scripts import unique_activity_names, order_crosswalk +from scripts.FlowByActivity_Crosswalks.common_scripts import unique_activity_names, order_crosswalk if __name__ == '__main__': diff --git a/scripts/FlowByActivity_Crosswalks/write_Crosswalk_Census_VIP.py b/scripts/FlowByActivity_Crosswalks/write_Crosswalk_Census_VIP.py index b8d246af6..a6822f938 100644 --- a/scripts/FlowByActivity_Crosswalks/write_Crosswalk_Census_VIP.py +++ b/scripts/FlowByActivity_Crosswalks/write_Crosswalk_Census_VIP.py @@ -7,7 +7,7 @@ """ import pandas as pd from flowsa.settings import datapath, externaldatapath -from scripts.common_scripts import unique_activity_names, order_crosswalk +from scripts.FlowByActivity_Crosswalks.common_scripts import unique_activity_names, order_crosswalk def assign_naics(df): diff --git a/scripts/FlowByActivity_Crosswalks/write_Crosswalk_EPA_CDDPath.py b/scripts/FlowByActivity_Crosswalks/write_Crosswalk_EPA_CDDPath.py index 3321042aa..f4f576af7 100644 --- a/scripts/FlowByActivity_Crosswalks/write_Crosswalk_EPA_CDDPath.py +++ 
b/scripts/FlowByActivity_Crosswalks/write_Crosswalk_EPA_CDDPath.py @@ -7,7 +7,7 @@ """ import pandas as pd from flowsa.settings import datapath, externaldatapath -from scripts.common_scripts import unique_activity_names, order_crosswalk +from scripts.FlowByActivity_Crosswalks.common_scripts import unique_activity_names, order_crosswalk def assign_naics(df): diff --git a/scripts/FlowByActivity_Crosswalks/write_Crosswalk_NETL_EIA_PlantWater.py b/scripts/FlowByActivity_Crosswalks/write_Crosswalk_NETL_EIA_PlantWater.py index 27738fe97..0f4a52e22 100644 --- a/scripts/FlowByActivity_Crosswalks/write_Crosswalk_NETL_EIA_PlantWater.py +++ b/scripts/FlowByActivity_Crosswalks/write_Crosswalk_NETL_EIA_PlantWater.py @@ -15,7 +15,7 @@ """ from flowsa.common import datapath -from scripts.common_scripts import unique_activity_names, order_crosswalk +from scripts.FlowByActivity_Crosswalks.common_scripts import unique_activity_names, order_crosswalk def assign_naics(df): diff --git a/scripts/FlowByActivity_Crosswalks/write_Crosswalk_StatCan_GDP.py b/scripts/FlowByActivity_Crosswalks/write_Crosswalk_StatCan_GDP.py index b9a5d0ac0..c89e65d4a 100644 --- a/scripts/FlowByActivity_Crosswalks/write_Crosswalk_StatCan_GDP.py +++ b/scripts/FlowByActivity_Crosswalks/write_Crosswalk_StatCan_GDP.py @@ -8,7 +8,7 @@ """ import pandas as pd from flowsa.settings import datapath -from scripts.common_scripts import unique_activity_names, order_crosswalk +from scripts.FlowByActivity_Crosswalks.common_scripts import unique_activity_names, order_crosswalk if __name__ == '__main__': # select years to pull unique activity names diff --git a/scripts/FlowByActivity_Crosswalks/write_Crosswalk_StatCan_IWS_MI.py b/scripts/FlowByActivity_Crosswalks/write_Crosswalk_StatCan_IWS_MI.py index 55673ce4f..bebc6f816 100644 --- a/scripts/FlowByActivity_Crosswalks/write_Crosswalk_StatCan_IWS_MI.py +++ b/scripts/FlowByActivity_Crosswalks/write_Crosswalk_StatCan_IWS_MI.py @@ -8,7 +8,7 @@ """ import pandas as pd from 
flowsa.settings import datapath -from scripts.common_scripts import unique_activity_names, order_crosswalk +from scripts.FlowByActivity_Crosswalks.common_scripts import unique_activity_names, order_crosswalk if __name__ == '__main__': diff --git a/scripts/FlowByActivity_Crosswalks/write_Crosswalk_USDA_ACUP.py b/scripts/FlowByActivity_Crosswalks/write_Crosswalk_USDA_ACUP.py index 977bafb9d..c14328f5a 100644 --- a/scripts/FlowByActivity_Crosswalks/write_Crosswalk_USDA_ACUP.py +++ b/scripts/FlowByActivity_Crosswalks/write_Crosswalk_USDA_ACUP.py @@ -10,7 +10,7 @@ """ import pandas as pd from flowsa.settings import datapath -from scripts.common_scripts import unique_activity_names, order_crosswalk +from scripts.FlowByActivity_Crosswalks.common_scripts import unique_activity_names, order_crosswalk def assign_naics(df): diff --git a/scripts/FlowByActivity_Crosswalks/write_Crosswalk_USDA_CoA_Cropland.py b/scripts/FlowByActivity_Crosswalks/write_Crosswalk_USDA_CoA_Cropland.py index 735358723..a53f7c07d 100644 --- a/scripts/FlowByActivity_Crosswalks/write_Crosswalk_USDA_CoA_Cropland.py +++ b/scripts/FlowByActivity_Crosswalks/write_Crosswalk_USDA_CoA_Cropland.py @@ -15,7 +15,7 @@ """ import pandas as pd from flowsa.settings import datapath -from scripts.common_scripts import unique_activity_names, order_crosswalk +from scripts.FlowByActivity_Crosswalks.common_scripts import unique_activity_names, order_crosswalk def assign_naics(df): diff --git a/scripts/FlowByActivity_Crosswalks/write_Crosswalk_USDA_CoA_Livestock.py b/scripts/FlowByActivity_Crosswalks/write_Crosswalk_USDA_CoA_Livestock.py index eb4a29fe0..b10bd1a49 100644 --- a/scripts/FlowByActivity_Crosswalks/write_Crosswalk_USDA_CoA_Livestock.py +++ b/scripts/FlowByActivity_Crosswalks/write_Crosswalk_USDA_CoA_Livestock.py @@ -15,7 +15,7 @@ import pandas as pd from flowsa.settings import datapath -from scripts.common_scripts import unique_activity_names, order_crosswalk +from 
scripts.FlowByActivity_Crosswalks.common_scripts import unique_activity_names, order_crosswalk def assign_naics(df): diff --git a/scripts/FlowByActivity_Crosswalks/write_Crosswalk_USDA_ERS_FIWS.py b/scripts/FlowByActivity_Crosswalks/write_Crosswalk_USDA_ERS_FIWS.py index 1d1553dd5..65ed16e3d 100644 --- a/scripts/FlowByActivity_Crosswalks/write_Crosswalk_USDA_ERS_FIWS.py +++ b/scripts/FlowByActivity_Crosswalks/write_Crosswalk_USDA_ERS_FIWS.py @@ -12,7 +12,7 @@ import pandas as pd from flowsa.settings import datapath -from scripts.common_scripts import unique_activity_names, order_crosswalk +from scripts.FlowByActivity_Crosswalks.common_scripts import unique_activity_names, order_crosswalk def assign_naics(df): diff --git a/scripts/FlowByActivity_Crosswalks/write_Crosswalk_USDA_ERS_MLU.py b/scripts/FlowByActivity_Crosswalks/write_Crosswalk_USDA_ERS_MLU.py index 6f3841837..8568ff838 100644 --- a/scripts/FlowByActivity_Crosswalks/write_Crosswalk_USDA_ERS_MLU.py +++ b/scripts/FlowByActivity_Crosswalks/write_Crosswalk_USDA_ERS_MLU.py @@ -11,7 +11,7 @@ import pandas as pd from flowsa.settings import datapath -from scripts.common_scripts import unique_activity_names, order_crosswalk +from scripts.FlowByActivity_Crosswalks.common_scripts import unique_activity_names, order_crosswalk def assign_naics(df): diff --git a/scripts/FlowByActivity_Crosswalks/write_Crosswalk_USDA_IWMS.py b/scripts/FlowByActivity_Crosswalks/write_Crosswalk_USDA_IWMS.py index c7c818520..e3a99adbf 100644 --- a/scripts/FlowByActivity_Crosswalks/write_Crosswalk_USDA_IWMS.py +++ b/scripts/FlowByActivity_Crosswalks/write_Crosswalk_USDA_IWMS.py @@ -15,7 +15,7 @@ """ import pandas as pd from flowsa.settings import datapath -from scripts.common_scripts import unique_activity_names, order_crosswalk +from scripts.FlowByActivity_Crosswalks.common_scripts import unique_activity_names, order_crosswalk def assign_naics(df): diff --git a/scripts/FlowByActivity_Crosswalks/write_Crosswalk_USGS_NWIS_WU.py 
b/scripts/FlowByActivity_Crosswalks/write_Crosswalk_USGS_NWIS_WU.py index c157f238e..ec07ac8d5 100644 --- a/scripts/FlowByActivity_Crosswalks/write_Crosswalk_USGS_NWIS_WU.py +++ b/scripts/FlowByActivity_Crosswalks/write_Crosswalk_USGS_NWIS_WU.py @@ -10,7 +10,7 @@ """ import pandas as pd from flowsa.settings import datapath -from scripts.common_scripts import unique_activity_names, order_crosswalk +from scripts.FlowByActivity_Crosswalks.common_scripts import unique_activity_names, order_crosswalk def assign_naics(df): diff --git a/scripts/FlowByActivity_Crosswalks/write_Crosswalk_USGS_WU_Coef.py b/scripts/FlowByActivity_Crosswalks/write_Crosswalk_USGS_WU_Coef.py index 9dd908187..faa73d23c 100644 --- a/scripts/FlowByActivity_Crosswalks/write_Crosswalk_USGS_WU_Coef.py +++ b/scripts/FlowByActivity_Crosswalks/write_Crosswalk_USGS_WU_Coef.py @@ -10,7 +10,7 @@ """ import pandas as pd from flowsa.settings import datapath -from scripts.common_scripts import unique_activity_names, order_crosswalk +from scripts.FlowByActivity_Crosswalks.common_scripts import unique_activity_names, order_crosswalk def assign_naics(df): diff --git a/scripts/FlowBySector_Activity_Sets/write_FBS_activity_set_BLS_QCEW.py b/scripts/FlowBySector_Activity_Sets/write_FBS_activity_set_BLS_QCEW.py index cfa7bd6ae..9e9004c3a 100644 --- a/scripts/FlowBySector_Activity_Sets/write_FBS_activity_set_BLS_QCEW.py +++ b/scripts/FlowBySector_Activity_Sets/write_FBS_activity_set_BLS_QCEW.py @@ -2,30 +2,37 @@ # !/usr/bin/env python3 # coding=utf-8 """ -Write the csv called on in flowbysectormethods yaml files for -land use related to BLS QCEW employment data +Create an activity set file file employment data. Script only needs to be +run for additional years if there are new NAICS. 
""" +import pandas as pd import flowsa from flowsa.settings import flowbysectoractivitysetspath datasource = 'BLS_QCEW' -as_year = '2017' +as_years = ['2002', '2010', '2011', '2012', '2013', '2014', '2015', '2016', + '2017'] if __name__ == '__main__': - df_import = flowsa.getFlowByActivity(datasource, as_year) - df = (df_import[['ActivityProducedBy']] - .drop_duplicates() - .reset_index(drop=True) - .rename(columns={"ActivityProducedBy": "name"}) - .assign(activity_set='qcew', - note='')) + # empty df + df2 = pd.DataFrame() + for y in as_years: + df_import = flowsa.getFlowByActivity(datasource, y) - # reorder dataframe - df = (df[['activity_set', 'name', 'note']] - .sort_values(['activity_set', 'name']) - .reset_index(drop=True)) - - df.to_csv(f'{flowbysectoractivitysetspath}{datasource}_asets.csv', - index=False) + df = (df_import[['ActivityProducedBy']] + .drop_duplicates() + .reset_index(drop=True) + .rename(columns={"ActivityProducedBy": "name"}) + .assign(activity_set='qcew', + note='')) + # reorder dataframe + df = df[['activity_set', 'name', 'note']] + # concat + df2 = pd.concat([df2, df], ignore_index=True) + # drop duplicates and save df + df3 = df2.drop_duplicates() + df3 = df3.sort_values(['activity_set', 'name']).reset_index(drop=True) + df3.to_csv(f"{flowbysectoractivitysetspath}{datasource}_asets.csv", + index=False) diff --git a/scripts/update_NAICS_crosswalk.py b/scripts/writeNAICScrosswalk.py similarity index 99% rename from scripts/update_NAICS_crosswalk.py rename to scripts/writeNAICScrosswalk.py index 5047a8506..15cebd5ce 100644 --- a/scripts/update_NAICS_crosswalk.py +++ b/scripts/writeNAICScrosswalk.py @@ -1,4 +1,4 @@ -# update_NAICS_crosswalk.py (scripts) +# writeNAICScrosswalk.py # !/usr/bin/env python3 # coding=utf-8 diff --git a/setup.py b/setup.py index f3f4f06ff..62b0cde4f 100644 --- a/setup.py +++ b/setup.py @@ -6,7 +6,7 @@ setup( name='flowsa', - version='1.1', + version='1.2.1', packages=find_packages(), package_dir={'flowsa': 
'flowsa'}, include_package_data=True, @@ -32,14 +32,14 @@ 'matplotlib>=3.4.3' ], url='https://github.com/USEPA/FLOWSA', - license='CC0', + license='MIT', author='Catherine Birney, Ben Young, Wesley Ingwersen, Melissa Conner, Jacob Specht, Mo Li', author_email='ingwersen.wesley@epa.gov', classifiers=[ "Development Status :: 1 - Alpha", "Environment :: IDE", "Intended Audience :: Science/Research", - "License :: CC0", + "License :: MIT", "Programming Language :: Python :: 3.x", "Topic :: Utilities", ],