# Recovered from git patch 67f1fd063d32b6decbfe8535e6baebb6e96e7a3b ("0.9.DEV40",
# Chris Mutel, 2024-10-14); target file: bw2io/strategies/simapro.py, inserted after
# `split_simapro_name_geo`.
# The patch also bumps `bw2io.__version__` from "0.9.DEV39" to "0.9.DEV40" and lists
# both strategies below in CHANGES.md under "0.9.DEV40 (2024-10-14)".


def _split_curly_brackets_name(obj: dict, pattern: "re.Pattern", strip_original: bool) -> None:
    """Split ``obj["name"]`` in place into ``name`` and ``location`` when it matches ``pattern``.

    ``pattern`` must define the named groups ``name`` and ``geo``. The unsplit name is
    kept under ``"simapro name"`` unless that key already exists; ``strip_original``
    controls whether surrounding white space is removed from that stored copy.
    """
    match = pattern.match(obj["name"])
    if not match:
        return
    if "simapro name" not in obj:
        obj["simapro name"] = obj["name"].strip() if strip_original else obj["name"]
    obj["location"] = match["geo"].strip()
    obj["name"] = match["name"].strip()


def split_simapro_name_geo_curly_brackets(db: List[dict], suffix: str = "") -> List[dict]:
    """
    Split a name like 'Wheat straw, at farm {NL} Energy, U' into name and geo components in a dataset.

    The original name is stored in a new field called 'simapro name' if that field is not yet
    present. For datasets the stored original is stripped of surrounding white space; for
    exchanges it is stored exactly as found.

    White space around the suffix and process name are stripped.

    Parameters
    ----------
    db : list
        A list of dictionaries representing datasets with names to be split. Datasets are
        modified in place. Each dataset and each exchange must have a 'name' key.
    suffix : str
        Suffix expected to be added to the end of process names, like "foo" in
        "Energy {CO} foo". It is matched literally (regular expression metacharacters
        are escaped). ``None`` or an empty string means no suffix.

    Returns
    -------
    db : list
        The same list, with names split into 'name' and 'location' wherever the pattern
        matched. Names which do not match are left untouched.

    Examples
    --------
    >>> db = [
    ...     {
    ...         "name": "Wheat straw, at farm {NL} Energy, U",
    ...         "exchanges": [
    ...             {"name": "Dairy cows ration, at farm {ES} Energy, U"},
    ...         ],
    ...     }
    ... ]
    >>> split_simapro_name_geo_curly_brackets(db, "Energy, U")
    [
        {
            "name": "Wheat straw, at farm",
            "simapro name": "Wheat straw, at farm {NL} Energy, U",
            "location": "NL",
            "exchanges": [
                {
                    "name": "Dairy cows ration, at farm",
                    "simapro name": "Dairy cows ration, at farm {ES} Energy, U",
                    "location": "ES",
                },
            ],
        },
    ]
    """
    # `re.escape` keeps a suffix such as "(U)" or "a.b" from being read as regex syntax.
    curly_fries = re.compile(
        r"^(?P<name>.+?)\s?\{(?P<geo>.+?)\}\s?" + re.escape(suffix or "") + r"\s?$"
    )

    for ds in db:
        _split_curly_brackets_name(ds, curly_fries, strip_original=True)
        for exc in ds.get("exchanges", []):
            _split_curly_brackets_name(exc, curly_fries, strip_original=False)
    return db


def remove_biosphere_location_prefix_if_flow_in_same_location(db: List[dict]) -> List[dict]:
    """
    Remove the location suffix from regionalized biosphere flows in the process location.

    If a biosphere flow is SimaPro-regionalized, like 'Ammonia, AR', and the process
    location is 'AR', then that suffix is removed, giving 'Ammonia'. The separator before
    the location may be any run of commas and slashes followed by a single space, or just
    a space ('Phosphorus, FR', 'Phosphorus/ FR' and 'Phosphorus FR' all become
    'Phosphorus'). The original name is stored in 'simapro name' if that field is not yet
    present.

    Parameters
    ----------
    db : list
        A list of dictionaries representing datasets. Datasets are modified in place.
        Datasets without a non-empty string 'location' or without 'exchanges' are skipped.

    Returns
    -------
    db : list
        The same list, with matching biosphere exchange names shortened.
    """
    for ds in db:
        location = ds.get("location")
        # A missing, empty or non-string location cannot be turned into a usable pattern.
        if not isinstance(location, str) or not location:
            continue
        finder = re.compile(
            r"(?P<name>.+?)[\,/]* (?P<location>" + re.escape(location) + r")\s?$"
        )
        for exc in ds.get("exchanges", []):
            if exc.get("type") != "biosphere":
                continue
            match = finder.match(exc["name"])
            # The stripped comparison leaves flows untouched when the dataset location
            # itself carries surrounding white space.
            if match and match["location"].strip() == location:
                if "simapro name" not in exc:
                    exc["simapro name"] = exc["name"]
                exc["name"] = match["name"].strip()
    return db
# Recovered from git patch 67f1fd063d32b6decbfe8535e6baebb6e96e7a3b ("0.9.DEV40");
# target file: tests/strategies/simapro.py, appended after
# `test_normalize_simapro_labels_to_brightway_standard`.
# NOTE(review): the import of `remove_biosphere_location_prefix_if_flow_in_same_location`
# is not visible in this hunk - confirm the test module brings it into scope.


def test_remove_biosphere_location_prefix_if_flow_in_same_location():
    """Location suffixes are dropped only from biosphere flows located where the process is."""

    def biosphere(name, simapro_name=None):
        # Build a biosphere exchange; `simapro_name` is only set on flows expected to change.
        flow = {"name": name, "type": "biosphere"}
        if simapro_name is not None:
            flow["simapro name"] = simapro_name
        return flow

    datasets = [
        {
            "location": "FR",
            "exchanges": [
                biosphere("Water, unspecified natural origin, RO"),
                biosphere("Transformation, to permanent crop, FR"),
                biosphere("Phosphorus, FR"),
                biosphere("Phosphorus FR"),
                biosphere("Phosphorus/ FR"),
            ],
        },
        {
            "location": "IAI Area, South America",
            "exchanges": [
                biosphere("Transformation, to permanent crop, IAI Area, South America"),
            ],
        },
    ]
    wanted = [
        {
            "location": "FR",
            "exchanges": [
                # Different location than the process: left untouched.
                biosphere("Water, unspecified natural origin, RO"),
                biosphere(
                    "Transformation, to permanent crop",
                    "Transformation, to permanent crop, FR",
                ),
                biosphere("Phosphorus", "Phosphorus, FR"),
                biosphere("Phosphorus", "Phosphorus FR"),
                biosphere("Phosphorus", "Phosphorus/ FR"),
            ],
        },
        {
            "location": "IAI Area, South America",
            "exchanges": [
                biosphere(
                    "Transformation, to permanent crop",
                    "Transformation, to permanent crop, IAI Area, South America",
                ),
            ],
        },
    ]

    assert remove_biosphere_location_prefix_if_flow_in_same_location(datasets) == wanted
# Recovered from git patch 67f1fd063d32b6decbfe8535e6baebb6e96e7a3b ("0.9.DEV40");
# target file: tests/strategies/simapro_name_splitting.py, appended after
# `test_splitting_exchanges`.
# The patch also extends that module's top-level import to:
#   from bw2io.strategies.simapro import detoxify_re, split_simapro_name_geo, split_simapro_name_geo_curly_brackets


def test_split_simapro_name_geo_curly_brackets_custom_suffix():
    """Names are split around `{geo}` with a custom suffix, tolerating missing/extra white space."""
    # Imported here so this test does not depend on the module-level import line.
    from bw2io.strategies.simapro import split_simapro_name_geo_curly_brackets

    given = [
        {
            "name": "Wheat straw, at farm {NL} Energy, U",
            "exchanges": [{"name": "Wheat straw, at farm{NL}Energy, U "}],
        },
        {
            "name": "Dairy cows ration, at farm {ES}Energy, U",
            # A pre-existing "simapro name" must not be overwritten.
            "simapro name": "foo",
            "exchanges": [
                {"name": "Dairy cows ration, at farm {IAI Area, South America}Energy, U\t"}
            ],
        },
    ]
    expected = [
        {
            "name": "Wheat straw, at farm",
            "location": "NL",
            "simapro name": "Wheat straw, at farm {NL} Energy, U",
            "exchanges": [
                {
                    # Exchange originals are stored unstripped (note the trailing space).
                    "simapro name": "Wheat straw, at farm{NL}Energy, U ",
                    "name": "Wheat straw, at farm",
                    "location": "NL",
                }
            ],
        },
        {
            "name": "Dairy cows ration, at farm",
            "location": "ES",
            "simapro name": "foo",
            "exchanges": [
                {
                    "simapro name": "Dairy cows ration, at farm {IAI Area, South America}Energy, U\t",
                    "name": "Dairy cows ration, at farm",
                    "location": "IAI Area, South America",
                }
            ],
        },
    ]

    result = split_simapro_name_geo_curly_brackets(given, "Energy, U")
    assert result == expected