Skip to content

Commit

Permalink
0.9.DEV40
Browse files Browse the repository at this point in the history
  • Loading branch information
cmutel committed Oct 14, 2024
1 parent 4d831a9 commit 67f1fd0
Show file tree
Hide file tree
Showing 5 changed files with 189 additions and 2 deletions.
5 changes: 5 additions & 0 deletions CHANGES.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,10 @@
# `bw2io` Changelog

### 0.9.DEV40 (2024-10-14)

* Add `split_simapro_name_geo_curly_brackets` strategy
* Add `remove_biosphere_location_prefix_if_flow_in_same_location` strategy

### 0.9.DEV39 (2024-10-13)

* Add `create_products_as_new_nodes` strategy
Expand Down
2 changes: 1 addition & 1 deletion bw2io/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@
"useeio20",
]

__version__ = "0.9.DEV39"
__version__ = "0.9.DEV40"

from .backup import (
backup_data_directory,
Expand Down
85 changes: 85 additions & 0 deletions bw2io/strategies/simapro.py
Original file line number Diff line number Diff line change
Expand Up @@ -404,6 +404,91 @@ def split_simapro_name_geo(db):
return db


def split_simapro_name_geo_curly_brackets(db: List[dict], suffix: str = "") -> List[dict]:
    """
    Split a name like 'Wheat straw, at farm {NL} Energy, U' into name and geo components.

    Applies to each dataset and to each of its exchanges. When a name matches,
    ``name`` is replaced by the text before the curly brackets and ``location``
    is set to the text inside them. The original name is stored in a new field
    called 'simapro name' if that field is not yet present. White space around
    the suffix and process name are stripped.

    Parameters
    ----------
    db : list
        A list of dictionaries representing datasets with names to be split.
        The datasets (and their exchanges) are modified in place.
    suffix : str
        Literal suffix expected at the end of process names, like "foo" in
        "Energy {CO} foo". It is matched verbatim: characters with a special
        meaning in regular expressions (``|``, ``(``, ``.``, ...) are escaped.
        An empty or missing suffix means the name must end after the brackets.

    Returns
    -------
    db : list
        The same list, with split names and geo components.

    Examples
    --------
    >>> db = [
    ...     {
    ...         "name": "Wheat straw, at farm {NL} Energy, U",
    ...         "exchanges": [
    ...             {"name": "Dairy cows ration, at farm {ES} Energy, U"},
    ...         ],
    ...     }
    ... ]
    >>> split_simapro_name_geo_curly_brackets(db, "Energy, U")
    [
        {
            "name": "Wheat straw, at farm",
            "simapro name": "Wheat straw, at farm {NL} Energy, U",
            "location": "NL",
            "exchanges": [
                {
                    "name": "Dairy cows ration, at farm",
                    "simapro name": "Dairy cows ration, at farm {ES} Energy, U",
                    "location": "ES",
                },
            ],
        },
    ]
    """
    # The suffix is literal text, not a pattern. Without escaping, a suffix such
    # as "| Cut-off, U" would turn into a regex alternation and make every
    # "name {geo} ..." string match, whatever its ending.
    escaped_suffix = re.escape(suffix) if suffix else ""
    curly_fries = re.compile(
        "^(?P<name>.+?)\\s?\\{(?P<geo>.+?)\\}\\s?" + escaped_suffix + "\\s?$"
    )

    for ds in db:
        if match := curly_fries.match(ds["name"]):
            if "simapro name" not in ds:
                ds["simapro name"] = ds["name"].strip()
            ds["location"] = match.group("geo").strip()
            ds["name"] = match.group("name").strip()
        for exc in ds.get("exchanges", []):
            if match := curly_fries.match(exc["name"]):
                if "simapro name" not in exc:
                    # Exchange names are preserved verbatim (not stripped),
                    # unlike the dataset name above.
                    exc["simapro name"] = exc["name"]
                exc["location"] = match.group("geo").strip()
                exc["name"] = match.group("name").strip()
    return db


def remove_biosphere_location_prefix_if_flow_in_same_location(db: List[dict]) -> List[dict]:
    """
    Remove the location suffix from regionalized biosphere flow names.

    If a biosphere flow is SimaPro-regionalized, like 'Ammonia, AR', and the
    process location is 'AR', then remove that suffix (despite the function
    name, the location is at the end of the flow name). The separator before
    the location may be a comma, a slash, or only a space. The original name
    is stored in 'simapro name' if that field is not yet present.

    Datasets without a usable 'location' (missing, ``None`` or empty) or
    without 'exchanges' are left untouched.

    Parameters
    ----------
    db : list
        A list of dataset dictionaries; exchanges are modified in place.

    Returns
    -------
    db : list
        The same list, with matching biosphere exchange names shortened.
    """
    for ds in db:
        location = ds.get("location")
        # An empty location would build a pattern that strips trailing spaces
        # from arbitrary names, and ``None`` cannot be escaped at all.
        if not location:
            continue
        finder = re.compile(f"(?P<name>.+?)[\\,/]* (?P<location>{re.escape(location)})\\s?$")
        for exc in ds.get("exchanges", []):
            if exc.get("type") != "biosphere":
                continue
            if match := finder.match(exc["name"]):
                gd = match.groupdict()
                # Only differs from a plain match when the dataset location
                # itself carries surrounding white space.
                if gd["location"].strip() == location:
                    if "simapro name" not in exc:
                        exc["simapro name"] = exc["name"]
                    exc["name"] = gd["name"].strip()
    return db


def normalize_simapro_biosphere_categories(db):
"""
Normalize biosphere categories in a dataset to the ecoinvent standard.
Expand Down
60 changes: 60 additions & 0 deletions tests/strategies/simapro.py
Original file line number Diff line number Diff line change
Expand Up @@ -330,3 +330,63 @@ def test_normalize_simapro_labels_to_brightway_standard():
}
]
assert normalize_simapro_labels_to_brightway_standard(given) == expected


def test_remove_biosphere_location_prefix_if_flow_in_same_location():
    """Location suffixes equal to the dataset location are removed from biosphere flows.

    Covers the comma, bare-space and slash separators, a flow regionalized to a
    different location (left untouched), and a location that itself contains a
    comma and spaces.
    """

    def biosphere(name, simapro_name=None):
        # Build a biosphere exchange; "simapro name" is only present when given.
        exchange = {"name": name, "type": "biosphere"}
        if simapro_name is not None:
            exchange["simapro name"] = simapro_name
        return exchange

    south_america = "IAI Area, South America"
    given = [
        {
            "location": "FR",
            "exchanges": [
                biosphere("Water, unspecified natural origin, RO"),
                biosphere("Transformation, to permanent crop, FR"),
                biosphere("Phosphorus, FR"),
                biosphere("Phosphorus FR"),
                biosphere("Phosphorus/ FR"),
            ],
        },
        {
            "location": south_america,
            "exchanges": [
                biosphere("Transformation, to permanent crop, IAI Area, South America"),
            ],
        },
    ]
    expected = [
        {
            "location": "FR",
            "exchanges": [
                biosphere("Water, unspecified natural origin, RO"),
                biosphere(
                    "Transformation, to permanent crop",
                    "Transformation, to permanent crop, FR",
                ),
                biosphere("Phosphorus", "Phosphorus, FR"),
                biosphere("Phosphorus", "Phosphorus FR"),
                biosphere("Phosphorus", "Phosphorus/ FR"),
            ],
        },
        {
            "location": south_america,
            "exchanges": [
                biosphere(
                    "Transformation, to permanent crop",
                    "Transformation, to permanent crop, IAI Area, South America",
                ),
            ],
        },
    ]
    assert remove_biosphere_location_prefix_if_flow_in_same_location(given) == expected
39 changes: 38 additions & 1 deletion tests/strategies/simapro_name_splitting.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
from bw2io.strategies.simapro import detoxify_re, split_simapro_name_geo
from bw2io.strategies.simapro import detoxify_re, split_simapro_name_geo, split_simapro_name_geo_curly_brackets


def test_detoxify_re():
Expand Down Expand Up @@ -134,3 +134,40 @@ def test_splitting_exchanges():
},
]
assert split_simapro_name_geo(db) == result


def test_split_simapro_name_geo_curly_brackets_custom_suffix():
    """Names are split around ``{geo}`` when a custom suffix is supplied.

    Covers optional white space around the brackets and after the suffix, a
    location containing a comma and spaces, and a pre-existing 'simapro name'
    that must not be overwritten.
    """
    given = [
        {
            "name": "Wheat straw, at farm {NL} Energy, U",
            "exchanges": [{"name": "Wheat straw, at farm{NL}Energy, U "}],
        },
        {
            "name": "Dairy cows ration, at farm {ES}Energy, U",
            "simapro name": "foo",
            "exchanges": [
                {"name": "Dairy cows ration, at farm {IAI Area, South America}Energy, U\t"}
            ],
        },
    ]
    expected = [
        {
            "name": "Wheat straw, at farm",
            "location": "NL",
            "simapro name": "Wheat straw, at farm {NL} Energy, U",
            "exchanges": [
                {
                    "simapro name": "Wheat straw, at farm{NL}Energy, U ",
                    "name": "Wheat straw, at farm",
                    "location": "NL",
                }
            ],
        },
        {
            "name": "Dairy cows ration, at farm",
            "location": "ES",
            "simapro name": "foo",
            "exchanges": [
                {
                    "simapro name": "Dairy cows ration, at farm {IAI Area, South America}Energy, U\t",
                    "name": "Dairy cows ration, at farm",
                    "location": "IAI Area, South America",
                }
            ],
        },
    ]
    result = split_simapro_name_geo_curly_brackets(given, "Energy, U")
    assert result == expected

0 comments on commit 67f1fd0

Please sign in to comment.