Skip to content

Commit

Permalink
Update UN scripts for WHO places (#995)
Browse files (browse the repository at this point in the history)
* update UN scripts for WHO places

* fix

* fix

* fix
  • Loading branch information
n-h-diaz authored Mar 5, 2024
1 parent b249c31 commit 2f330bb
Show file tree
Hide file tree
Showing 9 changed files with 19 additions and 23 deletions.
19 changes: 9 additions & 10 deletions scripts/un/sdg/geography.py
Original file line number Diff line number Diff line change
Expand Up @@ -208,27 +208,26 @@ def write_un_places(input_geos, output, sdg2type, un2sdg, un2dc_curated):
'''
un2dc_generated = {}
new_subjects = []
with open(input_geos, encoding='utf-8') as f_in:
with open(input_geos, encoding='utf-8-sig') as f_in:
with open(output, 'w', encoding='utf-8') as f_out:
reader = csv.DictReader(f_in)
for row in reader:
subject = row['subject_id']
subject = UN_PREFIX + ':' + row['undata_geo_id']
if subject in un2dc_curated:
dcid = un2dc_curated[subject].dcid
type = un2dc_curated[subject].type
name = un2dc_curated[subject].name
else:
dcid = row['subject_id'].replace(':', '/')
if row['subject_id'] in un2sdg and un2sdg[
row['subject_id']] in sdg2type:
sdg_type = sdg2type[un2sdg[row['subject_id']]]
dcid = subject.replace(':', '/')
if subject in un2sdg and un2sdg[subject] in sdg2type:
sdg_type = sdg2type[un2sdg[subject]]
if sdg_type == SAMPLING_STATION or sdg_type == CITY:
type = sdg_type
else:
type = GEO_REGION
else:
type = GEO_REGION
name = row['subject_label'].split('_')[-1]
name = row['undata_geo_desc'].split('_')[-1]
un2dc_generated[subject] = Node(dcid, type, name)

# Add non-UN-specific places to new_subjects.
Expand All @@ -241,8 +240,8 @@ def write_un_places(input_geos, output, sdg2type, un2sdg, un2dc_curated):
'dcid': dcid,
'type': type,
'name': name,
'code': row['subject_id'],
'label': row['subject_label']
'code': subject,
'label': row['undata_geo_desc']
}))
return un2dc_generated, new_subjects

Expand Down Expand Up @@ -355,7 +354,7 @@ def write_place_mappings(output, sdg2un, un2dc_curated, un2dc_generated):
un2dc_curated = get_un2dc_curated(os.path.join(FOLDER, 'places.csv'))

un2dc_generated, new_subjects = write_un_places(
os.path.join(FOLDER, 'geographies.csv'),
'sssom-mappings/data/enumerations/undata/geography.csv',
os.path.join(FOLDER, 'un_places.mcf'), sdg2type, un2sdg, un2dc_curated)
containment = process_containment(
'sssom-mappings/data/enumerations/undata/geography_hierarchy.csv',
Expand Down
3 changes: 0 additions & 3 deletions scripts/un/sdg/geography/geographies.csv

This file was deleted.

4 changes: 2 additions & 2 deletions scripts/un/sdg/geography/un_containment.mcf
Git LFS file not shown
4 changes: 2 additions & 2 deletions scripts/un/sdg/geography/un_places.mcf
Git LFS file not shown
2 changes: 1 addition & 1 deletion scripts/un/sdg/geography_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -102,7 +102,7 @@ def test_write_un_places(self):
with tempfile.TemporaryDirectory() as tmp_dir:
output = os.path.join(tmp_dir, 'un_places.mcf')
un2dc_generated, new_subjects = geography.write_un_places(
os.path.join(FOLDER, 'test_geographies.csv'), output, SDG2TYPE,
os.path.join(FOLDER, 'test_geography.csv'), output, SDG2TYPE,
UN2SDG, UN2DC_CURATED)
with open(output) as result:
with open(os.path.join(FOLDER,
Expand Down
2 changes: 1 addition & 1 deletion scripts/un/sdg/sdg-dataset
Submodule sdg-dataset updated from bdb619 to a5e866
2 changes: 1 addition & 1 deletion scripts/un/sdg/sssom-mappings
Submodule sssom-mappings updated from b854ba to 71a3aa
3 changes: 0 additions & 3 deletions scripts/un/sdg/testdata/test_geography/test_geographies.csv

This file was deleted.

3 changes: 3 additions & 0 deletions scripts/un/sdg/testdata/test_geography/test_geography.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
undata_geo_id,undata_geo_desc
G00000020,Afghanistan
G00000030,Ajman

0 comments on commit 2f330bb

Please sign in to comment.