Skip to content

Commit

Permalink
Merge branch 'master' of https://github.com/datacommonsorg/website into app
Browse files Browse the repository at this point in the history
  • Loading branch information
shifucun committed May 22, 2024
2 parents 581be7d + 4ca736e commit 89eb7cd
Show file tree
Hide file tree
Showing 12 changed files with 1,545 additions and 14 deletions.
1 change: 1 addition & 0 deletions deploy/helm_charts/envs/autopush.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,7 @@ nl:
"sdg_ft",
"undata_ft",
"undata_ilo_ft",
"undata_dev_ft",
]
vertexAIModels:
dc-all-minilm-l6-v2-model:
Expand Down
24 changes: 13 additions & 11 deletions deploy/helm_charts/envs/dev.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -43,17 +43,19 @@ nl:
enabled: true
embeddingsSpec:
defaultIndex: "base_uae_mem"
enabledIndexes: [
"base_uae_mem",
"bio_ft",
"medium_ft",
"medium_lance_ft",
"medium_vertex_ft",
"medium_vertex_mistral",
"sdg_ft",
"undata_ft",
"undata_ilo_ft",
]
enabledIndexes:
[
"base_uae_mem",
"bio_ft",
"medium_ft",
"medium_lance_ft",
"medium_vertex_ft",
"medium_vertex_mistral",
"sdg_ft",
"undata_ft",
"undata_ilo_ft",
"undata_dev_ft",
]
vertexAIModels:
dc-all-minilm-l6-v2-model:
project_id: datcom-website-dev
Expand Down
9 changes: 8 additions & 1 deletion deploy/helm_charts/envs/unsdg_staging.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,14 @@ nl:
embeddingsSpec:
defaultIndex: "sdg_ft"
enabledIndexes:
["bio_ft", "medium_ft", "sdg_ft", "undata_ft", "undata_ilo_ft"]
[
"bio_ft",
"medium_ft",
"sdg_ft",
"undata_ft",
"undata_ilo_ft",
"undata_dev_ft",
]

serviceGroups:
recon: null
Expand Down
7 changes: 6 additions & 1 deletion deploy/nl/embeddings.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
#
# This is the main config file for NL models and embeddings,
# with two sections.
#
#
# models:
# - type: what type of model serving infra? (LOCAL, VERTEXAI)
# - usage: what is the model used for? (EMBEDDINGS, RERANKING)
Expand Down Expand Up @@ -71,6 +71,11 @@ indexes:
embeddings: gs://datcom-nl-models/embeddings_undata_ilo_2024_05_15_11_18_05.ft_final_v20230717230459.all-MiniLM-L6-v2.csv
model: ft-final-v20230717230459-all-MiniLM-L6-v2
healthcheck_query: "Employment"
undata_dev_ft:
store: MEMORY
embeddings: gs://datcom-nl-models/embeddings_undata_dev_2024_05_21_15_37_51.ft_final_v20230717230459.all-MiniLM-L6-v2.csv
model: ft-final-v20230717230459-all-MiniLM-L6-v2
healthcheck_query: "Employment"
bio_ft:
store: MEMORY
embeddings: gs://datcom-nl-models/embeddings_bio_2024_03_19_16_39_03.ft_final_v20230717230459.all-MiniLM-L6-v2.csv
Expand Down
6 changes: 6 additions & 0 deletions server/integration_tests/explore_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -283,6 +283,12 @@ def test_detection_basic_undata_ilo(self):
test='unittest',
idx='undata_ilo_ft')

# Basic detection test for the dev UN Data embeddings index.
# Runs the single query 'Employment in the world' through the shared
# `run_detection` helper (defined elsewhere in this test class) with
# idx='undata_dev_ft' and test='unittest'.
# NOTE(review): the first argument is presumably the name of the golden-output
# directory the response is compared against -- confirm against `run_detection`.
def test_detection_basic_undata_dev(self):
self.run_detection('detection_api_undata_dev_idx',
['Employment in the world'],
test='unittest',
idx='undata_dev_ft')

def test_detection_basic_bio(self):
self.run_detection('detection_api_bio_idx', ['Commute in California'],
test='unittest',
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
{
"childEntityType": "",
"classifications": [
{
"type": 100
}
],
"client": "test_detect",
"comparisonEntities": [],
"comparisonVariables": [],
"context": {},
"debug": {},
"entities": ["Earth"],
"nonPlaceEntities": [],
"properties": [],
"sessionId": "007_999999999",
"test": "unittest",
"variables": [
"dc/topic/sdg_8.5.2",
"dc/topic/UN_SUB_THEME_42",
"dc/topic/ILOEMPPIFLNB",
"dc/topic/ILOEMPNORMNB",
"dc/topic/ILOEMPNIFLNB",
"dc/topic/ILOEMPSTATNB",
"dc/topic/ILOEMPPIFLRT",
"dc/topic/ILOEMP2FTENB",
"dc/topic/UN_SUB_THEME_68",
"dc/topic/ILOPSETPSENB",
"dc/topic/ILOLUUXLUXNB",
"dc/topic/sdg_9.2.2",
"dc/topic/ILOEMPNIFLRT",
"dc/topic/sdg_8",
"dc/topic/ILOEMP3EMPNB",
"dc/topic/ILOPOP3TEDNB",
"dc/topic/ILOEMP2NIFNB",
"dc/topic/sdg_8.3.1",
"dc/topic/sdg_8.8.2",
"dc/topic/ILOEMP2WAPRT"
]
}
12 changes: 11 additions & 1 deletion server/lib/nl/explore/params.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,13 @@ class DCNames(str, Enum):
MAIN_DC = 'main'
SDG_DC = 'sdg'
SDG_MINI_DC = 'sdgmini'
# Production UN Data index. Contains SDG and WHO data
UNDATA_DC = 'undata'
# Dev UN Data index. Contains SDG, WHO, and ILO data
# TODO(dwnoble): Remove after ILO launch
UNDATA_DEV_DC = 'undata_dev'
# Dev UN Data ILO-only index. Contains ILO data only (no SDG or WHO data)
# TODO(dwnoble): Remove after ILO launch
UNDATA_ILO_DC = 'undata_ilo'
BIO_DC = 'bio'
CUSTOM_DC = 'custom'
Expand All @@ -67,7 +73,9 @@ class Clients(str, Enum):


SDG_DC_LIST = [DCNames.SDG_DC, DCNames.SDG_MINI_DC]
UNDATA_DC_LIST = [DCNames.UNDATA_DC, DCNames.UNDATA_ILO_DC]
UNDATA_DC_LIST = [
DCNames.UNDATA_DC, DCNames.UNDATA_DEV_DC, DCNames.UNDATA_ILO_DC
]
SPECIAL_DC_LIST = SDG_DC_LIST + UNDATA_DC_LIST


Expand Down Expand Up @@ -108,6 +116,8 @@ def dc_to_embedding_type(dc: str, embeddings_type: str) -> str:
return 'undata_ft'
elif dc == DCNames.UNDATA_ILO_DC.value:
return 'undata_ilo_ft'
elif dc == DCNames.UNDATA_DEV_DC.value:
return 'undata_dev_ft'
elif dc == DCNames.BIO_DC.value:
return 'bio_ft'
return embeddings_type
6 changes: 6 additions & 0 deletions server/lib/topic_cache.py
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,12 @@ def json(self) -> Dict:
'server/config/nl_page/undata_topic_cache.json',
'server/config/nl_page/undata_enum_topic_cache.json',
],
DCNames.UNDATA_DEV_DC.value: [
'server/config/nl_page/sdg_topic_cache.json',
'server/config/nl_page/undata_topic_cache.json',
'server/config/nl_page/undata_enum_topic_cache.json',
'server/config/nl_page/undata_ilo_topic_cache.json'
],
DCNames.UNDATA_ILO_DC.value: [
'server/config/nl_page/undata_ilo_topic_cache.json'
],
Expand Down
6 changes: 6 additions & 0 deletions tools/nl/embeddings/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,12 @@ variables.
./run.sh -c undata_ilo data/curated_input/undata_ilo
```

To generate the `undata_dev_ft` embeddings (built from both the `undata_ilo` and `undata` curated inputs, plus the `undata` alternatives):

```bash
./run.sh -c undata_dev data/curated_input/undata_ilo,data/curated_input/undata data/alternatives/undata/*.csv
```

To generate the `bio_ft` embeddings:

```bash
Expand Down
100 changes: 100 additions & 0 deletions tools/nl/embeddings/data/preindex/undata_dev/duplicate_names.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,100 @@
PreferredSV,DroppedSV,DuplicateName
dc/topic/ILOPOP3FORNB,dc/topic/ILOPOP3STGNB,Youth Working-age Population thousands
dc/topic/sdg_8.10,dc/topic/sdg_8.10.1,sdg 8 10
dc/topic/sdg_8.10,dc/topic/sdg_8.10.1,sdg 8.10
dc/topic/sdg_8.10,dc/topic/sdg_8.10.1,sdg 810
dc/topic/sdg_8.10,dc/topic/sdg_8.10.1,"sdg Strengthen the capacity of domestic financial institutions to encourage and expand access to banking, insurance and financial services for all"
dc/topic/sdg_8.10,dc/topic/sdg_8.10.1,sdg810
dc/topic/sdg_8.10,dc/topic/sdg_8.10.1,"sustainable development goal Strengthen the capacity of domestic financial institutions to encourage and expand access to banking, insurance and financial services for all"
dc/topic/sdg_11,dc/topic/sdg_1.1,sdg 11
dc/topic/sdg_11,dc/topic/sdg_1.1,sdg11
dc/topic/sdg_11.1,dc/topic/sdg_1.1.1,sdg 111
dc/topic/sdg_11.1,dc/topic/sdg_1.1.1,sdg111
dc/topic/sdg_1.5.1,dc/topic/sdg_11.5.1,"Number of deaths, missing persons and directly affected persons attributed to disasters per 100K population"
dc/topic/sdg_1.5.1,dc/topic/sdg_11.5.1,"sdg Number of deaths, missing persons and directly affected persons attributed to disasters per 100K population"
dc/topic/sdg_1.5.1,dc/topic/sdg_11.5.1,"sustainable development goal Number of deaths, missing persons and directly affected persons attributed to disasters per 100K population"
dc/topic/sdg_1.5.2,dc/topic/sdg_11.5.2,Direct economic loss attributed to disasters in relation to global gross domestic product (GDP)
dc/topic/sdg_1.5.2,dc/topic/sdg_11.5.2,sdg Direct economic loss attributed to disasters in relation to global gross domestic product (GDP)
dc/topic/sdg_1.5.2,dc/topic/sdg_11.5.2,sustainable development goal Direct economic loss attributed to disasters in relation to global gross domestic product (GDP)
dc/topic/sdg_1.5.3,dc/topic/sdg_11.b.1,Number of countries that adopt and implement national disaster risk reduction strategies in line with the Sendai Framework for Disaster Risk Reduction 2015-2030
dc/topic/sdg_1.5.3,dc/topic/sdg_11.b.1,sdg Number of countries that adopt and implement national disaster risk reduction strategies in line with the Sendai Framework for Disaster Risk Reduction 2015-2030
dc/topic/sdg_1.5.3,dc/topic/sdg_11.b.1,sustainable development goal Number of countries that adopt and implement national disaster risk reduction strategies in line with the Sendai Framework for Disaster Risk Reduction 2015-2030
dc/topic/sdg_1.5.4,dc/topic/sdg_11.b.2,Proportion of local governments that adopt and implement local disaster risk reduction strategies in line with national disaster risk reduction strategies
dc/topic/sdg_1.5.4,dc/topic/sdg_11.b.2,sdg Proportion of local governments that adopt and implement local disaster risk reduction strategies in line with national disaster risk reduction strategies
dc/topic/sdg_1.5.4,dc/topic/sdg_11.b.2,sustainable development goal Proportion of local governments that adopt and implement local disaster risk reduction strategies in line with national disaster risk reduction strategies
dc/topic/sdg_12,dc/topic/sdg_1.2,sdg 12
dc/topic/sdg_12,dc/topic/sdg_1.2,sdg12
dc/topic/sdg_12.1,dc/topic/sdg_1.2.1,sdg 121
dc/topic/sdg_12.1,dc/topic/sdg_1.2.1,sdg121
dc/topic/sdg_12.2,dc/topic/sdg_1.2.2,sdg 122
dc/topic/sdg_12.2,dc/topic/sdg_1.2.2,sdg122
dc/topic/sdg_8.4.1,dc/topic/sdg_12.2.1,"Material footprint, material footprint per capita, and material footprint per GDP"
dc/topic/sdg_8.4.1,dc/topic/sdg_12.2.1,"sdg Material footprint, material footprint per capita, and material footprint per GDP"
dc/topic/sdg_8.4.1,dc/topic/sdg_12.2.1,"sustainable development goal Material footprint, material footprint per capita, and material footprint per GDP"
dc/topic/sdg_8.4.2,dc/topic/sdg_12.2.2,"Domestic material consumption, domestic material consumption per capita, and domestic material consumption per GDP"
dc/topic/sdg_8.4.2,dc/topic/sdg_12.2.2,"sdg Domestic material consumption, domestic material consumption per capita, and domestic material consumption per GDP"
dc/topic/sdg_8.4.2,dc/topic/sdg_12.2.2,"sustainable development goal Domestic material consumption, domestic material consumption per capita, and domestic material consumption per GDP"
dc/topic/sdg_4.7.1,dc/topic/sdg_12.8.1,(b) curricula
dc/topic/sdg_4.7.1,dc/topic/sdg_12.8.1,(c) teacher education
dc/topic/sdg_4.7.1,dc/topic/sdg_12.8.1,Extent to which (i) global citizenship education and (ii) education for sustainable development are mainstreamed in (a) national education policies
dc/topic/sdg_4.7.1,dc/topic/sdg_12.8.1,and (d) student assessment
dc/topic/sdg_4.7.1,dc/topic/sdg_12.8.1,sdg Extent to which (i) global citizenship education and (ii) education for sustainable development are mainstreamed in (a) national education policies
dc/topic/sdg_4.7.1,dc/topic/sdg_12.8.1,sustainable development goal Extent to which (i) global citizenship education and (ii) education for sustainable development are mainstreamed in (a) national education policies
dc/topic/sdg_7.b.1,dc/topic/sdg_12.a.1,Installed renewable energy-generating capacity in developing countries (in watts per capita)
dc/topic/sdg_7.b.1,dc/topic/sdg_12.a.1,sdg Installed renewable energy-generating capacity in developing countries (in watts per capita)
dc/topic/sdg_7.b.1,dc/topic/sdg_12.a.1,sustainable development goal Installed renewable energy-generating capacity in developing countries (in watts per capita)
dc/topic/sdg_13,dc/topic/sdg_1.3,sdg 13
dc/topic/sdg_13,dc/topic/sdg_1.3,sdg13
dc/topic/sdg_13.1,dc/topic/sdg_1.3.1,sdg 131
dc/topic/sdg_13.1,dc/topic/sdg_1.3.1,sdg131
dc/topic/sdg_1.5.1,dc/topic/sdg_13.1.1,"Number of deaths, missing persons and directly affected persons attributed to disasters per 100K population"
dc/topic/sdg_1.5.1,dc/topic/sdg_13.1.1,"sdg Number of deaths, missing persons and directly affected persons attributed to disasters per 100K population"
dc/topic/sdg_1.5.1,dc/topic/sdg_13.1.1,"sustainable development goal Number of deaths, missing persons and directly affected persons attributed to disasters per 100K population"
dc/topic/sdg_1.5.3,dc/topic/sdg_13.1.2,Number of countries that adopt and implement national disaster risk reduction strategies in line with the Sendai Framework for Disaster Risk Reduction 2015-2030
dc/topic/sdg_1.5.3,dc/topic/sdg_13.1.2,sdg Number of countries that adopt and implement national disaster risk reduction strategies in line with the Sendai Framework for Disaster Risk Reduction 2015-2030
dc/topic/sdg_1.5.3,dc/topic/sdg_13.1.2,sustainable development goal Number of countries that adopt and implement national disaster risk reduction strategies in line with the Sendai Framework for Disaster Risk Reduction 2015-2030
dc/topic/sdg_1.5.4,dc/topic/sdg_13.1.3,Proportion of local governments that adopt and implement local disaster risk reduction strategies in line with national disaster risk reduction strategies
dc/topic/sdg_1.5.4,dc/topic/sdg_13.1.3,sdg Proportion of local governments that adopt and implement local disaster risk reduction strategies in line with national disaster risk reduction strategies
dc/topic/sdg_1.5.4,dc/topic/sdg_13.1.3,sustainable development goal Proportion of local governments that adopt and implement local disaster risk reduction strategies in line with national disaster risk reduction strategies
dc/topic/sdg_4.7.1,dc/topic/sdg_13.3.1,(b) curricula
dc/topic/sdg_4.7.1,dc/topic/sdg_13.3.1,(c) teacher education
dc/topic/sdg_4.7.1,dc/topic/sdg_13.3.1,Extent to which (i) global citizenship education and (ii) education for sustainable development are mainstreamed in (a) national education policies
dc/topic/sdg_4.7.1,dc/topic/sdg_13.3.1,and (d) student assessment
dc/topic/sdg_4.7.1,dc/topic/sdg_13.3.1,sdg Extent to which (i) global citizenship education and (ii) education for sustainable development are mainstreamed in (a) national education policies
dc/topic/sdg_4.7.1,dc/topic/sdg_13.3.1,sustainable development goal Extent to which (i) global citizenship education and (ii) education for sustainable development are mainstreamed in (a) national education policies
dc/topic/sdg_14,dc/topic/sdg_1.4,sdg 14
dc/topic/sdg_14,dc/topic/sdg_1.4,sdg14
dc/topic/sdg_14.1,dc/topic/sdg_1.4.1,sdg 141
dc/topic/sdg_14.1,dc/topic/sdg_1.4.1,sdg141
dc/topic/sdg_14.2,dc/topic/sdg_1.4.2,sdg 142
dc/topic/sdg_14.2,dc/topic/sdg_1.4.2,sdg142
dc/topic/sdg_15,dc/topic/sdg_1.5,sdg 15
dc/topic/sdg_15,dc/topic/sdg_1.5,sdg15
dc/topic/sdg_15.1,dc/topic/sdg_1.5.1,sdg 151
dc/topic/sdg_15.1,dc/topic/sdg_1.5.1,sdg151
dc/topic/sdg_15.2,dc/topic/sdg_1.5.2,sdg 152
dc/topic/sdg_15.2,dc/topic/sdg_1.5.2,sdg152
dc/topic/sdg_15.3,dc/topic/sdg_1.5.3,sdg 153
dc/topic/sdg_15.3,dc/topic/sdg_1.5.3,sdg153
dc/topic/sdg_15.4,dc/topic/sdg_1.5.4,sdg 154
dc/topic/sdg_15.4,dc/topic/sdg_1.5.4,sdg154
dc/topic/sdg_15.8,dc/topic/sdg_15.6.1,"Number of countries that have adopted legislative, administrative and policy frameworks to ensure fair and equitable sharing of benefits"
dc/topic/sdg_15.a.1,dc/topic/sdg_15.b.1,(a) Official development assistance on conservation and sustainable use of biodiversity
dc/topic/sdg_15.a.1,dc/topic/sdg_15.b.1,and (b) revenue generated and finance mobilized from biodiversity-relevant economic instruments
dc/topic/sdg_15.a.1,dc/topic/sdg_15.b.1,sdg (a) Official development assistance on conservation and sustainable use of biodiversity
dc/topic/sdg_15.a.1,dc/topic/sdg_15.b.1,sustainable development goal (a) Official development assistance on conservation and sustainable use of biodiversity
dc/topic/sdg_10.6.1,dc/topic/sdg_16.8.1,Proportion of members and voting rights of developing countries in international organizations
dc/topic/sdg_10.6.1,dc/topic/sdg_16.8.1,sdg Proportion of members and voting rights of developing countries in international organizations
dc/topic/sdg_10.6.1,dc/topic/sdg_16.8.1,sustainable development goal Proportion of members and voting rights of developing countries in international organizations
dc/topic/sdg_10.3.1,dc/topic/sdg_16.b.1,Proportion of population reporting having personally felt discriminated against or harassed in the previous 12 months on the basis of a ground of discrimination prohibited under international human rights law
dc/topic/sdg_10.3.1,dc/topic/sdg_16.b.1,sdg Proportion of population reporting having personally felt discriminated against or harassed in the previous 12 months on the basis of a ground of discrimination prohibited under international human rights law
dc/topic/sdg_10.3.1,dc/topic/sdg_16.b.1,sustainable development goal Proportion of population reporting having personally felt discriminated against or harassed in the previous 12 months on the basis of a ground of discrimination prohibited under international human rights law
dc/topic/sdg_17.11,dc/topic/sdg_17.1.1,sdg 1711
dc/topic/sdg_17.11,dc/topic/sdg_17.1.1,sdg1711
dc/topic/sdg_17.12,dc/topic/sdg_17.1.2,sdg 1712
dc/topic/sdg_17.12,dc/topic/sdg_17.1.2,sdg1712
dc/topic/sdg_5,dc/topic/UN_THEME_5,gender equality
dc/topic/UN_THEME_7,dc/topic/UN_THEME_10,population trends
dc/topic/WHOAIR7,dc/topic/WHOAIR43,Ambient Air Pollution Attributable Number of Disability-adjusted Life Years
dc/topic/WHOAIR7,dc/topic/WHOAIR43,ambient air pollution attributable number of dalys
dc/topic/WHOcci,dc/topic/WHOcci2030,"Reproductive, Maternal, Newborn And Child Health Interventions RMNCH, Combined: Composite Coverage Index %"
Loading

0 comments on commit 89eb7cd

Please sign in to comment.