Skip to content

Commit

Permalink
refine to make it easier adding new country
Browse files Browse the repository at this point in the history
  • Loading branch information
p-phung committed Dec 5, 2024
1 parent 110f738 commit bd62acd
Show file tree
Hide file tree
Showing 6 changed files with 268 additions and 212 deletions.
44 changes: 44 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -35,3 +35,47 @@ To run the pipeline locally
--dateend specify date until which the data should be extracted
--help Show this message and exit.
```
Payload sent to EspoCRM:
{
"status": "onhold",
"type": "heavyrainfall",
"source": "GPM",
"<espo-area-field>": "<id-field-name>",
"<espo-destination-field>": "<rainfall-field-name>"
}
## Adding a new country
1. Prepare shapefile
- Add a shapefile of the areas (e.g. districts) in `.geojson` format to `data/admin_boundary`
- Rename header of the area code (e.g. district code) to `code`
2. Prepare EspoCRM entity for area
- Create an entity to store the area (e.g. District)
- In the entity, create a field to store area code
- Import the area (by exporting the shapefile to `.csv`) to this entity
- Make sure the area codes in this entity are exactly the same as those in the shapefile
3. Prepare EspoCRM entity for alert
- Create an entity to store alert (e.g. Climate Hazard)
- In the entity, create a `float` field to store the calculated rainfall value
- In the entity, create additional fields to match with the payload requirements:
| field name | type | value(s) |
| -----------| ---- | ---------|
| `status` | `enum` | `onhold` |
| `type` | `enum` | `heavyrainfall` |
| `source` | `enum` or `text` | `GPM` |
- Make sure the entity for area is linked with this one
4. Add the new country to `config/config.yaml`, below the existing one:
```
- name: <country-iso3>
days-to-observe: 14 # number of most recent days to observe rainfall
alert-on-threshold: 50 # threshold to send to EspoCRM
shapefile-area: <name>.geojson # shapefile of areas (.geojson) on which the zonal stats are based
espo-area: # entity storing areas code (and id)
entity: <entity-name>
field: <id-field-name>
espo-destination: # entity to send alerts to
entity: <entity-name>
field: <rainfall-field-name>
```
5. Test and adjust settings if needed.
9 changes: 7 additions & 2 deletions config/config.yaml
Original file line number Diff line number Diff line change
@@ -1,6 +1,11 @@
countries:
- name: CMR
shapefile: cmr_district_sante_2022.geojson # shapefile name in geojson format where the zonal stats bases on
days-to-observe: 14 # number of most recent days to observe rainfall
alert-on-threshold: 50 # threshold to send to EspoCRM
destination-entity: CClimaticHazard
shapefile-area: cmr_district_sante_2022.geojson # shapefile of areas (.geojson) where the zonal stats bases on
espo-area: # entity storing areas code and id
entity: CHealthDistrict
field: CHealthDistrictId
espo-destination: # entity to send alerts to
entity: CClimaticHazard
field: averageRainfall
400 changes: 200 additions & 200 deletions data/admin_boundary/cmr_district_sante_2022.geojson

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion nrt_rainfall_pipeline/extract.py
Original file line number Diff line number Diff line change
Expand Up @@ -110,7 +110,7 @@ def __prepare_rainfall_data(self, file_name):
'''
For each date (file), slice it to the extent of the country
'''
shp_name = self.settings.get_country_setting(self.country, "shapefile")
shp_name = self.settings.get_country_setting(self.country, "shapefile-area")
shp_dir = f"data/admin_boundary/{shp_name}"
shapefile = gpd.read_file(f"{shp_dir}")
shapes = [feature["geometry"] for feature in shapefile.iterfeatures()]
Expand Down
3 changes: 2 additions & 1 deletion nrt_rainfall_pipeline/load.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,8 @@ def set_secrets(self, secrets):

def send_to_espo_api(self, country, data: list):
self.country = country
entity = self.settings.get_country_setting(self.country, "destination-entity")
destination = self.settings.get_country_setting(self.country, "espo-destination")
entity = destination["entity"]
espo_client = EspoAPI(self.secrets.get_secret("ESPOCRM_URL"),
self.secrets.get_secret("ESPOCRM_API_KEY"))
for data in data:
Expand Down
22 changes: 14 additions & 8 deletions nrt_rainfall_pipeline/transform.py
Original file line number Diff line number Diff line change
Expand Up @@ -77,7 +77,7 @@ def __calculate_average_raster(self):

# transform
def __calculate_zonalstats(self):
shp_name = self.settings.get_country_setting(self.country, "shapefile")
shp_name = self.settings.get_country_setting(self.country, "shapefile-area")
shp_dir = f"data/admin_boundary/{shp_name}"
shapefile = gpd.read_file(f"{shp_dir}")
tif_name = f"{self.country}_{self.datestart.strftime('%Y-%m-%d')}_{self.dateend.strftime('%Y-%m-%d')}"
Expand All @@ -93,23 +93,29 @@ def __prepare_data_for_espo(self, stats):
"""
Prepare zonal stats data into payload matching EspoCRM requirements
"""
area = self.settings.get_country_setting(self.country, "espo-area")
area_entity = area["entity"]
area_field = area["field"]
destination = self.settings.get_country_setting(self.country, "espo-destination")
destination_field = destination["field"]
additional_data = {"status": "onhold",
"type": "heavyrainfall",
"source": "GPM"}
admin_id = self.load.get_admin_id("CHealthDistrict", "code")
admin_id = self.load.get_admin_id(area_entity, "code")
admin_id = self.__extract_id_from_key(admin_id)
stats_list = []
for d in stats:
new_d = {k: d["properties"][k] for k in ["CODE_DS","median"]}
new_d["cHealthDistrictId"] = admin_id.get(new_d["CODE_DS"], new_d["CODE_DS"])
del new_d['CODE_DS']
new_d["average14dayRainfall"] = new_d.pop('median')
new_d = {k: d["properties"][k] for k in ["code","median"]}
new_d[area_field] = admin_id.get(new_d["code"], new_d["code"])
del new_d['code']
new_d[destination_field] = new_d.pop('median')
new_d.update(additional_data)
# new_d["cHealthDistrict"] = admin_id.get(new_d["cHealthDistrict"], new_d["cHealthDistrict"])
stats_list.append(new_d)
print('stats_list: ', stats_list)

threshold = self.settings.get_country_setting(self.country, "alert-on-threshold")
filtered = self.__filter_dict(stats_list, 'average14dayRainfall', threshold)
filtered = self.__filter_dict(stats_list, destination_field, threshold)
print("Data to send: " , len(filtered))
return filtered

def __filter_dict(self, stats_list, key_to_filter: str, threshold: float):
Expand Down

0 comments on commit bd62acd

Please sign in to comment.