Skip to content

Commit

Permalink
prepare chart data for strapi
Browse files Browse the repository at this point in the history
  • Loading branch information
ikerey committed Nov 14, 2024
1 parent 1b07fa9 commit d155c22
Showing 1 changed file with 186 additions and 23 deletions.
209 changes: 186 additions & 23 deletions data-processing/notebooks/04_prepare_tabular_data.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -86,10 +86,14 @@
"source": [
"# imports\n",
"import json\n",
"import re\n",
"import sys\n",
"from ast import literal_eval\n",
"from datetime import datetime\n",
"from pprint import pprint\n",
"\n",
"import pandas as pd\n",
"\n",
"# Include local library paths if you have ../src/utils.py\n",
"sys.path.append(\"../src/\")\n",
"sys.path.append(\"../src/animations\")\n",
Expand All @@ -104,33 +108,67 @@
"cell_type": "markdown",
"metadata": {},
"source": [
"## Dataset information"
"### Utils"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"def convert_string_to_float_list(s):\n",
"    \"\"\"\n",
"    Convert a string of whitespace-separated numbers to a list of floats.\n",
"\n",
"    Parameters\n",
"    ----------\n",
"    s : str\n",
"        A string of whitespace-separated numbers, optionally wrapped in\n",
"        square brackets and containing newlines (e.g. a numpy array repr\n",
"        stored in a CSV cell).\n",
"\n",
"    Returns\n",
"    -------\n",
"    list of float\n",
"        The parsed numbers; an empty list if the string contains none.\n",
"    \"\"\"\n",
"    # str.split() with no argument already treats newlines and runs of\n",
"    # spaces as a single separator and drops empty tokens, so no regex or\n",
"    # manual newline stripping is needed. This also makes an empty or\n",
"    # whitespace-only input return [] instead of raising ValueError.\n",
"    return [float(num) for num in s.strip(\"[]\").split()]"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Dataset information"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"{'Agricultural drought exposure': <datasets.datasets.Dataset object at 0x7f2042d9fef0>,\n",
" 'Agricultural drought hazard': <datasets.datasets.Dataset object at 0x7f2042d9fe90>,\n",
" 'Boundaries': <datasets.datasets.Dataset object at 0x7f2042d9ff20>,\n",
" 'Contextual layers': <datasets.datasets.Dataset object at 0x7f2042d9ff50>,\n",
" 'EO-based flood exposure': <datasets.datasets.Dataset object at 0x7f2042d9ff80>,\n",
" 'EO-based flood hazard': <datasets.datasets.Dataset object at 0x7f2042d9ffb0>,\n",
" 'Hydrographic data': <datasets.datasets.Dataset object at 0x7f2042d9ffe0>,\n",
" 'Hydrometeorological Data': <datasets.datasets.Dataset object at 0x7f2042d9fb60>,\n",
" 'Meteorological drought exposure': <datasets.datasets.Dataset object at 0x7f2042d9fb30>,\n",
" 'Meteorological drought hazard': <datasets.datasets.Dataset object at 0x7f2042d9fb00>,\n",
" 'Model-based flood exposure': <datasets.datasets.Dataset object at 0x7f2118117aa0>,\n",
" 'Model-based flood hazard': <datasets.datasets.Dataset object at 0x7f2118117980>,\n",
" 'Populated infrastructures': <datasets.datasets.Dataset object at 0x7f2118117620>,\n",
" 'Transportation Network Infrastructures': <datasets.datasets.Dataset object at 0x7f2042dd78c0>,\n",
" 'Water-related infrastructures': <datasets.datasets.Dataset object at 0x7f2042dd4350>}\n"
"{'Agricultural drought exposure': <datasets.datasets.Dataset object at 0x7f1553383da0>,\n",
" 'Agricultural drought hazard': <datasets.datasets.Dataset object at 0x7f1553381df0>,\n",
" 'Boundaries': <datasets.datasets.Dataset object at 0x7f1553382150>,\n",
" 'Contextual layers': <datasets.datasets.Dataset object at 0x7f1553382180>,\n",
" 'EO-based flood exposure': <datasets.datasets.Dataset object at 0x7f1553382210>,\n",
" 'EO-based flood hazard': <datasets.datasets.Dataset object at 0x7f1553382240>,\n",
" 'Hydrographic data': <datasets.datasets.Dataset object at 0x7f1553382120>,\n",
" 'Hydrometeorological Data': <datasets.datasets.Dataset object at 0x7f15533820f0>,\n",
" 'Meteorological drought exposure': <datasets.datasets.Dataset object at 0x7f1553382270>,\n",
" 'Meteorological drought hazard': <datasets.datasets.Dataset object at 0x7f1553381e20>,\n",
" 'Model-based flood exposure': <datasets.datasets.Dataset object at 0x7f1553381d90>,\n",
" 'Model-based flood hazard': <datasets.datasets.Dataset object at 0x7f1553380350>,\n",
" 'Populated infrastructures': <datasets.datasets.Dataset object at 0x7f15533820c0>,\n",
" 'Transportation Network Infrastructures': <datasets.datasets.Dataset object at 0x7f1553382300>,\n",
" 'Water-related infrastructures': <datasets.datasets.Dataset object at 0x7f1553381dc0>}\n"
]
}
],
Expand All @@ -148,20 +186,20 @@
},
{
"cell_type": "code",
"execution_count": 3,
"execution_count": 4,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"{'Administrative Boundaries - adm0': <datasets.datasets.Layer at 0x7f2073b67fb0>,\n",
" 'Administrative Boundaries - adm1': <datasets.datasets.Layer at 0x7f2073b677a0>,\n",
" 'Administrative Boundaries - adm2': <datasets.datasets.Layer at 0x7f2073b67dd0>,\n",
" 'Administrative Boundaries - adm3': <datasets.datasets.Layer at 0x7f2073b67c20>,\n",
" 'Hydrological Basins': <datasets.datasets.Layer at 0x7f2042dd44a0>}"
"{'Administrative Boundaries - adm0': <datasets.datasets.Layer at 0x7f1553382660>,\n",
" 'Administrative Boundaries - adm1': <datasets.datasets.Layer at 0x7f15533373b0>,\n",
" 'Administrative Boundaries - adm2': <datasets.datasets.Layer at 0x7f1553382780>,\n",
" 'Administrative Boundaries - adm3': <datasets.datasets.Layer at 0x7f1553383a40>,\n",
" 'Hydrological Basins': <datasets.datasets.Layer at 0x7f1553382bd0>}"
]
},
"execution_count": 3,
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
Expand All @@ -176,6 +214,8 @@
"cell_type": "markdown",
"metadata": {},
"source": [
"***\n",
"# Location data\n",
"## Process Data"
]
},
Expand Down Expand Up @@ -278,6 +318,129 @@
"with open(\"../data/processed/locations.json\", \"w\") as f:\n",
" f.write(json_string)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"***\n",
"# Zonal statistics data\n",
"## Process Data"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [],
"source": [
"layer_ids = {\"Evapotranspiration\": 52, \"Precipitation\": 50, \"Soil moisture\": 53, \"Temperature\": 51}\n",
"\n",
"code_columns = {\n",
" \"Administrative Boundaries - adm0\": \"adm0_pcode\",\n",
" \"Administrative Boundaries - adm1\": \"adm1_pcode\",\n",
" \"Administrative Boundaries - adm2\": \"adm2_pcode\",\n",
" \"Administrative Boundaries - adm3\": \"adm3_pcode\",\n",
" \"Hydrological Basins\": \"objectid\",\n",
"}"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"[52]"
]
},
"execution_count": 9,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Remove the listed layers from the export set\n",
"# (reason for excluding Evapotranspiration is not recorded here — TODO confirm).\n",
"keys_to_remove = [\"Evapotranspiration\"]\n",
"for key in keys_to_remove:\n",
"    # A list comprehension was used here only for its side effects;\n",
"    # a plain loop states the intent. pop(..., None) tolerates missing keys.\n",
"    layer_ids.pop(key, None)"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Processing Precipitation\n",
"Processing Administrative Boundaries - adm0\n",
"Processing Administrative Boundaries - adm1\n",
"Processing Administrative Boundaries - adm2\n",
"Processing Administrative Boundaries - adm3\n",
"Processing Hydrological Basins\n",
"Processing Soil moisture\n",
"Processing Administrative Boundaries - adm0\n",
"Processing Administrative Boundaries - adm1\n",
"Processing Administrative Boundaries - adm2\n",
"Processing Administrative Boundaries - adm3\n",
"Processing Hydrological Basins\n",
"Processing Temperature\n",
"Processing Administrative Boundaries - adm0\n",
"Processing Administrative Boundaries - adm1\n",
"Processing Administrative Boundaries - adm2\n",
"Processing Administrative Boundaries - adm3\n",
"Processing Hydrological Basins\n"
]
}
],
"source": [
"# Initialize the structure of the export file; entries are keyed by their\n",
"# chart id under the \"api::chart-data.chart-data\" collection.\n",
"json_data = {\"version\": 2, \"data\": {\"api::chart-data.chart-data\": {}}}\n",
"\n",
"chart_id = 1\n",
"for layer_name, layer_id in layer_ids.items():\n",
"    print(f\"Processing {layer_name}\")\n",
"    raster_name = layer_name.replace(\" \", \"_\")\n",
"    for vector_name, code_column in code_columns.items():\n",
"        print(f\"Processing {vector_name}\")\n",
"        # Slug used in the zonal-statistics CSV file names; kept in a new\n",
"        # variable instead of reassigning the loop variable.\n",
"        vector_slug = (\n",
"            vector_name.lower()\n",
"            .replace(\" - \", \"_\")\n",
"            .replace(\" \", \"_\")\n",
"            .replace(\"(\", \"\")\n",
"            .replace(\")\", \"\")\n",
"        )\n",
"\n",
"        df = pd.read_csv(f\"../data/processed/ZonalStatistics/{raster_name}_{vector_slug}.csv\")\n",
"\n",
"        for _index, row in df.iterrows():\n",
"            # One timestamp per record so createdAt and updatedAt match\n",
"            # exactly (two datetime.now() calls would differ by microseconds).\n",
"            timestamp = datetime.now().isoformat()\n",
"\n",
"            chart_data = {\n",
"                \"id\": chart_id,\n",
"                \"location_code\": str(row[code_column]),\n",
"                \"year\": int(row[\"year\"]),\n",
"                # x values are stored as a Python-literal string in the CSV\n",
"                \"x_values\": literal_eval(row[\"x_axis_values\"]),\n",
"                # y values are stored as a numpy-array-style string\n",
"                \"y_values\": convert_string_to_float_list(row[\"y_axis_values\"]),\n",
"                \"createdAt\": timestamp,\n",
"                \"updatedAt\": timestamp,\n",
"                \"layer\": layer_id,\n",
"                \"createdBy\": None,\n",
"                \"updatedBy\": None,\n",
"            }\n",
"            json_data[\"data\"][\"api::chart-data.chart-data\"][str(chart_id)] = chart_data\n",
"            chart_id += 1\n",
"\n",
"# Convert to JSON string\n",
"json_string = json.dumps(json_data, indent=2)\n",
"\n",
"# Write to file\n",
"with open(\"../data/processed/chart-data.json\", \"w\") as f:\n",
"    f.write(json_string)"
]
}
],
"metadata": {
Expand Down

0 comments on commit d155c22

Please sign in to comment.