Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Feature/fis validation #49

Open
wants to merge 2 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
203 changes: 203 additions & 0 deletions notebooks/fis-network/validation/cemt-network-validation.ipynb
Original file line number Diff line number Diff line change
@@ -0,0 +1,203 @@
{
"cells": [
{
"cell_type": "markdown",
"id": "ba4fce13-543f-467d-8c64-79b7fe1d5a46",
"metadata": {},
"source": [
"# Check if CEMT codes are correct\n",
"This notebook checks how well the CEMT class codes of the waterways (Vaarweg codes) are filled in. They are filled in consistently, but only for NL and part of BE; Germany is missing. We also show how to compute a consistent ordinal variable.\n",
"\n",
"This information is based on Vaarweginformatie: https://www.vaarweginformatie.nl/frp/main/#/home\n",
"\n",
"You can get extra info from the EURIS API:\n",
"See for example https://developer.eurisportal.eu/docs/reference-data/locks\n",
"\n",
"\n"
]
},
{
"cell_type": "code",
"execution_count": 1,
"id": "507a5c44-a43b-4bf3-a0e2-730f6b4c5f8a",
"metadata": {},
"outputs": [],
"source": [
"import pickle\n",
"\n",
"import networkx as nx\n",
"import requests\n",
"\n",
"import pandas as pd\n",
"import numpy as np\n"
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "d9a6a36d-a55b-41ee-9e22-95c3293afedf",
"metadata": {},
"outputs": [],
"source": [
"fis_url = 'https://zenodo.org/records/6673604/files/network_digital_twin_v0.3.pickle?download=1'"
]
},
{
"cell_type": "code",
"execution_count": 3,
"id": "38a4e34c-760f-47b1-8217-fa6757d91be2",
"metadata": {},
"outputs": [],
"source": [
"resp = requests.get(fis_url)"
]
},
{
"cell_type": "code",
"execution_count": 4,
"id": "d28b2422-d698-4af5-96da-c6ec8dfd28ba",
"metadata": {},
"outputs": [],
"source": [
"graph = pickle.loads(resp.content)"
]
},
{
"cell_type": "code",
"execution_count": 5,
"id": "15ce72a0-25ab-486e-8560-24a27318b6ac",
"metadata": {},
"outputs": [],
"source": [
"edges_df = nx.to_pandas_edgelist(graph)"
]
},
{
"cell_type": "code",
"execution_count": 11,
"id": "426d5170-2746-4b6b-a42e-0d31ac0fcb06",
"metadata": {},
"outputs": [],
"source": [
"# Store the codes under CEMT, with missing (NaN) values replaced by an empty string\n",
"\n",
"\n",
"def code2cemt(code):\n",
" \"\"\"Convert the almost CEMT-like code stored in Vaarweginformatie to a CEMT class.\"\"\"\n",
" # The data contains the CEMT class '_0', which is not in the Richtlijn Vaarwegen but used to be in older versions. We replace it by '0'.\n",
"\n",
" \n",
" cemt = ''\n",
" if pd.isna(code):\n",
" cemt = ''\n",
" elif code == '_0':\n",
" # special class defined in richtlijn vaarwegen 2011\n",
" cemt = '0' \n",
" else:\n",
" cemt = code\n",
" return cemt\n",
" \n",
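"# We need to add the empty category, otherwise empty codes end up as NaN in the categorical\n",
"# (TODO: '' is not yet included in the list below).\n",
"# Add all categories from the Richtlijn Vaarwegen and add VIIa (not used in NL).\n",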
"categories = ['0', 'I', 'II', 'III', 'IV', 'IVa', 'IVb', 'V_A', 'V_B', 'VI_A', 'VI_B', 'VI_C', 'VII', 'VIIa']\n",
"cemt_dtype = pd.CategoricalDtype(categories=categories, ordered=True)"
]
},
{
"cell_type": "code",
"execution_count": 7,
"id": "8599c3f0-5234-408e-90a9-6945a71e71d3",
"metadata": {},
"outputs": [],
"source": [
"# You can use the above function to create an elegant categorical variable, including ordering.\n",
"# Note that when using the data in the graph, the ordering does not work.\n",
"# In a future update we should make the codes uniform and rename them from Code to CEMT.\n",
"edges_df['CEMT'] = pd.Categorical(edges_df['Code'].apply(code2cemt), dtype=cemt_dtype)"
]
},
{
"cell_type": "code",
"execution_count": 12,
"id": "3af3af32-f284-4d38-b069-745ca81fa619",
"metadata": {},
"outputs": [],
"source": [
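"# Convert CEMT to an RWS class (e.g. M2). When multiple M classes match, we select the middle or upper M class; when no M class exists, we pick the RWS class from barges (B classes).\n",
"# FIXME(review): 'V_A' maps to 'M', which is not one of the RWS categories listed below, so it becomes NaN in the categorical column; probably 'M8' or 'M9' was intended.\n",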
"\n",
"cemt2rws_dict = {\n",
" \"0\": \"M0\",\n",
" \"I\": \"M1\",\n",
" \"II\": \"M2\", \n",
" \"III\": \"M4\", \n",
" \"IV\": \"M7\",\n",
" \"V_A\": \"M\", \n",
" \"V_B\": \"BII-2l\",\n",
" \"VI_A\": \"M11\",\n",
" \"VI_B\": \"BII-4\",\n",
" \"VI_C\": \"BII-6l\"\n",
"}\n",
"def cemt2rws(cemt):\n",
" return cemt2rws_dict.get(cemt, '')\n",
"\n",
"\n",
"categories = ['M0', 'M1', 'BO1', 'M2', 'BO2', 'M3', 'BO3', 'M4', 'BO4', 'M5', 'M6', 'BI', 'M7', 'M8', 'BII-1', 'M9', 'BIIa-1', 'BIIL-1', 'BII-2l', 'M10', 'BII-2b', 'M11', 'M12', 'BII-4', 'BII-6l', 'BII-6b'] \n",
"rws_dtype = pd.CategoricalDtype(categories=categories, ordered=True)"
]
},
{
"cell_type": "code",
"execution_count": 13,
"id": "00a0129b-02b2-483c-8381-0fac9e89906a",
"metadata": {},
"outputs": [],
"source": [
"edges_df['rws_richtlijn_vaarwegen'] = pd.Categorical(edges_df['CEMT'].apply(cemt2rws), dtype=rws_dtype)"
]
},
{
"cell_type": "code",
"execution_count": 14,
"id": "5da3ed64-351a-4deb-9c9d-eeb5b6d1d259",
"metadata": {},
"outputs": [],
"source": [
"# update the graph with our new classification info \n",
"for e, info in graph.edges.items():\n",
" cemt = code2cemt(info['Code'])\n",
" rws = cemt2rws(cemt)\n",
" info['CEMT'] = cemt\n",
" info['rws_richtlijn_vaarwegen'] = rws"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "88d8074e-541d-4a6b-9bb3-53bffc0f23bd",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.12.0"
}
},
"nbformat": 4,
"nbformat_minor": 5
}
Loading