Skip to content

Commit

Permalink
Merge pull request #71 from comsysto/feature/extend-osm-ocm-to-europe…
Browse files Browse the repository at this point in the history
…an-countries

extend available country code list to most of Europe
  • Loading branch information
stefanpernpaintner authored Feb 19, 2024
2 parents 4466e4f + 4d78906 commit 1d565d9
Show file tree
Hide file tree
Showing 21 changed files with 229 additions and 148 deletions.
7 changes: 5 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ All steps are decoupled, so it's easy to integrate your own data source.
The primary data sources for electronic vehicle charging stations that are generally available across Europe are
Open Street Map (OSM) and Open Charge Map (OCM).

For each country, we then integrate the official government data source (where available).
For some countries, we are able to then integrate the official government data source.
Just to name a few examples, the government data source we use for Germany is Bundesnetzagentur (BNA),
or the National Chargepoint Registry (NCR) for the UK.

Expand Down Expand Up @@ -148,6 +148,8 @@ Then run migration

eCharm can be run similar to a command line tool.
Run `python main.py -h` to see the full list of command line options.
Run `python list-countries.py` to see a list of supported countries
together with information about the availability of a governmental data source, and availability in OSM and OCM.

Here are a few example commands for running tasks:

Expand All @@ -169,7 +171,8 @@ tasks, since eCharm is not (yet) clever with updating data from consecutive impo
python main.py import merge --countries de it --delete_data
```

Currently, we support `at`, `de`,`gb`,`fr`, `it`, `nor` and `swe` as country codes.
Currently, we support the [ISO 3166-1 alpha 2](https://en.wikipedia.org/wiki/ISO_3166-1_alpha-2)
country codes of most European countries.

#### Export all original (un-merged) station data for Germany in csv format:

Expand Down
72 changes: 72 additions & 0 deletions charging_stations_pipelines/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,72 @@
from pathlib import Path
from typing import Final


class CountryInfo:
def __init__(self, name: str, gov: bool, osm: bool, ocm: bool):
self.name = name
self.gov = gov
self.osm = osm
self.ocm = ocm


COUNTRIES: Final[dict[str, CountryInfo]] = dict(
[
("AD", CountryInfo(name="Andorra", gov=False, osm=True, ocm=True)),
("AL", CountryInfo(name="Albania", gov=False, osm=True, ocm=True)),
("AT", CountryInfo(name="Austria", gov=True, osm=True, ocm=True)),
("BA", CountryInfo(name="Bosnia and Herzegovina", gov=False, osm=True, ocm=True)),
("BE", CountryInfo(name="Belgium", gov=False, osm=True, ocm=True)),
("BG", CountryInfo(name="Bulgaria", gov=False, osm=True, ocm=True)),
("BY", CountryInfo(name="Belarus", gov=False, osm=True, ocm=True)),
("CH", CountryInfo(name="Switzerland", gov=False, osm=True, ocm=True)),
("CY", CountryInfo(name="Cyprus", gov=False, osm=True, ocm=True)),
("CZ", CountryInfo(name="Czech Republic", gov=False, osm=True, ocm=True)),
("DE", CountryInfo(name="Germany", gov=True, osm=True, ocm=True)),
("DK", CountryInfo(name="Denmark", gov=False, osm=True, ocm=True)),
("EE", CountryInfo(name="Estonia", gov=False, osm=True, ocm=True)),
("ES", CountryInfo(name="Spain", gov=False, osm=True, ocm=True)),
("FI", CountryInfo(name="Finland", gov=False, osm=True, ocm=True)),
("FR", CountryInfo(name="France", gov=True, osm=True, ocm=True)),
("GB", CountryInfo(name="United Kingdom", gov=True, osm=True, ocm=True)),
("GR", CountryInfo(name="Greece", gov=False, osm=True, ocm=True)),
("HR", CountryInfo(name="Croatia", gov=False, osm=True, ocm=True)),
("HU", CountryInfo(name="Hungary", gov=False, osm=True, ocm=True)),
("IE", CountryInfo(name="Ireland", gov=False, osm=True, ocm=True)),
("IS", CountryInfo(name="Iceland", gov=False, osm=True, ocm=True)),
("IT", CountryInfo(name="Italy", gov=False, osm=True, ocm=True)),
("LI", CountryInfo(name="Liechtenstein", gov=False, osm=True, ocm=True)),
("LT", CountryInfo(name="Lithuania", gov=False, osm=True, ocm=True)),
("LU", CountryInfo(name="Luxembourg", gov=False, osm=True, ocm=True)),
("LV", CountryInfo(name="Latvia", gov=False, osm=True, ocm=True)),
("MC", CountryInfo(name="Monaco", gov=False, osm=True, ocm=True)),
("MD", CountryInfo(name="Moldova", gov=False, osm=True, ocm=True)),
("ME", CountryInfo(name="Montenegro", gov=False, osm=True, ocm=True)),
("MK", CountryInfo(name="North Macedonia", gov=False, osm=True, ocm=True)),
("MT", CountryInfo(name="Malta", gov=False, osm=True, ocm=True)),
("NL", CountryInfo(name="Netherlands", gov=False, osm=True, ocm=True)),
("NO", CountryInfo(name="Norway", gov=True, osm=True, ocm=True)),
("PL", CountryInfo(name="Poland", gov=False, osm=True, ocm=True)),
("PT", CountryInfo(name="Portugal", gov=False, osm=True, ocm=True)),
("RO", CountryInfo(name="Romania", gov=False, osm=True, ocm=True)),
("RS", CountryInfo(name="Serbia", gov=False, osm=True, ocm=True)),
("SE", CountryInfo(name="Sweden", gov=True, osm=True, ocm=True)),
("SI", CountryInfo(name="Slovenia", gov=False, osm=True, ocm=True)),
("SK", CountryInfo(name="Slovakia", gov=False, osm=True, ocm=True)),
("SM", CountryInfo(name="San Marino", gov=False, osm=True, ocm=True)),
("UA", CountryInfo(name="Ukraine", gov=False, osm=True, ocm=True)),
("VA", CountryInfo(name="Vatican City", gov=False, osm=True, ocm=False)),
("XK", CountryInfo(name="Kosovo", gov=False, osm=True, ocm=True)),
]
)

COUNTRY_CODES: list[str] = list(COUNTRIES.keys())
OSM_COUNTRY_CODES: list[str] = list({k: v for k, v in COUNTRIES.items() if v.osm}.keys())
OCM_COUNTRY_CODES: list[str] = list({k: v for k, v in COUNTRIES.items() if v.ocm}.keys())
GOV_COUNTRY_CODES: list[str] = list({k: v for k, v in COUNTRIES.items() if v.gov}.keys())

PROJ_ROOT: Final[Path] = Path(__file__).parents[1]
"""The root directory of the project."""

PROJ_DATA_DIR: Final[Path] = PROJ_ROOT / "data"
"""The path to the data folder."""
11 changes: 6 additions & 5 deletions charging_stations_pipelines/deduplication/merger.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,13 +32,14 @@ def __init__(self, country_code: str, config: configparser, db_engine, is_test:
"AT": "AT_ECONTROL",
"FR": "FRGOV",
"GB": "GBGOV",
"IT": "", # No gov source for Italy so far (5.4.2023)
"NOR": "NOBIL",
"SWE": "NOBIL",
"NO": "NOBIL",
"SE": "NOBIL",
}
if country_code not in country_code_to_gov_source:
raise Exception(f"country code '{country_code}' unknown in merger")
self.gov_source = country_code_to_gov_source[country_code]
logger.info(f"No governmental data source available for country code '{country_code}'")
self.gov_source = ""
else:
self.gov_source = country_code_to_gov_source[country_code]

@staticmethod
def merge_attributes(station: pd.Series, duplicates_to_merge: pd.DataFrame):
Expand Down
3 changes: 0 additions & 3 deletions charging_stations_pipelines/pipelines/at/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,3 @@

DATA_SOURCE_KEY: Final[str] = "AT_ECONTROL"
"""The data source key for the e-control data source."""

SCOPE_COUNTRIES: Final[list[str]] = ["AT"]
"""The list of country codes covered by the e-control data source."""
34 changes: 15 additions & 19 deletions charging_stations_pipelines/pipelines/at/econtrol.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,15 +7,13 @@
import collections
import configparser
import logging
import os
import pathlib

import pandas as pd
from sqlalchemy.orm import Session
from tqdm import tqdm

from charging_stations_pipelines.pipelines import Pipeline
from charging_stations_pipelines.pipelines.at import DATA_SOURCE_KEY, SCOPE_COUNTRIES
from charging_stations_pipelines.pipelines.at import DATA_SOURCE_KEY
from charging_stations_pipelines.pipelines.at.econtrol_crawler import get_data
from charging_stations_pipelines.pipelines.at.econtrol_mapper import (
map_address,
Expand All @@ -25,6 +23,7 @@
from charging_stations_pipelines.pipelines.station_table_updater import (
StationTableUpdater,
)
from charging_stations_pipelines.shared import country_import_data_path

logger = logging.getLogger(__name__)

Expand All @@ -40,7 +39,6 @@ class EcontrolAtPipeline(Pipeline):
:ivar config: A `configparser` object containing configurations for the pipeline.
:ivar session: A `Session` object representing the session used for database operations.
:ivar online: A boolean indicating whether the pipeline should retrieve data online.
:ivar data_dir: A string representing the directory where data files will be stored.
"""

def __init__(self, config: configparser, session: Session, online: bool = False):
Expand All @@ -49,12 +47,8 @@ def __init__(self, config: configparser, session: Session, online: bool = False)
# Is always 'AT' for this pipeline
self.country_code = "AT"

relative_dir = os.path.join("../../..", "data")
self.data_dir = os.path.join(pathlib.Path(__file__).parent.resolve(), relative_dir)

def _retrieve_data(self):
pathlib.Path(self.data_dir).mkdir(parents=True, exist_ok=True)
tmp_data_path = os.path.join(self.data_dir, self.config[DATA_SOURCE_KEY]["filename"])
tmp_data_path = country_import_data_path(self.country_code) / self.config[DATA_SOURCE_KEY]["filename"]
if self.online:
logger.info("Retrieving Online Data")
get_data(tmp_data_path)
Expand All @@ -78,19 +72,21 @@ def run(self):
try:
station = map_station(datapoint, self.country_code)

# Count stations with country codes that are not in the scope of the pipeline
if station.country_code not in SCOPE_COUNTRIES:
stats["count_country_mismatch_stations"] += 1

# Address mapping
station.address = map_address(datapoint, self.country_code, None)

# Count stations which have an invalid address
if station.address and station.address.country and station.address.country not in SCOPE_COUNTRIES:
stats["count_country_mismatch_stations"] += 1

# Count stations which have a mismatching country code between Station and Address
if station.country_code != station.address.country:
# Count stations that have some kind of country code mismatch
if (
# Count stations which have an invalid country code in address
(station.address and station.address.country and station.address.country != "AT")
# Count stations which have a mismatching country code between Station and Address
or (
station.country_code is not None
and station.address is not None
and station.address.country is not None
and station.country_code != station.address.country
)
):
stats["count_country_mismatch_stations"] += 1

# Charging point
Expand Down
9 changes: 2 additions & 7 deletions charging_stations_pipelines/pipelines/de/bna.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,6 @@

import configparser
import logging
import pathlib
from typing import Final

import pandas as pd
from sqlalchemy.orm import Session
Expand All @@ -18,7 +16,7 @@
)
from ...pipelines import Pipeline
from ...pipelines.station_table_updater import StationTableUpdater
from ...shared import load_excel_file
from ...shared import load_excel_file, country_import_data_path

logger = logging.getLogger(__name__)

Expand All @@ -30,11 +28,8 @@ def __init__(self, config: configparser, session: Session, online: bool = False)
# All BNA data is from Germany
self.country_code = "DE"

self.data_dir: Final[pathlib.Path] = (pathlib.Path(__file__).parents[3] / "data").resolve()

def retrieve_data(self):
self.data_dir.mkdir(parents=True, exist_ok=True)
tmp_data_path = self.data_dir / self.config[DATA_SOURCE_KEY]["filename"]
tmp_data_path = country_import_data_path(self.country_code) / self.config[DATA_SOURCE_KEY]["filename"]

if self.online:
logger.info("Retrieving Online Data")
Expand Down
8 changes: 2 additions & 6 deletions charging_stations_pipelines/pipelines/fr/france.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,6 @@
"""Pipeline for retrieving data from the French government website."""

import logging
import os
import pathlib

import pandas as pd
import requests as requests
Expand All @@ -18,16 +16,14 @@
from charging_stations_pipelines.pipelines.station_table_updater import (
StationTableUpdater,
)
from charging_stations_pipelines.shared import download_file, reject_if
from charging_stations_pipelines.shared import download_file, reject_if, country_import_data_path

logger = logging.getLogger(__name__)


class FraPipeline(Pipeline):
def _retrieve_data(self):
data_dir = os.path.join(pathlib.Path(__file__).parent.resolve(), "../../..", "data")
pathlib.Path(data_dir).mkdir(parents=True, exist_ok=True)
tmp_data_path = os.path.join(data_dir, self.config["FRGOV"]["filename"])
tmp_data_path = country_import_data_path("FR") / self.config["FRGOV"]["filename"]
if self.online:
logger.info("Retrieving Online Data")
self.download_france_gov_file(tmp_data_path)
Expand Down
15 changes: 6 additions & 9 deletions charging_stations_pipelines/pipelines/gb/gb_mapper.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,27 +7,24 @@
from charging_stations_pipelines.models.address import Address
from charging_stations_pipelines.models.charging import Charging
from charging_stations_pipelines.models.station import Station
from charging_stations_pipelines.shared import check_coordinates
from charging_stations_pipelines.shared import check_coordinates, parse_date

logger = logging.getLogger(__name__)


# functions for GB gov data:
def map_station_gb(entry, country_code: str):
def map_station_gb(entry):
datasource = "GBGOV"
lat: float = check_coordinates(entry.get("ChargeDeviceLocation").get("Latitude"))
long: float = check_coordinates(entry.get("ChargeDeviceLocation").get("Longitude"))
operator: Optional[str] = entry.get("DeviceController").get("OrganisationName")
new_station = Station()
new_station.country_code = country_code
new_station.country_code = "GB"
new_station.source_id = entry.get("ChargeDeviceId")
new_station.operator = operator
new_station.operator = entry.get("DeviceController").get("OrganisationName")
new_station.data_source = datasource
new_station.point = from_shape(Point(float(long), float(lat)))
new_station.date_created = entry.get("DateCreated")
new_station.date_updated = entry.get("DateUpdated")
# TODO: find way to parse date into desired format
# parse_date having issues with "date out of range" at value 0"
new_station.date_created = parse_date(entry.get("DateCreated"))
new_station.date_updated = parse_date(entry.get("DateUpdated"))
return new_station


Expand Down
10 changes: 3 additions & 7 deletions charging_stations_pipelines/pipelines/gb/gbgov.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,6 @@
import configparser
import json
import logging
import os
import pathlib
from typing import Optional

from sqlalchemy.orm import Session
Expand All @@ -19,7 +17,7 @@
from charging_stations_pipelines.pipelines.station_table_updater import (
StationTableUpdater,
)
from charging_stations_pipelines.shared import JSON
from charging_stations_pipelines.shared import JSON, country_import_data_path

logger = logging.getLogger(__name__)

Expand All @@ -31,9 +29,7 @@ def __init__(self, config: configparser, session: Session, online: bool = False)
self.data: Optional[JSON] = None

def _retrieve_data(self):
data_dir: str = os.path.join(pathlib.Path(__file__).parent.resolve(), "../../..", "data")
pathlib.Path(data_dir).mkdir(parents=True, exist_ok=True)
tmp_file_path = os.path.join(data_dir, self.config["GBGOV"]["filename"])
tmp_file_path = country_import_data_path("GB") / self.config["GBGOV"]["filename"]
if self.online:
logger.info("Retrieving Online Data")
get_gb_data(tmp_file_path)
Expand All @@ -51,7 +47,7 @@ def run(self):
for entry in self.data.get("ChargeDevice", []):
mapped_address = map_address_gb(entry, None)
mapped_charging = map_charging_gb(entry)
mapped_station = map_station_gb(entry, " GB")
mapped_station = map_station_gb(entry)
mapped_station.address = mapped_address
mapped_station.charging = mapped_charging
station_updater.update_station(station=mapped_station, data_source_key="GBGOV")
Expand Down
14 changes: 7 additions & 7 deletions charging_stations_pipelines/pipelines/nobil/nobil_pipeline.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@
import logging
import os
from _decimal import Decimal
from pathlib import Path

from geoalchemy2.shape import from_shape
from shapely.geometry import Point
Expand All @@ -15,7 +14,7 @@
from charging_stations_pipelines.models.charging import Charging
from charging_stations_pipelines.models.station import Station
from charging_stations_pipelines.pipelines import Pipeline
from charging_stations_pipelines.shared import download_file, load_json_file, reject_if
from charging_stations_pipelines.shared import download_file, load_json_file, reject_if, country_import_data_path

logger = logging.getLogger(__name__)

Expand Down Expand Up @@ -137,10 +136,11 @@ def _map_charging_to_domain(nobil_station: NobilStation) -> Charging:


def _load_datadump_and_write_to_target(path_to_target, country_code: str):
nobil_api_country_code = "NOR" if country_code == "NO" else "SWE"
nobil_api_key = os.getenv("NOBIL_APIKEY")
link_to_datadump = (
f"https://nobil.no/api/server/datadump.php?apikey="
f"{nobil_api_key}&countrycode={country_code}&format=json&file=true"
f"{nobil_api_key}&countrycode={nobil_api_country_code}&format=json&file=true"
)
download_file(link_to_datadump, path_to_target)

Expand All @@ -150,21 +150,21 @@ class NobilPipeline(Pipeline):

def __init__(
self,
config: configparser,
session: Session,
country_code: str,
config: configparser,
online: bool = False,
):
super().__init__(config, session, online)

accepted_country_codes = ["NOR", "SWE"]
accepted_country_codes = ["NO", "SE"]
reject_if(country_code.upper() not in accepted_country_codes, "Invalid country code ")
self.country_code = country_code.upper()

def run(self):
"""Run the pipeline."""
logger.info("Running NOR/SWE GOV Pipeline...")
path_to_target = Path(__file__).parent.parent.parent.parent.joinpath("data/" + self.country_code + "_gov.json")
logger.info(f"Running {self.country_code} GOV Pipeline...")
path_to_target = country_import_data_path(self.country_code) / "nobil.json"
if self.online:
logger.info("Retrieving Online Data")
_load_datadump_and_write_to_target(path_to_target, self.country_code)
Expand Down
Loading

0 comments on commit 1d565d9

Please sign in to comment.