From ee9a831c189196c2c8c89c0cd26dea5366e60b71 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?R=C3=A9mi=20Desgrange?=
Date: Fri, 18 Dec 2020 17:18:10 +0100
Subject: [PATCH] First commit

---
 .python-version  |   1 +
 Dockerfile       |  10 ++++
 main.py          | 141 +++++++++++++++++++++++++++++++++++++++++++++++
 requirements.txt |   5 ++
 4 files changed, 157 insertions(+)
 create mode 100644 .python-version
 create mode 100644 Dockerfile
 create mode 100644 main.py
 create mode 100644 requirements.txt

diff --git a/.python-version b/.python-version
new file mode 100644
index 0000000..cc13910
--- /dev/null
+++ b/.python-version
@@ -0,0 +1 @@
+export-g2f
diff --git a/Dockerfile b/Dockerfile
new file mode 100644
index 0000000..8b8f4e8
--- /dev/null
+++ b/Dockerfile
@@ -0,0 +1,10 @@
+FROM python:3.9-slim
+
+WORKDIR /app
+COPY requirements.txt /app/
+# Install as root: UID 1234 has no writable home for "pip install --user".
+RUN pip install --no-cache-dir -r requirements.txt
+COPY --chown=1234:1234 main.py /app/
+
+USER 1234
+CMD ["python3", "/app/main.py"]
diff --git a/main.py b/main.py
new file mode 100644
index 0000000..dbacb9c
--- /dev/null
+++ b/main.py
@@ -0,0 +1,141 @@
+"""Export GeoServer WFS layers as cleaned CSV and JSON files."""
+import csv
+import json
+import logging
+import sys
+from abc import ABC, abstractmethod
+from io import StringIO
+from pathlib import Path
+from typing import Any
+from urllib.parse import urlencode
+
+import requests
+from pydantic import BaseSettings
+from requests import Response
+
+logger = logging.getLogger(__name__)
+
+
+class Settings(BaseSettings):
+    """Runtime configuration; pydantic lets environment variables override it."""
+
+    OUTPUT_DIR: Path = Path("/mnt/apache_nas_data/public/export_json_csv")
+    UNWANTED_CSV_COLUMNS: list = ["FID", "the_geom"]
+    UNWANTED_JSON_COLUMNS: list = ["bbox"]
+    MAX_FEATURES: int = 5000
+    GEOSERVER_WFS_URL: str = "https://www.geo2france.fr/geoserver/cr_hdf/wfs"
+    GEOSERVER_LAYERS: list = ["epci"]
+    LOG_LEVEL: str = "INFO"
+    # Seconds to wait for GeoServer before giving up on a request.
+    REQUEST_TIMEOUT: float = 60.0
+
+
+class Process(ABC):
+    """
+    Common interface to fetching data from geoserver WFS in csv and json.
+    """
+
+    def __init__(self, settings: "Settings", layer: str) -> None:
+        self.layer = layer
+        self.settings = settings
+
+    @abstractmethod
+    def run(self) -> None:
+        """Download the layer, clean it and store the result."""
+
+    def download(self, output_format: str) -> Response:
+        """
+        Fetch the layer from the WFS endpoint in the given output format.
+
+        Raises requests.HTTPError if the server answers with an error status.
+        """
+        qs = urlencode(
+            {
+                "service": "WFS",
+                "version": "1.0.0",
+                "request": "GetFeature",
+                "typeName": self.layer,
+                # WFS 1.0.0 spells the result limit "maxFeatures" (plural).
+                "maxFeatures": self.settings.MAX_FEATURES,
+                "outputFormat": output_format,
+            }
+        )
+        url = f"{self.settings.GEOSERVER_WFS_URL}?{qs}"
+        logger.info("Downloading layer %s as %s", self.layer, output_format)
+        # Without a timeout requests would wait forever on a stalled server.
+        r = requests.get(url, timeout=self.settings.REQUEST_TIMEOUT)
+        r.raise_for_status()
+        return r
+
+    @abstractmethod
+    def clean(self, data: Any) -> Any:
+        """Return the downloaded data without the unwanted columns."""
+
+    @abstractmethod
+    def store(self, data: Any, output_file: str) -> None:
+        """Write the cleaned data to output_file inside OUTPUT_DIR."""
+
+
+class ProcessCsv(Process):
+    """
+    Concrete implementation of fetching CSV from geoserver WFS
+    """
+
+    def run(self) -> None:
+        res = self.download("csv")
+        cleaned = self.clean(res.text)
+        # One file per layer so that several layers do not overwrite each other.
+        self.store(cleaned, f"{self.layer}.csv")
+
+    def clean(self, data: str) -> str:
+        """Return the CSV text without the unwanted columns."""
+        # Geometry fields may exceed the csv module's default field size limit.
+        csv.field_size_limit(sys.maxsize)
+        reader = csv.DictReader(StringIO(data))
+        unwanted = set(self.settings.UNWANTED_CSV_COLUMNS)
+        header = reader.fieldnames or []
+        fieldnames = [name for name in header if name not in unwanted]
+        output = StringIO()
+        # extrasaction="ignore" skips the unwanted keys still present in rows.
+        writer = csv.DictWriter(output, fieldnames=fieldnames, extrasaction="ignore")
+        writer.writeheader()
+        writer.writerows(reader)
+        return output.getvalue()
+
+    def store(self, data: str, output_file: str) -> None:
+        path = self.settings.OUTPUT_DIR / output_file
+        # newline="" keeps the line endings written by the csv module as they are.
+        with open(path, "w", encoding="utf-8", newline="") as f:
+            f.write(data)
+
+
+class ProcessJson(Process):
+    """
+    Concrete implementation of fetching JSON from geoserver WFS
+    """
+
+    def run(self) -> None:
+        res = self.download("json")
+        cleaned = self.clean(res.json())
+        self.store(cleaned, f"{self.layer}.json")
+
+    def clean(self, geojson: dict) -> list:
+        """Return the features of the collection without the unwanted keys."""
+        features = geojson["features"]
+        for feature in features:
+            for unwanted in self.settings.UNWANTED_JSON_COLUMNS:
+                feature.pop(unwanted, None)
+        return features
+
+    def store(self, data: list, output_file: str) -> None:
+        path = self.settings.OUTPUT_DIR / output_file
+        with open(path, "w", encoding="utf-8") as f:
+            json.dump(data, f)
+
+
+if __name__ == "__main__":
+    settings = Settings()
+    logging.basicConfig(level=settings.LOG_LEVEL)
+    for layer in settings.GEOSERVER_LAYERS:
+        ProcessCsv(settings, layer).run()
+        ProcessJson(settings, layer).run()
diff --git a/requirements.txt b/requirements.txt
new file mode 100644
index 0000000..fd1e3f1
--- /dev/null
+++ b/requirements.txt
@@ -0,0 +1,5 @@
+requests
+# main.py imports BaseSettings from pydantic, which pydantic 2 no longer ships.
+pydantic~=1.10
+black
+mypy