-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Rémi Desgrange
committed
Dec 18, 2020
0 parents
commit ee9a831
Showing
4 changed files
with
133 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
export-g2f |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,10 @@ | ||
# Slim Python 3.9 base image.
FROM python:3.9-slim

# Run every following step (and the container itself) as an unprivileged, fixed UID.
USER 1234
WORKDIR /app
# --chown takes its value with "=": --chown=<user>:<group>.
COPY --chown=1234:1234 requirements.txt /app
RUN pip install --user -r requirements.txt

# NOTE(review): no instruction here copies main.py into /app; confirm it is
# mounted at runtime or add a COPY for it.
CMD ["python3", "/app/main.py"]
|
||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,118 @@ | ||
import csv | ||
import sys | ||
import logging | ||
import requests | ||
import json | ||
from pydantic import BaseSettings | ||
from abc import ABC, abstractmethod | ||
from requests import Response | ||
from pathlib import Path | ||
from urllib.parse import urlencode | ||
from io import StringIO | ||
|
||
class Settings(BaseSettings):
    """
    Configuration of the export.

    Being a pydantic ``BaseSettings`` model, every field below is a default
    that can be overridden when the model is instantiated.
    """

    # Directory the exported files are written to.
    OUTPUT_DIR: Path = Path("/mnt/apache_nas_data/public/export_json_csv")
    # Column names removed from the CSV export.
    # Defaults are real lists so they match the ``list`` annotation (a tuple
    # default is rejected by mypy and differs in type from overridden values).
    UNWANTED_CSV_COLUMNS: list = ["FID", "the_geom"]
    # Keys removed from every feature of the JSON export.
    UNWANTED_JSON_COLUMNS: list = ["bbox"]
    # Upper bound on the number of features requested from the WFS.
    MAX_FEATURES: int = 5000
    # WFS endpoint that is queried.
    GEOSERVER_WFS_URL: str = "https://www.geo2france.fr/geoserver/cr_hdf/wfs"
    # Layer names to export; each one is requested separately.
    GEOSERVER_LAYERS: list = ["epci"]
    # Level passed to logging.basicConfig by the script entry point.
    LOG_LEVEL: str = "INFO"
|
||
|
||
class Process(ABC):
    """
    Common interface to fetching data from geoserver WFS in csv and json.

    Subclasses provide ``run``, ``clean`` and ``store``; ``download`` is
    shared by all of them.
    """

    def __init__(self, settings: "Settings", layer: str) -> None:
        """
        Remember which layer to fetch and the settings to fetch it with.

        :param settings: configuration (WFS URL, feature cap, output directory...).
        :param layer: name sent as the WFS ``typeName``.
        """
        self.layer = layer
        self.settings = settings

    @abstractmethod
    def run(self) -> None:
        """Run the whole export; expected to chain download, clean and store."""

    def download(self, output_format: str) -> Response:
        """
        Issue a WFS ``GetFeature`` request for ``self.layer``.

        :param output_format: value sent as the WFS ``outputFormat``.
        :return: the HTTP response.
        :raises requests.HTTPError: when the server answers with an error status.
        """
        qs = urlencode(
            {
                "request": "GetFeature",
                "typeName": self.layer,
                # WFS 1.0.0 spells the cap "maxFeatures" (plural); the
                # singular form is not a WFS parameter.
                "maxFeatures": self.settings.MAX_FEATURES,
                "outputFormat": output_format,
                "version": "1.0.0",
            }
        )
        url = f"{self.settings.GEOSERVER_WFS_URL}?{qs}"
        r = requests.get(url)
        r.raise_for_status()
        return r

    @abstractmethod
    def clean(self, data: str) -> str:
        """Return ``data`` stripped of the unwanted content."""

    @abstractmethod
    def store(self, data: str, path: Path) -> None:
        """Persist ``data`` at the given location."""
|
||
|
||
class ProcessCsv(Process):
    """
    Concrete implementation of fetching CSV from geoserver WFS.
    """

    def run(self) -> None:
        """Download the layer as CSV, drop the unwanted columns and write the file."""
        # Not named "csv": that would shadow the csv module inside this method.
        response = self.download("csv")
        cleaned = self.clean(response.text)
        # NOTE(review): the file name does not depend on the layer, so
        # exporting several layers overwrites the same file; confirm intent.
        self.store(cleaned, "result.csv")

    def clean(self, data: str) -> str:
        """
        Remove the columns listed in ``settings.UNWANTED_CSV_COLUMNS``.

        :param data: CSV text whose first row is the header.
        :return: CSV text without the unwanted columns (header included);
            an empty string when ``data`` has no header row.
        """
        # Lift the per-field size limit so very large fields do not make the
        # reader raise.
        csv.field_size_limit(sys.maxsize)
        reader = csv.DictReader(StringIO(data))
        if not reader.fieldnames:
            return ""

        unwanted = set(self.settings.UNWANTED_CSV_COLUMNS)
        # Filter the header too, otherwise the unwanted columns stay in the
        # output as empty cells.
        kept = [name for name in reader.fieldnames if name not in unwanted]

        output = StringIO()
        # extrasaction="ignore" makes the writer skip the keys that are not in
        # ``kept``, so rows need no per-key deletion.
        writer = csv.DictWriter(output, fieldnames=kept, extrasaction="ignore")
        writer.writeheader()
        writer.writerows(reader)
        # getvalue() yields the buffer content; str() would only give the
        # object's repr.
        return output.getvalue()

    def store(self, data: str, output_file: str) -> None:
        """
        Write ``data`` to ``output_file`` inside ``settings.OUTPUT_DIR``.

        :param data: CSV text to write.
        :param output_file: file name relative to the output directory.
        """
        # "w" is required to write; newline="" keeps the line endings already
        # produced by the csv writer untouched.
        with open(
            self.settings.OUTPUT_DIR / output_file,
            "w",
            newline="",
            encoding="utf-8",
        ) as f:
            f.write(data)
|
||
|
||
|
||
class ProcessJson(Process):
    """
    Concrete implementation of fetching JSON from geoserver WFS.
    """

    def run(self) -> None:
        """Download the layer as JSON, drop the unwanted keys and write the file."""
        res = self.download("json")
        cleaned = self.clean(res.json())
        # store() joins this name to OUTPUT_DIR itself; passing the directory
        # here would make open() target the directory instead of a file.
        # NOTE(review): the file name does not depend on the layer, so
        # exporting several layers overwrites the same file; confirm intent.
        self.store(cleaned, "result.json")

    def clean(self, geojson: dict) -> list:
        """
        Remove the keys listed in ``settings.UNWANTED_JSON_COLUMNS`` from every feature.

        The features are modified in place.

        :param geojson: decoded response; must contain a ``"features"`` entry.
        :return: the content of ``geojson["features"]`` (not the whole document).
        :raises KeyError: when ``geojson`` has no ``"features"`` entry.
        """
        features = geojson["features"]
        for feature in features:
            for unwanted in self.settings.UNWANTED_JSON_COLUMNS:
                # A default is given so a feature lacking the key is left alone.
                feature.pop(unwanted, None)
        return features

    def store(self, data: list, output_file: str) -> None:
        """
        Serialize ``data`` as JSON into ``output_file`` inside ``settings.OUTPUT_DIR``.

        :param data: JSON-serializable features to write.
        :param output_file: file name relative to the output directory.
        """
        with open(self.settings.OUTPUT_DIR / output_file, "w", encoding="utf-8") as f:
            # json.dump takes the object first, then the file.
            json.dump(data, f)
|
||
|
||
if __name__ == "__main__":
    # Load the configuration first: the log level comes from it.
    settings = Settings()
    logging.basicConfig(level=settings.LOG_LEVEL)

    # For each layer, run the CSV export and then the JSON export.
    exporters = (ProcessCsv, ProcessJson)
    for layer in settings.GEOSERVER_LAYERS:
        for exporter in exporters:
            exporter(settings, layer).run()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,4 @@ | ||
requests | ||
pydantic | ||
black | ||
mypy |