Skip to content

Commit

Permalink
First commit
Browse files Browse the repository at this point in the history
  • Loading branch information
Rémi Desgrange committed Dec 18, 2020
0 parents commit ee9a831
Show file tree
Hide file tree
Showing 4 changed files with 133 additions and 0 deletions.
1 change: 1 addition & 0 deletions .python-version
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
export-g2f
10 changes: 10 additions & 0 deletions Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
# Image running the Geoserver WFS export script (main.py).
FROM python:3.9-slim

WORKDIR /app

# Install dependencies before dropping privileges.
# NOTE(review): UID 1234 has no passwd entry in the base image, so a
# `pip install --user` run as that user presumably has no writable home
# directory to install into — confirm against the intended base image.
# --chown takes its value with "=", i.e. --chown=<user>:<group>.
COPY --chown=1234:1234 requirements.txt /app/
RUN pip install --no-cache-dir -r requirements.txt

# The script started by CMD has to be part of the image.
COPY --chown=1234:1234 main.py /app/

# Run the export as an unprivileged user.
USER 1234

CMD ["python3", "/app/main.py"]


118 changes: 118 additions & 0 deletions main.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,118 @@
import csv
import sys
import logging
import requests
import json
from pydantic import BaseSettings
from abc import ABC, abstractmethod
from requests import Response
from pathlib import Path
from urllib.parse import urlencode
from io import StringIO

class Settings(BaseSettings):
    """
    Configuration of the export script.

    Every field has a default, so ``Settings()`` can be built without
    arguments.
    """

    # NOTE(review): the sequence defaults below are lists so that they match
    # their ``list`` annotation (they used to be tuples). pydantic is expected
    # to copy field defaults per instance — confirm for the pinned version.

    # Directory the exported files are written into.
    OUTPUT_DIR: Path = Path("/mnt/apache_nas_data/public/export_json_csv")
    # Column names dropped from the CSV export.
    UNWANTED_CSV_COLUMNS: list = ["FID", "the_geom"]
    # Keys dropped from every feature of the JSON export.
    UNWANTED_JSON_COLUMNS: list = ["bbox"]
    # Feature limit sent with each WFS GetFeature request.
    MAX_FEATURES: int = 5000
    # Base URL of the WFS endpoint the layers are fetched from.
    GEOSERVER_WFS_URL: str = "https://www.geo2france.fr/geoserver/cr_hdf/wfs"
    # Names of the layers to export.
    GEOSERVER_LAYERS: list = ["epci"]
    # Level passed to ``logging.basicConfig``.
    LOG_LEVEL: str = "INFO"


class Process(ABC):
    """
    Common interface to fetching data from geoserver WFS in csv and json.

    ``download`` is shared; subclasses provide ``run``, ``clean`` and
    ``store`` for one output format.
    """

    def __init__(self, settings: "Settings", layer: str) -> None:
        """
        :param settings: configuration (WFS URL, feature limit, output dir).
        :param layer: layer name sent as ``typeName`` in the WFS request.
        """
        self.layer = layer
        self.settings = settings

    @abstractmethod
    def run(self) -> None:
        """
        Export the layer: download it, clean it, then store the result.
        """

    def download(self, output_format: str) -> Response:
        """
        Fetch the layer with a WFS 1.0.0 ``GetFeature`` request.

        :param output_format: value of the ``outputFormat`` query parameter.
        :return: the HTTP response returned by ``requests.get``.
        :raises requests.HTTPError: via ``raise_for_status`` when the server
            answers with an error status.
        """
        qs = urlencode(
            {
                "request": "GetFeature",
                "typeName": self.layer,
                # WFS 1.0.0 names the limit "maxFeatures"; the former
                # spelling "maxFeature" is not a WFS parameter, so the
                # configured limit was not applied.
                "maxFeatures": self.settings.MAX_FEATURES,
                "outputFormat": output_format,
                "version": "1.0.0",
            }
        )
        url = f"{self.settings.GEOSERVER_WFS_URL}?{qs}"
        r = requests.get(url)
        r.raise_for_status()
        return r

    @abstractmethod
    def clean(self, data: str) -> str:
        """
        Return ``data`` without the columns configured as unwanted.
        """

    @abstractmethod
    def store(self, data: str, path: Path) -> None:
        """
        Write the cleaned ``data`` to ``path``.
        """


class ProcessCsv(Process):
    """
    Concrete implementation of fetching CSV from geoserver WFS
    """

    def run(self) -> None:
        """
        Download the layer as CSV, drop the unwanted columns and store it.
        """
        # Named "response" so the local does not shadow the csv module.
        response = self.download("csv")
        cleaned = self.clean(response.text)
        # NOTE(review): the file name is fixed, so exporting several layers
        # overwrites the same file — confirm whether per-layer names are
        # wanted.
        self.store(cleaned, "result.csv")

    def clean(self, data: str) -> str:
        """
        Remove the columns listed in ``UNWANTED_CSV_COLUMNS`` from CSV text.

        :param data: CSV document whose first row is the header.
        :return: the CSV text without the unwanted columns (header and
            values); an empty string when ``data`` has no header row.
        """
        # Geometry columns can exceed the csv module's default field limit.
        csv.field_size_limit(sys.maxsize)
        reader = csv.DictReader(StringIO(data))
        if reader.fieldnames is None:
            # Empty input: there is no header to write back.
            return ""

        unwanted = set(self.settings.UNWANTED_CSV_COLUMNS)
        kept = [name for name in reader.fieldnames if name not in unwanted]

        output = StringIO()
        # The writer only knows the kept columns, so the unwanted ones vanish
        # from the header too; "ignore" drops their values from each row.
        writer = csv.DictWriter(output, fieldnames=kept, extrasaction="ignore")
        writer.writeheader()
        writer.writerows(reader)
        # getvalue() yields the written text; str() would only give the
        # StringIO object's repr.
        return output.getvalue()

    def store(self, data: str, output_file: str) -> None:
        """
        Write ``data`` to ``output_file`` inside the configured output dir.

        :param data: CSV text to write.
        :param output_file: file name relative to ``settings.OUTPUT_DIR``.
        """
        target = self.settings.OUTPUT_DIR / output_file
        # "w" because the file is written (the default mode is read-only);
        # newline="" keeps the csv module's line endings untouched.
        with open(target, "w", newline="", encoding="utf-8") as f:
            f.write(data)



class ProcessJson(Process):
    """
    Concrete implementation of fetching JSON from geoserver WFS
    """

    def run(self) -> None:
        """
        Download the layer as JSON, drop the unwanted keys and store it.
        """
        res = self.download("json")
        cleaned = self.clean(res.json())
        # A file name, not the output directory itself: store() joins it
        # onto OUTPUT_DIR. Mirrors the fixed "result.csv" of the CSV export.
        # NOTE(review): with several layers this file is overwritten —
        # confirm whether per-layer names are wanted.
        self.store(cleaned, "result.json")

    def clean(self, geojson: dict) -> list:
        """
        Strip the keys listed in ``UNWANTED_JSON_COLUMNS`` from each feature.

        The feature dicts are modified in place.

        :param geojson: parsed response holding a ``"features"`` list.
        :return: the list of cleaned features.
        :raises KeyError: when ``geojson`` has no ``"features"`` entry.
        """
        features = geojson["features"]
        for feature in features:
            for unwanted in self.settings.UNWANTED_JSON_COLUMNS:
                # pop with a default: features lacking the key are fine.
                feature.pop(unwanted, None)
        return features

    def store(self, data: list, output_file: str) -> None:
        """
        Serialize ``data`` as JSON into the configured output directory.

        :param data: cleaned features to serialize.
        :param output_file: file name relative to ``settings.OUTPUT_DIR``.
        """
        with open(self.settings.OUTPUT_DIR / output_file, "w") as f:
            # json.dump takes the object first, then the file.
            json.dump(data, f)


if __name__ == "__main__":
    # Script entry point: export every configured layer, CSV first and then
    # JSON, after setting up logging from the settings.
    settings = Settings()
    logging.basicConfig(level=settings.LOG_LEVEL)
    for layer in settings.GEOSERVER_LAYERS:
        for process_cls in (ProcessCsv, ProcessJson):
            process_cls(settings, layer).run()
4 changes: 4 additions & 0 deletions requirements.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
requests
pydantic
black
mypy

0 comments on commit ee9a831

Please sign in to comment.