Skip to content

Commit

Permalink
Add script to import geometries
Browse files Browse the repository at this point in the history
  • Loading branch information
vej-ananas committed Oct 1, 2024
1 parent c3d2583 commit 796b5de
Show file tree
Hide file tree
Showing 4 changed files with 249 additions and 0 deletions.
165 changes: 165 additions & 0 deletions scripts/2024-10-01_import-new-geometries/.gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,165 @@
*.sql
*.gpkg

# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class

# C extensions
*.so

# Distribution / packaging
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
share/python-wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST

# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec

# Installer logs
pip-log.txt
pip-delete-this-directory.txt

# Unit test / coverage reports
htmlcov/
.tox/
.nox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
*.py,cover
.hypothesis/
.pytest_cache/
cover/

# Translations
*.mo
*.pot

# Django stuff:
*.log
local_settings.py
db.sqlite3
db.sqlite3-journal

# Flask stuff:
instance/
.webassets-cache

# Scrapy stuff:
.scrapy

# Sphinx documentation
docs/_build/

# PyBuilder
.pybuilder/
target/

# Jupyter Notebook
.ipynb_checkpoints

# IPython
profile_default/
ipython_config.py

# pyenv
# For a library or package, you might want to ignore these files since the code is
# intended to run in multiple environments; otherwise, check them in:
# .python-version

# pipenv
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
# However, in case of collaboration, if having platform-specific dependencies or dependencies
# having no cross-platform support, pipenv may install dependencies that don't work, or not
# install all needed dependencies.
#Pipfile.lock

# poetry
# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
# This is especially recommended for binary packages to ensure reproducibility, and is more
# commonly ignored for libraries.
# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
#poetry.lock

# pdm
# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
#pdm.lock
# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
# in version control.
# https://pdm.fming.dev/latest/usage/project/#working-with-version-control
.pdm.toml
.pdm-python
.pdm-build/

# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
__pypackages__/

# Celery stuff
celerybeat-schedule
celerybeat.pid

# SageMath parsed files
*.sage.py

# Environments
.env
.venv
env/
venv/
ENV/
env.bak/
venv.bak/

# Spyder project settings
.spyderproject
.spyproject

# Rope project settings
.ropeproject

# mkdocs documentation
/site

# mypy
.mypy_cache/
.dmypy.json
dmypy.json

# Pyre type checker
.pyre/

# pytype static type analyzer
.pytype/

# Cython debug symbols
cython_debug/

# PyCharm
# JetBrains specific template is maintained in a separate JetBrains.gitignore that can
# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
# and can be added to the global gitignore or merged into this file. For a more nuclear
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
.idea/
14 changes: 14 additions & 0 deletions scripts/2024-10-01_import-new-geometries/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
# Script to import new Geometries into the assets database

The script takes a geopackage file as input and creates two files:

- a file `sgsids.txt` which contains all sgsids whose geometries are to be updated
- a SQL script `import_geometries.sql` which first deletes the existing geometries of every asset with a relevant sgsid and then inserts the new ones

Getting started:

- Install Python 3.12
- Install the dependencies listed in `requirements.txt` (e.g. `pip install -r requirements.txt`)
- Adjust the parameters at the top of `main.py`
- Run `main.py`
- Run the resulting SQL script directly in the database. In case of a large number of geometries, consider running the script in smaller transaction blocks. Tools like DataGrip can do this directly.
68 changes: 68 additions & 0 deletions scripts/2024-10-01_import-new-geometries/main.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
from typing import TextIO

import geopandas as gpd
import numpy as np
from geopandas import GeoDataFrame
import os

# --- Configuration: adjust the values below before running the script ---

# Directory that holds the input geopackage; both output files are written here too.
filedir: str = ''
# File name of the input geopackage inside `filedir`.
filename: str = 'geometries.gpkg'
# File name of the generated SQL script.
output_filename: str = 'import_geometries.sql'
# File name of the generated list of affected sgsids.
output_sgsids_filename: str = 'sgsids.txt'

# Manual corrections for sgsids that are not in the database: maps an sgsid as
# found in the geopackage to the sgsid that is probably the correct one.
sgsid_translation_dict: dict = {}

# A row is only imported when its `Bemerkung` value is one of the following.
allowed_bemerkung: list = [
    'erraten',
    'erraten_B',
    'neu',
    'neu (?)',
    'unsich kein B',
]

def read_gdf(file: str, layer: str, allowed_bemerkung: list, translation_dict: dict):
    """Read one layer of a geodata file and prepare it for the import.

    Prints how often each `Bemerkung` value occurs in the layer, keeps only
    the rows whose `Bemerkung` is listed in `allowed_bemerkung`, and rewrites
    the `IDSGS_neu` column according to `translation_dict`.

    Args:
        file: Path of the file to read.
        layer: Name of the layer inside `file`.
        allowed_bemerkung: `Bemerkung` values that qualify a row for import.
        translation_dict: Mapping from an sgsid as found in the file to the
            sgsid that should be used instead. May be empty.

    Returns:
        The filtered frame with translated `IDSGS_neu` values.
    """
    gdf = gpd.read_file(file, layer=layer)
    # Printed before filtering so the full distribution of remarks is visible.
    print(gdf['Bemerkung'].value_counts())
    gdf = gdf[gdf['Bemerkung'].isin(allowed_bemerkung)]
    if translation_dict:
        # Non-in-place replace: the filtered frame is a slice of the frame that
        # was read, and mutating a slice in place is the chained-assignment
        # pattern pandas warns about.
        gdf = gdf.replace({'IDSGS_neu': translation_dict})
    return gdf

def write_insert_statements(f: TextIO, gdf: "GeoDataFrame", geom_type: str):
    """Write one SQL INSERT statement per row of `gdf` to `f`.

    Each statement inserts the row's geometry (as WKT, SRID 2056) into the
    table `study_<geom_type>` for the asset whose `sgs_id` equals the row's
    `IDSGS_neu`, with quality code 'revised'.

    Args:
        f: Writable text stream that receives the statements.
        gdf: Frame with an `IDSGS_neu` column and a `geometry` column whose
            values expose a `.wkt` attribute.
        geom_type: Suffix of the target table name (`study_<geom_type>`).

    Raises:
        ValueError: If an `IDSGS_neu` value cannot be converted to an integer
            (for example NaN or non-numeric text).
    """
    # The id is coerced to int so that a float-typed column yields `1234`
    # rather than `1234.0`, and so that a value which is not an integer id
    # fails here instead of ending up as malformed SQL.
    f.writelines(
        f"INSERT INTO study_{geom_type} (asset_id, geom_quality_item_code, geom) "
        f"VALUES ((SELECT asset_id FROM asset WHERE sgs_id = {int(sgs_id)}), "
        f"'revised', ST_GeomFromText('{geometry.wkt}', 2056));\n"
        for sgs_id, geometry in zip(gdf['IDSGS_neu'], gdf['geometry'])
    )

def main():
    """Generate the sgsid list and the SQL import script from the geopackage.

    Reads the point, line and polygon layers of the configured geopackage,
    splits multi-part lines and polygons into single parts, and drops empty
    point geometries. It then writes:

    - `output_sgsids_filename`: every affected sgsid, one per line;
    - `output_filename`: DELETE statements that remove the existing
      locations, traces and areas of each affected asset, followed by INSERT
      statements for the new geometries.

    The generated script contains no transaction control (no BEGIN/COMMIT);
    how it is batched is left to whoever runs it.
    """
    path = os.path.join(filedir, filename)
    gdf_points, gdf_lines, gdf_polygons = (
        read_gdf(
            file=path,
            layer=layer,
            allowed_bemerkung=allowed_bemerkung,
            translation_dict=sgsid_translation_dict,
        )
        for layer in (
            'backup_20240829__points',
            'backup_20240829__lines',
            'backup_20240829__polygons',
        )
    )
    gdf_lines = gdf_lines.explode()  # explode multi-linestrings to linestrings
    gdf_polygons = gdf_polygons.explode()  # explode multi-polygons to polygons
    gdf_points = gdf_points[~gdf_points['geometry'].is_empty]  # remove empty geometries

    # Normalise the ids to integers once, so that sgsids.txt and the DELETE
    # statements are built from exactly the same values (a float-typed id
    # column would otherwise produce `sgs_id = 1234.0` in the SQL).
    sgs_ids = np.unique(np.concatenate(
        [gdf_lines['IDSGS_neu'].unique(),
         gdf_points['IDSGS_neu'].unique(),
         gdf_polygons['IDSGS_neu'].unique()])).astype(int)

    np.savetxt(os.path.join(filedir, output_sgsids_filename), sgs_ids, fmt='%d', delimiter=',')

    with open(os.path.join(filedir, output_filename), 'w', encoding='utf-8') as f:
        # Remove every existing geometry of each affected asset first.
        for sgsid in sgs_ids:
            asset_query = f"(SELECT asset_id FROM asset WHERE sgs_id = {sgsid})"
            for table in ('study_location', 'study_trace', 'study_area'):
                f.write(f"DELETE FROM {table} WHERE asset_id = {asset_query};\n")

        # The bare SELECTs act as progress markers when the script is run.
        f.write("SELECT 'Finished deleting previous studies.';\n\n")

        f.write("SELECT 'Creating locations.';\n")
        write_insert_statements(f, gdf_points, 'location')
        f.write("\nSELECT 'Creating trace.';\n")
        write_insert_statements(f, gdf_lines, 'trace')
        f.write("\nSELECT 'Creating area.';\n")
        write_insert_statements(f, gdf_polygons, 'area')

# Run the import-script generation only when this file is executed directly,
# not when it is imported as a module.
if __name__ == '__main__':
    main()
2 changes: 2 additions & 0 deletions scripts/2024-10-01_import-new-geometries/requirements.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
geopandas
numpy

0 comments on commit 796b5de

Please sign in to comment.