From 62e252039575b3fa4112c79d0234727d15c865ab Mon Sep 17 00:00:00 2001 From: Debasish Pal <48341250+debpal@users.noreply.github.com> Date: Wed, 25 Sep 2024 00:03:30 +0300 Subject: [PATCH] major update --- README.md | 18 +- SuomiGeoData/__init__.py | 6 +- SuomiGeoData/core.py | 193 +++++++++- SuomiGeoData/data/example_area.cpg | 1 + SuomiGeoData/data/example_area.dbf | Bin 0 -> 919 bytes SuomiGeoData/data/example_area.prj | 1 + SuomiGeoData/data/example_area.shp | Bin 0 -> 10684 bytes SuomiGeoData/data/example_area.shx | Bin 0 -> 108 bytes SuomiGeoData/paituli.py | 551 +++++++++++++++++++++++------ SuomiGeoData/syke.py | 344 ++++++++++++++++++ pyproject.toml | 3 +- requirements-gh-action.txt | 1 + tests/test_paituli.py | 92 ----- tests/test_suomigeodata.py | 398 +++++++++++++++++++++ 14 files changed, 1392 insertions(+), 216 deletions(-) create mode 100644 SuomiGeoData/data/example_area.cpg create mode 100644 SuomiGeoData/data/example_area.dbf create mode 100644 SuomiGeoData/data/example_area.prj create mode 100644 SuomiGeoData/data/example_area.shp create mode 100644 SuomiGeoData/data/example_area.shx create mode 100644 SuomiGeoData/syke.py delete mode 100644 tests/test_paituli.py create mode 100644 tests/test_suomigeodata.py diff --git a/README.md b/README.md index 0d122c1..9b43c3a 100644 --- a/README.md +++ b/README.md @@ -3,16 +3,28 @@ SuomiGeoData is a Python package whose concept originated on September 11, 2024. It is designed to simplify the process of downloading and extracting geospatial data from Suomi, that is Finland. The package offers the following features: -* [Paituli](https://paituli.csc.fi/download.html) website +* [Paituli integration](https://paituli.csc.fi/download.html) - Provides access to vector format index maps for downloading DEM and the topographic database. - Downloads DEM as raster files and the topographic database as shapefiles based on label names from the index maps. + - Downloads all DEM labels intersected with a given vector format area. + - Downloads clipped DEM data that matches a given vector format area. + + * [Syke integration](https://www.syke.fi/en-US/Open_information/Spatial_datasets/Downloadable_spatial_dataset) + + - Downloads CORINE land cover 2018 raster. + - Downloads vector files of latest subcatchment divisions, ranging from level 1 to 5. + - Extracts individual or merged subcatchments by identifier and uses these areas to download DEM. + + * Geoprocessing + + - Simplified merging and clipping of raster files. ## Roadmap -* Enable downloading DEM for a specified area using a shapefile. * Enable downloading the topographic database for a specified area using a shapefile. +* Implement searching and merging of features from the downloaded topographic database. 
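Below is a minimal quick-start sketch of the features listed above, based on the API added in this patch; `area.shp` and the output folders are placeholders, and download folders are expected to already exist and be empty.

```python
import os
from SuomiGeoData import Paituli, Syke

paituli = Paituli()
syke = Syke()

# browse the bundled DEM index map
print(paituli.indexmap_dem.head())

# download all DEM tiles intersecting a vector area
# ('area.shp' is a placeholder; the target folder must exist and be empty)
os.makedirs('dem_tiles', exist_ok=True)
paituli.dem_labels_download_by_area(input_area='area.shp', folder_path='dem_tiles')

# download Syke's catchment division shapefiles (levels 1 to 5)
os.makedirs('catchment_divisions', exist_ok=True)
syke.download_catchment_divisions_2023('catchment_divisions')
```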
## Easy Installation @@ -55,6 +67,6 @@ For detailed information, see the [documentation](http://suomigeodata.readthedoc | **PyPI**| ![PyPI - Version](https://img.shields.io/pypi/v/SuomiGeoData) ![PyPI - Status](https://img.shields.io/pypi/status/SuomiGeoData) ![PyPI - Python Version](https://img.shields.io/pypi/pyversions/SuomiGeoData) ![PyPI - Wheel](https://img.shields.io/pypi/wheel/SuomiGeoData) ![PyPI - Downloads](https://img.shields.io/pypi/dm/SuomiGeoData) | | **GitHub** | ![GitHub last commit](https://img.shields.io/github/last-commit/debpal/SuomiGeoData) [![flake8](https://github.com/debpal/SuomiGeoData/actions/workflows/linting.yml/badge.svg)](https://github.com/debpal/SuomiGeoData/actions/workflows/linting.yml) [![mypy](https://github.com/debpal/SuomiGeoData/actions/workflows/typing.yml/badge.svg)](https://github.com/debpal/SuomiGeoData/actions/workflows/typing.yml) [![pytest](https://github.com/debpal/SuomiGeoData/actions/workflows/testing.yml/badge.svg)](https://github.com/debpal/SuomiGeoData/actions/workflows/testing.yml) ![GitHub repo size](https://img.shields.io/github/repo-size/debpal/SuomiGeoData) | | **Codecov** | [![codecov](https://codecov.io/gh/debpal/SuomiGeoData/graph/badge.svg?token=ORFQKXO96C)](https://codecov.io/gh/debpal/SuomiGeoData) | -| **Read**_the_**Docs** | [![Documentation Status](https://readthedocs.org/projects/suomigeodata/badge/?version=latest)](https://suomigeodata.readthedocs.io/en/latest/?badge=latest) | +| **Read** _the_ **Docs** | [![Documentation Status](https://readthedocs.org/projects/suomigeodata/badge/?version=latest)](https://suomigeodata.readthedocs.io/en/latest/?badge=latest) | | **License** | ![PyPI - License](https://img.shields.io/pypi/l/SuomiGeoData) | diff --git a/SuomiGeoData/__init__.py b/SuomiGeoData/__init__.py index 9c31413..4a3a993 100644 --- a/SuomiGeoData/__init__.py +++ b/SuomiGeoData/__init__.py @@ -1,9 +1,11 @@ from .paituli import Paituli +from .syke import Syke __all__ = [ - 'Paituli' + 'Paituli', + 'Syke' ] -__version__ = '0.1.0' +__version__ = '1.0.0' diff --git a/SuomiGeoData/core.py b/SuomiGeoData/core.py index a4bdb45..df96f8f 100644 --- a/SuomiGeoData/core.py +++ b/SuomiGeoData/core.py @@ -1,4 +1,11 @@ +import os +import typing import pyogrio +import rasterio +import rasterio.merge +import rasterio.drivers +import rasterio.mask +import geopandas class Core: @@ -7,7 +14,7 @@ class Core: Core functionality of :mod:`SuomiGeoData` module. ''' - def is_valid_write_shape_driver( + def is_valid_ogr_driver( self, file_path: str ) -> bool: @@ -34,6 +41,33 @@ def is_valid_write_shape_driver( return output + def is_valid_raster_driver( + self, + file_path: str + ) -> bool: + + ''' + Returns whether the given file path is a valid raster file. + + Parameters + ---------- + file_path : str + File path to save the raster. + + Returns + ------- + bool + True if the file path is valid, False otherwise. 
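A short sketch of how the renamed and newly added driver checks might be used before writing output files; the paths are hypothetical and only the file extension matters.

```python
from SuomiGeoData.core import Core

core = Core()

# vector driver check: True for extensions with an OGR write driver, such as '.shp'
print(core.is_valid_ogr_driver('output/area.shp'))

# raster driver check: False here, since rasterio finds no driver for the '.t' extension
print(core.is_valid_raster_driver('output/dem.t'))
```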
+ ''' + + try: + rasterio.drivers.driver_from_extension(file_path) + output = True + except Exception: + output = False + + return output + @property def _url_prefix_paituli_dem_tdb( self, @@ -60,9 +94,164 @@ def default_http_headers( output = { 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7', 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/128.0.0.0 Safari/537.36', - 'Host': 'www.nic.funet.fi', 'Accept-Encoding': 'gzip, deflate, br, zstd', 'Connection': 'keep-alive' } return output + + def raster_merging( + self, + folder_path: str, + output_file: str, + raster_ext: str = '.tif', + **kwargs: typing.Any + ) -> str: + + ''' + Merges raster files and returns a confirmation message. + + Parameters + ---------- + folder_path : str + Folder path containing input raster files. + + output_file : str + File path to save the output raster. + + raster_ext : str, optional + Extension of input raster files. Defaults to '.tif' if not provided. + + **kwargs : optional + Additional keyword arguments for updating the dictionary of + :attr:`rasterio.profile` attribute. + + Returns + ------- + str + A confirmation message indicating that the raster merging is complete. + ''' + + # file paths + if os.path.isdir(folder_path): + file_paths = filter( + lambda x: os.path.isfile(os.path.join(folder_path, x)), + os.listdir(folder_path) + ) + else: + raise Exception( + 'The folder path does not exist.' + ) + + # extract raster files + raster_files = filter( + lambda x: x.endswith(raster_ext), + file_paths + ) + + # raster merging + check_file = self.is_valid_raster_driver(output_file) + # output file check fail + if check_file is False: + raise Exception( + 'Could not retrieve driver from the file path.' + ) + else: + # open the split rasters + split_rasters = [ + rasterio.open(os.path.join(folder_path, file)) for file in raster_files + ] + # merge the split rasters + profile = split_rasters[0].profile + output_array, output_transform = rasterio.merge.merge( + datasets=split_rasters + ) + # update merged raster profile + profile.update( + { + 'height': output_array.shape[1], + 'width': output_array.shape[2], + 'transform': output_transform + } + ) + for key, value in kwargs.items(): + profile[key] = value + # save the merged raster + with rasterio.open(output_file, 'w', **profile) as output_raster: + output_raster.write(output_array) + # close the split rasters + for raster in split_rasters: + raster.close() + + return 'Merging of rasters completed.' + + def raster_clipping_by_mask( + self, + input_file: str, + mask_area: str | geopandas.GeoDataFrame, + output_file: str, + **kwargs: typing.Any + ) -> str: + + ''' + Clips a raster file using a mask and returns a confirmation message. + + Parameters + ---------- + input_file : str + File path to the input raster. + + mask_area : str or GeoDataFrame + Mask area either as a file path or a GeoDataFrame. + + output_file : str + File path to save the output raster. + + **kwargs : optional + Additional keyword arguments for updating the dictionary of + :attr:`rasterio.profile` attribute. + + Returns + ------- + str + A confirmation message indicating that the raster clipping is complete. 
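A sketch of chaining the two new Core geoprocessing helpers; folder and file names are placeholders, and the input folder is assumed to hold GeoTIFF tiles sharing one CRS.

```python
from SuomiGeoData.core import Core

core = Core()

# merge all '.tif' tiles found in a folder; extra keyword arguments
# (here LZW compression) update the output rasterio profile
core.raster_merging(
    folder_path='dem_tiles',
    output_file='merged.tif',
    compress='lzw'
)

# clip the merged raster with a polygon mask given as a file path or GeoDataFrame
core.raster_clipping_by_mask(
    input_file='merged.tif',
    mask_area='area.shp',
    output_file='clipped.tif'
)
```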
+ ''' + + # mask area + if isinstance(mask_area, str): + mask_geometry = geopandas.read_file(mask_area).geometry.to_list() + elif isinstance(mask_area, geopandas.GeoDataFrame): + mask_geometry = mask_area.geometry.to_list() + else: + raise Exception('Input area must be either file or GeoDataFrame format.') + + # raster clipping + check_file = self.is_valid_raster_driver(output_file) + # output file check fail + if check_file is False: + raise Exception( + 'Could not retrieve driver from the file path.' + ) + else: + # raster clipping + with rasterio.open(input_file) as input_raster: + profile = input_raster.profile + output_array, output_transform = rasterio.mask.mask( + dataset=input_raster, + shapes=mask_geometry, + all_touched=True, + crop=True + ) + # update clipped raster profile + profile.update( + {'height': output_array.shape[1], + 'width': output_array.shape[2], + 'transform': output_transform} + ) + for key, value in kwargs.items(): + profile[key] = value + # save the clipped raster + with rasterio.open(output_file, 'w', **profile) as output_raster: + output_raster.write(output_array) + + return 'Raster clipping completed.' diff --git a/SuomiGeoData/data/example_area.cpg b/SuomiGeoData/data/example_area.cpg new file mode 100644 index 0000000..3ad133c --- /dev/null +++ b/SuomiGeoData/data/example_area.cpg @@ -0,0 +1 @@ +UTF-8 \ No newline at end of file diff --git a/SuomiGeoData/data/example_area.dbf b/SuomiGeoData/data/example_area.dbf new file mode 100644 index 0000000000000000000000000000000000000000..2175b689f55acef204a5ebd5e6c72c720444b036 GIT binary patch literal 919 zcmZRs;S^_NU|>jOG-U!(7@#DvINvlrzc?|I!4D)Ugr)*6U&7!Fk_x~opP2#_#;PA^ za#>GcblJp_E4005-|U&_vJF)Bpht4MF0DhK9y^X66WC k19MatQR)n!ez*^*U>3*-huv<*X#I^(Z@^PSFVfB#1Rn149lIJ#ky{nyVf5gkkaPhNi|jK_A}na%7ITFZfR zHRUdD^frD4eJ#3iyo^3#WRt&f_M(q(bmw>heKLOId=7mEe&bkzJ_o;XK8Zdbzi~W) zz7oH2K8C&tzi});--h4$Vf4YHn)Wfy`_a1^-8t?>pMc*u??#`3-#F%?&&2P1JNjI7 z=Uc3No$|-wH!gptl@C|>gRFcwE+zl|~2b9Za` zX&mRGtG>oL5naz~9Os~S(>}&|Ci-;R$2caS>-`w#Y3Q2&jN?@FUdlJlap;;KjpHPA z^%vtj5nc0}ag0XS{N~(+&ioe6!_Dt}cx(CLe4zQA2cv6z8s{PCnm>(0={0{kt31sg z#yN=mHGde#ey!!V@c?v{Z=AcJ4`6i@?1 zDmv>!8?R{fr*SMt*Z4EeW#}5e&d;NdXZFj9zdskoO2$!+SfSlL)ZH6d>1Q&Urfe2K>e`9=(tH8Ry7WJ&%Psx3=*ROK%)UpsPKM z^H6m4599LBqCd$GmtXx`e)a!@$)E8O#^oPHf0Q4t^0%{Is(d*0ygH8=$56`G{9s&u zt(Wq{<=1>9Kb*tJU-Ox99FMN~(0L5H_H)KL7G3LsaVWj^yT;X@v|m>L()y+Puph7b zYJaZ&_aOUy<4UjnzVg#~OyzN&sQT!9q55{R-#4!I)p<x zzb)guW?cPs5$84aU%2}3B=f`NU(?mBC-TF25byV}MsMQ-(6=A=k3Ws01ATL&J6C?| z@vA(z(yuZ@@f*kP=u_|;=dS1z@H_uOT(1k=IDSVTO#a5X z1APE~-~z=pOwFHjlYHDuki=h z_+3i=8ozM)KQcdD{w?Mg@6Y&E`x)2#GM4t&{07(jr1zouPVYPky~W z)n{4L{#9SG_FIe4jYI9F{$*V4r~az;gRA|9@cz|)aMg$LoF`_aw%V_fIS z&5iCHI=`;RZ=Bas{wn;&q4V(q{Kk1b{<-*#>pVUazs}=uo#&_E*LfbUc)$ewiYJH_ z-{?X&u6WA;@>jeBuK3EfVNLrg{sQMcJm029cdmHSO8kl^!IgfF`Qh?UHov&yUFgR7 zH1!)n{>ByG>%iYbdbr|+@;}p>-_H0V{%6hqJRFp795148>}=ZCxZlQBl;ko&p3WY@8tQ6^H+2|uX8_oH~Bm7!F`AP#<4fL$~Vq^%_yqJBj8Eg8_l1n_x-TTX?l^tW0?<~>ppiK{)yCA+zo5~Hm>{V z!OZ`szotGyI|QhSP3AL3h;zv5kY^L~hr?fm|Tc$L0qQhMXi_fN#foa=il>f>DB zYw3CPy_WbX>ZkaaaV|#RMEuBE- None: + ) -> geopandas.GeoDataFrame: ''' - Initializes the class by loading the GeoDataFrame of index maps. + Returns a GeoDataFrame containing the DEM index map. 
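Since the index maps become read-on-access properties rather than attributes set in `__init__`, a quick inspection sketch may help; the row count and label value are taken from the test suite.

```python
from SuomiGeoData import Paituli

paituli = Paituli()

# each access reads the bundled index shapefile into a GeoDataFrame
dem_index = paituli.indexmap_dem
print(dem_index.shape[0])          # 10320 tiles according to the test suite
print(dem_index['label'].head())   # tile labels such as 'K3244G'
```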
''' - # DEM index map - self.indexmap_dem = geopandas.read_file( + output = geopandas.read_file( os.path.join( os.path.dirname(__file__), 'data', 'nls_dem_index.shp' ) ) - # topograhical database index map - self.indexmap_tdb = geopandas.read_file( + return output + + @property + def indexmap_tdb( + self + ) -> geopandas.GeoDataFrame: + + ''' + Returns a GeoDataFrame containing the topographic database index map. + ''' + + output = geopandas.read_file( os.path.join( os.path.dirname(__file__), 'data', 'nls_td_index.shp' ) ) + return output + def save_indexmap_dem( self, file_path: str, **kwargs: typing.Any - ) -> bool: + ) -> str: ''' - Saves the GeoDataFrame of the DEM index map to the specified file path. + Saves the GeoDataFrame of the DEM index map to the specified file path + and return a success message. Parameters ---------- @@ -60,72 +67,36 @@ def save_indexmap_dem( **kwargs : optional Additional keyword arguments for the - :meth:`geopandas.GeoDataFrame.to_file` function. + :meth:`geopandas.GeoDataFrame.to_file` method. Returns ------- - bool - True if the file exists at the specified path, False otherwise. + str + A confirmation message indicating the output file has been saved. ''' - validity = Core().is_valid_write_shape_driver(file_path) - if validity is True: + check_file = Core().is_valid_ogr_driver(file_path) + if check_file is True: self.indexmap_dem.to_file( file_path, **kwargs ) else: raise Exception( - 'Could not OGR format driver from the file path.' + 'Could not retrieve driver from the file path.' ) - output = os.path.exists(file_path) - - return output - - @property - def dem_labels( - self - ) -> list[str]: - - ''' - Returns the list of labels from the DEM index map. - ''' - - output = list(self.indexmap_dem['label']) - - return output - - def is_valid_label_dem( - self, - label: str - ) -> bool: - - ''' - Returns whether the label exists in the DEM index map. - - Parameters - ---------- - label : str - Name of the label. - - Returns - ------- - bool - True if the label exists, False otherwise. - ''' - - return label in self.dem_labels + return 'GeoDataFrame saved to the output file.' def save_indexmap_tdb( self, file_path: str, **kwargs: typing.Any - ) -> bool: + ) -> str: ''' Saves the GeoDataFrame of the topographic database - index map to the specified file path. + index map to the specified file path and returns a success message. Parameters ---------- @@ -134,26 +105,37 @@ def save_indexmap_tdb( **kwargs : optional Additional keyword arguments for the - :meth:`geopandas.GeoDataFrame.to_file` function. + :meth:`geopandas.GeoDataFrame.to_file` method. Returns ------- - bool - True if the file exists at the specified path, False otherwise. + str + A confirmation message indicating the output file has been saved. ''' - validity = Core().is_valid_write_shape_driver(file_path) - if validity is True: + check_file = Core().is_valid_ogr_driver(file_path) + if check_file is True: self.indexmap_tdb.to_file( file_path, **kwargs ) else: raise Exception( - 'Could not OGR format driver from the file path.' + 'Could not retrieve driver from the file path.' ) - output = os.path.exists(file_path) + return 'GeoDataFrame saved to the output file.' + + @property + def dem_labels( + self + ) -> list[str]: + + ''' + Returns the list of labels from the DEM index map. 
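A sketch of the reworked savers, which now return a confirmation string and raise on an unrecognized extension instead of returning a bool; output paths are placeholders.

```python
from SuomiGeoData import Paituli

paituli = Paituli()

print(paituli.save_indexmap_dem('indexmap_dem.shp'))   # 'GeoDataFrame saved to the output file.'
print(paituli.save_indexmap_tdb('indexmap_tdb.shp'))   # works for any extension with an OGR driver

try:
    paituli.save_indexmap_dem('indexmap_dem.sh')        # no OGR driver for '.sh'
except Exception as error:
    print(error)  # 'Could not retrieve driver from the file path.'
```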
+ ''' + + output = list(self.indexmap_dem['label']) return output @@ -170,6 +152,27 @@ def tdb_labels( return output + def is_valid_label_dem( + self, + label: str + ) -> bool: + + ''' + Returns whether the label exists in the DEM index map. + + Parameters + ---------- + label : str + Name of the label. + + Returns + ------- + bool + True if the label exists, False otherwise. + ''' + + return label in self.dem_labels + def is_valid_label_tdb( self, label: str @@ -196,10 +199,11 @@ def dem_download_by_labels( labels: list[str], folder_path: str, http_headers: typing.Optional[dict[str, str]] = None - ) -> bool: + ) -> str: ''' - Downloads the DEM raster files for the given labels. + Downloads the DEM raster files for the given labels and + returns a confirmation message. Parameters ---------- @@ -207,41 +211,43 @@ def dem_download_by_labels( List of label names from the DEM index map. folder_path : str - Complete folder path to save the downloaded raster files. + Path of empty folder to save the downloaded raster files. http_headers : dict, optional - HTTP headers to be used for the web request. If not provided, the default headers - :attr:`SuomiGeoData.core.Core.default_http_headers` will be used. + HTTP headers to be used for the web request. Defaults to + :attr:`SuomiGeoData.core.Core.default_http_headers` attribute if not provided. Returns ------- - bool - True if all the DEM raster files were successfully downloaded and - exist at the specified folder path, False otherwise. + str + A confirmation message indicating that all downloads are complete. ''' + # check the existence of the output folder path + if os.path.isdir(folder_path): + exist_files = len(os.listdir(folder_path)) + if exist_files > 0: + raise Exception( + 'Output folder must be empty.' + ) + else: + pass + else: + raise Exception( + 'The folder path does not exist.' + ) + # check whether the input labels exist for label in labels: if self.is_valid_label_dem(label): pass else: raise Exception( - f'The label "{label}" does not exist in the index map.' + f'The label {label} does not exist in the index map.' ) - # check the existence of the given folder path - if os.path.isdir(folder_path): - pass - else: - raise Exception( - f'The folder path "{folder_path}" is not a valid directory.' - ) - # web request headers - if http_headers is None: - headers = Core().default_http_headers - else: - headers = http_headers + headers = Core().default_http_headers if http_headers is None else http_headers # download topographic database suffix_urls = self.indexmap_dem[self.indexmap_dem['label'].isin(labels)]['path'] @@ -252,29 +258,28 @@ def dem_download_by_labels( url=label_url, headers=headers ) - label_file = os.path.join( + downloaded_file = os.path.join( folder_path, f'{label}.tif' ) - with open(label_file, 'wb') as label_raster: - label_raster.write(response.content) + with open(downloaded_file, 'wb') as downloaded_raster: + downloaded_raster.write(response.content) print( f'Download of label {label} completed (count {count}/{len(labels)}).' ) count = count + 1 - output = all(os.path.isfile(os.path.join(folder_path, f'{label}.tif')) for label in labels) - - return output + return 'All downloads are complete.' def tdb_download_by_labels( self, labels: list[str], folder_path: str, http_headers: typing.Optional[dict[str, str]] = None - ) -> bool: + ) -> str: ''' - Downloads the topographic database folders of shapefiles for the given labels. 
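A sketch of label validation followed by a label-based DEM download; the label comes from the test suite, and the target folder is a placeholder that must exist and be empty.

```python
import os
from SuomiGeoData import Paituli

paituli = Paituli()

label = 'X4344A'                            # label used in the test suite
print(paituli.is_valid_label_dem(label))    # True

os.makedirs('dem_tiles', exist_ok=True)     # must be empty, otherwise an exception is raised
print(paituli.dem_download_by_labels([label], 'dem_tiles'))
# 'All downloads are complete.'
```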
+ Downloads the topographic database folders of shapefiles for the given labels and + returns a confirmation message. Parameters ---------- @@ -282,17 +287,16 @@ def tdb_download_by_labels( List of label names from the topographic database index map. folder_path : str - Complete folder path to save the downloaded folder of shapefiles. + Path of empty folder to save the downloaded folder of shapefiles. http_headers : dict, optional - HTTP headers to be used for the web request. If not provided, the default headers - :attr:`SuomiGeoData.core.Core.default_http_headers` will be used. + HTTP headers to be used for the web request. Defaults to + :attr:`SuomiGeoData.core.Core.default_http_headers` attribute if not provided. Returns ------- - bool - True if all the topographic database folders were successfully downloaded and - exist at the specified folder path, False otherwise. + str + A confirmation message indicating that all downloads are complete. ''' # check whether the input labels exist @@ -301,22 +305,25 @@ def tdb_download_by_labels( pass else: raise Exception( - f'The label "{label}" does not exist in the index map.' + f'The label {label} does not exist in the index map.' ) # check the existence of the given folder path if os.path.isdir(folder_path): - pass + exist_files = len(os.listdir(folder_path)) + if exist_files > 0: + raise Exception( + 'Output folder must be empty.' + ) + else: + pass else: raise Exception( - f'The folder path "{folder_path}" is not a valid directory.' + 'The folder path does not exist.' ) # web request headers - if http_headers is None: - headers = Core().default_http_headers - else: - headers = http_headers + headers = Core().default_http_headers if http_headers is None else http_headers # download topographic database suffix_urls = self.indexmap_tdb[self.indexmap_tdb['label'].isin(labels)]['path'] @@ -327,9 +334,9 @@ def tdb_download_by_labels( url=label_url, headers=headers ) - label_data = io.BytesIO(response.content) - with zipfile.ZipFile(label_data, 'r') as label_zip: - label_zip.extractall( + downloaded_data = io.BytesIO(response.content) + with zipfile.ZipFile(downloaded_data, 'r') as downloaded_zip: + downloaded_zip.extractall( os.path.join(folder_path, label) ) print( @@ -337,6 +344,318 @@ def tdb_download_by_labels( ) count = count + 1 - output = all(os.path.isdir(os.path.join(folder_path, label)) for label in labels) + return 'All downloads are complete.' + + @property + def get_example_area( + self + ) -> geopandas.GeoDataFrame: + + ''' + Returns a GeoDataFrame of example area to test + raster and vector downloads. + ''' + + output = geopandas.read_file( + os.path.join( + os.path.dirname(__file__), 'data', 'example_area.shp' + ) + ) return output + + def dem_labels_download_by_area( + self, + input_area: str | geopandas.GeoDataFrame, + folder_path: str, + http_headers: typing.Optional[dict[str, str]] = None + ) -> str: + + ''' + Downloads the DEM raster files for the given labels and + returns a confirmation message. + + Parameters + ---------- + input_area : str or GeoDataFrame + Input area by either file path or GeoDataFrame. + + folder_path : str + Path of empty folder to save the downloaded raster files. + + http_headers : dict, optional + HTTP headers to be used for the web request. Defaults to + :attr:`SuomiGeoData.core.Core.default_http_headers` attribute if not provided. + + Returns + ------- + str + A confirmation message indicating that all downloads are complete. 
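A sketch that combines the bundled example area with the new area-based DEM download, plus the equivalent label-based topographic database download; the folders are placeholders that must exist and be empty.

```python
import os
from SuomiGeoData import Paituli

paituli = Paituli()

# bundled example polygon shipped with the package
example_gdf = paituli.get_example_area

os.makedirs('dem_by_area', exist_ok=True)
print(paituli.dem_labels_download_by_area(example_gdf, 'dem_by_area'))
# 'All downloads are complete.'

os.makedirs('tdb_tiles', exist_ok=True)
print(paituli.tdb_download_by_labels(['J3224R'], 'tdb_tiles'))
```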
+ ''' + + # input area + if isinstance(input_area, str): + area_gdf = geopandas.read_file(input_area) + elif isinstance(input_area, geopandas.GeoDataFrame): + area_gdf = input_area + else: + raise Exception('Input area must be either file or GeoDataFrame format.') + + # check crs of input area + target_crs = 'EPSG:3067' + if area_gdf.crs is None: + area_gdf = area_gdf.set_crs(target_crs) + elif str(area_gdf.crs) != target_crs: + area_gdf = area_gdf.to_crs(target_crs) + else: + pass + + # DEM index map + index_gdf = self.indexmap_dem + + # labels + label_gdf = geopandas.sjoin(index_gdf, area_gdf, how='inner').reset_index(drop=True) + label_gdf = label_gdf.drop_duplicates(subset=['label']).reset_index(drop=True) + + # download labels + if label_gdf.shape[0] == 0: + raise Exception('The index map does not intersect with the given area.') + else: + message = self.dem_download_by_labels( + labels=list(label_gdf['label']), + folder_path=folder_path, + http_headers=http_headers + ) + + return message + + def dem_labels_download_by_syke_subcatchment( + self, + input_file: str, + level: int, + single_area: int, + folder_path: str, + merge_polygons: bool = True, + percentage_cutoff: float = 0, + http_headers: typing.Optional[dict[str, str]] = None + ) -> str: + + ''' + Downloads the DEM raster files for the given subcatchment division of Syke and + returns a confirmation message. + + Parameters + ---------- + input_file : str + Path to the shapefile of catchment area divisions, obtained from the + :meth: `SuomiGeoData.Syke.download_catchment_divisions_2023` method. + + level : int + Level of catchment division and must be one of 1, 2, 3, 4 or 5. + + single_area : int + Selected value from 'taso_osai' columns. + + folder_path : str + Path of empty folder path to save the downloaded raster files. + + merge_polygons : bool, optional + Merges the polygons using the :meth:`geopandas.GeoDataFrame.dissolve` method + and explodes them with the :meth:`geopandas.GeoDataFrame.explode` method. If False, + no operation is performed. + + percentage_cutoff : float, optional + Excludes polygon below the specified area percentage, ranging from 0 to 100, + relative to the total area of all polygons. Default is 0, excluding negligible polygons. + Provide -1 for no exclusion. + + http_headers : dict, optional + HTTP headers to be used for the web request. Defaults to + :attr:`SuomiGeoData.core.Core.default_http_headers` attribute if not provided. + + Returns + ------- + str + A confirmation message indicating that all downloads are complete. + ''' + + # input area + area_gdf = Syke().select_single_subcatchment( + input_file=input_file, + level=level, + single_area=single_area, + merge_polygons=merge_polygons, + percentage_cutoff=percentage_cutoff + ) + + # DEM index map + index_gdf = self.indexmap_dem + + # labels + label_gdf = geopandas.sjoin(index_gdf, area_gdf, how='inner').reset_index(drop=True) + label_gdf = label_gdf.drop_duplicates(subset=['label']).reset_index(drop=True) + + # download labels + message = self.dem_download_by_labels( + labels=list(label_gdf['label']), + folder_path=folder_path, + http_headers=http_headers + ) + + return message + + def dem_clipped_download_by_area( + self, + input_area: str | geopandas.GeoDataFrame, + output_file: str, + http_headers: typing.Optional[dict[str, str]] = None, + **kwargs: typing.Any + ) -> str: + + ''' + Downloads the clipped DEM raster file for the given area and + returns a confirmation message. 
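A sketch of downloading DEM tiles for a single Syke subcatchment; the shapefile name and the identifier mirror the test suite and stand in for real data.

```python
import os
from SuomiGeoData import Paituli

paituli = Paituli()

os.makedirs('subcatchment_dem', exist_ok=True)
print(
    paituli.dem_labels_download_by_syke_subcatchment(
        input_file='catchment_division_level_5.shp',  # from Syke.download_catchment_divisions_2023
        level=5,
        single_area=15730216003,                      # identifier used in the test suite
        folder_path='subcatchment_dem'
    )
)
# 'All downloads are complete.'
```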
+
+        Parameters
+        ----------
+        input_area : str or GeoDataFrame
+            Input area by either file path or GeoDataFrame.
+
+        output_file : str
+            File path to save the clipped output raster.
+
+        http_headers : dict, optional
+            HTTP headers to be used for the web request. Defaults to
+            :attr:`SuomiGeoData.core.Core.default_http_headers` attribute if not provided.
+
+        **kwargs : optional
+            Additional keyword arguments for updating the dictionary of
+            :attr:`rasterio.profile` attribute.
+
+        Returns
+        -------
+        str
+            A confirmation message indicating that all geoprocessing is complete.
+        '''
+
+        with tempfile.TemporaryDirectory() as tmp_dir:
+            message = self.dem_labels_download_by_area(
+                input_area=input_area,
+                folder_path=tmp_dir,
+                http_headers=http_headers
+            )
+            print(message)
+            # merging rasters
+            message = Core().raster_merging(
+                folder_path=tmp_dir,
+                output_file=os.path.join(tmp_dir, 'merged.tif'),
+                compress='lzw'
+            )
+            print(message)
+            # clipping rasters
+            message = Core().raster_clipping_by_mask(
+                input_file=os.path.join(tmp_dir, 'merged.tif'),
+                mask_area=input_area,
+                output_file=output_file,
+                **kwargs
+            )
+            print(message)
+
+        return 'All geoprocessing has been completed.'
+
+    def dem_clipped_download_by_syke_subcatchment(
+        self,
+        input_file: str,
+        level: int,
+        single_area: int,
+        output_file: str,
+        merge_polygons: bool = True,
+        percentage_cutoff: float = 0,
+        http_headers: typing.Optional[dict[str, str]] = None,
+        **kwargs: typing.Any
+    ) -> str:
+
+        '''
+        Downloads the clipped DEM raster file for the given subcatchment division of Syke and
+        returns a confirmation message.
+
+        Parameters
+        ----------
+        input_file : str
+            Path to the shapefile of catchment area divisions, obtained from the
+            :meth:`SuomiGeoData.Syke.download_catchment_divisions_2023` method.
+
+        level : int
+            Catchment division level, must be one of 1, 2, 3, 4, or 5.
+
+        single_area : int
+            Selected value from the 'taso<level>_osai' column.
+
+        output_file : str
+            File path to save the output raster.
+
+        merge_polygons : bool, optional
+            Merges the polygons using the :meth:`geopandas.GeoDataFrame.dissolve` method
+            and explodes them with the :meth:`geopandas.GeoDataFrame.explode` method. If False,
+            no operation is performed.
+
+        percentage_cutoff : float, optional
+            Excludes polygons below the specified area percentage, ranging from 0 to 100,
+            relative to the total area of all polygons. Default is 0, excluding negligible polygons.
+            Provide -1 for no exclusion.
+
+        http_headers : dict, optional
+            HTTP headers to be used for the web request. Defaults to
+            :attr:`SuomiGeoData.core.Core.default_http_headers` attribute if not provided.
+
+        **kwargs : optional
+            Additional keyword arguments for updating the dictionary of
+            :attr:`rasterio.profile` attribute.
+
+        Returns
+        -------
+        str
+            A confirmation message indicating that all geoprocessing is complete.
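A sketch of the clipped DEM download for an arbitrary area: tiles are fetched into a temporary folder, merged, and clipped to the input geometry. The shapefile path is a placeholder and the extra keyword argument is forwarded to the output raster profile.

```python
from SuomiGeoData import Paituli

paituli = Paituli()

paituli.dem_clipped_download_by_area(
    input_area='area.shp',           # placeholder path (the index map uses EPSG:3067)
    output_file='clipped_dem.tif',
    compress='lzw'                   # forwarded to the rasterio profile
)
```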
+ ''' + + # input subcatchment + area_gdf = Syke().select_single_subcatchment( + input_file=input_file, + level=level, + single_area=single_area, + merge_polygons=merge_polygons, + percentage_cutoff=percentage_cutoff + ) + + # DEM index map + index_gdf = self.indexmap_dem + + # labels + label_gdf = geopandas.sjoin(index_gdf, area_gdf, how='inner').reset_index(drop=True) + label_gdf = label_gdf.drop_duplicates(subset=['label']).reset_index(drop=True) + + with tempfile.TemporaryDirectory() as tmp_dir: + # download labels + message = self.dem_download_by_labels( + labels=list(label_gdf['label']), + folder_path=tmp_dir, + http_headers=http_headers + ) + print(message) + # merging rasters + message = Core().raster_merging( + folder_path=tmp_dir, + output_file=os.path.join(tmp_dir, 'merged.tif'), + compress='lzw' + ) + print(message) + # clipping rasters + message = Core().raster_clipping_by_mask( + input_file=os.path.join(tmp_dir, 'merged.tif'), + mask_area=area_gdf, + output_file=output_file, + **kwargs + ) + print(message) + + return 'All geoprocessing has been completed.' diff --git a/SuomiGeoData/syke.py b/SuomiGeoData/syke.py new file mode 100644 index 0000000..183703b --- /dev/null +++ b/SuomiGeoData/syke.py @@ -0,0 +1,344 @@ +import os +import io +import zipfile +import typing +import geopandas +import shapely +import requests +from .core import Core + + +class Syke: + + ''' + Executes downloading and extracting data from Syke + (https://www.syke.fi/en-US/Open_information/Spatial_datasets/Downloadable_spatial_dataset). + ''' + + def download_corine_land_cover_2018( + self, + folder_path: str, + http_headers: typing.Optional[dict[str, str]] = None + ) -> str: + + ''' + Downloads raster files of Finland's CORINE land cover for the year 2018 and + returns a confirmation message. + + Parameters + ---------- + folder_path : str + Folder path to save the downloaded files. + + http_headers : dict, optional + HTTP headers to be used for the web request. Defaults to + :attr:`SuomiGeoData.core.Core.default_http_headers` attribute if not provided. + + Returns + ------- + str + A confirmation message indicating that download is complete. + ''' + + # check the existence of the given folder path + if os.path.isdir(folder_path): + pass + else: + raise Exception( + 'The folder path does not exist.' + ) + + # web request headers + headers = Core().default_http_headers if http_headers is None else http_headers + + # download land cover + url = 'https://wwwd3.ymparisto.fi/d3/Static_rs/spesific/clc2018_fi20m.zip' + response = requests.get( + url=url, + headers=headers + ) + downloaded_data = io.BytesIO(response.content) + with zipfile.ZipFile(downloaded_data, 'r') as downloaded_zip: + downloaded_zip.extractall( + folder_path + ) + + return 'All downloads are complete.' + + def download_catchment_divisions_2023( + self, + folder_path: str, + http_headers: typing.Optional[dict[str, str]] = None + ) -> str: + + ''' + Downloads shapefiles of Finland's catchment area divisions for the year 2023 and + returns a confirmation message. + + Parameters + ---------- + folder_path : str + Path of empty folder to save the downloaded shapefiles. + + http_headers : dict, optional + HTTP headers to be used for the web request. Defaults to + :attr:`SuomiGeoData.core.Core.default_http_headers` attribute if not provided. + + Returns + ------- + str + A confirmation message indicating that download is complete. 
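A sketch of the two Syke dataset downloads; the output folders are placeholders that must already exist, and the catchment division shapefiles are renamed on extraction as noted in the comment.

```python
import os
from SuomiGeoData import Syke

syke = Syke()

os.makedirs('corine_2018', exist_ok=True)
print(syke.download_corine_land_cover_2018('corine_2018'))

os.makedirs('catchment_divisions', exist_ok=True)
print(syke.download_catchment_divisions_2023('catchment_divisions'))
# extracted files are renamed to 'catchment_division_level_1' ... 'level_5'
# and 'catchment_discharge_point'
```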
+        '''
+
+        # check the existence of the given folder path
+        if os.path.isdir(folder_path):
+            pass
+        else:
+            raise Exception(
+                'The folder path does not exist.'
+            )
+
+        # web request headers
+        headers = Core().default_http_headers if http_headers is None else http_headers
+
+        # download catchment divisions
+        url = 'https://wwwd3.ymparisto.fi/d3/gis_data/spesific/valumaalueet.zip'
+        response = requests.get(
+            url=url,
+            headers=headers
+        )
+        downloaded_data = io.BytesIO(response.content)
+        with zipfile.ZipFile(downloaded_data, 'r') as downloaded_zip:
+            downloaded_zip.extractall(
+                folder_path
+            )
+        for file in os.listdir(folder_path):
+            if file.startswith('Valumaaluejako_taso'):
+                renamed_file = file.replace(
+                    'Valumaaluejako_taso', 'catchment_division_level_'
+                )
+            else:
+                renamed_file = file.replace(
+                    'Valumaaluejako_purkupiste', 'catchment_discharge_point'
+                )
+            os.rename(
+                os.path.join(folder_path, file),
+                os.path.join(folder_path, renamed_file)
+            )
+
+        return 'All downloads are complete.'
+
+    def select_single_subcatchment(
+        self,
+        input_file: str,
+        level: int,
+        single_area: int,
+        output_file: typing.Optional[str] = None,
+        merge_polygons: bool = True,
+        percentage_cutoff: float = 0,
+        **kwargs: typing.Any
+    ) -> geopandas.GeoDataFrame:
+
+        '''
+        Selects a single subcatchment from the shapefile of
+        Syke's catchment divisions and returns a GeoDataFrame.
+
+        Parameters
+        ----------
+        input_file : str
+            Path to the shapefile of catchment area divisions, obtained from the
+            :meth:`SuomiGeoData.Syke.download_catchment_divisions_2023` method.
+
+        level : int
+            Catchment division level, must be one of 1, 2, 3, 4, or 5.
+
+        single_area : int
+            Selected value from the 'taso<level>_osai' column in the shapefile.
+
+        output_file : str, optional
+            File path to save the output GeoDataFrame.
+
+        merge_polygons : bool, optional
+            Merges the polygons using the :meth:`geopandas.GeoDataFrame.dissolve` method
+            and explodes them with the :meth:`geopandas.GeoDataFrame.explode` method. If False,
+            no operation is performed.
+
+        percentage_cutoff : float, optional
+            Excludes polygons below the specified area percentage, ranging from 0 to 100,
+            relative to the total area of all polygons. Default is 0, excluding negligible polygons.
+            Provide -1 for no exclusion.
+
+        **kwargs : optional
+            Additional keyword arguments for the
+            :meth:`geopandas.GeoDataFrame.to_file` method.
+
+        Returns
+        -------
+        GeoDataFrame
+            GeoDataFrame containing the selected subcatchment.
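A sketch of selecting one subcatchment from the downloaded level-5 division; the identifier and file names follow the test suite and are placeholders.

```python
from SuomiGeoData import Syke

syke = Syke()

subcatchment_gdf = syke.select_single_subcatchment(
    input_file='catchment_divisions/catchment_division_level_5.shp',
    level=5,
    single_area=15730216003,         # value from the 'taso5_osai' column
    output_file='subcatchment.shp',  # optional; omit to skip writing
    merge_polygons=True,
    percentage_cutoff=0
)
print(subcatchment_gdf.geometry.iloc[0].bounds)
```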
+ ''' + + # check level + if level in [1, 2, 3, 4, 5]: + pass + else: + raise Exception('Input level must be one of 1, 2, 3, 4, or 5.') + + # input GeoDataFrame + gdf = geopandas.read_file(input_file) + + # processing of the selected suncatchment + area_col = f'taso{level}_osai' + area_gdf = gdf[gdf[area_col].isin([single_area])].reset_index(drop=True) + if area_gdf.shape[0] == 0: + raise Exception('The index map does not intersect with the given area.') + else: + area_gdf = area_gdf.drop( + columns=['muutospvm', 'Shape_STAr', 'Shape_STLe'] + ) + area_gdf[area_col] = area_gdf[area_col].astype('int') + id_col = area_col.replace('_osai', '_id') + area_gdf[id_col] = area_gdf[id_col].astype('int') + + # merging polygons + if area_gdf.geometry.iloc[0].geom_type == 'Polygon': + pass + else: + if merge_polygons is True: + area_gdf = area_gdf.dissolve() + area_gdf = area_gdf[['geometry']] + area_gdf = area_gdf.explode(ignore_index=True) + area_gdf['PID'] = list(range(1, area_gdf.shape[0] + 1)) + else: + pass + # removing negligible polygons + if percentage_cutoff < 0: + pass + else: + total_area = area_gdf.geometry.area.sum() + area_gdf['area_%'] = round(100 * area_gdf.geometry.area / total_area).astype('int') + area_gdf = area_gdf[area_gdf['area_%'] > percentage_cutoff].reset_index(drop=True) + area_gdf = area_gdf.drop(columns=['area_%']) + area_gdf['PID'] = list(range(1, area_gdf.shape[0] + 1)) + + # saving the geodataframe + if output_file is None: + pass + else: + check_file = Core().is_valid_ogr_driver(output_file) + # invalid file + if check_file is False: + raise Exception( + 'Could not retrieve driver from the file path.' + ) + else: + # saving the output GeoDataFrame + area_gdf.to_file( + output_file, + **kwargs + ) + + return area_gdf + + def merging_multiple_subcatchments( + self, + input_file: str, + level: int, + multiple_area: list[int], + output_file: typing.Optional[str] = None, + percentage_cutoff: float = -1, + **kwargs: typing.Any + ) -> geopandas.GeoDataFrame: + + ''' + Selects multiple subcatchments from the shapefile of + Syke's catachment divisions and returns a GeoDataFrame. + + Parameters + ---------- + input_file : str + Path to the shapefile of catchment area divisions, obtained from the + :meth: `SuomiGeoData.Syke.download_catchment_divisions_2023` method. + + level : int + Catchment division level, must be one of 1, 2, 3, 4, or 5. + + multiple_area : list of int + List of selected integer values from the 'taso_osai' column in the shapefile. + + output_file : str, optional + File path to save the output GeoDataFrame. + + percentage_cutoff : float, optional + Excludes polygon below the specified area percentage, ranging between 0 to 100, + relative to the total area of all polygons. Default is -1 for no exclusion. + + **kwargs : optional + Additional keyword arguments for the + :meth:`geopandas.GeoDataFrame.to_file` method. + + Returns + ------- + GeoDataFrame + GeoDataFrame containing the selected subcatchments. 
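A sketch of merging several level-2 subcatchments into a single geometry; the identifiers come from the test suite and are placeholders.

```python
from SuomiGeoData import Syke

syke = Syke()

merged_gdf = syke.merging_multiple_subcatchments(
    input_file='catchment_divisions/catchment_division_level_2.shp',
    level=2,
    multiple_area=[1159, 1160, 1161],   # values from the 'taso2_osai' column
    percentage_cutoff=0                 # drop negligible slivers after merging
)
print(merged_gdf.shape[0], 'polygons after merging')
```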
+ ''' + + # check level + if level in [1, 2, 3, 4, 5]: + pass + else: + raise Exception('Input level must be one of 1, 2, 3, 4, or 5.') + + # check multiple subcatchments + if len(multiple_area) > 1: + pass + else: + raise Exception('Input multiple area list contains single element.') + + # input GeoDataFrame + gdf = geopandas.read_file(input_file) + + # processing of selected subcatchments + area_col = f'taso{level}_osai' + area_gdf = gdf[gdf[area_col].isin(multiple_area)].reset_index(drop=True) + if area_gdf.shape[0] == 0: + raise Exception('The index map does not intersect with the given area.') + else: + area_gdf = area_gdf.drop( + columns=['muutospvm', 'Shape_STAr', 'Shape_STLe'] + ) + area_gdf['geometry'] = area_gdf['geometry'].apply(lambda x: shapely.union_all(x)) + area_gdf[area_col] = area_gdf[area_col].astype('int') + id_col = area_col.replace('_osai', '_id') + area_gdf[id_col] = area_gdf[id_col].astype('int') + area_gdf = area_gdf.dissolve() + area_gdf = area_gdf[['geometry']] + area_gdf = area_gdf.explode(ignore_index=True) + area_gdf['PID'] = list(range(1, area_gdf.shape[0] + 1)) + # removing negligible polygons + if percentage_cutoff < 0: + pass + else: + total_area = area_gdf.geometry.area.sum() + area_gdf['area_%'] = round(100 * area_gdf.geometry.area / total_area).astype('int') + area_gdf = area_gdf[area_gdf['area_%'] > percentage_cutoff].reset_index(drop=True) + area_gdf = area_gdf.drop(columns=['area_%']) + area_gdf['PID'] = list(range(1, area_gdf.shape[0] + 1)) + + # saving the geodataframe + if output_file is None: + pass + else: + check_file = Core().is_valid_ogr_driver(output_file) + # invalid file + if check_file is False: + raise Exception( + 'Could not retrieve driver from the file path.' + ) + else: + area_gdf.to_file( + output_file, + **kwargs + ) + + return area_gdf diff --git a/pyproject.toml b/pyproject.toml index d670b06..5196076 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -11,12 +11,13 @@ authors = [ ] dependencies = [ "geopandas>=1.0.1", + "rasterio>=1.3.11", "requests>=2.32.3" ] readme = "README.md" requires-python = ">=3.10" classifiers = [ - "Development Status :: 2 - Pre-Alpha", + "Development Status :: 3 - Alpha", "Programming Language :: Python :: 3.10", "Programming Language :: Python :: 3.11", "Programming Language :: Python :: 3.12", diff --git a/requirements-gh-action.txt b/requirements-gh-action.txt index 308af20..9580dd9 100644 --- a/requirements-gh-action.txt +++ b/requirements-gh-action.txt @@ -1,4 +1,5 @@ pytest pytest-cov geopandas>=1.0.1 +rasterio>=1.3.11 requests>=2.32.3 diff --git a/tests/test_paituli.py b/tests/test_paituli.py deleted file mode 100644 index d6cfe26..0000000 --- a/tests/test_paituli.py +++ /dev/null @@ -1,92 +0,0 @@ -import pytest -import os -import geopandas -import tempfile -from SuomiGeoData import Paituli - - -@pytest.fixture(scope='class') -def class_instance(): - - yield Paituli() - - -def test_save_indexmap( - class_instance -): - # pass test for saving index map - with tempfile.TemporaryDirectory() as tmp_dir: - # DEM - dem_file = os.path.join(tmp_dir, "indexmap_dem.shp") - save_dem = class_instance.save_indexmap_dem(dem_file) - assert save_dem is True - dem_gdf = geopandas.read_file(dem_file) - assert isinstance(dem_gdf, geopandas.GeoDataFrame) is True - assert dem_gdf.shape[0] == 10320 - # topographical database - tdb_file = os.path.join(tmp_dir, "indexmap_tdb.shp") - save_tdb = class_instance.save_indexmap_tdb(tdb_file) - assert save_tdb is True - tdb_gdf = geopandas.read_file(tdb_file) - assert 
isinstance(tdb_gdf, geopandas.GeoDataFrame) is True - assert tdb_gdf.shape[0] == 3132 - - assert os.path.exists(tmp_dir) is False - - # error test of saving DEM index map - with pytest.raises(Exception) as exc_info: - class_instance.save_indexmap_dem('invalid_file_extension.sh') - assert exc_info.value.args[0] == 'Could not OGR format driver from the file path.' - - # error test of saving topographical database index map - with pytest.raises(Exception) as exc_info: - class_instance.save_indexmap_tdb('invalid_file_extension.sh') - assert exc_info.value.args[0] == 'Could not OGR format driver from the file path.' - - -def test_is_valid_label( - class_instance -): - - # pass test for DEM - assert class_instance.is_valid_label_dem('K3244G') is True - assert class_instance.is_valid_label_dem('invalid_label') is False - - # pass test for topographical database - assert class_instance.is_valid_label_tdb('K2344R') is True - assert class_instance.is_valid_label_tdb('invalid_label') is False - - -def test_download_by_labels( - class_instance -): - - # test for downloading DEM - with tempfile.TemporaryDirectory() as dem_dir: - # download test - class_instance.dem_download_by_labels(['X4344A'], dem_dir) is True - # download test with customized HTTP headers - class_instance.dem_download_by_labels(['X4344A'], dem_dir, http_headers={'Host': 'www.nic.funet.fi'}) is True - # error test for invalid label - with pytest.raises(Exception) as exc_info: - class_instance.dem_download_by_labels(['ABCDE'], dem_dir) - assert exc_info.value.args[0] == 'The label "ABCDE" does not exist in the index map.' - # errot test for invalid directory - with pytest.raises(Exception) as exc_info: - class_instance.dem_download_by_labels(['X4344A'], dem_dir) - assert exc_info.value.args[0] == f'The folder path "{dem_dir}" is not a valid directory.' - - # test for downloading topographical database - with tempfile.TemporaryDirectory() as tdb_dir: - # download test - class_instance.tdb_download_by_labels(['J3224R'], tdb_dir) is True - # download test with customized HTTP headers - class_instance.tdb_download_by_labels(['J3224R'], tdb_dir, http_headers={'Host': 'www.nic.funet.fi'}) is True - # error test for invalid label - with pytest.raises(Exception) as exc_info: - class_instance.tdb_download_by_labels(['ABCDE'], tdb_dir) - assert exc_info.value.args[0] == 'The label "ABCDE" does not exist in the index map.' - # errot test for invalid directory - with pytest.raises(Exception) as exc_info: - class_instance.tdb_download_by_labels(['J3224R'], tdb_dir) - assert exc_info.value.args[0] == f'The folder path "{tdb_dir}" is not a valid directory.' 
diff --git a/tests/test_suomigeodata.py b/tests/test_suomigeodata.py new file mode 100644 index 0000000..0af7253 --- /dev/null +++ b/tests/test_suomigeodata.py @@ -0,0 +1,398 @@ +import pytest +import os +import tempfile +import geopandas +import shapely +import rasterio +import SuomiGeoData + + +@pytest.fixture(scope='class') +def paituli(): + + yield SuomiGeoData.Paituli() + + +@pytest.fixture(scope='class') +def syke(): + + yield SuomiGeoData.Syke() + + +@pytest.fixture(scope='class') +def core(): + + yield SuomiGeoData.core.Core() + + +@pytest.fixture +def message(): + + output = { + 'download': 'All downloads are complete.', + 'folder_empty': 'Output folder must be empty.', + 'gdf_write': 'GeoDataFrame saved to the output file.', + 'geoprocess': 'All geoprocessing has been completed.', + 'error_area': 'The index map does not intersect with the given area.', + 'error_folder': 'The folder path does not exist.', + 'error_driver': 'Could not retrieve driver from the file path.', + 'error_gdf': 'Input area must be either file or GeoDataFrame format.', + 'error_label': 'The label ABCDE does not exist in the index map.', + 'error_level': 'Input level must be one of 1, 2, 3, 4, or 5.' + } + + return output + + +def test_save_indexmap( + paituli, + message +): + + with tempfile.TemporaryDirectory() as tmp_dir: + # test for saving DEM index map + dem_file = os.path.join(tmp_dir, "indexmap_dem.shp") + save_dem = paituli.save_indexmap_dem(dem_file) + assert save_dem == message['gdf_write'] + dem_gdf = geopandas.read_file(dem_file) + assert isinstance(dem_gdf, geopandas.GeoDataFrame) is True + assert dem_gdf.shape[0] == 10320 + # test for saving topographical database index map + tdb_file = os.path.join(tmp_dir, "indexmap_tdb.shp") + save_tdb = paituli.save_indexmap_tdb(tdb_file) + assert save_tdb == message['gdf_write'] + tdb_gdf = geopandas.read_file(tdb_file) + assert isinstance(tdb_gdf, geopandas.GeoDataFrame) is True + assert tdb_gdf.shape[0] == 3132 + + # test for error of undetected OGR driver while saving DEM index map + with pytest.raises(Exception) as exc_info: + paituli.save_indexmap_dem('invalid_file_extension.sh') + assert exc_info.value.args[0] == message['error_driver'] + + # test for error of undetected OGR driver while saving topographical database index map + with pytest.raises(Exception) as exc_info: + paituli.save_indexmap_tdb('invalid_file_extension.sh') + assert exc_info.value.args[0] == message['error_driver'] + + +def test_is_valid_label( + paituli +): + + # test for valid label of DEM index map + assert paituli.is_valid_label_dem('K3244G') is True + assert paituli.is_valid_label_dem('invalid_label') is False + + # test for valid label of topographical database index map + assert paituli.is_valid_label_tdb('K2344R') is True + assert paituli.is_valid_label_tdb('invalid_label') is False + + +def test_dem_download_by_labels( + paituli, + message +): + + # test for downloading DEM labels + with tempfile.TemporaryDirectory() as tmp_dir: + assert paituli.dem_download_by_labels( + ['X4344A'], tmp_dir + ) == message['download'] + # test for error when the input is a non empty folder + with pytest.raises(Exception) as exc_info: + paituli.dem_download_by_labels(['K3244G'], tmp_dir) + assert exc_info.value.args[0] == message['folder_empty'] + + # test for error when the input is a invalid label + with tempfile.TemporaryDirectory() as tmp_dir: + with pytest.raises(Exception) as exc_info: + paituli.dem_download_by_labels(['ABCDE'], tmp_dir) + assert exc_info.value.args[0] == 
message['error_label'] + + # test for error when the input is a invalid folder path + with pytest.raises(Exception) as exc_info: + paituli.dem_download_by_labels(['X4344A'], tmp_dir) + assert exc_info.value.args[0] == message['error_folder'] + + +def test_tdb_download_by_labels( + paituli, + message +): + + # test for downloading topographical database labels + with tempfile.TemporaryDirectory() as tmp_dir: + assert paituli.tdb_download_by_labels( + ['J3224R'], tmp_dir + ) == message['download'] + # test for error when the input is a non empty folder + with pytest.raises(Exception) as exc_info: + paituli.tdb_download_by_labels(['K2344R'], tmp_dir) + assert exc_info.value.args[0] == message['folder_empty'] + + # test for error when the input is a invalid label + with tempfile.TemporaryDirectory() as tmp_dir: + with pytest.raises(Exception) as exc_info: + paituli.tdb_download_by_labels(['ABCDE'], tmp_dir) + assert exc_info.value.args[0] == message['error_label'] + + # test for error when the input is a invalid folder path + with pytest.raises(Exception) as exc_info: + paituli.tdb_download_by_labels(['J3224R'], tmp_dir) + assert exc_info.value.args[0] == message['error_folder'] + + +def test_dem_labels_download_by_area( + paituli, + message +): + + with tempfile.TemporaryDirectory() as tmp_dir: + # test for error for invalid input + with pytest.raises(Exception) as exc_info: + paituli.dem_labels_download_by_area(5, tmp_dir) + assert exc_info.value.args[0] == message['error_gdf'] + # test for downloading when the input is a GeoDataFrame format + example_gdf = paituli.get_example_area + assert paituli.dem_labels_download_by_area( + example_gdf, tmp_dir + ) == message['download'] + # test for downloading when the input is a file format + example_file = os.path.join(tmp_dir, 'example_file.shp') + example_gdf.to_file(example_file) + sub_dir = os.path.join(tmp_dir, 'sub_dir') + os.makedirs(sub_dir) + assert paituli.dem_labels_download_by_area( + example_file, sub_dir + ) == message['download'] + + example_area = shapely.Polygon([(0, 0), (1, 0), (1, 1), (0, 1)]) + with tempfile.TemporaryDirectory() as tmp_dir: + # test for error while the input GeoDataFrame has no CRS + nocrs_gdf = geopandas.GeoDataFrame({'geometry': [example_area]}) + with pytest.raises(Exception) as exc_info: + paituli.dem_labels_download_by_area(nocrs_gdf, tmp_dir) + assert exc_info.value.args[0] == message['error_area'] + # test for error while the input GeoDataFrame has CRS other than 'EPSG:3067' + crs_gdf = geopandas.GeoDataFrame({'geometry': [example_area]}, crs='EPSG:4326') + with pytest.raises(Exception) as exc_info: + paituli.dem_labels_download_by_area(crs_gdf, tmp_dir) + assert exc_info.value.args[0] == message['error_area'] + + +def test_download_corine_land_cover_2018( + syke, + message +): + + # test for downloading Syke's land cover map + with tempfile.TemporaryDirectory() as tmp_dir: + assert len(os.listdir(tmp_dir)) == 0 + assert syke.download_corine_land_cover_2018(tmp_dir) == message['download'] + assert len(os.listdir(tmp_dir)) > 0 + + # test for error when the input is a invalid folder path + with pytest.raises(Exception) as exc_info: + syke.download_corine_land_cover_2018(tmp_dir) + assert exc_info.value.args[0] == message['error_folder'] + + +def test_dem_by_area( + paituli, + syke, + core, + message +): + + # temporary directory + with tempfile.TemporaryDirectory() as tmp_dir: + # test for downloading Syke's catchment divisions + assert syke.download_catchment_divisions_2023(tmp_dir) == message['download'] + 
catchd5_path = os.path.join(tmp_dir, 'catchment_division_level_5.shp') + sub_dir = os.path.join(tmp_dir, 'sub_dir') + os.makedirs(sub_dir) + # test for error of Syke's single subcatchment when the input level is not an integer + with pytest.raises(Exception) as exc_info: + paituli.dem_labels_download_by_syke_subcatchment( + input_file=catchd5_path, + level='invalid_level', + single_area=15730216003, + folder_path=sub_dir + ) + assert exc_info.value.args[0] == message['error_level'] + # test for error of Syke's single subcatchment when the input area does not intersect with the index map + with pytest.raises(Exception) as exc_info: + paituli.dem_labels_download_by_syke_subcatchment( + input_file=catchd5_path, + level=5, + single_area=157302, + folder_path=sub_dir + ) + assert exc_info.value.args[0] == message['error_area'] + # test for downloading DEM labels for single subcatchment from Syke's catchment divisions + assert paituli.dem_labels_download_by_syke_subcatchment( + input_file=catchd5_path, + level=5, + single_area=15730216003, + folder_path=sub_dir + ) == message['download'] + # test for selecting single polygons for Syke's single subcatchment + spg_gdf = syke.select_single_subcatchment( + input_file=catchd5_path, + level=5, + single_area=15730216003, + output_file=os.path.join(tmp_dir, 'single_subcatchment_spg.shp'), + merge_polygons=True, + percentage_cutoff=0 + ) + assert isinstance(spg_gdf, geopandas.GeoDataFrame) is True + assert spg_gdf.geometry.iloc[0].bounds == (594410.0, 7377690.0, 596350.0, 7379700.0) + # test for error of undetected OGR driver while saving Syke's single subcatchment + with pytest.raises(Exception) as exc_info: + syke.select_single_subcatchment( + input_file=catchd5_path, + level=5, + single_area=15730216003, + output_file=os.path.join(tmp_dir, 'invalid_file_extension.sh') + ) + assert exc_info.value.args[0] == message['error_driver'] + # test for Syke's single subcatchment with merging polygons and percentage cutoff + mpg_gdf = syke.select_single_subcatchment( + input_file=catchd5_path, + level=5, + single_area=42010117301, + output_file=os.path.join(tmp_dir, 'single_subcatchment_mpg.shp'), + merge_polygons=True, + percentage_cutoff=0 + ) + assert mpg_gdf.geometry.iloc[0].bounds == (689130.0, 6898840.0, 693370.0, 6902730.0) + # test for Syke's single subcatchment without merging polygons or percentage cutoff + mpg_gdf = syke.select_single_subcatchment( + input_file=catchd5_path, + level=5, + single_area=31670606904, + merge_polygons=False, + percentage_cutoff=-1 + ) + assert round(mpg_gdf.geometry.iloc[0].area) == 238699 + # test for raster merging + assert core.raster_merging( + folder_path=sub_dir, + output_file=os.path.join(tmp_dir, 'check_merged.tif') + ) == 'Merging of rasters completed.' 
+ # test for error in raster merging when the input is a invalid folder path + with pytest.raises(Exception) as exc_info: + core.raster_merging( + folder_path=os.path.join(tmp_dir, 'nonexist_dir'), + output_file=os.path.join(tmp_dir, 'check_merged.tif') + ) + assert exc_info.value.args[0] == message['error_folder'] + # test for error of undetected driver while raster merging + with pytest.raises(Exception) as exc_info: + core.raster_merging( + folder_path=sub_dir, + output_file=os.path.join(tmp_dir, 'merged.t') + ) + assert exc_info.value.args[0] == message['error_driver'] + # test for raster clipping + assert core.raster_clipping_by_mask( + input_file=os.path.join(tmp_dir, 'check_merged.tif'), + mask_area=geopandas.read_file(os.path.join(tmp_dir, 'single_subcatchment_spg.shp')), + output_file=os.path.join(tmp_dir, 'check_clipped.tif') + ) == 'Raster clipping completed.' + with rasterio.open(os.path.join(tmp_dir, 'check_clipped.tif')) as clip_raster: + assert clip_raster.bounds.bottom == 7377690.0 + assert clip_raster.bounds.top == 7379700.0 + # test for error of invalid mask area while clipping raster file + with pytest.raises(Exception) as exc_info: + core.raster_clipping_by_mask( + input_file=os.path.join(tmp_dir, 'check_merged.tif'), + mask_area=5, + output_file=os.path.join(tmp_dir, 'check_clipped.tif') + ) + assert exc_info.value.args[0] == message['error_gdf'] + # test for error of undetected driver while clipping raster file + with pytest.raises(Exception) as exc_info: + core.raster_clipping_by_mask( + input_file=os.path.join(tmp_dir, 'check_merged.tif'), + mask_area=os.path.join(tmp_dir, 'single_subcatchment_spg.shp'), + output_file=os.path.join(tmp_dir, 'invalid.t') + ) + assert exc_info.value.args[0] == message['error_driver'] + # test for downoading clipped dem from Syke's catchment divisions + raster_path = os.path.join(tmp_dir, 'clipped_catchment.tif') + assert paituli.dem_clipped_download_by_syke_subcatchment( + input_file=catchd5_path, + level=5, + single_area=15730216003, + output_file=raster_path, + compress='lzw' + ) == message['geoprocess'] + with rasterio.open(raster_path) as tmp_raster: + assert tmp_raster.bounds.left == 594410.0 + assert tmp_raster.bounds.right == 596350.0 + catchd2_path = os.path.join(tmp_dir, 'catchment_division_level_2.shp') + # test for error of Syke's multiple subcatchment without merging or percentage cutoff + with pytest.raises(Exception) as exc_info: + syke.merging_multiple_subcatchments( + input_file=catchd2_path, + level='invalid_level', + multiple_area=[1159, 1160, 1161], + ) + assert exc_info.value.args[0] == message['error_level'] + # test for error of Syke's multiple subcatchment when the input area contains single element + with pytest.raises(Exception) as exc_info: + syke.merging_multiple_subcatchments( + input_file=catchd2_path, + level=2, + multiple_area=[11], + ) + assert exc_info.value.args[0] == 'Input multiple area list contains single element.' 
+        # test for error of Syke's multiple subcatchments when the input area does not intersect with the index map
+        with pytest.raises(Exception) as exc_info:
+            syke.merging_multiple_subcatchments(
+                input_file=catchd2_path,
+                level=2,
+                multiple_area=[11, 12],
+            )
+        assert exc_info.value.args[0] == message['error_area']
+        # test for error of undetected OGR driver while saving Syke's multiple subcatchments
+        with pytest.raises(Exception) as exc_info:
+            syke.merging_multiple_subcatchments(
+                input_file=catchd2_path,
+                level=2,
+                multiple_area=[1159, 1160, 1161],
+                output_file=os.path.join(tmp_dir, 'invalid_file_extension.sh'),
+            )
+        assert exc_info.value.args[0] == message['error_driver']
+        # test for Syke's multiple subcatchments without percentage cutoff
+        msc_gdf = syke.merging_multiple_subcatchments(
+            input_file=catchd2_path,
+            level=2,
+            multiple_area=[1159, 1160, 1161],
+            output_file=os.path.join(tmp_dir, 'merging_msc.shp'),
+        )
+        assert msc_gdf.shape[0] == 20
+        # test for Syke's multiple subcatchments with percentage cutoff
+        msc_gdf = syke.merging_multiple_subcatchments(
+            input_file=catchd5_path,
+            level=5,
+            multiple_area=[15730214505, 15730214514],
+            percentage_cutoff=0
+        )
+        assert msc_gdf.geometry.iloc[0].area == 22858200.0
+        # test for downloading clipped DEM from area
+        raster_path = os.path.join(tmp_dir, 'clipped_area.tif')
+        assert paituli.dem_clipped_download_by_area(
+            input_area=msc_gdf,
+            output_file=raster_path,
+        ) == message['geoprocess']
+        with rasterio.open(raster_path) as tmp_raster:
+            assert tmp_raster.bounds.left == 582480.0
+            assert tmp_raster.bounds.right == 589690.0
+
+    # test for error of downloading Syke's catchment divisions when the input is an invalid folder path
+    with pytest.raises(Exception) as exc_info:
+        syke.download_catchment_divisions_2023(tmp_dir)
+    assert exc_info.value.args[0] == message['error_folder']