From 87a130be61272a31fb71a2b29d17c25179b68cf7 Mon Sep 17 00:00:00 2001 From: Alex Parsons Date: Mon, 7 Aug 2023 14:20:27 +0000 Subject: [PATCH 01/20] Add geodata processing to dataset tool --- poetry.lock | 196 +++++++++++++++++- pyproject.toml | 1 + .../dataset/resource_management.py | 52 ++++- .../{{cookiecutter.slug}}/datapackage.yaml | 3 + 4 files changed, 239 insertions(+), 13 deletions(-) diff --git a/poetry.lock b/poetry.lock index 25e48a6..2670100 100644 --- a/poetry.lock +++ b/poetry.lock @@ -1,4 +1,4 @@ -# This file is automatically @generated by Poetry and should not be changed by hand. +# This file is automatically @generated by Poetry 1.4.2 and should not be changed by hand. [[package]] name = "altair" @@ -616,6 +616,42 @@ files = [ [package.dependencies] colorama = {version = "*", markers = "platform_system == \"Windows\""} +[[package]] +name = "click-plugins" +version = "1.1.1" +description = "An extension module for click to enable registering CLI commands via setuptools entry-points." +category = "main" +optional = false +python-versions = "*" +files = [ + {file = "click-plugins-1.1.1.tar.gz", hash = "sha256:46ab999744a9d831159c3411bb0c79346d94a444df9a3a3742e9ed63645f264b"}, + {file = "click_plugins-1.1.1-py2.py3-none-any.whl", hash = "sha256:5d262006d3222f5057fd81e1623d4443e41dcda5dc815c06b442aa3c02889fc8"}, +] + +[package.dependencies] +click = ">=4.0" + +[package.extras] +dev = ["coveralls", "pytest (>=3.6)", "pytest-cov", "wheel"] + +[[package]] +name = "cligj" +version = "0.7.2" +description = "Click params for commmand line interfaces to GeoJSON" +category = "main" +optional = false +python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, <4" +files = [ + {file = "cligj-0.7.2-py3-none-any.whl", hash = "sha256:c1ca117dbce1fe20a5809dc96f01e1c2840f6dcc939b3ddbb1111bf330ba82df"}, + {file = "cligj-0.7.2.tar.gz", hash = "sha256:a4bc13d623356b373c2c27c53dbd9c68cae5d526270bfa71f6c6fa69669c6b27"}, +] + +[package.dependencies] +click = ">=4.0" + +[package.extras] +test = ["pytest-cov"] + [[package]] name = "colorama" version = "0.4.6" @@ -1021,6 +1057,50 @@ files = [ [package.extras] devel = ["colorama", "json-spec", "jsonschema", "pylint", "pytest", "pytest-benchmark", "pytest-cache", "validictory"] +[[package]] +name = "fiona" +version = "1.9.4.post1" +description = "Fiona reads and writes spatial data files" +category = "main" +optional = false +python-versions = ">=3.7" +files = [ + {file = "Fiona-1.9.4.post1-cp310-cp310-macosx_10_15_x86_64.whl", hash = "sha256:d6483a20037db2209c8e9a0c6f1e552f807d03c8f42ed0c865ab500945a37c4d"}, + {file = "Fiona-1.9.4.post1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:dbe158947099a83ad16f9acd3a21f50ff01114c64e2de67805e382e6b6e0083a"}, + {file = "Fiona-1.9.4.post1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2c2c7b09eecee3bb074ef8aa518cd6ab30eb663c6fdd0eff3c88d454a9746eaa"}, + {file = "Fiona-1.9.4.post1-cp310-cp310-win_amd64.whl", hash = "sha256:1da8b954f6f222c3c782bc285586ea8dd9d7e55e1bc7861da9cd772bca671660"}, + {file = "Fiona-1.9.4.post1-cp311-cp311-macosx_10_15_x86_64.whl", hash = "sha256:c671d8832287cda397621d79c5a635d52e4631f33a8f0e6fdc732a79a93cb96c"}, + {file = "Fiona-1.9.4.post1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:b633a2e550e083805c638d2ab8059c283ca112aaea8241e170c012d2ee0aa905"}, + {file = "Fiona-1.9.4.post1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c1faa625d5202b8403471bbc9f9c96b1bf9099cfcb0ee02a80a3641d3d02383e"}, + {file = "Fiona-1.9.4.post1-cp311-cp311-win_amd64.whl", hash = "sha256:39baf11ff0e4318397e2b2197de427b4eebdc49d4a9a7c1366f8a7ed682978a4"}, + {file = "Fiona-1.9.4.post1-cp37-cp37m-macosx_10_15_x86_64.whl", hash = "sha256:d93c993265f6378b23f47708c83bddb3377ca6814a1f0b5a0ae0bee9c8d72cf8"}, + {file = "Fiona-1.9.4.post1-cp37-cp37m-manylinux2014_x86_64.whl", hash = "sha256:b0387cae39e27f338fd948b3b50b6e6ce198cc4cec257fc91660849697c69dc3"}, + {file = "Fiona-1.9.4.post1-cp37-cp37m-win_amd64.whl", hash = "sha256:450561d308d3ce7c7e30294822b1de3f4f942033b703ddd4a91a7f7f5f506ca0"}, + {file = "Fiona-1.9.4.post1-cp38-cp38-macosx_10_15_x86_64.whl", hash = "sha256:71b023ef5248ebfa5524e7a875033f7db3bbfaf634b1b5c1ae36958d1eb82083"}, + {file = "Fiona-1.9.4.post1-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:74511d3755695d75cea0f4ff6f5e0c6c5d5be8e0d46dafff124c6a219e99b1eb"}, + {file = "Fiona-1.9.4.post1-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:285f3dd4f96aa0a3955ed469f0543375b20989731b2dddc85124453f11ac62bc"}, + {file = "Fiona-1.9.4.post1-cp38-cp38-win_amd64.whl", hash = "sha256:a670ea4262cb9140445bcfc97cbfd2f508a058be342f4a97e966b8ce7696601f"}, + {file = "Fiona-1.9.4.post1-cp39-cp39-macosx_10_15_x86_64.whl", hash = "sha256:ea7c44c15b3a653452b9b3173181490b7afc5f153b0473c145c43c0fbf90448b"}, + {file = "Fiona-1.9.4.post1-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:7bfb1f49e0e53f6cd7ad64ae809d72646266b37a7b9881205977408b443a8d79"}, + {file = "Fiona-1.9.4.post1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1a585002a6385cc8ab0f66ddf3caf18711f531901906abd011a67a0cc89ab7b0"}, + {file = "Fiona-1.9.4.post1-cp39-cp39-win_amd64.whl", hash = "sha256:f5da66b723a876142937e683431bbaa5c3d81bb2ed3ec98941271bc99b7f8cd0"}, + {file = "Fiona-1.9.4.post1.tar.gz", hash = "sha256:5679d3f7e0d513035eb72e59527bb90486859af4405755dfc739138633106120"}, +] + +[package.dependencies] +attrs = ">=19.2.0" +certifi = "*" +click = ">=8.0,<9.0" +click-plugins = ">=1.0" +cligj = ">=0.5" +six = "*" + +[package.extras] +all = ["Fiona[calc,s3,test]"] +calc = ["shapely"] +s3 = ["boto3 (>=1.3.1)"] +test = ["Fiona[s3]", "pytest (>=7)", "pytest-cov", "pytz"] + [[package]] name = "flake8" version = "3.9.2" @@ -1110,6 +1190,25 @@ server = ["flask (>=1.1)", "gunicorn (>=20.0)"] spss = ["savReaderWriter (>=3.0)"] sql = ["sqlalchemy (>=1.3)"] +[[package]] +name = "geopandas" +version = "0.13.2" +description = "Geographic pandas extensions" +category = "main" +optional = false +python-versions = ">=3.8" +files = [ + {file = "geopandas-0.13.2-py3-none-any.whl", hash = "sha256:101cfd0de54bcf9e287a55b5ea17ebe0db53a5e25a28bacf100143d0507cabd9"}, + {file = "geopandas-0.13.2.tar.gz", hash = "sha256:e5b56d9c20800c77bcc0c914db3f27447a37b23b2cd892be543f5001a694a968"}, +] + +[package.dependencies] +fiona = ">=1.8.19" +packaging = "*" +pandas = ">=1.1.0" +pyproj = ">=3.0.1" +shapely = ">=1.7.1" + [[package]] name = "google-api-core" version = "2.11.0" @@ -3105,6 +3204,44 @@ files = [ [package.extras] diagrams = ["jinja2", "railroad-diagrams"] +[[package]] +name = "pyproj" +version = "3.6.0" +description = "Python interface to PROJ (cartographic projections and coordinate transformations library)" +category = "main" +optional = false +python-versions = ">=3.9" +files = [ + {file = "pyproj-3.6.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:e600f6a2771d3b41aeb2cc1efd96771ae9a01451013da1dd48ff272e7c6e34ef"}, + {file = "pyproj-3.6.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:d7f6cd045df29aae960391dfe06a575c110af598f1dea5add8be6ca42332b0f5"}, + {file = "pyproj-3.6.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:557e6592855111c84eda176ddf6b130f55d5e2b9cb1c017b8c91b69f37f474f5"}, + {file = "pyproj-3.6.0-cp310-cp310-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:de6288b6ceabdeeac01abf627c74414822d322d8f55dc8efe4d29dedd27c5719"}, + {file = "pyproj-3.6.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e427ccdbb1763872416549bdfa9fa1f5f169054653c4daf674e71480cc39cf11"}, + {file = "pyproj-3.6.0-cp310-cp310-win32.whl", hash = "sha256:1283d3c1960edbb74828f5f3405b27578a9a27f7766ab6a3956f4bd851f08239"}, + {file = "pyproj-3.6.0-cp310-cp310-win_amd64.whl", hash = "sha256:9de1aab71234bfd3fd648a1152519b5ee152c43113d7d8ea52590a0140129501"}, + {file = "pyproj-3.6.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:00fab048596c17572fa8980014ef117dbb2a445e6f7ba3b9ddfcc683efc598e7"}, + {file = "pyproj-3.6.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:ba5e7c8ddd6ed5a3f9fcf95ea80ba44c931913723de2ece841c94bb38b200c4a"}, + {file = "pyproj-3.6.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:08dfc5c9533c78a97afae9d53b99b810a4a8f97c3be9eb2b8f323b726c736403"}, + {file = "pyproj-3.6.0-cp311-cp311-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:18a8bdb87aeb41b60a2e91d32f623227de3569fb83b4c64b174c3a7c5b0ed3ae"}, + {file = "pyproj-3.6.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:dfe392dfc0eba2248dc08c976a72f52ff9da2bddfddfd9ff5dcf18e8e88200c7"}, + {file = "pyproj-3.6.0-cp311-cp311-win32.whl", hash = "sha256:78276c6b0c831255c97c56dff7313a3571f327a284d8ac63d6a56437a72ed0e0"}, + {file = "pyproj-3.6.0-cp311-cp311-win_amd64.whl", hash = "sha256:8fbac2eb9a0e425d7d6b7c6f4ebacd675cf3bdef0c59887057b8b4b0374e7c12"}, + {file = "pyproj-3.6.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:95120d65cbc5983dfd877076f28dbc18b9b329cbee38ca6e217bb7a5a043c099"}, + {file = "pyproj-3.6.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:830e6de7cfe43853967afee5ef908dfd5aa72d1ec12af9b9e3fecc179886e346"}, + {file = "pyproj-3.6.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e342b3010b2b20134671564ff9a8c476e5e512bf589477480aded1a5813af7c8"}, + {file = "pyproj-3.6.0-cp39-cp39-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:23787460fab85ba2f857ee60ffb2e8e21fd9bd5db9833c51c1c05b2a6d9f0be5"}, + {file = "pyproj-3.6.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:595376e4d3bb72b7dceeccbce0f4c43053d47561f17a1ad0224407e9980ee849"}, + {file = "pyproj-3.6.0-cp39-cp39-win32.whl", hash = "sha256:4d8a9773503085eada59b6892c96ddf686ab8cf64cfdc18ad744d13ee76dfa6f"}, + {file = "pyproj-3.6.0-cp39-cp39-win_amd64.whl", hash = "sha256:137a07404f937f264b11b7130cd4cfa00002dbe4333b222e8056db84849c2ea4"}, + {file = "pyproj-3.6.0-pp39-pypy39_pp73-macosx_10_9_x86_64.whl", hash = "sha256:2799499a4045e4fb73e44c31bdacab0593a253a7a4b6baae6fdd27d604cf9bc2"}, + {file = "pyproj-3.6.0-pp39-pypy39_pp73-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:f04f6297c615c3b17f835df2556ac8fb9b4f51f281e960437eaf0cd80e7ae26a"}, + {file = "pyproj-3.6.0-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3a4d2d438b007cb1f8d5f6f308d53d7ff9a2508cff8f9da6e2a93b76ffd98aaf"}, + {file = "pyproj-3.6.0.tar.gz", hash = "sha256:a5b111865b3f0f8b77b3983f2fbe4dd6248fc09d3730295949977c8dcd988062"}, +] + +[package.dependencies] +certifi = "*" + [[package]] name = "pyright" version = "1.1.291" @@ -3828,6 +3965,61 @@ docs = ["furo", "jaraco.packaging (>=9)", "jaraco.tidelift (>=1.4)", "pygments-g testing = ["build[virtualenv]", "filelock (>=3.4.0)", "flake8 (<5)", "flake8-2020", "ini2toml[lite] (>=0.9)", "jaraco.envs (>=2.2)", "jaraco.path (>=3.2.0)", "pip (>=19.1)", "pip-run (>=8.8)", "pytest (>=6)", "pytest-black (>=0.3.7)", "pytest-checkdocs (>=2.4)", "pytest-cov", "pytest-enabler (>=1.3)", "pytest-flake8", "pytest-mypy (>=0.9.1)", "pytest-perf", "pytest-timeout", "pytest-xdist", "tomli-w (>=1.0.0)", "virtualenv (>=13.0.0)", "wheel"] testing-integration = ["build[virtualenv]", "filelock (>=3.4.0)", "jaraco.envs (>=2.2)", "jaraco.path (>=3.2.0)", "pytest", "pytest-enabler", "pytest-xdist", "tomli", "virtualenv (>=13.0.0)", "wheel"] +[[package]] +name = "shapely" +version = "2.0.1" +description = "Manipulation and analysis of geometric objects" +category = "main" +optional = false +python-versions = ">=3.7" +files = [ + {file = "shapely-2.0.1-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:b06d031bc64149e340448fea25eee01360a58936c89985cf584134171e05863f"}, + {file = "shapely-2.0.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:9a6ac34c16f4d5d3c174c76c9d7614ec8fe735f8f82b6cc97a46b54f386a86bf"}, + {file = "shapely-2.0.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:865bc3d7cc0ea63189d11a0b1120d1307ed7a64720a8bfa5be2fde5fc6d0d33f"}, + {file = "shapely-2.0.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:45b4833235b90bc87ee26c6537438fa77559d994d2d3be5190dd2e54d31b2820"}, + {file = "shapely-2.0.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ce88ec79df55430e37178a191ad8df45cae90b0f6972d46d867bf6ebbb58cc4d"}, + {file = "shapely-2.0.1-cp310-cp310-win32.whl", hash = "sha256:01224899ff692a62929ef1a3f5fe389043e262698a708ab7569f43a99a48ae82"}, + {file = "shapely-2.0.1-cp310-cp310-win_amd64.whl", hash = "sha256:da71de5bf552d83dcc21b78cc0020e86f8d0feea43e202110973987ffa781c21"}, + {file = "shapely-2.0.1-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:502e0a607f1dcc6dee0125aeee886379be5242c854500ea5fd2e7ac076b9ce6d"}, + {file = "shapely-2.0.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:7d3bbeefd8a6a1a1017265d2d36f8ff2d79d0162d8c141aa0d37a87063525656"}, + {file = "shapely-2.0.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:f470a130d6ddb05b810fc1776d918659407f8d025b7f56d2742a596b6dffa6c7"}, + {file = "shapely-2.0.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:4641325e065fd3e07d55677849c9ddfd0cf3ee98f96475126942e746d55b17c8"}, + {file = "shapely-2.0.1-cp311-cp311-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:90cfa4144ff189a3c3de62e2f3669283c98fb760cfa2e82ff70df40f11cadb39"}, + {file = "shapely-2.0.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:70a18fc7d6418e5aea76ac55dce33f98e75bd413c6eb39cfed6a1ba36469d7d4"}, + {file = "shapely-2.0.1-cp311-cp311-win32.whl", hash = "sha256:09d6c7763b1bee0d0a2b84bb32a4c25c6359ad1ac582a62d8b211e89de986154"}, + {file = "shapely-2.0.1-cp311-cp311-win_amd64.whl", hash = "sha256:d8f55f355be7821dade839df785a49dc9f16d1af363134d07eb11e9207e0b189"}, + {file = "shapely-2.0.1-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:83a8ec0ee0192b6e3feee9f6a499d1377e9c295af74d7f81ecba5a42a6b195b7"}, + {file = "shapely-2.0.1-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a529218e72a3dbdc83676198e610485fdfa31178f4be5b519a8ae12ea688db14"}, + {file = "shapely-2.0.1-cp37-cp37m-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:91575d97fd67391b85686573d758896ed2fc7476321c9d2e2b0c398b628b961c"}, + {file = "shapely-2.0.1-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c8b0d834b11be97d5ab2b4dceada20ae8e07bcccbc0f55d71df6729965f406ad"}, + {file = "shapely-2.0.1-cp37-cp37m-win32.whl", hash = "sha256:b4f0711cc83734c6fad94fc8d4ec30f3d52c1787b17d9dca261dc841d4731c64"}, + {file = "shapely-2.0.1-cp37-cp37m-win_amd64.whl", hash = "sha256:05c51a29336e604c084fb43ae5dbbfa2c0ef9bd6fedeae0a0d02c7b57a56ba46"}, + {file = "shapely-2.0.1-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:b519cf3726ddb6c67f6a951d1bb1d29691111eaa67ea19ddca4d454fbe35949c"}, + {file = "shapely-2.0.1-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:193a398d81c97a62fc3634a1a33798a58fd1dcf4aead254d080b273efbb7e3ff"}, + {file = "shapely-2.0.1-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:e55698e0ed95a70fe9ff9a23c763acfe0bf335b02df12142f74e4543095e9a9b"}, + {file = "shapely-2.0.1-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f32a748703e7bf6e92dfa3d2936b2fbfe76f8ce5f756e24f49ef72d17d26ad02"}, + {file = "shapely-2.0.1-cp38-cp38-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:1a34a23d6266ca162499e4a22b79159dc0052f4973d16f16f990baa4d29e58b6"}, + {file = "shapely-2.0.1-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d173d24e85e51510e658fb108513d5bc11e3fd2820db6b1bd0522266ddd11f51"}, + {file = "shapely-2.0.1-cp38-cp38-win32.whl", hash = "sha256:3cb256ae0c01b17f7bc68ee2ffdd45aebf42af8992484ea55c29a6151abe4386"}, + {file = "shapely-2.0.1-cp38-cp38-win_amd64.whl", hash = "sha256:c7eed1fb3008a8a4a56425334b7eb82651a51f9e9a9c2f72844a2fb394f38a6c"}, + {file = "shapely-2.0.1-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:ac1dfc397475d1de485e76de0c3c91cc9d79bd39012a84bb0f5e8a199fc17bef"}, + {file = "shapely-2.0.1-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:33403b8896e1d98aaa3a52110d828b18985d740cc9f34f198922018b1e0f8afe"}, + {file = "shapely-2.0.1-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:2569a4b91caeef54dd5ae9091ae6f63526d8ca0b376b5bb9fd1a3195d047d7d4"}, + {file = "shapely-2.0.1-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a70a614791ff65f5e283feed747e1cc3d9e6c6ba91556e640636bbb0a1e32a71"}, + {file = "shapely-2.0.1-cp39-cp39-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:c43755d2c46b75a7b74ac6226d2cc9fa2a76c3263c5ae70c195c6fb4e7b08e79"}, + {file = "shapely-2.0.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3ad81f292fffbd568ae71828e6c387da7eb5384a79db9b4fde14dd9fdeffca9a"}, + {file = "shapely-2.0.1-cp39-cp39-win32.whl", hash = "sha256:b50c401b64883e61556a90b89948297f1714dbac29243d17ed9284a47e6dd731"}, + {file = "shapely-2.0.1-cp39-cp39-win_amd64.whl", hash = "sha256:bca57b683e3d94d0919e2f31e4d70fdfbb7059650ef1b431d9f4e045690edcd5"}, + {file = "shapely-2.0.1.tar.gz", hash = "sha256:66a6b1a3e72ece97fc85536a281476f9b7794de2e646ca8a4517e2e3c1446893"}, +] + +[package.dependencies] +numpy = ">=1.14" + +[package.extras] +docs = ["matplotlib", "numpydoc (>=1.1.0,<1.2.0)", "sphinx", "sphinx-book-theme", "sphinx-remove-toctrees"] +test = ["pytest", "pytest-cov"] + [[package]] name = "shellingham" version = "1.5.0.post1" @@ -4523,4 +4715,4 @@ files = [ [metadata] lock-version = "2.0" python-versions = ">=3.10,<3.11" -content-hash = "34ea7766877c41c5778a7c2339615fa7da868a2fe2bbeaa683505f9ac19f979d" +content-hash = "fefa554270da664690bbd98cc6e742e75960435c4ac4d178ae32a075717ff5e3" diff --git a/pyproject.toml b/pyproject.toml index b6ecae2..7db4164 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -46,6 +46,7 @@ lxml = "^4.9.1" pyarrow = "^11.0.0" duckdb = "^0.6.1" sqlfluff = "^1.4.5" +geopandas = "^0.13.2" [build-system] requires = ["poetry-core>=1.0.0"] diff --git a/src/data_common/dataset/resource_management.py b/src/data_common/dataset/resource_management.py index 898831d..35251a6 100644 --- a/src/data_common/dataset/resource_management.py +++ b/src/data_common/dataset/resource_management.py @@ -13,6 +13,7 @@ from shutil import copyfile from typing import Any, Callable, Dict, Literal, TypedDict, TypeVar, cast from urllib.parse import urlencode +import geopandas as gpd import pandas as pd import pytest @@ -169,7 +170,6 @@ def get_df(self) -> pd.DataFrame: raise ValueError(f"Unhandled file type {self.path.suffix}") def get_resource(self, inline_data: bool = False) -> dict[str, Any]: - if self.has_resource_yaml: yaml = YAML(typ="safe") with open(self.resource_path, "r") as f: @@ -205,7 +205,7 @@ def get_schema_from_file( ) -> SchemaValidator: return update_table_schema(self.path, existing_schema) - def rebuild_yaml(self): + def rebuild_yaml(self, is_geodata: bool = False): """ Recreate yaml file from source file, preserving any custom values from previously existing yaml file """ @@ -218,6 +218,15 @@ def rebuild_yaml(self): desc["schema"] = self.get_schema_from_file(existing_desc.get("schema", None)) desc["path"] = self.path.name + # if geodata - drop geometry example from schema + if is_geodata: + new_fields = [] + for f in desc["schema"]["fields"]: + if f["name"] == "geometry": + f["example"] = "" + new_fields.append(f) + desc["schema"]["fields"] = new_fields + # ensure a blank title and description new_dict = {"title": None, "description": None, "custom": {}} @@ -337,7 +346,6 @@ def build_from_function(self): ) return None if ":" in build_module and " " not in build_module: - module, function = build_module.split(":") module = importlib.import_module(module) function = getattr(module, function) @@ -680,7 +688,6 @@ def derive_bump_rule_from_change(self) -> tuple[version_rules, str] | None: ) if current_data != previous_data: - dict_diff = diff_dicts(previous_data, current_data) rich.print(dict_diff) @@ -809,8 +816,13 @@ def rebuild_resource(self, slug: str): resource.rebuild_yaml() def rebuild_all_resources(self): + is_geodata = self.is_geodata() for resource in self.resources().values(): - resource.rebuild_yaml() + resource.rebuild_yaml(is_geodata=is_geodata) + + def is_geodata(self) -> bool: + desc = self.get_datapackage() + return desc["custom"].get("is_geodata", False) def get_datapackage(self) -> dict[str, Any]: yaml = YAML(typ="safe") @@ -897,15 +909,21 @@ def copy_resources(self): """ desc = self.get_datapackage() - csv_value = desc.get("custom", {}).get("formats", {}).get("csv", True) - parquet_value = desc.get("custom", {}).get("formats", {}).get("parquet", True) + formats = desc.get("custom", {}).get("formats", {}) + csv_value = formats.get("csv", True) + parquet_value = formats.get("parquet", True) + geojson_value = formats.get("geojson", True) + geopackage_value = formats.get("gpkg", True) csv_copy_query = """ copy (select * from {{ source }}) to {{ dest }} (format PARQUET); """ + exclude = "" + if desc["custom"].get("is_geodata", False): + exclude = "EXCLUDE geometry" parquet_copy_query = """ - copy (select * from {{ source }}) to {{ dest }} (HEADER, DELIMITER ','); + copy (select * {{ exclude }} from {{ source }}) to {{ dest }} (HEADER, DELIMITER ','); """ for r in self.resources().values(): @@ -916,12 +934,24 @@ def copy_resources(self): if parquet_value: parquet_file = self.build_path() / (r.path.stem + ".parquet") duck_query(csv_copy_query, source=r.path, dest=parquet_file) + if geojson_value or geopackage_value: + raise ValueError( + "Writing to geojson/geopackage from csv source not supported. Use parquet internally." + ) elif r.path.suffix == ".parquet": if parquet_value: copyfile(r.path, self.build_path() / r.path.name) if csv_value: csv_file = self.build_path() / (r.path.stem + ".csv") duck_query(parquet_copy_query, source=r.path, dest=csv_file) + if geojson_value: + geojson_path = self.build_path() / (r.path.stem + ".geojson") + gdf = gpd.read_parquet(r.path) + gdf.to_file(geojson_path, driver="GeoJSON") + if geopackage_value: + geopackage_path = self.build_path() / (r.path.stem + ".gpkg") + gdf = gpd.read_parquet(r.path) + gdf.to_file(geopackage_path, driver="GPKG") def get_datapackage_order(self) -> int: """ @@ -1134,6 +1164,9 @@ def build_excel(self): for sheet_name, df in sheets.items(): short_sheet_name = sheet_name[-31:] # only allow 31 characters + # if geometry is column - remove it + if "geometry" in df.columns: + df = df.drop(columns=["geometry"]) df.to_excel(writer, sheet_name=short_sheet_name, index=False) for column in df: @@ -1142,7 +1175,6 @@ def build_excel(self): col_idx = df.columns.get_loc(column) if column_length <= 50: - writer.sheets[short_sheet_name].set_column( col_idx, col_idx, column_length ) @@ -1236,7 +1268,6 @@ def convert_to_array_from_comma(value: t) -> list[t]: # for instance splitting comma seperated fields to arrays for resource_slug, modify_maps in composite_options["modify"].items(): for column, modify_type in modify_maps.items(): - # split specified columns to arrays and update the schema if modify_type == "comma-to-array": for resource in datapackage["resources"]: @@ -1282,7 +1313,6 @@ def build_markdown(self): ... def print_status(self): - resources = list(self.resources().values()) df = pd.DataFrame( diff --git a/src/data_common/resources/dataset_template/{{cookiecutter.slug}}/datapackage.yaml b/src/data_common/resources/dataset_template/{{cookiecutter.slug}}/datapackage.yaml index 94cce6d..89c9176 100644 --- a/src/data_common/resources/dataset_template/{{cookiecutter.slug}}/datapackage.yaml +++ b/src/data_common/resources/dataset_template/{{cookiecutter.slug}}/datapackage.yaml @@ -31,6 +31,9 @@ custom: formats: csv: true parquet: true + gpkg: false + geojson: false + is_geodata: false composite: xlsx: include: all From a88eba7f032a5580025cd4dc5dd76bec90350978 Mon Sep 17 00:00:00 2001 From: Alex Parsons Date: Mon, 7 Aug 2023 14:41:02 +0000 Subject: [PATCH 02/20] Update pyyaml --- poetry.lock | 77 ++++++++++++++++++++++++++++---------------------- pyproject.toml | 1 - 2 files changed, 44 insertions(+), 34 deletions(-) diff --git a/poetry.lock b/poetry.lock index 2670100..dff4c12 100644 --- a/poetry.lock +++ b/poetry.lock @@ -3421,42 +3421,53 @@ files = [ ] [[package]] -name = "PyYAML" -version = "5.4.1" +name = "pyyaml" +version = "6.0.1" description = "YAML parser and emitter for Python" category = "main" optional = false -python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*, !=3.5.*" +python-versions = ">=3.6" files = [ - {file = "PyYAML-5.4.1-cp27-cp27m-macosx_10_9_x86_64.whl", hash = "sha256:3b2b1824fe7112845700f815ff6a489360226a5609b96ec2190a45e62a9fc922"}, - {file = "PyYAML-5.4.1-cp27-cp27m-win32.whl", hash = "sha256:129def1b7c1bf22faffd67b8f3724645203b79d8f4cc81f674654d9902cb4393"}, - {file = "PyYAML-5.4.1-cp27-cp27m-win_amd64.whl", hash = "sha256:4465124ef1b18d9ace298060f4eccc64b0850899ac4ac53294547536533800c8"}, - {file = "PyYAML-5.4.1-cp27-cp27mu-manylinux1_x86_64.whl", hash = "sha256:bb4191dfc9306777bc594117aee052446b3fa88737cd13b7188d0e7aa8162185"}, - {file = "PyYAML-5.4.1-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:6c78645d400265a062508ae399b60b8c167bf003db364ecb26dcab2bda048253"}, - {file = "PyYAML-5.4.1-cp36-cp36m-manylinux1_x86_64.whl", hash = "sha256:4e0583d24c881e14342eaf4ec5fbc97f934b999a6828693a99157fde912540cc"}, - {file = "PyYAML-5.4.1-cp36-cp36m-manylinux2014_aarch64.whl", hash = "sha256:72a01f726a9c7851ca9bfad6fd09ca4e090a023c00945ea05ba1638c09dc3347"}, - {file = "PyYAML-5.4.1-cp36-cp36m-manylinux2014_s390x.whl", hash = "sha256:895f61ef02e8fed38159bb70f7e100e00f471eae2bc838cd0f4ebb21e28f8541"}, - {file = "PyYAML-5.4.1-cp36-cp36m-win32.whl", hash = "sha256:3bd0e463264cf257d1ffd2e40223b197271046d09dadf73a0fe82b9c1fc385a5"}, - {file = "PyYAML-5.4.1-cp36-cp36m-win_amd64.whl", hash = "sha256:e4fac90784481d221a8e4b1162afa7c47ed953be40d31ab4629ae917510051df"}, - {file = "PyYAML-5.4.1-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:5accb17103e43963b80e6f837831f38d314a0495500067cb25afab2e8d7a4018"}, - {file = "PyYAML-5.4.1-cp37-cp37m-manylinux1_x86_64.whl", hash = "sha256:e1d4970ea66be07ae37a3c2e48b5ec63f7ba6804bdddfdbd3cfd954d25a82e63"}, - {file = "PyYAML-5.4.1-cp37-cp37m-manylinux2014_aarch64.whl", hash = "sha256:cb333c16912324fd5f769fff6bc5de372e9e7a202247b48870bc251ed40239aa"}, - {file = "PyYAML-5.4.1-cp37-cp37m-manylinux2014_s390x.whl", hash = "sha256:fe69978f3f768926cfa37b867e3843918e012cf83f680806599ddce33c2c68b0"}, - {file = "PyYAML-5.4.1-cp37-cp37m-win32.whl", hash = "sha256:dd5de0646207f053eb0d6c74ae45ba98c3395a571a2891858e87df7c9b9bd51b"}, - {file = "PyYAML-5.4.1-cp37-cp37m-win_amd64.whl", hash = "sha256:08682f6b72c722394747bddaf0aa62277e02557c0fd1c42cb853016a38f8dedf"}, - {file = "PyYAML-5.4.1-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:d2d9808ea7b4af864f35ea216be506ecec180628aced0704e34aca0b040ffe46"}, - {file = "PyYAML-5.4.1-cp38-cp38-manylinux1_x86_64.whl", hash = "sha256:8c1be557ee92a20f184922c7b6424e8ab6691788e6d86137c5d93c1a6ec1b8fb"}, - {file = "PyYAML-5.4.1-cp38-cp38-manylinux2014_aarch64.whl", hash = "sha256:fd7f6999a8070df521b6384004ef42833b9bd62cfee11a09bda1079b4b704247"}, - {file = "PyYAML-5.4.1-cp38-cp38-manylinux2014_s390x.whl", hash = "sha256:bfb51918d4ff3d77c1c856a9699f8492c612cde32fd3bcd344af9be34999bfdc"}, - {file = "PyYAML-5.4.1-cp38-cp38-win32.whl", hash = "sha256:fa5ae20527d8e831e8230cbffd9f8fe952815b2b7dae6ffec25318803a7528fc"}, - {file = "PyYAML-5.4.1-cp38-cp38-win_amd64.whl", hash = "sha256:0f5f5786c0e09baddcd8b4b45f20a7b5d61a7e7e99846e3c799b05c7c53fa696"}, - {file = "PyYAML-5.4.1-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:294db365efa064d00b8d1ef65d8ea2c3426ac366c0c4368d930bf1c5fb497f77"}, - {file = "PyYAML-5.4.1-cp39-cp39-manylinux1_x86_64.whl", hash = "sha256:74c1485f7707cf707a7aef42ef6322b8f97921bd89be2ab6317fd782c2d53183"}, - {file = "PyYAML-5.4.1-cp39-cp39-manylinux2014_aarch64.whl", hash = "sha256:d483ad4e639292c90170eb6f7783ad19490e7a8defb3e46f97dfe4bacae89122"}, - {file = "PyYAML-5.4.1-cp39-cp39-manylinux2014_s390x.whl", hash = "sha256:fdc842473cd33f45ff6bce46aea678a54e3d21f1b61a7750ce3c498eedfe25d6"}, - {file = "PyYAML-5.4.1-cp39-cp39-win32.whl", hash = "sha256:49d4cdd9065b9b6e206d0595fee27a96b5dd22618e7520c33204a4a3239d5b10"}, - {file = "PyYAML-5.4.1-cp39-cp39-win_amd64.whl", hash = "sha256:c20cfa2d49991c8b4147af39859b167664f2ad4561704ee74c1de03318e898db"}, - {file = "PyYAML-5.4.1.tar.gz", hash = "sha256:607774cbba28732bfa802b54baa7484215f530991055bb562efbed5b2f20a45e"}, + {file = "PyYAML-6.0.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:d858aa552c999bc8a8d57426ed01e40bef403cd8ccdd0fc5f6f04a00414cac2a"}, + {file = "PyYAML-6.0.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:fd66fc5d0da6d9815ba2cebeb4205f95818ff4b79c3ebe268e75d961704af52f"}, + {file = "PyYAML-6.0.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:69b023b2b4daa7548bcfbd4aa3da05b3a74b772db9e23b982788168117739938"}, + {file = "PyYAML-6.0.1-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:81e0b275a9ecc9c0c0c07b4b90ba548307583c125f54d5b6946cfee6360c733d"}, + {file = "PyYAML-6.0.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ba336e390cd8e4d1739f42dfe9bb83a3cc2e80f567d8805e11b46f4a943f5515"}, + {file = "PyYAML-6.0.1-cp310-cp310-win32.whl", hash = "sha256:bd4af7373a854424dabd882decdc5579653d7868b8fb26dc7d0e99f823aa5924"}, + {file = "PyYAML-6.0.1-cp310-cp310-win_amd64.whl", hash = "sha256:fd1592b3fdf65fff2ad0004b5e363300ef59ced41c2e6b3a99d4089fa8c5435d"}, + {file = "PyYAML-6.0.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:6965a7bc3cf88e5a1c3bd2e0b5c22f8d677dc88a455344035f03399034eb3007"}, + {file = "PyYAML-6.0.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:f003ed9ad21d6a4713f0a9b5a7a0a79e08dd0f221aff4525a2be4c346ee60aab"}, + {file = "PyYAML-6.0.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:42f8152b8dbc4fe7d96729ec2b99c7097d656dc1213a3229ca5383f973a5ed6d"}, + {file = "PyYAML-6.0.1-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:062582fca9fabdd2c8b54a3ef1c978d786e0f6b3a1510e0ac93ef59e0ddae2bc"}, + {file = "PyYAML-6.0.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d2b04aac4d386b172d5b9692e2d2da8de7bfb6c387fa4f801fbf6fb2e6ba4673"}, + {file = "PyYAML-6.0.1-cp311-cp311-win32.whl", hash = "sha256:1635fd110e8d85d55237ab316b5b011de701ea0f29d07611174a1b42f1444741"}, + {file = "PyYAML-6.0.1-cp311-cp311-win_amd64.whl", hash = "sha256:bf07ee2fef7014951eeb99f56f39c9bb4af143d8aa3c21b1677805985307da34"}, + {file = "PyYAML-6.0.1-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:50550eb667afee136e9a77d6dc71ae76a44df8b3e51e41b77f6de2932bfe0f47"}, + {file = "PyYAML-6.0.1-cp36-cp36m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1fe35611261b29bd1de0070f0b2f47cb6ff71fa6595c077e42bd0c419fa27b98"}, + {file = "PyYAML-6.0.1-cp36-cp36m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:704219a11b772aea0d8ecd7058d0082713c3562b4e271b849ad7dc4a5c90c13c"}, + {file = "PyYAML-6.0.1-cp36-cp36m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:afd7e57eddb1a54f0f1a974bc4391af8bcce0b444685d936840f125cf046d5bd"}, + {file = "PyYAML-6.0.1-cp36-cp36m-win32.whl", hash = "sha256:fca0e3a251908a499833aa292323f32437106001d436eca0e6e7833256674585"}, + {file = "PyYAML-6.0.1-cp36-cp36m-win_amd64.whl", hash = "sha256:f22ac1c3cac4dbc50079e965eba2c1058622631e526bd9afd45fedd49ba781fa"}, + {file = "PyYAML-6.0.1-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:b1275ad35a5d18c62a7220633c913e1b42d44b46ee12554e5fd39c70a243d6a3"}, + {file = "PyYAML-6.0.1-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:18aeb1bf9a78867dc38b259769503436b7c72f7a1f1f4c93ff9a17de54319b27"}, + {file = "PyYAML-6.0.1-cp37-cp37m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:596106435fa6ad000c2991a98fa58eeb8656ef2325d7e158344fb33864ed87e3"}, + {file = "PyYAML-6.0.1-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:baa90d3f661d43131ca170712d903e6295d1f7a0f595074f151c0aed377c9b9c"}, + {file = "PyYAML-6.0.1-cp37-cp37m-win32.whl", hash = "sha256:9046c58c4395dff28dd494285c82ba00b546adfc7ef001486fbf0324bc174fba"}, + {file = "PyYAML-6.0.1-cp37-cp37m-win_amd64.whl", hash = "sha256:4fb147e7a67ef577a588a0e2c17b6db51dda102c71de36f8549b6816a96e1867"}, + {file = "PyYAML-6.0.1-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:1d4c7e777c441b20e32f52bd377e0c409713e8bb1386e1099c2415f26e479595"}, + {file = "PyYAML-6.0.1-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a0cd17c15d3bb3fa06978b4e8958dcdc6e0174ccea823003a106c7d4d7899ac5"}, + {file = "PyYAML-6.0.1-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:28c119d996beec18c05208a8bd78cbe4007878c6dd15091efb73a30e90539696"}, + {file = "PyYAML-6.0.1-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7e07cbde391ba96ab58e532ff4803f79c4129397514e1413a7dc761ccd755735"}, + {file = "PyYAML-6.0.1-cp38-cp38-win32.whl", hash = "sha256:184c5108a2aca3c5b3d3bf9395d50893a7ab82a38004c8f61c258d4428e80206"}, + {file = "PyYAML-6.0.1-cp38-cp38-win_amd64.whl", hash = "sha256:1e2722cc9fbb45d9b87631ac70924c11d3a401b2d7f410cc0e3bbf249f2dca62"}, + {file = "PyYAML-6.0.1-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:9eb6caa9a297fc2c2fb8862bc5370d0303ddba53ba97e71f08023b6cd73d16a8"}, + {file = "PyYAML-6.0.1-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:c8098ddcc2a85b61647b2590f825f3db38891662cfc2fc776415143f599bb859"}, + {file = "PyYAML-6.0.1-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5773183b6446b2c99bb77e77595dd486303b4faab2b086e7b17bc6bef28865f6"}, + {file = "PyYAML-6.0.1-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:b786eecbdf8499b9ca1d697215862083bd6d2a99965554781d0d8d1ad31e13a0"}, + {file = "PyYAML-6.0.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bc1bf2925a1ecd43da378f4db9e4f799775d6367bdb94671027b73b393a7c42c"}, + {file = "PyYAML-6.0.1-cp39-cp39-win32.whl", hash = "sha256:faca3bdcf85b2fc05d06ff3fbc1f83e1391b3e724afa3feba7d13eeab355484c"}, + {file = "PyYAML-6.0.1-cp39-cp39-win_amd64.whl", hash = "sha256:510c9deebc5c0225e8c96813043e62b680ba2f9c50a08d3724c7f28a747d1486"}, + {file = "PyYAML-6.0.1.tar.gz", hash = "sha256:bfdf460b1736c775f2ba9f6a92bca30bc2095067b8a9d77876d1fad6cc3b4a43"}, ] [[package]] @@ -4715,4 +4726,4 @@ files = [ [metadata] lock-version = "2.0" python-versions = ">=3.10,<3.11" -content-hash = "fefa554270da664690bbd98cc6e742e75960435c4ac4d178ae32a075717ff5e3" +content-hash = "5e6c63d8cdd5908c182672a502e7200f4abed104c4e8292e9a14e5104feac4ae" diff --git a/pyproject.toml b/pyproject.toml index 7db4164..68c6b25 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -9,7 +9,6 @@ python = ">=3.10,<3.11" numpy = "1.21.0" openpyxl = "3.0.7" pandas = "1.4.2" -PyYAML = "5.4.1" scikit-learn = "^1.0.2" unicodecsv = "0.14.1" xlrd = "2.0.1" From 8ea4cf7663d8eecd8fc6351c45f4f0653e22a61e Mon Sep 17 00:00:00 2001 From: Alex Parsons Date: Mon, 7 Aug 2023 15:25:41 +0000 Subject: [PATCH 03/20] Add typing exemptions --- src/data_common/charting/download.py | 4 ++-- src/data_common/charting/sw_theme.py | 2 +- src/data_common/charting/theme.py | 2 +- src/data_common/pandas/df_extensions/space.py | 8 +------- 4 files changed, 5 insertions(+), 11 deletions(-) diff --git a/src/data_common/charting/download.py b/src/data_common/charting/download.py index 76e6852..df56f50 100644 --- a/src/data_common/charting/download.py +++ b/src/data_common/charting/download.py @@ -39,12 +39,12 @@ def json_to_chart(json_spec: str) -> alt.Chart: del di_copy["datasets"] del di_copy["width"] c = Chart.from_dict(di_copy) - chart += c + chart += c # type: ignore else: del di["width"] del di["config"]["view"] chart = Chart.from_dict(di) - return chart + return chart # type: ignore def get_chart_from_url(url: str, n: int = 0) -> alt.Chart: diff --git a/src/data_common/charting/sw_theme.py b/src/data_common/charting/sw_theme.py index bc6d292..6ca81ae 100644 --- a/src/data_common/charting/sw_theme.py +++ b/src/data_common/charting/sw_theme.py @@ -118,7 +118,7 @@ def color_scale( use_palette = palette[: len(domain)] if reverse: use_palette = use_palette[::-1] - return alt.Scale(domain=domain, range=use_palette) + return alt.Scale(domain=domain, range=use_palette) # type: ignore font = "Lato" diff --git a/src/data_common/charting/theme.py b/src/data_common/charting/theme.py index 3b2935a..2e1316a 100644 --- a/src/data_common/charting/theme.py +++ b/src/data_common/charting/theme.py @@ -131,7 +131,7 @@ def color_scale( use_palette = palette[: len(domain)] if reverse: use_palette = use_palette[::-1] - return alt.Scale(domain=domain, range=use_palette) + return alt.Scale(domain=domain, range=use_palette) # type: ignore font = "Source Sans Pro" diff --git a/src/data_common/pandas/df_extensions/space.py b/src/data_common/pandas/df_extensions/space.py index 6df9673..d7042dd 100644 --- a/src/data_common/pandas/df_extensions/space.py +++ b/src/data_common/pandas/df_extensions/space.py @@ -125,14 +125,12 @@ def t(x): self.label_df = label_df def set_k(self, k: int) -> "Cluster": - new = copy.deepcopy(self) new.k = k return new def get_label_name(self, n, include_short=True) -> str: - short_label = n name = self.label_names.get(self.k, {}).get(n, short_label) if include_short: @@ -205,7 +203,6 @@ def map_from_anchor(self, anchor: pd.DataFrame | Path) -> dict[int, int]: return mapping.to_dict() def get_label_options(self) -> list: - return [self.get_label_name(x) for x in range(1, self.k + 1)] def get_cluster_label_ids(self) -> pd.Series: @@ -215,7 +212,6 @@ def get_cluster_label_ids(self) -> pd.Series: return labels def get_cluster_labels(self, include_short=True) -> ArrayLike: - labels = self.get_cluster_label_ids() def f(x): @@ -227,7 +223,6 @@ def f(x): label_array = get_cluster_labels def get_cluster_descs(self) -> ArrayLike: - labels = self.get_cluster_label_ids() labels = labels.apply(lambda x: self.get_label_desc(n=x)) return np.array(labels) @@ -331,7 +326,7 @@ def _get_clusters(self, k: int): """ fetch k means results for this cluster """ - km = KMeans(n_clusters=k, random_state=self.default_seed, n_init=10) + km = KMeans(n_clusters=k, random_state=self.default_seed, n_init=10) # type: ignore return km.fit(self.df) def get_clusters(self, k: int): @@ -622,7 +617,6 @@ def join_distance(df_label_dict: Dict[str, pd.DataFrame]) -> pd.DataFrame: """ def prepare(df, label): - return ( df.set_index(list(df.columns[:2])) .rename(columns={"distance": label}) From f1f8eb10d2a63bcd9301e3ff690e40e498905fd4 Mon Sep 17 00:00:00 2001 From: Alex Parsons Date: Mon, 7 Aug 2023 16:16:56 +0000 Subject: [PATCH 04/20] Make updating run notebook optional --- src/data_common/management/run_notebook.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/src/data_common/management/run_notebook.py b/src/data_common/management/run_notebook.py index 27058aa..0cb12eb 100644 --- a/src/data_common/management/run_notebook.py +++ b/src/data_common/management/run_notebook.py @@ -3,7 +3,7 @@ from pathlib import Path -def run_notebook(notebook_filename: Path): +def run_notebook(notebook_filename: Path, save: bool = True): """ Run a notebook as part of another process """ @@ -12,6 +12,8 @@ def run_notebook(notebook_filename: Path): nb = nbformat.read(f, as_version=4) ep = ExecutePreprocessor(timeout=600) ep.preprocess(nb, {"metadata": {"path": "notebooks/"}}) - with open(notebook_filename, "w", encoding="utf-8") as f: - nbformat.write(nb, f) + if save: + print(f"Saving notebook: {notebook_filename}") + with open(notebook_filename, "w", encoding="utf-8") as f: + nbformat.write(nb, f) print("Done") From 472a1d74ceb9020e0dc2da055453adf50879e08e Mon Sep 17 00:00:00 2001 From: Alex Parsons Date: Tue, 8 Aug 2023 09:54:04 +0000 Subject: [PATCH 05/20] Change where notebook settings are stored --- src/data_common/management/settings.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/data_common/management/settings.py b/src/data_common/management/settings.py index 7ab85b0..41aa195 100644 --- a/src/data_common/management/settings.py +++ b/src/data_common/management/settings.py @@ -25,7 +25,11 @@ def get_settings( settings_file = Path(*top_level, toml_file) - data = toml.load(settings_file)["notebook"]["settings"] + try: + data = toml.load(settings_file)["tool"]["notebook"]["settings"] + except KeyError: + # backward compatibiiity for invalid toml + data = toml.load(settings_file)["notebook"]["settings"] env_data = {} if env_file and Path(*top_level, env_file).exists(): From d368fd5d1de3661f6b6de75cd696d2d044039036 Mon Sep 17 00:00:00 2001 From: Alex Parsons Date: Wed, 9 Aug 2023 14:27:13 +0000 Subject: [PATCH 06/20] formatting fixes --- src/data_common/dataset/jekyll_management.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/src/data_common/dataset/jekyll_management.py b/src/data_common/dataset/jekyll_management.py index ec0012c..39985ab 100644 --- a/src/data_common/dataset/jekyll_management.py +++ b/src/data_common/dataset/jekyll_management.py @@ -12,7 +12,6 @@ def markdown_with_frontmatter( data: dict[str, Any], dest: Path, content: str = "", from_file: Path | None = None ): - if content and from_file: raise ValueError("Trying to use contents and from_file arguments") @@ -31,7 +30,6 @@ def markdown_with_frontmatter( def render_download_format_to_dir(items: list[dict[str, Any]], output_dir: Path): - if output_dir.exists() is False: output_dir.mkdir() # remove existing files @@ -52,7 +50,6 @@ def render_download_format_to_dir(items: list[dict[str, Any]], output_dir: Path) def render_sources_to_dir(items: list[dict[str, Any]], output_dir: Path): - if output_dir.exists() is False: output_dir.mkdir() # remove existing files @@ -103,7 +100,7 @@ def make_version_info_page(items: list[dict[str, Any]], output_dir: Path): df = pd.DataFrame(items)[["name", "title", "version", "full_version"]] for name, d in df.groupby("name"): - safe_name = name.replace("-", "_") + safe_name = str(name).replace("-", "_") data_dict = { "name": name, "title": d["title"].iloc[0], From edbef720229296ba4741fa4ca36e7b17b4774c32 Mon Sep 17 00:00:00 2001 From: Alex Parsons Date: Wed, 9 Aug 2023 14:27:59 +0000 Subject: [PATCH 07/20] Helper functions for validation tests in typing --- src/data_common/helpers/typing.py | 119 ++++++++++++++++++++++++++++++ 1 file changed, 119 insertions(+) create mode 100644 src/data_common/helpers/typing.py diff --git a/src/data_common/helpers/typing.py b/src/data_common/helpers/typing.py new file mode 100644 index 0000000..85ae348 --- /dev/null +++ b/src/data_common/helpers/typing.py @@ -0,0 +1,119 @@ +from typing import ( + get_args, + Any, + Type, + TypeVar, + Callable, + Generic, + ParamSpec, + get_type_hints, +) + +from inspect import signature + +T = TypeVar("T") +P = ParamSpec("P") + + +class ValidationTest(Generic[T]): + root_type: Type[T] + test: Callable[[T], Any] + error: Callable[[T], Exception] + + def __init__( + self, + root_type: Type[T], + test: Callable[[T], Any], + error: Callable[[T], Exception], + ): + self.root_type = root_type + self.test = test + self.error = error + + def __call__(self, *args, **kwargs): + return self.test(*args, **kwargs) + + +def inspect_function(func): + sig = signature(func) + parameters = sig.parameters + args = [] + kwargs = {} + + for param_name, param in parameters.items(): + if param.default == param.empty: + args.append(param_name) + else: + kwargs[param_name] = param.default + + return args, kwargs + + +def merge_args_kwargs(func, *args, **kwargs): + expected_args, expected_kwargs = inspect_function(func) + + if len(args) > len(expected_args): + raise ValueError( + f"Function expects {len(expected_args)} positional arguments, but {len(args)} were provided." + ) + + merged_kwargs = expected_kwargs.copy() + + for i, arg in enumerate(args): + merged_kwargs[expected_args[i]] = arg + + merged_kwargs.update(kwargs) + + return merged_kwargs + + +def enforce_types(func: Callable[P, T]) -> Callable[P, T]: + """ + This lets us move some basic validation items into the type hint structure + """ + type_hints = get_type_hints(func, include_extras=True) + expected_args, expected_kwargs = inspect_function(func) + + def wrapper(*args: P.args, **kwargs: P.kwargs): + if len(args) > len(expected_args): + raise ValueError( + f"Function expects {len(expected_args)} positional arguments, but {len(args)} were provided." + ) + + merged_kwargs = expected_kwargs.copy() + + for i, arg in enumerate(args): + merged_kwargs[expected_args[i]] = arg + + merged_kwargs.update(kwargs) + + for arg, type_ in type_hints.items(): + if arg == "return": + continue + parameter_value = merged_kwargs[arg] + enforce_type(parameter_value, type_) + value = func(*args, **kwargs) + if "return" in type_hints: + enforce_type(value, type_hints["return"]) + return value + + return wrapper + + +def enforce_type(object: T, annotated_type: Type[T]) -> None: + meta_data = get_args(annotated_type) + + if not meta_data: + if not isinstance(object, annotated_type): + raise TypeError(f"Expected {annotated_type} but got {type(object)}") + + if meta_data: + type_ = meta_data[0] + + if not isinstance(object, type_): + raise TypeError(f"Expected {type_} but got {type(object)}") + tests = meta_data[1:] + + for test in tests: + if not test(object): + raise test.error(object) From a0260459ed46f3ab82b4aff0fd25df68a44e4d1b Mon Sep 17 00:00:00 2001 From: Alex Parsons Date: Wed, 9 Aug 2023 15:54:42 +0000 Subject: [PATCH 08/20] Add new decorator based approach for duck queries --- src/data_common/db/duck.py | 118 ++++++++++++++++++++++++++++++++++--- 1 file changed, 109 insertions(+), 9 deletions(-) diff --git a/src/data_common/db/duck.py b/src/data_common/db/duck.py index d4dfbfb..6121705 100644 --- a/src/data_common/db/duck.py +++ b/src/data_common/db/duck.py @@ -1,14 +1,23 @@ -import inspect from functools import lru_cache from pathlib import Path -from typing import Any, Literal, Callable - +from typing import Any, Literal, Callable, Protocol, runtime_checkable, Union import duckdb import jinja2 import pandas as pd import toml +@runtime_checkable +class DuckView(Protocol): + query: str + + +@runtime_checkable +class DuckMacro(Protocol): + args: list[str] + macro: str + + @lru_cache def get_settings(toml_file: str = "pyproject.toml") -> dict: """ @@ -122,17 +131,57 @@ def __truediv__(self, other: str) -> "DuckUrl": return DuckUrl(f"{url}/{other}") +SourceType = Path | DuckUrl | pd.DataFrame + + +@runtime_checkable +class SourceView(Protocol): + @property + def source(self) -> SourceType: + ... + + class DuckQuery: def __init__(self): self.ddb = duckdb.connect(":memory:") self.https: bool = False + self.variables = {} + self._last_query: DuckResponse | None = None + + def set_jinja_variable(self, name: str, value: Any) -> "DuckQuery": + """ + Set jinja variables that can then be used in queries + """ + self.variables[name] = value + return self + + @property + def last_query(self): + """ + Get query for last view registered + """ + if not self._last_query: + raise ValueError("No previous query to execute") + return self._last_query def activate_https(self) -> None: if self.https is False: self.ddb.execute("install httpfs; load httpfs") - def register(self, name: str, item: pd.DataFrame | DuckUrl | Path) -> None: + def as_source(self, item: SourceView) -> "DuckResponse": + """ + Decorator to convert something implementing SourceView to a DuckResponse + """ + name = item.__name__ # type: ignore + source = getattr(item, "source", None) + + if source is None: + raise ValueError("Class must have a source attribute") + self.register(name, source) + return self.view(name) + + def register(self, name: str, item: SourceType) -> None: if isinstance(item, DuckUrl): self.activate_https() self.ddb.execute( @@ -156,14 +205,38 @@ def add_view(self, name: str, query: str) -> "DuckQuery": self.ddb.execute(f"CREATE OR REPLACE VIEW {name} AS {query}") return self + def as_view(self, cls: DuckView) -> "DuckResponse": + """ + Decorator to convert something implementing DuckView to a DuckResponse + """ + + query = getattr(cls, "query", None) + + if query is None: + raise ValueError("Class must have a query method") + + store_as_view = getattr(cls, "store_as_view", None) # type: ignore + + if store_as_view is None: + store_as_view: str = cls.__name__ # type: ignore + + return self.query(query, store_as=store_as_view) + + def view(self, view_name: str): + """ """ + return self.query(f"SELECT * FROM {view_name}") + def query( - self, query: str | Path, store_as: str | None = None, **kwargs: Any + self, query: str | Path | DuckView, store_as: str | None = None, **kwargs: Any ) -> DuckResponse: """ Execute a query """ + if isinstance(query, DuckView): + return self.as_view(query) + + query_vars = self.variables | kwargs - # if the query is a path, read it in if isinstance(query, Path) or query.endswith(".sql"): path = Path(query) if not path.exists(): @@ -189,11 +262,11 @@ def process_kwarg(key: str, value: Any) -> Any: return value - if kwargs: + if query_vars: env = jinja2.Environment() template = env.from_string(query) - args = {k: process_kwarg(k, v) for k, v in kwargs.items()} + args = {k: process_kwarg(k, v) for k, v in query_vars.items()} rendered_query = template.render(**args) else: @@ -202,9 +275,36 @@ def process_kwarg(key: str, value: Any) -> Any: if store_as: self.ddb.execute(f"CREATE OR REPLACE VIEW {store_as} AS {rendered_query}") rendered_query = f"SELECT * FROM {store_as}" - return DuckResponse(self, rendered_query) + + response = DuckResponse(self, rendered_query) + + self._last_query = response + return response + + def as_macro(self, item: DuckMacro): + name = item.__name__ # type: ignore + + args = getattr(item, "args", None) + + if args is None: + raise ValueError("Macro must have an args attribute") + + macro = getattr(item, "macro", None) + + if macro is None: + raise ValueError("Macro must have a macro method") + + macro_query = f""" + CREATE OR REPLACE MACRO {name}({", ".join(args)}) AS + {macro} + """ + self.query(macro_query).run() + + return item def macro(self, func: Callable[..., str]) -> None: + # depricated: converts a function + # prefer 'as_macro' for clarity # get function name name = func.__name__ # get arguments From 69f4b5780f6c8a721be653e016cdf600c81c67cc Mon Sep 17 00:00:00 2001 From: Alex Parsons Date: Wed, 9 Aug 2023 15:55:20 +0000 Subject: [PATCH 09/20] Add helpers for working with parquet files --- src/data_common/helpers/parquet.py | 104 +++++++++++++++++++++++++++++ 1 file changed, 104 insertions(+) create mode 100644 src/data_common/helpers/parquet.py diff --git a/src/data_common/helpers/parquet.py b/src/data_common/helpers/parquet.py new file mode 100644 index 0000000..92c12e9 --- /dev/null +++ b/src/data_common/helpers/parquet.py @@ -0,0 +1,104 @@ +from pathlib import Path +import pyarrow as pa +import pyarrow.parquet as pq +import pandas as pd +import geopandas as gpd +import math +from tqdm import tqdm +from geopandas.io.arrow import _arrow_to_geopandas + + +def write_split_parquet( + from_file: Path, + output_path: Path, + chunk_size: int = 1000, + compression: str = "GZIP", + silent: bool = False, +): + """ + Split a Parquet file into multiple Parquet files. + """ + # initialize output directory + if not output_path.exists(): + output_path.mkdir(parents=True) + + if output_path.exists() and not output_path.is_dir(): + raise ValueError("Output path is not a directory.") + + else: + for file in output_path.iterdir(): + file.unlink() + table = pa.parquet.read_table(from_file) + + # Calculate the total number of records + total_records = table.num_rows + + # Calculate the number of chunks needed + num_chunks = math.ceil(total_records / chunk_size) + + # Split the table into chunks and write to separate Parquet files + for chunk_idx in tqdm(list(range(num_chunks)), disable=silent): + start_idx = chunk_idx * chunk_size + end_idx = min((chunk_idx + 1) * chunk_size, total_records) + + # Slice the table to create a new chunk + chunk_table = table.slice(start_idx, end_idx - start_idx) + + # Write the chunk to a Parquet file + output_file = output_path / f"{chunk_idx}.parquet" + pq.write_table(chunk_table, output_file, compression=compression) + + +def read_parquet_directory_to_table(directory_path: Path) -> pa.Table: + """ + Read all Parquet files in a directory and combine them into a single PyArrow Table. + """ + parquet_files = [ + file for file in directory_path.iterdir() if file.suffix == ".parquet" + ] + if not parquet_files: + raise ValueError("No Parquet files found in the directory.") + + # Read Parquet files and combine them into a single DataFrame + tables = [] + for file in parquet_files: + table = pq.read_table(file) + tables.append(table) + + return pa.concat_tables(tables) + + +def read_parquet_directory(directory_path: Path) -> pd.DataFrame: + """ + Read all Parquet files in a directory and combine them into a single Pandas DataFrame. + """ + table = read_parquet_directory_to_table(directory_path) + return table.to_pandas() + + +def read_split_geoparquet(fromdir: Path) -> gpd.GeoDataFrame: + """ + Read all Parquet files in a directory and combine them into a single GeoPandas DataFrame. + """ + table = read_parquet_directory_to_table(fromdir) + # convert pyarrow table to geopandas dataframe + return _arrow_to_geopandas(table) + + +def open_geo_file(file_path: Path) -> gpd.GeoDataFrame: + """ + Open a GeoFile (GeoJSON, Shapefile, GeoPackage, etc.) and return a GeoDataFrame. + """ + + # if the file_path is a directory return a GeoDataFrame + if file_path.is_dir(): + return read_split_geoparquet(file_path) + # if the file_name is "*.parquet", get the parent directory and return a GeoDataFrame + elif file_path.name == "*.parquet": + return read_split_geoparquet(file_path.parent) + # if it's another parquet file, return that using the normal method + elif file_path.suffix == ".parquet": + return gpd.read_parquet(file_path) + # if it's a GeoJSON, Shapefile, GeoPackage, etc. return that using the normal method + else: + return gpd.read_file(file_path) From 1a81beb1f3142335df518cc92de31306b7aea532 Mon Sep 17 00:00:00 2001 From: Alex Parsons Date: Thu, 10 Aug 2023 10:39:19 +0000 Subject: [PATCH 10/20] Do not generate geoformats by default --- src/data_common/dataset/resource_management.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/data_common/dataset/resource_management.py b/src/data_common/dataset/resource_management.py index 35251a6..ec7c5bc 100644 --- a/src/data_common/dataset/resource_management.py +++ b/src/data_common/dataset/resource_management.py @@ -912,8 +912,8 @@ def copy_resources(self): formats = desc.get("custom", {}).get("formats", {}) csv_value = formats.get("csv", True) parquet_value = formats.get("parquet", True) - geojson_value = formats.get("geojson", True) - geopackage_value = formats.get("gpkg", True) + geojson_value = formats.get("geojson", False) + geopackage_value = formats.get("gpkg", False) csv_copy_query = """ copy (select * from {{ source }}) to {{ dest }} (format PARQUET); From 0030f25cae42e112f9e782ec40e005f3b9996323 Mon Sep 17 00:00:00 2001 From: Alex Parsons Date: Thu, 10 Aug 2023 10:53:49 +0000 Subject: [PATCH 11/20] Fix CSV render from parquet bug - Actually need to run the transform queries. --- src/data_common/dataset/resource_management.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/src/data_common/dataset/resource_management.py b/src/data_common/dataset/resource_management.py index 35251a6..6d74a8b 100644 --- a/src/data_common/dataset/resource_management.py +++ b/src/data_common/dataset/resource_management.py @@ -933,7 +933,7 @@ def copy_resources(self): copyfile(r.path, self.build_path() / r.path.name) if parquet_value: parquet_file = self.build_path() / (r.path.stem + ".parquet") - duck_query(csv_copy_query, source=r.path, dest=parquet_file) + duck_query(csv_copy_query, source=r.path, dest=parquet_file).run() if geojson_value or geopackage_value: raise ValueError( "Writing to geojson/geopackage from csv source not supported. Use parquet internally." @@ -943,7 +943,12 @@ def copy_resources(self): copyfile(r.path, self.build_path() / r.path.name) if csv_value: csv_file = self.build_path() / (r.path.stem + ".csv") - duck_query(parquet_copy_query, source=r.path, dest=csv_file) + duck_query( + parquet_copy_query, + exclude=exclude, + source=r.path, + dest=csv_file, + ).run() if geojson_value: geojson_path = self.build_path() / (r.path.stem + ".geojson") gdf = gpd.read_parquet(r.path) From 2dcf2af2a0d0ff1fe4f415cf69d63a35ea2c66f4 Mon Sep 17 00:00:00 2001 From: Alex Parsons Date: Fri, 11 Aug 2023 08:15:21 +0000 Subject: [PATCH 12/20] Remove __index_level_0__ from csv exports --- src/data_common/dataset/resource_management.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/src/data_common/dataset/resource_management.py b/src/data_common/dataset/resource_management.py index 1ec53ce..f504df6 100644 --- a/src/data_common/dataset/resource_management.py +++ b/src/data_common/dataset/resource_management.py @@ -918,9 +918,12 @@ def copy_resources(self): csv_copy_query = """ copy (select * from {{ source }}) to {{ dest }} (format PARQUET); """ - exclude = "" + + # __index_level_0__ is an internal parquet column that duckdb has access to + # but we don't want to export + exclude = "EXCLUDE __index_level_0__" if desc["custom"].get("is_geodata", False): - exclude = "EXCLUDE geometry" + exclude = "EXCLUDE __index_level_0__, geometry" parquet_copy_query = """ copy (select * {{ exclude }} from {{ source }}) to {{ dest }} (HEADER, DELIMITER ','); From 00a006415e01d86fce14982eb018341f0adaf60d Mon Sep 17 00:00:00 2001 From: Alex Parsons Date: Fri, 11 Aug 2023 08:30:46 +0000 Subject: [PATCH 13/20] Make all composite types remove geodata --- .../dataset/resource_management.py | 45 ++++++++++--------- 1 file changed, 24 insertions(+), 21 deletions(-) diff --git a/src/data_common/dataset/resource_management.py b/src/data_common/dataset/resource_management.py index f504df6..34ee6e1 100644 --- a/src/data_common/dataset/resource_management.py +++ b/src/data_common/dataset/resource_management.py @@ -11,7 +11,7 @@ from dataclasses import dataclass from pathlib import Path from shutil import copyfile -from typing import Any, Callable, Dict, Literal, TypedDict, TypeVar, cast +from typing import Any, Callable, Literal, TypedDict, TypeVar, cast from urllib.parse import urlencode import geopandas as gpd @@ -21,9 +21,7 @@ import xlsxwriter import re -from frictionless import Schema, describe, validate -from pyparsing import any_open_tag -from rich.markdown import Markdown +from frictionless import describe, validate from rich.table import Table from ruamel.yaml import YAML @@ -169,15 +167,18 @@ def get_df(self) -> pd.DataFrame: else: raise ValueError(f"Unhandled file type {self.path.suffix}") - def get_resource(self, inline_data: bool = False) -> dict[str, Any]: + def get_resource( + self, inline_data: bool = False, is_geodata: bool = False + ) -> dict[str, Any]: if self.has_resource_yaml: yaml = YAML(typ="safe") - with open(self.resource_path, "r") as f: + with self.resource_path.open("r") as f: resource = yaml.load(f) if inline_data: - resource["data"] = ( - self.get_df().fillna(value="").to_dict(orient="records") - ) + df = self.get_df() + if is_geodata and "geometry" in df.columns: + df = df.drop(columns=["geometry"]) + resource["data"] = df.fillna(value="").to_dict(orient="records") resource["format"] = "json" del resource["scheme"] del resource["path"] @@ -209,8 +210,6 @@ def rebuild_yaml(self, is_geodata: bool = False): """ Recreate yaml file from source file, preserving any custom values from previously existing yaml file """ - from frictionless.resource.resource import Resource - existing_desc = self.get_resource() desc = describe(self.path) desc.update(existing_desc) @@ -271,7 +270,7 @@ def rebuild_yaml(self, is_geodata: bool = False): yaml_str = yaml_str.replace("- no\n", "- 'no'\n") yaml_str = yaml_str.replace("- yes\n", "- 'yes'\n") - with open(self.resource_path, "w") as f: + with self.resource_path.open("w") as f: f.write(yaml_str) print(f"Updated config for {self.slug} to {self.resource_path}") @@ -1141,7 +1140,7 @@ def get_composite_options( return composite_options - def build_excel(self): + def build_excel(self, is_geodata: bool = False): """ Build a single excel file for all resources """ @@ -1173,7 +1172,7 @@ def build_excel(self): for sheet_name, df in sheets.items(): short_sheet_name = sheet_name[-31:] # only allow 31 characters # if geometry is column - remove it - if "geometry" in df.columns: + if is_geodata and "geometry" in df.columns: df = df.drop(columns=["geometry"]) df.to_excel(writer, sheet_name=short_sheet_name, index=False) @@ -1193,7 +1192,7 @@ def build_excel(self): writer.save() - def build_sqlite(self): + def build_sqlite(self, is_geodata: bool = False): """ Create a composite sqlite file for all resources with metadata as a seperate table. @@ -1216,7 +1215,10 @@ def build_sqlite(self): for slug, resource in self.resources().items(): if slug not in allowed_resource_slugs: continue - sheets[slug] = resource.get_df() + df = resource.get_df() + if is_geodata and "geometry" in df.columns: + df = df.drop(columns=["geometry"]) + sheets[slug] = df meta_df = resource.get_metadata_df() meta_df["resource"] = slug metadata.append(meta_df) @@ -1232,7 +1234,7 @@ def build_sqlite(self): df.to_sql(name, con, index=False) con.close() - def build_composite_json(self): + def build_composite_json(self, is_geodata: bool = False): """ This builds a composite json file that inlines the data as json. It can have less resources than the total, and some modifiers on the data. @@ -1251,7 +1253,7 @@ def build_composite_json(self): ] datapackage["resources"] = [ - x.get_resource(inline_data=True) + x.get_resource(inline_data=True, is_geodata=is_geodata) for x in self.resources().values() if x.slug in allowed_resource_slugs ] @@ -1310,9 +1312,10 @@ def build_composites(self): """ Create composite files for the datapackage """ - self.build_excel() - self.build_sqlite() - self.build_composite_json() + is_geodata = self.is_geodata() + self.build_excel(is_geodata) + self.build_sqlite(is_geodata) + self.build_composite_json(is_geodata) def build_markdown(self): """ From bdca4e075c10ee1392ea1b148ed4fe60bcac7e28 Mon Sep 17 00:00:00 2001 From: Alex Parsons Date: Fri, 11 Aug 2023 11:01:42 +0000 Subject: [PATCH 14/20] Fix geodata-less csv query --- src/data_common/dataset/resource_management.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/data_common/dataset/resource_management.py b/src/data_common/dataset/resource_management.py index 34ee6e1..4f8bd75 100644 --- a/src/data_common/dataset/resource_management.py +++ b/src/data_common/dataset/resource_management.py @@ -920,9 +920,9 @@ def copy_resources(self): # __index_level_0__ is an internal parquet column that duckdb has access to # but we don't want to export - exclude = "EXCLUDE __index_level_0__" + exclude = "EXCLUDE (__index_level_0__)" if desc["custom"].get("is_geodata", False): - exclude = "EXCLUDE __index_level_0__, geometry" + exclude = "EXCLUDE (__index_level_0__, geometry)" parquet_copy_query = """ copy (select * {{ exclude }} from {{ source }}) to {{ dest }} (HEADER, DELIMITER ','); From 3d6534777956c8ac88a8ac7dee78a302577d6aa6 Mon Sep 17 00:00:00 2001 From: Alex Parsons Date: Thu, 31 Aug 2023 19:31:56 +0000 Subject: [PATCH 15/20] Removed PyYaml requirement --- poetry.lock | 246 +++++++++++++++++++++++++++++++++++++++++-------- pyproject.toml | 2 +- 2 files changed, 210 insertions(+), 38 deletions(-) diff --git a/poetry.lock b/poetry.lock index 25e48a6..94e8f4f 100644 --- a/poetry.lock +++ b/poetry.lock @@ -1,4 +1,4 @@ -# This file is automatically @generated by Poetry and should not be changed by hand. +# This file is automatically @generated by Poetry 1.4.2 and should not be changed by hand. [[package]] name = "altair" @@ -74,6 +74,18 @@ files = [ altair = "*" altair-data-server = ">=0.4.0" +[[package]] +name = "annotated-types" +version = "0.5.0" +description = "Reusable constraint types to use with typing.Annotated" +category = "main" +optional = false +python-versions = ">=3.7" +files = [ + {file = "annotated_types-0.5.0-py3-none-any.whl", hash = "sha256:58da39888f92c276ad970249761ebea80ba544b77acddaa1a4d6cf78287d45fd"}, + {file = "annotated_types-0.5.0.tar.gz", hash = "sha256:47cdc3490d9ac1506ce92c7aaa76c579dc3509ff11e098fc867e5130ab7be802"}, +] + [[package]] name = "ansiwrap" version = "0.8.4" @@ -3008,6 +3020,145 @@ files = [ {file = "pycparser-2.21.tar.gz", hash = "sha256:e644fdec12f7872f86c58ff790da456218b10f863970249516d60a5eaca77206"}, ] +[[package]] +name = "pydantic" +version = "2.3.0" +description = "Data validation using Python type hints" +category = "main" +optional = false +python-versions = ">=3.7" +files = [ + {file = "pydantic-2.3.0-py3-none-any.whl", hash = "sha256:45b5e446c6dfaad9444819a293b921a40e1db1aa61ea08aede0522529ce90e81"}, + {file = "pydantic-2.3.0.tar.gz", hash = "sha256:1607cc106602284cd4a00882986570472f193fde9cb1259bceeaedb26aa79a6d"}, +] + +[package.dependencies] +annotated-types = ">=0.4.0" +pydantic-core = "2.6.3" +typing-extensions = ">=4.6.1" + +[package.extras] +email = ["email-validator (>=2.0.0)"] + +[[package]] +name = "pydantic-core" +version = "2.6.3" +description = "" +category = "main" +optional = false +python-versions = ">=3.7" +files = [ + {file = "pydantic_core-2.6.3-cp310-cp310-macosx_10_7_x86_64.whl", hash = "sha256:1a0ddaa723c48af27d19f27f1c73bdc615c73686d763388c8683fe34ae777bad"}, + {file = "pydantic_core-2.6.3-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:5cfde4fab34dd1e3a3f7f3db38182ab6c95e4ea91cf322242ee0be5c2f7e3d2f"}, + {file = "pydantic_core-2.6.3-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5493a7027bfc6b108e17c3383959485087d5942e87eb62bbac69829eae9bc1f7"}, + {file = "pydantic_core-2.6.3-cp310-cp310-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:84e87c16f582f5c753b7f39a71bd6647255512191be2d2dbf49458c4ef024588"}, + {file = "pydantic_core-2.6.3-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:522a9c4a4d1924facce7270c84b5134c5cabcb01513213662a2e89cf28c1d309"}, + {file = "pydantic_core-2.6.3-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:aaafc776e5edc72b3cad1ccedb5fd869cc5c9a591f1213aa9eba31a781be9ac1"}, + {file = "pydantic_core-2.6.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3a750a83b2728299ca12e003d73d1264ad0440f60f4fc9cee54acc489249b728"}, + {file = "pydantic_core-2.6.3-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:9e8b374ef41ad5c461efb7a140ce4730661aadf85958b5c6a3e9cf4e040ff4bb"}, + {file = "pydantic_core-2.6.3-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:b594b64e8568cf09ee5c9501ede37066b9fc41d83d58f55b9952e32141256acd"}, + {file = "pydantic_core-2.6.3-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:2a20c533cb80466c1d42a43a4521669ccad7cf2967830ac62c2c2f9cece63e7e"}, + {file = "pydantic_core-2.6.3-cp310-none-win32.whl", hash = "sha256:04fe5c0a43dec39aedba0ec9579001061d4653a9b53a1366b113aca4a3c05ca7"}, + {file = "pydantic_core-2.6.3-cp310-none-win_amd64.whl", hash = "sha256:6bf7d610ac8f0065a286002a23bcce241ea8248c71988bda538edcc90e0c39ad"}, + {file = "pydantic_core-2.6.3-cp311-cp311-macosx_10_7_x86_64.whl", hash = "sha256:6bcc1ad776fffe25ea5c187a028991c031a00ff92d012ca1cc4714087e575973"}, + {file = "pydantic_core-2.6.3-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:df14f6332834444b4a37685810216cc8fe1fe91f447332cd56294c984ecbff1c"}, + {file = "pydantic_core-2.6.3-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a0b7486d85293f7f0bbc39b34e1d8aa26210b450bbd3d245ec3d732864009819"}, + {file = "pydantic_core-2.6.3-cp311-cp311-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:a892b5b1871b301ce20d40b037ffbe33d1407a39639c2b05356acfef5536d26a"}, + {file = "pydantic_core-2.6.3-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:883daa467865e5766931e07eb20f3e8152324f0adf52658f4d302242c12e2c32"}, + {file = "pydantic_core-2.6.3-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:d4eb77df2964b64ba190eee00b2312a1fd7a862af8918ec70fc2d6308f76ac64"}, + {file = "pydantic_core-2.6.3-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1ce8c84051fa292a5dc54018a40e2a1926fd17980a9422c973e3ebea017aa8da"}, + {file = "pydantic_core-2.6.3-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:22134a4453bd59b7d1e895c455fe277af9d9d9fbbcb9dc3f4a97b8693e7e2c9b"}, + {file = "pydantic_core-2.6.3-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:02e1c385095efbd997311d85c6021d32369675c09bcbfff3b69d84e59dc103f6"}, + {file = "pydantic_core-2.6.3-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:d79f1f2f7ebdb9b741296b69049ff44aedd95976bfee38eb4848820628a99b50"}, + {file = "pydantic_core-2.6.3-cp311-none-win32.whl", hash = "sha256:430ddd965ffd068dd70ef4e4d74f2c489c3a313adc28e829dd7262cc0d2dd1e8"}, + {file = "pydantic_core-2.6.3-cp311-none-win_amd64.whl", hash = "sha256:84f8bb34fe76c68c9d96b77c60cef093f5e660ef8e43a6cbfcd991017d375950"}, + {file = "pydantic_core-2.6.3-cp311-none-win_arm64.whl", hash = "sha256:5a2a3c9ef904dcdadb550eedf3291ec3f229431b0084666e2c2aa8ff99a103a2"}, + {file = "pydantic_core-2.6.3-cp312-cp312-macosx_10_7_x86_64.whl", hash = "sha256:8421cf496e746cf8d6b677502ed9a0d1e4e956586cd8b221e1312e0841c002d5"}, + {file = "pydantic_core-2.6.3-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:bb128c30cf1df0ab78166ded1ecf876620fb9aac84d2413e8ea1594b588c735d"}, + {file = "pydantic_core-2.6.3-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:37a822f630712817b6ecc09ccc378192ef5ff12e2c9bae97eb5968a6cdf3b862"}, + {file = "pydantic_core-2.6.3-cp312-cp312-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:240a015102a0c0cc8114f1cba6444499a8a4d0333e178bc504a5c2196defd456"}, + {file = "pydantic_core-2.6.3-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:3f90e5e3afb11268628c89f378f7a1ea3f2fe502a28af4192e30a6cdea1e7d5e"}, + {file = "pydantic_core-2.6.3-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:340e96c08de1069f3d022a85c2a8c63529fd88709468373b418f4cf2c949fb0e"}, + {file = "pydantic_core-2.6.3-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1480fa4682e8202b560dcdc9eeec1005f62a15742b813c88cdc01d44e85308e5"}, + {file = "pydantic_core-2.6.3-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:f14546403c2a1d11a130b537dda28f07eb6c1805a43dae4617448074fd49c282"}, + {file = "pydantic_core-2.6.3-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:a87c54e72aa2ef30189dc74427421e074ab4561cf2bf314589f6af5b37f45e6d"}, + {file = "pydantic_core-2.6.3-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:f93255b3e4d64785554e544c1c76cd32f4a354fa79e2eeca5d16ac2e7fdd57aa"}, + {file = "pydantic_core-2.6.3-cp312-none-win32.whl", hash = "sha256:f70dc00a91311a1aea124e5f64569ea44c011b58433981313202c46bccbec0e1"}, + {file = "pydantic_core-2.6.3-cp312-none-win_amd64.whl", hash = "sha256:23470a23614c701b37252618e7851e595060a96a23016f9a084f3f92f5ed5881"}, + {file = "pydantic_core-2.6.3-cp312-none-win_arm64.whl", hash = "sha256:1ac1750df1b4339b543531ce793b8fd5c16660a95d13aecaab26b44ce11775e9"}, + {file = "pydantic_core-2.6.3-cp37-cp37m-macosx_10_7_x86_64.whl", hash = "sha256:a53e3195f134bde03620d87a7e2b2f2046e0e5a8195e66d0f244d6d5b2f6d31b"}, + {file = "pydantic_core-2.6.3-cp37-cp37m-macosx_11_0_arm64.whl", hash = "sha256:f2969e8f72c6236c51f91fbb79c33821d12a811e2a94b7aa59c65f8dbdfad34a"}, + {file = "pydantic_core-2.6.3-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:672174480a85386dd2e681cadd7d951471ad0bb028ed744c895f11f9d51b9ebe"}, + {file = "pydantic_core-2.6.3-cp37-cp37m-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:002d0ea50e17ed982c2d65b480bd975fc41086a5a2f9c924ef8fc54419d1dea3"}, + {file = "pydantic_core-2.6.3-cp37-cp37m-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:3ccc13afee44b9006a73d2046068d4df96dc5b333bf3509d9a06d1b42db6d8bf"}, + {file = "pydantic_core-2.6.3-cp37-cp37m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:439a0de139556745ae53f9cc9668c6c2053444af940d3ef3ecad95b079bc9987"}, + {file = "pydantic_core-2.6.3-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d63b7545d489422d417a0cae6f9898618669608750fc5e62156957e609e728a5"}, + {file = "pydantic_core-2.6.3-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:b44c42edc07a50a081672e25dfe6022554b47f91e793066a7b601ca290f71e42"}, + {file = "pydantic_core-2.6.3-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:1c721bfc575d57305dd922e6a40a8fe3f762905851d694245807a351ad255c58"}, + {file = "pydantic_core-2.6.3-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:5e4a2cf8c4543f37f5dc881de6c190de08096c53986381daebb56a355be5dfe6"}, + {file = "pydantic_core-2.6.3-cp37-none-win32.whl", hash = "sha256:d9b4916b21931b08096efed090327f8fe78e09ae8f5ad44e07f5c72a7eedb51b"}, + {file = "pydantic_core-2.6.3-cp37-none-win_amd64.whl", hash = "sha256:a8acc9dedd304da161eb071cc7ff1326aa5b66aadec9622b2574ad3ffe225525"}, + {file = "pydantic_core-2.6.3-cp38-cp38-macosx_10_7_x86_64.whl", hash = "sha256:5e9c068f36b9f396399d43bfb6defd4cc99c36215f6ff33ac8b9c14ba15bdf6b"}, + {file = "pydantic_core-2.6.3-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:e61eae9b31799c32c5f9b7be906be3380e699e74b2db26c227c50a5fc7988698"}, + {file = "pydantic_core-2.6.3-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d85463560c67fc65cd86153a4975d0b720b6d7725cf7ee0b2d291288433fc21b"}, + {file = "pydantic_core-2.6.3-cp38-cp38-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:9616567800bdc83ce136e5847d41008a1d602213d024207b0ff6cab6753fe645"}, + {file = "pydantic_core-2.6.3-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:9e9b65a55bbabda7fccd3500192a79f6e474d8d36e78d1685496aad5f9dbd92c"}, + {file = "pydantic_core-2.6.3-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:f468d520f47807d1eb5d27648393519655eadc578d5dd862d06873cce04c4d1b"}, + {file = "pydantic_core-2.6.3-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9680dd23055dd874173a3a63a44e7f5a13885a4cfd7e84814be71be24fba83db"}, + {file = "pydantic_core-2.6.3-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:9a718d56c4d55efcfc63f680f207c9f19c8376e5a8a67773535e6f7e80e93170"}, + {file = "pydantic_core-2.6.3-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:8ecbac050856eb6c3046dea655b39216597e373aa8e50e134c0e202f9c47efec"}, + {file = "pydantic_core-2.6.3-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:788be9844a6e5c4612b74512a76b2153f1877cd845410d756841f6c3420230eb"}, + {file = "pydantic_core-2.6.3-cp38-none-win32.whl", hash = "sha256:07a1aec07333bf5adebd8264047d3dc518563d92aca6f2f5b36f505132399efc"}, + {file = "pydantic_core-2.6.3-cp38-none-win_amd64.whl", hash = "sha256:621afe25cc2b3c4ba05fff53525156d5100eb35c6e5a7cf31d66cc9e1963e378"}, + {file = "pydantic_core-2.6.3-cp39-cp39-macosx_10_7_x86_64.whl", hash = "sha256:813aab5bfb19c98ae370952b6f7190f1e28e565909bfc219a0909db168783465"}, + {file = "pydantic_core-2.6.3-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:50555ba3cb58f9861b7a48c493636b996a617db1a72c18da4d7f16d7b1b9952b"}, + {file = "pydantic_core-2.6.3-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:19e20f8baedd7d987bd3f8005c146e6bcbda7cdeefc36fad50c66adb2dd2da48"}, + {file = "pydantic_core-2.6.3-cp39-cp39-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:b0a5d7edb76c1c57b95df719af703e796fc8e796447a1da939f97bfa8a918d60"}, + {file = "pydantic_core-2.6.3-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:f06e21ad0b504658a3a9edd3d8530e8cea5723f6ea5d280e8db8efc625b47e49"}, + {file = "pydantic_core-2.6.3-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:ea053cefa008fda40f92aab937fb9f183cf8752e41dbc7bc68917884454c6362"}, + {file = "pydantic_core-2.6.3-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:171a4718860790f66d6c2eda1d95dd1edf64f864d2e9f9115840840cf5b5713f"}, + {file = "pydantic_core-2.6.3-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:5ed7ceca6aba5331ece96c0e328cd52f0dcf942b8895a1ed2642de50800b79d3"}, + {file = "pydantic_core-2.6.3-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:acafc4368b289a9f291e204d2c4c75908557d4f36bd3ae937914d4529bf62a76"}, + {file = "pydantic_core-2.6.3-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:1aa712ba150d5105814e53cb141412217146fedc22621e9acff9236d77d2a5ef"}, + {file = "pydantic_core-2.6.3-cp39-none-win32.whl", hash = "sha256:44b4f937b992394a2e81a5c5ce716f3dcc1237281e81b80c748b2da6dd5cf29a"}, + {file = "pydantic_core-2.6.3-cp39-none-win_amd64.whl", hash = "sha256:9b33bf9658cb29ac1a517c11e865112316d09687d767d7a0e4a63d5c640d1b17"}, + {file = "pydantic_core-2.6.3-pp310-pypy310_pp73-macosx_10_7_x86_64.whl", hash = "sha256:d7050899026e708fb185e174c63ebc2c4ee7a0c17b0a96ebc50e1f76a231c057"}, + {file = "pydantic_core-2.6.3-pp310-pypy310_pp73-macosx_11_0_arm64.whl", hash = "sha256:99faba727727b2e59129c59542284efebbddade4f0ae6a29c8b8d3e1f437beb7"}, + {file = "pydantic_core-2.6.3-pp310-pypy310_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5fa159b902d22b283b680ef52b532b29554ea2a7fc39bf354064751369e9dbd7"}, + {file = "pydantic_core-2.6.3-pp310-pypy310_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:046af9cfb5384f3684eeb3f58a48698ddab8dd870b4b3f67f825353a14441418"}, + {file = "pydantic_core-2.6.3-pp310-pypy310_pp73-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:930bfe73e665ebce3f0da2c6d64455098aaa67e1a00323c74dc752627879fc67"}, + {file = "pydantic_core-2.6.3-pp310-pypy310_pp73-musllinux_1_1_aarch64.whl", hash = "sha256:85cc4d105747d2aa3c5cf3e37dac50141bff779545ba59a095f4a96b0a460e70"}, + {file = "pydantic_core-2.6.3-pp310-pypy310_pp73-musllinux_1_1_x86_64.whl", hash = "sha256:b25afe9d5c4f60dcbbe2b277a79be114e2e65a16598db8abee2a2dcde24f162b"}, + {file = "pydantic_core-2.6.3-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:e49ce7dc9f925e1fb010fc3d555250139df61fa6e5a0a95ce356329602c11ea9"}, + {file = "pydantic_core-2.6.3-pp37-pypy37_pp73-macosx_10_7_x86_64.whl", hash = "sha256:2dd50d6a1aef0426a1d0199190c6c43ec89812b1f409e7fe44cb0fbf6dfa733c"}, + {file = "pydantic_core-2.6.3-pp37-pypy37_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c6595b0d8c8711e8e1dc389d52648b923b809f68ac1c6f0baa525c6440aa0daa"}, + {file = "pydantic_core-2.6.3-pp37-pypy37_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4ef724a059396751aef71e847178d66ad7fc3fc969a1a40c29f5aac1aa5f8784"}, + {file = "pydantic_core-2.6.3-pp37-pypy37_pp73-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:3c8945a105f1589ce8a693753b908815e0748f6279959a4530f6742e1994dcb6"}, + {file = "pydantic_core-2.6.3-pp37-pypy37_pp73-musllinux_1_1_aarch64.whl", hash = "sha256:c8c6660089a25d45333cb9db56bb9e347241a6d7509838dbbd1931d0e19dbc7f"}, + {file = "pydantic_core-2.6.3-pp37-pypy37_pp73-musllinux_1_1_x86_64.whl", hash = "sha256:692b4ff5c4e828a38716cfa92667661a39886e71136c97b7dac26edef18767f7"}, + {file = "pydantic_core-2.6.3-pp37-pypy37_pp73-win_amd64.whl", hash = "sha256:f1a5d8f18877474c80b7711d870db0eeef9442691fcdb00adabfc97e183ee0b0"}, + {file = "pydantic_core-2.6.3-pp38-pypy38_pp73-macosx_10_7_x86_64.whl", hash = "sha256:3796a6152c545339d3b1652183e786df648ecdf7c4f9347e1d30e6750907f5bb"}, + {file = "pydantic_core-2.6.3-pp38-pypy38_pp73-macosx_11_0_arm64.whl", hash = "sha256:b962700962f6e7a6bd77e5f37320cabac24b4c0f76afeac05e9f93cf0c620014"}, + {file = "pydantic_core-2.6.3-pp38-pypy38_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:56ea80269077003eaa59723bac1d8bacd2cd15ae30456f2890811efc1e3d4413"}, + {file = "pydantic_core-2.6.3-pp38-pypy38_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:75c0ebbebae71ed1e385f7dfd9b74c1cff09fed24a6df43d326dd7f12339ec34"}, + {file = "pydantic_core-2.6.3-pp38-pypy38_pp73-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:252851b38bad3bfda47b104ffd077d4f9604a10cb06fe09d020016a25107bf98"}, + {file = "pydantic_core-2.6.3-pp38-pypy38_pp73-musllinux_1_1_aarch64.whl", hash = "sha256:6656a0ae383d8cd7cc94e91de4e526407b3726049ce8d7939049cbfa426518c8"}, + {file = "pydantic_core-2.6.3-pp38-pypy38_pp73-musllinux_1_1_x86_64.whl", hash = "sha256:d9140ded382a5b04a1c030b593ed9bf3088243a0a8b7fa9f071a5736498c5483"}, + {file = "pydantic_core-2.6.3-pp38-pypy38_pp73-win_amd64.whl", hash = "sha256:d38bbcef58220f9c81e42c255ef0bf99735d8f11edef69ab0b499da77105158a"}, + {file = "pydantic_core-2.6.3-pp39-pypy39_pp73-macosx_10_7_x86_64.whl", hash = "sha256:c9d469204abcca28926cbc28ce98f28e50e488767b084fb3fbdf21af11d3de26"}, + {file = "pydantic_core-2.6.3-pp39-pypy39_pp73-macosx_11_0_arm64.whl", hash = "sha256:48c1ed8b02ffea4d5c9c220eda27af02b8149fe58526359b3c07eb391cb353a2"}, + {file = "pydantic_core-2.6.3-pp39-pypy39_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8b2b1bfed698fa410ab81982f681f5b1996d3d994ae8073286515ac4d165c2e7"}, + {file = "pydantic_core-2.6.3-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bf9d42a71a4d7a7c1f14f629e5c30eac451a6fc81827d2beefd57d014c006c4a"}, + {file = "pydantic_core-2.6.3-pp39-pypy39_pp73-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:4292ca56751aebbe63a84bbfc3b5717abb09b14d4b4442cc43fd7c49a1529efd"}, + {file = "pydantic_core-2.6.3-pp39-pypy39_pp73-musllinux_1_1_aarch64.whl", hash = "sha256:7dc2ce039c7290b4ef64334ec7e6ca6494de6eecc81e21cb4f73b9b39991408c"}, + {file = "pydantic_core-2.6.3-pp39-pypy39_pp73-musllinux_1_1_x86_64.whl", hash = "sha256:615a31b1629e12445c0e9fc8339b41aaa6cc60bd53bf802d5fe3d2c0cda2ae8d"}, + {file = "pydantic_core-2.6.3-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:1fa1f6312fb84e8c281f32b39affe81984ccd484da6e9d65b3d18c202c666149"}, + {file = "pydantic_core-2.6.3.tar.gz", hash = "sha256:1508f37ba9e3ddc0189e6ff4e2228bd2d3c3a4641cbe8c07177162f76ed696c7"}, +] + +[package.dependencies] +typing-extensions = ">=4.6.0,<4.7.0 || >4.7.0" + [[package]] name = "pyflakes" version = "2.3.1" @@ -3284,42 +3435,63 @@ files = [ ] [[package]] -name = "PyYAML" -version = "5.4.1" +name = "pyyaml" +version = "6.0.1" description = "YAML parser and emitter for Python" category = "main" optional = false -python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*, !=3.5.*" +python-versions = ">=3.6" files = [ - {file = "PyYAML-5.4.1-cp27-cp27m-macosx_10_9_x86_64.whl", hash = "sha256:3b2b1824fe7112845700f815ff6a489360226a5609b96ec2190a45e62a9fc922"}, - {file = "PyYAML-5.4.1-cp27-cp27m-win32.whl", hash = "sha256:129def1b7c1bf22faffd67b8f3724645203b79d8f4cc81f674654d9902cb4393"}, - {file = "PyYAML-5.4.1-cp27-cp27m-win_amd64.whl", hash = "sha256:4465124ef1b18d9ace298060f4eccc64b0850899ac4ac53294547536533800c8"}, - {file = "PyYAML-5.4.1-cp27-cp27mu-manylinux1_x86_64.whl", hash = "sha256:bb4191dfc9306777bc594117aee052446b3fa88737cd13b7188d0e7aa8162185"}, - {file = "PyYAML-5.4.1-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:6c78645d400265a062508ae399b60b8c167bf003db364ecb26dcab2bda048253"}, - {file = "PyYAML-5.4.1-cp36-cp36m-manylinux1_x86_64.whl", hash = "sha256:4e0583d24c881e14342eaf4ec5fbc97f934b999a6828693a99157fde912540cc"}, - {file = "PyYAML-5.4.1-cp36-cp36m-manylinux2014_aarch64.whl", hash = "sha256:72a01f726a9c7851ca9bfad6fd09ca4e090a023c00945ea05ba1638c09dc3347"}, - {file = "PyYAML-5.4.1-cp36-cp36m-manylinux2014_s390x.whl", hash = "sha256:895f61ef02e8fed38159bb70f7e100e00f471eae2bc838cd0f4ebb21e28f8541"}, - {file = "PyYAML-5.4.1-cp36-cp36m-win32.whl", hash = "sha256:3bd0e463264cf257d1ffd2e40223b197271046d09dadf73a0fe82b9c1fc385a5"}, - {file = "PyYAML-5.4.1-cp36-cp36m-win_amd64.whl", hash = "sha256:e4fac90784481d221a8e4b1162afa7c47ed953be40d31ab4629ae917510051df"}, - {file = "PyYAML-5.4.1-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:5accb17103e43963b80e6f837831f38d314a0495500067cb25afab2e8d7a4018"}, - {file = "PyYAML-5.4.1-cp37-cp37m-manylinux1_x86_64.whl", hash = "sha256:e1d4970ea66be07ae37a3c2e48b5ec63f7ba6804bdddfdbd3cfd954d25a82e63"}, - {file = "PyYAML-5.4.1-cp37-cp37m-manylinux2014_aarch64.whl", hash = "sha256:cb333c16912324fd5f769fff6bc5de372e9e7a202247b48870bc251ed40239aa"}, - {file = "PyYAML-5.4.1-cp37-cp37m-manylinux2014_s390x.whl", hash = "sha256:fe69978f3f768926cfa37b867e3843918e012cf83f680806599ddce33c2c68b0"}, - {file = "PyYAML-5.4.1-cp37-cp37m-win32.whl", hash = "sha256:dd5de0646207f053eb0d6c74ae45ba98c3395a571a2891858e87df7c9b9bd51b"}, - {file = "PyYAML-5.4.1-cp37-cp37m-win_amd64.whl", hash = "sha256:08682f6b72c722394747bddaf0aa62277e02557c0fd1c42cb853016a38f8dedf"}, - {file = "PyYAML-5.4.1-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:d2d9808ea7b4af864f35ea216be506ecec180628aced0704e34aca0b040ffe46"}, - {file = "PyYAML-5.4.1-cp38-cp38-manylinux1_x86_64.whl", hash = "sha256:8c1be557ee92a20f184922c7b6424e8ab6691788e6d86137c5d93c1a6ec1b8fb"}, - {file = "PyYAML-5.4.1-cp38-cp38-manylinux2014_aarch64.whl", hash = "sha256:fd7f6999a8070df521b6384004ef42833b9bd62cfee11a09bda1079b4b704247"}, - {file = "PyYAML-5.4.1-cp38-cp38-manylinux2014_s390x.whl", hash = "sha256:bfb51918d4ff3d77c1c856a9699f8492c612cde32fd3bcd344af9be34999bfdc"}, - {file = "PyYAML-5.4.1-cp38-cp38-win32.whl", hash = "sha256:fa5ae20527d8e831e8230cbffd9f8fe952815b2b7dae6ffec25318803a7528fc"}, - {file = "PyYAML-5.4.1-cp38-cp38-win_amd64.whl", hash = "sha256:0f5f5786c0e09baddcd8b4b45f20a7b5d61a7e7e99846e3c799b05c7c53fa696"}, - {file = "PyYAML-5.4.1-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:294db365efa064d00b8d1ef65d8ea2c3426ac366c0c4368d930bf1c5fb497f77"}, - {file = "PyYAML-5.4.1-cp39-cp39-manylinux1_x86_64.whl", hash = "sha256:74c1485f7707cf707a7aef42ef6322b8f97921bd89be2ab6317fd782c2d53183"}, - {file = "PyYAML-5.4.1-cp39-cp39-manylinux2014_aarch64.whl", hash = "sha256:d483ad4e639292c90170eb6f7783ad19490e7a8defb3e46f97dfe4bacae89122"}, - {file = "PyYAML-5.4.1-cp39-cp39-manylinux2014_s390x.whl", hash = "sha256:fdc842473cd33f45ff6bce46aea678a54e3d21f1b61a7750ce3c498eedfe25d6"}, - {file = "PyYAML-5.4.1-cp39-cp39-win32.whl", hash = "sha256:49d4cdd9065b9b6e206d0595fee27a96b5dd22618e7520c33204a4a3239d5b10"}, - {file = "PyYAML-5.4.1-cp39-cp39-win_amd64.whl", hash = "sha256:c20cfa2d49991c8b4147af39859b167664f2ad4561704ee74c1de03318e898db"}, - {file = "PyYAML-5.4.1.tar.gz", hash = "sha256:607774cbba28732bfa802b54baa7484215f530991055bb562efbed5b2f20a45e"}, + {file = "PyYAML-6.0.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:d858aa552c999bc8a8d57426ed01e40bef403cd8ccdd0fc5f6f04a00414cac2a"}, + {file = "PyYAML-6.0.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:fd66fc5d0da6d9815ba2cebeb4205f95818ff4b79c3ebe268e75d961704af52f"}, + {file = "PyYAML-6.0.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:69b023b2b4daa7548bcfbd4aa3da05b3a74b772db9e23b982788168117739938"}, + {file = "PyYAML-6.0.1-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:81e0b275a9ecc9c0c0c07b4b90ba548307583c125f54d5b6946cfee6360c733d"}, + {file = "PyYAML-6.0.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ba336e390cd8e4d1739f42dfe9bb83a3cc2e80f567d8805e11b46f4a943f5515"}, + {file = "PyYAML-6.0.1-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:326c013efe8048858a6d312ddd31d56e468118ad4cdeda36c719bf5bb6192290"}, + {file = "PyYAML-6.0.1-cp310-cp310-win32.whl", hash = "sha256:bd4af7373a854424dabd882decdc5579653d7868b8fb26dc7d0e99f823aa5924"}, + {file = "PyYAML-6.0.1-cp310-cp310-win_amd64.whl", hash = "sha256:fd1592b3fdf65fff2ad0004b5e363300ef59ced41c2e6b3a99d4089fa8c5435d"}, + {file = "PyYAML-6.0.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:6965a7bc3cf88e5a1c3bd2e0b5c22f8d677dc88a455344035f03399034eb3007"}, + {file = "PyYAML-6.0.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:f003ed9ad21d6a4713f0a9b5a7a0a79e08dd0f221aff4525a2be4c346ee60aab"}, + {file = "PyYAML-6.0.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:42f8152b8dbc4fe7d96729ec2b99c7097d656dc1213a3229ca5383f973a5ed6d"}, + {file = "PyYAML-6.0.1-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:062582fca9fabdd2c8b54a3ef1c978d786e0f6b3a1510e0ac93ef59e0ddae2bc"}, + {file = "PyYAML-6.0.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d2b04aac4d386b172d5b9692e2d2da8de7bfb6c387fa4f801fbf6fb2e6ba4673"}, + {file = "PyYAML-6.0.1-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:e7d73685e87afe9f3b36c799222440d6cf362062f78be1013661b00c5c6f678b"}, + {file = "PyYAML-6.0.1-cp311-cp311-win32.whl", hash = "sha256:1635fd110e8d85d55237ab316b5b011de701ea0f29d07611174a1b42f1444741"}, + {file = "PyYAML-6.0.1-cp311-cp311-win_amd64.whl", hash = "sha256:bf07ee2fef7014951eeb99f56f39c9bb4af143d8aa3c21b1677805985307da34"}, + {file = "PyYAML-6.0.1-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:855fb52b0dc35af121542a76b9a84f8d1cd886ea97c84703eaa6d88e37a2ad28"}, + {file = "PyYAML-6.0.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:40df9b996c2b73138957fe23a16a4f0ba614f4c0efce1e9406a184b6d07fa3a9"}, + {file = "PyYAML-6.0.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6c22bec3fbe2524cde73d7ada88f6566758a8f7227bfbf93a408a9d86bcc12a0"}, + {file = "PyYAML-6.0.1-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:8d4e9c88387b0f5c7d5f281e55304de64cf7f9c0021a3525bd3b1c542da3b0e4"}, + {file = "PyYAML-6.0.1-cp312-cp312-win32.whl", hash = "sha256:d483d2cdf104e7c9fa60c544d92981f12ad66a457afae824d146093b8c294c54"}, + {file = "PyYAML-6.0.1-cp312-cp312-win_amd64.whl", hash = "sha256:0d3304d8c0adc42be59c5f8a4d9e3d7379e6955ad754aa9d6ab7a398b59dd1df"}, + {file = "PyYAML-6.0.1-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:50550eb667afee136e9a77d6dc71ae76a44df8b3e51e41b77f6de2932bfe0f47"}, + {file = "PyYAML-6.0.1-cp36-cp36m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1fe35611261b29bd1de0070f0b2f47cb6ff71fa6595c077e42bd0c419fa27b98"}, + {file = "PyYAML-6.0.1-cp36-cp36m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:704219a11b772aea0d8ecd7058d0082713c3562b4e271b849ad7dc4a5c90c13c"}, + {file = "PyYAML-6.0.1-cp36-cp36m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:afd7e57eddb1a54f0f1a974bc4391af8bcce0b444685d936840f125cf046d5bd"}, + {file = "PyYAML-6.0.1-cp36-cp36m-win32.whl", hash = "sha256:fca0e3a251908a499833aa292323f32437106001d436eca0e6e7833256674585"}, + {file = "PyYAML-6.0.1-cp36-cp36m-win_amd64.whl", hash = "sha256:f22ac1c3cac4dbc50079e965eba2c1058622631e526bd9afd45fedd49ba781fa"}, + {file = "PyYAML-6.0.1-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:b1275ad35a5d18c62a7220633c913e1b42d44b46ee12554e5fd39c70a243d6a3"}, + {file = "PyYAML-6.0.1-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:18aeb1bf9a78867dc38b259769503436b7c72f7a1f1f4c93ff9a17de54319b27"}, + {file = "PyYAML-6.0.1-cp37-cp37m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:596106435fa6ad000c2991a98fa58eeb8656ef2325d7e158344fb33864ed87e3"}, + {file = "PyYAML-6.0.1-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:baa90d3f661d43131ca170712d903e6295d1f7a0f595074f151c0aed377c9b9c"}, + {file = "PyYAML-6.0.1-cp37-cp37m-win32.whl", hash = "sha256:9046c58c4395dff28dd494285c82ba00b546adfc7ef001486fbf0324bc174fba"}, + {file = "PyYAML-6.0.1-cp37-cp37m-win_amd64.whl", hash = "sha256:4fb147e7a67ef577a588a0e2c17b6db51dda102c71de36f8549b6816a96e1867"}, + {file = "PyYAML-6.0.1-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:1d4c7e777c441b20e32f52bd377e0c409713e8bb1386e1099c2415f26e479595"}, + {file = "PyYAML-6.0.1-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a0cd17c15d3bb3fa06978b4e8958dcdc6e0174ccea823003a106c7d4d7899ac5"}, + {file = "PyYAML-6.0.1-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:28c119d996beec18c05208a8bd78cbe4007878c6dd15091efb73a30e90539696"}, + {file = "PyYAML-6.0.1-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7e07cbde391ba96ab58e532ff4803f79c4129397514e1413a7dc761ccd755735"}, + {file = "PyYAML-6.0.1-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:49a183be227561de579b4a36efbb21b3eab9651dd81b1858589f796549873dd6"}, + {file = "PyYAML-6.0.1-cp38-cp38-win32.whl", hash = "sha256:184c5108a2aca3c5b3d3bf9395d50893a7ab82a38004c8f61c258d4428e80206"}, + {file = "PyYAML-6.0.1-cp38-cp38-win_amd64.whl", hash = "sha256:1e2722cc9fbb45d9b87631ac70924c11d3a401b2d7f410cc0e3bbf249f2dca62"}, + {file = "PyYAML-6.0.1-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:9eb6caa9a297fc2c2fb8862bc5370d0303ddba53ba97e71f08023b6cd73d16a8"}, + {file = "PyYAML-6.0.1-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:c8098ddcc2a85b61647b2590f825f3db38891662cfc2fc776415143f599bb859"}, + {file = "PyYAML-6.0.1-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5773183b6446b2c99bb77e77595dd486303b4faab2b086e7b17bc6bef28865f6"}, + {file = "PyYAML-6.0.1-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:b786eecbdf8499b9ca1d697215862083bd6d2a99965554781d0d8d1ad31e13a0"}, + {file = "PyYAML-6.0.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bc1bf2925a1ecd43da378f4db9e4f799775d6367bdb94671027b73b393a7c42c"}, + {file = "PyYAML-6.0.1-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:04ac92ad1925b2cff1db0cfebffb6ffc43457495c9b3c39d3fcae417d7125dc5"}, + {file = "PyYAML-6.0.1-cp39-cp39-win32.whl", hash = "sha256:faca3bdcf85b2fc05d06ff3fbc1f83e1391b3e724afa3feba7d13eeab355484c"}, + {file = "PyYAML-6.0.1-cp39-cp39-win_amd64.whl", hash = "sha256:510c9deebc5c0225e8c96813043e62b680ba2f9c50a08d3724c7f28a747d1486"}, + {file = "PyYAML-6.0.1.tar.gz", hash = "sha256:bfdf460b1736c775f2ba9f6a92bca30bc2095067b8a9d77876d1fad6cc3b4a43"}, ] [[package]] @@ -4265,14 +4437,14 @@ test = ["black (>=22.3.0,<23.0.0)", "coverage (>=6.2,<7.0)", "isort (>=5.0.6,<6. [[package]] name = "typing-extensions" -version = "4.4.0" +version = "4.7.1" description = "Backported and Experimental Type Hints for Python 3.7+" category = "main" optional = false python-versions = ">=3.7" files = [ - {file = "typing_extensions-4.4.0-py3-none-any.whl", hash = "sha256:16fa4864408f655d35ec496218b85f79b3437c829e93320c7c9215ccfd92489e"}, - {file = "typing_extensions-4.4.0.tar.gz", hash = "sha256:1511434bb92bf8dd198c12b1cc812e800d4181cfcb867674e0f8279cc93087aa"}, + {file = "typing_extensions-4.7.1-py3-none-any.whl", hash = "sha256:440d5dd3af93b060174bf433bccd69b0babc3b15b1a8dca43789fd7f61514b36"}, + {file = "typing_extensions-4.7.1.tar.gz", hash = "sha256:b75ddc264f0ba5615db7ba217daeb99701ad295353c45f9e95963337ceeeffb2"}, ] [[package]] @@ -4523,4 +4695,4 @@ files = [ [metadata] lock-version = "2.0" python-versions = ">=3.10,<3.11" -content-hash = "34ea7766877c41c5778a7c2339615fa7da868a2fe2bbeaa683505f9ac19f979d" +content-hash = "3839561ca2068feddc97e917911e39760f27645af1c84a2430c3a9f1bcec4f4d" diff --git a/pyproject.toml b/pyproject.toml index b6ecae2..aa52717 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -9,7 +9,6 @@ python = ">=3.10,<3.11" numpy = "1.21.0" openpyxl = "3.0.7" pandas = "1.4.2" -PyYAML = "5.4.1" scikit-learn = "^1.0.2" unicodecsv = "0.14.1" xlrd = "2.0.1" @@ -46,6 +45,7 @@ lxml = "^4.9.1" pyarrow = "^11.0.0" duckdb = "^0.6.1" sqlfluff = "^1.4.5" +pydantic = "^2.3.0" [build-system] requires = ["poetry-core>=1.0.0"] From e54b0fdc670c821226fdbd70e653aac79d54e4aa Mon Sep 17 00:00:00 2001 From: Alex Parsons Date: Fri, 1 Sep 2023 16:20:35 +0000 Subject: [PATCH 16/20] ignore weird typing error --- src/data_common/helpers/pipe.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/data_common/helpers/pipe.py b/src/data_common/helpers/pipe.py index b5f2d5b..6be3641 100644 --- a/src/data_common/helpers/pipe.py +++ b/src/data_common/helpers/pipe.py @@ -1,6 +1,6 @@ # pyright: strict -from typing import Iterator, Any from itertools import product +from typing import Any, Iterator def iter_format( @@ -25,6 +25,6 @@ def pos_from_keyword(p: dict[str, Any]) -> Iterator[str]: x += 1 for p in parameters: - kw_parameters = label_parameters(p) + kw_parameters = label_parameters(p) # type: ignore pos_parameters = list(pos_from_keyword(kw_parameters)) yield str_source.format(*pos_parameters, **kw_parameters) From 2c28ca0b00a971a1f48cf33671d9c433d60fbd0f Mon Sep 17 00:00:00 2001 From: Alex Parsons Date: Fri, 1 Sep 2023 18:52:32 +0000 Subject: [PATCH 17/20] Index not always present, doesn't need to be excluded --- src/data_common/dataset/resource_management.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/src/data_common/dataset/resource_management.py b/src/data_common/dataset/resource_management.py index 4f8bd75..c750881 100644 --- a/src/data_common/dataset/resource_management.py +++ b/src/data_common/dataset/resource_management.py @@ -3,6 +3,7 @@ import io import json import os +import re import shutil import sqlite3 import subprocess @@ -13,14 +14,12 @@ from shutil import copyfile from typing import Any, Callable, Literal, TypedDict, TypeVar, cast from urllib.parse import urlencode -import geopandas as gpd +import geopandas as gpd import pandas as pd import pytest import rich import xlsxwriter -import re - from frictionless import describe, validate from rich.table import Table from ruamel.yaml import YAML @@ -920,7 +919,7 @@ def copy_resources(self): # __index_level_0__ is an internal parquet column that duckdb has access to # but we don't want to export - exclude = "EXCLUDE (__index_level_0__)" + exclude = "" if desc["custom"].get("is_geodata", False): exclude = "EXCLUDE (__index_level_0__, geometry)" From 84ba6c31aedbcd1c05825ad379e0134800a7a371 Mon Sep 17 00:00:00 2001 From: Alex Parsons Date: Mon, 4 Sep 2023 08:56:29 +0000 Subject: [PATCH 18/20] Add url function to core data_common --- src/data_common/helpers/url.py | 196 +++++++++++++++++++++++++++++++++ 1 file changed, 196 insertions(+) create mode 100644 src/data_common/helpers/url.py diff --git a/src/data_common/helpers/url.py b/src/data_common/helpers/url.py new file mode 100644 index 0000000..167dc5f --- /dev/null +++ b/src/data_common/helpers/url.py @@ -0,0 +1,196 @@ +from __future__ import annotations + +from typing import NamedTuple, Union +from urllib.parse import ParseResult, urlparse + +from typing_extensions import Self + + +class NetLoc(NamedTuple): + username: str + password: str + hostname: str + port: str + + @classmethod + def from_parse_result(cls, parse_result: ParseResult): + username, password = parse_result._userinfo # type: ignore + hostname, port = parse_result._hostinfo # type: ignore + return cls(username, password, hostname, port) + + def construct_netloc(self) -> str: + base = self.hostname + if self.port: + base += ":" + self.port + if self.username: + if self.password: + base = self.username + ":" + self.password + "@" + base + else: + base = self.username + "@" + base + return base + + def __str__(self): + return ":".join(self) + + +class UrlHandler: + def __str__(self): + return self._urlparse.geturl() + + def __init__( + self, + url: str, + ): + self._urlparse = urlparse(url) + if self.scheme == "": + self._urlparse = urlparse("https://" + url) + self._netloc = NetLoc.from_parse_result(self._urlparse) + + def update( + self, + scheme: str = "", + path: str = "", + params: str = "", + query: str = "", + fragment: str = "", + hostname: str = "", + port: str = "", + username: str = "", + password: str = "", + ) -> Self: + new = self.__class__(self._urlparse.geturl()) + + if scheme: + new.scheme = scheme + if path: + new.path = path + if params: + new.params = params + if query: + new.query = query + if fragment: + new.fragment = fragment + if scheme: + new.scheme = scheme + if hostname: + new.hostname = hostname + if port: + new.port = port + if username: + new.username = username + if password: + new.password = password + + return new + + @property + def scheme(self): + return self._urlparse.scheme + + @property + def path(self): + return self._urlparse.path + + @property + def params(self): + return self._urlparse.params + + @property + def query(self): + return self._urlparse.query + + @property + def fragment(self): + return self._urlparse.fragment + + @property + def username(self): + return self._netloc.username + + @property + def password(self): + return self._netloc.password + + @property + def hostname(self): + return self._netloc.hostname + + @property + def port(self): + return self._netloc.port + + @scheme.setter + def scheme(self, value: str): + self._urlparse = self._urlparse._replace(scheme=value) + + @path.setter + def path(self, value: str): + self._urlparse = self._urlparse._replace(path=value) + + @params.setter + def params(self, value: str): + self._urlparse = self._urlparse._replace(params=value) + + @query.setter + def query(self, value: str): + self._urlparse = self._urlparse._replace(query=value) + + @fragment.setter + def fragment(self, value: str): + self._urlparse = self._urlparse._replace(fragment=value) + + @username.setter + def username(self, value: str): + self._netloc = self._netloc._replace(username=value) + self._urlparse = self._urlparse._replace(netloc=self._netloc.construct_netloc()) + + @password.setter + def password(self, value: str): + self._netloc = self._netloc._replace(password=value) + self._urlparse = self._urlparse._replace(netloc=self._netloc.construct_netloc()) + + @hostname.setter + def hostname(self, value: str): + self._netloc = self._netloc._replace(hostname=value) + self._urlparse = self._urlparse._replace(netloc=self._netloc.construct_netloc()) + + @port.setter + def port(self, value: str | int): + if isinstance(value, int): + value = str(value) + self._netloc = self._netloc._replace(port=value) + self._urlparse = self._urlparse._replace(netloc=self._netloc.construct_netloc()) + + def __truediv__(self, other: str) -> Self: + new_url = self._urlparse._replace(path=self._urlparse.path + "/" + other) + return self.__class__(new_url.geturl()) + + +class Url(UrlHandler, str): + """ + URL class that's pretending to be a string. + + Can access and set all the attributes of a URL, e.g.: + url = Url("https://www.example.com") + url.port = 8080 + url.path = "/path/to/resource" + + And this will modify this object in place. + + To return a new object - use the update method. + + Preferred way of adding paths to URLs is to use the / operator, e.g.: + Url("https://www.example.com") / "path" / "to" / "resource" + + Using the strings + operator will revert back + to being a normal string for compatibility. + """ + + def __new__(cls, url_string: str): + return str.__new__(cls, url_string) + + def __init__(self, url_string: str): + UrlHandler.__init__(self, url_string) + + +UrlLike = Union[str, Url] From 23605c675137cf0126ca31ef1c9f5a399658df69 Mon Sep 17 00:00:00 2001 From: Alex Parsons Date: Mon, 4 Sep 2023 10:29:31 +0000 Subject: [PATCH 19/20] Make URL usable as a pydantic source --- src/data_common/helpers/url.py | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/src/data_common/helpers/url.py b/src/data_common/helpers/url.py index 167dc5f..a59dbfb 100644 --- a/src/data_common/helpers/url.py +++ b/src/data_common/helpers/url.py @@ -1,8 +1,10 @@ from __future__ import annotations -from typing import NamedTuple, Union +from typing import Any, NamedTuple, Union from urllib.parse import ParseResult, urlparse +from pydantic import GetCoreSchemaHandler +from pydantic_core import CoreSchema, core_schema from typing_extensions import Self @@ -192,5 +194,11 @@ def __new__(cls, url_string: str): def __init__(self, url_string: str): UrlHandler.__init__(self, url_string) + @classmethod + def __get_pydantic_core_schema__( + cls, source_type: Any, handler: GetCoreSchemaHandler + ) -> CoreSchema: + return core_schema.no_info_after_validator_function(cls, handler(str)) + UrlLike = Union[str, Url] From a3b8e9325b46dfdebc55c4d1d156d23374f74596 Mon Sep 17 00:00:00 2001 From: Alex Parsons Date: Mon, 4 Sep 2023 14:01:19 +0000 Subject: [PATCH 20/20] Enum value get consistently sorted --- src/data_common/dataset/table_management.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/data_common/dataset/table_management.py b/src/data_common/dataset/table_management.py index c1a4a93..dbaf033 100644 --- a/src/data_common/dataset/table_management.py +++ b/src/data_common/dataset/table_management.py @@ -3,6 +3,7 @@ import pandas as pd from pandas.io.json import build_table_schema + from data_common.db import duck_query @@ -80,6 +81,9 @@ def enhance_field( field["constraints"]["enum"] = enum_value if isinstance(enum_value, EnumPlaceholder): field["constraints"]["enum"] = enum_value.process(col) + if isinstance(field["constraints"]["enum"], list): + # sort the enum values + field["constraints"]["enum"] = sorted(field["constraints"]["enum"]) return field @classmethod @@ -105,7 +109,6 @@ def get_table_schema( def update_table_schema( path: Path, existing_schema: SchemaValidator | None ) -> SchemaValidator: - if path.suffix == ".csv": df = pd.read_csv(path) elif path.suffix == ".parquet":