diff --git a/chesscog/__version__.py b/chesscog/__version__.py
index 5becc17c..5c4105cd 100644
--- a/chesscog/__version__.py
+++ b/chesscog/__version__.py
@@ -1 +1 @@
-__version__ = "1.0.0"
+__version__ = "1.0.1"
diff --git a/chesscog/data_synthesis/download_dataset.py b/chesscog/data_synthesis/download_dataset.py
index ca9950da..88ecb134 100644
--- a/chesscog/data_synthesis/download_dataset.py
+++ b/chesscog/data_synthesis/download_dataset.py
@@ -13,14 +13,50 @@
 
 import functools
 import argparse
+import shutil
+import os
+import subprocess
+import osfclient.cli
+import typing
+import zipfile
+import tempfile
+from pathlib import Path
+from types import SimpleNamespace
+from recap import URI
+from logging import getLogger
 
-from chesscog.core.io import download_zip_folder_from_google_drive
-
-ensure_dataset = functools.partial(download_zip_folder_from_google_drive,
-                                   "1XClmGJwEWNcIkwaH0VLuvvAY3qk_CRJh",
-                                   "data://render")
+logger = getLogger(__name__)
 
 if __name__ == "__main__":
     parser = argparse.ArgumentParser(
         description="Download the rendered dataset.").parse_args()
-    ensure_dataset(show_size=True)
+
+    folder = URI("data://render")
+    with tempfile.TemporaryDirectory() as tmp:
+        logger.info("Downloading rendered dataset from OSF")
+        tmp = Path(tmp)
+        args = SimpleNamespace(project="xf3ka", output=str(tmp), username=None)
+        osfclient.cli.clone(args)
+        shutil.rmtree(folder, ignore_errors=True)
+        os.makedirs(folder.parent, exist_ok=True)
+        shutil.move(tmp / "osfstorage", folder)
+        logger.info("Merging train dataset")
+        try:
+            # os.system() only returns an exit status and never raises, so
+            # run zip via subprocess (argument list, no shell) with
+            # check=True to surface a missing binary or a failed merge.
+            subprocess.run(
+                ["zip", "-s", "0", str(folder / "train.zip"),
+                 "--out", str(folder / "train_full.zip")],
+                check=True)
+        except (OSError, subprocess.CalledProcessError) as err:
+            raise RuntimeError(
+                f"Please manually unpack the ZIP archives at {folder}") from err
+        # Reached only after a successful merge: drop the split parts.
+        for file in ("train.z01", "train.zip"):
+            (folder / file).unlink()
+        shutil.move(folder / "train_full.zip", folder / "train.zip")
+        for archive in ("train.zip", "val.zip", "test.zip"):
+            logger.info(f"Extracting {archive}")
+            with zipfile.ZipFile(folder / archive) as z:
+                z.extractall(folder)
diff --git a/pyproject.toml b/pyproject.toml
index 57b60203..123c283b 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -4,7 +4,7 @@ build-backend = "poetry.masonry.api"
 
 [tool.poetry]
 name = "chesscog"
-version = "1.0.0"
+version = "1.0.1"
 description = "Recognise chess positions using computer vision."
 authors = [ "Georg Wölflein ",]
 include = [ "config/*.yaml", "config/**/_*.yaml",]