From 84395009c25039be3867063f62f379ee1db887c4 Mon Sep 17 00:00:00 2001 From: nilomr Date: Mon, 30 May 2022 17:56:11 +0100 Subject: [PATCH] adds option to timestamp dataset saved as csv --- docs/contents/kantodata-dataset.ipynb | 39 +++++++++++---------------- pykanto/dataset.py | 12 ++++----- 2 files changed, 22 insertions(+), 29 deletions(-) diff --git a/docs/contents/kantodata-dataset.ipynb b/docs/contents/kantodata-dataset.ipynb index 23ab441..5f0cf70 100644 --- a/docs/contents/kantodata-dataset.ipynb +++ b/docs/contents/kantodata-dataset.ipynb @@ -64,15 +64,6 @@ " B216 117\n", " \n", "\n", - "Loading an existing dataset\n", - "\n", - ".. code-block:: python\n", - " :linenos:\n", - " \n", - " DATASET_ID = \"WYTHAM_GRETIS_2021_TEST\"\n", - " out_dir = DIRS.DATA / \"datasets\" / DATASET_ID / f\"{DATASET_ID}.db\"\n", - " dataset = pickle.load(open(out_dir, \"rb\"))\n", - "\n", "\n", "Creating a dataset for which there is already derived data (e.g. spectrograms).\n", "This is something that might happen if, say, creating a dataset fails but\n", @@ -108,30 +99,32 @@ "metadata": {}, "outputs": [], "source": [ + "# Opening an existing dataset\n", + "from pykanto.utils.read import load_dataset\n", + "DATASET_ID = 'BIG_BIRD'\n", + "out_dir = DIRS.DATA / \"datasets\" / DATASET_ID / f\"{DATASET_ID}.db\"\n", + "dataset = load_dataset(out_dir)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "\n", "# If you want to save the dataset as a .csv file,\n", "# which I recommend you do as backup,\n", - "\n", "csv_dir = dataset.DIRS.DATASET.parent\n", "dataset.to_csv(csv_dir)\n", "\n", "\n", - "# If you want to save the new metadata you have generated \n", - "# (vocalisation type labels and onset/offsets, for example) \n", + "# If you want to save the new metadata you have generated\n", + "# (vocalisation type labels and onset/offsets, for example)\n", "# to the original .json files (as a backup or to use with other software):\n", - "\n", "from pykanto.utils.write import save_to_jsons\n", "save_to_jsons(dataset)" ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "dataset_loc = DIRS.DATA / \"datasets\" / DATASET_ID / f\"{DATASET_ID}.db\"\n", - "dataset = open_dataset(dataset_loc)" - ] } ], "metadata": { diff --git a/pykanto/dataset.py b/pykanto/dataset.py index 878ac94..e295e75 100644 --- a/pykanto/dataset.py +++ b/pykanto/dataset.py @@ -10,20 +10,17 @@ import copy import inspect -import math import pickle import subprocess import warnings +from datetime import datetime from pathlib import Path from random import sample from typing import List, Literal, Tuple -import matplotlib as mpl -import matplotlib.pyplot as plt import numpy as np import pandas as pd import ray -import seaborn as sns from bokeh.palettes import Set3_12 import pykanto.plot as kplot @@ -713,15 +710,18 @@ def save_to_disk(self, verbose: bool = True) -> None: if verbose: print(f"Saved dataset to {out_dir}") - def to_csv(self, path: Path) -> None: + def to_csv(self, path: Path, timestamp: bool = False) -> None: """ Output vocalisation (and, if present, unit) metadata in the dataset as a .csv file. Args: path (Path): Directory where to save the file(s). + timestamp (bool, optional): Whether to add timestamp to file name. + Defaults to False. """ - self.vocs.to_csv(path / f"{self.DIRS.DATASET.stem}_VOCS.csv") + t = f'_{datetime.now().strftime("%H%M%S")}' if timestamp else "" + self.vocs.to_csv(path / f"{self.DIRS.DATASET.stem}_VOCS{t}.csv") if hasattr(self, "units"): self.units.to_csv(path / f"{self.DIRS.DATASET.stem}_UNITS.csv")