adds option to timestamp dataset saved as csv
nilomr committed May 30, 2022
1 parent 034dd35 commit 8439500
Showing 2 changed files with 22 additions and 29 deletions.
39 changes: 16 additions & 23 deletions docs/contents/kantodata-dataset.ipynb
@@ -64,15 +64,6 @@
" B216 117\n",
" \n",
"\n",
"Loading an existing dataset\n",
"\n",
".. code-block:: python\n",
" :linenos:\n",
" \n",
" DATASET_ID = \"WYTHAM_GRETIS_2021_TEST\"\n",
" out_dir = DIRS.DATA / \"datasets\" / DATASET_ID / f\"{DATASET_ID}.db\"\n",
" dataset = pickle.load(open(out_dir, \"rb\"))\n",
"\n",
"\n",
"Creating a dataset for which there is already derived data (e.g. spectrograms).\n",
"This is something that might happen if, say, creating a dataset fails but\n",
@@ -108,30 +99,32 @@
"metadata": {},
"outputs": [],
"source": [
"# Opening an existing dataset\n",
"from pykanto.utils.read import load_dataset\n",
"DATASET_ID = 'BIG_BIRD'\n",
"out_dir = DIRS.DATA / \"datasets\" / DATASET_ID / f\"{DATASET_ID}.db\"\n",
"dataset = load_dataset(out_dir)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"\n",
"# If you want to save the dataset as a .csv file,\n",
"# which I recommend you do as backup,\n",
"\n",
"csv_dir = dataset.DIRS.DATASET.parent\n",
"dataset.to_csv(csv_dir)\n",
"\n",
"\n",
"# If you want to save the new metadata you have generated \n",
"# (vocalisation type labels and onset/offsets, for example) \n",
"# If you want to save the new metadata you have generated\n",
"# (vocalisation type labels and onset/offsets, for example)\n",
"# to the original .json files (as a backup or to use with other software):\n",
"\n",
"from pykanto.utils.write import save_to_jsons\n",
"save_to_jsons(dataset)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"dataset_loc = DIRS.DATA / \"datasets\" / DATASET_ID / f\"{DATASET_ID}.db\"\n",
"dataset = open_dataset(dataset_loc)"
]
}
],
"metadata": {
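Putting the notebook cells above together, a minimal end-to-end sketch could look like the following. This assumes DIRS comes from your pykanto project setup and that a dataset called BIG_BIRD already exists, as in the example cells; timestamp is the option added by this commit.

# Minimal sketch: load an existing dataset, back it up, export metadata.
from pykanto.utils.read import load_dataset
from pykanto.utils.write import save_to_jsons

DATASET_ID = "BIG_BIRD"
out_dir = DIRS.DATA / "datasets" / DATASET_ID / f"{DATASET_ID}.db"
dataset = load_dataset(out_dir)

# Back up the dataset as .csv next to the .db file; timestamp=True (added in
# this commit) appends an _HHMMSS suffix to the vocalisation file name.
csv_dir = dataset.DIRS.DATASET.parent
dataset.to_csv(csv_dir, timestamp=True)

# Write newly derived metadata (e.g. vocalisation labels, onsets/offsets)
# back to the original .json files.
save_to_jsons(dataset)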
12 changes: 6 additions & 6 deletions pykanto/dataset.py
@@ -10,20 +10,17 @@

import copy
import inspect
import math
import pickle
import subprocess
import warnings
from datetime import datetime
from pathlib import Path
from random import sample
from typing import List, Literal, Tuple

import matplotlib as mpl
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import ray
import seaborn as sns
from bokeh.palettes import Set3_12

import pykanto.plot as kplot
@@ -713,15 +710,18 @@ def save_to_disk(self, verbose: bool = True) -> None:
if verbose:
print(f"Saved dataset to {out_dir}")

def to_csv(self, path: Path) -> None:
def to_csv(self, path: Path, timestamp: bool = False) -> None:
"""
Output vocalisation (and, if present, unit) metadata in the dataset as
a .csv file.
Args:
path (Path): Directory where to save the file(s).
timestamp (bool, optional): Whether to add timestamp to file name.
Defaults to False.
"""
self.vocs.to_csv(path / f"{self.DIRS.DATASET.stem}_VOCS.csv")
t = f'_{datetime.now().strftime("%H%M%S")}' if timestamp else ""
self.vocs.to_csv(path / f"{self.DIRS.DATASET.stem}_VOCS{t}.csv")
if hasattr(self, "units"):
self.units.to_csv(path / f"{self.DIRS.DATASET.stem}_UNITS.csv")

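A short usage sketch of the new argument (the BIG_BIRD file names below are illustrative; the suffix follows the %H%M%S pattern in the code above):

# Default behaviour: stable file names, overwritten on every call.
dataset.to_csv(dataset.DIRS.DATASET.parent)
# -> BIG_BIRD_VOCS.csv (and BIG_BIRD_UNITS.csv if the dataset has units)

# With timestamp=True the vocalisation file gets a time-of-day suffix,
# so repeated backups do not overwrite each other, e.g. at 14:05:32:
dataset.to_csv(dataset.DIRS.DATASET.parent, timestamp=True)
# -> BIG_BIRD_VOCS_140532.csv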
