adds option to timestamp dataset saved as csv
nilomr committed May 30, 2022
1 parent 034dd35 commit 8439500
Showing 2 changed files with 22 additions and 29 deletions.
39 changes: 16 additions & 23 deletions docs/contents/kantodata-dataset.ipynb
@@ -64,15 +64,6 @@
" B216 117\n",
" \n",
"\n",
"Loading an existing dataset\n",
"\n",
".. code-block:: python\n",
" :linenos:\n",
" \n",
" DATASET_ID = \"WYTHAM_GRETIS_2021_TEST\"\n",
" out_dir = DIRS.DATA / \"datasets\" / DATASET_ID / f\"{DATASET_ID}.db\"\n",
" dataset = pickle.load(open(out_dir, \"rb\"))\n",
"\n",
"\n",
"Creating a dataset for which there is already derived data (e.g. spectrograms).\n",
"This is something that might happen if, say, creating a dataset fails but\n",
@@ -108,30 +99,32 @@
"metadata": {},
"outputs": [],
"source": [
"# Opening an existing dataset\n",
"from pykanto.utils.read import load_dataset\n",
"DATASET_ID = 'BIG_BIRD'\n",
"out_dir = DIRS.DATA / \"datasets\" / DATASET_ID / f\"{DATASET_ID}.db\"\n",
"dataset = load_dataset(out_dir)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"\n",
"# If you want to save the dataset as a .csv file,\n",
"# which I recommend you do as backup,\n",
"\n",
"csv_dir = dataset.DIRS.DATASET.parent\n",
"dataset.to_csv(csv_dir)\n",
"\n",
"\n",
"# If you want to save the new metadata you have generated \n",
"# (vocalisation type labels and onset/offsets, for example) \n",
"# If you want to save the new metadata you have generated\n",
"# (vocalisation type labels and onset/offsets, for example)\n",
"# to the original .json files (as a backup or to use with other software):\n",
"\n",
"from pykanto.utils.write import save_to_jsons\n",
"save_to_jsons(dataset)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"dataset_loc = DIRS.DATA / \"datasets\" / DATASET_ID / f\"{DATASET_ID}.db\"\n",
"dataset = open_dataset(dataset_loc)"
]
}
],
"metadata": {
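Putting the notebook cells above together, a minimal end-to-end sketch could look like the following. This assumes DIRS comes from your pykanto project setup and that a dataset called BIG_BIRD already exists, as in the example cells; timestamp is the option added by this commit.

# Minimal sketch: load an existing dataset, back it up, export metadata.
from pykanto.utils.read import load_dataset
from pykanto.utils.write import save_to_jsons

DATASET_ID = "BIG_BIRD"
out_dir = DIRS.DATA / "datasets" / DATASET_ID / f"{DATASET_ID}.db"
dataset = load_dataset(out_dir)

# Back up the dataset as .csv next to the .db file; timestamp=True (added in
# this commit) appends an _HHMMSS suffix to the vocalisation file name.
csv_dir = dataset.DIRS.DATASET.parent
dataset.to_csv(csv_dir, timestamp=True)

# Write newly derived metadata (e.g. vocalisation labels, onsets/offsets)
# back to the original .json files.
save_to_jsons(dataset)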
12 changes: 6 additions & 6 deletions pykanto/dataset.py
@@ -10,20 +10,17 @@

import copy
import inspect
import math
import pickle
import subprocess
import warnings
from datetime import datetime
from pathlib import Path
from random import sample
from typing import List, Literal, Tuple

import matplotlib as mpl
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import ray
import seaborn as sns
from bokeh.palettes import Set3_12

import pykanto.plot as kplot
@@ -713,15 +710,18 @@ def save_to_disk(self, verbose: bool = True) -> None:
if verbose:
print(f"Saved dataset to {out_dir}")

def to_csv(self, path: Path) -> None:
def to_csv(self, path: Path, timestamp: bool = False) -> None:
"""
Output vocalisation (and, if present, unit) metadata in the dataset as
a .csv file.
Args:
path (Path): Directory where to save the file(s).
timestamp (bool, optional): Whether to add timestamp to file name.
Defaults to False.
"""
self.vocs.to_csv(path / f"{self.DIRS.DATASET.stem}_VOCS.csv")
t = f'_{datetime.now().strftime("%H%M%S")}' if timestamp else ""
self.vocs.to_csv(path / f"{self.DIRS.DATASET.stem}_VOCS{t}.csv")
if hasattr(self, "units"):
self.units.to_csv(path / f"{self.DIRS.DATASET.stem}_UNITS.csv")

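A short usage sketch of the new argument (the BIG_BIRD file names below are illustrative; the suffix follows the %H%M%S pattern in the code above):

# Default behaviour: stable file names, overwritten on every call.
dataset.to_csv(dataset.DIRS.DATASET.parent)
# -> BIG_BIRD_VOCS.csv (and BIG_BIRD_UNITS.csv if the dataset has units)

# With timestamp=True the vocalisation file gets a time-of-day suffix,
# so repeated backups do not overwrite each other, e.g. at 14:05:32:
dataset.to_csv(dataset.DIRS.DATASET.parent, timestamp=True)
# -> BIG_BIRD_VOCS_140532.csv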
