Skip to content

Commit

Permalink
Add Weldy's calltype dataset and Anuraset.
Browse files Browse the repository at this point in the history
PiperOrigin-RevId: 553610183
  • Loading branch information
sdenton4 authored and copybara-github committed Aug 3, 2023
1 parent 6386357 commit 317e40f
Show file tree
Hide file tree
Showing 6 changed files with 547 additions and 5 deletions.
50 changes: 50 additions & 0 deletions chirp/data/soundscapes/dataset_fns.py
Original file line number Diff line number Diff line change
Expand Up @@ -209,3 +209,53 @@ def load_powdermill_annotations(annotations_path: epath.Path) -> pd.DataFrame:
)
segments = annotations.annotations_to_dataframe(annos)
return segments


def load_weldy_annotations(annotations_path: epath.Path) -> pd.DataFrame:
  """Loads a dataframe of all annotations from the Weldy Calltype dataset.

  Each row's `label` column is normalized before being split on single spaces
  into a list of class names:
    * the abbreviation `unk` is expanded to `unknown`,
    * `impossible` is rewritten to `unknown`,
    * `unknown_chip` (including an expanded `unk_chip`) collapses to `unknown`.

  Args:
    annotations_path: Path to the annotations CSV. Rows are read from the
      `file`, `start`, `end` and `label` columns.

  Returns:
    The dataframe produced by `annotations.annotations_to_dataframe` for the
    parsed annotations.
  """

  def class_fn(row):
    label = row['label']
    # Expand `unk` only where the text does not already read `unknown`. A blind
    # `replace('unk', 'unknown')` also matches inside `unknown`, producing
    # `unknownnown` and stopping the `unknown_chip` rule below from matching.
    label = 'unknown'.join(
        piece.replace('unk', 'unknown') for piece in label.split('unknown')
    )
    label = label.replace('impossible', 'unknown')
    label = label.replace('unknown_chip', 'unknown')
    return label.split(' ')

  # Audio filenames are prefixed with the `annotated_recordings/` directory.
  filename_fn = lambda _, row: 'annotated_recordings/' + row['file'].strip()
  start_time_fn = lambda row: float(row['start'])
  end_time_fn = lambda row: float(row['end'])
  # The filter never fires for this dataset (it always returns False).
  filter_fn = lambda row: False

  annos = annotations.read_dataset_annotations_csvs(
      [epath.Path(annotations_path)],
      filename_fn=filename_fn,
      namespace='weldy_calltype',
      class_fn=class_fn,
      start_time_fn=start_time_fn,
      end_time_fn=end_time_fn,
      filter_fn=filter_fn,
  )
  segments = annotations.annotations_to_dataframe(annos)
  return segments


def load_anuraset_annotations(annotations_path: epath.Path) -> pd.DataFrame:
  """Loads a dataframe of all annotations from the Anuraset CSV.

  Args:
    annotations_path: Path to the annotations CSV. Rows are read from the
      `filename`, `start_time_s`, `end_time_s` and `label` columns.

  Returns:
    The dataframe produced by `annotations.annotations_to_dataframe` for the
    parsed annotations.
  """

  def filename_fn(_, row):
    # The relative path is `<prefix>/<filename>`, where the prefix is the part
    # of the filename before its first underscore.
    raw_name = row['filename']
    prefix = raw_name.split('_')[0]
    return os.path.join(prefix, raw_name.strip())

  def start_time_fn(row):
    return float(row['start_time_s'])

  def end_time_fn(row):
    return float(row['end_time_s'])

  def filter_fn(row):
    # There are a few SPECIES_LALSE labels which according to the authors
    # should be ignored. NOTE(review): presumably a True result drops the row;
    # confirm against `read_dataset_annotations_csvs`.
    return '_LALSE' in row['label']

  def class_fn(row):
    # A label may carry several space-separated classes.
    return row['label'].split(' ')

  parsed = annotations.read_dataset_annotations_csvs(
      [epath.Path(annotations_path)],
      filename_fn=filename_fn,
      namespace='anuraset',
      class_fn=class_fn,
      start_time_fn=start_time_fn,
      end_time_fn=end_time_fn,
      filter_fn=filter_fn,
  )
  return annotations.annotations_to_dataframe(parsed)
24 changes: 24 additions & 0 deletions chirp/data/soundscapes/soundscapes.py
Original file line number Diff line number Diff line change
Expand Up @@ -335,6 +335,30 @@ class Soundscapes(bird_taxonomy.BirdTaxonomy):
),
class_list_name='peru',
),
SoundscapesConfig(
name='weldy_calltype_full_length',
audio_glob='weldy_calltype/annotated_recordings/*.wav',
annotation_load_fn=dataset_fns.load_weldy_annotations,
annotation_filename='annotations.csv',
keep_unknown_annotation=True,
description=(
'Full-length annotated bird call types from the PNW. '
'https://zenodo.org/record/8047850'
),
class_list_name='weldy_calltype',
),
SoundscapesConfig(
name='anuraset_full_length',
audio_glob='anuraset/raw_data/*/*.wav',
annotation_load_fn=dataset_fns.load_anuraset_annotations,
annotation_filename='annotations.csv',
keep_unknown_annotation=True,
description=(
'Full-length annotated frog vocalizations. '
'https://zenodo.org/record/8056090'
),
class_list_name='anuraset',
),
]

def _info(self) -> tfds.core.DatasetInfo:
Expand Down
19 changes: 14 additions & 5 deletions chirp/taxonomy/namespace_db.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,24 +48,33 @@ def validate_taxonomy_database(taxonomy_database: TaxonomyDatabase) -> None:
"""
namespaces = taxonomy_database.namespaces

for _, mapping in taxonomy_database.mappings.items():
for mapping_name, mapping in taxonomy_database.mappings.items():
if (
set(mapping.mapped_pairs.keys())
- namespaces[mapping.source_namespace].classes
):
raise ValueError("unknown class in source")
raise ValueError(
f"Mapping {mapping_name} contains a source class not in "
f"the namespace ({mapping.source_namespace})."
)
if (
set(mapping.mapped_pairs.values())
- namespaces[mapping.target_namespace].classes
):
raise ValueError("unknown class in target")
raise ValueError(
f"Mapping {mapping_name} contains a target class not in "
f"the namespace ({mapping.source_namespace})."
)

for _, class_list in taxonomy_database.class_lists.items():
for class_name, class_list in taxonomy_database.class_lists.items():
classes = class_list.classes
if set(classes) - namespaces[class_list.namespace].classes > {
namespace.UNKNOWN_LABEL
}:
raise ValueError("unknown class in class list")
raise ValueError(
f"ClassList {class_name} contains a class not in "
f"the namespace ({class_list.namespace})."
)


def load_taxonomy_database(
Expand Down
Loading

0 comments on commit 317e40f

Please sign in to comment.