Skip to content

Commit

Permalink
added formatting utils (#213)
Browse files Browse the repository at this point in the history
* added formatting function to export APLOSE formatted result to Raven and json format

* added formatting function to export APLOSE formatted result to Raven and json format

* added a test to aplose2raven function and added some more comments in docstring

* changed some variables for more explicit names in `test_aplose2raven`

* syntax fix

* docstring update

* docstring update 2

* docstring update 3
  • Loading branch information
mathieudpnt authored Oct 21, 2024
1 parent c527c9f commit 6ac7558
Show file tree
Hide file tree
Showing 2 changed files with 93 additions and 0 deletions.
42 changes: 42 additions & 0 deletions src/OSmOSE/utils/formatting_utils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
import pandas as pd


def aplose2raven(df: pd.DataFrame) -> pd.DataFrame:
    r"""Convert an APLOSE formatted result DataFrame to a Raven formatted DataFrame.

    The first row of ``df`` is used as the time origin: every begin time is the
    number of seconds elapsed between that row's ``start_datetime`` and the
    ``start_datetime`` of the first row. Each end time is the begin time plus
    the row's ``end_time`` value.

    Parameters
    ----------
    df: APLOSE formatted result DataFrame
        Must contain the columns ``start_datetime``, ``end_time``,
        ``start_frequency`` and ``end_frequency``. Its index may hold any
        labels; rows are processed by position. Rows whose ``start_datetime``
        precedes the first row's get a negative begin time, so the input is
        expected to be sorted by ``start_datetime`` (see the example below).

    Returns
    -------
    df2raven: Raven formatted DataFrame
        One row per input row, indexed from 0, with the columns ``Selection``
        (1-based row number), ``View`` and ``Channel`` (always 1),
        ``Begin Time (s)``, ``End Time (s)``, ``Low Freq (Hz)`` and
        ``High Freq (Hz)``. An empty input gives an empty DataFrame with
        these columns.

    Example of use
    --------------
    from pathlib import Path

    aplose_file = Path("path/to/aplose/result/file")
    df = (
        pd.read_csv(aplose_file, parse_dates=["start_datetime", "end_datetime"])
        .sort_values("start_datetime")
        .reset_index(drop=True)
    )
    df_raven = aplose2raven(df)
    # export to Raven format
    df_raven.to_csv('path/to/result/file.txt', sep='\t', index=False)  # Raven export tab-separated files with a txt extension
    """
    n_rows = len(df)
    start_datetimes = df["start_datetime"]

    # Positional access: ``start_datetimes[0]`` would be a *label* lookup and
    # fail (or pick the wrong row) when the index is not 0..n-1.
    # With no rows there is no origin, and the comprehension below never uses it.
    origin = start_datetimes.iloc[0] if n_rows else None

    begin_time = [(start - origin).total_seconds() for start in start_datetimes]
    end_time = [begin + offset for begin, offset in zip(begin_time, df["end_time"])]

    # ``to_numpy()`` copies the frequency values by position; passing the Series
    # themselves would align them on ``df``'s index and could introduce NaN.
    df2raven = pd.DataFrame(
        {
            "Selection": list(range(1, n_rows + 1)),
            "View": [1] * n_rows,
            "Channel": [1] * n_rows,
            "Begin Time (s)": begin_time,
            "End Time (s)": end_time,
            "Low Freq (Hz)": df["start_frequency"].to_numpy(),
            "High Freq (Hz)": df["end_frequency"].to_numpy(),
        }
    )

    return df2raven
51 changes: 51 additions & 0 deletions tests/test_utils.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,14 @@
from io import StringIO
import os

import pandas as pd
import pytest
import shutil
from OSmOSE.utils.core_utils import read_header, safe_read
from OSmOSE.config import OSMOSE_PATH
import numpy as np
import soundfile as sf
from OSmOSE.utils.formatting_utils import aplose2raven


@pytest.mark.unit
Expand Down Expand Up @@ -44,3 +47,51 @@ def test_read_header(input_dir):
assert (sr, frames, sampwidth, channels, size) == read_header(
input_dir.joinpath("test.wav")
)


@pytest.fixture
def aplose_dataframe():
    """Return a three-row APLOSE-style result table used by the formatting tests.

    The first two rows span the whole file (0-60 s, 0-96000 Hz, ``is_box`` 0);
    the third row is a box (5.9-8.1 s, 18500-53000 Hz, ``is_box`` 1).
    """
    n_rows = 3

    # Three consecutive minute boundaries shared by the datetime columns.
    minute_0 = pd.Timestamp("2020-05-29T11:30:00.000+00:00")
    minute_1 = pd.Timestamp("2020-05-29T11:31:00.000+00:00")
    minute_2 = pd.Timestamp("2020-05-29T11:32:00.000+00:00")

    columns = {
        "dataset": ["dataset_test"] * n_rows,
        "filename": ["file1.wav", "file2.wav", "file3.wav"],
        "start_time": [0, 0, 5.9],
        "end_time": [60, 60, 8.1],
        "start_frequency": [0, 0, 18500.0],
        "end_frequency": [96000, 96000, 53000.0],
        "annotation": ["boat"] * n_rows,
        "annotator": ["bbjuni"] * n_rows,
        "start_datetime": [minute_0, minute_1, minute_1],
        "end_datetime": [minute_1, minute_2, minute_2],
        "is_box": [0, 0, 1],
    }

    frame = pd.DataFrame(columns)
    return frame.reset_index(drop=True)


@pytest.mark.unit
def test_aplose2raven(aplose_dataframe):
    """Check that ``aplose2raven`` builds the expected Raven selection table.

    Begin times are offsets in seconds from the first row's ``start_datetime``;
    end times are those offsets plus the fixture's ``end_time`` values.
    """
    raven_dataframe = aplose2raven(df=aplose_dataframe)

    expected_raven_dataframe = pd.DataFrame(
        {
            "Selection": [1, 2, 3],
            "View": [1, 1, 1],
            "Channel": [1, 1, 1],
            "Begin Time (s)": [0.0, 60.0, 60.0],
            "End Time (s)": [60.0, 120.0, 68.1],
            "Low Freq (Hz)": [0.0, 0.0, 18500.0],
            "High Freq (Hz)": [96000.0, 96000.0, 53000.0],
        }
    )

    # assert_frame_equal still checks dtypes, columns and index, but compares
    # the computed float columns with a tolerance and, on failure, reports
    # which column and values differ instead of a bare ``assert False``.
    pd.testing.assert_frame_equal(raven_dataframe, expected_raven_dataframe)

0 comments on commit 6ac7558

Please sign in to comment.