From bfc975bd38a49f222e124d3804ea64e56a683716 Mon Sep 17 00:00:00 2001 From: Feliks Weber Date: Thu, 16 Jun 2022 09:27:19 +0200 Subject: [PATCH 1/7] Add a button to record new samples --- .../opened_project_frame.py | 45 ++++++++++++++++++- ui/opened_project_frame.ui | 35 +++++++++++++-- 2 files changed, 74 insertions(+), 6 deletions(-) diff --git a/src/voice_annotation_tool/opened_project_frame.py b/src/voice_annotation_tool/opened_project_frame.py index 7815563..72c51b7 100644 --- a/src/voice_annotation_tool/opened_project_frame.py +++ b/src/voice_annotation_tool/opened_project_frame.py @@ -1,7 +1,19 @@ -from PySide6.QtCore import QModelIndex, Slot +import os +import random +import string +from PySide6.QtCore import QModelIndex, QUrl, Slot +from PySide6.QtMultimedia import ( + QAudioInput, + QMediaCaptureSession, + QMediaFormat, + QMediaRecorder, +) from PySide6.QtWidgets import QFrame, QFileDialog, QPushButton, QWidget -from voice_annotation_tool.annotation_list_model import AnnotationListModel, ANNOTATION_ROLE +from voice_annotation_tool.annotation_list_model import ( + AnnotationListModel, + ANNOTATION_ROLE, +) from voice_annotation_tool.opened_project_frame_ui import Ui_OpenedProjectFrame from voice_annotation_tool.project import Annotation, Project @@ -51,10 +63,23 @@ def __init__(self): self.audioPlaybackWidget.previous_pressed.connect(self.previous_pressed) self.project: Project self.annotationList.installEventFilter(self) + for age in AGE_STRINGS: self.ageInput.addItem(age) self.ageInput.addItem(self.tr("[Multiple]")) + self.recorder = QMediaRecorder() + self.input = QAudioInput() + self.session = QMediaCaptureSession() + self.session.setAudioInput(self.input) + self.recorder = QMediaRecorder() + self.session.setRecorder(self.recorder) + self.recorder.setMediaFormat(QMediaFormat.Wave) + self.recorder.setEncodingMode(QMediaRecorder.ConstantBitRateEncoding) + self.recorder.setAudioSampleRate(16000) + self.recorder.setAudioBitRate(32) + 
self.recorder.setQuality(QMediaRecorder.HighQuality) + def get_playback_buttons(self) -> list[QPushButton]: """Returns a list of buttons used to control the audio playback.""" return self.audioPlaybackWidget.playback_buttons @@ -273,3 +298,19 @@ def import_profile_pressed(self): def mark_unchanged_pressed(self): for annotation in self.get_selected_annotations(): self.project.mark_unchanged(annotation) + + @Slot() + def record_pressed(self): + if not self.project.audio_folder or not self.project.audio_folder.is_dir(): + return + if self.recorder.recorderState() == QMediaRecorder.StoppedState: + name = "".join( + random.choices(string.ascii_lowercase + string.digits * 2, k=129) + ) + path = QUrl.fromLocalFile(os.fspath(self.project.audio_folder / name)) + self.recorder.setOutputLocation(path) + self.recorder.record() + self.recordButton.setText(self.tr("Stop Recording")) + else: + self.recorder.stop() + self.recordButton.setText(self.tr("Record Sample")) diff --git a/ui/opened_project_frame.ui b/ui/opened_project_frame.ui index 5f02e4d..280f4d3 100644 --- a/ui/opened_project_frame.ui +++ b/ui/opened_project_frame.ui @@ -189,6 +189,16 @@ + + + + Start recording a new sample and save it to the audio folder selected in the project settings. Click the button again to stop recording. 
+ + + &Record Sample + + + @@ -270,8 +280,8 @@ gender_selected(int) - 542 - 34 + 540 + 43 1062 @@ -286,8 +296,8 @@ accent_changed(QString) - 542 - 86 + 540 + 90 1047 @@ -327,6 +337,22 @@ + + recordButton + pressed() + OpenedProjectFrame + record_pressed() + + + 819 + 709 + + + 1234 + 392 + + + previous_pressed() @@ -342,5 +368,6 @@ play_pause_pressed() mark_unchanged_pressed() metadata_changed(QString) + record_pressed() From 985eb33661fd2e913ae392a356fb2f84836b37d9 Mon Sep 17 00:00:00 2001 From: Feliks Weber Date: Thu, 16 Jun 2022 09:37:32 +0200 Subject: [PATCH 2/7] Add recorded samples to the list --- src/voice_annotation_tool/opened_project_frame.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/src/voice_annotation_tool/opened_project_frame.py b/src/voice_annotation_tool/opened_project_frame.py index 72c51b7..ec62397 100644 --- a/src/voice_annotation_tool/opened_project_frame.py +++ b/src/voice_annotation_tool/opened_project_frame.py @@ -189,7 +189,7 @@ def delete_selected(self): """Delete the selected annotations and audio files.""" for selected in self.get_selected_annotations(): self.project.delete_annotation(selected) - self.annotationList.model().layoutChanged.emit() + self.update_sample_list() self.update_metadata_widgets() def get_selected_annotations(self) -> list[Annotation]: @@ -199,6 +199,11 @@ def get_selected_annotations(self) -> list[Annotation]: annotations.append(selected_index.data(ANNOTATION_ROLE)) return annotations + def update_sample_list(self): + """Update the graphical representation of the + AnnotationListModel after it changed.""" + self.annotationList.model().layoutChanged.emit() + @Slot() def previous_pressed(self): current = self.annotationList.currentIndex().row() @@ -314,3 +319,5 @@ def record_pressed(self): else: self.recorder.stop() self.recordButton.setText(self.tr("Record Sample")) + self.project.load_audio_files(self.project.audio_folder) + self.update_sample_list() From 
3d526e37b76fe0939ea643e81ca98c8c4fa40043 Mon Sep 17 00:00:00 2001 From: Feliks Weber Date: Thu, 16 Jun 2022 09:46:09 +0200 Subject: [PATCH 3/7] Add a dropdown to choose the audio device --- .../opened_project_frame.py | 8 ++++++ ui/opened_project_frame.ui | 28 +++++++++++++++++-- 2 files changed, 34 insertions(+), 2 deletions(-) diff --git a/src/voice_annotation_tool/opened_project_frame.py b/src/voice_annotation_tool/opened_project_frame.py index ec62397..cff2362 100644 --- a/src/voice_annotation_tool/opened_project_frame.py +++ b/src/voice_annotation_tool/opened_project_frame.py @@ -5,6 +5,7 @@ from PySide6.QtMultimedia import ( QAudioInput, QMediaCaptureSession, + QMediaDevices, QMediaFormat, QMediaRecorder, ) @@ -80,6 +81,9 @@ def __init__(self): self.recorder.setAudioBitRate(32) self.recorder.setQuality(QMediaRecorder.HighQuality) + for device in QMediaDevices.audioInputs(): + self.deviceComboBox.addItem(device.description()) + def get_playback_buttons(self) -> list[QPushButton]: """Returns a list of buttons used to control the audio playback.""" return self.audioPlaybackWidget.playback_buttons @@ -321,3 +325,7 @@ def record_pressed(self): self.recordButton.setText(self.tr("Record Sample")) self.project.load_audio_files(self.project.audio_folder) self.update_sample_list() + + @Slot() + def device_selected(self, device: int): + self.input.setDevice(QMediaDevices.audioInputs()[device]) diff --git a/ui/opened_project_frame.ui b/ui/opened_project_frame.ui index 280f4d3..18ab50c 100644 --- a/ui/opened_project_frame.ui +++ b/ui/opened_project_frame.ui @@ -199,6 +199,13 @@ + + + + Select the audio capture device used for recording. 
+ + + @@ -344,8 +351,8 @@ record_pressed() - 819 - 709 + 978 + 689 1234 @@ -353,6 +360,22 @@ + + deviceComboBox + currentIndexChanged(int) + OpenedProjectFrame + device_selected(int) + + + 823 + 707 + + + 1233 + 637 + + + previous_pressed() @@ -369,5 +392,6 @@ mark_unchanged_pressed() metadata_changed(QString) record_pressed() + device_selected(int) From 9a2d3deefb3cc481ce4d08f4b03ea0d35e5760aa Mon Sep 17 00:00:00 2001 From: Feliks Weber Date: Thu, 16 Jun 2022 13:10:28 +0200 Subject: [PATCH 4/7] Add basic documentation of recording feature --- docs/index.rst | 1 + docs/recording.rst | 6 ++++++ 2 files changed, 7 insertions(+) create mode 100644 docs/recording.rst diff --git a/docs/index.rst b/docs/index.rst index 53a2e49..a38074e 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -14,3 +14,4 @@ Contents tsv_format import_export api + recording diff --git a/docs/recording.rst b/docs/recording.rst new file mode 100644 index 0000000..3014aa9 --- /dev/null +++ b/docs/recording.rst @@ -0,0 +1,6 @@ +Recording New Samples +===================== + +New samples can be recorded directly from the application. + +Select the audio device you want to use and then press the `Record Sample` button. Press the button again to stop recording; the sample is saved to the project's audio folder.
From 40f44c4643d1f84d4ad70cfa785ca6c88b9f278b Mon Sep 17 00:00:00 2001 From: Feliks Weber Date: Fri, 17 Jun 2022 09:05:21 +0200 Subject: [PATCH 5/7] Use random hexadecimal number as sample name --- src/voice_annotation_tool/opened_project_frame.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/src/voice_annotation_tool/opened_project_frame.py b/src/voice_annotation_tool/opened_project_frame.py index cff2362..4b90114 100644 --- a/src/voice_annotation_tool/opened_project_frame.py +++ b/src/voice_annotation_tool/opened_project_frame.py @@ -313,9 +313,7 @@ def record_pressed(self): if not self.project.audio_folder or not self.project.audio_folder.is_dir(): return if self.recorder.recorderState() == QMediaRecorder.StoppedState: - name = "".join( - random.choices(string.ascii_lowercase + string.digits * 2, k=129) - ) + name = "".join(random.choices(string.hexdigits, k=129)) path = QUrl.fromLocalFile(os.fspath(self.project.audio_folder / name)) self.recorder.setOutputLocation(path) self.recorder.record() From 46b0bca10f41ce7cb65cfd97c1588cfb53733674 Mon Sep 17 00:00:00 2001 From: Feliks Weber Date: Fri, 17 Jun 2022 09:06:34 +0200 Subject: [PATCH 6/7] Add zeros to the name as in the common voice dataset --- src/voice_annotation_tool/opened_project_frame.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/voice_annotation_tool/opened_project_frame.py b/src/voice_annotation_tool/opened_project_frame.py index 4b90114..fada636 100644 --- a/src/voice_annotation_tool/opened_project_frame.py +++ b/src/voice_annotation_tool/opened_project_frame.py @@ -313,7 +313,7 @@ def record_pressed(self): if not self.project.audio_folder or not self.project.audio_folder.is_dir(): return if self.recorder.recorderState() == QMediaRecorder.StoppedState: - name = "".join(random.choices(string.hexdigits, k=129)) + name = "000" + "".join(random.choices(string.hexdigits, k=125)) path = QUrl.fromLocalFile(os.fspath(self.project.audio_folder / name)) 
self.recorder.setOutputLocation(path) self.recorder.record() From 93bfe1bdc6f4b2857522c05cb708011ad9fffca9 Mon Sep 17 00:00:00 2001 From: Feliks Weber Date: Fri, 17 Jun 2022 09:08:06 +0200 Subject: [PATCH 7/7] Use unbiased hexadecimal digits --- src/voice_annotation_tool/opened_project_frame.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/voice_annotation_tool/opened_project_frame.py b/src/voice_annotation_tool/opened_project_frame.py index fada636..a7b8b91 100644 --- a/src/voice_annotation_tool/opened_project_frame.py +++ b/src/voice_annotation_tool/opened_project_frame.py @@ -313,7 +313,7 @@ def record_pressed(self): if not self.project.audio_folder or not self.project.audio_folder.is_dir(): return if self.recorder.recorderState() == QMediaRecorder.StoppedState: - name = "000" + "".join(random.choices(string.hexdigits, k=125)) + name = "000" + "".join(random.choices("0123456789abcdef", k=125)) path = QUrl.fromLocalFile(os.fspath(self.project.audio_folder / name)) self.recorder.setOutputLocation(path) self.recorder.record()