Skip to content

Commit

Permalink
Updated JavaDoc for all files
Browse files Browse the repository at this point in the history
  • Loading branch information
gkonovalov committed Jan 10, 2024
1 parent 180277e commit c1c148e
Show file tree
Hide file tree
Showing 23 changed files with 568 additions and 485 deletions.
2 changes: 1 addition & 1 deletion example/build.gradle
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ android {
compileSdk 34
minSdkVersion 23
targetSdkVersion 34
versionCode 12
versionCode 13
versionName "2.0.6"

setProperty("archivesBaseName", "Android-VAD-v" + versionName)
Expand Down
2 changes: 1 addition & 1 deletion silero/build.gradle
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ android {
compileSdk 34
minSdkVersion 21
targetSdkVersion 34
versionCode 12
versionCode 13
versionName "2.0.6"

setProperty("archivesBaseName", "android-vad-silero-v" + versionName)
Expand Down
108 changes: 61 additions & 47 deletions silero/src/main/java/com/konovalov/vad/silero/Vad.kt
Original file line number Diff line number Diff line change
Expand Up @@ -6,32 +6,41 @@ import com.konovalov.vad.silero.config.Mode
import com.konovalov.vad.silero.config.SampleRate

/**
* Created by Georgiy Konovalov on 26/06/2023.
* <p>
* Created by Georgiy Konovalov on 6/26/2023.
*
* The Silero VAD algorithm, based on DNN, analyzes the audio signal to determine whether it
* contains speech or non-speech segments. It offers higher accuracy in differentiating speech from
* background noise compared to the WebRTC VAD algorithm.
*
* The Silero VAD supports the following parameters:
*
* Sample Rates: 8000Hz, 16000Hz
* Sample Rates:
*
* 8000Hz,
* 16000Hz
*
* Frame Sizes (per sample rate):
* For 8000Hz: 256, 512, 768
* For 16000Hz: 512, 1024, 1536
* Mode: OFF, NORMAL, AGGRESSIVE, VERY_AGGRESSIVE
*
* For 8000Hz: 256, 512, 768
* For 16000Hz: 512, 1024, 1536
*
* Mode:
*
* OFF,
* NORMAL,
* AGGRESSIVE,
* VERY_AGGRESSIVE
*
* Please note that the VAD class supports these specific combinations of sample
* rates and frame sizes, and the classifiers determine the aggressiveness of the voice
* activity detection algorithm.
* </p>
* @param context (required) The context helps with reading the model file from the file system.
* @param sampleRate (required) The sample rate of the audio input.
* @param frameSize (required) The frame size of the audio input.
* @param mode (required) The recognition mode of the VAD model.
* @param speechDurationMs (optional) The minimum duration in milliseconds for speech segments.
* @param silenceDurationMs (optional) The minimum duration in milliseconds for silence segments.
* </p>
*
* @param context is required for reading the model file from the file system.
* @param sampleRate is the sample rate of the audio input (required).
* @param frameSize is the frame size of the audio input (required).
* @param mode is the detection mode of the VAD model (required).
* @param speechDurationMs is the minimum duration in milliseconds for speech segments (optional).
* @param silenceDurationMs is the minimum duration in milliseconds for silence segments (optional).
*/
class Vad private constructor() {
private lateinit var context: Context
Expand All @@ -42,89 +51,94 @@ class Vad private constructor() {
private var silenceDurationMs = 0

/**
* <p>
* Set Context for Vad Model.
* </p>
* @param context (required) - The context is required and helps with reading the
* model file from the file system.
*
* @param context is required for accessing the model file.
*/
fun setContext(context: Context): Vad {
    // Prefer the application context; fall back to the supplied context when it is null.
    val appContext = context.applicationContext
    this.context = appContext ?: context
    return this
}

/**
* <p>
* Set sample rate of the audio input for Vad Model.
* Set the sample rate of the audio input for the Vad Model.
*
* Valid Sample Rates:
*
* 8000Hz,
* 16000Hz
*
* Sample Rates: 8000Hz, 16000Hz
* </p>
* @param sampleRate (required) - The sample rate of the audio input.
* @param sampleRate is required for processing audio input.
*/
fun setSampleRate(sampleRate: SampleRate): Vad {
    // Store the value and return this instance so calls can be chained.
    this.sampleRate = sampleRate
    return this
}

/**
* <p>
* Set frame size of the audio input for Vad Model.
* Set the frame size of the audio input for the Vad Model.
*
* Valid Frame Sizes (per sample rate):
*
* For 8000Hz: 256, 512, 768
* For 16000Hz: 512, 1024, 1536
* </p>
* @param frameSize (required) - The frame size of the audio input.
*
* @param frameSize is required for processing audio input.
*/
fun setFrameSize(frameSize: FrameSize): Vad {
    // Store the value and return this instance so calls can be chained.
    this.frameSize = frameSize
    return this
}

/**
* <p>
* Set recognition mode for Vad Model.
* Set the detection mode for the Vad Model.
*
* Mode:
*
* Valid Mode: OFF, NORMAL, AGGRESSIVE, VERY_AGGRESSIVE
* </p>
* @param mode (required) - The recognition mode of the VAD model.
* OFF,
* NORMAL,
* AGGRESSIVE,
* VERY_AGGRESSIVE
*
* @param mode is required for the VAD model.
*/
fun setMode(mode: Mode): Vad {
    // Store the value and return this instance so calls can be chained.
    this.mode = mode
    return this
}

/**
* <p>
* Set the minimum duration in milliseconds for speech segments.
* The value of this parameter will define the necessary and sufficient duration of positive
* results to recognize result as speech. Negative numbers are not allowed.
* results to recognize result as speech. This parameter is optional.
*
* Permitted range (0ms <= speechDurationMs <= 300000ms).
*
* Parameters used in {@link VadSilero.continuousSpeechListener}.
* </p>
* @param speechDurationMs (optional) The minimum duration in milliseconds for speech segments.
* This parameter is used by [VadSilero.isSpeech].
*
* @param speechDurationMs minimum duration in milliseconds for speech segments.
*/
fun setSpeechDurationMs(speechDurationMs: Int): Vad {
    // The value is stored as given; no range check happens in this setter.
    this.speechDurationMs = speechDurationMs
    return this
}

/**
* <p>
* Set the minimum duration in milliseconds for silence segments.
* The value of this parameter will define the necessary and sufficient duration of
* negative results to recognize it as silence. Negative numbers are not allowed.
* negative results to recognize it as silence. This parameter is optional.
*
* Permitted range (0ms <= silenceDurationMs <= 300000ms).
*
* Parameters used in {@link VadSilero.continuousSpeechListener}.
* </p>
* @param silenceDurationMs (optional) The minimum duration in milliseconds for silence segments.
* This parameter is used by [VadSilero.isSpeech].
*
* @param silenceDurationMs minimum duration in milliseconds for silence segments.
*/
fun setSilenceDurationMs(silenceDurationMs: Int): Vad {
    // The value is stored as given; no range check happens in this setter.
    this.silenceDurationMs = silenceDurationMs
    return this
}

/**
* <p>
* Builds and returns a VadModel instance based on the specified parameters.
* </p>
* @return An {@link VadSilero} with constructed VadModel.
* @throws IllegalArgumentException If invalid parameters have been set for the model.
* @throws OrtException If the model failed to parse, wasn't compatible or caused an error.
*
* @return constructed VadSilero model.
* @throws IllegalArgumentException if invalid parameters have been set for the model.
* @throws OrtException if the model failed to parse, wasn't compatible or caused an error.
*/
fun build(): VadSilero {
return VadSilero(
Expand Down
5 changes: 2 additions & 3 deletions silero/src/main/java/com/konovalov/vad/silero/VadListener.kt
Original file line number Diff line number Diff line change
@@ -1,10 +1,9 @@
package com.konovalov.vad.silero

/**
* Created by Georgiy Konovalov on 1/06/2023.
* <p>
* Created by Georgiy Konovalov on 6/1/2023.
*
* Interface representing a listener for Voice Activity Detection (VAD) events.
* </p>
*/
interface VadListener {
fun onSpeechDetected()
Expand Down
Loading

0 comments on commit c1c148e

Please sign in to comment.