From 836b1b1bffd561fa740ac22ae0ba324ffc094645 Mon Sep 17 00:00:00 2001
From: gkonovalov
Date: Wed, 9 Jun 2021 17:21:04 -0400
Subject: [PATCH] Upgraded all libraries Added comments for every method in
Vad.java Removed androidx.appcompat from VAD project Renamed
Vad.isContinuousSpeech to Vad.addContinuousSpeechListener Fixed issue with
wrong Frame Sizes in MainActivity.java
---
README.md | 46 +++++++----
build.gradle | 2 +-
example/build.gradle | 16 ++--
.../konovalov/vad/example/MainActivity.java | 15 ++--
.../vad/example/recorder/VoiceRecorder.java | 6 +-
gradle/wrapper/gradle-wrapper.properties | 4 +-
vad/build.gradle | 15 ++--
vad/src/main/java/com/konovalov/vad/Vad.java | 82 +++++++++++++++++--
8 files changed, 135 insertions(+), 51 deletions(-)
diff --git a/README.md b/README.md
index 221c72e..9ccddcd 100644
--- a/README.md
+++ b/README.md
@@ -20,39 +20,49 @@ comparison.
## Parameters
-VAD library only accepts 16-bit mono PCM audio.
+VAD library only accepts 16-bit mono PCM audio and supports various Sample Rates and Frame Sizes.
-Valid sample rates are 8000, 16000, 32000 and 48000 Hz.
+| Valid Sample Rate | Valid Frame Size |
+|:-------------------|:------------------|
+| 8000Hz | 80, 160, 240 |
+| 16000Hz | 160, 320, 480 |
+| 32000Hz | 320, 640, 960 |
+| 48000Hz | 480, 960, 1440 |
-Frame size can be 80, 160, 240, 320, 480, 640, 960 or 1440 bytes depending on the sample rate.
-Classifier supports NORMAL, LOW_BITRATE, AGGRESSIVE and VERY_AGGRESSIVE modes.
+Classifier supports the following modes:
-Silence duration (ms) - this parameter used in Continuous Speech detector,
+| Valid Classifiers |
+|:------------------|
+| NORMAL |
+| LOW_BITRATE |
+| AGGRESSIVE |
+| VERY_AGGRESSIVE |
+
+**Silence duration (ms)** - this parameter is used in the Continuous Speech detector,
the value of this parameter will define the necessary and sufficient
duration of negative results to recognize it as silence.
-Voice duration (ms) - this parameter used in Continuous Speech detector,
+**Voice duration (ms)** - this parameter is used in the Continuous Speech detector,
the value of this parameter will define the necessary and sufficient
duration of positive results to recognize result as speech.
-
Recommended parameters:
-* sample rate - 16KHz,
-* frame size - 480,
-* mode - VERY_AGGRESSIVE,
-* silence duration - 500ms,
-* voice duration - 500ms;
+* Sample Rate - **16KHz**,
+* Frame Size - **160**,
+* Mode - **VERY_AGGRESSIVE**,
+* Silence Duration - **500ms**,
+* Voice Duration - **500ms**;
## Usage
VAD supports 2 different ways of detecting speech:
-1. Continuous Speech detector was designed to detect long utterances
+1. Continuous Speech listener was designed to detect long utterances
without returning false positive results when user makes pauses between
sentences.
```java
Vad vad = new Vad(VadConfig.newBuilder()
.setSampleRate(VadConfig.SampleRate.SAMPLE_RATE_16K)
- .setFrameSize(VadConfig.FrameSize.FRAME_SIZE_480)
+ .setFrameSize(VadConfig.FrameSize.FRAME_SIZE_160)
.setMode(VadConfig.Mode.VERY_AGGRESSIVE)
.setSilenceDurationMillis(500)
.setVoiceDurationMillis(500)
@@ -60,7 +70,7 @@ sentences.
vad.start();
- vad.isContinuousSpeech(short[] audioFrame, new VadListener() {
+ vad.addContinuousSpeechListener(short[] audioFrame, new VadListener() {
@Override
public void onSpeechDetected() {
//speech detected!
@@ -81,7 +91,7 @@ long utterances.
```java
Vad vad = new Vad(VadConfig.newBuilder()
.setSampleRate(VadConfig.SampleRate.SAMPLE_RATE_16K)
- .setFrameSize(VadConfig.FrameSize.FRAME_SIZE_480)
+ .setFrameSize(VadConfig.FrameSize.FRAME_SIZE_160)
.setMode(VadConfig.Mode.VERY_AGGRESSIVE)
.build());
@@ -120,11 +130,11 @@ allprojects {
2. Add the dependency
```groovy
dependencies {
- implementation 'com.github.gkonovalov:android-vad:1.0.0'
+ implementation 'com.github.gkonovalov:android-vad:1.0.1'
}
```
You also can download precompiled AAR library and APK files from GitHub's [releases page](https://github.com/gkonovalov/android-vad/releases).
------------
-Georgiy Konovalov 2019 (c) [MIT License](https://opensource.org/licenses/MIT)
\ No newline at end of file
+Georgiy Konovalov 2021 (c) [MIT License](https://opensource.org/licenses/MIT)
\ No newline at end of file
diff --git a/build.gradle b/build.gradle
index 8482741..ee7e609 100644
--- a/build.gradle
+++ b/build.gradle
@@ -8,7 +8,7 @@ buildscript {
}
dependencies {
- classpath 'com.android.tools.build:gradle:3.5.2'
+ classpath 'com.android.tools.build:gradle:4.0.2'
classpath 'com.github.dcendents:android-maven-gradle-plugin:2.1'
// NOTE: Do not place your application dependencies here; they belong
// in the individual module build.gradle files
diff --git a/example/build.gradle b/example/build.gradle
index da383f0..c653ee5 100644
--- a/example/build.gradle
+++ b/example/build.gradle
@@ -24,16 +24,16 @@ android {
dependencies {
implementation fileTree(dir: 'libs', include: ['*.jar'])
- implementation 'androidx.appcompat:appcompat:1.1.0'
- implementation 'androidx.constraintlayout:constraintlayout:1.1.3'
+ implementation 'androidx.appcompat:appcompat:1.3.0'
+ implementation 'androidx.constraintlayout:constraintlayout:2.0.4'
- implementation 'com.airbnb.android:lottie:3.2.2'
- implementation 'org.permissionsdispatcher:permissionsdispatcher:4.6.0'
- annotationProcessor 'org.permissionsdispatcher:permissionsdispatcher-processor:4.6.0'
+ implementation 'com.airbnb.android:lottie:3.4.0'
+ implementation 'org.permissionsdispatcher:permissionsdispatcher:4.8.0'
+ annotationProcessor 'org.permissionsdispatcher:permissionsdispatcher-processor:4.8.0'
implementation project(path: ':vad')
testImplementation 'junit:junit:4.12'
- androidTestImplementation 'androidx.test:runner:1.2.0'
- androidTestImplementation 'androidx.test.espresso:espresso-core:3.2.0'
- implementation 'com.google.android.material:material:1.0.0'
+ androidTestImplementation 'androidx.test:runner:1.3.0'
+ androidTestImplementation 'androidx.test.espresso:espresso-core:3.3.0'
+ implementation 'com.google.android.material:material:1.3.0'
}
diff --git a/example/src/main/java/com/konovalov/vad/example/MainActivity.java b/example/src/main/java/com/konovalov/vad/example/MainActivity.java
index 9d8ac3b..0511b07 100644
--- a/example/src/main/java/com/konovalov/vad/example/MainActivity.java
+++ b/example/src/main/java/com/konovalov/vad/example/MainActivity.java
@@ -26,6 +26,7 @@ public class MainActivity extends AppCompatActivity implements VoiceRecorder.Lis
private VadConfig.SampleRate DEFAULT_SAMPLE_RATE = VadConfig.SampleRate.SAMPLE_RATE_16K;
private VadConfig.FrameSize DEFAULT_FRAME_SIZE = VadConfig.FrameSize.FRAME_SIZE_160;
private VadConfig.Mode DEFAULT_MODE = VadConfig.Mode.VERY_AGGRESSIVE;
+
private int DEFAULT_SILENCE_DURATION = 500;
private int DEFAULT_VOICE_DURATION = 500;
@@ -67,23 +68,23 @@ protected void onCreate(Bundle savedInstanceState) {
sampleRateSpinner = findViewById(R.id.sampleRateSpinner);
sampleRateAdapter = new ArrayAdapter<>(this, android.R.layout.simple_spinner_dropdown_item, getSampleRates());
sampleRateSpinner.setAdapter(sampleRateAdapter);
- sampleRateSpinner.setOnItemSelectedListener(this);
sampleRateSpinner.setTag(SPINNER_SAMPLE_RATE_TAG);
- sampleRateSpinner.setSelection(getSampleRates().indexOf(DEFAULT_SAMPLE_RATE.name()));
+ sampleRateSpinner.setSelection(getSampleRates().indexOf(DEFAULT_SAMPLE_RATE.name()), false);
+ sampleRateSpinner.setOnItemSelectedListener(this);
frameSpinner = findViewById(R.id.frameSampleRateSpinner);
frameAdapter = new ArrayAdapter<>(this, android.R.layout.simple_spinner_dropdown_item, getFrameSizes());
frameSpinner.setAdapter(frameAdapter);
- frameSpinner.setOnItemSelectedListener(this);
frameSpinner.setTag(SPINNER_FRAME_SIZE_TAG);
- frameSpinner.setSelection(getFrameSizes().indexOf(DEFAULT_FRAME_SIZE.name()));
+ frameSpinner.setSelection(getFrameSizes().indexOf(DEFAULT_FRAME_SIZE.name()), false);
+ frameSpinner.setOnItemSelectedListener(this);
modeSpinner = findViewById(R.id.modeSpinner);
modeAdapter = new ArrayAdapter<>(this, android.R.layout.simple_spinner_dropdown_item, getModes());
modeSpinner.setAdapter(modeAdapter);
- modeSpinner.setOnItemSelectedListener(this);
modeSpinner.setTag(SPINNER_MODE_TAG);
- modeSpinner.setSelection(getModes().indexOf(DEFAULT_MODE.name()));
+ modeSpinner.setSelection(getModes().indexOf(DEFAULT_MODE.name()), false);
+ modeSpinner.setOnItemSelectedListener(this);
recordingActionButton = findViewById(R.id.recordingActionButton);
recordingActionButton.setOnClickListener(this);
@@ -145,6 +146,8 @@ public void onItemSelected(AdapterView<?> adapterView, View view, int position,
frameAdapter.addAll(getFrameSizes());
frameAdapter.notifyDataSetChanged();
frameSpinner.setSelection(0);
+
+ config.setFrameSize(VadConfig.FrameSize.valueOf(String.valueOf(frameAdapter.getItem(0))));
break;
case SPINNER_FRAME_SIZE_TAG:
config.setFrameSize(VadConfig.FrameSize.valueOf(String.valueOf(frameAdapter.getItem(position))));
diff --git a/example/src/main/java/com/konovalov/vad/example/recorder/VoiceRecorder.java b/example/src/main/java/com/konovalov/vad/example/recorder/VoiceRecorder.java
index 4d6ffbb..58fd83f 100644
--- a/example/src/main/java/com/konovalov/vad/example/recorder/VoiceRecorder.java
+++ b/example/src/main/java/com/konovalov/vad/example/recorder/VoiceRecorder.java
@@ -116,12 +116,12 @@ public void run() {
short[] buffer = new short[vad.getConfig().getFrameSize().getValue() * getNumberOfChannels() * 2];
audioRecord.read(buffer, 0, buffer.length);
- isSpeechDetected(buffer);
+ detectSpeech(buffer);
}
}
- private void isSpeechDetected(short[] buffer) {
- vad.isContinuousSpeech(buffer, new VadListener() {
+ private void detectSpeech(short[] buffer) {
+ vad.addContinuousSpeechListener(buffer, new VadListener() {
@Override
public void onSpeechDetected() {
callback.onSpeechDetected();
diff --git a/gradle/wrapper/gradle-wrapper.properties b/gradle/wrapper/gradle-wrapper.properties
index c720a48..0d01954 100644
--- a/gradle/wrapper/gradle-wrapper.properties
+++ b/gradle/wrapper/gradle-wrapper.properties
@@ -1,6 +1,6 @@
-#Wed Nov 27 23:19:25 EST 2019
+#Wed Jun 09 15:12:01 EDT 2021
distributionBase=GRADLE_USER_HOME
distributionPath=wrapper/dists
zipStoreBase=GRADLE_USER_HOME
zipStorePath=wrapper/dists
-distributionUrl=https\://services.gradle.org/distributions/gradle-5.4.1-all.zip
+distributionUrl=https\://services.gradle.org/distributions/gradle-6.1.1-all.zip
diff --git a/vad/build.gradle b/vad/build.gradle
index 284d7fc..34bfc04 100644
--- a/vad/build.gradle
+++ b/vad/build.gradle
@@ -4,15 +4,15 @@ apply plugin: 'com.github.dcendents.android-maven'
group='com.github.gkonovalov'
android {
- compileSdkVersion 29
- buildToolsVersion "29.0.2"
+ compileSdkVersion 30
+ buildToolsVersion "29.0.3"
defaultConfig {
minSdkVersion 16
- targetSdkVersion 29
- versionCode 1
- versionName "1.0.0"
+ targetSdkVersion 30
+ versionCode 2
+ versionName "1.0.1"
setProperty("archivesBaseName", "android-vad-v" + versionName)
@@ -41,8 +41,7 @@ android {
dependencies {
implementation fileTree(dir: 'libs', include: ['*.jar'])
- implementation 'androidx.appcompat:appcompat:1.1.0'
testImplementation 'junit:junit:4.12'
- androidTestImplementation 'androidx.test:runner:1.2.0'
- androidTestImplementation 'androidx.test.espresso:espresso-core:3.2.0'
+ androidTestImplementation 'androidx.test:runner:1.3.0'
+ androidTestImplementation 'androidx.test.espresso:espresso-core:3.3.0'
}
diff --git a/vad/src/main/java/com/konovalov/vad/Vad.java b/vad/src/main/java/com/konovalov/vad/Vad.java
index a869010..d140e0e 100644
--- a/vad/src/main/java/com/konovalov/vad/Vad.java
+++ b/vad/src/main/java/com/konovalov/vad/Vad.java
@@ -1,5 +1,7 @@
package com.konovalov.vad;
+import android.text.Html;
+
import java.util.LinkedHashMap;
import java.util.LinkedList;
@@ -16,6 +18,9 @@ public class Vad {
private long detectedSilenceSamplesMillis = 0;
private long previousTimeMillis = System.currentTimeMillis();
+ /**
+ * Valid Sample Rates and corresponding Frame Sizes
+ */
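public static final LinkedHashMap<VadConfig.SampleRate, LinkedList<VadConfig.FrameSize>> SAMPLE_RATE_VALID_FRAMES = new LinkedHashMap<VadConfig.SampleRate, LinkedList<VadConfig.FrameSize>>() {{
put(VadConfig.SampleRate.SAMPLE_RATE_8K, new LinkedList<VadConfig.FrameSize>() {{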
add(VadConfig.FrameSize.FRAME_SIZE_80);
@@ -42,10 +47,18 @@ public class Vad {
public Vad() {
}
+ /**
+ * VAD constructor
+ *
+ * @param config contains such parameters as Sample Rate {@link VadConfig.SampleRate}, Frame Size {@link VadConfig.FrameSize}, Mode {@link VadConfig.Mode}, etc.
+ */
public Vad(VadConfig config) {
this.config = config;
}
+ /**
+ * Start VAD - should be called before {@link #isSpeech(short[] audio)} or {@link #addContinuousSpeechListener(short[] audio, VadListener listener)}
+ */
public void start() {
if (config == null) {
throw new NullPointerException("VadConfig is NULL!");
@@ -56,12 +69,19 @@ public void start() {
}
try {
- nativeStart(config.getSampleRate().getValue(), config.getFrameSize().getValue(), config.getMode().getValue());
+ int result = nativeStart(config.getSampleRate().getValue(), config.getFrameSize().getValue(), config.getMode().getValue());
+
+ if (result < 0) {
+ throw new RuntimeException("Error can't set parameters for VAD!");
+ }
} catch (Exception e) {
throw new RuntimeException("Error can't start VAD!", e);
}
}
+ /**
+ * Stop VAD - should be called after {@link #start()}
+ */
public void stop() {
try {
nativeStop();
@@ -70,6 +90,14 @@ public void stop() {
}
}
+ /**
+ * Speech detector was designed to detect speech/noise in small audio
+ * frames and return result for every frame. This method will not work for
+ * long utterances.
+ *
+ * @param audio input audio frame
+ * @return boolean containing result of speech detection
+ */
public boolean isSpeech(short[] audio) {
if (audio == null) {
throw new NullPointerException("Audio data is NULL!");
@@ -82,7 +110,34 @@ public boolean isSpeech(short[] audio) {
}
}
+ /**
+ * Continuous Speech listener was designed to detect long utterances
+ * without returning false positive results when user makes pauses between
+ * sentences.
+ *
+ * @param audio input audio frame
+ * @param listener VAD result listener {@link VadListener}
+ *
+ * @deprecated use {@link #addContinuousSpeechListener(short[] audio, VadListener listener)} instead.
+ */
+ @Deprecated
public void isContinuousSpeech(short[] audio, VadListener listener) {
+ addContinuousSpeechListener(audio, listener);
+ }
+
+ /**
+ * Continuous Speech listener was designed to detect long utterances
+ * without returning false positive results when user makes pauses between
+ * sentences.
+ *
+ * @param audio input audio frame
+ * @param listener VAD result listener {@link VadListener}
+ */
+ public void addContinuousSpeechListener(short[] audio, VadListener listener) {
+ if (config == null) {
+ throw new NullPointerException("VadConfig is NULL!");
+ }
+
if (audio == null) {
throw new NullPointerException("Audio data is NULL!");
}
@@ -91,10 +146,6 @@ public void isContinuousSpeech(short[] audio, VadListener listener) {
throw new NullPointerException("VadListener is NULL!");
}
- if (config == null) {
- throw new NullPointerException("VadConfig is NULL!");
- }
-
long currentTimeMillis = System.currentTimeMillis();
if (isSpeech(audio)) {
@@ -119,14 +170,29 @@ public void isContinuousSpeech(short[] audio, VadListener listener) {
previousTimeMillis = currentTimeMillis;
}
+ /**
+ * Get current VAD config
+ *
+ * @return config {@link VadConfig} of VAD
+ */
public VadConfig getConfig() {
return config;
}
+ /**
+ * Set {@link VadConfig} for VAD
+ *
+ * @param config VAD config
+ */
public void setConfig(VadConfig config) {
this.config = config;
}
+ /**
+ * Check Sample Rate and corresponding Frame Size inside of config
+ *
+ * @return boolean - true if the Sample Rate and Frame Size inside of config are valid
+ */
private boolean isSampleRateAndFrameSizeValid() {
if (config == null) {
throw new NullPointerException("VadConfig is NULL!");
@@ -141,6 +207,12 @@ private boolean isSampleRateAndFrameSizeValid() {
}
}
+ /**
+ * Returns the valid Frame Sizes for a specific Sample Rate
+ *
+ * @param sampleRate contains sample rate
+ * @return LinkedList with valid Frame sizes
+ */
public static LinkedList getValidFrameSize(VadConfig.SampleRate sampleRate) {
if (sampleRate == null) {
throw new NullPointerException("SampleRate is NULL!");