diff --git a/README.md b/README.md index 221c72e..9ccddcd 100644 --- a/README.md +++ b/README.md @@ -20,39 +20,49 @@ comparison.

## Parameters -VAD library only accepts 16-bit mono PCM audio. +VAD library only accepts 16-bit mono PCM audio and supports various Sample Rates and Frame Sizes. -Valid sample rates are 8000, 16000, 32000 and 48000 Hz. +| Valid Sample Rate | Valid Frame Size | +|:-------------------|:------------------| +| 8000Hz | 80, 160, 240 | +| 16000Hz | 160, 320, 480 | +| 32000Hz | 320, 640, 960 | +| 48000Hz | 480, 960, 1440 | -Frame size can be 80, 160, 240, 320, 480, 640, 960 or 1440 bytes depending on the sample rate. -Classifier supports NORMAL, LOW_BITRATE, AGGRESSIVE and VERY_AGGRESSIVE modes. +Classifier supports the following modes: -Silence duration (ms) - this parameter used in Continuous Speech detector, +| Valid Classifiers | +|:------------------| +| NORMAL | +| LOW_BITRATE | +| AGGRESSIVE | +| VERY_AGGRESSIVE | + +**Silence duration (ms)** - this parameter is used in Continuous Speech detector, the value of this parameter will define the necessary and sufficient duration of negative results to recognize it as silence. -Voice duration (ms) - this parameter used in Continuous Speech detector, +**Voice duration (ms)** - this parameter is used in Continuous Speech detector, the value of this parameter will define the necessary and sufficient duration of positive results to recognize result as speech. - Recommended parameters: -* sample rate - 16KHz, -* frame size - 480, -* mode - VERY_AGGRESSIVE, -* silence duration - 500ms, -* voice duration - 500ms; +* Sample Rate - **16kHz**, +* Frame Size - **160**, +* Mode - **VERY_AGGRESSIVE**, +* Silence Duration - **500ms**, +* Voice Duration - **500ms**; ## Usage VAD supports 2 different ways of detecting speech: -1. Continuous Speech detector was designed to detect long utterances +1. Continuous Speech listener was designed to detect long utterances without returning false positive results when user makes pauses between sentences. 
```java Vad vad = new Vad(VadConfig.newBuilder() .setSampleRate(VadConfig.SampleRate.SAMPLE_RATE_16K) - .setFrameSize(VadConfig.FrameSize.FRAME_SIZE_480) + .setFrameSize(VadConfig.FrameSize.FRAME_SIZE_160) .setMode(VadConfig.Mode.VERY_AGGRESSIVE) .setSilenceDurationMillis(500) .setVoiceDurationMillis(500) @@ -60,7 +70,7 @@ sentences. vad.start(); - vad.isContinuousSpeech(short[] audioFrame, new VadListener() { + vad.addContinuousSpeechListener(short[] audioFrame, new VadListener() { @Override public void onSpeechDetected() { //speech detected! @@ -81,7 +91,7 @@ long utterances. ```java Vad vad = new Vad(VadConfig.newBuilder() .setSampleRate(VadConfig.SampleRate.SAMPLE_RATE_16K) - .setFrameSize(VadConfig.FrameSize.FRAME_SIZE_480) + .setFrameSize(VadConfig.FrameSize.FRAME_SIZE_160) .setMode(VadConfig.Mode.VERY_AGGRESSIVE) .build()); @@ -120,11 +130,11 @@ allprojects { 2. Add the dependency ```groovy dependencies { - implementation 'com.github.gkonovalov:android-vad:1.0.0' + implementation 'com.github.gkonovalov:android-vad:1.0.1' } ``` You also can download precompiled AAR library and APK files from GitHub's [releases page](https://github.com/gkonovalov/android-vad/releases). 
------------ -Georgiy Konovalov 2019 (c) [MIT License](https://opensource.org/licenses/MIT) \ No newline at end of file +Georgiy Konovalov 2021 (c) [MIT License](https://opensource.org/licenses/MIT) \ No newline at end of file diff --git a/build.gradle b/build.gradle index 8482741..ee7e609 100644 --- a/build.gradle +++ b/build.gradle @@ -8,7 +8,7 @@ buildscript { } dependencies { - classpath 'com.android.tools.build:gradle:3.5.2' + classpath 'com.android.tools.build:gradle:4.0.2' classpath 'com.github.dcendents:android-maven-gradle-plugin:2.1' // NOTE: Do not place your application dependencies here; they belong // in the individual module build.gradle files diff --git a/example/build.gradle b/example/build.gradle index da383f0..c653ee5 100644 --- a/example/build.gradle +++ b/example/build.gradle @@ -24,16 +24,16 @@ android { dependencies { implementation fileTree(dir: 'libs', include: ['*.jar']) - implementation 'androidx.appcompat:appcompat:1.1.0' - implementation 'androidx.constraintlayout:constraintlayout:1.1.3' + implementation 'androidx.appcompat:appcompat:1.3.0' + implementation 'androidx.constraintlayout:constraintlayout:2.0.4' - implementation 'com.airbnb.android:lottie:3.2.2' - implementation 'org.permissionsdispatcher:permissionsdispatcher:4.6.0' - annotationProcessor 'org.permissionsdispatcher:permissionsdispatcher-processor:4.6.0' + implementation 'com.airbnb.android:lottie:3.4.0' + implementation 'org.permissionsdispatcher:permissionsdispatcher:4.8.0' + annotationProcessor 'org.permissionsdispatcher:permissionsdispatcher-processor:4.8.0' implementation project(path: ':vad') testImplementation 'junit:junit:4.12' - androidTestImplementation 'androidx.test:runner:1.2.0' - androidTestImplementation 'androidx.test.espresso:espresso-core:3.2.0' - implementation 'com.google.android.material:material:1.0.0' + androidTestImplementation 'androidx.test:runner:1.3.0' + androidTestImplementation 'androidx.test.espresso:espresso-core:3.3.0' + implementation 
'com.google.android.material:material:1.3.0' } diff --git a/example/src/main/java/com/konovalov/vad/example/MainActivity.java b/example/src/main/java/com/konovalov/vad/example/MainActivity.java index 9d8ac3b..0511b07 100644 --- a/example/src/main/java/com/konovalov/vad/example/MainActivity.java +++ b/example/src/main/java/com/konovalov/vad/example/MainActivity.java @@ -26,6 +26,7 @@ public class MainActivity extends AppCompatActivity implements VoiceRecorder.Lis private VadConfig.SampleRate DEFAULT_SAMPLE_RATE = VadConfig.SampleRate.SAMPLE_RATE_16K; private VadConfig.FrameSize DEFAULT_FRAME_SIZE = VadConfig.FrameSize.FRAME_SIZE_160; private VadConfig.Mode DEFAULT_MODE = VadConfig.Mode.VERY_AGGRESSIVE; + private int DEFAULT_SILENCE_DURATION = 500; private int DEFAULT_VOICE_DURATION = 500; @@ -67,23 +68,23 @@ protected void onCreate(Bundle savedInstanceState) { sampleRateSpinner = findViewById(R.id.sampleRateSpinner); sampleRateAdapter = new ArrayAdapter<>(this, android.R.layout.simple_spinner_dropdown_item, getSampleRates()); sampleRateSpinner.setAdapter(sampleRateAdapter); - sampleRateSpinner.setOnItemSelectedListener(this); sampleRateSpinner.setTag(SPINNER_SAMPLE_RATE_TAG); - sampleRateSpinner.setSelection(getSampleRates().indexOf(DEFAULT_SAMPLE_RATE.name())); + sampleRateSpinner.setSelection(getSampleRates().indexOf(DEFAULT_SAMPLE_RATE.name()), false); + sampleRateSpinner.setOnItemSelectedListener(this); frameSpinner = findViewById(R.id.frameSampleRateSpinner); frameAdapter = new ArrayAdapter<>(this, android.R.layout.simple_spinner_dropdown_item, getFrameSizes()); frameSpinner.setAdapter(frameAdapter); - frameSpinner.setOnItemSelectedListener(this); frameSpinner.setTag(SPINNER_FRAME_SIZE_TAG); - frameSpinner.setSelection(getFrameSizes().indexOf(DEFAULT_FRAME_SIZE.name())); + frameSpinner.setSelection(getFrameSizes().indexOf(DEFAULT_FRAME_SIZE.name()), false); + frameSpinner.setOnItemSelectedListener(this); modeSpinner = findViewById(R.id.modeSpinner); modeAdapter 
= new ArrayAdapter<>(this, android.R.layout.simple_spinner_dropdown_item, getModes()); modeSpinner.setAdapter(modeAdapter); - modeSpinner.setOnItemSelectedListener(this); modeSpinner.setTag(SPINNER_MODE_TAG); - modeSpinner.setSelection(getModes().indexOf(DEFAULT_MODE.name())); + modeSpinner.setSelection(getModes().indexOf(DEFAULT_MODE.name()), false); + modeSpinner.setOnItemSelectedListener(this); recordingActionButton = findViewById(R.id.recordingActionButton); recordingActionButton.setOnClickListener(this); @@ -145,6 +146,8 @@ public void onItemSelected(AdapterView adapterView, View view, int position, frameAdapter.addAll(getFrameSizes()); frameAdapter.notifyDataSetChanged(); frameSpinner.setSelection(0); + + config.setFrameSize(VadConfig.FrameSize.valueOf(String.valueOf(frameAdapter.getItem(0)))); break; case SPINNER_FRAME_SIZE_TAG: config.setFrameSize(VadConfig.FrameSize.valueOf(String.valueOf(frameAdapter.getItem(position)))); diff --git a/example/src/main/java/com/konovalov/vad/example/recorder/VoiceRecorder.java b/example/src/main/java/com/konovalov/vad/example/recorder/VoiceRecorder.java index 4d6ffbb..58fd83f 100644 --- a/example/src/main/java/com/konovalov/vad/example/recorder/VoiceRecorder.java +++ b/example/src/main/java/com/konovalov/vad/example/recorder/VoiceRecorder.java @@ -116,12 +116,12 @@ public void run() { short[] buffer = new short[vad.getConfig().getFrameSize().getValue() * getNumberOfChannels() * 2]; audioRecord.read(buffer, 0, buffer.length); - isSpeechDetected(buffer); + detectSpeech(buffer); } } - private void isSpeechDetected(short[] buffer) { - vad.isContinuousSpeech(buffer, new VadListener() { + private void detectSpeech(short[] buffer) { + vad.addContinuousSpeechListener(buffer, new VadListener() { @Override public void onSpeechDetected() { callback.onSpeechDetected(); diff --git a/gradle/wrapper/gradle-wrapper.properties b/gradle/wrapper/gradle-wrapper.properties index c720a48..0d01954 100644 --- 
a/gradle/wrapper/gradle-wrapper.properties +++ b/gradle/wrapper/gradle-wrapper.properties @@ -1,6 +1,6 @@ -#Wed Nov 27 23:19:25 EST 2019 +#Wed Jun 09 15:12:01 EDT 2021 distributionBase=GRADLE_USER_HOME distributionPath=wrapper/dists zipStoreBase=GRADLE_USER_HOME zipStorePath=wrapper/dists -distributionUrl=https\://services.gradle.org/distributions/gradle-5.4.1-all.zip +distributionUrl=https\://services.gradle.org/distributions/gradle-6.1.1-all.zip diff --git a/vad/build.gradle b/vad/build.gradle index 284d7fc..34bfc04 100644 --- a/vad/build.gradle +++ b/vad/build.gradle @@ -4,15 +4,15 @@ apply plugin: 'com.github.dcendents.android-maven' group='com.github.gkonovalov' android { - compileSdkVersion 29 - buildToolsVersion "29.0.2" + compileSdkVersion 30 + buildToolsVersion "29.0.3" defaultConfig { minSdkVersion 16 - targetSdkVersion 29 - versionCode 1 - versionName "1.0.0" + targetSdkVersion 30 + versionCode 2 + versionName "1.0.1" setProperty("archivesBaseName", "android-vad-v" + versionName) @@ -41,8 +41,7 @@ android { dependencies { implementation fileTree(dir: 'libs', include: ['*.jar']) - implementation 'androidx.appcompat:appcompat:1.1.0' testImplementation 'junit:junit:4.12' - androidTestImplementation 'androidx.test:runner:1.2.0' - androidTestImplementation 'androidx.test.espresso:espresso-core:3.2.0' + androidTestImplementation 'androidx.test:runner:1.3.0' + androidTestImplementation 'androidx.test.espresso:espresso-core:3.3.0' } diff --git a/vad/src/main/java/com/konovalov/vad/Vad.java b/vad/src/main/java/com/konovalov/vad/Vad.java index a869010..d140e0e 100644 --- a/vad/src/main/java/com/konovalov/vad/Vad.java +++ b/vad/src/main/java/com/konovalov/vad/Vad.java @@ -1,5 +1,7 @@ package com.konovalov.vad; +import android.text.Html; + import java.util.LinkedHashMap; import java.util.LinkedList; @@ -16,6 +18,9 @@ public class Vad { private long detectedSilenceSamplesMillis = 0; private long previousTimeMillis = System.currentTimeMillis(); + /** + * Valid Sample 
Rates and corresponding Frame Sizes + */ public static final LinkedHashMap> SAMPLE_RATE_VALID_FRAMES = new LinkedHashMap>() {{ put(VadConfig.SampleRate.SAMPLE_RATE_8K, new LinkedList() {{ add(VadConfig.FrameSize.FRAME_SIZE_80); @@ -42,10 +47,18 @@ public class Vad { public Vad() { } + /** + * VAD constructor + * + * @param config contains such parameters as Sample Rate {@link VadConfig.SampleRate}, Frame Size {@link VadConfig.FrameSize}, Mode {@link VadConfig.Mode}, etc. + */ public Vad(VadConfig config) { this.config = config; } + /** + * Start VAD should be called before {@link #isSpeech(short[] audio)} or {@link #addContinuousSpeechListener(short[] audio, VadListener listener)} + */ public void start() { if (config == null) { throw new NullPointerException("VadConfig is NULL!"); @@ -56,12 +69,19 @@ public void start() { } try { - nativeStart(config.getSampleRate().getValue(), config.getFrameSize().getValue(), config.getMode().getValue()); + int result = nativeStart(config.getSampleRate().getValue(), config.getFrameSize().getValue(), config.getMode().getValue()); + + if (result < 0) { + throw new RuntimeException("Error can't set parameters for VAD!"); + } } catch (Exception e) { throw new RuntimeException("Error can't start VAD!", e); } } + /** + * Stop VAD - should be called after {@link #start()} + */ public void stop() { try { nativeStop(); @@ -70,6 +90,14 @@ public void stop() { } } + /** + * Speech detector was designed to detect speech/noise in small audio + * frames and return result for every frame. This method will not work for + * long utterances. 
+ * + * @param audio input audio frame + * @return boolean containing result of speech detection + */ public boolean isSpeech(short[] audio) { if (audio == null) { throw new NullPointerException("Audio data is NULL!"); @@ -82,7 +110,34 @@ public boolean isSpeech(short[] audio) { } } + /** + * Continuous Speech listener was designed to detect long utterances + * without returning false positive results when user makes pauses between + * sentences. + * + * @param audio input audio frame + * @param listener VAD result listener {@link VadListener} + * + * @deprecated use {@link #addContinuousSpeechListener(short[] audio, VadListener listener)} instead. + */ + @Deprecated public void isContinuousSpeech(short[] audio, VadListener listener) { + addContinuousSpeechListener(audio, listener); + } + + /** + * Continuous Speech listener was designed to detect long utterances + * without returning false positive results when user makes pauses between + * sentences. + * + * @param audio input audio frame + * @param listener VAD result listener {@link VadListener} + */ + public void addContinuousSpeechListener(short[] audio, VadListener listener) { + if (config == null) { + throw new NullPointerException("VadConfig is NULL!"); + } + if (audio == null) { throw new NullPointerException("Audio data is NULL!"); } @@ -91,10 +146,6 @@ public void isContinuousSpeech(short[] audio, VadListener listener) { throw new NullPointerException("VadListener is NULL!"); } - if (config == null) { - throw new NullPointerException("VadConfig is NULL!"); - } - long currentTimeMillis = System.currentTimeMillis(); if (isSpeech(audio)) { @@ -119,14 +170,29 @@ public void isContinuousSpeech(short[] audio, VadListener listener) { previousTimeMillis = currentTimeMillis; } + /** + * Get current VAD config + * + * @return config {@link VadConfig} of VAD + */ public VadConfig getConfig() { return config; } + /** + * Set {@link VadConfig} for VAD + * + * @param config VAD config + */ public void 
setConfig(VadConfig config) { this.config = config; } + /** + * Checks the Sample Rate and corresponding Frame Size inside of config + * + * @return true if the Sample Rate and Frame Size inside of config are valid + */ private boolean isSampleRateAndFrameSizeValid() { if (config == null) { throw new NullPointerException("VadConfig is NULL!"); @@ -141,6 +207,12 @@ private boolean isSampleRateAndFrameSizeValid() { } } + /** + * Returns the valid Frame sizes for a specific Sample Rate + * + * @param sampleRate the sample rate to look up + * @return LinkedList with valid Frame sizes + */ public static LinkedList getValidFrameSize(VadConfig.SampleRate sampleRate) { if (sampleRate == null) { throw new NullPointerException("SampleRate is NULL!");