-
Notifications
You must be signed in to change notification settings - Fork 1
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
miaobin
committed
Jan 21, 2020
0 parents
commit 6be0251
Showing
20 changed files
with
4,356 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,2 @@ | ||
example/wasm | ||
*.bc |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,56 @@ | ||
# Web-MFCC | ||
|
||
Calculate Mel-frequency cepstral coefficients (MFCCs) in the browser, either from a prepared audio file or from live microphone input, using the JavaScript [Web Audio API](https://github.com/WebAudio/web-audio-api). | ||
|
||
Implements and accelerates the TensorFlow 'AudioSpectrogram' and 'Mfcc' operators by compiling the TensorFlow [lite/kernels](https://github.com/tensorflow/tensorflow/tree/master/tensorflow/lite/kernels) sources to WebAssembly with [Emscripten](https://emscripten.org). | ||
|
||
|
||
# Compile the code | ||
|
||
1. Download and install Emscripten by following the [instructions](https://emscripten.org/docs/getting_started/downloads.html#installation-instructions). | ||
|
||
2. Compile each .c/.cc file to bitcode: | ||
``` | ||
emcc -O3 test.cc -o test.bc | ||
``` | ||
3. Link all the .bc files into tf_mfcc.bc: | ||
``` | ||
emcc *.bc -o tf_mfcc.bc | ||
``` | ||
4. Compile tf_mfcc.bc to WASM: | ||
``` | ||
mkdir wasm | ||
emcc -O3 -s WASM=1 -s "EXTRA_EXPORTED_RUNTIME_METHODS=['ccall', 'cwrap']" tf_mfcc.bc -o ./wasm/mfcc.js | ||
``` | ||
After compiling, you will get the following files in the wasm folder: | ||
``` | ||
wasm | ||
├── mfcc.js | ||
└── mfcc.wasm | ||
``` | ||
# Run the example | ||
1. Put the wasm files into the example folder: | ||
``` | ||
example | ||
├── favicon.ico | ||
├── index.html | ||
├── main.js | ||
├── wasm | ||
│ ├── mfcc.js | ||
│ └── mfcc.wasm | ||
└── yes.wav | ||
``` | ||
2. Start an http server in the example folder. You can install [http-server](https://github.com/indexzero/http-server) via: | ||
``` | ||
npm install http-server -g | ||
http-server | ||
``` | ||
3. Open up the browser and access this URL: | ||
http://localhost:8080/ | ||
4. Click the Play button and check the browser console for the results. |
Binary file not shown.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,14 @@ | ||
<!DOCTYPE html>
<html lang="en">

<head>
  <meta charset="utf-8">
  <title>Web-MFCC example</title>
</head>

<body>
  <audio id="audio" src="./yes.wav" controls="controls">
    Your browser does not support the audio element.
  </audio>

  <button type="button" id="play" onclick="startAnalyse()">Play</button>

  <!--
    Scripts are loaded at the end of <body> (not after </body>, which is
    invalid markup). Order matters: main.js looks up the #audio element and
    assigns Module.onRuntimeInitialized at load time, so it must come after
    both the element above and the Emscripten glue script mfcc.js.
  -->
  <script src="./wasm/mfcc.js"></script>
  <script src="./main.js"></script>
</body>

</html>
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,71 @@ | ||
// Web Audio context shared by the functions below (decoding, sample rate).
const audioContext = new AudioContext();
// The <audio> element whose source file gets analysed.
const audioElement = document.getElementById("audio");

// Set to true by the Emscripten runtime callback below; startAnalyse()
// checks it before calling into the WASM module.
let runtime = false;

Module.onRuntimeInitialized = () => {
  runtime = true;
  console.log('WASM Runtime Ready.');
};
|
||
/**
 * Click handler for the Play button: starts playback of the audio element,
 * then (if the WASM runtime is ready) decodes the audio file and logs its
 * MFCCs to the console.
 *
 * The analysis window and stride are derived from the context's actual
 * sample rate (30 ms / 20 ms), which yields the former hard-coded values
 * 1323 / 882 at 44100 Hz but stays correct at other rates such as 48000 Hz.
 *
 * @returns {Promise<void>} Resolves once the MFCCs have been logged, or
 *     immediately after logging an error when the runtime is not ready.
 */
async function startAnalyse() {
  const windowSizeMs = 30;
  const windowStrideMs = 20;

  if (audioContext.state !== "running") {
    audioContext.resume().then(function() {
      console.log('audioContext resumed.')
    });
  }

  // play() returns a promise in current browsers; handle rejection (e.g.
  // autoplay policy) so it does not surface as an unhandled rejection.
  const playback = audioElement.play();
  if (playback !== undefined) {
    playback.catch(function(err) {
      console.warn('Audio playback failed:', err);
    });
  }

  if (!runtime) {
    console.log('WASM Runtime ERROR!');
    return;
  }

  const pcm = await getAudioPCMData(audioElement);
  const sampleRate = audioContext.sampleRate;
  // windowSize = sampleRate * windowSize_ms / 1000
  // windowStride = sampleRate * windowStride_ms / 1000
  const windowSize = Math.round(sampleRate * windowSizeMs / 1000);
  const windowStride = Math.round(sampleRate * windowStrideMs / 1000);
  const mfccs = getAudioMfccs(pcm, sampleRate, windowSize, windowStride);
  console.log("mfccs value:", mfccs);
}
|
||
/**
 * Downloads the file referenced by an audio element and decodes it to PCM.
 *
 * @param {{src: string}} audio - Element (or object) whose `src` is the URL
 *     of the audio file to fetch.
 * @returns {Promise<Float32Array>} Samples of channel 0 of the decoded audio.
 * @throws {Error} If the HTTP response status is not successful; decoding
 *     errors from `decodeAudioData` propagate unchanged.
 */
async function getAudioPCMData(audio) {
  const response = await fetch(audio.src);
  // Fail early with a clear message instead of handing an error page to
  // the decoder.
  if (!response.ok) {
    throw new Error(`Failed to fetch audio "${audio.src}": HTTP ${response.status}`);
  }

  const encodedAudio = await response.arrayBuffer();
  const decodedAudio = await audioContext.decodeAudioData(encodedAudio);

  // Only the first channel is analysed.
  return decodedAudio.getChannelData(0);
}
|
||
/**
 * Computes MFCCs for a PCM signal by calling the WASM export `tf_mfccs`.
 *
 * The samples are copied into the WASM heap as 64-bit floats, and a 4-byte
 * integer slot is used both to pass the input length and to read back the
 * number of output values written by `tf_mfccs`.
 *
 * @param {ArrayLike<number>} pcm - Audio samples.
 * @param {number} sampleRate - Sample rate of `pcm`.
 * @param {number} windowSize - Analysis window length, in samples.
 * @param {number} windowStride - Step between windows, in samples.
 * @param {number} [upperFrequencyLimit=4000]
 * @param {number} [lowerFrequencyLimit=20]
 * @param {number} [filterbankChannelCount=40]
 * @param {number} [dctCoefficientCount=13]
 * @returns {number[]} The output values copied out of the WASM heap (empty
 *     when `tf_mfccs` reports a length of 0).
 */
function getAudioMfccs(pcm, sampleRate,
                       windowSize, windowStride,
                       upperFrequencyLimit = 4000,
                       lowerFrequencyLimit = 20,
                       filterbankChannelCount = 40,
                       dctCoefficientCount = 13) {
  const pcmPtr = Module._malloc(8 * pcm.length);
  const lenPtr = Module._malloc(4);
  let mfccsPtr = 0;

  try {
    // Byte pointers are converted to element indices: >> 3 for the 8-byte
    // HEAPF64 view, >> 2 for the 4-byte HEAP32 view.
    Module.HEAPF64.set(pcm, pcmPtr >> 3);
    Module.HEAP32[lenPtr >> 2] = pcm.length;

    const tfMfccs = Module.cwrap('tf_mfccs', 'number',
                                 ['number', 'number', 'number', 'number',
                                  'number', 'number', 'number', 'number', 'number']);
    mfccsPtr = tfMfccs(pcmPtr, lenPtr,
                       sampleRate, windowSize, windowStride,
                       upperFrequencyLimit, lowerFrequencyLimit,
                       filterbankChannelCount, dctCoefficientCount);

    // The heap views are re-read from Module after the call rather than
    // cached beforehand.
    const mfccsLen = Module.HEAP32[lenPtr >> 2];
    const firstIndex = mfccsPtr >> 3;

    // Copy into a plain array before the buffers are released below.
    return Array.from(Module.HEAPF64.subarray(firstIndex, firstIndex + mfccsLen));
  } finally {
    // _free() releases a single pointer per call; passing several arguments
    // only frees the first one and leaks the rest.
    Module._free(pcmPtr);
    Module._free(lenPtr);
    // NOTE(review): assumes tf_mfccs returns a heap allocation owned by the
    // caller (the original code also attempted to free it) -- confirm
    // against the C++ implementation.
    if (mfccsPtr) {
      Module._free(mfccsPtr);
    }
  }
}
Binary file not shown.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,108 @@ | ||
/* Copyright 2015 The TensorFlow Authors. All Rights Reserved. | ||
Licensed under the Apache License, Version 2.0 (the "License"); | ||
you may not use this file except in compliance with the License. | ||
You may obtain a copy of the License at | ||
http://www.apache.org/licenses/LICENSE-2.0 | ||
Unless required by applicable law or agreed to in writing, software | ||
distributed under the License is distributed on an "AS IS" BASIS, | ||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
See the License for the specific language governing permissions and | ||
limitations under the License. | ||
==============================================================================*/ | ||
|
||
#ifndef TENSORFLOW_CORE_LIB_CORE_BITS_H_ | ||
#define TENSORFLOW_CORE_LIB_CORE_BITS_H_ | ||
|
||
#include "types.h" | ||
|
||
namespace tensorflow { | ||
|
||
// Return floor(log2(n)) for positive integer n. Returns -1 iff n == 0. | ||
int Log2Floor(uint32 n); | ||
int Log2Floor64(uint64 n); | ||
|
||
// Return ceiling(log2(n)) for positive integer n. Returns -1 iff n == 0. | ||
int Log2Ceiling(uint32 n); | ||
int Log2Ceiling64(uint64 n); | ||
|
||
// ------------------------------------------------------------------------ | ||
// Implementation details follow | ||
// ------------------------------------------------------------------------ | ||
|
||
#if defined(__GNUC__) | ||
|
||
// Return floor(log2(n)) for positive integer n. Returns -1 iff n == 0. | ||
inline int Log2Floor(uint32 n) { return n == 0 ? -1 : 31 ^ __builtin_clz(n); } | ||
|
||
// Return floor(log2(n)) for positive integer n. Returns -1 iff n == 0. | ||
inline int Log2Floor64(uint64 n) { | ||
return n == 0 ? -1 : 63 ^ __builtin_clzll(n); | ||
} | ||
|
||
#else | ||
|
||
// Return floor(log2(n)) for positive integer n. Returns -1 iff n == 0. | ||
inline int Log2Floor(uint32 n) { | ||
if (n == 0) return -1; | ||
int log = 0; | ||
uint32 value = n; | ||
for (int i = 4; i >= 0; --i) { | ||
int shift = (1 << i); | ||
uint32 x = value >> shift; | ||
if (x != 0) { | ||
value = x; | ||
log += shift; | ||
} | ||
} | ||
assert(value == 1); | ||
return log; | ||
} | ||
|
||
// Return floor(log2(n)) for positive integer n. Returns -1 iff n == 0. | ||
// Log2Floor64() is defined in terms of Log2Floor32() | ||
inline int Log2Floor64(uint64 n) { | ||
const uint32 topbits = static_cast<uint32>(n >> 32); | ||
if (topbits == 0) { | ||
// Top bits are zero, so scan in bottom bits | ||
return Log2Floor(static_cast<uint32>(n)); | ||
} else { | ||
return 32 + Log2Floor(topbits); | ||
} | ||
} | ||
|
||
#endif | ||
|
||
inline int Log2Ceiling(uint32 n) { | ||
int floor = Log2Floor(n); | ||
if (n == (n & ~(n - 1))) // zero or a power of two | ||
return floor; | ||
else | ||
return floor + 1; | ||
} | ||
|
||
inline int Log2Ceiling64(uint64 n) { | ||
int floor = Log2Floor64(n); | ||
if (n == (n & ~(n - 1))) // zero or a power of two | ||
return floor; | ||
else | ||
return floor + 1; | ||
} | ||
|
||
inline uint32 NextPowerOfTwo(uint32 value) { | ||
int exponent = Log2Ceiling(value); | ||
// DCHECK_LT(exponent, std::numeric_limits<uint32>::digits); | ||
return 1 << exponent; | ||
} | ||
|
||
inline uint64 NextPowerOfTwo64(uint64 value) { | ||
int exponent = Log2Ceiling(value); | ||
// DCHECK_LT(exponent, std::numeric_limits<uint64>::digits); | ||
return 1LL << exponent; | ||
} | ||
|
||
} // namespace tensorflow | ||
|
||
#endif // TENSORFLOW_CORE_LIB_CORE_BITS_H_ |
Oops, something went wrong.