Pre-release the project
miaobin committed Jan 21, 2020
0 parents commit 6be0251
Showing 20 changed files with 4,356 additions and 0 deletions.
2 changes: 2 additions & 0 deletions .gitignore
@@ -0,0 +1,2 @@
example/wasm
*.bc
56 changes: 56 additions & 0 deletions README.md
@@ -0,0 +1,56 @@
# Web-MFCC

Calculate Mel-frequency cepstral coefficients (MFCCs) in the browser, either from prepared audio or from live microphone input captured with the JavaScript [Web Audio API](https://github.com/WebAudio/web-audio-api).

Implement and accelerate the TensorFlow 'AudioSpectrogram' and 'Mfcc' operators by compiling the TensorFlow [lite/kernels](https://github.com/tensorflow/tensorflow/tree/master/tensorflow/lite/kernels) with [Emscripten](https://emscripten.org).


# Compile the code

1. Download and install Emscripten following the [instructions](https://emscripten.org/docs/getting_started/downloads.html#installation-instructions).

2. Compile each .c/.cc file to bitcode:
```
emcc -O3 test.cc -o test.bc
```
3. Compile all the .bc files into tf_mfcc.bc:
```
emcc *.bc -o tf_mfcc.bc
```
4. Compile tf_mfcc.bc to WASM:
```
mkdir wasm
emcc -O3 -s WASM=1 -s "EXTRA_EXPORTED_RUNTIME_METHODS=['ccall', 'cwrap']" tf_mfcc.bc -o ./wasm/mfcc.js
```
After compiling, you will get the following files in the wasm folder:
```
wasm
├── mfcc.js
└── mfcc.wasm
```
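
Once the build finishes, a quick way to check that the module loads is to serve a page that includes ./wasm/mfcc.js and run a snippet like the one below in the browser console. This is only a sketch, but the cwrap export and the tf_mfccs symbol name match the compile flags above and the code in example/main.js:
```
// Sketch: confirm the WASM runtime initializes and the exported tf_mfccs
// entry point can be wrapped (cwrap is available because it was listed in
// EXTRA_EXPORTED_RUNTIME_METHODS above).
Module.onRuntimeInitialized = function() {
  let tfMfccs = Module.cwrap('tf_mfccs', 'number',
      ['number', 'number', 'number', 'number', 'number',
       'number', 'number', 'number', 'number']);
  console.log('mfcc.wasm ready, tf_mfccs wrapper:', typeof tfMfccs);
};
```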
# Run the example
1. Copy the wasm folder into the example folder:
```
example
├── favicon.ico
├── index.html
├── main.js
├── wasm
│   ├── mfcc.js
│   └── mfcc.wasm
└── yes.wav
```
2. Start an HTTP server in the example folder. You can install and run [http-server](https://github.com/indexzero/http-server) via:
```
npm install http-server -g
http-server
```
3. Open a browser and go to this URL:
http://localhost:8080/
4. Click the Play button and check the MFCC results in the browser console.
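
The example above analyses the prepared yes.wav file. For the live microphone input mentioned at the top of this README, a sketch along the following lines (not part of this commit) could collect raw PCM with the Web Audio API and reuse the audioContext and getAudioMfccs() globals defined in example/main.js. It uses the deprecated but simple ScriptProcessorNode; an AudioWorklet would be the modern replacement:
```
// Sketch only: capture ~1 s of microphone PCM and feed it to getAudioMfccs()
// from example/main.js. Window and stride are derived from the context's
// actual sample rate instead of the hard-coded 1323/882 used for yes.wav.
async function analyseMicrophone(seconds = 1) {
  // Call this from a user gesture so the AudioContext is allowed to run.
  if (audioContext.state !== "running") {
    await audioContext.resume();
  }
  const stream = await navigator.mediaDevices.getUserMedia({ audio: true });
  const source = audioContext.createMediaStreamSource(stream);
  const processor = audioContext.createScriptProcessor(4096, 1, 1);
  const chunks = [];

  processor.onaudioprocess = function(event) {
    // Copy the samples; the underlying buffer is reused between callbacks.
    chunks.push(new Float32Array(event.inputBuffer.getChannelData(0)));
  };
  source.connect(processor);
  processor.connect(audioContext.destination);

  await new Promise(function(resolve) { setTimeout(resolve, seconds * 1000); });
  processor.disconnect();
  source.disconnect();
  stream.getTracks().forEach(function(track) { track.stop(); });

  // Flatten the recorded chunks into a single Float32Array of PCM samples.
  const pcm = new Float32Array(chunks.reduce(function(n, c) { return n + c.length; }, 0));
  let offset = 0;
  for (const chunk of chunks) {
    pcm.set(chunk, offset);
    offset += chunk.length;
  }

  const windowSize = Math.round(audioContext.sampleRate * 30 / 1000);   // 30 ms window
  const windowStride = Math.round(audioContext.sampleRate * 20 / 1000); // 20 ms stride
  return getAudioMfccs(pcm, audioContext.sampleRate, windowSize, windowStride);
}
```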
Binary file added example/favicon.ico
Binary file not shown.
14 changes: 14 additions & 0 deletions example/index.html
@@ -0,0 +1,14 @@
<html>

<body>
<audio id="audio" src="./yes.wav" controls="controls">
Your browser does not support the audio element.
</audio>

<button type="button" id="play" onclick="startAnalyse()">Play</button>

<script src="./wasm/mfcc.js"></script>
<script src="./main.js"></script>
</body>

</html>
71 changes: 71 additions & 0 deletions example/main.js
@@ -0,0 +1,71 @@
const audioContext = new AudioContext();
const audioElement = document.getElementById("audio");

let runtime = false;

Module.onRuntimeInitialized = function() {
runtime = true;
console.log('WASM Runtime Ready.');
};

async function startAnalyse() {
if(audioContext.state != "running") {
audioContext.resume().then(function() {
console.log('audioContext resumed.')
});
}
audioElement.play();

if(runtime) {
let pcm = await getAudioPCMData(audioElement);
// windowSize = sampleRate * windowSize_ms / 1000
// windowStride = sampleRate * windowStride_ms / 1000
// The hard-coded 1323 and 882 correspond to a 30 ms window and a 20 ms stride
// at 44.1 kHz (44100 * 30 / 1000 and 44100 * 20 / 1000).
let mfccs = getAudioMfccs(pcm, audioContext.sampleRate, 1323, 882);
console.log("mfccs value:", mfccs);
} else {
console.log('WASM Runtime ERROR!');
}
}

async function getAudioPCMData(audio) {
let request = new Request(audio.src);
let response = await fetch(request);
let audioFileData = await response.arrayBuffer();
let audioDecodeData = await audioContext.decodeAudioData(audioFileData);
let audioPCMData = audioDecodeData.getChannelData(0);

return audioPCMData;
}

function getAudioMfccs(pcm, sampleRate,
windowSize, windowStride,
upperFrequencyLimit = 4000,
lowerFrequencyLimit = 20,
filterbankChannelCount = 40,
dctCoefficientCount = 13) {
let pcmPtr = Module._malloc(8 * pcm.length);
let lenPtr = Module._malloc(4);

for(let i=0; i<pcm.length; i++) {
Module.HEAPF64[pcmPtr/8 + i] = pcm[i];
}
Module.HEAP32[lenPtr/4] = pcm.length;

let tfMfccs = Module.cwrap('tf_mfccs', 'number',
['number', 'number', 'number', 'number',
'number', 'number', 'number', 'number', 'number']);
let mfccsPtr = tfMfccs(pcmPtr, lenPtr,
sampleRate, windowSize, windowStride,
upperFrequencyLimit, lowerFrequencyLimit,
filterbankChannelCount, dctCoefficientCount);
let mfccsLen = Module.HEAP32[lenPtr >> 2];
let audioMfccs = new Array(mfccsLen);

for(let i=0; i<mfccsLen; i++) {
audioMfccs[i] = Module.HEAPF64[(mfccsPtr >> 3) + i];
}

// _free() takes a single pointer, so each buffer must be released separately.
Module._free(pcmPtr);
Module._free(lenPtr);
Module._free(mfccsPtr);

return audioMfccs;
}
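
// --- Sketch (not part of the original commit) --------------------------------
// getAudioMfccs() above returns a single flat array of doubles. Assuming the
// C-side tf_mfccs writes dctCoefficientCount coefficients per analysis frame
// (13 by default; an assumption this commit does not spell out), a small
// helper can regroup the flat array into one row per frame for inspection.
function reshapeMfccs(flatMfccs, dctCoefficientCount = 13) {
  let frames = [];
  for(let i = 0; i < flatMfccs.length; i += dctCoefficientCount) {
    frames.push(flatMfccs.slice(i, i + dctCoefficientCount));
  }
  return frames;
}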
Binary file added example/yes.wav
Binary file not shown.
108 changes: 108 additions & 0 deletions src/bits.h
@@ -0,0 +1,108 @@
/* Copyright 2015 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#ifndef TENSORFLOW_CORE_LIB_CORE_BITS_H_
#define TENSORFLOW_CORE_LIB_CORE_BITS_H_

#include "types.h"

namespace tensorflow {

// Return floor(log2(n)) for positive integer n. Returns -1 iff n == 0.
int Log2Floor(uint32 n);
int Log2Floor64(uint64 n);

// Return ceiling(log2(n)) for positive integer n. Returns -1 iff n == 0.
int Log2Ceiling(uint32 n);
int Log2Ceiling64(uint64 n);

// ------------------------------------------------------------------------
// Implementation details follow
// ------------------------------------------------------------------------

#if defined(__GNUC__)

// Return floor(log2(n)) for positive integer n. Returns -1 iff n == 0.
inline int Log2Floor(uint32 n) { return n == 0 ? -1 : 31 ^ __builtin_clz(n); }

// Return floor(log2(n)) for positive integer n. Returns -1 iff n == 0.
inline int Log2Floor64(uint64 n) {
return n == 0 ? -1 : 63 ^ __builtin_clzll(n);
}

#else

// Return floor(log2(n)) for positive integer n. Returns -1 iff n == 0.
inline int Log2Floor(uint32 n) {
if (n == 0) return -1;
int log = 0;
uint32 value = n;
for (int i = 4; i >= 0; --i) {
int shift = (1 << i);
uint32 x = value >> shift;
if (x != 0) {
value = x;
log += shift;
}
}
assert(value == 1);
return log;
}

// Return floor(log2(n)) for positive integer n. Returns -1 iff n == 0.
// Log2Floor64() is defined in terms of Log2Floor()
inline int Log2Floor64(uint64 n) {
const uint32 topbits = static_cast<uint32>(n >> 32);
if (topbits == 0) {
// Top bits are zero, so scan in bottom bits
return Log2Floor(static_cast<uint32>(n));
} else {
return 32 + Log2Floor(topbits);
}
}

#endif

inline int Log2Ceiling(uint32 n) {
int floor = Log2Floor(n);
if (n == (n & ~(n - 1))) // zero or a power of two
return floor;
else
return floor + 1;
}

inline int Log2Ceiling64(uint64 n) {
int floor = Log2Floor64(n);
if (n == (n & ~(n - 1))) // zero or a power of two
return floor;
else
return floor + 1;
}

inline uint32 NextPowerOfTwo(uint32 value) {
int exponent = Log2Ceiling(value);
// DCHECK_LT(exponent, std::numeric_limits<uint32>::digits);
return 1 << exponent;
}

inline uint64 NextPowerOfTwo64(uint64 value) {
int exponent = Log2Ceiling64(value);
// DCHECK_LT(exponent, std::numeric_limits<uint64>::digits);
return 1ULL << exponent;
}

} // namespace tensorflow

#endif // TENSORFLOW_CORE_LIB_CORE_BITS_H_
