Merge branch 'wavesurfer-upgrade'

Mattk70 · Nov 7, 2023 · ee1bbb6 · ee1bbb6
2 parents a0673ca + ba6f836
commit ee1bbb6
Show file tree

Hide file tree

Showing 31 changed files with 1,335 additions and 1,096 deletions.
diff --git a/...s~0010001_EfficientNetV2B0_SigmoidFocalCrossentropy_256x384_Adam_408/group1-shard1of7.bin b/...s~0010001_EfficientNetV2B0_SigmoidFocalCrossentropy_256x384_Adam_408/group1-shard1of7.bin
diff --git a/...s~0010001_EfficientNetV2B0_SigmoidFocalCrossentropy_256x384_Adam_408/group1-shard2of7.bin b/...s~0010001_EfficientNetV2B0_SigmoidFocalCrossentropy_256x384_Adam_408/group1-shard2of7.bin
diff --git a/...s~0010001_EfficientNetV2B0_SigmoidFocalCrossentropy_256x384_Adam_408/group1-shard3of7.bin b/...s~0010001_EfficientNetV2B0_SigmoidFocalCrossentropy_256x384_Adam_408/group1-shard3of7.bin
diff --git a/...s~0010001_EfficientNetV2B0_SigmoidFocalCrossentropy_256x384_Adam_408/group1-shard4of7.bin b/...s~0010001_EfficientNetV2B0_SigmoidFocalCrossentropy_256x384_Adam_408/group1-shard4of7.bin
diff --git a/...s~0010001_EfficientNetV2B0_SigmoidFocalCrossentropy_256x384_Adam_408/group1-shard5of7.bin b/...s~0010001_EfficientNetV2B0_SigmoidFocalCrossentropy_256x384_Adam_408/group1-shard5of7.bin
diff --git a/...s~0010001_EfficientNetV2B0_SigmoidFocalCrossentropy_256x384_Adam_408/group1-shard6of7.bin b/...s~0010001_EfficientNetV2B0_SigmoidFocalCrossentropy_256x384_Adam_408/group1-shard6of7.bin
diff --git a/...s~0010001_EfficientNetV2B0_SigmoidFocalCrossentropy_256x384_Adam_408/group1-shard7of7.bin b/...s~0010001_EfficientNetV2B0_SigmoidFocalCrossentropy_256x384_Adam_408/group1-shard7of7.bin
diff --git a/2023116_Augs~0010001_EfficientNetV2B0_SigmoidFocalCrossentropy_256x384_Adam_408/model.json b/2023116_Augs~0010001_EfficientNetV2B0_SigmoidFocalCrossentropy_256x384_Adam_408/model.json
diff --git a/...fficientNetV2B0_SigmoidFocalCrossentropy_256x384_Adam_droput1.5__408/group1-shard1of7.bin b/...fficientNetV2B0_SigmoidFocalCrossentropy_256x384_Adam_droput1.5__408/group1-shard1of7.bin
diff --git a/...fficientNetV2B0_SigmoidFocalCrossentropy_256x384_Adam_droput1.5__408/group1-shard2of7.bin b/...fficientNetV2B0_SigmoidFocalCrossentropy_256x384_Adam_droput1.5__408/group1-shard2of7.bin
diff --git a/...fficientNetV2B0_SigmoidFocalCrossentropy_256x384_Adam_droput1.5__408/group1-shard3of7.bin b/...fficientNetV2B0_SigmoidFocalCrossentropy_256x384_Adam_droput1.5__408/group1-shard3of7.bin
diff --git a/...fficientNetV2B0_SigmoidFocalCrossentropy_256x384_Adam_droput1.5__408/group1-shard4of7.bin b/...fficientNetV2B0_SigmoidFocalCrossentropy_256x384_Adam_droput1.5__408/group1-shard4of7.bin
diff --git a/...fficientNetV2B0_SigmoidFocalCrossentropy_256x384_Adam_droput1.5__408/group1-shard5of7.bin b/...fficientNetV2B0_SigmoidFocalCrossentropy_256x384_Adam_droput1.5__408/group1-shard5of7.bin
diff --git a/...fficientNetV2B0_SigmoidFocalCrossentropy_256x384_Adam_droput1.5__408/group1-shard6of7.bin b/...fficientNetV2B0_SigmoidFocalCrossentropy_256x384_Adam_droput1.5__408/group1-shard6of7.bin
diff --git a/...fficientNetV2B0_SigmoidFocalCrossentropy_256x384_Adam_droput1.5__408/group1-shard7of7.bin b/...fficientNetV2B0_SigmoidFocalCrossentropy_256x384_Adam_droput1.5__408/group1-shard7of7.bin
diff --git a/...~0010001_EfficientNetV2B0_SigmoidFocalCrossentropy_256x384_Adam_droput1.5__408/model.json b/...~0010001_EfficientNetV2B0_SigmoidFocalCrossentropy_256x384_Adam_droput1.5__408/model.json
diff --git a/...01_EfficientNetV2B0_SigmoidFocalCrossentropy_256x384_Adam_noEMA__408/group1-shard1of7.bin b/...01_EfficientNetV2B0_SigmoidFocalCrossentropy_256x384_Adam_noEMA__408/group1-shard1of7.bin
diff --git a/...01_EfficientNetV2B0_SigmoidFocalCrossentropy_256x384_Adam_noEMA__408/group1-shard2of7.bin b/...01_EfficientNetV2B0_SigmoidFocalCrossentropy_256x384_Adam_noEMA__408/group1-shard2of7.bin
diff --git a/...01_EfficientNetV2B0_SigmoidFocalCrossentropy_256x384_Adam_noEMA__408/group1-shard3of7.bin b/...01_EfficientNetV2B0_SigmoidFocalCrossentropy_256x384_Adam_noEMA__408/group1-shard3of7.bin
diff --git a/...01_EfficientNetV2B0_SigmoidFocalCrossentropy_256x384_Adam_noEMA__408/group1-shard4of7.bin b/...01_EfficientNetV2B0_SigmoidFocalCrossentropy_256x384_Adam_noEMA__408/group1-shard4of7.bin
diff --git a/...01_EfficientNetV2B0_SigmoidFocalCrossentropy_256x384_Adam_noEMA__408/group1-shard5of7.bin b/...01_EfficientNetV2B0_SigmoidFocalCrossentropy_256x384_Adam_noEMA__408/group1-shard5of7.bin
diff --git a/...01_EfficientNetV2B0_SigmoidFocalCrossentropy_256x384_Adam_noEMA__408/group1-shard6of7.bin b/...01_EfficientNetV2B0_SigmoidFocalCrossentropy_256x384_Adam_noEMA__408/group1-shard6of7.bin
diff --git a/...01_EfficientNetV2B0_SigmoidFocalCrossentropy_256x384_Adam_noEMA__408/group1-shard7of7.bin b/...01_EfficientNetV2B0_SigmoidFocalCrossentropy_256x384_Adam_noEMA__408/group1-shard7of7.bin
diff --git a/...Augs~0010001_EfficientNetV2B0_SigmoidFocalCrossentropy_256x384_Adam_noEMA__408/model.json b/...Augs~0010001_EfficientNetV2B0_SigmoidFocalCrossentropy_256x384_Adam_noEMA__408/model.json
diff --git a/index.html b/index.html
diff --git a/js/model.js b/js/model.js
@@ -10,7 +10,7 @@ const MIGRANTS = new Set(["Pluvialis dominica_American Golden Plover", "Acanthis
 const NOT_BIRDS = ['Ambient Noise_Ambient Noise', 'Animal_Animal', 'Cat_Cat', 'Church Bells_Church Bells', 'Cough_Cough', 'Dog_Dog', 'Human_Human', 'Laugh_Laugh', 'Rain_Rain', 'Red Fox_Red Fox', 'Sneeze_Sneeze', 'Snoring_Snoring', 'Thunder_Thunder', 'Vehicle_Vehicle', 'Water Drops_Water Drops', 'Waves_Waves', 'Wind_Wind'];
 const MYSTERIES = ['Unknown Sp._Unknown Sp.'];
 const GRAYLIST = [];
-const GOLDEN_LIST = [] // ["Turdus iliacus_Redwing (call)", "Turdus philomelos_Song Thrush (call)"] // "Erithacus rubecula_Robin (song)", "Erithacus rubecula_Robin (call)"];
+const GOLDEN_LIST = [];
 let BLOCKED_IDS = [];
 let SUPPRESSED_IDS = [];
 let ENHANCED_IDS = [];
@@ -103,8 +103,6 @@ onmessage = async (e) => {
                 const signal = tf.tensor1d(buffer, 'float32');
                 const bufferTensor = myModel.normalise_audio(signal);
                 signal.dispose();
-                // const mymax = tf.max(bufferTensor).dataSync()
-                // const mymin = tf.min(bufferTensor).dataSync()
                 const imageTensor = tf.tidy(() => {
                     return myModel.makeSpectrogram(bufferTensor);
                 });
@@ -170,15 +168,13 @@ class Model {
         }
     }
 
-    warmUp(batchSize) {
+    async warmUp(batchSize) {
         this.batchSize = parseInt(batchSize);
         this.inputShape[0] = this.batchSize;
         if (tf.getBackend() === 'webgl') {
             tf.tidy(() => {
                 const warmupResult = this.model.predict(tf.zeros(this.inputShape), { batchSize: this.batchSize });
-                warmupResult.arraySync();
-                // see if we can get padding compiled at this point
-                this.padBatch(tf.zeros([1, this.inputShape[1], this.inputShape[2], this.inputShape[3]]), { batchSize: this.batchSize })
+                const synced = warmupResult.arraySync();
             })
         }
         if (DEBUG) console.log('WarmUp end', tf.memory().numTensors)
@@ -192,36 +188,26 @@ class Model {
             // find the position of the blocked items in the label list
             NOT_BIRDS.forEach(notBird => BLOCKED_IDS.push(this.labels.indexOf(notBird)))
         } else if (this.list === 'migrants') {
-            let v1_migrants;
-            if (this.version === 'v1') {
-                // strip (call) from migrants set
-                v1_migrants = new Set();
-                MIGRANTS.forEach((element) => {
-                    const newElement = element.replace(' (call)', '');
-                    v1_migrants.add(newElement);
-                })
-
-            }
-            const listToCheck = v1_migrants || MIGRANTS;
+            const listToCheck = MIGRANTS;
             for (let i = 0; i < this.labels.length; i++) {
                 const item = this.labels[i];
                 if (!listToCheck.has(item) && !MYSTERIES.includes(item)) BLOCKED_IDS.push(i);
             }
-
         }
         GRAYLIST.forEach(species => SUPPRESSED_IDS.push(this.labels.indexOf(species)))
         GOLDEN_LIST.forEach(species => ENHANCED_IDS.push(this.labels.indexOf(species)))
     }
 
-    normalize(spec) {
+    normalise(spec) {
         return tf.tidy(() => {
-            // console.log('Pre-norm### Min is: ', spec.min().dataSync(), 'Max is: ', spec.max().dataSync())
-            const spec_max = tf.max(spec, [1, 2]).reshape([-1, 1, 1, 1])
-            // const spec_min = tf.min(spec, [1, 2]).reshape([-1, 1, 1, 1])
-            spec = spec.mul(255);
-            spec = spec.div(spec_max);
-            // spec = tf.sub(spec, spec_min).div(tf.sub(spec_max, spec_min));
-            // console.log('{Post norm#### Min is: ', spec.min().dataSync(), 'Max is: ', spec.max().dataSync())
+             const spec_max = tf.max(spec, [1, 2], true)
+            // if (this.version === 'v4'){
+            //     const spec_min = tf.min(spec, [1, 2], true)
+            //     spec = tf.sub(spec, spec_min).div(tf.sub(spec_max, spec_min));
+            // } else {
+                spec = spec.mul(255);
+                spec = spec.div(spec_max);
+            // }
             return spec
         })
     }
@@ -231,34 +217,10 @@ class Model {
             const { mean, variance } = tf.moments(spectrograms, 2);
             const peak = tf.div(variance, mean)
             let snr = tf.squeeze(tf.max(peak, 1));
-            //snr = tf.sub(255, snr)  // bigger number, less signal
-            // const MEAN = mean.arraySync()
-            // const VARIANCE = variance.arraySync()
-            // const PEAK = peak.arraySync()
             return snr
         })
     }
 
-
-    fixUpSpecBatch(specBatch, h, w) {
-        const img_height = h || this.height;
-        const img_width = w || this.width;
-        return tf.tidy(() => {
-            /*
-            Try out taking log of spec when SNR is below threshold?
-            */
-            //specBatch = tf.log1p(specBatch).mul(20);
-            // Swap axes to fit output shape
-            specBatch = tf.transpose(specBatch, [0, 2, 1]);
-            specBatch = tf.reverse(specBatch, [1]);
-            // Add channel axis
-            specBatch = tf.expandDims(specBatch, -1);
-            //specBatch = tf.slice4d(specBatch, [0, 1, 0, 0], [-1, img_height, img_width, -1]);
-            specBatch = tf.image.resizeBilinear(specBatch, [img_height, img_width], true);
-            return this.version === 'v1' ? specBatch : this.normalize(specBatch)
-        })
-    }
-
     padBatch(tensor) {
         return tf.tidy(() => {
             if (DEBUG) console.log(`Adding ${this.batchSize - tensor.shape[0]} tensors to the batch`)
@@ -298,9 +260,9 @@ class Model {
         })
     }
 
-    async predictBatch(specs, keys, threshold, confidence) {
-        const TensorBatch = this.fixUpSpecBatch(specs); // + 1 tensor
-        specs.dispose(); // - 1 tensor
+    async predictBatch(TensorBatch, keys, threshold, confidence) {
+        // const TensorBatch = this.fixUpSpecBatch(specs); // + 1 tensor
+        // specs.dispose(); // - 1 tensor
         let paddedTensorBatch, maskedTensorBatch;
         if (BACKEND === 'webgl' && TensorBatch.shape[0] < this.batchSize) {
             // WebGL works best when all batches are the same size
@@ -310,7 +272,7 @@ class Model {
             const keysTensor = tf.stack(keys); // + 1 tensor
             const snr = this.getSNR(TensorBatch)
             const condition = tf.greaterEqual(snr, threshold); // + 1 tensor
-            if (DEBUG) console.log('SNR is: ', snr.dataSync())
+            if (DEBUG) console.log('SNR is: ', await snr.data())
             snr.dispose();
             // Avoid mask cannot be scalar error at end of predictions
             let newCondition;
@@ -333,7 +295,7 @@ class Model {
                 if (DEBUG) console.log("No surviving tensors in batch", maskedTensorBatch.shape[0])
                 return []
             } else {
-                keys = maskedKeysTensor.dataSync();
+                keys = await maskedKeysTensor.data();
                 maskedKeysTensor.dispose(); // - 1 tensor
                 if (DEBUG) console.log("surviving tensors in batch", maskedTensorBatch.shape[0])
             }
@@ -356,24 +318,18 @@ class Model {
         if (maskedTensorBatch) maskedTensorBatch.dispose();
 
         const finalPrediction = newPrediction || prediction;
-        //new
         const { indices, values } = tf.topk(finalPrediction, 5, true)
+        //const adjusted_values  = tf.div(1, tf.add(1, tf.exp(tf.mul(tf.neg(10), values.sub(0.6)))));
+
         const topIndices = indices.arraySync();
         const topValues = values.arraySync();
         indices.dispose();
         values.dispose();
-        // end new
-        // const array_of_predictions = finalPrediction.arraySync()
+
         finalPrediction.dispose();
         if (newPrediction) newPrediction.dispose();
         keys = keys.map(key => (key / CONFIG.sampleRate).toFixed(3));
         return [keys, topIndices, topValues];
-        // return keys.reduce((acc, key, index) => {
-        //     // convert key (samples) to milliseconds
-        //     const position = (key / CONFIG.sampleRate).toFixed(3);
-        //     acc[position] = array_of_predictions[index];
-        //     return acc;
-        // }, {});
     }
 
     makeSpectrogram(signal) {
@@ -383,36 +339,32 @@ class Model {
             return spec;
         })
     }
-
-    /*    normalizeTensor(audio) {
-            return tf.tidy(() => {
-                const tensor = tf.tensor1d(audio);
-                const {mean, variance} = tf.moments(tensor);
-                const stdDev = variance.sqrt();
-                const normalizedTensor = tensor.sub(mean).div(stdDev.mul(tf.scalar(2)));
-                return normalizedTensor;
-            })
-        }*/
-
-    /*    normalise_audio = (signal) => {
-            return tf.tidy(() => {
-                //signal = tf.tensor1d(signal);
-                const sigMax = tf.max(signal);
-                const sigMin = tf.min(signal);
-                return signal.sub(sigMin).div(sigMax.sub(sigMin)).mul(255).sub(127.5);
-            })
-        };*/
-
-    normalise_audio = (signal) => {
+    fixUpSpecBatch(specBatch, h, w) {
+        const img_height = h || this.height;
+        const img_width = w || this.width;
         return tf.tidy(() => {
-            //signal = tf.tensor1d(signal, 'float32');
-            const sigMax = tf.max(signal);
-            const sigMin = tf.min(signal);
-            const range = sigMax.sub(sigMin);
-            //return signal.sub(sigMin).div(range).mul(tf.scalar(8192.0, 'float32')).sub(tf.scalar(4095, 'float32'))
-            return signal.sub(sigMin).div(range).mul(tf.scalar(2)).sub(tf.scalar(1))
+            /*
+            Try out taking log of spec when SNR is below threshold?
+            */
+            //specBatch = tf.log1p(specBatch).mul(20);
+            // Swap axes to fit output shape
+            specBatch = tf.transpose(specBatch, [0, 2, 1]);
+            specBatch = tf.reverse(specBatch, [1]);
+            // Add channel axis
+            specBatch = tf.expandDims(specBatch, -1);
+            //specBatch = tf.slice4d(specBatch, [0, 1, 0, 0], [-1, img_height, img_width, -1]);
+            specBatch = tf.image.resizeBilinear(specBatch, [img_height, img_width], true);
+            return  this.normalise(specBatch)
+        })
+    }
+    normalise_audio_batch = (tensor) => {
+        return tf.tidy(() => {        
+        const sigMax = tf.max(tensor, 1, true);
+        const sigMin = tf.min(tensor, 1, true);
+        const normalized = tensor.sub(sigMin).div(sigMax.sub(sigMin)).mul(tf.scalar(2)).sub(tf.scalar(1));
+        return normalized;
         })
-    };
+    }
 
     async predictChunk(audioBuffer, start, fileStart, file, threshold, confidence) {
         if (DEBUG) console.log('predictCunk begin', tf.memory().numTensors);
@@ -429,25 +381,21 @@ class Model {
         }
         const buffer = paddedBuffer || audioBuffer;
         const numSamples = buffer.shape / this.chunkLength;
-        let bufferList = tf.split(buffer, numSamples);
+        let buffers = tf.reshape(buffer, [numSamples, this.chunkLength]);
         buffer.dispose();
-        // Turn the audio into a spec tensor
-        bufferList = tf.tidy(() => {
-            return bufferList.map(x => {
-                let normal = this.normalise_audio(x);
-                x.dispose();
-                return this.makeSpectrogram(normal);
+        const bufferList = this.version !== 'v4' ? this.normalise_audio_batch(buffers) : buffers;
+        const specBatch = tf.tidy(() => {
+            const bufferArray = tf.unstack(bufferList);
+            const toStack = bufferArray.map(x => {
+                return this.makeSpectrogram(x)
             })
+            return this.fixUpSpecBatch(tf.stack(toStack))
         });
-        const specBatch = tf.stack(bufferList);
+        buffers.dispose();
+        bufferList.dispose();
+        //const specBatch = tf.stack(bufferList);
         const batchKeys = [...Array(numSamples).keys()].map(i => start + this.chunkLength * i);
         const result = await this.predictBatch(specBatch, batchKeys, threshold, confidence);
-        this.clearTensorArray(bufferList);
         return [result, file, fileStart];
     }
-
-    async clearTensorArray(tensorObj) {
-        // Dispose of accumulated kept tensors in model tensor array
-        tensorObj.forEach(tensor => tensor.dispose());
-    }
 }
diff --git a/js/spectrogram.js b/js/spectrogram.js
@@ -0,0 +1,113 @@
+const tf = require('@tensorflow/tfjs-node');
+const DEBUG = false;
+class PreprocessSpectrogramLayer extends tf.layers.Layer {
+    constructor(config) {
+        super(config);
+        this.imgHeight = config.imgHeight;
+        this.imgWidth = config.imgWidth;
+        this.version = config.version;
+    }
+
+    call(inputs) {
+        return tf.tidy(() => {
+            const spec_max = tf.max(inputs, [1, 2], true);
+            if (this.version === 'v4') {
+                const spec_min = tf.min(inputs, [1, 2], true);
+                const normalized = tf.div(tf.sub(inputs, spec_min), tf.sub(spec_max, spec_min));
+                return normalized;
+            } else {
+                const scaled = tf.mul(inputs, 255).div(spec_max);
+                return scaled;
+            }
+        });
+    }
+
+
+    build(inputShape) {
+        this.inputSpec = [{ shape: [null, inputShape[1], inputShape[2], inputShape[3]] }];
+        return this;
+    }
+
+    static get className() {
+        return 'PreprocessSpectrogramLayer';
+      }
+}
+let preprocessLayer
+
+onmessage = async (e) => {
+    const message = e.data.message;
+
+    if (message === 'load'){
+        const backend = e.data.backend;
+        tf.setBackend(backend).then(async () => {
+            if (backend === 'webgl') {
+                tf.env().set('WEBGL_FORCE_F16_TEXTURES', true);
+                tf.env().set('WEBGL_PACK', true);
+                tf.env().set('WEBGL_EXP_CONV', true);
+                tf.env().set('TOPK_K_CPU_HANDOFF_THRESHOLD', 128)
+                tf.env().set('TOPK_LAST_DIM_CPU_HANDOFF_SIZE_THRESHOLD', 0);
+            }
+            tf.enableProdMode();
+            if (DEBUG) {
+                console.log(tf.env());
+                console.log(tf.env().getFlags());
+            }
+            const config = e.data.config;
+            preprocessLayer = new PreprocessSpectrogramLayer(config);
+            console.log('Layer loaded')
+        })
+
+    } else {
+        let {audio, start,fileStart, file, snr, worker, threshold, confidence} = e.data.payload;
+        if (DEBUG) console.log('predictCunk begin', tf.memory().numTensors);
+        audio = tf.tensor1d(audio);
+
+        // check if we need to pad
+        const remainder = audio.shape % 72000;
+        let paddedBuffer;
+        if (remainder !== 0) {
+            // Pad to the nearest full sample
+            paddedBuffer = audio.pad([[0, 72000 - remainder]]);
+            audio.dispose();
+            if (DEBUG) console.log('Received final chunks')
+        }
+        const buffer = paddedBuffer || audio;
+        const numSamples = buffer.shape / 72000;
+        let bufferList = tf.split(buffer, numSamples);
+        buffer.dispose();
+        // Turn the audio into a spec tensor
+        // bufferList = tf.tidy(() => {
+        //     return bufferList.map(x => {
+        //         return this.version === 'v4' ? this.makeSpectrogram(x) : this.makeSpectrogram(this.normalise_audio(x));
+        //     })
+        // });
+
+        const specBatch = makeSpectrogramBatch(bufferList);
+        //const specBatch = tf.stack(bufferList);
+        const batchKeys = [...Array(numSamples).keys()].map(i => start + 72000 * i);
+        postMessage({
+            message: 'specs',
+            specBatch: specBatch.arraySync(),
+            batchKeys: batchKeys,
+            threshold: threshold,
+            confidence: confidence,
+            file: file,
+            fileStart: fileStart,
+            worker: worker
+        })
+        specBatch.dispose()    
+    }
+}
+
+function makeSpectrogramBatch(signalBatch) {
+    return tf.tidy(() => {
+        const specBatch = signalBatch.map(signal => {
+            // const sigMax = tf.max(signal);
+            // const sigMin = tf.min(signal);
+            // const range = sigMax.sub(sigMin);
+            // const normalizedSignal = signal.sub(sigMin).div(range).mul(2).sub(1);
+            return tf.abs(tf.signal.stft(signal, 512, 186));
+        });
+        return tf.stack(specBatch);
+    });
+}