Add CI to build HAPs for HarmonyOS (#1578)

k2-fsa · Nov 29, 2024 · 299f239 · 299f239
1 parent be159f9
commit 299f239
Show file tree

Hide file tree

Showing 11 changed files with 376 additions and 24 deletions.
diff --git a/.github/workflows/hap-vad-asr.yaml b/.github/workflows/hap-vad-asr.yaml
@@ -0,0 +1,173 @@
+# Builds the HarmonyOS HAP packages for VAD + ASR and publishes them to Hugging Face.
+name: hap-vad-asr
+
+on:
+  # Runs on pushes to the hap / hap-ci branches, or when started manually.
+  push:
+    branches:
+      - hap
+      - hap-ci
+
+  workflow_dispatch:
+
+# A newer run for the same ref cancels the run that is still in progress.
+concurrency:
+  group: hap-vad-asr-${{ github.ref }}
+  cancel-in-progress: true
+
+permissions:
+  contents: write
+jobs:
+  hap_vad_asr:
+    # Only run when the repository belongs to csukuangfj or k2-fsa.
+    if: github.repository_owner == 'csukuangfj' || github.repository_owner == 'k2-fsa'
+    runs-on: ${{ matrix.os }}
+    name: Haps for vad asr ${{ matrix.index }}/${{ matrix.total }}
+    strategy:
+      # A failing matrix job does not cancel the other matrix jobs.
+      fail-fast: false
+      matrix:
+        os: [ubuntu-latest]
+        # One job per index. total and index are forwarded to
+        # scripts/hap/generate-vad-asr-hap-script.py in the
+        # "Generate build script" step.
+        total: ["10"]
+        index: ["0", "1", "2", "3", "4", "5", "6", "7", "8", "9"]
+
+    steps:
+      - uses: actions/checkout@v4
+        with:
+          fetch-depth: 0
+
+      # https://github.com/actions/setup-java
+      - uses: actions/setup-java@v4
+        with:
+          distribution: 'temurin' # See 'Supported distributions' for available options
+          java-version: '17' # JDK 17 is required to sign the HAP
+
+      # Print which java is on PATH and its version, for the build log.
+      - name: Show java version
+        shell: bash
+        run: |
+          which java
+          java --version
+
+      # Cache the command-line-tools directory. The cache key is the archive
+      # file name, so switching to another toolchain version means updating
+      # both this key and the download step below.
+      - name: cache-toolchain
+        id: cache-toolchain-ohos
+        uses: actions/cache@v4
+        with:
+          path: command-line-tools
+          key: commandline-tools-linux-x64-5.0.5.200.zip
+
+      # Runs only when the cache step above did not restore the directory.
+      # NOTE(review): the archive is expected to unpack into ./command-line-tools
+      # (the cached path); confirm against the archive layout.
+      - name: Download toolchain
+        if: steps.cache-toolchain-ohos.outputs.cache-hit != 'true'
+        shell: bash
+        run: |
+          curl -SL -O https://huggingface.co/csukuangfj/harmonyos-commandline-tools/resolve/main/commandline-tools-linux-x64-5.0.5.200.zip
+          unzip commandline-tools-linux-x64-5.0.5.200.zip
+          rm commandline-tools-linux-x64-5.0.5.200.zip
+
+      # Put the cmake shipped with the HarmonyOS command-line tools on PATH
+      # for the remaining steps, and print which cmake will be used.
+      - name: Set environment variable
+        shell: bash
+        run: |
+          cmake_bin_dir="$GITHUB_WORKSPACE/command-line-tools/sdk/default/openharmony/native/build-tools/cmake/bin"
+
+          # Entries appended to $GITHUB_PATH only take effect in later steps.
+          echo "$cmake_bin_dir" >> "$GITHUB_PATH"
+
+          # Also update PATH for this step, so that the checks below report
+          # the cmake that later steps will see instead of the runner default.
+          export PATH="$cmake_bin_dir:$PATH"
+          which cmake
+
+          cmake --version
+
+      # NOTE(review): jinja2 is presumably needed by the generator script run
+      # in the next step; confirm against scripts/hap.
+      - name: Install Python dependencies
+        shell: bash
+        run: |
+          python3 -m pip install --upgrade pip jinja2
+
+      # Generate build-hap-vad-asr.sh for this matrix job (total/index come
+      # from the matrix) and move it to the repository root, where the
+      # "build HAP" step runs it.
+      - name: Generate build script
+        shell: bash
+        run: |
+          cd scripts/hap
+
+          total=${{ matrix.total }}
+          index=${{ matrix.index }}
+
+          ./generate-vad-asr-hap-script.py --total $total --index $index
+          ls -lh
+
+          chmod +x build-hap-vad-asr.sh
+          mv -v ./build-hap-vad-asr.sh ../..
+
+      # Write the signing certificate, profile and key store to /tmp.
+      # They are removed again in the "build HAP" step.
+      #
+      # The secrets are passed through environment variables instead of being
+      # expanded into the script text, so that characters such as quotes, `$`
+      # or backticks inside a secret cannot alter the shell commands.
+      - name: Generate secrets
+        env:
+          HAP_SHERPA_ONNX_CER: ${{ secrets.HAP_SHERPA_ONNX_CER }}
+          HAP_SHERPA_ONNX_PROFILE: ${{ secrets.HAP_SHERPA_ONNX_PROFILE }}
+          HAP_SHERPA_ONNX_KEY_STORE: ${{ secrets.HAP_SHERPA_ONNX_KEY_STORE }}
+        shell: bash
+        run: |
+          echo "$HAP_SHERPA_ONNX_CER" > /tmp/sherpa_onnx.cer
+          shasum -a 256 /tmp/sherpa_onnx.cer
+          ls -lh /tmp/sherpa_onnx.cer
+
+          # The profile and the key store are stored base64-encoded.
+          # To produce the encoded form:
+          #
+          # macos
+          # base64 -i sherpa_onnx_profileRelease.p7b -o sherpa_onnx_profileRelease.p7b.base64
+          #
+          # linux
+          # base64 -w 0 sherpa_onnx_profileRelease.p7b > sherpa_onnx_profileRelease.p7b.base64
+          #
+          # To decode it again:
+          # cat sherpa_onnx_profileRelease.p7b.base64 | base64 --decode > sherpa_onnx_profileRelease.p7b
+          #
+          echo "$HAP_SHERPA_ONNX_PROFILE"   | base64 --decode > /tmp/sherpa_onnx_profileRelease.p7b
+          echo "$HAP_SHERPA_ONNX_KEY_STORE" > ./sherpa_onnx_ohos_key.p12.base64
+          echo "$HAP_SHERPA_ONNX_KEY_STORE" | base64 --decode > /tmp/sherpa_onnx_ohos_key.p12
+
+          ls -l /tmp/sherpa_onnx_profileRelease.p7b
+          ls -l /tmp/sherpa_onnx_ohos_key.p12
+
+          # Size and checksum of the still-encoded key store, to help check
+          # that the secret was stored intact; the temporary copy is removed
+          # right away.
+          ls -lh ./sherpa_onnx_ohos_key.p12.base64
+          shasum -a 256 ./sherpa_onnx_ohos_key.p12.base64
+          wc ./sherpa_onnx_ohos_key.p12.base64
+          rm ./sherpa_onnx_ohos_key.p12.base64
+
+          shasum -a 256 /tmp/sherpa_onnx_profileRelease.p7b
+          shasum -a 256 /tmp/sherpa_onnx_ohos_key.p12
+
+      # Run the generated build script. The key alias and passwords are made
+      # available to it as environment variables.
+      - name: build HAP
+        env:
+          HAP_KEY_ALIAS: ${{ secrets.HAP_KEY_ALIAS }}
+          HAP_KEY_PWD: ${{ secrets.HAP_KEY_PWD }}
+          HAP_KEY_STORE_PWD: ${{ secrets.HAP_KEY_STORE_PWD }}
+        shell: bash
+        run: |
+          # Remove the signing files written by the "Generate secrets" step
+          # when this step exits, whether the build succeeded or failed.
+          # (Without the trap, a failing build script would end the step
+          # before the cleanup ran.)
+          trap 'rm -f /tmp/sherpa_onnx.cer /tmp/sherpa_onnx_profileRelease.p7b /tmp/sherpa_onnx_ohos_key.p12' EXIT
+
+          export COMMANDLINE_TOOLS_DIR=$GITHUB_WORKSPACE/command-line-tools
+          ./build-hap-vad-asr.sh
+
+      # List the files in ./haps and the disk usage of each top-level
+      # directory, for the build log.
+      - name: Display HAPs
+        shell: bash
+        run: |
+          ls -lh ./haps/
+          du -h -d1 .
+
+      # Copy the built HAPs into hap/vad-asr/<version> of the Hugging Face
+      # repository csukuangfj/sherpa-onnx-harmony-os and push them.
+      # The whole clone/commit/push sequence is retried up to 20 times.
+      - name: Publish to huggingface
+        env:
+          HF_TOKEN: ${{ secrets.HF_TOKEN }}
+        uses: nick-fields/retry@v3
+        with:
+          max_attempts: 20
+          timeout_seconds: 200
+          shell: bash
+          command: |
+            # NOTE(review): the address below looks like it was obfuscated by
+            # the page this diff was captured from; restore the real committer
+            # email before using this file.
+            git config --global user.email "[email protected]"
+            git config --global user.name "Fangjun Kuang"
+
+            # Start every attempt from a fresh clone.
+            rm -rf huggingface
+            # Do not download the LFS objects already stored in the repository.
+            export GIT_LFS_SKIP_SMUDGE=1
+            export GIT_CLONE_PROTECTION_ACTIVE=false
+
+            SHERPA_ONNX_VERSION=$(grep "SHERPA_ONNX_VERSION" ./CMakeLists.txt  | cut -d " " -f 2  | cut -d '"' -f 2)
+            echo "SHERPA_ONNX_VERSION $SHERPA_ONNX_VERSION"
+
+            # Authenticate with the token taken from the HF_TOKEN environment
+            # variable of this step.
+            git clone https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/sherpa-onnx-harmony-os huggingface
+            cd huggingface
+            du -h -d1 .
+            git fetch
+            git pull
+            git merge -m "merge remote" --ff origin main
+
+            d=hap/vad-asr/$SHERPA_ONNX_VERSION
+            mkdir -p $d
+            cp -v ../haps/*.hap $d/
+            git status
+            git lfs track "*.hap"
+            git add .
+            git commit -m "add more HAPs"
+            git push https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/sherpa-onnx-harmony-os main
diff --git a/harmony-os/SherpaOnnxVadAsr/entry/oh-package-lock.json5 b/harmony-os/SherpaOnnxVadAsr/entry/oh-package-lock.json5
diff --git a/harmony-os/SherpaOnnxVadAsr/entry/oh-package.json5 b/harmony-os/SherpaOnnxVadAsr/entry/oh-package.json5
@@ -5,6 +5,9 @@
   "main": "",
   "author": "",
   "license": "",
-  "dependencies": {}
+  "dependencies": {
+    // please see https://ohpm.openharmony.cn/#/cn/detail/sherpa_onnx
+    "sherpa_onnx": "1.10.32",
+  }
 }
 
diff --git a/harmony-os/SherpaOnnxVadAsr/entry/src/main/ets/pages/NonStreamingAsrModels.ets b/harmony-os/SherpaOnnxVadAsr/entry/src/main/ets/pages/NonStreamingAsrModels.ets
@@ -4,7 +4,7 @@
 import { OfflineModelConfig } from 'sherpa_onnx';
 
 export function getOfflineModelConfig(type: number): OfflineModelConfig {
-  const c = new OfflineModelConfig();
+  const c: OfflineModelConfig = new OfflineModelConfig();
   switch (type) {
     case 0: {
       const modelDir = 'sherpa-onnx-paraformer-zh-2023-09-14'

diff --git a/harmony-os/SherpaOnnxVadAsr/entry/src/main/ets/workers/NonStreamingAsrWithVadWorker.ets b/harmony-os/SherpaOnnxVadAsr/entry/src/main/ets/workers/NonStreamingAsrWithVadWorker.ets
@@ -2,8 +2,11 @@ import { ErrorEvent, MessageEvents, ThreadWorkerGlobalScope, worker } from '@kit
 import {
   OfflineRecognizer,
   OfflineRecognizerConfig,
+  OfflineStream,
+  OnlineRecognizerResult,
   readWaveFromBinary,
   SileroVadConfig,
+  SpeechSegment,
   Vad,
   VadConfig,
 } from 'sherpa_onnx';
@@ -18,7 +21,7 @@ let vad: Vad; // vad for decoding files
 
 function initVad(context: Context): Vad {
   let mgr = context.resourceManager;
-  const config = new VadConfig(
+  const config: VadConfig = new VadConfig(
     new SileroVadConfig(
       'silero_vad.onnx',
       0.5,
@@ -37,7 +40,7 @@ function initVad(context: Context): Vad {
 
 function initNonStreamingAsr(context: Context): OfflineRecognizer {
   let mgr = context.resourceManager;
-  const config = new OfflineRecognizerConfig();
+  const config: OfflineRecognizerConfig = new OfflineRecognizerConfig();
 
   // Note that you can switch to a new model by changing type
   //
@@ -61,7 +64,13 @@ function initNonStreamingAsr(context: Context): OfflineRecognizer {
   const type = 2;
   config.modelConfig = getOfflineModelConfig(type);
   config.modelConfig.debug = true;
-  return new OfflineRecognizer(config, mgr)
+  config.ruleFsts = '';
+  return new OfflineRecognizer(config, mgr);
+}
+
+// Type used by decode() for the value returned by readWaveFromBinary().
+interface Wave {
+  // Audio samples; decode() feeds them to the VAD one window at a time.
+  samples: Float32Array;
+  // Sample rate of `samples`; decode() divides sample offsets and counts by
+  // it to compute the start/end time of each speech segment.
+  sampleRate: number;
 }
 
 function decode(filename: string): string {
@@ -71,44 +80,44 @@ function decode(filename: string): string {
   const stat = fileIo.statSync(fp.fd);
   const arrayBuffer = new ArrayBuffer(stat.size);
   fileIo.readSync(fp.fd, arrayBuffer);
-  const data = new Uint8Array(arrayBuffer);
+  const data: Uint8Array = new Uint8Array(arrayBuffer);
 
-  const wave = readWaveFromBinary(data);
+  const wave: Wave = readWaveFromBinary(data);
 
   console.log(`sample rate ${wave.sampleRate}`);
   console.log(`samples length ${wave.samples.length}`);
   const resultList: string[] = [];
 
-  const windowSize = vad.config.sileroVad.windowSize;
+  const windowSize: number = vad.config.sileroVad.windowSize;
   for (let i = 0; i < wave.samples.length; i += windowSize) {
-    const thisWindow = wave.samples.subarray(i, i + windowSize)
+    const thisWindow: Float32Array = wave.samples.subarray(i, i + windowSize)
     vad.acceptWaveform(thisWindow);
     if (i + windowSize >= wave.samples.length) {
       vad.flush();
     }
     while (!vad.isEmpty()) {
-      const segment = vad.front();
-      const _startTime = (segment.start / wave.sampleRate);
-      const _endTime = _startTime + segment.samples.length / wave.sampleRate;
+      const segment: SpeechSegment = vad.front();
+      const _startTime: number = (segment.start / wave.sampleRate);
+      const _endTime: number = _startTime + segment.samples.length / wave.sampleRate;
 
       if (_endTime - _startTime < 0.2) {
         vad.pop();
         continue;
       }
 
-      const startTime = _startTime.toFixed(2);
-      const endTime = _endTime.toFixed(2);
+      const startTime: string = _startTime.toFixed(2);
+      const endTime: string = _endTime.toFixed(2);
 
-      const progress = (segment.start + segment.samples.length) / wave.samples.length * 100;
+      const progress: number = (segment.start + segment.samples.length) / wave.samples.length * 100;
 
       workerPort.postMessage({ 'msgType': 'non-streaming-asr-vad-decode-progress', progress });
 
-      const stream = recognizer.createStream();
+      const stream: OfflineStream = recognizer.createStream();
       stream.acceptWaveform({ samples: segment.samples, sampleRate: wave.sampleRate });
       recognizer.decode(stream);
-      const result = recognizer.getResult(stream);
+      const result: OnlineRecognizerResult = recognizer.getResult(stream);
 
-      const text = `${startTime} -- ${endTime} ${result.text}`
+      const text: string = `${startTime} -- ${endTime} ${result.text}`
       resultList.push(text);
       console.log(`partial result ${text}`);
 

diff --git a/harmony-os/SherpaOnnxVadAsr/entry/src/main/resources/rawfile/.gitkeep b/harmony-os/SherpaOnnxVadAsr/entry/src/main/resources/rawfile/.gitkeep
diff --git a/harmony-os/SherpaOnnxVadAsr/oh-package.json5 b/harmony-os/SherpaOnnxVadAsr/oh-package.json5
@@ -2,11 +2,6 @@
   "modelVersion": "5.0.0",
   "description": "Please describe the basic information.",
   "dependencies": {
-
-    // You can download sherpa_onnx-v1.10.32.har
-    // from
-    // https://huggingface.co/csukuangfj/sherpa-onnx-harmony-os/tree/main/har
-    "sherpa_onnx": "file:./entry/sherpa_onnx-v1.10.32.har"
   },
   "devDependencies": {
     "@ohos/hypium": "1.0.19"