Add a Pip-based CI testing for inference code, with Colab dependencies.

PiperOrigin-RevId: 627135153
google-research · May 11, 2024 · 4fbea40 · 4fbea40
1 parent c829814
commit 4fbea40
Show file tree

Hide file tree

Showing 6 changed files with 1,462 additions and 1,352 deletions.
diff --git a/.github/install_colab_deps.py b/.github/install_colab_deps.py
@@ -0,0 +1,40 @@
+# coding=utf-8
+# Copyright 2024 The Perch Authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""Installs Colab dependencies for CI testing."""
+
+from typing import Sequence
+
+from absl import app
+import requests
+
+
+REQS_FILE = 'https://raw.githubusercontent.com/googlecolab/backend-info/main/pip-freeze.txt'
+COLAB_REQS_FILE = '/tmp/colab_reqs.txt'
+
+
+def main(unused_argv: Sequence[str]) -> None:
+  got = requests.get(REQS_FILE)
+  requirements_str = str(got.content, 'utf8')
+  # Skip the file:// lines, which we do not have access to.
+  lines = [
+      ln + '\n' for ln in requirements_str.split('\n') if 'file://' not in ln
+  ]
+  with open(COLAB_REQS_FILE, 'w') as f:
+    f.writelines(lines)
+
+
+if __name__ == '__main__':
+  app.run(main)
diff --git a/.github/workflows/ci_colab_no_jaxtrain.yml b/.github/workflows/ci_colab_no_jaxtrain.yml
@@ -0,0 +1,40 @@
+name: CI_colab_no_jaxtrain
+
+on:
+  push:
+    branches: [ main ]
+  pull_request:
+    branches: [ main ]
+  # Allows to run this workflow manually from the Actions tab on GitHub.
+  workflow_dispatch:
+
+jobs:
+  test-ubuntu:
+    name: "test on ${{ matrix.python-version }} on ${{ matrix.os }}"
+    runs-on: "${{ matrix.os }}"
+    strategy:
+      matrix:
+        python-version: ["3.10", "3.11"]
+        os: [ubuntu-latest]
+    steps:
+      - uses: actions/checkout@v3
+      - name: Set up Poetry
+        run: |
+          pipx install poetry
+      - name: Set up Python ${{ matrix.python-version }}
+        uses: actions/setup-python@v4
+        with:
+          python-version: ${{ matrix.python-version }}
+          cache: 'poetry'
+      - name: Install Chirp and its dependencies via pip.
+        run: |
+          sudo apt-get update
+          sudo apt-get install libsndfile1 ffmpeg
+          pip install absl-py
+          pip install requests
+          python3 .github/install_colab_deps.py
+          pip install -r /tmp/colab_reqs.txt
+          pip install git+https://github.com/google-research/perch.git
+      - name: Test with unittest
+        # TODO: Group together jaxtrain tests so they can be easily excluded.
+        run: poetry run python -m unittest discover -s chirp/inference/tests -p "*test.py"
diff --git a/chirp/inference/tests/bootstrap_test.py b/chirp/inference/tests/bootstrap_test.py
@@ -16,6 +16,7 @@
 """Tests for project state handling."""
 
 import os
+import shutil
 import tempfile
 
 from chirp import audio_utils
@@ -33,20 +34,29 @@
 
 class BootstrapTest(absltest.TestCase):
 
+  def setUp(self):
+    super().setUp()
+    # `self.create_tempdir()` raises an UnparsedFlagAccessError, which is why
+    # we use `tempdir` directly.
+    self.tempdir = tempfile.mkdtemp()
+
+  def tearDown(self):
+    super().tearDown()
+    shutil.rmtree(self.tempdir)
+
   def make_wav_files(self, classes, filenames):
     # Create a pile of files.
     rng = np.random.default_rng(seed=42)
-    tmpdir = self.create_tempdir()
     for subdir in classes:
-      subdir_path = os.path.join(tmpdir.full_path, subdir)
+      subdir_path = os.path.join(self.tempdir, subdir)
       os.mkdir(subdir_path)
       for filename in filenames:
         with open(
             os.path.join(subdir_path, f'{filename}_{subdir}.wav'), 'wb'
         ) as f:
           noise = rng.normal(scale=0.2, size=16000)
           wavfile.write(f, 16000, noise)
-    audio_glob = os.path.join(tmpdir.full_path, '*/*.wav')
+    audio_glob = os.path.join(self.tempdir, '*/*.wav')
     return audio_glob
 
   def write_placeholder_embeddings(self, audio_glob, source_infos, embed_dir):
@@ -129,15 +139,16 @@ def test_bootstrap_from_embeddings(self):
     source_infos = embed_lib.create_source_infos([audio_glob], shard_len_s=5.0)
     self.assertLen(source_infos, len(classes) * len(filenames))
 
-    embed_dir = self.create_tempdir()
-    labeled_dir = self.create_tempdir()
-    self.write_placeholder_embeddings(
-        audio_glob, source_infos, embed_dir.full_path
-    )
+    embed_dir = os.path.join(self.tempdir, 'embeddings')
+    labeled_dir = os.path.join(self.tempdir, 'labeled')
+    epath.Path(embed_dir).mkdir(parents=True, exist_ok=True)
+    epath.Path(labeled_dir).mkdir(parents=True, exist_ok=True)
+
+    self.write_placeholder_embeddings(audio_glob, source_infos, embed_dir)
 
     bootstrap_config = bootstrap.BootstrapConfig.load_from_embedding_path(
-        embeddings_path=embed_dir.full_path,
-        annotated_path=labeled_dir.full_path,
+        embeddings_path=embed_dir,
+        annotated_path=labeled_dir,
     )
     print('config hash : ', bootstrap_config.embedding_config_hash())
 

diff --git a/chirp/inference/tests/embed_test.py b/chirp/inference/tests/embed_test.py
@@ -50,6 +50,8 @@ def _make_output_head_model(model_path: str, embedding_dim: int = 1280):
       tf.keras.Input(shape=[embedding_dim]),
       tf.keras.layers.Dense(len(classes)),
   ])
+  tf.saved_model.save(model, model_path)
+  model = tf.saved_model.load(model_path)
   class_list = namespace.ClassList('custom', classes)
   return interface.LogitsOutputHead(
       model_path, 'other_label', model, class_list