Create embedding models from configurations with a `from_config` classmethod instead of abusing the dataclass `__post_init__` function.

PiperOrigin-RevId: 557267692
google-research · Aug 16, 2023 · f152d4c · f152d4c
1 parent e4cd2c8
commit f152d4c
Show file tree

Hide file tree

Showing 6 changed files with 179 additions and 124 deletions.
diff --git a/chirp/inference/configs/separate_soundscapes.py b/chirp/inference/configs/separate_soundscapes.py
@@ -52,8 +52,10 @@ def get_config() -> config_dict.ConfigDict:
           },
           'separator_model_tf_config': {
               'model_path': sep_model_checkpoint_path,
+              'window_size_s': 5.0,
               'sample_rate': 32000,
               'frame_size': 32000,
+              'target_class_list': None,
           },
       },
       'speech_filter_threshold': 0.95,

diff --git a/chirp/inference/configs/separated_seabirds.py b/chirp/inference/configs/separated_seabirds.py
@@ -46,6 +46,7 @@ def get_config() -> config_dict.ConfigDict:
       'model_key': 'separate_embed_model',
       'model_config': {
           'sample_rate': 32000,
+          'embed_raw': True,
           'taxonomy_model_tf_config': {
               'model_path': emb_model_checkpoint_path,
               'window_size_s': 5.0,
@@ -54,8 +55,10 @@ def get_config() -> config_dict.ConfigDict:
           },
           'separator_model_tf_config': {
               'model_path': sep_model_checkpoint_path,
+              'window_size_s': 5.0,
               'sample_rate': 32000,
               'frame_size': 32000,
+              'target_class_list': None,
           },
       },
       'speech_filter_threshold': 0.0,

diff --git a/chirp/inference/embed_lib.py b/chirp/inference/embed_lib.py
@@ -139,9 +139,8 @@ def __init__(
 
   def setup(self):
     if self.embedding_model is None:
-      self.embedding_model = models.model_class_map()[self.model_key](
-          **self.model_config
-      )
+      model_class = models.model_class_map()[self.model_key]
+      self.embedding_model = model_class.from_config(self.model_config)
     if hasattr(self, 'model_key'):
       del self.model_key
     if hasattr(self, 'model_config'):

diff --git a/chirp/inference/interface.py b/chirp/inference/interface.py
@@ -20,6 +20,7 @@
 
 from chirp.taxonomy import namespace
 import librosa
+from ml_collections import config_dict
 import numpy as np
 
 LogitType = Dict[str, np.ndarray]
@@ -111,6 +112,13 @@ class EmbeddingModel:
 
   sample_rate: int
 
+  @classmethod
+  def from_config(
+      cls, model_config: config_dict.ConfigDict
+  ) -> 'EmbeddingModel':
+    """Load the model from a configuration dict."""
+    raise NotImplementedError
+
   def embed(self, audio_array: np.ndarray) -> InferenceOutputs:
     """Create InferenceOutputs from an audio array.