deeppavlov · voorhs · Dec 6, 2024 · Dec 6, 2024 · Dec 6, 2024 · Dec 6, 2024
diff --git a/autointent/_pipeline/_pipeline.py b/autointent/_pipeline/_pipeline.py
@@ -12,8 +12,7 @@
 from autointent import Context, Dataset
 from autointent.configs import EmbedderConfig, InferenceNodeConfig, LoggingConfig, VectorIndexConfig
 from autointent.custom_types import NodeType
-from autointent.nodes import NodeOptimizer
-from autointent.nodes.inference import InferenceNode
+from autointent.nodes import InferenceNode, NodeOptimizer
 from autointent.utils import load_default_search_space, load_search_space
 
 from ._schemas import InferencePipelineOutput, InferencePipelineUtteranceOutput

diff --git a/autointent/context/vector_index_client/_vector_index.py b/autointent/context/vector_index_client/_vector_index.py
@@ -172,7 +172,7 @@ def query(
         all_results = func(queries, k)  # type: ignore[arg-type]
 
         all_labels = [[self.labels[result["id"]] for result in results] for results in all_results]
-        all_distances = [[result["distance"] for result in results] for results in all_results]
+        all_distances = [[float(result["distance"]) for result in results] for results in all_results]
         all_texts = [[self.texts[result["id"]] for result in results] for results in all_results]
 
         return all_labels, all_distances, all_texts

diff --git a/autointent/custom_types.py b/autointent/custom_types.py
@@ -21,7 +21,8 @@ class LogLevel(Enum):
 # Literal type for weight types in specific operations
 WEIGHT_TYPES = Literal["uniform", "distance", "closest"]
 """
-Represents weight calculation methods:
+Represents weight calculation methods.
+
 - "uniform": Equal weight for all elements.
 - "distance": Weights based on distance.
 - "closest": Prioritizes closest elements.
@@ -30,7 +31,8 @@ class LogLevel(Enum):
 # Type alias for label representation
 LabelType = int | list[int]
 """
-Type alias for label representation:
+Type alias for label representation.
+
 - `int`: For single-label classification.
 - `list[int]`: For multi-label classification.
 """

diff --git a/autointent/modules/prediction/_adaptive.py b/autointent/modules/prediction/_adaptive.py
@@ -47,26 +47,22 @@ class AdaptivePredictor(PredictionModule):
 
     Examples
     --------
-    >>> from autointent.modules import AdaptivePredictor
-    >>> import numpy as np
-    >>> scores = np.array([[0.8, 0.1, 0.4], [0.2, 0.9, 0.5]])
-    >>> labels = [[1, 0, 0], [0, 1, 0]]
-    >>> search_space = [0.1, 0.2, 0.3, 0.5, 0.7]
-    >>> predictor = AdaptivePredictor(search_space=search_space)
-    >>> predictor.fit(scores, labels)
-    >>> predictions = predictor.predict(scores)
-    >>> print(predictions)
-    [[1 0 0]
-     [0 1 0]]
-
-    Save and load the predictor:
-    >>> predictor.dump("outputs/")
-    >>> predictor_loaded = AdaptivePredictor()
-    >>> predictor_loaded.load("outputs/")
-    >>> predictions = predictor_loaded.predict(scores)
-    >>> print(predictions)
-    [[1 0 0]
-     [0 1 0]]
+    .. testcode::
+
+        from autointent.modules.prediction import AdaptivePredictor
+        import numpy as np
+        scores = np.array([[0.8, 0.1, 0.4], [0.2, 0.9, 0.5]])
+        labels = [[1, 0, 0], [0, 1, 0]]
+        predictor = AdaptivePredictor()
+        predictor.fit(scores, labels)
+        predictions = predictor.predict(scores)
+        print(predictions)
+
+    .. testoutput::
+
+        [[1 0 0]
+         [0 1 0]]
+
     """
 
     metadata_dict_name = "metadata.json"

diff --git a/autointent/modules/prediction/_argmax.py b/autointent/modules/prediction/_argmax.py
@@ -32,24 +32,22 @@ class ArgmaxPredictor(PredictionModule):
 
     Examples
     --------
-    >>> from autointent.modules import ArgmaxPredictor
-    >>> import numpy as np
-    >>> predictor = ArgmaxPredictor()
-    >>> train_scores = np.array([[0.2, 0.8, 0.0], [0.7, 0.1, 0.2]])
-    >>> labels = [1, 0]  # Single-label targets
-    >>> predictor.fit(train_scores, labels)
-    >>> test_scores = np.array([[0.1, 0.5, 0.4], [0.6, 0.3, 0.1]])
-    >>> predictions = predictor.predict(test_scores)
-    >>> print(predictions)
-    [1 0]
-
-    Save the predictor's state:
-    >>> predictor.dump("outputs/")
-    >>> loaded_predictor = ArgmaxPredictor()
-    >>> loaded_predictor.load("outputs/")
-    >>> loaded_predictions = loaded_predictor.predict(test_scores)
-    >>> print(loaded_predictions)
-    [1 0]
+    .. testcode::
+
+        from autointent.modules import ArgmaxPredictor
+        import numpy as np
+        predictor = ArgmaxPredictor()
+        train_scores = np.array([[0.2, 0.8, 0.0], [0.7, 0.1, 0.2]])
+        labels = [1, 0]  # Single-label targets
+        predictor.fit(train_scores, labels)
+        test_scores = np.array([[0.1, 0.5, 0.4], [0.6, 0.3, 0.1]])
+        predictions = predictor.predict(test_scores)
+        print(predictions)
+
+    .. testoutput::
+
+        [1 0]
+
     """
 
     name = "argmax"

diff --git a/autointent/modules/prediction/_jinoos.py b/autointent/modules/prediction/_jinoos.py
@@ -37,24 +37,23 @@ class JinoosPredictor(PredictionModule):
 
     Examples
     --------
-    >>> from autointent.modules import JinoosPredictor
-    >>> import numpy as np
-    >>> scores = np.array([[0.2, 0.8], [0.6, 0.4], [0.1, 0.9]])
-    >>> labels = [1, 0, 1]
-    >>> search_space = [0.3, 0.5, 0.7]
-    >>> predictor = JinoosPredictor(search_space=search_space)
-    >>> predictor.fit(scores, labels)
-    >>> test_scores = np.array([[0.3, 0.7], [0.5, 0.5]])
-    >>> predictions = predictor.predict(test_scores)
-    >>> print(predictions)
-    [1 0]
-
-    Save and load the predictor state:
-    >>> predictor.dump("outputs/")
-    >>> loaded_predictor = JinoosPredictor()
-    >>> loaded_predictor.load("outputs/")
-    >>> print(loaded_predictor.thresh)
-    0.5  # Example threshold from the search space
+    .. testcode::
+
+        from autointent.modules import JinoosPredictor
+        import numpy as np
+        scores = np.array([[0.2, 0.8], [0.6, 0.4], [0.1, 0.9]])
+        labels = [1, 0, 1]
+        search_space = [0.3, 0.5, 0.7]
+        predictor = JinoosPredictor(search_space=search_space)
+        predictor.fit(scores, labels)
+        test_scores = np.array([[0.3, 0.7], [0.5, 0.5]])
+        predictions = predictor.predict(test_scores)
+        print(predictions)
+
+    .. testoutput::
+
+        [1 0]
+
     """
 
     thresh: float

diff --git a/autointent/modules/prediction/_threshold.py b/autointent/modules/prediction/_threshold.py
@@ -42,34 +42,41 @@ class ThresholdPredictor(PredictionModule):
 
     Examples
     --------
-    Single-label classification example:
-    >>> from autointent.modules import ThresholdPredictor
-    >>> import numpy as np
-    >>> scores = np.array([[0.2, 0.8], [0.6, 0.4], [0.1, 0.9]])
-    >>> labels = [1, 0, 1]
-    >>> threshold = 0.5
-    >>> predictor = ThresholdPredictor(thresh=threshold)
-    >>> predictor.fit(scores, labels)
-    >>> test_scores = np.array([[0.3, 0.7], [0.5, 0.5]])
-    >>> predictions = predictor.predict(test_scores)
-    >>> print(predictions)
-    [1 0]
-
-    Multi-label classification example:
-    >>> labels = [[1, 0], [0, 1], [1, 1]]
-    >>> predictor = ThresholdPredictor(thresh=[0.5, 0.5])
-    >>> predictor.fit(scores, labels)
-    >>> test_scores = np.array([[0.3, 0.7], [0.6, 0.4]])
-    >>> predictions = predictor.predict(test_scores)
-    >>> print(predictions)
-    [[0 1] [1 0]]
-
-    Save and load the model:
-    >>> predictor.dump("outputs/")
-    >>> loaded_predictor = ThresholdPredictor(thresh=0.5)
-    >>> loaded_predictor.load("outputs/")
-    >>> print(loaded_predictor.thresh)
-    0.5
+    Single-label classification
+    ===========================
+    .. testcode::
+
+        from autointent.modules import ThresholdPredictor
+        import numpy as np
+        scores = np.array([[0.2, 0.8], [0.6, 0.4], [0.1, 0.9]])
+        labels = [1, 0, 1]
+        threshold = 0.5
+        predictor = ThresholdPredictor(thresh=threshold)
+        predictor.fit(scores, labels)
+        test_scores = np.array([[0.3, 0.7], [0.5, 0.5]])
+        predictions = predictor.predict(test_scores)
+        print(predictions)
+
+    .. testoutput::
+
+        [1 0]
+
+    Multi-label classification
+    ==========================
+    .. testcode::
+
+        labels = [[1, 0], [0, 1], [1, 1]]
+        predictor = ThresholdPredictor(thresh=[0.5, 0.5])
+        predictor.fit(scores, labels)
+        test_scores = np.array([[0.3, 0.7], [0.6, 0.4]])
+        predictions = predictor.predict(test_scores)
+        print(predictions)
+
+    .. testoutput::
+
+        [[0 1]
+         [1 0]]
+
     """
 
     metadata: ThresholdPredictorDumpMetadata

diff --git a/autointent/modules/prediction/_tunable.py b/autointent/modules/prediction/_tunable.py
@@ -43,33 +43,40 @@ class TunablePredictor(PredictionModule):
 
     Examples
     --------
-    Single-label classification:
-    >>> import numpy as np
-    >>> from autointent.modules import TunablePredictor
-    >>> scores = np.array([[0.2, 0.8], [0.6, 0.4], [0.1, 0.9]])
-    >>> labels = [1, 0, 1]
-    >>> predictor = TunablePredictor(n_trials=100, seed=42)
-    >>> predictor.fit(scores, labels)
-    >>> test_scores = np.array([[0.3, 0.7], [0.5, 0.5]])
-    >>> predictions = predictor.predict(test_scores)
-    >>> print(predictions)
-    [1 0]
-
-    Multi-label classification:
-    >>> labels = [[1, 0], [0, 1], [1, 1]]
-    >>> predictor = TunablePredictor(n_trials=100, seed=42)
-    >>> predictor.fit(scores, labels)
-    >>> test_scores = np.array([[0.3, 0.7], [0.6, 0.4]])
-    >>> predictions = predictor.predict(test_scores)
-    >>> print(predictions)
-    [[0 1] [1 0]]
-
-    Saving and loading the model:
-    >>> predictor.dump("outputs/")
-    >>> loaded_predictor = TunablePredictor()
-    >>> loaded_predictor.load("outputs/")
-    >>> print(loaded_predictor.thresh)
-    [0.5, 0.7]
+    Single-label classification
+    ===========================
+    .. testcode::
+
+        import numpy as np
+        from autointent.modules import TunablePredictor
+        scores = np.array([[0.2, 0.8], [0.6, 0.4], [0.1, 0.9]])
+        labels = [1, 0, 1]
+        predictor = TunablePredictor(n_trials=100, seed=42)
+        predictor.fit(scores, labels)
+        test_scores = np.array([[0.3, 0.7], [0.5, 0.5]])
+        predictions = predictor.predict(test_scores)
+        print(predictions)
+
+    .. testoutput::
+
+        [1 0]
+
+    Multi-label classification
+    ==========================
+    .. testcode::
+
+        labels = [[1, 0], [0, 1], [1, 1]]
+        predictor = TunablePredictor(n_trials=100, seed=42)
+        predictor.fit(scores, labels)
+        test_scores = np.array([[0.3, 0.7], [0.6, 0.4]])
+        predictions = predictor.predict(test_scores)
+        print(predictions)
+
+    .. testoutput::
+
+        [[1 1]
+         [1 1]]
+
     """
 
     name = "tunable"

diff --git a/autointent/modules/retrieval/_vectordb.py b/autointent/modules/retrieval/_vectordb.py
@@ -32,28 +32,33 @@ class VectorDBModule(RetrievalModule):
 
     Examples
     --------
-    Creating and fitting the VectorDBModule:
-    >>> from your_module import VectorDBModule
-    >>> utterances = ["hello world", "how are you?", "good morning"]
-    >>> labels = [1, 2, 3]
-    >>> vector_db = VectorDBModule(k=2, embedder_name="some_embedder", db_dir="./db", device="cpu")
-    >>> vector_db.fit(utterances, labels)
-    >>> def retrieval_metric_fn(true_labels, predicted_labels):
-    >>>     # Custom metric function (e.g., accuracy or F1 score)
-    >>>     return sum([1 if true == pred else 0 for true, pred \\
-    >>>         in zip(true_labels, predicted_labels)]) / len(true_labels)
-    >>> score = vector_db.score(context, retrieval_metric_fn)
-    >>> print(score)
-
-    Performing predictions:
-    >>> predictions = vector_db.predict(["how is the weather today?"])
-    >>> print(predictions)
-
-    Saving and loading the model:
-    >>> vector_db.dump("outputs/")
-    >>> loaded_vector_db = VectorDBModule(k=2, embedder_name="some_embedder", db_dir="./db", device="cpu")
-    >>> loaded_vector_db.load("outputs/")
-    >>> print(loaded_vector_db.vector_index)
+    .. testsetup::
+
+        db_dir = "doctests-db"
+
+    .. testcode::
+
+        from autointent.modules.retrieval import VectorDBModule
+        utterances = ["bye", "how are you?", "good morning"]
+        labels = [0, 1, 1]
+        vector_db = VectorDBModule(
+            k=2,
+            embedder_name="sergeyzh/rubert-tiny-turbo",
+            db_dir=db_dir,
+        )
+        vector_db.fit(utterances, labels)
+        predictions = vector_db.predict(["how is the weather today?"])
+        print(predictions)
+
+    .. testoutput::
+
+        ([[1, 1]], [[0.15..., 0.18...]], [['good morning', 'how are you?']])
+
+    .. testcleanup::
+
+        import shutil
+        shutil.rmtree(db_dir)
+
     """
 
     vector_index: VectorIndex

diff --git a/autointent/modules/scoring/_description/description.py b/autointent/modules/scoring/_description/description.py
@@ -40,24 +40,6 @@ class DescriptionScorer(ScoringModule):
     :ivar db_dir: Directory path where the vector database is stored.
     :ivar name: Name of the scorer, defaults to "description".
 
-    Examples
-    --------
-    Creating and fitting the DescriptionScorer
-    >>> from autointent.modules import DescriptionScorer
-    >>> utterances = ["what is your name?", "how old are you?"]
-    >>> labels = [0, 1]
-    >>> descriptions = ["greeting", "age-related question"]
-    >>> scorer = DescriptionScorer(embedder_name="your_embedder", temperature=1.0)
-    >>> scorer.fit(utterances, labels, descriptions)
-
-    Predicting scores:
-    >>> scores = scorer.predict(["tell me about your age?"])
-    >>> print(scores)  # Outputs similarity scores for the utterance against all descriptions
-
-    Saving and loading the scorer:
-    >>> scorer.dump("outputs/")
-    >>> loaded_scorer = DescriptionScorer(embedder_name="your_embedder")
-    >>> loaded_scorer.load("outputs/")
     """
 
     weights_file_name: str = "description_vectors.npy"