Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix/doctests for modules #71

Merged
6 commits merged on Dec 6, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 1 addition & 2 deletions autointent/_pipeline/_pipeline.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,8 +12,7 @@
from autointent import Context, Dataset
from autointent.configs import EmbedderConfig, InferenceNodeConfig, LoggingConfig, VectorIndexConfig
from autointent.custom_types import NodeType
from autointent.nodes import NodeOptimizer
from autointent.nodes.inference import InferenceNode
from autointent.nodes import InferenceNode, NodeOptimizer
from autointent.utils import load_default_search_space, load_search_space

from ._schemas import InferencePipelineOutput, InferencePipelineUtteranceOutput
Expand Down
2 changes: 1 addition & 1 deletion autointent/context/vector_index_client/_vector_index.py
Original file line number Diff line number Diff line change
Expand Up @@ -172,7 +172,7 @@ def query(
all_results = func(queries, k) # type: ignore[arg-type]

all_labels = [[self.labels[result["id"]] for result in results] for results in all_results]
all_distances = [[result["distance"] for result in results] for results in all_results]
all_distances = [[float(result["distance"]) for result in results] for results in all_results]
all_texts = [[self.texts[result["id"]] for result in results] for results in all_results]

return all_labels, all_distances, all_texts
Expand Down
6 changes: 4 additions & 2 deletions autointent/custom_types.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,8 @@ class LogLevel(Enum):
# Literal type for weight types in specific operations
WEIGHT_TYPES = Literal["uniform", "distance", "closest"]
"""
Represents weight calculation methods:
Represents weight calculation methods

- "uniform": Equal weight for all elements.
- "distance": Weights based on distance.
- "closest": Prioritizes closest elements.
Expand All @@ -30,7 +31,8 @@ class LogLevel(Enum):
# Type alias for label representation
LabelType = int | list[int]
"""
Type alias for label representation:
Type alias for label representation

- `int`: For single-label classification.
- `list[int]`: For multi-label classification.
"""
Expand Down
36 changes: 16 additions & 20 deletions autointent/modules/prediction/_adaptive.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,26 +47,22 @@ class AdaptivePredictor(PredictionModule):

Examples
--------
>>> from autointent.modules import AdaptivePredictor
>>> import numpy as np
>>> scores = np.array([[0.8, 0.1, 0.4], [0.2, 0.9, 0.5]])
>>> labels = [[1, 0, 0], [0, 1, 0]]
>>> search_space = [0.1, 0.2, 0.3, 0.5, 0.7]
>>> predictor = AdaptivePredictor(search_space=search_space)
>>> predictor.fit(scores, labels)
>>> predictions = predictor.predict(scores)
>>> print(predictions)
[[1 0 0]
[0 1 0]]

Save and load the predictor:
>>> predictor.dump("outputs/")
>>> predictor_loaded = AdaptivePredictor()
>>> predictor_loaded.load("outputs/")
>>> predictions = predictor_loaded.predict(scores)
>>> print(predictions)
[[1 0 0]
[0 1 0]]
.. testcode::

from autointent.modules.prediction import AdaptivePredictor
import numpy as np
scores = np.array([[0.8, 0.1, 0.4], [0.2, 0.9, 0.5]])
labels = [[1, 0, 0], [0, 1, 0]]
predictor = AdaptivePredictor()
predictor.fit(scores, labels)
predictions = predictor.predict(scores)
print(predictions)

.. testoutput::

[[1 0 0]
[0 1 0]]

"""

metadata_dict_name = "metadata.json"
Expand Down
34 changes: 16 additions & 18 deletions autointent/modules/prediction/_argmax.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,24 +32,22 @@ class ArgmaxPredictor(PredictionModule):

Examples
--------
>>> from autointent.modules import ArgmaxPredictor
>>> import numpy as np
>>> predictor = ArgmaxPredictor()
>>> train_scores = np.array([[0.2, 0.8, 0.0], [0.7, 0.1, 0.2]])
>>> labels = [1, 0] # Single-label targets
>>> predictor.fit(train_scores, labels)
>>> test_scores = np.array([[0.1, 0.5, 0.4], [0.6, 0.3, 0.1]])
>>> predictions = predictor.predict(test_scores)
>>> print(predictions)
[1 0]

Save the predictor's state:
>>> predictor.dump("outputs/")
>>> loaded_predictor = ArgmaxPredictor()
>>> loaded_predictor.load("outputs/")
>>> loaded_predictions = loaded_predictor.predict(test_scores)
>>> print(loaded_predictions)
[1 0]
.. testcode::

from autointent.modules import ArgmaxPredictor
import numpy as np
predictor = ArgmaxPredictor()
train_scores = np.array([[0.2, 0.8, 0.0], [0.7, 0.1, 0.2]])
labels = [1, 0] # Single-label targets
predictor.fit(train_scores, labels)
test_scores = np.array([[0.1, 0.5, 0.4], [0.6, 0.3, 0.1]])
predictions = predictor.predict(test_scores)
print(predictions)

.. testoutput::

[1 0]

"""

name = "argmax"
Expand Down
35 changes: 17 additions & 18 deletions autointent/modules/prediction/_jinoos.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,24 +37,23 @@ class JinoosPredictor(PredictionModule):

Examples
--------
>>> from autointent.modules import JinoosPredictor
>>> import numpy as np
>>> scores = np.array([[0.2, 0.8], [0.6, 0.4], [0.1, 0.9]])
>>> labels = [1, 0, 1]
>>> search_space = [0.3, 0.5, 0.7]
>>> predictor = JinoosPredictor(search_space=search_space)
>>> predictor.fit(scores, labels)
>>> test_scores = np.array([[0.3, 0.7], [0.5, 0.5]])
>>> predictions = predictor.predict(test_scores)
>>> print(predictions)
[1 0]

Save and load the predictor state:
>>> predictor.dump("outputs/")
>>> loaded_predictor = JinoosPredictor()
>>> loaded_predictor.load("outputs/")
>>> print(loaded_predictor.thresh)
0.5 # Example threshold from the search space
.. testcode::

from autointent.modules import JinoosPredictor
import numpy as np
scores = np.array([[0.2, 0.8], [0.6, 0.4], [0.1, 0.9]])
labels = [1, 0, 1]
search_space = [0.3, 0.5, 0.7]
predictor = JinoosPredictor(search_space=search_space)
predictor.fit(scores, labels)
test_scores = np.array([[0.3, 0.7], [0.5, 0.5]])
predictions = predictor.predict(test_scores)
print(predictions)

.. testoutput::

[1 0]

"""

thresh: float
Expand Down
63 changes: 35 additions & 28 deletions autointent/modules/prediction/_threshold.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,34 +42,41 @@ class ThresholdPredictor(PredictionModule):

Examples
--------
Single-label classification example:
>>> from autointent.modules import ThresholdPredictor
>>> import numpy as np
>>> scores = np.array([[0.2, 0.8], [0.6, 0.4], [0.1, 0.9]])
>>> labels = [1, 0, 1]
>>> threshold = 0.5
>>> predictor = ThresholdPredictor(thresh=threshold)
>>> predictor.fit(scores, labels)
>>> test_scores = np.array([[0.3, 0.7], [0.5, 0.5]])
>>> predictions = predictor.predict(test_scores)
>>> print(predictions)
[1 0]

Multi-label classification example:
>>> labels = [[1, 0], [0, 1], [1, 1]]
>>> predictor = ThresholdPredictor(thresh=[0.5, 0.5])
>>> predictor.fit(scores, labels)
>>> test_scores = np.array([[0.3, 0.7], [0.6, 0.4]])
>>> predictions = predictor.predict(test_scores)
>>> print(predictions)
[[0 1] [1 0]]

Save and load the model:
>>> predictor.dump("outputs/")
>>> loaded_predictor = ThresholdPredictor(thresh=0.5)
>>> loaded_predictor.load("outputs/")
>>> print(loaded_predictor.thresh)
0.5
Single-label classification
===========================
.. testcode::

from autointent.modules import ThresholdPredictor
import numpy as np
scores = np.array([[0.2, 0.8], [0.6, 0.4], [0.1, 0.9]])
labels = [1, 0, 1]
threshold = 0.5
predictor = ThresholdPredictor(thresh=threshold)
predictor.fit(scores, labels)
test_scores = np.array([[0.3, 0.7], [0.5, 0.5]])
predictions = predictor.predict(test_scores)
print(predictions)

.. testoutput::

[1 0]

Multi-label classification
==========================
.. testcode::

labels = [[1, 0], [0, 1], [1, 1]]
predictor = ThresholdPredictor(thresh=[0.5, 0.5])
predictor.fit(scores, labels)
test_scores = np.array([[0.3, 0.7], [0.6, 0.4]])
predictions = predictor.predict(test_scores)
print(predictions)

.. testoutput::

[[0 1]
[1 0]]

"""

metadata: ThresholdPredictorDumpMetadata
Expand Down
61 changes: 34 additions & 27 deletions autointent/modules/prediction/_tunable.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,33 +43,40 @@ class TunablePredictor(PredictionModule):

Examples
--------
Single-label classification:
>>> import numpy as np
>>> from autointent.modules import TunablePredictor
>>> scores = np.array([[0.2, 0.8], [0.6, 0.4], [0.1, 0.9]])
>>> labels = [1, 0, 1]
>>> predictor = TunablePredictor(n_trials=100, seed=42)
>>> predictor.fit(scores, labels)
>>> test_scores = np.array([[0.3, 0.7], [0.5, 0.5]])
>>> predictions = predictor.predict(test_scores)
>>> print(predictions)
[1 0]

Multi-label classification:
>>> labels = [[1, 0], [0, 1], [1, 1]]
>>> predictor = TunablePredictor(n_trials=100, seed=42)
>>> predictor.fit(scores, labels)
>>> test_scores = np.array([[0.3, 0.7], [0.6, 0.4]])
>>> predictions = predictor.predict(test_scores)
>>> print(predictions)
[[0 1] [1 0]]

Saving and loading the model:
>>> predictor.dump("outputs/")
>>> loaded_predictor = TunablePredictor()
>>> loaded_predictor.load("outputs/")
>>> print(loaded_predictor.thresh)
[0.5, 0.7]
Single-label classification
===========================
.. testcode::

import numpy as np
from autointent.modules import TunablePredictor
scores = np.array([[0.2, 0.8], [0.6, 0.4], [0.1, 0.9]])
labels = [1, 0, 1]
predictor = TunablePredictor(n_trials=100, seed=42)
predictor.fit(scores, labels)
test_scores = np.array([[0.3, 0.7], [0.5, 0.5]])
predictions = predictor.predict(test_scores)
print(predictions)

.. testoutput::

[1 0]

Multi-label classification
==========================
.. testcode::

labels = [[1, 0], [0, 1], [1, 1]]
predictor = TunablePredictor(n_trials=100, seed=42)
predictor.fit(scores, labels)
test_scores = np.array([[0.3, 0.7], [0.6, 0.4]])
predictions = predictor.predict(test_scores)
print(predictions)

.. testoutput::

[[1 1]
[1 1]]

"""

name = "tunable"
Expand Down
49 changes: 27 additions & 22 deletions autointent/modules/retrieval/_vectordb.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,28 +32,33 @@ class VectorDBModule(RetrievalModule):

Examples
--------
Creating and fitting the VectorDBModule:
>>> from your_module import VectorDBModule
>>> utterances = ["hello world", "how are you?", "good morning"]
>>> labels = [1, 2, 3]
>>> vector_db = VectorDBModule(k=2, embedder_name="some_embedder", db_dir="./db", device="cpu")
>>> vector_db.fit(utterances, labels)
>>> def retrieval_metric_fn(true_labels, predicted_labels):
>>> # Custom metric function (e.g., accuracy or F1 score)
>>> return sum([1 if true == pred else 0 for true, pred \\
>>> in zip(true_labels, predicted_labels)]) / len(true_labels)
>>> score = vector_db.score(context, retrieval_metric_fn)
>>> print(score)

Performing predictions:
>>> predictions = vector_db.predict(["how is the weather today?"])
>>> print(predictions)

Saving and loading the model:
>>> vector_db.dump("outputs/")
>>> loaded_vector_db = VectorDBModule(k=2, embedder_name="some_embedder", db_dir="./db", device="cpu")
>>> loaded_vector_db.load("outputs/")
>>> print(loaded_vector_db.vector_index)
.. testsetup::

db_dir = "doctests-db"

.. testcode::

from autointent.modules.retrieval import VectorDBModule
utterances = ["bye", "how are you?", "good morning"]
labels = [0, 1, 1]
vector_db = VectorDBModule(
k=2,
embedder_name="sergeyzh/rubert-tiny-turbo",
db_dir=db_dir,
)
vector_db.fit(utterances, labels)
predictions = vector_db.predict(["how is the weather today?"])
print(predictions)

.. testoutput::

([[1, 1]], [[0.1525942087173462, 0.18616724014282227]], [['good morning', 'how are you?']])

.. testcleanup::

import shutil
shutil.rmtree(db_dir)

"""

vector_index: VectorIndex
Expand Down
18 changes: 0 additions & 18 deletions autointent/modules/scoring/_description/description.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,24 +40,6 @@ class DescriptionScorer(ScoringModule):
:ivar db_dir: Directory path where the vector database is stored.
:ivar name: Name of the scorer, defaults to "description".

Examples
--------
Creating and fitting the DescriptionScorer
>>> from autointent.modules import DescriptionScorer
>>> utterances = ["what is your name?", "how old are you?"]
>>> labels = [0, 1]
>>> descriptions = ["greeting", "age-related question"]
>>> scorer = DescriptionScorer(embedder_name="your_embedder", temperature=1.0)
>>> scorer.fit(utterances, labels, descriptions)

Predicting scores:
>>> scores = scorer.predict(["tell me about your age?"])
>>> print(scores) # Outputs similarity scores for the utterance against all descriptions

Saving and loading the scorer:
>>> scorer.dump("outputs/")
>>> loaded_scorer = DescriptionScorer(embedder_name="your_embedder")
>>> loaded_scorer.load("outputs/")
"""

weights_file_name: str = "description_vectors.npy"
Expand Down
Loading
Loading