Run reporting#
This script demonstrates how to report the optimization process using the AutoIntent library.
[1]:
# Embedding node: a single "retrieval" module tried with two candidate embedders.
embedding_node = {
    "node_type": "embedding",
    "target_metric": "retrieval_hit_rate",
    "search_space": [
        {
            "module_name": "retrieval",
            "k": [10],
            "embedder_config": ["avsolatorio/GIST-small-Embedding-v0", "sergeyzh/rubert-tiny-turbo"],
        }
    ],
}

# Scoring node: three candidate modules ("knn", "linear", "dnnc") with their hyperparameter grids.
scoring_node = {
    "node_type": "scoring",
    "target_metric": "scoring_roc_auc",
    "search_space": [
        {"module_name": "knn", "k": [1, 3, 5, 10], "weights": ["uniform", "distance", "closest"]},
        {"module_name": "linear"},
        {
            "module_name": "dnnc",
            "cross_encoder_config": ["cross-encoder/ms-marco-MiniLM-L-6-v2"],
            "k": [1, 3, 5, 10],
        },
    ],
}

# Decision node: a fixed-threshold module versus plain argmax.
decision_node = {
    "node_type": "decision",
    "target_metric": "decision_accuracy",
    "search_space": [{"module_name": "threshold", "thresh": [0.5]}, {"module_name": "argmax"}],
}

# The full search space lists the nodes in order: embedding, scoring, decision.
search_space = [embedding_node, scoring_node, decision_node]
Load Data#
Let us use a small subset of the popular clinc150
dataset:
[2]:
from autointent import Dataset
# Load the "AutoIntent/clinc150_subset" dataset through Dataset.from_hub
# (presumably downloaded from a remote hub on first use — confirm network/cache behavior).
dataset = Dataset.from_hub("AutoIntent/clinc150_subset")
/home/runner/.cache/pypoetry/virtualenvs/autointent-FDypUDHQ-py3.10/lib/python3.10/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html
from .autonotebook import tqdm as notebook_tqdm
Start Auto Configuration#
[3]:
from autointent import Pipeline
# Build the pipeline optimizer from the `search_space` list defined in the first cell.
pipeline_optimizer = Pipeline.from_search_space(search_space)
Reporting#
Currently supported reporting options are:
tensorboard
wandb
[4]:
from pathlib import Path

from autointent.configs import LoggingConfig

# Logging configuration for this run: the run is named "test_tensorboard", it reports
# to the "tensorboard" backend, and "my_projects" is passed as the project directory.
log_config = LoggingConfig(
    run_name="test_tensorboard",
    report_to=["tensorboard"],
    project_dir=Path("my_projects"),
    dump_modules=False,
)

# Attach the logging configuration to the optimizer before fitting.
pipeline_optimizer.set_config(log_config)
[5]:
# Run the optimization on `dataset`. The call's return value is echoed as the cell
# output (a Context object in the recorded run); append `;` to the line to hide it.
pipeline_optimizer.fit(dataset)
/home/runner/work/AutoIntent/AutoIntent/autointent/nodes/_node_optimizer.py:99: ExperimentalWarning: BruteForceSampler is experimental (supported from v3.1.0). The interface can change in the future.
sampler_instance = optuna.samplers.BruteForceSampler(seed=context.seed) # type: ignore[assignment]
[I 2025-03-08 22:25:08,408] A new study created in memory with name: no-name-e5603f48-b40b-4b1b-9731-b81e0c0099c3
/home/runner/work/AutoIntent/AutoIntent/autointent/nodes/_node_optimizer.py:99: ExperimentalWarning: BruteForceSampler is experimental (supported from v3.1.0). The interface can change in the future.
sampler_instance = optuna.samplers.BruteForceSampler(seed=context.seed) # type: ignore[assignment]
/home/runner/work/AutoIntent/AutoIntent/autointent/nodes/_node_optimizer.py:99: ExperimentalWarning: BruteForceSampler is experimental (supported from v3.1.0). The interface can change in the future.
sampler_instance = optuna.samplers.BruteForceSampler(seed=context.seed) # type: ignore[assignment]
/home/runner/work/AutoIntent/AutoIntent/autointent/nodes/_node_optimizer.py:99: ExperimentalWarning: BruteForceSampler is experimental (supported from v3.1.0). The interface can change in the future.
sampler_instance = optuna.samplers.BruteForceSampler(seed=context.seed) # type: ignore[assignment]
"argmax" is NOT designed to handle OOS samples, but your data contains it. So, using this method reduces the power of classification.
/home/runner/.cache/pypoetry/virtualenvs/autointent-FDypUDHQ-py3.10/lib/python3.10/site-packages/sklearn/metrics/_classification.py:1565: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.
_warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
[5]:
<autointent.context._context.Context at 0x7fdbdac5ba30>
Now the results of the optimization process can be viewed in TensorBoard:
tensorboard --logdir test_tensorboard