autointent.context.data_handler.DataHandler#

class autointent.context.data_handler.DataHandler(dataset, config=None, random_seed=0)#

Convenience wrapper for autointent.Dataset.

Performs splitting of the wrapped dataset when instantiated.

Parameters:
dataset: autointent.Dataset#

Wrapped dataset.

config: autointent.configs.DataConfig#

Configuration used for instantiation.

property intent_descriptions: list[str | None]#

String descriptions for all intents.

Return type:

list[str | None]

property tags: list[autointent.schemas.Tag]#

Tags associated with intents.

Tagging is an experimental feature that is not guaranteed to work.

Return type:

list[autointent.schemas.Tag]

property multilabel: bool#

Check if the dataset is multilabel.

Return type:

bool

train_utterances(idx=None)#

Retrieve training utterances from the dataset.

If a specific training split index is provided, retrieves utterances from the indexed training split. Otherwise, retrieves utterances from the primary training split.

Parameters:

idx (int | None) – Optional index for a specific training split.

Return type:

list[str]

train_labels(idx=None)#

Retrieve training labels from the dataset.

If a specific training split index is provided, retrieves labels from the indexed training split. Otherwise, retrieves labels from the primary training split.

Parameters:

idx (int | None) – Optional index for a specific training split.

Return type:

autointent.custom_types.ListOfGenericLabels

train_labels_folded()#

Retrieve train labels fold by fold.

Return type:

list[autointent.custom_types.ListOfGenericLabels]

validation_utterances(idx=None)#

Retrieve validation utterances from the dataset.

If a specific validation split index is provided, retrieves utterances from the indexed validation split. Otherwise, retrieves utterances from the primary validation split.

Parameters:

idx (int | None) – Optional index for a specific validation split.

Return type:

list[str]

validation_labels(idx=None)#

Retrieve validation labels from the dataset.

If a specific validation split index is provided, retrieves labels from the indexed validation split. Otherwise, retrieves labels from the primary validation split.

Parameters:

idx (int | None) – Optional index for a specific validation split.

Return type:

autointent.custom_types.ListOfGenericLabels

test_utterances()#

Retrieve test utterances from the dataset.

Return type:

list[str] | None

test_labels()#

Retrieve test labels from the dataset.

Return type:

autointent.custom_types.ListOfGenericLabels

validation_iterator()#

Yield folds for cross-validation.

Return type:

collections.abc.Generator[tuple[list[str], autointent.custom_types.ListOfLabels, list[str], autointent.custom_types.ListOfLabels]]

prepare_for_refit()#

Merge all training folds into one so that the configured optimal pipeline can be retrained on it.

Return type:

None