# Source code for ablator.config.mp

from ablator.config.hpo import SearchSpace
from ablator.config.main import configclass
from ablator.config.proto import RunConfig
from ablator.config.types import Dict, Enum, Optional, Stateless
from ablator.modules.storage.remote import RemoteConfig


class SearchAlgo(Enum):
    """
    Type of search algorithm used to sample trial configurations.

    - Grid Sampling: discretizes the search space into even intervals ``n_bins``.
    - TPE Sampling: Tree-Structured Parzen Estimator [1], a hyper-parameter
      optimization algorithm.
    - Random Sampling: naively samples from the search space with a random
      probability.

    The behavior of each algorithm depends highly on the budget allocated for
    each trial. For example, Grid Sampling will repeat sampled configurations
    only after it has exhaustively evaluated the current configuration space,
    whereas TPE and Random Sampling can repeat configurations at random.

    References
    ----------
    [1] Bergstra, James S., et al. "Algorithms for hyper-parameter optimization."
    Advances in Neural Information Processing Systems. 2011.
    """

    # Member values are the plain strings accepted in configuration files.
    random = "random"
    tpe = "tpe"
    grid = "grid"


class Optim(Enum):
    """
    Type of optimization direction.

    Takes the values ``min`` and ``max``, which indicate whether the HPO
    algorithm should minimize or maximize the corresponding metric.
    """

    # The member names intentionally mirror their string values; they only
    # shadow the ``min``/``max`` builtins inside this class body.
    min = "min"
    max = "max"


@configclass
class ParallelConfig(RunConfig):
    """
    Parallel training configuration, extending from ``RunConfig``, defines the settings of a parallel experiment
    (number of trials to run for, number of concurrent trials, search space for hyperparameter search, etc.).

    ``ParallelConfig`` encapsulates every configuration (model config, optimizer-scheduler config, train config,
    and the search space) needed to run a parallel experiment. The entire umbrella of configuration is then passed
    to ``ParallelTrainer`` that launches the experiment.

    Attributes
    ----------
    total_trials: Optional[int]
        total number of trials.
    concurrent_trials: Optional[int]
        number of trials to run concurrently.
    search_space: Dict[SearchSpace]
        search space for hyperparameter search,
        e.g. ``{"train_config.optimizer_config.arguments.lr": SearchSpace(value_range=[0, 10], value_type="int"),}``
    optim_metrics: Optional[Dict[Optim]]
        metrics to optimize, e.g. ``{"val_loss": "min"}``
    gpu_mb_per_experiment: int
        CUDA memory requirement per experimental trial in MB, e.g. a value of 100 is equivalent to 100MB.
    search_algo: SearchAlgo = SearchAlgo.tpe
        type of search algorithm.
    ignore_invalid_params: bool = False
        whether to ignore invalid parameters when sampling or raise an error.
    remote_config: Optional[RemoteConfig] = None
        remote storage configuration.

    Examples
    --------
    There are several steps before defining a parallel run config, let's go through them one by one:

    - Define model config, we want to run HPO on activation functions and model hidden size:

    >>> @configclass
    ... class CustomModelConfig(ModelConfig):
    ...     hidden_size: int
    ...     activation: str
    >>> model_config = CustomModelConfig(hidden_size=100, activation="relu")

    - Define training config:

    >>> my_optim_config = OptimizerConfig("sgd", {"lr": 0.5, "weight_decay": 0.5})
    >>> my_scheduler_config = SchedulerConfig("step", arguments={"step_size": 1, "gamma": 0.99})
    >>> train_config = TrainConfig(
    ...     dataset="[Dataset Name]",
    ...     batch_size=32,
    ...     epochs=10,
    ...     optimizer_config = my_optim_config,
    ...     scheduler_config = my_scheduler_config,
    ...     rand_weights_init = True
    ... )

    - Define search space:

    >>> search_space = {
    ...     "train_config.optimizer_config.arguments.lr": SearchSpace(value_range = [0.001, 0.01], value_type = 'float'),
    ...     "model_config.hidden_size": SearchSpace(value_range = [32, 64], value_type = 'int'),
    ...     "model_config.activation": SearchSpace(categorical_values = ["relu", "elu", "leakyRelu"]),
    ... }

    - Lastly, we will define the run config from the previous config components (remember to redefine
      the parallel config to update the model config type to be ``CustomModelConfig``):

    >>> @configclass
    ... class CustomParallelConfig(ParallelConfig):
    ...     model_config: CustomModelConfig
    >>> parallel_config = CustomParallelConfig(
    ...     train_config=train_config,
    ...     model_config=model_config,
    ...     metrics_n_batches = 800,
    ...     experiment_dir = "/tmp/experiments/",
    ...     device="cuda",
    ...     amp=True,
    ...     random_seed = 42,
    ...     total_trials = 20,
    ...     concurrent_trials = 20,
    ...     search_space = search_space,
    ...     optim_metrics = {"val_loss": "min"},
    ...     gpu_mb_per_experiment = 1024,
    ...     cpus_per_experiment = 1,
    ... )
    """

    # Field order is preserved: the three required fields without defaults
    # come first, followed by the fields that carry default values.
    total_trials: Optional[int]
    concurrent_trials: Stateless[Optional[int]]
    search_space: Dict[SearchSpace]
    optim_metrics: Stateless[Optional[Dict[Optim]]]
    gpu_mb_per_experiment: Stateless[int]
    search_algo: Stateless[SearchAlgo] = SearchAlgo.tpe
    ignore_invalid_params: Stateless[bool] = False
    remote_config: Stateless[Optional[RemoteConfig]] = None