Source code for ablator.analysis.plot.cat_plot

import logging
import typing as ty

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from matplotlib.axes import Axes
from matplotlib.figure import Figure

from ablator.analysis.plot import Plot
from ablator.config.proto import Optim

logger = logging.getLogger(__name__)


# flake8: noqa: DOC102
[docs]class Categorical(Plot):
    """
    This class is for preparing the results that are associated with each categorical attribute to be studied
    (e.g., grouping metric results with each of the attributes). Its constructor takes in as input positional
    arguments or keyword arguments from the base class `Plot`. Possible arguments are listed in the Parameters
    section. The Attributes section lists its own attributes as well as those that are inherited.

    Parameters
    ----------
    metric : pd.Series
        The ablation study metric values to plot.
    attributes : pd.Series
        The ablation study attributes values to plot.
    metric_obj_fn : Optim
        The metric optimization direction.
    y_axis : str, optional
        The y-axis label (metric name), by default ``None``.
    x_axis : str, optional
        The x-axis label (attribute name), by default ``None``.
    x_ticks : list[str], optional
        The x-axis ticks, by default ``None``.
    ax : Axes, optional
        The axes to plot on, by default ``None``.

    Attributes
    ----------
    metric : pd.Series
        The ablation study metric values to plot (with null value removed).
    attributes : pd.Series
        The ablation study attributes values to plot (with null metric value removed).
    metric_obj_fn : Optim
        The metric optimization direction.
    y_axis : str
        The y-axis label (metric name).
    x_axis : str
        The x-axis label (attribute name).
    x_ticks : list[str]
        The x-axis ticks.
    figure : Figure
        The figure to plot on. If `None`, a new figure of size ``(4,4)`` will be created.
    ax : Axes
        The axes to plot on. If `None`, a new axis will be created as the first subplot
        in the first cell and first column of a `1x1` grid.
    DATA_TYPE : str
        The attribute data type. In this case, it is ``"categorical"``.
    attribute_metric_map : dict[str, pd.Series]
        A dictionary mapping attribute values to metric values.

    """

    DATA_TYPE: str = "categorical"

    def __init__(self, *args: ty.Any, **kwargs: ty.Any) -> None:
        super().__init__(*args, **kwargs)
        self.attribute_metric_map = self._make_attribute_metric_map(
            self.metric, self.attributes
        )

    @classmethod
    def _make_attribute_metric_map(
        cls,
        metric: pd.Series,
        attributes: pd.Series,
    ) -> dict[str | float, pd.Series]:
        if len(attributes.shape) > 1 and attributes.shape[-1] > 1:
            raise ValueError(f"{cls.__name__} attributes must be single dimensional.")
        unique_values = attributes.unique()
        metrics: dict[str | float, pd.Series] = {}

        if None in unique_values:
            # this is because None can not be dictionary key.
            unique_values = list(filter(None, unique_values))
            none_name = "None"
            if "None" in unique_values:
                logger.warning(
                    "`None` is present as a categorical string value as"
                    " well as None. Will rename None to Type(None)."
                )
                none_name = "Type(None)"
                assert none_name not in unique_values, (
                    f"{none_name}, and `None` are both present as categorical values. "
                    "Unable to rename None value."
                )
            metrics[none_name] = metric[attributes.apply(lambda x: x is None)]

        for i in np.argsort(unique_values):
            u = unique_values[i]
            if isinstance(u, float) and np.isnan(u):
                metrics[u] = metric[
                    attributes.apply(lambda x: isinstance(x, float) and np.isnan(x))
                ]
            else:
                metrics[u] = metric[attributes == u]

        return metrics

    def _sort_vals_obj(self, vals: pd.Series, obj_fn: Optim) -> np.ndarray:
        if Optim(obj_fn) == Optim.min:
            return vals.sort_values(na_position="last").values
        return vals.sort_values(ascending=False, na_position="last").values


# flake8: noqa: DOC102
[docs]class ViolinPlot(Categorical):
    """
    Class for constructing violinplots. Its constructor takes in as input positional arguments or keyword
    arguments from the base class `Categorical`. Possible arguments are listed in the Parameters section.
    The Attributes section lists its own attributes as well as those that are inherited.

    Parameters
    ----------
    metric : pd.Series
        The ablation study metric values to plot.
    attributes : pd.Series
        The ablation study attributes values to plot.
    metric_obj_fn : Optim
        The metric optimization direction.
    y_axis : str, optional
        The y-axis label (metric name), by default ``None``.
    x_axis : str, optional
        The x-axis label (attribute name), by default ``None``.
    x_ticks : list[str], optional
        The x-axis ticks, by default ``None``.
    ax : Axes, optional
        The axes to plot on, by default ``None``.

    Attributes
    ----------
    metric : pd.Series
        The ablation study metric values to plot (with null value removed).
    attributes : pd.Series
        The ablation study attributes values to plot (with null metric value removed).
    metric_obj_fn : Optim
        The metric optimization direction.
    y_axis : str
        The y-axis label (metric name).
    x_axis : str
        The x-axis label (attribute name).
    x_ticks : list[str]
        The x-axis ticks.
    figure : Figure
        The figure to plot on. If `None`, a new figure of size `(4,4)` will be created.
    ax : Axes
        The axes to plot on. If `None`, a new axis will be created as the first subplot
        in the first cell and first column of a `1x1` grid.
    DATA_TYPE : str
        The attribute data type. In this case, it is `"categorical"`.
    attribute_metric_map : dict[str, pd.Series]
        A dictionary mapping attribute values to metric values.
    figsize: tuple
        A tuple representing the size of the figure in terms of axes `(x, y)`.

    """

    def __init__(self, *args: ty.Any, **kwargs: ty.Any) -> None:
        sns.set()
        sns.set_style("whitegrid")
        self.figsize = (8, 4)
        super().__init__(*args, **kwargs)

    def _make_figure(self, ax: Axes | None = None) -> tuple[Figure | None, Axes]:
        figure = None
        if ax is None:
            figure = plt.figure(figsize=(10, 8))
            ax = figure.add_subplot(1, 1, 1)
        return figure, ax

    def _make(
        self,
        **kwargs: ty.Any,
    ) -> tuple[Figure, Axes]:
        sns.violinplot(
            [v.values for v in self.attribute_metric_map.values()],
            ax=self.ax,
            palette="Set3",
        )
        mean_perf = []
        median_perf = []
        best_perf = []
        for vals in self.attribute_metric_map.values():
            # top performance marker
            obj_fn = self.metrics_obj_fn
            best_perf.append(self._sort_vals_obj(vals, obj_fn)[0])
            mean_perf.append(np.mean(vals))
            median_perf.append(np.median(vals))

        labels = [
            f"Mean: {mean:.2e}\nBest: {best:.2e}\n{name}"
            for mean, best, name in zip(
                mean_perf, best_perf, self.attribute_metric_map.keys()
            )
        ]
        self.ax.set_xticks(
            np.arange(len(self.attribute_metric_map)),
            labels=labels,
        )

        sns.despine(left=True, bottom=True)
        return self.figure, self.ax

    def _parse_legend(self, ax: Axes):
        pass