# Source code for sorbetto.flavor.correlation_flavor

import logging
from typing import Any, Callable, Literal

import numpy as np
from scipy import stats
from tqdm import tqdm

from sorbetto.core.importance import Importance
from sorbetto.flavor.abstract_numeric_flavor import AbstractNumericFlavor
from sorbetto.performance.finite_set_of_two_class_classification_performances import (
    FiniteSetOfTwoClassClassificationPerformances,
)
from sorbetto.performance.two_class_classification_performance import (
    TwoClassClassificationPerformance,
)
from sorbetto.ranking.ranking_score import RankingScore



[docs]
class CorrelationFlavor(AbstractNumericFlavor):
    """
    For a given set of performances, the *Correlation Flavor* is the mathematical
    function that gives, to any importance :math:`I` (that is, some
    application-specific preferences), the correlation, using a defined
    correlation coefficient (e.g., Pearson's r), between a score :math:`X` and the
    Ranking Score :math:`R_I` corresponding to this importance.
    """

    def __init__(
        self,
        performances: FiniteSetOfTwoClassClassificationPerformances,
        score: Callable[
            [
                TwoClassClassificationPerformance
                | FiniteSetOfTwoClassClassificationPerformances
            ],
            np.ndarray,
        ],
        correlation_coefficient: Literal[
            "pearson_r", "spearman_rho", "kendall_tau"
        ] = "pearson_r",
        name: str = "Correlation Flavor",
        colormap: Any = None,
    ):
        """
        Store the inputs needed to evaluate the flavor.

        Args:
            performances: The performances on which both the score and the
                ranking scores are evaluated.
            score: The score :math:`X`. It is first called on the whole set of
                performances; if that call raises, it is called on each
                performance individually.
            correlation_coefficient: Which coefficient to use: ``"pearson_r"``,
                ``"spearman_rho"`` or ``"kendall_tau"``. The value is only
                checked when the flavor is called.
            name: Forwarded to the parent constructor.
            colormap: Forwarded to the parent constructor.
        """
        super().__init__(name=name, colormap=colormap)
        self._performances = performances
        self._score = score
        self._correlation_coefficient = correlation_coefficient

    @property
    def performances(self) -> FiniteSetOfTwoClassClassificationPerformances:
        """The performances given at construction."""
        return self._performances

    @property
    def score(
        self,
    ) -> Callable[
        [
            TwoClassClassificationPerformance
            | FiniteSetOfTwoClassClassificationPerformances
        ],
        np.ndarray,
    ]:
        """The score callable given at construction."""
        return self._score

    @property
    def correlation_coefficient(self) -> str:
        """The name of the correlation coefficient given at construction."""
        return self._correlation_coefficient

    def __call__(
        self,
        importance: Importance | np.ndarray,
    ) -> np.ndarray:
        """
        Compute the correlation between the score and the ranking scores.

        Args:
            importance: The importance(s) passed on to
                ``RankingScore._compute``.

        Returns:
            A 2-D array whose shape is given by the second and third
            dimensions of the ranking-score array; entry ``[i, j]`` is the
            correlation between the score values and
            ``ranking_scores[:, i, j]``.

        Raises:
            ValueError: If the configured correlation coefficient is not one of
                the supported names.
        """
        # Resolve the coefficient first so an invalid name fails before the
        # (potentially expensive) score and ranking-score computations.
        corr_functions = {
            "pearson_r": stats.pearsonr,
            "spearman_rho": stats.spearmanr,
            "kendall_tau": stats.kendalltau,
        }
        try:
            corr_func = corr_functions[self._correlation_coefficient]
        except (KeyError, TypeError):  # TypeError: unhashable value
            raise ValueError(
                f"Unknown correlation coefficient: {self._correlation_coefficient}. "
                "Available options are 'pearson_r' and 'spearman_rho' and 'kendall_tau'."
            ) from None

        try:  # try if X is vectorized
            x_scores: list | np.ndarray = self._score(self._performances)
        except Exception as e:  # else fallback to loop
            logging.warning(
                "The score given to the Correlation Flavor is not vectorized. "
                "Continuing with sequential loop.\n"
                f"Got : {e!r}.\n"
            )
            x_scores = [self._score(p) for p in self._performances]
        # Convert once here instead of letting SciPy re-convert for every cell.
        x_scores = np.asarray(x_scores)

        value_scores = RankingScore._compute(
            importance=importance, performance=self._performances
        )

        # NOTE(review): value_scores must be (at least) 3-D; presumably its
        # first axis indexes the performances -- confirm against RankingScore.
        n_rows, n_cols = value_scores.shape[1], value_scores.shape[2]
        correlation = np.empty((n_rows, n_cols))

        for i in tqdm(range(n_rows)):
            for j in range(n_cols):
                # Index 0 is the statistic. Unlike the ``.correlation``
                # attribute, indexing also works when SciPy returns a plain
                # tuple.
                correlation[i, j] = corr_func(x_scores, value_scores[:, i, j])[0]
        return correlation


[docs]
    def getDefaultColormap(self):
        """Return the name of the default colormap, ``"gist_rainbow"``."""
        default_colormap = "gist_rainbow"
        return default_colormap



[docs]
    def getLowerBound(self):
        """Return the smallest value a correlation coefficient can take, ``-1.0``."""
        lower_bound = -1.0
        return lower_bound



[docs]
    def getUpperBound(self):
        """Return the largest value a correlation coefficient can take, ``1.0``."""
        upper_bound = 1.0
        return upper_bound