# Source code for sensai.evaluation.evaluator_clustering

from abc import ABC, abstractmethod
from typing import Dict, Sequence, Generic, TypeVar

from .eval_stats.eval_stats_clustering import ClusteringUnsupervisedEvalStats, \
    ClusteringSupervisedEvalStats, ClusterLabelsEvalStats
from .evaluator import MetricsDictProvider
from ..clustering import EuclideanClusterer
from ..util.profiling import timed

TClusteringEvalStats = TypeVar("TClusteringEvalStats", bound=ClusterLabelsEvalStats)


class ClusteringModelEvaluator(MetricsDictProvider, Generic[TClusteringEvalStats], ABC):
    @timed
    def _compute_metrics(self, model: EuclideanClusterer, **kwargs) -> Dict[str, float]:
        """
        Evaluate the model and return the results as a dict

        :param model: the clustering model to evaluate
        :param kwargs: will be passed to eval_model
        :return: dictionary mapping metric names to their values
        """
        eval_stats = self.eval_model(model, **kwargs)
        return eval_stats.metrics_dict()

    @abstractmethod
    def eval_model(self, model: EuclideanClusterer, **kwargs) -> TClusteringEvalStats:
        pass
class ClusteringModelUnsupervisedEvaluator(ClusteringModelEvaluator[ClusteringUnsupervisedEvalStats]):
    def __init__(self, datapoints):
        self.datapoints = datapoints

    def eval_model(self, model: EuclideanClusterer, fit=True):
        """
        Retrieve the evaluation statistics holder for the clustering model

        :param model: the clustering model to evaluate
        :param fit: whether to fit on the evaluator's data before retrieving statistics.
            Set this to False if the model you wish to evaluate was already fitted on the desired dataset
        :return: instance of ClusteringUnsupervisedEvalStats that can be used for calculating various evaluation metrics
        """
        if fit:
            model.fit(self.datapoints)
        return ClusteringUnsupervisedEvalStats.from_model(model)
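
# Usage sketch (illustrative addition, not part of the original module), assuming
# `model` is some concrete EuclideanClusterer implementation and `datapoints` is
# an array-like of shape (n_samples, n_features):
#
#     evaluator = ClusteringModelUnsupervisedEvaluator(datapoints)
#     eval_stats = evaluator.eval_model(model)  # fits the model, then collects statistics
#     metrics = eval_stats.metrics_dict()       # dict mapping metric names to float values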
class ClusteringModelSupervisedEvaluator(ClusteringModelEvaluator[ClusteringSupervisedEvalStats]):
    def __init__(self, datapoints, true_labels: Sequence[int], noise_label=-1):
        """
        :param datapoints: the data points to be clustered
        :param true_labels: labels of the true clusters, including the noise cluster
        :param noise_label: label of the noise cluster (if there is one) in the true labels
        """
        if len(true_labels) != len(datapoints):
            raise ValueError("true_labels must be of the same length as datapoints")
        self.datapoints = datapoints
        self.trueLabels = true_labels
        self.noiseLabel = noise_label

    def eval_model(self, model: EuclideanClusterer, fit=True):
        """
        Retrieve the evaluation statistics holder for the clustering model

        :param model: the clustering model to evaluate
        :param fit: whether to fit on the evaluator's data before retrieving statistics.
            Set this to False if the model you wish to evaluate was already fitted on the desired dataset
        :return: instance of ClusteringSupervisedEvalStats that can be used for calculating various evaluation metrics
        """
        if fit:
            model.noiseLabel = self.noiseLabel
            model.fit(self.datapoints)
        else:
            if model.noiseLabel != self.noiseLabel:
                raise ValueError(f"Noise label of evaluator does not match noise label of the model: "
                                 f"{self.noiseLabel} != {model.noiseLabel}. "
                                 f"Either evaluate with fit=True or adjust the noise label in the ground truth labels")
        return ClusteringSupervisedEvalStats.from_model(model, self.trueLabels)
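
# Usage sketch (illustrative addition, not part of the original module), assuming
# `model` is a concrete EuclideanClusterer and `true_labels` holds the ground-truth
# cluster labels, with -1 marking noise points:
#
#     evaluator = ClusteringModelSupervisedEvaluator(datapoints, true_labels, noise_label=-1)
#     eval_stats = evaluator.eval_model(model)  # propagates the noise label, fits, collects stats
#     metrics = eval_stats.metrics_dict()
#
#     # For a model that was already fitted on `datapoints` (here named
#     # `already_fitted_model` for illustration), skip fitting; the model's
#     # noise label must then match the evaluator's:
#     eval_stats = evaluator.eval_model(already_fitted_model, fit=False)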