Source code for sensai.evaluation.evaluator_clustering
from abc import ABC, abstractmethod
from typing import Dict, Sequence, Generic, TypeVar
from .eval_stats.eval_stats_clustering import ClusteringUnsupervisedEvalStats, \
ClusteringSupervisedEvalStats, ClusterLabelsEvalStats
from .evaluator import MetricsDictProvider
from ..clustering import EuclideanClusterer
from ..util.profiling import timed

TClusteringEvalStats = TypeVar("TClusteringEvalStats", bound=ClusterLabelsEvalStats)


class ClusteringModelEvaluator(MetricsDictProvider, Generic[TClusteringEvalStats], ABC):
    @timed
    def _compute_metrics(self, model: EuclideanClusterer, **kwargs) -> Dict[str, float]:
"""
Evaluate the model and return the results as dict
:param model:
:param kwargs: will be passed to evalModel
:return:
"""
        eval_stats = self.eval_model(model, **kwargs)
        return eval_stats.metrics_dict()

    @abstractmethod
    def eval_model(self, model: EuclideanClusterer, **kwargs) -> TClusteringEvalStats:
        pass


class ClusteringModelUnsupervisedEvaluator(ClusteringModelEvaluator[ClusteringUnsupervisedEvalStats]):
    def __init__(self, datapoints):
        self.datapoints = datapoints

    def eval_model(self, model: EuclideanClusterer, fit=True):
"""
Retrieve evaluation statistics holder for the clustering model
:param model:
:param fit: whether to fit on the evaluator's data before retrieving statistics.
Set this to False if the model you wish to evaluate was already fitted on the desired dataset
:return: instance of ClusteringUnsupervisedEvalStats that can be used for calculating various evaluation metrics
"""
        if fit:
            model.fit(self.datapoints)
        return ClusteringUnsupervisedEvalStats.from_model(model)
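

# Illustrative usage sketch (not part of the module): unsupervised evaluation only
# needs the data points themselves. Here, 'model' stands for any EuclideanClusterer
# implementation and 'datapoints' for the data to be clustered; both are assumed
# to be defined elsewhere.
#
#     evaluator = ClusteringModelUnsupervisedEvaluator(datapoints)
#     eval_stats = evaluator.eval_model(model, fit=True)   # use fit=False for an already fitted model
#     metrics = eval_stats.metrics_dict()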


class ClusteringModelSupervisedEvaluator(ClusteringModelEvaluator[ClusteringSupervisedEvalStats]):
    def __init__(self, datapoints, true_labels: Sequence[int], noise_label=-1):
        """
        :param datapoints: the data points to be clustered
        :param true_labels: the labels of the true clusters, including the noise cluster (if any)
        :param noise_label: the label of the noise cluster (if there is one) in the true labels
        """
        if len(true_labels) != len(datapoints):
            raise ValueError("true labels must be of the same length as datapoints")
        self.datapoints = datapoints
        self.trueLabels = true_labels
        self.noiseLabel = noise_label

    def eval_model(self, model: EuclideanClusterer, fit=True):
"""
Retrieve evaluation statistics holder for the clustering model
:param model:
:param fit: whether to fit on the evaluator's data before retrieving statistics.
Set this to False if the model you wish to evaluate was already fitted on the desired dataset
:return: instance of ClusteringSupervisedEvalStats that can be used for calculating various evaluation metrics
"""
        if fit:
            model.noiseLabel = self.noiseLabel
            model.fit(self.datapoints)
        else:
            if model.noiseLabel != self.noiseLabel:
                raise ValueError(f"Noise label of evaluator does not match noise label of the model:"
                                 f" {self.noiseLabel} != {model.noiseLabel}. "
                                 f"Either evaluate with fit=True or adjust the noise label in the ground truth labels")
        return ClusteringSupervisedEvalStats.from_model(model, self.trueLabels)
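

# Illustrative usage sketch (not part of the module): supervised evaluation additionally
# requires the ground truth cluster labels, with noise points marked by noise_label.
# As above, 'model', 'datapoints' and 'true_labels' are assumed to be defined elsewhere.
#
#     evaluator = ClusteringModelSupervisedEvaluator(datapoints, true_labels, noise_label=-1)
#     eval_stats = evaluator.eval_model(model, fit=True)   # fits the model with the evaluator's noise label
#     metrics = eval_stats.metrics_dict()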