Coverage for src/sensai/evaluation/evaluator_clustering.py: 0%
36 statements
« prev ^ index » next coverage.py v7.6.1, created at 2024-11-29 18:29 +0000
« prev ^ index » next coverage.py v7.6.1, created at 2024-11-29 18:29 +0000
1from abc import ABC, abstractmethod
2from typing import Dict, Sequence, Generic, TypeVar
4from .eval_stats.eval_stats_clustering import ClusteringUnsupervisedEvalStats, \
5 ClusteringSupervisedEvalStats, ClusterLabelsEvalStats
6from .evaluator import MetricsDictProvider
7from ..clustering import EuclideanClusterer
8from ..util.profiling import timed
# Type variable for the eval stats type an evaluator produces; restricted to ClusterLabelsEvalStats and its subclasses
TClusteringEvalStats = TypeVar(
    "TClusteringEvalStats",
    bound=ClusterLabelsEvalStats,
)
class ClusteringModelEvaluator(MetricsDictProvider, Generic[TClusteringEvalStats], ABC):
    """
    Abstract base class for evaluators of clustering models.

    Subclasses implement eval_model, which returns an eval stats object; the metrics dict is
    obtained from that object.
    """

    # NOTE(review): timed comes from ..util.profiling; presumably it measures execution time - confirm
    @timed
    def _compute_metrics(self, model: EuclideanClusterer, **kwargs) -> Dict[str, float]:
        """
        Evaluate the model and return the results as dict

        :param model: the clustering model to evaluate
        :param kwargs: will be passed to eval_model
        :return: the result of calling metrics_dict() on the eval stats returned by eval_model
        """
        return self.eval_model(model, **kwargs).metrics_dict()

    @abstractmethod
    def eval_model(self, model: EuclideanClusterer, **kwargs) -> TClusteringEvalStats:
        """
        Retrieve the evaluation statistics holder for the clustering model (to be implemented by subclasses)

        :param model: the clustering model to evaluate
        :param kwargs: implementation-specific options
        :return: the evaluation statistics object
        """
class ClusteringModelUnsupervisedEvaluator(ClusteringModelEvaluator[ClusteringUnsupervisedEvalStats]):
    """
    Evaluator that yields ClusteringUnsupervisedEvalStats for a clustering model, optionally fitting
    the model on the evaluator's datapoints beforehand.
    """

    def __init__(self, datapoints):
        """
        :param datapoints: the data passed to the model's fit method when eval_model is called with fit=True
        """
        self.datapoints = datapoints

    def eval_model(self, model: EuclideanClusterer, fit=True):
        """
        Retrieve evaluation statistics holder for the clustering model

        :param model: the clustering model to evaluate
        :param fit: whether to fit the model on the evaluator's data before retrieving statistics.
            Pass False if the model to be evaluated was already fitted on the desired dataset
        :return: instance of ClusteringUnsupervisedEvalStats that can be used for calculating various evaluation metrics
        """
        if fit:
            model.fit(self.datapoints)
        eval_stats = ClusteringUnsupervisedEvalStats.from_model(model)
        return eval_stats
class ClusteringModelSupervisedEvaluator(ClusteringModelEvaluator[ClusteringSupervisedEvalStats]):
    """
    Evaluator that yields ClusteringSupervisedEvalStats for a clustering model by passing the model
    together with the ground truth labels to ClusteringSupervisedEvalStats.from_model.
    """

    def __init__(self, datapoints, true_labels: Sequence[int], noise_label=-1):
        """
        :param datapoints: the data passed to the model's fit method when eval_model is called with fit=True
        :param true_labels: labels of the true clusters, including the noise clusters.
            Must have the same length as datapoints
        :param noise_label: label of the noise cluster (if there is one) in the true labels
        :raises ValueError: if true_labels and datapoints differ in length
        """
        num_labels, num_datapoints = len(true_labels), len(datapoints)
        if num_labels != num_datapoints:
            raise ValueError("true labels must be of same length as datapoints")
        self.datapoints = datapoints
        self.trueLabels = true_labels
        self.noiseLabel = noise_label

    def eval_model(self, model: EuclideanClusterer, fit=True):
        """
        Retrieve evaluation statistics holder for the clustering model

        :param model: the clustering model to evaluate
        :param fit: whether to fit the model on the evaluator's data before retrieving statistics.
            If True, the model's noiseLabel attribute is overwritten with the evaluator's noise label before fitting.
            Pass False if the model to be evaluated was already fitted on the desired dataset; in that case
            the model's noise label must equal the evaluator's
        :return: instance of ClusteringSupervisedEvalStats that can be used for calculating various evaluation metrics
        :raises ValueError: if fit is False and the noise labels of model and evaluator differ
        """
        if fit:
            # align the model's noise label with the ground truth before (re-)fitting
            model.noiseLabel = self.noiseLabel
            model.fit(self.datapoints)
        elif model.noiseLabel != self.noiseLabel:
            # an already fitted model is not modified here, so a mismatch is reported instead
            raise ValueError(f"Noise label of evaluator does not match noise label of the model:"
                             f" {self.noiseLabel} != {model.noiseLabel}. "
                             f"Either evaluate with fit=True or adjust the noise label in the ground truth labels")
        return ClusteringSupervisedEvalStats.from_model(model, self.trueLabels)