Coverage for src/sensai/evaluation/evaluator_clustering.py: 0%

36 statements  

« prev     ^ index     » next       coverage.py v7.6.1, created at 2024-08-13 22:17 +0000

1from abc import ABC, abstractmethod 

2from typing import Dict, Sequence, Generic, TypeVar 

3 

4from .eval_stats.eval_stats_clustering import ClusteringUnsupervisedEvalStats, \ 

5 ClusteringSupervisedEvalStats, ClusterLabelsEvalStats 

6from .evaluator import MetricsDictProvider 

7from ..clustering import EuclideanClusterer 

8from ..util.profiling import timed 

9 

10TClusteringEvalStats = TypeVar("TClusteringEvalStats", bound=ClusterLabelsEvalStats) 

11 

12 

class ClusteringModelEvaluator(MetricsDictProvider, Generic[TClusteringEvalStats], ABC):
    """
    Abstract base class for evaluators of clustering models.

    Subclasses implement :meth:`eval_model`; the metrics dictionary is obtained from the
    evaluation statistics object which that method returns.
    """

    @timed
    def _compute_metrics(self, model: EuclideanClusterer, **kwargs) -> Dict[str, float]:
        """
        Evaluate the given model and return the resulting metrics as a dictionary.

        :param model: the clustering model to evaluate
        :param kwargs: forwarded unchanged to :meth:`eval_model`
        :return: the result of calling ``metrics_dict()`` on the evaluation statistics
        """
        return self.eval_model(model, **kwargs).metrics_dict()

    @abstractmethod
    def eval_model(self, model: EuclideanClusterer, **kwargs) -> TClusteringEvalStats:
        """
        Evaluate the given model and return its evaluation statistics.

        :param model: the clustering model to evaluate
        :param kwargs: evaluator-specific options
        :return: the evaluation statistics object for the model
        """

29 

30 

class ClusteringModelUnsupervisedEvaluator(ClusteringModelEvaluator[ClusteringUnsupervisedEvalStats]):
    """
    Evaluator that produces unsupervised evaluation statistics for a clustering model.
    """

    def __init__(self, datapoints):
        """
        :param datapoints: the data on which a model is fitted when it is evaluated with ``fit=True``
        """
        self.datapoints = datapoints

    def eval_model(self, model: EuclideanClusterer, fit=True):
        """
        Obtain the evaluation statistics holder for the given clustering model.

        :param model: the clustering model to evaluate
        :param fit: whether to fit the model on the evaluator's datapoints before the statistics
            are retrieved. Pass False if the model was already fitted on the desired dataset.
        :return: instance of ClusteringUnsupervisedEvalStats that can be used for calculating
            various evaluation metrics
        """
        if fit:
            model.fit(self.datapoints)
        stats = ClusteringUnsupervisedEvalStats.from_model(model)
        return stats

47 

48 

class ClusteringModelSupervisedEvaluator(ClusteringModelEvaluator[ClusteringSupervisedEvalStats]):
    """
    Evaluator that produces supervised evaluation statistics for a clustering model by
    comparing it against ground truth labels.
    """

    def __init__(self, datapoints, true_labels: Sequence[int], noise_label=-1):
        """
        :param datapoints: the data on which a model is fitted when it is evaluated with ``fit=True``
        :param true_labels: labels of the true clusters, including the noise clusters;
            must have the same length as ``datapoints``
        :param noise_label: label of the noise cluster (if there is one) in the true labels
        :raises ValueError: if ``true_labels`` and ``datapoints`` differ in length
        """
        if len(true_labels) != len(datapoints):
            raise ValueError("true labels must be of same length as datapoints")
        self.datapoints = datapoints
        self.trueLabels = true_labels
        self.noiseLabel = noise_label

    def eval_model(self, model: EuclideanClusterer, fit=True):
        """
        Obtain the evaluation statistics holder for the given clustering model.

        :param model: the clustering model to evaluate
        :param fit: whether to fit the model on the evaluator's datapoints before the statistics
            are retrieved; in that case the model's noise label is overwritten with the evaluator's.
            Pass False if the model was already fitted on the desired dataset.
        :return: instance of ClusteringSupervisedEvalStats that can be used for calculating
            various evaluation metrics
        :raises ValueError: if ``fit`` is False and the model's noise label differs from the evaluator's
        """
        if fit:
            # Align the model's noise label with the ground truth before fitting
            model.noiseLabel = self.noiseLabel
            model.fit(self.datapoints)
        elif model.noiseLabel != self.noiseLabel:
            message = (
                f"Noise label of evaluator does not match noise label of the model:"
                f" {self.noiseLabel} != {model.noiseLabel}. "
                f"Either evaluate with fit=True or adjust the noise label in the ground truth labels"
            )
            raise ValueError(message)
        return ClusteringSupervisedEvalStats.from_model(model, self.trueLabels)