Coverage for src/sensai/evaluation/evaluator_clustering.py: 0%
36 statements
« prev ^ index » next — coverage.py v7.6.1, created at 2024-08-13 22:17 +0000
1from abc import ABC, abstractmethod
2from typing import Dict, Sequence, Generic, TypeVar
4from .eval_stats.eval_stats_clustering import ClusteringUnsupervisedEvalStats, \
5 ClusteringSupervisedEvalStats, ClusterLabelsEvalStats
6from .evaluator import MetricsDictProvider
7from ..clustering import EuclideanClusterer
8from ..util.profiling import timed
10TClusteringEvalStats = TypeVar("TClusteringEvalStats", bound=ClusterLabelsEvalStats)
class ClusteringModelEvaluator(MetricsDictProvider, Generic[TClusteringEvalStats], ABC):
    """Abstract base class for clustering model evaluators, which compute metrics from evaluation statistics."""

    @timed
    def _compute_metrics(self, model: EuclideanClusterer, **kwargs) -> Dict[str, float]:
        """
        Evaluate the given model and return the results as a dictionary.

        :param model: the clustering model to evaluate
        :param kwargs: passed on to eval_model
        :return: a dictionary mapping metric names to values
        """
        return self.eval_model(model, **kwargs).metrics_dict()

    @abstractmethod
    def eval_model(self, model: EuclideanClusterer, **kwargs) -> TClusteringEvalStats:
        pass
class ClusteringModelUnsupervisedEvaluator(ClusteringModelEvaluator[ClusteringUnsupervisedEvalStats]):
    """Evaluator for clustering models for which no ground-truth labels are available."""

    def __init__(self, datapoints):
        """
        :param datapoints: the data on which clustering models shall be evaluated
        """
        self.datapoints = datapoints

    def eval_model(self, model: EuclideanClusterer, fit=True):
        """
        Retrieve the evaluation statistics holder for the given clustering model.

        :param model: the clustering model
        :param fit: whether to fit on the evaluator's data before retrieving statistics;
            pass False if the model to be evaluated was already fitted on the desired dataset
        :return: an instance of ClusteringUnsupervisedEvalStats which can be used to compute evaluation metrics
        """
        if fit:
            model.fit(self.datapoints)
        return ClusteringUnsupervisedEvalStats.from_model(model)
class ClusteringModelSupervisedEvaluator(ClusteringModelEvaluator[ClusteringSupervisedEvalStats]):
    """Evaluator for clustering models for which ground-truth cluster labels are available."""

    def __init__(self, datapoints, true_labels: Sequence[int], noise_label=-1):
        """
        :param datapoints: the data on which clustering models shall be evaluated
        :param true_labels: labels of the true clusters, including the noise clusters
        :param noise_label: the label of the noise cluster (if there is one) within the true labels
        """
        if len(datapoints) != len(true_labels):
            raise ValueError("true labels must be of same length as datapoints")
        self.datapoints = datapoints
        self.trueLabels = true_labels
        self.noiseLabel = noise_label

    def eval_model(self, model: EuclideanClusterer, fit=True):
        """
        Retrieve the evaluation statistics holder for the given clustering model.

        :param model: the clustering model
        :param fit: whether to fit on the evaluator's data before retrieving statistics;
            pass False if the model to be evaluated was already fitted on the desired dataset
        :return: an instance of ClusteringSupervisedEvalStats which can be used to compute evaluation metrics
        """
        if not fit:
            # the model was fitted externally; its noise label must agree with the evaluator's
            if model.noiseLabel != self.noiseLabel:
                raise ValueError(f"Noise label of evaluator does not match noise label of the model:"
                                 f" {self.noiseLabel} != {model.noiseLabel}. "
                                 f"Either evaluate with fit=True or adjust the noise label in the ground truth labels")
        else:
            model.noiseLabel = self.noiseLabel
            model.fit(self.datapoints)
        return ClusteringSupervisedEvalStats.from_model(model, self.trueLabels)