eval_stats_clustering

eval_stats_clustering#

Source code: sensai/evaluation/eval_stats/eval_stats_clustering.py

class ClusterLabelsEvalStats(labels: Sequence[int], noise_label: int, default_metrics: List[TMetric], additional_metrics: Optional[List[TMetric]] = None)[source]#

Bases: EvalStats[TMetric], ABC

NUM_CLUSTERS = 'numClusters'#

AV_SIZE = 'averageClusterSize'#

MEDIAN_SIZE = 'medianClusterSize'#

STDDEV_SIZE = 'clusterSizeStd'#

MIN_SIZE = 'minClusterSize'#

MAX_SIZE = 'maxClusterSize'#

NOISE_SIZE = 'noiseClusterSize'#

get_distribution_summary() → Dict[str, float][source]#

metrics_dict() → Dict[str, float][source]#

Computes all metrics

Returns: a dictionary mapping metric names to values

class ClusteringUnsupervisedMetric(name: Optional[str] = None, bounds: Optional[Tuple[float, float]] = None)[source]#

Bases: Metric[ClusteringUnsupervisedEvalStats], ABC

Parameters:

name – the name of the metric; if None use the class’ name attribute
bounds – the minimum and maximum values the metric can take on (or None if the bounds are not specified)

class RemovedNoiseUnsupervisedMetric(name: Optional[str] = None, bounds: Optional[Tuple[float, float]] = None)[source]#

Bases: ClusteringUnsupervisedMetric

Parameters:

name – the name of the metric; if None use the class’ name attribute
bounds – the minimum and maximum values the metric can take on (or None if the bounds are not specified)

worstValue = 0#

compute_value_for_eval_stats(eval_stats: ClusteringUnsupervisedEvalStats) → float[source]#

abstract static compute_value(datapoints: numpy.ndarray, labels: Sequence[int])[source]#

class CalinskiHarabaszScore(name: Optional[str] = None, bounds: Optional[Tuple[float, float]] = None)[source]#

Bases: RemovedNoiseUnsupervisedMetric

Parameters:

name – the name of the metric; if None use the class’ name attribute
bounds – the minimum and maximum values the metric can take on (or None if the bounds are not specified)

name: str = 'CalinskiHarabaszScore'#

static compute_value(datapoints: numpy.ndarray, labels: Sequence[int])[source]#

class DaviesBouldinScore(name: Optional[str] = None, bounds: Optional[Tuple[float, float]] = None)[source]#

Bases: RemovedNoiseUnsupervisedMetric

Parameters:

name – the name of the metric; if None use the class’ name attribute
bounds – the minimum and maximum values the metric can take on (or None if the bounds are not specified)

name: str = 'DaviesBouldinScore'#

worstValue = 1#

static compute_value(datapoints: numpy.ndarray, labels: Sequence[int])[source]#

class SilhouetteScore(name: Optional[str] = None, bounds: Optional[Tuple[float, float]] = None)[source]#

Bases: RemovedNoiseUnsupervisedMetric

Parameters:

name – the name of the metric; if None use the class’ name attribute
bounds – the minimum and maximum values the metric can take on (or None if the bounds are not specified)

name: str = 'SilhouetteScore'#

worstValue = -1#

static compute_value(datapoints: numpy.ndarray, labels: Sequence[int])[source]#

class ClusteringUnsupervisedEvalStats(datapoints: numpy.ndarray, labels: Sequence[int], noise_label=-1, metrics: Optional[Sequence[ClusteringUnsupervisedMetric]] = None, additional_metrics: Optional[Sequence[ClusteringUnsupervisedMetric]] = None)[source]#

Bases: ClusterLabelsEvalStats[ClusteringUnsupervisedMetric]

Class containing methods to compute evaluation statistics of a clustering result

Parameters:

datapoints – datapoints that were clustered
labels – sequence of labels, usually the output of some clustering algorithm
metrics – the metrics to compute. If None, will compute default metrics
additional_metrics – the metrics to additionally compute

classmethod from_model(clustering_model: EuclideanClusterer)[source]#

class ClusteringSupervisedMetric(name: Optional[str] = None, bounds: Optional[Tuple[float, float]] = None)[source]#

Bases: Metric[ClusteringSupervisedEvalStats], ABC

Parameters:

name – the name of the metric; if None use the class’ name attribute
bounds – the minimum and maximum values the metric can take on (or None if the bounds are not specified)

class RemovedCommonNoiseSupervisedMetric(name: Optional[str] = None, bounds: Optional[Tuple[float, float]] = None)[source]#

Bases: ClusteringSupervisedMetric, ABC

Parameters:

name – the name of the metric; if None use the class’ name attribute
bounds – the minimum and maximum values the metric can take on (or None if the bounds are not specified)

worstValue = 0#

compute_value_for_eval_stats(eval_stats: ClusteringSupervisedEvalStats) → float[source]#

abstract static compute_value(labels: Sequence[int], true_labels: Sequence[int])[source]#

class VMeasureScore(name: Optional[str] = None, bounds: Optional[Tuple[float, float]] = None)[source]#

Bases: RemovedCommonNoiseSupervisedMetric

Parameters:

name – the name of the metric; if None use the class’ name attribute
bounds – the minimum and maximum values the metric can take on (or None if the bounds are not specified)

name: str = 'VMeasureScore'#

static compute_value(labels: Sequence[int], true_labels: Sequence[int])[source]#

class AdjustedRandScore(name: Optional[str] = None, bounds: Optional[Tuple[float, float]] = None)[source]#

Bases: RemovedCommonNoiseSupervisedMetric

Parameters:

name – the name of the metric; if None use the class’ name attribute
bounds – the minimum and maximum values the metric can take on (or None if the bounds are not specified)

name: str = 'AdjustedRandScore'#

worstValue = -1#

static compute_value(labels: Sequence[int], true_labels: Sequence[int])[source]#

class FowlkesMallowsScore(name: Optional[str] = None, bounds: Optional[Tuple[float, float]] = None)[source]#

Bases: RemovedCommonNoiseSupervisedMetric

Parameters:

name – the name of the metric; if None use the class’ name attribute
bounds – the minimum and maximum values the metric can take on (or None if the bounds are not specified)

name: str = 'FowlkesMallowsScore'#

static compute_value(labels: Sequence[int], true_labels: Sequence[int])[source]#

class AdjustedMutualInfoScore(name: Optional[str] = None, bounds: Optional[Tuple[float, float]] = None)[source]#

Bases: RemovedCommonNoiseSupervisedMetric

Parameters:

name – the name of the metric; if None use the class’ name attribute
bounds – the minimum and maximum values the metric can take on (or None if the bounds are not specified)

name: str = 'AdjustedMutualInfoScore'#

static compute_value(labels: Sequence[int], true_labels: Sequence[int])[source]#

class ClusteringSupervisedEvalStats(labels: Sequence[int], true_labels: Sequence[int], noise_label=-1, metrics: Optional[Sequence[ClusteringSupervisedMetric]] = None, additional_metrics: Optional[Sequence[ClusteringSupervisedMetric]] = None)[source]#

Bases: ClusterLabelsEvalStats[ClusteringSupervisedMetric]

Class containing methods to compute evaluation statistics of a clustering result based on ground truth clusters

Parameters:

labels – sequence of labels, usually the output of some clustering algorithm
true_labels – sequence of labels that represent the ground truth clusters
metrics – the metrics to compute. If None, will compute default metrics
additional_metrics – the metrics to additionally compute

classmethod from_model(clustering_model: EuclideanClusterer, true_labels: Sequence[int])[source]#

labels_with_removed_common_noise() → Tuple[numpy.ndarray, numpy.ndarray][source]#

Returns: tuple (labels, true_labels) from which the points classified as noise in both the true and the predicted labels were removed