Coverage for src/sensai/sklearn_quantile.py: 0%
69 statements
« prev ^ index » next coverage.py v7.6.1, created at 2024-08-13 22:17 +0000
« prev ^ index » next coverage.py v7.6.1, created at 2024-08-13 22:17 +0000
1import functools
2from abc import ABC
4import numpy as np
5import pandas as pd
6from sklearn_quantile import RandomForestQuantileRegressor
8from .vector_model import VectorRegressionModel, InputOutputData
9from .evaluation.eval_stats import RegressionMetric
11from .sklearn.sklearn_base import AbstractSkLearnMultipleOneDimVectorRegressionModel
12from .util.aggregation import RelativeFrequencyCounter
class RandomForestQuantileRegressorVectorRegressionModel(AbstractSkLearnMultipleOneDimVectorRegressionModel):
    """
    Regression model wrapping sklearn_quantile's RandomForestQuantileRegressor.
    The underlying regressor is configured with three quantiles: the median (0.5) and the lower and upper
    bounds of a central interval with the given confidence level.
    """
    def __init__(self, confidence: float, random_state=42, **kwargs):
        """
        :param confidence: the confidence level (in [0, 1]) of the predicted intervals; the interval is bounded by the
            quantiles (1-confidence)/2 and 1-(1-confidence)/2
        :param random_state: the random state to pass on to RandomForestQuantileRegressor
        :param kwargs: keyword arguments to pass on to RandomForestQuantileRegressor
        :raises ValueError: if confidence is not within [0, 1]
        """
        # fail early: a confidence outside [0, 1] (or NaN) would yield quantiles outside [0, 1]
        if not 0 <= confidence <= 1:
            raise ValueError(f"confidence must be in the interval [0, 1], got {confidence}")
        margin = 1-confidence
        # the order matters: index 0 is the median, indices 1 and 2 are the interval's lower and upper bounds
        q = [0.5, margin/2, 1 - margin/2]
        super().__init__(RandomForestQuantileRegressor, q=q, random_state=random_state, **kwargs)

    def predict_confidence_intervals(self, x: pd.DataFrame, var_name: str = None):
        """
        :param x: the input data
        :param var_name: the predicted variable name; may be None if there is only one predicted variable
        :return: an array of shape [2, N], where the first dimension contains the confidence interval's lower bounds and the second
            its upper bounds
        """
        model: RandomForestQuantileRegressor = self.get_sklearn_model(var_name)
        outputs = self._predict_quantiles(model, self.compute_model_inputs(x))
        # drop the first entry (the median), keeping only the lower and upper bounds
        return outputs[1:]

    def _predict_quantiles(self, model: RandomForestQuantileRegressor, inputs: pd.DataFrame) -> np.ndarray:
        """
        :param model: the fitted quantile regressor to apply
        :param inputs: the model inputs
        :return: the model's raw prediction output (presumably one entry along the first dimension per configured
            quantile, in the order given at construction - to be confirmed against the sklearn_quantile documentation)
        """
        return model.predict(inputs)

    def _predict_sklearn_single_model(self, model, inputs: pd.DataFrame) -> np.ndarray:
        """
        :param model: the fitted quantile regressor to apply
        :param inputs: the model inputs
        :return: the first entry of the quantile predictions, which corresponds to the median (q=0.5)
        """
        return self._predict_quantiles(model, inputs)[0]
class QuantileRegressionMetric(RegressionMetric, ABC):
    """
    Base class for regression metrics which are computed from the confidence intervals predicted by a
    RandomForestQuantileRegressorVectorRegressionModel
    """
    @staticmethod
    @functools.lru_cache(maxsize=1)  # use cache for efficient reuse of results across different subclasses during evaluation
    def compute_confidence_intervals(model: VectorRegressionModel, io_data: InputOutputData = None) -> np.ndarray:
        """
        Computes the confidence intervals predicted by the given model for the given data.

        NOTE: Because of the cache, both arguments must be hashable, and the most recently used model and data
        remain referenced by the cache until a call with different arguments replaces them.

        :param model: the model; must be a RandomForestQuantileRegressorVectorRegressionModel
        :param io_data: the data whose inputs the model shall be applied to; must not be None
        :return: the result of the model's predict_confidence_intervals method for the inputs in io_data
        :raises ValueError: if the model is of an unsupported type or io_data is None
        """
        if not isinstance(model, RandomForestQuantileRegressorVectorRegressionModel):
            raise ValueError(f"Model must be of type RandomForestQuantileRegressorVectorRegressionModel, got type {type(model)}")
        # the default of None exists only for signature compatibility; without data there is nothing to predict on
        if io_data is None:
            raise ValueError("io_data must be provided in order to compute confidence intervals")
        intervals: np.ndarray = model.predict_confidence_intervals(io_data.inputs)
        return intervals
class QuantileRegressionMetricAccuracyInConfidenceInterval(QuantileRegressionMetric):
    """
    Metric for the coverage of the confidence interval, i.e. the relative frequency of data points for which the
    ground truth value lies within the predicted confidence interval (bounds inclusive)
    """
    name = "AccuracyInCI"

    @classmethod
    def compute_value(cls, y_true: np.ndarray, y_predicted: np.ndarray, model: VectorRegressionModel = None,
            io_data: InputOutputData = None):
        """
        :param y_true: the ground truth values
        :param y_predicted: the predicted values (not used by this metric)
        :param model: the model from which to obtain the confidence intervals
        :param io_data: the data for which to obtain the confidence intervals
        :return: the relative frequency with which the ground truth value is contained in the confidence interval
        """
        # transposing yields one (lower, upper) pair per data point
        bounds_per_point = cls.compute_confidence_intervals(model, io_data).transpose()
        coverage_counter = RelativeFrequencyCounter()
        for bounds, true_value in zip(bounds_per_point, y_true):
            lower, upper = bounds
            is_covered = lower <= true_value <= upper
            coverage_counter.count(is_covered)
        return coverage_counter.get_relative_frequency()
class QuantileRegressionMetricConfidenceIntervalMeanSize(QuantileRegressionMetric):
    """
    Metric for the mean size (upper bound minus lower bound) of the predicted confidence intervals
    """
    name = "MeanSizeCI"

    @classmethod
    def compute_value(cls, y_true: np.ndarray, y_predicted: np.ndarray, model: VectorRegressionModel = None, io_data: InputOutputData = None):
        """
        :param y_true: the ground truth values (not used by this metric)
        :param y_predicted: the predicted values (not used by this metric)
        :param model: the model from which to obtain the confidence intervals
        :param io_data: the data for which to obtain the confidence intervals
        :return: the mean of the interval sizes
        """
        # transposing yields one (lower, upper) pair per data point
        bounds_per_point = cls.compute_confidence_intervals(model, io_data).transpose()
        interval_sizes = [upper-lower for lower, upper in bounds_per_point]
        return np.mean(interval_sizes)
class QuantileRegressionMetricConfidenceIntervalMedianSize(QuantileRegressionMetric):
    """
    Metric for the median size (upper bound minus lower bound) of the predicted confidence intervals
    """
    name = "MedianSizeCI"

    @classmethod
    def compute_value(cls, y_true: np.ndarray, y_predicted: np.ndarray, model: VectorRegressionModel = None, io_data: InputOutputData = None):
        """
        :param y_true: the ground truth values (not used by this metric)
        :param y_predicted: the predicted values (not used by this metric)
        :param model: the model from which to obtain the confidence intervals
        :param io_data: the data for which to obtain the confidence intervals
        :return: the median of the interval sizes
        """
        # transposing yields one (lower, upper) pair per data point
        bounds_per_point = cls.compute_confidence_intervals(model, io_data).transpose()
        interval_sizes = [upper-lower for lower, upper in bounds_per_point]
        return np.median(interval_sizes)
class QuantileRegressionMetricRelFreqMaxSizeConfidenceInterval(QuantileRegressionMetric):
    """
    Metric for the relative frequency of predicted confidence intervals whose size (upper bound minus lower bound)
    does not exceed a given maximum
    """
    def __init__(self, max_size: float):
        """
        :param max_size: the maximum size an interval may have in order to be counted; it is also made part of the
            metric's name
        """
        super().__init__(f"RelFreqMaxSizeCI[{max_size}]")
        self.max_size = max_size

    def compute_value(self, y_true: np.ndarray, y_predicted: np.ndarray, model: VectorRegressionModel = None, io_data: InputOutputData = None):
        """
        :param y_true: the ground truth values (not used by this metric)
        :param y_predicted: the predicted values (not used by this metric)
        :param model: the model from which to obtain the confidence intervals
        :param io_data: the data for which to obtain the confidence intervals
        :return: the relative frequency of intervals whose size is at most max_size
        """
        # transposing yields one (lower, upper) pair per data point
        bounds_per_point = self.compute_confidence_intervals(model, io_data).transpose()
        size_counter = RelativeFrequencyCounter()
        for lower, upper in bounds_per_point:
            size_counter.count(upper-lower <= self.max_size)
        return size_counter.get_relative_frequency()