Coverage for src/sensai/evaluation/evaluator.py: 62%

263 statements  

coverage.py v7.6.1, created at 2024-11-29 18:29 +0000

import functools
import logging
from abc import ABC, abstractmethod
from typing import Tuple, Dict, Any, Generator, Generic, TypeVar, Sequence, Optional, List, Union, Callable

import numpy as np
import pandas as pd

from .eval_stats import GUESS
from .eval_stats.eval_stats_base import EvalStats, EvalStatsCollection
from .eval_stats.eval_stats_classification import ClassificationEvalStats, ClassificationMetric
from .eval_stats.eval_stats_regression import RegressionEvalStats, RegressionEvalStatsCollection, RegressionMetric
from .result_set import RegressionResultSet
from ..data import DataSplitter, DataSplitterFractional, InputOutputData
from ..data_transformation import DataFrameTransformer
from ..tracking import TrackingMixin, TrackedExperiment
from ..util.deprecation import deprecated
from ..util.string import ToStringMixin
from ..util.typing import PandasNamedTuple
from ..vector_model import VectorClassificationModel, VectorModel, VectorModelBase, VectorRegressionModel

log = logging.getLogger(__name__)

TModel = TypeVar("TModel", bound=VectorModel)
TEvalStats = TypeVar("TEvalStats", bound=EvalStats)
TEvalStatsCollection = TypeVar("TEvalStatsCollection", bound=EvalStatsCollection)

class MetricsDictProvider(TrackingMixin, ABC):
    @abstractmethod
    def _compute_metrics(self, model, **kwargs) -> Dict[str, float]:
        """
        Computes metrics for the given model, typically by fitting the model and applying it to test data

        :param model: the model
        :param kwargs: parameters to pass on to the underlying evaluation method
        :return: a dictionary with metrics values
        """
        pass

    def compute_metrics(self, model, **kwargs) -> Optional[Dict[str, float]]:
        """
        Computes metrics for the given model, typically by fitting the model and applying it to test data.
        If a tracked experiment was previously set, the metrics are tracked with the string representation
        of the model added under an additional key 'str(model)'.

        :param model: the model for which to compute metrics
        :param kwargs: parameters to pass on to the underlying evaluation method
        :return: a dictionary with metrics values
        """
        values_dict = self._compute_metrics(model, **kwargs)
        if self.tracked_experiment is not None:
            self.tracked_experiment.track_values(values_dict, add_values_dict={"str(model)": str(model)})  # TODO strings unsupported (mlflow)
        return values_dict


class MetricsDictProviderFromFunction(MetricsDictProvider):
    def __init__(self, compute_metrics_fn: Callable[[VectorModel], Dict[str, float]]):
        self._compute_metrics_fn = compute_metrics_fn

    def _compute_metrics(self, model, **kwargs) -> Dict[str, float]:
        return self._compute_metrics_fn(model)
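
# Illustrative usage sketch (not part of the module): MetricsDictProviderFromFunction wraps any
# callable mapping a model to a metrics dictionary, e.g. for hyperparameter optimisation.
# The names `my_metrics_fn` and `my_model` below are hypothetical placeholders.
#
#     def my_metrics_fn(model: VectorModel) -> Dict[str, float]:
#         return {"RMSE": 0.0}  # compute real metrics here
#
#     provider = MetricsDictProviderFromFunction(my_metrics_fn)
#     metrics = provider.compute_metrics(my_model)  # my_model: a VectorModel instance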

class VectorModelEvaluationData(ABC, Generic[TEvalStats]):
    def __init__(self, stats_dict: Dict[str, TEvalStats], io_data: InputOutputData, model: VectorModelBase):
        """
        :param stats_dict: a dictionary mapping from output variable name to the evaluation statistics object
        :param io_data: the input/output data that was used to produce the results
        :param model: the model that was used to produce predictions
        """
        self.io_data = io_data
        self.eval_stats_by_var_name = stats_dict
        self.predicted_var_names = list(self.eval_stats_by_var_name.keys())
        self.model = model

    @property
    def model_name(self):
        return self.model.get_name()

    @property
    def input_data(self):  # for backward compatibility
        return self.io_data.inputs

    def get_eval_stats(self, predicted_var_name=None) -> TEvalStats:
        if predicted_var_name is None:
            if len(self.eval_stats_by_var_name) != 1:
                raise Exception(f"Must provide the name of the predicted variable, as multiple variables were predicted:"
                                f" {list(self.eval_stats_by_var_name.keys())}")
            else:
                predicted_var_name = next(iter(self.eval_stats_by_var_name.keys()))
        eval_stats = self.eval_stats_by_var_name.get(predicted_var_name)
        if eval_stats is None:
            raise ValueError(f"No evaluation data present for '{predicted_var_name}'; known output variables: "
                             f"{list(self.eval_stats_by_var_name.keys())}")
        return eval_stats

    def get_data_frame(self):
        """
        Returns a DataFrame with all evaluation metrics (one row per output variable)

        :return: a DataFrame containing evaluation metrics
        """
        stats_dicts = []
        var_names = []
        for predicted_var_name, eval_stats in self.eval_stats_by_var_name.items():
            stats_dicts.append(eval_stats.metrics_dict())
            var_names.append(predicted_var_name)
        df = pd.DataFrame(stats_dicts, index=var_names)
        df.index.name = "predictedVar"
        return df

    def iter_input_output_ground_truth_tuples(self, predicted_var_name=None) -> Generator[Tuple[PandasNamedTuple, Any, Any], None, None]:
        eval_stats = self.get_eval_stats(predicted_var_name)
        for i, named_tuple in enumerate(self.input_data.itertuples()):
            yield named_tuple, eval_stats.y_predicted[i], eval_stats.y_true[i]
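
# Illustrative usage sketch (not part of the module): once an evaluator has produced a
# VectorModelEvaluationData instance `eval_data` (see the evaluator classes below), the
# per-variable statistics and a metrics overview can be obtained as follows:
#
#     stats = eval_data.get_eval_stats()  # for a single predicted variable
#     print(eval_data.get_data_frame())   # one row of metrics per output variable
#     for named_tuple, y_predicted, y_true in eval_data.iter_input_output_ground_truth_tuples():
#         ...  # inspect individual predictions against the ground truth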

class VectorRegressionModelEvaluationData(VectorModelEvaluationData[RegressionEvalStats]):
    def get_eval_stats_collection(self):
        return RegressionEvalStatsCollection(list(self.eval_stats_by_var_name.values()))

    def to_data_frame(self, modify_input_df: bool = False, output_col_name_override: Optional[str] = None):
        """
        Creates a data frame with all inputs, predictions and prediction errors.
        For each predicted variable "y", there will be columns "y_predicted", "y_true", "y_error" and
        "y_abs_error".
        If there is only a single predicted variable, the variable can be renamed for convenience.

        The resulting data frame can be conveniently queried and analysed using class ResultSet.

        :param modify_input_df: whether to modify the input data frame in-place to generate the data frame
            (instead of copying it). This can be reasonable in cases where the data is very large.
        :param output_col_name_override: overrides the output column name. For example, if this is set to "y",
            then the columns named in the description above will be present in the data frame.
        :return: a data frame containing all inputs, outputs and prediction errors
        """
        df = self.io_data.inputs
        if not modify_input_df:
            df = df.copy()
        for predicted_var_name, eval_stats in self.eval_stats_by_var_name.items():
            y_predicted = np.array(eval_stats.y_predicted)
            y_true = np.array(eval_stats.y_true)
            if output_col_name_override is not None:
                assert len(self.eval_stats_by_var_name) == 1, "Column name override is only valid for a single output variable"
                predicted_var_name = output_col_name_override
            df[RegressionResultSet.col_name_predicted(predicted_var_name)] = y_predicted
            df[RegressionResultSet.col_name_ground_truth(predicted_var_name)] = y_true
            error = y_predicted - y_true
            df[RegressionResultSet.col_name_error(predicted_var_name)] = error
            df[RegressionResultSet.col_name_abs_error(predicted_var_name)] = np.abs(error)
        return df

    def create_result_set(self, modify_input_df: bool = False, output_col_name_override: Optional[str] = None,
            regression_result_set_factory: Callable[[pd.DataFrame, List[str]], RegressionResultSet] = RegressionResultSet) \
            -> RegressionResultSet:
        """
        Creates a queryable result set from the prediction results which can be used, in particular, for interactive analyses.

        The result set will contain a data frame, and for each predicted variable "y",
        there will be columns "y_predicted", "y_true", "y_error" and "y_abs_error" in this data frame.
        If there is only a single predicted variable, the variable can be renamed for convenience.

        :param modify_input_df: whether to modify the input data frame in-place to generate the data frame
            (instead of copying it). This can be reasonable in cases where the data is very large.
        :param output_col_name_override: overrides the output column name. For example, if this is set to "y",
            then the columns named in the description above will be present in the data frame.
        :return: the result set
        """
        return RegressionResultSet.from_regression_eval_data(self, modify_input_df=modify_input_df,
            output_col_name_override=output_col_name_override)
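
# Illustrative usage sketch (not part of the module): given regression evaluation data
# `eval_data` (a VectorRegressionModelEvaluationData instance), a combined data frame of
# inputs, predictions and errors, or a queryable result set, can be obtained via
#
#     df = eval_data.to_data_frame(output_col_name_override="y")
#     result_set = eval_data.create_result_set()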

TEvalData = TypeVar("TEvalData", bound=VectorModelEvaluationData)


class EvaluatorParams(ToStringMixin, ABC):
    def __init__(self, data_splitter: DataSplitter = None, fractional_split_test_fraction: float = None, fractional_split_random_seed=42,
            fractional_split_shuffle=True):
        """
        :param data_splitter: [if test data must be obtained via split] a splitter to use in order to obtain the split;
            if None, must specify fractional_split_test_fraction for a fractional split (default)
        :param fractional_split_test_fraction: [if test data must be obtained via split and data_splitter is None] the fraction
            of the data to use for testing/evaluation
        :param fractional_split_random_seed: [if test data must be obtained via split and data_splitter is None] the random seed
            to use for the fractional split of the data
        :param fractional_split_shuffle: [if test data must be obtained via split and data_splitter is None] whether to randomly
            (based on fractional_split_random_seed) shuffle the dataset before splitting it
        """
        self._dataSplitter = data_splitter
        self._fractionalSplitTestFraction = fractional_split_test_fraction
        self._fractionalSplitRandomSeed = fractional_split_random_seed
        self._fractionalSplitShuffle = fractional_split_shuffle

    def _tostring_exclude_private(self) -> bool:
        return True

    def _tostring_additional_entries(self) -> Dict[str, Any]:
        d = {}
        if self._dataSplitter is not None:
            d["dataSplitter"] = self._dataSplitter
        else:
            d["fractionalSplitTestFraction"] = self._fractionalSplitTestFraction
            d["fractionalSplitRandomSeed"] = self._fractionalSplitRandomSeed
            d["fractionalSplitShuffle"] = self._fractionalSplitShuffle
        return d

    def get_data_splitter(self) -> DataSplitter:
        if self._dataSplitter is None:
            if self._fractionalSplitTestFraction is None:
                raise ValueError("Cannot create default data splitter, as no split fraction was provided")
            self._dataSplitter = DataSplitterFractional(1 - self._fractionalSplitTestFraction, shuffle=self._fractionalSplitShuffle,
                random_seed=self._fractionalSplitRandomSeed)
        return self._dataSplitter

    def set_data_splitter(self, splitter: DataSplitter):
        self._dataSplitter = splitter
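
# Illustrative sketch (not part of the module): when only a test fraction is given,
# get_data_splitter() lazily builds a DataSplitterFractional keeping the remainder for training.
# `io_data` is a hypothetical placeholder for an InputOutputData instance.
#
#     params = RegressionEvaluatorParams(fractional_split_test_fraction=0.2, fractional_split_shuffle=True)
#     splitter = params.get_data_splitter()  # DataSplitterFractional(0.8, shuffle=True, random_seed=42)
#     training_io_data, test_io_data = splitter.split(io_data)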

class VectorModelEvaluator(MetricsDictProvider, Generic[TEvalData], ABC):
    def __init__(self, data: InputOutputData, test_data: InputOutputData = None, params: EvaluatorParams = None):
        """
        Constructs an evaluator with test and training data.

        :param data: the full data set, or, if `test_data` is given, the training data
        :param test_data: the data to use for testing/evaluation; if None, must specify appropriate parameters to define splitting
        :param params: the parameters
        """
        if test_data is None:
            if params is None:
                raise ValueError("Parameters required for data split must be provided")
            data_splitter = params.get_data_splitter()
            self.training_data, self.test_data = data_splitter.split(data)
            log.debug(f"{data_splitter} created split with {len(self.training_data)} "
                f"({100 * len(self.training_data) / len(data):.2f}%) and "
                f"{len(self.test_data)} ({100 * len(self.test_data) / len(data):.2f}%) training and test data points respectively")
        else:
            self.training_data = data
            self.test_data = test_data

    def set_tracked_experiment(self, tracked_experiment: TrackedExperiment):
        """
        Sets a tracked experiment which will result in metrics being saved whenever compute_metrics is called
        or eval_model is called with track=True.

        :param tracked_experiment: the experiment in which to track evaluation metrics
        """
        super().set_tracked_experiment(tracked_experiment)

    def eval_model(self, model: Union[VectorModelBase, VectorModel], on_training_data=False, track=True,
            fit=False) -> TEvalData:
        """
        Evaluates the given model

        :param model: the model to evaluate
        :param on_training_data: if True, evaluate on this evaluator's training data rather than the held-out test data
        :param track: whether to track the evaluation metrics for the case where a tracked experiment was set on this object
        :param fit: whether to fit the model before evaluating it (via this object's `fit_model` method); if enabled, the model
            must support fitting
        :return: the evaluation result
        """
        data = self.training_data if on_training_data else self.test_data
        with self.begin_optional_tracking_context_for_model(model, track=track) as tracking_context:
            if fit:
                self.fit_model(model)
            result: VectorModelEvaluationData = self._eval_model(model, data)
            is_multiple_pred_vars = len(result.predicted_var_names) > 1
            for pred_var_name in result.predicted_var_names:
                metrics = result.get_eval_stats(pred_var_name).metrics_dict()
                tracking_context.track_metrics(metrics, pred_var_name if is_multiple_pred_vars else None)
        return result

    @abstractmethod
    def _eval_model(self, model: VectorModelBase, data: InputOutputData) -> TEvalData:
        pass

    def _compute_metrics(self, model: VectorModel, on_training_data=False) -> Dict[str, float]:
        return self._compute_metrics_for_var_name(model, None, on_training_data=on_training_data)

    def _compute_metrics_for_var_name(self, model, predicted_var_name: Optional[str], on_training_data=False):
        self.fit_model(model)
        track = False  # avoid duplicate tracking (this function is only called by compute_metrics, which already tracks)
        eval_data: VectorModelEvaluationData = self.eval_model(model, on_training_data=on_training_data, track=track)
        return eval_data.get_eval_stats(predicted_var_name=predicted_var_name).metrics_dict()

    def create_metrics_dict_provider(self, predicted_var_name: Optional[str]) -> MetricsDictProvider:
        """
        Creates a metrics dictionary provider, e.g. for use in hyperparameter optimisation

        :param predicted_var_name: the name of the predicted variable for which to obtain evaluation metrics; may be None only
            if the model outputs a single predicted variable
        :return: a metrics dictionary provider instance for the given variable
        """
        return MetricsDictProviderFromFunction(functools.partial(self._compute_metrics_for_var_name, predicted_var_name=predicted_var_name))

    def fit_model(self, model: VectorModel):
        """Fits the given model's parameters using this evaluator's training data"""
        if self.training_data is None:
            raise Exception(f"Cannot fit model with evaluator {self.__class__.__name__}: no training data provided")
        model.fit_input_output_data(self.training_data)
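
# Illustrative usage sketch (not part of the module): any concrete evaluator (see the regression
# and classification evaluators below) can expose its metrics as a MetricsDictProvider, e.g. for
# hyperparameter optimisation; `evaluator` and `my_model` are hypothetical placeholders.
#
#     provider = evaluator.create_metrics_dict_provider(predicted_var_name=None)
#     metrics = provider.compute_metrics(my_model)  # fits on the training split, evaluates on the test split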

class RegressionEvaluatorParams(EvaluatorParams):
    def __init__(self,
            data_splitter: DataSplitter = None,
            fractional_split_test_fraction: float = None,
            fractional_split_random_seed=42,
            fractional_split_shuffle=True,
            metrics: Sequence[RegressionMetric] = None,
            additional_metrics: Sequence[RegressionMetric] = None,
            output_data_frame_transformer: DataFrameTransformer = None):
        """
        :param data_splitter: [if test data must be obtained via split] a splitter to use in order to obtain the split;
            if None, must specify fractional_split_test_fraction for a fractional split (default)
        :param fractional_split_test_fraction: [if data_splitter is None and test data must be obtained via split] the fraction
            of the data to use for testing/evaluation
        :param fractional_split_random_seed: [if data_splitter is None and test data must be obtained via split] the random seed
            to use for the fractional split of the data
        :param fractional_split_shuffle: [if data_splitter is None and test data must be obtained via split] whether to randomly
            (based on fractional_split_random_seed) shuffle the dataset before splitting it
        :param metrics: regression metrics to apply. If None, default regression metrics are used.
        :param additional_metrics: additional regression metrics to apply
        :param output_data_frame_transformer: a data frame transformer to apply to all output data frames (both model outputs and ground
            truth), such that evaluation metrics are computed on the transformed data frame
        """
        super().__init__(data_splitter,
            fractional_split_test_fraction=fractional_split_test_fraction,
            fractional_split_random_seed=fractional_split_random_seed,
            fractional_split_shuffle=fractional_split_shuffle)
        self.metrics = metrics
        self.additional_metrics = additional_metrics
        self.output_data_frame_transformer = output_data_frame_transformer

    @classmethod
    def from_dict_or_instance(cls,
            params: Optional[Union[Dict[str, Any], "RegressionEvaluatorParams"]]) -> "RegressionEvaluatorParams":
        if params is None:
            return RegressionEvaluatorParams()
        elif type(params) == dict:
            raise Exception("Old-style dictionary parametrisation is no longer supported")
        elif isinstance(params, cls):
            return params
        else:
            raise ValueError(f"Must provide a dictionary or {cls} instance, got {params} of type {type(params)}")


class VectorRegressionModelEvaluatorParams(RegressionEvaluatorParams):
    @deprecated("Use RegressionEvaluatorParams instead")
    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)

class VectorRegressionModelEvaluator(VectorModelEvaluator[VectorRegressionModelEvaluationData]):
    def __init__(self, data: InputOutputData, test_data: InputOutputData = None,
            params: RegressionEvaluatorParams = None):
        """
        Constructs an evaluator with test and training data.

        :param data: the full data set, or, if `test_data` is given, the training data
        :param test_data: the data to use for testing/evaluation; if None, must specify appropriate parameters to define splitting
        :param params: the parameters
        """
        super().__init__(data=data, test_data=test_data, params=params)
        self.params = params

    def _eval_model(self, model: VectorRegressionModel, data: InputOutputData) -> VectorRegressionModelEvaluationData:
        if not model.is_regression_model():
            raise ValueError(f"Expected a regression model, got {model}")
        eval_stats_by_var_name = {}
        predictions, ground_truth = self._compute_outputs(model, data)
        for predicted_var_name in predictions.columns:
            if predicted_var_name in ground_truth.columns:
                y_true = ground_truth[predicted_var_name]
            else:
                if len(predictions.columns) == 1 and len(ground_truth.columns) == 1:
                    log.warning(f"Model output column '{predicted_var_name}' does not match ground truth column "
                        f"'{ground_truth.columns[0]}'; assuming that this is not a problem since only a single column is available")
                    y_true = ground_truth.iloc[:, 0]
                else:
                    raise Exception(f"Model output column '{predicted_var_name}' not found in ground truth columns {ground_truth.columns}")
            eval_stats = RegressionEvalStats(y_predicted=predictions[predicted_var_name], y_true=y_true,
                weights=data.weights,
                metrics=self.params.metrics,
                additional_metrics=self.params.additional_metrics,
                model=model,
                io_data=data)
            eval_stats_by_var_name[predicted_var_name] = eval_stats
        return VectorRegressionModelEvaluationData(eval_stats_by_var_name, data, model)

    def compute_test_data_outputs(self, model: VectorModelBase) -> Tuple[pd.DataFrame, pd.DataFrame]:
        """
        Applies the given model to the test data

        :param model: the model to apply
        :return: a pair (predictions, ground_truth)
        """
        return self._compute_outputs(model, self.test_data)

    def _compute_outputs(self, model: VectorModelBase, io_data: InputOutputData):
        """
        Applies the given model to the given data

        :param model: the model to apply
        :param io_data: the data set
        :return: a pair (predictions, ground_truth)
        """
        predictions = model.predict(io_data.inputs)
        ground_truth = io_data.outputs
        if self.params.output_data_frame_transformer:
            predictions = self.params.output_data_frame_transformer.apply(predictions)
            ground_truth = self.params.output_data_frame_transformer.apply(ground_truth)
        return predictions, ground_truth
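
# Illustrative usage sketch (not part of the module): end-to-end regression evaluation with a
# fractional split; `my_regression_model` and `io_data` are hypothetical placeholders for a
# VectorRegressionModel and an InputOutputData instance respectively.
#
#     params = RegressionEvaluatorParams(fractional_split_test_fraction=0.2)
#     evaluator = VectorRegressionModelEvaluator(io_data, params=params)
#     eval_data = evaluator.eval_model(my_regression_model, fit=True)
#     print(eval_data.get_eval_stats().metrics_dict())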

class VectorClassificationModelEvaluationData(VectorModelEvaluationData[ClassificationEvalStats]):
    def get_misclassified_inputs_data_frame(self) -> pd.DataFrame:
        return self.input_data.iloc[self.get_eval_stats().get_misclassified_indices()]

    def get_misclassified_triples_pred_true_input(self) -> List[Tuple[Any, Any, pd.Series]]:
        """
        :return: a list containing a triple (predicted class, true class, input series) for each misclassified data point
        """
        eval_stats = self.get_eval_stats()
        indices = eval_stats.get_misclassified_indices()
        return [(eval_stats.y_predicted[i], eval_stats.y_true[i], self.input_data.iloc[i]) for i in indices]
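
# Illustrative usage sketch (not part of the module): given classification evaluation data
# `eval_data` (a VectorClassificationModelEvaluationData instance, e.g. obtained from
# VectorClassificationModelEvaluator.eval_model), misclassifications can be inspected via
#
#     misclassified_df = eval_data.get_misclassified_inputs_data_frame()
#     for predicted_class, true_class, input_series in eval_data.get_misclassified_triples_pred_true_input():
#         ...  # inspect each misclassified data point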

class ClassificationEvaluatorParams(EvaluatorParams):
    def __init__(self, data_splitter: DataSplitter = None, fractional_split_test_fraction: float = None, fractional_split_random_seed=42,
            fractional_split_shuffle=True, additional_metrics: Sequence[ClassificationMetric] = None,
            compute_probabilities: bool = False, binary_positive_label: Optional[str] = GUESS):
        """
        :param data_splitter: [if test data must be obtained via split] a splitter to use in order to obtain the split;
            if None, must specify fractional_split_test_fraction for a fractional split (default)
        :param fractional_split_test_fraction: [if data_splitter is None and test data must be obtained via split] the fraction
            of the data to use for testing/evaluation
        :param fractional_split_random_seed: [if data_splitter is None and test data must be obtained via split] the random seed
            to use for the fractional split of the data
        :param fractional_split_shuffle: [if data_splitter is None and test data must be obtained via split] whether to randomly
            (based on fractional_split_random_seed) shuffle the dataset before splitting it
        :param additional_metrics: additional metrics to apply
        :param compute_probabilities: whether to compute class probabilities. Enabling this will enable many downstream computations
            and visualisations (e.g. precision-recall plots) but requires the model to support probability computation in general.
        :param binary_positive_label: the positive class label for binary classification; if GUESS, try to detect it from the labels;
            if None, no detection (assume non-binary classification)
        """
        super().__init__(data_splitter,
            fractional_split_test_fraction=fractional_split_test_fraction,
            fractional_split_random_seed=fractional_split_random_seed,
            fractional_split_shuffle=fractional_split_shuffle)
        self.additionalMetrics = additional_metrics
        self.computeProbabilities = compute_probabilities
        self.binaryPositiveLabel = binary_positive_label

    @classmethod
    def from_dict_or_instance(cls,
            params: Optional[Union[Dict[str, Any], "ClassificationEvaluatorParams"]]) \
            -> "ClassificationEvaluatorParams":
        if params is None:
            return ClassificationEvaluatorParams()
        elif type(params) == dict:
            raise ValueError("Old-style dictionary parametrisation is no longer supported")
        elif isinstance(params, ClassificationEvaluatorParams):
            return params
        else:
            raise ValueError(f"Must provide a dictionary or instance, got {params}")


class VectorClassificationModelEvaluatorParams(ClassificationEvaluatorParams):
    @deprecated("Use ClassificationEvaluatorParams instead")
    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)

class VectorClassificationModelEvaluator(VectorModelEvaluator[VectorClassificationModelEvaluationData]):
    def __init__(self,
            data: InputOutputData,
            test_data: InputOutputData = None,
            params: ClassificationEvaluatorParams = None):
        """
        Constructs an evaluator with test and training data.

        :param data: the full data set, or, if `test_data` is given, the training data
        :param test_data: the data to use for testing/evaluation; if None, must specify appropriate parameters to define splitting
        :param params: the parameters
        """
        super().__init__(data=data, test_data=test_data, params=params)
        self.params = params

    def _eval_model(self, model: VectorClassificationModel, data: InputOutputData) -> VectorClassificationModelEvaluationData:
        if model.is_regression_model():
            raise ValueError(f"Expected a classification model, got {model}")
        predictions, predictions_proba, ground_truth = self._compute_outputs(model, data)
        eval_stats = ClassificationEvalStats(
            y_predicted_class_probabilities=predictions_proba,
            y_predicted=predictions,
            y_true=ground_truth,
            labels=model.get_class_labels(),
            additional_metrics=self.params.additionalMetrics,
            binary_positive_label=self.params.binaryPositiveLabel)
        predicted_var_name = model.get_predicted_variable_names()[0]
        return VectorClassificationModelEvaluationData({predicted_var_name: eval_stats}, data, model)

    def compute_test_data_outputs(self, model) -> Tuple[pd.DataFrame, pd.DataFrame, pd.DataFrame]:
        """
        Applies the given model to the test data

        :param model: the model to apply
        :return: a triple (predictions, predicted class probability vectors, ground_truth) of DataFrames
        """
        return self._compute_outputs(model, self.test_data)

    def _compute_outputs(self, model, io_data: InputOutputData) -> Tuple[pd.DataFrame, pd.DataFrame, pd.DataFrame]:
        """
        Applies the given model to the given data

        :param model: the model to apply
        :param io_data: the data set
        :return: a triple (predictions, predicted class probability vectors, ground_truth) of DataFrames
        """
        if self.params.computeProbabilities:
            class_probabilities = model.predict_class_probabilities(io_data.inputs)
            predictions = model.convert_class_probabilities_to_predictions(class_probabilities)
        else:
            class_probabilities = None
            predictions = model.predict(io_data.inputs)
        ground_truth = io_data.outputs
        return predictions, class_probabilities, ground_truth
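
# Illustrative usage sketch (not part of the module): end-to-end classification evaluation with
# class probabilities enabled; `my_classifier` and `io_data` are hypothetical placeholders for a
# VectorClassificationModel and an InputOutputData instance respectively.
#
#     params = ClassificationEvaluatorParams(fractional_split_test_fraction=0.2, compute_probabilities=True)
#     evaluator = VectorClassificationModelEvaluator(io_data, params=params)
#     eval_data = evaluator.eval_model(my_classifier, fit=True)
#     print(eval_data.get_eval_stats().metrics_dict())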

class RuleBasedVectorClassificationModelEvaluator(VectorClassificationModelEvaluator):
    def __init__(self, data: InputOutputData):
        super().__init__(data, test_data=data)

    def eval_model(self, model: VectorModelBase, on_training_data=False, track=True,
            fit=False) -> VectorClassificationModelEvaluationData:
        """
        Evaluates the rule-based model. The training data and test data coincide, thus fitting the model
        will fit the model's preprocessors on the full data set, and evaluating it will evaluate the model on the
        same data set.

        :param model: the model to evaluate
        :param on_training_data: has to be False here; setting it to True is not supported and will lead to an
            exception
        :param track: whether to track the evaluation metrics for the case where a tracked experiment was set on this object
        :return: the evaluation result
        """
        if on_training_data:
            raise Exception("Evaluating rule-based models on training data is not supported. In this evaluator, "
                "training and test data coincide.")
        return super().eval_model(model)

class RuleBasedVectorRegressionModelEvaluator(VectorRegressionModelEvaluator):
    def __init__(self, data: InputOutputData):
        super().__init__(data, test_data=data)

    def eval_model(self, model: Union[VectorModelBase, VectorModel], on_training_data=False, track=True,
            fit=False) -> VectorRegressionModelEvaluationData:
        """
        Evaluates the rule-based model. The training data and test data coincide, thus fitting the model
        will fit the model's preprocessors on the full data set, and evaluating it will evaluate the model on the
        same data set.

        :param model: the model to evaluate
        :param on_training_data: has to be False here; setting it to True is not supported and will lead to an
            exception
        :param track: whether to track the evaluation metrics for the case where a tracked experiment was set on this object
        :return: the evaluation result
        """
        if on_training_data:
            raise Exception("Evaluating rule-based models on training data is not supported. In this evaluator, "
                "training and test data coincide.")
        return super().eval_model(model)
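
# Illustrative sketch of the intended calling pattern (not part of the module): for rule-based
# (non-learning) models, the rule-based evaluators use the full data set for both preprocessor
# fitting and evaluation; `my_rule_based_model` and `io_data` are hypothetical placeholders, and
# the sketch assumes the evaluator's default parameters suffice for the model at hand.
#
#     evaluator = RuleBasedVectorRegressionModelEvaluator(io_data)
#     eval_data = evaluator.eval_model(my_rule_based_model)
#     print(eval_data.get_eval_stats().metrics_dict())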