Coverage for src/sensai/ensemble/models.py: 33%
24 statements
« prev ^ index » next coverage.py v7.6.1, created at 2024-08-13 22:17 +0000
« prev ^ index » next coverage.py v7.6.1, created at 2024-08-13 22:17 +0000
1from typing import Sequence, List
3import pandas as pd
4import numpy as np
6from .ensemble_base import EnsembleRegressionVectorModel
7from ..vector_model import VectorRegressionModel
class AveragingVectorRegressionModel(EnsembleRegressionVectorModel):
    """
    Ensemble regression model whose prediction is the weighted sum of the predictions of its member models.

    With the default (uniform) weights, the result is the arithmetic mean of the member models' predictions.
    """

    def __init__(self, models: Sequence[VectorRegressionModel], weights: Sequence[float] = None, num_processes=1):
        """
        :param models: the member models whose predictions are to be combined
        :param weights: one weight per model, in the same order as ``models``; if None, every model receives
            the weight ``1 / len(models)``. Given weights are used as they are (they are not normalised).
        :param num_processes: passed on to the base class constructor
        :raises ValueError: if weights are given and their number differs from the number of models
        """
        if weights is None:
            weights = 1 / len(models) * np.ones(len(models))
        elif len(weights) != len(models):
            raise ValueError(f"Number of weights does not match number of vectorRegressionModels: {len(weights)} != {len(models)}")
        self.weights = weights
        super().__init__(models, num_processes=num_processes)

    def aggregate_predictions(self, predictions_data_frames: List[pd.DataFrame]) -> pd.DataFrame:
        """
        Combines the given prediction data frames into their weighted sum, using ``self.weights``.

        Columns are matched by name, so the column order may differ between the frames, but the set of
        columns must be the same in all of them.

        :param predictions_data_frames: the prediction data frames, in the same order as the weights
        :return: the weighted sum of the given frames; an empty data frame if no frames are given
        :raises ValueError: if a frame's set of columns differs from that of the first frame
        """
        # None (rather than DataFrame.empty) marks "nothing accumulated yet": a genuine prediction frame
        # with zero rows is also "empty" and must not cause the column check below to be skipped
        combined_prediction = None
        for cur_prediction_df, weight in zip(predictions_data_frames, self.weights):
            if combined_prediction is None:
                # the multiplication creates a new frame, so the first input frame is never modified
                combined_prediction = weight * cur_prediction_df
                continue
            if set(combined_prediction.columns) != set(cur_prediction_df.columns):
                raise ValueError(f"Cannot combine different sets of columns for prediction: {combined_prediction.columns}, "
                    f"{cur_prediction_df.columns}")
            for column in cur_prediction_df.columns:
                combined_prediction[column] += weight * cur_prediction_df[column]
        if combined_prediction is None:
            return pd.DataFrame()
        return combined_prediction