Coverage for src/sensai/ensemble/models.py: 33%

24 statements  

« prev     ^ index     » next       coverage.py v7.6.1, created at 2024-08-13 22:17 +0000

1from typing import Sequence, List 

2 

3import pandas as pd 

4import numpy as np 

5 

6from .ensemble_base import EnsembleRegressionVectorModel 

7from ..vector_model import VectorRegressionModel 

8 

9 

class AveragingVectorRegressionModel(EnsembleRegressionVectorModel):
    """
    Ensemble regression model whose prediction is the weighted sum of the predictions of its member models.

    If no weights are given, every model receives the weight ``1 / len(models)``, such that the
    combined prediction is the arithmetic mean of the individual predictions.
    """

    def __init__(self, models: Sequence[VectorRegressionModel], weights: Sequence[float] = None, num_processes=1):
        """
        :param models: the models whose predictions are to be combined
        :param weights: one weight per model, in the same order as ``models``; if None, uniform weights
            ``1 / len(models)`` are used
        :param num_processes: passed on unchanged to the base class constructor
        :raises Exception: if weights are given and their number differs from the number of models
        """
        if weights is not None:
            if len(weights) != len(models):
                raise Exception(f"Number of weights does not match number of vectorRegressionModels: {len(weights)} != {len(models)}")
        else:
            weights = 1 / len(models) * np.ones(len(models))
        self.weights = weights
        super().__init__(models, num_processes=num_processes)

    def aggregate_predictions(self, predictions_data_frames: List[pd.DataFrame]) -> pd.DataFrame:
        """
        Combines the given prediction data frames into a single one by computing their weighted sum.

        Frames and weights are paired positionally (via ``zip``, i.e. surplus entries on either side
        are ignored). The first frame, scaled by its weight, serves as the accumulator; every further
        frame must have the same set of columns and is added to it column by column.

        :param predictions_data_frames: the data frames to combine, one per weight in ``self.weights``
        :return: the weighted sum of the given frames; an empty data frame if no frames are given
        :raises Exception: if a frame's set of columns differs from that of the first frame
        """
        # An explicit None sentinel marks "no frame seen yet". Testing DataFrame.empty instead would
        # also be true for frames that have columns but zero rows, in which case the accumulator would
        # be overwritten in every iteration and the column consistency check would never run.
        combined_prediction = None
        for cur_prediction_df, weight in zip(predictions_data_frames, self.weights):
            if combined_prediction is None:
                # the multiplication creates a new frame, so the caller's frame is not modified below
                combined_prediction = weight * cur_prediction_df
                continue
            if set(combined_prediction.columns) != set(cur_prediction_df.columns):
                raise Exception(f"Cannot combine different sets of columns for prediction: {combined_prediction.columns}, "
                    f"{cur_prediction_df.columns}")
            for column in cur_prediction_df.columns:
                combined_prediction[column] += weight * cur_prediction_df[column]
        if combined_prediction is None:
            return pd.DataFrame()
        return combined_prediction