Coverage for src/sensai/sklearn/sklearn_regression.py: 71%

79 statements  

« prev     ^ index     » next       coverage.py v7.6.1, created at 2024-11-29 18:29 +0000

import logging
from typing import Optional, Union

import sklearn.dummy
import sklearn.ensemble
import sklearn.linear_model
import sklearn.neighbors
import sklearn.neural_network
import sklearn.svm
import sklearn.tree
from matplotlib import pyplot as plt

from .sklearn_base import AbstractSkLearnMultipleOneDimVectorRegressionModel, AbstractSkLearnMultiDimVectorRegressionModel, \
    FeatureImportanceProviderSkLearnRegressionMultipleOneDim, FeatureImportanceProviderSkLearnRegressionMultiDim

13 

# Module-level logger, named after this module per the standard logging convention.
log = logging.getLogger(__name__)

15 

16 

class SkLearnRandomForestVectorRegressionModel(AbstractSkLearnMultipleOneDimVectorRegressionModel,
        FeatureImportanceProviderSkLearnRegressionMultipleOneDim):
    """
    Random forest regression based on sklearn.ensemble.RandomForestRegressor.
    """
    def __init__(self, n_estimators=100, min_samples_leaf=10, random_state=42, **model_args):
        """
        :param n_estimators: the number of trees in the forest
        :param min_samples_leaf: the minimum number of samples that a leaf node must hold
        :param random_state: the random seed for reproducibility
        :param model_args: additional arguments to pass on to RandomForestRegressor,
            see https://scikit-learn.org/stable/modules/generated/sklearn.ensemble.RandomForestRegressor.html
        """
        super().__init__(sklearn.ensemble.RandomForestRegressor,
            random_state=random_state,
            n_estimators=n_estimators,
            min_samples_leaf=min_samples_leaf,
            **model_args)

    def is_sample_weight_supported(self) -> bool:
        return True

25 

26 

class SkLearnLinearRegressionVectorRegressionModel(AbstractSkLearnMultiDimVectorRegressionModel,
        FeatureImportanceProviderSkLearnRegressionMultiDim):
    """
    Ordinary least squares regression based on sklearn.linear_model.LinearRegression.
    """
    def __init__(self, fit_intercept=True, **model_args):
        """
        :param fit_intercept: whether to determine the intercept, i.e. the constant term which is not scaled with an
            input feature value; set to False if the data is already centred
        :param model_args: additional arguments to pass on to LinearRegression,
            see https://scikit-learn.org/stable/modules/generated/sklearn.linear_model.LinearRegression.html
        """
        super().__init__(sklearn.linear_model.LinearRegression,
            fit_intercept=fit_intercept,
            **model_args)

    def is_sample_weight_supported(self) -> bool:
        return True

39 

40 

class SkLearnLinearRidgeRegressionVectorRegressionModel(AbstractSkLearnMultiDimVectorRegressionModel,
        FeatureImportanceProviderSkLearnRegressionMultiDim):
    """
    Linear least squares with L2 regularisation (ridge regression), based on sklearn.linear_model.Ridge.
    """
    def __init__(self, alpha=1.0, fit_intercept=True, solver="auto", max_iter=None, tol=1e-3, **model_args):
        """
        :param alpha: the multiplier of the L2 term, controlling regularisation strength
        :param fit_intercept: whether to determine the intercept, i.e. the constant term which is not scaled with an
            input feature value; set to False if the data is already centred
        :param solver: the solver to use in the computational routines ("auto" chooses automatically)
        :param max_iter: the maximum number of iterations for iterative solvers (None uses the solver default)
        :param tol: the precision of the solution
        :param model_args: additional arguments to pass on to Ridge,
            see https://scikit-learn.org/stable/modules/generated/sklearn.linear_model.Ridge.html#sklearn.linear_model.Ridge
        """
        super().__init__(sklearn.linear_model.Ridge,
            alpha=alpha,
            fit_intercept=fit_intercept,
            solver=solver,
            max_iter=max_iter,
            tol=tol,
            **model_args)

    def is_sample_weight_supported(self) -> bool:
        return True

58 

59 

class SkLearnLinearLassoRegressionVectorRegressionModel(AbstractSkLearnMultiDimVectorRegressionModel,
        FeatureImportanceProviderSkLearnRegressionMultiDim):
    """
    Linear least squares with L1 regularisation (the lasso), based on sklearn.linear_model.Lasso.
    """
    def __init__(self, alpha=1.0, fit_intercept=True, max_iter=1000, tol=0.0001, **model_args):
        """
        :param alpha: the multiplier of the L1 term, controlling regularisation strength
        :param fit_intercept: whether to determine the intercept, i.e. the constant term which is not scaled with an
            input feature value; set to False if the data is already centred
        :param max_iter: the maximum number of iterations
        :param tol: the tolerance for the optimisation
        :param model_args: additional arguments to pass on to Lasso,
            see https://scikit-learn.org/stable/modules/generated/sklearn.linear_model.Lasso.html#sklearn.linear_model.Lasso
        """
        super().__init__(sklearn.linear_model.Lasso,
            alpha=alpha,
            fit_intercept=fit_intercept,
            max_iter=max_iter,
            tol=tol,
            **model_args)

    def is_sample_weight_supported(self) -> bool:
        return True

76 

77 

class SkLearnMultiLayerPerceptronVectorRegressionModel(AbstractSkLearnMultiDimVectorRegressionModel):
    """
    Multi-layer perceptron regression based on sklearn.neural_network.MLPRegressor.
    """
    def __init__(self,
            hidden_layer_sizes=(100,), activation: str = "relu",
            solver: str = "adam", batch_size: Union[int, str] = "auto", random_state: Optional[int] = 42,
            max_iter: int = 200, early_stopping: bool = False, n_iter_no_change: int = 10, **model_args):
        """
        :param hidden_layer_sizes: the sequence of hidden layer sizes
        :param activation: {"identity", "logistic", "tanh", "relu"} the activation function to use for hidden layers
            (the one used for the output layer is always 'identity')
        :param solver: {"adam", "lbfgs", "sgd"} the name of the solver to apply
        :param batch_size: the batch size or "auto" for min(200, data set size)
        :param random_state: the random seed for reproducibility; use None if it shall not be specifically defined
        :param max_iter: the number of iterations (gradient steps for L-BFGS, epochs for other solvers)
        :param early_stopping: whether to use early stopping (stop training after n_iter_no_change epochs without
            improvement)
        :param n_iter_no_change: the number of iterations after which to stop early (if early_stopping is enabled)
        :param model_args: additional arguments to pass on to MLPRegressor,
            see https://scikit-learn.org/stable/modules/generated/sklearn.neural_network.MLPRegressor.html
        """
        super().__init__(sklearn.neural_network.MLPRegressor,
            hidden_layer_sizes=hidden_layer_sizes,
            activation=activation,
            solver=solver,
            batch_size=batch_size,
            random_state=random_state,
            max_iter=max_iter,
            early_stopping=early_stopping,
            n_iter_no_change=n_iter_no_change,
            **model_args)

    def is_sample_weight_supported(self) -> bool:
        return False

102 

103 

class SkLearnSVRVectorRegressionModel(AbstractSkLearnMultiDimVectorRegressionModel):
    """
    Epsilon-support vector regression based on sklearn.svm.SVR.
    """
    def __init__(self, **model_args):
        """
        :param model_args: arguments to pass on to SVR,
            see https://scikit-learn.org/stable/modules/generated/sklearn.svm.SVR.html
        """
        super().__init__(sklearn.svm.SVR, **model_args)

    def is_sample_weight_supported(self) -> bool:
        return True

110 

111 

class SkLearnLinearSVRVectorRegressionModel(AbstractSkLearnMultiDimVectorRegressionModel):
    """
    Linear support vector regression based on sklearn.svm.LinearSVR.
    """
    def __init__(self, **model_args):
        """
        :param model_args: arguments to pass on to LinearSVR,
            see https://scikit-learn.org/stable/modules/generated/sklearn.svm.LinearSVR.html
        """
        super().__init__(sklearn.svm.LinearSVR, **model_args)

    def is_sample_weight_supported(self) -> bool:
        return True

118 

119 

class SkLearnGradientBoostingVectorRegressionModel(AbstractSkLearnMultipleOneDimVectorRegressionModel):
    """
    Gradient-boosted tree regression based on sklearn.ensemble.GradientBoostingRegressor.
    """
    def __init__(self, random_state=42, **model_args):
        """
        :param random_state: the random seed for reproducibility
        :param model_args: additional arguments to pass on to GradientBoostingRegressor,
            see https://scikit-learn.org/stable/modules/generated/sklearn.ensemble.GradientBoostingRegressor.html
        """
        super().__init__(sklearn.ensemble.GradientBoostingRegressor,
            random_state=random_state,
            **model_args)

    def is_sample_weight_supported(self) -> bool:
        return True

126 

127 

class SkLearnKNeighborsVectorRegressionModel(AbstractSkLearnMultiDimVectorRegressionModel):
    """
    K-nearest neighbours regression based on sklearn.neighbors.KNeighborsRegressor.
    """
    def __init__(self, **model_args):
        """
        :param model_args: arguments to pass on to KNeighborsRegressor,
            see https://scikit-learn.org/stable/modules/generated/sklearn.neighbors.KNeighborsRegressor.html
        """
        super().__init__(sklearn.neighbors.KNeighborsRegressor, **model_args)

    def is_sample_weight_supported(self) -> bool:
        return False

134 

135 

class SkLearnExtraTreesVectorRegressionModel(AbstractSkLearnMultipleOneDimVectorRegressionModel):
    """
    Extremely randomised trees regression based on sklearn.ensemble.ExtraTreesRegressor.
    """
    def __init__(self, n_estimators=100, min_samples_leaf=10, random_state=42, **model_args):
        """
        :param n_estimators: the number of trees in the forest
        :param min_samples_leaf: the minimum number of samples that a leaf node must hold
        :param random_state: the random seed for reproducibility
        :param model_args: additional arguments to pass on to ExtraTreesRegressor,
            see https://scikit-learn.org/stable/modules/generated/sklearn.ensemble.ExtraTreesRegressor.html
        """
        super().__init__(sklearn.ensemble.ExtraTreesRegressor,
            random_state=random_state,
            n_estimators=n_estimators,
            min_samples_leaf=min_samples_leaf,
            **model_args)

    def is_sample_weight_supported(self) -> bool:
        return True

143 

144 

class SkLearnDummyVectorRegressionModel(AbstractSkLearnMultipleOneDimVectorRegressionModel):
    """
    A baseline model based on sklearn.dummy.DummyRegressor, which makes predictions using a simple rule
    (e.g. predicting the training set mean) without using the input features.
    """
    def __init__(self, strategy='mean', constant=None, quantile=None, **model_args):
        """
        :param strategy: the prediction strategy, one of {"mean", "median", "quantile", "constant"}
        :param constant: the constant value to predict (applies only to strategy "constant")
        :param quantile: the quantile to predict, in [0, 1] (applies only to strategy "quantile")
        :param model_args: additional arguments to pass on to DummyRegressor,
            see https://scikit-learn.org/stable/modules/generated/sklearn.dummy.DummyRegressor.html
        """
        # **model_args added for consistency with the other model classes in this module
        # (backward-compatible: existing positional/keyword calls are unaffected)
        super().__init__(sklearn.dummy.DummyRegressor,
            strategy=strategy, constant=constant, quantile=quantile, **model_args)

    def is_sample_weight_supported(self) -> bool:
        return True

152 

153 

class SkLearnDecisionTreeVectorRegressionModel(AbstractSkLearnMultipleOneDimVectorRegressionModel):
    """
    Decision tree regression based on sklearn.tree.DecisionTreeRegressor, with support for
    visualising the fitted tree(s).
    """
    def __init__(self, random_state=42, **model_args):
        """
        :param random_state: the random seed for reproducibility
        :param model_args: additional arguments to pass on to DecisionTreeRegressor,
            see https://scikit-learn.org/stable/modules/generated/sklearn.tree.DecisionTreeRegressor.html
        """
        super().__init__(sklearn.tree.DecisionTreeRegressor, random_state=random_state, **model_args)

    def plot(self, predicted_var_name=None, figsize=None) -> plt.Figure:
        """
        Plots the fitted decision tree via matplotlib.

        :param predicted_var_name: the predicted variable name for which to plot the model (if multiple; None is
            admissible if there is only one predicted variable)
        :param figsize: the figure size (width, height) in inches, passed on to plt.figure
        :return: the matplotlib figure containing the rendered tree
        """
        model = self.get_sklearn_model(predicted_var_name)
        fig = plt.figure(figsize=figsize)
        sklearn.tree.plot_tree(model, feature_names=self.get_model_input_variable_names())
        return fig

    def plot_graphviz_pdf(self, dot_path, predicted_var_name=None):
        """
        Exports the fitted decision tree via graphviz, writing a .dot file and a rendered PDF.

        :param dot_path: the path to a .dot file that will be created, alongside which a rendered PDF file
            (with added suffix ".pdf") will be placed
        :param predicted_var_name: the predicted variable name for which to plot the model (if multiple; None is
            admissible if there is only one predicted variable)
        """
        # graphviz is an optional dependency; import locally so the module loads without it
        import graphviz
        dot = sklearn.tree.export_graphviz(self.get_sklearn_model(predicted_var_name), out_file=None,
            feature_names=self.get_model_input_variable_names(), filled=True)
        graphviz.Source(dot).render(dot_path)

    def is_sample_weight_supported(self) -> bool:
        return True