Coverage for src/sensai/sklearn/sklearn_regression.py: 80%


import logging
from typing import Union, Optional

import sklearn.dummy
import sklearn.ensemble
import sklearn.linear_model
import sklearn.neighbors
import sklearn.neural_network
import sklearn.svm
import sklearn.tree
from matplotlib import pyplot as plt

from .sklearn_base import AbstractSkLearnMultipleOneDimVectorRegressionModel, AbstractSkLearnMultiDimVectorRegressionModel, \
    FeatureImportanceProviderSkLearnRegressionMultipleOneDim, FeatureImportanceProviderSkLearnRegressionMultiDim

log = logging.getLogger(__name__)


class SkLearnRandomForestVectorRegressionModel(AbstractSkLearnMultipleOneDimVectorRegressionModel,
        FeatureImportanceProviderSkLearnRegressionMultipleOneDim):
    def __init__(self, n_estimators=100, min_samples_leaf=10, random_state=42, **model_args):
        super().__init__(sklearn.ensemble.RandomForestRegressor,
            n_estimators=n_estimators, min_samples_leaf=min_samples_leaf, random_state=random_state, **model_args)

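
# Usage sketch (added for illustration, not part of the original module): constructs the random
# forest model above and applies it to tabular data. It assumes the fit(X, Y)/predict(X) interface
# on pandas DataFrames that sensai's vector regression models inherit from their base classes;
# the DataFrame arguments below are placeholders supplied by the caller.
def _example_random_forest_usage(x_train, y_train, x_test):
    # a larger forest with smaller leaves than the defaults; any further keyword arguments
    # would be passed through to sklearn.ensemble.RandomForestRegressor via model_args
    model = SkLearnRandomForestVectorRegressionModel(n_estimators=200, min_samples_leaf=5)
    model.fit(x_train, y_train)  # assumed interface: input and output DataFrames
    return model.predict(x_test)  # assumed to return a DataFrame of predictions
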

class SkLearnLinearRegressionVectorRegressionModel(AbstractSkLearnMultiDimVectorRegressionModel,
        FeatureImportanceProviderSkLearnRegressionMultiDim):
    def __init__(self, fit_intercept=True, **model_args):
        """
        :param fit_intercept: whether to determine the intercept, i.e. the constant term which is not scaled with an input feature value;
            set to False if the data is already centred
        :param model_args: see https://scikit-learn.org/stable/modules/generated/sklearn.linear_model.LinearRegression.html
        """
        super().__init__(sklearn.linear_model.LinearRegression, fit_intercept=fit_intercept, **model_args)


class SkLearnLinearRidgeRegressionVectorRegressionModel(AbstractSkLearnMultiDimVectorRegressionModel,
        FeatureImportanceProviderSkLearnRegressionMultiDim):
    """
    Linear least squares with L2 regularisation
    """
    def __init__(self, alpha=1.0, fit_intercept=True, solver="auto", max_iter=None, tol=1e-3, **model_args):
        """
        :param alpha: multiplies the L2 term, controlling regularisation strength
        :param fit_intercept: whether to determine the intercept, i.e. the constant term which is not scaled with an input feature value;
            set to False if the data is already centred
        :param model_args: see https://scikit-learn.org/stable/modules/generated/sklearn.linear_model.Ridge.html#sklearn.linear_model.Ridge
        """
        super().__init__(sklearn.linear_model.Ridge, alpha=alpha, fit_intercept=fit_intercept, max_iter=max_iter, tol=tol,
            solver=solver, **model_args)


class SkLearnLinearLassoRegressionVectorRegressionModel(AbstractSkLearnMultiDimVectorRegressionModel,
        FeatureImportanceProviderSkLearnRegressionMultiDim):
    """
    Linear least squares with L1 regularisation, a.k.a. the lasso
    """
    def __init__(self, alpha=1.0, fit_intercept=True, max_iter=1000, tol=0.0001, **model_args):
        """
        :param alpha: multiplies the L1 term, controlling regularisation strength
        :param fit_intercept: whether to determine the intercept, i.e. the constant term which is not scaled with an input feature value;
            set to False if the data is already centred
        :param model_args: see https://scikit-learn.org/stable/modules/generated/sklearn.linear_model.Lasso.html#sklearn.linear_model.Lasso
        """
        super().__init__(sklearn.linear_model.Lasso, alpha=alpha, fit_intercept=fit_intercept, max_iter=max_iter, tol=tol, **model_args)

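
# Illustrative sketch (not from the original source): the two regularised linear models above
# differ only in the penalty term; alpha scales that penalty in both cases, so increasing it
# shrinks the coefficients more strongly (and, for the lasso, drives more of them to exactly zero).
# The alpha values below are arbitrary examples.
def _example_regularised_linear_models():
    ridge = SkLearnLinearRidgeRegressionVectorRegressionModel(alpha=10.0)
    lasso = SkLearnLinearLassoRegressionVectorRegressionModel(alpha=0.1, max_iter=5000)
    return ridge, lasso
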

class SkLearnMultiLayerPerceptronVectorRegressionModel(AbstractSkLearnMultiDimVectorRegressionModel):
    def __init__(self,
            hidden_layer_sizes=(100,), activation: str = "relu",
            solver: str = "adam", batch_size: Union[int, str] = "auto", random_state: Optional[int] = 42,
            max_iter: int = 200, early_stopping: bool = False, n_iter_no_change: int = 10, **model_args):
        """
        :param hidden_layer_sizes: the sequence of hidden layer sizes
        :param activation: {"identity", "logistic", "tanh", "relu"} the activation function to use for hidden layers (the one used for the
            output layer is always 'identity')
        :param solver: {"adam", "lbfgs", "sgd"} the name of the solver to apply
        :param batch_size: the batch size or "auto" for min(200, data set size)
        :param random_state: the random seed for reproducibility; use None if it shall not be specifically defined
        :param max_iter: the number of iterations (gradient steps for L-BFGS, epochs for other solvers)
        :param early_stopping: whether to use early stopping (stop training after n_iter_no_change epochs without improvement)
        :param n_iter_no_change: the number of iterations after which to stop early (if early_stopping is enabled)
        :param model_args: additional arguments to pass on to MLPRegressor,
            see https://scikit-learn.org/stable/modules/generated/sklearn.neural_network.MLPRegressor.html
        """
        super().__init__(sklearn.neural_network.MLPRegressor,
            random_state=random_state, hidden_layer_sizes=hidden_layer_sizes, activation=activation, solver=solver, batch_size=batch_size,
            max_iter=max_iter, early_stopping=early_stopping, n_iter_no_change=n_iter_no_change, **model_args)

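
# Illustrative sketch (not from the original source): configures the MLP model above with early
# stopping enabled. The keyword arguments shown are the constructor parameters defined above;
# validation_fraction is an example of an additional argument forwarded to sklearn's MLPRegressor
# via model_args.
def _example_mlp_with_early_stopping():
    return SkLearnMultiLayerPerceptronVectorRegressionModel(
        hidden_layer_sizes=(64, 32), activation="relu", solver="adam",
        max_iter=500, early_stopping=True, n_iter_no_change=20,
        validation_fraction=0.1)  # model_args: passed through to MLPRegressor
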

class SkLearnSVRVectorRegressionModel(AbstractSkLearnMultiDimVectorRegressionModel):
    def __init__(self, **model_args):
        super().__init__(sklearn.svm.SVR, **model_args)


class SkLearnLinearSVRVectorRegressionModel(AbstractSkLearnMultiDimVectorRegressionModel):
    def __init__(self, **model_args):
        super().__init__(sklearn.svm.LinearSVR, **model_args)


class SkLearnGradientBoostingVectorRegressionModel(AbstractSkLearnMultipleOneDimVectorRegressionModel):
    def __init__(self, random_state=42, **model_args):
        super().__init__(sklearn.ensemble.GradientBoostingRegressor, random_state=random_state, **model_args)


class SkLearnKNeighborsVectorRegressionModel(AbstractSkLearnMultiDimVectorRegressionModel):
    def __init__(self, **model_args):
        super().__init__(sklearn.neighbors.KNeighborsRegressor, **model_args)


class SkLearnExtraTreesVectorRegressionModel(AbstractSkLearnMultipleOneDimVectorRegressionModel):
    def __init__(self, n_estimators=100, min_samples_leaf=10, random_state=42, **model_args):
        super().__init__(sklearn.ensemble.ExtraTreesRegressor,
            n_estimators=n_estimators, min_samples_leaf=min_samples_leaf, random_state=random_state, **model_args)


class SkLearnDummyVectorRegressionModel(AbstractSkLearnMultipleOneDimVectorRegressionModel):
    def __init__(self, strategy='mean', constant=None, quantile=None):
        super().__init__(sklearn.dummy.DummyRegressor,
            strategy=strategy, constant=constant, quantile=quantile)


class SkLearnDecisionTreeVectorRegressionModel(AbstractSkLearnMultipleOneDimVectorRegressionModel):
    def __init__(self, random_state=42, **model_args):
        super().__init__(sklearn.tree.DecisionTreeRegressor, random_state=random_state, **model_args)

    def plot(self, predicted_var_name=None, figsize=None) -> plt.Figure:
        model = self.get_sklearn_model(predicted_var_name)
        fig = plt.figure(figsize=figsize)
        sklearn.tree.plot_tree(model, feature_names=self.get_model_input_variable_names())
        return fig

    def plot_graphviz_pdf(self, dot_path, predicted_var_name=None):
        """
        :param dot_path: the path to a .dot file that will be created, alongside which a rendered PDF file (with added suffix ".pdf")
            will be placed
        :param predicted_var_name: the predicted variable name for which to plot the model (if multiple; None is admissible if
            there is only one predicted variable)
        """
        import graphviz
        dot = sklearn.tree.export_graphviz(self.get_sklearn_model(predicted_var_name), out_file=None,
            feature_names=self.get_model_input_variable_names(), filled=True)
        graphviz.Source(dot).render(dot_path)

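
# Usage sketch (added for illustration, not part of the original module): visualises a fitted
# decision tree model using the two plotting methods defined above. It assumes the model has
# already been fitted via the inherited fit method; plot_graphviz_pdf additionally requires the
# graphviz Python package and the graphviz binaries to be installed. The file path below is a
# placeholder.
def _example_decision_tree_plots(fitted_model: SkLearnDecisionTreeVectorRegressionModel):
    fig = fitted_model.plot(figsize=(12, 8))  # matplotlib rendering of the tree
    fitted_model.plot_graphviz_pdf("decision_tree.dot")  # writes the .dot file and a rendered PDF
    return fig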