Coverage for src/sensai/sklearn/sklearn_regression.py: 71%
79 statements
« prev ^ index » next coverage.py v7.6.1, created at 2024-11-29 18:29 +0000
import logging
from typing import Union, Optional

import sklearn.dummy
import sklearn.ensemble
import sklearn.linear_model
import sklearn.neighbors
import sklearn.neural_network
import sklearn.svm
import sklearn.tree
from matplotlib import pyplot as plt

from .sklearn_base import AbstractSkLearnMultipleOneDimVectorRegressionModel, AbstractSkLearnMultiDimVectorRegressionModel, \
    FeatureImportanceProviderSkLearnRegressionMultipleOneDim, FeatureImportanceProviderSkLearnRegressionMultiDim
14log = logging.getLogger(__name__)
class SkLearnRandomForestVectorRegressionModel(AbstractSkLearnMultipleOneDimVectorRegressionModel,
        FeatureImportanceProviderSkLearnRegressionMultipleOneDim):
    """
    Random forest regression model (wraps sklearn's RandomForestRegressor).
    """
    def __init__(self, n_estimators=100, min_samples_leaf=10, random_state=42, **model_args):
        """
        :param n_estimators: the number of trees in the forest
        :param min_samples_leaf: the minimum number of samples required at a leaf node
        :param random_state: the random seed for reproducibility
        :param model_args: additional arguments to pass on to RandomForestRegressor,
            see https://scikit-learn.org/stable/modules/generated/sklearn.ensemble.RandomForestRegressor.html
        """
        super().__init__(sklearn.ensemble.RandomForestRegressor,
            random_state=random_state,
            n_estimators=n_estimators,
            min_samples_leaf=min_samples_leaf,
            **model_args)

    def is_sample_weight_supported(self) -> bool:
        return True
class SkLearnLinearRegressionVectorRegressionModel(AbstractSkLearnMultiDimVectorRegressionModel,
        FeatureImportanceProviderSkLearnRegressionMultiDim):
    """
    Ordinary least squares linear regression (wraps sklearn's LinearRegression).
    """
    def __init__(self, fit_intercept=True, **model_args):
        """
        :param fit_intercept: whether to determine the intercept, i.e. the constant term which is not scaled with an input feature value;
            set to False if the data is already centred
        :param model_args: see https://scikit-learn.org/stable/modules/generated/sklearn.linear_model.LinearRegression.html
        """
        super().__init__(sklearn.linear_model.LinearRegression,
            fit_intercept=fit_intercept,
            **model_args)

    def is_sample_weight_supported(self) -> bool:
        return True
class SkLearnLinearRidgeRegressionVectorRegressionModel(AbstractSkLearnMultiDimVectorRegressionModel,
        FeatureImportanceProviderSkLearnRegressionMultiDim):
    """
    Linear least squares with L2 regularisation
    """
    def __init__(self, alpha=1.0, fit_intercept=True, solver="auto", max_iter=None, tol=1e-3, **model_args):
        """
        :param alpha: multiplies the L2 term, controlling regularisation strength
        :param fit_intercept: whether to determine the intercept, i.e. the constant term which is not scaled with an input feature value;
            set to False if the data is already centred
        :param solver: the solver to use in the computational routines ("auto" selects one automatically based on the data)
        :param max_iter: the maximum number of iterations for iterative solvers (None uses the solver's default)
        :param tol: the precision of the solution (convergence tolerance for iterative solvers)
        :param model_args: see https://scikit-learn.org/stable/modules/generated/sklearn.linear_model.Ridge.html#sklearn.linear_model.Ridge
        """
        super().__init__(sklearn.linear_model.Ridge, alpha=alpha, fit_intercept=fit_intercept, max_iter=max_iter, tol=tol,
            solver=solver, **model_args)

    def is_sample_weight_supported(self) -> bool:
        return True
class SkLearnLinearLassoRegressionVectorRegressionModel(AbstractSkLearnMultiDimVectorRegressionModel,
        FeatureImportanceProviderSkLearnRegressionMultiDim):
    """
    Linear least squares with L1 regularisation, a.k.a. the lasso
    """
    def __init__(self, alpha=1.0, fit_intercept=True, max_iter=1000, tol=0.0001, **model_args):
        """
        :param alpha: multiplies the L1 term, controlling regularisation strength
        :param fit_intercept: whether to determine the intercept, i.e. the constant term which is not scaled with an input feature value;
            set to False if the data is already centred
        :param max_iter: the maximum number of iterations
        :param tol: the optimisation tolerance
        :param model_args: see https://scikit-learn.org/stable/modules/generated/sklearn.linear_model.Lasso.html#sklearn.linear_model.Lasso
        """
        super().__init__(sklearn.linear_model.Lasso,
            alpha=alpha,
            fit_intercept=fit_intercept,
            max_iter=max_iter,
            tol=tol,
            **model_args)

    def is_sample_weight_supported(self) -> bool:
        return True
class SkLearnMultiLayerPerceptronVectorRegressionModel(AbstractSkLearnMultiDimVectorRegressionModel):
    """
    Multi-layer perceptron regression model (wraps sklearn's MLPRegressor).
    """
    def __init__(self,
            hidden_layer_sizes=(100,), activation: str = "relu",
            solver: str = "adam", batch_size: Union[int, str] = "auto", random_state: Optional[int] = 42,
            max_iter: int = 200, early_stopping: bool = False, n_iter_no_change: int = 10, **model_args):
        """
        :param hidden_layer_sizes: the sequence of hidden layer sizes
        :param activation: {"identity", "logistic", "tanh", "relu"} the activation function to use for hidden layers (the one used for the
            output layer is always 'identity')
        :param solver: {"adam", "lbfgs", "sgd"} the name of the solver to apply
        :param batch_size: the batch size or "auto" for min(200, data set size)
        :param random_state: the random seed for reproducibility; use None if it shall not be specifically defined
        :param max_iter: the number of iterations (gradient steps for L-BFGS, epochs for other solvers)
        :param early_stopping: whether to use early stopping (stop training after n_iter_no_change epochs without improvement)
        :param n_iter_no_change: the number of iterations after which to stop early (if early_stopping is enabled)
        :param model_args: additional arguments to pass on to MLPRegressor,
            see https://scikit-learn.org/stable/modules/generated/sklearn.neural_network.MLPRegressor.html
        """
        super().__init__(sklearn.neural_network.MLPRegressor,
            random_state=random_state, hidden_layer_sizes=hidden_layer_sizes, activation=activation, solver=solver, batch_size=batch_size,
            max_iter=max_iter, early_stopping=early_stopping, n_iter_no_change=n_iter_no_change, **model_args)

    def is_sample_weight_supported(self) -> bool:
        return False
class SkLearnSVRVectorRegressionModel(AbstractSkLearnMultiDimVectorRegressionModel):
    """
    Epsilon-support vector regression (wraps sklearn's SVR).
    """
    def __init__(self, **model_args):
        """
        :param model_args: see https://scikit-learn.org/stable/modules/generated/sklearn.svm.SVR.html
        """
        super().__init__(sklearn.svm.SVR, **model_args)

    def is_sample_weight_supported(self) -> bool:
        return True
class SkLearnLinearSVRVectorRegressionModel(AbstractSkLearnMultiDimVectorRegressionModel):
    """
    Linear support vector regression (wraps sklearn's LinearSVR).
    """
    def __init__(self, **model_args):
        """
        :param model_args: see https://scikit-learn.org/stable/modules/generated/sklearn.svm.LinearSVR.html
        """
        super().__init__(sklearn.svm.LinearSVR, **model_args)

    def is_sample_weight_supported(self) -> bool:
        return True
class SkLearnGradientBoostingVectorRegressionModel(AbstractSkLearnMultipleOneDimVectorRegressionModel):
    """
    Gradient-boosted tree regression model (wraps sklearn's GradientBoostingRegressor).
    """
    def __init__(self, random_state=42, **model_args):
        """
        :param random_state: the random seed for reproducibility
        :param model_args: additional arguments to pass on to GradientBoostingRegressor,
            see https://scikit-learn.org/stable/modules/generated/sklearn.ensemble.GradientBoostingRegressor.html
        """
        super().__init__(sklearn.ensemble.GradientBoostingRegressor,
            random_state=random_state,
            **model_args)

    def is_sample_weight_supported(self) -> bool:
        return True
class SkLearnKNeighborsVectorRegressionModel(AbstractSkLearnMultiDimVectorRegressionModel):
    """
    K-nearest-neighbours regression model (wraps sklearn's KNeighborsRegressor).
    """
    def __init__(self, **model_args):
        """
        :param model_args: see https://scikit-learn.org/stable/modules/generated/sklearn.neighbors.KNeighborsRegressor.html
        """
        super().__init__(sklearn.neighbors.KNeighborsRegressor, **model_args)

    def is_sample_weight_supported(self) -> bool:
        return False
class SkLearnExtraTreesVectorRegressionModel(AbstractSkLearnMultipleOneDimVectorRegressionModel):
    """
    Extremely randomised trees regression model (wraps sklearn's ExtraTreesRegressor).
    """
    def __init__(self, n_estimators=100, min_samples_leaf=10, random_state=42, **model_args):
        """
        :param n_estimators: the number of trees in the forest
        :param min_samples_leaf: the minimum number of samples required at a leaf node
        :param random_state: the random seed for reproducibility
        :param model_args: additional arguments to pass on to ExtraTreesRegressor,
            see https://scikit-learn.org/stable/modules/generated/sklearn.ensemble.ExtraTreesRegressor.html
        """
        super().__init__(sklearn.ensemble.ExtraTreesRegressor,
            random_state=random_state,
            n_estimators=n_estimators,
            min_samples_leaf=min_samples_leaf,
            **model_args)

    def is_sample_weight_supported(self) -> bool:
        return True
class SkLearnDummyVectorRegressionModel(AbstractSkLearnMultipleOneDimVectorRegressionModel):
    """
    A trivial baseline regression model (wraps sklearn's DummyRegressor), which predicts a constant value
    derived from the training targets; useful as a sanity-check baseline.
    """
    def __init__(self, strategy='mean', constant=None, quantile=None, **model_args):
        """
        :param strategy: the strategy used to generate predictions ("mean", "median", "quantile" or "constant")
        :param constant: the constant value to predict when using strategy "constant"
        :param quantile: the quantile to predict when using strategy "quantile" (a value in [0, 1])
        :param model_args: additional arguments to pass on to DummyRegressor
            (added for consistency with the other model wrappers in this module; passing nothing preserves prior behaviour)
        """
        super().__init__(sklearn.dummy.DummyRegressor,
            strategy=strategy, constant=constant, quantile=quantile, **model_args)

    def is_sample_weight_supported(self) -> bool:
        return True
class SkLearnDecisionTreeVectorRegressionModel(AbstractSkLearnMultipleOneDimVectorRegressionModel):
    """
    Decision tree regression model (wraps sklearn's DecisionTreeRegressor).
    """
    def __init__(self, random_state=42, **model_args):
        """
        :param random_state: the random seed for reproducibility
        :param model_args: additional arguments to pass on to DecisionTreeRegressor,
            see https://scikit-learn.org/stable/modules/generated/sklearn.tree.DecisionTreeRegressor.html
        """
        super().__init__(sklearn.tree.DecisionTreeRegressor, random_state=random_state, **model_args)

    def plot(self, predicted_var_name=None, figsize=None) -> plt.Figure:
        """
        Plots the learnt decision tree using matplotlib.

        :param predicted_var_name: the predicted variable name for which to plot the model (if multiple; None is admissible if
            there is only one predicted variable)
        :param figsize: the figure size as a (width, height) tuple, or None for the default
        :return: the matplotlib figure containing the rendered tree
        """
        model = self.get_sklearn_model(predicted_var_name)
        fig = plt.figure(figsize=figsize)
        sklearn.tree.plot_tree(model, feature_names=self.get_model_input_variable_names())
        return fig

    def plot_graphviz_pdf(self, dot_path, predicted_var_name=None):
        """
        Exports the learnt decision tree via graphviz.

        :param dot_path: the path to a .dot file that will be created, alongside which a rendered PDF file (with added suffix ".pdf")
            will be placed
        :param predicted_var_name: the predicted variable name for which to plot the model (if multiple; None is admissible if
            there is only one predicted variable)
        """
        # local import: graphviz is an optional dependency, required only for this method
        import graphviz
        dot = sklearn.tree.export_graphviz(self.get_sklearn_model(predicted_var_name), out_file=None,
            feature_names=self.get_model_input_variable_names(), filled=True)
        graphviz.Source(dot).render(dot_path)

    def is_sample_weight_supported(self) -> bool:
        return True