Coverage for src/sensai/sklearn/sklearn_regression.py: 80%
55 statements
import logging
from typing import Union, Optional

import sklearn.dummy
import sklearn.ensemble
import sklearn.linear_model
import sklearn.neighbors
import sklearn.neural_network
import sklearn.svm
import sklearn.tree
from matplotlib import pyplot as plt

from .sklearn_base import AbstractSkLearnMultipleOneDimVectorRegressionModel, AbstractSkLearnMultiDimVectorRegressionModel, \
    FeatureImportanceProviderSkLearnRegressionMultipleOneDim, FeatureImportanceProviderSkLearnRegressionMultiDim

log = logging.getLogger(__name__)


class SkLearnRandomForestVectorRegressionModel(AbstractSkLearnMultipleOneDimVectorRegressionModel,
        FeatureImportanceProviderSkLearnRegressionMultipleOneDim):
    def __init__(self, n_estimators=100, min_samples_leaf=10, random_state=42, **model_args):
        super().__init__(sklearn.ensemble.RandomForestRegressor,
            n_estimators=n_estimators, min_samples_leaf=min_samples_leaf, random_state=random_state, **model_args)


class SkLearnLinearRegressionVectorRegressionModel(AbstractSkLearnMultiDimVectorRegressionModel,
        FeatureImportanceProviderSkLearnRegressionMultiDim):
    def __init__(self, fit_intercept=True, **model_args):
        """
        :param fit_intercept: whether to determine the intercept, i.e. the constant term which is not scaled with an input feature value;
            set to False if the data is already centred
        :param model_args: see https://scikit-learn.org/stable/modules/generated/sklearn.linear_model.LinearRegression.html
        """
        super().__init__(sklearn.linear_model.LinearRegression, fit_intercept=fit_intercept, **model_args)


class SkLearnLinearRidgeRegressionVectorRegressionModel(AbstractSkLearnMultiDimVectorRegressionModel,
        FeatureImportanceProviderSkLearnRegressionMultiDim):
    """
    Linear least squares with L2 regularisation
    """
    def __init__(self, alpha=1.0, fit_intercept=True, solver="auto", max_iter=None, tol=1e-3, **model_args):
        """
        :param alpha: multiplies the L2 term, controlling regularisation strength
        :param fit_intercept: whether to determine the intercept, i.e. the constant term which is not scaled with an input feature value;
            set to False if the data is already centred
        :param solver: the solver to use in the computational routines ("auto" chooses it automatically based on the data)
        :param max_iter: the maximum number of iterations for iterative solvers (None uses the solver-specific default)
        :param tol: the precision of the solution
        :param model_args: see https://scikit-learn.org/stable/modules/generated/sklearn.linear_model.Ridge.html#sklearn.linear_model.Ridge
        """
        super().__init__(sklearn.linear_model.Ridge, alpha=alpha, fit_intercept=fit_intercept, max_iter=max_iter, tol=tol,
            solver=solver, **model_args)


class SkLearnLinearLassoRegressionVectorRegressionModel(AbstractSkLearnMultiDimVectorRegressionModel,
        FeatureImportanceProviderSkLearnRegressionMultiDim):
    """
    Linear least squares with L1 regularisation, a.k.a. the lasso
    """
    def __init__(self, alpha=1.0, fit_intercept=True, max_iter=1000, tol=0.0001, **model_args):
        """
        :param alpha: multiplies the L1 term, controlling regularisation strength
        :param fit_intercept: whether to determine the intercept, i.e. the constant term which is not scaled with an input feature value;
            set to False if the data is already centred
        :param max_iter: the maximum number of iterations
        :param tol: the tolerance for the optimisation
        :param model_args: see https://scikit-learn.org/stable/modules/generated/sklearn.linear_model.Lasso.html#sklearn.linear_model.Lasso
        """
        super().__init__(sklearn.linear_model.Lasso, alpha=alpha, fit_intercept=fit_intercept, max_iter=max_iter, tol=tol, **model_args)


class SkLearnMultiLayerPerceptronVectorRegressionModel(AbstractSkLearnMultiDimVectorRegressionModel):
    def __init__(self,
            hidden_layer_sizes=(100,), activation: str = "relu",
            solver: str = "adam", batch_size: Union[int, str] = "auto", random_state: Optional[int] = 42,
            max_iter: int = 200, early_stopping: bool = False, n_iter_no_change: int = 10, **model_args):
        """
        :param hidden_layer_sizes: the sequence of hidden layer sizes
        :param activation: {"identity", "logistic", "tanh", "relu"} the activation function to use for hidden layers (the one used for the
            output layer is always 'identity')
        :param solver: {"adam", "lbfgs", "sgd"} the name of the solver to apply
        :param batch_size: the batch size or "auto" for min(200, data set size)
        :param random_state: the random seed for reproducibility; use None if it shall not be specifically defined
        :param max_iter: the number of iterations (gradient steps for L-BFGS, epochs for other solvers)
        :param early_stopping: whether to use early stopping (stop training after n_iter_no_change epochs without improvement)
        :param n_iter_no_change: the number of iterations after which to stop early (if early_stopping is enabled)
        :param model_args: additional arguments to pass on to MLPRegressor,
            see https://scikit-learn.org/stable/modules/generated/sklearn.neural_network.MLPRegressor.html
        """
        super().__init__(sklearn.neural_network.MLPRegressor,
            random_state=random_state, hidden_layer_sizes=hidden_layer_sizes, activation=activation, solver=solver, batch_size=batch_size,
            max_iter=max_iter, early_stopping=early_stopping, n_iter_no_change=n_iter_no_change, **model_args)
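
# Illustrative construction sketch (not part of the module): a two-hidden-layer MLP with early stopping,
# mirroring the constructor parameters documented above. The layer sizes, activation, batch size and
# iteration limits chosen here are arbitrary examples, not recommended defaults.
#
#   model = SkLearnMultiLayerPerceptronVectorRegressionModel(
#       hidden_layer_sizes=(64, 32), activation="tanh", solver="adam",
#       batch_size=32, max_iter=500, early_stopping=True, n_iter_no_change=20)
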
class SkLearnSVRVectorRegressionModel(AbstractSkLearnMultiDimVectorRegressionModel):
    def __init__(self, **model_args):
        super().__init__(sklearn.svm.SVR, **model_args)


class SkLearnLinearSVRVectorRegressionModel(AbstractSkLearnMultiDimVectorRegressionModel):
    def __init__(self, **model_args):
        super().__init__(sklearn.svm.LinearSVR, **model_args)


class SkLearnGradientBoostingVectorRegressionModel(AbstractSkLearnMultipleOneDimVectorRegressionModel):
    def __init__(self, random_state=42, **model_args):
        super().__init__(sklearn.ensemble.GradientBoostingRegressor, random_state=random_state, **model_args)


class SkLearnKNeighborsVectorRegressionModel(AbstractSkLearnMultiDimVectorRegressionModel):
    def __init__(self, **model_args):
        super().__init__(sklearn.neighbors.KNeighborsRegressor, **model_args)


class SkLearnExtraTreesVectorRegressionModel(AbstractSkLearnMultipleOneDimVectorRegressionModel):
    def __init__(self, n_estimators=100, min_samples_leaf=10, random_state=42, **model_args):
        super().__init__(sklearn.ensemble.ExtraTreesRegressor,
            n_estimators=n_estimators, min_samples_leaf=min_samples_leaf, random_state=random_state, **model_args)


class SkLearnDummyVectorRegressionModel(AbstractSkLearnMultipleOneDimVectorRegressionModel):
    def __init__(self, strategy='mean', constant=None, quantile=None):
        super().__init__(sklearn.dummy.DummyRegressor,
            strategy=strategy, constant=constant, quantile=quantile)


class SkLearnDecisionTreeVectorRegressionModel(AbstractSkLearnMultipleOneDimVectorRegressionModel):
    def __init__(self, random_state=42, **model_args):
        super().__init__(sklearn.tree.DecisionTreeRegressor, random_state=random_state, **model_args)

    def plot(self, predicted_var_name=None, figsize=None) -> plt.Figure:
        model = self.get_sklearn_model(predicted_var_name)
        fig = plt.figure(figsize=figsize)
        sklearn.tree.plot_tree(model, feature_names=self.get_model_input_variable_names())
        return fig

    def plot_graphviz_pdf(self, dot_path, predicted_var_name=None):
        """
        :param dot_path: the path to a .dot file that will be created, alongside which a rendered PDF file (with added suffix ".pdf")
            will be placed
        :param predicted_var_name: the predicted variable name for which to plot the model (if multiple; None is admissible if
            there is only one predicted variable)
        """
        import graphviz
        dot = sklearn.tree.export_graphviz(self.get_sklearn_model(predicted_var_name), out_file=None,
            feature_names=self.get_model_input_variable_names(), filled=True)
        graphviz.Source(dot).render(dot_path)
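
A minimal end-to-end usage sketch for the decision tree model above (illustrative only: it assumes the DataFrame-based fit/predict interface provided by sensai's vector model base classes, and the column names and data are made up):

    import pandas as pd

    from sensai.sklearn.sklearn_regression import SkLearnDecisionTreeVectorRegressionModel

    # toy data with hypothetical column names
    X = pd.DataFrame({"x1": [0.0, 1.0, 2.0, 3.0], "x2": [1.0, 0.5, 0.25, 0.125]})
    Y = pd.DataFrame({"y": [0.1, 0.9, 2.1, 3.0]})

    model = SkLearnDecisionTreeVectorRegressionModel(min_samples_leaf=1)  # passed through to DecisionTreeRegressor via model_args
    model.fit(X, Y)  # DataFrame-based fit/predict is assumed from the sensai base classes
    print(model.predict(X))

    model.plot_graphviz_pdf("tree.dot")  # writes tree.dot and renders tree.dot.pdf (requires the graphviz package and binaries)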