# Source code for sensai.data_transformation.sklearn_transformer
import functools
import logging
from typing import Optional, Sequence, Union, Any, Callable
from sklearn.preprocessing import MaxAbsScaler, StandardScaler, RobustScaler, MinMaxScaler
import numpy as np
from typing_extensions import Protocol
# Module-level logger, named after this module.
log = logging.getLogger(__name__)
# Type of the inputs accepted by the transformers in this module:
# either a numpy array or a nested sequence (a sequence of sequences of values).
TransformableArray = Union[np.ndarray, Sequence[Sequence[Any]]]
def to_2d_array(arr: TransformableArray) -> np.ndarray:
    """
    Ensures that the given input is a 2D numpy array.

    :param arr: a numpy array or a nested sequence; anything that is not already a numpy array
        is converted via ``np.array``
    :return: the input as a numpy array (an input that already is a numpy array is returned as is)
    :raises ValueError: if the (converted) array does not have exactly two dimensions
    """
    if not isinstance(arr, np.ndarray):
        arr = np.array(arr)
    if arr.ndim != 2:
        raise ValueError(f"Got array of shape {arr.shape}; expected 2D array")
    return arr
class ManualScaler(SkLearnTransformerProtocol):
    """
    A scaler whose parameters are not learnt from data but manually defined
    """
    def __init__(self, centre: Optional[float] = None, scale: Optional[float] = None):
        """
        :param centre: the value to subtract from all values (if any)
        :param scale: the value with which to scale all values (after removing the centre)
        """
        # None means "no-op" for the respective parameter: centre 0.0, scale 1.0
        self.centre = centre if centre is not None else 0.0
        self.scale = scale if scale is not None else 1.0

    def transform(self, arr: TransformableArray) -> np.ndarray:
        """
        Applies the scaling, computing ``(x - centre) * scale`` for every value x.

        :param arr: the values to transform (2D array or nested sequence)
        :return: the transformed values as a 2D array
        :raises ValueError: if the input is not two-dimensional
        """
        arr = to_2d_array(arr)
        return (arr - self.centre) * self.scale

    def inverse_transform(self, arr: TransformableArray) -> np.ndarray:
        """
        Reverts :meth:`transform`, computing ``x / scale + centre`` for every value x.
        Note that a scale of 0 makes the transformation non-invertible.

        :param arr: the (previously transformed) values (2D array or nested sequence)
        :return: the values in the original space as a 2D array
        :raises ValueError: if the input is not two-dimensional
        """
        arr = to_2d_array(arr)
        return (arr / self.scale) + self.centre
# noinspection PyPep8Naming
class SkLearnTransformerFactoryFactory:
    """
    Collection of static methods, each of which returns a factory, i.e. a function which,
    when called without any arguments, creates a new scaler instance with the parameters
    that were passed to the static method.
    """
    # NOTE: The methods deliberately carry the names of the scaler classes they create.
    # Within the method bodies, these names still refer to the module-level classes,
    # because names defined in a class body are not visible inside its functions.

    @staticmethod
    def StandardScaler(with_mean=True, with_std=True) -> Callable[[], StandardScaler]:
        """
        :param with_mean: passed on to the StandardScaler constructor
        :param with_std: passed on to the StandardScaler constructor
        :return: a function, which when called without any arguments, produces the respective StandardScaler instance.
        """
        return functools.partial(StandardScaler, with_mean=with_mean, with_std=with_std)

    @staticmethod
    def RobustScaler(quantile_range=(25, 75), with_scaling=True, with_centering=True) -> Callable[[], RobustScaler]:
        """
        :param quantile_range: a tuple (a, b) where a and b > a (both in range 0..100) are the percentiles which determine the scaling.
            Specifically, each value (after centering) is scaled with 1.0/(vb-va) where va and vb are the values corresponding to the
            percentiles a and b respectively, such that, in the symmetric case where va and vb are equally far from the centre,
            va will be transformed into -0.5 and vb into 0.5.
            In a uniformly distributed data set ranging from `min` to `max`, the default values of a=25 and b=75 will thus result in
            `min` being mapped to -1 and `max` being mapped to 1.
        :param with_scaling: whether to apply scaling based on quantile_range.
        :param with_centering: whether to apply centering by subtracting the median.
        :return: a function, which when called without any arguments, produces the respective RobustScaler instance.
        """
        return functools.partial(RobustScaler, quantile_range=quantile_range, with_scaling=with_scaling, with_centering=with_centering)

    @staticmethod
    def ManualScaler(centre: Optional[float] = None, scale: Optional[float] = None) -> Callable[[], ManualScaler]:
        """
        :param centre: the value to subtract from all values (if any)
        :param scale: the value with which to scale all values (after removing the centre)
        :return: a function, which when called without any arguments, produces the respective scaler instance.
        """
        return functools.partial(ManualScaler, centre=centre, scale=scale)