Source code for sensai.normalisation
from enum import Enum
from typing import Union
import numpy as np
import pandas as pd
import sklearn.preprocessing
from .util.dtype import to_float_array
class NormalisationMode(Enum):
    """
    Enumerates the normalisation strategies that can be selected for a vector data scaler.
    """

    # no scaling parameters are computed; data is left unscaled
    NONE = "none"
    # a single scale factor: the largest absolute value found in the entire array
    MAX_ALL = "max_all"
    # one scale factor per column: the largest absolute value within that column
    MAX_BY_COLUMN = "max_by_column"
    # per-column mean and scale as fitted by sklearn's StandardScaler
    STANDARDISED = "standardised"
class VectorDataScaler:
    """
    Scales (and, depending on the mode, shifts) vector data using parameters that are computed
    once from a reference data frame.

    Normalisation computes ``(x - translate) / scale`` and denormalisation applies the inverse,
    ``x * scale + translate``. Either parameter may be None, in which case the corresponding
    step is skipped.
    """

    def __init__(self, data_frame: pd.DataFrame, normalisation_mode: NormalisationMode):
        """
        :param data_frame: the data frame whose values are used to determine the scaling parameters
        :param normalisation_mode: the normalisation mode to apply
        """
        self.normalisation_mode = normalisation_mode
        self.scale, self.translate = self._compute_scaling_params(data_frame.values, normalisation_mode)
        self.dimension_names = list(data_frame.columns)

    @classmethod
    def _compute_scaling_params(cls, raw_array: np.ndarray, normalisation_mode: NormalisationMode):
        """
        Determines the scaling parameters for the given data and mode.

        :param raw_array: numpy array containing raw data; must be two-dimensional unless the mode is
            NormalisationMode.NONE
        :param normalisation_mode: the normalisation mode:
            NONE yields no parameters,
            MAX_ALL scales all columns by the maximum absolute value in the entire array,
            MAX_BY_COLUMN scales each column by its own maximum absolute value,
            STANDARDISED uses the per-column mean and scale fitted by sklearn's StandardScaler
        :return: a pair (scale, translate), where each element is either None or an array with one
            entry per column
        :raises ValueError: if the array is not two-dimensional (for modes other than NONE) or
            if the mode is unknown
        """
        if normalisation_mode == NormalisationMode.NONE:
            return None, None

        if len(raw_array.shape) != 2:
            raise ValueError(f"Only 2D arrays are supported by {cls.__name__} with mode {normalisation_mode}")

        if normalisation_mode == NormalisationMode.STANDARDISED:
            standard_scaler = sklearn.preprocessing.StandardScaler()
            standard_scaler.fit(raw_array)
            return standard_scaler.scale_, standard_scaler.mean_

        if normalisation_mode == NormalisationMode.MAX_ALL:
            dim = raw_array.shape[1]
            scale = np.full(dim, np.max(np.abs(raw_array)), dtype=float)
        elif normalisation_mode == NormalisationMode.MAX_BY_COLUMN:
            # astype returns a fresh float array, so the in-place fix-up below cannot affect the input
            scale = np.max(np.abs(raw_array), axis=0).astype(float)
        else:
            raise ValueError("Unknown normalization mode")

        # A maximum of zero (all-zero data/column) would cause a division by zero during normalisation;
        # use a neutral factor of 1 instead, which is also what StandardScaler does for constant columns.
        scale[scale == 0] = 1.0
        return scale, None

    @staticmethod
    def _array(data: Union[pd.DataFrame, np.ndarray]):
        """
        :param data: the data to convert
        :return: the result of passing the data through to_float_array
        """
        return to_float_array(data)

    def get_normalised_array(self, data: Union[pd.DataFrame, np.ndarray]) -> np.ndarray:
        """
        Applies the normalisation, i.e. subtracts the translation (if any) and then divides by the
        scale (if any).

        :param data: the data to normalise
        :return: the normalised array
        """
        result = self._array(data)
        if self.translate is not None:
            result = result - self.translate
        if self.scale is not None:
            result = result / self.scale
        return result

    def get_denormalised_array(self, data: Union[pd.DataFrame, np.ndarray]) -> np.ndarray:
        """
        Reverts the normalisation, i.e. multiplies by the scale (if any) and then adds the
        translation (if any).

        :param data: the normalised data
        :return: the denormalised array
        """
        result = self._array(data)
        if self.scale is not None:
            result = result * self.scale
        if self.translate is not None:
            result = result + self.translate
        return result