Coverage for src/sensai/torch/torch_data.py: 67%
386 statements
« prev ^ index » next coverage.py v7.6.1, created at 2024-08-13 22:17 +0000
« prev ^ index » next coverage.py v7.6.1, created at 2024-08-13 22:17 +0000
1import logging
2from abc import ABC, abstractmethod
3import math
4from typing import Tuple, Sequence, Optional, Union, List, Iterator
6import numpy as np
7import pandas as pd
8import sklearn.preprocessing
9import torch
10from torch.autograd import Variable
12from .. import normalisation
13from ..data import DataFrameSplitter, DataFrameSplitterFractional
14from ..data_transformation import DFTSkLearnTransformer
15from ..util.dtype import to_float_array
16from ..util.pickle import setstate
19log = logging.getLogger(__name__)
def to_tensor(d: Union[torch.Tensor, np.ndarray, list], cuda=False):
    """
    Converts the given data to a tensor, optionally moving it to the CUDA device.

    :param d: a tensor (which is used as is), a numpy array or a list (which is converted via numpy)
    :param cuda: whether to move the resulting tensor to the CUDA device
    :return: the tensor
    :raises ValueError: if ``d`` is of an unsupported type
    """
    if not isinstance(d, torch.Tensor):
        if isinstance(d, np.ndarray):
            d = torch.from_numpy(d)
        elif isinstance(d, list):
            d = torch.from_numpy(np.array(d))
        else:
            raise ValueError(f"Cannot convert object of type {type(d)} to a tensor")
    if cuda:
        # Tensor.cuda() does not operate in place; it returns the tensor residing on the device,
        # so the result must be retained
        d = d.cuda()
    return d
class TensorScaler(ABC):
    """
    Base class for invertible scaling/normalisation operations that are applied to tensors
    """

    @abstractmethod
    def cuda(self):
        """
        Makes the components of this scaler use CUDA
        """

    @abstractmethod
    def normalise(self, tensor: torch.Tensor) -> torch.Tensor:
        """
        Scales/normalises the given tensor

        :param tensor: the tensor to which scaling/normalisation shall be applied
        :return: the scaled/normalised tensor
        """

    @abstractmethod
    def denormalise(self, tensor: torch.Tensor) -> torch.Tensor:
        """
        Reverses the operation performed by method normalise

        :param tensor: the tensor which shall be denormalised
        :return: the denormalised tensor
        """
class TensorScalerCentreAndScale(TensorScaler):
    """
    Scaler which normalises by first subtracting a centre and then multiplying by a scale factor,
    where each of the two steps is optional
    """
    def __init__(self, centre: Optional[torch.Tensor] = None, scale: Optional[torch.Tensor] = None):
        """
        :param centre: the offset to subtract during normalisation; if None, no centring is applied
        :param scale: the factor to multiply by (after centring) during normalisation; if None, no scaling is applied
        """
        self.centre = centre
        self.scale = scale

    def cuda(self):
        """
        Moves the centre and scale tensors (where present) to the CUDA device
        """
        if self.scale is not None:
            self.scale = self.scale.cuda()
        if self.centre is not None:
            self.centre = self.centre.cuda()

    def normalise(self, tensor: torch.Tensor) -> torch.Tensor:
        """
        :param tensor: the tensor to normalise; it is not modified
        :return: the normalised tensor (the given tensor itself if neither centre nor scale is set)
        """
        # Out-of-place operations are used deliberately: in-place arithmetic would silently modify the
        # caller's data and is rejected by torch for leaf tensors that require gradients
        if self.centre is not None:
            tensor = tensor - self.centre
        if self.scale is not None:
            tensor = tensor * self.scale
        return tensor

    def denormalise(self, tensor: torch.Tensor) -> torch.Tensor:
        """
        :param tensor: the tensor to denormalise; it is not modified
        :return: the denormalised tensor (the given tensor itself if neither centre nor scale is set)
        """
        # inverse of normalise: undo the steps in reverse order (again without modifying the argument)
        if self.scale is not None:
            tensor = tensor / self.scale
        if self.centre is not None:
            tensor = tensor + self.centre
        return tensor
class TensorScalerFromVectorDataScaler(TensorScalerCentreAndScale):
    """
    Tensor scaler whose centre and scale are derived from a VectorDataScaler
    """
    def __init__(self, vector_data_scaler: normalisation.VectorDataScaler, cuda: bool):
        """
        :param vector_data_scaler: the scaler whose ``translate`` and ``scale`` attributes are used
        :param cuda: whether to move the resulting tensors to the CUDA device
        """
        source_scale = vector_data_scaler.scale
        # the source scale is treated as the inverse of the multiplicative factor used by this class
        scale = None if source_scale is None else 1.0 / torch.from_numpy(source_scale).float()

        source_translate = vector_data_scaler.translate
        centre = None if source_translate is None else torch.from_numpy(source_translate).float()

        super().__init__(centre=centre, scale=scale)
        if cuda:
            self.cuda()

    def __setstate__(self, state):
        # A state containing the key "translate" uses the old representation, in which
        # scale is actually the inverse scale and must therefore be inverted
        uses_old_representation = "translate" in state
        if uses_old_representation and state["scale"] is not None:
            state["scale"] = 1.0 / state["scale"]
        setstate(TensorScalerFromVectorDataScaler, self, state, renamed_properties={"translate": "centre"})
class TensorScalerIdentity(TensorScaler):
    """
    Scaler which leaves tensors unchanged
    """
    def cuda(self):
        """
        Does nothing, as this scaler has no components
        """
        return None

    def normalise(self, tensor: torch.Tensor) -> torch.Tensor:
        """
        :param tensor: the tensor to normalise
        :return: the very same tensor
        """
        return tensor

    def denormalise(self, tensor: torch.Tensor) -> torch.Tensor:
        """
        :param tensor: the tensor to denormalise
        :return: the very same tensor
        """
        return tensor
class TensorScalerFromDFTSkLearnTransformer(TensorScalerCentreAndScale):
    """
    Tensor scaler whose centre and scale are taken from the sklearn transformer wrapped by a DFTSkLearnTransformer;
    only RobustScaler is handled
    """
    def __init__(self, dft: DFTSkLearnTransformer):
        """
        :param dft: the data frame transformer whose sklearn transformer provides the (fitted) parameters
        :raises ValueError: if the wrapped sklearn transformer is of an unhandled type
        """
        trans = dft.sklearnTransformer
        if not isinstance(trans, sklearn.preprocessing.RobustScaler):
            raise ValueError(f"sklearn transformer of type '{trans.__class__}' is unhandled")

        centre_array, scale_array = trans.center_, trans.scale_

        centre = None
        if centre_array is not None:
            centre = torch.from_numpy(centre_array).float()

        scale = None
        if scale_array is not None:
            # for RobustScaler, the reciprocal of scale_ is used as the multiplicative factor
            scale = 1.0 / torch.from_numpy(scale_array).float()

        super().__init__(centre=centre, scale=scale)
class Tensoriser(ABC):
    """
    Represents a method for transforming a data frame into one or more tensors to be processed by a neural network model
    """
    def tensorise(self, df: pd.DataFrame) -> Union[torch.Tensor, List[torch.Tensor]]:
        """
        Applies the transformation and validates that the length of the result matches the length of the data frame.

        :param df: the data frame to transform
        :return: the tensor or the sequence of tensors produced by method _tensorise
        :raises Exception: if several tensors are returned whose lengths differ or if the length
            of the result does not match the length of the data frame
        """
        result = self._tensorise(df)
        # tuples are handled like lists, as a tuple of tensors would otherwise be mistaken for a single
        # tensor, i.e. the number of tensors would be compared to the number of rows
        if isinstance(result, (list, tuple)):
            lengths = {len(t) for t in result}
            if len(lengths) != 1:
                raise Exception("Lengths of tensors inconsistent")
            length = lengths.pop()
        else:
            length = len(result)
        if length != len(df):
            raise Exception(f"{self} produced result of length {length} for DataFrame of shape {df.shape}")
        return result

    @abstractmethod
    def _tensorise(self, df: pd.DataFrame) -> Union[torch.Tensor, List[torch.Tensor]]:
        """
        :param df: the data frame to transform
        :return: a tensor or a sequence of tensors, each having the same length as the data frame
        """
        pass

    @abstractmethod
    def fit(self, df: pd.DataFrame, model=None):
        """
        :param df: the data frame with which to fit this tensoriser
        :param model: the model in the context of which the fitting takes place (if any).
            The fitting process may set parameters within the model that can only be determined from the (pre-tensorised) data.
        """
        pass
class RuleBasedTensoriser(Tensoriser, ABC):
    """
    Base class for tensorisers whose transformation of data frames into tensors is determined by a
    predefined set of rules, such that no fitting is required
    """
    def fit(self, df: pd.DataFrame, model=None):
        # nothing to fit: the transformation does not depend on the data
        return None
class TensoriserDataFrameFloatValuesMatrix(RuleBasedTensoriser):
    """
    Tensoriser which converts the values of a data frame to a single float tensor
    """
    def _tensorise(self, df: pd.DataFrame) -> torch.Tensor:
        """
        :param df: the data frame to convert
        :return: a float tensor created from the float array representation of the data frame
        """
        return torch.from_numpy(to_float_array(df)).float()
class TensoriserClassLabelIndices(RuleBasedTensoriser):
    """
    Tensoriser which converts a single-column data frame containing class label indices to a long tensor
    """
    def _tensorise(self, df: pd.DataFrame) -> torch.Tensor:
        """
        :param df: a data frame with exactly one column, which contains the class label indices
        :return: a long tensor containing the values of the column
        :raises ValueError: if the data frame does not have exactly one column
        """
        if len(df.columns) != 1:
            raise ValueError("Expected a single column containing the class label indices")
        return torch.from_numpy(df[df.columns[0]].values).long()
class DataUtil(ABC):
    """Interface for DataUtil classes, which are used to process data for neural networks"""

    @abstractmethod
    def split_into_tensors(self, fractional_size_of_first_set) \
            -> Tuple[Tuple[torch.Tensor, torch.Tensor], Tuple[torch.Tensor, torch.Tensor]]:
        """
        Splits the data set into two parts

        :param fractional_size_of_first_set: the desired fractional size of the first set
        :return: a tuple (A, B) where A and B are tuples (in, out) with input and output data
        """

    @abstractmethod
    def get_output_tensor_scaler(self) -> TensorScaler:
        """
        :return: the scaler with which model outputs are to be scaled
        """

    @abstractmethod
    def get_input_tensor_scaler(self) -> TensorScaler:
        """
        :return: the scaler with which model inputs are to be scaled
        """

    @abstractmethod
    def model_output_dim(self) -> int:
        """
        :return: the dimensionality that is to be output by the model to be trained
        """

    @abstractmethod
    def input_dim(self):
        """
        :return: the dimensionality of the input or None if it is variable
        """
class VectorDataUtil(DataUtil):
    """
    Data utility for inputs and outputs given as data frames, which supports normalisation, splitting and tensorisation
    """
    def __init__(self,
            inputs: pd.DataFrame,
            outputs: pd.DataFrame,
            cuda: bool,
            normalisation_mode=normalisation.NormalisationMode.NONE,
            differing_output_normalisation_mode=None,
            input_tensoriser: Optional[Tensoriser] = None,
            output_tensoriser: Optional[Tensoriser] = None,
            data_frame_splitter: Optional[DataFrameSplitter] = None):
        """
        :param inputs: the data frame of inputs
        :param outputs: the data frame of outputs
        :param cuda: whether to apply CUDA
        :param normalisation_mode: the normalisation mode to use for inputs and (unless differing_output_normalisation_mode
            is specified) outputs
        :param differing_output_normalisation_mode: the normalisation mode to apply to outputs, overriding normalisation_mode;
            if None, use normalisation_mode
        :param input_tensoriser: the tensoriser to apply to inputs; if None, use TensoriserDataFrameFloatValuesMatrix
        :param output_tensoriser: the tensoriser to apply to outputs; if None, use TensoriserDataFrameFloatValuesMatrix
        :param data_frame_splitter: the splitter with which to compute split indices; if None, use a fractional
            split without shuffling
        """
        if inputs.shape[0] != outputs.shape[0]:
            raise ValueError("Output length must be equal to input length")
        self.inputs = inputs
        self.outputs = outputs

        if input_tensoriser is None:
            input_tensoriser = TensoriserDataFrameFloatValuesMatrix()
        self.inputTensoriser = input_tensoriser
        if output_tensoriser is None:
            output_tensoriser = TensoriserDataFrameFloatValuesMatrix()
        self.outputTensoriser = output_tensoriser

        self.inputVectorDataScaler = normalisation.VectorDataScaler(self.inputs, normalisation_mode)
        self.inputTensorScaler = TensorScalerFromVectorDataScaler(self.inputVectorDataScaler, cuda)

        output_normalisation_mode = differing_output_normalisation_mode
        if output_normalisation_mode is None:
            output_normalisation_mode = normalisation_mode
        self.outputVectorDataScaler = normalisation.VectorDataScaler(self.outputs, output_normalisation_mode)
        self.outputTensorScaler = TensorScalerFromVectorDataScaler(self.outputVectorDataScaler, cuda)

        self.dataFrameSplitter = data_frame_splitter

    def __len__(self):
        return len(self.inputs)

    def get_output_tensor_scaler(self):
        return self.outputTensorScaler

    def get_input_tensor_scaler(self):
        return self.inputTensorScaler

    def _compute_split_indices(self, fractional_size_of_first_set):
        """
        :param fractional_size_of_first_set: the fractional size of the first set
        :return: a pair of index collections for the first and the second set respectively
        """
        splitter = self.dataFrameSplitter
        if splitter is None:
            # The default is a plain fractional split that does not shuffle. Shuffling is usually unnecessary,
            # since in evaluation contexts the evaluator may already have shuffled the data (unless the user
            # explicitly disabled this). Moreover, by not shuffling, we allow users to order the data manually
            # such that desirable fractional splits are obtained (although users may, of course, simply
            # override the splitter in order to achieve any desired split).
            splitter = DataFrameSplitterFractional(shuffle=False)
        first_indices, second_indices = splitter.compute_split_indices(self.inputs, fractional_size_of_first_set)
        return first_indices, second_indices

    def split_into_tensors(self, fractional_size_of_first_set):
        first_indices, second_indices = self._compute_split_indices(fractional_size_of_first_set)
        first_tensors = self._tensors_for_indices(first_indices)
        second_tensors = self._tensors_for_indices(second_indices)
        return first_tensors, second_tensors

    def _data_frames_for_indices(self, indices):
        """
        :param indices: the positional indices of the rows to select
        :return: a pair (input data frame, output data frame) containing the selected rows
        """
        return self.inputs.iloc[indices], self.outputs.iloc[indices]

    def _tensors_for_indices(self, indices):
        input_df, output_df = self._data_frames_for_indices(indices)
        return self._tensors_for_data_frames(input_df, output_df)

    def _tensors_for_data_frames(self, input_df, output_df):
        """
        Applies normalisation (if any) and subsequently tensorises the given data frames

        :param input_df: the data frame of inputs
        :param output_df: the data frame of outputs
        :return: a pair (tensorised inputs, tensorised outputs)
        """
        no_normalisation = normalisation.NormalisationMode.NONE

        if self.inputVectorDataScaler.normalisation_mode != no_normalisation:
            normalised_inputs = self.inputVectorDataScaler.get_normalised_array(input_df)
            input_df = pd.DataFrame(normalised_inputs, columns=input_df.columns, index=input_df.index)

        if self.outputVectorDataScaler.normalisation_mode != no_normalisation:
            normalised_outputs = self.outputVectorDataScaler.get_normalised_array(output_df)
            output_df = pd.DataFrame(normalised_outputs, columns=output_df.columns, index=output_df.index)

        input_tensors = self.inputTensoriser.tensorise(input_df)
        output_tensors = self.outputTensoriser.tensorise(output_df)
        return input_tensors, output_tensors

    def split_into_data_sets(self, fractional_size_of_first_set, cuda: bool, tensorise_dynamically=False) \
            -> Tuple["TorchDataSet", "TorchDataSet"]:
        """
        :param fractional_size_of_first_set: the fractional size of the first data set
        :param cuda: whether the data sets shall move generated tensors to the CUDA device
        :param tensorise_dynamically: whether tensorisation shall take place on demand (per batch) rather than
            up front; this is not supported in conjunction with data scaling
        :return: a pair of data sets
        """
        if not tensorise_dynamically:
            (x_first, y_first), (x_second, y_second) = self.split_into_tensors(fractional_size_of_first_set)
            return TorchDataSetFromTensors(x_first, y_first, cuda), TorchDataSetFromTensors(x_second, y_second, cuda)

        no_normalisation = normalisation.NormalisationMode.NONE
        if self.inputVectorDataScaler.normalisation_mode != no_normalisation or \
                self.outputVectorDataScaler.normalisation_mode != no_normalisation:
            raise Exception("Dynamic tensorisation is not supported when using data scaling")

        first_indices, second_indices = self._compute_split_indices(fractional_size_of_first_set)
        first_inputs, first_outputs = self._data_frames_for_indices(first_indices)
        second_inputs, second_outputs = self._data_frames_for_indices(second_indices)
        first_data_set = TorchDataSetFromDataFramesDynamicallyTensorised(first_inputs, first_outputs, cuda,
            input_tensoriser=self.inputTensoriser, output_tensoriser=self.outputTensoriser)
        second_data_set = TorchDataSetFromDataFramesDynamicallyTensorised(second_inputs, second_outputs, cuda,
            input_tensoriser=self.inputTensoriser, output_tensoriser=self.outputTensoriser)
        return first_data_set, second_data_set

    def input_dim(self):
        return self.inputs.shape[1]

    def output_dim(self):
        """
        :return: the dimensionality of the outputs (ground truth values)
        """
        return self.outputs.shape[1]

    def model_output_dim(self):
        return self.output_dim()
class ClassificationVectorDataUtil(VectorDataUtil):
    """
    Vector data utility for classification, where the outputs consist of a single column containing the class index
    and are never normalised
    """
    def __init__(self,
            inputs: pd.DataFrame,
            outputs: pd.DataFrame,
            cuda,
            num_classes,
            normalisation_mode=normalisation.NormalisationMode.NONE,
            input_tensoriser: Tensoriser = None,
            output_tensoriser: Tensoriser = None,
            data_frame_splitter: Optional[DataFrameSplitter] = None):
        """
        :param inputs: the data frame of inputs
        :param outputs: the data frame of outputs, which must have exactly one column (the class index)
        :param cuda: whether to apply CUDA
        :param num_classes: the number of classes, which is used as the model output dimension
        :param normalisation_mode: the normalisation mode to use for inputs
        :param input_tensoriser: the tensoriser to apply to inputs (passed on to the super-class)
        :param output_tensoriser: the tensoriser to apply to outputs; if None, use TensoriserClassLabelIndices
        :param data_frame_splitter: the splitter with which to compute split indices (passed on to the super-class)
        """
        if len(outputs.columns) != 1:
            raise Exception(f"Exactly one output dimension (the class index) is required, got {len(outputs.columns)}")
        if output_tensoriser is None:
            effective_output_tensoriser = TensoriserClassLabelIndices()
        else:
            effective_output_tensoriser = output_tensoriser
        super().__init__(inputs, outputs, cuda,
            normalisation_mode=normalisation_mode,
            differing_output_normalisation_mode=normalisation.NormalisationMode.NONE,
            input_tensoriser=input_tensoriser,
            output_tensoriser=effective_output_tensoriser,
            data_frame_splitter=data_frame_splitter)
        self.numClasses = num_classes

    def model_output_dim(self):
        # the model is to produce one output per class
        return self.numClasses
class TorchDataSet:
    """
    Interface for data sets which provide their data in batches.

    Note that this class does not derive from ABC, so the abstract method declarations are not
    enforced upon instantiation.
    """

    @abstractmethod
    def iter_batches(self, batch_size: int, shuffle: bool = False, input_only=False) -> Iterator[Union[Tuple[torch.Tensor, torch.Tensor],
            Tuple[Sequence[torch.Tensor], torch.Tensor], torch.Tensor, Sequence[torch.Tensor]]]:
        """
        Provides an iterator over batches from the data set.

        :param batch_size: the maximum size of each batch
        :param shuffle: whether to shuffle the data set
        :param input_only: whether to provide only inputs (rather than inputs and corresponding outputs).
            If true, provide only inputs, where inputs can either be a tensor or a tuple of tensors.
            If false, provide a pair (i, o) with inputs and corresponding outputs (o is always a tensor).
            Some data sets may only be able to provide inputs, in which case input_only=False should lead to an
            exception.
        """

    @abstractmethod
    def size(self) -> Optional[int]:
        """
        Returns the total size of the data set (number of data points) if it is known.

        :return: the number of data points or None if the size is not known.
        """
class TorchDataSetProvider:
    """
    Base class for providers of data set splits, which additionally hold the tensor scalers and
    dimensions that are relevant for models being applied to the data
    """
    def __init__(self, input_tensor_scaler: Optional[TensorScaler] = None, output_tensor_scaler: Optional[TensorScaler] = None,
            input_dim: Optional[int] = None, model_output_dim: int = None):
        """
        :param input_tensor_scaler: the scaler to apply to inputs; if None, use an identity scaler
        :param output_tensor_scaler: the scaler to apply to outputs; if None, use an identity scaler
        :param input_dim: the input dimension (may be None)
        :param model_output_dim: the model output dimension, which must be provided (not None)
        :raises ValueError: if model_output_dim is None
        """
        if input_tensor_scaler is None:
            input_tensor_scaler = TensorScalerIdentity()
        if output_tensor_scaler is None:
            output_tensor_scaler = TensorScalerIdentity()
        if model_output_dim is None:
            raise ValueError("The model output dimension must be provided")
        self.inputTensorScaler = input_tensor_scaler
        self.outputTensorScaler = output_tensor_scaler
        self.inputDim = input_dim
        self.modelOutputDim = model_output_dim

    @abstractmethod
    def provide_split(self, fractional_size_of_first_set: float) -> Tuple[TorchDataSet, TorchDataSet]:
        """
        Provides two data sets, which could, for example, serve as training and validation sets.

        :param fractional_size_of_first_set: the fractional size of the first data set
        :return: a tuple of data sets (A, B) where A has (approximately) the given fractional size and B encompasses
            the remainder of the data
        """
        pass

    def get_output_tensor_scaler(self) -> TensorScaler:
        """
        :return: the scaler that applies to outputs
        """
        return self.outputTensorScaler

    def get_input_tensor_scaler(self) -> TensorScaler:
        """
        :return: the scaler that applies to inputs
        """
        return self.inputTensorScaler

    def get_model_output_dim(self) -> int:
        """
        :return: the number of output dimensions that would be required to be generated by the model to match this dataset.
        """
        return self.modelOutputDim

    def get_input_dim(self) -> Optional[int]:
        """
        :return: the number of input dimensions that was specified for this dataset.
            For models that accept variable input sizes (such as RNNs), this may be None.
        """
        return self.inputDim
class TensorTuple:
    """
    Wraps one or more tensors of equal length, allowing the contained tensors to be manipulated simultaneously
    """
    def __init__(self, tensors: Union[torch.Tensor, Sequence[torch.Tensor]]):
        """
        :param tensors: a single tensor or a sequence of tensors, all of which must have the same length
        :raises ValueError: if the number of distinct tensor lengths is not exactly one
        """
        if isinstance(tensors, torch.Tensor):
            tensors = [tensors]
        distinct_lengths = {len(tensor) for tensor in tensors}
        if len(distinct_lengths) != 1:
            raise ValueError("Not all tensors are of the same length")
        (self.length,) = distinct_lengths
        self.tensors = tensors

    def __len__(self):
        return self.length

    def __getitem__(self, key) -> "TensorTuple":
        # the same key is applied to every contained tensor
        return TensorTuple(tuple(tensor[key] for tensor in self.tensors))

    def cuda(self) -> "TensorTuple":
        """
        :return: a new instance containing the results of calling cuda() on each tensor
        """
        moved = [tensor.cuda() for tensor in self.tensors]
        return TensorTuple(moved)

    def tuple(self) -> Sequence[torch.Tensor]:
        """
        :return: the contained tensors as a tuple
        """
        return tuple(self.tensors)

    def item(self) -> Union[torch.Tensor, Sequence[torch.Tensor]]:
        """
        :return: the tensor itself if exactly one tensor is contained, otherwise the tuple of all tensors
        """
        return self.tensors[0] if len(self.tensors) == 1 else self.tuple()

    def concat(self, other: "TensorTuple") -> "TensorTuple":
        """
        :param other: the instance to append, which must contain the same number of tensors
        :return: a new instance where corresponding tensors are concatenated along dimension 0
        :raises ValueError: if the numbers of contained tensors differ
        """
        if len(self.tensors) != len(other.tensors):
            raise ValueError("Tensor tuples are incompatible")
        concatenated = []
        for own_tensor, other_tensor in zip(self.tensors, other.tensors):
            concatenated.append(torch.cat([own_tensor, other_tensor], dim=0))
        return TensorTuple(concatenated)
class TorchDataSetFromTensors(TorchDataSet):
    """
    Data set which is based on tensors that are fully available up front
    """
    def __init__(self, x: Union[torch.Tensor, Sequence[torch.Tensor]], y: Optional[torch.Tensor], cuda: bool):
        """
        :param x: the input tensor(s); if more than one, they must be of the same length (and a slice of each shall be provided to the
            model as an input in each batch)
        :param y: the output tensor
        :param cuda: whether any generated tensors shall be moved to the selected CUDA device
        :raises ValueError: if inputs and outputs are not of the same length
        """
        x = TensorTuple(x)
        y = TensorTuple(y) if y is not None else None
        if y is not None and len(x) != len(y):
            raise ValueError("Tensors are not of the same length")
        self.x = x
        self.y = y
        self.cuda = cuda

    def iter_batches(self, batch_size: int, shuffle: bool = False, input_only=False) -> Iterator[Union[Tuple[torch.Tensor, torch.Tensor],
            Tuple[Sequence[torch.Tensor], torch.Tensor], torch.Tensor, Sequence[torch.Tensor]]]:
        """
        :param batch_size: the maximum size of each batch (must be positive)
        :param shuffle: whether to iterate the data points in random order
        :param input_only: whether to provide only inputs; note that if this data set has no outputs,
            only inputs are provided regardless of this flag
        :return: an iterator over batches
        """
        tensor_tuples = (self.x, self.y) if not input_only and self.y is not None else (self.x,)
        yield from self._get_batches(tensor_tuples, batch_size, shuffle)

    def _get_batches(self, tensor_tuples: Sequence[TensorTuple], batch_size, shuffle):
        """
        :param tensor_tuples: the tensor tuples from which to draw batches, all of which must be of the same length
        :param batch_size: the maximum size of each batch (must be positive)
        :param shuffle: whether to iterate the data points in random order
        :return: a generator which, for each batch, yields the single batch item if only one tensor tuple was given and
            a tuple of batch items (one per tensor tuple) otherwise
        :raises ValueError: if the batch size is not positive
        """
        if batch_size <= 0:
            # a non-positive batch size would never advance the start index, resulting in an endless loop
            raise ValueError(f"batch_size must be positive, got {batch_size}")
        length = len(tensor_tuples[0])
        # the length check is independent of the batch and is thus performed only once
        if any(len(tensor_tuple) != length for tensor_tuple in tensor_tuples):
            raise Exception("Passed tensors of differing lengths")
        index = torch.randperm(length) if shuffle else torch.arange(length)
        start_idx = 0
        while start_idx < length:
            remaining_items = length - start_idx
            is_second_last_batch = batch_size < remaining_items <= 2 * batch_size
            if is_second_last_batch:
                # to avoid cases where the last batch is excessively small (1 item in the worst case, where e.g. batch
                # normalisation would not be applicable), we evenly distribute the items across the last two batches
                current_batch_size = math.ceil(remaining_items / 2)
            else:
                current_batch_size = batch_size
            end_idx = min(length, start_idx + current_batch_size)
            excerpt = index[start_idx:end_idx]
            batch = []
            for tensor_tuple in tensor_tuples:
                t = tensor_tuple[excerpt]
                if self.cuda:
                    t = t.cuda()
                item = t.item()
                if isinstance(item, tuple):
                    item = tuple(Variable(tensor) for tensor in item)
                else:
                    item = Variable(item)
                batch.append(item)
            yield batch[0] if len(batch) == 1 else tuple(batch)
            start_idx = end_idx

    def size(self):
        return len(self.x)
class TorchDataSetFromDataFramesPreTensorised(TorchDataSetFromTensors):
    """
    Data set based on data frames which are tensorised once upon construction
    """
    def __init__(self, input_df: pd.DataFrame, output_df: Optional[pd.DataFrame], cuda: bool,
            input_tensoriser: Optional[Tensoriser] = None, output_tensoriser: Optional[Tensoriser] = None):
        """
        :param input_df: the data frame of inputs
        :param output_df: the data frame of outputs (if any)
        :param cuda: whether any generated tensors shall be moved to the selected CUDA device
        :param input_tensoriser: the tensoriser to apply to inputs; if None, use TensoriserDataFrameFloatValuesMatrix
        :param output_tensoriser: the tensoriser to apply to outputs (if present); if None, use TensoriserDataFrameFloatValuesMatrix
        """
        input_tensoriser = TensoriserDataFrameFloatValuesMatrix() if input_tensoriser is None else input_tensoriser
        log.debug(f"Applying {input_tensoriser} to data frame of length {len(input_df)} ...")
        input_tensors = input_tensoriser.tensorise(input_df)

        output_tensors = None
        if output_df is not None:
            output_tensoriser = TensoriserDataFrameFloatValuesMatrix() if output_tensoriser is None else output_tensoriser
            log.debug(f"Applying {output_tensoriser} to data frame of length {len(output_df)} ...")
            output_tensors = output_tensoriser.tensorise(output_df)

        super().__init__(input_tensors, output_tensors, cuda)
class TorchDataSetFromDataFramesDynamicallyTensorised(TorchDataSet):
    """
    Data set based on data frames where tensorisation takes place on demand for each batch
    """
    def __init__(self, input_df: pd.DataFrame, output_df: Optional[pd.DataFrame], cuda: bool,
            input_tensoriser: Optional[Tensoriser] = None, output_tensoriser: Optional[Tensoriser] = None):
        """
        :param input_df: the data frame of inputs
        :param output_df: the data frame of outputs (if any), which must be of the same length as the input data frame
        :param cuda: whether any generated tensors shall be moved to the selected CUDA device
        :param input_tensoriser: the tensoriser to apply to inputs; if None, use TensoriserDataFrameFloatValuesMatrix
        :param output_tensoriser: the tensoriser to apply to outputs (if present); if None, use TensoriserDataFrameFloatValuesMatrix
        :raises ValueError: if the lengths of the data frames differ
        """
        self.inputDF = input_df
        self.outputDF = output_df
        self.cuda = cuda
        if input_tensoriser is None:
            input_tensoriser = TensoriserDataFrameFloatValuesMatrix()
        self.inputTensoriser = input_tensoriser
        if output_df is not None:
            if len(input_df) != len(output_df):
                raise ValueError("Lengths of input and output data frames must be equal")
            if output_tensoriser is None:
                output_tensoriser = TensoriserDataFrameFloatValuesMatrix()
        self.outputTensoriser = output_tensoriser

    def size(self) -> Optional[int]:
        return len(self.inputDF)

    def iter_batches(self, batch_size: int, shuffle: bool = False, input_only=False):
        """
        :param batch_size: the size of each batch except possibly the last one (must be positive)
        :param shuffle: whether to iterate the data points in random order
        :param input_only: whether to provide only inputs (rather than pairs of inputs and outputs)
        :return: a generator of batches
        :raises ValueError: if the batch size is not positive or if outputs are requested but no
            output data frame is present
        """
        if batch_size <= 0:
            # a non-positive batch size would never advance the position, resulting in an endless loop
            raise ValueError(f"batch_size must be positive, got {batch_size}")
        if not input_only and self.outputDF is None:
            raise ValueError("This data set contains no outputs; batches can only be provided with input_only=True")
        length = len(self.inputDF)
        index = torch.randperm(length) if shuffle else torch.arange(length)
        i = 0
        while i < length:
            batch_indices = index[i:i + batch_size]
            input_tensors = TensorTuple(self.inputTensoriser.tensorise(self.inputDF.iloc[batch_indices]))
            if self.cuda:
                input_tensors = input_tensors.cuda()
            if input_only:
                yield input_tensors.item()
            else:
                output_tensors = TensorTuple(self.outputTensoriser.tensorise(self.outputDF.iloc[batch_indices]))
                if self.cuda:
                    output_tensors = output_tensors.cuda()
                yield input_tensors.item(), output_tensors.item()
            i += batch_size
class TorchDataSetFromDataFrames(TorchDataSet):
    """
    Data set based on data frames, which delegates to either a pre-tensorised or a dynamically tensorised data set
    """
    def __init__(self, input_df: pd.DataFrame, output_df: Optional[pd.DataFrame], cuda: bool,
            input_tensoriser: Optional[Tensoriser] = None, output_tensoriser: Optional[Tensoriser] = None,
            tensorise_dynamically=False):
        """
        :param input_df: the data frame of inputs
        :param output_df: the data frame of outputs (if any)
        :param cuda: whether any generated tensors shall be moved to the selected CUDA device
        :param input_tensoriser: the tensoriser to apply to inputs (passed on to the underlying data set)
        :param output_tensoriser: the tensoriser to apply to outputs (passed on to the underlying data set)
        :param tensorise_dynamically: whether to tensorise on demand for each batch rather than once upon construction
        """
        if tensorise_dynamically:
            data_set_class = TorchDataSetFromDataFramesDynamicallyTensorised
        else:
            data_set_class = TorchDataSetFromDataFramesPreTensorised
        self._torchDataSet: TorchDataSet = data_set_class(input_df, output_df, cuda,
            input_tensoriser=input_tensoriser, output_tensoriser=output_tensoriser)

    def iter_batches(self, batch_size: int, shuffle: bool = False, input_only=False):
        # delegate to the underlying data set
        yield from self._torchDataSet.iter_batches(batch_size, shuffle=shuffle, input_only=input_only)

    def size(self) -> Optional[int]:
        underlying_size = self._torchDataSet.size()
        return underlying_size
class TorchDataSetProviderFromDataUtil(TorchDataSetProvider):
    """
    Data set provider which obtains scalers, dimensions and tensor splits from a DataUtil
    """
    def __init__(self, data_util: DataUtil, cuda: bool):
        """
        :param data_util: the data utility from which to obtain scalers, dimensions and splits
        :param cuda: whether the provided data sets shall move generated tensors to the CUDA device
        """
        super().__init__(
            input_tensor_scaler=data_util.get_input_tensor_scaler(),
            output_tensor_scaler=data_util.get_output_tensor_scaler(),
            input_dim=data_util.input_dim(),
            model_output_dim=data_util.model_output_dim())
        self.dataUtil = data_util
        self.cuda = cuda

    def provide_split(self, fractional_size_of_first_set: float) -> Tuple[TorchDataSet, TorchDataSet]:
        (first_x, first_y), (second_x, second_y) = self.dataUtil.split_into_tensors(fractional_size_of_first_set)
        first_data_set = TorchDataSetFromTensors(first_x, first_y, self.cuda)
        second_data_set = TorchDataSetFromTensors(second_x, second_y, self.cuda)
        return first_data_set, second_data_set
class TorchDataSetProviderFromVectorDataUtil(TorchDataSetProvider):
    """
    Data set provider which obtains scalers, dimensions and data set splits from a VectorDataUtil
    """
    def __init__(self, data_util: VectorDataUtil, cuda: bool, tensorise_dynamically=False):
        """
        :param data_util: the data utility from which to obtain scalers, dimensions and splits
        :param cuda: whether the provided data sets shall move generated tensors to the CUDA device
        :param tensorise_dynamically: whether the provided data sets shall tensorise on demand
        """
        super().__init__(
            input_tensor_scaler=data_util.get_input_tensor_scaler(),
            output_tensor_scaler=data_util.get_output_tensor_scaler(),
            input_dim=data_util.input_dim(),
            model_output_dim=data_util.model_output_dim())
        self.dataUtil = data_util
        self.cuda = cuda
        self.tensoriseDynamically = tensorise_dynamically

    def provide_split(self, fractional_size_of_first_set: float) -> Tuple[TorchDataSet, TorchDataSet]:
        data_sets = self.dataUtil.split_into_data_sets(fractional_size_of_first_set, self.cuda,
            tensorise_dynamically=self.tensoriseDynamically)
        return data_sets
class TensorTransformer(ABC):
    """
    Interface for transformations which map a tensor to a tensor
    """

    @abstractmethod
    def transform(self, t: torch.Tensor) -> torch.Tensor:
        """
        :param t: the tensor to transform
        :return: the transformed tensor
        """