Coverage for src/sensai/torch/torch_data.py

1import logging

2from abc import ABC, abstractmethod

3import math

4from typing import Tuple, Sequence, Optional, Union, List, Iterator

6import numpy as np

7import pandas as pd

8import sklearn.preprocessing

9import torch

10from torch.autograd import Variable

12from .. import normalisation

13from ..data import DataFrameSplitter, DataFrameSplitterFractional

14from ..data_transformation import DFTSkLearnTransformer

15from ..util.dtype import to_float_array

16from ..util.pickle import setstate

19log = logging.getLogger(__name__)

def to_tensor(d: Union[torch.Tensor, np.ndarray, list], cuda=False):
    """
    Converts the given data to a torch tensor, optionally moving it to the CUDA device.

    :param d: the data to convert; a tensor is used as is, a numpy array or a list is converted via numpy
    :param cuda: whether to move the resulting tensor to the CUDA device
    :return: the tensor
    :raises ValueError: if ``d`` is neither a tensor, a numpy array nor a list
    """
    if not isinstance(d, torch.Tensor):
        if isinstance(d, np.ndarray):
            d = torch.from_numpy(d)
        elif isinstance(d, list):
            d = torch.from_numpy(np.array(d))
        else:
            raise ValueError(f"Cannot convert object of type {type(d)} to a tensor")
    if cuda:
        # Tensor.cuda() does not operate in place; it returns the tensor residing in CUDA memory,
        # so the result must be re-bound (previously it was discarded and the CPU tensor was returned)
        d = d.cuda()
    return d

class TensorScaler(ABC):
    """
    Interface for objects which scale/normalise tensors and can invert that operation
    """

    @abstractmethod
    def cuda(self):
        """
        Switches the components of this scaler to CUDA
        """

    @abstractmethod
    def normalise(self, tensor: torch.Tensor) -> torch.Tensor:
        """
        Scales/normalises the given tensor

        :param tensor: the tensor that is to be scaled/normalised
        :return: the tensor after scaling/normalisation
        """

    @abstractmethod
    def denormalise(self, tensor: torch.Tensor) -> torch.Tensor:
        """
        Inverts the operation performed by method normalise

        :param tensor: the (normalised) tensor that is to be denormalised
        :return: the tensor after denormalisation
        """

class TensorScalerCentreAndScale(TensorScaler):
    """
    Scaler which normalises by first subtracting a centre and then multiplying with a scale factor.
    Either component may be None, in which case the respective step is skipped.

    Note that both normalise and denormalise modify the passed tensor in place (and return it).
    """

    def __init__(self, centre: Optional[torch.Tensor] = None, scale: Optional[torch.Tensor] = None):
        """
        :param centre: the value(s) to subtract during normalisation (or None to apply no centring)
        :param scale: the factor(s) to multiply with during normalisation (or None to apply no scaling)
        """
        self.centre = centre
        self.scale = scale

    def cuda(self):
        # move whichever components are present to the CUDA device
        if self.centre is not None:
            self.centre = self.centre.cuda()
        if self.scale is not None:
            self.scale = self.scale.cuda()

    def normalise(self, tensor: torch.Tensor) -> torch.Tensor:
        centre, scale = self.centre, self.scale
        if centre is not None:
            tensor -= centre
        if scale is not None:
            tensor *= scale
        return tensor

    def denormalise(self, tensor: torch.Tensor) -> torch.Tensor:
        centre, scale = self.centre, self.scale
        # exact reverse of normalise: undo the scaling first, then the centring
        if scale is not None:
            tensor /= scale
        if centre is not None:
            tensor += centre
        return tensor

class TensorScalerFromVectorDataScaler(TensorScalerCentreAndScale):
    """
    Tensor scaler which takes its parameters from a VectorDataScaler
    """

    def __init__(self, vector_data_scaler: normalisation.VectorDataScaler, cuda: bool):
        """
        :param vector_data_scaler: the scaler from which to obtain the parameters (attributes ``scale`` and ``translate``)
        :param cuda: whether to move the resulting tensors to the CUDA device
        """
        if vector_data_scaler.scale is None:
            scale = None
        else:
            # the value provided by the vector data scaler is inverted to obtain the factor to multiply with
            scale = 1.0 / torch.from_numpy(vector_data_scaler.scale).float()
        centre = vector_data_scaler.translate
        if centre is not None:
            centre = torch.from_numpy(vector_data_scaler.translate).float()
        super().__init__(centre=centre, scale=scale)
        if cuda:
            self.cuda()

    def __setstate__(self, state):
        # old representation (identified by the presence of "translate") where scale is actually inverse scale
        if "translate" in state and state["scale"] is not None:
            state["scale"] = 1.0 / state["scale"]
        setstate(TensorScalerFromVectorDataScaler, self, state, renamed_properties={"translate": "centre"})

107

108

class TensorScalerIdentity(TensorScaler):
    """
    Scaler which leaves tensors unchanged
    """

    def cuda(self):
        """
        Does nothing, as this scaler has no components
        """

    def normalise(self, tensor: torch.Tensor) -> torch.Tensor:
        # identity: the very same tensor is handed back
        return tensor

    def denormalise(self, tensor: torch.Tensor) -> torch.Tensor:
        # identity: the very same tensor is handed back
        return tensor

118

119

class TensorScalerFromDFTSkLearnTransformer(TensorScalerCentreAndScale):
    """
    Tensor scaler which takes its parameters from the sklearn transformer wrapped by a DFTSkLearnTransformer.
    Only sklearn's RobustScaler is handled.
    """

    def __init__(self, dft: DFTSkLearnTransformer):
        """
        :param dft: the data frame transformer whose sklearn transformer provides the parameters
        :raises ValueError: if the wrapped sklearn transformer is not a RobustScaler
        """
        trans = dft.sklearnTransformer
        if not isinstance(trans, sklearn.preprocessing.RobustScaler):
            raise ValueError(f"sklearn transformer of type '{trans.__class__}' is unhandled")
        centre_array = trans.center_
        scale_array = trans.scale_
        centre = None if centre_array is None else torch.from_numpy(centre_array).float()
        scale = None
        if scale_array is not None:
            # the transformer's scale_ is inverted in order to obtain the factor to multiply with
            scale = 1.0 / torch.from_numpy(scale_array).float()
        super().__init__(centre=centre, scale=scale)

136

137

class Tensoriser(ABC):
    """
    Represents a method for transforming a data frame into one or more tensors to be processed by a neural network model
    """

    def tensorise(self, df: pd.DataFrame) -> Union[torch.Tensor, List[torch.Tensor]]:
        """
        Transforms the given data frame into a tensor or list of tensors and validates the result's length

        :param df: the data frame to transform
        :return: the tensor or list of tensors, each of which has the same length as the data frame
        :raises Exception: if the tensors in a list result differ in length or if the length of the result
            does not match the length of the data frame
        """
        result = self._tensorise(df)
        # isinstance (rather than an exact type comparison) ensures that list subclasses are also validated
        # based on the lengths of the contained tensors rather than the number of tensors
        if isinstance(result, list):
            lengths = {len(t) for t in result}
            if len(lengths) != 1:
                raise Exception("Lengths of tensors inconsistent")
            length = lengths.pop()
        else:
            length = len(result)
        if length != len(df):
            raise Exception(f"{self} produced result of length {length} for DataFrame of shape {df.shape}")
        return result

    @abstractmethod
    def _tensorise(self, df: pd.DataFrame) -> Union[torch.Tensor, List[torch.Tensor]]:
        """
        Performs the actual transformation

        :param df: the data frame to transform
        :return: the tensor or list of tensors
        """
        pass

    @abstractmethod
    def fit(self, df: pd.DataFrame, model=None):
        """
        :param df: the data frame with which to fit this tensoriser
        :param model: the model in the context of which the fitting takes place (if any).
            The fitting process may set parameters within the model that can only be determined from the (pre-tensorised) data.
        """
        pass

167

168

class RuleBasedTensoriser(Tensoriser, ABC):
    """
    Base class for tensorisers whose transformation of data frames into tensors is determined by a fixed
    set of rules, such that no fitting is necessary
    """

    def fit(self, df: pd.DataFrame, model=None):
        """
        Does nothing, since rule-based tensorisers need not be fitted

        :param df: ignored
        :param model: ignored
        """

175

176

class TensoriserDataFrameFloatValuesMatrix(RuleBasedTensoriser):
    """
    Tensoriser which converts the data frame to a float array (via ``to_float_array``) and
    provides it as a single float tensor
    """

    # NOTE: the return annotation was previously np.ndarray, but a torch tensor is returned
    def _tensorise(self, df: pd.DataFrame) -> torch.Tensor:
        return torch.from_numpy(to_float_array(df)).float()

180

181

class TensoriserClassLabelIndices(RuleBasedTensoriser):
    """
    Tensoriser for data frames with a single column of class label indices, which are provided as a
    tensor of type long
    """

    # NOTE: the return annotation was previously np.ndarray, but a torch tensor is returned
    def _tensorise(self, df: pd.DataFrame) -> torch.Tensor:
        """
        :param df: a data frame with exactly one column
        :return: the column's values as a long tensor
        :raises ValueError: if the data frame does not have exactly one column
        """
        if len(df.columns) != 1:
            raise ValueError("Expected a single column containing the class label indices")
        return torch.from_numpy(df[df.columns[0]].values).long()

187

188

class DataUtil(ABC):
    """Interface for DataUtil classes, which are used to process data for neural networks"""

    @abstractmethod
    def split_into_tensors(self, fractional_size_of_first_set) \
            -> Tuple[Tuple[torch.Tensor, torch.Tensor], Tuple[torch.Tensor, torch.Tensor]]:
        """
        Splits the data set into two parts

        :param fractional_size_of_first_set: the desired fractional size of the first part
        :return: a tuple (A, B) where A and B are tuples (in, out) with input and output data
        """

    @abstractmethod
    def get_output_tensor_scaler(self) -> TensorScaler:
        """
        Provides the scaler that is to be used for model outputs

        :return: the scaler
        """

    @abstractmethod
    def get_input_tensor_scaler(self) -> TensorScaler:
        """
        Provides the scaler that is to be used for model inputs

        :return: the scaler
        """

    @abstractmethod
    def model_output_dim(self) -> int:
        """
        :return: the number of dimensions which the model that is to be trained shall output
        """

    @abstractmethod
    def input_dim(self):
        """
        :return: the number of input dimensions or None if it is variable
        """

234

235

class VectorDataUtil(DataUtil):
    """
    DataUtil implementation for inputs and outputs given as data frames, supporting optional normalisation,
    configurable tensorisation and configurable splitting
    """

    def __init__(self,
            inputs: pd.DataFrame,
            outputs: pd.DataFrame,
            cuda: bool,
            normalisation_mode=normalisation.NormalisationMode.NONE,
            differing_output_normalisation_mode=None,
            input_tensoriser: Optional[Tensoriser] = None,
            output_tensoriser: Optional[Tensoriser] = None,
            data_frame_splitter: Optional[DataFrameSplitter] = None):
        """
        :param inputs: the data frame of inputs
        :param outputs: the data frame of outputs
        :param cuda: whether to apply CUDA
        :param normalisation_mode: the normalisation mode to use for inputs and (unless differing_output_normalisation_mode
            is specified) outputs
        :param differing_output_normalisation_mode: the normalisation mode to apply to outputs, overriding normalisation_mode;
            if None, use normalisation_mode
        :param input_tensoriser: the tensoriser to apply to inputs; if None, use TensoriserDataFrameFloatValuesMatrix
        :param output_tensoriser: the tensoriser to apply to outputs; if None, use TensoriserDataFrameFloatValuesMatrix
        :param data_frame_splitter: the splitter with which to compute splits; if None, use a fractional split
            without shuffling
        """
        if inputs.shape[0] != outputs.shape[0]:
            raise ValueError("Output length must be equal to input length")

        if input_tensoriser is None:
            input_tensoriser = TensoriserDataFrameFloatValuesMatrix()
        if output_tensoriser is None:
            output_tensoriser = TensoriserDataFrameFloatValuesMatrix()
        if differing_output_normalisation_mode is None:
            output_normalisation_mode = normalisation_mode
        else:
            output_normalisation_mode = differing_output_normalisation_mode

        self.inputs = inputs
        self.outputs = outputs
        self.inputTensoriser = input_tensoriser
        self.outputTensoriser = output_tensoriser
        self.inputVectorDataScaler = normalisation.VectorDataScaler(self.inputs, normalisation_mode)
        self.inputTensorScaler = TensorScalerFromVectorDataScaler(self.inputVectorDataScaler, cuda)
        self.outputVectorDataScaler = normalisation.VectorDataScaler(self.outputs, output_normalisation_mode)
        self.outputTensorScaler = TensorScalerFromVectorDataScaler(self.outputVectorDataScaler, cuda)
        self.dataFrameSplitter = data_frame_splitter

    def __len__(self):
        return len(self.inputs)

    def get_output_tensor_scaler(self):
        return self.outputTensorScaler

    def get_input_tensor_scaler(self):
        return self.inputTensorScaler

    def _compute_split_indices(self, fractional_size_of_first_set):
        """
        :param fractional_size_of_first_set: the desired fractional size of the first set
        :return: a pair of index collections for the first and second set respectively
        """
        if self.dataFrameSplitter is not None:
            splitter = self.dataFrameSplitter
        else:
            # By default, we use a simple fractional split without shuffling.
            # Shuffling is usually unnecessary, because in evaluation contexts, the data may have already been shuffled by
            # the evaluator (unless explicitly disabled by the user). Furthermore, not shuffling gives the user the
            # possibility to manually order the data in ways that result in desirable fractional splits (though the user
            # may, of course, simply override the splitter to achieve any desired split).
            splitter = DataFrameSplitterFractional(shuffle=False)
        first, second = splitter.compute_split_indices(self.inputs, fractional_size_of_first_set)
        return first, second

    def split_into_tensors(self, fractional_size_of_first_set):
        first, second = self._compute_split_indices(fractional_size_of_first_set)
        tensors_first = self._tensors_for_indices(first)
        tensors_second = self._tensors_for_indices(second)
        return tensors_first, tensors_second

    def _data_frames_for_indices(self, indices):
        """
        :param indices: the positional indices of the rows to select
        :return: a pair (input data frame, output data frame) containing the selected rows
        """
        return self.inputs.iloc[indices], self.outputs.iloc[indices]

    def _tensors_for_indices(self, indices):
        return self._tensors_for_data_frames(*self._data_frames_for_indices(indices))

    def _tensors_for_data_frames(self, input_df, output_df):
        """
        Applies normalisation (if any) followed by tensorisation

        :param input_df: the data frame of inputs
        :param output_df: the data frame of outputs
        :return: a pair (tensorised inputs, tensorised outputs)
        """
        no_normalisation = normalisation.NormalisationMode.NONE

        in_scaler = self.inputVectorDataScaler
        if in_scaler.normalisation_mode != no_normalisation:
            input_df = pd.DataFrame(in_scaler.get_normalised_array(input_df), columns=input_df.columns,
                index=input_df.index)

        out_scaler = self.outputVectorDataScaler
        if out_scaler.normalisation_mode != no_normalisation:
            output_df = pd.DataFrame(out_scaler.get_normalised_array(output_df), columns=output_df.columns,
                index=output_df.index)

        input_tensors = self.inputTensoriser.tensorise(input_df)
        output_tensors = self.outputTensoriser.tensorise(output_df)
        return input_tensors, output_tensors

    def split_into_data_sets(self, fractional_size_of_first_set, cuda: bool, tensorise_dynamically=False) \
            -> Tuple["TorchDataSet", "TorchDataSet"]:
        """
        Splits the data into two data sets

        :param fractional_size_of_first_set: the desired fractional size of the first data set
        :param cuda: whether the data sets shall move generated tensors to the CUDA device
        :param tensorise_dynamically: whether tensorisation shall take place when batches are generated rather
            than up front; this is supported only if no normalisation is applied
        :return: the pair of data sets
        """
        if not tensorise_dynamically:
            (x_first, y_first), (x_second, y_second) = self.split_into_tensors(fractional_size_of_first_set)
            return TorchDataSetFromTensors(x_first, y_first, cuda), TorchDataSetFromTensors(x_second, y_second, cuda)

        no_normalisation = normalisation.NormalisationMode.NONE
        if self.inputVectorDataScaler.normalisation_mode != no_normalisation or \
                self.outputVectorDataScaler.normalisation_mode != no_normalisation:
            raise Exception("Dynamic tensorisation is not supported when using data scaling")
        first, second = self._compute_split_indices(fractional_size_of_first_set)
        input_first, output_first = self._data_frames_for_indices(first)
        input_second, output_second = self._data_frames_for_indices(second)
        ds_first = TorchDataSetFromDataFramesDynamicallyTensorised(input_first, output_first, cuda,
            input_tensoriser=self.inputTensoriser, output_tensoriser=self.outputTensoriser)
        ds_second = TorchDataSetFromDataFramesDynamicallyTensorised(input_second, output_second, cuda,
            input_tensoriser=self.inputTensoriser, output_tensoriser=self.outputTensoriser)
        return ds_first, ds_second

    def input_dim(self):
        return self.inputs.shape[1]

    def output_dim(self):
        """
        :return: the dimensionality of the outputs (ground truth values)
        """
        return self.outputs.shape[1]

    def model_output_dim(self):
        return self.output_dim()

344

345

class ClassificationVectorDataUtil(VectorDataUtil):
    """
    VectorDataUtil for classification, where the outputs consist of a single column (the class index)
    which is never normalised
    """

    def __init__(self,
            inputs: pd.DataFrame,
            outputs: pd.DataFrame,
            cuda,
            num_classes,
            normalisation_mode=normalisation.NormalisationMode.NONE,
            input_tensoriser: Tensoriser = None,
            output_tensoriser: Tensoriser = None,
            data_frame_splitter: Optional[DataFrameSplitter] = None):
        """
        :param inputs: the data frame of inputs
        :param outputs: the data frame of outputs, which must have exactly one column
        :param cuda: whether to apply CUDA
        :param num_classes: the number of classes, which is reported as the model output dimension
        :param normalisation_mode: the normalisation mode to use for inputs
        :param input_tensoriser: the tensoriser to apply to inputs (if None, the super-class default applies)
        :param output_tensoriser: the tensoriser to apply to outputs; if None, use TensoriserClassLabelIndices
        :param data_frame_splitter: the splitter with which to compute splits (if None, the super-class default applies)
        """
        if len(outputs.columns) != 1:
            raise Exception(f"Exactly one output dimension (the class index) is required, got {len(outputs.columns)}")
        if output_tensoriser is None:
            output_tensoriser = TensoriserClassLabelIndices()
        super().__init__(inputs, outputs, cuda,
            normalisation_mode=normalisation_mode,
            differing_output_normalisation_mode=normalisation.NormalisationMode.NONE,
            input_tensoriser=input_tensoriser,
            output_tensoriser=output_tensoriser,
            data_frame_splitter=data_frame_splitter)
        self.numClasses = num_classes

    def model_output_dim(self):
        return self.numClasses

366

367

class TorchDataSet:
    """
    Interface for data sets which can provide their data in batches.

    NOTE: this class does not derive from ABC, so the abstractmethod decorators are not enforced at instantiation.
    """

    @abstractmethod
    def iter_batches(self, batch_size: int, shuffle: bool = False, input_only=False) -> Iterator[Union[Tuple[torch.Tensor, torch.Tensor],
            Tuple[Sequence[torch.Tensor], torch.Tensor], torch.Tensor, Sequence[torch.Tensor]]]:
        """
        Creates an iterator which provides the data set's content in batches.

        :param batch_size: the maximum number of data points per batch
        :param shuffle: whether the data set shall be shuffled
        :param input_only: whether only inputs shall be provided (instead of inputs along with the corresponding outputs).
            If true, provide only inputs, where inputs can either be a tensor or a tuple of tensors.
            If false, provide a pair (i, o) with inputs and corresponding outputs (o is always a tensor).
            Some data sets may only be able to provide inputs, in which case input_only=False should lead to an
            exception.
        """

    @abstractmethod
    def size(self) -> Optional[int]:
        """
        Provides the total size of the data set (number of data points), provided that it is known.

        :return: the number of data points or None if the size is not known.
        """

393

394

class TorchDataSetProvider:
    """
    Base class for providers of torch data sets, which additionally hold the tensor scalers and
    dimensions that apply to the data.

    NOTE: this class does not derive from ABC, so the abstractmethod decorator is not enforced at instantiation.
    """

    def __init__(self, input_tensor_scaler: Optional[TensorScaler] = None, output_tensor_scaler: Optional[TensorScaler] = None,
            input_dim: Optional[int] = None, model_output_dim: int = None):
        """
        :param input_tensor_scaler: the scaler for model inputs; if None, use the identity
        :param output_tensor_scaler: the scaler for model outputs; if None, use the identity
        :param input_dim: the input dimension (may be None)
        :param model_output_dim: the model output dimension, which must not be None
        :raises ValueError: if model_output_dim is None
        """
        if input_tensor_scaler is None:
            input_tensor_scaler = TensorScalerIdentity()
        if output_tensor_scaler is None:
            output_tensor_scaler = TensorScalerIdentity()
        if model_output_dim is None:
            raise ValueError("The model output dimension must be provided")
        self.inputTensorScaler = input_tensor_scaler
        self.outputTensorScaler = output_tensor_scaler
        self.inputDim = input_dim
        self.modelOutputDim = model_output_dim

    @abstractmethod
    def provide_split(self, fractional_size_of_first_set: float) -> Tuple[TorchDataSet, TorchDataSet]:
        """
        Creates two data sets, which may, for instance, be used as training and validation sets.

        :param fractional_size_of_first_set: the fractional size of the first data set
        :return: a tuple of data sets (A, B) where A has (approximately) the given fractional size and B encompasses
            the remainder of the data
        """

    def get_output_tensor_scaler(self) -> TensorScaler:
        return self.outputTensorScaler

    def get_input_tensor_scaler(self) -> TensorScaler:
        return self.inputTensorScaler

    def get_model_output_dim(self) -> int:
        """
        :return: the number of output dimensions that would be required to be generated by the model to match this dataset.
        """
        return self.modelOutputDim

    def get_input_dim(self) -> Optional[int]:
        """
        :return: the input dimension that was passed at construction.
            For models that accept variable input sizes (such as RNNs), this may be None.
        """
        return self.inputDim

438

439

class TensorTuple:
    """
    Container for a tuple of tensors (or a single tensor) of equal length, which allows the contained
    tensors to be manipulated simultaneously
    """

    def __init__(self, tensors: Union[torch.Tensor, Sequence[torch.Tensor]]):
        """
        :param tensors: a single tensor or a sequence of tensors, all of which must have the same length
        :raises ValueError: if the lengths of the tensors do not all coincide
        """
        if isinstance(tensors, torch.Tensor):
            tensors = [tensors]
        distinct_lengths = {len(tensor) for tensor in tensors}
        if len(distinct_lengths) != 1:
            raise ValueError("Not all tensors are of the same length")
        (self.length,) = distinct_lengths
        self.tensors = tensors

    def __len__(self):
        return self.length

    def __getitem__(self, key) -> "TensorTuple":
        # the key is applied to every contained tensor
        selected = tuple(tensor[key] for tensor in self.tensors)
        return TensorTuple(selected)

    def cuda(self) -> "TensorTuple":
        """
        :return: a new instance containing the results of calling cuda() on each tensor
        """
        moved = [tensor.cuda() for tensor in self.tensors]
        return TensorTuple(moved)

    def tuple(self) -> Sequence[torch.Tensor]:
        """
        :return: the contained tensors as a tuple
        """
        return tuple(self.tensors)

    def item(self) -> Union[torch.Tensor, Sequence[torch.Tensor]]:
        """
        :return: the tensor itself if exactly one tensor is contained, otherwise the tuple of tensors
        """
        if len(self.tensors) != 1:
            return self.tuple()
        return self.tensors[0]

    def concat(self, other: "TensorTuple") -> "TensorTuple":
        """
        :param other: a tensor tuple containing the same number of tensors
        :return: a new instance where corresponding tensors are concatenated along dimension 0
        :raises ValueError: if the numbers of contained tensors differ
        """
        if len(self.tensors) != len(other.tensors):
            raise ValueError("Tensor tuples are incompatible")
        joined = []
        for mine, theirs in zip(self.tensors, other.tensors):
            joined.append(torch.cat([mine, theirs], dim=0))
        return TensorTuple(joined)

477

478

class TorchDataSetFromTensors(TorchDataSet):
    """
    Data set which is backed by input tensor(s) and, optionally, an output tensor
    """

    def __init__(self, x: Union[torch.Tensor, Sequence[torch.Tensor]], y: Optional[torch.Tensor], cuda: bool):
        """
        :param x: the input tensor(s); if more than one, they must be of the same length (and a slice of each shall be provided to the
            model as an input in each batch)
        :param y: the output tensor
        :param cuda: whether any generated tensors shall be moved to the selected CUDA device
        :raises ValueError: if inputs and outputs differ in length
        """
        x = TensorTuple(x)
        y = TensorTuple(y) if y is not None else None
        if y is not None and len(x) != len(y):
            raise ValueError("Tensors are not of the same length")
        self.x = x
        self.y = y
        self.cuda = cuda

    def iter_batches(self, batch_size: int, shuffle: bool = False, input_only=False) -> Iterator[Union[Tuple[torch.Tensor, torch.Tensor],
            Tuple[Sequence[torch.Tensor], torch.Tensor], torch.Tensor, Sequence[torch.Tensor]]]:
        """
        :param batch_size: the maximum size of each batch (must be positive)
        :param shuffle: whether to shuffle the data
        :param input_only: whether to provide only inputs; note that only inputs are provided also if
            no output tensor is present
        :raises ValueError: (upon iteration) if batch_size is not positive
        """
        tensor_tuples = (self.x, self.y) if not input_only and self.y is not None else (self.x,)
        yield from self._get_batches(tensor_tuples, batch_size, shuffle)

    def _get_batches(self, tensor_tuples: Sequence[TensorTuple], batch_size, shuffle):
        """
        Generates batches from the given tensor tuples, slicing all of them with the same indices

        :param tensor_tuples: the tensor tuples, all of which must have the same length
        :param batch_size: the maximum batch size (must be positive)
        :param shuffle: whether to use a random permutation of the data points
        :return: generator of batches; each batch is a single item if one tensor tuple was given and otherwise a tuple
            of items (one per tensor tuple), where an item is a tensor or a tuple of tensors
        """
        if batch_size < 1:
            # a non-positive batch size would prevent the loop below from ever making progress
            raise ValueError(f"Batch size must be positive, got {batch_size}")
        length = len(tensor_tuples[0])
        # the length check does not depend on the batch and therefore takes place only once
        for tensor_tuple in tensor_tuples:
            if len(tensor_tuple) != length:
                raise Exception("Passed tensors of differing lengths")
        index = torch.randperm(length) if shuffle else torch.arange(length)
        start_idx = 0
        while start_idx < length:
            remaining_items = length - start_idx
            is_second_last_batch = batch_size < remaining_items <= 2 * batch_size
            if is_second_last_batch:
                # to avoid cases where the last batch is excessively small (1 item in the worst case, where e.g. batch
                # normalisation would not be applicable), we evenly distribute the items across the last two batches
                current_batch_size = math.ceil(remaining_items / 2)
            else:
                current_batch_size = batch_size
            end_idx = min(length, start_idx + current_batch_size)
            excerpt = index[start_idx:end_idx]
            batch = []
            for tensor_tuple in tensor_tuples:
                t = tensor_tuple[excerpt]
                if self.cuda:
                    t = t.cuda()
                item = t.item()
                if isinstance(item, tuple):
                    item = tuple(Variable(tensor) for tensor in item)
                else:
                    item = Variable(item)
                batch.append(item)
            if len(batch) == 1:
                yield batch[0]
            else:
                yield tuple(batch)
            start_idx = end_idx

    def size(self):
        return len(self.x)

539

540

class TorchDataSetFromDataFramesPreTensorised(TorchDataSetFromTensors):
    """
    Data set created from data frames, where the tensorisation takes place once at construction
    """

    def __init__(self, input_df: pd.DataFrame, output_df: Optional[pd.DataFrame], cuda: bool,
            input_tensoriser: Optional[Tensoriser] = None, output_tensoriser: Optional[Tensoriser] = None):
        """
        :param input_df: the data frame of inputs
        :param output_df: the data frame of outputs (may be None)
        :param cuda: whether generated tensors shall be moved to the CUDA device
        :param input_tensoriser: the tensoriser to apply to inputs; if None, use TensoriserDataFrameFloatValuesMatrix
        :param output_tensoriser: the tensoriser to apply to outputs (if any); if None, use TensoriserDataFrameFloatValuesMatrix
        """
        if input_tensoriser is None:
            input_tensoriser = TensoriserDataFrameFloatValuesMatrix()
        log.debug(f"Applying {input_tensoriser} to data frame of length {len(input_df)} ...")
        input_tensors = input_tensoriser.tensorise(input_df)

        output_tensors = None
        if output_df is not None:
            if output_tensoriser is None:
                output_tensoriser = TensoriserDataFrameFloatValuesMatrix()
            log.debug(f"Applying {output_tensoriser} to data frame of length {len(output_df)} ...")
            output_tensors = output_tensoriser.tensorise(output_df)

        super().__init__(input_tensors, output_tensors, cuda)

556

557

class TorchDataSetFromDataFramesDynamicallyTensorised(TorchDataSet):
    """
    Data set created from data frames, where the tensorisation takes place per batch when batches are generated
    """

    def __init__(self, input_df: pd.DataFrame, output_df: Optional[pd.DataFrame], cuda: bool,
            input_tensoriser: Optional[Tensoriser] = None, output_tensoriser: Optional[Tensoriser] = None):
        """
        :param input_df: the data frame of inputs
        :param output_df: the data frame of outputs (may be None, in which case only inputs can be provided)
        :param cuda: whether generated tensors shall be moved to the CUDA device
        :param input_tensoriser: the tensoriser to apply to inputs; if None, use TensoriserDataFrameFloatValuesMatrix
        :param output_tensoriser: the tensoriser to apply to outputs (if any); if None, use TensoriserDataFrameFloatValuesMatrix
        :raises ValueError: if the data frames differ in length
        """
        self.inputDF = input_df
        self.outputDF = output_df
        self.cuda = cuda
        if input_tensoriser is None:
            input_tensoriser = TensoriserDataFrameFloatValuesMatrix()
        self.inputTensoriser = input_tensoriser
        if output_df is not None:
            if len(input_df) != len(output_df):
                raise ValueError("Lengths of input and output data frames must be equal")
            if output_tensoriser is None:
                output_tensoriser = TensoriserDataFrameFloatValuesMatrix()
        # always assigned, such that the attribute exists even if there is no output data
        self.outputTensoriser = output_tensoriser

    def size(self) -> Optional[int]:
        return len(self.inputDF)

    def iter_batches(self, batch_size: int, shuffle: bool = False, input_only=False):
        """
        :param batch_size: the size of each batch (must be positive); the last batch may be smaller
        :param shuffle: whether to use a random permutation of the data points
        :param input_only: whether to provide only inputs rather than pairs (inputs, outputs)
        :return: generator of batches
        :raises ValueError: (upon iteration) if batch_size is not positive or if outputs are requested
            but no output data is available
        """
        if batch_size < 1:
            # a non-positive batch size would prevent the loop below from ever making progress
            raise ValueError(f"Batch size must be positive, got {batch_size}")
        if not input_only and self.outputDF is None:
            # fail with a clear message instead of an attribute error on None
            raise ValueError("This data set has no output data and can therefore provide inputs only (use input_only=True)")
        length = len(self.inputDF)
        index = torch.randperm(length) if shuffle else torch.arange(length)
        for start in range(0, length, batch_size):
            batch_indices = index[start:start + batch_size]
            input_tensors = TensorTuple(self.inputTensoriser.tensorise(self.inputDF.iloc[batch_indices]))
            if self.cuda:
                input_tensors = input_tensors.cuda()
            if input_only:
                yield input_tensors.item()
            else:
                output_tensors = TensorTuple(self.outputTensoriser.tensorise(self.outputDF.iloc[batch_indices]))
                if self.cuda:
                    output_tensors = output_tensors.cuda()
                yield input_tensors.item(), output_tensors.item()

597

598

class TorchDataSetFromDataFrames(TorchDataSet):
    """
    Data set created from data frames, which delegates to either a dynamically tensorised or a
    pre-tensorised data set
    """

    def __init__(self, input_df: pd.DataFrame, output_df: Optional[pd.DataFrame], cuda: bool,
            input_tensoriser: Optional[Tensoriser] = None, output_tensoriser: Optional[Tensoriser] = None,
            tensorise_dynamically=False):
        """
        :param input_df: the data frame of inputs
        :param output_df: the data frame of outputs (may be None)
        :param cuda: whether generated tensors shall be moved to the CUDA device
        :param input_tensoriser: the tensoriser to apply to inputs (if None, the delegate's default applies)
        :param output_tensoriser: the tensoriser to apply to outputs (if None, the delegate's default applies)
        :param tensorise_dynamically: whether to tensorise when batches are generated rather than up front
        """
        if tensorise_dynamically:
            delegate_class = TorchDataSetFromDataFramesDynamicallyTensorised
        else:
            delegate_class = TorchDataSetFromDataFramesPreTensorised
        self._torchDataSet: TorchDataSet = delegate_class(input_df, output_df, cuda,
            input_tensoriser=input_tensoriser, output_tensoriser=output_tensoriser)

    def iter_batches(self, batch_size: int, shuffle: bool = False, input_only=False):
        yield from self._torchDataSet.iter_batches(batch_size, shuffle=shuffle, input_only=input_only)

    def size(self) -> Optional[int]:
        return self._torchDataSet.size()

615

616

class TorchDataSetProviderFromDataUtil(TorchDataSetProvider):
    """
    Data set provider which obtains scalers, dimensions and data from a DataUtil instance
    """

    def __init__(self, data_util: DataUtil, cuda: bool):
        """
        :param data_util: the object from which to obtain scalers, dimensions and tensors
        :param cuda: whether the provided data sets shall move generated tensors to the CUDA device
        """
        super().__init__(
            input_tensor_scaler=data_util.get_input_tensor_scaler(),
            output_tensor_scaler=data_util.get_output_tensor_scaler(),
            input_dim=data_util.input_dim(),
            model_output_dim=data_util.model_output_dim())
        self.dataUtil = data_util
        self.cuda = cuda

    def provide_split(self, fractional_size_of_first_set: float) -> Tuple[TorchDataSet, TorchDataSet]:
        first, second = self.dataUtil.split_into_tensors(fractional_size_of_first_set)
        x_first, y_first = first
        x_second, y_second = second
        ds_first = TorchDataSetFromTensors(x_first, y_first, self.cuda)
        ds_second = TorchDataSetFromTensors(x_second, y_second, self.cuda)
        return ds_first, ds_second

627

628

class TorchDataSetProviderFromVectorDataUtil(TorchDataSetProvider):
    """
    Data set provider which obtains scalers, dimensions and data sets from a VectorDataUtil instance
    """

    def __init__(self, data_util: VectorDataUtil, cuda: bool, tensorise_dynamically=False):
        """
        :param data_util: the object from which to obtain scalers, dimensions and data sets
        :param cuda: whether the provided data sets shall move generated tensors to the CUDA device
        :param tensorise_dynamically: whether the data sets shall tensorise when batches are generated rather than up front
        """
        super().__init__(
            input_tensor_scaler=data_util.get_input_tensor_scaler(),
            output_tensor_scaler=data_util.get_output_tensor_scaler(),
            input_dim=data_util.input_dim(),
            model_output_dim=data_util.model_output_dim())
        self.dataUtil = data_util
        self.cuda = cuda
        self.tensoriseDynamically = tensorise_dynamically

    def provide_split(self, fractional_size_of_first_set: float) -> Tuple[TorchDataSet, TorchDataSet]:
        data_sets = self.dataUtil.split_into_data_sets(fractional_size_of_first_set, self.cuda,
            tensorise_dynamically=self.tensoriseDynamically)
        return data_sets

639

640

class TensorTransformer(ABC):
    """
    Interface for transformations which map a tensor to a tensor
    """

    @abstractmethod
    def transform(self, t: torch.Tensor) -> torch.Tensor:
        """
        :param t: the tensor to transform
        :return: the transformed tensor
        """

Coverage for src/sensai/torch/torch_data.py: 67%

386 statements