Coverage for src/sensai/tensorflow/tf_base.py: 30%

76 statements  

coverage.py v7.6.1, created at 2024-08-13 22:17 +0000

from abc import ABC, abstractmethod
import logging
import os
import tempfile

import pandas as pd
import tensorflow as tf

from .. import normalisation
from ..vector_model import VectorRegressionModel

log = logging.getLogger(__name__)


class TensorFlowSession:
    session = None
    _isKerasSessionSet = False

    @classmethod
    def configure_session(cls, gpu_allow_growth=True, gpu_per_process_memory_fraction=None):
        tf_config = tf.compat.v1.ConfigProto()
        tf_config.gpu_options.allow_growth = gpu_allow_growth  # dynamically grow the memory used on the GPU
        tf_config.log_device_placement = False
        if gpu_per_process_memory_fraction is not None:
            tf_config.gpu_options.per_process_gpu_memory_fraction = gpu_per_process_memory_fraction  # in case we get CUDNN_STATUS_INTERNAL_ERROR
        cls.session = tf.compat.v1.Session(config=tf_config)

    @classmethod
    def set_keras_session(cls, allow_default=True):
        """
        Sets the (previously configured) session for use with Keras if it has not previously been set.
        If no session has been configured, the parameter allow_default controls whether it is admissible to create a session
        with default parameters.

        :param allow_default: whether to configure a new session with default values in case no session was previously configured
        """
        if cls.session is None:
            if allow_default:
                log.info("No TensorFlow session was configured. Creating a new session with default values.")
                cls.configure_session()
            else:
                raise Exception(f"The session has not yet been configured. Call {cls.__name__}.{cls.configure_session.__name__} beforehand")
        if not cls._isKerasSessionSet:
            tf.compat.v1.keras.backend.set_session(cls.session)  # set_session must be accessed via the v1 compatibility layer in TF2
            cls._isKerasSessionSet = True
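
A minimal usage sketch (the memory fraction value below is illustrative, not a recommendation): configure the session once, before any Keras model is built, then bind it to Keras:

    # configure the shared session explicitly, then register it with Keras
    TensorFlowSession.configure_session(gpu_allow_growth=True, gpu_per_process_memory_fraction=0.5)
    TensorFlowSession.set_keras_session()

If configure_session is never called, set_keras_session (with allow_default=True, the default) falls back to creating a session with default parameters, as implemented above.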

class KerasVectorRegressionModel(VectorRegressionModel, ABC):
    """An abstract base class for simple models which map vectors to vectors and work on pandas.DataFrames (for inputs and outputs)"""

    def __init__(self, normalisation_mode: normalisation.NormalisationMode, loss, metrics, optimiser,
            batch_size=64, epochs=1000, validation_fraction=0.2):
        """
        :param normalisation_mode: the mode according to which inputs and outputs are normalised
        :param loss: the loss function to use (passed on to the Keras model's compile method)
        :param metrics: the metrics to track (passed on to compile)
        :param optimiser: the optimiser to use (passed on to compile)
        :param batch_size: the batch size to use during training
        :param epochs: the number of epochs to train for
        :param validation_fraction: the fraction of the data to hold out for validation
        """
        super().__init__()
        self.normalisation_mode = normalisation_mode
        self.batch_size = batch_size
        self.epochs = epochs
        self.optimiser = optimiser
        self.loss = loss
        self.metrics = list(metrics)
        self.validation_fraction = validation_fraction

        self.model = None
        self.input_scaler = None
        self.output_scaler = None
        self.training_history = None

    def __str__(self):
        params = dict(normalisationMode=self.normalisation_mode, optimiser=self.optimiser, loss=self.loss, metrics=self.metrics,
            epochs=self.epochs, validationFraction=self.validation_fraction, batchSize=self.batch_size)
        return f"{self.__class__.__name__}{params}"

    @abstractmethod
    def _create_model(self, input_dim, output_dim):
        """
        Creates a Keras model

        :param input_dim: the number of input dimensions
        :param output_dim: the number of output dimensions
        :return: the model
        """
        pass

    def _fit(self, inputs: pd.DataFrame, outputs: pd.DataFrame):
        # normalise data
        self.input_scaler = normalisation.VectorDataScaler(inputs, self.normalisation_mode)
        self.output_scaler = normalisation.VectorDataScaler(outputs, self.normalisation_mode)
        norm_inputs = self.input_scaler.get_normalised_array(inputs)
        norm_outputs = self.output_scaler.get_normalised_array(outputs)

        # split data into training and validation sets
        train_split = int(norm_inputs.shape[0] * (1 - self.validation_fraction))
        train_inputs = norm_inputs[:train_split]
        train_outputs = norm_outputs[:train_split]
        val_inputs = norm_inputs[train_split:]
        val_outputs = norm_outputs[train_split:]

        # create and fit the model, checkpointing the weights of the epoch with the best
        # validation loss and restoring them after training
        TensorFlowSession.set_keras_session()
        model = self._create_model(inputs.shape[1], outputs.shape[1])
        model.compile(optimizer=self.optimiser, loss=self.loss, metrics=self.metrics)
        temp_file_handle, temp_file_path = tempfile.mkstemp(".keras.model")
        try:
            os.close(temp_file_handle)
            checkpoint_callback = tf.keras.callbacks.ModelCheckpoint(temp_file_path, monitor='val_loss', save_best_only=True,
                save_weights_only=True)
            self.training_history = model.fit(train_inputs, train_outputs, batch_size=self.batch_size, epochs=self.epochs, verbose=2,
                validation_data=(val_inputs, val_outputs), callbacks=[checkpoint_callback])
            model.load_weights(temp_file_path)
        finally:
            os.unlink(temp_file_path)
        self.model = model

    def _predict(self, inputs: pd.DataFrame) -> pd.DataFrame:
        x = self.input_scaler.get_normalised_array(inputs)
        y = self.model.predict(x)
        y = self.output_scaler.get_denormalised_array(y)
        return pd.DataFrame(y, columns=self.output_scaler.dimension_names)
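
To illustrate how the abstract class is intended to be used, here is a hedged sketch of a concrete subclass and a round trip through fitting and prediction. The subclass name, network architecture and data are made up; the sketch also assumes the DataFrame-based fit/predict interface of VectorRegressionModel and the existence of a normalisation.NormalisationMode.NONE member — check both against your sensAI version.

    import numpy as np
    import pandas as pd
    import tensorflow as tf

    from sensai import normalisation
    from sensai.tensorflow.tf_base import KerasVectorRegressionModel


    class FullyConnectedRegressionModel(KerasVectorRegressionModel):
        """Hypothetical subclass: a small fully-connected network (not part of the module)."""

        def _create_model(self, input_dim, output_dim):
            return tf.keras.Sequential([
                tf.keras.layers.Dense(32, activation="relu", input_shape=(input_dim,)),
                tf.keras.layers.Dense(output_dim)
            ])


    # synthetic example data: two input columns, one target column
    rng = np.random.default_rng(42)
    X = pd.DataFrame({"x1": rng.random(200), "x2": rng.random(200)})
    Y = pd.DataFrame({"y": X["x1"] + 2 * X["x2"]})

    model = FullyConnectedRegressionModel(normalisation.NormalisationMode.NONE,
        loss="mse", metrics=["mae"], optimiser="adam", epochs=20)
    model.fit(X, Y)
    predictions = model.predict(X)  # DataFrame with column "y"

Note that only _create_model needs to be implemented; normalisation, the train/validation split and best-epoch weight restoration are handled by the base class's _fit as shown above.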