Coverage for src/sensai/evaluation/result_set.py: 46%

56 statements  

« prev     ^ index     » next       coverage.py v7.6.1, created at 2024-11-29 18:29 +0000

1from typing import Optional, List, TYPE_CHECKING, Callable 

2 

3import pandas as pd 

4 

5from sensai.evaluation.eval_stats import RegressionEvalStats 

6from sensai.util.pandas import query_data_frame 

7from sensai.vector_model import get_predicted_var_name 

8 

9if TYPE_CHECKING: 

10 from sensai.evaluation import VectorRegressionModelEvaluationData 

11 

12 

class ResultSet:
    """
    A result set which is designed for interactive result inspection (e.g. in an iPython notebook).
    An instance can, for example, be created with a data frame as returned by VectorRegressionModelEvaluationData.to_data_frame
    and subsequently be applied to interactively analyse the results.

    The class is designed to be subclassed, such that, in particular, method `_show_df` can be
    overridden to display meaningful information (use case-specific) in the notebook environment.
    """
    def __init__(self, df: pd.DataFrame):
        """
        :param df: the data frame holding the results to be inspected
        """
        self.df = df

    def _create_result_set(self, df: pd.DataFrame, parent: "ResultSet") -> "ResultSet":
        """
        Creates a new result set for the given data frame.

        The new instance has the same class as this instance, such that subclass-specific behaviour
        (e.g. an overridden `_show_df`) is retained in result sets derived via :meth:`query`.
        Subclasses whose constructor requires arguments in addition to the data frame must override
        this method.

        :param df: the data frame
        :param parent: the result set from which the new result set is derived
        :return: the result set
        """
        return self.__class__(df)

    def query(self, sql: str) -> "ResultSet":
        """
        Queries the result set with the given condition specified in SQL syntax.

        NOTE: Requires duckdb to be installed.

        :param sql: an SQL query starting with the WHERE clause (excluding the 'where' keyword itself)
        :return: the result set corresponding to the query
        """
        result_df = query_data_frame(self.df, sql)
        return self._create_result_set(result_df, self)

    def show(self, first: Optional[int] = None, sample: Optional[int] = None) -> None:
        """
        Shows all or some of the result set's contents.

        If both `first` and `sample` are given, the sample is drawn from the first `first` rows.

        :param first: if not None, show this many rows from the start of the result set
        :param sample: if not None, sample this many rows from the result set to be shown;
            if fewer rows are available, all available rows are shown
        """
        df = self.df
        if first is not None:
            df = df.iloc[:first]
        if sample is not None:
            # DataFrame.sample draws without replacement by default and raises a ValueError if
            # more rows are requested than exist, so the sample size is capped at the row count
            df = df.sample(min(sample, len(df)))
        self._show_df(df)

    def _show_df(self, df: pd.DataFrame):
        """
        Displays the given data frame; the default implementation prints its full string representation.
        Subclasses can override this method in order to change the way in which results are displayed.

        :param df: the data frame to display
        """
        print(df.to_string())

62 

63 

class RegressionResultSet(ResultSet):
    """
    A result set for regression results, which additionally stores the names of the predicted variables
    and provides the names of the data frame columns that are associated with a predicted variable.
    """
    def __init__(self, df: pd.DataFrame, predicted_var_names: List[str]):
        """
        :param df: the data frame holding the results
        :param predicted_var_names: the names of the predicted variables
        """
        super().__init__(df)
        self.predicted_var_names = predicted_var_names

    @classmethod
    def from_regression_eval_data(cls, eval_data: "VectorRegressionModelEvaluationData", modify_input_df: bool = False,
            output_col_name_override: Optional[str] = None,
            regression_result_set_factory: Callable[[pd.DataFrame, List[str]], "RegressionResultSet"] = None) \
            -> "RegressionResultSet":
        """
        Creates a result set from the given evaluation data, using the data frame returned by
        `eval_data.to_data_frame`.

        :param eval_data: the evaluation data from which to obtain the data frame
        :param modify_input_df: passed on to `eval_data.to_data_frame`
        :param output_col_name_override: passed on to `eval_data.to_data_frame`; if given (and non-empty),
            it is used as the only predicted variable name of the result set, otherwise the predicted
            variable names are taken from `eval_data`
        :param regression_result_set_factory: a function which creates the result set from the data frame
            and the list of predicted variable names; if None, the class on which this method is called
            is instantiated
        :return: the result set
        """
        results_df = eval_data.to_data_frame(modify_input_df=modify_input_df, output_col_name_override=output_col_name_override)
        var_names = [output_col_name_override] if output_col_name_override else eval_data.predicted_var_names
        # without an explicit factory, the class itself serves as the factory
        create = cls if regression_result_set_factory is None else regression_result_set_factory
        return create(results_df, var_names)

    def _create_result_set(self, df: pd.DataFrame, parent: "RegressionResultSet"):
        """
        Creates a new result set of the same class as this instance for the given data frame,
        adopting the predicted variable names of the parent.

        :param df: the data frame
        :param parent: the result set from which the predicted variable names are taken
        :return: the result set
        """
        result_set_cls = self.__class__
        return result_set_cls(df, parent.predicted_var_names)

    @staticmethod
    def col_name_predicted(predicted_var_name: str):
        """
        :param predicted_var_name: the name of the predicted variable
        :return: the name of the column containing the predicted values for the variable
        """
        return f"{predicted_var_name}_predicted"

    @staticmethod
    def col_name_ground_truth(predicted_var_name: str):
        """
        :param predicted_var_name: the name of the predicted variable
        :return: the name of the column containing the ground truth values for the variable
        """
        return f"{predicted_var_name}_true"

    @staticmethod
    def col_name_error(predicted_var_name: str):
        """
        :param predicted_var_name: the name of the predicted variable
        :return: the name of the error column for the variable
        """
        return f"{predicted_var_name}_error"

    @staticmethod
    def col_name_abs_error(predicted_var_name: str):
        """
        :param predicted_var_name: the name of the predicted variable
        :return: the name of the absolute error column for the variable
        """
        return f"{predicted_var_name}_abs_error"

    def eval_stats(self, predicted_var_name: Optional[str] = None):
        """
        Creates the evaluation stats object for this result object, which can be used to compute metrics
        or to create plots.

        :param predicted_var_name: the name of the predicted variable for which to create the object;
            can be None if there is but a single variable
        :return: the evaluation stats object
        """
        var_name = get_predicted_var_name(predicted_var_name, self.predicted_var_names)
        y_predicted = self.df[self.col_name_predicted(var_name)]
        y_true = self.df[self.col_name_ground_truth(var_name)]
        return RegressionEvalStats(y_predicted=y_predicted, y_true=y_true)