Coverage for src/sensai/evaluation/result_set.py: 46%
56 statements
« prev ^ index » next coverage.py v7.6.1, created at 2024-11-29 18:29 +0000
« prev ^ index » next coverage.py v7.6.1, created at 2024-11-29 18:29 +0000
1from typing import Optional, List, TYPE_CHECKING, Callable
3import pandas as pd
5from sensai.evaluation.eval_stats import RegressionEvalStats
6from sensai.util.pandas import query_data_frame
7from sensai.vector_model import get_predicted_var_name
9if TYPE_CHECKING:
10 from sensai.evaluation import VectorRegressionModelEvaluationData
class ResultSet:
    """
    A result set which is designed for interactive result inspection (e.g. in an iPython notebook).
    An instance can, for example, be created with a data frame as returned by
    VectorRegressionModelEvaluationData.to_data_frame and subsequently be applied to interactively
    analyse the results.

    The class is designed to be subclassed, such that, in particular, method `_show_df` can be
    overridden to display meaningful information (use case-specific) in the notebook environment.
    """
    def __init__(self, df: pd.DataFrame):
        """
        :param df: the data frame holding the results to be inspected
        """
        self.df = df

    def _create_result_set(self, df: pd.DataFrame, parent: "ResultSet") -> "ResultSet":
        """
        Creates a new result set for the given data frame.

        This base implementation always returns a plain `ResultSet`; subclasses that want derived
        result sets (e.g. the ones returned by `query`) to be of their own type must override it.

        :param df: the data frame
        :param parent: the result set from which `df` was derived; not used by this base
            implementation, but available to overriding implementations
        :return: the result set
        """
        return ResultSet(df)

    def query(self, sql: str) -> "ResultSet":
        """
        Queries the result set with the given condition specified in SQL syntax.

        NOTE: Requires duckdb to be installed.

        :param sql: an SQL query starting with the WHERE clause (excluding the 'where' keyword itself)
        :return: the result set corresponding to the query
        """
        result_df = query_data_frame(self.df, sql)
        return self._create_result_set(result_df, self)

    def show(self, first: Optional[int] = None, sample: Optional[int] = None) -> None:
        """
        Shows all or some of the result set's contents.

        If both `first` and `sample` are given, the sample is drawn from the first `first` rows.

        :param first: if not None, show this many rows from the start of the result set
        :param sample: if not None, sample this many rows from the result set to be shown;
            if more rows are requested than are available, all available rows are shown
        """
        df = self.df
        if first is not None:
            df = df.iloc[:first]
        if sample is not None:
            # DataFrame.sample draws without replacement and raises a ValueError when asked for
            # more rows than exist, so the requested size is capped at the number of rows
            df = df.sample(min(sample, len(df)))
        self._show_df(df)

    def _show_df(self, df: pd.DataFrame):
        """
        Displays the given data frame; this default implementation prints its string representation.
        Subclasses can override this method in order to change the presentation.

        :param df: the data frame to display
        """
        print(df.to_string())
class RegressionResultSet(ResultSet):
    """
    A result set for regression results, which, in addition to the data frame, stores the names of
    the predicted variables and can create evaluation stats objects from the data frame's
    prediction and ground truth columns.
    """
    def __init__(self, df: pd.DataFrame, predicted_var_names: List[str]):
        """
        :param df: the data frame holding the results
        :param predicted_var_names: the names of the predicted variables
        """
        super().__init__(df)
        self.predicted_var_names = predicted_var_names

    @classmethod
    def from_regression_eval_data(cls, eval_data: "VectorRegressionModelEvaluationData", modify_input_df: bool = False,
            output_col_name_override: Optional[str] = None,
            regression_result_set_factory: Callable[[pd.DataFrame, List[str]], "RegressionResultSet"] = None) \
            -> "RegressionResultSet":
        """
        Creates a result set from the given evaluation data.

        :param eval_data: the evaluation data, whose `to_data_frame` method provides the data frame
        :param modify_input_df: passed on to `eval_data.to_data_frame`
        :param output_col_name_override: passed on to `eval_data.to_data_frame`; if given (and non-empty),
            it is used as the sole predicted variable name instead of `eval_data.predicted_var_names`
        :param regression_result_set_factory: a callable which creates the result set from the data frame
            and the list of predicted variable names; if None, this class is instantiated
        :return: the result set
        """
        result_df = eval_data.to_data_frame(modify_input_df=modify_input_df,
            output_col_name_override=output_col_name_override)
        var_names = [output_col_name_override] if output_col_name_override else eval_data.predicted_var_names

        # without an explicit factory, the class on which this method was called is instantiated
        create = cls if regression_result_set_factory is None else regression_result_set_factory
        return create(result_df, var_names)

    def _create_result_set(self, df: pd.DataFrame, parent: "RegressionResultSet"):
        """
        Creates a new result set of this instance's class for the given data frame,
        taking the predicted variable names from the parent.

        :param df: the data frame
        :param parent: the result set from which the predicted variable names are taken
        :return: the result set
        """
        var_names = parent.predicted_var_names
        return self.__class__(df, var_names)

    @staticmethod
    def col_name_predicted(predicted_var_name: str):
        """
        :param predicted_var_name: the name of the predicted variable
        :return: the variable name with suffix "_predicted"
        """
        return f"{predicted_var_name}_predicted"

    @staticmethod
    def col_name_ground_truth(predicted_var_name: str):
        """
        :param predicted_var_name: the name of the predicted variable
        :return: the variable name with suffix "_true"
        """
        return f"{predicted_var_name}_true"

    @staticmethod
    def col_name_error(predicted_var_name: str):
        """
        :param predicted_var_name: the name of the predicted variable
        :return: the variable name with suffix "_error"
        """
        return f"{predicted_var_name}_error"

    @staticmethod
    def col_name_abs_error(predicted_var_name: str):
        """
        :param predicted_var_name: the name of the predicted variable
        :return: the variable name with suffix "_abs_error"
        """
        return f"{predicted_var_name}_abs_error"

    def eval_stats(self, predicted_var_name: Optional[str] = None):
        """
        Creates the evaluation stats object for this result object, which can be used to compute metrics
        or to create plots.

        :param predicted_var_name: the name of the predicted variable for which to create the object;
            can be None if there is but a single variable
        :return: the evaluation stats object
        """
        var_name = get_predicted_var_name(predicted_var_name, self.predicted_var_names)
        predicted_values = self.df[self.col_name_predicted(var_name)]
        true_values = self.df[self.col_name_ground_truth(var_name)]
        return RegressionEvalStats(y_predicted=predicted_values, y_true=true_values)