Source code for sensai.evaluation.result_set
from typing import Optional, List, TYPE_CHECKING, Callable
import pandas as pd
from sensai.evaluation.eval_stats import RegressionEvalStats
from sensai.util.pandas import query_data_frame
from sensai.vector_model import get_predicted_var_name
if TYPE_CHECKING:
from sensai.evaluation import VectorRegressionModelEvaluationData
[docs]class ResultSet:
"""
A result set which is designed for interactive result inspection (e.g. in an iPython notebook).
An instance can, for example, be created with a data frame as returned by VectorRegressionModelEvaluationData.to_data_frame
and subsequently be applied to interactively analyse the results.
The class is designed to be subclassed, such that, in particular, method `_show_df` can be
overridden to display meaningful information (use case-specific) in the notebook environment.
"""
def __init__(self, df: pd.DataFrame):
self.df = df
def _create_result_set(self, df: pd.DataFrame, parent: "ResultSet"):
"""
Creates a new result set for the given data frame
:param df: the data frame
:return: the result set
"""
return ResultSet(df)
[docs] def query(self, sql: str) -> "ResultSet":
"""
Queries the result set with the given condition specified in SQL syntax.
NOTE: Requires duckdb to be installed.
:param sql: an SQL query starting with the WHERE clause (excluding the 'where' keyword itself)
:return: the result set corresponding to the query
"""
result_df = query_data_frame(self.df, sql)
return self._create_result_set(result_df, self)
[docs] def show(self, first: Optional[int] = None, sample: Optional[int] = None) -> None:
"""
Shows all or some of the result set's contents.
:param first: if not None, show this many rows from the start of the result set
:param sample: if not None, sample this many rows from the result set to be shown
"""
df = self.df
if first is not None:
df = df.iloc[:first]
if sample is not None:
df = df.sample(sample)
self._show_df(df)
def _show_df(self, df: pd.DataFrame):
print(df.to_string())
[docs]class RegressionResultSet(ResultSet):
def __init__(self, df: pd.DataFrame, predicted_var_names: List[str]):
super().__init__(df)
self.predicted_var_names = predicted_var_names
[docs] @classmethod
def from_regression_eval_data(cls, eval_data: "VectorRegressionModelEvaluationData", modify_input_df: bool = False,
output_col_name_override: Optional[str] = None,
regression_result_set_factory: Callable[[pd.DataFrame, List[str]], "RegressionResultSet"] = None) \
-> "RegressionResultSet":
df = eval_data.to_data_frame(modify_input_df=modify_input_df, output_col_name_override=output_col_name_override)
if output_col_name_override:
predicted_var_names = [output_col_name_override]
else:
predicted_var_names = eval_data.predicted_var_names
def default_factory(data_frame: pd.DataFrame, var_names: List[str]):
return cls(data_frame, var_names)
if regression_result_set_factory is None:
regression_result_set_factory = default_factory
return regression_result_set_factory(df, predicted_var_names)
def _create_result_set(self, df: pd.DataFrame, parent: "RegressionResultSet"):
return self.__class__(df, parent.predicted_var_names)
[docs] @staticmethod
def col_name_predicted(predicted_var_name: str):
return f"{predicted_var_name}_predicted"
[docs] @staticmethod
def col_name_ground_truth(predicted_var_name: str):
return f"{predicted_var_name}_true"
[docs] @staticmethod
def col_name_error(predicted_var_name: str):
return f"{predicted_var_name}_error"
[docs] @staticmethod
def col_name_abs_error(predicted_var_name: str):
return f"{predicted_var_name}_abs_error"
[docs] def eval_stats(self, predicted_var_name: Optional[str] = None):
"""
Creates the evaluation stats object for this result object, which can be used to compute metrics
or to create plots.
:param predicted_var_name: the name of the predicted variable for which to create the object;
can be None if there is but a single variable
:return: the evaluation stats object
"""
predicted_var_name = get_predicted_var_name(predicted_var_name, self.predicted_var_names)
return RegressionEvalStats(y_predicted=self.df[self.col_name_predicted(predicted_var_name)],
y_true=self.df[self.col_name_ground_truth(predicted_var_name)])