dataset#
Source code: sensai/data/dataset.py
This module contains sample datasets to facilitate testing and development.
- class DataSet[source]#
Bases: ToStringMixin, ABC
- abstract load_io_data() → InputOutputData [source]#
- class DataSetClassificationTitanicSurvival(drop_metadata_columns: bool = False)[source]#
Bases: DataSet
- Parameters:
drop_metadata_columns – whether to drop meta-data columns which are not useful for a generalising prediction model
- URL = 'https://raw.githubusercontent.com/datasciencedojo/datasets/master/titanic.csv'#
- COL_INDEX = 'PassengerId'#
unique identifier for each passenger
- COL_SURVIVAL = 'Survived'#
0 = No, 1 = Yes
- COL_NAME = 'Name'#
passenger name
- COL_PASSENGER_CLASS = 'Pclass'#
ticket class as an integer (1 = first, 2 = second, 3 = third)
- COL_SEX = 'Sex'#
‘male’ or ‘female’
- COL_AGE_YEARS = 'Age'#
age in years (integer)
- COL_SIBLINGS_SPOUSES = 'SibSp'#
number of siblings/spouses aboard the Titanic
- COL_PARENTS_CHILDREN = 'Parch'#
number of parents/children aboard the Titanic
- COL_FARE_PRICE = 'Fare'#
amount of money paid for the ticket
- COL_CABIN = 'Cabin'#
the cabin number (if available)
- COL_PORT_EMBARKED = 'Embarked'#
port of embarkation (‘C’ = Cherbourg, ‘Q’ = Queenstown, ‘S’ = Southampton)
- COL_TICKET = 'Ticket'#
the ticket number
- COLS_METADATA = ['Name', 'Ticket', 'Cabin']#
list of columns containing meta-data which are not useful for generalising prediction models
- load_io_data() → InputOutputData [source]#