dataset#


This module contains sample datasets to facilitate testing and development.

class DataSet[source]#

Bases: ToStringMixin, ABC

abstract load_io_data() → InputOutputData[source]#
class DataSetClassificationIris[source]#

Bases: DataSet

load_io_data() → InputOutputData[source]#
class DataSetClassificationTitanicSurvival(drop_metadata_columns: bool = False)[source]#

Bases: DataSet

Parameters:

drop_metadata_columns – whether to drop meta-data columns which are not useful for a generalising prediction model

URL = 'https://raw.githubusercontent.com/datasciencedojo/datasets/master/titanic.csv'#
COL_INDEX = 'PassengerId'#

unique identifier for each passenger

COL_SURVIVAL = 'Survived'#

whether the passenger survived (0 = No, 1 = Yes)

COL_NAME = 'Name'#

passenger name

COL_PASSENGER_CLASS = 'Pclass'#

ticket class as an integer (1 = first, 2 = second, 3 = third)

COL_SEX = 'Sex'#

‘male’ or ‘female’

COL_AGE_YEARS = 'Age'#

age in years (integer)

COL_SIBLINGS_SPOUSES = 'SibSp'#

number of siblings/spouses aboard the Titanic

COL_PARENTS_CHILDREN = 'Parch'#

number of parents/children aboard the Titanic

COL_FARE_PRICE = 'Fare'#

amount of money paid for the ticket

COL_CABIN = 'Cabin'#

the cabin number (if available)

COL_PORT_EMBARKED = 'Embarked'#

port of embarkation (‘C’ = Cherbourg, ‘Q’ = Queenstown, ‘S’ = Southampton)

COL_TICKET = 'Ticket'#

the ticket number

COLS_METADATA = ['Name', 'Ticket', 'Cabin']#

list of columns containing meta-data which are not useful for generalising prediction models

load_io_data() → InputOutputData[source]#