distance_metric#


class DistanceMetric[source]#

Bases: ABC

Abstract base class for (symmetric) distance metrics

abstract distance(named_tuple_a: PandasNamedTuple, named_tuple_b: PandasNamedTuple) → float[source]#
class SingleColumnDistanceMetric(column: str)[source]#

Bases: DistanceMetric, ABC

distance(named_tuple_a: PandasNamedTuple, named_tuple_b: PandasNamedTuple)[source]#
class DistanceMatrixDFCache(pickle_path: str, save_on_update: bool = True, deferred_save_delay_secs: float = 1.0)[source]#

Bases: PersistentKeyValueCache[Tuple[Union[str, int], Union[str, int]], TValue], Generic[TValue]

A cache for distance matrices, which are stored as dataframes with identifiers as both index and columns

shape()[source]#
set(key: Tuple[Union[str, int], Union[str, int]], value: TValue)[source]#

Sets a cached value

Parameters:
  • key – the key under which to store the value

  • value – the value to store; since None is used to indicate the absence of a value, None should not be used as a value

save()[source]#
get(key: Tuple[Union[str, int], Union[str, int]]) → TValue[source]#

Retrieves a cached value

Parameters:

key – the lookup key

Returns:

the cached value or None if no value is found

num_unfilled_entries()[source]#
get_all_cached(identifier: Union[str, int])[source]#
class CachedDistanceMetric(distance_metric: DistanceMetric, key_value_cache: KeyValueCache, persist_cache=False)[source]#

Bases: DistanceMetric, CachedValueProviderMixin

A decorator which provides caching for a distance metric, i.e. the metric is computed only if the value for the given pair of identifiers is not found within the persistent cache

distance(named_tuple_a, named_tuple_b)[source]#
fill_cache(df_indexed_by_id: DataFrame)[source]#

Fill cache for all identifiers in the provided dataframe

Args:

df_indexed_by_id: DataFrame that is indexed by identifiers of the members

class LinearCombinationDistanceMetric(metrics: Sequence[Tuple[float, DistanceMetric]])[source]#

Bases: DistanceMetric

Parameters:

metrics – a sequence of tuples (weight, distance metric)

distance(named_tuple_a, named_tuple_b)[source]#
class HellingerDistanceMetric(column: str, check_input=False)[source]#

Bases: SingleColumnDistanceMetric

class EuclideanDistanceMetric(column: str)[source]#

Bases: SingleColumnDistanceMetric

class IdentityDistanceMetric(keys: Union[str, List[str]])[source]#

Bases: DistanceMetric

distance(named_tuple_a, named_tuple_b)[source]#
class RelativeBitwiseEqualityDistanceMetric(column: str, check_input=False)[source]#

Bases: SingleColumnDistanceMetric

check_input_value(input_value)[source]#