geo_clustering#


class GeoCoordClusterer[source]#

Bases: ABC

abstract fit_geo_coords(geo_coords: List[GeoCoord])[source]#
Parameters:

geo_coords – the coordinates to be clustered

abstract clusters_indices() → Tuple[List[List[int]], List[int]][source]#
Returns:

a tuple (clusters, outliers), where clusters is a list of lists of point indices (one inner list per cluster, containing the indices of the points within that cluster) and outliers is the list of indices of points that do not belong to any cluster

class GreedyAgglomerativeGeoCoordClusterer(max_min_distance_for_merge_m: float, max_distance_m: float, min_cluster_size: int, lcs: Optional[LocalCoordinateSystem] = None)[source]#

Bases: GeoCoordClusterer

Parameters:
  • max_min_distance_for_merge_m – the maximum value, in metres, that the minimum distance between two existing clusters may take for a merge of the two clusters to be admissible

  • max_distance_m – the maximum distance, in metres, between any two points for the points to be allowed to be in the same cluster

  • min_cluster_size – the minimum number of points any valid cluster must ultimately contain; the points in any smaller clusters shall be considered as outliers

  • lcs – the local coordinate system to use for clustering; if None, compute based on mean coordinates passed when fitting

class Matrix(dim: int)[source]#

Bases: object

UNSET_VALUE = inf#
set(c1: int, c2: int, value: float)[source]#
get(c1: int, c2: int) → float[source]#
class LocalPoint(xy: ndarray, idx: int)[source]#

Bases: object

class Cluster(point: LocalPoint, idx: int, clusterer: GreedyAgglomerativeGeoCoordClusterer)[source]#

Bases: Cluster

merge_cost(other: Cluster)[source]#

Computes the cost of merging the given cluster with this cluster

Returns:

the (non-negative) merge cost or math.inf if a merge is inadmissible

merge(other)[source]#

Merges the given cluster into this cluster

Parameters:

other – the cluster that is to be merged into this cluster

fit_geo_coords(geo_coords: List[GeoCoord]) → None[source]#
Parameters:

geo_coords – the coordinates to be clustered

clusters_indices() → Tuple[List[List[int]], List[int]][source]#
Returns:

a tuple (clusters, outliers), where clusters is a list of lists of point indices (one inner list per cluster, containing the indices of the points within that cluster) and outliers is the list of indices of points that do not belong to any cluster

class MergeCandidateDeterminationStrategy(search_radius_m: float, parent: GreedyAgglomerativeGeoCoordClusterer)[source]#

Bases: MergeCandidateDeterminationStrategy

set_clusterer(clusterer: GreedyAgglomerativeClustering)[source]#

Initialises the clusterer the strategy is applied to

Parameters:

clusterer – the clusterer

iter_candidate_indices(wc: WrappedCluster, initial: bool, merged_cluster_indices: Optional[Tuple[int, int]] = None) → Iterator[int][source]#
Parameters:
  • wc – the wrapped cluster: the cluster for which to determine the cluster indices that are to be considered for a potential merge

  • initial – whether we are computing the initial candidates (at the start of the clustering algorithm)

  • merged_cluster_indices – [for initial=False] the pair of cluster indices that were just joined to form the updated cluster wc

Returns:

an iterator of cluster indices that should be evaluated as potential merge partners for wc (it may contain the index of wc, which will be ignored)

class SkLearnGeoCoordClusterer(clusterer, lcs: Optional[LocalCoordinateSystem] = None)[source]#

Bases: GeoCoordClusterer

Parameters:
  • clusterer – a clusterer from sklearn.cluster

  • lcs – the local coordinate system to use for Euclidean conversion; if None, determine from data (using mean coordinate as centre)

fit_geo_coords(geo_coords: List[GeoCoord])[source]#
Parameters:

geo_coords – the coordinates to be clustered

clusters_local_points() → Tuple[List[List[Tuple[float, float]]], List[Tuple[float, float]]][source]#
Returns:

a tuple (clusters, outliers), where clusters is a list containing, for each cluster, the list of local points within the cluster, and outliers is a list of local points not within clusters

clusters_indices() → Tuple[List[List[int]], List[int]][source]#
Returns:

a tuple (clusters, outliers), where clusters is a list of lists of point indices (one inner list per cluster, containing the indices of the points within that cluster) and outliers is the list of indices of points that do not belong to any cluster

class DBSCANGeoCoordClusterer(eps, min_samples, lcs: Optional[LocalCoordinateSystem] = None, **kwargs)[source]#

Bases: SkLearnGeoCoordClusterer

Parameters:
  • eps – the maximum distance between two samples for one to be considered as in the neighbourhood of the other

  • min_samples – the minimum number of samples that must be within a neighbourhood for a cluster to be formed

  • lcs – the local coordinate system for conversion to a Euclidean space

  • kwargs – additional arguments to pass to DBSCAN (see https://scikit-learn.org/stable/modules/generated/sklearn.cluster.DBSCAN.html)