import numpy as np
import matplotlib.pyplot as plt
import torch
from sklearn.datasets import make_moons
from sklearn.cluster import DBSCAN
This was inspired by a fastai Part 2 assignment by Jeremy Howard. The main idea is to practice implementing algorithms on a GPU for practice with broadcasting and other key techniques relevant to working on GPUs.
There are 4 Steps - CPU implementation - Slower simple GPU implementation - Somewhat faster simple GPU implementation - Much faster cuml GPU implementation with brief explanation of what makes it work - See what replacing only the distance calculation does in my slower GPU implementation
DBSCAN
First, check look at the desired behavior using the default implementation
= DBSCAN(eps=0.2, min_samples=5, metric='euclidean')
db = make_moons(n_samples=200, noise=0.05, random_state=0) X, y
= db.fit_predict(X)
y_db print(y_db,len(y_db))
print(len(X[y_db == 0, 0]), len(X[y_db == 1, 1]))
== 0, 0],
plt.scatter(X[y_db == 0, 1],
X[y_db ='lightblue',
c='black',
edgecolor='o',
marker=40,
s='Cluster 1')
label== 1, 0],
plt.scatter(X[y_db == 1, 1],
X[y_db ='lightblue',
c='red',
edgecolor='o',
marker=40,
s='Cluster 1')
label'Feature 1')
plt.xlabel('Feature 2')
plt.ylabel(
plt.legend()
plt.tight_layout() plt.show()
[0 1 1 0 1 1 0 1 0 1 0 1 1 1 0 0 0 1 0 0 1 1 0 1 0 1 1 1 1 0 0 0 1 1 0 1 1
0 0 1 1 0 0 1 1 0 0 0 1 1 0 1 1 0 1 0 0 1 0 0 1 0 1 0 1 0 0 1 0 0 1 0 1 1
1 0 1 0 0 1 1 0 1 1 1 0 0 0 1 1 0 0 1 0 1 1 1 1 0 1 1 1 0 0 0 1 0 0 1 0 0
0 0 0 0 1 0 1 1 0 0 0 1 0 1 0 0 1 1 1 0 0 0 1 1 1 1 0 1 0 1 1 0 0 0 0 1 1
0 1 1 1 0 0 1 0 1 1 0 0 1 1 0 1 1 1 0 1 1 1 0 0 0 0 1 1 1 0 0 0 1 0 1 1 1
0 0 1 0 0 0 0 0 0 1 0 1 1 0 1] 200
100 100
DBSCAN custom implementation
- A point is a core point if at least a specified number min_samples of neighboring points fall within eps radius
- A border point is a point that has fewer than min_samples within eps, but lies within the radius eps of a core point
- All other points that are neither core nor border points are noise points
Algorithm - Label the points as core, border, or noise. - Form a separate cluster for each core point or connected group of core points. (Core points are connected if they are no farther away than eps). - Assign each border point to the cluster of its corresponding core points.
- Want border point to have a reference to its core point for easy merging
- Once merge cluster points, want to go find and update corresponding border points’ reference
class DbscanCustom: #(torch.nn.Module):
def __init__(self, eps, min_samples):
super().__init__()
self.eps = eps
self.min_samples = min_samples
def fit_predict(self, X):
# Label points as core or assign border to core, skip if noise
self.label_points(X)
# Form clusters of connected groups of core points
# Avoid RuntimeError: dictionary changed size during iteration,
# make a copy of the keys!!!
= list(self.mapping.keys())
keys #print(len(keys)+sum([len(v) for _,v in self.mapping.items()]))
= set()
merged for idx,i in enumerate(keys[:-1]): # iterating through a list now!
for j in keys[idx+1:]:
#if i == j: continue
if self.distance(i,j) <= self.eps:
#print(self.mapping[i],self.mapping[j])
self.mapping[j].add(i) #IN-PLACE
self.mapping[j]=self.mapping[j].union(self.mapping[i]) #NOT IN-PLACE!!!!!!!!!!
print(f"Deleting {self.mapping[i]} and i is {i} and self.mapping[j] is {self.mapping[j]}")
del self.mapping[i]
merged.add(i)break
#if i in merged: continue
# Relabel clusters
print(self.mapping)
= {i:v for i, (k,v) in enumerate(self.mapping.items())}
final_map = len(final_map)+sum([len(v) for _,v in final_map.items()])
len_pts print(final_map, "\n", len_pts)
return final_map
def label_points(self, X):
"""1. Label the points as core, border, or noise."""
self.mapping = {}
self.X = X
self.get_core_points()
self.get_border_points()
# Ignore the noise points, don't need them later
def get_core_points(self):
for i,x in enumerate(self.X):
if self.is_core_point(i):
self.mapping[i] = set()
def get_border_points(self):
for i,x in enumerate(X):
if i not in self.mapping:
self.map_border(i)
def is_core_point(self, i):
# TODO: update to point to itself?
"""A point is a core point if at least a specified number min_samples of neighboring points fall within eps radius"""
= 0
num_within for j,x in enumerate(self.X): # TODO: Keys vs indices; straighten up!
if i == j:
continue
if self.distance(i,j) <= self.eps:
+= 1
num_within if num_within >= self.min_samples:
#print(X[i],X[j],self.distance(i,j),num_within,self.min_samples)
return True
return False
def map_border(self, i):
# TODO: update to point to point to its core idx
"""A *border point* is a point that has fewer than min_samples within eps, but lies within the radius eps of a core point"""
for j in self.mapping:
if i == j:
continue
if self.distance(i,j) <= self.eps:
self.mapping[j].add(i)
break
def distance(self,i,j):
"""Distance between points of self.X with indices i and j"""
#print(np.sqrt(np.sum((self.X[i]-self.X[j])**2)))
return np.sqrt(np.sum((self.X[i]-self.X[j])**2))
class DbscanCustom: #(torch.nn.Module):
def __init__(self, eps, min_samples):
super().__init__()
self.eps = eps
self.min_samples = min_samples
def fit_predict(self, X):
# Label points as core or assign border to core, skip if noise
self.label_points(X)
# Form clusters of connected groups of core points
# Avoid RuntimeError: dictionary changed size during iteration,
# make a copy of the keys!!!
= list(self.mapping.keys())
keys #print(keys)
#print(len(keys)+sum([len(v) for _,v in self.mapping.items()]))
= set()
merged for idx,i in enumerate(keys[:-1]): # iterating through a list now!
for j in keys[idx+1:]:
#if i == j: continue
if self.distance(i,j) <= self.eps:
#print(self.mapping[i],self.mapping[j])
self.mapping[j].add(i) #IN-PLACE
self.mapping[j]=self.mapping[j].union(self.mapping[i]) #NOT IN-PLACE!!!!!!!!!!
merged.add(i)if i in merged:
#print(f"Deleting {self.mapping[i]} and i is {i} and self.mapping[j] is {self.mapping[j]}")
del self.mapping[i]
#break
#if i in merged: continue
# Relabel clusters
print(self.mapping)
= {i:v for i, (k,v) in enumerate(self.mapping.items())}
final_map = len(final_map)+sum([len(v) for _,v in final_map.items()])
len_pts print(final_map, "\n", len_pts)
return final_map
def label_points(self, X):
"""1. Label the points as core, border, or noise."""
self.mapping = {}
self.X = X
self.get_core_points()
self.get_border_points()
# Ignore the noise points, don't need them later
def get_core_points(self):
for i,x in enumerate(self.X):
if self.is_core_point(i):
self.mapping[i] = set()
def get_border_points(self):
for i,x in enumerate(X):
if i not in self.mapping:
self.map_border(i)
def is_core_point(self, i):
# TODO: update to point to itself?
"""A point is a core point if at least a specified number min_samples of neighboring points fall within eps radius"""
= 0
num_within for j,x in enumerate(self.X): # TODO: Keys vs indices; straighten up!
if i == j:
continue
if self.distance(i,j) <= self.eps:
+= 1
num_within if num_within >= self.min_samples:
#print(X[i],X[j],self.distance(i,j),num_within,self.min_samples)
return True
return False
def map_border(self, i):
# TODO: update to point to point to its core idx
"""A *border point* is a point that has fewer than min_samples within eps, but lies within the radius eps of a core point"""
for j in self.mapping:
if i == j:
continue
if self.distance(i,j) <= self.eps:
self.mapping[j].add(i)
break
def distance(self,i,j):
"""Distance between points of self.X with indices i and j"""
#print(np.sqrt(np.sum((self.X[i]-self.X[j])**2)))
#print(np.sqrt(np.sum((self.X[20]-self.X[199])**2)))
return np.sqrt(np.sum((self.X[i]-self.X[j])**2))
- Next, handle border points
- I think the three remaining points are probably border points and need to be mapped to their respective cluster!!!!
from collections import Counter
class DbscanCustom: #(torch.nn.Module):
def __init__(self, eps, min_samples):
super().__init__()
self.eps = eps
self.min_samples = min_samples
self.edges = [] # for union find
def find(self,i):
= i
res while self.parents[res]!=res:
self.parents[res] = self.parents[self.parents[res]]
= self.parents[res]
res return res
def union(self, i,j):
= self.find(i), self.find(j)
pi, pj if pi == pj: return 0, None
if self.rank[pi] >= self.rank[pj]:
self.parents[pj] = pi
self.rank[pi] += self.rank[pj]
return 1, i
else:
self.parents[pi] = pj
self.rank[pj] += self.rank[pi]
return 1, j
def fit_predict(self, X):
self.parents = list(range(len(X)))
self.rank = [1]*len(X)
self.mapping = {}
self.X = X
# Label points as core or assign border to core, skip if noise
self.label_points(X)
= list(self.mapping.keys())
keys = len(keys)
n for i, j in self.edges:
if i in self.mapping and j in self.mapping:
= self.union(i,j) #0 or 1, idx
res,p #if res: n -= 1
if res and p == i:
self.mapping[i].update(self.mapping[j])
#del self.mapping[j]
elif res and p == j:
self.mapping[j].update(self.mapping[i])
#del self.mapping[i]
print("SELF.MAPPING")
print(self.mapping)
print("PARENTS")
print(self.parents)
print("Parents len")
print(len(self.parents))
print("CLUSTERS")
print(set(self.parents))
print("RANKS")
print(self.rank)
print(self.parents[3], self.parents[73], self.parents[103])
= {}
final_map = Counter(self.parents)
counts #cluster_rename = {i:v for i,(k,v) in enumerate(counts.items()) if counts[k]>1}
# print("CLUSTER RENAME")
# print(cluster_rename)
= [np.nan]*len(X)
result = self.mapping.values()
all_vals for idx,elt in enumerate(self.parents):
if elt not in final_map:
= set()
final_map[elt] if counts[elt]> 1:
final_map[elt].add(idx)
= {v1:k for k,v in self.mapping.items() for v1 in v if v}
v_to_k print("v_to_k")
print(v_to_k)
print(final_map)
print(len(final_map))
= {i:v for i, (k,v) in enumerate(final_map.items())}
final_map print(final_map)
for i,v in final_map.items():
for j in v:
= i
result[j] for v,k in v_to_k.items():
= result[k]
result[v] print(result)
return np.array(result)
# final_map = {i:v for i, (k,v) in enumerate(self.mapping.items())}
# len_pts = len(final_map)+sum([len(v) for _,v in final_map.items()])
# print(final_map, "\n", len_pts)
#return final_map
def label_points(self, X):
"""1. Label the points as core, border, or noise."""
self.mapping = {}
self.X = X
self.get_core_points()
self.get_border_points()
print("EDGES:")
print(self.edges)
# Ignore the noise points, don't need them later
def get_core_points(self):
for i,x in enumerate(self.X):
if self.map_core(i):
self.mapping[i] = set()
def get_border_points(self):
for i,x in enumerate(X):
if i not in self.mapping:
self.map_border(i)
def map_core(self, i):
# TODO: update to point to itself?
"""A point is a core point if at least a specified number min_samples of neighboring points fall within eps radius"""
= 0
num_within for j,x in enumerate(self.X): # TODO: Keys vs indices; straighten up!
if i == j:
continue
if self.distance(i,j) <= self.eps:
self.edges.append([i,j])
+= 1
num_within if num_within >= self.min_samples:
return True
return False
def map_border(self, i):
# TODO: update to point to point to its core idx
"""A *border point* is a point that has fewer than min_samples within eps, but lies within the radius eps of a core point"""
= self.eps
min_dist = None
min_idx for j in self.mapping:
if i == j:
continue
if self.distance(i,j) <= self.eps:
if self.distance(i,j) <= min_dist:
= self.distance(i,j)
min_dist = j
min_idx if min_idx: self.mapping[min_idx].add(i)
def distance(self,i,j):
"""Distance between points of self.X with indices i and j"""
#print(np.sqrt(np.sum((self.X[i]-self.X[j])**2)))
return np.sqrt(np.sum((self.X[i]-self.X[j])**2))
= make_moons(n_samples=200, noise=0.05, random_state=0)
X, y = DbscanCustom(eps=0.2, min_samples=5)
db_custom = db_custom.fit_predict(X)
y_db == 0, 0],
plt.scatter(X[y_db == 0, 1],
X[y_db ='lightblue',
c='black',
edgecolor='o',
marker=40,
s='Cluster 1')
label== 1, 0],
plt.scatter(X[y_db == 1, 1],
X[y_db ='lightblue',
c='red',
edgecolor='o',
marker=40,
s='Cluster 1')
label'Feature 1')
plt.xlabel('Feature 2')
plt.ylabel(
plt.legend()
plt.tight_layout() plt.show()
EDGES:
[[0, 8], [0, 30], [0, 34], [0, 55], [0, 63], [0, 77], [0, 114], [0, 119], [0, 120], [0, 139], [0, 152], [0, 192], [1, 44], [1, 67], [1, 96], [1, 108], [1, 151], [1, 163], [1, 168], [1, 180], [1, 183], [1, 199], [2, 33], [2, 39], [2, 76], [2, 84], [2, 105], [2, 122], [2, 124], [2, 141], [2, 149], [2, 194], [3, 53], [3, 106], [3, 171], [4, 5], [4, 12], [4, 26], [4, 62], [4, 95], [4, 96], [4, 135], [4, 151], [4, 165], [4, 183], [4, 199], [5, 4], [5, 12], [5, 20], [5, 26], [5, 27], [5, 52], [5, 62], [5, 80], [5, 95], [5, 117], [6, 8], [6, 15], [6, 37], [6, 46], [6, 86], [6, 91], [6, 114], [6, 126], [6, 130], [6, 139], [6, 145], [6, 193], [7, 21], [7, 32], [7, 49], [7, 54], [7, 74], [7, 79], [7, 167], [7, 169], [7, 175], [7, 187], [8, 0], [8, 6], [8, 15], [8, 30], [8, 63], [8, 77], [8, 91], [8, 114], [8, 120], [8, 130], [8, 139], [8, 152], [8, 192], [8, 193], [9, 13], [9, 32], [9, 54], [9, 57], [9, 64], [9, 100], [9, 134], [9, 136], [9, 156], [9, 161], [9, 169], [9, 175], [10, 31], [10, 41], [10, 45], [10, 85], [10, 87], [10, 107], [10, 131], [10, 143], [10, 144], [10, 195], [11, 17], [11, 25], [11, 28], [11, 36], [11, 48], [11, 51], [11, 60], [11, 97], [11, 133], [11, 157], [11, 164], [12, 4], [12, 5], [12, 20], [12, 26], [12, 27], [12, 62], [12, 95], [12, 96], [12, 165], [12, 183], [13, 9], [13, 23], [13, 54], [13, 57], [13, 64], [13, 100], [13, 128], [13, 134], [13, 136], [13, 147], [13, 156], [13, 161], [14, 19], [14, 38], [14, 47], [14, 93], [14, 121], [14, 125], [14, 172], [14, 173], [14, 177], [14, 178], [14, 181], [15, 6], [15, 8], [15, 37], [15, 46], [15, 86], [15, 91], [15, 114], [15, 126], [15, 130], [15, 145], [15, 193], [16, 18], [16, 56], [16, 78], [16, 142], [16, 153], [16, 158], [16, 159], [16, 162], [16, 166], [16, 170], [16, 179], [16, 191], [17, 11], [17, 36], [17, 51], [17, 60], [17, 97], [17, 133], [17, 150], [17, 157], [17, 164], [18, 16], [18, 22], [18, 58], [18, 90], [18, 109], [18, 142], [18, 148], [18, 153], [18, 158], [18, 166], [18, 185], [18, 191], [19, 14], [19, 47], [19, 50], [19, 93], [19, 110], [19, 125], [19, 177], [19, 189], [20, 5], [20, 12], [20, 26], [20, 27], [20, 52], [20, 62], [20, 80], [20, 95], [20, 117], [20, 182], [21, 7], [21, 32], [21, 49], [21, 54], [21, 74], [21, 79], [21, 167], [21, 169], [21, 175], [21, 187], [22, 18], [22, 29], [22, 58], [22, 68], [22, 90], [22, 102], [22, 109], [22, 111], [22, 148], [22, 153], [22, 185], [22, 198], [23, 13], [23, 57], [23, 67], [23, 115], [23, 128], [23, 134], [23, 136], [23, 147], [23, 156], [23, 163], [23, 180], [24, 31], [24, 45], [24, 59], [24, 66], [24, 85], [24, 87], [24, 143], [24, 155], [24, 186], [25, 11], [25, 28], [25, 43], [25, 48], [25, 51], [25, 127], [25, 133], [25, 140], [25, 146], [25, 157], [25, 160], [25, 174], [25, 197], [26, 4], [26, 5], [26, 12], [26, 20], [26, 27], [26, 52], [26, 62], [26, 95], [26, 96], [26, 165], [26, 183], [27, 5], [27, 12], [27, 20], [27, 26], [27, 35], [27, 52], [27, 62], [27, 80], [27, 95], [27, 117], [27, 182], [27, 196], [28, 11], [28, 25], [28, 43], [28, 48], [28, 51], [28, 99], [28, 127], [28, 133], [28, 140], [28, 146], [28, 157], [28, 160], [28, 174], [28, 197], [29, 22], [29, 58], [29, 68], [29, 81], [29, 90], [29, 109], [29, 111], [29, 123], [29, 148], [29, 198], [30, 0], [30, 8], [30, 34], [30, 55], [30, 63], [30, 77], [30, 114], [30, 119], [30, 120], [30, 139], [30, 152], [30, 192], [31, 10], [31, 24], [31, 45], [31, 59], [31, 66], [31, 87], [31, 143], [31, 155], [31, 186], [32, 7], [32, 9], [32, 21], [32, 49], [32, 54], [32, 64], [32, 74], [32, 136], [32, 161], [32, 169], [32, 175], [33, 2], [33, 39], [33, 43], [33, 76], [33, 84], [33, 99], [33, 105], [33, 122], [33, 149], [33, 160], [33, 174], [33, 194], [34, 0], [34, 30], [34, 42], [34, 55], [34, 63], [34, 78], [34, 116], [34, 119], [34, 120], [34, 159], [34, 179], [34, 192], [35, 27], [35, 52], [35, 80], [35, 89], [35, 92], [35, 94], [35, 101], [35, 117], [35, 182], [35, 184], [35, 196], [36, 11], [36, 17], [36, 51], [36, 60], [36, 97], [36, 150], [36, 154], [36, 157], [36, 164], [37, 6], [37, 15], [37, 46], [37, 86], [37, 103], [37, 126], [37, 130], [37, 145], [37, 193], [38, 14], [38, 47], [38, 69], [38, 71], [38, 75], [38, 113], [38, 121], [38, 125], [38, 132], [38, 172], [38, 173], [38, 178], [38, 181], [38, 188], [38, 190], [39, 2], [39, 33], [39, 73], [39, 76], [39, 84], [39, 105], [39, 122], [39, 124], [39, 141], [39, 149], [39, 194], [40, 70], [40, 72], [40, 79], [40, 82], [40, 88], [40, 118], [40, 150], [40, 154], [40, 164], [40, 167], [41, 10], [41, 45], [41, 85], [41, 87], [41, 107], [41, 110], [41, 131], [41, 143], [41, 144], [41, 195], [42, 34], [42, 55], [42, 56], [42, 78], [42, 116], [42, 159], [42, 162], [42, 170], [42, 179], [43, 25], [43, 28], [43, 33], [43, 48], [43, 76], [43, 84], [43, 99], [43, 127], [43, 140], [43, 146], [43, 160], [43, 174], [43, 197], [44, 1], [44, 67], [44, 108], [44, 115], [44, 135], [44, 151], [44, 163], [44, 168], [44, 180], [44, 199], [45, 10], [45, 24], [45, 31], [45, 41], [45, 59], [45, 85], [45, 87], [45, 107], [45, 143], [45, 144], [45, 155], [45, 195], [46, 6], [46, 15], [46, 37], [46, 86], [46, 91], [46, 126], [46, 130], [46, 145], [46, 193], [47, 14], [47, 19], [47, 38], [47, 50], [47, 93], [47, 121], [47, 125], [47, 173], [47, 177], [47, 178], [47, 189], [48, 11], [48, 25], [48, 28], [48, 43], [48, 51], [48, 127], [48, 133], [48, 140], [48, 146], [48, 157], [48, 160], [48, 174], [48, 197], [49, 7], [49, 21], [49, 32], [49, 54], [49, 74], [49, 79], [49, 167], [49, 169], [49, 175], [49, 187], [50, 19], [50, 47], [50, 93], [50, 110], [50, 125], [50, 131], [50, 177], [50, 189], [50, 195], [51, 11], [51, 17], [51, 25], [51, 28], [51, 36], [51, 48], [51, 60], [51, 97], [51, 133], [51, 157], [51, 164], [52, 5], [52, 20], [52, 26], [52, 27], [52, 35], [52, 80], [52, 117], [52, 182], [53, 3], [53, 61], [53, 106], [53, 137], [53, 171], [54, 7], [54, 9], [54, 13], [54, 21], [54, 32], [54, 49], [54, 57], [54, 64], [54, 134], [54, 136], [54, 161], [54, 169], [54, 175], [55, 0], [55, 30], [55, 34], [55, 42], [55, 63], [55, 78], [55, 116], [55, 119], [55, 120], [55, 159], [55, 179], [55, 192], [56, 16], [56, 42], [56, 78], [56, 116], [56, 159], [56, 162], [56, 166], [56, 170], [56, 179], [57, 9], [57, 13], [57, 23], [57, 54], [57, 64], [57, 100], [57, 115], [57, 128], [57, 134], [57, 136], [57, 147], [57, 156], [57, 161], [58, 18], [58, 22], [58, 29], [58, 90], [58, 109], [58, 142], [58, 148], [58, 153], [58, 185], [58, 191], [58, 198], [59, 24], [59, 31], [59, 45], [59, 66], [59, 81], [59, 85], [59, 87], [59, 123], [59, 143], [59, 155], [59, 186], [60, 11], [60, 17], [60, 36], [60, 51], [60, 97], [60, 133], [60, 150], [60, 157], [60, 164], [61, 53], [61, 65], [61, 98], [61, 104], [61, 106], [61, 137], [61, 171], [62, 4], [62, 5], [62, 12], [62, 20], [62, 26], [62, 27], [62, 95], [62, 96], [62, 135], [62, 165], [62, 183], [62, 199], [63, 0], [63, 8], [63, 30], [63, 34], [63, 55], [63, 77], [63, 114], [63, 119], [63, 120], [63, 139], [63, 152], [63, 192], [64, 9], [64, 13], [64, 32], [64, 54], [64, 57], [64, 134], [64, 136], [64, 156], [64, 161], [64, 169], [64, 175], [65, 61], [65, 71], [65, 75], [65, 98], [65, 104], [65, 106], [65, 137], [66, 24], [66, 31], [66, 59], [66, 68], [66, 81], [66, 87], [66, 111], [66, 112], [66, 123], [66, 155], [66, 186], [67, 1], [67, 23], [67, 44], [67, 108], [67, 115], [67, 128], [67, 147], [67, 163], [67, 168], [67, 180], [68, 22], [68, 29], [68, 66], [68, 81], [68, 102], [68, 111], [68, 112], [68, 123], [68, 198], [69, 38], [69, 71], [69, 75], [69, 104], [69, 113], [69, 132], [69, 172], [69, 173], [69, 181], [69, 188], [69, 190], [70, 40], [70, 72], [70, 74], [70, 79], [70, 82], [70, 88], [70, 118], [70, 154], [70, 167], [70, 187], [71, 38], [71, 65], [71, 69], [71, 75], [71, 98], [71, 104], [71, 113], [71, 132], [71, 137], [71, 172], [71, 181], [71, 188], [71, 190], [72, 40], [72, 70], [72, 82], [72, 88], [72, 118], [72, 150], [72, 154], [72, 164], [73, 39], [73, 124], [73, 141], [73, 194], [74, 7], [74, 21], [74, 32], [74, 49], [74, 70], [74, 79], [74, 82], [74, 167], [74, 169], [74, 175], [74, 187], [75, 38], [75, 65], [75, 69], [75, 71], [75, 98], [75, 104], [75, 113], [75, 132], [75, 137], [75, 172], [75, 181], [75, 188], [75, 190], [76, 2], [76, 33], [76, 39], [76, 43], [76, 84], [76, 99], [76, 105], [76, 122], [76, 127], [76, 149], [76, 160], [76, 174], [77, 0], [77, 8], [77, 30], [77, 63], [77, 91], [77, 114], [77, 120], [77, 130], [77, 139], [77, 152], [77, 192], [77, 193], [78, 16], [78, 34], [78, 42], [78, 55], [78, 56], [78, 116], [78, 158], [78, 159], [78, 162], [78, 166], [78, 170], [78, 179], [79, 7], [79, 21], [79, 40], [79, 49], [79, 70], [79, 74], [79, 82], [79, 88], [79, 118], [79, 154], [79, 167], [79, 187], [80, 5], [80, 20], [80, 27], [80, 35], [80, 52], [80, 92], [80, 101], [80, 117], [80, 182], [80, 184], [80, 196], [81, 29], [81, 59], [81, 66], [81, 68], [81, 102], [81, 111], [81, 112], [81, 123], [81, 155], [81, 186], [81, 198], [82, 40], [82, 70], [82, 72], [82, 74], [82, 79], [82, 88], [82, 118], [82, 154], [82, 167], [82, 187], [83, 89], [83, 92], [83, 94], [83, 101], [83, 129], [83, 138], [83, 176], [83, 184], [84, 2], [84, 33], [84, 39], [84, 43], [84, 76], [84, 99], [84, 105], [84, 122], [84, 149], [84, 194], [85, 10], [85, 24], [85, 41], [85, 45], [85, 59], [85, 87], [85, 107], [85, 143], [85, 144], [85, 155], [86, 6], [86, 15], [86, 37], [86, 46], [86, 103], [86, 126], [86, 145], [87, 10], [87, 24], [87, 31], [87, 41], [87, 45], [87, 59], [87, 66], [87, 85], [87, 107], [87, 143], [87, 144], [87, 155], [87, 186], [88, 40], [88, 70], [88, 72], [88, 79], [88, 82], [88, 118], [88, 150], [88, 154], [88, 167], [89, 35], [89, 83], [89, 92], [89, 94], [89, 101], [89, 129], [89, 138], [89, 176], [89, 184], [89, 196], [90, 18], [90, 22], [90, 29], [90, 58], [90, 109], [90, 142], [90, 148], [90, 153], [90, 158], [90, 166], [90, 185], [90, 191], [91, 6], [91, 8], [91, 15], [91, 46], [91, 77], [91, 114], [91, 130], [91, 139], [91, 152], [91, 193], [92, 35], [92, 80], [92, 83], [92, 89], [92, 94], [92, 101], [92, 117], [92, 182], [92, 184], [92, 196], [93, 14], [93, 19], [93, 47], [93, 50], [93, 110], [93, 121], [93, 125], [93, 131], [93, 177], [93, 178], [93, 189], [93, 195], [94, 35], [94, 83], [94, 89], [94, 92], [94, 101], [94, 129], [94, 138], [94, 176], [94, 182], [94, 184], [94, 196], [95, 4], [95, 5], [95, 12], [95, 20], [95, 26], [95, 27], [95, 62], [95, 96], [95, 135], [95, 165], [95, 183], [95, 199], [96, 1], [96, 4], [96, 12], [96, 26], [96, 62], [96, 95], [96, 135], [96, 151], [96, 165], [96, 183], [96, 199], [97, 11], [97, 17], [97, 36], [97, 51], [97, 60], [97, 133], [97, 150], [97, 157], [97, 164], [98, 61], [98, 65], [98, 71], [98, 75], [98, 104], [98, 106], [98, 137], [98, 188], [98, 190], [99, 28], [99, 33], [99, 43], [99, 76], [99, 84], [99, 127], [99, 140], [99, 146], [99, 149], [99, 160], [99, 174], [99, 197], [100, 9], [100, 13], [100, 57], [100, 115], [100, 128], [100, 134], [100, 147], [100, 156], [101, 35], [101, 80], [101, 83], [101, 89], [101, 92], [101, 94], [101, 117], [101, 138], [101, 176], [101, 182], [101, 184], [101, 196], [102, 22], [102, 68], [102, 81], [102, 111], [102, 112], [102, 123], [102, 198], [103, 37], [103, 86], [103, 126], [103, 145], [104, 61], [104, 65], [104, 69], [104, 71], [104, 75], [104, 98], [104, 113], [104, 132], [104, 137], [104, 188], [104, 190], [105, 2], [105, 33], [105, 39], [105, 76], [105, 84], [105, 122], [105, 124], [105, 149], [105, 194], [106, 3], [106, 53], [106, 61], [106, 65], [106, 98], [106, 137], [106, 171], [107, 10], [107, 41], [107, 45], [107, 85], [107, 87], [107, 110], [107, 143], [107, 144], [107, 195], [108, 1], [108, 44], [108, 67], [108, 115], [108, 135], [108, 151], [108, 163], [108, 168], [108, 180], [108, 199], [109, 18], [109, 22], [109, 29], [109, 58], [109, 90], [109, 148], [109, 153], [109, 185], [109, 191], [109, 198], [110, 19], [110, 41], [110, 50], [110, 93], [110, 107], [110, 131], [110, 144], [110, 177], [110, 189], [110, 195], [111, 22], [111, 29], [111, 66], [111, 68], [111, 81], [111, 102], [111, 112], [111, 123], [111, 198], [112, 66], [112, 68], [112, 81], [112, 102], [112, 111], [112, 123], [112, 155], [112, 198], [113, 38], [113, 69], [113, 71], [113, 75], [113, 104], [113, 132], [113, 172], [113, 173], [113, 181], [113, 188], [113, 190], [114, 0], [114, 6], [114, 8], [114, 15], [114, 30], [114, 63], [114, 77], [114, 91], [114, 120], [114, 130], [114, 139], [114, 152], [114, 192], [114, 193], [115, 23], [115, 44], [115, 57], [115, 67], [115, 100], [115, 108], [115, 128], [115, 147], [115, 156], [115, 163], [115, 180], [116, 34], [116, 42], [116, 55], [116, 56], [116, 78], [116, 159], [116, 162], [116, 170], [116, 179], [117, 5], [117, 20], [117, 27], [117, 35], [117, 52], [117, 80], [117, 92], [117, 101], [117, 182], [117, 184], [117, 196], [118, 40], [118, 70], [118, 72], [118, 79], [118, 82], [118, 88], [118, 150], [118, 154], [118, 164], [118, 167], [119, 0], [119, 30], [119, 34], [119, 55], [119, 63], [119, 120], [119, 139], [119, 152], [119, 192], [120, 0], [120, 8], [120, 30], [120, 34], [120, 55], [120, 63], [120, 77], [120, 114], [120, 119], [120, 139], [120, 152], [120, 192], [121, 14], [121, 38], [121, 47], [121, 93], [121, 125], [121, 173], [121, 177], [121, 178], [121, 181], [122, 2], [122, 33], [122, 39], [122, 76], [122, 84], [122, 105], [122, 124], [122, 141], [122, 149], [122, 194], [123, 29], [123, 59], [123, 66], [123, 68], [123, 81], [123, 102], [123, 111], [123, 112], [123, 155], [123, 186], [123, 198], [124, 2], [124, 39], [124, 73], [124, 105], [124, 122], [124, 141], [124, 149], [124, 194], [125, 14], [125, 19], [125, 38], [125, 47], [125, 50], [125, 93], [125, 121], [125, 172], [125, 173], [125, 177], [125, 178], [125, 181], [125, 189], [126, 6], [126, 15], [126, 37], [126, 46], [126, 86], [126, 103], [126, 145], [127, 25], [127, 28], [127, 43], [127, 48], [127, 76], [127, 99], [127, 140], [127, 146], [127, 160], [127, 174], [127, 197], [128, 13], [128, 23], [128, 57], [128, 67], [128, 100], [128, 115], [128, 134], [128, 136], [128, 147], [128, 156], [128, 163], [128, 180], [129, 83], [129, 89], [129, 94], [129, 138], [129, 176], [130, 6], [130, 8], [130, 15], [130, 37], [130, 46], [130, 77], [130, 91], [130, 114], [130, 139], [130, 193], [131, 10], [131, 41], [131, 50], [131, 93], [131, 110], [131, 144], [131, 177], [131, 189], [131, 195], [132, 38], [132, 69], [132, 71], [132, 75], [132, 104], [132, 113], [132, 172], [132, 181], [132, 188], [132, 190], [133, 11], [133, 17], [133, 25], [133, 28], [133, 48], [133, 51], [133, 60], [133, 97], [133, 140], [133, 146], [133, 157], [133, 197], [134, 9], [134, 13], [134, 23], [134, 54], [134, 57], [134, 64], [134, 100], [134, 128], [134, 136], [134, 147], [134, 156], [134, 161], [135, 4], [135, 44], [135, 62], [135, 95], [135, 96], [135, 108], [135, 151], [135, 163], [135, 165], [135, 168], [135, 199], [136, 9], [136, 13], [136, 23], [136, 32], [136, 54], [136, 57], [136, 64], [136, 128], [136, 134], [136, 147], [136, 156], [136, 161], [137, 53], [137, 61], [137, 65], [137, 71], [137, 75], [137, 98], [137, 104], [137, 106], [137, 171], [137, 188], [137, 190], [138, 83], [138, 89], [138, 94], [138, 101], [138, 129], [138, 176], [139, 0], [139, 6], [139, 8], [139, 30], [139, 63], [139, 77], [139, 91], [139, 114], [139, 119], [139, 120], [139, 130], [139, 152], [139, 192], [139, 193], [140, 25], [140, 28], [140, 43], [140, 48], [140, 99], [140, 127], [140, 133], [140, 146], [140, 160], [140, 174], [140, 197], [141, 2], [141, 39], [141, 73], [141, 122], [141, 124], [141, 194], [142, 16], [142, 18], [142, 58], [142, 90], [142, 153], [142, 158], [142, 162], [142, 166], [142, 170], [142, 185], [142, 191], [143, 10], [143, 24], [143, 31], [143, 41], [143, 45], [143, 59], [143, 85], [143, 87], [143, 107], [143, 144], [143, 155], [144, 10], [144, 41], [144, 45], [144, 85], [144, 87], [144, 107], [144, 110], [144, 131], [144, 143], [144, 195], [145, 6], [145, 15], [145, 37], [145, 46], [145, 86], [145, 103], [145, 126], [146, 25], [146, 28], [146, 43], [146, 48], [146, 99], [146, 127], [146, 133], [146, 140], [146, 160], [146, 174], [146, 197], [147, 13], [147, 23], [147, 57], [147, 67], [147, 100], [147, 115], [147, 128], [147, 134], [147, 136], [147, 156], [147, 163], [147, 180], [148, 18], [148, 22], [148, 29], [148, 58], [148, 90], [148, 109], [148, 153], [148, 185], [148, 191], [148, 198], [149, 2], [149, 33], [149, 39], [149, 76], [149, 84], [149, 99], [149, 105], [149, 122], [149, 124], [149, 194], [150, 17], [150, 36], [150, 40], [150, 60], [150, 72], [150, 88], [150, 97], [150, 118], [150, 154], [150, 164], [151, 1], [151, 4], [151, 44], [151, 96], [151, 108], [151, 135], [151, 163], [151, 168], [151, 183], [151, 199], [152, 0], [152, 8], [152, 30], [152, 63], [152, 77], [152, 91], [152, 114], [152, 119], [152, 120], [152, 139], [152, 192], [153, 16], [153, 18], [153, 22], [153, 58], [153, 90], [153, 109], [153, 142], [153, 148], [153, 158], [153, 166], [153, 185], [153, 191], [154, 36], [154, 40], [154, 70], [154, 72], [154, 79], [154, 82], [154, 88], [154, 118], [154, 150], [154, 164], [154, 167], [155, 24], [155, 31], [155, 45], [155, 59], [155, 66], [155, 81], [155, 85], [155, 87], [155, 112], [155, 123], [155, 143], [155, 186], [156, 9], [156, 13], [156, 23], [156, 57], [156, 64], [156, 100], [156, 115], [156, 128], [156, 134], [156, 136], [156, 147], [156, 161], [157, 11], [157, 17], [157, 25], [157, 28], [157, 36], [157, 48], [157, 51], [157, 60], [157, 97], [157, 133], [157, 164], [158, 16], [158, 18], [158, 78], [158, 90], [158, 142], [158, 153], [158, 162], [158, 166], [158, 170], [158, 185], [158, 191], [159, 16], [159, 34], [159, 42], [159, 55], [159, 56], [159, 78], [159, 116], [159, 162], [159, 166], [159, 170], [159, 179], [160, 25], [160, 28], [160, 33], [160, 43], [160, 48], [160, 76], [160, 99], [160, 127], [160, 140], [160, 146], [160, 174], [160, 197], [161, 9], [161, 13], [161, 32], [161, 54], [161, 57], [161, 64], [161, 134], [161, 136], [161, 156], [161, 169], [161, 175], [162, 16], [162, 42], [162, 56], [162, 78], [162, 116], [162, 142], [162, 158], [162, 159], [162, 166], [162, 170], [162, 179], [163, 1], [163, 23], [163, 44], [163, 67], [163, 108], [163, 115], [163, 128], [163, 135], [163, 147], [163, 151], [163, 168], [163, 180], [163, 199], [164, 11], [164, 17], [164, 36], [164, 40], [164, 51], [164, 60], [164, 72], [164, 97], [164, 118], [164, 150], [164, 154], [164, 157], [165, 4], [165, 12], [165, 26], [165, 62], [165, 95], [165, 96], [165, 135], [165, 199], [166, 16], [166, 18], [166, 56], [166, 78], [166, 90], [166, 142], [166, 153], [166, 158], [166, 159], [166, 162], [166, 170], [166, 179], [166, 185], [166, 191], [167, 7], [167, 21], [167, 40], [167, 49], [167, 70], [167, 74], [167, 79], [167, 82], [167, 88], [167, 118], [167, 154], [167, 187], [168, 1], [168, 44], [168, 67], [168, 108], [168, 135], [168, 151], [168, 163], [168, 180], [168, 199], [169, 7], [169, 9], [169, 21], [169, 32], [169, 49], [169, 54], [169, 64], [169, 74], [169, 161], [169, 175], [169, 187], [170, 16], [170, 42], [170, 56], [170, 78], [170, 116], [170, 142], [170, 158], [170, 159], [170, 162], [170, 166], [170, 179], [171, 3], [171, 53], [171, 61], [171, 106], [171, 137], [172, 14], [172, 38], [172, 69], [172, 71], [172, 75], [172, 113], [172, 125], [172, 132], [172, 173], [172, 178], [172, 181], [172, 188], [172, 190], [173, 14], [173, 38], [173, 47], [173, 69], [173, 113], [173, 121], [173, 125], [173, 172], [173, 177], [173, 178], [173, 181], [174, 25], [174, 28], [174, 33], [174, 43], [174, 48], [174, 76], [174, 99], [174, 127], [174, 140], [174, 146], [174, 160], [174, 197], [175, 7], [175, 9], [175, 21], [175, 32], [175, 49], [175, 54], [175, 64], [175, 74], [175, 161], [175, 169], [175, 187], [176, 83], [176, 89], [176, 94], [176, 101], [176, 129], [176, 138], [176, 184], [176, 196], [177, 14], [177, 19], [177, 47], [177, 50], [177, 93], [177, 110], [177, 121], [177, 125], [177, 131], [177, 173], [177, 178], [177, 189], [178, 14], [178, 38], [178, 47], [178, 93], [178, 121], [178, 125], [178, 172], [178, 173], [178, 177], [178, 181], [179, 16], [179, 34], [179, 42], [179, 55], [179, 56], [179, 78], [179, 116], [179, 159], [179, 162], [179, 166], [179, 170], [180, 1], [180, 23], [180, 44], [180, 67], [180, 108], [180, 115], [180, 128], [180, 147], [180, 163], [180, 168], [181, 14], [181, 38], [181, 69], [181, 71], [181, 75], [181, 113], [181, 121], [181, 125], [181, 132], [181, 172], [181, 173], [181, 178], [181, 188], [181, 190], [182, 20], [182, 27], [182, 35], [182, 52], [182, 80], [182, 92], [182, 94], [182, 101], [182, 117], [182, 184], [182, 196], [183, 1], [183, 4], [183, 12], [183, 26], [183, 62], [183, 95], [183, 96], [183, 151], [183, 199], [184, 35], [184, 80], [184, 83], [184, 89], [184, 92], [184, 94], [184, 101], [184, 117], [184, 176], [184, 182], [184, 196], [185, 18], [185, 22], [185, 58], [185, 90], [185, 109], [185, 142], [185, 148], [185, 153], [185, 158], [185, 166], [185, 191], [186, 24], [186, 31], [186, 59], [186, 66], [186, 81], [186, 87], [186, 123], [186, 155], [187, 7], [187, 21], [187, 49], [187, 70], [187, 74], [187, 79], [187, 82], [187, 167], [187, 169], [187, 175], [188, 38], [188, 69], [188, 71], [188, 75], [188, 98], [188, 104], [188, 113], [188, 132], [188, 137], [188, 172], [188, 181], [188, 190], [189, 19], [189, 47], [189, 50], [189, 93], [189, 110], [189, 125], [189, 131], [189, 177], [189, 195], [190, 38], [190, 69], [190, 71], [190, 75], [190, 98], [190, 104], [190, 113], [190, 132], [190, 137], [190, 172], [190, 181], [190, 188], [191, 16], [191, 18], [191, 58], [191, 90], [191, 109], [191, 142], [191, 148], [191, 153], [191, 158], [191, 166], [191, 185], [192, 0], [192, 8], [192, 30], [192, 34], [192, 55], [192, 63], [192, 77], [192, 114], [192, 119], [192, 120], [192, 139], [192, 152], [193, 6], [193, 8], [193, 15], [193, 37], [193, 46], [193, 77], [193, 91], [193, 114], [193, 130], [193, 139], [194, 2], [194, 33], [194, 39], [194, 73], [194, 84], [194, 105], [194, 122], [194, 124], [194, 141], [194, 149], [195, 10], [195, 41], [195, 45], [195, 50], [195, 93], [195, 107], [195, 110], [195, 131], [195, 144], [195, 189], [196, 27], [196, 35], [196, 80], [196, 89], [196, 92], [196, 94], [196, 101], [196, 117], [196, 176], [196, 182], [196, 184], [197, 25], [197, 28], [197, 43], [197, 48], [197, 99], [197, 127], [197, 133], [197, 140], [197, 146], [197, 160], [197, 174], [198, 22], [198, 29], [198, 58], [198, 68], [198, 81], [198, 102], [198, 109], [198, 111], [198, 112], [198, 123], [198, 148], [199, 1], [199, 4], [199, 44], [199, 62], [199, 95], [199, 96], [199, 108], [199, 135], [199, 151], [199, 163], [199, 165], [199, 168], [199, 183]]
SELF.MAPPING
{0: set(), 1: set(), 2: {73}, 4: set(), 5: set(), 6: {103}, 7: set(), 8: set(), 9: set(), 10: set(), 11: set(), 12: set(), 13: set(), 14: set(), 15: set(), 16: set(), 17: set(), 18: set(), 19: set(), 20: set(), 21: set(), 22: set(), 23: set(), 24: set(), 25: set(), 26: set(), 27: set(), 28: set(), 29: set(), 30: set(), 31: set(), 32: set(), 33: set(), 34: set(), 35: set(), 36: set(), 37: set(), 38: set(), 39: set(), 40: set(), 41: set(), 42: set(), 43: set(), 44: set(), 45: set(), 46: set(), 47: set(), 48: set(), 49: set(), 50: set(), 51: set(), 52: set(), 53: {3}, 54: set(), 55: set(), 56: set(), 57: set(), 58: set(), 59: set(), 60: set(), 61: set(), 62: set(), 63: set(), 64: set(), 65: set(), 66: set(), 67: set(), 68: set(), 69: set(), 70: set(), 71: set(), 72: set(), 74: set(), 75: set(), 76: set(), 77: set(), 78: set(), 79: set(), 80: set(), 81: set(), 82: set(), 83: set(), 84: set(), 85: set(), 86: set(), 87: set(), 88: set(), 89: set(), 90: set(), 91: set(), 92: set(), 93: set(), 94: set(), 95: set(), 96: set(), 97: set(), 98: set(), 99: set(), 100: set(), 101: set(), 102: set(), 104: set(), 105: set(), 106: set(), 107: set(), 108: set(), 109: set(), 110: set(), 111: set(), 112: set(), 113: set(), 114: set(), 115: set(), 116: set(), 117: set(), 118: set(), 119: set(), 120: set(), 121: set(), 122: set(), 123: set(), 124: {73}, 125: set(), 126: set(), 127: set(), 128: set(), 129: set(), 130: set(), 131: set(), 132: set(), 133: set(), 134: set(), 135: set(), 136: set(), 137: set(), 138: set(), 139: set(), 140: set(), 141: set(), 142: set(), 143: set(), 144: set(), 145: {103}, 146: set(), 147: set(), 148: set(), 149: set(), 150: set(), 151: set(), 152: set(), 153: set(), 154: set(), 155: set(), 156: set(), 157: set(), 158: set(), 159: set(), 160: set(), 161: set(), 162: set(), 163: set(), 164: set(), 165: set(), 166: set(), 167: set(), 168: set(), 169: set(), 170: set(), 171: {3}, 172: set(), 173: set(), 174: set(), 175: set(), 176: set(), 177: set(), 178: set(), 179: set(), 180: set(), 181: set(), 182: set(), 183: set(), 184: set(), 185: set(), 186: set(), 187: set(), 188: set(), 189: set(), 190: set(), 191: set(), 192: set(), 193: set(), 194: set(), 195: set(), 196: set(), 197: set(), 198: set(), 199: set()}
PARENTS
[16, 1, 1, 3, 1, 1, 16, 1, 16, 1, 16, 1, 1, 1, 16, 16, 16, 1, 16, 16, 1, 1, 16, 1, 16, 1, 1, 1, 1, 16, 16, 16, 1, 1, 16, 1, 1, 16, 16, 1, 1, 16, 16, 1, 1, 16, 16, 16, 1, 1, 16, 1, 1, 16, 1, 16, 16, 1, 16, 16, 1, 16, 1, 16, 1, 16, 16, 1, 16, 16, 1, 16, 1, 73, 1, 16, 1, 16, 16, 1, 1, 16, 1, 1, 1, 16, 16, 16, 1, 1, 16, 16, 1, 16, 1, 1, 1, 1, 16, 1, 1, 1, 16, 103, 16, 1, 16, 16, 1, 16, 16, 16, 16, 16, 16, 1, 16, 1, 1, 16, 16, 16, 1, 16, 1, 16, 16, 1, 1, 1, 16, 16, 16, 1, 1, 1, 1, 16, 1, 16, 1, 1, 16, 16, 16, 16, 1, 1, 16, 1, 1, 1, 16, 16, 1, 16, 1, 1, 16, 16, 1, 1, 16, 1, 1, 1, 16, 1, 1, 1, 16, 16, 16, 16, 1, 1, 1, 16, 16, 16, 1, 16, 1, 1, 1, 16, 16, 1, 16, 16, 16, 16, 16, 16, 1, 16, 1, 1, 16, 1]
Parents len
200
CLUSTERS
{1, 3, 103, 73, 16}
RANKS
[24, 99, 11, 1, 6, 1, 1, 23, 1, 2, 16, 33, 1, 1, 38, 1, 98, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 3, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 8, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]
3 73 103
v_to_k
{73: 124, 103: 145, 3: 171}
{16: {0, 6, 8, 10, 14, 15, 16, 18, 19, 22, 24, 29, 30, 31, 34, 37, 38, 41, 42, 45, 46, 47, 50, 53, 55, 56, 58, 59, 61, 63, 65, 66, 68, 69, 71, 75, 77, 78, 81, 85, 86, 87, 90, 91, 93, 98, 102, 104, 106, 107, 109, 110, 111, 112, 113, 114, 116, 119, 120, 121, 123, 125, 126, 130, 131, 132, 137, 139, 142, 143, 144, 145, 148, 152, 153, 155, 158, 159, 162, 166, 170, 171, 172, 173, 177, 178, 179, 181, 185, 186, 188, 189, 190, 191, 192, 193, 195, 198}, 1: {1, 2, 4, 5, 7, 9, 11, 12, 13, 17, 20, 21, 23, 25, 26, 27, 28, 32, 33, 35, 36, 39, 40, 43, 44, 48, 49, 51, 52, 54, 57, 60, 62, 64, 67, 70, 72, 74, 76, 79, 80, 82, 83, 84, 88, 89, 92, 94, 95, 96, 97, 99, 100, 101, 105, 108, 115, 117, 118, 122, 124, 127, 128, 129, 133, 134, 135, 136, 138, 140, 141, 146, 147, 149, 150, 151, 154, 156, 157, 160, 161, 163, 164, 165, 167, 168, 169, 174, 175, 176, 180, 182, 183, 184, 187, 194, 196, 197, 199}, 3: set(), 73: set(), 103: set()}
5
{0: {0, 6, 8, 10, 14, 15, 16, 18, 19, 22, 24, 29, 30, 31, 34, 37, 38, 41, 42, 45, 46, 47, 50, 53, 55, 56, 58, 59, 61, 63, 65, 66, 68, 69, 71, 75, 77, 78, 81, 85, 86, 87, 90, 91, 93, 98, 102, 104, 106, 107, 109, 110, 111, 112, 113, 114, 116, 119, 120, 121, 123, 125, 126, 130, 131, 132, 137, 139, 142, 143, 144, 145, 148, 152, 153, 155, 158, 159, 162, 166, 170, 171, 172, 173, 177, 178, 179, 181, 185, 186, 188, 189, 190, 191, 192, 193, 195, 198}, 1: {1, 2, 4, 5, 7, 9, 11, 12, 13, 17, 20, 21, 23, 25, 26, 27, 28, 32, 33, 35, 36, 39, 40, 43, 44, 48, 49, 51, 52, 54, 57, 60, 62, 64, 67, 70, 72, 74, 76, 79, 80, 82, 83, 84, 88, 89, 92, 94, 95, 96, 97, 99, 100, 101, 105, 108, 115, 117, 118, 122, 124, 127, 128, 129, 133, 134, 135, 136, 138, 140, 141, 146, 147, 149, 150, 151, 154, 156, 157, 160, 161, 163, 164, 165, 167, 168, 169, 174, 175, 176, 180, 182, 183, 184, 187, 194, 196, 197, 199}, 2: set(), 3: set(), 4: set()}
[0, 1, 1, 0, 1, 1, 0, 1, 0, 1, 0, 1, 1, 1, 0, 0, 0, 1, 0, 0, 1, 1, 0, 1, 0, 1, 1, 1, 1, 0, 0, 0, 1, 1, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 0, 1, 1, 0, 1, 1, 0, 1, 0, 0, 1, 0, 0, 1, 0, 1, 0, 1, 0, 0, 1, 0, 0, 1, 0, 1, 1, 1, 0, 1, 0, 0, 1, 1, 0, 1, 1, 1, 0, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 1, 1, 0, 0, 0, 1, 0, 1, 0, 0, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 0, 1, 0, 1, 1, 0, 0, 0, 0, 1, 1, 0, 1, 1, 1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 1, 0, 1, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 1, 1, 0, 1]
- The code below is correct, make it efficient; remove v_to_k by chaning map_border, see if can directly do all the ops on the numpy array
- Then move to GPU, perhaps needing to make less efficient on CPU first by doing all distance calculations (200*200, only need upper triangle) on CPU, then moving over…
- Q: Do I need the mapping? EOD, just looking at parents…
In this code, np.unique(a) returns an array of the unique elements in a, sorted in ascending order. Then, np.searchsorted(unique_elements, a) returns an array of the same shape as a, where each element of a is replaced by its index in unique_elements. This effectively remaps the elements of a to the range 0 to len(a)-1.
Final CPU Implementation
- Make a set of core points and a mapping of border points to their core.
- Use union find to label the array of parents, forming clusters of core points or connected groups of core points.
- Label border points with the parents of their corresponding core points.
from collections import Counter
class DbscanCustom:
def __init__(self, eps, min_samples):
self.eps = eps
self.min_samples = min_samples
self.edges = [] # for union find
self.core_points = set()
self.border_to_core = {}
def find(self,i):
= i
res while self.parents[res]!=res:
self.parents[res] = self.parents[self.parents[res]]
= self.parents[res]
res return res
def union(self, i,j):
= self.find(i), self.find(j)
pi, pj if pi == pj: return
if self.rank[pi] >= self.rank[pj]:
self.parents[pj] = pi
self.rank[pi] += self.rank[pj]
else:
self.parents[pi] = pj
self.rank[pj] += self.rank[pi]
def fit_predict(self, X):
"""
Use union find to label the array of parents, forming clusters of core points or connected groups of core points.
Label border points with the parents of their corresponding core points.
"""
self.parents = list(range(len(X)))
self.rank = [1]*len(X)
self.X = X
self.label_points(X)
for i, j in self.edges:
if i in self.core_points and j in self.core_points:
self.union(i,j)
for border_idx, core_idx in self.border_to_core.items():
self.parents[border_idx] = self.parents[core_idx]
= np.array(self.parents)
result_orig_label = np.unique(result_orig_label)
unique_elements = np.searchsorted(unique_elements,result_orig_label)
result_final_label return result_final_label
def label_points(self, X):
"""1. Label the points as core, border, or noise."""
self.X = X
self.get_core_points()
self.get_border_points()
# Ignore the noise points, don't need them later
def get_core_points(self):
for i,x in enumerate(self.X):
if self.map_core(i):
self.core_points.add(i)
def get_border_points(self):
for i,x in enumerate(X):
if i not in self.core_points:
self.map_border(i)
def map_core(self, i):
"""A point is a core point if at least a specified number min_samples of neighboring points fall within eps radius"""
= 0
num_within for j,x in enumerate(self.X):
if i == j:
continue
if self.distance(i,j) <= self.eps:
self.edges.append([i,j])
+= 1
num_within if num_within >= self.min_samples:
return True
return False
def map_border(self, border_idx):
"""A border point is a point that has fewer than min_samples within eps, but lies within the radius eps of a core point"""
= self.eps
min_dist = None
min_idx for core_idx in self.core_points:
if border_idx == core_idx:
continue
if self.distance(border_idx,core_idx) <= self.eps:
if self.distance(border_idx,core_idx) <= min_dist:
= self.distance(border_idx,core_idx)
min_dist = core_idx
min_idx if min_idx: self.border_to_core[border_idx] = min_idx
def distance(self,i,j):
"""Distance between points of self.X with indices i and j"""
return np.sqrt(np.sum((self.X[i]-self.X[j])**2))
= make_moons(n_samples=200, noise=0.05, random_state=0)
X, y = DbscanCustom(eps=0.2, min_samples=5)
db_custom = db_custom.fit_predict(X)
y_db == 0, 0],
plt.scatter(X[y_db == 0, 1],
X[y_db ='lightblue',
c='black',
edgecolor='o',
marker=40,
s='Cluster 1')
label== 1, 0],
plt.scatter(X[y_db == 1, 1],
X[y_db ='lightblue',
c='red',
edgecolor='o',
marker=40,
s='Cluster 1')
label'Feature 1')
plt.xlabel('Feature 2')
plt.ylabel(
plt.legend()
plt.tight_layout() plt.show()
%timeit -n 10 db_custom.fit_predict(X)
504 ms ± 220 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)
Naive GPU implementation (takes more time than CPU implementation)
- Get distance matrix
- Label < min_dist as 1 else 0
- Sum (need min_samples +1 bc of 0 diag for dist between point and itself)
- If sum > min_dist+1, then add to core
- Parallelize computation of cluster labels, not sure how…
from collections import Counter
class DbscanGPU:
def __init__(self, eps, min_samples):
self.eps = eps
self.min_samples = min_samples
self.core_points = set()
self.border_to_core = {}
def find(self,i):
= i
res while self.parents[res]!=res:
self.parents[res] = self.parents[self.parents[res]]
= self.parents[res]
res return res
def union(self, i,j):
= self.find(i), self.find(j)
pi, pj if pi == pj: return
if self.rank[pi] >= self.rank[pj]:
self.parents[pj] = pi
self.rank[pi] += self.rank[pj]
else:
self.parents[pi] = pj
self.rank[pj] += self.rank[pi]
def fit_predict(self, X):
"""
Use union find to label the array of parents, forming clusters of core points or connected groups of core points.
Label border points with the parents of their corresponding core points.
"""
self.parents = list(range(len(X)))
self.rank = [1]*len(X)
self.X = X
self.label_points(X)
#note self.dists is a square matrix
for i in range(len(self.dists)):
for j in range(len(self.dists)):
if self.core_points[i] and self.core_points[j] and self.dists[i,j] <= self.eps:
self.union(i,j)
for border_idx, core_idx in self.border_to_core.items():
self.parents[border_idx] = self.parents[core_idx]
= np.array(self.parents)
result_orig_label = np.unique(result_orig_label)
unique_elements = np.searchsorted(unique_elements,result_orig_label)
result_final_label return result_final_label
def label_points(self, X):
"""1. Label the points as core, border, or noise."""
self.X = X
self.calc_distances()
self.get_core_points()
self.get_border_points()
# Ignore the noise points, don't need them later
def get_core_points(self):
self.core_points = torch.sum(self.dists<=self.eps,dim = 1)>self.min_samples
def get_border_points(self):
self.border_points = set()
for i,x in enumerate(self.core_points):
if not x: #if False
self.border_to_core[i]=self.dists[i][i+1:].argmin().item() + i + 1
def calc_distances(self):
"""Calculate the distance matrix between a point and every other point"""
self.dists=(self.X[None,:,:]-self.X[:,None,:]).norm(dim=2)
= map(torch.tensor,make_moons(n_samples=200, noise=0.05, random_state=0))
X, y = DbscanGPU(eps=0.2, min_samples=5)
db_custom_gpu = db_custom_gpu.fit_predict(X)
y_db == 0, 0],
plt.scatter(X[y_db == 0, 1],
X[y_db ='lightblue',
c='black',
edgecolor='o',
marker=40,
s='Cluster 1')
label== 1, 0],
plt.scatter(X[y_db == 1, 1],
X[y_db ='lightblue',
c='red',
edgecolor='o',
marker=40,
s='Cluster 1')
label'Feature 1')
plt.xlabel('Feature 2')
plt.ylabel(
plt.legend()
plt.tight_layout() plt.show()
%timeit -n 10 db_custom_gpu.fit_predict(X)
652 ms ± 52.1 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)
Simple optimized GPU implementation
GitHub Copilot Your DBSCAN implementation looks quite good, but there are a few areas where it could be improved for better GPU utilization:
Batching distance calculations: Calculating pairwise distances can be memory-intensive for large datasets. You can batch these calculations to reduce memory usage.
Avoiding Python loops: Python loops are slow and cannot be parallelized on a GPU. You should replace the loops in fit_predict and get_border_points with vectorized operations.
Using in-place operations: In-place operations can reduce memory usage and improve performance. For example, you can use torch.add_ instead of + to update self.rank.
class DbscanGPU:
def __init__(self, eps, min_samples):
self.eps = eps
self.min_samples = min_samples
def fit_predict(self, X):
# Calculate pairwise distances in batches to save memory
= torch.cdist(X, X)
dists
# Find core points
= (dists <= self.eps).sum(dim=1) >= self.min_samples
core_points
# Label each point with its own index initially
= torch.arange(len(X), device=X.device)
labels
# Union-find for core points
for i in range(len(X)):
if core_points[i]:
for j in range(i + 1, len(X)):
if core_points[j] and dists[i, j] <= self.eps:
# Find the roots of i and j
= self.find(labels, i)
root_i = self.find(labels, j)
root_j
# Union i and j by assigning the smaller label to the larger one
if root_i < root_j:
= root_i
labels[root_j] else:
= root_j
labels[root_i]
# Assign each non-core point to the same cluster as its nearest core point
for i in range(len(X)):
if not core_points[i]:
= labels[dists[i, core_points].argmin()]
labels[i]
# Convert labels to consecutive integers starting from 0
= torch.unique(labels, return_inverse=True)
_, labels
return labels.cpu().numpy()
@staticmethod
def find(labels, i):
if labels[i] != i:
= DbscanGPU.find(labels, labels[i])
labels[i] return labels[i]
class DbscanGPU:
def __init__(self, eps, min_samples):
self.eps = eps
self.min_samples = min_samples
def fit_predict(self, X):
# Calculate pairwise distances in batches to save memory
= torch.cdist(X, X)
dists
# Find core points
= (dists <= self.eps).sum(dim=1) >= self.min_samples
core_points
# Label each point with its own index initially
= torch.arange(len(X), device=X.device)
labels
# Union-find for core points
for i in range(len(X)):
if core_points[i]:
for j in range(i + 1, len(X)):
if core_points[j] and dists[i, j] <= self.eps:
# Find the roots of i and j
= self.find(labels, i)
root_i = self.find(labels, j)
root_j
# Union i and j by assigning the smaller label to the larger one
if root_i < root_j:
= root_i
labels[root_j] else:
= root_j
labels[root_i]
# Assign each non-core point to the same cluster as its nearest core point
for i in range(len(X)):
if not core_points[i]:
= labels[dists[i, core_points].argmin()]
labels[i]
# Convert labels to consecutive integers starting from 0
= torch.unique(labels, return_inverse=True)
_, labels
return labels.cpu().numpy()
@staticmethod
def find(labels, i):
if labels[i] != i:
= DbscanGPU.find(labels, labels[i])
labels[i] return labels[i]
# @staticmethod
# def find(labels, i):
# root = i
# while labels[root] != root:
# root = labels[root]
# # Path compression
# while labels[i] != i:
# parent = labels[i]
# labels[i] = root
# i = parent
# return root
= map(torch.tensor,make_moons(n_samples=200, noise=0.05, random_state=0))
X, y = DbscanGPU(eps=0.2, min_samples=5)
db_custom_gpu = db_custom_gpu.fit_predict(X)
y_db == 0, 0],
plt.scatter(X[y_db == 0, 1],
X[y_db ='lightblue',
c='black',
edgecolor='o',
marker=40,
s='Cluster 1')
label== 1, 0],
plt.scatter(X[y_db == 1, 1],
X[y_db ='lightblue',
c='red',
edgecolor='o',
marker=40,
s='Cluster 1')
label'Feature 1')
plt.xlabel('Feature 2')
plt.ylabel(
plt.legend()
plt.tight_layout() plt.show()
%timeit -n 10 db_custom_gpu.fit_predict(X)
336 ms ± 45 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)
329/504)*100 (
65.27777777777779
Out-of-the box optimized DBSCAN implementation
!pip install \
--extra-index-url=https://pypi.nvidia.com \
-cu12==23.12.* dask-cudf-cu12==23.12.* cuml-cu12==23.12.* \
cudf-cu12==23.12.* cuspatial-cu12==23.12.* cuproj-cu12==23.12.* \
cugraph-cu12==23.12.* cucim-cu12==23.12.* pylibraft-cu12==23.12.* \
cuxfilter-dask-cu12==23.12.*
raft
from cuml import DBSCAN as cuml
= DBSCAN(eps=0.2, min_samples=5)
db_gpu_opt = db_gpu_opt.fit_predict(X)
y_db == 0, 0],
plt.scatter(X[y_db == 0, 1],
X[y_db ='lightblue',
c='black',
edgecolor='o',
marker=40,
s='Cluster 1')
label== 1, 0],
plt.scatter(X[y_db == 1, 1],
X[y_db ='lightblue',
c='red',
edgecolor='o',
marker=40,
s='Cluster 1')
label'Feature 1')
plt.xlabel('Feature 2')
plt.ylabel(
plt.legend()
plt.tight_layout() plt.show()
%timeit -n 10 db_gpu_opt.fit_predict(X)
5.7 ms ± 858 µs per loop (mean ± std. dev. of 7 runs, 10 loops each)
- Almost 100x faster on T4!
= torch.tensor([[1, 2],[3, 4], [5, 6]])
X
X= X.shape
n,m None,:,:]-X[:,None,:] X[
tensor([[[ 0, 0],
[ 2, 2],
[ 4, 4]],
[[-2, -2],
[ 0, 0],
[ 2, 2]],
[[-4, -4],
[-2, -2],
[ 0, 0]]])
None,:,:].float() X[
tensor([[[1., 2.],
[3., 4.],
[5., 6.]]])
None,:].float() X[:,
tensor([[[1., 2.]],
[[3., 4.]],
[[5., 6.]]])
=(X[None,:,:].float()-X[:,None,:].float()).norm(dim=2)
out out
tensor([[0.0000, 2.8284, 5.6569],
[2.8284, 0.0000, 2.8284],
[5.6569, 2.8284, 0.0000]])
=0.2
eps<= 0.2).sum(dim=1) (out
tensor([1, 1, 1])
None,:,:].shape, X[:,None,:].shape X.shape, X[
(torch.Size([3, 2]), torch.Size([1, 3, 2]), torch.Size([3, 1, 2]))
None,:,:,None].float()-X[:,:,None].float()).norm(dim=2) (X[
tensor([[[0.],
[0.],
[0.]]])
import math
1-6)**2) math.sqrt((
5.0
None,:,:].shape X[
torch.Size([1, 3, 2])
None].shape X.T[:,:,
torch.Size([2, 3, 1])
sum((X[None,:,:]- X.T[:,:,None]) ** 2, dim=(1,))) torch.sqrt(torch.
tensor([[0.0000, 1.7321],
[1.7321, 0.0000]])
sum((X[None,:,:]- X.T[:,:,None]) ** 2,dim=(0,1,2))) torch.sqrt(torch.
tensor(2.4495)
import torch
def pairwise_distance(matrix):
# Expand dims to allow broadcasting
= matrix.unsqueeze(1)
expanded_a = matrix.unsqueeze(0)
expanded_b
# Compute pairwise distance using Euclidean distance formula
= torch.sqrt(torch.sum((expanded_a - expanded_b) ** 2, dim=-1))
distances
return distances
# Test the function
= torch.tensor([[1, 2], [3, 4], [5, 6]], dtype=torch.float)
matrix = pairwise_distance(matrix)
distances print(distances)
tensor([[0.0000, 2.8284, 5.6569],
[2.8284, 0.0000, 2.8284],
[5.6569, 2.8284, 0.0000]])
import torch
def pairwise_distance(matrix):
# Expand dims to allow broadcasting
= matrix[:,None,:]
expanded_a = matrix[None,:,:]
expanded_b
# Compute pairwise distance using Euclidean distance formula
= torch.sqrt(torch.sum((expanded_a - expanded_b) ** 2, dim=-1))
distances
return distances
# Test the function
= torch.tensor([[1, 2], [3, 4], [5, 6]], dtype=torch.float)
matrix = pairwise_distance(matrix)
distances print(distances)
tensor([[0.0000, 2.8284, 5.6569],
[2.8284, 0.0000, 2.8284],
[5.6569, 2.8284, 0.0000]])
1].argmin().item() distances[
1
1,2] distances[
tensor(2.8284)
1-5)**2+(2-6)**2) math.sqrt((
5.656854249492381
= torch.tensor([[1, 2, 2],[2,3,3],[1,2,1]])
distances sum(distances>1,dim=1) torch.
tensor([2, 3, 1])
cuML DBSCAN > Naive GPU DBSCAN
Spatial indexing is a common technique used to speed up DBSCAN implementations. The main idea is to organize the data in such a way that queries about the spatial proximity of data points can be answered more efficiently.
In the context of DBSCAN, one of the most computationally expensive parts of the algorithm is finding all points within a certain distance (eps) of a given point. This is because, in a naive implementation, you would need to calculate the distance between the given point and every other point in the dataset, which is an O(n) operation.
Spatial indexes, such as k-d trees or R-trees, can significantly speed up this operation. These data structures partition the data space into regions, and points within the same region are likely to be close to each other. This allows the algorithm to quickly eliminate many points that are too far away from the given point without having to calculate the exact distance.
In the cuML implementation of DBSCAN, the use of spatial indexing is abstracted away by the underlying CUDA libraries. The VertexDeg::run function, which calculates the degree of each vertex in the adjacency graph representation of the dataset, is likely where the spatial indexing is used. Different algorithms can be used for this calculation based on the algo parameter, some of which may use spatial indexing.
However, it’s important to note that while spatial indexing can significantly speed up DBSCAN on CPUs, the situation is a bit different on GPUs. Due to the parallel nature of GPUs, sometimes a brute-force approach (calculating the distance between all pairs of points) can be faster than building and querying a spatial index. The optimal approach depends on the specific characteristics of the GPU and the dataset.