当前位置:Gxlcms > 数据库问题 > DBSCAN算法实现---Python

DBSCAN算法实现---Python

时间:2021-07-01 10:21:17 帮助过:94人阅读

def ecludDist(x, y):
    """Return the Euclidean distance between two points.

    Args:
        x: Coordinates of the first point (any 1-D sequence of numbers).
        y: Coordinates of the second point, same length as ``x``.

    Returns:
        The straight-line distance ``sqrt(sum((x - y) ** 2))`` as a float.
    """
    diff = np.asarray(x, dtype=float) - np.asarray(y, dtype=float)
    # The differences must be squared (not square-rooted) before summing;
    # taking sqrt of a negative difference yields NaN.
    return np.sqrt(np.sum(np.square(diff)))


def euclidean_distance(data):
    """Build the full pairwise Euclidean distance matrix for ``data``.

    Args:
        data: Iterable of points, each a sequence of coordinates. It is
            traversed twice, so it must be re-iterable (list, array, ...).

    Returns:
        A list of rows where ``rows[i][j]`` is the distance between the
        i-th and the j-th point. An empty input gives an empty list.
    """
    return [[ecludDist(i, j) for j in data] for i in data]

  3、点种类的划分:

 1 def classify(z):   #z为通过欧式距离计算所得的矩阵
 2     pts = []
 3     for row in z:
 4         density = np.sum(z.ix[row] < eps)
 5         pts = 0   
 6         if density > MinPts:
 7             pts = 1      #核心点
 8         elif density > 1:
 9             pts = 2    #边界点
10         else:
11             pts = 0    #离群点
12         pts.append(pts)
13 return pts

  4、将每个点的邻域作为一个类,并合并有交集的邻域:

 1 def point_type:
 2     cluster = dict()
 3     i = 0
 4     for row in z:
 5         cluster[i] = np.where(z.ix[row] < eps)[0]
 6         i = i+1
 7     for i in range(len(cluster)):
 8         for j in range(len(cluster)):
 9             if len(set(cluster[i]) & set(cluster[j])) > 0 and i!=j:
10                 cluster[i] = cluster[i] | cluster[j]
11                 cluster[j] = []

  5、找出独立的邻域(即合并后非空的类):

def independent_filed(cluster):
    """Keep only the non-empty clusters and renumber them from zero.

    Args:
        cluster: Dict mapping a sortable key to a sized collection of
            member indices; empty collections mark absorbed clusters.

    Returns:
        A new dict whose keys are ``0..k-1`` and whose values are the
        non-empty collections of ``cluster``, taken in ascending key order.
    """
    # Iterating the sorted keys (rather than range(len(cluster))) avoids a
    # KeyError when the keys are not exactly 0..n-1.
    # ``len(...) > 0`` is kept instead of truthiness because the members may
    # be NumPy arrays, whose truth value is ambiguous.
    survivors = [cluster[key] for key in sorted(cluster) if len(cluster[key]) > 0]
    return dict(enumerate(survivors))

  6、对最后聚类的结果标记

 1 def mark(df):    #传进来的是转换后的数据
 2     for i in range(result):   #result 为一个字典型结构
 3         for j in result[i]:
 4             df.at[j,type] = i     #新建一列,并给它把i值添加上去
 5 
 6 
 7 plt.scatter(
 8     df[one],
 9     df[two],
10     c=df[type])
11     

最后把这些函数按顺序封装起来就 OK 啦:先计算距离矩阵,再划分点的种类,然后合并邻域、找出独立的类,最后给每个点打上类别标记并绘图。

DBSCAN算法实现---Python

标签:之间   领域   算法实现   字典   lse   return   point   scan   python   

人气教程排行