# Title: 【上海校区】python实现 模糊C均值聚类算法(Fuzzy-C-Means)
# Author: 不二晨    Posted: 2018-10-19 09:44
#
# Fuzzy C-Means clustering in Python 3. The data set is iris; place the data
# file in the same directory as this script.
# Algorithm principle and steps:
# https://wenku.baidu.com/view/ee968c00eff9aef8941e06a2.html
import copy
import math
import random
import time
import sys
import matplotlib.pyplot as plt
import matplotlib.animation as animation
import decimal
# Upper bound for the random integers drawn when initialising the
# membership matrix U.
MAX = 10000.0

# Convergence threshold used by the end condition: iteration stops once no
# membership value changes by more than this amount.
Epsilon = 1e-08
def import_data_format_iris(file):
    """Load an iris-style CSV file into feature rows and integer labels.

    Every non-blank line is split on commas; all fields but the last are
    converted to float and the last field is the class name.
    Data source: http://archive.ics.uci.edu/ml/machine-learning-databases/iris/

    Args:
        file: Path of the data file (converted with ``str`` before opening).

    Returns:
        A ``(data, cluster_location)`` tuple. ``data`` is a list of float
        lists, one per sample. ``cluster_location`` holds the matching label:
        0 for "Iris-setosa", 1 for "Iris-versicolor", 2 for anything else.

    Raises:
        ValueError: If a feature field cannot be parsed as a float.
    """
    label_codes = {"Iris-setosa": 0, "Iris-versicolor": 1}
    data = []
    cluster_location = []
    with open(str(file), 'r') as f:
        for line in f:
            current = line.strip().split(",")
            # Blank lines (the published iris.data ends with one) split into
            # a single empty field; they carry no sample, so skip them.
            if len(current) < 2:
                continue
            *features, label = current
            data.append([float(value) for value in features])
            # The line was stripped above, so the label has no trailing
            # newline; unknown names fall back to class 2.
            cluster_location.append(label_codes.get(label, 2))
    print("加载数据完毕")
    return data, cluster_location
def randomise_data(data):
    """Return a shuffled copy of ``data`` together with the permutation used.

    Args:
        data: Indexable sequence of samples.

    Returns:
        A ``(new_data, order)`` tuple where ``new_data[i] == data[order[i]]``.
        ``order`` can be handed to ``de_randomise_data`` to undo the shuffle.
    """
    order = list(range(len(data)))
    random.shuffle(order)
    new_data = [data[source_index] for source_index in order]
    return new_data, order
def de_randomise_data(data, order):
    """Undo a shuffle produced by ``randomise_data``.

    Args:
        data: Sequence in shuffled order.
        order: The permutation returned by ``randomise_data``; element
            ``data[i]`` is written back to position ``order[i]``.

    Returns:
        A new list with the elements of ``data`` in their original order.
        Positions never named in ``order`` are left as empty lists.
    """
    new_data = [[] for _ in range(len(data))]
    for shuffled_index, original_index in enumerate(order):
        new_data[original_index] = data[shuffled_index]
    return new_data
def print_matrix(list):
    """Print a matrix one row per line.

    Args:
        list: Iterable of rows. The parameter name shadows the builtin; it
            is kept unchanged so existing keyword callers keep working.
    """
    # Print each row on its own line rather than the whole matrix once per
    # row.
    for row in list:
        print(row)
def initialise_U(data, cluster_number):
    """Build a random membership matrix U whose rows each sum to 1.

    For every sample, ``cluster_number`` random integers are drawn from
    ``[1, int(MAX)]`` and divided by their sum. Reads the module-level
    constant ``MAX``.

    Args:
        data: The samples; only its length is used.
        cluster_number: Number of clusters (columns of U).

    Returns:
        A list of ``len(data)`` rows, each a list of ``cluster_number``
        floats.
    """
    upper = int(MAX)
    U = []
    for _ in range(len(data)):
        draws = [random.randint(1, upper) for _ in range(cluster_number)]
        # Normalise the draws so the memberships of one sample add up to 1.
        total = float(sum(draws))
        U.append([draw / total for draw in draws])
    return U
def distance(point, center):
    """Return the Euclidean distance between two coordinate sequences.

    Args:
        point: Sequence of numbers.
        center: Sequence of numbers of the same length.

    Returns:
        The Euclidean distance as a float, or ``-1`` when the two sequences
        differ in length (sentinel kept for existing callers).
    """
    if len(point) != len(center):
        return -1
    # Sum the squared per-coordinate differences, then take the root.
    squared = 0.0
    for p, c in zip(point, center):
        squared += abs(p - c) ** 2
    return math.sqrt(squared)
def end_conditon(U, U_old):
    """Report whether the membership matrix has stopped changing.

    Compares ``U`` and ``U_old`` element by element against the module-level
    threshold ``Epsilon``. The misspelled function name is kept because
    callers use it.

    Args:
        U: Current membership matrix (list of rows).
        U_old: Membership matrix from the previous iteration, same shape.

    Returns:
        ``False`` as soon as one element differs by more than ``Epsilon``,
        ``True`` otherwise.
    """
    for row, old_row in zip(U, U_old):
        for value, old_value in zip(row, old_row):
            if abs(value - old_value) > Epsilon:
                return False
    return True
def normalise_U(U):
    """Harden the membership matrix in place at the end of clustering.

    In every row the largest membership becomes 1 and all others become 0.
    If several entries tie for the maximum, each of them becomes 1.

    Args:
        U: Membership matrix (list of non-empty rows); modified in place.

    Returns:
        The same ``U`` object, now containing only 0 and 1.
    """
    for row in U:
        maximum = max(row)
        for j, value in enumerate(row):
            row[j] = 1 if value == maximum else 0
    return U
# The fuzziness exponent m works best in the range [1.5, 2.5].
def fuzzy(data, cluster_number, m):
    """Run fuzzy C-means clustering and return the hardened membership matrix.

    Starting from a random membership matrix, the loop alternates between
    recomputing the cluster centres and recomputing the memberships until
    ``end_conditon`` reports that U has stopped changing. The result is then
    passed through ``normalise_U``.

    Args:
        data: Non-empty list of samples, each a list of numbers of equal
            length.
        cluster_number: Number of clusters.
        m: Fuzziness exponent; must not equal 1 because the update uses
            ``2 / (m - 1)``.

    Returns:
        The membership matrix after ``normalise_U``: one row per sample, one
        column per cluster, containing only 0 and 1.
    """
    # Initialise the membership matrix U.
    U = initialise_U(data, cluster_number)
    exponent = 2 / (m - 1)
    dimensions = len(data[0])

    # Keep updating U until it converges.
    while True:
        # Snapshot of U, needed to evaluate the end condition.
        U_old = copy.deepcopy(U)

        # Cluster centres: per cluster j and feature i, the average of
        # data[k][i] weighted by U[k][j] ** m.
        C = []
        for j in range(cluster_number):
            weights = [row[j] ** m for row in U]
            weight_total = sum(weights)
            C.append([
                sum(w * sample[i] for w, sample in zip(weights, data))
                / weight_total
                for i in range(dimensions)
            ])

        # Distance from every sample to every centre, used to update U.
        distance_matrix = [
            [distance(sample, center) for center in C] for sample in data
        ]

        # Membership update.
        for i, row in enumerate(distance_matrix):
            coincident = [d == 0 for d in row]
            hits = sum(coincident)
            for j in range(cluster_number):
                if hits:
                    # The sample sits exactly on one or more centres; the
                    # ratio formula would divide by zero, so share the full
                    # membership among those centres instead.
                    U[i][j] = 1.0 / hits if coincident[j] else 0.0
                else:
                    U[i][j] = 1 / sum(
                        (row[j] / d) ** exponent for d in row
                    )

        if end_conditon(U, U_old):
            print("结束聚类")
            break

    print("标准化 U")
    U = normalise_U(U)
    return U
def checker_iris(final_location):
    """Score a hardened membership matrix against the iris class layout.

    The rows are treated as three consecutive groups of 50 samples. For each
    group the most frequent cluster is counted as the correct one, and the
    running total of correct samples is printed after each group.

    Args:
        final_location: Matrix with at least 150 rows whose entries are 0 or
            1. It may have any number of cluster columns.

    Returns:
        The accuracy as a string: the percentage of the 150 samples that
        fall in their group's majority cluster.
    """
    class_count = 3
    samples_per_class = 50
    cluster_count = len(final_location[0])

    right = 0.0
    for k in range(class_count):
        # One counter per cluster column so matrices with more than three
        # clusters do not overflow the tally.
        checker = [0] * cluster_count
        start = samples_per_class * k
        for row in final_location[start:start + samples_per_class]:
            for j in range(cluster_count):
                if row[j] == 1:
                    checker[j] += 1
        right += max(checker)
        print(right)
    answer = right / 150 * 100
    return "准确度:" + str(answer) + "%"