import numpy as np import matplotlib.pyplot as plt import pandas as pd
from sklearn.datasets import make_blobs

# Synthetic 2-D data set: 500 samples drawn around four fixed blob centres.
# X holds the coordinates, y the index of the centre each sample came from.
# random_state=0 pins the generator so the same points are produced each run.
centers = [(-3, 0), (3, 2), (-4, 5), (0, 6)]
X, y = make_blobs(
    n_samples=500,
    centers=centers,
    n_features=2,
    random_state=0,
)
print('数据集X的形状为:', X.shape)
# Global matplotlib text settings, applied in a single update:
#   font.size          - 14 pt base font size
#   font.sans-serif    - SimHei, a CJK typeface, so the Chinese labels and
#                        titles used by the plots below can be rendered
#   axes.unicode_minus - False: draw minus signs with the ASCII hyphen
#                        instead of the Unicode minus glyph
plt.rcParams.update({
    'font.size': 14,
    'font.sans-serif': 'SimHei',
    'axes.unicode_minus': False,
})
# Open a 6x4 inch figure and let DrawElbowKMeans draw into it (judging by its
# name, an elbow curve for picking the number of K-Means clusters).
# NOTE(review): DrawElbowKMeans is neither defined nor imported in the visible
# part of this file - confirm it is provided elsewhere before running.
plt.figure(figsize=(6, 4))
DrawElbowKMeans(X=X)
# Fix: the original wrote `plt.show` without parentheses, which only
# references the function and never actually displays the figure.
plt.show()
from sklearn.preprocessing import MinMaxScaler
from sklearn.cluster import KMeans

# Rescale every feature to MinMaxScaler's default [0, 1] range before
# clustering; `scale` keeps the fitted scaler, `dataScale` the scaled samples.
data = X
scale = MinMaxScaler().fit(data)
dataScale = scale.transform(data)

# Fit K-Means with 4 clusters on the scaled data.
# random_state=0 makes the clustering reproducible, matching the fixed seed
# already used for make_blobs; without it the cluster ids (and therefore the
# comparison plots further down) can change from run to run.
# n_init=10 is spelled out because the default number of restarts differs
# between scikit-learn versions.
kmeans = KMeans(n_clusters=4, n_init=10, random_state=0).fit(dataScale)
# Sample positions for each ground-truth class 0..3.
# np.where(mask) returns a 1-tuple holding the index array, which is the form
# the plotting code below uses to index X.
index_y0, index_y1, index_y2, index_y3 = (
    np.where(y == cls) for cls in range(4)
)

# Cluster id assigned to every sample by the fitted K-Means model.
labels = kmeans.labels_

# Sample positions for each K-Means cluster 0..3 (same 1-tuple form as above).
index_label0, index_label1, index_label2, index_label3 = (
    np.where(labels == cluster) for cluster in range(4)
)
# One wide figure with two panels: ground-truth classes on the left,
# K-Means clusters on the right. Every group is drawn in black and is
# distinguished only by its marker shape.
p = plt.figure(figsize=(12, 4))
marker_styles = ('.', 'o', '*', 'v')

# Left panel: samples grouped by their original class label.
ax = p.add_subplot(1, 2, 1)
for group_idx, marker_style in zip(
        (index_y0, index_y1, index_y2, index_y3), marker_styles):
    plt.scatter(X[group_idx, 0], X[group_idx, 1], c='k', marker=marker_style)
plt.legend(['类0', '类1', '类2', '类3'])
plt.title('原始样本类别')

# Right panel: the same samples grouped by the cluster K-Means assigned.
ax = p.add_subplot(1, 2, 2)
for group_idx, marker_style in zip(
        (index_label0, index_label1, index_label2, index_label3),
        marker_styles):
    plt.scatter(X[group_idx, 0], X[group_idx, 1], c='k', marker=marker_style)
plt.legend(['簇0', '簇1', '簇2', '簇3'])
plt.title('聚类结果')

plt.show()
print('原始数据集X的形状为:', X.shape)

# Append the class label and the cluster label to X as two extra columns:
# column 2 = original class, column 3 = K-Means cluster.
X_yl = np.column_stack((X, y, labels))
print('原始数据集与类标签、聚类标签合并后的数据集X_yl的形状为:', X_yl.shape)
print('原始数据集与类标签、聚类标签合并后的数据集X_yl的前5行为:\n', X_yl[:5])

# Row positions of each original class (0..3), read from column 2.
index_0, index_1, index_2, index_3 = (
    np.where(X_yl[:, 2] == cls) for cls in range(4)
)

# Re-stack the rows class by class so that all samples of class 0 come first,
# then class 1, class 2 and class 3.
X_yl1 = np.vstack(
    [X_yl[rows] for rows in (index_0, index_1, index_2, index_3)]
)
print('原始数据集按类组织后的数据集X_yl1的形状为:', X_yl1.shape)
print('原始数据集按类组织后的数据集X_yl1的前5行为:\n', X_yl1[:5])
# Compare, sample by sample, the original class label with the cluster label
# on the class-ordered array X_yl1 (column 2 = class, column 3 = cluster).
sample_positions = range(y.size)

plt.figure(figsize=(12, 4))
plt.scatter(sample_positions, X_yl1[:, 2], c='k', marker='.')
# Cluster labels are shifted up by 0.2, presumably so the two marker series
# do not sit exactly on top of each other.
plt.scatter(sample_positions, X_yl1[:, 3] + .2, c='k', marker='x')

plt.title('聚类结果与原始分类结果对比')
plt.xlabel('样本序号')
plt.ylabel('分类/聚类标签')
plt.xlim((0, y.size))
plt.grid(True)
plt.legend(['原始分类', '聚类结果'])
plt.show()
|