22043104+范纬.zip
资源文件列表:

22043104+范纬/
22043104+范纬/第一次作业.docx 279.18KB
22043104+范纬/第七次作业.docx 141.03KB
22043104+范纬/第三次作业.docx 124.77KB
22043104+范纬/第九次作业.docx 1.01MB
22043104+范纬/第二次作业.docx 80.66KB
22043104+范纬/第五次作业.docx 552.54KB
22043104+范纬/第八次作业.docx 664.06KB
22043104+范纬/第六次作业.docx 149.53KB
22043104+范纬/第十次作业.docx 594.46KB
22043104+范纬/第四次作业.docx 133.33KB
资源介绍:
22043104+范纬.zip
1.
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
import warnings
warnings.filterwarnings(action = 'ignore')
%matplotlib inline
plt.rcParams['font.sans-serif']=['SimHei']
plt.rcParams['axes.unicode_minus']=False
from sklearn.datasets import make_blobs
from sklearn.feature_selection import f_classif
from sklearn import decomposition
from sklearn.cluster import KMeans,AgglomerativeClustering
from sklearn.metrics import silhouette_score,calinski_harabasz_score
import scipy.cluster.hierarchy as sch
from itertools import cycle
from matplotlib.patches import Ellipse
from sklearn.mixture import GaussianMixture
# Sample size shared by both synthetic data sets.
N = 100
# Two Gaussian-blob data sets, 4 centers each, with fixed seeds for
# reproducibility: X1 has 2 features, X2 has 3 features.
X1, y1 = make_blobs(n_samples=N, centers=4, n_features=2, random_state=0)
X2, y2 = make_blobs(n_samples=N, centers=4, n_features=3, random_state=123)
print('y1=', y1)
print('y2=', y2)
y1= [0 3 0 0 0 0 2 3 0 3 3 3 3 3 3 1 1 2 2 1 0 3 2 1 0 2 2 0 1 1 1 3 1 1 2 0 3
 1 3 2 0 2 3 2 2 3 1 2 0 0 0 1 2 2 2 3 3 1 1 3 3 1 1 0 1 3 2 2 1 0 3 1 0 3
 0 0 2 2 1 1 1 3 2 0 1 2 1 1 0 0 0 2 0 2 2 3 3 2 3 0]
y2= [2 0 1 3 2 2 1 0 2 1 0 1 1 0 1 3 0 0 3 1 0 3 1 0 3 1 1 0 2 2 0 3 3 3 3 2 0
 0 3 1 2 0 3 0 2 2 2 2 0 2 1 0 1 3 0 1 2 3 0 1 1 2 2 3 2 3 3 3 1 1 0 3 2 2
 0 1 2 3 2 3 1 1 0 2 0 2 3 3 0 1 1 1 3 3 2 0 1 2 3 0]
2.
# Side-by-side views of the raw samples: 2-D scatter on the left,
# 3-D scatter on the right.
fig = plt.figure(figsize=(18, 12))

axis2d = fig.add_subplot(121)
axis2d.scatter(X1[:, 0], X1[:, 1], s=50)
axis2d.set_xlabel("X1-1")
axis2d.set_ylabel("X1-2")
axis2d.set_title("%d 个样本观测点的分布" % N)

axis3d = fig.add_subplot(122, projection='3d')
axis3d.scatter(X2[:, 0], X2[:, 1], X2[:, 2], c='blue')
axis3d.set_xlabel("X2-1")
axis3d.set_ylabel("X2-2")
axis3d.set_zlabel("X2-3")
axis3d.set_title("%d 个样本观测点的分布" % N)
Text(0.5, 0.92, '100 个样本观测点的分布')
3.
# K-means clustering of the 2-feature data set.
KM = KMeans(n_clusters=4, max_iter=500)  # build the KMeans model (4 clusters)
KM.fit(X1)  # fit on the 2-D data
labels = np.unique(KM.labels_)  # distinct cluster labels (solution is in .labels_)
print('labels=', labels)
# Visualize the clustering result: one marker style per cluster.
markers = 'o*^+'
for i, label in enumerate(labels):  # draw each cluster's points separately
    plt.scatter(X1[KM.labels_ == label, 0], X1[KM.labels_ == label, 1],
                label="cluster %d" % label, marker=markers[i], s=50)
# Centroid coordinates are stored in the model's .cluster_centers_ attribute.
plt.scatter(KM.cluster_centers_[:, 0], KM.cluster_centers_[:, 1], marker='X',
            s=60, c='r', label="小类中心")  # plot the cluster centers
plt.legend(loc="best", framealpha=0.5)
plt.xlabel("X1-1")
plt.ylabel("X1-2")
plt.title("%d 个样本观测点的聚类结果" % N)
labels= [0 1 2 3]
Out[9]:
Text(0.5, 1.0, '100 个样本观测点的聚类结果')

4.
# K-means clustering of the 3-feature data set (reconstructed from a
# line-wrapped paste; logic mirrors the 2-feature cell above).
KM = KMeans(n_clusters=4, max_iter=500)  # build the KMeans model (4 clusters)
KM.fit(X2)  # fit on the 3-D data
labels = np.unique(KM.labels_)  # distinct cluster labels (solution is in .labels_)
# Visualize the clustering result: one marker style per cluster,
# drawn in a 3-D scatter plot.
ax = plt.subplot(111, projection='3d')
markers = 'o*^+'
for i, label in enumerate(labels):  # draw each cluster's points separately
    ax.scatter(X2[KM.labels_ == label, 0], X2[KM.labels_ == label, 1],
               X2[KM.labels_ == label, 2],
               label="cluster %d" % label, marker=markers[i], s=50)
# Centroid coordinates are stored in the model's .cluster_centers_ attribute.
ax.scatter(KM.cluster_centers_[:, 0], KM.cluster_centers_[:, 1],
           KM.cluster_centers_[:, 2], marker='X', s=60, c='r',
           label="小类中心")  # plot the cluster centers
ax.legend(loc="best", framealpha=0.5)
ax.set_xlabel("X2-1")
ax.set_ylabel("X2-2")
ax.set_zlabel("X2-3")
ax.set_title("%d 个样本观测点的聚类结果" % N)