import numpy as np
import pandas as pd
from matplotlib import pyplot as plt
import matplotlib.cm as cm
from matplotlib图形调配.图像基础属性 import Solve_Chinese,Set_label
import seaborn as sns
# Read the data set.
# NOTE(review): this is an absolute, machine-specific path — confirm it exists
# before running the script anywhere else.
data = pd.read_csv(r"C:\Users\luyao\Desktop\Iris.csv")

# Work around garbled Chinese characters in plot text (project helper;
# 'FangSong' is presumably the font name it installs — TODO confirm).
Solve_Chinese('FangSong')

# Strip the redundant prefix from the species label: keep only the second
# '-'-separated segment of each value.
data['Species'] = data['Species'].apply(lambda x: x.split('-')[1])

# Data conversion: map each species name to a numeric category.
print(data['Species'].unique(), '\n')
# Named `species_to_num` rather than `dict` so the builtin `dict` type is not
# shadowed for the rest of the module.
species_to_num = {'setosa': 0, 'versicolor': 1, 'virginica': 2}
data['Category_num'] = data['Species'].map(species_to_num)
print(data.head(), '\n')
# Get to know the data.
fig1 = plt.figure(1)
fig1.set_alpha(0.5)
# First, look at how many samples each species has.
species_counts = data['Species'].value_counts()
species_counts.plot(
    kind='bar',
    title='鸢尾花种类分布',
    color='c',
    rot=360,
)
plt.show()
# Check how well each pair of features separates the species.
fig2 = plt.figure(2)

# One entry per subplot of the 3x2 grid:
# (grid position, x column, y column, subplot title).
# The axis labels are taken from the column names themselves, so a label can
# no longer drift from the plotted column (the first subplot's y label was
# previously misspelled 'Seta_width').
feature_pairs = [
    ((0, 0), 'Setal_length', 'Setal_width', "据萼片长度和宽度的种类分布图"),
    ((0, 1), 'Petal_length', 'Petal_width', "据花瓣长度和宽度的种类分布图"),
    ((1, 0), 'Petal_length', 'Setal_length', "据花瓣长度和萼片长度的种类分布图"),
    ((1, 1), 'Petal_width', 'Setal_width', "据花瓣宽度和萼片宽度的种类分布图"),
    ((2, 0), 'Petal_length', 'Setal_width', "据花瓣长度和萼片宽度的种类分布图"),
    ((2, 1), 'Petal_width', 'Setal_length', "据花瓣宽度和萼片长度的种类分布图"),
]

for position, x_col, y_col, title in feature_pairs:
    plt.subplot2grid((3, 2), position)
    # Color each point by its numeric species category.
    plt.scatter(data[x_col], data[y_col], c=data.Category_num)
    Set_label(x_col, y_col)
    plt.title(title)

plt.tight_layout()
plt.show()
# Observation: only one class is clearly separated from the others; the
# remaining two classes overlap each other.
|
|