# coding: utf-8
"""
svm 四个核在iris上的测试
"""
import time
import matplotlib as mpl
import matplotlib.pyplot as plt
from keras.utils import to_categorical
import numpy as np
import pandas as pd
# 训练测试数据分割
# 准确率
from sklearn.metrics import accuracy_score
# from keras.utils import to_categorical
from sklearn.model_selection import train_test_split
# SVC向量机
from sklearn.preprocessing import LabelEncoder, LabelBinarizer
from sklearn.svm import SVC
# Shared label transformers. `lb` is used by load_data() to print the binarized
# labels; `encoder` is kept available but is not used by the active code.
encoder = LabelEncoder()
lb = LabelBinarizer()
# Font configuration so that the Chinese plot labels do not render as garbled
# glyphs: use the SimHei font, and draw the minus sign as an ASCII hyphen
# rather than the Unicode minus character.
mpl.rcParams['font.sans-serif'] = [u'SimHei']
mpl.rcParams['axes.unicode_minus'] = False
def load_data():
    """Load ``./iris.txt`` and split it into training and test sets.

    Every row of the file is read as a single string and split on commas:
    all fields except the last are converted to ``float`` features and the
    last field is taken as the class label.

    As a side effect the binarized label matrix (``lb.fit_transform``) is
    printed to stdout.

    Returns:
        tuple: ``(x_train, x_test, y_train, y_test)`` from
        ``train_test_split(x, y, test_size=0.2, random_state=33)``, where
        ``y`` holds the integer category codes of the labels.
    """
    raw_data = np.loadtxt("./iris.txt", dtype='str', encoding='utf-8')
    x, y = [], []
    for row in raw_data:
        # Split each record once; the label is the trailing field.
        fields = row.split(",")
        x.append([float(value) for value in fields[:-1]])
        y.append(fields[-1])
    x = np.array(x)
    # Debug output: show the binarized form of the string labels.
    print(lb.fit_transform(y))
    # Map the string labels to integer codes for the classifiers.
    y = pd.Categorical(y).codes
    x_train, x_test, y_train, y_test = train_test_split(
        x, y, test_size=0.2, random_state=33)
    return x_train, x_test, y_train, y_test
if __name__ == '__main__':
    # Script entry point: train one SVC per kernel on the iris split, then plot
    # train/test accuracy and fit time side by side.
    x_train, x_test, y_train, y_test = load_data()
    print(y_test)

    # One classifier per kernel, all with the same penalty parameter C=1.
    kernels = ['linear', 'rbf', 'poly', 'sigmoid']
    models = [SVC(C=1, kernel=kernel) for kernel in kernels]

    # Fit every classifier and record how long each fit took, in seconds.
    # perf_counter() is used because it is intended for measuring short
    # durations, unlike the wall-clock time.time().
    fit_times = []
    for model in models:
        started = time.perf_counter()
        model.fit(x_train, y_train)
        fit_times.append(time.perf_counter() - started)

    # Show the poly-kernel predictions on the training set.
    print(models[kernels.index('poly')].predict(x_train))

    # Accuracy of every classifier on the training and the test split.
    train_scores = [accuracy_score(y_train, model.predict(x_train))
                    for model in models]
    test_scores = [accuracy_score(y_test, model.predict(x_test))
                   for model in models]

    # X positions and tick labels shared by both subplots.
    x_ticks = list(range(len(kernels)))
    tick_labels = [kernel + u'-SVM' for kernel in kernels]

    # White figure background, 12x6 inch canvas.
    plt.figure(facecolor='w', figsize=(12, 6))

    # Left subplot (1 row, 2 columns, first cell): accuracy per kernel.
    accuracy_axes = plt.subplot(121)
    plt.plot(x_ticks, train_scores, 'r-', lw=2, label=u'训练集准确率')
    plt.plot(x_ticks, test_scores, 'g-', lw=2, label=u'测试集准确率')
    plt.legend(loc='lower left')
    plt.xticks(x_ticks, tick_labels, rotation=0)
    # Pass the flag positionally: the old ``b=`` keyword was renamed to
    # ``visible=`` and later removed from Matplotlib.
    plt.grid(True)

    # Right subplot (second cell): fit time per kernel.
    time_axes = plt.subplot(122)
    plt.plot(x_ticks, fit_times, 'b-', lw=2, label=u'模型训练时间')
    plt.xticks(x_ticks, tick_labels, rotation=0)
    plt.legend(loc='lower left')
    plt.grid(True)

    # Figure-level title plus one title per subplot.
    plt.suptitle(u'鸢尾花数据SVM分类器不同内核函数模型比较', fontsize=16)
    accuracy_axes.set_title(u'预测准确率')
    time_axes.set_title(u'模型运行时间')
    plt.show()
# Original article: https://www.cnblogs.com/xiennnnn/p/12155354.html