天天看點

Python從excel讀取資料,并使用scipy進行散點的平滑曲線化方法

首先給出一條未經平滑(smooth)處理的曲線

# Plot the raw (unsmoothed) classification-error curves read from an Excel
# workbook, overlay the points stored in the second sheet, then save and show
# the figure.
import xlrd
import numpy as np
import matplotlib.pyplot as plt

# The original `from scipy.interpolate import spline` is intentionally gone:
# this script never calls it, and `spline` was removed in SciPy 1.3.0, so the
# import alone made the script fail with ImportError on current SciPy.

# NOTE(review): xlrd >= 2.0 only reads legacy .xls files; opening this .xlsx
# workbook requires xlrd < 2.0 (or converting the file) -- confirm the
# installed version.
workbook = xlrd.open_workbook("/Users/lm/Documents/實驗一/算法對比.xlsx")

# Sheet 1: the first row supplies the x values, rows 2-5 supply one curve each.
sheet_01 = workbook.sheets()[0]
dataset = np.array(
    [sheet_01.row_values(row) for row in range(sheet_01.nrows)]
)  # convert the list of rows to a numpy.ndarray
X = dataset[0]
y = dataset[1:5]

plt.figure(1, figsize=(8, 4))
plt.xlim((0, 140))
plt.ylim((0, 0.50))
plt.xticks(np.linspace(0, 150, 16))
plt.yticks(np.linspace(0, 0.5, 11))

labels = ['Fisher', 'NB', 'Decision Tree', 'Logistic Regression']
# y holds at most four rows (slice 1:5), so zip pairs every row with a label.
for label, series in zip(labels, y):
    plt.plot(X, series, linewidth=0.8, label=label)

# Draw the points stored in the second sheet (its first row is skipped).
sheet_02 = workbook.sheets()[1]
dataset_02 = np.array(
    [sheet_02.row_values(row) for row in range(sheet_02.nrows)]
)  # convert the list of rows to a numpy.ndarray
# Fixed x positions 10, 20, ..., 140; assumes every row of sheet 2 holds
# exactly 14 values -- TODO confirm against the workbook.
scatter_x = np.linspace(10, 140, 14)
scatter_y = dataset_02[1:]
for points in scatter_y:
    plt.scatter(scatter_x, points, s=15, marker='.')

plt.xlabel('number of feature')
plt.ylabel('error in classification')
plt.legend(loc='best')
plt.savefig('/Users/lm/Documents/分類誤差_無曲線拟合.png', dpi=200)
plt.show()
           

輸出的曲線如下圖

Python從excel讀取資料,并使用scipy進行散點的平滑曲線化方法

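使用scipy庫可以對曲線進行平滑(smooth)處理。注意:`scipy.interpolate.spline` 已在 SciPy 1.3.0 中移除,使用新版本時請改用 `scipy.interpolate.make_interp_spline`。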

代碼如下

# Plot spline-smoothed classification-error curves read from an Excel
# workbook, overlay the points stored in the second sheet, then save and show
# the figure.
import xlrd
import numpy as np
import matplotlib.pyplot as plt
# `scipy.interpolate.spline` was deprecated in SciPy 0.19 and removed in
# 1.3.0; `make_interp_spline` is the supported replacement.
from scipy.interpolate import make_interp_spline

# NOTE(review): xlrd >= 2.0 only reads legacy .xls files; opening this .xlsx
# workbook requires xlrd < 2.0 (or converting the file) -- confirm the
# installed version.
workbook = xlrd.open_workbook("/Users/lm/Documents/實驗一/算法對比.xlsx")

# Sheet 1: the first row supplies the x values, rows 2-5 supply one curve each.
sheet_01 = workbook.sheets()[0]
dataset = np.array(
    [sheet_01.row_values(row) for row in range(sheet_01.nrows)]
)  # convert the list of rows to a numpy.ndarray
X = dataset[0]
y = dataset[1:5]

plt.figure(1, figsize=(8, 4))
plt.xlim((0, 140))
plt.ylim((0, 0.50))
plt.xticks(np.linspace(0, 150, 16))
plt.yticks(np.linspace(0, 0.5, 11))

# 3000 evenly spaced sample points between X.min() and X.max() on which the
# interpolating splines are evaluated.
xnew = np.linspace(X.min(), X.max(), 3000)

labels = ['Fisher', 'NB', 'Decision Tree', 'Logistic Regression']
# y holds at most four rows (slice 1:5), so zip pairs every row with a label.
for label, series in zip(labels, y):
    # Cubic B-spline through the measured points; X must be strictly
    # increasing for make_interp_spline to accept it.
    power_smooth = make_interp_spline(X, series, k=3)(xnew)
    plt.plot(xnew, power_smooth, linewidth=0.8, label=label)

# Draw the points stored in the second sheet (its first row is skipped).
sheet_02 = workbook.sheets()[1]
dataset_02 = np.array(
    [sheet_02.row_values(row) for row in range(sheet_02.nrows)]
)  # convert the list of rows to a numpy.ndarray
# Fixed x positions 10, 20, ..., 140; assumes every row of sheet 2 holds
# exactly 14 values -- TODO confirm against the workbook.
scatter_x = np.linspace(10, 140, 14)
scatter_y = dataset_02[1:]
for points in scatter_y:
    plt.scatter(scatter_x, points, s=15, marker='.')

plt.legend(loc='best')
plt.xlabel('number of feature')
plt.ylabel('error in classification')
plt.savefig('/Users/lm/Documents/分類誤差_曲線拟合.png', dpi=200)
plt.show()
           

輸出的圖檔為

Python從excel讀取資料,并使用scipy進行散點的平滑曲線化方法

所用資料:點選該連結    密碼:ruy8