線性回歸原理比較簡單,其在一維特徵時候的方程我們在高中階段就學習過了,對於多元特徵的線性回歸,只是在其基礎上進行擴充;對於尋找合適參數的過程,可以使用梯度下降的方法來進行,但對於線性回歸而言,其實是有解析解(閉式解,即正規方程)的:
![](https://img.laitimes.com/img/9ZDMuAjOiMmIsIjOiQnIsIyZuBnLyQDN2AzNxETM3ATMxgTMwIzLc52YucWbp5GZzNmLn9Gbi1yZtl2Lc9CX6MHc0RHaiojIsJye.png)
其相關代碼如下:
import numpy as np
import matplotlib.pyplot as plt
def loaddata(filename=r'E:\學習資料\AI+CS\01 個人\《機器學習實戰》-Peter Harriton\MLiA_SourceCode\machinelearninginaction\Ch08\ex0.txt'):
    """Load a whitespace-separated data file (two features + target per row).

    Args:
        filename: path to the data file. Defaults to the original hard-coded
            path so existing zero-argument calls keep working.

    Returns:
        (data, label): data is a list of [x0, x1] float pairs; label is a
        list of single-element [y] lists.
    """
    data = []
    label = []
    # 'with' guarantees the handle is closed even if a parse error is raised.
    with open(filename) as file:
        for row in file:
            fields = row.strip().split()
            data.append([float(fields[0]), float(fields[1])])
            label.append([float(fields[-1])])
    return data, label
# Closed-form ordinary least squares over the whole sample via the normal
# equation (the original comment said "gradient descent", which this is not).
def calw(data, label):
    """Solve OLS weights: w = (X^T X)^-1 X^T y.

    Args:
        data: m x n feature rows (first column is typically the 1.0 bias).
        label: m rows of single-element [y] lists (column-shaped).

    Returns:
        (n, 1) numpy matrix of weights, or None when X^T X is singular.
    """
    x = np.mat(data)
    y = np.mat(label)
    xTx = x.T * x
    # An exact '== 0' float comparison misses nearly-singular matrices, for
    # which .I would then raise; use a small tolerance instead.
    if abs(np.linalg.det(xTx)) < 1e-10:
        print('the result is wrong!')
        return
    return xTx.I * x.T * y
def plotdata(data, label, w):
    """Scatter the raw samples and overlay the fitted regression line.

    Args:
        data: feature rows; column 0 is the bias term, column 1 is the x-axis.
        label: target values.
        w: (n, 1) weight vector, e.g. from calw.
    """
    data = np.array(data)
    label = np.array(label)
    # The second feature drives the horizontal axis.
    xs = [row[1] for row in data]
    fig = plt.figure()
    axes = fig.add_subplot(1, 1, 1)
    axes.scatter(xs, label, s=5, color='blue', marker='o')
    predictions = np.dot(data, w)  # yhat = X w
    axes.plot(data[:, 1], predictions, color='red')
    plt.title('data')
    plt.xlabel('X')
    plt.show()
# Locally Weighted Linear Regression (LWLR)
def lwlr(testpoint, data, label, k=1.0):
    """Predict a single point with locally weighted linear regression.

    Training row i receives Gaussian kernel weight
    w_ii = exp(-||testpoint - x_i||^2 / (2 k^2)); smaller k fits more locally.

    Args:
        testpoint: 1 x n row (np.mat-compatible) to predict for.
        data: m x n feature matrix (list or matrix).
        label: m x 1 column of targets (list of [y] lists or matrix).
        k: kernel bandwidth; large k approaches plain OLS.

    Returns:
        Prediction as a 1x1 matrix, or None when the weighted normal-equation
        matrix is singular.
    """
    x = np.mat(data)
    y = np.mat(label)
    m = x.shape[0]
    weight = np.eye(m)
    for i in range(m):
        diff = testpoint - x[i, :]
        # float() extracts the 1x1 matrix scalar explicitly; assigning the
        # matrix itself to an element is deprecated in modern numpy.
        weight[i, i] = float(np.exp(diff * diff.T / (-2.0 * k ** 2)))
    xTx = x.T * weight * x
    # Tolerance check: exact '== 0' misses nearly-singular determinants.
    if abs(np.linalg.det(xTx)) < 1e-10:
        print('it is wrong')
        return
    ws = xTx.I * x.T * weight * y
    return testpoint * ws
def lwlrtest(testarr, data, label, k=1.0):
    """Run lwlr for every row of testarr and collect the predictions.

    Assumes testarr has (at least) as many rows as data, matching the
    original's use of the training-set size as the loop bound.
    """
    training = np.mat(data)
    count = training.shape[0]
    predictions = np.zeros(count)
    for row in range(count):
        predictions[row] = lwlr(testarr[row], training, label, k)
    return predictions
def plotlwlr(data,label,k=1.0):
    """Scatter the data and overlay LWLR fits at three bandwidths.

    NOTE(review): the k parameter is accepted but never used -- the three
    bandwidths (1.0, 0.01, 0.003) below are hard-coded; confirm intent.
    """
    data = np.mat(data)
    label = np.mat(label)
    # Three vertically stacked panels, one per bandwidth.
    f,ax = plt.subplots(3,1,sharex = False,sharey = False,figsize = (10,6))
    # .flatten().A[0] turns an (m,1) matrix column into a flat 1-D array.
    ax[0].scatter(data[:,1].flatten().A[0],label.flatten().A[0],c = 'blue',s = 5)
    ax[1].scatter(data[:,1].flatten().A[0],label.flatten().A[0],c = 'blue',s = 5)
    ax[2].scatter(data[:,1].flatten().A[0],label.flatten().A[0],c = 'blue',s = 5)
    # In-sample predictions; smaller k means a more local (wigglier) fit.
    yhat_0 = lwlrtest(data,data,label,k = 1.0)
    yhat_1 = lwlrtest(data,data,label,k = 0.01)
    yhat_2 = lwlrtest(data,data,label,k = 0.003)
    # Sort by the second feature so the fitted curve draws left-to-right.
    index = data[:,1].argsort(axis = 0)
    # Fancy-indexing a matrix with a column index yields an (m,1,n) result;
    # [:,0,:] squeezes out the middle axis.
    xmat = data[index][:,0,:]
    ax[0].plot(xmat[:,1],yhat_0[index],c = 'red')
    ax[1].plot(xmat[:,1],yhat_1[index],c = 'red')
    ax[2].plot(xmat[:,1],yhat_2[index],c = 'red')
    plt.show()
# --- Script: load the ex0 dataset, plot the LWLR fits, inspect yhat ---
data,label = loaddata()
plotlwlr(data,label,k=1.0)
yhat = lwlrtest(data,data,label,k = 1.0)
yhat = np.mat(yhat)
# NOTE(review): bare expression below is a notebook artifact -- it displays
# the shape in a REPL but has no effect when run as a script.
yhat.shape
鮑魚年齡預測:
# Example: abalone age prediction -- data loading
def loaddata_0(filename):
    """Load a whitespace-separated file whose last column is the label.

    The feature count is inferred from the first row (all columns except the
    last). The original opened the file a second time just to count columns,
    leaking that handle; here one pass over a single handle does both.

    Args:
        filename: path to the data file.

    Returns:
        (data, label): data is a list of float feature rows; label is a flat
        list of float targets.
    """
    data = []
    label = []
    with open(filename) as f:
        num = None
        for row in f:
            fields = row.strip().split()
            if num is None:
                num = len(fields) - 1  # feature columns, from the first row
            data.append([float(v) for v in fields[:num]])
            label.append(float(fields[-1]))
    return data, label
def lwlr(testpoint, data, label, k=1.0):
    """Predict a single point with locally weighted linear regression.

    Unlike the earlier variant, label here is a flat sequence of m targets
    and is transposed internally to an (m, 1) column.

    Args:
        testpoint: 1 x n row (np.mat-compatible) to predict for.
        data: m x n feature matrix (list or matrix).
        label: flat sequence of m targets.
        k: Gaussian kernel bandwidth; large k approaches plain OLS.

    Returns:
        Prediction as a 1x1 matrix, or None when the weighted normal-equation
        matrix is singular.
    """
    x = np.mat(data)
    y = np.mat(label).T
    m = x.shape[0]
    weight = np.eye(m)
    for i in range(m):
        diff = testpoint - x[i, :]
        # float() extracts the 1x1 matrix scalar explicitly; assigning the
        # matrix itself to an element is deprecated in modern numpy.
        weight[i, i] = float(np.exp(diff * diff.T / (-2.0 * k ** 2)))
    xTx = x.T * weight * x
    # Tolerance check: exact '== 0' misses nearly-singular determinants.
    if abs(np.linalg.det(xTx)) < 1e-10:
        print('it is wrong')
        return
    ws = xTx.I * x.T * weight * y
    return testpoint * ws
def lwlrtest(testarr, data, label, k=1.0):
    """Apply lwlr to each row of testarr, returning a 1-D prediction array.

    The loop bound is the training-set size, mirroring the original's
    assumption that testarr and data have matching row counts.
    """
    xmat = np.mat(data)
    total = xmat.shape[0]
    out = np.zeros(total)
    idx = 0
    while idx < total:
        out[idx] = lwlr(testarr[idx], xmat, label, k)
        idx += 1
    return out
def calw_0(data, label):
    """Solve OLS via the normal equation; label is a flat target sequence.

    Args:
        data: m x n feature rows.
        label: flat sequence of m targets (transposed internally to (m, 1)).

    Returns:
        (n, 1) numpy matrix of weights, or None when X^T X is singular.
    """
    x = np.mat(data)
    y = np.mat(label).T
    xTx = x.T * x
    # Determinant tolerance: exact '== 0' misses nearly-singular matrices,
    # for which .I would then raise.
    if abs(np.linalg.det(xTx)) < 1e-10:
        print('the result is wrong!')
        return
    return xTx.I * x.T * y
def error(yhat, label):
    """Return the sum of squared differences between predictions and targets."""
    residual = np.asarray(yhat) - np.asarray(label)
    return (residual ** 2).sum()
# --- Script: abalone age prediction experiments ---
# Load the abalone dataset (hard-coded local path from the book's source).
data,label= loaddata_0(r'E:\學習資料\AI+CS\01 個人\《機器學習實戰》-Peter Harriton\MLiA_SourceCode\machinelearninginaction\Ch08\abalone.txt')
# In-sample errors: train and test on the same first 100 rows.
# Expect smaller k to give lower training error (it overfits locally).
print('訓練集與測試集相同時,檢視各誤差結果:')
yhat_0 = lwlrtest(data[0:99],data[0:99],label[0:99],k = 1.0)
yhat_1 = lwlrtest(data[0:99],data[0:99],label[0:99],k = 0.1)
yhat_2 = lwlrtest(data[0:99],data[0:99],label[0:99],k = 10)
print('當k=1時,誤差為:%f' %(error(yhat_0,label[0:99])) )
print('當k=0.1時,誤差為:%f' %(error(yhat_1,label[0:99])) )
print('當k=10時,誤差為:%f' %(error(yhat_2,label[0:99])) )
print('')
# Out-of-sample errors: train on rows 0-98, test on rows 100-198.
print('訓練集與測試集不同的情況下,檢視結果:')
yhat_0 = lwlrtest(data[100:199],data[0:99],label[0:99],k = 1.0)
yhat_1 = lwlrtest(data[100:199],data[0:99],label[0:99],k = 0.1)
yhat_2 = lwlrtest(data[100:199],data[0:99],label[0:99],k = 10)
print('當k=1時,誤差為:%f' %(error(yhat_0,label[100:199])) )
print('當k=0.1時,誤差為:%f' %(error(yhat_1,label[100:199])) )
print('當k=10時,誤差為:%f' %(error(yhat_2,label[100:199])) )
print('')
# Compare plain OLS against LWLR (k=1) on the held-out slice.
print('比較簡單線性回歸和權重線性回歸,k = 1.0時的誤差大小:')
print('權重線性回歸,k=1時,誤差為:%f' %(error(yhat_0,label[100:199])) )
w = calw_0(data[0:99],label[0:99])
# NOTE(review): data[100:199] is a plain list; list * matrix relies on
# np.matrix.__rmul__ converting it -- presumably intentional, but fragile.
yhat = data[100:199] * w
print('簡單線性回歸,誤差為:%f' %(error(yhat.flatten().A[0],label[100:199])) )
嶺回歸:
import numpy as np
import matplotlib.pyplot as plt
# Example: abalone age prediction -- data loading (duplicate definition kept
# because the original article repeats it for the ridge-regression section)
def loaddata_0(filename):
    """Load a whitespace-separated file whose last column is the label.

    The feature count is inferred from the first row (all columns except the
    last). The original opened the file a second time just to count columns,
    leaking that handle; here one pass over a single handle does both.

    Args:
        filename: path to the data file.

    Returns:
        (data, label): data is a list of float feature rows; label is a flat
        list of float targets.
    """
    data = []
    label = []
    with open(filename) as f:
        num = None
        for row in f:
            fields = row.strip().split()
            if num is None:
                num = len(fields) - 1  # feature columns, from the first row
            data.append([float(v) for v in fields[:num]])
            label.append(float(fields[-1]))
    return data, label
# Load the abalone dataset (hard-coded local path from the book's source).
data,label = loaddata_0(r'E:\學習資料\AI+CS\01 個人\《機器學習實戰》-Peter Harriton\MLiA_SourceCode\machinelearninginaction\Ch08\abalone.txt')
def ridgeRegres(data, label, lam):
    """Ridge regression: w = (X^T X + lam * I)^-1 X^T y.

    Args:
        data: m x n feature matrix.
        label: targets already shaped as an (m, 1) column (the caller,
            normdata, passes a column matrix).
        lam: ridge penalty; lam = 0 reduces to plain OLS.

    Returns:
        (n, 1) numpy matrix of weights, or None when X^T X + lam*I is
        singular.
    """
    x = np.mat(data)
    y = np.mat(label)
    n = x.shape[1]
    xTx = x.T * x + lam * np.mat(np.eye(n))
    # Tolerance check: exact '== 0' misses nearly-singular determinants.
    if abs(np.linalg.det(xTx)) < 1e-10:
        print('該矩陣為奇異矩陣,不能計算逆矩陣')
        return
    return xTx.I * x.T * y
def normdata(data, label, num=30):
    """Standardize the data and sweep ridge weights over a log lambda grid.

    Features are mean-centered and divided by their per-column variance
    (NOTE(review): variance, not standard deviation -- this follows the
    book's code; columns with zero variance would divide by zero). Labels
    are mean-centered. The original comment claimed y is (n, 1); it is
    actually (m, 1).

    Args:
        data: m x n feature rows.
        label: flat sequence of m targets.
        num: number of lambda values to try (default 30, as in the book);
            lambda_i = exp(i - 10) for i in range(num).

    Returns:
        (num, n) array; row i holds the ridge weights for lambda = exp(i-10).
    """
    x = np.mat(data)        # (m, n)
    y = np.mat(label).T     # (m, 1)
    n = x.shape[1]
    x_new = (x - np.mean(x, axis=0)) / np.var(x, axis=0)
    y_new = y - np.mean(y, axis=0)
    wmat = np.zeros((num, n))
    for i in range(num):
        w_lam = ridgeRegres(x_new, y_new, np.exp(i - 10))
        wmat[i, :] = w_lam.T
    return wmat
# --- Script: plot the ridge coefficient paths over the lambda grid ---
wmat = normdata(data,label)
f = plt.figure()
ax = f.add_subplot(1,1,1)
# Each line traces one coefficient as the grid index i grows
# (lambda = exp(i - 10)); large lambda shrinks all weights toward zero.
ax.plot(wmat)
plt.show()