天天看點

《機器學習實戰》——線性回歸

線性回歸原理比較簡單,其在一維特徵時候的方程我們在高中階段就學習過了,對於多元特徵的線性回歸,只是在其基礎上進行擴充,對於尋找合適參數的過程,可以使用梯度下降的方法來進行,但對於線性回歸而言,其實是有解析解(閉式解)的:

《機器學習實戰》——線性回歸

其相關代碼如下:

import numpy as np
import  matplotlib.pyplot as plt
def loaddata(filename=r'E:\學習資料\AI+CS\01 個人\《機器學習實戰》-Peter Harriton\MLiA_SourceCode\machinelearninginaction\Ch08\ex0.txt'):
    """Load the ex0 dataset from a whitespace-separated text file.

    Each row must have at least three columns: the first two become one
    feature vector, the last becomes the (single-element) target row.

    Args:
        filename: path to the data file; defaults to the original hard-coded
            path so existing no-argument callers keep working.

    Returns:
        (data, label): data is a list of [float, float] feature rows,
        label is a list of [float] target rows.
    """
    data = []
    label = []
    # 'with' closes the handle even if a parse error is raised mid-file
    # (the original relied on reaching an explicit close()).
    with open(filename) as f:
        for raw in f:
            fields = raw.strip().split()
            if not fields:          # tolerate blank lines
                continue
            data.append([float(fields[0]), float(fields[1])])
            label.append([float(fields[-1])])
    return data, label

# Closed-form (normal-equation) least-squares fit over the full sample.
def calw(data, label):
    """Solve w = (X^T X)^-1 X^T y.

    Returns the weight column as an np.matrix, or None (after printing a
    message) when X^T X is singular and cannot be inverted.
    """
    xmat = np.mat(data)
    ymat = np.mat(label)
    gram = np.dot(xmat.T, xmat)
    # det == 0 means the Gram matrix has no inverse — bail out.
    if np.linalg.det(gram) == 0:
        print('the result is wrong!')
        return
    return np.dot(np.dot(gram.I, xmat.T), ymat)


def plotdata(data, label, w):
    """Scatter the samples and overlay the fitted regression line.

    Column 1 of `data` is used as the x-axis; `w` is the weight column
    returned by calw.
    """
    arr = np.array(data)
    targets = np.array(label)
    # x-axis values: second column of every feature row.
    xs = [row[1] for row in arr]
    fig = plt.figure()
    axis = fig.add_subplot(1, 1, 1)
    axis.scatter(xs, targets, s=5, color='blue', marker='o')
    predictions = np.dot(arr, w)
    axis.plot(arr[:, 1], predictions, color='red')
    plt.title('data')
    plt.xlabel('X')
    plt.show()

# Locally Weighted Linear Regression (LWLR)
def lwlr(testpoint, data, label, k=1.0):
    """Predict the target at `testpoint` with a Gaussian-kernel weighted fit.

    `label` must already be shaped as a column of targets. Returns
    testpoint * ws (a 1x1 matrix), or None after printing a message when the
    weighted Gram matrix is singular.
    """
    xmat = np.mat(data)
    ymat = np.mat(label)
    n_samples = xmat.shape[0]
    kernel = np.eye(n_samples)
    # Each training row gets weight exp(-||x_i - testpoint||^2 / (2 k^2)):
    # points near the query dominate the fit, k controls the bandwidth.
    for j in range(n_samples):
        diff = testpoint - xmat[j, :]
        kernel[j, j] = np.exp(diff * diff.T / (-2 * k ** 2))
    gram = xmat.T * kernel * xmat
    if np.linalg.det(gram) == 0:
        print('it is wrong')
        return
    ws = gram.I * xmat.T * kernel * ymat
    return testpoint * ws
def lwlrtest(testarr, data, label, k=1.0):
    """Run lwlr once per test point and collect the scalar predictions.

    BUG FIX: the original sized the output (and the loop) by the number of
    *training* rows in `data`; the callers in this file only worked because
    their test and training slices happened to have equal length. Size by
    the test set instead.

    Returns a 1-D np.ndarray of length len(testarr).
    """
    data = np.mat(data)
    m = np.shape(testarr)[0]   # number of test points, not training rows
    yhat = np.zeros(m)
    for i in range(m):
        yhat[i] = lwlr(testarr[i], data, label, k)
    return yhat
def plotlwlr(data,label,k=1.0):
    """Plot LWLR fits for three fixed bandwidths against the raw data.

    NOTE(review): the `k` parameter is accepted but never used — the three
    bandwidths (1.0, 0.01, 0.003) below are hard-coded; confirm whether
    that is intentional.
    """
    data = np.mat(data)
    label = np.mat(label)
    # Three stacked panels, one per bandwidth.
    f,ax = plt.subplots(3,1,sharex = False,sharey = False,figsize = (10,6))
    # Scatter the raw points on every panel; column 1 of `data` is plotted
    # as x (.flatten().A[0] turns the matrix column into a 1-D array).
    ax[0].scatter(data[:,1].flatten().A[0],label.flatten().A[0],c = 'blue',s = 5)
    ax[1].scatter(data[:,1].flatten().A[0],label.flatten().A[0],c = 'blue',s = 5)
    ax[2].scatter(data[:,1].flatten().A[0],label.flatten().A[0],c = 'blue',s = 5)
    # Smooth fit, moderate fit, and strongly overfitted fit, respectively.
    yhat_0 = lwlrtest(data,data,label,k = 1.0)
    yhat_1 = lwlrtest(data,data,label,k = 0.01)
    yhat_2 = lwlrtest(data,data,label,k = 0.003)
    # Sort rows by feature value so the fitted curves draw left-to-right.
    index = data[:,1].argsort(axis = 0)
    # Fancy-indexing the matrix with the (m,1) index matrix yields an extra
    # axis; [:,0,:] squeezes it back to an (m,n) matrix.
    xmat = data[index][:,0,:]
    ax[0].plot(xmat[:,1],yhat_0[index],c = 'red')
    ax[1].plot(xmat[:,1],yhat_1[index],c = 'red')
    ax[2].plot(xmat[:,1],yhat_2[index],c = 'red')


    plt.show()
# --- demo: load ex0.txt, plot the three LWLR fits, and inspect the shape ---
data,label = loaddata()
plotlwlr(data,label,k=1.0)
yhat = lwlrtest(data,data,label,k = 1.0)
yhat = np.mat(yhat)
yhat.shape  # NOTE(review): bare expression — only shows output in a REPL/notebook
           
《機器學習實戰》——線性回歸

 鮑魚年齡預測:

# Example: abalone age prediction — data loading.
def loaddata_0(filename):
    """Read a whitespace-separated file: all but the last column are features,
    the last column is the label.

    Args:
        filename: path to the data file.

    Returns:
        (data, label): data is a list of float feature rows, label is a flat
        list of float targets. Returns ([], []) for an empty file.
    """
    # Read everything once via a context manager. The original opened the
    # file a second time just to count columns and never closed that handle.
    with open(filename) as f:
        lines = f.readlines()
    if not lines:
        return [], []
    num = len(lines[0].strip().split()) - 1   # number of feature columns
    data = []
    label = []
    for raw in lines:
        fields = raw.strip().split()
        if not fields:          # tolerate blank lines
            continue
        data.append([float(v) for v in fields[:num]])
        label.append(float(fields[-1]))
    return data, label
def lwlr(testpoint, data, label, k=1.0):
    """Locally weighted linear regression prediction at one test point.

    Unlike the earlier variant, `label` is a flat sequence here and is
    transposed into a column vector. Returns testpoint * ws (1x1 matrix),
    or None after printing a message on a singular weighted Gram matrix.
    """
    xmat = np.mat(data)
    ymat = np.mat(label).T
    m = xmat.shape[0]
    kernel = np.eye(m)
    denom = -2 * k ** 2   # Gaussian kernel denominator, hoisted out of the loop
    for row in range(m):
        diff = testpoint - xmat[row, :]
        kernel[row, row] = np.exp(diff * diff.T / denom)
    gram = xmat.T * kernel * xmat
    if np.linalg.det(gram) == 0:
        print('it is wrong')
        return
    ws = gram.I * xmat.T * kernel * ymat
    return testpoint * ws
def lwlrtest(testarr, data, label, k=1.0):
    """Predict with lwlr at every point of `testarr`.

    BUG FIX: the original sized the output by the number of *training* rows
    (`data`), not test points; the abalone calls below only worked because
    both slices were length 99. Size by the test set instead.

    Returns a 1-D np.ndarray of length len(testarr).
    """
    data = np.mat(data)
    m = np.shape(testarr)[0]   # number of test points, not training rows
    yhat = np.zeros(m)
    for i in range(m):
        yhat[i] = lwlr(testarr[i], data, label, k)
    return yhat
def calw_0(data, label):
    """Ordinary least squares via the normal equation.

    `label` is a flat sequence and is transposed into a column vector.
    Returns the weight column as an np.matrix, or None (after printing a
    message) when X^T X is singular.
    """
    xmat = np.mat(data)
    ymat = np.mat(label).T
    gram = xmat.T * xmat
    # The determinant test guards the explicit inverse below.
    if np.linalg.det(gram) == 0:
        print('the result is wrong!')
        return
    return gram.I * xmat.T * ymat
def error(yhat, label):
    """Return the sum of squared residuals between predictions and targets."""
    residual = np.array(yhat) - np.array(label)
    return (residual ** 2).sum()

# --- abalone demo: compare LWLR bandwidths in-sample vs. out-of-sample ---
data,label= loaddata_0(r'E:\學習資料\AI+CS\01 個人\《機器學習實戰》-Peter Harriton\MLiA_SourceCode\machinelearninginaction\Ch08\abalone.txt')

# In-sample: train and evaluate on the same 99 rows. Smaller k fits the
# training data more tightly, so its in-sample error is lower.
print('訓練集與測試集相同時,檢視各誤差結果:')
yhat_0 = lwlrtest(data[0:99],data[0:99],label[0:99],k = 1.0)
yhat_1 = lwlrtest(data[0:99],data[0:99],label[0:99],k = 0.1)
yhat_2 = lwlrtest(data[0:99],data[0:99],label[0:99],k = 10)
print('當k=1時,誤差為:%f' %(error(yhat_0,label[0:99])) )
print('當k=0.1時,誤差為:%f' %(error(yhat_1,label[0:99])) )
print('當k=10時,誤差為:%f' %(error(yhat_2,label[0:99])) )
print('')
# Out-of-sample: train on rows 0..98, predict rows 100..198. Here the
# overfitted small-k model typically does worse.
print('訓練集與測試集不同的情況下,檢視結果:')
yhat_0 = lwlrtest(data[100:199],data[0:99],label[0:99],k = 1.0)
yhat_1 = lwlrtest(data[100:199],data[0:99],label[0:99],k = 0.1)
yhat_2 = lwlrtest(data[100:199],data[0:99],label[0:99],k = 10)
print('當k=1時,誤差為:%f' %(error(yhat_0,label[100:199])) )
print('當k=0.1時,誤差為:%f' %(error(yhat_1,label[100:199])) )
print('當k=10時,誤差為:%f' %(error(yhat_2,label[100:199])) )
print('')
# Baseline: plain OLS trained on the same 99 rows, compared to LWLR k=1
# (yhat_0 from the out-of-sample run above).
print('比較簡單線性回歸和權重線性回歸,k = 1.0時的誤差大小:')
print('權重線性回歸,k=1時,誤差為:%f' %(error(yhat_0,label[100:199])) )
w = calw_0(data[0:99],label[0:99])
# list * np.matrix dispatches to the matrix's __rmul__, producing predictions.
yhat = data[100:199] * w

print('簡單線性回歸,誤差為:%f' %(error(yhat.flatten().A[0],label[100:199])) )
           
《機器學習實戰》——線性回歸

嶺回歸:

import numpy as np
import matplotlib.pyplot as plt
# Example: abalone age prediction — data loading.
def loaddata_0(filename):
    """Read a whitespace-separated file: all but the last column are features,
    the last column is the label.

    Args:
        filename: path to the data file.

    Returns:
        (data, label): data is a list of float feature rows, label is a flat
        list of float targets. Returns ([], []) for an empty file.
    """
    # Read everything once via a context manager. The original opened the
    # file a second time just to count columns and never closed that handle.
    with open(filename) as f:
        lines = f.readlines()
    if not lines:
        return [], []
    num = len(lines[0].strip().split()) - 1   # number of feature columns
    data = []
    label = []
    for raw in lines:
        fields = raw.strip().split()
        if not fields:          # tolerate blank lines
            continue
        data.append([float(v) for v in fields[:num]])
        label.append(float(fields[-1]))
    return data, label
# Reload the abalone dataset for the ridge-regression demo below.
data,label = loaddata_0(r'E:\學習資料\AI+CS\01 個人\《機器學習實戰》-Peter Harriton\MLiA_SourceCode\machinelearninginaction\Ch08\abalone.txt')
def ridgeRegres(data, label, lam):
    """Ridge regression: w = (X^T X + lam * I)^-1 X^T y.

    `label` must already be a column vector (normdata passes one). Returns
    the weight column, or None (after printing a message) when the
    penalized matrix is singular — possible when lam == 0.
    """
    xmat = np.mat(data)
    ymat = np.mat(label)
    nfeat = xmat.shape[1]
    # Add the ridge penalty lam on the diagonal before inverting.
    penalized = xmat.T * xmat + lam * np.mat(np.eye(nfeat))
    if np.linalg.det(penalized) == 0:
        print('該矩陣為奇異矩陣,不能計算逆矩陣')
        return
    return penalized.I * xmat.T * ymat
def normdata(data, label):
    """Standardize the data and trace ridge weights over 30 lambdas.

    Features are centered and divided by their variance (the book's
    convention), the target is centered. Lambdas run exp(-10)..exp(19).

    Returns a (30, n_features) ndarray: one ridge weight vector per lambda.
    """
    feat = np.mat(data)            # shape (m, n)
    target = np.mat(label).T       # shape (m, 1)
    ncols = feat.shape[1]
    target_centered = target - np.mean(target, axis=0)
    feat_scaled = (feat - np.mean(feat, axis=0)) / np.var(feat, axis=0)
    steps = 30
    weights = np.zeros((steps, ncols))
    for idx in range(steps):
        w_ridge = ridgeRegres(feat_scaled, target_centered, np.exp(idx - 10))
        weights[idx, :] = w_ridge.T
    return weights
# --- demo: plot the ridge coefficient paths (one curve per feature) ---
wmat =  normdata(data,label)
f = plt.figure()
ax = f.add_subplot(1,1,1)
ax.plot(wmat)  # x-axis is the lambda index (0..29), y-axis the coefficient value
plt.show()
           
《機器學習實戰》——線性回歸
ML

繼續閱讀