Table of Contents

- 1. Formal Representation of the Function
- 2. Gradient Descent
  - Gradient Descent: Code Implementation
  - Solving Linear Regression with Gradient Descent
  - Variants of Gradient Descent
- 3. Model Evaluation Metrics
  - Code Implementation
- 4. Ridge Regression
  - Ridge Regression: Code Implementation
- 5. LASSO Regression
  - LASSO Regression: Code Implementation
- 6. Elastic Net Regression
- 7. Linear Regression via Least Squares
  - Least Squares: Code Implementation
1. Formal Representation of the Function
![](https://img.laitimes.com/img/9ZDMuAjOiMmIsIjOiQnIsICM38FdsYkRGZkRG9lcvx2bjxiNx8VZ6l2cs0TPn1UMBR1TwklaOBDOsJGcohVYsR2MMBjVtJWd0ckW65UbM5WOHJWa5kHT20ESjBjUIF2X0hXZ0xCMx81dvRWYoNHLrdEZwZ1Rh5WNXp1bwNjW1ZUba9VZwlHdssmch1mclRXY39CXldWYtlWPzNXZj9mcw1ycz9WL49zZuBnL4EDO4MzMwETM1ATMwEjMwIzLc52YucWbp5GZzNmLn9Gbi1yZtl2Lc9CX6MHc0RHaiojIsJye.png)
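The embedded figure above presumably formalizes the regression setup; as a reference, here is the standard formulation consistent with the cost function the code below implements (m training samples $(x^{(i)}, y^{(i)})$, hypothesis $h_\theta$, parameters $\theta$):

$$
h_\theta(x) = \theta_0 + \theta_1 x_1 + \cdots + \theta_n x_n = \theta^T x,
\qquad
J(\theta) = \frac{1}{2m} \sum_{i=1}^{m} \left( h_\theta(x^{(i)}) - y^{(i)} \right)^2
$$

Linear regression seeks the $\theta$ that minimizes $J(\theta)$.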
2. Gradient Descent
When the objective function is convex, the minimum found by gradient descent is the global optimum.
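Each step moves against the gradient with step size $\alpha$. For the one-dimensional example below, $f(x) = x^2 + 2x + 5$ with $f'(x) = 2x + 2$, the update is:

$$
x_{k+1} = x_k - \alpha f'(x_k) = x_k - \alpha (2 x_k + 2)
$$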
Gradient Descent: Code Implementation
Controlling by iteration count
```python
# alpha is the step size: too small and convergence takes many iterations;
# too large and the iterate oscillates back and forth around the minimum
import numpy as np
import matplotlib.pyplot as plt

# x and y coordinates of the curve f(x) = x^2 + 2x + 5
x = np.linspace(-6, 4, 100)
y = x ** 2 + 2 * x + 5

# Plot the curve
fig, ax = plt.subplots()
ax.plot(x, y, 'r-', lw=3)
plt.show()

# Initialize the starting point x, the step size alpha, and the number of
# iterations (alternatively, a precision threshold can control termination)
x = 3
alpha = 0.8
iternum = 100
for i in range(iternum):
    x = x - alpha * (2 * x + 2)

y = x ** 2 + 2 * x + 5
print("After %d iterations, the minimum point is %d with minimum value %d" % (iternum, x, y))
# After 100 iterations, the minimum point is -1 with minimum value 4
```
Controlling by precision
```python
# alpha is the step size: too small and convergence takes many iterations;
# too large and the iterate oscillates back and forth around the minimum
import numpy as np
import matplotlib.pyplot as plt

# x and y coordinates of the curve f(x) = x^2 + 2x + 5
x = np.linspace(-6, 4, 100)
y = x ** 2 + 2 * x + 5

# Plot the curve
fig, ax = plt.subplots()
ax.plot(x, y, 'r-', lw=3)
plt.show()

# Initialize the starting point x, the step size alpha, and the precision
# threshold eta: iterate until the gradient magnitude drops below eta
x = 3
alpha = 0.8
eta = 1e-8
while np.abs(2 * x + 2) > eta:
    x = x - alpha * (2 * x + 2)

y = x ** 2 + 2 * x + 5
print("The minimum point is %.4f with minimum value %.4f" % (x, y))
# The minimum point is -1.0000 with minimum value 4.0000
```
Solving Linear Regression with Gradient Descent
```python
import numpy as np
import matplotlib.pyplot as plt

# 1. Load the data
def loaddata(filename):
    data = np.loadtxt(filename, delimiter=',')
    n = data.shape[1] - 1  # number of features
    X = data[:, 0:n]
    y = data[:, n].reshape(-1, 1)
    return X, y

# 2. Standardization -- reduces the influence of outliers
# Standardize each column by subtracting its mean and dividing by its
# standard deviation
def featureNormized(x):
    avg = np.average(x, axis=0)
    std = np.std(x, axis=0, ddof=1)  # ddof=1 divides by n-1 (sample std)
    x = (x - avg) / std
    return x, avg, std

# 3. Cost function
def computeCost(X, y, theta):
    m = X.shape[0]  # number of samples
    # np.dot(X, theta) is the matrix product, i.e. the vector of predictions
    return np.sum(np.power(np.dot(X, theta) - y, 2)) / (2 * m)

# 4. Gradient descent
def gradientDescent(X, y, theta, iternum, alpha):
    # Prepend the column x0 = 1 for the intercept term
    c = np.ones(X.shape[0]).transpose()
    X = np.insert(X, 0, values=c, axis=1)
    m = X.shape[0]  # number of samples
    n = X.shape[1]  # number of features (including the intercept)
    # Record the cost at each iteration
    costs = np.zeros(iternum)
    # Update each theta[j] along the negative gradient
    for i in range(iternum):
        for j in range(n):
            theta[j] = theta[j] + np.sum((y - np.dot(X, theta)) * X[:, j].reshape(-1, 1)) * alpha / m
        costs[i] = computeCost(X, y, theta)
    return theta, costs

# 5. Prediction
def predict(x):
    x = (x - avg) / std
    c = np.ones(x.shape[0]).transpose()
    X = np.insert(x, 0, values=c, axis=1)
    return np.dot(X, theta)

# 6. Model evaluation -- MSE
def mse(y_true, y_test):
    return np.sum(np.power(y_true - y_test, 2)) / len(y_true)

if __name__ == '__main__':
    filename = 'data/data1.txt'
    # Load the data
    X_orign, y = loaddata(filename)
    # Standardize
    X, avg, std = featureNormized(X_orign)
    theta = np.zeros(X.shape[1] + 1).reshape(-1, 1)
    iternum = 100
    alpha = 0.8
    # Solve by gradient descent
    theta, costs = gradientDescent(X, y, theta, iternum, alpha)
    # Predict
    print(predict([[5.734]]))
    # Evaluate the model
    model_pred = predict(X_orign)
    print(model_pred)
    print('mse=', mse(y, model_pred))
    # Plot
    ax1 = plt.subplot(121)
    ax2 = plt.subplot(122)
    # Cost curve over iterations
    x_ = np.linspace(1, iternum, iternum)
    ax1.plot(x_, costs)
    # Fitted line
    ax2.scatter(X, y)
    h_theta = theta[0] + theta[1] * X
    ax2.plot(X, h_theta)
    plt.show()
```
Variants of Gradient Descent
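Common variants trade the full-batch gradient for cheaper estimates: stochastic gradient descent (SGD) updates on a single random sample per step, and mini-batch gradient descent on a small random subset. A minimal mini-batch sketch for the same linear regression setup (the function name, batch size, and learning rate are illustrative assumptions, not from the original):

```python
import numpy as np

def minibatch_gd(X, y, alpha=0.1, iternum=100, batch_size=16, seed=0):
    # X: (m, n) standardized features; y: (m, 1) targets
    m = X.shape[0]
    Xb = np.insert(X, 0, values=1, axis=1)  # prepend intercept column x0 = 1
    theta = np.zeros((Xb.shape[1], 1))
    rng = np.random.default_rng(seed)
    for _ in range(iternum):
        # Sample a random mini-batch and step along its gradient estimate
        idx = rng.choice(m, size=min(batch_size, m), replace=False)
        Xs, ys = Xb[idx], y[idx]
        grad = Xs.T.dot(Xs.dot(theta) - ys) / len(idx)
        theta -= alpha * grad
    return theta
```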
3. Model Evaluation Metrics
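The metrics implemented below, for $n$ samples with true values $y_i$ and predictions $\hat{y}_i$:

$$
\mathrm{MSE} = \frac{1}{n}\sum_{i=1}^{n} (y_i - \hat{y}_i)^2,
\qquad
\mathrm{RMSE} = \sqrt{\mathrm{MSE}},
\qquad
\mathrm{MAE} = \frac{1}{n}\sum_{i=1}^{n} \lvert y_i - \hat{y}_i \rvert,
\qquad
\mathrm{MAPE} = \frac{100}{n}\sum_{i=1}^{n} \left\lvert \frac{y_i - \hat{y}_i}{y_i} \right\rvert
$$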
Code Implementation
```python
import numpy as np

# MSE: mean squared error
def mse(y_true, y_pred):
    return np.sum(np.power(y_true - y_pred, 2)) / len(y_true)

# RMSE: root mean squared error
def rmse(y_true, y_pred):
    return np.sqrt(np.sum(np.power(y_true - y_pred, 2)) / len(y_true))

# MAE: mean absolute error
def mae(y_true, y_pred):
    return np.sum(np.abs(y_true - y_pred)) / len(y_true)

# MAPE: mean absolute percentage error
def mape(y_true, y_pred):
    return 100 / len(y_true) * np.sum(np.abs((y_true - y_pred) / y_true))

if __name__ == '__main__':
    y_true = np.array([1, 2, 3, 4, 5]).reshape(-1, 1)
    y_pred = np.array([1.1, 2.1, 3.2, 3.9, 5]).reshape(-1, 1)
    print('mse = %.4f' % mse(y_true, y_pred))
    print('rmse = %.4f' % rmse(y_true, y_pred))
    print('mae = %.4f' % mae(y_true, y_pred))
    print('mape = %.4f' % mape(y_true, y_pred))
```
Output:

```
mse = 0.0140
rmse = 0.1183
mae = 0.1000
mape = 4.8333
```
4. Ridge Regression
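Ridge regression adds an L2 penalty on the coefficients to the least-squares cost, shrinking them toward zero. As implemented in computeCosts below:

$$
J(\theta) = \frac{1}{2m}\sum_{i=1}^{m}\left(h_\theta(x^{(i)}) - y^{(i)}\right)^2 + \lambda \sum_{j} \theta_j^2
$$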
Ridge Regression: Code Implementation
```python
# Ridge regression
import numpy as np
import matplotlib.pyplot as plt

# 1. Load the data
def loaddata(filename):
    data = np.loadtxt(filename, delimiter=',')
    n = data.shape[1] - 1
    X = data[:, 0:n]
    y = data[:, n].reshape(-1, 1)
    return X, y

# 2. Standardization
def featureNormized(x):
    avg = np.average(x, axis=0)
    # ddof=1 divides by n-1 (sample standard deviation)
    std = np.std(x, axis=0, ddof=1)
    x = (x - avg) / std
    return x, avg, std

# 3. Cost function: least-squares term plus the L2 penalty
def computeCosts(X, y, lamda, theta):
    m = X.shape[0]
    return np.sum(np.power((np.dot(X, theta) - y), 2)) / (2 * m) + lamda * np.sum(np.power(theta, 2))

# 4. Gradient descent
def gradientDescent(X, y, alpha, iternum, lamda, theta):
    # Prepend the column x0 = 1 for the intercept term
    c = np.ones(X.shape[0]).transpose()
    X = np.insert(X, 0, values=c, axis=1)
    m = X.shape[0]  # number of samples
    n = X.shape[1]  # number of features (including the intercept)
    costs = np.ones(iternum)
    for i in range(iternum):
        for j in range(n):
            # Data-fit step plus the shrinkage from the L2 penalty (2 * lamda * theta[j])
            theta[j] = theta[j] + np.sum((y - np.dot(X, theta)) * X[:, j].reshape(-1, 1)) * (alpha / m) - 2 * lamda * theta[j]
        costs[i] = computeCosts(X, y, lamda, theta)
    return costs, theta

# 5. Prediction
def predict(x):
    x = (x - avg) / std
    c = np.ones(x.shape[0]).transpose()
    x = np.insert(x, 0, values=c, axis=1)
    return np.dot(x, theta)

# 6. Model evaluation
def rmse(y_true, y_pred):
    return np.sqrt(np.sum(np.power((y_true - y_pred), 2)) / len(y_true))

if __name__ == '__main__':
    filename = '../data/data1.txt'
    # Load the data
    X_Orign, y = loaddata(filename)
    # Standardize
    X, avg, std = featureNormized(X_Orign)
    # Parameters
    theta = np.zeros(X.shape[1] + 1).reshape(-1, 1)
    alpha = 0.01
    iternum = 400
    lamda = 0.001
    # Solve by gradient descent
    costs, theta = gradientDescent(X, y, alpha, iternum, lamda, theta)
    print(costs)
    # Predict
    print(predict([[5]]))
    # Evaluate the model
    y_pred = predict(X_Orign)
    print('rmse = %.4f' % rmse(y, y_pred))
    # Plot
    ax1 = plt.subplot(121)
    ax2 = plt.subplot(122)
    # Cost curve over iterations
    x_ = np.linspace(1, iternum, iternum)
    ax1.plot(x_, costs, 'r-', lw=3)
    # Fitted line
    ax2.scatter(X, y)
    h_theta = theta[0] + theta[1] * X
    ax2.plot(X, h_theta, 'r-', lw=3)
    plt.show()
```
Output:

```
[[1.71717285]]
rmse = 3.2595
```
5. LASSO Regression
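LASSO swaps the L2 penalty for an L1 penalty, which can drive coefficients exactly to zero and thus performs feature selection:

$$
J(\theta) = \frac{1}{2m}\sum_{i=1}^{m}\left(h_\theta(x^{(i)}) - y^{(i)}\right)^2 + \lambda \sum_{j} \lvert \theta_j \rvert
$$

Because the L1 term is not differentiable at zero, the code below uses coordinate descent with a soft-threshold update; the quantities $z_k$ and $p_k$ and the three cases mirror the implementation:

$$
z_k = \sum_{i=1}^{m} x_{ik}^2,
\qquad
p_k = \sum_{i=1}^{m} x_{ik}\Big(y_i - \sum_{j \ne k} x_{ij}\theta_j\Big),
\qquad
\theta_k =
\begin{cases}
(p_k + \lambda/2)/z_k, & p_k < -\lambda/2 \\
0, & \lvert p_k \rvert \le \lambda/2 \\
(p_k - \lambda/2)/z_k, & p_k > \lambda/2
\end{cases}
$$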
LASSO Regression: Code Implementation
```python
# LASSO regression
import numpy as np
import matplotlib.pyplot as plt

# 1. Load the data
def loaddata(filename):
    data = np.loadtxt(filename, delimiter=',')
    n = data.shape[1] - 1
    X = data[:, 0:n]
    y = data[:, n].reshape(-1, 1)
    return X, y

# 2. Standardization
def featureNorimized(x):
    avg = np.average(x, axis=0)
    std = np.std(x, axis=0, ddof=1)
    x = (x - avg) / std
    return x, avg, std

# 3. Cost function: least-squares term plus the L1 penalty
def computerCosts(X, y, lamda, theta):
    m = X.shape[0]
    return np.sum(np.power((np.dot(X, theta) - y), 2)) / (2 * m) + lamda * np.sum(np.abs(theta))

# 4. Solve by coordinate descent
def gradientDescent(X, y, iternum, lamda):
    m, n = X.shape
    theta = np.zeros((n, 1))
    costs = np.zeros(iternum)
    for it in range(iternum):
        for k in range(n):  # one pass over the n features
            # Compute z_k and p_k
            z_k = np.sum(np.power(X[:, k], 2))
            p_k = 0
            for i in range(m):
                p_k += X[i, k] * (y[i, 0] - np.sum([X[i, j] * theta[j, 0] for j in range(n) if j != k]))
            # Soft-threshold on p_k
            if p_k < -lamda / 2:
                w_k = (p_k + lamda / 2) / z_k
            elif p_k > lamda / 2:
                w_k = (p_k - lamda / 2) / z_k
            else:
                w_k = 0
            theta[k, 0] = w_k
        costs[it] = computerCosts(X, y, lamda, theta)
    return theta, costs

# 5. Prediction
def predict(x):
    x = (x - avg) / std
    c = np.ones(x.shape[0]).transpose()
    x = np.insert(x, 0, values=c, axis=1)
    return np.dot(x, theta)

if __name__ == '__main__':
    filename = '../data/data1.txt'
    iternum = 400
    # Load the data
    X_orgin, y = loaddata(filename)
    # Standardize
    X, avg, std = featureNorimized(X_orgin)
    # Insert a column of ones for the intercept
    X_1 = np.insert(X, 0, values=1, axis=1)
    # Coordinate descent
    theta, costs = gradientDescent(X_1, y, iternum, lamda=0.01)
    print(theta)
    # Predict
    print(predict([[5.55]]))
    # Plot
    ax1 = plt.subplot(121)
    ax2 = plt.subplot(122)
    # Cost curve over iterations
    x_ = np.linspace(1, iternum, iternum)
    ax1.plot(x_, costs, 'r-')
    # Fitted line
    ax2.scatter(X, y)
    h_theta = theta[0, 0] + theta[1, 0] * X
    ax2.plot(X, h_theta, 'r-')
    plt.show()
```
Output:

```
[[5.83908351]
 [4.61684916]]
[[2.72553942]]
```
6. Elastic Net Regression
Elastic Net is a linear regression model that uses both L1 and L2 penalties as its regularizer. It is especially useful when several features are correlated with one another: LASSO tends to pick one of them at random, whereas Elastic Net tends to keep both.
ElasticNetCV can set the parameters alpha and l1_ratio via cross-validation; l1_ratio adjusts the convex combination of the L1 and L2 penalties.
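A minimal usage sketch with scikit-learn's ElasticNetCV (the synthetic data, l1_ratio grid, and cv value are illustrative assumptions, not from the original):

```python
import numpy as np
from sklearn.linear_model import ElasticNetCV

# Two highly correlated features (illustrative data only)
rng = np.random.default_rng(0)
x1 = rng.normal(size=200)
x2 = x1 + 0.01 * rng.normal(size=200)  # nearly a copy of x1
X = np.column_stack([x1, x2])
y = 3 * x1 + 2 * x2 + rng.normal(size=200)

# Cross-validate both the penalty strength (alpha) and the L1/L2 mix (l1_ratio)
model = ElasticNetCV(l1_ratio=[0.1, 0.5, 0.9], cv=5)
model.fit(X, y)
print(model.alpha_, model.l1_ratio_, model.coef_)
```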
7. Linear Regression via Least Squares
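When $X^T X$ is invertible, minimizing the sum of squared residuals has the closed-form normal-equation solution used below:

$$
\theta = (X^T X)^{-1} X^T y
$$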
Least Squares: Code Implementation
```python
# Least squares
import numpy as np
import matplotlib.pyplot as plt

# 1. Load the data
def loaddata(filename):
    data = np.loadtxt(filename, delimiter=',')
    n = data.shape[1] - 1
    X = data[:, 0:n]
    y = data[:, n].reshape(-1, 1)
    return X, y

# 2. Standardization
def featureNorimized(x):
    avg = np.average(x, axis=0)
    std = np.std(x, axis=0, ddof=1)
    x = (x - avg) / std
    return x, avg, std

# 3. Objective function
def computerCosts(X, y, theta):
    return np.sum(np.power(np.dot(X, theta) - y, 2)) / 2

# 4. Least squares via the normal equation -- valid when X^T X is invertible
def LeastSquaresMethod(X, y):
    theta = np.linalg.inv(X.T.dot(X)).dot(X.T).dot(y)
    costs = computerCosts(X, y, theta)
    return theta, costs

if __name__ == '__main__':
    filename = '../data/data1.txt'
    # Load the data
    X_orgin, y = loaddata(filename)
    # Prepend the column x0 = 1 for the intercept term
    X_1 = np.insert(X_orgin, 0, values=1, axis=1)
    # Solve linear regression by least squares (inverse exists)
    theta, costs = LeastSquaresMethod(X_1, y)
    print(theta)
    # Plot the scatter points and the fitted line
    fig, ax = plt.subplots()
    ax.scatter(X_orgin, y)
    h_theta = theta[0] + theta[1] * X_orgin
    ax.plot(X_orgin, h_theta, 'r-', lw=2)
    plt.show()
```
Output:

```
[[-3.89578088]
 [ 1.19303364]]
```
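When $X^T X$ is singular or badly conditioned, forming its inverse explicitly fails or is numerically unstable. A more robust sketch (a standard NumPy alternative, not from the original) solves the same problem with np.linalg.lstsq:

```python
import numpy as np

def least_squares_lstsq(X, y):
    # Minimizes ||X @ theta - y||^2 without forming (X^T X)^{-1};
    # returns the minimum-norm solution even when X^T X is singular
    theta, residuals, rank, singular_values = np.linalg.lstsq(X, y, rcond=None)
    return theta
```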