一元線性擬合
現有兩組資料 X、Y,求 y=a*x+b 的係數 a 與截距 b
X =[12.46, 0.25, 5.22, 11.3, 6.81, 4.59, 0.66, 14.53, 15.49, 14.43,
2.19, 1.35, 10.02, 12.93, 5.93, 2.92, 12.81, 4.88, 13.11, 5.8]
Y =[29.01, 4.7, 22.33, 24.99, 18.85, 14.89, 10.58, 36.84, 42.36, 39.73,
11.92, 7.45, 22.9, 36.62, 16.04, 16.56, 31.55, 20.04, 35.26, 23.59]
from sklearn.linear_model import LinearRegression
import matplotlib.pyplot as plt
import numpy as np
if __name__ == '__main__':
    # Sample observations: X is the independent variable, Y the response.
    X = [
        12.46, 0.25, 5.22, 11.3, 6.81, 4.59, 0.66, 14.53, 15.49, 14.43,
        2.19, 1.35, 10.02, 12.93, 5.93, 2.92, 12.81, 4.88, 13.11, 5.8,
    ]
    Y = [
        29.01, 4.7, 22.33, 24.99, 18.85, 14.89, 10.58, 36.84, 42.36, 39.73,
        11.92, 7.45, 22.9, 36.62, 16.04, 16.56, 31.55, 20.04, 35.26, 23.59,
    ]

    # LinearRegression is fed column-shaped data, so each flat list becomes
    # an (n, 1) ndarray, e.g. [[12.46], [0.25], [5.22], ...].
    x_column = np.asarray(X).reshape(-1, 1)
    y_column = np.asarray(Y).reshape(-1, 1)

    # Create the linear regression model and train it on the samples.
    model = LinearRegression().fit(x_column, y_column)

    # Predict on the training inputs; these points trace the fitted line.
    y_fitted = model.predict(x_column)

    # coef_ holds the coefficient(s) and intercept_ the intercept; the target
    # was passed as a column, so both are indexed down to scalars here.
    slope = model.coef_[0, 0]
    intercept = model.intercept_[0]
    print("y=%.4f*x+%.4f" % (slope, intercept))

    # Score the model. For simplicity the training set is reused here; in
    # practice a separate test set is often used for scoring.
    print("得分", model.score(x_column, y_column))

    # Simple plot: raw samples in blue, fitted line in red.
    plt.scatter(X, Y, c="blue")
    plt.plot(x_column, y_fitted, c="red")
    plt.show()
結果:
y=2.0532*x+7.1234
得分 0.9149096589144883
![](https://img.laitimes.com/img/__Qf2AjLwojIjJCLyojI0JCLiAzNfRHLGZkRGZkRfJ3bs92YsYTMfVmepNHL9MWbiZHatVGbo1mYohnMMBjVtJWd0ckW65UbM5WOHJWa5kHT20ESjBjUIF2X0hXZ0xCMx81dvRWYoNHLrdEZwZ1Rh5WNXp1bwNjW1ZUba9VZwlHdssmch1mclRXY39CXldWYtlWPzNXZj9mcw1ycz9WL49zZuBnLxgTMxMjMwgTMzADOwAjMwIzLc52YucWbp5GZzNmLn9Gbi1yZtl2Lc9CX6MHc0RHaiojIsJye.png)
多元線性回歸
這裡隨機建立X1,X2,X3,Y四個數組,使Y=2*X1-3*X2+X3+8。然後加入一些干擾噪聲,再嘗試做線性回歸。
from sklearn.linear_model import LinearRegression
import numpy as np
import random
def format_equation(coefficients, intercept):
    """Render a fitted linear model as text.

    Example: ``format_equation([2, -3], 8)`` -> ``"y=2.0000*x1-3.0000*x2+8.0000"``.

    Args:
        coefficients: Sequence of per-feature coefficients; the i-th
            (0-based) coefficient is labelled ``x{i+1}``.
        intercept: The constant term of the model.

    Returns:
        The equation string, every number formatted to 4 decimal places.
    """
    # "%+.4f" always emits an explicit sign, so terms can be concatenated
    # directly without a separate sign branch.
    terms = "".join(
        "%+.4f*x%d" % (coef, index)
        for index, coef in enumerate(coefficients, start=1)
    )
    # Drop only a leading "+". A leading "-" belongs to the first coefficient
    # and must be kept (slicing off the first character unconditionally would
    # silently flip the sign of a negative leading coefficient).
    if terms.startswith("+"):
        terms = terms[1:]
    # The signed format also avoids printing "+-" for a negative intercept.
    return "y=%s%+.4f" % (terms, intercept)


if __name__ == '__main__':
    n_samples = 50

    # Randomly generate X1, X2, X3 and derive Y so that Y = 2*X1 - 3*X2 + X3 + 8.
    X1 = [random.randint(0, 100) for _ in range(n_samples)]
    X2 = [random.randint(0, 50) for _ in range(n_samples)]
    X3 = [random.randint(0, 25) for _ in range(n_samples)]
    Y = [2*x1 - 3*x2 + x3 + 8 for x1, x2, x3 in zip(X1, X2, X3)]

    # Stack X1, X2, X3 as the three columns of an (n, 3) feature matrix.
    X_train = np.column_stack((X1, X2, X3))
    Y_train = np.array(Y).reshape((len(Y), 1))

    # Add noise, shifted so that its mean is zero.
    noise = np.random.randn(len(Y), 1)
    noise = noise - np.mean(noise)
    Y_train = Y_train + noise

    # Create the linear regression model and train it on the samples.
    lineModel = LinearRegression()
    lineModel.fit(X_train, Y_train)

    # coef_ holds the coefficients and intercept_ the intercept; the target
    # was passed as a column, so the first row / element is taken.
    a_arr = lineModel.coef_[0]
    b = lineModel.intercept_[0]
    f = format_equation(a_arr, b)
    print("拟合方程", f)

    # Score the model. For simplicity the training set is reused here; in
    # practice a separate test set is often used for scoring.
    print("得分", lineModel.score(X_train, Y_train))
結果:
拟合方程 y=1.9972*x1-3.0115*x2+1.0597*x3+7.7271
得分 0.9997880910740103
結果與預先設定的Y=2*X1-3*X2+X3+8相近,這裡採用隨機產生的樣本,每次執行結果不盡相同。
由於是多元,不方便畫圖展示。