天天看點

機器學習算法之多變量線性回歸練習(二)

題目要求

1.按要求完成下面的各項需求。

利用python編寫如下程式,

現有一個聯合循環發電廠(Combined Cycle Power Plant)樣本資料集,其中包括訓練集(ccpp_train.txt檔案)和測試集(ccpp_test.txt檔案)。資料集的欄位與格式如下(此處以空白分隔僅為示意,實際檔案以逗號分隔):

AT(溫度) V(排氣真空度) AP(大氣壓強) RH(相對濕度) PE(輸出電力)

8.34 40.77 1010.84 90.01 480.48

23.64 58.49 1011.4 74.2 445.75

29.74 56.9 1007.15 41.91 438.76

請通過Python實作線性回歸模型,并用此模型預測輸出電力,具體要求如下:

完成資料集的讀取

實作代價函數

實作梯度下降函數

要求輸出疊代過程中的代價函數值

完成測試集的資料預測,并計算在測試集上的代價函數值

以橫軸為真實值,縱軸為預測值,畫出散點圖

代碼實作

import numpy as np
from matplotlib import pyplot as plt
# Load the comma-separated samples: the training split is used for fitting,
# the separate test split for evaluation (it must not be the training file again).
data_train = np.loadtxt(r'ccpp_train.txt',delimiter=',')
data_test = np.loadtxt(r'ccpp_test.txt',delimiter=',')


# Data preprocessing
def preprocess(data):
    """Split a sample matrix into standardized features and a target column.

    Args:
        data: 2-D array-like; every column except the last is a feature and
            the last column is the target.

    Returns:
        A tuple ``(X, y)``: ``X`` is the z-score standardized feature matrix
        with a column of ones prepended, ``y`` is the target as an ``(m, 1)``
        column. The array passed in is not modified.
    """
    data = np.asarray(data)
    # Work on a float copy: the slice is a view, so standardizing it in place
    # would overwrite the caller's array and fail on integer input.
    X = data[:, :-1].astype(float)
    y = data[:, -1]
    # Feature scaling: zero mean, unit sample standard deviation (ddof=1)
    X -= np.mean(X, axis=0)
    std = np.std(X, axis=0, ddof=1)
    # A constant column has zero spread; divide it by 1 so it stays at 0
    # instead of becoming NaN.
    std[std == 0] = 1.0
    X /= std
    # Prepend the column of ones that multiplies the intercept term
    X = np.c_[np.ones(len(X)), X]
    # Turn the target into an (m, 1) column
    y = np.c_[y]
    return X, y


# Run both splits through preprocess() to obtain the feature matrices and targets.
# NOTE(review): preprocess() standardizes each split with that split's own
# mean/std, so the test features are not scaled with the training statistics.
(X_train, y_train), (X_test, y_test) = (
    preprocess(split) for split in (data_train, data_test)
)


# Linear model
def model(X,theta):
    """Return the linear hypothesis: the dot product of ``X`` and ``theta``."""
    return np.dot(X, theta)


# Cost function
def costFunction(h,y):
    """Return the squared-error cost: sum((h - y)^2) / (2 * len(h))."""
    sample_count = len(h)
    squared_error = np.square(h - y)
    scale = 1.0 / (2 * sample_count)
    return scale * np.sum(squared_error)


# Batch gradient descent
def gradeDesc(X, y, alpha=0.01, iter_num=2000, print_every=500):
    """Fit the parameters of the linear model with batch gradient descent.

    Args:
        X: 2-D array of shape (m, n); theta gets one row per column of X.
        y: the m target values; coerced to an (m, 1) column, so a 1-D
            vector is accepted as well.
        alpha: learning rate applied to every update.
        iter_num: number of gradient-descent iterations.
        print_every: print the cost every this many iterations (starting
            with iteration 0); 0 or None disables the output.

    Returns:
        A tuple ``(J_history, theta)``: ``J_history`` holds the cost measured
        before each update (length ``iter_num``) and ``theta`` is the fitted
        (n, 1) parameter column.
    """
    # Number of samples and number of parameters
    m, n = X.shape
    # Force a column vector: with a 1-D y, `h - y` would broadcast to (m, m)
    y = np.reshape(y, (-1, 1))
    # Start from an all-zero parameter vector
    theta = np.zeros((n, 1))
    # One cost value per iteration
    J_history = np.zeros(iter_num)
    for i in range(iter_num):
        # Predictions for the current parameters
        h = model(X, theta)
        # Record the cost before this iteration's update
        J_history[i] = costFunction(h, y)
        if print_every and i % print_every == 0:
            print(J_history[i])
        # Gradient of the cost with respect to theta
        deltatheta = (1.0 / m) * np.dot(X.T, h - y)
        # Step against the gradient
        theta -= alpha * deltatheta
    return J_history, theta


# Fit theta on the training split; J_history records the cost of every iteration
J_history, theta = gradeDesc(X_train, y_train, iter_num=5000, alpha=0.01)
# Predict the test split with the fitted theta
y_test_h = model(X_test, theta)
# Evaluate the cost of those predictions against the true test targets and report it
J_test_h = costFunction(y_test_h, y_test)
print('測試集代價函數值是:', J_test_h, sep='\n')


# Goodness of fit
def score(h,y):
    """Return 1 - SS_res / SS_tot (coefficient of determination) for predictions ``h``.

    SS_res is the sum of squared differences between ``h`` and ``y``;
    SS_tot is the sum of squared deviations of ``y`` from its mean.
    """
    residual_ss = np.sum(np.square(h - y))
    centered = y - np.mean(y)
    total_ss = np.sum(np.square(centered))
    return 1 - residual_ss / total_ss


# Score the test-set predictions and report the result
ss = score(y_test_h, y_test)
print("精度是:", ss, sep="\n")

# Show how the training cost evolved over the iterations
plt.plot(J_history)
plt.show()

# True values on the x-axis: first the true values against themselves
# (a reference diagonal), then the predictions on top of it
plt.scatter(x=y_test, y=y_test)
plt.scatter(x=y_test, y=y_test_h)
plt.show()



           

資料準備

訓練集樣本抽樣

13.51,43.41,1015.94,75.22,463.86

18.87,60.07,1015.15,70.91,453.27

13.42,41.74,1020.96,61.8,473.45

18.36,56.65,1020.29,82,456.49

26.8,72.58,1008.94,78.24,428.62

14.46,42.86,1031.34,69.84,464.44

14.93,43.02,1012.11,45.56,468.19

20.19,44.57,1009.2,72.13,454.36

29.79,77.17,1009.68,64,432.84

13.9,39.59,1011.84,94.74,465

26.94,73.21,1002.83,91.25,431.19

25.31,65.48,1018.31,55.57,439.72

26.64,58.69,1007.99,75.68,439.32

14.02,40.75,1016.05,70.65,470.48

19.15,59.21,1018.41,88.9,450.26

22.46,48.41,1008.66,80.85,442.57

14.12,41.39,1018.73,76.51,472.88

24.57,49.5,1014.22,56.31,455.72

21.58,63.87,1015.27,63.15,451.88

19.3,46.93,1014.83,66.71,456.62

22.43,55.97,1008.97,85.98,443.74

30.91,76.2,1008.53,58.08,434.94

16.02,71.14,1019.75,70.42,456.35

24.14,60.07,1016.56,58.08,440.17

16.05,43.14,1010.67,79.36,463.06

22.7,64.05,1012.65,89.69,448.76

8.83,36.3,1027.08,72.69,479.86

24.61,69.68,1012.06,92.47,438.51

30.61,69.13,1009.32,55.17,429.1

5.68,40.77,1022.49,90.6,487.58

7.98,39.61,1018.57,77.04,479.78

測試集樣本抽樣

22.46,48.41,1008.66,80.85,442.57

14.12,41.39,1018.73,76.51,472.88

24.57,49.5,1014.22,56.31,455.72

21.58,63.87,1015.27,63.15,451.88

19.3,46.93,1014.83,66.71,456.62

22.43,55.97,1008.97,85.98,443.74

30.91,76.2,1008.53,58.08,434.94

16.02,71.14,1019.75,70.42,456.35

24.14,60.07,1016.56,58.08,440.17

16.05,43.14,1010.67,79.36,463.06

22.7,64.05,1012.65,89.69,448.76

8.83,36.3,1027.08,72.69,479.86

24.61,69.68,1012.06,92.47,438.51

30.61,69.13,1009.32,55.17,429.1

5.68,40.77,1022.49,90.6,487.58

7.98,39.61,1018.57,77.04,479.78

5.49,38.5,1012.18,79.33,490.84

26.31,71.29,1009.87,84.16,432.92

16.27,56.89,1013.74,84.36,454.88

12.73,44.34,1015.11,93.55,472.94

20.28,62.52,1017.89,75.67,452.45

4.96,39.4,1003.58,92.22,486.09

8.07,43.69,1017.05,87.34,485.18

8.74,40.03,1016.81,93.37,481.07

20.25,55.5,1020.03,69.33,455.13

20.13,60.07,1014.79,63.57,453.49

18.99,44.6,1014.7,40.11,463.48

24.4,67.45,1015.63,57.1,435.47

6.17,39.33,1012.57,93.32,491.54

22.49,45.61,1013.1,75.69,455.12

22.04,57.32,1012.54,62.17,447.27

21.81,63.77,1014.28,83.66,444.52

10.16,41.62,1013.15,94.3,465.05

10.07,44.68,1023.44,90.95,477.52

23.61,63.94,1012.9,87.06,441.57

23.34,59.44,1012.67,80.76,445.24

15.02,42.07,1017.89,83.68,460.82

7.6,41.04,1021.82,88.97,475.32

25.42,66.05,1016.74,68.92,442.6

23.71,60.23,1009.76,90.67,439.66

32.33,69.89,1014.18,50.93,427.29

6.49,39.33,1010.85,91.85,489.22

7.57,37.49,1009.73,83.07,481.98

13.89,44.84,1023.66,92.97,466.74

8.9,36.24,1013.29,89.35,479.03

15.49,54.3,1017.59,71.26,464.24

23.74,65.34,1013.7,62.9,447.31

26.02,68.67,1006.73,75.19,440.12

8.61,37.49,1009.35,82.62,477.13

13.31,41.26,1020.83,79.55,462.87

22.93,62.26,1011.25,83.66,438.34

22.83,70.79,1006.36,92.07,438

24.52,59.15,1014.03,74.83,439.55

30.55,70.04,1010.51,49.37,429.56

17.36,43.96,1013.02,79.59,466.36

24.21,71.77,1004.52,84.96,433.42

27.19,64.27,1013.06,58.13,444.54

19.05,59.21,1017.99,89.53,451

23.34,45.61,1012.73,74.09,455.82

32.69,72.86,1003.57,56.84,431.76

8.73,36.18,1013.66,77.74,479.25

10.41,44.68,1023.53,91.38,474.7

22.28,58.12,1014.54,83.27,448.97

25.14,60.93,1007.44,76.71,437.4

18.26,61.27,1019.1,74.74,428.67

23.74,63.9,1014.73,81.9,445.47

31.46,70.79,1003.54,59.51,425.68

31.68,70.79,1004.05,54.5,429.55

26.62,72.43,1006.79,82.74,430.22

21.47,50.12,1009.19,93.68,448.11

13.47,41.14,1026.09,82.96,463.67

14.8,43.99,1022.89,85.25,461.97

31.12,67.69,1005.3,50.46,425.21

29.2,64.84,1009.94,55.37,441.9

12.42,43.14,1015.88,79.48,471.1

17.51,53.16,1013.13,82.86,457.45

13.87,42.99,1007.45,81.52,471.12

25.07,77.95,1012.87,83,438.55

12.88,42.74,1026.25,74.54,470.89

23.31,60.08,1017.14,64.35,452.65

12.33,38.91,1017.24,79.84,472.49

20.51,39.72,1002.25,47.97,452.39

16.2,45.76,1014.73,89.84,460.87

29.6,71.58,1010.34,52.56,434.64

25.94,66.49,1012.83,61.81,433.38

7.73,39.04,1018.61,68.23,482.39

15.08,42.77,1018.67,73.89,461.6

9.83,41.17,1019.34,72.29,478.21

27.44,52.3,1008.15,58.92,441.75

12.01,41.48,1017.75,66.67,469.08

20.99,50.78,1008.55,75.14,449.07

5.4,39.4,1011.45,91.84,485.86

7.87,41.06,1020.91,87.64,486.57

20.78,62.52,1017.58,73.3,452.3

31.26,68.94,1005.94,39.49,438.03

20.03,60.77,1017.23,87.82,449.31

23.34,63.73,1012.1,79.11,443.68

23.14,60.27,1018.51,80.54,442.59

29.75,73.5,1011.13,67.31,433.63

21.93,62.91,1013.45,74.62,449.17

9.93,39.04,1023.78,77.08,480.54

18.7,52.72,1024.84,57.72,458.06

15.67,45.17,1018.73,94.74,462.09

23.62,45.87,1007.75,58.69,445.55

6.22,38.68,1017.87,69.41,483.55

24.66,63.73,1011.4,74.52,444.37

11.94,44.6,1018.69,85.33,468.53

7.11,43.13,1018.96,87.82,486.11

21.54,58.12,1015.33,78.67,454.32

29.45,64.96,1005.52,59.92,433.04

31.85,68.3,1014.76,47.06,428.72

27.3,65.12,1016.24,44.87,442.78

13.51,39.31,1012.18,75.19,466.46

16.73,39.64,1008.94,74.91,464.46

25.45,69.59,1008.51,83.17,445.61

12.88,44.34,1016.03,88.51,474.94

22.29,43.79,1015.68,41.75,461.23

12.07,40.81,1025.63,68.02,475.96

18.21,62.26,1011.97,87.28,455.88

17.53,42.24,1016.9,60.95,470.63

23.66,61.86,1013.33,83.09,444.27

14.18,40.69,1014.73,74.88,471.52

13.85,41.39,1018.62,75.55,471.45

繼續閱讀