3.3 Implement logistic regression in code, and report its results on watermelon data set 3.0$\alpha$.
1. Code
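For reference, both solvers below work with the maximum-likelihood formulation from the book. With $\hat{x}_i = (1; x_{i1}; x_{i2})$, $z_i = \beta^{\mathrm{T}} \hat{x}_i$, and $p_1 = p(y=1 \mid \hat{x}_i; \beta)$, the code computes the negative log-likelihood and its derivatives:

$$\ell(\beta) = \sum_{i=1}^{m} \left( -y_i z_i + \ln\left(1 + e^{z_i}\right) \right)$$

$$\frac{\partial \ell}{\partial \beta} = -\sum_{i=1}^{m} \hat{x}_i \left( y_i - p_1 \right), \qquad \frac{\partial^2 \ell}{\partial \beta \, \partial \beta^{\mathrm{T}}} = \sum_{i=1}^{m} \hat{x}_i \hat{x}_i^{\mathrm{T}} \, p_1 (1 - p_1)$$

Newton's method iterates $\beta \leftarrow \beta - \left( \frac{\partial^2 \ell}{\partial \beta \, \partial \beta^{\mathrm{T}}} \right)^{-1} \frac{\partial \ell}{\partial \beta}$, while gradient descent iterates $\beta \leftarrow \beta - \alpha \, \frac{\partial \ell}{\partial \beta}$.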
```python
import numpy as np
import matplotlib.pyplot as plt


def getDataSet():
    """
    Get watermelon data set 3.0 alpha.
    :return: (feature array, label array)
    """
    dataSet = np.array([
        [0.697, 0.460, 1],
        [0.774, 0.376, 1],
        [0.634, 0.264, 1],
        [0.608, 0.318, 1],
        [0.556, 0.215, 1],
        [0.403, 0.237, 1],
        [0.481, 0.149, 1],
        [0.437, 0.211, 1],
        [0.666, 0.091, 0],
        [0.243, 0.267, 0],
        [0.245, 0.057, 0],
        [0.343, 0.099, 0],
        [0.639, 0.161, 0],
        [0.657, 0.198, 0],
        [0.360, 0.370, 0],
        [0.593, 0.042, 0],
        [0.719, 0.103, 0]
    ])
    # Insert a constant 1 before column 0 so the bias is absorbed into beta,
    # e.g. dataSet[0] = [1, 0.697, 0.460, 1].
    dataSet = np.insert(dataSet, 0,
                        np.ones(dataSet.shape[0]),
                        axis=1)
    dataArr = dataSet[:, :-1]
    labelArr = dataSet[:, -1]
    return dataArr, labelArr

def sigmoid(Z):
    return 1.0 / (1 + np.exp(-Z))

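# Note (added): np.exp(-Z) can emit an overflow warning when Z is a large
# negative number; the result still saturates to the correct limit of 0, and
# on this tiny data set it is not an issue. If SciPy is available,
# scipy.special.expit is a numerically stable alternative.
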
def newton(dataArr, labelArr):
    """
    Calculate logistic regression parameters by Newton's method.
    :param dataArr: input data set with shape (m, n)
    :param labelArr: the label of data set with shape (m, 1)
    :return: (parameters, error-rate history)
    """
    m, n = dataArr.shape
    labelArr = labelArr.reshape(-1, 1)
    beta = np.ones((n, 1))
    errList = []  # save error-rate history
    z = np.dot(dataArr, beta)
    oldLbeta = 0
    # negative log-likelihood terms, shape (m, 1)
    newLBetaMat = -labelArr * z + np.log(1 + np.exp(z))
    newLBeta = np.sum(newLBetaMat)
    it = 0
    while abs(oldLbeta - newLBeta) > 1e-5:
        it += 1
        # py0 = p(y=0|x) with shape (m, 1)
        py0 = sigmoid(-np.dot(dataArr, beta))
        py1 = 1 - py0
        # 'reshape(m)' gives shape (m,); 'np.diag' builds a diagonal matrix with shape (m, m)
        p = np.diag((py0 * py1).reshape(m))
        # shape (m, n)
        dBetaMat = -dataArr * (labelArr - py1)
        # first derivative with shape (1, n)
        dBeta = np.sum(dBetaMat, axis=0, keepdims=True)
        # second derivative (Hessian) with shape (n, n)
        dBeta2 = dataArr.T.dot(p).dot(dataArr)
        dBeta2Inv = np.linalg.inv(dBeta2)
        # Newton update: (n,1) = (n,1) - (n,n) @ (n,1)
        beta = beta - np.dot(dBeta2Inv, dBeta.T)
        z = np.dot(dataArr, beta)
        oldLbeta = newLBeta
        newLBetaMat = -labelArr * z + np.log(1 + np.exp(z))
        newLBeta = np.sum(newLBetaMat)
        # track the training error rate at each iteration
        pre = predict(beta, dataArr)
        errorRate = cntErrRate(pre, labelArr)
        errList.append(errorRate)
    print("newton iteration is ", it)
    return beta, errList

def gradDescent(dataArr, labelArr, alpha, T):
    """
    Calculate logistic regression parameters by gradient descent.
    :param dataArr: input data set with shape (m, n)
    :param labelArr: the label of data set with shape (m, 1)
    :param alpha: step size (learning rate)
    :param T: number of iterations
    :return: (parameters, error-rate history)
    """
    m, n = dataArr.shape
    labelArr = labelArr.reshape(-1, 1)
    errList = []
    beta = np.ones((n, 1))
    for t in range(T):
        # py1 = p(y=1|x) with shape (m, 1)
        py1 = sigmoid(np.dot(dataArr, beta))
        dBetaMat = -dataArr * (labelArr - py1)
        # gradient with shape (1, n)
        dBeta = np.sum(dBetaMat, axis=0, keepdims=True)
        beta -= alpha * dBeta.T
        # track the training error rate at each iteration
        pre = predict(beta, dataArr)
        errorRate = cntErrRate(pre, labelArr)
        errList.append(errorRate)
    return beta, errList

def predict(beta, dataArr):
    # threshold the predicted probabilities at 0.5
    preArr = sigmoid(np.dot(dataArr, beta))
    preArr[preArr > 0.5] = 1
    preArr[preArr <= 0.5] = 0
    return preArr

def cntErrRate(preLabel, label):
    """
    Calculate error rate.
    :param preLabel: predicted label
    :param label: real label
    :return: error rate
    """
    m = len(preLabel)
    cnt = 0.0
    for i in range(m):
        if preLabel[i] != label[i]:
            cnt += 1.0
    return cnt / float(m)

def main():
    dataArr, labelArr = getDataSet()
    # Newton's method
    betaNew, errNew = newton(dataArr, labelArr)
    print("newton error rate is ", errNew[-1])
    # gradient descent
    T = 500
    learningRate = 0.001
    betaGrad, errGrad = gradDescent(dataArr, labelArr, learningRate, T)
    print("gradient descent error rate is ", errGrad[-1])
    # split positive and negative points
    posPoints = dataArr[labelArr > 0.5]
    negPoints = dataArr[labelArr < 0.5]
    # plot decision boundaries
    plt.figure()
    plt.scatter(posPoints[:, 1], posPoints[:, 2], label="positive")
    plt.scatter(negPoints[:, 1], negPoints[:, 2], label="negative")
    x1 = np.linspace(0, 1, 100)
    x2New = -(betaNew[0] + betaNew[1] * x1) / betaNew[2]
    x2Grad = -(betaGrad[0] + betaGrad[1] * x1) / betaGrad[2]
    plt.plot(x1, x2New, label="newton method")
    plt.plot(x1, x2Grad, label="gradient descent")
    plt.xlabel("x1")
    plt.ylabel("x2")
    plt.title("decision boundary")
    plt.legend()
    # plot Newton's method Ein
    length = len(errNew)
    x1 = np.arange(1, length + 1)
    plt.figure()
    plt.plot(x1, errNew, label="newton method")
    plt.xlabel("t")
    plt.ylabel("Ein")
    plt.title("newton method Ein")
    plt.legend()
    # plot gradient descent Ein
    plt.figure()
    x1 = np.arange(1, T + 1)
    plt.plot(x1, errGrad, label="gradient descent")
    plt.xlabel("t")
    plt.ylabel("Ein")
    plt.title("gradient descent Ein")
    plt.legend()
    plt.show()


if __name__ == '__main__':
    main()
```
After running, the two methods end up with the same $E_{in}$.

2. Plotting the decision boundary
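The boundary is where the model is indifferent between the two classes, i.e. $p(y=1 \mid \hat{x}) = 0.5$. For the learned $\beta = (\beta_0, \beta_1, \beta_2)$ this reduces to the line plotted in `main`:

$$\beta_0 + \beta_1 x_1 + \beta_2 x_2 = 0 \quad \Longrightarrow \quad x_2 = -\frac{\beta_0 + \beta_1 x_1}{\beta_2}$$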
3. Plotting $E_{in}$
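Here $E_{in}$ is the training error rate computed by `cntErrRate` after each iteration $t$:

$$E_{in}(t) = \frac{1}{m} \sum_{i=1}^{m} \mathbb{I}\left( \hat{y}_i^{(t)} \neq y_i \right)$$

where $\hat{y}_i^{(t)}$ is the thresholded prediction after iteration $t$.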
Newton's method
Gradient descent, at several learning rates (see the sketch below for how to reproduce these runs):
- Learning rate 0.001: converges after nearly 500 iterations.
- Learning rate 0.01: converges after roughly 75 iterations.
- Learning rate 0.1: the error rate actually goes back up.
- Learning rate 1: the learning rate is too high, and the error curve starts oscillating within an interval instead of converging.
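The observations above can be reproduced by sweeping the learning rate with the `gradDescent` function from section 1; a minimal sketch, assuming that listing is already in scope (the alpha values are the ones discussed above):

```python
import matplotlib.pyplot as plt

# Rerun gradient descent for each learning rate discussed above and
# overlay the resulting Ein curves for comparison.
dataArr, labelArr = getDataSet()
T = 500
plt.figure()
for alpha in (0.001, 0.01, 0.1, 1):
    _, errList = gradDescent(dataArr, labelArr, alpha, T)
    plt.plot(range(1, T + 1), errList, label="alpha = %g" % alpha)
plt.xlabel("t")
plt.ylabel("Ein")
plt.title("Ein for different learning rates")
plt.legend()
plt.show()
```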
Summary
1. For gradient descent, the learning rate must be chosen appropriately: too small and convergence is slow, too large and the error oscillates instead of settling.
2. Newton's method converges in far fewer iterations, since it exploits second-order (curvature) information.
Code references:
1. Exercise solutions to Machine Learning (the watermelon book), Chapter 3, Exercise 3.3.
2. Machine Learning in Action, Chapter 5: Logistic Regression.