Gradient Descent for Logistic Regression
Goal: build a classifier and solve for the three parameters $\theta_0, \theta_1, \theta_2$.
Then set a threshold and use it to decide whether a student is admitted.
Modules to implement:
- sigmoid: maps a value to a probability
- model: returns the predicted value
- cost: computes the loss for given parameters
- gradient: computes the gradient direction for each parameter
- descent: performs the parameter updates
- accuracy: computes the accuracy
1. The sigmoid function
Formula:
$g(z) = \frac{1}{1+e^{-z}}$
def sigmoid(z):
    return 1 / (1 + np.exp(-z))
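A quick sanity check (a minimal sketch; np must already be imported as in the full script below): large negative inputs map toward 0, zero maps to exactly 0.5, and large positive inputs map toward 1.

print(sigmoid(np.array([-10, 0, 10])))
# [4.53978687e-05 5.00000000e-01 9.99954602e-01]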
2. The model function
Formula:
$\begin{pmatrix}\theta_{0} & \theta_{1} & \theta_{2}\end{pmatrix} \times \begin{pmatrix}1\\ x_{1}\\ x_{2}\end{pmatrix} = \theta_{0}+\theta_{1}x_{1}+\theta_{2}x_{2}$
def model(X, theta):
    return sigmoid(np.dot(X, theta.T))
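A shape check with made-up numbers: each row of X starts with the constant 1 (the bias column added in the full script), and with theta of shape (1, 3) the result is one probability per sample.

X_demo = np.array([[1.0, 2.0, 3.0],
                   [1.0, 4.0, 5.0]])   # bias column plus two features
theta_demo = np.zeros([1, 3])
print(model(X_demo, theta_demo))       # [[0.5] [0.5]]: zero theta always gives 0.5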
3. The cost function
Log-likelihood of a single sample, with the sign flipped so it can be minimized:
$D(h_\theta(x), y) = -y\log(h_\theta(x)) - (1-y)\log(1-h_\theta(x))$
Average loss:
$J(\theta)=\frac{1}{n}\sum_{i=1}^{n} D(h_\theta(x_i), y_i)$
def cost(X, y, theta):
    left = np.multiply(-y, np.log(model(X, theta)))
    right = np.multiply(1 - y, np.log(1 - model(X, theta)))
    return np.sum(left - right) / len(X)
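A sanity check (assuming the labels are 0/1): with theta all zeros every prediction is 0.5, so each sample contributes -log(0.5) and the average loss is ln 2 ≈ 0.6931 regardless of the features.

X_demo = np.array([[1.0, 2.0, 3.0],
                   [1.0, 4.0, 5.0]])
y_demo = np.array([[1.0], [0.0]])
print(cost(X_demo, y_demo, np.zeros([1, 3])))   # ~0.6931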
4. The gradient function
Formula:
$\frac{\partial J}{\partial \theta_j}=-\frac{1}{n}\sum_{i=1}^{n}(y_i - h_\theta(x_i))x_{ij}$
def gradient(X, y, theta):
    grad = np.zeros(theta.shape)
    error = (model(X, theta) - y).ravel()
    for j in range(len(theta.ravel())):   # one partial derivative per parameter
        term = np.multiply(error, X[:, j])
        grad[0, j] = np.sum(term) / len(X)
    return grad
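The per-parameter loop can also be collapsed into a single matrix product. This vectorized variant (a sketch, not part of the original code) returns the same values and is usually faster:

def gradient_vectorized(X, y, theta):
    error = model(X, theta) - y        # shape (m, 1)
    return (X.T @ error).T / len(X)    # shape (1, n), matching theta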
Gradient descent for logistic regression: the full script
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline

df = pd.read_csv('LogiReg_data.txt', header=None, names=['Exam 1', 'Exam 2', 'Admitted'])

# Plot the two classes to get a feel for the data
positive = df[df['Admitted'] == 1]
negative = df[df['Admitted'] == 0]
fig, ax = plt.subplots(figsize=(10, 8))
ax.scatter(positive['Exam 1'], positive['Exam 2'], s=60, color='g', marker='o', label='Admitted')
ax.scatter(negative['Exam 1'], negative['Exam 2'], s=60, color='r', marker='x', label='Not Admitted')
ax.legend()
ax.grid()
ax.set_xlabel('Exam 1 Score')
ax.set_ylabel('Exam 2 Score')

def sigmoid(z):
    return 1 / (1 + np.exp(-z))

def model(X, theta):
    return sigmoid(np.dot(X, theta.T))

df.insert(0, 'Ones', 1)   # add a column of ones so theta_0 acts as the bias term
orig_data = df.values
cols = orig_data.shape[1]
X = orig_data[:, 0:cols-1]
y = orig_data[:, cols-1:cols]
theta = np.zeros([1, 3])
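A quick shape check before computing anything (the dataset in this exercise has 100 rows):

print(X.shape, y.shape, theta.shape)   # (100, 3) (100, 1) (1, 3)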
def cost(X, y, theta):
    left = np.multiply(-y, np.log(model(X, theta)))
    right = np.multiply(1 - y, np.log(1 - model(X, theta)))
    return np.sum(left - right) / len(X)

cost(X, y, theta)   # initial loss with theta = 0
def gradient(X, y, theta):
    grad = np.zeros(theta.shape)
    error = (model(X, theta) - y).ravel()
    for j in range(len(theta.ravel())):
        term = np.multiply(error, X[:, j])
        grad[0, j] = np.sum(term) / len(X)
    return grad
Three stopping strategies for gradient descent:
stop_iter = 0
stop_cost = 1
stop_grad = 2

def stopCriterion(type, value, threshold):
    # stop by iteration count, by change in cost, or by gradient norm
    if type == stop_iter:
        return value > threshold
    elif type == stop_cost:
        return abs(value[-1] - value[-2]) < threshold
    elif type == stop_grad:
        return np.linalg.norm(value) < threshold   # vector norm, see https://blog.csdn.net/hqh131360239/article/details/79061535
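A quick illustration of the three criteria with made-up values:

print(stopCriterion(stop_iter, 5001, 5000))              # True: iteration limit exceeded
print(stopCriterion(stop_cost, [0.693, 0.693], 1e-6))    # True: the last two costs barely differ
print(stopCriterion(stop_grad, np.zeros([1, 3]), 0.02))  # True: gradient norm below the threshold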
def shuffleData(data):
    np.random.shuffle(data)   # shuffle the rows in place
    cols = data.shape[1]
    X = data[:, 0:cols-1]
    y = data[:, cols-1:]
    return X, y
import time

def descent(data, theta, batchSize, stopType, thresh, alpha):
    init_time = time.time()
    i = 0   # iteration counter
    k = 0   # start index of the current batch
    X, y = shuffleData(data)
    grad = np.zeros(theta.shape)
    costs = [cost(X, y, theta)]   # record the loss after every update
    while True:
        grad = gradient(X[k:k+batchSize], y[k:k+batchSize], theta)
        k += batchSize
        if k >= n:   # n is the global sample count: one pass finished, reshuffle
            k = 0
            X, y = shuffleData(data)
        theta = theta - alpha * grad   # parameter update
        costs.append(cost(X, y, theta))
        i += 1
        if stopType == stop_iter:
            value = i
        elif stopType == stop_cost:
            value = costs
        elif stopType == stop_grad:
            value = grad
        if stopCriterion(stopType, value, thresh):
            break
    return theta, i-1, costs, grad, time.time() - init_time
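The one routine covers all three descent variants purely through batchSize (n is the global sample count):

# batchSize == n  -> batch gradient descent: every sample per update
# batchSize == 1  -> stochastic gradient descent: one sample per update
# batchSize == 16 -> mini-batch descent: 16 samples per update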
def runExpe(data, theta, batchSize, stopType, thresh, alpha):
    theta, n_iter, costs, grad, dur = descent(data, theta, batchSize, stopType, thresh, alpha)
    # raw exam scores exceed 2, standardized ones mostly do not
    name = 'Original' if (data[:, 1] > 2).sum() > 1 else 'Scaled'
    name += ' data - learning rate: {} - '.format(alpha)
    if batchSize == n:
        strDescType = 'Gradient'
    elif batchSize == 1:
        strDescType = 'Stochastic'
    else:
        strDescType = 'Mini-batch ({})'.format(batchSize)
    name += strDescType + ' descent - Stop: '
    if stopType == stop_iter:
        strStop = '{} iterations'.format(thresh)
    elif stopType == stop_cost:
        strStop = 'costs change < {}'.format(thresh)
    else:
        strStop = 'gradient norm < {}'.format(thresh)
    name += strStop
    print('***{}\nTheta: {} - Iter: {} - Last cost: {:.2f} - Duration: {:.2f}s'.format(
        name, theta, n_iter, costs[-1], dur))
    fig, ax = plt.subplots(figsize=(15, 4))
    ax.plot(np.arange(len(costs)), costs, 'r')
    ax.set_xlabel('Iterations')
    ax.set_ylabel('Cost')
    ax.set_title(name + ' - Error vs. Iteration')
    ax.grid()
    return theta
Set the parameters. n equals the number of training samples (100), so batchSize == n gives full-batch gradient descent; because the raw scores sit on a 0-100 scale, the learning rate has to be tiny:

n = 100   # number of training samples; descent uses this global to detect the end of a pass
runExpe(orig_data, theta, n, stop_iter, thresh=5000, alpha=0.000001)
Adjust the parameters:
runExpe(orig_data, theta, 1, stop_iter, thresh=1000, alpha=0.001)       # stochastic, larger learning rate
runExpe(orig_data, theta, 1, stop_iter, thresh=15000, alpha=0.000001)   # stochastic, smaller learning rate
runExpe(orig_data, theta, 16, stop_iter, thresh=15000, alpha=0.0001)    # mini-batch of 16
- Tune again after preprocessing (standardizing) the data:
from sklearn import preprocessing as pp

scaled_data = orig_data.copy()
scaled_data[:, 1:3] = pp.scale(orig_data[:, 1:3])   # standardize the two score columns
runExpe(scaled_data, theta, n, stop_iter, thresh=5000, alpha=0.001)
runExpe(scaled_data, theta, n, stop_grad, thresh=0.02, alpha=0.001)
theta = runExpe(scaled_data, theta, 1, stop_grad, thresh=0.0004, alpha=0.001)   # keep the fitted theta for prediction
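For reference, pp.scale standardizes each column to zero mean and unit variance; a minimal numpy equivalent (an illustration, not sklearn's actual implementation) would be:

def standardize(cols):
    # per-column: subtract the mean, divide by the standard deviation
    return (cols - cols.mean(axis=0)) / cols.std(axis=0)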
- Compute the accuracy
def predict(X, theta):
    # threshold the predicted probability at 0.5
    return [1 if x >= 0.5 else 0 for x in model(X, theta)]

scaled_X = scaled_data[:, :3]
y = scaled_data[:, 3]
predictions = predict(scaled_X, theta)
correct = [1 if a == b else 0 for (a, b) in zip(predictions, y)]
accuracy = sum(correct) / len(correct) * 100
print('accuracy = {:.0f}%'.format(accuracy))
accuracy = 90%
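The same accuracy can be computed in one vectorized expression (a sketch assuming predictions and y as defined above):

acc = np.mean(np.array(predictions) == y) * 100
print('accuracy = {:.0f}%'.format(acc))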