SVM:
https://www.jianshu.com/p/6340c6f090e9
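As a quick recap (my notation, following the standard multiclass SVM formulation that the code below vectorizes), the per-sample hinge loss and its gradient with scores s = x_i W are:

    L_i = \sum_{j \neq y_i} \max(0,\, s_j - s_{y_i} + \Delta)

    \nabla_{w_j} L_i = \mathbb{1}(s_j - s_{y_i} + \Delta > 0)\, x_i, \qquad
    \nabla_{w_{y_i}} L_i = -\Big(\sum_{j \neq y_i} \mathbb{1}(s_j - s_{y_i} + \Delta > 0)\Big)\, x_i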
import numpy as np


def svm_loss_vectorized(W, X, Y, reg):
    """
    :param X: 200 x 3073
    :param Y: 200
    :param W: 3073 x 10
    :param reg: regularization strength (it cannot be picked by guesswork; try several
                values with cross-validation and keep the best one)
    :return: loss, dW
    """
    delta = 1.0
    num_train = X.shape[0]
    scores = X.dot(W)  # (200 x 3073) . (3073 x 10) -> 200 x 10
    # Slice out the score of the labelled class for every sample -> 200 x 1
    correct_class_scores = scores[np.arange(num_train), Y].reshape(-1, 1)
    # Hinge-loss margins, 200 x 10
    loss_array = np.maximum(0, scores - correct_class_scores + delta)
    # Zero the entries at the labelled positions; they should not contribute to the loss
    loss_array[np.arange(num_train), Y] = 0  # 200 x 10
    loss = np.sum(loss_array)
    loss /= num_train  # get mean
    # Regularization: the 0.5 factor cancels the 2 that appears when the
    # regularization term is differentiated later on.
    loss += 0.5 * reg * np.sum(W * W)
    # Set the entries of loss_array with a positive margin (i.e. with an error) to 1,
    # the rest stay 0
    loss_array[loss_array > 0] = 1  # 200 x 10
    # The labelled entry also shares in the error: its weight column must be updated too,
    # so it receives minus the number of classes that violated the margin for this sample.
    loss_array[np.arange(num_train), Y] = -np.sum(loss_array, axis=1)
    # X^T: 3073 x 200, loss_array: 200 x 10 -> 3073 x 10
    dW = np.dot(X.T, loss_array)  # 3073 x 10
    dW /= num_train  # average out weights
    dW += reg * W  # regularize the weights
    return loss, dW
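A minimal usage sketch (not from the original notes): the shapes follow the docstring above, and the data, seed, and reg value are invented purely to exercise the function and spot-check the gradient numerically.

import numpy as np

# Hypothetical toy inputs; shapes match the docstring (200 x 3073 data, 10 classes).
np.random.seed(0)
X = np.random.randn(200, 3073)
Y = np.random.randint(0, 10, size=200)
W = np.random.randn(3073, 10) * 0.0001

loss, dW = svm_loss_vectorized(W, X, Y, reg=5e4)
print(loss, dW.shape)  # scalar loss, gradient of shape (3073, 10)

# Spot-check a single gradient entry against a centred numerical estimate
h, i, j = 1e-5, 0, 0
Wp, Wm = W.copy(), W.copy()
Wp[i, j] += h
Wm[i, j] -= h
num_grad = (svm_loss_vectorized(Wp, X, Y, 5e4)[0] - svm_loss_vectorized(Wm, X, Y, 5e4)[0]) / (2 * h)
print(num_grad, dW[i, j])  # the two numbers should agree closely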
Softmax gradient derivation:
https://www.jianshu.com/p/6e405cecd609
https://blog.csdn.net/Hearthougan/article/details/82706834
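In short, the result derived in the links above, and the one the loop below implements, is: with p = softmax(s) and s = x_k W,

    L_k = -\log p_{y_k}, \qquad
    \frac{\partial L_k}{\partial s_j} = p_j - \mathbb{1}(j = y_k), \qquad
    \nabla_{w_j} L_k = \big(p_j - \mathbb{1}(j = y_k)\big)\, x_k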
def softmax_loss_naive(W, X, y, reg):
    """
    :param X: 200 x 3073
    :param y: 200
    :param W: 3073 x 10
    :param reg: regularization strength (it cannot be picked by guesswork; try several
                values and keep the best one)
    :return: loss, dW
    """
    dW = np.zeros(W.shape)  # initialize the gradient as zero
    # compute the loss and the gradient
    num_classes = W.shape[1]
    num_train = X.shape[0]
    loss = 0.0
    for k in range(num_train):
        origin_scores = X[k].dot(W)
        probabilities = np.zeros(origin_scores.shape)
        # Shift the scores by their maximum for numerical stability before exponentiating
        logc = -np.max(origin_scores)
        total_sum = np.sum(np.exp(origin_scores + logc))
        for i in range(num_classes):
            probabilities[i] = np.exp(origin_scores[i] + logc) / total_sum
        for i in range(num_classes):
            if i == y[k]:
                # correct class: gradient is (p_i - 1) * x_k
                dW[:, i] += -X[k] * (1 - probabilities[i])  # dW[:, i]: 3073, X[k]: 3073
            else:
                # every other class: gradient is p_i * x_k
                dW[:, i] += X[k] * probabilities[i]
        loss += -np.log(probabilities[y[k]])
    # Right now the loss is a sum over all training examples, but we want it
    # to be an average instead so we divide by num_train.
    loss /= num_train
    dW /= num_train
    dW += reg * W  # regularize the weights
    # Add regularization to the loss.
    loss += 0.5 * reg * np.sum(W * W)
    return loss, dW
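A natural follow-up is a vectorized version of the same loss. The sketch below is not from the original notes, but it keeps the same conventions (0.5 * reg regularization, mean over the batch), so its output should match softmax_loss_naive.

import numpy as np

def softmax_loss_vectorized(W, X, y, reg):
    # Vectorized counterpart of softmax_loss_naive: same inputs, same outputs.
    num_train = X.shape[0]
    scores = X.dot(W)                                # N x C
    scores -= np.max(scores, axis=1, keepdims=True)  # numerical stability
    exp_scores = np.exp(scores)
    probabilities = exp_scores / np.sum(exp_scores, axis=1, keepdims=True)  # N x C
    loss = -np.sum(np.log(probabilities[np.arange(num_train), y])) / num_train
    loss += 0.5 * reg * np.sum(W * W)
    # dL/dscores is the probability matrix with 1 subtracted at the labelled class
    dscores = probabilities.copy()
    dscores[np.arange(num_train), y] -= 1
    dW = X.T.dot(dscores) / num_train + reg * W
    return loss, dW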