深度學習手寫代碼
- Conv2d前向與反向
- MaxPool2d
- BatchNorm2d
- Flatten層
- 全連接層的前向與反向
- Dropout前向與反向
- 激活函數
- ReLU
- Tanh
- Sigmoid
卷積層前向與反向傳播
Conv2d前向與反向
class Conv2d():
    """2-D convolution layer computed as an im2col matrix product.

    Uses an ``Img2colIndices`` helper (not defined in this block) to convert
    between image layout and column layout.
    """

    def __init__(self, in_channels, n_filter, filter_size, padding, stride):
        """
        parameters:
            in_channels: number of channels of the input feature
            n_filter: number of convolution kernels
            filter_size: kernel size as (h_filter, w_filter)
            padding: amount of zero padding
            stride: sliding stride of the kernel
        """
        self.in_channels = in_channels
        self.n_filter = n_filter
        self.h_filter, self.w_filter = filter_size
        self.padding = padding
        self.stride = stride

        # Parameter shapes depend only on the layer configuration,
        # never on the spatial size of the input.
        weight_shape = (n_filter, self.in_channels, self.h_filter, self.w_filter)
        init_scale = np.sqrt(n_filter / 2.)
        self.W = np.random.randn(*weight_shape) / init_scale
        self.b = np.zeros((n_filter, 1))
        self.params = [self.W, self.b]

    def __call__(self, X):
        """Record the input geometry, validate the output size, run forward.

        X is unpacked as (n_x, channels, h_x, w_x).
        Raises Exception("Invalid dimensions!") when the kernel does not tile
        the padded input an integer number of times.
        """
        self.n_x, _, self.h_x, self.w_x = X.shape

        # True division on purpose: a fractional result marks a bad geometry.
        self.h_out = (self.h_x + 2 * self.padding - self.h_filter) / self.stride + 1
        self.w_out = (self.w_x + 2 * self.padding - self.w_filter) / self.stride + 1
        if not (self.h_out.is_integer() and self.w_out.is_integer()):
            raise Exception("Invalid dimensions!")
        self.h_out = int(self.h_out)
        self.w_out = int(self.w_out)

        # Build the img2col helper for this kernel/padding/stride combination.
        self.img2col_indices = Img2colIndices(
            (self.h_filter, self.w_filter), self.padding, self.stride
        )
        return self.forward(X)

    def forward(self, X):
        """Convolve X; the result has shape (n_x, n_filter, h_out, w_out)."""
        # Column form of the input; cached because backward needs it.
        self.x_col = self.img2col_indices.img2col(X)
        # One flattened kernel per row so a single matmul does the convolution.
        self.w_row = self.W.reshape(self.n_filter, -1)

        scores = np.matmul(self.w_row, self.x_col) + self.b
        scores = scores.reshape(self.n_filter, self.h_out, self.w_out, self.n_x)
        return scores.transpose(3, 0, 1, 2)

    def backward(self, d_out):
        """
        parameters:
            d_out: gradient of the loss w.r.t. the convolution output

        return:
            (d_x, [d_w, d_b]) - gradient w.r.t. the input, and the gradients
            w.r.t. W (same shape as W) and b (shape (n_filter, 1)).
        """
        # Bring d_out into the (n_filter, -1) layout used by the forward matmul.
        grad_col = d_out.transpose(1, 2, 3, 0).reshape(self.n_filter, -1)

        d_b = grad_col.sum(axis=1).reshape(self.n_filter, 1)
        d_w = np.matmul(grad_col, self.x_col.T).reshape(self.W.shape)

        # Gradient in column form, then folded back into image layout.
        d_x_col = np.matmul(self.w_row.T, grad_col)
        d_x = self.img2col_indices.col2img(d_x_col)
        return d_x, [d_w, d_b]
MaxPool2d
class Maxpool():
    """2-D max pooling built on the same img2col trick as the convolution.

    Uses an ``Img2colIndices`` helper that is not defined in this block.
    """

    def __init__(self, size, stride):
        """
        parameters:
            size: side length of the square pooling window (int)
            stride: sliding stride of the window, usually equal to size
        """
        self.size = size
        self.stride = stride

    def __call__(self, X):
        """
        parameters:
            X: input feature, shape=(batch_size, channels, height, width)

        Raises Exception("Invalid dimensions!") when the window does not tile
        the input an integer number of times.
        """
        self.n_x, self.c_x, self.h_x, self.w_x = X.shape

        # True division on purpose: a fractional result marks a bad geometry.
        self.h_out = (self.h_x - self.size) / self.stride + 1
        self.w_out = (self.w_x - self.size) / self.stride + 1
        if not (self.h_out.is_integer() and self.w_out.is_integer()):
            raise Exception("Invalid dimensions!")
        self.h_out = int(self.h_out)
        self.w_out = int(self.w_out)

        # Pooling never pads, hence padding=0.
        self.img2col_indices = Img2colIndices(
            (self.size, self.size), padding=0, stride=self.stride
        )
        return self.forward(X)

    def forward(self, X):
        """
        parameters:
            X: input feature, shape=(batch_size, channels, height, width)

        return:
            pooled feature, shape=(batch_size, channels, h_out, w_out)
        """
        # Fold channels into the batch axis so each channel is pooled on its own.
        single_channel = X.reshape(self.n_x * self.c_x, 1, self.h_x, self.w_x)
        self.x_col = self.img2col_indices.img2col(single_channel)

        # Row index of the maximum inside every window (one window per column).
        self.max_indices = np.argmax(self.x_col, axis=0)
        columns = range(self.max_indices.size)
        pooled = self.x_col[self.max_indices, columns]

        pooled = pooled.reshape(self.h_out, self.w_out, self.n_x, self.c_x)
        return pooled.transpose(2, 3, 0, 1)

    def backward(self, d_out):
        """
        parameters:
            d_out: gradient of the loss w.r.t. the pooling output,
                   shape=(batch_size, channels, h_out, w_out)

        return:
            gradient w.r.t. the input, shape=(batch_size, channels, height, width)
        """
        # Same layout as x_col: one column per pooling window.
        d_x_col = np.zeros_like(self.x_col)

        # Only the element that was the maximum of its window receives gradient.
        flat_grad = d_out.transpose(2, 3, 0, 1).ravel()
        columns = range(self.max_indices.size)
        d_x_col[self.max_indices, columns] = flat_grad

        # Fold the column form back into image layout and restore the channels.
        d_x = self.img2col_indices.col2img(d_x_col)
        return d_x.reshape(self.n_x, self.c_x, self.h_x, self.w_x)
BatchNorm2d
class BatchNorm2d():
    """Batch normalisation for (N, C, H, W) feature maps.

    For convolutional layers, batch normalisation is applied after the
    convolution and before the activation. Every channel is normalised
    independently over its N*H*W elements and owns one scalar scale (gamma)
    and one scalar shift (beta).

    At prediction time the output for a sample must not depend on the other
    samples in the batch, so moving averages of the mean and variance
    collected during training are used instead of the batch statistics.
    """

    # Small constant added to the variance for numerical stability.
    EPS = 1e-5

    def __init__(self, n_channel, momentum):
        """
        parameters:
            n_channel: number of channels of the input feature
            momentum: decay factor of the moving_mean / moving_var averages
        """
        self.n_channel = n_channel
        self.momentum = momentum
        # Learnable scale and shift, initialised to 1 and 0.
        self.gamma = np.ones((1, n_channel, 1, 1))
        self.beta = np.zeros((1, n_channel, 1, 1))
        # Statistics used in test mode; initialised to 0 and updated in training.
        self.moving_mean = np.zeros((1, n_channel, 1, 1))
        self.moving_var = np.zeros((1, n_channel, 1, 1))
        self.params = [self.gamma, self.beta]

    def __call__(self, X, mode):
        """
        X: shape = (N, C, H, W)
        mode: 'train' for training; any other value selects test behaviour
        """
        self.X = X
        return self.forward(X, mode)

    def forward(self, X, mode):
        """
        X: shape = (N, C, H, W)
        mode: 'train' for training; any other value selects test behaviour

        return: gamma * x_norm + beta, same shape as X
        """
        # Remembered so that backward differentiates the branch actually taken.
        self._is_training = (mode == 'train')
        if not self._is_training:
            # Prediction: normalise with the moving averages (constants).
            self._std = np.sqrt(self.moving_var + self.EPS)
            self.x_norm = (X - self.moving_mean) / self._std
        else:
            # Per-channel statistics; keepdims so they broadcast against X.
            mean = X.mean(axis=(0, 2, 3), keepdims=True)
            self.var = X.var(axis=(0, 2, 3), keepdims=True)
            self._std = np.sqrt(self.var + self.EPS)
            self.x_norm = (X - mean) / self._std
            # Update the moving averages used at prediction time.
            self.moving_mean = self.momentum * self.moving_mean + (1 - self.momentum) * mean
            self.moving_var = self.momentum * self.moving_var + (1 - self.momentum) * self.var
        # Scale and shift.
        return self.x_norm * self.gamma + self.beta

    def backward(self, d_out):
        """
        d_out: gradient of the loss w.r.t. the output, same shape as the input

        return:
            (d_x, [d_gamma, d_beta]) - gradient w.r.t. the input of the most
            recent forward call, and gradients w.r.t. gamma and beta
            (each of shape (1, C, 1, 1)).
        """
        axes = (0, 2, 3)
        d_gamma = (d_out * self.x_norm).sum(axis=axes, keepdims=True)
        d_beta = d_out.sum(axis=axes, keepdims=True)

        d_x_norm = d_out * self.gamma
        if not self._is_training:
            # Moving statistics are constants, so the map is a per-channel scaling.
            d_x = d_x_norm / self._std
        else:
            # The batch mean and variance depend on every element of the
            # channel, so their contributions must be subtracted:
            #   d_x = (m*g - sum(g) - x_norm*sum(g*x_norm)) / (m*std)
            # with g = d_x_norm and m = N*H*W elements per channel.
            m = d_out.shape[0] * d_out.shape[2] * d_out.shape[3]
            grad_sum = d_x_norm.sum(axis=axes, keepdims=True)
            grad_dot_norm = (d_x_norm * self.x_norm).sum(axis=axes, keepdims=True)
            d_x = (m * d_x_norm - grad_sum - self.x_norm * grad_dot_norm) / (m * self._std)
        return d_x, [d_gamma, d_beta]
Flatten層
class Flatten():
    """Flatten each sample of a feature map into a vector.

    The output of the last convolution layer has to be flattened before it
    can feed a fully connected layer; keeping this as its own module makes
    back-propagating the gradient straightforward.
    """

    def __init__(self):
        pass

    def __call__(self, X):
        # Remember the input shape, e.g. (batch_size, channels, height, width),
        # so backward can restore it.
        self.x_shape = X.shape
        return self.forward(X)

    def forward(self, X):
        """Return X as a 2-D array with one row per sample."""
        batch_size = self.x_shape[0]
        flat = X.ravel()
        return flat.reshape(batch_size, -1)

    def backward(self, d_out):
        """Reshape the incoming gradient back to the recorded input shape."""
        return d_out.reshape(self.x_shape)
全連接層的前向與反向
import numpy as np
# 定義線性層網絡
class Linear():
    """
    Fully connected (affine) layer: out = X @ weight + bias.
    """

    def __init__(self, dim_in, dim_out):
        """
        parameters:
            dim_in: input dimension
            dim_out: output dimension
        """
        # Scale the standard-normal draws by 1 / sqrt(dim_in / 2).
        scale = np.sqrt(dim_in / 2)
        self.weight = np.random.standard_normal((dim_in, dim_out)) / scale
        self.bias = np.random.standard_normal(dim_out) / scale
        self.params = [self.weight, self.bias]

    def __call__(self, X):
        """
        parameters:
            X: input of this layer, shape=(batch_size, dim_in)
        return:
            X @ weight + bias, shape=(batch_size, dim_out)
        """
        # Cached because backward needs it for the weight gradient.
        self.X = X
        return self.forward()

    def forward(self):
        """Compute the affine map for the input cached by __call__."""
        return np.dot(self.X, self.weight) + self.bias

    def backward(self, d_out):
        """
        parameters:
            d_out: gradient of the loss w.r.t. the output,
                   shape=(batch_size, dim_out)
        return:
            (d_x, [d_w, d_b]) - gradient w.r.t. the input X (i.e. the
            activations of the previous layer), and the gradients w.r.t.
            weight and bias.
        """
        d_x = np.dot(d_out, self.weight.T)
        # Both parameter gradients are SUMS over the batch axis. Any 1/batch
        # factor is expected to be carried by d_out already (the
        # CrossEntropyLoss.backward in this file divides by batch_size), so
        # averaging the bias gradient again would shrink it by batch_size
        # relative to the weight gradient.
        d_w = np.dot(self.X.T, d_out)
        d_b = np.sum(d_out, axis=0)
        return d_x, [d_w, d_b]
Dropout前向與反向
class Dropout():
    """
    Randomly zero part of the features during training (inverted dropout:
    surviving features are scaled by 1/p so no rescaling is needed at test time).
    """

    def __init__(self, p):
        """
        parameters:
            p: keep probability, must satisfy 0 < p <= 1

        raises:
            ValueError: if p is outside (0, 1]; p == 0 would divide by zero
                        when the mask is rescaled.
        """
        if not 0 < p <= 1:
            raise ValueError("p must be in the interval (0, 1]")
        self.p = p
        # Mask of the most recent forward pass; None means "identity".
        self.mask = None

    def __call__(self, X, mode):
        """
        mode: 'train' for training; any other value selects test behaviour
        """
        return self.forward(X, mode)

    def forward(self, X, mode):
        """Apply dropout in train mode; pass X through unchanged otherwise."""
        if mode == 'train':
            # Bernoulli(p) keep-mask, rescaled so the expected value is unchanged.
            self.mask = np.random.binomial(1, self.p, X.shape) / self.p
            return self.mask * X
        # Test mode is the identity; drop any mask left over from training so
        # backward does not apply a stale one.
        self.mask = None
        return X

    def backward(self, d_out):
        """
        d_out: gradient of the loss w.r.t. the dropout output

        return: gradient w.r.t. the input of the most recent forward pass
        """
        if self.mask is None:
            return d_out
        return d_out * self.mask
激活函數
ReLU
import numpy as np
# 定義Relu層
class Relu(object):
    """ReLU activation layer: max(0, x) element-wise."""

    def __init__(self):
        # Input of the most recent call; backward derives the gradient mask from it.
        self.X = None

    def __call__(self, X):
        self.X = X
        return self.forward(X)

    def forward(self, X):
        """Return X with every negative entry replaced by 0."""
        return np.maximum(0, X)

    def backward(self, grad_output):
        """
        grad_output: gradient of the loss w.r.t. the ReLU output
        return: gradient of the loss w.r.t. the ReLU input
        """
        # The local derivative is 1 where the input was positive, 0 elsewhere.
        positive_mask = np.greater(self.X, 0)
        return np.multiply(positive_mask, grad_output)
Tanh
class Tanh():
    """Hyperbolic-tangent activation layer."""

    def __init__(self):
        # Input of the most recent call, needed by backward.
        self.X = None

    def __call__(self, X):
        self.X = X
        return self.forward(X)

    def forward(self, X):
        """Return tanh(X) element-wise."""
        return np.tanh(X)

    def backward(self, grad_output):
        """Return grad_output scaled by the local derivative 1 - tanh(X)^2."""
        activation = np.tanh(self.X)
        local_grad = 1 - activation ** 2
        return grad_output * local_grad
Sigmoid
class Sigmoid():
    """Logistic sigmoid activation layer."""

    def __init__(self):
        # Input of the most recent call, needed by backward.
        self.X = None

    def __call__(self, X):
        self.X = X
        return self.forward(self.X)

    def forward(self, X):
        """Return sigmoid(X) element-wise."""
        return self._sigmoid(X)

    def backward(self, grad_output):
        """Return grad_output scaled by the local derivative s * (1 - s)."""
        # Evaluate the sigmoid once and reuse it for both factors.
        s = self._sigmoid(self.X)
        return grad_output * (s * (1 - s))

    def _sigmoid(self, X):
        """Numerically stable 1 / (1 + exp(-X)).

        Written as exp(-log(1 + exp(-X))) with log(1 + exp(-X)) evaluated by
        np.logaddexp(0, -X), so large negative inputs do not overflow exp().
        """
        return np.exp(-np.logaddexp(0, -X))
損失函數
import numpy as np
# 交叉熵損失
class CrossEntropyLoss():
    """
    Softmax cross-entropy loss on the outputs of the last layer.
    """

    def __init__(self):
        self.X = None
        self.labels = None

    def __call__(self, X, labels):
        """
        parameters:
            X: output of the model's last fc layer, shape=(batch_size, num_class)
            labels: one-hot labels, shape=(batch_size, num_class)
        """
        self.X = X
        self.labels = labels
        return self.forward(self.X)

    def forward(self, X):
        """
        Compute the mean cross-entropy over the batch.

        parameters:
            X: outputs of the last layer, shape=(batch_size, C)
        return:
            scalar cross-entropy loss (uses the labels stored by __call__)
        """
        # Cached for backward.
        self.softmax_x = self.softmax(X)
        # Take the log-softmax from the logits with the log-sum-exp trick
        # instead of log(softmax + eps): the epsilon biases the loss (it even
        # turns negative for perfect predictions) and disagrees with the
        # exact gradient returned by backward.
        shifted = X - X.max(axis=1, keepdims=True)
        log_probs = shifted - np.log(np.exp(shifted).sum(axis=1, keepdims=True))
        return np.sum(-(self.labels * log_probs), axis=1).mean()

    def backward(self):
        """Return d(loss)/dX = (softmax - labels) / batch_size."""
        grad_x = self.softmax_x - self.labels
        # The loss is a mean over the batch, hence the division.
        return grad_x / self.X.shape[0]

    def log_softmax(self, softmax_x):
        """
        parameters:
            softmax_x: X after softmax
        return:
            log(softmax_x + 1e-5), shape = (m, C)

        Kept for callers that use it directly; forward no longer relies on it.
        """
        return np.log(softmax_x + 1e-5)

    def softmax(self, X):
        """
        Return the row-wise softmax of X.

        Uses the identity softmax(x) = softmax(x + c): the row maximum is
        subtracted first so exp() cannot overflow to inf.
        """
        # A new array is created on purpose (no -=) so the caller's X is
        # never modified.
        shifted = X - X.max(axis=1, keepdims=True)
        exp_input = np.exp(shifted)  # shape=(m, n)
        exp_sum = exp_input.sum(axis=1, keepdims=True)  # shape=(m, 1)
        return exp_input / exp_sum
優化器
SGD
class SGD():
    """
    Stochastic gradient descent.

    parameters: list of the model's trainable parameters (updated in place)
    lr: float, learning rate
    momentum: float, momentum factor; the default None disables momentum
    """

    def __init__(self, parameters, lr, momentum=None):
        self.parameters = parameters
        self.lr = lr
        self.momentum = momentum
        # The velocity buffers only exist when momentum is enabled.
        if momentum is not None:
            self.velocity = self.velocity_initial()

    def update_parameters(self, grads):
        """
        grads: gradients returned by the network's backward method, in the
               same order as self.parameters.
        """
        if self.momentum is None:
            for param, grad in zip(self.parameters, grads):
                # In-place so the layers holding these arrays see the update.
                param -= self.lr * grad
            return

        for i, param in enumerate(self.parameters):
            # v <- momentum * v - lr * grad ; param <- param + v
            self.velocity[i] = self.momentum * self.velocity[i] - self.lr * grads[i]
            param += self.velocity[i]

    def velocity_initial(self):
        """
        Create one zero velocity buffer per parameter, in parameter order.
        """
        return [np.zeros_like(param) for param in self.parameters]