上一篇寫了一個基于 LeNet 的 CNN 模型 demo,這次搞個AlexNet。相較于LeNet,AlexNet在模型構造方面有本質的不同,理論層面的文章大家可以自行搜尋學習,咱直接上 demo 幹貨,注釋詳細。
AlexNet CNN 網絡結構
模型構造
import torch.nn as nn
import torch
class AlexNet(nn.Module):
    """AlexNet-style CNN: a convolutional feature extractor followed by a
    fully connected classifier head.

    Expects 3-channel input of spatial size 227x227 (per-layer output sizes
    are noted inline).

    Args:
        num_classes: number of output classes (size of the final linear layer).
        init_weights: when True, apply the custom weight initialization below.
    """

    def __init__(self, num_classes=5, init_weights=False):
        super(AlexNet, self).__init__()
        # Feature extractor, packaged as one nn.Sequential for brevity.
        self.features = nn.Sequential(
            nn.Conv2d(3, 48, kernel_size=11, stride=4),     # [3,227,227] -> [48,55,55]
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2),          # -> [48,27,27]
            nn.Conv2d(48, 128, kernel_size=5, padding=2),   # -> [128,27,27]
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2),          # -> [128,13,13]
            nn.Conv2d(128, 192, kernel_size=3, padding=1),  # -> [192,13,13]
            nn.ReLU(inplace=True),
            nn.Conv2d(192, 192, kernel_size=3, padding=1),  # -> [192,13,13]
            nn.ReLU(inplace=True),
            nn.Conv2d(192, 128, kernel_size=3, padding=1),  # -> [128,13,13]
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2),          # -> [128,6,6]
        )
        # Classifier head: three linear layers with dropout (p=0.5) between
        # them to reduce overfitting.
        self.classifier = nn.Sequential(
            nn.Linear(128 * 6 * 6, 2048),
            nn.ReLU(inplace=True),
            nn.Dropout(p=0.5),
            nn.Linear(2048, 2048),
            nn.ReLU(inplace=True),
            nn.Dropout(p=0.5),
            nn.Linear(2048, num_classes),
        )
        if init_weights:
            self._initialize_weights()

    def forward(self, x):
        """Extract features, flatten per-sample, then classify."""
        feats = self.features(x)
        flat = torch.flatten(feats, start_dim=1)  # keep the batch dimension
        return self.classifier(flat)

    def _initialize_weights(self):
        """Kaiming-normal init for conv weights, N(0, 0.01) for linear
        weights, zeros for all biases."""
        for layer in self.modules():
            if isinstance(layer, nn.Conv2d):
                nn.init.kaiming_normal_(layer.weight, mode='fan_out',
                                        nonlinearity='relu')
                if layer.bias is not None:
                    nn.init.constant_(layer.bias, 0)
            elif isinstance(layer, nn.Linear):
                nn.init.normal_(layer.weight, 0, 0.01)
                nn.init.constant_(layer.bias, 0)
喂資料訓練模型
import os
os.environ["KMP_DUPLICATE_LIB_OK"]="TRUE"
os.environ["OMP_NUM_THREADS"] = "1"
import sys
import json
import torch
import torch.nn as nn
from torchvision import transforms, datasets, utils
import torch.optim as optim
from tqdm import tqdm
from model import AlexNet
def main():
    """Train AlexNet on the flower dataset and checkpoint the best weights.

    Expects a ``flower_data/`` directory with ``train`` and ``val`` subfolders
    laid out for ``torchvision.datasets.ImageFolder``. Writes the
    class-index mapping to ``class_indices.json`` and the best model to
    ``./AlexNet.pth``.
    """
    # Prefer the first GPU when available, otherwise fall back to CPU.
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    print("using {} device.".format(device))

    # Per-split preprocessing. Training adds random crop + horizontal flip
    # for augmentation; validation only resizes. Normalize maps the [0,1]
    # tensor range to roughly [-1,1].
    data_transform = {
        "train": transforms.Compose([
            transforms.RandomResizedCrop(227),
            transforms.RandomHorizontalFlip(),
            transforms.ToTensor(),
            transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
        ]),
        "val": transforms.Compose([
            transforms.Resize((227, 227)),
            transforms.ToTensor(),
            transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
        ]),
    }

    image_path = "flower_data"
    assert os.path.exists(image_path), "{} path does not exist.".format(image_path)

    # Training split.
    train_dataset = datasets.ImageFolder(root=os.path.join(image_path, "train"),
                                         transform=data_transform["train"])
    train_num = len(train_dataset)

    # Invert ImageFolder's {class_name: index} mapping and persist it as
    # {index: class_name} JSON for later inference.
    flower_list = train_dataset.class_to_idx
    cla_dict = {idx: name for name, idx in flower_list.items()}
    with open('class_indices.json', 'w') as json_file:
        json_file.write(json.dumps(cla_dict, indent=4))

    batch_size = 32
    # DataLoader worker count: capped by CPU count, batch size, and 8.
    nw = min([os.cpu_count(), batch_size if batch_size > 1 else 0, 8])
    print('Using {} dataloader workers every process'.format(nw))

    train_loader = torch.utils.data.DataLoader(train_dataset,
                                               batch_size=batch_size,
                                               shuffle=True,
                                               num_workers=nw)

    # Validation split (no shuffling; small fixed batch).
    validate_dataset = datasets.ImageFolder(root=os.path.join(image_path, "val"),
                                            transform=data_transform["val"])
    val_num = len(validate_dataset)
    validate_loader = torch.utils.data.DataLoader(validate_dataset,
                                                  batch_size=4, shuffle=False,
                                                  num_workers=nw)
    print("using {} images for training, {} images for validation.".format(train_num,
                                                                           val_num))

    # Model, loss and optimizer.
    net = AlexNet(num_classes=5, init_weights=True)
    net.to(device)
    loss_function = nn.CrossEntropyLoss()
    optimizer = optim.Adam(net.parameters(), lr=0.0002)

    epochs = 10
    save_path = './AlexNet.pth'
    best_acc = 0.0  # best validation accuracy seen so far
    train_steps = len(train_loader)

    for epoch in range(epochs):
        # ---- training phase: dropout active ----
        net.train()
        running_loss = 0.0  # reset per epoch
        train_bar = tqdm(train_loader, file=sys.stdout)
        for step, data in enumerate(train_bar):
            images, labels = data
            optimizer.zero_grad()                             # clear stale gradients
            outputs = net(images.to(device))                  # forward pass
            loss = loss_function(outputs, labels.to(device))  # cross-entropy loss
            loss.backward()                                   # backpropagate
            optimizer.step()                                  # update parameters
            running_loss += loss.item()
            train_bar.desc = "train epoch[{}/{}] loss:{:.3f}".format(epoch + 1, epochs, loss)

        # ---- validation phase: dropout off, no gradient tracking ----
        net.eval()
        acc = 0.0  # count of correctly classified validation samples
        with torch.no_grad():
            val_bar = tqdm(validate_loader, file=sys.stdout)
            for val_data in val_bar:
                val_images, val_labels = val_data
                outputs = net(val_images.to(device))
                # Predicted class = argmax over the logits.
                predict_y = torch.max(outputs, dim=1)[1]
                acc += torch.eq(predict_y, val_labels.to(device)).sum().item()

        val_accurate = acc / val_num
        print('[epoch %d] train_loss: %.3f val_accuracy: %.3f' %
              (epoch + 1, running_loss / train_steps, val_accurate))

        # Save a checkpoint only when validation accuracy improves.
        if val_accurate > best_acc:
            best_acc = val_accurate
            torch.save(net.state_dict(), save_path)

    print('Finished Training')


if __name__ == '__main__':
    main()