1. 摘要
圖像分類,也可以稱作圖像識别,顧名思義,就是辨識圖像中的物體屬于什麼類别。核心是從給定的分類集合中給圖像分配一個标簽的任務。實際上,這意味着我們的任務是分析一個輸入圖像並返回一個將圖像分類的标簽。在這裡,我們將分别自己搭建卷積神經網路、遷移學習分别對圖像資料集進行分類。本篇使用的資料集下載位址為:
連結:https://pan.baidu.com/s/1mS4xIf1sr3mhYn-cJNMqjQ
提取碼:k57i
Pytorch_datasets檔案夾底下包括兩個檔案夾存放各自的圖檔資料集。
2.搭建卷積神經網絡實作圖像分類
卷積神經網絡與普通的神經網絡的差別在于,卷積神經網絡包含了一個卷積層convolutional layer和池化層pooling layer構成的特征提取器。卷積神經網路中每層卷積層由若幹卷積單元組成,每個卷積單元的參數都是通過反向傳播算法優化得到的。卷積運算的目的是提取輸入的不同特征,第一層卷積層可能隻能提取一些低級的特征如邊緣、線條和角等層級,更多層的網絡能從低級特征中疊代提取更複雜的特征。池化層(Pooling layer),通常在卷積層之後會得到次元很大的特征,将特征切成幾個區域,取其最大值或平均值,得到新的、次元較小的特征。
如上圖左,全連接配接神經網絡是一個“平面”,包括輸入層—激活函數—全連接配接層,右圖的卷積神經網絡是一個“立體”,包括輸入層—卷積層—激活函數—池化層—全連接配接層。卷積神經網絡提取的資料量更大,是以常用在圖像處理上。
接下來,我們自己搭建神經網絡對上面同樣的資料集進行分類
- 首先,導入相應的包
import os
import torch
from torch import nn,optim
from torch.nn import functional as F
from torch.utils import data
from torchvision import datasets,transforms
- 圖像資料預處理
train_path = "./pytorch_datasets/train"
test_path = "./pytorch_datasets/test"

# Per-image preprocessing: random resized crop to 150x150, convert to a
# tensor, then normalize (mean/std values borrowed from MNIST — presumably
# copied from a tutorial; verify they suit this RGB dataset).
data_transform = transforms.Compose([
    transforms.RandomResizedCrop(150),
    transforms.ToTensor(),
    transforms.Normalize((0.1307,), (0.3081,)),
])

# ImageFolder infers class labels from the sub-directory names.
datasets_train = datasets.ImageFolder(train_path, data_transform)
datasets_test = datasets.ImageFolder(test_path, data_transform)

# Shuffle only the training split; evaluation order does not matter.
train_loader = data.DataLoader(datasets_train, batch_size=32, shuffle=True)
test_loader = data.DataLoader(datasets_test, batch_size=16, shuffle=False)
- pytorch搭建卷積神經網絡
#搭建網絡
class CNN(nn.Module):
    """Small convolutional network for binary classification of 150x150 RGB images.

    Four conv+ReLU+max-pool stages (3 -> 32 -> 64 -> 128 -> 128 channels)
    followed by two fully connected layers and a sigmoid, so the output is a
    single probability per sample, shape (batch, 1).
    """

    def __init__(self):
        super(CNN, self).__init__()
        self.conv1 = nn.Conv2d(3, 32, 3)
        self.max_pool1 = nn.MaxPool2d(2)
        self.conv2 = nn.Conv2d(32, 64, 3)
        self.max_pool2 = nn.MaxPool2d(2)
        self.conv3 = nn.Conv2d(64, 128, 3)
        self.max_pool3 = nn.MaxPool2d(2)
        self.conv4 = nn.Conv2d(128, 128, 3)
        self.max_pool4 = nn.MaxPool2d(2)
        # 6272 = 128 channels * 7 * 7 spatial after four stages on a 150x150 input.
        self.fc1 = nn.Linear(6272, 512)
        self.fc2 = nn.Linear(512, 1)

    def forward(self, x):
        batch = x.size(0)
        stages = (
            (self.conv1, self.max_pool1),
            (self.conv2, self.max_pool2),
            (self.conv3, self.max_pool3),
            (self.conv4, self.max_pool4),
        )
        # Each stage: convolution, ReLU, then 2x2 max-pooling.
        for conv, pool in stages:
            x = pool(F.relu(conv(x)))
        # Flatten and classify.
        x = F.relu(self.fc1(x.view(batch, -1)))
        return torch.sigmoid(self.fc2(x))
# Prefer the GPU when one is available, otherwise run on the CPU.
use_cuda = torch.cuda.is_available()
DEVICE = torch.device('cuda' if use_cuda else 'cpu')
print(DEVICE)

# Instantiate the network on the chosen device with an Adam optimizer.
model = CNN().to(DEVICE)
optimizer = optim.Adam(model.parameters(), lr=0.0001)
- 對圖像資料集進行訓練30次,列印損失
# Train for 30 epochs, logging the loss every 10 batches.
# Fixes vs. original: `Variable` was used without being imported at this
# point in the script, and `.cuda()` was hard-coded even though DEVICE is
# computed above (crashing on CPU-only machines). `Variable` is also a
# deprecated no-op wrapper in modern PyTorch; `.to(DEVICE)` handles both cases.
for epoch in range(30):
    model.train()
    for i, (image, label) in enumerate(train_loader):
        data = image.to(DEVICE)
        # binary_cross_entropy needs a float target shaped (batch, 1) to
        # match the sigmoid output of the model.
        target = label.to(DEVICE).float().unsqueeze(-1)
        optimizer.zero_grad()
        output = model(data)
        loss = F.binary_cross_entropy(output, target)
        loss.backward()
        optimizer.step()
        if (i + 1) % 10 == 0:
            print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
                epoch, (i + 1) * len(data), len(train_loader.dataset),
                100. * (i + 1) / len(train_loader), loss.item()))
- 對測試集資料進行驗證評估,列印精确度
# Evaluate the trained model once on the held-out test set.
# Fixes vs. original: the summed loss was printed as "Average loss" without
# dividing by the dataset size, and the whole evaluation was wrapped in a
# 30-iteration loop that re-ran the identical computation on an unchanged
# model. The per-element list comprehension for thresholding is replaced by
# a vectorized comparison.
model.eval()
test_loss = 0.0
correct = 0
with torch.no_grad():
    for data, target in test_loader:
        data = data.to(DEVICE)
        target = target.to(DEVICE).float().unsqueeze(-1)
        output = model(data)
        # Sum (not mean) per batch so the final division yields a true average.
        test_loss += F.binary_cross_entropy(output, target, reduction='sum').item()
        pred = (output >= 0.5).long()
        correct += pred.eq(target.long()).sum().item()

test_loss /= len(test_loader.dataset)
print('\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format(
    test_loss, correct, len(test_loader.dataset),
    100. * correct / len(test_loader.dataset)))
3.遷移學習實作圖像分類
問題來了?什麼是遷移學習?遷移學習(Transfer learning) 顧名思義就是把已訓練好的模型(預訓練模型)參數遷移到新的模型來幫助新模型訓練。考慮到大部分資料或任務都是存在相關性的,是以通過遷移學習我們可以将已經學到的模型參數(也可了解為模型學到的知識)通過某種方式來分享給新模型進而加快并優化模型的學習效率不用像大多數網絡那樣從零學習。其中,實作遷移學習有以下三種手段:
1.Transfer Learning:凍結預訓練模型的全部卷積層,只訓練自己定制的全連接層。
2.Extract Feature Vector:先計算出預訓練模型的卷積層對所有訓練和測試資料的特征向量,然後抛開預訓練模型,隻訓練自己定制的簡配版全連接配接網絡。
3.Fine-tuning:凍結預訓練模型的部分卷積層(通常是靠近輸入的多數卷積層,因為這些層保留了大量底層資訊)甚至不凍結任何網絡層,訓練剩下的卷積層(通常是靠近輸出的部分卷積層)和全連接層。
預訓練模型有很多,本文選用在ImageNet超大規模資料集(1000個類别)上預訓練的ResNet34模型(即下方程式碼中的 models.resnet34),它表現優異,經常用來做計算機視覺方面的遷移學習研究和應用。
- 同樣,首先導入所需要的包
from sklearn.model_selection import train_test_split
import numpy as np
import os
from tqdm import tqdm
from PIL import Image
import torch
import torchvision.datasets
import torchvision.transforms as transforms
import torchvision.models as models
- 圖檔資料預處理
trainpath = "./pytorch_datasets/train"
testpath = "./pytorch_datasets/test"
batch_size = 16

# Training augmentation: small random rotation and brightness jitter,
# then resize to the 224x224 input expected by ImageNet-pretrained models.
traintransform = transforms.Compose([
    transforms.RandomRotation(20),
    transforms.ColorJitter(brightness=0.1),
    transforms.Resize([224, 224]),
    transforms.ToTensor(),
])

# Validation uses a deterministic resize only — no augmentation.
valtransform = transforms.Compose([
    transforms.Resize([224, 224]),
    transforms.ToTensor(),
])

trainData = torchvision.datasets.ImageFolder(trainpath, transform=traintransform)
testData = torchvision.datasets.ImageFolder(testpath, transform=valtransform)

trainLoader = torch.utils.data.DataLoader(
    dataset=trainData, batch_size=batch_size, shuffle=True)
testLoader = torch.utils.data.DataLoader(
    dataset=testData, batch_size=batch_size, shuffle=False)
- 微調模型進行訓練
# Load ResNet-34 pretrained on ImageNet and swap its classifier head for a
# fresh 2-way linear layer matching our binary dataset.
model = models.resnet34(pretrained=True)
# Derive the input width from the existing head instead of hard-coding 512,
# so this line keeps working if the backbone is swapped for another variant
# (for resnet34, model.fc.in_features == 512 — unchanged behavior).
model.fc = torch.nn.Linear(model.fc.in_features, 2)
- 定義損失函數和優化器
# Cross-entropy over the two output logits; Adam with a small learning
# rate, as is typical when fine-tuning a pretrained backbone.
criterion = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(params=model.parameters(), lr=1e-4)
- 進行訓練資料集
from torch.autograd import Variable
import time

train_loss = []
valid_loss = []
accuracy = []

# Fine-tune the pretrained network for 100 epochs.
# Fixes vs. original: `model.cuda()` was called inside the batch loop on
# every iteration (redundant, and it crashes on CPU-only machines); the
# training accuracy denominator was hard-coded as 100 instead of the
# dataset size; the model output was confusingly named `target`.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model.to(device)  # move once, before training starts

for epoch in range(100):
    epoch_start_time = time.time()
    model.train()
    total_loss = 0.0
    train_corrects = 0
    for image, label in trainLoader:
        image = image.to(device)
        label = label.to(device)
        logits = model(image)
        loss = criterion(logits, label)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        total_loss += loss.item()
        # argmax over the two logits gives the predicted class index.
        pred_label = torch.max(logits, 1)[1].cpu().numpy()
        train_corrects += np.sum(pred_label == label.cpu().numpy())
    epoch_loss = total_loss / float(len(trainLoader))
    # Accuracy over the whole training set (was hard-coded `/100`).
    train_acc = train_corrects / float(len(trainData))
    train_loss.append(epoch_loss)
# Evaluate on the test set.
# Fixes vs. original: `model.cuda()` was called per batch and broke on
# CPU-only machines; the accuracy denominator was hard-coded as 100
# instead of the test-set size.
# NOTE(review): this loop runs the identical evaluation 100 times on a
# model that no longer changes — presumably just to make valid_loss and
# accuracy the same length as train_loss for plotting; ideally evaluation
# would happen inside the training loop after each epoch. Left as-is to
# preserve the plotted curve lengths.
eval_device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model.to(eval_device)

for epoch in range(100):
    model.eval()
    corrects = 0
    eval_loss = 0.0
    with torch.no_grad():
        for image, label in testLoader:
            image = image.to(eval_device)
            label = label.to(eval_device)
            pred = model(image)
            eval_loss += criterion(pred, label).item()
            pred_label = torch.max(pred, 1)[1].cpu().numpy()
            corrects += np.sum(pred_label == label.cpu().numpy())
    valid_loss.append(eval_loss / float(len(testLoader)))
    # Accuracy over the whole test set (was hard-coded `/100`).
    accuracy.append(corrects / float(len(testData)))
import matplotlib.pyplot as plt

print("**********ending*********")
# Overlay the training and validation loss curves on a single axis.
for curve in (train_loss, valid_loss):
    plt.plot(curve)
plt.title('loss')
plt.ylabel('loss')
plt.xlabel('epoch')
plt.legend(['train', 'test'], loc='upper left')
plt.show()
原文連結:https://blog.csdn.net/qq_43018832/article/details/127832688?spm=1001.2014.3001.5502