在之前我們做過這樣的研究:5圖分類 CBIR問題,各不相同的5類圖形,每類100張。
import numpy as np
import gc
import math
import os

import cv2
import h5py as h5py
import numpy as np
from keras.applications.vgg16 import VGG16
from keras.datasets import mnist
from keras.layers import Input, Dense, Dropout, Flatten
from keras.layers.convolutional import Conv2D, MaxPooling2D
from keras.models import Sequential, Model
from keras.optimizers import SGD
from keras.utils.data_utils import get_file
from matplotlib import pyplot as plt
# Global settings
# Fraction of the samples held out for validation when the dataset is built
# with get_files(train_dir, RATIO).
RATIO = 0.2
train_dir = 'D:/dl4cv/datesets/littleCBIR/'
# Number of classes; this fixes the length of the one-hot label vectors
NUM_DENSE = 5
# Number of training epochs
epochs = 10
def tran_y(y, num_classes=None):
    """Return the one-hot encoding of class index ``y``.

    Args:
        y: Integer class index in ``[0, num_classes)``.
        num_classes: Length of the returned vector. When omitted, the
            module-level ``NUM_DENSE`` is used.

    Returns:
        A 1-D float numpy array of length ``num_classes`` holding 1 at
        position ``y`` and 0 everywhere else.

    Raises:
        IndexError: If ``y`` lies outside ``[0, num_classes)``.
    """
    if num_classes is None:
        num_classes = NUM_DENSE
    # A negative index would silently wrap around and mark the wrong class,
    # so reject it with the same exception an index past the end raises.
    if not 0 <= y < num_classes:
        raise IndexError(
            'label {} is out of range for {} classes'.format(y, num_classes))
    y_ohe = np.zeros(num_classes)
    y_ohe[y] = 1
    return y_ohe
def _load_gray(path, size):
    """Read the image at ``path``, convert it to grayscale and resize it."""
    image = cv2.imread(path)
    # cv2.imread returns channels in B, G, R order, so the BGR conversion
    # code is the one that applies the correct per-channel weights.
    image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    return cv2.resize(image, size)


# Build the training / validation images and labels, split according to ratio.
# File-name prefixes in this dataset: 3** car, 4** dinosaur, 5** elephant,
# 6** flower, 7** horse.
def get_files(file_dir, ratio, train_size=(48, 48), val_size=(32, 32)):
    """Load every file in ``file_dir`` and split it into train / validation.

    The label comes from the first character of the file name:
    ``'3'`` -> 0, ``'4'`` -> 1, ``'5'`` -> 2, ``'6'`` -> 3, anything else -> 4.
    The files are shuffled, the first ``n - ceil(n * ratio)`` become the
    training set and the rest the validation set. Each image is converted
    to grayscale and resized.

    Args:
        file_dir: Directory containing the image files.
        ratio: Fraction of the samples held out for validation.
        train_size: Size passed to ``cv2.resize`` for training images.
        val_size: Size passed to ``cv2.resize`` for validation images.

    Returns:
        Tuple ``(tra_images, tra_labels, val_images, val_labels)`` of numpy
        arrays. All four are empty when ``file_dir`` contains no files.
    """
    # NOTE(review): the defaults keep the original 48x48 training and 32x32
    # validation sizes; pass the same size for both if they should match.
    prefix_to_label = {'3': 0, '4': 1, '5': 2, '6': 3}
    image_list = []
    label_list = []
    for name in os.listdir(file_dir):
        # os.path.join also works when file_dir has no trailing separator.
        image_list.append(os.path.join(file_dir, name))
        label_list.append(prefix_to_label.get(name[0:1], 4))
    print('資料集導入完畢')

    # Shuffle paths and labels together through one index permutation, so
    # the labels never have to round-trip through a string array.
    n_sample = len(image_list)
    order = np.arange(n_sample)
    np.random.shuffle(order)
    all_image_list = [image_list[i] for i in order]
    all_label_list = [label_list[i] for i in order]

    # Number of validation and training samples for the requested ratio.
    n_val = math.ceil(n_sample * ratio)
    n_train = n_sample - n_val

    # The first n_train shuffled samples are training data, the rest validation.
    tra_images = [_load_gray(p, train_size) for p in all_image_list[:n_train]]
    val_images = [_load_gray(p, val_size) for p in all_image_list[n_train:]]
    tra_labels = all_label_list[:n_train]
    val_labels = all_label_list[n_train:]

    return (np.array(tra_images), np.array(tra_labels, dtype=int),
            np.array(val_images), np.array(val_labels, dtype=int))
# Colab + VGG needs inputs of at least 48 pixels; on the current dataset this
# size already gives decent results.
ishape = 48
# (X_train, y_train), (X_test, y_test) = mnist.load_data()
# How the archive loaded below was originally produced from the image folder:
# X_train, y_train, X_test, y_test = get_files(train_dir, RATIO)
# np.savez("D:\\dl4cv\\datesets\\littleCBIR.npz",X_train=X_train,y_train=y_train,X_test=X_test,y_test=y_test)

# Fetch the pre-built dataset archive; get_file hands back the local path.
# https://github.com/jsxyhelu/GOCW/raw/master/littleCBIR.npz
path = 'littleCBIR.npz'
path = get_file(path, origin='https://github.com/jsxyhelu/GOCW/raw/master/littleCBIR.npz')
# np.load keeps the .npz archive open; the with-block closes it as soon as
# the four arrays have been read.
with np.load(path) as f:
    X_train, y_train = f['X_train'], f['y_train']
    X_test, y_test = f['X_test'], f['y_test']

# Resize every grayscale image to ishape x ishape, expand it to 3 channels
# (the network below is built with a 3-channel input) and scale to [0, 1].
X_train = np.stack([
    cv2.cvtColor(cv2.resize(img, (ishape, ishape)), cv2.COLOR_GRAY2BGR)
    for img in X_train
]).astype('float32')
X_train /= 255.0
X_test = np.stack([
    cv2.cvtColor(cv2.resize(img, (ishape, ishape)), cv2.COLOR_GRAY2BGR)
    for img in X_test
]).astype('float32')
X_test /= 255.0

# One-hot encode the integer labels.
y_train_ohe = np.array([tran_y(label) for label in y_train]).astype('float32')
y_test_ohe = np.array([tran_y(label) for label in y_test]).astype('float32')

# VGG16 convolutional base with ImageNet weights and without its classifier.
model_vgg = VGG16(include_top=False, weights='imagenet', input_shape=(ishape, ishape, 3))
# Freeze every layer of the base so only the new head below is trained.
# (Alternative that was tried: freeze only layers with index < 20 via
#  enumerate(model_vgg.layers).)
for layer in model_vgg.layers:
    layer.trainable = False

# New classification head on top of the frozen base.
model = Flatten()(model_vgg.output)
model = Dense(4096, activation='relu', name='fc1')(model)
model = Dense(4096, activation='relu', name='fc2')(model)
model = Dropout(0.5)(model)
model = Dense(NUM_DENSE, activation='softmax', name='prediction')(model)
model_vgg_pretrain = Model(model_vgg.input, model, name='vgg16_pretrain')
# model_vgg_pretrain.summary()
print("vgg準備完畢\n")

sgd = SGD(lr=0.05, decay=1e-5)
model_vgg_pretrain.compile(loss='categorical_crossentropy', optimizer=sgd, metrics=['accuracy'])
print("vgg開始訓練\n")
# The validation data is the same split that evaluate() scores below.
log = model_vgg_pretrain.fit(X_train, y_train_ohe, validation_data=(X_test, y_test_ohe), epochs=epochs, batch_size=64)
score = model_vgg_pretrain.evaluate(X_test, y_test_ohe, verbose=0)
print('Test loss:', score[0])
print('Test accuracy:', score[1])

# Older Keras records accuracy under 'acc' / 'val_acc'; newer releases use
# 'accuracy' / 'val_accuracy'. Use whichever key this run produced.
acc_key = 'acc' if 'acc' in log.history else 'accuracy'
plt.figure('acc')
plt.subplot(2, 1, 1)
plt.plot(log.history[acc_key], 'r--', label='Training Accuracy')
plt.plot(log.history['val_' + acc_key], 'r-', label='Validation Accuracy')
plt.legend(loc='best')
plt.xlabel('Epochs')
plt.axis([0, epochs, 0.5, 1])
plt.figure('loss')
plt.subplot(2, 1, 2)
plt.plot(log.history['loss'], 'b--', label='Training Loss')
plt.plot(log.history['val_loss'], 'b-', label='Validation Loss')
plt.legend(loc='best')
plt.xlabel('Epochs')
plt.axis([0, epochs, 0, 1])
plt.show()
# "pause" is a Windows shell command that waits for a key press.
os.system("pause")
epoch = 10
Test loss: 0.5930496269464492
Test accuracy: 0.83
epoch = 30
Test loss: 1.254318968951702
Test accuracy: 0.68
原圖修正
Test loss: 0.2156761786714196
Test accuracy: 0.93
應該說,主要代碼問題不大,也摸索出來了系列方法,但是需要進一步豐富。今天,我在之前的基礎上繼續研究,解決以下問題:
1、在5Type4CBIR(https://www.kaggle.com/jsxyhelu/5type4cbir)做訓練;
2、使用augmentation方法,提高前期效果;
3、不僅僅是曲線,結果要以可視化方法繪制出來;
4、最終的結果要能夠下載下來;
做訓練:基于之前的積累,我已經将資料集變成了npz格式,可以直接使用keras進行下載。為此,我專(peng)門(qiao)在kaggle上建立了這個資料集,立刻感到B格高漲。
如果從一般意義上來說,augmentation方法的引入肯定能夠提高處理效果的,特别對于這樣一個資料集比較小的問題來說尤其是這樣。從目前這個意義上來看:
# img_generator is created outside this snippet (presumably a Keras
# ImageDataGenerator -- confirm); fit() is given the training images first.
img_generator.fit(X_train)
# Train the model on batches produced with real-time data augmentation.
# NOTE(review): steps_per_epoch counts batches, so len(X_train) steps of 64
# images draw far more than one pass over the data per epoch -- confirm this
# oversampling is intended.
augmented_batches = img_generator.flow(X_train, y_train_ohe, batch_size=64)
log = model_vgg_pretrain.fit_generator(
    augmented_batches,
    steps_per_epoch=len(X_train),
    epochs=epochs)
# Score the trained model on the held-out images.
score = model_vgg_pretrain.evaluate(X_test, y_test_ohe, verbose=0)
簡單幾行代碼,就可以解決這裡的問題。如果确實有效,那麼将被複用;
在不generate下,達到95:
Epoch 30/30
400/400 [==============================] - 15s 36ms/step - loss: 0.1731 - acc: 0.9375 - val_loss: 0.1910 - val_acc: 0.9500
Test loss: 0.19096931144595147
Test accuracy: 0.95
x
在對generate進一步了解後,我認為現在的結果應該能夠更高,經過夜間訓練,得到這樣結果
Epoch 10/10
18/400 [>.............................] - ETA: 20:44 - loss: 0.0221 - acc: 0.9935
400/400 [==============================] - 1275s 3s/step - loss: 0.0449 - acc: 0.9875 - val_loss: 0.0667 - val_acc: 0.9700
Test loss: 0.06673358775209635
Test accuracy: 0.97
其中,核心代碼及參數為:
# Core call of the overnight run: augmented batches of 128 images, 400
# batches per epoch, 10 epochs, validated on (X_test, y_test_ohe), 4 workers.
overnight_batches = img_generator.flow(X_train, y_train_ohe, batch_size=128)
log = model_vgg_pretrain.fit_generator(
    overnight_batches,
    steps_per_epoch=400,
    epochs=10,
    validation_data=(X_test, y_test_ohe),
    workers=4)
反觀資料,可以發現在epoch = 7 的時候,就已經達到最好;這個曲線較前面的訓練更為平滑,總體品質也更高。98的準确率對于消費級的應用是一個很好的結果。
augmentation的方法将會被經常複用。
這裡的意思也很直接,就是我要真實地去驗證資料,要把模型結果顯示出來,而不僅僅是在模型裡面fit。這個顯示首先是在Jupyter裡面直覺的觀看。這個是給我來看的。一個是要在後面的結果應用中能夠下載,并且被我現在的軟體來使用。
但是目前這一步無法完成,因為前期我使用OpenCV讀入圖像、預處理、壓制成npz格式的,目前我認為這個效果很好。OpenCV的Mat格式目前無法在Jupyter中進行顯示。
colab中有一段可以下載的代碼,翻閱了一些資料,解決了這個問題:
# Install the PyDrive wrapper & import libraries.
# This only needs to be done once in a notebook.
!pip install -U -q PyDrive
from
pydrive.auth
import
GoogleAuth
from
pydrive.drive
import
GoogleDrive
from
google.colab
import
auth
from
oauth2client.client
import
GoogleCredentials
# Authenticate and create the PyDrive client.
# This only needs to be done once in a notebook.
auth.authenticate_user()
gauth = GoogleAuth()
gauth.credentials = GoogleCredentials.get_application_default()
drive = GoogleDrive(gauth)
# Create & upload a text file.
uploaded = drive.CreateFile()
uploaded.SetContentFile('5type4cbirMODEL.h5')
uploaded.Upload()
print
('Uploaded file with ID {}'.format(uploaded.get('id')))
# Install the PyDrive wrapper & import libraries.
# This only needs to be done once per notebook.
!pip install -U -q PyDrive
from
pydrive.auth
import
GoogleAuth
from
pydrive.drive
import
GoogleDrive
from
google.colab
import
auth
from
oauth2client.client
import
GoogleCredentials
# Authenticate and create the PyDrive client.
# This only needs to be done once per notebook.
auth.authenticate_user()
gauth = GoogleAuth()
gauth.credentials = GoogleCredentials.get_application_default()
drive = GoogleDrive(gauth)
#根據檔案名進行下載下傳
file_id = '1qjxAm_QiXdSqBmyIoPl3bfnyLNJxwKo9'
downloaded = drive.CreateFile({'id': file_id})
print
('Downloaded content "{}"'.format(downloaded.GetContentString()))
還是要通過id來進行調用。畢竟這個調用隻會發生在内部情況下,可以了解。
到了小結時間了:
昨天和今天在做CBIR的實驗,得到目前這個結果,應該說能夠解決一些問題,特别是模型訓練這個方面的問題了,還是有很多收獲的,當然主要的收獲還是在代碼裡面:
1、augmentation可以提高品質,fit_generator也是有技巧的(steps_per_epoch),更不要說在資料集準備這塊前期積累的技巧;
2、transform是可以用來解決垂直領域問題的,下一步就是需要驗證;
3、研究問題的時候,首先是不能失去信心,然後就是要總結方法。
來自為知筆記(Wiz)附件清單
目前方向:圖像拼接融合、圖像識别
聯系方式:[email protected]