說明:
主要參考Francois Chollet《Deep Learning with Python》
代碼運作環境為kaggle中的kernels;
資料集IMDB需要手動添加;
循環神經網絡和LSTM請參考:【深度學習】:循環神經網(RNN)、【深度學習】:長期依賴與LSTM
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load in
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
# Input data files are available in the "../input/" directory.
# For example, running this (by clicking run or pressing Shift+Enter) will list the files in the input directory
import os
print(os.listdir("../input"))
# Any results you write to the current directory are saved as output.
['imdb.npz']
一、使用Numpy實作簡單RNN的前向傳播
# Forward pass of a simple RNN implemented with plain NumPy.
timesteps = 100       # number of time steps in the input sequence
input_features = 32   # dimensionality of the input at each time step
output_features = 64  # dimensionality of the output / hidden state

# Input: 100 time steps, each a 32-dim vector (random data for demonstration).
inputs = np.random.random((timesteps, input_features))
# Initial hidden state: all zeros.
state_t = np.zeros((output_features,))

W = np.random.random((output_features, input_features))   # input weights
U = np.random.random((output_features, output_features))  # recurrent (state) weights
b = np.random.random((output_features,))                  # bias

successive_outputs = []
for input_t in inputs:
    # Iterate over time steps; output_t is a 64-dim vector.
    output_t = np.tanh(np.dot(W, input_t) + np.dot(U, state_t) + b)
    # Store the output of the current time step.
    successive_outputs.append(output_t)
    # The current output becomes the state fed to the next time step.
    state_t = output_t

# BUG FIX: np.concatenate on 1-D vectors flattens everything into a single
# (timesteps * output_features,) vector, losing the time axis. np.stack keeps
# it, producing the intended (timesteps, output_features) output sequence.
final_output_sequence = np.stack(successive_outputs, axis=0)
二、Keras中的循環層
上面實作的RNN在Keras中對應SimpleRNN層,唯一的不同是SimpleRNN可以處理batch資料,其輸入為(batch_size,timesteps,input_features)。
上面實作的RNN在每個timestep都有輸出,其實也可以只讓最後一個timestep時有輸出。
1.只在最後的timestep輸出結果
from keras.models import Sequential
from keras.layers import Embedding,SimpleRNN

# Embedding(10000, 32): vocabulary of 10k tokens mapped to 32-dim vectors.
# SimpleRNN(32) without return_sequences emits only the last time step's output.
model = Sequential([
    Embedding(10000, 32),
    SimpleRNN(32),
])
2.在每個timestep都有輸出
# Same network, but return_sequences=True makes the RNN emit its 32-dim
# output at every time step instead of only the final one.
model = Sequential([
    Embedding(10000, 32),
    SimpleRNN(32, return_sequences=True),
])
3.堆疊多個SimpleRNN層
# Stacked RNNs: every layer except the last must return full sequences so
# the next recurrent layer receives one input per time step.
model = Sequential()
model.add(Embedding(10000, 32))
for _ in range(3):
    model.add(SimpleRNN(32, return_sequences=True))
model.add(SimpleRNN(32))  # final layer: only the last time step's output
三、使用RNN對IMDB電影評論進行模組化
1.準備資料
from keras.datasets import imdb
from keras.preprocessing import sequence

max_features = 10000  # keep only the 10k most frequent words
maxlen = 500          # truncate / pad every review to 500 tokens
batch_size = 32

print('Loading data...')
# The dataset file is mounted manually in the Kaggle kernel at this path.
(input_train, y_train), (input_test, y_test) = imdb.load_data(
    path='/kaggle/input/imdb.npz', num_words=max_features)
print(len(input_train), 'train sequences')
print(len(input_test), 'test sequences')

print('Pad sequences (samples x time)')
# Pad/truncate both splits to a fixed length of `maxlen` tokens.
input_train, input_test = (
    sequence.pad_sequences(split, maxlen=maxlen)
    for split in (input_train, input_test))
print('input_train shape:', input_train.shape)
print('input_test shape:', input_test.shape)
Loading data...
25000 train sequences
25000 test sequences
Pad sequences (samples x time)
input_train shape: (25000, 500)
input_test shape: (25000, 500)
2.建立模型並訓練
from keras.layers import Dense

# Embedding -> SimpleRNN -> sigmoid unit: binary sentiment classifier.
model = Sequential([
    Embedding(max_features, 32),
    SimpleRNN(32),
    Dense(1, activation='sigmoid'),
])
model.compile(optimizer='rmsprop', loss='binary_crossentropy', metrics=['acc'])

# Hold out 20% of the training set for validation.
history = model.fit(input_train, y_train, epochs=10, batch_size=128,
                    validation_split=0.2)
Train on 20000 samples, validate on 5000 samples
Epoch 1/10
20000/20000 [==============================] - 24s 1ms/step - loss: 0.6481 - acc: 0.6028 - val_loss: 0.4828 - val_acc: 0.7812
...
Epoch 10/10
20000/20000 [==============================] - 23s 1ms/step - loss: 0.0210 - acc: 0.9941 - val_loss: 0.6618 - val_acc: 0.8160
3.繪制曲線
import matplotlib.pyplot as plt
%matplotlib inline
def plot_curve(history):
    """Plot training/validation accuracy and loss from a Keras History object."""
    hist = history.history
    epochs = range(1, len(hist['acc']) + 1)

    # Accuracy curves: dots = training, solid line = validation.
    plt.plot(epochs, hist['acc'], 'bo', label='Training acc')
    plt.plot(epochs, hist['val_acc'], 'b', label='Validation acc')
    plt.title('Training and validation accuracy')
    plt.legend()

    # Loss curves on a separate figure.
    plt.figure()
    plt.plot(epochs, hist['loss'], 'bo', label='Training loss')
    plt.plot(epochs, hist['val_loss'], 'b', label='Validation loss')
    plt.title('Training and validation loss')
    plt.legend()
plot_curve(history)
四、LSTM層
SimpleRNN層不擅長於處理較長的序列,而LSTM則相對於SimpleRNN更適合處理較長的序列。
from keras.layers import LSTM

# Same architecture as before with the SimpleRNN swapped for an LSTM, which
# copes better with long-range dependencies in the 500-token reviews.
model = Sequential([
    Embedding(max_features, 32),
    LSTM(32),
    Dense(1, activation='sigmoid'),
])
model.compile(optimizer='rmsprop', loss='binary_crossentropy', metrics=['acc'])

history = model.fit(input_train, y_train, epochs=10, batch_size=128,
                    validation_split=0.2)
Train on 20000 samples, validate on 5000 samples
Epoch 1/10
20000/20000 [==============================] - 65s 3ms/step - loss: 0.5227 - acc: 0.7557 - val_loss: 0.4223 - val_acc: 0.8082
...
Epoch 10/10
20000/20000 [==============================] - 65s 3ms/step - loss: 0.1075 - acc: 0.9630 - val_loss: 0.3759 - val_acc: 0.8838