1 Importing the libraries
import torch
import numpy as np
import matplotlib.pyplot as plt
2 Setting the hyperparameters
TIME_STEP = 10    # number of time steps in each training sequence
INPUT_SIZE = 1    # number of input features per time step
HIDDEN_SIZE = 32  # size of the RNN hidden state
LR = 0.02         # learning rate
3 Defining the RNN
class RNN(torch.nn.Module):
    def __init__(self):
        super(RNN, self).__init__()
        self.rnn = torch.nn.RNN(
            input_size=INPUT_SIZE,
            hidden_size=HIDDEN_SIZE,
            num_layers=1,
            batch_first=True)
        # With batch_first=True, the input tensor has shape (batch_size, time_step, input_size).
        # If this argument is omitted or set to False, the expected shape is (time_step, batch_size, input_size).
        self.out = torch.nn.Linear(HIDDEN_SIZE, 1)
        # Maps the hidden-layer output to the desired output size
    def forward(self, x, h_state):
        # In an RNN the hidden state at each time step is computed from the hidden state
        # of the previous time step, so we keep passing h_state along.
        # x: (batch_size, time_step, INPUT_SIZE)
        r_out, h_state = self.rnn(x, h_state)
        # h_state is both an input to and an output of the RNN
        # r_out:   (batch_size, time_step, HIDDEN_SIZE)
        # h_state: (num_layers, batch_size, HIDDEN_SIZE)
        outs = []
        for time_step in range(r_out.size(1)):
            outs.append(self.out(r_out[:, time_step, :]))
            # Each slice passed to self.out has shape (batch_size, HIDDEN_SIZE);
            # each appended result has shape (batch_size, 1)
        return torch.stack(outs, dim=1), h_state
        # The first returned value has shape (batch_size, time_step, 1).
        # Note that torch.stack uses the dim argument (PyTorch's name for NumPy's axis);
        # dim=1 stacks the list entries along dimension 1.
rnn=RNN()
print(rnn)
'''
RNN(
(rnn): RNN(1, 32, batch_first=True)
(out): Linear(in_features=32, out_features=1, bias=True)
)
'''
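As a quick sanity check (added here for illustration, not part of the original walkthrough), we can list the learnable parameters. Assuming PyTorch's default parameter naming for a single-layer torch.nn.RNN, we should see an input-to-hidden matrix, a hidden-to-hidden matrix, two bias vectors, and the linear output layer:

for name, p in rnn.named_parameters():
    print(name, tuple(p.shape))
# rnn.weight_ih_l0 (32, 1)    input-to-hidden weights
# rnn.weight_hh_l0 (32, 32)   hidden-to-hidden weights
# rnn.bias_ih_l0 (32,)
# rnn.bias_hh_l0 (32,)
# out.weight (1, 32)
# out.bias (1,)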
Alternatively, the forward function can be written like this:
class RNN(torch.nn.Module):
    def __init__(self):
        super(RNN, self).__init__()
        self.rnn = torch.nn.RNN(
            input_size=INPUT_SIZE,
            hidden_size=HIDDEN_SIZE,
            num_layers=1,
            batch_first=True)
        # With batch_first=True, the input tensor has shape (batch, time_step, input_size).
        # If this argument is omitted or set to False, the expected shape is (time_step, batch, input_size).
        self.out = torch.nn.Linear(HIDDEN_SIZE, 1)
    def forward(self, x, h_state):
        r_out, h_state = self.rnn(x, h_state)
        # Everything up to this point is unchanged
        r_out = r_out.view(-1, HIDDEN_SIZE)  # flatten to (batch_size * time_step, HIDDEN_SIZE)
        out = self.out(r_out)                # apply the linear layer to every time step at once
        out = out.view(-1, TIME_STEP, 1)     # reshape back to (batch_size, TIME_STEP, 1)
        return out, h_state
rnn=RNN()
print(rnn)
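Both versions should produce outputs of the same shape. A minimal shape check (a sketch added here, not in the original) feeds an all-zeros batch through the network, passing None so that PyTorch uses a zero initial hidden state:

dummy_x = torch.zeros(1, TIME_STEP, INPUT_SIZE)  # (batch_size, time_step, input_size)
out, h = rnn(dummy_x, None)
print(out.shape)  # torch.Size([1, 10, 1])
print(h.shape)    # torch.Size([1, 1, 32]) -> (num_layers, batch_size, hidden_size)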
4 Setting up the optimizer and loss function
optimizer=torch.optim.Adam(rnn.parameters(),lr=LR)
loss_func=torch.nn.MSELoss()
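MSELoss averages the squared element-wise differences between prediction and target. A tiny worked example (added for illustration):

a = torch.tensor([1.0, 2.0])
b = torch.tensor([1.5, 2.5])
print(loss_func(a, b))  # tensor(0.2500): ((0.5)**2 + (0.5)**2) / 2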
5 Training the RNN
Here we want to use the sin function to predict the cos function. Since cos(x) = sin(x + π/2), the target is simply a phase-shifted copy of the input, which the hidden state can learn to track.
h_state = None
for step in range(100):
    start = step * np.pi
    end = (step + 1) * np.pi
    steps = np.linspace(start, end, TIME_STEP, dtype=np.float32)
    # The dtype argument is required here; without it the arrays are float64 and PyTorch
    # raises: RuntimeError: expected scalar type Double but found Float
    x_np = np.sin(steps).reshape(1, TIME_STEP, INPUT_SIZE)
    y_np = np.cos(steps).reshape(1, TIME_STEP, 1)
    # Goal: predict cos from sin
    x = torch.from_numpy(x_np)
    y = torch.from_numpy(y_np)
    prediction, h_state = rnn(x, h_state)
    # Each input batch produces one prediction and one h_state
    h_state = h_state.data
    # Detach h_state from the computation graph before passing it to the next iteration,
    # so backpropagation does not reach back through earlier steps
    loss = loss_func(prediction, y)
    optimizer.zero_grad()
    # Clear the gradients from the previous step
    loss.backward()
    # Backpropagate the error and compute the gradients
    optimizer.step()
    # Apply the updates to the parameters of rnn
    if step % 10 == 0:
        plt.plot(steps, prediction.data.numpy().flatten(), 'g*')
        plt.plot(steps, y_np.flatten(), 'r-')
        plt.show()
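After training, the network can be evaluated on a fresh interval without updating the weights. A minimal sketch (not part of the original code), continuing on the interval right after the last training window and reusing the final h_state from the loop:

with torch.no_grad():
    test_steps = np.linspace(100 * np.pi, 101 * np.pi, TIME_STEP, dtype=np.float32)
    test_x = torch.from_numpy(np.sin(test_steps).reshape(1, TIME_STEP, INPUT_SIZE))
    test_pred, _ = rnn(test_x, h_state)  # reuse the last hidden state from training
    plt.plot(test_steps, test_pred.numpy().flatten(), 'g*')
    plt.plot(test_steps, np.cos(test_steps), 'r-')
    plt.show()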
6 Results
At the start:
At the end:
7 Complete code
import torch
import numpy as np
import matplotlib.pyplot as plt
TIME_STEP = 10
INPUT_SIZE = 1
HIDDEN_SIZE = 32
LR = 0.02
class RNN(torch.nn.Module):
    def __init__(self):
        super(RNN, self).__init__()
        self.rnn = torch.nn.RNN(
            input_size=INPUT_SIZE,
            hidden_size=HIDDEN_SIZE,
            num_layers=1,
            batch_first=True)
        # With batch_first=True, the input tensor has shape (batch_size, time_step, input_size).
        # If this argument is omitted or set to False, the expected shape is (time_step, batch_size, input_size).
        self.out = torch.nn.Linear(HIDDEN_SIZE, 1)
        # Maps the hidden-layer output to the desired output size
    def forward(self, x, h_state):
        # In an RNN the hidden state at each time step is computed from the hidden state
        # of the previous time step, so we keep passing h_state along.
        # x: (batch_size, time_step, INPUT_SIZE)
        r_out, h_state = self.rnn(x, h_state)
        # h_state is both an input to and an output of the RNN
        # r_out:   (batch_size, time_step, HIDDEN_SIZE)
        # h_state: (num_layers, batch_size, HIDDEN_SIZE)
        outs = []
        for time_step in range(r_out.size(1)):
            outs.append(self.out(r_out[:, time_step, :]))
            # Each slice passed to self.out has shape (batch_size, HIDDEN_SIZE);
            # each appended result has shape (batch_size, 1)
        return torch.stack(outs, dim=1), h_state
        # The first returned value has shape (batch_size, time_step, 1).
        # Note that torch.stack uses the dim argument (PyTorch's name for NumPy's axis);
        # dim=1 stacks the list entries along dimension 1.
rnn=RNN()
print(rnn)
'''
RNN(
(rnn): RNN(1, 32, batch_first=True)
(out): Linear(in_features=32, out_features=1, bias=True)
)
'''
optimizer=torch.optim.Adam(rnn.parameters(),lr=LR)
loss_func=torch.nn.MSELoss()
h_state = None
for step in range(100):
    start = step * np.pi
    end = (step + 1) * np.pi
    steps = np.linspace(start, end, TIME_STEP, dtype=np.float32)
    # The dtype argument is required here; without it the arrays are float64 and PyTorch
    # raises: RuntimeError: expected scalar type Double but found Float
    x_np = np.sin(steps).reshape(1, TIME_STEP, INPUT_SIZE)
    y_np = np.cos(steps).reshape(1, TIME_STEP, 1)
    # Goal: predict cos from sin
    x = torch.from_numpy(x_np)
    y = torch.from_numpy(y_np)
    prediction, h_state = rnn(x, h_state)
    # Each input batch produces one prediction and one h_state
    h_state = h_state.data
    # Detach h_state from the computation graph before passing it to the next iteration,
    # so backpropagation does not reach back through earlier steps
    loss = loss_func(prediction, y)
    optimizer.zero_grad()
    # Clear the gradients from the previous step
    loss.backward()
    # Backpropagate the error and compute the gradients
    optimizer.step()
    # Apply the updates to the parameters of rnn
    if step % 10 == 0:
        plt.plot(steps, prediction.data.numpy().flatten(), 'g*')
        plt.plot(steps, y_np.flatten(), 'r-')
        plt.show()