文章目錄
- pipeline 總結
-
- 1、[簡易流水線實作](https://zhuanlan.zhihu.com/p/56317767)
-
- 不用流水線方式
- 流水線方式
- 2、[ valid/ready 信号結合pipeline](https://blog.csdn.net/rill_zhen/article/details/45980039)
- 3、[流水線設計高速乘法器(移位實作)](https://www.runoob.com/w3cnote/verilog-pipeline-design.html)
-
- 設計原理
- 乘法器設計 - 非流水線
- 乘法器設計- 流水線
pipeline 總結
1、簡易流水線實作
參考:https://zhuanlan.zhihu.com/p/56317767
不用流水線方式
// Non-pipelined version: both multiplications, the subtraction and the two
// additions are all evaluated in a single clock cycle, so the whole
// expression sits between one pair of register stages.
// rst_n is an active-low asynchronous reset.
always @(posedge clk or negedge rst_n) begin
    if (!rst_n)
        y <= 1'b0;
    else
        // The original statement was missing its terminating semicolon.
        y <= a_reg * b_reg + c_reg * d_reg + (e_reg - f_reg);
end
流水線方式
// Pipeline stage 1: register the two products and the difference so that
// each operator gets its own clock cycle.
always @(posedge clk or negedge rst_n) begin
    if (~rst_n) begin
        // active-low asynchronous reset clears every stage-1 register
        sub_pre_1 <= 'b0;
        mux_pre_2 <= 'b0;
        mux_pre_1 <= 'b0;
    end
    else begin
        sub_pre_1 <= e_reg - f_reg;
        mux_pre_2 <= c_reg * d_reg;
        mux_pre_1 <= a_reg * b_reg;
    end
end
// Pipeline stage 2: add the two stage-1 products and delay the stage-1
// difference by one more cycle so both terms reach stage 3 together.
always @(posedge clk or negedge rst_n) begin
    if (!rst_n) begin
        add_pre_1 <= 'b0;
        sub_pre_2 <= 'b0;
    end
    else begin
        // Was "mux_pre1": that name is never assigned by stage 1, which
        // drives mux_pre_1.
        add_pre_1 <= mux_pre_1 + mux_pre_2;
        sub_pre_2 <= sub_pre_1;
    end
end
// Pipeline stage 3: final addition of the product sum and the delayed
// difference.
always @(posedge clk or negedge rst_n) begin
    if (~rst_n) begin
        add_pre_2 <= 'b0;
    end
    else begin
        add_pre_2 <= add_pre_1 + sub_pre_2;
    end
end
仿真結果
2、 valid/ready 信号結合pipeline
參考:https://blog.csdn.net/rill_zhen/article/details/45980039
// Three processing stages (pa -> pb -> pc) chained with valid/ready links.
// valid and data travel forward; ready travels backward.
module Mpipeline(
    input        clk,
    input        rst_n,
    input        en_i,
    input  [7:0] data_i,
    output       en_o,
    output [7:0] data_o,
    output       idle
);
    // pa -> pb link
    wire       a2b_vld;
    wire [7:0] a2b_dat;
    wire       b2a_rdy;

    // pb -> pc link
    wire       b2c_vld;
    wire [7:0] b2c_dat;
    wire       c2b_rdy;

    // ready output of the first stage; it is not routed to any port
    wire       a_rdy;

    Mpa pa(
        .clk     (clk),
        .rst_n   (rst_n),
        // upstream side
        .valid_i (en_i),
        .data_i  (data_i),
        .ready_o (a_rdy),
        // downstream side
        .valid_o (a2b_vld),
        .data_o  (a2b_dat),
        .ready_i (b2a_rdy)
    );

    Mpb pb(
        .clk     (clk),
        .rst_n   (rst_n),
        // upstream side
        .valid_i (a2b_vld),
        .data_i  (a2b_dat),
        .ready_o (b2a_rdy),
        // downstream side
        .valid_o (b2c_vld),
        .data_o  (b2c_dat),
        .ready_i (c2b_rdy)
    );

    Mpc pc(
        .clk     (clk),
        .rst_n   (rst_n),
        // upstream side
        .valid_i (b2c_vld),
        .data_i  (b2c_dat),
        .ready_o (c2b_rdy),
        // downstream side: the last stage is always told the sink is ready
        .valid_o (en_o),
        .data_o  (data_o),
        .ready_i (1'b1)
    );

    // idle is high only when none of the three stage outputs is valid
    assign idle = ~(a2b_vld | b2c_vld | en_o);
endmodule
// One pipeline stage with a valid/ready handshake on both sides.
//
//   valid_i / data_i / ready_o : link to the previous stage
//   valid_o / data_o / ready_i : link to the next stage
//
// The stage computes data_i + 1 and registers it. ready is passed straight
// through (ready_o = ready_i), so a stall requested by the next stage stalls
// this stage and is reported to the previous stage in the same cycle.
//
// The registers only advance while ready_i is high. Previously they were
// updated whenever valid_i was high regardless of ready_i, so a beat that the
// next stage had not yet accepted could be overwritten and lost.
// With ready_i held high the behaviour is unchanged.
module Mpa(
    input        clk,
    input        rst_n,    // active-low synchronous reset
    input        valid_i,  // from pre-stage
    input  [7:0] data_i,   // from pre-stage
    input        ready_i,  // from post-stage
    output       ready_o,  // to pre-stage
    output       valid_o,  // to post-stage
    output [7:0] data_o    // to post-stage
);
    reg        valid_o_r;
    reg  [7:0] data_o_r;
    wire [7:0] calc;

    assign calc = data_i + 1'b1;
    // module Mpb: assign calc = data_i << 1'b1;
    // module Mpc: assign calc = data_i - 1'b1;

    // Output valid: follows valid_i when the stage may advance, holds on stall.
    always @(posedge clk)
        if (~rst_n)
            valid_o_r <= 1'b0;
        else if (ready_i)
            valid_o_r <= valid_i;

    // Output data: captured only for an accepted input beat.
    always @(posedge clk)
        if (~rst_n)
            data_o_r <= 8'b0;
        else if (ready_i && valid_i)
            data_o_r <= calc;

    assign ready_o = ready_i;
    assign valid_o = valid_o_r;
    assign data_o  = data_o_r;
endmodule
testbench
// Testbench for Mpipeline: reset, then push four consecutive beats
// (data 1..4) into the pipeline and let it drain.
module Ttb;
    reg        clk;
    reg        rst_n;
    reg        en_i_r;
    reg  [7:0] data_i_r;
    wire       en_o;
    wire [7:0] data_o;
    wire       idle;

    integer    beat;

    Mpipeline pipeline(
        .clk    (clk),
        .rst_n  (rst_n),
        .en_i   (en_i_r),
        .data_i (data_i_r),
        .en_o   (en_o),
        .data_o (data_o),
        .idle   (idle)
    );

    initial begin
        clk      = 1'b0;
        rst_n    = 1'b0;
        en_i_r   = 1'b0;
        data_i_r = 8'b0;

        // free-running clock, toggling every 5 time units
        fork
            forever #5 clk = ~clk;
        join_none

        // hold reset for 10 rising edges, then idle for 10 more
        repeat (10) @(posedge clk);
        rst_n = 1'b1;
        repeat (10) @(posedge clk);

        // four back-to-back valid beats carrying 1, 2, 3, 4
        for (beat = 1; beat <= 4; beat = beat + 1) begin
            @(posedge clk);
            en_i_r   <= 1'b1;
            data_i_r <= beat[7:0];
        end

        // stop driving and let the pipeline drain
        @(posedge clk);
        en_i_r   <= 1'b0;
        data_i_r <= 8'h0;

        repeat (10) @(posedge clk);
        $finish();
    end
endmodule
思路:
- 將幾個結構相似的模組串聯起來:前一級模組的輸出資料作為後一級模組的輸入資料,前一級的輸出有效信號 valid_o 作為後一級的輸入有效信號 valid_i;ready 信號則反向傳遞,後一級的 ready_o 接到前一級的 ready_i
3、流水線設計高速乘法器(移位實作)
參考:https://www.runoob.com/w3cnote/verilog-pipeline-design.html
直接用 * 運算子或調用 IP 實作乘法,産生的電路性能可能不理想。改用移位累加的方式實作高速乘法器,更適合硬體實現
設計原理
多比特數相乘,相當於被乘數按照乘數中為 1 的各個 bit 位進行左移後累加。例如 4'b1011 × 4'b0101 = (1011 << 0) + (1011 << 2)
乘法器設計 - 非流水線
思路:將被乘數擴展為和結果相同的位數,並用位數較少的操作數作為乘數。根據乘數的每一位是 1 還是 0,決定該位對應的部分積是對位後的被乘數(部分積的最低位和乘數的該位對齊),還是 0;最後將所有部分積加起來即得到結果
// Sequential (non-pipelined) shift-and-add multiplier.
//
//   N        : width of the multiplicand mult1
//   M        : width of the multiplier mult2 (one bit is consumed per cycle)
//   data_rdy : input enable; starts a multiplication when the unit is idle
//   res_rdy  : high for one cycle when res holds mult1 * mult2
//
// A multiplication takes M+1 clock cycles, counted by cnt (0 .. M).
//
// Fixes relative to the previous version:
//   * mult1_shift is now cleared on reset and at the end of a multiplication.
//     Those branches assigned mult2_shift twice and never touched mult1_shift.
//   * The multiplicand is zero-extended with M zeros so the concatenation is
//     exactly N+M bits wide (it used N zeros, which is only right when N == M).
module mult_low
#(parameter N=4,
  parameter M=4
)(
    input            clk,
    input            rstn,
    input            data_rdy ,   // input data enable
    input  [N-1:0]   mult1,       // multiplicand
    input  [M-1:0]   mult2,       // multiplier
    output           res_rdy ,    // output data enable
    output [N+M-1:0] res          // product
);
    // Multiplication cycle counter
    reg  [31:0] cnt ;
    wire [31:0] cnt_temp = (cnt == M) ? 'b0 : cnt + 1'b1 ;

    always @(posedge clk or negedge rstn) begin
        if (!rstn) begin
            cnt <= 'b0 ;
        end
        else if (data_rdy) begin      // start counting when input is enabled
            cnt <= cnt_temp ;
        end
        else if (cnt != 0) begin      // keep counting even if data_rdy was only a short pulse
            cnt <= cnt_temp ;
        end
        else begin
            cnt <= 'b0 ;
        end
    end

    // Shift-and-add datapath
    reg  [M-1:0]   mult2_shift ;
    reg  [M+N-1:0] mult1_shift ;
    reg  [M+N-1:0] mult1_acc ;
    // multiplicand zero-extended to the result width
    wire [M+N-1:0] mult1_ext = {{(M){1'b0}}, mult1} ;

    always @(posedge clk or negedge rstn) begin
        if (!rstn) begin
            mult1_shift <= 'b0 ;
            mult2_shift <= 'b0 ;
            mult1_acc   <= 'b0 ;
        end
        else if (data_rdy && cnt == 'b0) begin   // first cycle: load operands, handle bit 0
            mult1_shift <= mult1_ext << 1 ;
            mult2_shift <= mult2 >> 1 ;
            mult1_acc   <= mult2[0] ? mult1_ext : 'b0 ;
        end
        else if (cnt != M) begin
            mult1_shift <= mult1_shift << 1 ;    // multiplicand times 2
            mult2_shift <= mult2_shift >> 1 ;    // expose the next multiplier bit at [0]
            // accumulate the shifted multiplicand when the current multiplier bit is 1
            mult1_acc   <= mult2_shift[0] ? mult1_acc + mult1_shift : mult1_acc ;
        end
        else begin                               // cnt == M: result captured below, clear state
            mult1_shift <= 'b0 ;
            mult2_shift <= 'b0 ;
            mult1_acc   <= 'b0 ;
        end
    end

    // Result registers
    reg [M+N-1:0] res_r ;
    reg           res_rdy_r ;

    always @(posedge clk or negedge rstn) begin
        if (!rstn) begin
            res_r     <= 'b0 ;
            res_rdy_r <= 'b0 ;
        end
        else if (cnt == M) begin
            res_r     <= mult1_acc ;   // output the result at the end of the multiplication
            res_rdy_r <= 1'b1 ;
        end
        else begin
            res_r     <= 'b0 ;
            res_rdy_r <= 'b0 ;
        end
    end

    assign res_rdy = res_rdy_r ;
    assign res     = res_r ;
endmodule
testbench
`timescale 1ns/1ns
// Testbench for the non-pipelined multiplier mult_low.
//
// Fixes relative to the previous version:
//   * the separator line had lost its leading "//" and was a syntax error;
//   * the task body is wrapped in begin/end (a Verilog task takes a single
//     statement);
//   * the driven regs are initialised so the DUT never sees X on data_rdy.
module test ;
    parameter N = 8 ;
    parameter M = 4 ;
    reg clk, rstn;

    // clock: 5 ns low, 5 ns high
    always begin
        clk = 0 ; #5 ;
        clk = 1 ; #5 ;
    end

    // reset: active low, released at 8 ns
    initial begin
        rstn = 1'b0 ;
        #8 ; rstn = 1'b1 ;
    end

    //=============================================//
    // no pipeline
    reg            data_rdy_low ;
    reg  [N-1:0]   mult1_low ;
    reg  [M-1:0]   mult2_low ;
    wire [M+N-1:0] res_low ;
    wire           res_rdy_low ;

    // Drive one multiplication and block until its result is flagged.
    task mult_data_in ;
        input [M+N-1:0] mult1_task, mult2_task ;
        begin
            wait(!test.u_mult_low.res_rdy) ;   // not in the output state
            @(negedge clk ) ;
            data_rdy_low = 1'b1 ;
            mult1_low    = mult1_task[N-1:0] ;
            mult2_low    = mult2_task[M-1:0] ;
            @(negedge clk ) ;
            data_rdy_low = 1'b0 ;
            wait(test.u_mult_low.res_rdy) ;    // wait for the output state
        end
    endtask

    // driver
    initial begin
        data_rdy_low = 1'b0 ;
        mult1_low    = 'b0 ;
        mult2_low    = 'b0 ;
        #55 ;
        mult_data_in(25, 5 ) ;
        mult_data_in(16, 10 ) ;
        mult_data_in(10, 4 ) ;
        mult_data_in(15, 7) ;
        mult_data_in(215, 9) ;
    end

    mult_low #(.N(N), .M(M))
    u_mult_low
    (
        .clk      (clk),
        .rstn     (rstn),
        .data_rdy (data_rdy_low),
        .mult1    (mult1_low),
        .mult2    (mult2_low),
        .res_rdy  (res_rdy_low),
        .res      (res_low));

    // simulation finish: polled every 100 ns, stops once 10000 ns is reached
    initial begin
        forever begin
            #100;
            if ($time >= 10000) $finish ;
        end
    end
endmodule // test
仿真結果
乘法器設計- 流水線
思路:把「一次移位 + 一次部分積累加」作為流水線的基本單元。每一級根據乘數的最低位決定是否累加目前的被乘數,然後將被乘數左移一位(使下一個部分積的最低位與乘數的下一位對齊)、乘數右移一位(使下一位成為新的最低位),並把結果傳給下一級;經過 M 級後即得到最終結果
// One shift-and-add step of the multiplier (a single pipeline stage).
//
// When en is high the stage looks at bit 0 of mult2, adds mult1 to the
// incoming accumulator if that bit is 1, and hands on mult1 shifted left by
// one and mult2 shifted right by one. When en is low every output register
// is cleared.
module mult_cell
#(parameter N=4,
  parameter M=4)
(
    input                clk,
    input                rstn,
    input                en,
    input      [M+N-1:0] mult1,        // multiplicand
    input      [M-1:0]   mult2,        // multiplier
    input      [M+N-1:0] mult1_acci,   // accumulated result from the previous step
    output reg [M+N-1:0] mult1_o,      // multiplicand after shifting
    output reg [M-1:0]   mult2_shift,  // multiplier after shifting
    output reg [N+M-1:0] mult1_acco,   // accumulated result of this step
    output reg           rdy );

    always @(posedge clk or negedge rstn) begin
        if (!rstn) begin
            mult2_shift <= 'b0 ;
            mult1_acco  <= 'b0 ;
            mult1_o     <= 'b0 ;
            rdy         <= 'b0 ;
        end
        else begin
            // Defaults for a disabled stage; overridden below when en is set
            // (the last non-blocking assignment to a register wins).
            mult2_shift <= 'b0 ;
            mult1_acco  <= 'b0 ;
            mult1_o     <= 'b0 ;
            rdy         <= 'b0 ;

            if (en) begin
                rdy         <= 1'b1 ;
                mult1_o     <= mult1 << 1 ;
                mult2_shift <= mult2 >> 1 ;
                // keep the accumulator unless the current multiplier bit is 1
                mult1_acco  <= mult1_acci ;
                if (mult2[0]) begin
                    mult1_acco <= mult1_acci + mult1 ;
                end
            end
        end
    end
endmodule
// Top-level module: pipelined multiplier made of M chained mult_cell stages.
// Each stage consumes one multiplier bit; the last stage's accumulator and
// rdy flag are the product and its valid flag.
module mult_man
#(parameter N=4,
  parameter M=4)
(
    input            clk,
    input            rstn,
    input            data_rdy ,
    input  [N-1:0]   mult1,
    input  [M-1:0]   mult2,
    output           res_rdy ,
    output [N+M-1:0] res );

    localparam W = N + M ;          // width of the product and of the shifted multiplicand

    // per-stage outputs, index = stage number
    wire [W-1:0] mcand_q  [M-1:0] ; // shifted multiplicand
    wire [M-1:0] mplier_q [M-1:0] ; // shifted multiplier
    wire [W-1:0] acc_q    [M-1:0] ; // running sum of partial products
    wire [M-1:0] vld_q ;            // stage output valid

    // Stage 0 is fed from the module inputs (zero-extended multiplicand,
    // accumulator starting at zero), so it is instantiated outside the
    // generate loop.
    mult_cell #(.N(N), .M(M))
    u_mult_step0
    (
        .clk         (clk),
        .rstn        (rstn),
        .en          (data_rdy),
        .mult1       ({{M{1'b0}}, mult1}),
        .mult2       (mult2),
        .mult1_acci  ({W{1'b0}}),
        // outputs
        .rdy         (vld_q[0]),
        .mult1_o     (mcand_q[0]),
        .mult2_shift (mplier_q[0]),
        .mult1_acco  (acc_q[0]) );

    // Stages 1 .. M-1: each one takes every input from the stage before it.
    genvar s ;
    generate
        for (s = 1; s < M; s = s + 1) begin: mult_stepx
            mult_cell #(.N(N), .M(M))
            u_mult_step
            (
                .clk         (clk),
                .rstn        (rstn),
                .en          (vld_q[s-1]),
                .mult1       (mcand_q[s-1]),
                .mult2       (mplier_q[s-1]),
                .mult1_acci  (acc_q[s-1]),   // previous sum feeds the next accumulation
                // outputs
                .rdy         (vld_q[s]),
                .mult1_o     (mcand_q[s]),   // pass on the shifted multiplicand
                .mult2_shift (mplier_q[s]),  // pass on the shifted multiplier
                .mult1_acco  (acc_q[s]) );
        end
    endgenerate

    assign res     = acc_q[M-1] ;
    assign res_rdy = vld_q[M-1] ;
endmodule
testbench 關鍵部分
// Key part of the testbench for the pipelined multiplier mult_man.
// This is a fragment: clk, rstn and the parameters N and M are declared
// outside the lines shown here.
reg data_rdy ;
reg [N-1:0] mult1 ;
reg [M-1:0] mult2 ;
wire res_rdy ;
wire [N+M-1:0] res ;
// Driver: asserts data_rdy at the first falling clock edge after 55 time
// units and never deasserts it in this block, then changes the operands
// every 10 time units (presumably one clock period - confirm against the
// clock generator), so a new multiplication enters the pipeline each step.
initial begin
#55 ;
@(negedge clk ) ;
data_rdy = 1'b1 ;
mult1 = 25; mult2 = 5;
#10 ; mult1 = 16; mult2 = 10;
#10 ; mult1 = 10; mult2 = 4;
#10 ; mult1 = 15; mult2 = 7;
// Seven runs of 32 steps: mult2 is held constant while mult1 increments.
mult2 = 7; repeat(32) #10 mult1 = mult1 + 1 ;
mult2 = 1; repeat(32) #10 mult1 = mult1 + 1 ;
mult2 = 15; repeat(32) #10 mult1 = mult1 + 1 ;
mult2 = 3; repeat(32) #10 mult1 = mult1 + 1 ;
mult2 = 11; repeat(32) #10 mult1 = mult1 + 1 ;
mult2 = 4; repeat(32) #10 mult1 = mult1 + 1 ;
mult2 = 9; repeat(32) #10 mult1 = mult1 + 1 ;
end
// Delay the input operands through an M-entry shift register so that entry
// M-1 holds the operands from M clock edges earlier; mult_man instantiates M
// mult_cell stages, so these are the operands the current result is checked
// against.
reg [N-1:0] mult1_ref [M-1:0];
reg [M-1:0] mult2_ref [M-1:0];
always @(posedge clk) begin
mult1_ref[0] <= mult1 ;
mult2_ref[0] <= mult2 ;
end
genvar i;
generate
for(i=1; i<=M-1; i=i+1) begin
always @(posedge clk) begin
mult1_ref[i] <= mult1_ref[i-1];
mult2_ref[i] <= mult2_ref[i-1];
end
end
endgenerate
// Self-check: 1 time unit after every rising clock edge, compare the product
// of the delayed operands with res. error_flag is set when they differ while
// res_rdy is high, and cleared otherwise.
reg error_flag ;
always @(posedge clk) begin
# 1 ;
if (mult1_ref[M-1] * mult2_ref[M-1] != res && res_rdy) begin
error_flag <= 1'b1 ;
end
else begin
error_flag <= 1'b0 ;
end
end
// Device under test: the pipelined multiplier.
mult_man #(.N(N), .M(M))
u_mult(
.clk (clk),
.rstn (rstn),
.data_rdy (data_rdy),
.mult1 (mult1),
.mult2 (mult2),
.res_rdy (res_rdy),
.res (res));
仿真結果