天天看點

圖像處理算法的FPGA實作研究

Table_y0.vhd                     \\ y0=c0×t0+c0×t2+c0×t4+c0×t6

library ieee;

use ieee.std_logic_1164.all;

-- Lookup table for the y0 output: y0 = c0*t0 + c0*t2 + c0*t4 + c0*t6.
-- Each input carries one bit of t6/t4/t2/t0; `data` is the signed 8-bit
-- sum of the coefficients selected by those bits (c0 = 22 in this table).
entity table_y0 is
  port (
    t6, t4, t2, t0 : in  std_logic;
    data           : out std_logic_vector(7 downto 0)
  );
end table_y0;

architecture one of table_y0 is
  signal t : std_logic_vector(3 downto 0);
begin

  -- The select word is built concurrently. The previous version assigned
  -- it inside the lookup process and read it in the same activation, so
  -- the case statement always saw the value from the previous input change.
  t <= t6 & t4 & t2 & t0;

  with t select
    data <= "00000000" when "0000",  --  0
            "00010110" when "0001",  -- 22
            "00010110" when "0010",  -- 22
            "00101100" when "0011",  -- 44
            "00010110" when "0100",  -- 22
            "00101100" when "0101",  -- 44
            "00101100" when "0110",  -- 44
            "01000010" when "0111",  -- 66
            "00010110" when "1000",  -- 22
            "00101100" when "1001",  -- 44
            "00101100" when "1010",  -- 44
            "01000010" when "1011",  -- 66
            "00101100" when "1100",  -- 44
            "01000010" when "1101",  -- 66
            "01000010" when "1110",  -- 66
            "01011000" when "1111",  -- 88
            "00000000" when others;  -- metavalues ('U', 'X', ...) map to 0

end one;

Table_y1.vhd                      \\ y1=c1×t1+c3×t3+c5×t5+c7×t7

library ieee;

use ieee.std_logic_1164.all;

-- Lookup table for the y1 output: y1 = c1*t1 + c3*t3 + c5*t5 + c7*t7.
-- Each input carries one bit of t7/t5/t3/t1; `data` is the signed 8-bit
-- sum of the selected coefficients (table values: c1=31, c3=26, c5=17, c7=6).
entity table_y1 is
  port (
    t7, t5, t3, t1 : in  std_logic;
    data           : out std_logic_vector(7 downto 0)
  );
end table_y1;

architecture one of table_y1 is
  signal t : std_logic_vector(3 downto 0);
begin

  -- Built concurrently so the lookup always sees the current inputs; the
  -- previous in-process signal assignment made the case statement read the
  -- select word from the previous input change.
  t <= t7 & t5 & t3 & t1;

  with t select
    data <= "00000000" when "0000",  --  0
            "00011111" when "0001",  -- 31
            "00011010" when "0010",  -- 26
            "00111001" when "0011",  -- 57
            "00010001" when "0100",  -- 17
            "00110000" when "0101",  -- 48
            "00101011" when "0110",  -- 43
            "01001010" when "0111",  -- 74
            "00000110" when "1000",  --  6
            "00100101" when "1001",  -- 37
            "00100000" when "1010",  -- 32
            "00111111" when "1011",  -- 63
            "00010111" when "1100",  -- 23
            "00110110" when "1101",  -- 54
            "00110001" when "1110",  -- 49
            "01010000" when "1111",  -- 80
            "00000000" when others;  -- metavalues map to 0

end one;

Table_y2.vhd                       \\ y2=c2×t0+c6×t2-c6×t4-c2×t6

library ieee;

use ieee.std_logic_1164.all;

-- Lookup table for the y2 output: y2 = c2*t0 + c6*t2 - c6*t4 - c2*t6.
-- Each input carries one bit of t6/t4/t2/t0; `data` is the two's-complement
-- 8-bit sum of the selected signed coefficients (table values: c2=29, c6=12).
entity table_y2 is
  port (
    t6, t4, t2, t0 : in  std_logic;
    data           : out std_logic_vector(7 downto 0)
  );
end table_y2;

architecture one of table_y2 is
  signal t : std_logic_vector(3 downto 0);
begin

  -- Built concurrently so the lookup always sees the current inputs; the
  -- previous in-process signal assignment made the case statement read the
  -- select word from the previous input change.
  t <= t6 & t4 & t2 & t0;

  with t select
    data <= "00000000" when "0000",  --   0
            "00011101" when "0001",  --  29
            "00001100" when "0010",  --  12
            "00101001" when "0011",  --  41
            "11110100" when "0100",  -- -12
            "00010001" when "0101",  --  17
            "00000000" when "0110",  --   0
            "00011101" when "0111",  --  29
            "11100011" when "1000",  -- -29
            "00000000" when "1001",  --   0
            "11101111" when "1010",  -- -17
            "00001100" when "1011",  --  12
            "11010111" when "1100",  -- -41
            "11110100" when "1101",  -- -12
            "11100011" when "1110",  -- -29
            "00000000" when "1111",  --   0
            "00000000" when others;  -- metavalues map to 0

end one;

Table_y3.vhd                      \\ y3=c3×t1-c7×t3-c1×t5-c5×t7

library ieee;

use ieee.std_logic_1164.all;

-- Lookup table for the y3 output. The stored values correspond to
-- y3 = c3*t1 - c7*t3 - c1*t5 - c5*t7 (c1=31, c3=26, c5=17, c7=6).
-- Each input carries one bit of t7/t5/t3/t1; `data` is the two's-complement
-- 8-bit sum of the selected signed coefficients.
entity table_y3 is
  port (
    t7, t5, t3, t1 : in  std_logic;
    data           : out std_logic_vector(7 downto 0)
  );
end table_y3;

architecture one of table_y3 is
  signal t : std_logic_vector(3 downto 0);
begin

  -- Built concurrently so the lookup always sees the current inputs; the
  -- previous in-process signal assignment made the case statement read the
  -- select word from the previous input change.
  t <= t7 & t5 & t3 & t1;

  with t select
    data <= "00000000" when "0000",  --   0
            "00011010" when "0001",  --  26
            "11111010" when "0010",  --  -6
            "00010100" when "0011",  --  20
            "11100001" when "0100",  -- -31
            "11111011" when "0101",  --  -5
            "11011011" when "0110",  -- -37
            "11110101" when "0111",  -- -11
            "11101111" when "1000",  -- -17
            "00001001" when "1001",  --   9
            "11101001" when "1010",  -- -23
            "00000011" when "1011",  --   3
            "11010000" when "1100",  -- -48
            "11101010" when "1101",  -- -22
            "11001010" when "1110",  -- -54
            "11100100" when "1111",  -- -28
            "00000000" when others;  -- metavalues map to 0

end one;

Table_y4.vhd                       \\ y4=c4×t0-c4×t2-c4×t4+c4×t6

library ieee;

use ieee.std_logic_1164.all;

-- Lookup table for the y4 output: y4 = c4*t0 - c4*t2 - c4*t4 + c4*t6.
-- Each input carries one bit of t6/t4/t2/t0; `data` is the two's-complement
-- 8-bit sum of the selected signed coefficients (c4 = 22 in this table).
entity table_y4 is
  port (
    t6, t4, t2, t0 : in  std_logic;
    data           : out std_logic_vector(7 downto 0)
  );
end table_y4;

architecture one of table_y4 is
  signal t : std_logic_vector(3 downto 0);
begin

  -- Built concurrently so the lookup always sees the current inputs; the
  -- previous in-process signal assignment made the case statement read the
  -- select word from the previous input change.
  t <= t6 & t4 & t2 & t0;

  -- Entries "0010" (t2 only) and "0100" (t4 only) previously held +22.
  -- Both terms enter y4 with a minus sign, so they must be -22; every
  -- multi-bit entry of the table (e.g. "0011" = 0, "0110" = -44) already
  -- assumed -22 for them.
  with t select
    data <= "00000000" when "0000",  --   0
            "00010110" when "0001",  --  22
            "11101010" when "0010",  -- -22 (corrected)
            "00000000" when "0011",  --   0
            "11101010" when "0100",  -- -22 (corrected)
            "00000000" when "0101",  --   0
            "11010100" when "0110",  -- -44
            "11101010" when "0111",  -- -22
            "00010110" when "1000",  --  22
            "00101100" when "1001",  --  44
            "00000000" when "1010",  --   0
            "00010110" when "1011",  --  22
            "00000000" when "1100",  --   0
            "00010110" when "1101",  --  22
            "11101010" when "1110",  -- -22
            "00000000" when "1111",  --   0
            "00000000" when others;  -- metavalues map to 0

end one;

Table_y5.vhd                       \\ y5=c5×t1-c1×t3+c7×t5+c3×t7

library ieee;

use ieee.std_logic_1164.all;

-- Lookup table for the y5 output. The stored values correspond to
-- y5 = c5*t1 - c1*t3 + c7*t5 + c3*t7 (c1=31, c3=26, c5=17, c7=6).
-- Each input carries one bit of t7/t5/t3/t1; `data` is the two's-complement
-- 8-bit sum of the selected signed coefficients.
entity table_y5 is
  port (
    t7, t5, t3, t1 : in  std_logic;
    data           : out std_logic_vector(7 downto 0)
  );
end table_y5;

architecture one of table_y5 is
  signal t : std_logic_vector(3 downto 0);
begin

  -- Built concurrently so the lookup always sees the current inputs; the
  -- previous in-process signal assignment made the case statement read the
  -- select word from the previous input change.
  t <= t7 & t5 & t3 & t1;

  with t select
    data <= "00000000" when "0000",  --   0
            "00010001" when "0001",  --  17
            "11100001" when "0010",  -- -31
            "11110010" when "0011",  -- -14
            "00000110" when "0100",  --   6
            "00010111" when "0101",  --  23
            "11100111" when "0110",  -- -25
            "11111000" when "0111",  --  -8
            "00011010" when "1000",  --  26
            "00101011" when "1001",  --  43
            "11111011" when "1010",  --  -5
            "00001100" when "1011",  --  12
            "00100000" when "1100",  --  32
            "00110001" when "1101",  --  49
            "00000001" when "1110",  --   1
            "00010010" when "1111",  --  18
            "00000000" when others;  -- metavalues map to 0

end one;

Table_y6.vhd                    \\ y6=c6×t0-c2×t2+c2×t4-c6×t6

library ieee;

use ieee.std_logic_1164.all;

-- Lookup table for the y6 output: y6 = c6*t0 - c2*t2 + c2*t4 - c6*t6.
-- Each input carries one bit of t6/t4/t2/t0; `data` is the two's-complement
-- 8-bit sum of the selected signed coefficients (table values: c2=29, c6=12).
entity table_y6 is
  port (
    t6, t4, t2, t0 : in  std_logic;
    data           : out std_logic_vector(7 downto 0)
  );
end table_y6;

architecture one of table_y6 is
  signal t : std_logic_vector(3 downto 0);
begin

  -- Built concurrently so the lookup always sees the current inputs; the
  -- previous in-process signal assignment made the case statement read the
  -- select word from the previous input change.
  t <= t6 & t4 & t2 & t0;

  with t select
    data <= "00000000" when "0000",  --   0
            "00001100" when "0001",  --  12
            "11100011" when "0010",  -- -29
            "11101111" when "0011",  -- -17
            "00011101" when "0100",  --  29
            "00101001" when "0101",  --  41
            "00000000" when "0110",  --   0
            "00001100" when "0111",  --  12
            "11110100" when "1000",  -- -12
            "00000000" when "1001",  --   0
            "11010111" when "1010",  -- -41
            "11100011" when "1011",  -- -29
            "00010001" when "1100",  --  17
            "00011101" when "1101",  --  29
            "11110100" when "1110",  -- -12
            "00000000" when "1111",  --   0
            "00000000" when others;  -- metavalues map to 0

end one;

Table_y7.vhd                       \\ y7=c7×t1-c5×t3+c3×t5-c1×t7

library ieee;

use ieee.std_logic_1164.all;

-- Lookup table for the y7 output. The stored values correspond to
-- y7 = c7*t1 - c5*t3 + c3*t5 - c1*t7 (c1=31, c3=26, c5=17, c7=6).
-- Each input carries one bit of t7/t5/t3/t1; `data` is the two's-complement
-- 8-bit sum of the selected signed coefficients.
entity table_y7 is
  port (
    t7, t5, t3, t1 : in  std_logic;
    data           : out std_logic_vector(7 downto 0)
  );
end table_y7;

architecture one of table_y7 is
  signal t : std_logic_vector(3 downto 0);
begin

  -- Built concurrently so the lookup always sees the current inputs; the
  -- previous in-process signal assignment made the case statement read the
  -- select word from the previous input change.
  t <= t7 & t5 & t3 & t1;

  with t select
    data <= "00000000" when "0000",  --   0
            "00000110" when "0001",  --   6
            "11101111" when "0010",  -- -17
            "11110101" when "0011",  -- -11
            "00011010" when "0100",  --  26
            "00100000" when "0101",  --  32
            "00001001" when "0110",  --   9
            "00001111" when "0111",  --  15
            "11100001" when "1000",  -- -31
            "11100111" when "1001",  -- -25
            "11010000" when "1010",  -- -48
            "11010110" when "1011",  -- -42
            "11111011" when "1100",  --  -5
            "00000001" when "1101",  --   1
            "11101010" when "1110",  -- -22
            "11110000" when "1111",  -- -16
            "00000000" when others;  -- metavalues map to 0

end one;

Convert.vhd                      \\

\\t1=x0-x7,t3=x1-x6,t5=x2-x5,t7=x3-x4

library ieee;

use ieee.std_logic_1164.all;

use ieee.std_logic_unsigned.all;

-- Pre-processing stage of the 1-D DCT: combines the eight 12-bit inputs
-- into four sums (even-numbered outputs) and four differences
-- (odd-numbered outputs) of the pairs (x0,x7), (x1,x6), (x2,x5), (x3,x4).
-- Results are truncated to 12 bits by the std_logic_unsigned operators.
entity convert is
  port (
    x7, x6, x5, x4, x3, x2, x1, x0 : in  std_logic_vector(11 downto 0);
    t7, t6, t5, t4, t3, t2, t1, t0 : out std_logic_vector(11 downto 0)
  );
end entity convert;

architecture one of convert is
begin

  -- Pairwise sums.
  t0 <= x0 + x7;
  t2 <= x1 + x6;
  t4 <= x2 + x5;
  t6 <= x3 + x4;

  -- Pairwise differences.
  t1 <= x0 - x7;
  t3 <= x1 - x6;
  t5 <= x2 - x5;
  t7 <= x3 - x4;

end architecture one;

One_d_dct.vhd         \\實作一維離散餘弦變換

library ieee;

use ieee.std_logic_1164.all;

use ieee.std_logic_unsigned.all;

-- One-dimensional 8-point DCT using bit-serial lookup tables and a
-- shift-and-add accumulator.
--
-- Ports:
--   x7..x0 : 12-bit input samples, latched on a clock edge where start = '1'.
--   y7..y0 : 12-bit results, taken from bits 13..2 of the 16-bit accumulators.
--   start  : loads the inputs and begins a 12-step computation.
--   rst    : synchronous reset, active high.
--   clk    : clock, rising edge.
--   done   : high for one clock cycle after the final accumulation step.
entity one_d_dct is
  port (
    x7, x6, x5, x4, x3, x2, x1, x0 : in     std_logic_vector(11 downto 0);
    y7, y6, y5, y4, y3, y2, y1, y0 : out    std_logic_vector(11 downto 0);
    start, rst, clk                : in     std_logic;
    done                           : buffer std_logic
  );
end entity one_d_dct;

architecture one of one_d_dct is

  -- Coefficient lookup tables, one per output. Even outputs are indexed by
  -- bits of t6/t4/t2/t0, odd outputs by bits of t7/t5/t3/t1.
  component table_y0
    port (t6, t4, t2, t0 : in std_logic;
          data : out std_logic_vector(7 downto 0));
  end component;

  component table_y1
    port (t7, t5, t3, t1 : in std_logic;
          data : out std_logic_vector(7 downto 0));
  end component;

  component table_y2
    port (t6, t4, t2, t0 : in std_logic;
          data : out std_logic_vector(7 downto 0));
  end component;

  component table_y3
    port (t7, t5, t3, t1 : in std_logic;
          data : out std_logic_vector(7 downto 0));
  end component;

  component table_y4
    port (t6, t4, t2, t0 : in std_logic;
          data : out std_logic_vector(7 downto 0));
  end component;

  component table_y5
    port (t7, t5, t3, t1 : in std_logic;
          data : out std_logic_vector(7 downto 0));
  end component;

  component table_y6
    port (t6, t4, t2, t0 : in std_logic;
          data : out std_logic_vector(7 downto 0));
  end component;

  component table_y7
    port (t7, t5, t3, t1 : in std_logic;
          data : out std_logic_vector(7 downto 0));
  end component;

  -- Input pre-processing (pairwise sums and differences).
  component convert
    port (x7, x6, x5, x4, x3, x2, x1, x0 : in  std_logic_vector(11 downto 0);
          t7, t6, t5, t4, t3, t2, t1, t0 : out std_logic_vector(11 downto 0));
  end component;

  -- Widens an 8-bit table value to 16 bits: one copy of the sign bit on
  -- top, the value itself, then seven zero bits below.
  function sgn_extend (data_8 : std_logic_vector(7 downto 0))
    return std_logic_vector is
  begin
    return data_8(7) & data_8 & "0000000";
  end function sgn_extend;

  -- Reduces a 16-bit accumulator to the 12-bit result (bits 13 downto 2).
  -- NOTE(review): bits 15..14 are discarded; this assumes the accumulated
  -- value always fits in 14 bits - confirm against the input range.
  function sgn_cut (data_16 : std_logic_vector(15 downto 0))
    return std_logic_vector is
  begin
    return data_16(13 downto 2);
  end function sgn_cut;

  signal count   : integer range 0 to 11;  -- index of the input bit being processed
  signal compute : std_logic;              -- '1' while the 12 steps are running
  signal t7, t6, t5, t4, t3, t2, t1, t0 : std_logic_vector(11 downto 0);
  signal d7, d6, d5, d4, d3, d2, d1, d0 : std_logic_vector(7 downto 0);
  signal tt7, tt6, tt5, tt4, tt3, tt2, tt1, tt0 : std_logic_vector(11 downto 0);
  signal dy7, dy6, dy5, dy4, dy3, dy2, dy1, dy0 : std_logic_vector(15 downto 0);
  signal outy7, outy6, outy5, outy4, outy3, outy2, outy1, outy0 : std_logic_vector(11 downto 0);

begin

  y7 <= outy7; y6 <= outy6; y5 <= outy5; y4 <= outy4;
  y3 <= outy3; y2 <= outy2; y1 <= outy1; y0 <= outy0;

  u : convert port map (
    x7 => x7, x6 => x6, x5 => x5, x4 => x4, x3 => x3, x2 => x2, x1 => x1, x0 => x0,
    t7 => t7, t6 => t6, t5 => t5, t4 => t4, t3 => t3, t2 => t2, t1 => t1, t0 => t0);

  -- Each table is addressed by the current least-significant bit of the
  -- shift registers tt7..tt0.
  u7 : table_y7 port map (t7 => tt7(0), t5 => tt5(0), t3 => tt3(0), t1 => tt1(0), data => d7);
  u6 : table_y6 port map (t6 => tt6(0), t4 => tt4(0), t2 => tt2(0), t0 => tt0(0), data => d6);
  u5 : table_y5 port map (t7 => tt7(0), t5 => tt5(0), t3 => tt3(0), t1 => tt1(0), data => d5);
  u4 : table_y4 port map (t6 => tt6(0), t4 => tt4(0), t2 => tt2(0), t0 => tt0(0), data => d4);
  u3 : table_y3 port map (t7 => tt7(0), t5 => tt5(0), t3 => tt3(0), t1 => tt1(0), data => d3);
  u2 : table_y2 port map (t6 => tt6(0), t4 => tt4(0), t2 => tt2(0), t0 => tt0(0), data => d2);
  u1 : table_y1 port map (t7 => tt7(0), t5 => tt5(0), t3 => tt3(0), t1 => tt1(0), data => d1);
  u0 : table_y0 port map (t6 => tt6(0), t4 => tt4(0), t2 => tt2(0), t0 => tt0(0), data => d0);

  outy7 <= sgn_cut(dy7); outy6 <= sgn_cut(dy6); outy5 <= sgn_cut(dy5); outy4 <= sgn_cut(dy4);
  outy3 <= sgn_cut(dy3); outy2 <= sgn_cut(dy2); outy1 <= sgn_cut(dy1); outy0 <= sgn_cut(dy0);

  -- Reset is sampled inside the clock edge, so only clk is needed in the
  -- sensitivity list.
  process (clk)
  begin
    if clk'event and clk = '1' then
      if rst = '1' then
        -- Synchronous reset: clear control state, accumulators and shift registers.
        count <= 0; done <= '0'; compute <= '0';
        dy7 <= (others => '0'); dy6 <= (others => '0'); dy5 <= (others => '0'); dy4 <= (others => '0');
        dy3 <= (others => '0'); dy2 <= (others => '0'); dy1 <= (others => '0'); dy0 <= (others => '0');
        tt7 <= (others => '0'); tt6 <= (others => '0'); tt5 <= (others => '0'); tt4 <= (others => '0');
        tt3 <= (others => '0'); tt2 <= (others => '0'); tt1 <= (others => '0'); tt0 <= (others => '0');
      else
        -- done is a one-cycle pulse.
        if done = '1' then
          done <= '0';
        end if;

        if compute = '1' then
          if count = 11 then
            -- Final step (sign bit of the inputs): the table value is
            -- subtracted from the arithmetically right-shifted accumulator.
            dy7 <= (dy7(15) & dy7(15 downto 1)) - sgn_extend(d7);
            dy6 <= (dy6(15) & dy6(15 downto 1)) - sgn_extend(d6);
            dy5 <= (dy5(15) & dy5(15 downto 1)) - sgn_extend(d5);
            dy4 <= (dy4(15) & dy4(15 downto 1)) - sgn_extend(d4);
            dy3 <= (dy3(15) & dy3(15 downto 1)) - sgn_extend(d3);
            dy2 <= (dy2(15) & dy2(15 downto 1)) - sgn_extend(d2);
            dy1 <= (dy1(15) & dy1(15 downto 1)) - sgn_extend(d1);
            dy0 <= (dy0(15) & dy0(15 downto 1)) - sgn_extend(d0);
            done <= '1'; compute <= '0';
            -- count is declared 0 to 11; the former unconditional
            -- count+1 produced 12 here, which is outside that range.
            count <= 0;
          else
            -- Shift-and-add step for the other bits.
            dy7 <= (dy7(15) & dy7(15 downto 1)) + sgn_extend(d7);
            dy6 <= (dy6(15) & dy6(15 downto 1)) + sgn_extend(d6);
            dy5 <= (dy5(15) & dy5(15 downto 1)) + sgn_extend(d5);
            dy4 <= (dy4(15) & dy4(15 downto 1)) + sgn_extend(d4);
            dy3 <= (dy3(15) & dy3(15 downto 1)) + sgn_extend(d3);
            dy2 <= (dy2(15) & dy2(15 downto 1)) + sgn_extend(d2);
            dy1 <= (dy1(15) & dy1(15 downto 1)) + sgn_extend(d1);
            dy0 <= (dy0(15) & dy0(15 downto 1)) + sgn_extend(d0);
            count <= count + 1;
          end if;
        end if;

        if start = '1' and done = '0' then
          compute <= '1';
        end if;

        if start = '1' then
          -- Load the pre-processed inputs and clear the accumulators. These
          -- assignments come last, so they override the updates above when
          -- start is asserted during a computation.
          count <= 0;
          tt7 <= t7; tt6 <= t6; tt5 <= t5; tt4 <= t4;
          tt3 <= t3; tt2 <= t2; tt1 <= t1; tt0 <= t0;
          dy7 <= (others => '0'); dy6 <= (others => '0'); dy5 <= (others => '0'); dy4 <= (others => '0');
          dy3 <= (others => '0'); dy2 <= (others => '0'); dy1 <= (others => '0'); dy0 <= (others => '0');
        else
          -- Shift right by one; bit 11 is left unchanged, so the sign bit
          -- is replicated as the register drains.
          tt7(10 downto 0) <= tt7(11 downto 1);
          tt6(10 downto 0) <= tt6(11 downto 1);
          tt5(10 downto 0) <= tt5(11 downto 1);
          tt4(10 downto 0) <= tt4(11 downto 1);
          tt3(10 downto 0) <= tt3(11 downto 1);
          tt2(10 downto 0) <= tt2(11 downto 1);
          tt1(10 downto 0) <= tt1(11 downto 1);
          tt0(10 downto 0) <= tt0(11 downto 1);
        end if;
      end if;
    end if;
  end process;

end architecture one;

Two_d_dct.vhd                    \\二維DCT算法子產品

library ieee;

use ieee.std_logic_1164.all;

use ieee.std_logic_unsigned.all;

-- Two-dimensional 8x8 DCT built from sixteen one_d_dct instances.
--
-- Ports:
--   din   : 64 samples of 12 bits; sample n occupies bits 12*n+11 downto 12*n.
--   dout  : 64 results of 12 bits, registered when the second pass finishes.
--   clk   : clock, rising edge.
--   start : starts the first pass.
--   rst   : synchronous reset, active high.
--   done  : registered copy of the second pass's done pulse.
entity two_d_dct is
  port (
    din             : in     std_logic_vector(767 downto 0);
    dout            : out    std_logic_vector(767 downto 0);
    clk, start, rst : in     std_logic;
    done            : buffer std_logic
  );
end entity two_d_dct;

architecture one of two_d_dct is

  component one_d_dct
    port (
      x7, x6, x5, x4, x3, x2, x1, x0 : in     std_logic_vector(11 downto 0);
      y7, y6, y5, y4, y3, y2, y1, y0 : out    std_logic_vector(11 downto 0);
      start, rst, clk                : in     std_logic;
      done                           : buffer std_logic
    );
  end component;

  signal data1D  : std_logic_vector(767 downto 0);  -- first-pass results
  signal data2D  : std_logic_vector(767 downto 0);  -- second-pass results
  signal donecol : std_logic_vector(7 downto 0);    -- done of each first-pass unit
  signal donerow : std_logic_vector(7 downto 0);    -- done of each second-pass unit

begin

  -- First pass: unit i transforms the eight consecutive samples
  -- 8*i .. 8*i+7 of din and writes them to the same positions of data1D.
  -- Replaces the former hand-written instances c0..c7 with identical wiring.
  first_pass : for i in 0 to 7 generate
    c : one_d_dct port map (
      x0 => din(96*i + 11 downto 96*i),
      x1 => din(96*i + 23 downto 96*i + 12),
      x2 => din(96*i + 35 downto 96*i + 24),
      x3 => din(96*i + 47 downto 96*i + 36),
      x4 => din(96*i + 59 downto 96*i + 48),
      x5 => din(96*i + 71 downto 96*i + 60),
      x6 => din(96*i + 83 downto 96*i + 72),
      x7 => din(96*i + 95 downto 96*i + 84),
      y0 => data1D(96*i + 11 downto 96*i),
      y1 => data1D(96*i + 23 downto 96*i + 12),
      y2 => data1D(96*i + 35 downto 96*i + 24),
      y3 => data1D(96*i + 47 downto 96*i + 36),
      y4 => data1D(96*i + 59 downto 96*i + 48),
      y5 => data1D(96*i + 71 downto 96*i + 60),
      y6 => data1D(96*i + 83 downto 96*i + 72),
      y7 => data1D(96*i + 95 downto 96*i + 84),
      start => start, clk => clk, rst => rst, done => donecol(i));
  end generate first_pass;

  -- Second pass: unit k gathers sample k from each first-pass group
  -- (a stride of 8 samples = 96 bits) and writes eight consecutive samples
  -- 8*k .. 8*k+7 of data2D. All units are started by donecol(0), as the
  -- former instances r0..r7 were.
  second_pass : for k in 0 to 7 generate
    r : one_d_dct port map (
      x0 => data1D(12*k +  11 downto 12*k),
      x1 => data1D(12*k + 107 downto 12*k +  96),
      x2 => data1D(12*k + 203 downto 12*k + 192),
      x3 => data1D(12*k + 299 downto 12*k + 288),
      x4 => data1D(12*k + 395 downto 12*k + 384),
      x5 => data1D(12*k + 491 downto 12*k + 480),
      x6 => data1D(12*k + 587 downto 12*k + 576),
      x7 => data1D(12*k + 683 downto 12*k + 672),
      y0 => data2D(96*k + 11 downto 96*k),
      y1 => data2D(96*k + 23 downto 96*k + 12),
      y2 => data2D(96*k + 35 downto 96*k + 24),
      y3 => data2D(96*k + 47 downto 96*k + 36),
      y4 => data2D(96*k + 59 downto 96*k + 48),
      y5 => data2D(96*k + 71 downto 96*k + 60),
      y6 => data2D(96*k + 83 downto 96*k + 72),
      y7 => data2D(96*k + 95 downto 96*k + 84),
      start => donecol(0), clk => clk, rst => rst, done => donerow(k));
  end generate second_pass;

  -- Output register. Reset is sampled inside the clock edge, so only clk
  -- is needed in the sensitivity list.
  process (clk)
  begin
    if clk'event and clk = '1' then
      if rst = '1' then
        done <= '0';
        dout <= (others => '0');
      else
        done <= donerow(0);
        if donerow(0) = '1' then
          dout <= data2D;
        end if;
      end if;
    end if;
  end process;

end architecture one;

Dct.vhd                                  \\頂層子產品檔案

library ieee;

use ieee.std_logic_1164.all;

use ieee.std_logic_unsigned.all;

-- Word-serial wrapper around two_d_dct: 12-bit words are shifted into a
-- 768-bit input buffer on dinclk and shifted out of a 768-bit output
-- buffer on doutclk.
--
-- Ports:
--   datain  : 12-bit input word, shifted in on each rising edge of dinclk
--             while startstate = '0'.
--   dataout : 12-bit output word, updated on rising edges of doutclk.
--   clk     : clock of the DCT core.
--   start   : starts the core; each rising edge also toggles startstate.
--   rst     : reset passed to the core (active high there).
--   done    : done pulse of the core; each rising edge toggles donestate.
entity dct is
  port (
    datain                          : in     std_logic_vector(11 downto 0);
    dataout                         : out    std_logic_vector(11 downto 0);
    clk, start, rst, dinclk, doutclk : in    std_logic;
    done                            : buffer std_logic
  );
end entity dct;

architecture one of dct is

  component two_d_dct
    port (
      din             : in     std_logic_vector(767 downto 0);
      dout            : out    std_logic_vector(767 downto 0);
      clk, start, rst : in     std_logic;
      done            : buffer std_logic
    );
  end component;

  signal inbuff   : std_logic_vector(767 downto 0);
  signal outbuff  : std_logic_vector(767 downto 0);
  signal doutbuff : std_logic_vector(767 downto 0);

  -- Toggle flags. They have no reset, so they need an explicit initial
  -- value: without it they start as 'U', "not 'U'" is still 'U', and
  -- neither "startstate = '0'" nor "donestate = '1'" is ever true in
  -- simulation.
  signal donestate, startstate : std_logic := '0';

begin

  u : two_d_dct port map (din => inbuff, dout => outbuff, clk => clk,
                          start => start, rst => rst, done => done);

  -- Input side: shift the buffer right by one word and insert the new
  -- word at the top.
  process (dinclk)
  begin
    if dinclk'event and dinclk = '1' then
      if startstate = '0' then
        inbuff(767 downto 756) <= datain;
        inbuff(755 downto 0)   <= inbuff(767 downto 12);
      end if;
    end if;
  end process;

  -- Toggles on every rising edge of start.
  process (start)
  begin
    if start'event and start = '1' then
      startstate <= not startstate;
    end if;
  end process;

  -- Toggles on every rising edge of done.
  process (done)
  begin
    if done'event and done = '1' then
      donestate <= not donestate;
    end if;
  end process;

  -- Output side: capture the result while done is high, otherwise present
  -- the lowest word and shift the buffer right by one word.
  -- NOTE(review): done is generated on clk but sampled here on doutclk; a
  -- short done pulse may fall between doutclk edges - confirm the intended
  -- clock relationship.
  process (doutclk)
  begin
    if doutclk'event and doutclk = '1' then
      if donestate = '1' then
        if done = '1' then
          doutbuff <= outbuff;
        else
          dataout <= doutbuff(11 downto 0);
          doutbuff(755 downto 0) <= doutbuff(767 downto 12);
        end if;
      end if;
    end if;
  end process;

end architecture one;

Main.vhd                         \\增加序列槽子產品的頂層檔案

library ieee;

use ieee.std_logic_1164.all;

use ieee.std_logic_unsigned.all;

-- Top level with bit-serial input and output around the dct block.
--
-- Ports:
--   datain  : serial input, one bit per rising edge of clk.
--   clk     : system clock; also clocks the input deserialiser.
--   start   : passed to the dct block.
--   rst     : reset, active high (the polarity used by the DCT core).
--   doutclk : clock of the output serialiser.
--   dataout : serial output, one bit per rising edge of doutclk.
--   done    : done flag of the dct block.
entity main is
  port (
    datain                   : in     std_logic;
    clk, start, rst, doutclk : in     std_logic;
    dataout                  : out    std_logic;
    done                     : buffer std_logic
  );
end main;

architecture one of main is

  component dct is
    port (
      datain                           : in     std_logic_vector(11 downto 0);
      dataout                          : out    std_logic_vector(11 downto 0);
      clk, start, rst, dinclk, doutclk : in     std_logic;
      done                             : buffer std_logic
    );
  end component;

  signal ready, doutclk2 : std_logic;               -- word strobes for the dct block
  signal data        : std_logic_vector(11 downto 0);  -- assembled input word
  signal data_out    : std_logic_vector(11 downto 0);  -- word delivered by the dct block
  signal dataoutbuff : std_logic_vector(11 downto 0);  -- output shift register
  signal datainbuff  : std_logic_vector(7 downto 0);   -- input shift register
  signal count       : std_logic_vector(2 downto 0);   -- input bit counter, 0..7
  signal count2      : std_logic_vector(3 downto 0);   -- output bit counter, 0..11

begin

  u : dct port map (datain => data, dataout => data_out, clk => clk,
                    start => start, rst => rst,
                    dinclk => ready, doutclk => doutclk2, done => done);

  -- Serial-to-parallel conversion.
  -- The reset test was "rst = '0'", the opposite of the DCT core's
  -- "rst = '1'", so this logic and the core could never run together.
  process (rst, clk)
  begin
    if rst = '1' then
      count <= (others => '0');
      ready <= '0';
    elsif clk'event and clk = '1' then
      datainbuff <= datainbuff(6 downto 0) & datain;
      if count = "111" then
        -- NOTE(review): the word is taken from datainbuff before the bit
        -- arriving on this edge is shifted in - confirm the bit framing.
        data  <= "0000" & datainbuff;
        ready <= '1';
        count <= "000";
      else
        -- ready is the input clock of the dct block; it was never cleared,
        -- so it produced only one rising edge.
        ready <= '0';
        count <= count + 1;
      end if;
    end if;
  end process;

  -- Parallel-to-serial conversion, twelve bits per word.
  process (rst, doutclk)
  begin
    if rst = '1' then
      count2   <= (others => '0');
      doutclk2 <= '0';
    elsif doutclk'event and doutclk = '1' then
      dataout <= dataoutbuff(0);
      if count2 = "1011" then
        -- Last bit of the word: request the next word and load the current one.
        doutclk2    <= '1';
        dataoutbuff <= data_out;
        count2      <= "0000";
      else
        dataoutbuff(10 downto 0) <= dataoutbuff(11 downto 1);
        count2   <= count2 + 1;
        doutclk2 <= '0';
      end if;
    end if;
  end process;

end one;

4.2.1 乘法器快速算法

由圖4-5可知

y0=c0×t0+c0×t2+c0×t4+c0×t6           式(4-3)

y2=c2×t0+c6×t2-c6×t4-c2×t6            式(4-4)

y4=c4×t0-c4×t2-c4×t4+c4×t6            式(4-5)

y6=c6×t0-c2×t2+c2×t4-c6×t6            式(4-6)

y1=c1×t1+c3×t3+c5×t5+c7×t7           式(4-7)

y3=c3×t1-c7×t3-c1×t5-c5×t7            式(4-8)

y5=c5×t1-c1×t3+c7×t5+c3×t7           式(4-9)

y7=c7×t1-c5×t3+c3×t5-c1×t7           式(4-10)

其中t0=x0+x7,t2=x1+x6,t4=x2+x5,t6=x3+x4,t1=x0-x7,t3=x1-x6,t5=x2-x5,t7=x3-x4

    由此看出,一維DCT變換的核心算法是乘法器算法。

因乘積項中的DCT系數值是已知的,是以采用查詢表和移位相加的方法進行乘法運算,此DA算法可以使硬體結構比較簡單,消耗的硬體資源相對比較少,最重要的是運算周期比較短。

其具體的例子如下:

Y=x0×5+x1×3,比如(x0=1,x1=2),它們用二進制表示為x0(0001),x1(0010),5(0101),

3(0011).首先列出一張5和3相加所有可能的結果的表,其内容如下:

“00”:0000  0*3+0*5  “01”:0101  0*3+1*5  “10”:0011  1*3+0*5  “11”:1000 1*3+1*5

然後用x0和x1第1位組成的兩位數(01)查得的資料為0101,第2位(10)查得的資料為0011,第3位(00)查得的資料為0000,第4位(00)查得的資料為0000,最後相加。

      0101

     0011

    0000

   0000

   0001011

其結果(0001011)化為十進制數為11,值和實際結果相符y=1*5+2*3=11.

本文中的乘法算法基本思想如上所述,本文乘法查詢表的資料如下:

[y0]

“0000” :00000000              “0001” :00010110  c0  

“0010” :00010110  c0          “0011” :00101100  c0+c0

“0100” :00010110  c0          “0101” :00101100  c0+c0

“0110” :00101100  c0+c0       “0111” :01000010  c0+c0+c0

“1000” :00010110  c0          “1001” :00101100  c0+c0

“1010” :00101100  c0+c0       “1011” :01000010  c0+c0+c0

“1100” :00101100  c0+c0       “1101” :01000010  c0+c0+c0

“1110” :01000010  c0+c0+c0    “1111” :01011000  c0+c0+c0+c0

[y2]

“0000” :00000000              “0001” :00011101  c2  

“0010” :00001100  c6          “0011” :00101001  c2+c6

“0100” :11110100  -c6          “0101” :00010001  c2-c6

“0110” :00000000  c6-c6        “0111” :00011101  c2+c6-c6=c2

“1000” :11100011  -c2          “1001” :00000000  c2-c2

“1010” :11101111  c6-c2        “1011” :00001100  c2+c6-c2=c6

“1100” :11010111  -c6-c2       “1101” :11110100  c2-c6-c2=-c6

“1110” :11100011  c6-c6-c2=-c2  “1111” :00000000  c2+c6-c6-c2

[y4]同理

[y6]同理

[y1]同理

[y3]同理

[y5]同理

[y7]同理

4.2.2 乘法查詢表的VHDL程式

Table_y0.vhd                     \\ y0=c0×t0+c0×t2+c0×t4+c0×t6

​​見附錄​​

Table_y1.vhd                     \\ y1=c1×t1+c3×t3+c5×t5+c7×t7

​​見附錄​​

Table_y2.vhd                     \\ y2=c2×t0+c6×t2-c6×t4-c2×t6

​​見附錄​​

Table_y3.vhd                     \\ y3=c3×t1-c7×t3-c1×t5-c5×t7

​​見附錄​​

Table_y4.vhd                     \\ y4=c4×t0-c4×t2-c4×t4+c4×t6

​​見附錄​​

Table_y5.vhd                     \\ y5=c5×t1-c1×t3+c7×t5+c3×t7

​​見附錄​​

Table_y6.vhd                    \\ y6=c6×t0-c2×t2+c2×t4-c6×t6

​​見附錄​​

Table_y7.vhd                    \\ y7=c7×t1-c5×t3+c3×t5-c1×t7

​​見附錄​​

4.2.3 乘法查詢表與硬體之間的映射問題

其中某個查詢表的頂層檔案引腳如圖4-6所示.

圖像處理算法的FPGA實作研究

圖4-6 table_y0引腳圖

查詢表的RTL視圖如圖4-7所示。

圖像處理算法的FPGA實作研究

圖4-7 查詢表的RTL視圖

由RTL視圖可知,查詢表采用的硬體結構為多路選擇器MUX。通過對多路選擇器MUX的DATA端賦初值,可以快速的實作乘法查詢。

由4.1.2節可知,輸入資料先經過預處理(convert),即(t0=x0+x7,t2=x1+x6,……),然後通過查詢表查找資料,再次進行移位相加,如此循環,就可以得到8位×12位的乘法結果。其總的流程如圖4-9所示。

圖像處理算法的FPGA實作研究

一維DCT算法中的循環移位子產品、查找乘法表後相加子產品分别如圖4-11、圖4-12所示。

圖像處理算法的FPGA實作研究

圖4-11 循環移位子產品

圖像處理算法的FPGA實作研究

圖4-12 查找乘法表後相加子產品

由圖4-11可以看出,循環移位子產品是通過控制線把多路選擇器和寄存器相結合的方法來實作循環移位功能。

因為一維DCT算法子產品中的資料是并行的,是以總的輸入輸出都要進行串行并行轉換。

其總流程如圖4-14所示。

圖4-14 二維DCT算法總流程

4.4.2二維DCT算法VHDL程式

Two_d_dct.vhd                    \\二維DCT算法子產品

​​見附錄​​

Dct.vhd                           \\頂層子產品檔案

​​見附錄​​

如果需要序列槽輸入資料,就要增加串并轉換子產品,那麼頂層檔案如下:

Main.vhd                         \\增加序列槽子產品的頂層檔案

​​見附錄​​

4.4.3二維DCT算法與硬體之間的映射問題

二維DCT算法的頂層檔案引腳如圖4-15所示.

圖像處理算法的FPGA實作研究

圖 4-15 二維DCT算法的頂層檔案引腳圖

    引腳的功能分别為clk(時鐘信号),rst(複位信号),start(開始信号),datain(串行資料輸入),doutclk(資料輸出時鐘信号),done(計算結束信号),dataout(串行資料輸出)。

4.4.4二維DCT算法仿真

其仿真波形如圖4-16所示。