天天看點

視訊YUV4:2:2轉4:2:0的TI DSP源代碼

     網絡上有很多有關YUV4:2:2轉YUV4:2:0的描述,但大多數都是講解原理,沒有實際的做法。本文把自己在TI DAVINCI DM6446端測試過的代碼奉獻出來,供大家參考和學習,同時拋磚引玉,希望得到大家的指點。本方法適合TI DM642、DM643x、DM644x等DSP系列。前端圖像採集格式一般都是YCbCr 4:2:2(YUV4:2:2),但很多視訊應用(比如JPEG、MPEG4、H.264等)都需要把YUV4:2:2轉化成YUV4:2:0的格式。在DM643x、DM644x上,TI採用EDMA3的方式實作轉換,那是另外的方法;這裡專門介紹通用的做法,在DM6441(513MHz)上處理640x480只需要7.5ms,而且還可以再優化,這個大家可以試試。

/***********************************************/

以PAL制為例,這裡的YCbCr 4:2:2(YUV4:2:2)像素排列方式是:

U0,0 Y0,0 V0,0 Y0,1 U0,1 Y0,2 V0,1 Y0,3 ...... U0,359 Y0,718 V0,359 Y0,719

..............................

U575,0 Y575,0 V575,0 Y575,1 ........... U575,359 Y575,718 V575,359 Y575,719

我們要轉化成YUV4:2:0的格式(Y、U、V分別存放在獨立的緩衝區Y_buf、U_buf、V_buf中):

/*
 * Video input format selection.  Set exactly one of PAL, NTSC or CMOS to 1;
 * the first one found set decides the source frame dimensions below.
 */
#define PAL    1   /* PAL CCD camera capture */
#define NTSC   0   /* NTSC CCD camera capture */
#define CMOS   0   /* CMOS camera capture */

/*
 * Source frame size in pixels.  Every branch defines BOTH dimensions so that
 * switching the format above never leaves one of them undefined.
 */
#if (1 == PAL)
#define ORG_IMG_WIDTH     720      /* D1 format */
#define ORG_IMG_HEIGHT    576
#elif (1 == NTSC)
#define ORG_IMG_WIDTH     720      /* D1 format */
#define ORG_IMG_HEIGHT    480
#elif (1 == CMOS)
#define ORG_IMG_WIDTH     640      /* VGA format */
#define ORG_IMG_HEIGHT    480
#else
#error "No video input format selected: set one of PAL, NTSC or CMOS to 1"
#endif

yuv422to420(const *unsigned char YCbCr_buf) /*YCbCr_buf指向YUV4:2:2的空間*/

{

        unsigned int m0,m1,m2,m3,x,y;

        unsigned int tmp,tmp0,tmp1,tmp2;

        tmp = (unsigned int)YCbCr_buf;/*對于D1,CIF,QCIF,VGA,QVGA的BUF肯定是4位元組對齊,是以這裡定義unsigned int也是可以的,當然你也可以使用指針*/

        tmp0 = (unsigned int)Y_buf;

        for(y=0;y<ORG_IMG_HEIGHT;y++)

       {

            for(x=0;x<(ORG_IMG_WIDTH>>1);x+=4)

           { 

                 m0 = *(unsigned int*)(tmp+y*(ORG_IMG_WIDTH<<1) + (x<<2)); /

                 m1 = *(unsigned int*)(tmp+y*(ORG_IMG_WIDTH<<1) + ((x+1)<<2));

                 m2 = *(unsigned int*)(tmp+y*(ORG_IMG_WIDTH<<1) + ((x+2)<<2));

                m3 = *(unsigned int*)(tmp+y*(ORG_IMG_WIDTH<<1) + ((x+3)<<2));

                *(unsigned short*)(tmp0+y*ORG_IMG_WIDTH + (x<<1))=(unsigned short)(((m0>>16)&0xFF00)|((m0>>8)&0x00FF));

                 *(unsigned short*)(tmp0+y*ORG_IMG_WIDTH + ((x+1)<<1))=(unsigned short)(((m1>>16)&0xFF00)|((m1>>8)&0x00FF));

                 *(unsigned short*)(tmp0+y*ORG_IMG_WIDTH + ((x+2)<<1))=(unsigned short)(((m2>>16)&0xFF00)|((m2>>8)&0x00FF));

                 *(unsigned short*)(tmp0+y*RG_IMG_WIDTH + ((x+3)<<1))=(unsigned short)(((m3>>16)&0xFF00)|((m3>>8)&0x00FF));

             }

       }

       tmp1=(unsigned int)U_buf;

       tmp2=(unsigned int)V_buf;

       for(y=0;y<(ORG_IMG_HEIGHT>>1);y++)

      {

             for(x=0;x<(ORG_IMG_WIDTH>>1);x+=4)

             {

                  m0 = *(unsigned int*)(tmp+y*(ORG_IMG_WIDTH<<2) + (x<<2));

                  m1 = *(unsigned int*)(tmp+y*(ORG_IMG_WIDTH<<2) + ((x+1)<<2));

                  m2 = *(unsigned int*)(tmp+y*(ORG_IMG_WIDTH<<2) + ((x+2)<<2));

                 m3 = *(unsigned int*)(tmp+y*(ORG_IMG_WIDTH<<2) + ((x+3)<<2));

                 *(unsigned char*)(tmp1+y*(ORG_IMG_WIDTH>>1) + x)=(unsigned char)m0;

                 *(unsigned char*)(tmp2+y*(ORG_IMG_WIDTH>>1) + x)=(unsigned char)(m0>>16);

                *(unsigned char*)(tmp1+y*(ORG_IMG_WIDTH>>1) + x + 1)=(unsigned char)m1;

                *(unsigned char*)(tmp2+y*(ORG_IMG_WIDTH>>1) + x + 1)=(unsigned char)(m1>>16);

               *(unsigned char*)(tmp1+y*(ORG_IMG_WIDTH>>1) + x + 2)=(unsigned char)m2;

               *(unsigned char*)(tmp2+y*(ORG_IMG_WIDTH>>1) + x + 2)=(unsigned char)(m2>>16);

              *(unsigned char*)(tmp1+y*(ORG_IMG_WIDTH>>1) + x + 3)=(unsigned char)m3;

              *(unsigned char*)(tmp2+y*(ORG_IMG_WIDTH>>1) + x + 3)=(unsigned char)(m3>>16);

            }

       }

}

對上面的代碼點評:DSP的優化原則是:能移位,就不要乘除;能用int讀記憶體,就不要用char讀記憶體,因為C64、C64+的DSP讀記憶體指令需要4個時鐘周期;循環次數能成4的倍數時,最好展開成4次操作,形成流水線操作,當然循環內部不能有if、break等語句。

另外,DM642或DM643有自己的效率更高的程式,這裡也奉獻給大家。

#include <csl.h>

#include <csl_dat.h>

#include <csl_cache.h>

/*
 * Single-line staging buffer for the DAT_copy-based converters below: each
 * source line is copied into it and then copied on to its destination.
 */
#pragma DATA_SECTION(int_mem_temp, ".img_buf");/* the .img_buf section can be placed in L2 RAM */

/* NOTE(review): 128-byte alignment presumably matches a cache-line boundary; confirm for the target device. */
#pragma DATA_ALIGN(int_mem_temp, 128);

/* 720 bytes: the largest single transfer the converters stage through this buffer. */
unsigned char int_mem_temp[720];

void yuv422to420( char *frameIn[], char *frm_out[],

                  int width, int height)

    char *pSrcY = frameIn[0];

    char *pSrcU = frameIn[1];

    char *pSrcV = frameIn[2];

    char *pDestY = frm_out[0];

    char *pDestU = frm_out[1];

    char *pDestV = frm_out[2];

    unsigned int id;

    unsigned int i;

    for( i = 0; i < height; i++)

    {

        id = DAT_copy(pSrcY + (i * 720), int_mem_temp, 720);

        id = DAT_copy(int_mem_temp,      pDestY + (i * 720),  720);

        DAT_wait(id);

    }

    for( i = 0; i < (height >> 1); i++)

        id = DAT_copy(pSrcU + (i * 720), int_mem_temp, 360);

        id = DAT_copy(int_mem_temp,      pDestU + (i * 360),  360);

        id = DAT_copy(pSrcV + (i * 720), int_mem_temp, 360);

        id = DAT_copy(int_mem_temp,      pDestV + (i * 360),  360);

    return ;

void yuv420to422( char *frameIn[], char *frm_out[],

                int width, int height)

        id = DAT_copy(pSrcU + (i * 360), int_mem_temp, 360);

        id = DAT_copy(int_mem_temp,      pDestU + ((2 * i) * 360),   360);

        id = DAT_copy(int_mem_temp,      pDestU + ((2*i + 1)* 360),  360);

        id = DAT_copy(pSrcV + (i * 360), int_mem_temp, 360);

        id = DAT_copy(int_mem_temp,      pDestV + ((2*i) * 360),    360);

        id = DAT_copy(int_mem_temp,      pDestV + ((2*i+1) * 360),  360);

   return ;

本文轉自 zjb_integrated 51CTO部落格,原文連結:http://blog.51cto.com/zjbintsystem/235094,如需轉載請自行聯系原作者

繼續閱讀