原文連結:https://blog.csdn.net/u010126792/article/details/86593199
一. YUV轉換
1.1 YV12T轉NV21
//YV12T轉NV21
/*
 * Convert a planar YV12 frame (separate Y, Cb, Cr planes) to NV21
 * (full-size Y plane followed by interleaved Cr/Cb at half resolution).
 *
 * srcStride / dstStride are the luma-row strides in bytes; the chroma
 * source rows are assumed to be srcStride/2 apart, while the destination
 * CrCb rows are dstStride apart. Returns 0 (no failure path).
 */
int YV12ToNV21(char *srcY, char *srcCb, char *srcCr,
char *dstY, char *dstCrCb,
uint32_t srcStride, uint32_t dstStride,
uint32_t width, uint32_t height)
{
    uint32_t row, col;
    char *lumaIn  = srcY;
    char *lumaOut = dstY;
    char *cbIn    = srcCb;
    char *crIn    = srcCr;

    ALOGD("srcY %p, srcCb %p, srcCr %p, dstY %p, dstCrCb %p, srcStride %d, dstStride %d, width %d, height %d",
          srcY, srcCb, srcCr, dstY, dstCrCb, srcStride, dstStride, width, height);

    /* Luma: copy row by row since source and destination strides may differ. */
    for (row = 0; row < height; row++) {
        memcpy(lumaOut, lumaIn, width);
        lumaIn  += srcStride;
        lumaOut += dstStride;
    }

    /* Chroma: interleave planar Cr/Cb into semi-planar VU pairs (NV21 order). */
    char *uvOut = dstCrCb;
    for (row = 0; row < (height >> 1); row++) {
        for (col = 0; col < (width >> 1); col++) {
            uvOut[2 * col]     = crIn[col];  /* Cr first */
            uvOut[2 * col + 1] = cbIn[col];  /* then Cb */
        }
        cbIn  += srcStride >> 1;   /* chroma source rows are half the luma stride apart */
        crIn  += srcStride >> 1;
        uvOut += dstStride;
    }
    return 0;
}
1.2 YV12T轉YV12
/*
 * Copy a YV12 frame plane by plane, handling differing row strides.
 *
 * Luma is copied in one bulk memcpy when source and destination strides
 * match, otherwise row by row. U and V planes are half size in both
 * dimensions; source chroma rows are srcStride/2 apart, destination
 * chroma rows are dstStrideUV apart. Returns 0 (no failure path).
 */
int YV12ToYV12( char *srcY, char *srcU, char *srcV,
char *dstY, char *dstU, char *dstV,
uint32_t srcStride, uint32_t dstStrideY, uint32_t dstStrideUV,
uint32_t width, uint32_t height )
{
    uint32_t row;

    /* Luma plane. */
    if (srcStride == dstStrideY) {
        /* Identical layout: one bulk copy, padding included. */
        memcpy(dstY, srcY, srcStride * height);
    } else {
        char *in  = srcY;
        char *out = dstY;
        for (row = 0; row < height; row++) {
            memcpy(out, in, width);
            in  += srcStride;
            out += dstStrideY;
        }
    }

    /* Chroma planes (half width, half height). */
    {
        char *uIn  = srcU;
        char *uOut = dstU;
        char *vIn  = srcV;
        char *vOut = dstV;
        uint32_t chromaWidth  = width / 2;
        uint32_t chromaHeight = height / 2;
        for (row = 0; row < chromaHeight; row++) {
            memcpy(uOut, uIn, chromaWidth);
            memcpy(vOut, vIn, chromaWidth);
            uIn  += srcStride / 2;
            vIn  += srcStride / 2;
            uOut += dstStrideUV;
            vOut += dstStrideUV;
        }
    }
    return 0;
}
2. RGB 轉YUV
2.1 ARGB8888 轉NV12
/*
 * Convert a 32-bit-per-pixel buffer to NV12 (full-size Y plane followed by
 * interleaved Cb/Cr at 2x2 subsampling), using BT.601 integer coefficients.
 *
 * Channels are read as byte0=R, byte1=G, byte2=B, byte3 ignored
 * (assumed R,G,B,A memory order per the original naming -- TODO confirm
 * against the actual buffer layout).
 *
 * Chroma is taken from the top-left pixel of each 2x2 block. Note that
 * dstCbCr advances by dstStrideUV/2 per *image* row, i.e. dstStrideUV
 * bytes per chroma row.
 */
void ARGB8888_to_NV12(unsigned char *dstY, unsigned char *dstCbCr, unsigned char *src, unsigned int Width, unsigned int Height, uint32_t dstStrideY, uint32_t dstStrideUV)
{
    int x, y;
    for (y = 0; y < (int)Height; y++) {
        unsigned char *rowY  = dstY;
        unsigned char *rowUV = dstCbCr;
        for (x = 0; x < (int)Width; x++) {
            unsigned char r = src[0];
            unsigned char g = src[1];
            unsigned char b = src[2];
            /* Y = ((66R + 129G + 25B + 128) >> 8) + 16, clamped to [0,255]. */
            int luma = (((66 * r) + (129 * g) + (25 * b) + (128)) >> 8) + 16;
            if (luma > 255) luma = 255;
            else if (luma < 0) luma = 0;
            *rowY++ = (unsigned char)luma;
            /* Emit one Cb/Cr pair per 2x2 block (even row, even column). */
            if (((y & 1) == 0) && ((x & 1) == 0)) {
                int cb = (((-38 * r) - (74 * g) + (112 * b) + 128) >> 8) + 128;
                if (cb > 255) cb = 255;
                else if (cb < 0) cb = 0;
                *rowUV++ = (unsigned char)cb;   /* Cb first (NV12) */
                int cr = (((112 * r) - (94 * g) - (18 * b) + 128) >> 8) + 128;
                if (cr > 255) cr = 255;
                else if (cr < 0) cr = 0;
                *rowUV++ = (unsigned char)cr;   /* then Cr */
            }
            src += 4;  /* next 4-byte pixel */
        }
        dstY    += dstStrideY;
        dstCbCr += dstStrideUV / 2;  /* half per image row = full per chroma row */
    }
}
彙編:
.arch armv7-a
.text
.global ARGB8888_to_NV12_NEON
.type ARGB8888_to_NV12_NEON, %function
@-----------------------------------------------------------------------------
@ void ARGB8888_to_NV12_NEON(unsigned char *pDstY, unsigned char *pDstUV,
@                            unsigned char *pSrcRGB, int nWidth, int nHeight)
@ NEON port of ARGB8888_to_NV12: 32-bit pixels -> NV12 (Y plane + interleaved
@ Cb/Cr at 2x2 subsampling), BT.601 integer coefficients.
@ Processes two image rows per outer iteration; the vector loop converts 16
@ pixels at a time, a scalar tail loop handles the remaining pixel pairs.
@ Assumes nWidth even and nHeight a multiple of 2.
@ NOTE(review): channel order taken as byte0=R, byte1=G, byte2=B, matching
@ the C reference -- confirm against the actual buffer layout.
@ FIX: the two Y-plane vector stores had lost their post-increment writeback
@ '!' to a web-extraction artifact; without it r0/r4 never advance and each
@ 16-pixel group overwrites the previous one (compare the UV store [r1]! and
@ the scalar tail's strb ..., [r0],#1).
@-----------------------------------------------------------------------------
ARGB8888_to_NV12_NEON:
.fnstart
@r0 pDstY
@r1 pDstUV
@r2 pSrcRGB
@r3 nWidth
@r4 pDstY2 = pDstY + nWidth
@r5 pSrcRGB2 = pSrcRGB + nWidth*4
@r6 temp7, mask scratch
@r7 temp6, accumulator
@r8 temp5, nWidthTemp
@r9 temp4, raw pixel word
@r10 temp3, r,g,b
@r11 temp2, immediate operand
@r12 temp1, nHeight
@r14 temp0, scratch / second pixel lanes
.equ CACHE_LINE_SIZE, 32
.equ PRE_LOAD_OFFSET, 6
stmfd sp!, {r4-r12,r14} @ backup callee-saved registers
ldr r12, [sp, #40] @ load nHeight (first stack argument, after 10 pushes)
@ldr r14, [sp, #44] @ load pTest
add r4, r0, r3 @r4: pDstY2 = pDstY + nWidth
add r5, r2, r3, lsl #2 @r5: pSrcRGB2 = pSrcRGB + nWidth*4
sub r8, r3, #16 @r8: nWidthTmp = nWidth - 16
@q4: channel2 lanes (B)
@q5: channel1 lanes (G)
@q6: channel0 lanes (R)
vmov.u16 q6, #66 @coefficient assignment
vmov.u16 q7, #129
vmov.u16 q8, #25
vmov.u16 q9, #0x8080 @ 128<<8 + 128 (chroma rounding/offset)
vmov.u16 q10, #0x1000 @ 16<<8 ...
vorr.u16 q10, #0x0080 @ ... + 128 (luma rounding/offset)
vmov.u16 q11, #38 @#-38 (applied via vmls)
vmov.u16 q12, #74 @#-74 (applied via vmls)
vmov.u16 q13, #112
vmov.u16 q14, #94 @#-94 (applied via vmls)
vmov.u16 q15, #18 @#-18 (applied via vmls)
LOOP_NHEIGHT2:
stmfd sp!, {r12} @ save remaining-height counter
LOOP_NWIDTH16:
pld [r2, #(CACHE_LINE_SIZE*PRE_LOAD_OFFSET)]
@-------------------------------------------UV-------------------------------------------
vmov.u16 q14, #94 @#94
vmov.u16 q15, #18 @#18
vld4.8 {d0,d1,d2,d3}, [r2]! @load 8 pixels, de-interleaved by channel
vld4.8 {d4,d5,d6,d7}, [r2]! @load next 8 pixels
vmov.u16 d8,d2
vmov.u16 d9,d6
vmov.u16 d10,d1
vmov.u16 d11,d5
vmov.u16 d12,d0
vmov.u16 d13,d4
vand.u16 q4,#0x00FF @B of even pixels, widened to u16
vand.u16 q5,#0x00FF @G of even pixels
vand.u16 q6,#0x00FF @R of even pixels
vmov.u16 q7,q9 @ CalcU()
vmla.u16 q7,q4,q13 @+ 112 * B[k]
vmls.u16 q7,q6,q11 @- 38 * R[k]
vmls.u16 q7,q5,q12 @- 74 * G[k]
vshr.u16 q7,q7, #8 @(128<<8 + 128 + u) >> 8
vmov.u16 q8,q9 @CalcV()
vmla.u16 q8,q6,q13 @+ 112 * R[k]
vmls.u16 q8,q5,q14 @- 94 * G[k]
vmls.u16 q8,q4,q15 @- 18 * B[k]
vshr.u16 q8,q8, #8 @(128<<8 + 128 + v) >> 8
vtrn.8 q7,q8 @interleave Cb,Cr bytes
vst1.8 {q7}, [r1]! @write UV pairs to chroma plane
@-------------------------------------------Y (row 1)------------------------------------
vmov.u16 q14, #66 @#66
vmov.u16 q15, #129 @#129
vmov.u16 q8, #25 @#25
@CalcY() for even pixels
vmul.u16 q7,q6,q14 @66 * R[k]
vmla.u16 q7,q5,q15 @+= 129 * G[k]
vmla.u16 q7,q4,q8 @+= 25 * B[k]
vadd.u16 q7,q7,q10 @+ (16<<8 + 128)
vshr.u16 q7,q7, #8
vmov.u16 d8,d2
vmov.u16 d9,d6
vmov.u16 d10,d1
vmov.u16 d11,d5
vmov.u16 d12,d0
vmov.u16 d13,d4
vshr.u16 q4,q4,#8 @B of odd pixels
vshr.u16 q5,q5,#8 @G of odd pixels
vshr.u16 q6,q6,#8 @R of odd pixels
vmul.u16 q0,q6,q14 @66 * R[k]
vmla.u16 q0,q5,q15 @+= 129 * G[k]
vmla.u16 q0,q4,q8 @+= 25 * B[k]
vadd.u16 q0,q0,q10
vshr.u16 q0,q0, #8
vtrn.8 q7,q0 @re-interleave even/odd Y bytes
vst1.8 {q7}, [r0]! @write 16 Y to luma plane (FIX: writeback '!' restored)
@---------------------------------------------Y (row 2)----------------------------------
pld [r5, #(CACHE_LINE_SIZE*PRE_LOAD_OFFSET)]
vld4.8 {d0,d1,d2,d3}, [r5]! @load 8 pixels of second row
vld4.8 {d4,d5,d6,d7}, [r5]! @load next 8 pixels
vmov.u16 d8,d2
vmov.u16 d9,d6
vmov.u16 d10,d1
vmov.u16 d11,d5
vmov.u16 d12,d0
vmov.u16 d13,d4
vand.u16 q4,#0x00FF @B of even pixels
vand.u16 q5,#0x00FF @G of even pixels
vand.u16 q6,#0x00FF @R of even pixels
vmul.u16 q7,q6,q14 @66 * R[k]
vmla.u16 q7,q5,q15 @+= 129 * G[k]
vmla.u16 q7,q4,q8 @+= 25 * B[k]
vadd.u16 q7,q7,q10
vshr.u16 q7,q7, #8
vmov.u16 d8,d2
vmov.u16 d9,d6
vmov.u16 d10,d1
vmov.u16 d11,d5
vmov.u16 d12,d0
vmov.u16 d13,d4
vshr.u16 q4,q4,#8 @B of odd pixels
vshr.u16 q5,q5,#8 @G of odd pixels
vshr.u16 q6,q6,#8 @R of odd pixels
vmul.u16 q0,q6,q14 @66 * R[k]
vmla.u16 q0,q5,q15 @+= 129 * G[k]
vmla.u16 q0,q4,q8 @+= 25 * B[k]
vadd.u16 q0,q0,q10
vshr.u16 q0,q0, #8
vtrn.8 q7,q0
vst1.8 {q7}, [r4]! @write 16 Y of row 2 (FIX: writeback '!' restored)
subs r8,r8,#16 @nWidth16 -= 16
BPL LOOP_NWIDTH16 @loop while >= 0
@-----------------------------------scalar tail-------------------------------------------
adds r8,r8,#16 @remaining pixels (0..15)
BEQ NO_UNALIGNED @skip when nWidth is a multiple of 16
LOOP_NWIDTH2:
@----------------------------------row 1 -- Y---------------------------------------------
@stmfd sp!, {r14} @backup r14
ldr r9, [r2], #4 @load pixel 0
ldr r12, [r2], #4 @load pixel 1
mov r10, r9 @R of pixel 0 at bits 0-7
mov r14, r12,lsl #16 @R of pixel 1 shifted to bits 16-23
ldr r6, =0x000000FF
and r10, r10, r6 @R0
ldr r6, =0x00FF0000
and r14, r14, r6 @R1
add r10,r10,r14 @both R lanes packed in one word
mov r11, #66 @accumulator = R*66
mul r7, r10, r11
mov r10, r9,lsr #8 @G of pixel 0
mov r14, r12,lsl #8 @G of pixel 1 to bits 16-23
ldr r6, =0x000000FF
and r10, r10, r6 @G0
ldr r6, =0x00FF0000
and r14, r14, r6 @G1
add r10,r10,r14
mov r11, #129 @accumulator += G*129
mla r7, r10, r11, r7
mov r10, r9,lsr #16 @B of pixel 0
mov r14, r12 @B of pixel 1 already at bits 16-23
ldr r6, =0x000000FF
and r10, r10, r6 @B0
ldr r6, =0x00FF0000
and r14, r14, r6 @B1
add r10,r10,r14
mov r11, #25 @accumulator += B*25
mla r7, r10, r11, r7
ldr r6, =0x10801080
add r7, r6 @+ (16<<8 + 128) in both halfword lanes
lsr r7, #8
strb r7, [r0],#1 @store Y0
lsr r7,#16
strb r7, [r0],#1 @store Y1
@ldmfd sp!, {r14} @load r14
@----------------------------------row 1 -- UV (even pixel only)--------------------------
mov r10, r9 @2x2 subsampling: chroma from pixel 0 only
ldr r7,=0x00008080 @U accumulator = 128<<8 + 128
mov r12,r7 @V accumulator
ldr r6, =0x000000FF
and r10, r10, r6 @R0
mov r11, #38 @U -= R*38
mul r11, r10, r11
sub r7, r7, r11
mov r11, #112 @V += R*112
mla r12, r10, r11, r12
mov r10, r9, lsr #16
ldr r6, =0x000000FF
and r10, r10, r6 @B0
mov r11, #112 @U += B*112
mla r7, r10, r11, r7
mov r11, #18 @V -= B*18
mul r11, r10, r11
sub r12, r12, r11
mov r10, r9,lsr #8
ldr r6, =0x000000FF
and r10, r10, r6 @G0
mov r11, #74 @U -= G*74
mul r11, r10, r11
sub r7, r7, r11
mov r11, #94 @V -= G*94
mul r11, r10, r11
sub r12, r12, r11
lsr r7, #8 @ >>8
strb r7, [r1],#1 @store Cb
lsr r12, #8 @ >>8
strb r12, [r1],#1 @store Cr
@----------------------------------row 2 -- Y---------------------------------------------
@stmfd sp!, {r14} @backup r14
ldr r9, [r5], #4 @load pixel 0 of second row
ldr r12, [r5], #4 @load pixel 1
mov r10, r9 @R of pixel 0
mov r14, r12,lsl #16 @R of pixel 1 to bits 16-23
ldr r6, =0x000000FF
and r10, r10, r6 @R0
ldr r6, =0x00FF0000
and r14, r14, r6 @R1
add r10,r10,r14
mov r11, #66 @accumulator = R*66
mul r7, r10, r11
mov r10, r9,lsr #8 @G of pixel 0
mov r14, r12,lsl #8 @G of pixel 1
ldr r6, =0x000000FF
and r10, r10, r6 @G0
ldr r6, =0x00FF0000
and r14, r14, r6 @G1
add r10,r10,r14
mov r11, #129 @accumulator += G*129
mla r7, r10, r11, r7
mov r10, r9,lsr #16 @B of pixel 0
mov r14, r12 @B of pixel 1 already at bits 16-23
ldr r6, =0x000000FF
and r10, r10, r6 @B0
ldr r6, =0x00FF0000
and r14, r14, r6 @B1
add r10,r10,r14
mov r11, #25 @accumulator += B*25
mla r7, r10, r11, r7
ldr r6, =0x10801080
add r7, r6 @+ (16<<8 + 128) in both halfword lanes
lsr r7, #8
strb r7, [r4],#1 @store Y0 of row 2
lsr r7,#16
strb r7, [r4],#1 @store Y1 of row 2
@ldmfd sp!, {r14} @load r14
subs r8,r8,#2 @nWidth2 -= 2
BGT LOOP_NWIDTH2 @loop while > 0
NO_UNALIGNED: @reached directly when nWidth is a multiple of 16
@-----------------------------------------------------------------------------
sub r8, r3, #16 @reset nWidthTmp = nWidth - 16 for the next row pair
add r0, r0, r3 @pDstY skips the row already written via r4
add r2, r2, r3, lsl #2 @pSrcRGB skips the row consumed via r5
add r4, r4, r3 @pDstY2 += nWidth
add r5, r5, r3, lsl #2 @pSrcRGB2 += nWidth*4
ldmfd sp!, {r12} @restore remaining-height counter
subs r12,r12,#2 @nHeight -= 2
BGT LOOP_NHEIGHT2 @loop while rows remain
ldmfd sp!, {r4-r12,pc} @ restore registers and return
.fnend
2.2 ARGB8888 轉 NV21
void ARGB8888_to_NV21(unsigned char *dstY, unsigned char *dstCbCr, unsigned char *src, unsigned int Width, unsigned int Height, uint32_t dstStrideY, uint32_t dstStrideUV)
{
int w, h, iTmp;
unsigned char *pDstY = NULL;
unsigned char *pDstCbCr = NULL;
for (h = 0 ; h < (int)Height ; h++) {
pDstY = dstY;
pDstCbCr = dstCbCr;
for (w = 0 ; w < (int)Width ; w++) {
unsigned char byR = *(src);
unsigned char byG = *(src+1);
unsigned char byB = *(src+2);
iTmp = (((66 * byR) + (129 * byG) + (25 * byB) + (128)) >> 8) + 16;
if (iTmp > 255) iTmp = 255;
else if (iTmp < 0) iTmp = 0;
*pDstY = (unsigned char)iTmp; pDstY += 1;
if ( ((h&1)==0) && ((w&1)==0) )
{
iTmp = (((112 * byR) - (94 * byG) - (18 * byB) + 128) >> 8) + 128;
if (iTmp > 255) iTmp = 255;
else if (iTmp < 0) iTmp = 0;
*pDstCbCr = (unsigned char)iTmp; pDstCbCr += 1;
iTmp = (((-38 * byR) - (74 * byG) + (112 * byB) + 128) >> 8) + 128;
if (iTmp > 255) iTmp = 255;
else if (iTmp < 0) iTmp = 0;
*pDstCbCr = (unsigned char)iTmp; pDstCbCr += 1;
}
src += 4;
}
dstY = dstY + dstStrideY;
dstCbCr = dstCbCr + dstStrideUV/2;
}
}
彙編:
.arch armv7-a
.text
.global ARGB8888_to_NV21_NEON
.type ARGB8888_to_NV21_NEON, %function
@-----------------------------------------------------------------------------
@ void ARGB8888_to_NV21_NEON(unsigned char *pDstY, unsigned char *pDstUV,
@                            unsigned char *pSrcRGB, int nWidth, int nHeight)
@ NEON port of ARGB8888_to_NV21: 32-bit pixels -> NV21 (Y plane + interleaved
@ chroma at 2x2 subsampling), BT.601 integer coefficients. Processes two rows
@ per outer iteration; the vector loop converts 16 pixels at a time, a scalar
@ tail loop handles the remaining pixel pairs. Assumes nWidth even and
@ nHeight a multiple of 2.
@ NOTE(review): unlike the NV12 routine, this version labels byte0=B and
@ byte2=R (i.e. B,G,R,A order), which CONTRADICTS its own C reference
@ (byte0=R). Also, under that labeling the chroma pair appears to be emitted
@ Cb-first, which is NV12 order, not NV21. One of the two routines is wrong
@ for the actual buffer layout -- verify before use.
@ FIX: the two Y-plane vector stores had lost their post-increment writeback
@ '!' to a web-extraction artifact; without it r0/r4 never advance and each
@ 16-pixel group overwrites the previous one (compare the UV store [r1]! and
@ the scalar tail's strb ..., [r0],#1).
@-----------------------------------------------------------------------------
ARGB8888_to_NV21_NEON:
.fnstart
@r0 pDstY
@r1 pDstUV
@r2 pSrcRGB
@r3 nWidth
@r4 pDstY2 = pDstY + nWidth
@r5 pSrcRGB2 = pSrcRGB + nWidth*4
@r6 temp7, mask scratch
@r7 temp6, accumulator
@r8 temp5, nWidthTemp
@r9 temp4, raw pixel word
@r10 temp3, r,g,b
@r11 temp2, immediate operand
@r12 temp1, nHeight
@r14 temp0, scratch / second pixel lanes
.equ CACHE_LINE_SIZE, 32
.equ PRE_LOAD_OFFSET, 6
stmfd sp!, {r4-r12,r14} @ backup callee-saved registers
ldr r12, [sp, #40] @ load nHeight (first stack argument, after 10 pushes)
@ldr r14, [sp, #44] @ load pTest
add r4, r0, r3 @r4: pDstY2 = pDstY + nWidth
add r5, r2, r3, lsl #2 @r5: pSrcRGB2 = pSrcRGB + nWidth*4
sub r8, r3, #16 @r8: nWidthTmp = nWidth - 16
@q4: channel2 lanes (labeled R here; see header note)
@q5: channel1 lanes (G)
@q6: channel0 lanes (labeled B here; see header note)
vmov.u16 q6, #66 @coefficient assignment
vmov.u16 q7, #129
vmov.u16 q8, #25
vmov.u16 q9, #0x8080 @ 128<<8 + 128 (chroma rounding/offset)
vmov.u16 q10, #0x1000 @ 16<<8 ...
vorr.u16 q10, #0x0080 @ ... + 128 (luma rounding/offset)
vmov.u16 q11, #38 @#-38 (applied via vmls)
vmov.u16 q12, #74 @#-74 (applied via vmls)
vmov.u16 q13, #112
vmov.u16 q14, #94 @#-94 (applied via vmls)
vmov.u16 q15, #18 @#-18 (applied via vmls)
LOOP_NHEIGHT2:
stmfd sp!, {r12} @ save remaining-height counter
LOOP_NWIDTH16:
pld [r2, #(CACHE_LINE_SIZE*PRE_LOAD_OFFSET)]
@-------------------------------------------UV-------------------------------------------
vmov.u16 q14, #94 @#94
vmov.u16 q15, #18 @#18
vld4.8 {d0,d1,d2,d3}, [r2]! @load 8 pixels, de-interleaved by channel
vld4.8 {d4,d5,d6,d7}, [r2]! @load next 8 pixels
vmov.u16 d8,d2
vmov.u16 d9,d6
vmov.u16 d10,d1
vmov.u16 d11,d5
vmov.u16 d12,d0
vmov.u16 d13,d4
vand.u16 q4,#0x00FF @channel2 of even pixels, labeled R (see header note)
vand.u16 q5,#0x00FF @G of even pixels
vand.u16 q6,#0x00FF @channel0 of even pixels, labeled B (see header note)
vmov.u16 q8,q9 @ CalcU()
vmla.u16 q8,q6,q13 @+ 112 * B[k]
vmls.u16 q8,q4,q11 @- 38 * R[k]
vmls.u16 q8,q5,q12 @- 74 * G[k]
vshr.u16 q8,q8, #8 @(128<<8 + 128 + u) >> 8
vmov.u16 q7,q9 @CalcV()
vmla.u16 q7,q4,q13 @+ 112 * R[k]
vmls.u16 q7,q5,q14 @- 94 * G[k]
vmls.u16 q7,q6,q15 @- 18 * B[k]
vshr.u16 q7,q7, #8 @(128<<8 + 128 + v) >> 8
vtrn.8 q8,q7 @interleave chroma bytes (U first here; see header note)
vst1.8 {q8}, [r1]! @write chroma pairs to UV plane
@-------------------------------------------Y (row 1)------------------------------------
vmov.u16 q14, #66 @#66
vmov.u16 q15, #129 @#129
vmov.u16 q8, #25 @#25
@CalcY() for even pixels
vmul.u16 q7,q4,q14 @66 * R[k]
vmla.u16 q7,q5,q15 @+= 129 * G[k]
vmla.u16 q7,q6,q8 @+= 25 * B[k]
vadd.u16 q7,q7,q10 @+ (16<<8 + 128)
vshr.u16 q7,q7, #8
vmov.u16 d8,d2
vmov.u16 d9,d6
vmov.u16 d10,d1
vmov.u16 d11,d5
vmov.u16 d12,d0
vmov.u16 d13,d4
vshr.u16 q4,q4,#8 @channel2 of odd pixels (R)
vshr.u16 q5,q5,#8 @G of odd pixels
vshr.u16 q6,q6,#8 @channel0 of odd pixels (B)
vmul.u16 q0,q4,q14 @66 * R[k]
vmla.u16 q0,q5,q15 @+= 129 * G[k]
vmla.u16 q0,q6,q8 @+= 25 * B[k]
vadd.u16 q0,q0,q10
vshr.u16 q0,q0, #8
vtrn.8 q7,q0 @re-interleave even/odd Y bytes
vst1.8 {q7}, [r0]! @write 16 Y to luma plane (FIX: writeback '!' restored)
@---------------------------------------------Y (row 2)----------------------------------
pld [r5, #(CACHE_LINE_SIZE*PRE_LOAD_OFFSET)]
vld4.8 {d0,d1,d2,d3}, [r5]! @load 8 pixels of second row
vld4.8 {d4,d5,d6,d7}, [r5]! @load next 8 pixels
vmov.u16 d8,d2
vmov.u16 d9,d6
vmov.u16 d10,d1
vmov.u16 d11,d5
vmov.u16 d12,d0
vmov.u16 d13,d4
vand.u16 q4,#0x00FF @channel2 of even pixels (R)
vand.u16 q5,#0x00FF @G of even pixels
vand.u16 q6,#0x00FF @channel0 of even pixels (B)
vmul.u16 q7,q4,q14 @66 * R[k]
vmla.u16 q7,q5,q15 @+= 129 * G[k]
vmla.u16 q7,q6,q8 @+= 25 * B[k]
vadd.u16 q7,q7,q10
vshr.u16 q7,q7, #8
vmov.u16 d8,d2
vmov.u16 d9,d6
vmov.u16 d10,d1
vmov.u16 d11,d5
vmov.u16 d12,d0
vmov.u16 d13,d4
vshr.u16 q4,q4,#8 @channel2 of odd pixels (R)
vshr.u16 q5,q5,#8 @G of odd pixels
vshr.u16 q6,q6,#8 @channel0 of odd pixels (B)
vmul.u16 q0,q4,q14 @66 * R[k]
vmla.u16 q0,q5,q15 @+= 129 * G[k]
vmla.u16 q0,q6,q8 @+= 25 * B[k]
vadd.u16 q0,q0,q10
vshr.u16 q0,q0, #8
vtrn.8 q7,q0
vst1.8 {q7}, [r4]! @write 16 Y of row 2 (FIX: writeback '!' restored)
subs r8,r8,#16 @nWidth16 -= 16
BPL LOOP_NWIDTH16 @loop while >= 0
@-----------------------------------scalar tail-------------------------------------------
adds r8,r8,#16 @remaining pixels (0..15)
BEQ NO_UNALIGNED @skip when nWidth is a multiple of 16
LOOP_NWIDTH2:
@----------------------------------row 1 -- Y---------------------------------------------
@stmfd sp!, {r14} @backup r14
ldr r9, [r2], #4 @load pixel 0
ldr r12, [r2], #4 @load pixel 1
mov r10, r9,lsr #16 @byte2 of pixel 0, labeled R (see header note)
mov r14, r12 @byte2 of pixel 1 already at bits 16-23
ldr r6, =0x000000FF
and r10, r10, r6 @R0
ldr r6, =0x00FF0000
and r14, r14, r6 @R1
add r10,r10,r14 @both R lanes packed in one word
mov r11, #66 @accumulator = R*66
mul r7, r10, r11
mov r10, r9,lsr #8 @G of pixel 0
mov r14, r12,lsl #8 @G of pixel 1 to bits 16-23
ldr r6, =0x000000FF
and r10, r10, r6 @G0
ldr r6, =0x00FF0000
and r14, r14, r6 @G1
add r10,r10,r14
mov r11, #129 @accumulator += G*129
mla r7, r10, r11, r7
mov r10, r9 @byte0 of pixel 0, labeled B (see header note)
mov r14, r12,lsl #16 @byte0 of pixel 1 to bits 16-23
ldr r6, =0x000000FF
and r10, r10, r6 @B0
ldr r6, =0x00FF0000
and r14, r14, r6 @B1
add r10,r10,r14
mov r11, #25 @accumulator += B*25
mla r7, r10, r11, r7
ldr r6, =0x10801080
add r7, r6 @+ (16<<8 + 128) in both halfword lanes
lsr r7, #8
strb r7, [r0],#1 @store Y0
lsr r7,#16
strb r7, [r0],#1 @store Y1
@ldmfd sp!, {r14} @load r14
@----------------------------------row 1 -- UV (even pixel only)--------------------------
mov r10, r9 @2x2 subsampling: chroma from pixel 0 only
ldr r7,=0x00008080 @first chroma accumulator = 128<<8 + 128
mov r12,r7 @second chroma accumulator
ldr r6, =0x000000FF
and r10, r10, r6 @byte0 (labeled B)
mov r11, #112 @acc1 += B*112
mla r7, r10, r11, r7
mov r11, #18 @acc2 -= B*18
mul r11, r10, r11
sub r12, r12, r11
mov r10, r9, lsr #16
ldr r6, =0x000000FF
and r10, r10, r6 @byte2 (labeled R)
mov r11, #38 @acc1 -= R*38
mul r11, r10, r11
sub r7, r7, r11
mov r11, #112 @acc2 += R*112
mla r12, r10, r11, r12
mov r10, r9,lsr #8
ldr r6, =0x000000FF
and r10, r10, r6 @G0
mov r11, #74 @acc1 -= G*74
mul r11, r10, r11
sub r7, r7, r11
mov r11, #94 @acc2 -= G*94
mul r11, r10, r11
sub r12, r12, r11
lsr r7, #8 @ >>8
strb r7, [r1],#1 @store first chroma byte (Cb under this labeling; see header note)
lsr r12, #8 @ >>8
strb r12, [r1],#1 @store second chroma byte
@----------------------------------row 2 -- Y---------------------------------------------
@stmfd sp!, {r14} @backup r14
ldr r9, [r5], #4 @load pixel 0 of second row
ldr r12, [r5], #4 @load pixel 1
mov r10, r9,lsr #16 @byte2 of pixel 0 (R)
mov r14, r12 @byte2 of pixel 1 already at bits 16-23
ldr r6, =0x000000FF
and r10, r10, r6 @R0
ldr r6, =0x00FF0000
and r14, r14, r6 @R1
add r10,r10,r14
mov r11, #66 @accumulator = R*66
mul r7, r10, r11
mov r10, r9,lsr #8 @G of pixel 0
mov r14, r12,lsl #8 @G of pixel 1
ldr r6, =0x000000FF
and r10, r10, r6 @G0
ldr r6, =0x00FF0000
and r14, r14, r6 @G1
add r10,r10,r14
mov r11, #129 @accumulator += G*129
mla r7, r10, r11, r7
mov r10, r9 @byte0 of pixel 0 (B)
mov r14, r12,lsl #16 @byte0 of pixel 1 to bits 16-23
ldr r6, =0x000000FF
and r10, r10, r6 @B0
ldr r6, =0x00FF0000
and r14, r14, r6 @B1
add r10,r10,r14
mov r11, #25 @accumulator += B*25
mla r7, r10, r11, r7
ldr r6, =0x10801080
add r7, r6 @+ (16<<8 + 128) in both halfword lanes
lsr r7, #8
strb r7, [r4],#1 @store Y0 of row 2
lsr r7,#16
strb r7, [r4],#1 @store Y1 of row 2
@ldmfd sp!, {r14} @load r14
subs r8,r8,#2 @nWidth2 -= 2
BGT LOOP_NWIDTH2 @loop while > 0
NO_UNALIGNED: @reached directly when nWidth is a multiple of 16
@-----------------------------------------------------------------------------
sub r8, r3, #16 @reset nWidthTmp = nWidth - 16 for the next row pair
add r0, r0, r3 @pDstY skips the row already written via r4
add r2, r2, r3, lsl #2 @pSrcRGB skips the row consumed via r5
add r4, r4, r3 @pDstY2 += nWidth
add r5, r5, r3, lsl #2 @pSrcRGB2 += nWidth*4
ldmfd sp!, {r12} @restore remaining-height counter
subs r12,r12,#2 @nHeight -= 2
BGT LOOP_NHEIGHT2 @loop while rows remain
ldmfd sp!, {r4-r12,pc} @ restore registers and return
.fnend