天天看點

【native crash】android7.0 monkey + gms包應用測試低機率發生整型溢出導緻surfaceflinger主動abort問題分析

1、問題描述

monkey+gms包應用測試過程中偶現monkey測試停止,android重新開機,分析log發現是surfaceflinger 發生signal 6觸發abort,surfaceflinger程序被kill導致init 會kill zygote程序,導致系統重新開機,進而monkey測試異常停止。是以目前問題重點就是分析surfaceflinger為何會主動abort。

2、問題分析

棧資訊如下:

pid: 224, tid: 224, name: surfaceflinger  >>> /system/bin/surfaceflinger<<<

signal 6 (SIGABRT), code -6 (SI_TKILL), faultaddr --------

    r000000000  r1 000000e0  r2 00000006 r3 00000008

    r4ac49c58c  r5 00000006  r6 ac49c534 r7 0000010c

    r8abe32000  r9 00000550  sl 000002a2 fp 00000002

    ip0000000b  sp be888660  lr ac1b2597 pc ac1b4df4  cpsr 20070010

backtrace:

    #00pc 00049df4  /system/lib/libc.so(tgkill+12)

    #01pc 00047593  /system/lib/libc.so(pthread_kill+34)

    #02pc 0001d855  /system/lib/libc.so(raise+10)

    #03pc 000193a1  /system/lib/libc.so(__libc_android_abort+34)

    #04pc 00017014  /system/lib/libc.so(abort+4)

    #05pc 00007ca3  /system/lib/libui.so (_ZN7android4Rect8offsetByEii+92)

    #06pc 00008799  /system/lib/libui.so(_ZN7android6Region9translateERS0_ii+90)

    #07pc 00008931  /system/lib/libui.so(_ZNK7android6Region9translateEii+72)

    #08pc 0001e287 /system/lib/libsurfaceflinger.so

    #09pc 0002945b /system/lib/libsurfaceflinger.so

    #10pc 000272d1 /system/lib/libsurfaceflinger.so

    #11pc 00026ec9  /system/lib/libsurfaceflinger.so

    #12pc 00011dfd  /system/lib/libutils.so(_ZN7android6Looper9pollInnerEi+492)

    #13pc 00011b83  /system/lib/libutils.so(_ZN7android6Looper8pollOnceEiPiS1_PPv+26)

    #14pc 0001cfd5 /system/lib/libsurfaceflinger.so

    #15pc 00026a99 /system/lib/libsurfaceflinger.so (_ZN7android14SurfaceFlinger3runEv+10)

    #16pc 0000102b  /system/bin/surfaceflinger

    #17pc 00016c2d  /system/lib/libc.so(__libc_init+48)

    #18pc 00000e1c  /system/bin/surfaceflinger

從上面棧資訊看是libui.so中調用android::Rect::offsetBy()函數時主動abort,通過addr2line解析棧資訊分析。

addr2line -e lib/libui.so 00007ca3 00008799 00008931

#5

/proc/self/cwd/frameworks/native/libs/ui/Rect.cpp:68 (discriminator 1)

67Rect& Rect::offsetBy(int32_t x, int32_t y) {

68    left += x;

69    top += y;

70    right += x;

71    bottom += y;

72    return *this;

73}

#6

/proc/self/cwd/frameworks/native/libs/ui/Region.cpp:745

736void Region::translate(Region& reg, int dx, int dy)

737{

738    if ((dx || dy) && !reg.isEmpty()) {

739#if VALIDATE_REGIONS

740        validate(reg, "translate (before)");

741#endif

742        size_t count = reg.mStorage.size();

743        Rect* rects = reg.mStorage.editArray();

744        while (count) {

745            rects->offsetBy(dx, dy);

746            rects++;

747            count--;

748        }

749#if VALIDATE_REGIONS

750        validate(reg, "translate (after)");

751#endif

752    }

753}

#7

/proc/self/cwd/frameworks/native/libs/ui/Region.cpp:758

755void Region::translate(Region& dst, const Region& reg, int dx, int dy)

756{

757    dst = reg;

758    translate(dst, dx, dy);

759}

從#5代碼看并沒有代碼主動調用abort,也沒有看到通過log或函數間接觸發abort的相關代碼,就是幾個指派和傳回目前的this指針。那為何會發生abort呢?一時沒有明白,這時需要通過gdb分析coredump中彙編代碼來看下:

(gdb) f 5

#5  0xac118ca6 in android::Rect::offsetBy(this=0x0, x=<optimized out>, y=<optimized out>)

    at frameworks/native/libs/ui/Rect.cpp:68

 注:這裡this指針為0,是gdb解析時有bug導緻,不是this真實為0。

(gdb) disass

Dump ofassembler code for function android::Rect::offsetBy(int, int):

   0xac118c46 <+0>:        push     {r7,lr}

   0xac118c48 <+2>:        ldr      r3,[r0, #0]//r3 = [r0] = [this]

   0xac118c4a <+4>:        mov.w    lr,#1

   0xac118c4e <+8>:        add.w    r12,r3, r1

   0xac118c52 <+12>:       cmp      r12,r3//結果會影響cpsr中的标志位(後面的vc條件檢查的是V溢出标志位)

   0xac118c54 <+14>:       mov.w    r3,#1

   0xac118c58 <+18>:       it       vc     //這裡it指令與之後的movvc指令配合:若未發生溢出(V标志位為0)則将r3清0,否則r3保持為1

   0xac118c5a <+20>:       movvc    r3,#0

   0xac118c5c <+22>:       cbnz     r3,0xac118ca2 <android::Rect::offsetBy(int,int)+92>//這裡r3是非0則跳轉到abort,也就是r3有發生溢出就會跳轉到abort函數

   0xac118c5e <+24>:       str.w    r12,[r0]

   0xac118c62 <+28>:       ldr      r3,[r0, #4]     

   0xac118c64 <+30>:       add.w    r12,r3, r2

   0xac118c68 <+34>:       cmp      r12,r3

   0xac118c6a <+36>:       it       vc

   0xac118c6c <+38>:       movvc.w  lr,#0

   0xac118c70 <+42>:       cmp.w    lr,#0

   0xac118c74 <+46>:       bne.n    0xac118ca2 <android::Rect::offsetBy(int,int)+92>

   0xac118c76 <+48>:       str.w    r12,[r0, #4]

   0xac118c7a <+52>:       ldr      r3,[r0, #8]             

   0xac118c7c <+54>:       add.w    r12,r3, r1              

   0xac118c80 <+58>:       movs     r1,#1

   0xac118c82 <+60>:       cmp      r12,r3

   0xac118c84 <+62>:       mov.w    r3,#1

   0xac118c88 <+66>:       it       vc

   0xac118c8a <+68>:       movvc    r3,#0

   0xac118c8c <+70>:       cbnz     r3,0xac118ca2 <android::Rect::offsetBy(int, int)+92>

   0xac118c8e <+72>:       str.w    r12,[r0, #8]

   0xac118c92 <+76>:       ldr      r3,[r0, #12]     

   0xac118c94 <+78>:       add      r2,r3          

   0xac118c96 <+80>:       cmp      r2,r3

   0xac118c98 <+82>:       it       vc

   0xac118c9a <+84>:       movvc    r1,#0

   0xac118c9c <+86>:       cbnz     r1,0xac118ca2 <android::Rect::offsetBy(int, int)+92>

   0xac118c9e <+88>:       str      r2,[r0, #12]

   0xac118ca0 <+90>:       pop      {r7,pc}

   0xac118ca2 <+92>:       blx      0xac115e34//在plt表中找到<abort@plt>

從上面彙編我們可知代碼中确實會調用到abort,#5棧中代碼abort可能是C/C++編譯器中引入的功能(例如Clang的整數溢出檢查-fsanitize=signed-integer-overflow所插入的溢出檢測代碼,需結合libui的編譯選項确認)。那這裡為何會觸發abort呢?

從上面的彙編可以看出,這裡是判斷寄存器中的值是否有發生溢出,如果有發生溢出的可能就會觸發abort,是以這裡abort的原因可以推斷就是由于int32溢出導緻了abort。

這裡Rect&Rect::offsetBy(int32_t x, int32_t y)函數及函數中使用變量相關的結構如下.

typedef struct ARect {

#ifdef __cplusplus

   typedef int32_t value_type;

#endif

   int32_t left;

   int32_t top;

   int32_t right;

   int32_t bottom;

} ARect;

由于Rect&Rect::offsetBy(int32_t x, int32_t y)函數有三個參數this、x、y,是以是這三個參數中的内容不正确才可能導緻後面計算時發生整型溢出。

67Rect& Rect::offsetBy(int32_t x, int32_t y) {

68    left += x;

69    top += y;

70    right += x;

71    bottom += y;

72    return *this;

73}

下面我們就通過分析coredump來看看推斷是否正确。

#0 tgkill () at bionic/libc/arch-arm/syscalls/tgkill.S:10

#1 0xaacea596 in pthread_kill (t=<optimized out>, sig=6) atbionic/libc/bionic/pthread_kill.cpp:45

#2 0xaacc0858 in raise (sig=225) at bionic/libc/bionic/raise.cpp:34

#3 0xaacbc3ce in __libc_android_abort () at bionic/libc/bionic/abort.cpp:57

#4 0xaacba018 in abort () at bionic/libc/arch-arm/bionic/abort_arm.S:43

#5 0xaac9bca6 in android::Rect::offsetBy (this=0x0, x=<optimizedout>, y=<optimized out>) at frameworks/native/libs/ui/Rect.cpp:68

#6 0xaac9c79c in android::Region::translate (reg=..., dx=<optimizedout>, dy=<optimized out>) at frameworks/native/libs/ui/Region.cpp:745

#7 0xaac9c934 in translate (dst=..., reg=..., dx=110, dy=2147482368) atframeworks/native/libs/ui/Region.cpp:758

#8 android::Region::translate (this=<optimized out>, x=224, y=6) atframeworks/native/libs/ui/Region.cpp:370

#9 0xaac1528a in android::Transform::transform (this=<optimized out>,reg=...) at frameworks/native/services/surfaceflinger/Transform.cpp:236

#10 0xaac2045e inandroid::SurfaceFlinger::handleTransactionLocked (this=<optimized out>,transactionFlags=<optimized out>) atframeworks/native/services/surfaceflinger/SurfaceFlinger_hwc1.cpp:1626

#11 0xaac1e2d4 inandroid::SurfaceFlinger::handleTransaction (this=<optimized out>,transactionFlags=225) atframeworks/native/services/surfaceflinger/SurfaceFlinger_hwc1.cpp:1346

從f7層看傳入f6之後又傳入f5的x和y就是這裡的dx = 110,dy = 2147482368 = 0x7ffffb00

(gdb) f 7

#7  0xac119934 in translate (dst=..., reg=..., dx=110, dy=2147482368)

    at frameworks/native/libs/ui/Region.cpp:758

這裡的dy比較可疑,dy=0x7ffffb00,而INT32_MAX = 0x7fffffff,是以這裡dy傳入後進行加操作就會導致整型溢出的發生

f 7入參reg如下

reg = @0xbe8887c4: {-----》這個為f 5中對應的this指針

  <android::LightFlattenable<android::Region>> = {<No data fields>}, 

  members of android::Region: 

  static INVALID_REGION = {

    <android::LightFlattenable<android::Region>> = {<No data fields>}, 

    members of android::Region: 

    static INVALID_REGION = <same as static member of an already seen type>, 

    mStorage = {

      <android::VectorImpl> = {

        _vptr$VectorImpl = 0xac11ebd4 <vtable for android::Vector<android::Rect>+8>, 

        mStorage = 0xabe2d100, 

        mCount = 1, 

        mFlags = 7, 

        mItemSize = 16

      }, <No data fields>}

  }, 

  mStorage = {

    <android::VectorImpl> = {

      _vptr$VectorImpl = 0xac11ebd4 <vtable for android::Vector<android::Rect>+8>, 

      mStorage = 0xa8a34dc0, 

      mCount = 1, 

      mFlags = 7, 

      mItemSize = 16

    }, <No data fields>}

}

(gdb) p /x *(android::Rect*)0xa8a34dc0

$3 = {

  <ARect> = {

    left = 0x0, 

    top = 0x0, 

    right = 0x2a2,--》674 

    bottom = 0x550--》1360

  }, 

從這裡可以知道f5中Rect結構中的值

bottom += y;===》0x7ffffb00+0x550 = 0x80000050,超過INT32_MAX(0x7fffffff),說明會發生整型溢出

這裡需要繼續分析為何傳入的dy會變成一個0x7ffffb00這麼大的值。下面看下f8~f11的代碼

addr2line -e lib/libsurfaceflinger.so 0001e287 0002945b 000272d1 00026ec9
#8
frameworks/native/services/surfaceflinger/Transform.cpp:236
233    } else {
234        int xpos = floorf(tx() + 0.5f);
235        int ypos = floorf(ty() + 0.5f);
236        out = reg.translate(xpos, ypos);
237    }
238    return out;
239}
#9
frameworks/native/services/surfaceflinger/SurfaceFlinger_hwc1.cpp:1626
1618        for (size_t i=0 ; i<count ; i++) {
1619            const sp<Layer>& layer(layers[i]);
1620            if (currentLayers.indexOf(layer) < 0) {
1625                const Layer::State& s(layer->getDrawingState());
1626                Region visibleReg = s.active.transform.transform(
1627                        Region(Rect(s.active.w, s.active.h)));
1628                invalidateLayerStack(s.layerStack, visibleReg);
1629            }
1630        }
1631    }
#10
frameworks/native/services/surfaceflinger/SurfaceFlinger_hwc1.cpp:1346
1344
1345    transactionFlags = getTransactionFlags(eTransactionMask);
1346    handleTransactionLocked(transactionFlags);
1347
1348    mLastTransactionTime = systemTime() - now;
1349    mDebugInTransaction = 0;
1350    invalidateHwcGeometry();
1351    // here the transaction has been committed
1352}
#11
frameworks/native/services/surfaceflinger/SurfaceFlinger_hwc1.cpp:932
929 bool SurfaceFlinger::handleMessageTransaction() {
930    uint32_t transactionFlags = peekTransactionFlags(eTransactionMask);
931    if (transactionFlags) {
932        handleTransaction(transactionFlags);
933        return true;
934    }
935    return false;
936}      

從之前棧資訊分析,dx和dy是f8傳入的x和y,而f8中傳入的x和y是在f9中由s.active.transform的tx()、ty()計算得到(xpos、ypos),從棧f 8 的入參資訊看this = 0,x=224, y=6,與f7的參數完全對應不上,這裡可能是gdb在回溯棧資訊時沒能很好的回溯(gdb可能存在bug)導致參數出現錯誤,x= 224變成了目前出問題時的程序pid,而y=6變成了異常信号的值SIGABRT = 6,這裡會導致我們分析方向發生變化,産生錯誤判斷,誤認為是記憶體錯誤或者this為0導致的問題,給分析問題增加了難度。下面就來分析f10~f7的過程,看為何dy傳入了一個異常的值。由于這個過程中C++有大量模闆類的使用導致彙編代碼冗長,是以下面分析有省略。

f 10彙編,sp = 0xbe888770

  0xaac20402 <+2210>:     add.w    r0,r6, #368     ; 0x170

……//這裡的r0 = s.active.transform      
  0xaac20418 <+2232>:     str      r0,[sp, #64]    ; 0x40
……//這裡把s.active.transform放到了[sp, #64]      
  0xac144452 <+2290>:     add      r4, sp, #104     ; 0x68  
   0xac144454 <+2292>:     ldr      r1, [sp, #64]    ; 0x40         

  0xac144456 <+2294>:     mov      r2, r5  

  0xac144458 <+2296>:     mov      r0, r4  

  0xac14445a <+2298>:     bl       0xac139218<android::Transform::transform(android::Region const&) const>

=> 0xac14445e <+2302>:    mov      r0,r5

  0xac144460 <+2304>:     blx      0xac12bcf4

//r4 = sp+0x68 = 0xbe888770+0x68 = 0xBE8887D8

//r1 = [sp+0x40] = [0xBE8887B0] =0xa8cb8d70

//r2 = r5 = sp+0x54 = 0xBE8887C4

//r0 = r4 = sp+0x68 = 0xBE8887D8

通過上面彙編我們可以計算出transform函數的入參

Region visibleReg = s.active.transform.transform(Region(Rect(s.active.w, s.active.h)));

f 9層彙編代碼

#9  0xac13928ain android::Transform::transform (this=<optimized out>, reg=...)

    atframeworks/native/services/surfaceflinger/Transform.cpp:236

(gdb) disass 0xac13928b

Dump of assembler code for functionandroid::Transform::transform(android::Region const&) const:

  0xac139218 <+0>:        stmdb    sp!, {r4, r5, r6, r7, r8, lr}

  0xac13921c <+4>:        vpush    {d8-d9}

  0xac139220 <+8>:        sub      sp, #80  ;0x50

  0xac139222 <+10>:       mov      r4, r0           //r4= r0 = this = 0xbe8887d8

  0xac139224 <+12>:       ldr      r0, [pc, #240]   ; (0xac139318 <android::Transform::transform(android::Regionconst&) const+256>)

  0xac139226 <+14>:       mov      r6, r2           //r6= r2 = 0xBE8887C4            

  0xac139228 <+16>:       mov      r5, r1           //r5= r1 = 0xa8cb8d70

  0xac13922a <+18>:       add      r0, pc

  0xac13922c <+20>:       ldr      r0, [r0, #0]

   0xac13922e<+22>:       ldr      r0, [r0, #0]

  0xac139230 <+24>:       str      r0, [sp, #76]    ; 0x4c//[sp+0x4c] = [BE888744] = stack_chk_guard --》0xac04db93

  0xac139232 <+26>:       mov      r0, r4       //r0 = r4 = 0xbe8887d8

  0xac139234 <+28>:       blx      0xac12bc40       //跳轉到got表0xAC164900<[email protected]>

  0xac139238 <+32>:       mov      r0, r5           //r0= r5 =0xa8cb8d70

  0xac13923a <+34>:       bl       0xac139320 <android::Transform::type()const>

  0xac13923e <+38>:       cmp      r0, #2

  0xac139240 <+40>:       bcs.n    0xac1392b4<android::Transform::transform(android::Region const&) const+156>

  0xac139242 <+42>: vmov.f32 s16, #96 ; 0x60 //s16 = 0.5f

   0xac139246 <+46>: vldr s0, [r5, #24] //mMatrix[2][0] = 0 s0 = [0xa8cb8d70+0x18] = [0xA8CB8D88] = 110

   0xac13924a <+50>: vldr s18, [r5, #28] //mMatrix[2][1] = 0 s18 = [0xa8cb8d70+0x1c] = [0xA8CB8D8C] = 2.14748237e+09

   0xac13924e <+54>: vadd.f32 s0, s0, s16 //s0 = 110+0.5f

   0xac139252 <+58>: vmov r0, s0 //tx()

   0xac139256 <+62>: blx 0xac12c51c // floorf()

   0xac13925a <+66>: vadd.f32 s0, s18, s16 

   0xac13925e <+70>: vmov s16, r0 //s16=floorf(tx() + 0.5f) 

   0xac139262 <+74>: vmov r1, s0

   0xac139266 <+78>: mov r0, r1 //ty()

   0xac139268 <+80>: blx 0xac12c51c // floorf()

   0xac13926c <+84>: vmov s0, r0 //s0 = floorf(ty() + 0.5f)

   0xac139270 <+88>: add r5, sp, #4 //r5 = sp+4 = 0xbe8886f8+4 = 0xbe8886fc

   0xac139272 <+90>: vcvt.s32.f32 s2, s16 //這裡就是将s16浮點型轉化為整型  

   0xac139276 <+94>: mov r1, r6 //r1 = r6 = r2 = 0xbe8887c4

   0xac139278 <+96>: vcvt.s32.f32 s0, s0 //這裡就是将s0浮點型轉化為整型

   0xac13927c <+100>: mov r0, r5 //r0 = 0xbe8886fc

   0xac13927e <+102>: vmov r2, s2 //r2 = s2 = dx = 110

   0xac139282 <+106>: vmov r3, s0 //r3 = s0 = dy = 2147482368

   0xac139286 <+110>: blx 0xac12c528

=> 0xac13928a <+114>:	mov	r0, r4      

這裡我們需要重點看下紅色背景部分

r5 = r1 = 0xa8cb8d70,這裡的r5用于計算dy的值即mMatrix的值。

r6 = r2 = 0xBE8887C4,這裡r2中儲存的是android::Region類的this指針。

r4 = r0 = 0xbe8887d8,這裡r0儲存的是傳回值Region物件(out)的位址;android::Transform類的this指針實際在r1(即r5 = 0xa8cb8d70)中,是以後面才通過r5讀取mMatrix。

f 8層彙編

#8 android::Region::translate (this=<optimized out>, x=224, y=6) atframeworks/native/libs/ui/Region.cpp:370

 (gdb)disass 0xac1198e9

Dump of assembler code for functionandroid::Region::translate(int, int) const:

  0xac1198e8 <+0>:        push     {r4, r5, r6, r7, lr}

  0xac1198ea <+2>:        sub      sp, #20

   0xac1198ec <+4>:        mov      r7, r0   //r7= r0 這裡的r0不是this,this指針儲存在後面的r1裡面,那麼這裡的r0是誰?從上一層棧看r0 = r5 = sp+4 = 0xbe8886fc

   0xac1198ee <+6>:        ldr      r0, [pc, #92]    ; r0=[0xac11994c] = 0x0000548c(0xac11994c<android::Region::translate(int, int) const+100>)

  0xac1198f0 <+8>:        mov      r5, r2   //r5= r2 = x,r3 = y

  0xac1198f2 <+10>:       mov      r6, r1   //r6= r1 = this

  0xac1198f4 <+12>:       add      r0, pc   //r0= r0+pc = 0xac1198f8+0x0000548c = 0xAC11ED84

  0xac1198f6 <+14>:       movs     r1, #16  //r1= 16

  0xac1198f8 <+16>:       movs     r2, #7   //r2= 7

  0xac1198fa <+18>:       mov      r4, r3   //r4= r3 = y

  0xac1198fc <+20>:       ldr      r0, [r0, #0]     //r0 = [0xAC11ED84] = 0xac1f7008

  0xac1198fe <+22>:       ldr      r0, [r0, #0]     //r0 = [0xac1f7008] = 0xac1f7008 <__stack_chk_guard>:         0xac04db93

  0xac119900 <+24>:       str      r0, [sp, #16]    //[sp+0x10] = r0 = 0xac04db93,将__stack_chk_guard插入棧

  0xac119902 <+26>:       mov      r0, r7           //r0= r7 = 從上一層棧看r0 = r5 = sp+4 = 0xbe8886fc

  0xac119904 <+28>:       blx      0xac1161d0       //got表0xAC11EF58<[email protected]>

  0xac119908 <+32>:       ldr      r0, [pc, #68]    ; //r0 = [0xac119950] =0x000054b0 (0xac119950<android::Region::translate(int, int) const+104>)

  0xac11990a <+34>:       vmov.i32 q8, #0   ;0x00000000

  0xac11990e <+38>:       mov      r1, sp           //r1= sp =0xbe8886f8

  0xac119910 <+40>:       add      r0, pc           //r0= 0x000054b0+0xac119914 = 0xAC11EDC4

  0xac119912 <+42>:       vst1.64  {d16-d17}, [r1]------------------》這個操作是執行到rect()模闆類操作

  0xac119916 <+46>:       ldr      r0, [r0, #0]     //r0 = [0xAC11EDC4] = 0xac11ebcc---》ac11e000-ac11f000r--p 0000c000 b3:14 1439      /system/lib/libui.so

  0xac119918 <+48>:       adds     r0, #8   //r0= 0xac11ebcc+8 = AC11EBD4

  0xac11991a <+50>:       str      r0, [r7, #0]     //[r7] =[0xbe8886fc] = r0 = 0xac11ebd4

  0xac11991c <+52>:       mov      r0, r7                    //r0= r7=0xbe8886fc ---》 0xac11ebd4         0xabaa9650       0x00000001       0x00000007

  0xac11991e <+54>:       blx      0xac1161dc       //got表0xAC11EF5C<[email protected]>

  0xac119922 <+58>:       mov      r0, r7            //r0 = r7=0xbe8886fc

  0xac119924 <+60>:       mov      r1, r6           //r1= r6 = this = 0xbe8887c4

  0xac119926 <+62>:       blx      0xac11620c       //got表0xac11ef6c<[email protected]>

  0xac11992a <+66>:       mov      r0, r7   //r0= 0xbe8886fc

  0xac11992c <+68>:       mov      r1, r5   //r1= r5 = x = 110

  0xac11992e <+70>:       mov      r2, r4   //r2= r4 = r3 = y = 2147482368

  0xac119930 <+72>:       blx      0xac116260       //got表0xac11ef88<[email protected]>這裡調用了0xac11973f

   這裡直接調用了android::Region::translate(android::Region&,int, int)編譯器将Region::translate(intx, int y)優化了

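這裡需要重點分析,也是了解這塊的難點,正常情況下C++中一個函數的入參r0是this指針,r1是第一個參數,r2是第二個參數以此類推 。但在f 9 到f 8層棧的過程中,這裡的r0并不是android::Region類的this指針,而這裡的r0儲存的是sp+4這個位址,r1儲存的是android::Region類的this指針,而r2儲存的是dx,r3儲存的是dy。原因是Region::translate(int, int) const以值傳回一個Region物件,按ARM過程調用标準(AAPCS),這類傳回值由調用者預留記憶體,并把其位址作為隐含的第一個參數放在r0中,this及其餘參數依次後移。這樣就可以了解f8中的傳參過程了,同樣這個傳參過程在f10到f9也有,這裡由于篇幅不再詳述。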

    根據f 9中算出的dx和dy可以知道f 8層的入參x和y就是dx = 110和dy = 2147482368 = 0x7ffffb00,也就是f7層中的入參。而這個值是通過ty()函數中mMatrix變量擷取的。

mMatrix的計算是在F9中計算的

0xac139246 <+46>: vldr s0, [r5, #24]

//mMatrix[2][0] = 0 s0 = [0xa8cb8d70+0x18] = [0xA8CB8D88] = 110

   0xac13924a <+50>: vldr s18, [r5, #28]

//mMatrix[2][1] = 0 s18 = [0xa8cb8d70+0x1c] = [0xA8CB8D8C] = 2.14748237e+09

這裡計算出mMatrix[2][1]的值是2.14748237e+09,經過浮點轉換為整型後就是2147482368,也就是出錯時候的值了。

從這裡看這個dy即s.active.transform的y的值是上層傳下來的。通過代碼分析

frameworks/native/services/surfaceflinger/SurfaceFlinger_hwc1.cpp

2275uint32_t SurfaceFlinger::setClientStateLocked(

2276        const sp<Client>& client,

2277        const layer_state_t& s)//這裡會傳入layer_state_t& s

2278{

2279    uint32_t flags = 0;

2280    sp<Layer> layer(client->getLayerUser(s.surface));

2281    if (layer != 0) {

2282        const uint32_t what = s.what;

2283        bool positionAppliesWithResize =

2284                what & layer_state_t::ePositionAppliesWithResize;

2285        if (what & layer_state_t::ePositionChanged) {

2286            if (layer->setPosition(s.x, s.y, !positionAppliesWithResize)) {//這裡會調用setPosition設定x和y,而這個x和y就是後面使用的x,y

2287                flags |= eTraversalNeeded;

2288            }

2289        }

frameworks/native/services/surfaceflinger/Layer.cpp

1532bool Layer::setPosition(float x, float y, bool immediate) {

1533    if (mCurrentState.requested.transform.tx() == x && mCurrentState.requested.transform.ty() == y)

1534        return false;

1535    mCurrentState.sequence++;

1536

1537    // We update the requested and active position simultaneously because

1538    // we want to apply the position portion of the transform matrix immediately,

1539    // but still delay scaling when resizing a SCALING_MODE_FREEZE layer.

1540    mCurrentState.requested.transform.set(x, y);

1541    if (immediate && !mFreezePositionUpdates) {

1542        mCurrentState.active.transform.set(x, y);

1543    }

1544    mFreezePositionUpdates = mFreezePositionUpdates || !immediate;

1545

1546    mCurrentState.modified = true;

1547    setTransactionFlags(eTransactionNeeded);

1548    return true;

1549}

1368void SurfaceFlinger::handleTransactionLocked(uint32_t transactionFlags)

1369{

......

1628    if (mLayersRemoved) {

1629        mLayersRemoved = false;

1630        mVisibleRegionsDirty = true;

1631        const size_t count = layers.size();

1632        for (size_t i=0 ; i<count ; i++) {

1633            const sp<Layer>& layer(layers[i]);

1634            if (currentLayers.indexOf(layer) < 0) {

1635                // this layer is not visible anymore

1636                // TODO: we could traverse the tree from front to back and

1637                //       compute the actual visible region

1638                // TODO: we could cache the transformed region

1639                const Layer::State& s(layer->getDrawingState());

1640                Region visibleReg = s.active.transform.transform(

1641                        Region(Rect(s.active.w, s.active.h)));

1642                invalidateLayerStack(s.layerStack, visibleReg);

1643            }

1644        }

1645    }

setClientStateLocked函數對應用戶端是SurfaceComposerClient.cpp,而這裡的x和y也是在SurfaceComposerClient.cpp中設定,下面來看代碼

frameworks/base/core/java/android/view/SurfaceControl.java

406    public void setPosition(float x, float y) {

407        checkNotReleased();

408        nativeSetPosition(mNativeObject, x, y);//上層就是通過這裡寫入了一個異常的值

409    }

frameworks/base/core/jni/android_view_SurfaceControl.cpp 

243static void nativeSetPosition(JNIEnv* env, jclass clazz, jlong nativeObject, jfloat x, jfloat y) {

244    SurfaceControl* const ctrl = reinterpret_cast<SurfaceControl *>(nativeObject);

245    status_t err = ctrl->setPosition(x, y);

246    if (err < 0 && err != NO_INIT) {

247        doThrowIAE(env);

248    }

249}   

frameworks/native/libs/gui/SurfaceControl.cpp

110status_t SurfaceControl::setPosition(float x, float y) {

111    status_t err = validate();

112    if (err < 0) return err;

113    return mClient->setPosition(mHandle, x, y);

114}

frameworks/native/libs/gui/SurfaceComposerClient.cpp   

276status_t Composer::setPosition(const sp<SurfaceComposerClient>& client,

277        const sp<IBinder>& id, float x, float y) {

278    Mutex::Autolock _l(mLock);

279    layer_state_t* s = getLayerStateLocked(client, id);

280    if (!s)

281        return BAD_INDEX;

282    s->what |= layer_state_t::ePositionChanged;

283    s->x = x;

284    s->y = y;

285    return NO_ERROR;

286}

此時代碼流程基本清楚,但為何這裡y值會異常呢?請graphics和view的同僚添加log,複現問題。

Y007-21 14:17:03.185   227   227 E SurfaceFlinger: ts-graphics: transform this=0xa9e57570 xpos=110 ypos=2147483520
Y007-21 14:17:03.185   227   227 E Region  : ts-graphics: translate 1 begin this=0xbeb306c8 result=0xbeb30448 x=110 y=2147483520
Y007-21 14:17:03.185   227   227 E Region  : ts-graphics: translate 2 begin reg=0xbeb306c8, dst=0xbeb30448 dx=110 dy=2147483520
Y007-21 14:17:03.185   227   227 E Region  : ts-graphics: translate 3 reg=0xbeb30448, rects=0xa6a27060 dx=110 dy=2147483520 count=1
Y007-21 14:17:03.185   227   227 E         : ts-graphics: offsetBy this=0xa6a27060 x=110 y=2147483520      

從log上也可以确認,确實是SurfaceControl.java中在setPosition設定的值有問題,view子產品同僚分析後确認是因為monkey測試時部分gms應用popup框的region過大導緻y異常,間接就導緻surfaceflinger發abort,系統發生重新開機。這裡修改比較簡單就是在setPosition中對x和y進行了判斷,解決此問題。