天天看点

windbg调试驱动自旋锁死锁

    驱动程序中常用自旋锁来保护互斥资源。由于驱动常以异步的方式工作,很难保证不发生死锁,这就需要一种调试命令来查看引发死锁的位置。windbg的!running扩展提供了这样的能力。看看windbg help的说明:

!running
显示正在处理器上运行的线程
The !running extension displays a list of running threads on all processors of the target computer.      

    自旋锁在等待期间不会让出CPU,而是一直占着处理器空转(可谓"尸位素餐"),所以陷入自旋锁死锁的线程必然出现在!running的输出里,这条命令正适合用来定位它。

    下面的代码片段模拟了这样的场景:2个线程以不同的顺序获得自旋锁并且不释放锁,这样必然引起死锁:

#include <wdm.h>

/*
 * Context handed to both demo threads as their start context.
 * Holds the two spin locks that the threads acquire in opposite order.
 */
typedef struct _THREADCTX
{
  KSPIN_LOCK spin1; /* taken first by KeThreadRoutine2, second by KeThreadRoutine1 */
  KSPIN_LOCK spin2; /* taken first by KeThreadRoutine1, second by KeThreadRoutine2 */
}THREADCTX;

/*
 * Demo thread #1: acquires spin2, then spin1, and never releases either.
 *
 * StartContext - pointer to the shared THREADCTX holding both spin locks.
 *
 * This routine never returns. KeThreadRoutine2 takes the same two locks in
 * the opposite order, which is what produces the deadlock being demonstrated.
 */
VOID KeThreadRoutine1(void* StartContext)
{
  THREADCTX* ctx = (THREADCTX*)StartContext;
  unsigned int loops = 0;
  LARGE_INTEGER interval;
  KEVENT evt;
  KIRQL savedIrqlSpin1, savedIrqlSpin2;

  /* Negative value = relative interval in 100 ns units, i.e. one second. */
  interval = RtlConvertLongToLargeInteger(-10 * 1000 * 1000 * 1);
  KeInitializeEvent(&evt, SynchronizationEvent, FALSE);

  /* Lock 2 first, then lock 1; the other thread acquires them in the reverse order. */
  KeAcquireSpinLock(&ctx->spin2, &savedIrqlSpin2);
  KeAcquireSpinLock(&ctx->spin1, &savedIrqlSpin1);

  /*
   * Hold both locks forever.
   * NOTE(review): KeAcquireSpinLock raises IRQL to DISPATCH_LEVEL, while
   * KeDelayExecutionThread is documented for IRQL <= APC_LEVEL. Left as-is
   * because this sample exists only to hang the machine for the debugger.
   */
  for (;;)
  {
    KeDelayExecutionThread(KernelMode, FALSE, &interval);
    ++loops;
  }
}

/*
 * Demo thread #2: acquires spin1, then spin2, and never releases either.
 *
 * StartContext - pointer to the shared THREADCTX holding both spin locks.
 *
 * This routine never returns. KeThreadRoutine1 takes the same two locks in
 * the opposite order, which is what produces the deadlock being demonstrated.
 */
VOID KeThreadRoutine2(void* StartContext)
{
  THREADCTX* ctx = (THREADCTX*)StartContext;
  unsigned int loops = 0;
  LARGE_INTEGER interval;
  KEVENT evt;
  KIRQL savedIrqlSpin1, savedIrqlSpin2;

  /* Negative value = relative interval in 100 ns units, i.e. one second. */
  interval = RtlConvertLongToLargeInteger(-10 * 1000 * 1000 * 1);
  KeInitializeEvent(&evt, SynchronizationEvent, FALSE);

  /* Lock 1 first, then lock 2; the reverse of KeThreadRoutine1. */
  KeAcquireSpinLock(&ctx->spin1, &savedIrqlSpin1);
  KeAcquireSpinLock(&ctx->spin2, &savedIrqlSpin2);

  /*
   * Hold both locks forever.
   * NOTE(review): KeAcquireSpinLock raises IRQL to DISPATCH_LEVEL, while
   * KeDelayExecutionThread is documented for IRQL <= APC_LEVEL. Left as-is
   * because this sample exists only to hang the machine for the debugger.
   */
  for (;;)
  {
    KeDelayExecutionThread(KernelMode, FALSE, &interval);
    ++loops;
  }
}

/*
 * Driver entry point for the spin-lock deadlock demo.
 *
 * Initializes the two spin locks in a shared THREADCTX and starts two system
 * threads (KeThreadRoutine1 and KeThreadRoutine2) that acquire those locks in
 * opposite order.
 *
 * drvObj  - driver object passed in by the loader (unused).
 * regPath - registry path of the driver (unused).
 *
 * Returns STATUS_SUCCESS when both threads were created, otherwise
 * STATUS_FAILED_DRIVER_ENTRY.
 */
NTSTATUS DriverEntry(PDRIVER_OBJECT drvObj, PUNICODE_STRING regPath)
{
  /*
   * Static storage: both threads keep using this context long after
   * DriverEntry has returned, so it must not live on DriverEntry's stack.
   */
  static THREADCTX threadCtx;
  NTSTATUS status;
  HANDLE threadHd;

  UNREFERENCED_PARAMETER(drvObj);
  UNREFERENCED_PARAMETER(regPath);

  KeInitializeSpinLock(&threadCtx.spin1);
  KeInitializeSpinLock(&threadCtx.spin2);

  /* Create the first thread; the client id is not needed, so pass NULL. */
  status = PsCreateSystemThread(&threadHd, 0, NULL, NULL, NULL, KeThreadRoutine1, &threadCtx);
  if (!NT_SUCCESS(status))
  {
    return STATUS_FAILED_DRIVER_ENTRY;
  }
  /* The handle is never used again; close it so it does not leak. */
  ZwClose(threadHd);

  /* Create the second thread, which takes the locks in the opposite order. */
  status = PsCreateSystemThread(&threadHd, 0, NULL, NULL, NULL, KeThreadRoutine2, &threadCtx);
  if (!NT_SUCCESS(status))
  {
    /*
     * NOTE(review): the first thread is already running and never exits, so
     * failing here may let the image be unloaded underneath it. The original
     * return value is kept; confirm whether this path needs real cleanup.
     */
    return STATUS_FAILED_DRIVER_ENTRY;
  }
  ZwClose(threadHd);

  return STATUS_SUCCESS;
}

    加载驱动后,系统可能马上卡死。用windbg查看一下发生了什么:

kd> !running -it

System Processors: (00000001) 
  Idle Processors: (00000000) 
;Current给出了占用CPU运行的thread
       Prcbs     Current   Next    
  0    82b30d20  8689d020            ................

ChildEBP RetAddr  
8cae9c08 82a7238f nt!RtlpBreakWithStatusInstruction
8cae9c10 82a72361 nt!KdCheckForDebugBreak+0x22
8cae9c40 82a721ef nt!KeUpdateRunTime+0x164
8cae9c98 82a77577 nt!KeUpdateSystemTime+0x613
8cae9c98 82e1a749 nt!KeUpdateSystemTimeAssist+0x13
8cae9d1c 9447b04f hal!KeAcquireSpinLockRaiseToSynch+0x39
8cae9d50 82c1566d deadlock!KeThreadRoutine1+0x3f [c:\studio\deadlock\deadlock.c @ 18]  <---发生死锁的模块
8cae9d90 82ac70d9 nt!PspSystemThreadStartup+0x9e
00000000 00000000 nt!KiThreadStartup+0x19      

    !running输出了发生死锁的线程对象(ETHREAD)地址和线程堆栈,可以把Current列中的地址传给!thread查看更详细的信息:

kd> !thread 8689d020            
THREAD 8689d020  Cid 0004.0dc0  Teb: 00000000 Win32Thread: 00000000 RUNNING on processor 0
Not impersonating
DeviceMap                 89e088d8
Owning Process            84fe8ae8       Image:         System
Attached Process          N/A            Image:         N/A
Wait Start TickCount      8955           Ticks: 1292 (0:00:00:20.155)
Context Switch Count      1             
UserTime                  00:00:00.000
KernelTime                00:00:20.155
Win32 Start Address deadlock!KeThreadRoutine1 (0x9447b010)
Stack Init 8cae9fd0 Current 8cae9d84 Base 8caea000 Limit 8cae7000 Call 0
Priority 8 BasePriority 8 UnusualBoost 0 ForegroundBoost 0 IoPriority 2 PagePriority 5
ChildEBP RetAddr  Args to Child              
8cae9c08 82a7238f 00000001 82a72361 00000000 nt!RtlpBreakWithStatusInstruction (FPO: [1,0,0])
8cae9c10 82a72361 00000000 00000000 00009d88 nt!KdCheckForDebugBreak+0x22 (FPO: [0,0,0])
8cae9c40 82a721ef 82e1a749 a7ff8a01 00000000 nt!KeUpdateRunTime+0x164
8cae9c98 82a77577 95bc5002 95bc5002 000000d1 nt!KeUpdateSystemTime+0x613
8cae9c98 82e1a749 95bc5002 95bc5002 000000d1 nt!KeUpdateSystemTimeAssist+0x13 (FPO: [0,2] TrapFrame @ 8cae9cac)
8cae9d1c 9447b04f 00000dbc ff676980 ffffffff hal!KeAcquireSpinLockRaiseToSynch+0x39 (FPO: [0,0,0])
8cae9d50 82c1566d 807f1ad0 d1cb3c49 00000000 deadlock!KeThreadRoutine1+0x3f (FPO: [Non-Fpo]) (CONV: stdcall) [c:\studio\deadlock\deadlock.c @ 18]
8cae9d90 82ac70d9 9447b010 807f1ad0 00000000 nt!PspSystemThreadStartup+0x9e
00000000 00000000 00000000 00000000 00000000 nt!KiThreadStartup+0x19      

继续阅读