linux 中斷與異常---源碼分析（三）

概述：冷啟動、熱重新開機、非屏蔽中斷的異常入口始終為 0xBFC00000，任何寄存器都改變不了。一些比較頻繁發生的異常，為了效率的考慮有專用的異常入口，如 TLB refill。除此之外，所有其他異常共用一個異常入口，稱為通用異常入口。專用異常入口和通用異常入口可通過寄存器改變。

通用異常發生後， CPU 硬體設定 CAUSE寄存器的 ExcCode位，就跳轉到通用異常入口處， ExcCode 位段用來描述通用異常類型，共 5 位，故而可以描述 2^5 = 32 個異常類型。位于通用異常入口處的是作業系統設定的一個簡單的異常處理程式，它會取出 CAUSE 寄存器的 ExcCode 域（5 位，可以描述 32 個異常），用之索引一個通用異常處理表 (exception_handlers)，并跳轉到異常處理表項所指向的處理程式。

來自硬體的中斷，CPU 會自動将 CAUSE 寄存器的 ExcCode 域(6:2)設為 0，其最終會執行總的中斷處理程式 handle_int。 ExcCode 位為 0 時，隻是籠統地描述為中斷，具體的是何種中斷，還要借助 CAUSE 寄存器的 IP 位(15:8, IP7-IP0)來描述。硬體中斷出現時，CPU 會根據中斷信号的來源，設定 CAUSE 之 IP 位。IP 位共 8 位，每位對應一個中斷。

可以看到，一個硬體中斷的流程應該是這樣的（以鍵盤為例）： 1. 使用者擊鍵後，鍵盤控制器 8042 産生中斷，通過中斷控制器在 CPU 的中斷引腳上引起中斷； 2. CPU 自動設定 CAUSE 的 ExcCode 位為 0，并設定相應的IP位，并跳轉到通用異常入口； 3. 位于通用異常入口處的簡單異常處理程式，根據 ExcCode 的值索引異常處理表 (exception_handlers)，擷取到 0 号異常的處理程式是 handle_int，并跳轉過去； 4. handle_int 根據 CAUSE 之 IP 位的值跳轉到中斷控制器相關的中斷處理函數； 5. 中斷控制器的處理函數讀取中斷控制器的寄存器，通過簡單的計算得到中斷号，進而調用 do_IRQ 進入相應的中斷處理程式。

下面以broadcom CPU_BMIPS4380， linux-3.3.8分析為例：

EBase 寄存器可用于改變異常向量入口，是在 MIPS32/MIPS64 Release 2 規範中引入的： void __cpuinit bmips_ebase_setup (void) { unsigned long new_ebase = ebase;

#if defined(CONFIG_CPU_BMIPS4350) set_uncached_handler(BMIPS_WARM_RESTART_VEC - CKSEG0, &bmips_smp_int_vec, 0x80); __sync(); return; #elif defined(CONFIG_CPU_BMIPS3300) || defined(CONFIG_CPU_BMIPS4380) new_ebase = 0x80000400; bmips_set_reset_vec(0, RESET_FROM_KSEG0); #elif defined(CONFIG_CPU_BMIPS5000) new_ebase = 0x80001000; bmips_set_reset_vec(0, RESET_FROM_KSEG0); write_c0_ebase(new_ebase); #else return; #endif board_nmi_handler_setup = &bmips_nmi_handler_setup; ebase = new_ebase; }

#define cpu_has_mips32r1 1 #define cpu_has_mips32r2 0 #define cpu_has_mips64r1 0 #define cpu_has_mips64r2 0 bmips4380屬于mips32r1， ebase固定在 0x80000400，不可改變，bmips5000的ebase可以通過write_c0_ebase（）設定協處理器來改變，這裡ebase是一個全局變量，後面安裝異常向量處理函數的時候會用到。

1.cache 錯誤異常入口初始化，位于： ifeq ($(CONFIG_BRCMSTB),y) obj-y += c-brcmstb.o cex-gen.o tlb-r4k.o else obj-$(CONFIG_CPU_MIPS32) += c-r4k.o cex-gen.o tlb-r4k.o endif

[arch/mips/mm/c-brcmstb.c] void __cpuinit r4k_cache_init(void) { ... set_uncached_handler (0x100, &except_vec2_generic, 0x80); //cache有專用異常入口 ... } void __cpuinit set_uncached_handler(unsigned long offset, void *addr, unsigned long size) { unsigned long uncached_ebase = CKSEG1ADDR(ebase) ;

if (!addr) panic(panic_null_cerr);

memcpy((void *)(uncached_ebase + offset), addr, size); }

因為發生 cache 錯誤時，可以 cache 的 KSEG0 段不能用了，是以 cache 錯誤異常處理程式被複制到 KSEG1 中的非緩存位址 CKSEG1ADDR(ebase) + 0x100 處（ebase 為預設的 0x80000000 時即 0xA0000000 + 0x100；按上面 BMIPS4380 的設定 ebase 為 0x80000400，則對應 0xA0000400 + 0x100），長度最大為 128 Bytes（0x80），異常處理程式為 except_vec2_generic，定義于： [arch/mips/mm/cex-gen.S ] LEAF(except_vec2_generic) .set noreorder .set noat .set mips0 mfc0 k0,CP0_CONFIG li k1,~CONF_CM_CMASK and k0,k0,k1 ori k0,k0,CONF_CM_UNCACHED mtc0 k0,CP0_CONFIG nop nop nop

j cache_parity_error nop END(except_vec2_generic) 關閉KSEG0的cache功能

2.TLB異常：tlb refill 異常處理程式，不像其他異常處理程式那樣事先編寫好，而是通過一些函數動态生成，然後複制到對應的入口處的。至于為何采取這種方式，主要是因為要根據使用者的配置生成适合各種 MIPS平台的 tlb_refill_handler ，由于要考慮的情況過多，使用通常的條件編譯的方式已經不能滿足需求。位于：[arch/mips/mm/tlb-r4k.c] void __cpuinit tlb_init(void) { ... build_tlb_refill_handler(); } [arch/mips/mm/tlbex.c] void __cpuinit build_tlb_refill_handler(void) { ... build_r4000_tlb_load_handler(); build_r4000_tlb_store_handler(); build_r4000_tlb_modify_handler(); build_r4000_tlb_refill_handler(); } u32 handle_tlbl[FASTPATH_SIZE] __cacheline_aligned; u32 handle_tlbs[FASTPATH_SIZE] __cacheline_aligned; u32 handle_tlbm[FASTPATH_SIZE] __cacheline_aligned; static void __cpuinit build_r4000_tlb_load_handler(void) { u32 *p = handle_tlbl; memset(handle_tlbl, 0, sizeof(handle_tlbl)); wr = build_r4000_tlbchange_handler_head(&p, &l, &r); ... uasm_l_nopage_tlbl(&l, p); build_restore_work_registers(&p); uasm_i_j(&p, (unsigned long)tlb_do_page_fault_0 & 0x0fffffff); uasm_i_nop(&p); ... } 其作用就是構造指令填充handle_tlbl資料區域。handle_tlbl，handle_tlbs，handle_tlbm的入口在通用異常中調用，見後面通用異常介紹。

static u32 tlb_handler[128] __cpuinitdata; static u32 final_handler[64] __cpuinitdata;

static void __cpuinit build_r4000_tlb_refill_handler(void) { memset(tlb_handler, 0, sizeof(tlb_handler)); memset(final_handler, 0, sizeof(final_handler)); ... memcpy((void *)ebase, final_handler, 0x100); //TLB refill有專用異常向量入口 }

3.通用異常處理程式：對于 MIPS 來說，外部中斷一般隻有一個入口（0x180），而且是與很多其他異常共享這一個入口，是以需要額外的代碼來判斷異常的類型。确定是外部中斷後，還需要根據 CAUSE 寄存器的 IP 位來分發（dispatch）中斷。這就造成 MIPS 的中斷延遲（interrupt latency）比較長，是以在 Release 2 中引入了 VINT（向量中斷）和 VEIC（外部中斷控制器）模式。

MIPSr2 architecture supports 3 interrupt modes. It provides interrupt compatibility mode, which behaves identical to MIPSr1. It also supports vectored interrupt (VI) mode, and permits the use of an external interrupt controller (EIC).

VI mode adds the ability to prioritize and vector interrupts to a handler dedicated to that interrupt. Each interrupt starts at the address according to the interrupt signal. It also allows us to configure the space between different entry points (VS of IntCtl register). In EIC mode, the six independent signals become a 6-bit binary number: Zero means no interrupt, others are 63 distinct interrupt codes.

An exception vector is where the exception handling starts. MIPS exceptions are handled either through the general exception vector (offset 0x180), or the special interrupt vector (0x200), based on the value of IV of Cause register. 可以設定intctl寄存器打開VINT/VEIC模式，預設是關閉的。

[arch/mips/kernel/traps.c] void __init trap_init() { if (cpu_has_veic || cpu_has_vint) { //打開VEIC或者VINT unsigned long size = 0x200 + VECTORSPACING*64; ebase = (unsigned long) __alloc_bootmem(size, 1 << fls(size), 0); } else { ebase = CKSEG0; if (cpu_has_mips_r2) ebase += (read_c0_ebase() & 0x3ffff000); // MIPSr2 provides an option to adjust ebase according to VA (bit 29:12) of EBase register. }

if (board_ebase_setup) board_ebase_setup();

per_cpu_trap_init(); //cache錯誤和TLB refill異常初始化 ...

set_handler(0x180, &except_vec3_generic, 0x80); for (i = 0; i <= 31; i++) set_except_vector(i, handle_reserved); //安裝異常處理表 ... set_except_vector(0, rollback ? rollback_handle_int : handle_int ); set_except_vector(1, handle_tlbm ); set_except_vector(2, handle_tlbl ); set_except_vector(3, handle_tlbs );

set_except_vector(4, handle_adel); set_except_vector(5, handle_ades);

set_except_vector(6, handle_ibe); set_except_vector(7, handle_dbe);

set_except_vector(8, handle_sys ); //對應系統調用 set_except_vector(9, handle_bp); set_except_vector(10, rdhwr_noopt ? handle_ri : (cpu_has_vtag_icache ? handle_ri_rdhwr_vivt : handle_ri_rdhwr)); set_except_vector(11, handle_cpu); set_except_vector(12, handle_ov);

set_except_vector(13, handle_tr); ... if (board_nmi_handler_setup) //安裝非屏蔽中斷 board_nmi_handler_setup(); ... } 在上面我們看到，核心首先用 handle_reserved 填充整個 exception_handlers，然後依據各 MIPS CPU 的特點，依次填充相應的處理函數。

void __init set_handler(unsigned long offset, void *addr, unsigned long size) { memcpy((void *)( ebase + offset), addr, size); local_flush_icache_range(ebase + offset, ebase + offset + size); }

void __init *set_except_vector(int n, void *addr) { unsigned long handler = (unsigned long) addr; unsigned long old_handler = exception_handlers [n];

exception_handlers[n] = handler; if (n == 0 && cpu_has_divec) { //cpu_has_divec 表示 CPU 具有專用的中斷向量（dedicated interrupt vector，由 Cause 寄存器的 IV 位啟用），并不是除 0 異常；這時中斷不走通用異常入口 0x180，而是進入 ebase+0x200，是以要在 ebase+0x200 處生成跳轉到 handler 的指令 unsigned long jump_mask = ~((1 << 28) - 1); u32 *buf = (u32 *)(ebase + 0x200); unsigned int k0 = 26; if ((handler & jump_mask) == ((ebase + 0x200) & jump_mask)) { uasm_i_j(&buf, handler & ~jump_mask); uasm_i_nop(&buf); } else { UASM_i_LA(&buf, k0, handler); uasm_i_jr(&buf, k0); uasm_i_nop(&buf); } local_flush_icache_range(ebase + 0x200, (unsigned long)buf); } return (void *)old_handler; } static inline void __cpuinit bmips_nmi_handler_setup(void) { bmips_wr_vec(BMIPS_NMI_RESET_VEC, &bmips_reset_nmi_vec, &bmips_reset_nmi_vec_end); bmips_wr_vec(BMIPS_WARM_RESTART_VEC, &bmips_smp_int_vec, &bmips_smp_int_vec_end); } handle_reserved(), handle_watch() and the other handlers listed below are all built in arch/mips/kernel/genex.S as follows: 1 BUILD_HANDLER adel ade ade silent 2 BUILD_HANDLER ades ade ade silent 3 BUILD_HANDLER ibe be cli silent 4 BUILD_HANDLER dbe be cli silent 5 BUILD_HANDLER bp bp sti silent 6 BUILD_HANDLER ri ri sti silent 7 BUILD_HANDLER cpu cpu sti silent 8 BUILD_HANDLER ov ov sti silent 9 BUILD_HANDLER tr tr sti silent 10 BUILD_HANDLER fpe fpe fpe silent 11 BUILD_HANDLER mdmx mdmx sti silent 12 #ifdef CONFIG_HARDWARE_WATCHPOINTS 13 17 BUILD_HANDLER watch watch cli silent 18 #else 19 BUILD_HANDLER watch watch sti verbose 20 #endif 21 BUILD_HANDLER mcheck mcheck cli verbose 22 BUILD_HANDLER mt mt sti silent 23 BUILD_HANDLER dsp dsp sti silent 24 BUILD_HANDLER reserved reserved sti verbose Each call to BUILD_HANDLER builds two entry points, handle_\exception and handle_\exception_int. Thus, you can look in the symbol table for handle_adel(), handle_adel_int(), handle_ades(), handle_ades_int(), and so on.

How do these handlers get prepared and handle exceptions? 1 .macro __BUILD_HANDLER exception handler clear verbose ext 2 .align 5 3 NESTED(handle_\exception, PT_SIZE, sp) 4 .set noat 5 SAVE_ALL 6 FEXPORT(handle_\exception\ext) 7 __BUILD_clear_\clear 8 .set at 9 __BUILD_\verbose \exception 10 move a0, sp 11 PTR_LA ra, ret_from_exception 12 j do_\handler 13 END(handle_\exception) 14 .endm 15 16 .macro BUILD_HANDLER exception handler clear verbose 17 __BUILD_HANDLER \exception \handler \clear \verbose _int 18 .endm

except_vec3_generic定義于： [arch/mips/kernel/genex.S] NESTED(except_vec3_generic, 0, sp) .set push .set noat #if R5432_CP0_INTERRUPT_WAR mfc0 k0, CP0_INDEX #endif mfc0 k1, CP0_CAUSE andi k1, k1, 0x7c #ifdef CONFIG_64BIT dsll k1, k1, 1 #endif PTR_L k0, exception_handlers(k1) jr k0 .set pop END(except_vec3_generic) 這段程式完成的功能為：取 cause 寄存器之 ExcCode 值，然後跳轉到 exception_handlers+ExcCode*4 處，注意 ExcCode 為 cause 寄存器的位 6:2，因此 CAUSE & 0x7c 就是 ExcCode*4。

4.中斷處理 handle_int定義位于：[arch/mips/kernel/genex.S] NESTED(handle_int, PT_SIZE, sp) SAVE_ALL // 儲存上下文 CLI TRACE_IRQS_OFF

LONG_L s0, TI_REGS($28) //asm-offsets.s:373:->TI_REGS 72 offsetof(struct thread_info, regs) # ，即 s0=*($28+72) LONG_S sp, TI_REGS($28) // *（$28+72）=sp PTR_LA ra, ret_from_irq //從irq傳回 j plat_irq_dispatch END(handle_int)

SAVE_ALL用于儲存上下文，定義位于：[arch/mips/include/asm/stackframe.h] .macro SAVE_ALL SAVE_SOME SAVE_AT SAVE_TEMP SAVE_STATIC .endm

.macro SAVE_SOME .set push .set noat .set reorder mfc0 k0, CP0_STATUS sll k0, 3 .set noreorder bltz k0, 8f //在核心模式下cp0肯定可用，在使用者态模式下，cp0不可用，是以可以根據CU0 （coprocessor usability）來判斷是否在使用者态模式，如果為1，則在核心态 move k1, sp //在slot中的指令肯定會被執行 .set reorder get_saved_sp //從使用者模式轉到核心模式，需要切換棧，将核心态幀頂儲存到k1中 8: move k0, sp //将目前sp的位置儲存到k0中，即k0=sp PTR_SUBU sp, k1, PT_SIZE //sp=k1-PT_SIZE LONG_S k0, PT_R29(sp) //asm-offsets.s:->PT_R29 140 offsetof(struct pt_regs, regs[29]) #, 即 *(sp+140)=k0,儲存中斷發生時棧頂的位置 LONG_S $3, PT_R3(sp) LONG_S $0, PT_R0(sp) mfc0 v1, CP0_STATUS LONG_S $2, PT_R2(sp) ...

.endm

.macro get_saved_sp //單處理器版本 lui k1, %hi(kernelsp) //unsigned long kernelsp[NR_CPUS]; 取kernelsp變量位址的高位部分存儲到k1中 LONG_L k1, %lo(kernelsp)(k1) // 取 kernelsp變量位址的低位部分合并K1中的高位部分形成記憶體尋址位址，即将kernelsp[0]儲存到k1中 .endm

為何會将核心棧儲存在kernelsp中呢？這就得看看程序切換的過程： #define switch_to(prev, next, last) \ do { \ __mips_mt_fpaff_switch_to(prev); \ if (cpu_has_dsp) \ __save_dsp(prev); \ __clear_software_ll_bit(); \ (last) = resume(prev, next, task_thread_info(next)); \ } while (0)

resume函數的實作位于：[arch/mips/kernel/r4k_switch.S]，這裡隻解釋相關的部分： .align 5 LEAF(resume) ...

move $28, a2 //a2就是這裡的next_ti ...

PTR_ADDU t0, $28, _THREAD_SIZE - 32 //t0=$28+_THREAD_SIZE-32

set_saved_sp t0, t1, t2 ...

END(resume)

.macro set_saved_sp stackp temp temp2 LONG_S \stackp, kernelsp .endm 這樣kernelsp[0]就指向了目前程序核心态的棧頂，這裡都隻說明了單處理器的情況。

關于CLI （clear interrupt）： .macro CLI mfc0 t0, CP0_STATUS li t1, ST0_CU0 | STATMASK //#define STATMASK 0x1f or t0, t1 xori t0, STATMASK mtc0 t0, CP0_STATUS 關于status寄存器的低位含義： #define ST0_IE 0x00000001 #define ST0_EXL 0x00000002 #define ST0_ERL 0x00000004 #define ST0_KSU 0x00000018 # define KSU_USER 0x00000010 //使用者模式 # define KSU_SUPERVISOR 0x00000008 //管理者模式 # define KSU_KERNEL 0x00000000 //核心模式

是以CLI指令的作用: KSU[4：3]:00 核心模式 ERL[2]:0 EXL[1]:0 IE[0]:0 ，因為EPC已經通過SAVE_ALL儲存在棧上了，是以可以清ERL和EXL，進入核心模式，關閉中斷

.macro STI //set interrupt mfc0 t0, CP0_STATUS li t1, ST0_CU0 | STATMASK or t0, t1 xori t0, STATMASK & ~1 mtc0 t0, CP0_STATUS STI指令的作用: KSU[4：3]:00 核心模式 ERL[2]:0 EXL[1]:0 IE[0]:1 ，因為EPC已經通過 SAVE_ALL儲存在棧上了，是以可以清ERL和EXL，進入核心模式，打開中斷

local_irq_save(flags); //關閉本地中斷，并将原來的中斷标志儲存在flags變量中 #define local_irq_save(flags) \ do { \ raw_local_irq_save(flags); \ } while (0) #define raw_local_irq_save(flags) \ do { \ typecheck(unsigned long, flags); \ flags = arch_local_irq_save(); \ } while (0) static inline unsigned long arch_local_irq_save(void) { unsigned long flags; asm volatile("arch_local_irq_save\t%0" : "=r" (flags) : : "memory"); return flags; } __asm__( " .macro arch_local_irq_save result \n" " .set push \n" " .set reorder \n" " .set noat \n" " di \\result \n" " andi \\result, 1 \n" " irq_disable_hazard \n" " .set pop \n" " .endm \n"); di指令用于disable interrupt，清除status寄存器的IE位，并将原來的值儲存在指定的寄存器中 local_irq_restore(flags); //恢複儲存在flags變量中的中斷狀态

[arch/mips/brcmstb/irq.c] asmlinkage void plat_irq_dispatch(struct pt_regs *regs) { unsigned int pend = ((read_c0_cause() & read_c0_status()) >> 8) & 0xff; unsigned int shift;

while ((shift = ffs(pend)) != 0) { shift--; pend ^= 1 << shift; if (shift == 2) brcm_mips_int2_dispatch(regs); //處理器間中斷 #ifdef CONFIG_SMP else if (unlikely(shift == 3)) brcm_mips_int3_dispatch(regs); #endif else do_IRQ (MIPS_CPU_IRQ_BASE + shift); //由IP得到對應的shift } } void __irq_entry do_IRQ(unsigned int irq) { irq_enter(); check_stack_overflow(); if (!smtc_handle_on_other_cpu(irq)) generic_handle_irq(irq); irq_exit(); } int generic_handle_irq(unsigned int irq) { struct irq_desc *desc = irq_to_desc (irq);

if (!desc) return -EINVAL; generic_handle_irq_desc(irq, desc); return 0; } irq_desc 描述符本身在中斷子系統初始化時就已經配置設定好；request_irq（）所做的是把驅動的中斷處理函數（irqaction）挂到對應 irq_desc 上，generic_handle_irq_desc() 最終會通過該描述符調用到它。

從中斷傳回的過程： [arch/mips/kernel/entry.S] FEXPORT(ret_from_irq) LONG_S s0, TI_REGS($28) //恢複之前儲存在s0中的值 FEXPORT(__ret_from_irq) LONG_L t0, PT_STATUS(sp) # returning to kernel mode? // t0= *(sp+152), 異常發生後status寄存器的值儲存在棧中這個位置 andi t0, t0, KU_USER //#define KU_USER 0x10 t0= t0 & 0x10, 判斷中斷發生時是否在使用者态 beqz t0, resume_kernel

#define resume_kernel restore_all //傳回到核心态

resume_userspace: //傳回到使用者态 local_irq_disable # make sure we dont miss an # interrupt setting need_resched # between sampling and return LONG_L a2, TI_FLAGS($28) # current->work struct thread_info { ...; unsigned long flags;...} andi t0, a2, _TIF_WORK_MASK # (ignoring syscall_trace) bnez t0, work_pending //判斷目前程序是否有pending的工作要做，包括排程标志和pending的信号 j restore_all

work_pending: andi t0, a2, _TIF_NEED_RESCHED # a2 is preloaded with TI_FLAGS //判斷是否需要排程 beqz t0, work_notifysig work_resched: //進行排程 jal schedule

work_notifysig: # deal with pending signals and # notify-resume requests move a0, sp li a1, 0 jal do_notify_resume # a2 already loaded //處理pending的signal，該函數定義在：arch/mips/kernel/signal.c j resume_userspace

其中RESTORE_ALL為了恢複上下文： .macro RESTORE_ALL RESTORE_TEMP RESTORE_STATIC RESTORE_AT RESTORE_SOME RESTORE_SP .endm

.macro RESTORE_SOME mfc0 a0, CP0_STATUS ori a0, STATMASK xori a0, STATMASK mtc0 a0, CP0_STATUS //取出目前status寄存器值 li v1, 0xff00 and a0, v1 //保留第二個byte LONG_L v0, PT_STATUS(sp) //取出棧中保留的異常發生時的status值 nor v1, $0, v1 // v1=~(0|v1) and v0, v1 //清除第二個byte，其他byte維持不變 or v0, a0 mtc0 v0, CP0_STATUS //這段代碼的意思就是從棧中恢複保留的status的值，但是維持第二個byte的内容不變

5.系統調用：系統調用異常的處理程式為 handle_sys，打算另開一篇再分析。

linux 中斷與異常---源碼分析（三）

繼續閱讀

Apache (You don't have permission to access / on this server.）

debian9更新4.9.0核心到4.19.2核心過程

centOS7 配置 vsftpd 虛拟使用者及權限Vsftpd配置虛拟使用者及權限

linux-svn解除安裝與安裝

vsftp虛拟多使用者多權限一鍵部署腳本

Ubuntu14.04 LTS下安裝mongodb

httpd服務的部署、啟動、配置和簡單優化一、部署二、啟動三、配置檔案

配置網頁内容通路

手動安裝Intel network I217-LM網卡的Linux驅動

禁止ubuntu系統彈出報錯界面

Ubuntu Linux下Apache的配置檔案

samba伺服器的功能

【Linux】UDP廣播封包接收速率問題

Linux裝置模型（中）之上層容器

PowerPC平台 Linux移植三