天天看點

PCIE AER and hot removal

It is very useful to be able to disable/enable AER while verifying functions such as PCIe hotplug or firmware upgrade. We need to use AER to mask or enable PCIe errors in order to prevent unexpected resets.

The following script gives an example of how to control AER and perform hot removal and insertion; please feel free to leave your comments:

#!/bin/bash

# Notice: following script is only for SLIC2

#set -n;

#set  -v;

#set  -x;

source ./aer_ctr.sh

# Disable the SMI errors caused by hot removal/insertion by writing all-ones
# to five registers (0x19C, 0x1a0, 0x1a4, 0x1a8, 0x1c8) of device 00:05.2.
# NOTE(review): the ".w" suffix selects a 16-bit write although the value is
# spelled as a 32-bit constant -- confirm the intended access width.
# Returns: exit status of the last setpci call.
disable_smi() {
  local reg
  for reg in 0x19C 0x1a0 0x1a4 0x1a8 0x1c8; do
    setpci -s 00:05.2 "${reg}.w=0xffffffff"
  done
}

# Re-enable the SMI errors caused by hot removal/insertion by writing zero
# to four registers (0x19C, 0x1a0, 0x1a4, 0x1a8) of device 00:05.2.
# NOTE(review): disable_smi also writes offset 0x1c8, which is not cleared
# here -- confirm whether that is intentional.
# NOTE(review): ".w" selects a 16-bit write -- confirm the intended width.
# Returns: exit status of the last setpci call.
enable_smi() {
  local reg
  for reg in 0x19C 0x1a0 0x1a4 0x1a8; do
    setpci -s 00:05.2 "${reg}.w=0x00000000"
  done
}

# Disable SP reset on PCIe device (SLIC) events by sending a raw IPMI command.
# Returns: exit status of ipmitool.
disable_sp_reset_on_slicevent() {
  ipmitool raw 0x30 0x81 0x13 0x10 0x00 0x00 0x00
}

# Power off the PCIe card (SLIC) by sending a raw IPMI command, then wait
# one second.
# Returns: exit status of sleep.
poweroff_slic() {
  ipmitool raw 0x30 0x81 0x13 0x01 0x00 0x00 0x00
  # !!! DON'T REMOVE THIS SLEEP: it is needed for the PCIe DLL to be ready.
  sleep 1
}

# Power on PCIe card 2 by sending a raw IPMI command.
# Returns: exit status of ipmitool.
poweron_slic() {
  ipmitool raw 0x30 0x81 0x13 0x01 0x00 0x01 0x00
}

# Take the SLIC out of its hold-in-reset state by sending a raw IPMI command,
# then wait one second.
# Returns: exit status of sleep.
disable_slic_holdinreset() {
  ipmitool raw 0x30 0x81 0x13 0x04 0x00 0x00 0x00
  sleep 1
}

# Apply the SerDes settings. The values are only valid for PCIe card 2.
# According to the author this matches the C implementation, except that it
# does not set register 0x504.b of device 00:06.7.
# Outputs: a banner line on stdout.
# Returns: exit status of the last setpci call.
apply_serdes() {
  echo "Before Apply serdes:"

  # Optional register dumps for debugging:
  #lspci -xxxxx -s 00:07.0 | tee -a before_070_0819.pci
  #lspci -xxxxx -s 00:07.1 | tee -a before_071_0819.pci

  #setpci -s 00:06.7 0x504.b=0x76

  # [CTL] 0xa80: set the serdes table.
  # read pci offset: bnd[0~30] to A80
  # The original value of this register is 0x55555555.
  #setpci -s 00:07.0 0xa80.l=0x31810842  # correct one
  setpci -s 00:07.0 0xa80.l=0x00d10842  # correct one

  #setpci -s 00:07.0 0xa84.l=0x04
  #setpci -s 00:07.0 0x438.l=0x2757F

  # Same 0x3cc value on both functions of device 00:07.
  setpci -s 00:07.0 0x3cc.l=0x01d80000
  setpci -s 00:07.1 0x3cc.l=0x01d80000
}

# SerDes setup for SLIC 2; described by the author as the "good one" that
# follows all steps.
# NOTE(review): the comments below mention cursor programming (0x3cc), link
# retraining (0x3E) and a link-status check (A2), but no matching commands
# are present in this function -- they may be missing from this copy; verify
# against the original script before relying on it.
# Returns: exit status of the last setpci call.
apply_serdes_origin() {
  # The value of 504.b is always 0x76.
  setpci -s 00:06.7 0x504.b=0x76

  setpci -s 00:07.0 0xa80.l=0x00

  # [cursor 0] 0x3cc  s.Cursorbnd0/1
  # originally both 0x00 0x00
  # [cursor 1] 0x3cc  s.Cursorbnd2/3
  #lspci -s 00:07.1 0xa80.l=0xd80000

  setpci -s 00:07.0 0xa80.l=0x0000  # according to the C code, write 00:07.0 also

  # Re-training: set the bit, sleep, then clear the re-train bit.
  # [PR ]   0x3E   link control bit
  ## check link status [A2]
}

# Retrain the PCIe link behind root port 00:03.0 through the bridge control
# register, then record and print the current link status.
#
# The two writes to the 16-bit register at offset 0x3e differ only in bit 6
# (0x0053 vs 0x0013): the bit is set, held for 500000 microseconds, then cleared.
# NOTE(review): presumably bit 6 is the bridge's Secondary Bus Reset -- confirm.
#
# Globals:
#   temp (written) - the "a0:" row of the lspci hex dump of 00:03.0
#   exp  (written) - 4th whitespace-separated field of that row, i.e. the
#                    byte at config offset 0xa2; other code prints $exp
# Outputs: one notice line with the link status on stdout.
retrain() {
  # Step 1 (disabled): clear the link bandwidth management bit.
  #setpci -s 00:03.0 0xa2.b=0xff

  # Step 1.2: use the bridge control register to reset the link.
  setpci -s 00:03.0 0x3e.w=0x0053
  usleep 500000
  setpci -s 00:03.0 0x3e.w=0x0013

  # Step 1.5: save the current link status register value.
  temp=$(lspci -xxx -s 00:03.0 | grep "a0:")
  exp=$(awk '{print $4}' <<<"$temp")
  echo "Notice!!!! current link status is : $exp"

  # Step 2 (disabled): set the link-train bit of the control register for
  # root port 2 (slot 2); the original value is 0x40.
  #setpci -s 00:03.0 0xa0.b=0x60

  # Step 3 (disabled): poll until link training has finished.
  #status=$(lspci -xxx -s 00:03.0 | grep "a0:" | awk '{print $4}')
  #while [ "$exp" -ne "$status" ]; do
  #  echo "Current Link Status is $status" | tee -a lspci.log
  #  sleep 1
  #  status=$(lspci -xxx -s 00:03.0 | grep a0: | awk '{print $4}')
  #done

  #setpci -s 1f:00.0 0xbc.b=0x6
  #setpci -s 1f:00.1 0xbc.b=0x6
  #setpci -s 1f:00.2 0xbc.b=0x6
  #setpci -s 1f:00.3 0xbc.b=0x6
}

# test shows it doesn't matter whether it is set or not

# Placeholder for restoring the BARs: it only prints a marker line.
restore_bar() {
  echo "restore_bar"
}

# Placeholder for restoring the command register: it only prints a marker line.
restore_cmd() {
  echo "restore_cmd"
}

# Write zero to the eight 32-bit registers 0xb0..0xcc of devices 7f:10.7 and
# ff:10.7 (all of 7f:10.7 first, then ff:10.7).
#
# Original register contents noted by the author:
#   b0: c0 00 20 01 ce 00 20 01 c0 00 24 01 ec 00 20 01
#   c0: e0 08 48 00 e0 0f 48 00 e0 0f 04 00 e0 05 00 00
# Returns: exit status of the last setpci call.
unhide_regs() {
  local dev reg
  for dev in 7f:10.7 ff:10.7; do
    for reg in 0xb0 0xb4 0xb8 0xbc 0xc0 0xc4 0xc8 0xcc; do
      setpci -s "$dev" "${reg}.l=0x00"
    done
  done
  #echo "After unhide regs:" | tee -a lspci.log
  #lspci -vvvv -t | tee -a lspci.log
}

# Write 0xffffffff to the eight 32-bit registers 0xb0..0xcc of devices
# 7f:10.7 and ff:10.7, then append a marker line and the lspci device tree
# to lspci.log (both are also echoed to stdout by tee).
# Returns: exit status of the final tee.
hide_regs1() {
  local dev reg
  for dev in 7f:10.7 ff:10.7; do
    for reg in 0xb0 0xb4 0xb8 0xbc 0xc0 0xc4 0xc8 0xcc; do
      setpci -s "$dev" "${reg}.l=0xffffffff"
    done
  done
  echo "After hide regs:" | tee -a lspci.log
  lspci -vvvv -t | tee -a lspci.log
}

# Write one byte to each of the registers 0xb0..0xcc of devices 7f:10.7 and
# ff:10.7. The byte values (c0 ce c0 ec e0 e0 e0 e0) equal the low byte of
# each dword in the "original value" dump recorded in unhide_regs.
# Returns: exit status of the last setpci call.
hide_regs() {
  local dev
  for dev in 7f:10.7 ff:10.7; do
    setpci -s "$dev" 0xb0.b=0xc0
    setpci -s "$dev" 0xb4.b=0xce
    setpci -s "$dev" 0xb8.b=0xc0
    setpci -s "$dev" 0xbc.b=0xec
    setpci -s "$dev" 0xc0.b=0xe0
    setpci -s "$dev" 0xc4.b=0xe0
    setpci -s "$dev" 0xc8.b=0xe0
    setpci -s "$dev" 0xcc.b=0xe0
  done
  #echo "After hide regs:" | tee -a lspci.log
}

# Write one byte to each of the registers 0xb0..0xbc of devices 7f:10.7 and
# ff:10.7, then append a marker line and the lspci device tree (read with the
# intel-conf1 access method) to lspci.log.
# The byte values (00 0e 00 2c) are the values written by hide_regs
# (c0 ce c0 ec) with the top two bits (0xc0) cleared. The matching writes to
# 0xc0..0xcc are disabled.
# Returns: exit status of the final tee.
unhide_regs_original() {
  local dev
  for dev in 7f:10.7 ff:10.7; do
    setpci -s "$dev" 0xb0.b=0x00
    setpci -s "$dev" 0xb4.b=0x0e
    setpci -s "$dev" 0xb8.b=0x00
    setpci -s "$dev" 0xbc.b=0x2c
    #setpci -s "$dev" 0xc0.b=0x20
    #setpci -s "$dev" 0xc4.b=0x20
    #setpci -s "$dev" 0xc8.b=0x20
    #setpci -s "$dev" 0xcc.b=0x20
  done
  echo "After unhide regs:" | tee -a lspci.log
  lspci -vvvv -t -A intel-conf1 | tee -a lspci.log
}

# Counterpart of unhide_regs_original. Every register write in this copy is
# commented out, so the function currently does nothing.
# NOTE(review): active statements may be missing from this copy of the
# script -- compare with the original before using it.
# Returns: 0.
hide_regs_original() {
  #setpci -s 7f:10.7 0xc0.b=0xe0
  #setpci -s 7f:10.7 0xc4.b=0xe0
  #setpci -s 7f:10.7 0xc8.b=0xe0
  #setpci -s 7f:10.7 0xcc.b=0xe0
  #setpci -s ff:10.7 0xc0.b=0xe0
  #setpci -s ff:10.7 0xc4.b=0xe0
  #setpci -s ff:10.7 0xc8.b=0xe0
  #setpci -s ff:10.7 0xcc.b=0xe0
  :  # explicit no-op: bash does not accept an empty function body
}

# Placeholder for programming the EEPROM: it only prints a completion message.
# Per the author, the hot-plug procedure can be verified even if the EEPROM
# is empty.
set_eeprom() {
  echo "Set EEPROM done!"
}

#######################################
# Main hot-plug procedure for root port 00:03.0.
#
# Notes from the author:
#  1. Either the disable_aer/enable_aer pair or the disable_smi/enable_smi
#     pair can be used, but mixing them (disable_aer with enable_smi, or
#     disable_smi with enable_aer) is not recommended.
#  2. This function is only for PCIe card 2 and assumes the card is a
#     Broadcom Corporation BCM57840 NetXtreme II 10 Gigabit Ethernet,
#     i.e. a Rockslide (Broadcom q-port 10GbE) card.
#
# disable_aer, enable_aer and clear_aer are not defined in this file;
# presumably they come from the sourced ./aer_ctr.sh -- confirm.
#
# Arguments:
#   $1 - 1 for hot removal, 0 for hot insertion
# Globals:
#   temp, exp (written on insertion) - "a0:" row of the lspci hex dump of
#   00:03.0 and its 4th field (the byte at config offset 0xa2)
# Outputs:
#   progress messages on stdout; the removal message is also appended to
#   lspci.log
# Returns:
#   2 when called without an argument
#######################################
main3() {
  if [ "$#" -lt 1 ]; then
    echo "usage: main3 <0|1>   (0 = hot insertion, 1 = hot removal)" >&2
    return 2
  fi
  local mode=$1

  # ---- hot removal ----
  if [ "$mode" -eq 1 ]; then
    #disable_smi
    disable_aer 00:03.0
    disable_sp_reset_on_slicevent
    poweroff_slic
    # The SLIC can be physically removed at this point.
    echo "after power off slic" | tee -a lspci.log
    #lspci -vvvv -t -A intel-conf1 | tee -a lspci.log
    clear_aer 00:03.0
    #enable_smi
  fi

  # ---- hot insertion (the SLIC has been inserted again) ----
  if [ "$mode" -eq 0 ]; then
    # Capture the link status before powering on, using the same extraction
    # as retrain, so the messages below print a current value.
    temp=$(lspci -xxx -s 00:03.0 | grep "a0:")
    exp=$(awk '{print $4}' <<<"$temp")
    echo "Before power on slic: Original link status is : $exp"

    poweron_slic
    disable_slic_holdinreset
    unhide_regs_original
    unhide_regs
    echo 1 > /sys/bus/pci/rescan
    apply_serdes

    echo "Before retarin: Original link status is : $exp"
    retrain

    #hide_regs_original
    #clear_aer 00:03.0
    #clear_aer 80:02.0
    #clear_aer 00:01.0
    #hide_regs
    #echo 1 > /sys/bus/pci/devices/0000:00:03.0/rescan
    restore_bar
    restore_cmd
    enable_aer 00:03.0
    #echo "After retrain and rescan" | tee -a lspci.log
    #lspci -vvvv -t -A intel-conf1 | tee -a lspci.log
  fi
}

# Run the procedure with the script's own arguments (0 = hot insertion,
# 1 = hot removal). Quoted so the argument is passed through unmodified.
main3 "$@"

## usage as below:  ###

#main3 0 ## hot insertion

#main3 1 ## hot removal

本文轉自存儲之廚51CTO部落格,原文連結:http://blog.51cto.com/xiamachao/1844984 ,如需轉載請自行聯系原作者