天天看點

[Android6.0] 資料業務重試機制

Android 6.0 Framework telephony中資料業務連結錯誤處理一般分3種情況:

1. SETUP_DATA_CALL 時傳回錯誤

2. Modem上報DATA_CALL_LIST包含錯誤碼或者連結中斷

3. 一段時間内沒有上下行資料(TX/RX)

下面具體來看每種情況的處理。

1. SETUP_DATA_CALL失敗

DataConnection在收到SETUP_DATA_CALL結果後,用Message通知DcTracker處理:

/**
 * Handles the result of a data-call setup request (abridged excerpt; elided parts
 * are marked with "...").
 *
 * @param ar async result of the setup request; a non-null {@code ar.exception}
 *           means the setup failed
 */
protected void onDataSetupComplete(AsyncResult ar) {
    if (ar.exception == null) {
        // Data call was set up successfully.
    }else{
        ...
        // Track permanent failures: the counter is decremented for each permanent
        // failure, and onDataSetupCompleteError() later checks it to decide whether to retry.
        if (isPermanentFail(cause)) apnContext.decWaitingApnsPermFailCount(); 
        apnContext.removeWaitingApn(apnContext.getApnSetting()); // Remove the APN that just failed from the waiting list.
        onDataSetupCompleteError(ar);// Continue with the error handling.
        ...
    }
}
           

處理Error的邏輯: 1. 如果apnContext中的所有waiting APN都失敗了,且不是每個都發生permanent fail(永久性錯誤),則設定delay并重新發起這次連接 2. 如果apnContext中仍有沒有嘗試的waiting APN,則設定delay并嘗試用下一個APN去連接

/**
     * Handles an error that occurred during a SETUP (bring-up) request.
     *
     * <p>If untried APNs remain in the waiting list, the next one is tried after a delay.
     * Otherwise the APN context is marked FAILED and, unless every APN failed
     * permanently, the whole setup is restarted after a delay. Both delays come from
     * {@code getApnDelay(Phone.REASON_APN_FAILED)}.
     *
     * @param ar async result of the failed setup request
     */
    @Override
    protected void onDataSetupCompleteError(AsyncResult ar) {
        final ApnContext apnContext = getValidApnContext(ar, "onDataSetupCompleteError");
        if (apnContext == null) {
            return;
        }

        if (!apnContext.getWaitingApns().isEmpty()) {
            // Untried APNs remain in the waiting list: move on to the next one.
            if (DBG) {
                log("onDataSetupCompleteError: Try next APN");
            }
            apnContext.setState(DctConstants.State.SCANNING);
            // Delay the next attempt a little so the RIL command channel is not tied up.
            startAlarmForReconnect(getApnDelay(Phone.REASON_APN_FAILED), apnContext);
            return;
        }

        // Every waiting APN has been tried: mark the context FAILED and notify.
        apnContext.setState(DctConstants.State.FAILED);
        mPhone.notifyDataConnection(Phone.REASON_APN_FAILED, apnContext.getApnType());

        // Drop the reference to the data connection.
        apnContext.setDataConnectionAc(null);

        if (apnContext.getWaitingApnsPermFailCount() == 0) {
            // All APNs failed permanently, so no retry is scheduled.
            if (DBG) {
                log("onDataSetupCompleteError: All APN's had permanent failures, stop retrying");
            }
            return;
        }

        // At least one failure was not permanent: schedule a fresh setup attempt.
        final int retryDelay = getApnDelay(Phone.REASON_APN_FAILED);
        if (DBG) {
            log("onDataSetupCompleteError: Not all APN's had permanent failures delay="
                    + retryDelay);
        }
        startAlarmForRestartTrySetup(retryDelay, apnContext);
    }
           

附:ApnContext的所有狀态

/**
     * States of a data connection as tracked for an APN context.
     *
     * <p>getDataConnectionState() maps State to DataState:
     * <pre>
     *      FAILED or IDLE : DISCONNECTED
     *      RETRYING or CONNECTING or SCANNING: CONNECTING
     *      CONNECTED : CONNECTED or DISCONNECTING
     * </pre>
     */
    public enum State {
        /** Ready to start data connection setup; this is the default state. */
        IDLE,

        /** startPppd() has been issued but has not finished yet. */
        CONNECTING,

        /**
         * The data connection failed with one APN but other APNs are available;
         * ready to start a data connection on the other APNs (before INITING).
         */
        SCANNING,

        /** The IP connection is set up. */
        CONNECTED,

        /**
         * Connection.disconnect() has been called, but the PDP context is not yet
         * deactivated.
         */
        DISCONNECTING,

        /** The data connection failed for all APN settings. */
        FAILED,

        /** The data connection failed but a retry is going to be made. */
        RETRYING
    }
           

2. 連結中斷

DcController監聽RIL_UNSOL_DATA_CALL_LIST_CHANGED消息,獲得每一個資料連接的更新:

// Register this object's handler with mPhone.mCi for data-network state changes;
// they are delivered as DataConnection.EVENT_DATA_STATE_CHANGED (no user object).
mPhone.mCi.registerForDataNetworkStateChanged(getHandler(),
                    DataConnection.EVENT_DATA_STATE_CHANGED, null);
           

RIL上報DATA_CALL_LIST_CHANGED時會帶上目前的Modem中的DataCall list,DcController将此dataCall list和上層的active list做對比:

1. 已經丢失 及 斷開 的連接将會重試

2. 發生變化 和 發生永久錯誤的連結則需要清除

/**
 * Reconciles the data-call list reported by the modem with the locally tracked
 * active connections (abridged excerpt; elided parts are marked with "...").
 *
 * Connections that are missing from the report, or that are reported inactive
 * with a non-permanent failure while an APN is still enabled, are sent
 * EVENT_LOST_CONNECTION. APN contexts whose connection failed permanently,
 * changed interface, or had an address replaced by one of the same type are
 * cleaned up.
 *
 * @param dcsList data-call responses reported by the modem
 */
private void onDataStateChanged(ArrayList<DataCallResponse> dcsList) {

            // Create hashmap of cid to DataCallResponse
            HashMap<Integer, DataCallResponse> dataCallResponseListByCid =
                    new HashMap<Integer, DataCallResponse>();
            for (DataCallResponse dcs : dcsList) {
                dataCallResponseListByCid.put(dcs.cid, dcs);
            }

            // If an active connection has no matching entry in the reported dcsList,
            // treat it as lost and add it to the retry list.
            ArrayList<DataConnection> dcsToRetry = new ArrayList<DataConnection>();
            for (DataConnection dc : mDcListActiveByCid.values()) {
                if (dataCallResponseListByCid.get(dc.mCid) == null) {
                    if (DBG) log("onDataStateChanged: add to retry dc=" + dc);
                    dcsToRetry.add(dc);
                }
            }
            // Find which connections have changed state and send a notification or cleanup
            // and any that are in active need to be retried.
            ArrayList<ApnContext> apnsToCleanup = new ArrayList<ApnContext>();

            boolean isAnyDataCallDormant = false;
            boolean isAnyDataCallActive = false;

            for (DataCallResponse newState : dcsList) {

                DataConnection dc = mDcListActiveByCid.get(newState.cid);
                // A connection that is not in the active map has not been synced to the
                // upper layer yet; it is handled elsewhere.
                if (dc == null) {
                    // UNSOL_DATA_CALL_LIST_CHANGED arrived before SETUP_DATA_CALL completed.
                    loge("onDataStateChanged: no associated DC yet, ignore");
                    continue;
                }
                
                if (dc.mApnContexts.size() == 0) {
                    if (DBG) loge("onDataStateChanged: no connected apns, ignore");
                } else {
                    // Determine if the connection/apnContext should be cleaned up
                    // or just a notification should be sent out.
                    if (newState.active == DATA_CONNECTION_ACTIVE_PH_LINK_INACTIVE) {
                            // The link is INACTIVE: handle it according to the failure cause.
                            DcFailCause failCause = DcFailCause.fromInt(newState.status);
                            if (failCause.isRestartRadioFail()) {
                                // Recovery requires restarting the radio.
                                mDct.sendRestartRadio();
                            } else if (mDct.isPermanentFail(failCause)) {
                                // Unrecoverable (permanent) failure: the connection must be cleaned up.
                                apnsToCleanup.addAll(dc.mApnContexts.keySet());
                            } else {
                                for (ApnContext apnContext : dc.mApnContexts.keySet()) {
                                    if (apnContext.isEnabled()) {
                                        // The APN is enabled: retry the connection.
                                        dcsToRetry.add(dc);
                                        break;
                                    } else {
                                        // The APN is already disabled: it needs cleanup.
                                        apnsToCleanup.add(apnContext);
                                    }
                                }
                            }

                    } else {
                        // The link is not inactive: check whether the LinkProperties changed.
                        UpdateLinkPropertyResult result = dc.updateLinkProperty(newState);
                        if (result.oldLp.equals(result.newLp)) {
                            if (DBG) log("onDataStateChanged: no change");
                        } else {
                            // Check whether the interface name is still the same.
                            if (result.oldLp.isIdenticalInterfaceName(result.newLp)) {
                                if (! result.oldLp.isIdenticalDnses(result.newLp) ||
                                        ! result.oldLp.isIdenticalRoutes(result.newLp) ||
                                        ! result.oldLp.isIdenticalHttpProxy(result.newLp) ||
                                        ! result.oldLp.isIdenticalAddresses(result.newLp)) {
                                    // If the same address type was removed and
                                    // added we need to cleanup
                                    CompareResult<LinkAddress> car =
                                        result.oldLp.compareAddresses(result.newLp);
                                    if (DBG) {
                                        log("onDataStateChanged: oldLp=" + result.oldLp +
                                                " newLp=" + result.newLp + " car=" + car);
                                    }
                                    boolean needToClean = false;
                                    // If an address was replaced by another one of the same type,
                                    // the old connection must be cleaned up.
                                    for (LinkAddress added : car.added) {
                                        for (LinkAddress removed : car.removed) {
                                            if (NetworkUtils.addressTypeMatches(
                                                    removed.getAddress(),
                                                    added.getAddress())) {
                                                needToClean = true;
                                                break;
                                            }
                                        }
                                    }
                                    if (needToClean) {

                                        apnsToCleanup.addAll(dc.mApnContexts.keySet());
                                    } else {
                                        if (DBG) log("onDataStateChanged: simple change");
                                        // Any other LinkProperties change: only send a notification.
                                        for (ApnContext apnContext : dc.mApnContexts.keySet()) {
                                             mPhone.notifyDataConnection(
                                                 PhoneConstants.REASON_LINK_PROPERTIES_CHANGED,
                                                 apnContext.getApnType());
                                        }
                                    }
                                } else {
                                    if (DBG) {
                                        log("onDataStateChanged: no changes");
                                    }
                                }
                            } else {
                                // The interface changed: clean up the old connection.
                                apnsToCleanup.addAll(dc.mApnContexts.keySet());
                                if (DBG) {
                                    log("onDataStateChanged: interface change, cleanup apns="
                                            + dc.mApnContexts);
                                }
                            }
                        }
                    }
                }

                ...
            }

            ...

            // Clean up the connections of the collected APN contexts.
            for (ApnContext apnContext : apnsToCleanup) {
               mDct.sendCleanUpConnection(true, apnContext);
            }

            // Tell each DataConnection that its link was lost so that it can reconnect.
            for (DataConnection dc : dcsToRetry) {
                dc.sendMessage(DataConnection.EVENT_LOST_CONNECTION, dc.mTag);
            }
        }
    }
           

DataConnection ActiveState在收到LOST_CONNECTION消息後:

1. 如果重試次數沒有達到上限,則設定定時重試,并切換到RetryingState

2. 如果不需要重試,則切換到Inactive狀态,并可能通知DcTracker處理(onDataSetupCompleteError,可看第一種情況)

case EVENT_LOST_CONNECTION: {
                    if (DBG) {
                        log("DcActiveState EVENT_LOST_CONNECTION dc=" + DataConnection.this);
                    }
                    if (mRetryManager.isRetryNeeded()) {
                        // We're going to retry
                        int delayMillis = mRetryManager.getRetryTimer();
                        //重試
                        mDcRetryAlarmController.startRetryAlarm(EVENT_RETRY_CONNECTION, mTag,
                                delayMillis);
                        transitionTo(mRetryingState);
                    } else {
                        mInactiveState.setEnterNotificationParams(DcFailCause.LOST_CONNECTION);
                        transitionTo(mInactiveState);
                    }
                    retVal = HANDLED;
                    break;
                }
           

RetryingState 收到RETRY消息後,發起連接并切換到ActivatingState

case EVENT_RETRY_CONNECTION: {
                    if (msg.arg1 != mTag) {
                        // The alarm carries a tag that no longer matches: ignore it.
                        if (DBG) {
                            log("DcRetryingState stale EVENT_RETRY_CONNECTION"
                                    + " tag:" + msg.arg1 + " != mTag:" + mTag);
                        }
                    } else {
                        // Count this attempt, start connecting, and move to the activating state.
                        mRetryManager.increaseRetryCount();
                        onConnect(mConnectionParams);
                        transitionTo(mActivatingState);
                    }
                    retVal = HANDLED;
                    break;
                }
           

RetryManager負責重試相關的計數:

/**
     * Reports whether another retry should be attempted.
     *
     * @return {@code true} when retrying forever is enabled or the retry count is
     *         still below the current maximum
     */
    public boolean isRetryNeeded() {
        final boolean retryNeeded = mRetryForever || mRetryCount < mCurMaxRetryCount;
        if (DBG) {
            log("isRetryNeeded: " + retryNeeded);
        }
        return retryNeeded;
    }
           

3. 一段時間内持續沒有接收到新的資料包

在Data完成連接後,DcTracker會定時檢查TX/RX的更新,如果持續隻有TX而沒有RX,且累計發送的包數達到設定的門檻值,就會觸發Recovery動作。

[Android6.0] 資料業務重試機制
[Android6.0] 資料業務重試機制
[Android6.0] 資料業務重試機制

首先來看方法onDataStallAlarm,它由Alarm定時觸發,執行這些操作:

更新TX/RX資料 -> 判斷是否需要Recover并執行 -> 重新設定Alarm來觸發下一次檢查。

/**
     * Alarm callback of the data-stall check: refreshes the TX/RX statistics,
     * requests recovery when a stall is suspected, and re-arms the alarm.
     *
     * @param tag tag carried by the alarm; alarms whose tag differs from
     *            {@code mDataStallAlarmTag} are ignored
     */
    protected void onDataStallAlarm(int tag) {
        if (tag != mDataStallAlarmTag) {
            if (DBG) {
                log("onDataStallAlarm: ignore, tag=" + tag + " expecting " + mDataStallAlarmTag);
            }
            return;
        }

        // Refresh mSentSinceLastRecv from the latest packet counters.
        updateDataStallInfo();

        // Trigger threshold from global settings, with NUMBER_SENT_PACKETS_OF_HANG
        // passed as the default (10 according to the original note).
        final int watchdogTrigger = Settings.Global.getInt(mResolver,
                Settings.Global.PDP_WATCHDOG_TRIGGER_PACKET_COUNT,
                NUMBER_SENT_PACKETS_OF_HANG);

        final boolean stallState;
        if (mSentSinceLastRecv < watchdogTrigger) {
            stallState = DATA_STALL_NOT_SUSPECTED;
            if (VDBG_STALL) {
                log("onDataStallAlarm: tag=" + tag + " Sent " + String.valueOf(mSentSinceLastRecv) +
                    " pkts since last received, < watchdogTrigger=" + watchdogTrigger);
            }
        } else {
            // Packets sent without any reply reached the threshold: ask for recovery.
            stallState = DATA_STALL_SUSPECTED;
            sendMessage(obtainMessage(DctConstants.EVENT_DO_RECOVERY));
        }

        // Re-arm the alarm so this method runs again later.
        startDataStallAlarm(stallState);
    }
           

updateDataStallInfo()負責計數,處理分3種情況(既沒有TX也沒有RX時不做任何處理):

1. 有TX 也有RX  -> 正常,重置計數和Recovery action(Recovery action後面會寫到)

2. 有TX沒有RX -> 異常,累計TX資料

3. 沒有TX 隻有RX  -> 正常,重置計數和Recovery action

/**
     * Refreshes the TX/RX packet counters and updates {@code mSentSinceLastRecv}.
     *
     * <p>When packets were received (with or without packets sent), the counter and
     * the recovery action are reset. When packets were sent but none received, the
     * sent count is accumulated if the phone state is idle, otherwise the counter is
     * reset. In every other case nothing changes.
     */
    private void updateDataStallInfo() {
        final TxRxSum previous = new TxRxSum(mDataStallTxRxSum);
        mDataStallTxRxSum.updateTxRxSum();

        final long sent = mDataStallTxRxSum.txPkts - previous.txPkts;
        final long received = mDataStallTxRxSum.rxPkts - previous.rxPkts;

        if (received > 0 && sent >= 0) {
            // Data is arriving: reset the counter and restart the recovery sequence.
            if (VDBG_STALL) {
                log(sent > 0 ? "updateDataStallInfo: IN/OUT" : "updateDataStallInfo: IN");
            }
            mSentSinceLastRecv = 0;
            putRecoveryAction(RecoveryAction.GET_DATA_CALL_LIST);
        } else if (received == 0 && sent > 0) {
            // Sent without receiving: accumulate only while the phone state is idle.
            if (isPhoneStateIdle()) {
                mSentSinceLastRecv += sent;
            } else {
                mSentSinceLastRecv = 0;
            }
        } else if (VDBG_STALL) {
            log("updateDataStallInfo: NONE");
        }
    }
           

TX/RX資料由TrafficStats提供的靜态方法獲得,是native層方法統計所有Mobile的iface後傳回的資料:

/**
         * Reloads both packet counters from {@code TrafficStats}: the mobile TCP TX
         * count first, then the mobile TCP RX count.
         */
        public void updateTxRxSum() {
            txPkts = TrafficStats.getMobileTcpTxPackets();
            rxPkts = TrafficStats.getMobileTcpRxPackets();
        }
           

最後看下doRecovery方法如何執行恢複資料。

doRecovery方法中有5種不同的Recovery action對應着各自的處理: 1. 向Modem主動查詢DATA CALL LIST 2. 清除現有的資料連結 3. 重新駐網 4. 重啟Radio 5. 深度重啟Radio(根據高通的注釋,這個操作涉及到RIL的設計)

如果一種方法執行之後,連接依然有問題,則執行下一種恢複方法,順序類似于循環鏈表,直到恢複正常後updateDataStallInfo()将Action重置:

/**
     * Executes the current data-stall recovery action and advances to the next one.
     *
     * <p>Only acts while the overall state is CONNECTED. The actions are chained:
     * GET_DATA_CALL_LIST, CLEANUP, REREGISTER, RADIO_RESTART,
     * RADIO_RESTART_WITH_PROP, and then back to GET_DATA_CALL_LIST. After an
     * action has run, {@code mSentSinceLastRecv} is reset to 0.
     *
     * @throws RuntimeException if the stored recovery action is not one of the
     *         known values
     */
    protected void doRecovery() {
        if (getOverallState() == DctConstants.State.CONNECTED) {
            // Go through a series of recovery steps, each action transitions to the next action
            int recoveryAction = getRecoveryAction();
            switch (recoveryAction) {
            case RecoveryAction.GET_DATA_CALL_LIST:
                // Query the data call list; the reply arrives as EVENT_DATA_STATE_CHANGED.
                mPhone.mCi.getDataCallList(obtainMessage(DctConstants.EVENT_DATA_STATE_CHANGED));
                putRecoveryAction(RecoveryAction.CLEANUP);
                break;
            case RecoveryAction.CLEANUP:
                cleanUpAllConnections(Phone.REASON_PDP_RESET);
                putRecoveryAction(RecoveryAction.REREGISTER);
                break;
            case RecoveryAction.REREGISTER:
                mPhone.getServiceStateTracker().reRegisterNetwork(null);
                putRecoveryAction(RecoveryAction.RADIO_RESTART);
                break;
            case RecoveryAction.RADIO_RESTART:
                putRecoveryAction(RecoveryAction.RADIO_RESTART_WITH_PROP);
                restartRadio();
                break;
            case RecoveryAction.RADIO_RESTART_WITH_PROP:
                // This is in case radio restart has not recovered the data.
                // It will set an additional "gsm.radioreset" property to tell
                // RIL or system to take further action.
                // The implementation of hard reset recovery action is up to OEM product.
                // Once RADIO_RESET property is consumed, it is expected to set back
                // to false by RIL.
                EventLog.writeEvent(EventLogTags.DATA_STALL_RECOVERY_RADIO_RESTART_WITH_PROP, -1);
                if (DBG) log("restarting radio with gsm.radioreset to true");
                SystemProperties.set(RADIO_RESET_PROPERTY, "true");
                // give 1 sec so property change can be notified.
                try {
                    Thread.sleep(1000);
                } catch (InterruptedException e) {
                    // Do not swallow the interruption: restore the interrupt status so
                    // that code further up the stack can still observe it. The wait is
                    // best-effort, so recovery continues regardless.
                    Thread.currentThread().interrupt();
                }
                restartRadio();
                putRecoveryAction(RecoveryAction.GET_DATA_CALL_LIST);
                break;
            default:
                throw new RuntimeException("doRecovery: Invalid recoveryAction=" +
                    recoveryAction);
            }
            mSentSinceLastRecv = 0;
        }
    }