天天看點

tcp/ip源代碼(17)——ip_fragment

作者:[email protected] 部落格:linuxfocus.blog.chinaunix.net

今天學習上次剩下的、UDP 發送資料調用流程中的最後一個函數:ip_fragment。當IP資料包過大(超過MTU)時,這個函數用於對其進行分片發送。

/*

 *    This IP datagram is too large to be sent in one piece. Break it up into

 *    smaller pieces (each of size equal to IP header plus

 *    a block of the data of the original IP data part) that will yet fit in a

 *    single device frame, and queue such a frame for sending.

 */

int ip_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *))

{

    struct iphdr *iph;

    int ptr;

    struct net_device *dev;

    struct sk_buff *skb2;

    unsigned int mtu, hlen, left, len, ll_rs;

    int offset;

    __be16 not_last_frag;

    struct rtable *rt = skb_rtable(skb);

    int err = 0;

    dev = rt->dst.dev;

    /*

     *    Point into the IP datagram header.

     */

    /* 得到IP封包頭的指針 */

    iph = ip_hdr(skb);

    if (unlikely((iph->frag_off & htons(IP_DF)) && !skb->local_df)) {

        /* 禁止分片,增加錯誤計數 */

        IP_INC_STATS(dev_net(dev), IPSTATS_MIB_FRAGFAILS);

        icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED,

             htonl(ip_skb_dst_mtu(skb)));

        kfree_skb(skb);

        return -EMSGSIZE;

    }

     *    Setup starting values.

     /* 得到IP封包總長度 */

    hlen = iph->ihl * 4;

    /* 這裡的mtu為真正的MTU-IP封包頭,即允許的最大IP資料長度 */

    mtu = dst_mtu(&rt->dst) - hlen;    /* Size of data space */

#ifdef CONFIG_BRIDGE_NETFILTER

    if (skb->nf_bridge)

        mtu -= nf_bridge_mtu_reduction(skb);

#endif

    /* 為這個skb_buff置上分片完成的标志 */

    IPCB(skb)->flags |= IPSKB_FRAG_COMPLETE;

    /* When frag_list is given, use it. First, check its validity:

     * some transformers could create wrong frag_list or break existing

     * one, it is not prohibited. In this case fall back to copying.

     *

     * LATER: this step can be merged to real generation of fragments,

     * we can switch to copy when see the first bad fragment.

    /* 根據前面的學習,我們知道4層有可能會将資料包分片。這些分片存放在skb的frag_list中*/

    if (skb_has_frags(skb)) {

        /* skb_buffer已經有了一個frag list */

        struct sk_buff *frag, *frag2;

        /* 拿到資料包的長度 */

        int first_len = skb_pagelen(skb);

         /*

         1.資料包的長度超過了MTU;

         2.資料包長度沒有按8位元組對齊;

         3.資料包設定了IP_MF或者IP_OFFSET位

         這樣,進入slow_path

         */

        if (first_len - hlen > mtu ||

         ((first_len - hlen) & 7) ||

         (iph->frag_off & htons(IP_MF|IP_OFFSET)) ||

         skb_cloned(skb))

            goto slow_path; //跳到slow_path

         /* 周遊每一個分片 */

        skb_walk_frags(skb, frag) {

            /* 檢查每個分片,如果有一個分片不符合要求,就隻能使用slow path */

            /* Correct geometry. */

            if (frag->len > mtu ||

             ((frag->len & 7) && frag->next) ||

             skb_headroom(frag) hlen)

                goto slow_path_clean;

            /* Partially cloned skb? */

            if (skb_shared(frag))

            BUG_ON(frag->sk);

            if (skb->sk) {

                frag->sk = skb->sk;

                frag->destructor = sock_wfree;

            }

            skb->truesize -= frag->truesize;

        }

        /* Everything is OK. Generate! */

/* 現在可以進行fast path了*/

        err = 0;

        offset = 0;

        /* 拿到frag list */

        frag = skb_shinfo(skb)->frag_list;

        /* 重置原來的frag list,相當于從skb_buff上取走了frag list */

        skb_frag_list_init(skb);

        /* 

        得到實際的資料長度,置分片标志位和校驗和

        */

        skb->data_len = first_len - skb_headlen(skb);

        skb->len = first_len;

        iph->tot_len = htons(first_len);

        iph->frag_off = htons(IP_MF);

        ip_send_check(iph);

         /* 分别處理每一個分片 */

        for (;;) {

            /* Prepare header of the next frame,

             * before previous one went down. */

            if (frag) {

                /* 表示checksm已經算好*/

                frag->ip_summed = CHECKSUM_NONE;

                /* 設定傳輸層*/

                skb_reset_transport_header(frag);

                __skb_push(frag, hlen);

                /* 設定網絡層 */

                skb_reset_network_header(frag);

                memcpy(skb_network_header(frag), iph, hlen);

                iph = ip_hdr(frag);

                iph->tot_len = htons(frag->len);

                ip_copy_metadata(frag, skb);

                if (offset == 0)

                    ip_options_fragment(frag);

                offset = skb->len - hlen;

                iph->frag_off = htons(offset>>3);

                if (frag->next != NULL)

                    iph->frag_off |= htons(IP_MF);

                /* Ready, complete checksum */

/* 計算分片的校驗和 */

                ip_send_check(iph);

     /* 發送 */

            err = output(skb);

            if (!err)

                IP_INC_STATS(dev_net(dev), IPSTATS_MIB_FRAGCREATES);

            if (err || !frag)

                break;

            skb = frag;

            frag = skb->next;

            skb->next = NULL;

        if (err == 0) {

            IP_INC_STATS(dev_net(dev), IPSTATS_MIB_FRAGOKS);

            return 0;

         /* 出錯是否記憶體 */

        while (frag) {

            skb = frag->next;

            kfree_skb(frag);

            frag = skb;

        return err;

slow_path_clean:

        /* 清除shared sk_buff */

        skb_walk_frags(skb, frag2) {

            if (frag2 == frag)

            frag2->sk = NULL;

            frag2->destructor = NULL;

            skb->truesize = frag2->truesize;

slow_path:

    left = skb->len - hlen;        /* Space per frame */

    ptr = hlen;        /* Where to start from */

    /* for bridged IP traffic encapsulated inside f.e. a vlan header,

     * we need to make room for the encapsulating header

    ll_rs = LL_RESERVED_SPACE_EXTRA(rt->dst.dev, nf_bridge_pad(skb));

     *    Fragment the datagram.

     /* 得到偏移 */

    offset = (ntohs(iph->frag_off) & IP_OFFSET) 3;

   /* 通過IP_MF标志位,判斷是否是最後一個分片 */

    not_last_frag = iph->frag_off & htons(IP_MF);

     *    Keep copying data until we run out.

    while (left > 0) {

        /* 計算分片長度 */

        len = left;

        /* IF: it doesn't fit, use 'mtu' - the data space left */

        if (len > mtu)

            len = mtu;

        /* IF: we are not sending upto and including the packet end

         then align the next start on an eight byte boundary */

        if (len left)    {

            len &= ~7;

        /*

         *    Allocate buffer.

         */

          /* 為分片申請該分片申請一個sk_buff */

        if ((skb2 = alloc_skb(len hlen ll_rs, GFP_ATOMIC)) == NULL) {

            NETDEBUG(KERN_INFO "IP: frag: no memory for new fragment!\n");

            err = -ENOMEM;

            goto fail;

         *    Set up data on packet

         /* 複制資料,以及運輸層 */

        ip_copy_metadata(skb2, skb);

        skb_reserve(skb2, ll_rs);

        skb_put(skb2, len hlen);

        skb_reset_network_header(skb2);

        skb2->transport_header = skb2->network_header hlen;

         *    Charge the memory for the fragment to any owner

         *    it might possess

        if (skb->sk)

            skb_set_owner_w(skb2, skb->sk);

         *    Copy the packet header into the new buffer.

        skb_copy_from_linear_data(skb, skb_network_header(skb2), hlen);

         *    Copy a block of the IP datagram.

        if (skb_copy_bits(skb, ptr, skb_transport_header(skb2), len))

            BUG();

        left -= len;

         *    Fill in the new header fields.

        /* 填充網絡層 */

        iph = ip_hdr(skb2);

        iph->frag_off = htons((offset >> 3));

        /* ANK: dirty, but effective trick. Upgrade options only if

         * the segment to be fragmented was THE FIRST (otherwise,

         * options are already fixed) and make it ONCE

         * on the initial skb, so that all the following fragments

         * will inherit fixed options.

        /* 如果是第一個分片, 填充ip option */

        if (offset == 0)

            ip_options_fragment(skb);

         *    Added AC : If we are fragmenting a fragment that's not the

         *         last fragment then keep MF on each bit

        /* 設定IP_MF标志位 */

        if (left > 0 || not_last_frag)

            iph->frag_off |= htons(IP_MF);

        ptr = len;

        offset = len;

         *    Put this fragment into the sending queue.

        iph->tot_len = htons(len hlen);

 /* 計算校驗和 */

 /* 發送該分片 */

        err = output(skb2);

        if (err)

        IP_INC_STATS(dev_net(dev), IPSTATS_MIB_FRAGCREATES);

     /* 釋放sk_buff */

    kfree_skb(skb);

    IP_INC_STATS(dev_net(dev), IPSTATS_MIB_FRAGOKS);

    return err;

fail:

    IP_INC_STATS(dev_net(dev), IPSTATS_MIB_FRAGFAILS);

}

前段時間一直沒有大塊的時間,每天工作回家比較累,也就懶得看了。周末又陪老婆打球,休息了一下,也沒看。今天總算把它看完了。

繼續閱讀