天天看點

UDP socket流程(13)——ip_push_pending_frames

作者:[email protected]

部落格:linuxfocus.blog.chinaunix.net

今天開始學習新的函數ip_push_pending_frames,這個函數會被icmp_push_reply,ip_send_reply,raw_sendmsg,和udp_push_pending_frames調用。該函數用于将該socket上的所有pending的IP分片,組成一個IP封包發送出去。

下面是函數的具體的代碼。

int ip_push_pending_frames(struct sock *sk)

{

    struct sk_buff *skb, *tmp_skb;

    struct sk_buff **tail_skb;

    struct inet_sock *inet = inet_sk(sk);

    struct net *net = sock_net(sk);

    struct ip_options *opt = NULL;

    struct rtable *rt = (struct rtable *)inet->cork.dst;

    struct iphdr *iph;

    __be16 df = 0;

    __u8 ttl;

    int err = 0;

     /* 發送隊列可能為空 */

    if ((skb = __skb_dequeue(&sk->sk_write_queue)) == NULL)

        goto out;

    /* 獲得分片連結清單 */

    tail_skb = &(skb_shinfo(skb)->frag_list);

    /* move skb->data to ip header from ext header */

    /* 調整data指針位置 */

    if (skb->data skb_network_header(skb))

        __skb_pull(skb, skb_network_offset(skb));

     /* 調整所有發送緩沖中的sk_buff的data指針位置,并更新第一個sk_buff的資料長度 */

    while ((tmp_skb = __skb_dequeue(&sk->sk_write_queue)) != NULL) {

        __skb_pull(tmp_skb, skb_network_header_len(skb));

        *tail_skb = tmp_skb;

        tail_skb = &(tmp_skb->next);

        skb->len = tmp_skb->len;

        skb->data_len = tmp_skb->len;

        skb->truesize = tmp_skb->truesize;

        tmp_skb->destructor = NULL;

        tmp_skb->sk = NULL;

    }

    /* Unless user demanded real pmtu discovery (IP_PMTUDISC_DO), we allow

     * to fragment the frame generated here. No matter, what transforms

     * how transforms change size of the packet, it will come out.

     */

    /* 允許本地分片 */

    if (inet->pmtudisc IP_PMTUDISC_DO)

        skb->local_df = 1;

    /* DF bit is set when we want to see DF on outgoing frames.

     * If local_df is set too, we still allow to fragment this frame

     * locally. */

    /* 不允許分片,或者不需要分片 */

    if (inet->pmtudisc >= IP_PMTUDISC_DO ||

     (skb->len = dst_mtu(&rt->dst) &&

     ip_dont_fragment(sk, &rt->dst)))

        df = htons(IP_DF);

      /* ip option 儲存在cork中, 則使用cork中的option */

    if (inet->cork.flags & IPCORK_OPT)

        opt = inet->cork.opt;

      /* 選擇合适的TTL值 */

    if (rt->rt_type == RTN_MULTICAST)

        ttl = inet->mc_ttl;

    else

        ttl = ip_select_ttl(inet, &rt->dst);

     /* 得到IP封包頭的位址 */ 

    iph = (struct iphdr *)skb->data;

    /* 初始化IP封包頭的内容 */

    iph->version = 4;

    iph->ihl = 5;

    if (opt) {

        /*

        填充IP option

        看到這裡,可以發現opt隻可能從cork中獲得

         */

        iph->ihl = opt->optlen>>2;

        ip_options_build(skb, opt, inet->cork.addr, rt, 0);

    iph->tos = inet->tos;

    iph->frag_off = df;

    ip_select_ident(iph, &rt->dst, sk);

    iph->ttl = ttl;

    iph->protocol = sk->sk_protocol;

    iph->saddr = rt->rt_src;

    iph->daddr = rt->rt_dst;

    skb->priority = sk->sk_priority;

    skb->mark = sk->sk_mark;

    /*

     * Steal rt from cork.dst to avoid a pair of atomic_inc/atomic_dec

     * on dst refcount

    inet->cork.dst = NULL;

    skb_dst_set(skb, &rt->dst);

    if (iph->protocol == IPPROTO_ICMP)

        icmp_out_count(net, ((struct icmphdr *)

            skb_transport_header(skb))->type);

    /* Netfilter gets whole the not fragmented skb. */

    /* 發送資料 */

    err = ip_local_out(skb);

    if (err) {

        if (err > 0)

            err = net_xmit_errno(err);

        if (err)

            goto error;

out:

    ip_cork_release(inet);

    return err;

error:

    IP_INC_STATS(net, IPSTATS_MIB_OUTDISCARDS);

    goto out;

}

這個函數比較容易看懂——當然不是所有細節都非常清楚了。

第一遍浏覽這些API,隻要搞懂該API的大緻流程和用途。當整個架構建立好以後,再慢慢補充細節。

繼續閱讀