部落格:linuxfocus.blog.chinaunix.net
今天開始學習新的函數 ip_push_pending_frames。這個函數會被 icmp_push_reply、ip_send_reply、raw_sendmsg 和 udp_push_pending_frames 調用。該函數用於將該 socket 上所有 pending 的 IP 分片組成一個 IP 封包發送出去。
下面是函數的具體的代碼。
/*
 * ip_push_pending_frames - combine all pending IP fragments on a socket
 * into one IP packet and send it.
 *
 * @sk: socket whose sk_write_queue holds the pending buffers.
 *
 * The first queued sk_buff becomes the head of the packet; every other
 * queued sk_buff is chained onto the head's frag_list. The IP header is
 * then filled in from the socket, the corked options and the cached
 * route, and the packet is handed to ip_local_out().
 *
 * Returns 0 if the write queue was empty or the packet was sent; otherwise
 * the error from ip_local_out() (positive values are first passed through
 * net_xmit_errno()). The cork state is released on every path.
 */
int ip_push_pending_frames(struct sock *sk)
{
	struct sk_buff *skb, *tmp_skb;
	struct sk_buff **tail_skb;
	struct inet_sock *inet = inet_sk(sk);
	struct net *net = sock_net(sk);
	struct ip_options *opt = NULL;
	struct rtable *rt = (struct rtable *)inet->cork.dst;
	struct iphdr *iph;
	__be16 df = 0;
	__u8 ttl;
	int err = 0;

	/* The write queue may be empty: nothing to send. */
	skb = __skb_dequeue(&sk->sk_write_queue);
	if (skb == NULL)
		goto out;

	/* Remaining buffers are appended to the head skb's fragment list. */
	tail_skb = &(skb_shinfo(skb)->frag_list);

	/* move skb->data to ip header from ext header */
	if (skb->data < skb_network_header(skb))
		__skb_pull(skb, skb_network_offset(skb));

	/*
	 * Drain the rest of the queue: strip the network header from each
	 * buffer, link it after the previous fragment, and fold its length
	 * and truesize into the head skb. The fragment is then detached from
	 * the socket (destructor and sk cleared).
	 */
	while ((tmp_skb = __skb_dequeue(&sk->sk_write_queue)) != NULL) {
		__skb_pull(tmp_skb, skb_network_header_len(skb));
		*tail_skb = tmp_skb;
		tail_skb = &(tmp_skb->next);
		skb->len += tmp_skb->len;
		skb->data_len += tmp_skb->len;
		skb->truesize += tmp_skb->truesize;
		tmp_skb->destructor = NULL;
		tmp_skb->sk = NULL;
	}

	/* Unless user demanded real pmtu discovery (IP_PMTUDISC_DO), we allow
	 * to fragment the frame generated here. No matter, what transforms
	 * how transforms change size of the packet, it will come out.
	 */
	if (inet->pmtudisc < IP_PMTUDISC_DO)
		skb->local_df = 1;

	/* DF bit is set when we want to see DF on outgoing frames.
	 * If local_df is set too, we still allow to fragment this frame
	 * locally. */
	if (inet->pmtudisc >= IP_PMTUDISC_DO ||
	    (skb->len <= dst_mtu(&rt->dst) &&
	     ip_dont_fragment(sk, &rt->dst)))
		df = htons(IP_DF);

	/* Use the IP options saved in the cork, if any were stored there. */
	if (inet->cork.flags & IPCORK_OPT)
		opt = inet->cork.opt;

	/* Pick the TTL: multicast routes use the socket's multicast TTL. */
	if (rt->rt_type == RTN_MULTICAST)
		ttl = inet->mc_ttl;
	else
		ttl = ip_select_ttl(inet, &rt->dst);

	/* skb->data now points at the IP header; fill it in. */
	iph = (struct iphdr *)skb->data;
	iph->version = 4;
	iph->ihl = 5;
	if (opt) {
		/*
		 * Options can only come from the cork in this function.
		 * Extend the header length by the option length in 32-bit
		 * words (assumes optlen is in bytes) and write the options.
		 */
		iph->ihl += opt->optlen >> 2;
		ip_options_build(skb, opt, inet->cork.addr, rt, 0);
	}
	iph->tos = inet->tos;
	iph->frag_off = df;
	ip_select_ident(iph, &rt->dst, sk);
	iph->ttl = ttl;
	iph->protocol = sk->sk_protocol;
	iph->saddr = rt->rt_src;
	iph->daddr = rt->rt_dst;

	skb->priority = sk->sk_priority;
	skb->mark = sk->sk_mark;
	/*
	 * Steal rt from cork.dst to avoid a pair of atomic_inc/atomic_dec
	 * on dst refcount
	 */
	inet->cork.dst = NULL;
	skb_dst_set(skb, &rt->dst);

	if (iph->protocol == IPPROTO_ICMP)
		icmp_out_count(net, ((struct icmphdr *)
			skb_transport_header(skb))->type);

	/* Netfilter gets whole the not fragmented skb. */
	/* Send the packet. */
	err = ip_local_out(skb);
	if (err) {
		if (err > 0)
			err = net_xmit_errno(err);
		if (err)
			goto error;
	}

out:
	ip_cork_release(inet);
	return err;

error:
	IP_INC_STATS(net, IPSTATS_MIB_OUTDISCARDS);
	goto out;
}
這個函數比較容易看懂——當然不是所有細節都非常清楚了。
第一遍瀏覽這些 API,只要搞懂該 API 的大致流程和用途。當整個架構建立好以後,再慢慢補充細節。