天天看點

核心中的UDP socket流程(8)——udp_sendmsg

作者:[email protected]

繼續分析udp_sendmsg,

     ipc.oif = sk->sk_bound_dev_if;

     err = sock_tx_timestamp(msg, sk, &ipc.shtx);

     if (err)

          return err;

     if (msg->msg_controllen) {

          err = ip_cmsg_send(sock_net(sk), msg, &ipc);

          if (err)

               return err;

          if (ipc.opt)

               free = 1;

          connected = 0;

     }

     if (!ipc.opt)

          ipc.opt = inet->opt;

ipc.oif設定為socket bind的interface,並設定發送資料包時間戳的產生策略。msg->msg_controllen若不為0,那麼就調用ip_cmsg_send——從函數名字上看,好像是要發送cmsg,然而實際上卻沒有任何資料發送。請看它的定義。

int ip_cmsg_send(struct net *net, struct msghdr *msg, struct ipcm_cookie *ipc)

{

     int err;

     struct cmsghdr *cmsg;

     for (cmsg = CMSG_FIRSTHDR(msg); cmsg; cmsg = CMSG_NXTHDR(msg, cmsg)) {

          if (!CMSG_OK(msg, cmsg))

               return -EINVAL;

          if (cmsg->cmsg_level != SOL_IP)

               continue;

          switch (cmsg->cmsg_type) {

          case IP_RETOPTS:

               err = cmsg->cmsg_len - CMSG_ALIGN(sizeof(struct cmsghdr));

               err = ip_options_get(net, &ipc->opt, CMSG_DATA(cmsg),

                              err 40 ? err : 40);

               if (err)

                    return err;

               break;

          case IP_PKTINFO:

          {

               struct in_pktinfo *info;

               if (cmsg->cmsg_len != CMSG_LEN(sizeof(struct in_pktinfo)))

                    return -EINVAL;

               info = (struct in_pktinfo *)CMSG_DATA(cmsg);

               ipc->oif = info->ipi_ifindex;

               ipc->addr = info->ipi_spec_dst.s_addr;

          }

          default:

     return 0;

}

從以上代碼可以看出,cmsg只支援兩種類型:一個是IP_RETOPTS——設定包的option,另一個是IP_PKTINFO——設定發送所用的interface和源位址。具體定義,可以使用man 7 ip來檢視。

     saddr = ipc.addr;

     ipc.addr = faddr = daddr;

     if (ipc.opt && ipc.opt->srr) {

          if (!daddr)

          faddr = ipc.opt->faddr;

     tos = RT_TOS(inet->tos);

     if (sock_flag(sk, SOCK_LOCALROUTE) ||

         (msg->msg_flags & MSG_DONTROUTE) ||

         (ipc.opt && ipc.opt->is_strictroute)) {

          tos |= RTO_ONLINK;

前面對於位址賦值的幾行代碼,我還沒有看出什麼用途來——哪位高手指教一下。後面是在幾種情況下,為tos設定RTO_ONLINK標誌,表示發送該包時不需要路由——即到達目的位址不需要經過下一跳。

     if (ipv4_is_multicast(daddr)) {

          if (!ipc.oif)

               ipc.oif = inet->mc_index;

          if (!saddr)

               saddr = inet->mc_addr;

如果目的位址是多點傳播位址且沒有bind interface,那麼就使用本地多點傳播interface,如果沒有設定源位址,那麼就使用本地多點傳播位址。

     if (connected)

          rt = (struct rtable *)sk_dst_check(sk, 0);

     if (rt == NULL) {

          struct flowi fl = { .oif = ipc.oif,

                        .mark = sk->sk_mark,

                        .nl_u = { .ip4_u =

                               { .daddr = faddr,

                              .saddr = saddr,

                              .tos = tos } },

                        .proto = sk->sk_protocol,

                        .flags = inet_sk_flowi_flags(sk),

                        .uli_u = { .ports =

                                { .sport = inet->inet_sport,

                              .dport = dport } } };

          struct net *net = sock_net(sk);

          security_sk_classify_flow(sk, &fl);

          err = ip_route_output_flow(net, &rt, &fl, sk, 1);

          if (err) {

               if (err == -ENETUNREACH)

                    IP_INC_STATS_BH(net, IPSTATS_MIB_OUTNOROUTES);

               goto out;

          err = -EACCES;

          if ((rt->rt_flags & RTCF_BROADCAST) &&

              !sock_flag(sk, SOCK_BROADCAST))

          if (connected)

               sk_dst_set(sk, dst_clone(&rt->dst));

如果是面向連接的socket,那麼首先檢查該socket之前使用的路由。如果沒有有效的路由,那麼就使用ip_route_output_flow來查找一個路由。如果沒有找到,就傳回錯誤。如果是面向連接的socket,那麼就將該路由儲存到socket結構中。

      if (msg->msg_flags&MSG_CONFIRM)

          goto do_confirm;

如果設定了MSG_CONFIRM標誌,那麼就跳轉到do_confirm。MSG_CONFIRM的具體作用,請檢視man 2 sendto。

back_from_confirm:

     saddr = rt->rt_src;

     if (!ipc.addr)

          daddr = ipc.addr = rt->rt_dst;

     lock_sock(sk);

     if (unlikely(up->pending)) {

          /* The socket is already corked while preparing it. */

          /* ... which is an evident application bug. --ANK */

          release_sock(sk);

          LIMIT_NETDEBUG(KERN_DEBUG "udp cork app bug 2\n");

          err = -EINVAL;

          goto out;

     /*

     * Now cork the socket to pend data.

     */

     inet->cork.fl.fl4_dst = daddr;

     inet->cork.fl.fl_ip_dport = dport;

     inet->cork.fl.fl4_src = saddr;

     inet->cork.fl.fl_ip_sport = inet->inet_sport;

     up->pending = AF_INET;

使用路由的源位址作為包的源位址,然後檢查這個socket是否還有pending的資料——如果有的話,根據注釋,這是應用程式的bug,不應該發生。正常情況下,設定inet->cork.fl,並置socket為pending狀態。

do_append_data:

     up->len += ulen;

     getfrag = is_udplite ? udplite_getfrag : ip_generic_getfrag;

     err = ip_append_data(sk, getfrag, msg->msg_iov, ulen,

               sizeof(struct udphdr), &ipc, &rt,

               corkreq ? msg->msg_flags|MSG_MORE : msg->msg_flags);

          udp_flush_pending_frames(sk);

     else if (!corkreq)

          err = udp_push_pending_frames(sk);

     else if (unlikely(skb_queue_empty(&sk->sk_write_queue)))

          up->pending = 0;

     release_sock(sk);

使用ip_append_data將這次要發送的資料追加到該socket上——這個函數下次進行具體的分析。如果出錯,就drop所有socket上的資料包。如果corkreq為0,那麼就調用udp_push_pending_frames去發送資料。如果該socket的發送隊列為空,那麼就將socket的pending狀態重置。最後釋放socket鎖——從這裡可以看出,在使用socket發送資料時,當多線程一起發送時,雖然不能保證包的順序,但是可以保證每個資料包不會與其他資料包混在一起。

out:

     ip_rt_put(rt);

     if (free)

          kfree(ipc.opt);

     if (!err)

          return len;

     * ENOBUFS = no kernel mem, SOCK_NOSPACE = no sndbuf space. Reporting

     * ENOBUFS might not be good (it's not tunable per se), but otherwise

     * we don't have a good statistic (IpOutDiscards but it can be too many

     * things). We could add another new stat but at least for now that

     * seems like overkill.

     if (err == -ENOBUFS || test_bit(SOCK_NOSPACE, &sk->sk_socket->flags)) {

          UDP_INC_STATS_USER(sock_net(sk),

                    UDP_MIB_SNDBUFERRORS, is_udplite);

     return err;

最後進行資源的釋放。如果沒有出錯,就傳回發送的資料長度,如果出錯,就增加錯誤統計。

繼續閱讀