天天看點

Linux網絡解讀(5) - UDP資料接收

UDP

UDP和IP的差別是多了端口的概念。

看一下端口綁定的過程:

/*
 * inet_bind - bind an AF_INET socket to a local address and port.
 *
 * @sock:     socket being bound; its struct sock is taken from sock->sk
 * @uaddr:    caller-supplied address, interpreted as struct sockaddr_in
 * @addr_len: length of @uaddr in bytes
 *
 * Returns 0 on success or a negative errno:
 *   -EINVAL        @addr_len is too short, or the socket is not in TCP_CLOSE
 *                  state / already has a local port (inet->num != 0)
 *   -EADDRNOTAVAIL the address is not acceptable for binding
 *   -EACCES        a port below PROT_SOCK was requested without
 *                  CAP_NET_BIND_SERVICE
 *   -EADDRINUSE    the protocol's get_port() callback refused the port
 * If the protocol provides its own bind callback, that callback's result is
 * returned unchanged.
 */
int inet_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
{
        struct sockaddr_in *addr = (struct sockaddr_in *)uaddr;
        struct sock *sk = sock->sk;
        struct inet_sock *inet = inet_sk(sk);
        unsigned short snum;
        int chk_addr_ret;
        int err;
        // If the protocol defines its own bind callback, delegate to it
        // entirely (per the original author, only raw sockets provide one).
        if (sk->sk_prot->bind) {
                err = sk->sk_prot->bind(sk, uaddr, addr_len);
                goto out;
        }
        // Each check below pre-loads err with the code to return if it fails.
        err = -EINVAL;
        if (addr_len < sizeof(struct sockaddr_in))
                goto out;
        chk_addr_ret = inet_addr_type(addr->sin_addr.s_addr);
        // Reject the address unless non-local binds are enabled (sysctl or
        // per-socket freebind), it is INADDR_ANY, or it was classified as
        // local, multicast or broadcast.
        err = -EADDRNOTAVAIL;
        if (!sysctl_ip_nonlocal_bind &&
            !inet->freebind &&
            addr->sin_addr.s_addr != INADDR_ANY &&
            chk_addr_ret != RTN_LOCAL &&
            chk_addr_ret != RTN_MULTICAST &&
            chk_addr_ret != RTN_BROADCAST)
                goto out;
        // Port in host byte order; 0 means "let the protocol pick one".
        snum = ntohs(addr->sin_port);
        // Explicit ports below PROT_SOCK require CAP_NET_BIND_SERVICE.
        err = -EACCES;
        if (snum && snum < PROT_SOCK && !capable(CAP_NET_BIND_SERVICE))
                goto out;
        lock_sock(sk);
        // Refuse to bind a socket that is not closed or already has a port.
        err = -EINVAL;
        if (sk->sk_state != TCP_CLOSE || inet->num)
                goto out_release_sock;
        inet->rcv_saddr = inet->saddr = addr->sin_addr.s_addr;
        if (chk_addr_ret == RTN_MULTICAST || chk_addr_ret == RTN_BROADCAST)
                inet->saddr = 0;  /* Use device */
                
        // Ask the protocol-specific get_port callback for the port; a
        // non-zero return means failure, so undo the address assignment.
        if (sk->sk_prot->get_port(sk, snum)) {
                inet->saddr = inet->rcv_saddr = 0;
                err = -EADDRINUSE;
                goto out_release_sock;
        }
        // Record which parts of the binding were explicitly chosen by the caller.
        if (inet->rcv_saddr)
                sk->sk_userlocks |= SOCK_BINDADDR_LOCK;
        if (snum)
                sk->sk_userlocks |= SOCK_BINDPORT_LOCK;
        // inet->num is read back here because get_port() may have selected
        // the port itself; store it in network byte order as the source port.
        inet->sport = htons(inet->num);
        // Clear any destination and reset the socket's cached route.
        inet->daddr = 0;
        inet->dport = 0;
        sk_dst_reset(sk);
        err = 0;
out_release_sock:
        release_sock(sk);
out:
        return err;
}      

調用具體協定的get_port方法,擷取一個端口:sk->sk_prot->get_port(sk, snum)

對UDP套接字而言,sk_prot是inetsw_array中登記的udp_prot,所以這裡的get_port就是udp_v4_get_port。

注意:從使用者態進入核心態時,具體使用哪個協定是透過inetsw_array來選擇的。

/*
 * udp_v4_get_port - assign local port @snum to @sk and hash the socket.
 *
 * @sk:   socket that needs a port
 * @snum: requested port in host byte order; 0 asks for automatic selection
 *
 * Runs entirely under write_lock_bh(&udp_hash_lock).
 * With @snum == 0, a port is searched for starting at udp_port_rover inside
 * sysctl_local_port_range. With an explicit @snum, the matching hash bucket
 * is scanned for a conflicting socket.
 *
 * Returns 0 on success (inet->num is set and the socket is added to udp_hash
 * if it was not hashed yet) or 1 if no usable port was found / the requested
 * port conflicts with an existing socket.
 */
static int udp_v4_get_port(struct sock *sk, unsigned short snum)
{
        struct hlist_node *node;
        struct sock *sk2;
        struct inet_sock *inet = inet_sk(sk);
        write_lock_bh(&udp_hash_lock);
        if (snum == 0) {
                int best_size_so_far, best, result, i;
                
                // Candidate ports live in sysctl_local_port_range[0..1].
                // Every call resumes the search at udp_port_rover; reset the
                // rover to the low end if it has drifted outside the range.
                if (udp_port_rover > sysctl_local_port_range[1] ||
                    udp_port_rover < sysctl_local_port_range[0])
                        udp_port_rover = sysctl_local_port_range[0];
                        
                best_size_so_far = 32767;
                best = result = udp_port_rover;
                
                // Pass 1: look at UDP_HTABLE_SIZE consecutive candidates
                // (128 buckets according to the original author), i.e. one
                // per masked bucket index, and remember the candidate whose
                // bucket has the shortest chain.
                for (i = 0; i < UDP_HTABLE_SIZE; i++, result++) {
                        struct hlist_head *list;
                        int size;
                        list = &udp_hash[result & (UDP_HTABLE_SIZE - 1)];
                        // An empty bucket is taken immediately, after folding
                        // the candidate back into the range if it ran past
                        // the upper bound.
                        if (hlist_empty(list)) {
                                if (result > sysctl_local_port_range[1])
                                        result = sysctl_local_port_range[0] +
                                                ((result - sysctl_local_port_range[0]) &
                                                 (UDP_HTABLE_SIZE - 1));
                                goto gotit;
                        }
                        size = 0;
                        // Count this bucket's chain, giving up as soon as it
                        // is no shorter than the best seen so far; otherwise
                        // it becomes the new best.
                        sk_for_each(sk2, node, list)
                                if (++size >= best_size_so_far)
                                        goto next;
                        best_size_so_far = size;
                        best = result;
                next:;
                }
                result = best;
                // Pass 2: starting from the best candidate, probe every
                // UDP_HTABLE_SIZE-th port (at most (1 << 16) / UDP_HTABLE_SIZE
                // probes) until one is not in use.
                for(i = 0; i < (1 << 16) / UDP_HTABLE_SIZE; i++, result += UDP_HTABLE_SIZE) {
                        if (result > sysctl_local_port_range[1])
                                result = sysctl_local_port_range[0]
                                        + ((result - sysctl_local_port_range[0]) &
                                           (UDP_HTABLE_SIZE - 1));
                        if (!udp_lport_inuse(result))
                                break;
                }
                // Loop ran to completion without a break: nothing was free.
                if (i >= (1 << 16) / UDP_HTABLE_SIZE)
                        goto fail;
gotit:
                // Remember where to resume next time and use this port.
                udp_port_rover = snum = result;
        } else {
                // Explicit port: fail if another socket in the same bucket
                // holds this port, is not IPv6-only, overlaps in bound device
                // (either side unbound, or same device), overlaps in local
                // address (either side unset, or same address), and at least
                // one of the two sockets does not have sk_reuse set.
                sk_for_each(sk2, node,
                            &udp_hash[snum & (UDP_HTABLE_SIZE - 1)]) {
                        struct inet_sock *inet2 = inet_sk(sk2);
                        if (inet2->num == snum &&
                            sk2 != sk &&
                            !ipv6_only_sock(sk2) &&
                            (!sk2->sk_bound_dev_if ||
                             !sk->sk_bound_dev_if ||
                             sk2->sk_bound_dev_if == sk->sk_bound_dev_if) &&
                            (!inet2->rcv_saddr ||
                             !inet->rcv_saddr ||
                             inet2->rcv_saddr == inet->rcv_saddr) &&
                            (!sk2->sk_reuse || !sk->sk_reuse))
                                goto fail;
                }
        }
        inet->num = snum;
        // Insert the socket into its port bucket unless it is already hashed.
        if (sk_unhashed(sk)) {
                struct hlist_head *h = &udp_hash[snum & (UDP_HTABLE_SIZE - 1)];
                sk_add_node(sk, h);
                sock_prot_inc_use(sk->sk_prot);
        }
        write_unlock_bh(&udp_hash_lock);
        return 0;
fail:
        write_unlock_bh(&udp_hash_lock);
        return 1;
}      
Linux網絡解讀(5) - UDP資料接收

下面看一下UDP在核心中接收資料的過程。軟中斷最終會調用到ip_local_deliver_finish,它根據IP封包頭部的協定字段skb->nh.iph->protocol,在inet_protos中找到對應協定(如icmp、udp、tcp)的處理函數。

對於UDP來說,註冊的協定處理結構如下,其中接收處理函數是udp_rcv:

/*
 * Protocol descriptor for UDP: udp_rcv is installed as the packet handler
 * and udp_err as the error handler.
 * NOTE(review): the meaning of no_policy is not visible here; presumably it
 * tells the caller to skip its own policy check because udp_rcv calls
 * xfrm4_policy_check itself - confirm against the IP input path.
 */
static struct net_protocol udp_protocol = {

.handler = udp_rcv,

.err_handler = udp_err,

.no_policy = 1,

};

/*
 * udp_rcv - entry point for a received UDP packet.
 *
 * @skb: received packet; the IP header is read via skb->nh.iph and the UDP
 *       header via skb->h.uh
 *
 * Validates the UDP length and checksum, hands broadcast/multicast packets
 * to udp_v4_mcast_deliver(), and otherwise looks up the destination socket
 * and queues the packet on it. When no socket matches, an ICMP port
 * unreachable is sent.
 *
 * Returns 0 when the packet was queued, or the negated result of
 * udp_queue_rcv_skb() when that result is positive. The error labels
 * (short_packet, csum_error, drop) and the remaining return paths are
 * elided in this excerpt.
 */
int udp_rcv(struct sk_buff *skb)
{
          struct sock *sk;
          struct udphdr *uh;
        unsigned short ulen;
        struct rtable *rt = (struct rtable*)skb->dst;
        
        // Source and destination addresses come from the IP header.
        u32 saddr = skb->nh.iph->saddr;
        u32 daddr = skb->nh.iph->daddr;
        int len = skb->len;
        uh = skb->h.uh;
        // Length claimed by the UDP header must fit in the buffer and cover
        // at least the UDP header itself.
        ulen = ntohs(uh->len);
        if (ulen > len || ulen < sizeof(*uh))
                goto short_packet;
        // Trim the buffer to the UDP length; a failure is treated as a short packet.
        if (pskb_trim(skb, ulen))
                goto short_packet;
        // Verify / set up the checksum.
        if (udp_checksum_init(skb, uh, ulen, saddr, daddr) < 0)
                goto csum_error;
        // Packets routed as broadcast or multicast take a separate delivery path.
        if(rt->rt_flags & (RTCF_BROADCAST|RTCF_MULTICAST))
                return udp_v4_mcast_deliver(skb, uh, saddr, daddr);
        // Use the packet's source address/port, destination address/port and
        // incoming interface index to find the socket that should receive it.
        sk = udp_v4_lookup(saddr, uh->source, daddr, uh->dest, skb->dev->ifindex);
        if (sk != NULL) {
                // Per the original author: udp_queue_rcv_skb appends the skb
                // to the tail of sk->sk_receive_queue and wakes up any process
                // sleeping on the socket (callee not shown here).
                int ret = udp_queue_rcv_skb(sk, skb);
                // Release the socket now that queuing is done (presumably
                // dropping a reference taken by udp_v4_lookup - confirm).
                sock_put(sk);
                // A positive result is passed back to the caller negated.
                if (ret > 0)
                        return -ret;
                return 0;
        }
        if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb))
                goto drop;
        /* No socket. Drop packet silently, if checksum is wrong */
        if (udp_checksum_complete(skb))
                goto csum_error;
        // Count the "no port" event and report port unreachable to the sender.
        UDP_INC_STATS_BH(UDP_MIB_NOPORTS);
        icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PORT_UNREACH, 0);
        // Error handling omitted in this excerpt.
        ...
        ...
        ...
}      

下面看一下UDP如何把資料傳回給使用者。

/*
 * udp_recvmsg - copy one received UDP datagram to the caller's buffers.
 *
 * @iocb:     not used in the visible part of this excerpt
 * @sk:       socket to receive from
 * @msg:      destination message; msg_iov receives the payload, msg_name (if
 *            set) receives the sender's address, msg_flags may get MSG_TRUNC
 * @len:      capacity of the caller's buffer in bytes
 * @noblock:  passed through to skb_recv_datagram()
 * @flags:    receive flags; MSG_ERRQUEUE and MSG_TRUNC are examined here
 * @addr_len: if non-NULL, set to sizeof(struct sockaddr_in)
 *
 * In the visible part, err ends up holding the number of payload bytes
 * copied, or the full payload length when the caller passed MSG_TRUNC.
 * The out, out_free and csum_copy_err labels and the final return are
 * elided in this excerpt.
 */
static int udp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
                       size_t len, int noblock, int flags, int *addr_len)
{
        struct inet_sock *inet = inet_sk(sk);
          struct sockaddr_in *sin = (struct sockaddr_in *)msg->msg_name;
          struct sk_buff *skb;
          int copied, err;
        if (addr_len)
                *addr_len=sizeof(*sin);
        // Requests for the error queue are served by ip_recv_error instead.
        if (flags & MSG_ERRQUEUE)
                return ip_recv_error(sk, msg, len);
try_again:
        // Fetch one skb. Per the original author, a blocking socket with no
        // pending packet ends up in wait_for_packet (callee not shown here).
        skb = skb_recv_datagram(sk, flags, noblock, &err);
        if (!skb)
                goto out;
                
        // Copy the payload to user space. The payload is the skb minus the
        // UDP header; if it does not fit in len, copy only len bytes and flag
        // the message as truncated.
          copied = skb->len - sizeof(struct udphdr);
        if (copied > len) {
                copied = len;
                msg->msg_flags |= MSG_TRUNC;
        }
        // Three copy paths:
        //  - checksum needs no verification: plain copy;
        //  - truncated copy: verify the checksum over the whole packet first,
        //    then plain copy;
        //  - otherwise: copy and checksum together, where -EINVAL is treated
        //    as a checksum error.
        if (skb->ip_summed==CHECKSUM_UNNECESSARY) {
                err = skb_copy_datagram_iovec(skb, sizeof(struct udphdr), msg->msg_iov,
                                              copied);
        } else if (msg->msg_flags&MSG_TRUNC) {
                if (__udp_checksum_complete(skb))
                        goto csum_copy_err;
                err = skb_copy_datagram_iovec(skb, sizeof(struct udphdr), msg->msg_iov,
                                              copied);
        } else {
                err = skb_copy_and_csum_datagram_iovec(skb, sizeof(struct udphdr), msg->msg_iov);
                if (err == -EINVAL)
                        goto csum_copy_err;
        }
        if (err)
                goto out_free;
        sock_recv_timestamp(msg, sk, skb);
        // Report the sender's address and port, taken from the packet headers.
        if (sin)
        {
                sin->sin_family = AF_INET;
                sin->sin_port = skb->h.uh->source;
                sin->sin_addr.s_addr = skb->nh.iph->saddr;
                memset(sin->sin_zero, 0, sizeof(sin->sin_zero));
          }
        // Deliver ancillary data if the socket has any cmsg flags set.
        if (inet->cmsg_flags)
                ip_cmsg_recv(msg, skb);
        // Result is the number of bytes copied, unless the caller passed
        // MSG_TRUNC, in which case the full payload length is reported.
        err = copied;
        if (flags & MSG_TRUNC)
                err = skb->len - sizeof(struct udphdr);
                
        // Error handling omitted in this excerpt.
        ...
        ...
        ...
}      

繼續閱讀