UDP
UDP和IP的差別是多了端口的概念。
看一下端口綁定的過程:
int inet_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
{
struct sockaddr_in *addr = (struct sockaddr_in *)uaddr;
struct sock *sk = sock->sk;
struct inet_sock *inet = inet_sk(sk);
unsigned short snum;
int chk_addr_ret;
int err;
// 如果套接字有自己定義的bind函數則調用,隻有raw類型的才有自定義的bind函數。
if (sk->sk_prot->bind) {
err = sk->sk_prot->bind(sk, uaddr, addr_len);
goto out;
}
err = -EINVAL;
if (addr_len < sizeof(struct sockaddr_in))
goto out;
chk_addr_ret = inet_addr_type(addr->sin_addr.s_addr);
err = -EADDRNOTAVAIL;
if (!sysctl_ip_nonlocal_bind &&
!inet->freebind &&
addr->sin_addr.s_addr != INADDR_ANY &&
chk_addr_ret != RTN_LOCAL &&
chk_addr_ret != RTN_MULTICAST &&
chk_addr_ret != RTN_BROADCAST)
goto out;
snum = ntohs(addr->sin_port);
err = -EACCES;
if (snum && snum < PROT_SOCK && !capable(CAP_NET_BIND_SERVICE))
goto out;
lock_sock(sk);
err = -EINVAL;
if (sk->sk_state != TCP_CLOSE || inet->num)
goto out_release_sock;
inet->rcv_saddr = inet->saddr = addr->sin_addr.s_addr;
if (chk_addr_ret == RTN_MULTICAST || chk_addr_ret == RTN_BROADCAST)
inet->saddr = 0; /* Use device */
//調用具體協定的get_port方法,擷取一個端口。
if (sk->sk_prot->get_port(sk, snum)) {
inet->saddr = inet->rcv_saddr = 0;
err = -EADDRINUSE;
goto out_release_sock;
}
if (inet->rcv_saddr)
sk->sk_userlocks |= SOCK_BINDADDR_LOCK;
if (snum)
sk->sk_userlocks |= SOCK_BINDPORT_LOCK;
inet->sport = htons(inet->num);
inet->daddr = 0;
inet->dport = 0;
sk_dst_reset(sk);
err = 0;
out_release_sock:
release_sock(sk);
out:
return err;
}
調用具體協定的get_port方法,擷取一個端口:sk->sk_prot->get_port(sk, snum)
sk_prot是inetsw_array中的udp_prot,是以get_port是udp_v4_get_port。
注意:從使用者态往核心态的協定選擇走inetsw_array
static int udp_v4_get_port(struct sock *sk, unsigned short snum)
{
struct hlist_node *node;
struct sock *sk2;
struct inet_sock *inet = inet_sk(sk);
write_lock_bh(&udp_hash_lock);
if (snum == 0) {
int best_size_so_far, best, result, i;
// 端口号的範圍在sysctl_local_port_range
// 每次調用都從udp_port_rover開始查找可用的端口
if (udp_port_rover > sysctl_local_port_range[1] ||
udp_port_rover < sysctl_local_port_range[0])
udp_port_rover = sysctl_local_port_range[0];
best_size_so_far = 32767;
best = result = udp_port_rover;
// 周遊udp_hash表
// 在128個桶中尋找桶最小的,然後每隔128次試探一次。
for (i = 0; i < UDP_HTABLE_SIZE; i++, result++) {
struct hlist_head *list;
int size;
list = &udp_hash[result & (UDP_HTABLE_SIZE - 1)];
if (hlist_empty(list)) {
if (result > sysctl_local_port_range[1])
result = sysctl_local_port_range[0] +
((result - sysctl_local_port_range[0]) &
(UDP_HTABLE_SIZE - 1));
goto gotit;
}
size = 0;
// 計算目前桶的大小,是否是最小的桶,如果是更新best_size_so_far
sk_for_each(sk2, node, list)
if (++size >= best_size_so_far)
goto next;
best_size_so_far = size;
best = result;
next:;
}
result = best;
// 最小的桶每隔128試探一次。
for(i = 0; i < (1 << 16) / UDP_HTABLE_SIZE; i++, result += UDP_HTABLE_SIZE) {
if (result > sysctl_local_port_range[1])
result = sysctl_local_port_range[0]
+ ((result - sysctl_local_port_range[0]) &
(UDP_HTABLE_SIZE - 1));
if (!udp_lport_inuse(result))
break;
}
if (i >= (1 << 16) / UDP_HTABLE_SIZE)
goto fail;
gotit:
udp_port_rover = snum = result;
} else {
sk_for_each(sk2, node,
&udp_hash[snum & (UDP_HTABLE_SIZE - 1)]) {
struct inet_sock *inet2 = inet_sk(sk2);
if (inet2->num == snum &&
sk2 != sk &&
!ipv6_only_sock(sk2) &&
(!sk2->sk_bound_dev_if ||
!sk->sk_bound_dev_if ||
sk2->sk_bound_dev_if == sk->sk_bound_dev_if) &&
(!inet2->rcv_saddr ||
!inet->rcv_saddr ||
inet2->rcv_saddr == inet->rcv_saddr) &&
(!sk2->sk_reuse || !sk->sk_reuse))
goto fail;
}
}
inet->num = snum;
if (sk_unhashed(sk)) {
struct hlist_head *h = &udp_hash[snum & (UDP_HTABLE_SIZE - 1)];
sk_add_node(sk, h);
sock_prot_inc_use(sk->sk_prot);
}
write_unlock_bh(&udp_hash_lock);
return 0;
fail:
write_unlock_bh(&udp_hash_lock);
return 1;
}
下面看一下UDP在核心中收資料過程。軟中斷最終調用到ip_local_deliver_finish,根據IP封包頭部的協定字段skb->nh.iph->protocol在inet_protos找到對應的協定處理函數icmp, udp, tcp。
對于UDP來說
static struct net_protocol udp_protocol = {
.handler = udp_rcv,
.err_handler = udp_err,
.no_policy = 1,
};
int udp_rcv(struct sk_buff *skb)
{
struct sock *sk;
struct udphdr *uh;
unsigned short ulen;
struct rtable *rt = (struct rtable*)skb->dst;
// 從skb擷取源位址目的位址
u32 saddr = skb->nh.iph->saddr;
u32 daddr = skb->nh.iph->daddr;
int len = skb->len;
uh = skb->h.uh;
ulen = ntohs(uh->len);
if (ulen > len || ulen < sizeof(*uh))
goto short_packet;
if (pskb_trim(skb, ulen))
goto short_packet;
// 檢查校驗和
if (udp_checksum_init(skb, uh, ulen, saddr, daddr) < 0)
goto csum_error;
if(rt->rt_flags & (RTCF_BROADCAST|RTCF_MULTICAST))
return udp_v4_mcast_deliver(skb, uh, saddr, daddr);
// 根據skb中的源位址和端口,目的位址和端口,反向查找到這個資料應該發送到哪個sock上
sk = udp_v4_lookup(saddr, uh->source, daddr, uh->dest, skb->dev->ifindex);
if (sk != NULL) {
// 把skb添加到sk->sk_receive_queue尾部
// 并喚醒等待在sleep上的程序
int ret = udp_queue_rcv_skb(sk, skb);
sock_put(sk);
if (ret > 0)
return -ret;
return 0;
}
if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb))
goto drop;
/* No socket. Drop packet silently, if checksum is wrong */
if (udp_checksum_complete(skb))
goto csum_error;
UDP_INC_STATS_BH(UDP_MIB_NOPORTS);
icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PORT_UNREACH, 0);
// 忽略錯誤處理
...
...
...
}
下面看一下UDP如何把資料傳回給使用者。
static int udp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
size_t len, int noblock, int flags, int *addr_len)
{
struct inet_sock *inet = inet_sk(sk);
struct sockaddr_in *sin = (struct sockaddr_in *)msg->msg_name;
struct sk_buff *skb;
int copied, err;
if (addr_len)
*addr_len=sizeof(*sin);
if (flags & MSG_ERRQUEUE)
return ip_recv_error(sk, msg, len);
try_again:
// 接收一個skb,如果是blocking的socket,而且此時沒有資料包會進入wait_for_packet
skb = skb_recv_datagram(sk, flags, noblock, &err);
if (!skb)
goto out;
// 下面開始向使用者态拷貝資料
copied = skb->len - sizeof(struct udphdr);
if (copied > len) {
copied = len;
msg->msg_flags |= MSG_TRUNC;
}
if (skb->ip_summed==CHECKSUM_UNNECESSARY) {
err = skb_copy_datagram_iovec(skb, sizeof(struct udphdr), msg->msg_iov,
copied);
} else if (msg->msg_flags&MSG_TRUNC) {
if (__udp_checksum_complete(skb))
goto csum_copy_err;
err = skb_copy_datagram_iovec(skb, sizeof(struct udphdr), msg->msg_iov,
copied);
} else {
err = skb_copy_and_csum_datagram_iovec(skb, sizeof(struct udphdr), msg->msg_iov);
if (err == -EINVAL)
goto csum_copy_err;
}
if (err)
goto out_free;
sock_recv_timestamp(msg, sk, skb);
if (sin)
{
sin->sin_family = AF_INET;
sin->sin_port = skb->h.uh->source;
sin->sin_addr.s_addr = skb->nh.iph->saddr;
memset(sin->sin_zero, 0, sizeof(sin->sin_zero));
}
if (inet->cmsg_flags)
ip_cmsg_recv(msg, skb);
err = copied;
if (flags & MSG_TRUNC)
err = skb->len - sizeof(struct udphdr);
// 省略錯誤處理
...
...
...
}