天天看点

内核中的UDP socket流程(5)——inet_create

内核中的UDP socket流程(5)——inet_create

作者:[email protected]

进入函数inet_create

static int inet_create(struct net *net, struct socket *sock, int protocol,

         int kern)

{

    struct sock *sk;

    struct inet_protosw *answer;

    struct inet_sock *inet;

    struct proto *answer_prot;

    unsigned char answer_flags;

    char answer_no_check;

    int try_loading_module = 0;

    int err;

    if (unlikely(!inet_ehash_secret))

        if (sock->type != SOCK_RAW && sock->type != SOCK_DGRAM)

            build_ehash_secret();

当socket为UDP时,sock->type = SOCK_DGRAM,所以这里不会调用到build_ehash_secret()。其实这个函数是为了产生一个随机数赋给inet_ehash_secret,暂时还不知道这个变量的确切用途。

通过sock->type和protocol在inetsw链表中找到对应的proto;

/* Look for the requested type/protocol pair. */

lookup_protocol:
    /* Default result when the list for this socket type is empty. */
    err = -ESOCKTNOSUPPORT;
    rcu_read_lock();
    list_for_each_entry_rcu(answer, &inetsw[sock->type], list) {
        /* Assume success; reset to an error below if this entry is rejected. */
        err = 0;
        /* Check the non-wild match. */
        if (protocol == answer->protocol) {
            if (protocol != IPPROTO_IP)
                break;
        } else {
            /* Check for the two wild cases. */
            if (IPPROTO_IP == protocol) {
                /* Caller passed the wildcard: adopt this entry's protocol. */
                protocol = answer->protocol;
                break;
            }
            /* The entry itself is the wildcard: it accepts any protocol. */
            if (IPPROTO_IP == answer->protocol)
                break;
        }
        /* No match on this entry; this is the result if the list runs out. */
        err = -EPROTONOSUPPORT;
    }

从以上代码可以看出,IPPROTO_IP是作为通配(wild)协议使用的。首先尽量寻找与参数protocol完全匹配的协议;如果找不到完全匹配的,当参数protocol是IPPROTO_IP时,该socket类型下的任一协议都可以使用;当链表中的协议是IPPROTO_IP时,就使用该协议。

inetsw是一个static的全局变量

static struct list_head inetsw[SOCK_MAX];

该全局变量在函数inet_init中初始化

static int __init inet_init(void)

    /* 

    Skip some codes

    */

    /* Register the socket-side information for inet_create. */

    for (r = &inetsw[0]; r &inetsw[SOCK_MAX]; ++r)

        INIT_LIST_HEAD(r);

    for (q = inetsw_array; q &inetsw_array[INETSW_ARRAY_LEN]; ++q)

        inet_register_protosw(q);

通过inetsw_array来初始化inetsw,那么在inetsw中保存的协议顺序就与inetsw_array相同。

/* Upon startup we insert all the elements in inetsw_array[] into

 * the linked list inetsw.

 */

static struct inet_protosw inetsw_array[] =

    {

        .type = SOCK_STREAM,

        .protocol = IPPROTO_TCP,

        .prot = &tcp_prot,

        .ops = &inet_stream_ops,

        .no_check = 0,

        .flags = INET_PROTOSW_PERMANENT |

             INET_PROTOSW_ICSK,

    },

        .type = SOCK_DGRAM,

        .protocol = IPPROTO_UDP,

        .prot = &udp_prot,

        .ops = &inet_dgram_ops,

        .no_check = UDP_CSUM_DEFAULT,

        .flags = INET_PROTOSW_PERMANENT,

       },

       {

     .type = SOCK_RAW,

     .protocol = IPPROTO_IP,    /* wild card */

     .prot = &raw_prot,

     .ops = &inet_sockraw_ops,

     .no_check = UDP_CSUM_DEFAULT,

     .flags = INET_PROTOSW_REUSE,

       }

};

那么对于UDP来说,在找到对应的UDP protocol后,answer就指向inetsw_array[1]。

    sock->ops = answer->ops;

    answer_prot = answer->prot;

    answer_no_check = answer->no_check;

    answer_flags = answer->flags;

    rcu_read_unlock();

对于UDP来说,sock->ops就指向inet_dgram_ops,answer_prot就是udp_prot,answer_no_check为UDP_CSUM_DEFAULT,answer_flags为INET_PROTOSW_PERMANENT。

下面是inet_dgram_ops的定义

const struct proto_ops inet_dgram_ops = {

    .family         = PF_INET,

    .owner         = THIS_MODULE,

    .release     = inet_release,

    .bind         = inet_bind,

    .connect     = inet_dgram_connect,

    .socketpair     = sock_no_socketpair,

    .accept         = sock_no_accept,

    .getname     = inet_getname,

    .poll         = udp_poll,

    .ioctl         = inet_ioctl,

    .listen         = sock_no_listen,

    .shutdown     = inet_shutdown,

    .setsockopt     = sock_common_setsockopt,

    .getsockopt     = sock_common_getsockopt,

    .sendmsg     = inet_sendmsg,

    .recvmsg     = inet_recvmsg,

    .mmap         = sock_no_mmap,

    .sendpage     = inet_sendpage,

#ifdef CONFIG_COMPAT

    .compat_setsockopt = compat_sock_common_setsockopt,

    .compat_getsockopt = compat_sock_common_getsockopt,

#endif

answer_prot的定义是

struct proto udp_prot = {

    .name         = "UDP",

    .close         = udp_lib_close,

    .connect     = ip4_datagram_connect,

    .disconnect     = udp_disconnect,

    .ioctl         = udp_ioctl,

    .destroy     = udp_destroy_sock,

    .setsockopt     = udp_setsockopt,

    .getsockopt     = udp_getsockopt,

    .sendmsg     = udp_sendmsg,

    .recvmsg     = udp_recvmsg,

    .sendpage     = udp_sendpage,

    .backlog_rcv     = __udp_queue_rcv_skb,

    .hash         = udp_lib_hash,

    .unhash         = udp_lib_unhash,

    .rehash         = udp_v4_rehash,

    .get_port     = udp_v4_get_port,

    .memory_allocated = &udp_memory_allocated,

    .sysctl_mem     = sysctl_udp_mem,

    .sysctl_wmem     = &sysctl_udp_wmem_min,

    .sysctl_rmem     = &sysctl_udp_rmem_min,

    .obj_size     = sizeof(struct udp_sock),

    .slab_flags     = SLAB_DESTROY_BY_RCU,

    .h.udp_table     = &udp_table,

    .compat_setsockopt = compat_udp_setsockopt,

    .compat_getsockopt = compat_udp_getsockopt,

继续后面的代码

     err = -ENOBUFS;

     sk = sk_alloc(net, PF_INET, GFP_KERNEL, answer_prot);

     if (sk == NULL)

          goto out;

     err = 0;

     sk->sk_no_check = answer_no_check;

     if (INET_PROTOSW_REUSE & answer_flags)

          sk->sk_reuse = 1;

首先申请一个struct sock的内存,然后设置sk->sk_no_check,它用于表示是否检查checksum,1为不检查,0为检查。接着通过answer_flags与INET_PROTOSW_REUSE的位与操作来判断是否将sk->sk_reuse置1——该宏表示这个协议会自动重用socket。

     inet = inet_sk(sk);

     inet->is_icsk = (INET_PROTOSW_ICSK & answer_flags) != 0;

     inet->nodefrag = 0;

首先,将通用sock结构用函数inet_sk转为inet_sock——UDP的域是AF_INET,然后与宏INET_PROTOSW_ICSK相与来确定是否是基于连接——查看inetsw_array,只有TCP是有这个标志的。inet->nodefrag设为0,是允许分片。

     if (SOCK_RAW == sock->type) {

          inet->inet_num = protocol;

          if (IPPROTO_RAW == protocol)

               inet->hdrincl = 1;

     }

如果创建的是RAW socket,那么就以protocol协议号作为本地端口。如果protocol是IPPROTO_RAW,那么就给inet->hdrincl置1,表示由用户来创建IP头,而不再由内核添加IP头。

if (ipv4_config.no_pmtu_disc)

          inet->pmtudisc = IP_PMTUDISC_DONT;

     else

          inet->pmtudisc = IP_PMTUDISC_WANT;

     inet->inet_id = 0;

设置路径MTU发现(PMTU discovery)的策略:IP_PMTUDISC_DONT表示不做路径MTU发现,发出的包不设置DF(Don't Fragment)标志,因此允许被分片;IP_PMTUDISC_WANT表示按每条路由的设置来决定是否做路径MTU发现。然后初始化inet_id。

sock_init_data(sock, sk);

     sk->sk_destruct = inet_sock_destruct;

     sk->sk_protocol = protocol;

     sk->sk_backlog_rcv = sk->sk_prot->backlog_rcv;

初始化sk的结构体。

     inet->uc_ttl = -1;

     inet->mc_loop = 1;

     inet->mc_ttl = 1;

     inet->mc_all = 1;

     inet->mc_index = 0;

     inet->mc_list = NULL;

     sk_refcnt_debug_inc(sk);

初始化剩余的inet的成员变量,这些变量的含义可以直接看inet_sock结构体的注释,很清晰。uc_ttl表示单播ttl,mc_loop表示回环是否有效,mc_ttl表示组播ttl,mc_all是为了支持新的socket option IP_MULTICAST_ALL,如果设置了这个标志,表示所有发往端口的组播都会传给这个socket,如果没有设置,那么只有加入了的组的组播才会传给socket。

if (inet->inet_num) {
    /* It assumes that any protocol which allows
     * the user to assign a number at socket
     * creation time automatically
     * shares.
     */
    inet->inet_sport = htons(inet->inet_num);
    /* Add to protocol hash chains. */
    sk->sk_prot->hash(sk);
}

/* Run the optional per-protocol init hook; release the sock if it fails. */
if (sk->sk_prot->init) {
    err = sk->sk_prot->init(sk);
    if (err)
        sk_common_release(sk);
}

当inet->inet_num不为0时,设置inet的source port,并把sk加到hash表中——对于UDP,不会执行这个。如果该协议有init函数,就调用init。通过上面udp_prot的定义可知,udp是没有init函数的,而tcp和raw的init函数分别为tcp_v4_init_sock和raw_init。

到此,UDP的socket已经建立成功了。

继续阅读