天天看點

socket-select ,poll ,epoll 代碼分析

refer to:

linux programming 4

select、poll、epoll之間的差別總結[整理]

UNP 1

三種模型中主要的結構體

int select(int nfds,fd_set *readfds,fd_set *writefds,fd_set *exceptfds,struct timeval *timeout);

typedef struct{ __fd_mask  fds_bits[__FD_SETSIZE / __NFDBITS];} fd_set;

typedef long int __fd_mask;

#define __NFDBITS (8 * (int) sizeof (__fd_mask))

#define __FD_SETSIZE 1024

sizeof (__fd_mask) = 4(32位元系統;在64位元系統上long為8個位元組)

是以在32位元系統上fd_set相當于int x[32](64位元系統上相當于long x[16]),總共1024位元,其每一位代表一個fd

使用方法:

1.将需要監視的fd加入readfds,FD_SET(fd, &readfds);

2.給select提供一個fd_set *readfds(和writefds,exceptfds)類型的參數,其實是32個int的數組。以便select去監視之。

隻要fd_set *readfds(或writefds,exceptfds)中有任意一個fd有動作,select就會傳回。

3.由于傳回的仍然是整個fd_set,是以需要輪詢fd_set中的所有位元,才能知道是哪個fd_set(readfds,writefds或exceptfds)裡的哪個fd在動作

在循環中,可以用FD_ISSET(fd,readfds)來檢查某個fd是否可讀。(因為select函數成功傳回時會将未準備好的描述符位清零。通常我們使用FD_ISSET是為了檢查在select函數傳回後,某個描述符是否準備好,以便進行接下來的處理操作。)

int poll(struct pollfd *fds, nfds_t nfds, int timeout);

struct pollfd

  {

    int fd;

    short int events;

    short int revents;

  };

typedef unsigned long int nfds_t;

poll傳回的是傳回值int 和 參數 fds

1.将需要監視的fd放入pollfd[x].fd,并在pollfd[x].events中設定要監視的事件(例如POLLIN)

2.給poll提供一個pollfd數組,poll會監視pollfd數組中的所有fd的動作。隻要有一個有動作,就會傳回。

3.由于傳回的仍然是整個pollfd數組,是以需要輪詢整個pollfd數組才能知道到底是pollfd數組中的哪個fd有動作。

在循環中,用  if( pollfd[x].revents& POLLIN )來測試是否pollfd[x].fd可讀。

extern int epoll_wait (int __epfd, struct epoll_event *__events,

      int __maxevents, int __timeout);

typedef union epoll_data

{

  void *ptr;

  int fd;

  uint32_t u32;

  uint64_t u64;

} epoll_data_t;

struct epoll_event

{

  uint32_t events;

  epoll_data_t data;

} __attribute__ ((__packed__)); 

使用方法: 

1.用epoll_ctl将需要監視的fd加入到epoll_create建立的執行個體epollfd中。此時,需要注意fd和epoll_event綁定(epoll_event.data.fd=fd):因為epoll_wait傳回的是epoll_event,而不是fd。

2.給epoll_wait提供一個epoll_event的數組, epoll_wait 會監視epollfd中的所有fd,隻要epollfd中有任意一個fd有動作,epoll_wait就會把有動作的fd所對應的epoll_event填入epoll_event數組,并傳回已就緒的個數。

3.讀寫epoll_event數組中各元素的data.fd(例如events[i].data.fd)即可。

以上可知: 

1.在每次循環執行select之前需要重新設定select監視的fd_set。因為select傳回時把監視的fd_set的未有動作的fd位清空了。這樣,select傳回之後,就能用 

FD_ISSET來判斷是哪個fd準備好了。 

而poll使用的是pollfd結構體,此結構體有一個成員revents可以訓示目前的fd是否可讀或可寫。 

不管怎麼樣,poll和select都要循環整個已被監視的數組,才能得知是哪個fd就緒了(有動作了)。 

2. 

epoll_wait傳回的就是已就緒數組。 

select 模型 

[[email protected] chapter15]# cat server5.c 
/*  For our final example, server5.c, 
    we include the sys/time.h and sys/ioctl.h headers in place of signal.h
    in our last program and declare some extra variables to deal with select.  */

#include <sys/types.h>
#include <sys/socket.h>
#include <stdio.h>
#include <netinet/in.h>
#include <sys/time.h>
#include <sys/ioctl.h>
#include <unistd.h>
#include <stdlib.h>
/* Debug tracing.  With DEBUG defined, DBG(fmt, ...) prints the source file,
   function and line to stderr followed by the printf-style message;
   without DEBUG it expands to an empty statement. */
#define DEBUG
#ifdef DEBUG
/* Wrapped in do { } while (0) so that the two fprintf calls behave as one
   statement, e.g. under an unbraced if/else. */
#define DBG(...) \
    do { \
        fprintf(stderr, " DBG(%s, %s(), %d): ", __FILE__, __func__, __LINE__); \
        fprintf(stderr, __VA_ARGS__); \
    } while (0)
#else
#define DBG(...) do { } while (0)
#endif
/*
 * server5: a single-process TCP server on port 9734 that multiplexes the
 * listening socket and every client socket with select().
 *
 * For each readable client it reads one byte, increments it and writes it
 * back.  A client whose socket reports zero readable bytes has closed the
 * connection and is removed from the watched set.
 *
 * The function never returns normally; it leaves through exit(1) when a
 * setup call or select() fails.
 */
int main()
{
    int server_sockfd, client_sockfd;
    socklen_t server_len, client_len;   /* bind()/accept() expect socklen_t */
    struct sockaddr_in server_address = {0};
    struct sockaddr_in client_address;
    int result;
    fd_set readfds, testfds;

/*  Create and name a socket for the server.  */

    server_sockfd = socket(AF_INET, SOCK_STREAM, 0);
    if (server_sockfd == -1) {
        perror("socket");
        exit(1);
    }
    DBG("server_sockfd = %d \n", server_sockfd);
    server_address.sin_family = AF_INET;
    server_address.sin_addr.s_addr = htonl(INADDR_ANY);
    server_address.sin_port = htons(9734);
    server_len = sizeof(server_address);

    if (bind(server_sockfd, (struct sockaddr *)&server_address, server_len) == -1) {
        perror("bind");
        exit(1);
    }

/*  Create a connection queue and initialize readfds to handle input from server_sockfd.  */

    if (listen(server_sockfd, 5) == -1) {
        perror("listen");
        exit(1);
    }

    FD_ZERO(&readfds);
    FD_SET(server_sockfd, &readfds);

/*  Now wait for clients and requests.
    Since we pass a null pointer as the timeout parameter, no timeout will occur.
    The program will exit and report an error if select returns a value of less than 1.  */

    while (1) {
        char ch;
        int fd;
        int nread;

        /* select() overwrites the set it is given, so work on a copy and
           keep readfds as the master list of watched descriptors. */
        testfds = readfds;

        DBG("server waiting\n");
        result = select(FD_SETSIZE, &testfds, (fd_set *)0, (fd_set *)0, NULL);

        if (result < 1) {
            perror("server5");
            exit(1);
        }

/*  Once we know we've got activity,
    we find which descriptor it's on by checking each in turn using FD_ISSET.
    The scan deliberately covers all FD_SETSIZE slots to show the cost of select().  */
        DBG("FD_SETSIZE = %d-------\n", FD_SETSIZE);
        DBG("%d times cycle---\n", FD_SETSIZE);
        for (fd = 0; fd < FD_SETSIZE; fd++) {
            DBG("the %d times cycle---\n", fd);
            if (!FD_ISSET(fd, &testfds)) {
                continue;
            }
            DBG("the %d times cycle catched------\n", fd);

/*  If the activity is on server_sockfd, it must be a request for a new connection
    and we add the associated client_sockfd to the descriptor set.  */

            if (fd == server_sockfd) {
                client_len = sizeof(client_address);
                client_sockfd = accept(server_sockfd,
                                       (struct sockaddr *)&client_address,
                                       &client_len);
                if (client_sockfd == -1) {
                    /* Never hand a negative fd to FD_SET. */
                    perror("accept");
                    continue;
                }
                DBG("client_sockfd = %d \n", client_sockfd);
                if (client_sockfd >= FD_SETSIZE) {
                    /* FD_SET on a descriptor outside the set is undefined. */
                    fprintf(stderr, "fd %d does not fit in an fd_set, rejecting client\n",
                            client_sockfd);
                    close(client_sockfd);
                    continue;
                }
                FD_SET(client_sockfd, &readfds);
                printf("adding client on fd %d\n", client_sockfd);
            }

/*  If it isn't the server, it must be client activity.
    If close is received, the client has gone away and we remove it from the descriptor set.
    Otherwise, we 'serve' the client as in the previous examples.  */

            else {
                /* FIONREAD reports the number of bytes available to read. */
                if (ioctl(fd, FIONREAD, &nread) == -1) {
                    perror("ioctl");
                    nread = 0;      /* treat a broken descriptor as closed */
                }

                if (nread == 0) {
                    close(fd);
                    FD_CLR(fd, &readfds);
                    DBG("removing client on fd %d\n", fd);
                } else if (read(fd, &ch, 1) == 1) {
                    DBG("serving client on fd %d\n", fd);
                    ch++;
                    if (write(fd, &ch, 1) != 1) {
                        perror("write");
                    }
                } else {
                    perror("read");
                    close(fd);
                    FD_CLR(fd, &readfds);
                }
            }
        }
    }
}
[[email protected] chapter15]# 

           
[[email protected] chapter15]# ./server5&
[1] 16058
[[email protected] chapter15]#  DBG(server5.c, main(), 31): server_sockfd = 3
 DBG(server5.c, main(), 57): server waiting
 ./client3
 DBG(server5.c, main(), 69): FD_SETSIZE = 1024------- 
 DBG(server5.c, main(), 70): 1024 times cycle--- 
 DBG(server5.c, main(), 72): the 0 times cycle--- 
 DBG(server5.c, main(), 72): the 1 times cycle--- 
 DBG(server5.c, main(), 72): the 2 times cycle--- 
 DBG(server5.c, main(), 72): the 3 times cycle--- 
 DBG(server5.c, main(), 74): the 3 times cycle catched------ 
 DBG(server5.c, main(), 82): client_sockfd = 4  
adding client on fd 4 
 DBG(server5.c, main(), 72): the 4 times cycle--- 
 DBG(server5.c, main(), 72): the 5 times cycle--- 
... 
 DBG(server5.c, main(), 72): the 1022 times cycle--- 
 DBG(server5.c, main(), 72): the 1023 times cycle--- 
 DBG(server5.c, main(), 57): server waiting 
 DBG(server5.c, main(), 69): FD_SETSIZE = 1024------- 
 DBG(server5.c, main(), 70): 1024 times cycle--- 
 DBG(server5.c, main(), 72): the 0 times cycle--- 
 DBG(server5.c, main(), 72): the 1 times cycle--- 
 DBG(server5.c, main(), 72): the 2 times cycle--- 
 DBG(server5.c, main(), 72): the 3 times cycle--- 
 DBG(server5.c, main(), 72): the 4 times cycle--- 
 DBG(server5.c, main(), 74): the 4 times cycle catched------ 
 DBG(server5.c, main(), 103): serving client on fd 4 
 DBG(server5.c, main(), 72): the 5 times cycle--- 
 DBG(server5.c, main(), 72): the 6 times cycle---
... 
 DBG(server5.c, main(), 72): the 142 times cycle--- 
char from server = B 
 DBG(server5.c, main(), 72): the 143 times cycle---
... 
 DBG(server5.c, main(), 72): the 1022 times cycle--- 
 DBG(server5.c, main(), 72): the 1023 times cycle--- 
 DBG(server5.c, main(), 57): server waiting 
 DBG(server5.c, main(), 69): FD_SETSIZE = 1024------- 
 DBG(server5.c, main(), 70): 1024 times cycle--- 
 DBG(server5.c, main(), 72): the 0 times cycle--- 
 DBG(server5.c, main(), 72): the 1 times cycle--- 
 DBG(server5.c, main(), 72): the 2 times cycle--- 
 DBG(server5.c, main(), 72): the 3 times cycle--- 
 DBG(server5.c, main(), 72): the 4 times cycle--- 
 DBG(server5.c, main(), 74): the 4 times cycle catched------ 
 DBG(server5.c, main(), 97): removing client on fd 4 
 DBG(server5.c, main(), 72): the 5 times cycle--- 
... 
 DBG(server5.c, main(), 72): the 1022 times cycle--- 
 DBG(server5.c, main(), 72): the 1023 times cycle--- 
 DBG(server5.c, main(), 57): server waiting
           

 client3和上一篇一樣 

line45 向readfds添加一個server_sockfd描述符,從列印結果看出server_sockfd=3 

因為0,1,2已被标準輸入,标準輸出和标準錯誤占用,是以3剛好是目前可用的最小描述符 

剛開始,程序被select阻塞,偵聽testfds檔案描述符集合的動作,從列印結果FD_SETSIZE=1024, 

可以看出testfds集合預設可以容納1024個檔案的動作,以被select偵聽 

當testfds有動作,就檢查是哪個fd的動作(由于testfds裡面現還未添加client_sockfd,大半是server_sockfd動作) 

而server_sockfd的動靜肯定是有客戶請求連接配接 

于是用accept函數接受連接配接,accept不會複制server_sockfd,而是為這個連接配接建立一個新的socket描述符client_sockfd,server_sockfd則繼續用于偵聽 

以後程序自動用client_sockfd與這個客戶通信 

從列印結果看client_sockfd=4 

将client_sockfd加入readfds(下一次循環開始時readfds會被複制到testfds),現在集合裡面有2個sockets描述符了 

... 

程序執行1024次檢查之後,又被select阻塞,

testfds第2次動作是4号sockets引起,由于客戶已經将資料寫入,4号sockets裡面有資料了

nread>0,可判斷此種情況

...

testfds第3次動作是4号sockets引起,由于客戶讀取過資料已經退出, 

nread=0,可判斷此種情況 

可以看出,testfds有三次動作 

而每次testfds有動作,程序都要執行1024次循環去檢查testfds中的具體哪個檔案有動作 

這個有點浪費,不如直接去檢查3和4号sockets 

在多客戶的情況下,檢查到最大的那個client_sockfd就可以了 

FD_ISSET 檢查指定fd是否屬于指定set 

       Four macros are provided to manipulate the sets.   FD_ZERO()  clears  a 

       set.   FD_SET()  and  FD_CLR() respectively add and remove a given file 

       descriptor from a set.  FD_ISSET() tests to see if a file descriptor is 

       part of the set; this is useful after select() returns. 

... 

poll模型如下 

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <errno.h>

#include <arpa/inet.h>
#include <netinet/in.h>
#include <sys/socket.h>
#include <poll.h>
#include <unistd.h>
#include <sys/types.h>

/* Address the server binds to (passed to socket_bind). */
#define IPADDRESS   "127.0.0.1"
/* TCP port the server binds to (passed to socket_bind). */
#define PORT        8787
/* Size in bytes of the per-read buffer in handle_connection. */
#define MAXLINE     1024
/* Backlog argument passed to listen(). */
#define LISTENQ     5
/* Number of slots in the pollfd table (listener plus clients). */
#define OPEN_MAX    1000
/* Timeout passed to poll(); a negative timeout means wait indefinitely. */
#define INFTIM      -1

// Function declarations
// Create a socket and bind it to ip:port
static int socket_bind(const char* ip,int port);
// I/O multiplexing with poll
static void do_poll(int listenfd);
// Handle the connected clients
static void handle_connection(struct pollfd *connfds,int num);

/*
 * Entry point of the poll() echo server: bind to IPADDRESS:PORT, start
 * listening and hand the listening socket to the poll loop.
 * Command-line arguments are not used.
 */
int main(int argc,char *argv[])
{
    int  listenfd;

    (void)argc;
    (void)argv;

    listenfd = socket_bind(IPADDRESS,PORT);
    if (listen(listenfd,LISTENQ) == -1)
    {
        perror("listen error: ");
        exit(1);
    }
    do_poll(listenfd);
    return 0;
}

/*
 * Create a TCP/IPv4 socket and bind it to ip:port.
 *
 * ip   - dotted-decimal IPv4 address string
 * port - port number in host byte order
 *
 * Returns the bound socket descriptor.  On any failure (socket creation,
 * unparsable address, bind) a message is printed and the process exits
 * with status 1.
 */
static int socket_bind(const char* ip,int port)
{
    int  listenfd;
    int  rc;
    struct sockaddr_in servaddr;

    listenfd = socket(AF_INET,SOCK_STREAM,0);
    if (listenfd == -1)
    {
        perror("socket error:");
        exit(1);
    }

    memset(&servaddr,0,sizeof(servaddr));
    servaddr.sin_family = AF_INET;
    /* inet_pton returns 1 on success, 0 for an unparsable string, -1 on error. */
    rc = inet_pton(AF_INET,ip,&servaddr.sin_addr);
    if (rc != 1)
    {
        if (rc == 0)
            fprintf(stderr,"inet_pton error: invalid IPv4 address: %s\n",ip);
        else
            perror("inet_pton error: ");
        close(listenfd);
        exit(1);
    }
    servaddr.sin_port = htons(port);

    if (bind(listenfd,(struct sockaddr*)&servaddr,sizeof(servaddr)) == -1)
    {
        perror("bind error: ");
        close(listenfd);
        exit(1);
    }
    return listenfd;
}

/*
 * Run the poll() event loop on the listening socket listenfd.
 *
 * Slot 0 of the pollfd table holds the listener; slots 1..OPEN_MAX-1 hold
 * client sockets, with fd == -1 marking a free slot.  New connections are
 * accepted here; ready clients are passed to handle_connection().
 *
 * Never returns.  Exits with status 1 on an unrecoverable poll()/accept()
 * error or when the table is full.
 */
static void do_poll(int listenfd)
{
    int  connfd;
    struct sockaddr_in cliaddr;
    socklen_t cliaddrlen;
    struct pollfd clientfds[OPEN_MAX];
    int maxi;       /* highest slot index currently in use */
    int i;
    int nready;

    // Register the listening descriptor
    clientfds[0].fd = listenfd;
    clientfds[0].events = POLLIN;
    clientfds[0].revents = 0;
    // Mark every client slot as free
    for (i = 1;i < OPEN_MAX;i++)
    {
        clientfds[i].fd = -1;
        clientfds[i].events = 0;
        clientfds[i].revents = 0;
    }
    maxi = 0;

    for ( ; ; )
    {
        // Number of descriptors with pending events
        nready = poll(clientfds,maxi+1,INFTIM);
        if (nready == -1)
        {
            // An interrupted call is not fatal; simply wait again
            if (errno == EINTR)
                continue;
            perror("poll error:");
            exit(1);
        }
        // Is the listening descriptor ready?
        if (clientfds[0].revents & POLLIN)
        {
            cliaddrlen = sizeof(cliaddr);
            // Accept the new connection
            if ((connfd = accept(listenfd,(struct sockaddr*)&cliaddr,&cliaddrlen)) == -1)
            {
                if (errno == EINTR)
                    continue;
                else
                {
                   perror("accept error:");
                   exit(1);
                }
            }
            // sin_port is in network byte order; convert it before printing
            fprintf(stdout,"accept a new client: %s:%d\n", inet_ntoa(cliaddr.sin_addr),ntohs(cliaddr.sin_port));
            // Store the new descriptor in the first free slot
            for (i = 1;i < OPEN_MAX;i++)
            {
                if (clientfds[i].fd < 0)
                {
                    clientfds[i].fd = connfd;
                    break;
                }
            }
            if (i == OPEN_MAX)
            {
                fprintf(stderr,"too many clients.\n");
                exit(1);
            }
            // Watch the new descriptor for input
            clientfds[i].events = POLLIN;
            // poll() has not filled this slot in yet; clear revents so
            // handle_connection() does not act on a stale or garbage value
            clientfds[i].revents = 0;
            // Track the highest slot index in use
            maxi = (i > maxi ? i : maxi);
            if (--nready <= 0)
                continue;
        }
        // Serve the ready clients
        handle_connection(clientfds,maxi);
    }
}

/*
 * Serve every ready client in connfds[1..num].
 *
 * connfds - pollfd table; slot 0 (the listener) is skipped and slots with
 *           fd < 0 are free
 * num     - highest slot index to examine
 *
 * For each ready client, up to MAXLINE bytes are read, copied to standard
 * output and echoed back.  A client that reached end-of-file or failed is
 * closed and its slot is released by setting fd to -1.
 */
static void handle_connection(struct pollfd *connfds,int num)
{
    int i;
    ssize_t n;
    char buf[MAXLINE];

    for (i = 1;i <= num;i++)
    {
        if (connfds[i].fd < 0)
            continue;
        // POLLERR/POLLHUP are reported even when not requested; handle them
        // here too so a dead peer gets cleaned up instead of being re-reported
        if (!(connfds[i].revents & (POLLIN | POLLERR | POLLHUP)))
            continue;

        // Receive the data sent by the client
        n = read(connfds[i].fd,buf,MAXLINE);
        if (n < 0 && (errno == EINTR || errno == EAGAIN))
            continue;   // transient condition: try again on the next poll()
        if (n <= 0)
        {
            // n == 0: peer closed the connection; n < 0: read error.
            // A negative length must never reach write().
            if (n < 0)
                perror("read error:");
            close(connfds[i].fd);
            connfds[i].fd = -1;
            continue;
        }

        if (write(STDOUT_FILENO,buf,n) < 0)
            perror("write error:");
        // Echo the buffer back to the client
        if (write(connfds[i].fd,buf,n) < 0)
        {
            perror("write error:");
            close(connfds[i].fd);
            connfds[i].fd = -1;
        }
    }
}
           

epoll模型如下 

#include "stdio.h"
#include "string.h"
#include "stdlib.h"
#include "sys/epoll.h"
#include <netinet/in.h>   
#include <sys/param.h>
#include <sys/stat.h>
#include <sys/ioctl.h>
#include <sys/socket.h>
#include <sys/time.h>
#include <sys/file.h>
#include <netinet/in.h>
#include <netinet/ip.h>0
#include <arpa/ftp.h>
#include <arpa/inet.h>
#include <arpa/telnet.h>
#include <stdio.h>
#include <signal.h>
#include <string.h>
#include <stdlib.h>
#include <unistd.h>
#include <errno.h>
#include <netdb.h>

#define MAXEVENTS 64
 
/* Create a TCP socket bound to PORT on all local interfaces.

   The candidate addresses come from getaddrinfo() (IPv4 or IPv6, passive);
   the first one for which both socket() and bind() succeed is used.

   Returns the bound socket descriptor, or -1 after printing a message if
   name resolution fails or no candidate address could be bound. */
static int
create_and_bind (char *port)
{
  struct addrinfo hints;
  struct addrinfo *result, *rp;
  int s, sfd = -1;

  memset (&hints, 0, sizeof (struct addrinfo));
  hints.ai_family = AF_UNSPEC;     /* Return IPv4 and IPv6 choices */
  hints.ai_socktype = SOCK_STREAM; /* We want a TCP socket */
  hints.ai_flags = AI_PASSIVE;     /* All interfaces */

  s = getaddrinfo (NULL, port, &hints, &result);
  if (s != 0)
    {
      fprintf (stderr, "getaddrinfo: %s\n", gai_strerror (s));
      return -1;
    }

  for (rp = result; rp != NULL; rp = rp->ai_next)
    {
      sfd = socket (rp->ai_family, rp->ai_socktype, rp->ai_protocol);
      if (sfd == -1)
        continue;

      s = bind (sfd, rp->ai_addr, rp->ai_addrlen);
      if (s == 0)
        {
          /* We managed to bind successfully! */
          break;
        }

      close (sfd);
      sfd = -1;
    }

  /* The list is no longer needed on either path; release it before the
     failure return as well so it is not leaked. */
  freeaddrinfo (result);

  if (sfd == -1)
    {
      fprintf (stderr, "Could not bind\n");
      return -1;
    }

  return sfd;
}

/* Turn on O_NONBLOCK for descriptor SFD while keeping its other file
   status flags.  Returns 0 on success; on failure reports the fcntl error
   with perror and returns -1. */
static int
make_socket_non_blocking (int sfd)
{
  int current = fcntl (sfd, F_GETFL, 0);

  if (current == -1)
    {
      perror ("fcntl");
      return -1;
    }

  if (fcntl (sfd, F_SETFL, current | O_NONBLOCK) == -1)
    {
      perror ("fcntl");
      return -1;
    }

  return 0;
}


/* Accept every connection pending on the non-blocking listening socket
   SFD and register each new socket with the epoll instance EFD for
   edge-triggered input.  Returns once accept() reports that nothing is
   pending (or fails).  Aborts if a new socket cannot be made non-blocking
   or cannot be registered. */
static void
accept_pending_connections (int efd, int sfd)
{
  while (1)
    {
      /* sockaddr_storage is large enough for both IPv4 and IPv6 peers;
         the listener is created with AF_UNSPEC and may be an IPv6 socket. */
      struct sockaddr_storage in_addr;
      socklen_t in_len = sizeof in_addr;
      struct epoll_event event;
      char hbuf[NI_MAXHOST], sbuf[NI_MAXSERV];
      int infd, s;

      infd = accept (sfd, (struct sockaddr *) &in_addr, &in_len);
      if (infd == -1)
        {
          /* EAGAIN/EWOULDBLOCK: all incoming connections are processed. */
          if ((errno != EAGAIN) && (errno != EWOULDBLOCK))
            perror ("accept");
          return;
        }

      s = getnameinfo ((struct sockaddr *) &in_addr, in_len,
                       hbuf, sizeof hbuf,
                       sbuf, sizeof sbuf,
                       NI_NUMERICHOST | NI_NUMERICSERV);
      if (s == 0)
        {
          printf("Accepted connection on descriptor %d "
                 "(host=%s, port=%s)\n", infd, hbuf, sbuf);
        }

      /* Make the incoming socket non-blocking and add it to the
         list of fds to monitor. */
      s = make_socket_non_blocking (infd);
      if (s == -1)
        abort ();

      memset (&event, 0, sizeof event);
      event.data.fd = infd;
      event.events = EPOLLIN | EPOLLET;
      s = epoll_ctl (efd, EPOLL_CTL_ADD, infd, &event);
      if (s == -1)
        {
          perror ("epoll_ctl");
          abort ();
        }
    }
}

/* Read everything currently available on FD and copy it to standard
   output.  All data must be consumed because the descriptor is registered
   edge-triggered and will not be reported again for the same data.
   Returns 1 when the connection should be closed (end of file or a read
   error), 0 when the input is merely exhausted for now.  Aborts if writing
   to standard output fails. */
static int
drain_client (int fd)
{
  while (1)
    {
      char buf[512];
      ssize_t count = read (fd, buf, sizeof buf);

      if (count == -1)
        {
          /* If errno == EAGAIN, that means we have read all data. */
          if (errno == EAGAIN)
            return 0;
          perror ("read");
          return 1;
        }
      if (count == 0)
        {
          /* End of file. The remote has closed the connection. */
          return 1;
        }

      /* Write the buffer to standard output */
      if (write (1, buf, count) == -1)
        {
          perror ("write");
          abort ();
        }
    }
}

/* epoll server: listen on the port given as argv[1], accept clients and
   print whatever they send to standard output.  The event loop runs until
   epoll_wait() fails with an error other than EINTR; the function then
   releases its resources and returns EXIT_FAILURE. */
int
main (int argc, char *argv[])
{
  int sfd, s;
  int efd;
  struct epoll_event event;
  struct epoll_event *events;

  if (argc != 2)
    {
      fprintf (stderr, "Usage: %s [port]\n", argv[0]);
      exit (EXIT_FAILURE);
    }

  sfd = create_and_bind (argv[1]);
  if (sfd == -1)
    abort ();

  s = make_socket_non_blocking (sfd);
  if (s == -1)
    abort ();

  s = listen (sfd, 1024);
  if (s == -1)
    {
      perror ("listen");
      abort ();
    }

  efd = epoll_create1 (0);
  if (efd == -1)
    {
      perror ("epoll_create");
      abort ();
    }

  memset (&event, 0, sizeof event);
  event.data.fd = sfd;
  event.events = EPOLLIN | EPOLLET;
  s = epoll_ctl (efd, EPOLL_CTL_ADD, sfd, &event);
  if (s == -1)
    {
      perror ("epoll_ctl");
      abort ();
    }

  /* Buffer where events are returned */
  events = calloc (MAXEVENTS, sizeof event);
  if (events == NULL)
    {
      perror ("calloc");
      abort ();
    }

  /* The event loop */
  while (1)
    {
      int n, i;

      n = epoll_wait (efd, events, MAXEVENTS, -1);
      if (n == -1)
        {
          /* A signal interrupted the wait: just wait again.  Any other
             error would otherwise repeat forever, so leave the loop. */
          if (errno == EINTR)
            continue;
          perror ("epoll_wait");
          break;
        }

      for (i = 0; i < n; i++)
        {
          int fd = events[i].data.fd;

          if ((events[i].events & EPOLLERR) ||
              (events[i].events & EPOLLHUP) ||
              (!(events[i].events & EPOLLIN)))
            {
              /* An error has occured on this fd, or the socket is not
                 ready for reading (why were we notified then?) */
              fprintf (stderr, "epoll error\n");
              close (fd);
            }
          else if (sfd == fd)
            {
              /* The listening socket is ready, which means one or more
                 new connection requests (the same convention as in the
                 select version). */
              accept_pending_connections (efd, sfd);
            }
          else if (drain_client (fd))
            {
              printf ("Closed connection on descriptor %d\n", fd);

              /* Closing the descriptor will make epoll remove it
                 from the set of descriptors which are monitored. */
              close (fd);
            }
        }
    }

  free (events);
  close (efd);
  close (sfd);

  /* Only reached after an epoll_wait failure. */
  return EXIT_FAILURE;
}
           

參考文獻:

http://www.voidcn.com/article/p-nbdiazny-wm.html

繼續閱讀