天天看點

Linux 網卡驅動學習(三)(net_device 等資料結構)

【摘要】前文對網絡驅動例子進行一個簡單的梳理總結,本文貼出 net_device 的資料結構以及一些驅動中常用的資料結構。

1、網絡裝置驅動結構

下圖摘自http://blog.chinaunix.net/uid-20672257-id-3147768.html

Linux 網卡驅動學習(三)(net_device 等資料結構)

1)、網絡協定接口層向網絡層協定提供統一的資料包收發接口,不論上層協定為ARP還是IP,都通過dev_queue_xmit()函數發送資料,并通過netif_rx()函數接收資料。這一層的存在使得上層協定獨立于具體的裝置。

2)、網絡裝置接口層向協定接口層提供統一的用于描述具體網絡裝置屬性和操作的結構體net_device,該結構體是裝置驅動功能層中各函數的容器。實際上,網絡裝置接口層從宏觀上規劃了具體操作硬體的裝置驅動功能層的結構。

3)、裝置驅動功能層各函數是網絡裝置接口層net_device資料結構的具體成員,是驅使網絡裝置硬體完成相應動作的程式,它通過hard_start_xmit()函數啟動發送操作,并通過網絡裝置上的中斷觸發接收操作。

4)、網絡裝置與媒介層是完成資料包發送和接收的物理實體,包括網絡擴充卡和具體的傳輸媒介,網絡擴充卡由驅動功能層中的函數在物理上驅動。對于Linux系統而言,網絡裝置和媒介都可以是虛拟的。

2、網卡驅動中重要的資料結構

/*
 * struct softnet_data - bundle of packet queues, device lists and a few
 * integer counters used by the network core.
 *
 * NOTE(review): presumably one instance exists per CPU; nothing in this
 * excerpt shows how the structure is instantiated -- confirm against the
 * kernel sources.
 */
struct softnet_data {
    /* Integer bookkeeping values. */
    int                     throttle;
    int                     cng_level;
    int                     avg_blog;

    /* Queue of sk_buffs (see struct sk_buff_head). */
    struct sk_buff_head     input_pkt_queue;
    /* List head; struct net_device carries a matching poll_list member. */
    struct list_head        poll_list;
    /* Pointer to a net_device (devices chain through next_sched). */
    struct net_device       *output_queue;
    /* Pointer to an sk_buff (buffers chain through next). */
    struct sk_buff          *completion_queue;

    /* A complete net_device embedded by value. */
    struct net_device       backlog_dev;    /* Sorry. 8) */
};



/*
 * struct packet_type - describes one handler for a given packet type.
 *
 * Instances are linked together through @list.
 */
struct packet_type {
    /* This is really htons(ether_type). */
    unsigned short      type;
    /* NULL is wildcarded here. */
    struct net_device   *dev;
    /* Handler callback; receives the buffer, a device and this entry. */
    int                 (*func)(struct sk_buff *,
                                struct net_device *,
                                struct packet_type *);
    /* Opaque pointer; presumably private to af_packet -- TODO confirm. */
    void                *af_packet_priv;
    /* Linkage into a list of packet_type entries. */
    struct list_head    list;
};


/*
 * struct netif_rx_stats - set of unsigned counters describing the receive
 * path.  All members have the same type, so the structure is effectively an
 * array of ten counters with named slots.
 */
struct netif_rx_stats {
    /* General receive counters. */
    unsigned int total;
    unsigned int dropped;
    unsigned int time_squeeze;
    unsigned int throttled;

    /* Counters whose names refer to "fastroute". */
    unsigned int fastroute_hit;
    unsigned int fastroute_success;
    unsigned int fastroute_defer;
    unsigned int fastroute_deferred_out;
    unsigned int fastroute_latency_reduction;

    unsigned int cpu_collision;
};


/*
 * struct net_device_stats - per-interface traffic and error counters.
 *
 * Every member is an unsigned long.  The first group holds the summary
 * counters; the later groups break rx_errors and tx_errors down by cause.
 */
struct net_device_stats {
    /* Summary counters. */
    unsigned long rx_packets;           /* packets received, total         */
    unsigned long tx_packets;           /* packets transmitted, total      */
    unsigned long rx_bytes;             /* bytes received, total           */
    unsigned long tx_bytes;             /* bytes transmitted, total        */
    unsigned long rx_errors;            /* bad packets received            */
    unsigned long tx_errors;            /* packet transmit problems        */
    unsigned long rx_dropped;           /* no space in linux buffers       */
    unsigned long tx_dropped;           /* no space available in linux     */
    unsigned long multicast;            /* multicast packets received      */
    unsigned long collisions;

    /* Breakdown of rx_errors. */
    unsigned long rx_length_errors;
    unsigned long rx_over_errors;       /* receiver ring buffer overflow   */
    unsigned long rx_crc_errors;        /* received packet with CRC error  */
    unsigned long rx_frame_errors;      /* received frame alignment error  */
    unsigned long rx_fifo_errors;       /* receiver FIFO overrun           */
    unsigned long rx_missed_errors;     /* receiver missed packet          */

    /* Breakdown of tx_errors. */
    unsigned long tx_aborted_errors;
    unsigned long tx_carrier_errors;
    unsigned long tx_fifo_errors;
    unsigned long tx_heartbeat_errors;
    unsigned long tx_window_errors;

    /* For cslip etc. */
    unsigned long rx_compressed;
    unsigned long tx_compressed;
};


/*
 * Media selection options.
 *
 * The numeric values are spelled out explicitly; they are the same values
 * the compiler assigns when only the first enumerator is initialised to 0.
 */
enum {
    IF_PORT_UNKNOWN   = 0,
    IF_PORT_10BASE2   = 1,
    IF_PORT_10BASET   = 2,
    IF_PORT_AUI       = 3,
    IF_PORT_100BASET  = 4,
    IF_PORT_100BASETX = 5,
    IF_PORT_100BASEFX = 6
};


struct net_device
{

    /*
     * This is the first field of the "visible" part of this structure
     * (i.e. as seen by users in the "Space.c" file).  It is the name
     * the interface.
     */
    char            name[IFNAMSIZ];      //eth0 eth1 ... ethn

    /*
     *    I/O specific fields
     *    FIXME: Merge these and struct ifmap into one
     */
    unsigned long        mem_end;    /* shared mem end    */
    unsigned long        mem_start;    /* shared mem start    */
    unsigned long        base_addr;    /* device I/O address    */ //網絡接口的I/O基位址,由驅動在裝置探測時指派 ifconfig可以顯示或
                                        修改目前值,該字段可以在系統啟動時在核心指令行中顯式指派,或者
                                        在子產品加載時指派。這個成員一般不被引用
    unsigned int        irq;        /* device IRQ number    */    //網絡裝置使用的中斷号。irq值常常在網絡裝置啟動時加載設定,
                                            并且在後來由ifconfig列印出來。

    /*
     *    Some hardware also needs these fields, but they are not
     *    part of the usual set specified in Space.c.
     */

    unsigned char        if_port;    /* Selectable AUI, TP,..*/   //多端口裝置中使用的端口。該成員在同軸線(IF_PORT_10BASE2)和
                                        //雙絞線(IF_PORT_100BASET)以太網連接配接時使用
    unsigned char        dma;        /* DMA channel        */     //在某些外設總線時有意義,如ISA總線。它不在裝置驅動自身以外使用

    unsigned long        state;         //網絡裝置和網絡擴充卡的狀态資訊

    struct net_device    *next;        //下一個struct net_device Linux中所有網絡裝置都以dev_base指針開頭的單線行連結清單管理
    
    /* The device initialization function. Called only once. */
    int            (*init)(struct net_device *dev);    用來搜尋并初始化網絡裝置。該方法負責尋找并初始化目前類型的網絡擴充卡。首選必須建立net_device結構并将網絡裝置和網絡驅動程式的資料(驅動相關的)填充進去。其次,register_netdevice()注冊網絡裝置


    /* ------- Fields preinitialized in Space.c finish here ------- */

    struct net_device    *next_sched;

    /* Interface index. Unique device identifier    */
    int            ifindex;
    int            iflink;


    struct net_device_stats* (*get_stats)(struct net_device *dev);  應用程式需要擷取網絡接口的統計資訊時會調用這個方法。例如,在運作ifconfig或netstat -i時,會調用該方法
    struct iw_statistics*    (*get_wireless_stats)(struct net_device *dev);

    /* List of functions to handle Wireless Extensions (instead of ioctl).
     * See <net/iw_handler.h> for details. Jean II */
    const struct iw_handler_def *    wireless_handlers;
    /* Instance data managed by the core of Wireless Extensions. */
    struct iw_public_data *    wireless_data;

    struct ethtool_ops *ethtool_ops;

    /*
     * This marks the end of the "visible" part of the structure. All
     * fields hereafter are internal to the system, and may change at
     * will (read: may be cleaned up at will).
     */

    /* These may be needed for future network-power-down code. */
    unsigned long        trans_start;    /* Time (in jiffies) of last Tx    */
    unsigned long        last_rx;    /* Time of last Rx    */

    unsigned short        flags;    /* interface flags (a la BSD)    */
    unsigned short        gflags;
        unsigned short          priv_flags; /* Like 'flags' but invisible to userspace. */
        unsigned short          unused_alignment_fixer; /* Because we need priv_flags,
                                                         * and we want to be 32-bit aligned.
                                                         */

    unsigned        mtu;    /* interface MTU value        */   //最大傳輸單元。它指定鍊路層每幀有效載荷最大長度。網絡層各協定必須
                                        考慮該值,以確定不會向網絡擴充卡發送多餘的位元組,以太網1500,通
                                        過ifconfig指令可改變
    unsigned short        type;    /* interface hardware type    */     //指定了網絡擴充卡的硬體類型。這個成員由ARP用來決定網絡擴充卡
                                        支援的硬體位址。對以太網接口一般由ether_setup()函數設定其值為
                                        ARPHRD_ETHER

    unsigned short        hard_header_len;    /* hardware hdr length    */  //指定鍊路層資料幀標頭長度。對于以太網接口為14
    void            *priv;    /* pointer to private data    */

    struct net_device    *master; /* Pointer to master device of a group,
                      * which this device is member of.
                      */

    /* Interface address info. */
    unsigned char        broadcast[MAX_ADDR_LEN];    /* hw bcast add    */    //廣播位址       
    /*以太網位址長度是6個位元組(我們指的是接口闆的硬體ID),廣播位址由6個0xff位元組組成。這些字段一般由ether_setup()函數設定。驅動程式必須以特定于裝置的方式從接口闆讀出,并複制到dev_addr結構。網絡裝置的硬體位址用來産生正确的以太網頭*/
    unsigned char        dev_addr[MAX_ADDR_LEN];    /* hw address    */               //存放裝置硬體位址
    unsigned char        addr_len;    /* hardware address length    */       //硬體(MAC)位址長度

    struct dev_mc_list    *mc_list;    /* Multicast mac addresses    */     /*指向具有多點傳播的第二層位址的線性表。當網絡擴充卡收集到具有包含在dev_mc_list中目标位址後,網絡擴充卡必須将包傳遞給更高層。驅動程式中方法set_multicast_list用來将該清單中的位址傳遞給網絡擴充卡。該網絡擴充卡的硬體過濾器(如果有)負責隻将與該計算機有關的包傳遞給核心 */

    int            mc_count;    /* Number of installed mcasts    */  //dev_mc_list包含的位址數量
    int            promiscuity;
    int            allmulti;

/*下面兩個變量用來發現擴充卡在發送包時遇到的問題。
    int            watchdog_timeo;        
    struct timer_list    watchdog_timer;  /*在網絡裝置啟動時打開,每經過watch_timeo時間後立即被調用。處理程式dev_watchdog()檢查從上一次(存儲在stans_start中)包傳輸後是否經過watch_timeo機關長度的時間。如果是,那麼上一個包的傳輸中出現問題,必須檢查網絡擴充卡。要檢查網絡擴充卡,需要調用驅動函數tx_timeout()。如果從上次傳輸開始還沒有經過足夠長的時間,那麼除了watchdog計時器啟動之外沒有發生其他網絡事件*/

    /* Protocol specific pointers */
/*指向網絡擴充卡的第三層協定的資訊。如果網絡裝置被設定為Internet協定,那麼ip_ptr指向in_device類型的結構,它管理有關的IP執行個體的資訊和配置參數。例如in_device結構管理包含網絡裝置IP位址清單,包含多點傳播組活動IP清單和ARP協定參數等*/
    
    void             *atalk_ptr;    /* AppleTalk link     */
    void            *ip_ptr;    /* IPv4 specific data    */  
    void                    *dn_ptr;        /* DECnet specific data */
    void                    *ip6_ptr;       /* IPv6 specific data */
    void            *ec_ptr;    /* Econet specific data    */
    void            *ax25_ptr;    /* AX.25 specific data */

    struct list_head    poll_list;    /* Link to poll list    */
    int            quota;
    int            weight;

    struct Qdisc        *qdisc;
    struct Qdisc        *qdisc_sleeping;
    struct Qdisc        *qdisc_ingress;
    struct list_head    qdisc_list; 
    unsigned long        tx_queue_len;    /* Max frames per queue allowed */    //該字段表示指定了網絡裝置發送隊列中可以排列的最大幀數
                                            這個值有ether_setup()設定為100.不要将tx_queue_len
                                            與網絡擴充卡的緩沖區想混淆。通常網絡擴充卡有額外的環形
                                            緩沖區,大小為16或32個包大小

    /* ingress path synchronizer */
    spinlock_t        ingress_lock;
    /* hard_start_xmit synchronizer */
    spinlock_t        xmit_lock;
    /* cpu id of processor entered to hard_start_xmit or -1,
       if nobody entered there.
     */
    int            xmit_lock_owner;
    /* device queue lock */
    spinlock_t        queue_lock;
    /* Number of references to this device */
    atomic_t        refcnt;
    /* delayed register/unregister */
    struct list_head    todo_list;
    /* device name hash chain */
    struct hlist_node    name_hlist;
    /* device index hash chain */
    struct hlist_node    index_hlist;

    /* register/unregister state machine */
    enum { NETREG_UNINITIALIZED=0,
           NETREG_REGISTERING,    /* called register_netdevice */
           NETREG_REGISTERED,    /* completed register todo */
           NETREG_UNREGISTERING,    /* called unregister_netdevice */
           NETREG_UNREGISTERED,    /* completed unregister todo */
           NETREG_RELEASED,        /* called free_netdev */
    } reg_state;

    /* Net device features */
    int            features;
#define NETIF_F_SG        1    /* Scatter/gather IO. */
#define NETIF_F_IP_CSUM        2    /* Can checksum only TCP/UDP over IPv4. */
#define NETIF_F_NO_CSUM        4    /* Does not require checksum. F.e. loopack. */
#define NETIF_F_HW_CSUM        8    /* Can checksum all the packets. */
#define NETIF_F_HIGHDMA        32    /* Can DMA to high memory. */
#define NETIF_F_FRAGLIST    64    /* Scatter/gather IO. */
#define NETIF_F_HW_VLAN_TX    128    /* Transmit VLAN hw acceleration */
#define NETIF_F_HW_VLAN_RX    256    /* Receive VLAN hw acceleration */
#define NETIF_F_HW_VLAN_FILTER    512    /* Receive filtering on VLAN */
#define NETIF_F_VLAN_CHALLENGED    1024    /* Device cannot handle VLAN packets */
#define NETIF_F_TSO        2048    /* Can offload TCP/IP segmentation */
#define NETIF_F_LLTX        4096    /* LockLess TX */

    /* Called after device is detached from network. */
    void            (*uninit)(struct net_device *dev); 用來登出網絡裝置,該方法用來執行驅動程式相關的函數,這些函數在删除網絡裝置時也是必須的。目前沒有驅動程式使用該方法

    /* Called after last user reference disappears. */
    void            (*destructor)(struct net_device *dev);

    /* Pointers to interface service routines.    */
    int            (*open)(struct net_device *dev);    打開一個已經命名的網絡裝置。可以使用ifconfig指令激活網絡裝置,在激活過程中,open方法應當注冊它需要的系統資源(I/O口,IRQ,DMA,等等),以及進行其他的網絡裝置要求
    int            (*stop)(struct net_device *dev);  停止網絡擴充卡的活動并釋放相關資源,此後網絡裝置不能活動
    int            (*hard_start_xmit) (struct sk_buff *skb, 
                            struct net_device *dev); 在網絡裝置上發送資料包的方法。完整的封包(協定頭和所有其他資料)包含在一個socket緩沖區(sk_buff)結構中。資料包如果成功發送到網絡擴充卡該函數傳回0,否則傳回1
#define HAVE_NETDEV_POLL
    int            (*poll) (struct net_device *dev, int *quota);
    int            (*hard_header) (struct sk_buff *skb,
                        struct net_device *dev,
                        unsigned short type,
                        void *daddr,
                        void *saddr,
                        unsigned len);  用先前提取到的源和目的硬體位址來建立硬體頭的函數(在hard_start_xmit 前調用)。它的工作是将傳給它的參數資訊組織成一個合适的特定于裝置的硬體頭
    int            (*rebuild_header)(struct sk_buff *skb);  用來在ARP解析完成後、封包發送前,重建硬體頭的函數
#define HAVE_MULTICAST             
    void            (*set_multicast_list)(struct net_device *dev);   将多點傳播MAC位址清單傳遞給網絡擴充卡,擴充卡就可以根據這些位址接收包
#define HAVE_SET_MAC_ADDR           
    int            (*set_mac_address)(struct net_device *dev,
                           void *addr);    改變網絡裝置的硬體位址(MAC位址)
#define HAVE_PRIVATE_IOCTL
    int            (*do_ioctl)(struct net_device *dev,
                        struct ifreq *ifr, int cmd);
#define HAVE_SET_CONFIG
    int            (*set_config)(struct net_device *dev,
                          struct ifmap *map);
#define HAVE_HEADER_CACHE
    int            (*hard_header_cache)(struct neighbour *neigh,
                             struct hh_cache *hh);
    void            (*header_cache_update)(struct hh_cache *hh,
                               struct net_device *dev,
                               unsigned char *  haddr);  在響應一個變化中,更新hh_cache結構中的目的位址方法
#define HAVE_CHANGE_MTU
    int            (*change_mtu)(struct net_device *dev, int new_mtu);  改變網絡裝置最大傳輸單元(MTU)函數

#define HAVE_TX_TIMEOUT
    void            (*tx_timeout) (struct net_device *dev);  網絡驅動程式代碼沒有在一個合理的時間内将一個封包發送完成時會調用該方法,封包沒有被及時發送的原因可能是丢失一個中斷或某個接口被鎖。此時該函數處理這個問題并恢複封包發送

    void            (*vlan_rx_register)(struct net_device *dev,
                            struct vlan_group *grp);
    void            (*vlan_rx_add_vid)(struct net_device *dev,
                           unsigned short vid);
    void            (*vlan_rx_kill_vid)(struct net_device *dev,
                            unsigned short vid);

    int            (*hard_header_parse)(struct sk_buff *skb,
                             unsigned char *haddr);  該方法完成的工作包括從skb中的封包中抽取源位址,複制到haddr的緩沖區中。函數的傳回值是位址的長度資訊
    int            (*neigh_setup)(struct net_device *dev, struct neigh_parms *);
    int            (*accept_fastpath)(struct net_device *, struct dst_entry*);
#ifdef CONFIG_NETPOLL
    int            netpoll_rx;
#endif
#ifdef CONFIG_NET_POLL_CONTROLLER
    void                    (*poll_controller)(struct net_device *dev);
#endif

    /* bridge stuff */
    struct net_bridge_port    *br_port;

#ifdef CONFIG_NET_DIVERT
    /* this will get initialized at each interface type init routine */
    struct divert_blk    *divert;
#endif /* CONFIG_NET_DIVERT */

    /* class/net/name entry */
    struct class_device    class_dev;
    /* how much padding had been added by alloc_netdev() */
    int padded;
};



/*
 * struct netdev_boot_setup - pairs an interface name with a struct ifmap.
 *
 * NOTE(review): judging by the name this records settings supplied at boot
 * time; the excerpt does not show where it is used -- confirm.
 */
struct netdev_boot_setup {
    char            name[IFNAMSIZ];    /* interface name, IFNAMSIZ bytes */
    struct ifmap    map;               /* settings stored for that name  */
};


/*
 * struct hh_cache - one cached hardware header.
 *
 * Entries chain through @hh_next.  The header bytes live in @hh_data,
 * whose size is LL_MAX_HEADER rounded up to a multiple of HH_DATA_MOD.
 */
struct hh_cache {
    struct hh_cache *hh_next;       /* Next entry */
    atomic_t        hh_refcnt;      /* number of users */
    unsigned short  hh_type;        /* protocol identifier, f.e ETH_P_IP
                                     *  NOTE:  For VLANs, this will be the
                                     *  encapsulated type. --BLG
                                     */
    int             hh_len;         /* length of header */
    int             (*hh_output)(struct sk_buff *skb);
    rwlock_t        hh_lock;

    /* cached hardware header; allow for machine alignment needs. */

/* Alignment granularity, in bytes, of the cached header storage. */
#define HH_DATA_MOD    16
/*
 * HH_DATA_MOD minus (__len modulo HH_DATA_MOD); evaluates to HH_DATA_MOD
 * when __len is already a multiple of HH_DATA_MOD.
 */
#define HH_DATA_OFF(__len) \
    (HH_DATA_MOD - ((__len) & (HH_DATA_MOD - 1)))
/* __len rounded up to the next multiple of HH_DATA_MOD. */
#define HH_DATA_ALIGN(__len) \
    (((__len)+(HH_DATA_MOD-1))&~(HH_DATA_MOD - 1))

    unsigned long   hh_data[HH_DATA_ALIGN(LL_MAX_HEADER) / sizeof(long)];
};


/*
 * struct dev_mc_list - one node of a singly linked list of addresses.
 *
 * struct net_device points at such a list through its mc_list member,
 * which is described there as holding multicast MAC addresses.
 */
struct dev_mc_list {
    struct dev_mc_list  *next;                   /* following node          */
    __u8                dmi_addr[MAX_ADDR_LEN];  /* address bytes           */
    unsigned char       dmi_addrlen;             /* presumably the number of
                                                  * valid bytes in dmi_addr
                                                  * -- TODO confirm         */
    int                 dmi_users;
    int                 dmi_gusers;
};


/*
 * struct sk_buff_head - head of a doubly linked list of sk_buffs.
 *
 * The leading next/prev pair mirrors the first two members of
 * struct sk_buff, which is why they have to come first.
 */
struct sk_buff_head {
    /* These two members must be first. */
    struct sk_buff  *next;
    struct sk_buff  *prev;

    __u32           qlen;    /* presumably the queue length -- TODO confirm */
    spinlock_t      lock;    /* spinlock stored alongside the list head     */
};

/* Forward declaration; the full definition follows further down. */
struct sk_buff;

/* To allow 64K frame to be packed as single skb without frag_list */
#define MAX_SKB_FRAGS (65536/PAGE_SIZE + 2)

/* Short alias for the fragment descriptor defined below. */
typedef struct skb_frag_struct skb_frag_t;

/*
 * struct skb_frag_struct - describes one fragment as a page pointer plus
 * a 16-bit offset and a 16-bit size.
 */
struct skb_frag_struct {
    struct page     *page;          /* page the fragment refers to */
    __u16           page_offset;    /* offset value, 16 bits       */
    __u16           size;           /* size value, 16 bits         */
};


struct sk_buff {
    /* These two members must be first. */
    struct sk_buff        *next;
    struct sk_buff        *prev;   //雙向連結清單指針

    struct sk_buff_head    *list;  指向套接字緩存在隊列中的目前位置
    struct sock        *sk;  指向建立封包的socket
    struct timeval        stamp;   封包到達Linux系統的時間
    struct net_device    *dev;  表明套接字緩存目前操作所在的網絡裝置。網絡路由器被确定下來後,dev就指向封包離開計算機時經過的網絡擴充卡。知道封包的輸出擴充卡已知之前,dev都指向輸入擴充卡
    struct net_device    *input_dev;
    struct net_device    *real_dev;

    union {
        struct tcphdr    *th;
        struct udphdr    *uh;
        struct icmphdr    *icmph;
        struct igmphdr    *igmph;
        struct iphdr    *ipiph;
        struct ipv6hdr    *ipv6h;
        unsigned char    *raw;
    } h;    傳輸層封包幀頭的指針

    union {
        struct iphdr    *iph;
        struct ipv6hdr    *ipv6h;
        struct arphdr    *arph;
        unsigned char    *raw;
    } nh; 網絡層封包幀頭的指針

    union {
          unsigned char     *raw;
    } mac; MAC層封包幀頭的指針

    struct  dst_entry    *dst;   指向路由高速緩存中的一條記錄,它包含着有關封包進一步前進的路由資訊
    struct    sec_path    *sp;

    /*
     * This is the control buffer. It is free to use for every
     * layer. Please put your private variables there. If you
     * want to keep them across layers you have to do a skb_clone()
     * first. This is owned by whoever has the skb queued ATM.
     */
    char            cb[40];

    unsigned int        len,            指明套接字緩存所代表的封包長度,這裡隻考慮核心可通路的資料。在以太網封包中兩個MAC位址和類型/長度域被考慮其中。其他的域(報頭、連結和檢驗)以後再在網絡擴充卡中進行添加
                data_len,
                mac_len,
                csum;
    unsigned char        local_df,
                cloned,
                pkt_type,        封包的類型
                ip_summed;
    __u32            priority;
    unsigned short        protocol,
                security;

    void            (*destructor)(struct sk_buff *skb);
#ifdef CONFIG_NETFILTER
        unsigned long        nfmark;
    __u32            nfcache;
    __u32            nfctinfo;
    struct nf_conntrack    *nfct;
#ifdef CONFIG_NETFILTER_DEBUG
        unsigned int        nf_debug;
#endif
#ifdef CONFIG_BRIDGE_NETFILTER
    struct nf_bridge_info    *nf_bridge;
#endif
#endif /* CONFIG_NETFILTER */
#if defined(CONFIG_HIPPI)
    union {
        __u32        ifield;
    } private;
#endif
#ifdef CONFIG_NET_SCHED
       __u32            tc_index;        /* traffic control index */
#ifdef CONFIG_NET_CLS_ACT
    __u32           tc_verd;               /* traffic control verdict */
    __u32           tc_classid;            /* traffic control classid */
#endif

#endif


    /* These elements must be at the end, see alloc_skb() for details.  */
    unsigned int        truesize;
    atomic_t        users;
    unsigned char        *head,  
                *data,
                *tail,
                *end;
};


/*
 * struct skb_shared_info - fragment bookkeeping for a socket buffer: a
 * reference counter, the fragment count, two tso_* values, a pointer to a
 * chain of sk_buffs and a fixed array of MAX_SKB_FRAGS fragment
 * descriptors.
 */
struct skb_shared_info {
    atomic_t        dataref;
    unsigned int    nr_frags;    /* presumably the number of used entries
                                  * in frags[] -- TODO confirm */
    unsigned short  tso_size;
    unsigned short  tso_segs;
    struct sk_buff  *frag_list;
    skb_frag_t      frags[MAX_SKB_FRAGS];
};


/*
 * struct skb_iter - cursor used when walking the data of a socket buffer.
 *
 * The first two members are the result slots filled in by the iteration
 * functions; the last two are the iterator's own state.
 */
struct skb_iter {
    /* Iteration functions set these */
    unsigned char   *data;
    unsigned int    len;

    /* Private to iteration */
    unsigned int    nextfrag;
    struct sk_buff  *fraglist;
};



#ifdef CONFIG_NETFILTER
/*
 * struct nf_conntrack - a use counter paired with a destroy callback that
 * takes the object itself as its argument.
 */
struct nf_conntrack {
    atomic_t use;
    void (*destroy)(struct nf_conntrack *);
};

#ifdef CONFIG_BRIDGE_NETFILTER
/*
 * struct nf_bridge_info - bridge-netfilter state: a use counter, the
 * physical input and output devices, a mask and 32 bytes of scratch data.
 * netoutdev exists only when 802.1Q VLAN support is configured (built in
 * or as a module).
 */
struct nf_bridge_info {
    atomic_t use;
    struct net_device *physindev;
    struct net_device *physoutdev;
#if defined(CONFIG_VLAN_8021Q) || defined(CONFIG_VLAN_8021Q_MODULE)
    struct net_device *netoutdev;
#endif
    unsigned int mask;
    unsigned long data[32 / sizeof(unsigned long)];
};
#endif /* CONFIG_BRIDGE_NETFILTER */

#endif /* CONFIG_NETFILTER */
           

繼續閱讀