天天看點

Linux下快速解析nf_conntrack

目錄:1. 背景 2. 使用 3. 例子 4. 總結

1. 背景

項目的需求是系統地統計TCP連接數;

于是想到了

nf_conntrack

這個Linux核心提供的記錄和跟蹤連接狀態的功能;

然後寫了個程式解析

/proc/net/nf_conntrack

這個映射檔案,後來悲劇就發生了:當conntrack表記錄增加到1w以上之後,解析速度急速下降,到了10w規模後,解析耗時幾十秒都不能完成。

後來終於翻到了netfilter的老巢,發現了解決方法:libmnl、libnetfilter_conntrack

2. 使用

核心原理是通過

netlink套接字

的方式,與核心互動,查詢得到結果

libmnl基本方法:

/* Basic libmnl netlink-socket helpers used by the example program below. */

/* Open a netlink socket on the given bus (the example passes
 * NETLINK_NETFILTER); the example treats a NULL return as failure. */
extern struct mnl_socket *mnl_socket_open(int bus);
/* Bind the socket to multicast groups and a port id (the example passes
 * 0 and MNL_SOCKET_AUTOPID); the example treats a negative return as failure. */
extern int mnl_socket_bind(struct mnl_socket *nl, unsigned int groups, pid_t pid);
/* Close the socket once the caller is done with it. */
extern int mnl_socket_close(struct mnl_socket *nl);
/* Send `siz` bytes starting at `req`; the example treats -1 as failure. */
extern ssize_t mnl_socket_sendto(const struct mnl_socket *nl, const void *req, size_t siz);
/* Receive into `buf` (capacity `siz`); the example keeps reading while the
 * return value is positive and treats -1 as failure. */
extern ssize_t mnl_socket_recvfrom(const struct mnl_socket *nl, void *buf, size_t siz);
           

libnetfilter_conntrack則主要是對擷取的結果進行解析,比如拿出源位址、協定簇資訊

/*
 * Conntrack attribute identifiers.
 *
 * Each enumerator names one attribute of a conntrack entry; the trailing
 * comment gives the width/kind of the attribute's value.  Enumerators marked
 * "alias" share the numeric value of the ORIG_* name they are assigned from.
 * Every fourth entry carries an explicit "= N" so the numeric values stay
 * pinned; ATTR_MAX is the terminating sentinel, one past the last attribute.
 */
enum nf_conntrack_attr {
    ATTR_ORIG_IPV4_SRC = 0,         /* u32 bits */
    ATTR_IPV4_SRC = ATTR_ORIG_IPV4_SRC, /* alias */
    ATTR_ORIG_IPV4_DST,         /* u32 bits */
    ATTR_IPV4_DST = ATTR_ORIG_IPV4_DST, /* alias */
    ATTR_REPL_IPV4_SRC,         /* u32 bits */
    ATTR_REPL_IPV4_DST,         /* u32 bits */
    ATTR_ORIG_IPV6_SRC = 4,         /* u128 bits */
    ATTR_IPV6_SRC = ATTR_ORIG_IPV6_SRC, /* alias */
    ATTR_ORIG_IPV6_DST,         /* u128 bits */
    ATTR_IPV6_DST = ATTR_ORIG_IPV6_DST, /* alias */
    ATTR_REPL_IPV6_SRC,         /* u128 bits */
    ATTR_REPL_IPV6_DST,         /* u128 bits */
    ATTR_ORIG_PORT_SRC = 8,         /* u16 bits */
    ATTR_PORT_SRC = ATTR_ORIG_PORT_SRC, /* alias */
    ATTR_ORIG_PORT_DST,         /* u16 bits */
    ATTR_PORT_DST = ATTR_ORIG_PORT_DST, /* alias */
    ATTR_REPL_PORT_SRC,         /* u16 bits */
    ATTR_REPL_PORT_DST,         /* u16 bits */
    ATTR_ICMP_TYPE = 12,            /* u8 bits */
    ATTR_ICMP_CODE,             /* u8 bits */
    ATTR_ICMP_ID,               /* u16 bits */
    ATTR_ORIG_L3PROTO,          /* u8 bits */
    ATTR_L3PROTO = ATTR_ORIG_L3PROTO,   /* alias */
    ATTR_REPL_L3PROTO = 16,         /* u8 bits */
    ATTR_ORIG_L4PROTO,          /* u8 bits */
    ATTR_L4PROTO = ATTR_ORIG_L4PROTO,   /* alias */
    ATTR_REPL_L4PROTO,          /* u8 bits */
    ATTR_TCP_STATE,             /* u8 bits */
    ATTR_SNAT_IPV4 = 20,            /* u32 bits */
    ATTR_DNAT_IPV4,             /* u32 bits */
    ATTR_SNAT_PORT,             /* u16 bits */
    ATTR_DNAT_PORT,             /* u16 bits */
    ATTR_TIMEOUT = 24,          /* u32 bits */
    ATTR_MARK,              /* u32 bits */
    ATTR_ORIG_COUNTER_PACKETS,      /* u64 bits */
    ATTR_REPL_COUNTER_PACKETS,      /* u64 bits */
    ATTR_ORIG_COUNTER_BYTES = 28,       /* u64 bits */
    ATTR_REPL_COUNTER_BYTES,        /* u64 bits */
    ATTR_USE,               /* u32 bits */
    ATTR_ID,                /* u32 bits */
    ATTR_STATUS = 32,           /* u32 bits  */
        ATTR_TCP_FLAGS_ORIG,            /* u8 bits */
    ATTR_TCP_FLAGS_REPL,            /* u8 bits */
    ATTR_TCP_MASK_ORIG,         /* u8 bits */
    ATTR_TCP_MASK_REPL = 36,        /* u8 bits */
    ATTR_MASTER_IPV4_SRC,           /* u32 bits */
    ATTR_MASTER_IPV4_DST,           /* u32 bits */
    ATTR_MASTER_IPV6_SRC,           /* u128 bits */
    ATTR_MASTER_IPV6_DST = 40,      /* u128 bits */
    ATTR_MASTER_PORT_SRC,           /* u16 bits */
    ATTR_MASTER_PORT_DST,           /* u16 bits */
    ATTR_MASTER_L3PROTO,            /* u8 bits */
    ATTR_MASTER_L4PROTO = 44,       /* u8 bits */
    ATTR_SECMARK,               /* u32 bits */
    ATTR_ORIG_NAT_SEQ_CORRECTION_POS,   /* u32 bits */
    ATTR_ORIG_NAT_SEQ_OFFSET_BEFORE,    /* u32 bits */
    ATTR_ORIG_NAT_SEQ_OFFSET_AFTER = 48,    /* u32 bits */
    ATTR_REPL_NAT_SEQ_CORRECTION_POS,   /* u32 bits */
    ATTR_REPL_NAT_SEQ_OFFSET_BEFORE,    /* u32 bits */
    ATTR_REPL_NAT_SEQ_OFFSET_AFTER,     /* u32 bits */
    ATTR_SCTP_STATE = 52,           /* u8 bits */
    ATTR_SCTP_VTAG_ORIG,            /* u32 bits */
    ATTR_SCTP_VTAG_REPL,            /* u32 bits */
    ATTR_HELPER_NAME,           /* string (30 bytes max) */
    ATTR_DCCP_STATE = 56,           /* u8 bits */
    ATTR_DCCP_ROLE,             /* u8 bits */
    ATTR_DCCP_HANDSHAKE_SEQ,        /* u64 bits */
    ATTR_TCP_WSCALE_ORIG,           /* u8 bits */
    ATTR_TCP_WSCALE_REPL = 60,      /* u8 bits */
    ATTR_ZONE,              /* u16 bits */
    ATTR_SECCTX,                /* string */
    ATTR_TIMESTAMP_START,           /* u64 bits, linux >= 2.6.38 */
    ATTR_TIMESTAMP_STOP = 64,       /* u64 bits, linux >= 2.6.38 */
    ATTR_HELPER_INFO,           /* variable length */
    ATTR_CONNLABELS,            /* variable length */
    ATTR_CONNLABELS_MASK,           /* variable length */
    ATTR_ORIG_ZONE,             /* u16 bits */
    ATTR_REPL_ZONE,             /* u16 bits */
    ATTR_SNAT_IPV6,             /* u128 bits */
    ATTR_DNAT_IPV6,             /* u128 bits */
    ATTR_SYNPROXY_ISN,          /* u32 bits */
    ATTR_SYNPROXY_ITS,          /* u32 bits */
    ATTR_SYNPROXY_TSOFF,            /* u32 bits */
    ATTR_MAX
};
           

3. 例子

以下例子為列印目前的TCP連接情況

main函數主要就是建立一個netlink套接字,發送請求

IPCTNL_MSG_CT_GET

擷取整個conntrack表資訊

最終結果接收在buf中,使用

mnl_cb_run

進行循環解析。

/*
 * Dump the kernel conntrack table over a NETLINK_NETFILTER socket.
 *
 * Builds a single IPCTNL_MSG_CT_GET dump request for AF_INET, sends it, and
 * hands every received buffer to mnl_cb_run(), which invokes data_cb for the
 * messages it contains.
 *
 * Returns 0 on success.  On any failure the failing call is reported with
 * perror() and the process exits with EXIT_FAILURE.
 */
int main(void)
{
    struct mnl_socket *nl;
    struct nlmsghdr *nlh;
    struct nfgenmsg *nfh;
    char buf[MNL_SOCKET_BUFFER_SIZE];
    unsigned int seq, portid;
    int ret;

    nl = mnl_socket_open(NETLINK_NETFILTER);
    if (nl == NULL) {
        perror("mnl_socket_open");
        exit(EXIT_FAILURE);
    }

    if (mnl_socket_bind(nl, 0, MNL_SOCKET_AUTOPID) < 0) {
        perror("mnl_socket_bind");
        mnl_socket_close(nl);
        exit(EXIT_FAILURE);
    }
    portid = mnl_socket_get_portid(nl);

    /* Netlink header: a dump request addressed to the conntrack subsystem. */
    nlh = mnl_nlmsg_put_header(buf);
    nlh->nlmsg_type = (NFNL_SUBSYS_CTNETLINK << 8) | IPCTNL_MSG_CT_GET;
    nlh->nlmsg_flags = NLM_F_REQUEST | NLM_F_DUMP;
    /* The same seq is passed to mnl_cb_run() below together with portid. */
    nlh->nlmsg_seq = seq = time(NULL);

    /* nfnetlink extra header selecting the address family to dump. */
    nfh = mnl_nlmsg_put_extra_header(nlh, sizeof(struct nfgenmsg));
    nfh->nfgen_family = AF_INET;
    nfh->version = NFNETLINK_V0;
    nfh->res_id = 0;

    ret = mnl_socket_sendto(nl, nlh, nlh->nlmsg_len);
    if (ret == -1) {
        /* Report the call that actually failed (was "mnl_socket_recvfrom"). */
        perror("mnl_socket_sendto");
        mnl_socket_close(nl);
        exit(EXIT_FAILURE);
    }

    ret = mnl_socket_recvfrom(nl, buf, sizeof(buf));
    while (ret > 0) {
        ret = mnl_cb_run(buf, ret, seq, portid, data_cb, NULL);
        if (ret < 0) {
            /* Callback/parse failure: do not blame the socket read for it. */
            perror("mnl_cb_run");
            mnl_socket_close(nl);
            exit(EXIT_FAILURE);
        }
        if (ret <= MNL_CB_STOP) {
            /* End of dump. */
            break;
        }
        ret = mnl_socket_recvfrom(nl, buf, sizeof(buf));
    }
    if (ret == -1) {
        perror("mnl_socket_recvfrom");
        mnl_socket_close(nl);
        exit(EXIT_FAILURE);
    }

    mnl_socket_close(nl);
    return 0;
}
           

以下為回調函數的實作,在本例子中,則篩選出TCP連接進行展示

#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <time.h>
#include <arpa/inet.h>

#include <libmnl/libmnl.h>
#include <libnetfilter_conntrack/libnetfilter_conntrack.h>

/*
 * mnl_cb_run() callback: print one conntrack entry if it is a TCP flow.
 *
 * nlh  - netlink message handed over by mnl_cb_run().
 * data - opaque user pointer (main passes NULL); unused here.
 *
 * Always returns MNL_CB_OK so that one unusable entry never aborts the dump:
 * entries that cannot be allocated or parsed are skipped silently.
 */
static int data_cb(const struct nlmsghdr *nlh, void *data)
{
    struct nf_conntrack *ct;
    char buf[4096];

    (void)data;

    ct = nfct_new();
    if (ct == NULL) {
        return MNL_CB_OK;
    }

    /* Only look at attributes when the message parsed cleanly; previously
     * the parse result was ignored and a half-filled entry could be printed. */
    if (nfct_nlmsg_parse(nlh, ct) == 0 &&
        nfct_get_attr_u8(ct, ATTR_ORIG_L4PROTO) == IPPROTO_TCP) {
        nfct_snprintf(buf, sizeof(buf), ct, NFCT_T_UNKNOWN, NFCT_O_DEFAULT, 0);
        printf("%s\n", buf);
    }

    nfct_destroy(ct);

    return MNL_CB_OK;
}
           

運作結果涉及本機一些位址,就不展示了,結果與

/proc/net/nf_conntrack

一致,但在10w記錄的環境下,並不會有巨大的開銷。

4. 總結

只要是沒有設定notrack標識的連接,就可以通過 nf_conntrack 取得連接數;

而使用libmnl+libnetfilter_conntrack的netlink套接字的方式,比直接cat檔案速度快很多;

檢視了官方手冊,發現libnetfilter_conntrack不僅可解析conntrack表,還能夠進行監控、修改等進階操作,功能十分強大!

參考文章:

[1] https://en.wikipedia.org/wiki/Netfilter

[2] https://www.netfilter.org/projects/libnetfilter_conntrack/index.html

繼續閱讀