1. 背景
回顧一下,項目需求是統計系統的TCP連接數;
于是想到了
nf_conntrack
這個Linux核心提供的記錄和跟蹤連接狀態的功能;
然後寫了個程式解析
/proc/net/nf_conntrack
這個映射檔案,後來悲劇就發生了:當conntrack表記錄增加到1w以上之後,解析速度急速下降;到了10w規模後,解析耗時幾十秒都不能完成……
終于後來翻到了netfilter的老巢,發現了解決方法:libmnl、libnetfilter_conntrack
2. 使用
核心原理是通過
netlink套接字
的方式,與核心互動,查詢得到結果
libmnl基本方法:
/* Open a netlink socket on the given bus (the example passes NETLINK_NETFILTER); the example treats a NULL return as failure. */
extern struct mnl_socket *mnl_socket_open(int bus);
/* Bind the socket to multicast groups and a port id; the example treats a negative return as failure. */
extern int mnl_socket_bind(struct mnl_socket *nl, unsigned int groups, pid_t pid);
/* Close a socket obtained from mnl_socket_open(). */
extern int mnl_socket_close(struct mnl_socket *nl);
/* Send siz bytes starting at req; the example treats a return of -1 as failure. */
extern ssize_t mnl_socket_sendto(const struct mnl_socket *nl, const void *req, size_t siz);
/* Receive into buf (at most siz bytes); the example treats -1 as failure and passes a positive return to mnl_cb_run() as the data length. */
extern ssize_t mnl_socket_recvfrom(const struct mnl_socket *nl, void *buf, size_t siz);
libnetfilter_conntrack則主要是對擷取的結果進行解析,比如拿出源位址、協定簇資訊
/*
 * Conntrack attribute identifiers.
 *
 * Each enumerator names one field of a conntrack object and is passed to the
 * attribute accessors (e.g. nfct_get_attr_u8(ct, ATTR_ORIG_L4PROTO)).
 * The trailing comment on each line gives the width/type of the value.
 * Entries marked "alias" share the numeric value of the ORIG-direction
 * attribute they are assigned from. Every fourth value is written out
 * explicitly (0, 4, 8, ...) up to 64; the rest follow by implicit increment.
 * ATTR_MAX is the last enumerator (presumably the attribute count -- confirm
 * against the library headers before relying on it).
 */
enum nf_conntrack_attr {
ATTR_ORIG_IPV4_SRC = 0, /* u32 bits */
ATTR_IPV4_SRC = ATTR_ORIG_IPV4_SRC, /* alias */
ATTR_ORIG_IPV4_DST, /* u32 bits */
ATTR_IPV4_DST = ATTR_ORIG_IPV4_DST, /* alias */
ATTR_REPL_IPV4_SRC, /* u32 bits */
ATTR_REPL_IPV4_DST, /* u32 bits */
ATTR_ORIG_IPV6_SRC = 4, /* u128 bits */
ATTR_IPV6_SRC = ATTR_ORIG_IPV6_SRC, /* alias */
ATTR_ORIG_IPV6_DST, /* u128 bits */
ATTR_IPV6_DST = ATTR_ORIG_IPV6_DST, /* alias */
ATTR_REPL_IPV6_SRC, /* u128 bits */
ATTR_REPL_IPV6_DST, /* u128 bits */
ATTR_ORIG_PORT_SRC = 8, /* u16 bits */
ATTR_PORT_SRC = ATTR_ORIG_PORT_SRC, /* alias */
ATTR_ORIG_PORT_DST, /* u16 bits */
ATTR_PORT_DST = ATTR_ORIG_PORT_DST, /* alias */
ATTR_REPL_PORT_SRC, /* u16 bits */
ATTR_REPL_PORT_DST, /* u16 bits */
ATTR_ICMP_TYPE = 12, /* u8 bits */
ATTR_ICMP_CODE, /* u8 bits */
ATTR_ICMP_ID, /* u16 bits */
ATTR_ORIG_L3PROTO, /* u8 bits */
ATTR_L3PROTO = ATTR_ORIG_L3PROTO, /* alias */
ATTR_REPL_L3PROTO = 16, /* u8 bits */
ATTR_ORIG_L4PROTO, /* u8 bits */
ATTR_L4PROTO = ATTR_ORIG_L4PROTO, /* alias */
ATTR_REPL_L4PROTO, /* u8 bits */
ATTR_TCP_STATE, /* u8 bits */
ATTR_SNAT_IPV4 = 20, /* u32 bits */
ATTR_DNAT_IPV4, /* u32 bits */
ATTR_SNAT_PORT, /* u16 bits */
ATTR_DNAT_PORT, /* u16 bits */
ATTR_TIMEOUT = 24, /* u32 bits */
ATTR_MARK, /* u32 bits */
ATTR_ORIG_COUNTER_PACKETS, /* u64 bits */
ATTR_REPL_COUNTER_PACKETS, /* u64 bits */
ATTR_ORIG_COUNTER_BYTES = 28, /* u64 bits */
ATTR_REPL_COUNTER_BYTES, /* u64 bits */
ATTR_USE, /* u32 bits */
ATTR_ID, /* u32 bits */
ATTR_STATUS = 32, /* u32 bits */
ATTR_TCP_FLAGS_ORIG, /* u8 bits */
ATTR_TCP_FLAGS_REPL, /* u8 bits */
ATTR_TCP_MASK_ORIG, /* u8 bits */
ATTR_TCP_MASK_REPL = 36, /* u8 bits */
ATTR_MASTER_IPV4_SRC, /* u32 bits */
ATTR_MASTER_IPV4_DST, /* u32 bits */
ATTR_MASTER_IPV6_SRC, /* u128 bits */
ATTR_MASTER_IPV6_DST = 40, /* u128 bits */
ATTR_MASTER_PORT_SRC, /* u16 bits */
ATTR_MASTER_PORT_DST, /* u16 bits */
ATTR_MASTER_L3PROTO, /* u8 bits */
ATTR_MASTER_L4PROTO = 44, /* u8 bits */
ATTR_SECMARK, /* u32 bits */
ATTR_ORIG_NAT_SEQ_CORRECTION_POS, /* u32 bits */
ATTR_ORIG_NAT_SEQ_OFFSET_BEFORE, /* u32 bits */
ATTR_ORIG_NAT_SEQ_OFFSET_AFTER = 48, /* u32 bits */
ATTR_REPL_NAT_SEQ_CORRECTION_POS, /* u32 bits */
ATTR_REPL_NAT_SEQ_OFFSET_BEFORE, /* u32 bits */
ATTR_REPL_NAT_SEQ_OFFSET_AFTER, /* u32 bits */
ATTR_SCTP_STATE = 52, /* u8 bits */
ATTR_SCTP_VTAG_ORIG, /* u32 bits */
ATTR_SCTP_VTAG_REPL, /* u32 bits */
ATTR_HELPER_NAME, /* string (30 bytes max) */
ATTR_DCCP_STATE = 56, /* u8 bits */
ATTR_DCCP_ROLE, /* u8 bits */
ATTR_DCCP_HANDSHAKE_SEQ, /* u64 bits */
ATTR_TCP_WSCALE_ORIG, /* u8 bits */
ATTR_TCP_WSCALE_REPL = 60, /* u8 bits */
ATTR_ZONE, /* u16 bits */
ATTR_SECCTX, /* string */
ATTR_TIMESTAMP_START, /* u64 bits, linux >= 2.6.38 */
ATTR_TIMESTAMP_STOP = 64, /* u64 bits, linux >= 2.6.38 */
ATTR_HELPER_INFO, /* variable length */
ATTR_CONNLABELS, /* variable length */
ATTR_CONNLABELS_MASK, /* variable length */
ATTR_ORIG_ZONE, /* u16 bits */
ATTR_REPL_ZONE, /* u16 bits */
ATTR_SNAT_IPV6, /* u128 bits */
ATTR_DNAT_IPV6, /* u128 bits */
ATTR_SYNPROXY_ISN, /* u32 bits */
ATTR_SYNPROXY_ITS, /* u32 bits */
ATTR_SYNPROXY_TSOFF, /* u32 bits */
ATTR_MAX
};
3. 例子
以下例子列印目前的TCP連接情況
main函數主要就是建立一個netlink套接字,發送請求
IPCTNL_MSG_CT_GET
擷取整個conntrack表資訊
最終結果接收在buf中,使用
mnl_cb_run
進行循環解析。
/*
 * Request a dump of the conntrack table over a NETLINK_NETFILTER socket and
 * hand every received batch of netlink messages to data_cb() via mnl_cb_run().
 *
 * Returns 0 once the dump has been consumed. Exits with EXIT_FAILURE, after
 * printing the name of the failing call with perror(), if the socket cannot
 * be opened or bound, the request cannot be sent, a receive fails, or
 * mnl_cb_run() reports an error.
 */
int main(void)
{
    struct mnl_socket *nl;
    struct nlmsghdr *nlh;
    struct nfgenmsg *nfh;
    char buf[MNL_SOCKET_BUFFER_SIZE];
    unsigned int seq, portid;
    int ret;

    nl = mnl_socket_open(NETLINK_NETFILTER);
    if (nl == NULL) {
        perror("mnl_socket_open");
        exit(EXIT_FAILURE);
    }

    if (mnl_socket_bind(nl, 0, MNL_SOCKET_AUTOPID) < 0) {
        perror("mnl_socket_bind");
        exit(EXIT_FAILURE);
    }
    portid = mnl_socket_get_portid(nl);

    /* Build the request in buf: ctnetlink subsystem, CT_GET with NLM_F_DUMP. */
    nlh = mnl_nlmsg_put_header(buf);
    nlh->nlmsg_type = (NFNL_SUBSYS_CTNETLINK << 8) | IPCTNL_MSG_CT_GET;
    nlh->nlmsg_flags = NLM_F_REQUEST | NLM_F_DUMP;
    /* The same sequence number is passed to mnl_cb_run() for the replies. */
    nlh->nlmsg_seq = seq = time(NULL);

    nfh = mnl_nlmsg_put_extra_header(nlh, sizeof(struct nfgenmsg));
    nfh->nfgen_family = AF_INET;
    nfh->version = NFNETLINK_V0;
    nfh->res_id = 0;

    ret = mnl_socket_sendto(nl, nlh, nlh->nlmsg_len);
    if (ret == -1) {
        /* Report the call that actually failed (was mislabelled as recvfrom). */
        perror("mnl_socket_sendto");
        exit(EXIT_FAILURE);
    }

    /* buf is reused for the replies; loop until the dump ends or a call fails. */
    for (;;) {
        ret = mnl_socket_recvfrom(nl, buf, sizeof(buf));
        if (ret == -1) {
            perror("mnl_socket_recvfrom");
            exit(EXIT_FAILURE);
        }
        if (ret == 0) {
            break;
        }

        ret = mnl_cb_run(buf, ret, seq, portid, data_cb, NULL);
        if (ret == -1) {
            /* Keep callback/parsing errors distinct from socket errors. */
            perror("mnl_cb_run");
            exit(EXIT_FAILURE);
        }
        if (ret <= MNL_CB_STOP) {
            break;
        }
    }

    mnl_socket_close(nl);
    return 0;
}
以下為回調函數的實作,在本例子中篩選出TCP連接進行展示(實際編譯時,這些 #include 和 data_cb 需要放在 main 函數之前)
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <time.h>
#include <arpa/inet.h>
#include <libmnl/libmnl.h>
#include <libnetfilter_conntrack/libnetfilter_conntrack.h>
/*
 * Callback for mnl_cb_run(): parse one netlink message into a conntrack
 * object and print it when its original-direction layer-4 protocol is TCP.
 *
 * nlh  - netlink message to parse
 * data - unused
 *
 * Always returns MNL_CB_OK. A message is skipped silently when the conntrack
 * object cannot be allocated or the message cannot be parsed.
 */
static int data_cb(const struct nlmsghdr *nlh, void *data)
{
    struct nf_conntrack *ct;
    char buf[4096];

    (void)data;

    ct = nfct_new();
    if (ct == NULL) {
        return MNL_CB_OK;
    }

    /* Do not inspect or print an object built from a message that failed to parse. */
    if (nfct_nlmsg_parse(nlh, ct) < 0) {
        nfct_destroy(ct);
        return MNL_CB_OK;
    }

    if (nfct_get_attr_u8(ct, ATTR_ORIG_L4PROTO) == IPPROTO_TCP) {
        nfct_snprintf(buf, sizeof(buf), ct, NFCT_T_UNKNOWN, NFCT_O_DEFAULT, 0);
        printf("%s\n", buf);
    }

    nfct_destroy(ct);
    return MNL_CB_OK;
}
運作結果涉及本機一些位址,就不展示了,結果與
/proc/net/nf_conntrack
一致,但到10w記錄的環境下,并不會有巨大的開銷。
4. 總結
只要連接沒有設定notrack標記,就可以通過nf_conntrack獲取連接數;
而使用libmnl+libnetfilter_conntrack的netlink套接字的方式,比直接cat檔案速度快很多;
檢視了官方手冊,發現libnetfilter_conntrack不僅可解析conntrack表,還能夠進行監控、修改等進階操作,功能十分強大!
參考文章:
[1] https://en.wikipedia.org/wiki/Netfilter
[2] https://www.netfilter.org/projects/libnetfilter_conntrack/index.html