繼上一篇筆記《防火牆之filter表(一)—AF_INET協定族》,這篇筆記來看看AF_INET協定族的filter表的鈎子注冊,以及防火牆檢查時對規則的遍歷過程。涉及的核心代碼檔案有:
代碼路徑 | 說明 |
---|---|
net/ipv4/netfilter/iptable_filter.c | IPv4 filter表實作 |
1. HOOK的注冊
/*
 * Module init routine for the IPv4 filter table (excerpt: the parts replaced
 * by "..." are not shown here).
 *
 * The visible part registers every entry of ipt_ops with netfilter through
 * nf_register_hooks(). A negative return value is treated as failure and
 * control jumps to the cleanup_table label, which lies in the elided part;
 * otherwise ret is returned to the caller.
 */
static int __init iptable_filter_init(void)
{
...
/* Register hooks */
ret = nf_register_hooks(ipt_ops, ARRAY_SIZE(ipt_ops));
if (ret < 0)
goto cleanup_table;
return ret;
...
}
沒什麼複雜的,直接調用Netfilter架構的鈎子注冊函數,關鍵是ipt_ops的定義。
/*
 * Hook descriptors for the IPv4 filter table. Each entry binds one hook
 * point of the PF_INET family to a callback; all three use the same
 * priority, NF_IP_PRI_FILTER.
 */
static struct nf_hook_ops ipt_ops[] __read_mostly = {
	/* Packets delivered to the local host. */
	{
		.pf		= PF_INET,
		.hooknum	= NF_INET_LOCAL_IN,
		.priority	= NF_IP_PRI_FILTER,
		.hook		= ipt_hook,
		.owner		= THIS_MODULE,
	},
	/* Packets being forwarded. */
	{
		.pf		= PF_INET,
		.hooknum	= NF_INET_FORWARD,
		.priority	= NF_IP_PRI_FILTER,
		.hook		= ipt_hook,
		.owner		= THIS_MODULE,
	},
	/* Locally generated packets; uses its own callback. */
	{
		.pf		= PF_INET,
		.hooknum	= NF_INET_LOCAL_OUT,
		.priority	= NF_IP_PRI_FILTER,
		.hook		= ipt_local_out_hook,
		.owner		= THIS_MODULE,
	},
};
注冊了三個HOOK點:LOCAL_IN、FORWARD和LOCAL_OUT,對應的鈎子函數分别為ipt_hook()、ipt_hook()和ipt_local_out_hook()。這三個鈎子的優先級都是NF_IP_PRI_FILTER(值為0)。
2. 鈎子函數:ipt_hook()
如上,當從LOCAL_IN和FORWARD兩個HOOK點通過filter表時,入口函數就是ipt_hook(),可以想象得到,該函數要執行的動作就是找到filter表中對應HOOK點上挂的規則,然後逐條比對處理。
/*
 * The work comes in here from netfilter.c.
 *
 * Hook callback: runs the packet through the IPv4 filter table of init_net
 * and hands back whatever verdict ipt_do_table() produced. The okfn
 * argument is not used by this function.
 */
static unsigned int ipt_hook(unsigned int hook,
			     struct sk_buff *skb, const struct net_device *in,
			     const struct net_device *out, int (*okfn)(struct sk_buff *))
{
	unsigned int verdict;

	/* Always evaluate against the filter table. */
	verdict = ipt_do_table(skb, hook, in, out, init_net.ipv4.iptable_filter);

	return verdict;
}
2.1 ipt_do_table()
/* Returns one of the generic firewall policies, like NF_ACCEPT. */
/*
 * Walk the rules that `table` holds for hook point `hook` and compute a
 * verdict for packet `skb`.
 *
 * skb:   packet under inspection; its IP header is read through ip_hdr().
 * hook:  hook point index, used to pick hook_entry[] / underflow[].
 * in:    input device, may be NULL (an all-zero name is used instead).
 * out:   output device, may be NULL (an all-zero name is used instead).
 * table: table to walk; its lock is read-held for the whole traversal.
 *
 * Returns NF_DROP when a match set hotdrop; otherwise the verdict decoded
 * from a standard target or returned by an extension target.
 *
 * NOTE(review): datalen is assigned twice but never read in this listing.
 */
unsigned int ipt_do_table(struct sk_buff *skb, unsigned int hook,
const struct net_device *in, const struct net_device *out,
struct xt_table *table)
{
static const char nulldevname[IFNAMSIZ] __attribute__((aligned(sizeof(long))));
u_int16_t offset;
struct iphdr *ip;
u_int16_t datalen;
bool hotdrop = false;
/* Initializing verdict to NF_DROP keeps gcc happy. */
unsigned int verdict = NF_DROP;
const char *indev, *outdev;
void *table_base;
struct ipt_entry *e, *back;
struct xt_table_info *private;
/* Initialization */
ip = ip_hdr(skb);
datalen = skb->len - ip->ihl * 4;
indev = in ? in->name : nulldevname;
outdev = out ? out->name : nulldevname;
/* We handle fragments by dealing with the first fragment as
* if it was a normal packet. All other fragments are treated
* normally, except that they will NEVER match rules that ask
* things we don't know, ie. tcp syn flag or ports). If the
* rule is also a fragment-specific rule, non-fragments won't
* match it. */
offset = ntohs(ip->frag_off) & IP_OFFSET;
read_lock_bh(&table->lock);
IP_NF_ASSERT(table->valid_hooks & (1 << hook));
//fetch the table's struct xt_table_info
private = table->private;
//base address of the rule entries selected by the current CPU id
table_base = (void *)private->entries[smp_processor_id()];
//hook_entry[hook] is handed to get_entry() together with table_base;
//per the original note it is the offset of the hook's first rule from the
//start of the table, so e starts at the first rule of this hook point
e = get_entry(table_base, private->hook_entry[hook]);
//back starts at the entry found via underflow[hook]; per the original note
//this is the last rule of the hook point, i.e. its default policy
back = get_entry(table_base, private->underflow[hook]);
do {
IP_NF_ASSERT(e);
IP_NF_ASSERT(back);
//ip_packet_match() performs the basic match against the rule's struct ipt_ip:
//IP addresses, in/out interface names, protocol and fragment flag
if (ip_packet_match(ip, indev, outdev, &e->ip, offset)) {
struct ipt_entry_target *t;
//do_match() runs the extension matches of this rule. A match may set
//hotdrop to true, which ends the whole traversal
if (IPT_MATCH_ITERATE(e, do_match, skb, in, out, offset, &hotdrop) != 0)
goto no_match;
//every match hit: update the rule's counters
ADD_COUNTER(e->counters, ntohs(ip->tot_len), 1);
//fetch the target of this rule
t = ipt_get_target(e);
IP_NF_ASSERT(t->u.kernel.target);
//a NULL target() callback in the xt_target marks a standard target
if (!t->u.kernel.target->target) {
int v = ((struct ipt_standard_target *)t)->verdict;
if (v < 0) {
//v is negative and not IPT_RETURN: this is already a final verdict
//(ACCEPT or DROP per the original note); decode it and stop processing
//the table for this hook point
if (v != IPT_RETURN) {
verdict = (unsigned)(-v) - 1;
break;
}
//v is IPT_RETURN: return from a user-defined chain.
//back and comefrom together behave like a stack: back is the top element,
//and each element's comefrom holds the offset of the element below it
e = back;
back = get_entry(table_base, back->comefrom);
continue;
}
//jump to a user-defined chain: unless the rule is a GOTO or the jump target is
//simply the next rule, push the next rule as the return point
if (table_base + v != (void *)e + e->next_offset &&
!(e->ip.flags & IPT_F_GOTO)) {
/* Save old back ptr in next entry */
struct ipt_entry *next = (void *)e + e->next_offset;
next->comefrom = (void *)back - table_base;
/* set back pointer to next entry */
back = next;
}
//non-negative v is used as the offset of the chain's first rule from table_base
e = get_entry(table_base, v);
} else {
//extension target: call its target() callback directly
/* Targets which reenter must return abs. verdicts */
verdict = t->u.kernel.target->target(skb, in, out, hook,
t->u.kernel.target, t->data);
/* Target might have changed stuff. */
ip = ip_hdr(skb);
datalen = skb->len - ip->ihl * 4;
//IPT_CONTINUE means move on to the next rule;
//any other return value is final and ends the traversal of the table
if (verdict == IPT_CONTINUE)
e = (void *)e + e->next_offset;
else
/* Verdict */
break;
}
} else {
no_match:
//no match: advance to the next rule
e = (void *)e + e->next_offset;
}
} while (!hotdrop);//stop early once a match has set hotdrop to true
read_unlock_bh(&table->lock);
//hotdrop set to true means the packet ends up dropped
if (hotdrop)
return NF_DROP;
else
return verdict;
}
2.1.1 基本比對ip_packet_match()
該函數把資料包的IP頭部與規則(struct ipt_entry)中的struct ipt_ip逐項比較:依次檢查源/目的位址、入口/出口網卡名、協定號以及分片标志,任何一項不符即傳回false,全部相符才傳回true。
/*
We keep a set of rules for each CPU, so we can avoid write-locking
them in the softirq when updating the counters and therefore
only need to read-lock in the softirq; doing a write_lock_bh() in user
context stops packets coming through and allows user context to read
the counters or update the rules.
Hence the start of any table is given by get_table() below. */
/*
 * Basic (non-extension) rule match. Performance critical - called for
 * every packet.
 *
 * Compares the IP header `ip` and the device names against the rule's
 * `ipinfo`, in this order: source/destination address (masked), input
 * interface name, output interface name, protocol, fragment flag. Each
 * test can be inverted through the corresponding bit in ipinfo->invflags.
 *
 * indev/outdev are read one unsigned long at a time over IFNAMSIZ bytes.
 * isfrag is non-zero when the packet is a non-first fragment.
 *
 * Returns true when every test passes, false at the first mismatch.
 */
static inline bool ip_packet_match(const struct iphdr *ip, const char *indev,
		const char *outdev, const struct ipt_ip *ipinfo, int isfrag)
{
	const unsigned long *pkt_name, *rule_name, *rule_mask;
	unsigned long diff;
	size_t w;

#define FWINV(bool, invflg) ((bool) ^ !!(ipinfo->invflags & (invflg)))

	/* Addresses: masked packet address must equal the rule address. */
	if (FWINV((ip->saddr & ipinfo->smsk.s_addr) != ipinfo->src.s_addr,
		  IPT_INV_SRCIP)
	    || FWINV((ip->daddr & ipinfo->dmsk.s_addr) != ipinfo->dst.s_addr,
		     IPT_INV_DSTIP)) {
		dprintf("Source or dest mismatch.\n");
		return false;
	}

	/* Input interface: word-wise masked compare; should unroll nicely. */
	pkt_name = (const unsigned long *)indev;
	rule_name = (const unsigned long *)ipinfo->iniface;
	rule_mask = (const unsigned long *)ipinfo->iniface_mask;
	diff = 0;
	for (w = 0; w < IFNAMSIZ/sizeof(unsigned long); w++)
		diff |= (pkt_name[w] ^ rule_name[w]) & rule_mask[w];

	if (FWINV(diff != 0, IPT_INV_VIA_IN)) {
		dprintf("VIA in mismatch (%s vs %s).%s\n",
			indev, ipinfo->iniface,
			ipinfo->invflags&IPT_INV_VIA_IN ?" (INV)":"");
		return false;
	}

	/* Output interface: same comparison against outiface. */
	pkt_name = (const unsigned long *)outdev;
	rule_name = (const unsigned long *)ipinfo->outiface;
	rule_mask = (const unsigned long *)ipinfo->outiface_mask;
	diff = 0;
	for (w = 0; w < IFNAMSIZ/sizeof(unsigned long); w++)
		diff |= (pkt_name[w] ^ rule_name[w]) & rule_mask[w];

	if (FWINV(diff != 0, IPT_INV_VIA_OUT)) {
		dprintf("VIA out mismatch (%s vs %s).%s\n",
			outdev, ipinfo->outiface,
			ipinfo->invflags&IPT_INV_VIA_OUT ?" (INV)":"");
		return false;
	}

	/* Protocol is only checked when the rule names one. */
	if (ipinfo->proto) {
		if (FWINV(ip->protocol != ipinfo->proto, IPT_INV_PROTO)) {
			dprintf("Packet protocol %hi does not match %hi.%s\n",
				ip->protocol, ipinfo->proto,
				ipinfo->invflags&IPT_INV_PROTO ? " (INV)":"");
			return false;
		}
	}

	/* A fragment rule does not match a packet that is not a fragment. */
	if (FWINV((ipinfo->flags&IPT_F_FRAG) && !isfrag, IPT_INV_FRAG)) {
		dprintf("Fragment rule but not fragment.%s\n",
			ipinfo->invflags & IPT_INV_FRAG ? " (INV)" : "");
		return false;
	}

	return true;
}
2.1.2 擴充比對do_match()
/*
 * Performance critical - called for every packet.
 *
 * Invokes the match() callback of extension match `m` on `skb`.
 * Note the inverted sense of the result: this returns true when the
 * extension did NOT match and false when it did. hotdrop is passed
 * through to the callback unchanged.
 */
static inline bool
do_match(struct ipt_entry_match *m, const struct sk_buff *skb,
	 const struct net_device *in, const struct net_device *out,
	 int offset, bool *hotdrop)
{
	const bool matched = m->u.kernel.match->match(skb, in, out,
						      m->u.kernel.match, m->data,
						      offset, ip_hdrlen(skb),
						      hotdrop);

	return !matched;
}
3. 鈎子函數ipt_local_out_hook()
/*
 * Hook callback for locally generated packets.
 *
 * A packet whose total length or IP header length is smaller than
 * struct iphdr is accepted without consulting the table (with a
 * rate-limited log line). Every other packet is run through the IPv4
 * filter table of init_net via ipt_do_table(). okfn is not used.
 */
static unsigned int ipt_local_out_hook(unsigned int hook, struct sk_buff *skb,
		const struct net_device *in, const struct net_device *out,
		int (*okfn)(struct sk_buff *))
{
	/* Normal case: header is complete, walk the table as usual. */
	if (skb->len >= sizeof(struct iphdr) &&
	    ip_hdrlen(skb) >= sizeof(struct iphdr))
		return ipt_do_table(skb, hook, in, out, init_net.ipv4.iptable_filter);

	/* root is playing with raw sockets. */
	if (net_ratelimit())
		printk("iptable_filter: ignoring short SOCK_RAW packet.\n");
	return NF_ACCEPT;
}