天天看點

防火牆之filter表(二)---AF_INET協定族

目錄:1. HOOK的注冊;2. 鈎子函數:ipt_hook();3. 鈎子函數:ipt_local_out_hook()

繼上一篇筆記防火牆之filter表(一)—AF_INET協定族,這篇筆記來看看AF_INET協定族的filter表的鈎子的注冊,以及防火牆檢查時規則的周遊。涉及的核心代碼檔案有:

代碼路徑 說明
net/ipv4/netfilter/iptable_filter.c IPv4 filter表實作

1. HOOK的注冊

/*
 * Init routine of the filter table (excerpt; the "..." lines mark code that
 * is omitted here).
 *
 * Registers every entry of ipt_ops[] through nf_register_hooks(). A negative
 * result transfers control to the cleanup_table label, which lives in the
 * omitted part; otherwise the result of nf_register_hooks() is returned.
 */
static int __init iptable_filter_init(void)
{
...
	/* Register hooks */
	ret = nf_register_hooks(ipt_ops, ARRAY_SIZE(ipt_ops));
	if (ret < 0)
		goto cleanup_table;
	return ret;
...
}
           

沒什麼複雜的,直接調用Netfilter架構的鈎子注冊函數,關鍵是ipt_ops的定義。

/*
 * Hook registrations for the filter table. Every entry belongs to PF_INET
 * and uses priority NF_IP_PRI_FILTER; only the hook point and the handler
 * differ from entry to entry.
 */
static struct nf_hook_ops ipt_ops[] __read_mostly = {
	{	/* NF_INET_LOCAL_IN is handled by ipt_hook() */
		.owner		= THIS_MODULE,
		.pf		= PF_INET,
		.hooknum	= NF_INET_LOCAL_IN,
		.priority	= NF_IP_PRI_FILTER,
		.hook		= ipt_hook,
	},
	{	/* NF_INET_FORWARD is handled by ipt_hook() */
		.owner		= THIS_MODULE,
		.pf		= PF_INET,
		.hooknum	= NF_INET_FORWARD,
		.priority	= NF_IP_PRI_FILTER,
		.hook		= ipt_hook,
	},
	{	/* NF_INET_LOCAL_OUT is handled by ipt_local_out_hook() */
		.owner		= THIS_MODULE,
		.pf		= PF_INET,
		.hooknum	= NF_INET_LOCAL_OUT,
		.priority	= NF_IP_PRI_FILTER,
		.hook		= ipt_local_out_hook,
	},
};
           

注冊了三個HOOK點:LOCAL_IN、FORWARD和LOCAL_OUT,對應的鈎子函數分别為ipt_hook()、ipt_hook()和ipt_local_out_hook()。這三個鈎子的優先級都是NF_IP_PRI_FILTER(值為0)。

2. 鈎子函數:ipt_hook()

如上,當從LOCAL_IN和FORWARD兩個HOOK點通過filter表時,入口函數就是ipt_hook(),可以想象得到,該函數要執行的動作就是找到filter表中對應HOOK點上挂的規則,然後逐條比對處理。

/*
 * The work comes in here from netfilter.c.
 *
 * Hands the packet to ipt_do_table() together with the filter table stored
 * in init_net and returns its result unchanged. The okfn argument is not
 * used by this function.
 */
static unsigned int ipt_hook(unsigned int hook,
	 struct sk_buff *skb, const struct net_device *in,
	 const struct net_device *out, int (*okfn)(struct sk_buff *))
{
	/* Always operate on the filter table. */
	struct xt_table *filter_table = init_net.ipv4.iptable_filter;

	return ipt_do_table(skb, hook, in, out, filter_table);
}
           

2.1 ipt_do_table()

/*
 * Walk the rules that @table holds for @hook and compute a verdict for @skb.
 *
 * @skb:   packet being examined
 * @hook:  hook number; selects the starting rule through hook_entry[] and
 *         underflow[], and is passed on to extension targets
 * @in:    input device, may be NULL (an all-zero name is used in that case)
 * @out:   output device, may be NULL (an all-zero name is used in that case)
 * @table: table whose rules are traversed, under table->lock held for read
 *
 * Returns NF_DROP when a match set hotdrop, otherwise the verdict left by
 * the rule that ended the traversal.
 *
 * NOTE(review): datalen is assigned twice below but never read in this
 * function as shown.
 */
/* Returns one of the generic firewall policies, like NF_ACCEPT. */
unsigned int ipt_do_table(struct sk_buff *skb, unsigned int hook,
	     const struct net_device *in, const struct net_device *out,
	     struct xt_table *table)
{
	static const char nulldevname[IFNAMSIZ] __attribute__((aligned(sizeof(long))));
	u_int16_t offset;
	struct iphdr *ip;
	u_int16_t datalen;
	bool hotdrop = false;
	/* Initializing verdict to NF_DROP keeps gcc happy. */
	unsigned int verdict = NF_DROP;
	const char *indev, *outdev;
	void *table_base;
	struct ipt_entry *e, *back;
	struct xt_table_info *private;

	/* Initialization */
	ip = ip_hdr(skb);
	datalen = skb->len - ip->ihl * 4;
	indev = in ? in->name : nulldevname;
	outdev = out ? out->name : nulldevname;
	/* We handle fragments by dealing with the first fragment as
	 * if it was a normal packet.  All other fragments are treated
	 * normally, except that they will NEVER match rules that ask
	 * things we don't know, ie. tcp syn flag or ports).  If the
	 * rule is also a fragment-specific rule, non-fragments won't
	 * match it. */
	offset = ntohs(ip->frag_off) & IP_OFFSET;

	read_lock_bh(&table->lock);
	IP_NF_ASSERT(table->valid_hooks & (1 << hook));
	/* Fetch the struct xt_table_info of this table. */
	private = table->private;
	/* Base of the rule copy that belongs to the current CPU. */
	table_base = (void *)private->entries[smp_processor_id()];
	/* hook_entry[] is used as an offset from the table base, so e ends up
	 * pointing at the first rule attached to this hook. */
	e = get_entry(table_base, private->hook_entry[hook]);
	/* back starts at the underflow entry of this hook (per the article:
	 * the last rule of the hook, i.e. its default policy). */
	back = get_entry(table_base, private->underflow[hook]);

	do {
		IP_NF_ASSERT(e);
		IP_NF_ASSERT(back);
		/* ip_packet_match() performs the basic checks against the rule:
		 * IP addresses, interface names, protocol and fragment flag. */
		if (ip_packet_match(ip, indev, outdev, &e->ip, offset)) {
			struct ipt_entry_target *t;
			/* do_match() runs the extension matches. A match may set
			 * hotdrop to true, which ends the whole traversal. */
			if (IPT_MATCH_ITERATE(e, do_match, skb, in, out, offset, &hotdrop) != 0)
				goto no_match;
			/* Every match succeeded: account the packet on this rule. */
			ADD_COUNTER(e->counters, ntohs(ip->tot_len), 1);
			/* Fetch the target of this rule. */
			t = ipt_get_target(e);
			IP_NF_ASSERT(t->u.kernel.target);
			/* A NULL target() callback in xt_target denotes a standard target. */
			if (!t->u.kernel.target->target) {
				int v = ((struct ipt_standard_target *)t)->verdict;
				if (v < 0) {
					/* Negative and not IPT_RETURN: this already is a
					 * final verdict (ACCEPT or DROP per the article),
					 * stored as -verdict - 1. Decode it and stop
					 * processing the table for this hook. */
					if (v != IPT_RETURN) {
						verdict = (unsigned)(-v) - 1;
						break;
					}
					/* IPT_RETURN: return from a user-defined chain.
					 * back and comefrom together behave like a stack:
					 * back is the top element and each element links
					 * to the one below it through its comefrom field. */
					e = back;
					back = get_entry(table_base, back->comefrom);
					continue;
				}
				/* Jump to a user-defined chain: push back onto the stack
				 * (skipped for a plain fall-through to the next rule and
				 * for IPT_F_GOTO rules), then move to the chain's entry. */
				if (table_base + v != (void *)e + e->next_offset &&
					!(e->ip.flags & IPT_F_GOTO)) {
					/* Save old back ptr in next entry */
					struct ipt_entry *next = (void *)e + e->next_offset;
					next->comefrom = (void *)back - table_base;
					/* set back pointer to next entry */
					back = next;
				}
				/* Here v is non-negative and is used as the offset of the
				 * destination rule from the table base. */
				e = get_entry(table_base, v);
			} else {
				/* Extension target: call its target() callback directly. */
				/* Targets which reenter must return abs. verdicts */
				verdict = t->u.kernel.target->target(skb, in, out, hook,
								     t->u.kernel.target, t->data);
				/* Target might have changed stuff. */
				ip = ip_hdr(skb);
				datalen = skb->len - ip->ihl * 4;
				/* IPT_CONTINUE means carry on with the next rule;
				 * any other result ends the traversal of the table. */
				if (verdict == IPT_CONTINUE)
					e = (void *)e + e->next_offset;
				else
					/* Verdict */
					break;
			}
		} else {
		no_match:
			/* No match: advance to the next rule. */
			e = (void *)e + e->next_offset;
		}
	} while (!hotdrop);/* a match that set hotdrop ends the traversal early */
	read_unlock_bh(&table->lock);
	/* hotdrop set to true ultimately causes the packet to be dropped. */
	if (hotdrop)
		return NF_DROP;
	else 
		return verdict;
}
           

2.1.1 基本比對ip_packet_match()

該函數将skb的IP頭部與struct ipt_entry中的struct ipt_ip結構逐項比較:源/目的位址(先與掩碼相與)、入/出網卡名(按掩碼比較)、協定号以及分片标志。每一項的比較結果都可以通過invflags中對應的标志取反,任何一項不比對就傳回false,全部通過才傳回true。

/*
   We keep a set of rules for each CPU, so we can avoid write-locking
   them in the softirq when updating the counters and therefore
   only need to read-lock in the softirq; doing a write_lock_bh() in user
   context stops packets coming through and allows user context to read
   the counters or update the rules.

   Hence the start of any table is given by get_table() below.  */

/*
 * Basic (non-extension) match of a packet against one rule.
 *
 * Checks, in this order: masked source/destination address, input interface
 * name, output interface name, protocol, fragment requirement. Each check
 * can be inverted through the corresponding bit in ipinfo->invflags.
 *
 * Returns true when every check passes, false at the first one that fails.
 */
/* Performance critical - called for every packet */
static inline bool ip_packet_match(const struct iphdr *ip, const char *indev,
		const char *outdev, const struct ipt_ip *ipinfo, int isfrag)
{
	const unsigned long *name_words, *rule_words, *mask_words;
	unsigned long diff;
	size_t w;

/* XOR a condition with "is the given inversion flag set in the rule". */
#define FWINV(bool, invflg) ((bool) ^ !!(ipinfo->invflags & (invflg)))

	/* Addresses are compared after applying the rule's masks. */
	if (FWINV((ip->saddr & ipinfo->smsk.s_addr) != ipinfo->src.s_addr,
		  IPT_INV_SRCIP) ||
	    FWINV((ip->daddr & ipinfo->dmsk.s_addr) != ipinfo->dst.s_addr,
		  IPT_INV_DSTIP)) {
		dprintf("Source or dest mismatch.\n");
		return false;
	}

	/* Input interface: word-wise masked comparison of the names. */
	name_words = (const unsigned long *)indev;
	rule_words = (const unsigned long *)ipinfo->iniface;
	mask_words = (const unsigned long *)ipinfo->iniface_mask;
	diff = 0;
	for (w = 0; w < IFNAMSIZ/sizeof(unsigned long); w++)
		diff |= (name_words[w] ^ rule_words[w]) & mask_words[w];

	if (FWINV(diff != 0, IPT_INV_VIA_IN)) {
		dprintf("VIA in mismatch (%s vs %s).%s\n",
			indev, ipinfo->iniface,
			ipinfo->invflags&IPT_INV_VIA_IN ?" (INV)":"");
		return false;
	}

	/* Output interface: same comparison against the outiface fields. */
	name_words = (const unsigned long *)outdev;
	rule_words = (const unsigned long *)ipinfo->outiface;
	mask_words = (const unsigned long *)ipinfo->outiface_mask;
	diff = 0;
	for (w = 0; w < IFNAMSIZ/sizeof(unsigned long); w++)
		diff |= (name_words[w] ^ rule_words[w]) & mask_words[w];

	if (FWINV(diff != 0, IPT_INV_VIA_OUT)) {
		dprintf("VIA out mismatch (%s vs %s).%s\n",
			outdev, ipinfo->outiface,
			ipinfo->invflags&IPT_INV_VIA_OUT ?" (INV)":"");
		return false;
	}

	/* Protocol is only checked when the rule names one (proto != 0). */
	if (ipinfo->proto &&
	    FWINV(ip->protocol != ipinfo->proto, IPT_INV_PROTO)) {
		dprintf("Packet protocol %hi does not match %hi.%s\n",
			ip->protocol, ipinfo->proto,
			ipinfo->invflags&IPT_INV_PROTO ? " (INV)":"");
		return false;
	}

	/* A fragment rule must not match a packet that is not a fragment. */
	if (FWINV((ipinfo->flags&IPT_F_FRAG) && !isfrag, IPT_INV_FRAG)) {
		dprintf("Fragment rule but not fragment.%s\n",
			ipinfo->invflags & IPT_INV_FRAG ? " (INV)" : "");
		return false;
	}

	return true;
}
           

2.1.2 擴充比對do_match()

/*
 * Run one extension match of a rule against the packet.
 *
 * Calls the match() callback of the extension and returns the negation of
 * its result: true when the packet does NOT match, false when it matches.
 */
/* Performance critical - called for every packet */
static inline bool
do_match(struct ipt_entry_match *m, const struct sk_buff *skb,
	      const struct net_device *in, const struct net_device *out,
	      int offset, bool *hotdrop)
{
	/* Call the match() callback of the extension directly. */
	const bool matched = m->u.kernel.match->match(skb, in, out,
						      m->u.kernel.match, m->data,
						      offset, ip_hdrlen(skb),
						      hotdrop);

	return !matched;
}
           

3. 鈎子函數ipt_local_out_hook()

/*
 * Hook handler registered for NF_INET_LOCAL_OUT.
 *
 * A packet whose total length or IP header length is smaller than
 * struct iphdr is accepted without consulting the table (with a rate-limited
 * log message). Every other packet is passed to ipt_do_table() with the
 * filter table stored in init_net. The okfn argument is not used.
 */
static unsigned int ipt_local_out_hook(unsigned int hook, struct sk_buff *skb,
		   const struct net_device *in, const struct net_device *out,
		   int (*okfn)(struct sk_buff *))
{
	/* Normal case: a full IP header is present, so traverse the table. */
	if (skb->len >= sizeof(struct iphdr) &&
	    ip_hdrlen(skb) >= sizeof(struct iphdr))
		return ipt_do_table(skb, hook, in, out, init_net.ipv4.iptable_filter);

	/* root is playing with raw sockets. */
	if (net_ratelimit())
		printk("iptable_filter: ignoring short SOCK_RAW packet.\n");

	return NF_ACCEPT;
}
           

繼續閱讀