轉載:
https://blog.csdn.net/Sophisticated_/article/details/87923362
http://blog.chinaunix.net/uid-28541347-id-5750406.html
https://blog.csdn.net/linyt/article/details/5191512
https://blog.csdn.net/NW_NW_NW/article/details/76674232
https://www.cnblogs.com/super-king/p/bridge_implement.html
https://www.cnblogs.com/ck1020/p/5894235.html
https://blog.csdn.net/one_clouder/article/details/52877737
linux核心源代碼變動怎麼這麼大,handle_bridge函數居然沒有了,本來接着準備以3.9.1分析的,但發現和後面的又變了,還是以4.15.1現在電腦上用的版本分析吧。
linux kernel:4.15.1
先看兩張圖檔
轉載:https://blog.csdn.net/NW_NW_NW/article/details/76153027
linux核心版本不一樣,函數發生了些許小改變,對圖檔進行細微更改。
轉載:https://blog.csdn.net/u012247418/article/details/90137663
1. br_handle_frame()
作用:
1. 對于需要轉發的封包,調用NF_BR_PRE_ROUTING處鈎子函數,結束後,進入br_handle_frame_finish()函數;
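2. 對于目的MAC為鍊路本地保留位址(01:80:C2:00:00:0X)的封包:STP(…:00)與LLDP(…:0E)封包若不需轉發,調用__br_handle_local_finish()後直接傳回RX_HANDLER_PASS;Pause幀(…:01)直接丢棄;其餘不需轉發的保留位址封包則調用NF_BR_LOCAL_IN處鈎子函數,結束後,進入br_handle_local_finish()函數,在br_handle_local_finish()函數中會調用br_pass_frame_up()函數。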
//linux/net/bridge/br_input.c
/*
 * br_handle_frame - receive handler for frames arriving on a bridge port.
 * @pskb: in/out pointer to the received skb. On the RX_HANDLER_PASS paths
 *        taken after skb_share_check(), *pskb is rewritten with the skb
 *        actually being processed.
 *
 * Returns RX_HANDLER_PASS when the frame is left to the caller (loopback
 * packets, locally handled Bridge Group Address / LLDP frames, or frames
 * accepted by br_should_route_hook), and RX_HANDLER_CONSUMED when the skb
 * was handed to a netfilter hook, freed, or lost in skb_share_check().
 * (The historical wording "Return NULL if skb is handled" no longer matches
 * the rx_handler_result_t return type.)
 *
 * note: already called with rcu_read_lock
 */
rx_handler_result_t br_handle_frame(struct sk_buff **pskb)
{
	struct net_bridge_port *p;
	struct sk_buff *skb = *pskb;
	const unsigned char *dest = eth_hdr(skb)->h_dest;
	br_should_route_hook_t *rhook;
	/* Packets of type PACKET_LOOPBACK are not bridged: pass them on untouched. */
	if (unlikely(skb->pkt_type == PACKET_LOOPBACK))
		return RX_HANDLER_PASS;
	/* Drop frames whose source MAC is not valid; is_valid_ether_addr()
	 * rejects multicast and all-zero addresses.
	 */
	if (!is_valid_ether_addr(eth_hdr(skb)->h_source))
		goto drop;
	/* If the skb is shared, continue with a private clone. A NULL result
	 * (presumably a failed clone allocation) leaves nothing to process.
	 */
	skb = skb_share_check(skb, GFP_ATOMIC);
	if (!skb)
		return RX_HANDLER_CONSUMED;
	/* Look up the bridge port attached to the receiving device. */
	p = br_port_get_rcu(skb->dev);
	if (p->flags & BR_VLAN_TUNNEL) {
		if (br_handle_ingress_vlan_tunnel(skb, p,
				nbp_vlan_group_rcu(p)))
			goto drop;
	}
	/* Special handling of reserved destination MAC addresses. */
	/* Destination is a link-local reserved address (01:80:c2:00:00:0X), e.g. an STP BPDU. */
	if (unlikely(is_link_local_ether_addr(dest))) {
		u16 fwd_mask = p->br->group_fwd_mask_required;
		/*
		 * See IEEE 802.1D Table 7-10 Reserved addresses
		 *
		 * Assignment Value
		 * Bridge Group Address 01-80-C2-00-00-00
		 * (MAC Control) 802.3 01-80-C2-00-00-01
		 * (Link Aggregation) 802.3 01-80-C2-00-00-02
		 * 802.1X PAE address 01-80-C2-00-00-03
		 *
		 * 802.1AB LLDP 01-80-C2-00-00-0E
		 *
		 * Others reserved for future standardization
		 */
		fwd_mask |= p->group_fwd_mask;
		switch (dest[5]) {
		case 0x00: /* Bridge Group Address */
			/* If STP is turned off,
			then must forward to keep loop detection */
			if (p->br->stp_enabled == BR_NO_STP ||
				fwd_mask & (1u << dest[5]))
				goto forward;
			*pskb = skb;
			__br_handle_local_finish(skb);
			return RX_HANDLER_PASS;
		case 0x01: /* IEEE MAC (Pause) */
			goto drop;
		case 0x0E: /* 802.1AB LLDP */
			fwd_mask |= p->br->group_fwd_mask;
			if (fwd_mask & (1u << dest[5]))
				goto forward;
			*pskb = skb;
			__br_handle_local_finish(skb);
			return RX_HANDLER_PASS;
		default:
			/* Allow selective forwarding for most other protocols */
			fwd_mask |= p->br->group_fwd_mask;
			if (fwd_mask & (1u << dest[5]))
				goto forward;
		}
		/* Deliver packet to local host only */
		/* Run the NF_BR_LOCAL_IN hook; br_handle_local_finish() is the
		 * continuation passed to NF_HOOK.
		 */
		NF_HOOK(NFPROTO_BRIDGE, NF_BR_LOCAL_IN, dev_net(skb->dev),
			NULL, skb, skb->dev, NULL, br_handle_local_finish);
		return RX_HANDLER_CONSUMED;
	}
forward:
	switch (p->state) {
	/* The bridge port is in the forwarding state. */
	case BR_STATE_FORWARDING:
		rhook = rcu_dereference(br_should_route_hook);
		if (rhook) {
			if ((*rhook)(skb)) {
				*pskb = skb;
				return RX_HANDLER_PASS;
			}
			dest = eth_hdr(skb)->h_dest;
		}
		/* fall through */
		/* The learning-state code below also runs for forwarding ports,
		 * because the case above has no break.
		 */
	case BR_STATE_LEARNING:
		/* Destination MAC equals the bridge device's own MAC: mark the
		 * packet as addressed to this host.
		 */
		if (ether_addr_equal(p->br->dev->dev_addr, dest))
			skb->pkt_type = PACKET_HOST;
		/* Run the NF_BR_PRE_ROUTING hook, continuing in br_handle_frame_finish(). */
		NF_HOOK(NFPROTO_BRIDGE, NF_BR_PRE_ROUTING,
			dev_net(skb->dev), NULL, skb, skb->dev, NULL,
			br_handle_frame_finish);
		break;
	default:
		/* Any other port state ends here; "drop" is also the target of the
		 * gotos above, so the skb is freed on every drop path.
		 */
drop:
		kfree_skb(skb);
	}
	return RX_HANDLER_CONSUMED;
}
相關函數
1. rx_handler_result_t枚舉類型
/* Result codes an rx handler such as br_handle_frame() reports to its caller. */
enum rx_handler_result {
	/* Returned by br_handle_frame() when the skb was handed to a hook or freed. */
	RX_HANDLER_CONSUMED,
	/* Not used in the code shown here. NOTE(review): upstream netdevice.h
	 * describes this as "run another round of the receive path" - confirm.
	 */
	RX_HANDLER_ANOTHER,
	/* Not used in the code shown here. NOTE(review): upstream netdevice.h
	 * describes this as "exact-match delivery only" - confirm.
	 */
	RX_HANDLER_EXACT,
	/* Returned by br_handle_frame() when the skb is left to the caller. */
	RX_HANDLER_PASS,
};
/* Convenience typedef used as the return type of rx handlers. */
typedef enum rx_handler_result rx_handler_result_t;
2. is_valid_ether_addr()
/**
 * is_valid_ether_addr - Check whether an Ethernet address is a usable station address
 * @addr: Pointer to a six-byte array containing the Ethernet address
 *
 * An address is rejected when it is a multicast address or the all-zero
 * address 00:00:00:00:00:00. The broadcast address FF:FF:FF:FF:FF:FF is
 * rejected as well, because it is itself a multicast address.
 *
 * Return true if the address is valid.
 *
 * Please note: addr must be aligned to u16.
 */
static inline bool is_valid_ether_addr(const u8 *addr)
{
	/* The multicast test also covers FF:FF:FF:FF:FF:FF, so no separate
	 * broadcast check is needed.
	 */
	if (is_multicast_ether_addr(addr))
		return false;

	return !is_zero_ether_addr(addr);
}
3.br_handle_local_finish()
br_handle_local_finish()函數中調用br_pass_frame_up()函數。
/*
 * br_handle_local_finish - continuation of the NF_BR_LOCAL_IN hook run from
 * br_handle_frame() for link-local frames delivered to the local host.
 *
 * Performs the shared local-finish step, records the bridge device in the
 * skb control block and passes the frame up. Always returns 0; the result
 * of br_pass_frame_up() is not propagated.
 *
 * note: already called with rcu_read_lock
 */
static int br_handle_local_finish(struct net *net, struct sock *sk, struct sk_buff *skb)
{
	struct net_bridge_port *port;
	struct net_device *bridge_dev;

	port = br_port_get_rcu(skb->dev);
	__br_handle_local_finish(skb);

	/* br_pass_frame_up() reads the bridge device from the control block. */
	bridge_dev = port->br->dev;
	BR_INPUT_SKB_CB(skb)->brdev = bridge_dev;

	br_pass_frame_up(skb);
	return 0;
}
2. br_handle_frame_finish()
作用:
網橋裝置是否處于混雜模式,如果是,則會發一份到本地進行處理
如果是廣播包,則會進行廣播洪泛,并會發一份到本地處理
如果是多點傳播包,則根據多點傳播表進行多點傳播轉發,并發一份資料包到本地處理
如果是單點傳播包,發往本地的單點傳播包則送到本地處理,在fdb表中可以找到轉發表項的單點傳播包則進行轉發,未知單點傳播包在廣播域内進行洪泛
//linux/net/bridge/br_input.c
/*
 * br_handle_frame_finish - continuation of the NF_BR_PRE_ROUTING hook.
 *
 * Applies VLAN ingress filtering, learns the source address, classifies the
 * frame as unicast/multicast/broadcast, then forwards it to a known port,
 * floods it, and/or passes it up to the local host. Returns 0 unless the
 * frame is passed up, in which case br_pass_frame_up()'s result is returned.
 *
 * note: already called with rcu_read_lock
 */
int br_handle_frame_finish(struct net *net, struct sock *sk, struct sk_buff *skb)
{
	struct net_bridge_port *p = br_port_get_rcu(skb->dev);
	enum br_pkt_type pkt_type = BR_PKT_UNICAST;
	struct net_bridge_fdb_entry *dst = NULL;
	struct net_bridge_mdb_entry *mdst;
	bool local_rcv, mcast_hit = false;
	const unsigned char *dest;
	struct net_bridge *br;
	u16 vid = 0;
	/* Drop if the bridge port no longer exists or is in BR_STATE_DISABLED. */
	if (!p || p->state == BR_STATE_DISABLED)
		goto drop;
	/* Check whether the frame may enter the bridge; br_allowed_ingress()
	 * also fills in vid. Failure jumps to "out" rather than "drop", so the
	 * skb is not freed here. NOTE(review): presumably br_allowed_ingress()
	 * releases the skb itself and accepts everything when VLAN filtering is
	 * off - confirm.
	 */
	if (!br_allowed_ingress(p->br, nbp_vlan_group_rcu(p), skb, &vid))
		goto out;
	nbp_switchdev_frame_mark(p, skb);
	/* insert into forwarding database after filtering to avoid spoofing */
	br = p->br;
	/* Update the FDB with the source MAC when the port has BR_LEARNING set.
	 * (Author's note, not shown here: a newly created port usually has
	 * BR_LEARNING | BR_FLOOD | BR_MCAST_FLOOD | BR_BCAST_FLOOD.)
	 */
	if (p->flags & BR_LEARNING)
		br_fdb_update(br, p, eth_hdr(skb)->h_source, vid, false);
	/* Local-delivery flag: starts out true when the bridge device is
	 * promiscuous. "!!" normalises the masked value to a bool.
	 */
	local_rcv = !!(br->dev->flags & IFF_PROMISC);
	dest = eth_hdr(skb)->h_dest;
	/* Destination is a multicast address. */
	if (is_multicast_ether_addr(dest)) {
		/* by definition the broadcast is also a multicast address */
		/* Broadcast frames are always delivered to the local host too. */
		if (is_broadcast_ether_addr(dest)) {
			pkt_type = BR_PKT_BROADCAST;
			local_rcv = true;
		} else {
			pkt_type = BR_PKT_MULTICAST;
			/* Entry point of the bridge multicast (IGMP snooping) code
			 * for received multicast frames; a non-zero result drops
			 * the frame.
			 */
			if (br_multicast_rcv(br, p, skb, vid))
				goto drop;
		}
	}
	/* A port still in BR_STATE_LEARNING has learned the source address
	 * above but must not forward the frame: drop it.
	 */
	if (p->state == BR_STATE_LEARNING)
		goto drop;
	/* Store the bridge's net_device in the skb control block
	 * (struct br_input_skb_cb).
	 */
	BR_INPUT_SKB_CB(skb)->brdev = br->dev;
	/* ARP/RARP frames go through proxy/suppress-ARP handling. IPv6 frames
	 * carrying ICMPv6 that parse as neighbour-discovery messages go through
	 * ND suppression when neigh_suppress_enabled is set.
	 */
	if (IS_ENABLED(CONFIG_INET) &&
		(skb->protocol == htons(ETH_P_ARP) ||
		skb->protocol == htons(ETH_P_RARP))) {
		br_do_proxy_suppress_arp(skb, br, vid, p);
	} else if (IS_ENABLED(CONFIG_IPV6) &&
		skb->protocol == htons(ETH_P_IPV6) &&
		br->neigh_suppress_enabled &&
		pskb_may_pull(skb, sizeof(struct ipv6hdr) +
			sizeof(struct nd_msg)) &&
		ipv6_hdr(skb)->nexthdr == IPPROTO_ICMPV6) {
		struct nd_msg *msg, _msg;
		msg = br_is_nd_neigh_msg(skb, &_msg);
		if (msg)
			br_do_suppress_nd(skb, br, vid, p, msg);
	}
	switch (pkt_type) {
	/* Multicast frame. */
	case BR_PKT_MULTICAST:
		/* Look up the multicast group entry. With a hit (or the
		 * mrouters-only flag) and a querier present, the frame uses
		 * multicast forwarding and is delivered locally only if the host
		 * joined the group or the bridge is a multicast router. Otherwise
		 * it is delivered locally and flooded.
		 */
		mdst = br_mdb_get(br, skb, vid);
		if ((mdst || BR_INPUT_SKB_CB_MROUTERS_ONLY(skb)) &&
			br_multicast_querier_exists(br, eth_hdr(skb))) {
			if ((mdst && mdst->host_joined) ||
				br_multicast_is_router(br)) {
				local_rcv = true;
				br->dev->stats.multicast++;
			}
			mcast_hit = true;
		} else {
			local_rcv = true;
			br->dev->stats.multicast++;
		}
		break;
	/* Unicast frame. */
	case BR_PKT_UNICAST:
		/* Look up the destination MAC in the FDB; there is no break, so
		 * control continues into the default case, which only breaks.
		 */
		dst = br_fdb_find_rcu(br, dest, vid);
	default:
		break;
	}
	/* An FDB entry was found for the destination MAC. */
	if (dst) {
		unsigned long now = jiffies;
		/* Entry belongs to the local host: pass the frame up. */
		if (dst->is_local)
			return br_pass_frame_up(skb);
		if (now != dst->used)
			dst->used = now;
		/* Forward to the port recorded in the FDB entry (dst->dst). When
		 * local_rcv is set (e.g. the bridge device is promiscuous),
		 * br_forward() sends a clone so the skb can still be passed up
		 * below.
		 */
		br_forward(dst->dst, skb, local_rcv, false);
	} else {
		/* No FDB entry: flood, or use multicast forwarding on an MDB hit. */
		if (!mcast_hit)
			br_flood(br, skb, pkt_type, local_rcv, false);
		else
			br_multicast_flood(mdst, skb, local_rcv, false);
	}
	/* local_rcv is set: hand the frame to the upper layers. */
	if (local_rcv)
		return br_pass_frame_up(skb);
out:
	return 0;
	/* Free the skb, then leave through the common exit. */
drop:
	kfree_skb(skb);
	goto out;
}
3.br_pass_frame_up
資料包的目的MAC是本地的單點傳播封包,廣播,多點傳播和網橋處于混雜模式時,封包都會通過br_pass_frame_up函數交由上層處理。
作用:
調用NF_BR_LOCAL_IN處鈎子函數,最後調用br_netif_receive_skb函數,繞一圈後,交由上層處理。
//linux/net/bridge/br_input.c
/*
 * br_pass_frame_up - deliver a frame to the local host through the bridge device.
 *
 * Accounts the frame in the bridge's per-CPU rx statistics, applies the
 * bridge device's VLAN egress check (skipped in promiscuous mode), retargets
 * skb->dev to the bridge device and runs the NF_BR_LOCAL_IN hook with
 * br_netif_receive_skb() as continuation.
 *
 * Returns NET_RX_DROP when the frame is rejected by the egress check or
 * br_handle_vlan() returns NULL, otherwise the result of NF_HOOK().
 */
static int br_pass_frame_up(struct sk_buff *skb)
{
	struct net_device *indev, *brdev = BR_INPUT_SKB_CB(skb)->brdev;
	struct net_bridge *br = netdev_priv(brdev);
	struct net_bridge_vlan_group *vg;
	struct pcpu_sw_netstats *brstats = this_cpu_ptr(br->stats);
	/* Account the received packet and bytes on the bridge device. */
	u64_stats_update_begin(&brstats->syncp);
	brstats->rx_packets++;
	brstats->rx_bytes += skb->len;
	u64_stats_update_end(&brstats->syncp);
	/* Fetch the VLAN group of the bridge device itself. */
	vg = br_vlan_group_rcu(br);
	/* Bridge is just like any other port. Make sure the
	 * packet is allowed except in promisc mode when someone
	 * may be running packet capture.
	 */
	if (!(brdev->flags & IFF_PROMISC) &&
		!br_allowed_egress(vg, skb)) {
		kfree_skb(skb);
		return NET_RX_DROP;
	}
	/* Remember the device the frame was actually received on. */
	indev = skb->dev;
	/* Make the bridge device the receiving device of the skb.
	 * NOTE(review): per the accompanying article, the skb then bypasses the
	 * bridge rx handler when it re-enters the receive path, because the
	 * bridge device itself has no rx_handler registered - confirm.
	 */
	skb->dev = brdev;
	/* Apply the VLAN handling for the bridge device; NULL means the skb is gone. */
	skb = br_handle_vlan(br, NULL, vg, skb);
	if (!skb)
		return NET_RX_DROP;
	/* update the multicast stats if the packet is IGMP/MLD */
	br_multicast_count(br, NULL, skb, br_multicast_igmp_type(skb),
		BR_MCAST_DIR_TX);
	/* Run the NF_BR_LOCAL_IN hook, continuing in br_netif_receive_skb(). */
	return NF_HOOK(NFPROTO_BRIDGE, NF_BR_LOCAL_IN,
		dev_net(indev), NULL, skb, indev, NULL,
		br_netif_receive_skb);
}
再次進入netif_receive_skb,由于skb->dev被設定成了bridge,而bridge裝置的rx_handler函數是沒有被設定的,是以就不會再次進入bridge邏輯,而直接進入了主機上層協定棧。
相關函數
1. br_netif_receive_skb()
可以看到在br_netif_receive_skb()函數中調用了netif_receive_skb()函數。
/*
 * br_netif_receive_skb - final step of local delivery: re-inject the skb
 * into the generic receive path and report that path's result.
 */
static int
br_netif_receive_skb(struct net *net, struct sock *sk, struct sk_buff *skb)
{
	int rx_status;

	/* NOTE(review): presumably detaches the bridge's placeholder route
	 * entry before the upper layers see the skb - confirm.
	 */
	br_drop_fake_rtable(skb);

	rx_status = netif_receive_skb(skb);
	return rx_status;
}
4. br_forward()
不是發往本地的資料包,但在fdb表中能找到對應的表項,則進行轉發br_forward(),若在fdb表中找不到對應表項就進行洪泛br_flood().
作用:
主要是調用__br_forward()轉發封包
//linux/net/bridge/br_forward.c
/**
 * br_forward - forward a packet to a specific port
 * @to: destination port
 * @skb: packet being forwarded
 * @local_rcv: packet will be received locally after forwarding
 * @local_orig: packet is locally originated
 *
 * When @local_rcv is set the caller keeps using @skb afterwards, so only a
 * clone is forwarded and @skb is never freed here. Otherwise @skb is either
 * forwarded or, if it cannot be delivered, freed.
 *
 * Should be called with rcu_read_lock.
 */
void br_forward(const struct net_bridge_port *to,
		struct sk_buff *skb, bool local_rcv, bool local_orig)
{
	/* No port, or should_deliver() refuses it: nothing is sent. */
	if (!to || !should_deliver(to, skb)) {
		/* The skb is released only when no local delivery follows. */
		if (!local_rcv)
			kfree_skb(skb);
		return;
	}

	if (local_rcv) {
		/* The original skb is still needed for local delivery. */
		deliver_clone(to, skb, local_orig);
	} else {
		__br_forward(to, skb, local_orig);
	}
}
5. __br_forward()
感覺這個函數很重要
作用
__br_forward()函數根據資料包的來源(local_orig)分别進入不同的鈎子點,如果資料包是從本地發出的,則進入NF_BR_LOCAL_OUT,如果不是本地發出的,則進入NF_BR_FORWARD鈎子,最後都進入br_forward_finish()函數。
//linux/net/bridge/br_forward.c
/*
 * __br_forward - send a frame out of bridge port @to.
 * @to: egress bridge port
 * @skb: frame to send; ownership passes to this function
 * @local_orig: true when the frame originated on the local host
 *
 * Applies the egress VLAN handling, points skb->dev at the egress device and
 * runs the NF_BR_FORWARD hook (forwarded traffic) or the NF_BR_LOCAL_OUT hook
 * (locally originated traffic), both continuing in br_forward_finish().
 */
static void __br_forward(const struct net_bridge_port *to,
		struct sk_buff *skb, bool local_orig)
{
	struct net_bridge_vlan_group *vg;
	struct net_device *indev;
	struct net *net;
	int br_hook;
	/* Fetch the VLAN group of the egress port, which br_handle_vlan() uses
	 * below. NOTE(review): presumably it looks up the frame's VID in that
	 * group - confirm.
	 */
	vg = nbp_vlan_group_rcu(to);
	/* Apply the egress VLAN handling; NULL means the skb is gone. */
	skb = br_handle_vlan(to->br, to, vg, skb);
	if (!skb)
		return;
	/* Remember the device the frame originally arrived on. */
	indev = skb->dev;
	/* Point the skb at the egress device. */
	skb->dev = to->dev;
	/* Frames that did not originate locally (local_orig false) go through
	 * the NF_BR_FORWARD hook; locally originated frames go through
	 * NF_BR_LOCAL_OUT.
	 */
	if (!local_orig) {
		if (skb_warn_if_lro(skb)) {
			kfree_skb(skb);
			return;
		}
		/* Forwarded traffic: use the NF_BR_FORWARD hook. */
		br_hook = NF_BR_FORWARD;
		skb_forward_csum(skb);
		net = dev_net(indev);
	} else {
		/* While netpoll transmit is running on the bridge device, skip the
		 * hooks and send through br_netpoll_send_skb(), or free the skb
		 * if it is not forwardable.
		 */
		if (unlikely(netpoll_tx_running(to->br->dev))) {
			if (!is_skb_forwardable(skb->dev, skb)) {
				kfree_skb(skb);
			} else {
				skb_push(skb, ETH_HLEN);
				br_netpoll_send_skb(to, skb);
			}
			return;
		}
		/* Locally originated traffic: use the NF_BR_LOCAL_OUT hook, with no
		 * input device.
		 */
		br_hook = NF_BR_LOCAL_OUT;
		net = dev_net(skb->dev);
		indev = NULL;
	}
	/* Run the selected hook, continuing in br_forward_finish(). */
	NF_HOOK(NFPROTO_BRIDGE, br_hook,
		net, NULL, skb, indev, skb->dev,
		br_forward_finish);
}
6. br_forward_finish()
br_forward_finish()函數比較簡單,調用NF_BR_POST_ROUTING處的鈎子函數,最後進入br_dev_queue_push_xmit函數。
//linux/net/bridge/br_forward.c
/*
 * br_forward_finish - continuation of the NF_BR_FORWARD / NF_BR_LOCAL_OUT
 * hooks: runs the NF_BR_POST_ROUTING hook with skb->dev as output device and
 * br_dev_queue_push_xmit() as continuation, returning NF_HOOK()'s result.
 */
int br_forward_finish(struct net *net, struct sock *sk, struct sk_buff *skb)
{
	struct net_device *out_dev = skb->dev;

	return NF_HOOK(NFPROTO_BRIDGE, NF_BR_POST_ROUTING, net, sk, skb,
		       NULL, out_dev, br_dev_queue_push_xmit);
}
7. br_dev_queue_push_xmit
在br_dev_queue_push_xmit()中,會先skb_push(skb, ETH_HLEN);将data指向二層頭部,然後調用dev_queue_xmit()發送封包。
//linux/net/bridge/br_forward.c
/*
 * br_dev_queue_push_xmit - continuation of the NF_BR_POST_ROUTING hook:
 * queue the frame for transmission on skb->dev.
 *
 * Frames that are not forwardable on the egress device, or VLAN-tagged
 * CHECKSUM_PARTIAL frames whose inner protocol cannot be determined, are
 * freed instead. Always returns 0.
 */
int br_dev_queue_push_xmit(struct net *net, struct sock *sk, struct sk_buff *skb)
{
	if (!is_skb_forwardable(skb->dev, skb)) {
		kfree_skb(skb);
		return 0;
	}

	/* Push ETH_HLEN bytes back so the data pointer covers the Ethernet
	 * header again before the frame is transmitted.
	 */
	skb_push(skb, ETH_HLEN);
	br_drop_fake_rtable(skb);

	if (skb->ip_summed == CHECKSUM_PARTIAL) {
		if (skb->protocol == htons(ETH_P_8021Q) ||
		    skb->protocol == htons(ETH_P_8021AD)) {
			int vlan_depth;

			/* The network header offset is taken from the depth
			 * reported by __vlan_get_protocol().
			 */
			if (!__vlan_get_protocol(skb, skb->protocol, &vlan_depth)) {
				kfree_skb(skb);
				return 0;
			}
			skb_set_network_header(skb, vlan_depth);
		}
	}

	dev_queue_xmit(skb);
	return 0;
}
8. br_flood
br_flood()也是調用__br_forward()函數轉發封包。
//linux/net/bridge/br_forward.c
/*
 * br_flood - flood a frame to the eligible ports of a bridge.
 * @br: bridge whose port list is walked
 * @skb: frame to flood
 * @pkt_type: unicast / multicast / broadcast classification of the frame
 * @local_rcv: the caller will still deliver @skb locally afterwards
 * @local_orig: the frame originated on the local host
 *
 * When @local_rcv is false the skb is either handed to the last selected
 * port or freed; when it is true only clones are sent and @skb is left to
 * the caller.
 *
 * called under rcu_read_lock
 */
void br_flood(struct net_bridge *br, struct sk_buff *skb,
	enum br_pkt_type pkt_type, bool local_rcv, bool local_orig)
{
	u8 igmp_type = br_multicast_igmp_type(skb);
	struct net_bridge_port *prev = NULL;
	struct net_bridge_port *p;
	/* Walk the bridge's port_list and consider every bridge port. */
	list_for_each_entry_rcu(p, &br->port_list, list)
	{
		/* Do not flood unicast traffic to ports that turn it off, nor
		 * other traffic if flood off, except for traffic we originate
		 */
		switch (pkt_type)
		{
		case BR_PKT_UNICAST:
			if (!(p->flags & BR_FLOOD))
				continue;
			break;
		case BR_PKT_MULTICAST:
			if (!(p->flags & BR_MCAST_FLOOD) && skb->dev != br->dev)
				continue;
			break;
		case BR_PKT_BROADCAST:
			if (!(p->flags & BR_BCAST_FLOOD) && skb->dev != br->dev)
				continue;
			break;
		}
		/* Do not flood to ports that enable proxy ARP */
		if (p->flags & BR_PROXYARP)
			continue;
		/* Skip proxy-ARP-WiFi / neighbour-suppress ports when a proxy reply
		 * has already been sent for this frame.
		 */
		if ((p->flags & (BR_PROXYARP_WIFI | BR_NEIGH_SUPPRESS)) &&
			BR_INPUT_SKB_CB(skb)->proxyarp_replied)
			continue;
		/* NOTE(review): maybe_deliver() appears to send a clone to the
		 * previously selected port and return the new candidate, or an
		 * ERR_PTR on failure - confirm against its definition.
		 */
		prev = maybe_deliver(prev, p, skb, local_orig);
		if (IS_ERR(prev))
			goto out;
		/* Count the frame for this port only when it became the candidate. */
		if (prev == p)
			br_multicast_count(p->br, p, skb, igmp_type,
				BR_MCAST_DIR_TX);
	}
	/* No port was selected at all. */
	if (!prev)
		goto out;
	/* Last selected port: send a clone if the caller still needs the skb,
	 * otherwise hand over the skb itself.
	 */
	if (local_rcv)
		deliver_clone(prev, skb, local_orig);
	else
		__br_forward(prev, skb, local_orig);
	return;
out:
	/* The skb is freed only when the caller will not deliver it locally. */
	if (!local_rcv)
		kfree_skb(skb);
}
9. br_multicast_flood()
以後再分析,肚子餓了。
//linux/net/bridge/br_forward.c
/*
 * br_multicast_flood - deliver a multicast frame to the ports of a multicast
 * group entry and to the bridge's multicast router ports.
 * @mdst: multicast group entry, or NULL to use only the router ports
 * @skb: frame to deliver
 * @local_rcv: the caller will still deliver @skb locally afterwards
 * @local_orig: the frame originated on the local host
 *
 * When @local_rcv is false the skb is either handed to the last selected
 * port or freed; when it is true only clones are sent and @skb is left to
 * the caller.
 *
 * called with rcu_read_lock
 */
void br_multicast_flood(struct net_bridge_mdb_entry *mdst,
	struct sk_buff *skb,
	bool local_rcv, bool local_orig)
{
	struct net_device *dev = BR_INPUT_SKB_CB(skb)->brdev;
	u8 igmp_type = br_multicast_igmp_type(skb);
	struct net_bridge *br = netdev_priv(dev);
	struct net_bridge_port *prev = NULL;
	struct net_bridge_port_group *p;
	struct hlist_node *rp;
	/* rp walks the bridge's router port list, p walks the group's port list. */
	rp = rcu_dereference(hlist_first_rcu(&br->router_list));
	p = mdst ? rcu_dereference(mdst->ports) : NULL;
	/* Merge both lists. Each round handles whichever candidate port has the
	 * numerically larger address. NOTE(review): this presumably relies on
	 * both lists being kept sorted by port address, so a port on both lists
	 * is visited once - confirm.
	 */
	while (p || rp) {
		struct net_bridge_port *port, *lport, *rport;
		lport = p ? p->port : NULL;
		rport = rp ? hlist_entry(rp, struct net_bridge_port, rlist) :
			NULL;
		if ((unsigned long)lport > (unsigned long)rport) {
			port = lport;
			/* Multicast-to-unicast ports get the frame addressed to the
			 * group member's MAC; the maybe_deliver() call is skipped.
			 */
			if (port->flags & BR_MULTICAST_TO_UNICAST) {
				maybe_deliver_addr(lport, skb, p->eth_addr,
					local_orig);
				goto delivered;
			}
		} else {
			port = rport;
		}
		prev = maybe_deliver(prev, port, skb, local_orig);
delivered:
		if (IS_ERR(prev))
			goto out;
		/* Count the frame for this port only when it became the candidate. */
		if (prev == port)
			br_multicast_count(port->br, port, skb, igmp_type,
				BR_MCAST_DIR_TX);
		/* Advance every list whose current port was just handled; both
		 * advance when lport and rport are the same port.
		 */
		if ((unsigned long)lport >= (unsigned long)port)
			p = rcu_dereference(p->next);
		if ((unsigned long)rport >= (unsigned long)port)
			rp = rcu_dereference(hlist_next_rcu(rp));
	}
	/* No port was selected at all. */
	if (!prev)
		goto out;
	/* Last selected port: send a clone if the caller still needs the skb,
	 * otherwise hand over the skb itself.
	 */
	if (local_rcv)
		deliver_clone(prev, skb, local_orig);
	else
		__br_forward(prev, skb, local_orig);
	return;
out:
	/* The skb is freed only when the caller will not deliver it locally. */
	if (!local_rcv)
		kfree_skb(skb);
}