source: npl/kernel/linux_src/linux-4.9-imq.diff @ 0e62558

Last change on this file since 0e62558 was 6936b89, checked in 6 years ago.
File size: 45.3 KB
-
drivers/net/Kconfig
diff --git a/drivers/net/Kconfig b/drivers/net/Kconfig index 95c32f2..93fada5 100644
@@ config RIONET_RX_SIZE @@
 	depends on RIONET
 	default "128"
 
+config IMQ
+	tristate "IMQ (intermediate queueing device) support"
+	depends on NETDEVICES && NETFILTER
+	---help---
+	  The IMQ device(s) is used as a placeholder for QoS queueing
+	  disciplines. Every packet entering/leaving the IP stack can be
+	  directed through the IMQ device where it's enqueued/dequeued to the
+	  attached qdisc. This allows you to treat network devices as classes
+	  and distribute bandwidth among them. Iptables is used to specify
+	  through which IMQ device, if any, packets travel.
+
+	  More information at: https://github.com/imq/linuximq
+
+	  To compile this driver as a module, choose M here: the module
+	  will be called imq. If unsure, say N.
+
+choice
+	prompt "IMQ behavior (PRE/POSTROUTING)"
+	depends on IMQ
+	default IMQ_BEHAVIOR_AB
+	help
+	  This setting defines how IMQ behaves in respect to its
+	  hooking in PREROUTING and POSTROUTING.
+
+	  IMQ can work in any of the following ways:
+
+	      PREROUTING   |      POSTROUTING
+	  -----------------|-------------------
+	  #1  After NAT    |      After NAT
+	  #2  After NAT    |      Before NAT
+	  #3  Before NAT   |      After NAT
+	  #4  Before NAT   |      Before NAT
+
+	  The default behavior is to hook before NAT on PREROUTING
+	  and after NAT on POSTROUTING (#3).
+
+	  These settings are especially useful when trying to use IMQ
+	  to shape NATed clients.
+
+	  More information can be found at: https://github.com/imq/linuximq
+
+	  If not sure leave the default settings alone.
+
+config IMQ_BEHAVIOR_AA
+	bool "IMQ AA"
+	help
+	  This setting defines how IMQ behaves in respect to its
+	  hooking in PREROUTING and POSTROUTING.
+
+	  Choosing this option will make IMQ hook like this:
+
+	  PREROUTING:   After NAT
+	  POSTROUTING:  After NAT
+
+	  More information can be found at: https://github.com/imq/linuximq
+
+	  If not sure leave the default settings alone.
+
+config IMQ_BEHAVIOR_AB
+	bool "IMQ AB"
+	help
+	  This setting defines how IMQ behaves in respect to its
+	  hooking in PREROUTING and POSTROUTING.
+
+	  Choosing this option will make IMQ hook like this:
+
+	  PREROUTING:   After NAT
+	  POSTROUTING:  Before NAT
+
+	  More information can be found at: https://github.com/imq/linuximq
+
+	  If not sure leave the default settings alone.
+
+config IMQ_BEHAVIOR_BA
+	bool "IMQ BA"
+	help
+	  This setting defines how IMQ behaves in respect to its
+	  hooking in PREROUTING and POSTROUTING.
+
+	  Choosing this option will make IMQ hook like this:
+
+	  PREROUTING:   Before NAT
+	  POSTROUTING:  After NAT
+
+	  More information can be found at: https://github.com/imq/linuximq
+
+	  If not sure leave the default settings alone.
+
+config IMQ_BEHAVIOR_BB
+	bool "IMQ BB"
+	help
+	  This setting defines how IMQ behaves in respect to its
+	  hooking in PREROUTING and POSTROUTING.
+
+	  Choosing this option will make IMQ hook like this:
+
+	  PREROUTING:   Before NAT
+	  POSTROUTING:  Before NAT
+
+	  More information can be found at: https://github.com/imq/linuximq
+
+	  If not sure leave the default settings alone.
+
+endchoice
+
+config IMQ_NUM_DEVS
+	int "Number of IMQ devices"
+	range 2 16
+	depends on IMQ
+	default "16"
+	help
+	  This setting defines how many IMQ devices will be created.
+
+	  The default value is 16.
+
+	  More information can be found at: https://github.com/imq/linuximq
+
+	  If not sure leave the default settings alone.
+
 config TUN
 	tristate "Universal TUN/TAP device driver support"
 	depends on INET
-
drivers/net/Makefile
diff --git a/drivers/net/Makefile b/drivers/net/Makefile index 7336cbd..d6d7ad4 100644
@@ obj-$(CONFIG_DUMMY) += dummy.o @@
 obj-$(CONFIG_EQUALIZER) += eql.o
 obj-$(CONFIG_IFB) += ifb.o
 obj-$(CONFIG_MACSEC) += macsec.o
+obj-$(CONFIG_IMQ) += imq.o
 obj-$(CONFIG_MACVLAN) += macvlan.o
 obj-$(CONFIG_MACVTAP) += macvtap.o
 obj-$(CONFIG_MII) += mii.o
-
new file drivers/net/imq.c
diff --git a/drivers/net/imq.c b/drivers/net/imq.c new file mode 100644 index 0000000..bc3b997
/*
 * Pseudo-driver for the intermediate queue device.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version
 * 2 of the License, or (at your option) any later version.
 *
 * Authors:	Patrick McHardy, <kaber@trash.net>
 *
 *		The first version was written by Martin Devera, <devik@cdi.cz>
 *
 * See Credits.txt
 */

#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/moduleparam.h>
#include <linux/list.h>
#include <linux/skbuff.h>
#include <linux/netdevice.h>
#include <linux/etherdevice.h>
#include <linux/rtnetlink.h>
#include <linux/if_arp.h>
#include <linux/netfilter.h>
#include <linux/netfilter_ipv4.h>
#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
#include <linux/netfilter_ipv6.h>
#endif
#include <linux/imq.h>
#include <net/pkt_sched.h>
#include <net/netfilter/nf_queue.h>
#include <net/sock.h>
#include <linux/ip.h>
#include <linux/ipv6.h>
#include <linux/if_vlan.h>
#include <linux/if_pppox.h>
#include <net/ip.h>
#include <net/ipv6.h>

static int imq_nf_queue(struct nf_queue_entry *entry, unsigned queue_num);

static nf_hookfn imq_nf_hook;

static struct nf_hook_ops imq_ops[] = {
	{
	/* imq_ingress_ipv4 */
		.hook		= imq_nf_hook,
		.pf		= PF_INET,
		.hooknum	= NF_INET_PRE_ROUTING,
#if defined(CONFIG_IMQ_BEHAVIOR_BA) || defined(CONFIG_IMQ_BEHAVIOR_BB)
		.priority	= NF_IP_PRI_MANGLE + 1,
#else
		.priority	= NF_IP_PRI_NAT_DST + 1,
#endif
	},
	{
	/* imq_egress_ipv4 */
		.hook		= imq_nf_hook,
		.pf		= PF_INET,
		.hooknum	= NF_INET_POST_ROUTING,
#if defined(CONFIG_IMQ_BEHAVIOR_AA) || defined(CONFIG_IMQ_BEHAVIOR_BA)
		.priority	= NF_IP_PRI_LAST,
#else
		.priority	= NF_IP_PRI_NAT_SRC - 1,
#endif
	},
#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
	{
	/* imq_ingress_ipv6 */
		.hook		= imq_nf_hook,
		.pf		= PF_INET6,
		.hooknum	= NF_INET_PRE_ROUTING,
#if defined(CONFIG_IMQ_BEHAVIOR_BA) || defined(CONFIG_IMQ_BEHAVIOR_BB)
		.priority	= NF_IP6_PRI_MANGLE + 1,
#else
		.priority	= NF_IP6_PRI_NAT_DST + 1,
#endif
	},
	{
	/* imq_egress_ipv6 */
		.hook		= imq_nf_hook,
		.pf		= PF_INET6,
		.hooknum	= NF_INET_POST_ROUTING,
#if defined(CONFIG_IMQ_BEHAVIOR_AA) || defined(CONFIG_IMQ_BEHAVIOR_BA)
		.priority	= NF_IP6_PRI_LAST,
#else
		.priority	= NF_IP6_PRI_NAT_SRC - 1,
#endif
	},
#endif
};

#if defined(CONFIG_IMQ_NUM_DEVS)
static int numdevs = CONFIG_IMQ_NUM_DEVS;
#else
static int numdevs = IMQ_MAX_DEVS;
#endif

static struct net_device *imq_devs_cache[IMQ_MAX_DEVS];

#define IMQ_MAX_QUEUES 32
static int numqueues = 1;
static u32 imq_hashrnd;
static int imq_dev_accurate_stats = 1;

static inline __be16 pppoe_proto(const struct sk_buff *skb)
{
	return *((__be16 *)(skb_mac_header(skb) + ETH_HLEN +
			sizeof(struct pppoe_hdr)));
}

static u16 imq_hash(struct net_device *dev, struct sk_buff *skb)
{
	unsigned int pull_len;
	u16 protocol = skb->protocol;
	u32 addr1, addr2;
	u32 hash, ihl = 0;
	union {
		u16 in16[2];
		u32 in32;
	} ports;
	u8 ip_proto;

	pull_len = 0;

recheck:
	switch (protocol) {
	case htons(ETH_P_8021Q): {
		if (unlikely(skb_pull(skb, VLAN_HLEN) == NULL))
			goto other;

		pull_len += VLAN_HLEN;
		skb->network_header += VLAN_HLEN;

		protocol = vlan_eth_hdr(skb)->h_vlan_encapsulated_proto;
		goto recheck;
	}

	case htons(ETH_P_PPP_SES): {
		if (unlikely(skb_pull(skb, PPPOE_SES_HLEN) == NULL))
			goto other;

		pull_len += PPPOE_SES_HLEN;
		skb->network_header += PPPOE_SES_HLEN;

		protocol = pppoe_proto(skb);
		goto recheck;
	}

	case htons(ETH_P_IP): {
		const struct iphdr *iph = ip_hdr(skb);

		if (unlikely(!pskb_may_pull(skb, sizeof(struct iphdr))))
			goto other;

		addr1 = iph->daddr;
		addr2 = iph->saddr;

		ip_proto = !(ip_hdr(skb)->frag_off & htons(IP_MF | IP_OFFSET)) ?
				 iph->protocol : 0;
		ihl = ip_hdrlen(skb);

		break;
	}
#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
	case htons(ETH_P_IPV6): {
		const struct ipv6hdr *iph = ipv6_hdr(skb);
		__be16 fo = 0;

		if (unlikely(!pskb_may_pull(skb, sizeof(struct ipv6hdr))))
			goto other;

		addr1 = iph->daddr.s6_addr32[3];
		addr2 = iph->saddr.s6_addr32[3];
		ihl = ipv6_skip_exthdr(skb, sizeof(struct ipv6hdr), &ip_proto,
				       &fo);
		if (unlikely(ihl < 0))
			goto other;

		break;
	}
#endif
	default:
other:
		if (pull_len != 0) {
			skb_push(skb, pull_len);
			skb->network_header -= pull_len;
		}

		return (u16)(ntohs(protocol) % dev->real_num_tx_queues);
	}

	if (addr1 > addr2)
		swap(addr1, addr2);

	switch (ip_proto) {
	case IPPROTO_TCP:
	case IPPROTO_UDP:
	case IPPROTO_DCCP:
	case IPPROTO_ESP:
	case IPPROTO_AH:
	case IPPROTO_SCTP:
	case IPPROTO_UDPLITE: {
		if (likely(skb_copy_bits(skb, ihl, &ports.in32, 4) >= 0)) {
			if (ports.in16[0] > ports.in16[1])
				swap(ports.in16[0], ports.in16[1]);
			break;
		}
		/* fall-through */
	}
	default:
		ports.in32 = 0;
		break;
	}

	if (pull_len != 0) {
		skb_push(skb, pull_len);
		skb->network_header -= pull_len;
	}

	hash = jhash_3words(addr1, addr2, ports.in32, imq_hashrnd ^ ip_proto);

	return (u16)(((u64)hash * dev->real_num_tx_queues) >> 32);
}

static inline bool sk_tx_queue_recorded(struct sock *sk)
{
	return (sk_tx_queue_get(sk) >= 0);
}

static struct netdev_queue *imq_select_queue(struct net_device *dev,
					     struct sk_buff *skb)
{
	u16 queue_index = 0;
	u32 hash;

	if (likely(dev->real_num_tx_queues == 1))
		goto out;

	/* IMQ can be receiving ingress or egress packets. */

	/* Check first for if rx_queue is set */
	if (skb_rx_queue_recorded(skb)) {
		queue_index = skb_get_rx_queue(skb);
		goto out;
	}

	/* Check if socket has tx_queue set */
	if (sk_tx_queue_recorded(skb->sk)) {
		queue_index = sk_tx_queue_get(skb->sk);
		goto out;
	}

	/* Try use socket hash */
	if (skb->sk && skb->sk->sk_hash) {
		hash = skb->sk->sk_hash;
		queue_index =
			(u16)(((u64)hash * dev->real_num_tx_queues) >> 32);
		goto out;
	}

	/* Generate hash from packet data */
	queue_index = imq_hash(dev, skb);

out:
	if (unlikely(queue_index >= dev->real_num_tx_queues))
		queue_index = (u16)((u32)queue_index % dev->real_num_tx_queues);

	skb_set_queue_mapping(skb, queue_index);
	return netdev_get_tx_queue(dev, queue_index);
}

static struct net_device_stats *imq_get_stats(struct net_device *dev)
{
	return &dev->stats;
}

/* called for packets kfree'd in qdiscs at places other than enqueue */
static void imq_skb_destructor(struct sk_buff *skb)
{
	struct nf_queue_entry *entry = skb->nf_queue_entry;

	skb->nf_queue_entry = NULL;

	if (entry) {
		nf_queue_entry_release_refs(entry);
		kfree(entry);
	}

	skb_restore_cb(skb); /* kfree backup */
}

static void imq_done_check_queue_mapping(struct sk_buff *skb,
					 struct net_device *dev)
{
	unsigned int queue_index;

	/* Don't let queue_mapping be left too large after exiting IMQ */
	if (likely(skb->dev != dev && skb->dev != NULL)) {
		queue_index = skb_get_queue_mapping(skb);
		if (unlikely(queue_index >= skb->dev->real_num_tx_queues)) {
			queue_index = (u16)((u32)queue_index %
						skb->dev->real_num_tx_queues);
			skb_set_queue_mapping(skb, queue_index);
		}
	} else {
		/* skb->dev was IMQ device itself or NULL, be on safe side and
		 * just clear queue mapping.
		 */
		skb_set_queue_mapping(skb, 0);
	}
}

static netdev_tx_t imq_dev_xmit(struct sk_buff *skb, struct net_device *dev)
{
	struct nf_queue_entry *entry = skb->nf_queue_entry;

	skb->nf_queue_entry = NULL;
	netif_trans_update(dev);

	dev->stats.tx_bytes += skb->len;
	dev->stats.tx_packets++;

	if (unlikely(entry == NULL)) {
		/* We don't know what is going on here.. packet is queued for
		 * imq device, but (probably) not by us.
		 *
		 * If this packet was not sent here by imq_nf_queue(), then
		 * skb_save_cb() was not used and skb_free() should not show:
		 *   WARNING: IMQ: kfree_skb: skb->cb_next:..
		 * and/or
		 *   WARNING: IMQ: kfree_skb: skb->nf_queue_entry...
		 *
		 * However if this message is shown, then IMQ is somehow broken
		 * and you should report this to linuximq.net.
		 */

		/* imq_dev_xmit is a black hole that eats all packets, report
		 * that we eat this packet happily and increase dropped
		 * counters.
		 */

		dev->stats.tx_dropped++;
		dev_kfree_skb(skb);

		return NETDEV_TX_OK;
	}

	skb_restore_cb(skb); /* restore skb->cb */

	skb->imq_flags = 0;
	skb->destructor = NULL;

	imq_done_check_queue_mapping(skb, dev);

	nf_reinject(entry, NF_ACCEPT);

	return NETDEV_TX_OK;
}

static struct net_device *get_imq_device_by_index(int index)
{
	struct net_device *dev = NULL;
	struct net *net;
	char buf[8];

	/* get device by name and cache result */
	snprintf(buf, sizeof(buf), "imq%d", index);

	/* Search device from all namespaces. */
	for_each_net(net) {
		dev = dev_get_by_name(net, buf);
		if (dev)
			break;
	}

	if (WARN_ON_ONCE(dev == NULL)) {
		/* IMQ device not found. Exotic config? */
		return ERR_PTR(-ENODEV);
	}

	imq_devs_cache[index] = dev;
	dev_put(dev);

	return dev;
}

static struct nf_queue_entry *nf_queue_entry_dup(struct nf_queue_entry *e)
{
	struct nf_queue_entry *entry = kmemdup(e, e->size, GFP_ATOMIC);
	if (entry) {
		nf_queue_entry_get_refs(entry);
		return entry;
	}
	return NULL;
}

#ifdef CONFIG_BRIDGE_NETFILTER
/* When called from bridge netfilter, skb->data must point to MAC header
 * before calling skb_gso_segment(). Else, original MAC header is lost
 * and segmented skbs will be sent to wrong destination.
 */
static void nf_bridge_adjust_skb_data(struct sk_buff *skb)
{
	if (skb->nf_bridge)
		__skb_push(skb, skb->network_header - skb->mac_header);
}

static void nf_bridge_adjust_segmented_data(struct sk_buff *skb)
{
	if (skb->nf_bridge)
		__skb_pull(skb, skb->network_header - skb->mac_header);
}
#else
#define nf_bridge_adjust_skb_data(s) do {} while (0)
#define nf_bridge_adjust_segmented_data(s) do {} while (0)
#endif

static void free_entry(struct nf_queue_entry *entry)
{
	nf_queue_entry_release_refs(entry);
	kfree(entry);
}

static int __imq_nf_queue(struct nf_queue_entry *entry, struct net_device *dev);

static int __imq_nf_queue_gso(struct nf_queue_entry *entry,
			      struct net_device *dev, struct sk_buff *skb)
{
	int ret = -ENOMEM;
	struct nf_queue_entry *entry_seg;

	nf_bridge_adjust_segmented_data(skb);

	if (skb->next == NULL) { /* last packet, no need to copy entry */
		struct sk_buff *gso_skb = entry->skb;
		entry->skb = skb;
		ret = __imq_nf_queue(entry, dev);
		if (ret)
			entry->skb = gso_skb;
		return ret;
	}

	skb->next = NULL;

	entry_seg = nf_queue_entry_dup(entry);
	if (entry_seg) {
		entry_seg->skb = skb;
		ret = __imq_nf_queue(entry_seg, dev);
		if (ret)
			free_entry(entry_seg);
	}
	return ret;
}

static int imq_nf_queue(struct nf_queue_entry *entry, unsigned queue_num)
{
	struct sk_buff *skb, *segs;
	struct net_device *dev;
	unsigned int queued;
	int index, retval, err;

	index = entry->skb->imq_flags & IMQ_F_IFMASK;
	if (unlikely(index > numdevs - 1)) {
		if (net_ratelimit())
			pr_warn("IMQ: invalid device specified, highest is %u\n",
				numdevs - 1);
		retval = -EINVAL;
		goto out_no_dev;
	}

	/* check for imq device by index from cache */
	dev = imq_devs_cache[index];
	if (unlikely(!dev)) {
		dev = get_imq_device_by_index(index);
		if (IS_ERR(dev)) {
			retval = PTR_ERR(dev);
			goto out_no_dev;
		}
	}

	if (unlikely(!(dev->flags & IFF_UP))) {
		entry->skb->imq_flags = 0;
		retval = -ECANCELED;
		goto out_no_dev;
	}

	/* Since 3.10.x, GSO handling moved here as result of upstream commit
	 * a5fedd43d5f6c94c71053a66e4c3d2e35f1731a2 (netfilter: move
	 * skb_gso_segment into nfnetlink_queue module).
	 *
	 * Following code replicates the gso handling from
	 * 'net/netfilter/nfnetlink_queue_core.c':nfqnl_enqueue_packet().
	 */

	skb = entry->skb;

	switch (entry->state.pf) {
	case NFPROTO_IPV4:
		skb->protocol = htons(ETH_P_IP);
		break;
	case NFPROTO_IPV6:
		skb->protocol = htons(ETH_P_IPV6);
		break;
	}

	if (!skb_is_gso(entry->skb))
		return __imq_nf_queue(entry, dev);

	nf_bridge_adjust_skb_data(skb);
	segs = skb_gso_segment(skb, 0);
	/* Does not use PTR_ERR to limit the number of error codes that can be
	 * returned by nf_queue. For instance, callers rely on -ECANCELED to
	 * mean 'ignore this hook'.
	 */
	err = -ENOBUFS;
	if (IS_ERR(segs))
		goto out_err;
	queued = 0;
	err = 0;
	do {
		struct sk_buff *nskb = segs->next;
		if (nskb && nskb->next)
			nskb->cb_next = NULL;
		if (err == 0)
			err = __imq_nf_queue_gso(entry, dev, segs);
		if (err == 0)
			queued++;
		else
			kfree_skb(segs);
		segs = nskb;
	} while (segs);

	if (queued) {
		if (err) /* some segments are already queued */
			free_entry(entry);
		kfree_skb(skb);
		return 0;
	}

out_err:
	nf_bridge_adjust_segmented_data(skb);
	retval = err;
out_no_dev:
	return retval;
}

static int __imq_nf_queue(struct nf_queue_entry *entry, struct net_device *dev)
{
	struct sk_buff *skb_orig, *skb, *skb_shared, *skb_popd;
	struct Qdisc *q;
	struct sk_buff *to_free = NULL;
	struct netdev_queue *txq;
	spinlock_t *root_lock;
	int users;
	int retval = -EINVAL;
	unsigned int orig_queue_index;

	dev->last_rx = jiffies;

	skb = entry->skb;
	skb_orig = NULL;

	/* skb has owner? => make clone */
	if (unlikely(skb->destructor)) {
		skb_orig = skb;
		skb = skb_clone(skb, GFP_ATOMIC);
		if (unlikely(!skb)) {
			retval = -ENOMEM;
			goto out;
		}
		skb->cb_next = NULL;
		entry->skb = skb;
	}

	dev->stats.rx_bytes += skb->len;
	dev->stats.rx_packets++;

	if (!skb->dev) {
		/* skb->dev == NULL causes problems, try to find the cause. */
		if (net_ratelimit()) {
			dev_warn(&dev->dev,
				 "received packet with skb->dev == NULL\n");
			dump_stack();
		}

		skb->dev = dev;
	}

	/* Disables softirqs for lock below */
	rcu_read_lock_bh();

	/* Multi-queue selection */
	orig_queue_index = skb_get_queue_mapping(skb);
	txq = imq_select_queue(dev, skb);

	q = rcu_dereference(txq->qdisc);
	if (unlikely(!q->enqueue))
		goto packet_not_eaten_by_imq_dev;

	skb->nf_queue_entry = entry;
	root_lock = qdisc_lock(q);
	spin_lock(root_lock);

	users = atomic_read(&skb->users);

	skb_shared = skb_get(skb); /* increase reference count by one */

	/* backup skb->cb, as qdisc layer will overwrite it */
	skb_save_cb(skb_shared);
	qdisc_enqueue_root(skb_shared, q, &to_free); /* might kfree_skb */
	if (likely(atomic_read(&skb_shared->users) == users + 1)) {
		bool validate;

		kfree_skb(skb_shared); /* decrease reference count by one */

		skb->destructor = &imq_skb_destructor;

		skb_popd = qdisc_dequeue_skb(q, &validate);

		/* cloned? */
		if (unlikely(skb_orig))
			kfree_skb(skb_orig); /* free original */

		spin_unlock(root_lock);

#if 0
		/* schedule qdisc dequeue */
		__netif_schedule(q);
#else
		if (likely(skb_popd)) {
			/* Note that we validate skb (GSO, checksum, ...) outside of locks */
			if (validate)
				skb_popd = validate_xmit_skb_list(skb_popd, dev);

			if (skb_popd) {
				int dummy_ret;
				int cpu = smp_processor_id(); /* ok because BHs are off */

				txq = skb_get_tx_queue(dev, skb_popd);
				/*
				 * The IMQ device will not be frozen or stopped, and the
				 * transmit will always succeed, so we need not check its
				 * status and return value to accelerate.
				 */
				if (imq_dev_accurate_stats && txq->xmit_lock_owner != cpu) {
					HARD_TX_LOCK(dev, txq, cpu);
					if (!netif_xmit_frozen_or_stopped(txq)) {
						dev_hard_start_xmit(skb_popd, dev, txq, &dummy_ret);
					}
					HARD_TX_UNLOCK(dev, txq);
				} else {
					if (!netif_xmit_frozen_or_stopped(txq)) {
						dev_hard_start_xmit(skb_popd, dev, txq, &dummy_ret);
					}
				}
			}
		} else {
			/* No ready skb, then schedule it */
			__netif_schedule(q);
		}
#endif
		rcu_read_unlock_bh();
		retval = 0;
		goto out;
	} else {
		skb_restore_cb(skb_shared); /* restore skb->cb */
		skb->nf_queue_entry = NULL;
		/*
		 * qdisc dropped the packet and decreased its reference count,
		 * so we don't want to try to free it again, as that would
		 * actually destroy the skb.
		 */
		spin_unlock(root_lock);
		goto packet_not_eaten_by_imq_dev;
	}

packet_not_eaten_by_imq_dev:
	skb_set_queue_mapping(skb, orig_queue_index);
	rcu_read_unlock_bh();

	/* cloned? restore original */
	if (unlikely(skb_orig)) {
		kfree_skb(skb);
		entry->skb = skb_orig;
	}
	retval = -1;
out:
	if (unlikely(to_free)) {
		kfree_skb_list(to_free);
	}
	return retval;
}

static unsigned int imq_nf_hook(void *priv,
				struct sk_buff *skb,
				const struct nf_hook_state *state)
{
	return (skb->imq_flags & IMQ_F_ENQUEUE) ? NF_IMQ_QUEUE : NF_ACCEPT;
}

static int imq_close(struct net_device *dev)
{
	netif_stop_queue(dev);
	return 0;
}

static int imq_open(struct net_device *dev)
{
	netif_start_queue(dev);
	return 0;
}

static const struct net_device_ops imq_netdev_ops = {
	.ndo_open	= imq_open,
	.ndo_stop	= imq_close,
	.ndo_start_xmit	= imq_dev_xmit,
	.ndo_get_stats	= imq_get_stats,
};

static void imq_setup(struct net_device *dev)
{
	dev->netdev_ops		= &imq_netdev_ops;
	dev->type		= ARPHRD_VOID;
	dev->mtu		= 16000; /* too small? */
	dev->tx_queue_len	= 11000; /* too big? */
	dev->flags		= IFF_NOARP;
	dev->features		= NETIF_F_SG | NETIF_F_FRAGLIST |
				  NETIF_F_GSO | NETIF_F_HW_CSUM |
				  NETIF_F_HIGHDMA;
	dev->priv_flags		&= ~(IFF_XMIT_DST_RELEASE |
				     IFF_TX_SKB_SHARING);
}

static int imq_validate(struct nlattr *tb[], struct nlattr *data[])
{
	int ret = 0;

	if (tb[IFLA_ADDRESS]) {
		if (nla_len(tb[IFLA_ADDRESS]) != ETH_ALEN) {
			ret = -EINVAL;
			goto end;
		}
		if (!is_valid_ether_addr(nla_data(tb[IFLA_ADDRESS]))) {
			ret = -EADDRNOTAVAIL;
			goto end;
		}
	}
	return 0;
end:
	pr_warn("IMQ: imq_validate failed (%d)\n", ret);
	return ret;
}

static struct rtnl_link_ops imq_link_ops __read_mostly = {
	.kind		= "imq",
	.priv_size	= 0,
	.setup		= imq_setup,
	.validate	= imq_validate,
};

static const struct nf_queue_handler imq_nfqh = {
	.outfn = imq_nf_queue,
};

static int __init imq_init_hooks(void)
{
	int ret;

	nf_register_queue_imq_handler(&imq_nfqh);

	ret = nf_register_hooks(imq_ops, ARRAY_SIZE(imq_ops));
	if (ret < 0)
		nf_unregister_queue_imq_handler();

	return ret;
}

static int __init imq_init_one(int index)
{
	struct net_device *dev;
	int ret;

	dev = alloc_netdev_mq(0, "imq%d", NET_NAME_UNKNOWN, imq_setup, numqueues);
	if (!dev)
		return -ENOMEM;

	ret = dev_alloc_name(dev, dev->name);
	if (ret < 0)
		goto fail;

	dev->rtnl_link_ops = &imq_link_ops;
	ret = register_netdevice(dev);
	if (ret < 0)
		goto fail;

	return 0;
fail:
	free_netdev(dev);
	return ret;
}

static int __init imq_init_devs(void)
{
	int err, i;

	if (numdevs < 1 || numdevs > IMQ_MAX_DEVS) {
		pr_err("IMQ: numdevs has to be between 1 and %u\n",
		       IMQ_MAX_DEVS);
		return -EINVAL;
	}

	if (numqueues < 1 || numqueues > IMQ_MAX_QUEUES) {
		pr_err("IMQ: numqueues has to be between 1 and %u\n",
		       IMQ_MAX_QUEUES);
		return -EINVAL;
	}

	get_random_bytes(&imq_hashrnd, sizeof(imq_hashrnd));

	rtnl_lock();
	err = __rtnl_link_register(&imq_link_ops);

	for (i = 0; i < numdevs && !err; i++)
		err = imq_init_one(i);

	if (err) {
		__rtnl_link_unregister(&imq_link_ops);
		memset(imq_devs_cache, 0, sizeof(imq_devs_cache));
	}
	rtnl_unlock();

	return err;
}

static int __init imq_init_module(void)
{
	int err;

#if defined(CONFIG_IMQ_NUM_DEVS)
	BUILD_BUG_ON(CONFIG_IMQ_NUM_DEVS > 16);
	BUILD_BUG_ON(CONFIG_IMQ_NUM_DEVS < 2);
	BUILD_BUG_ON(CONFIG_IMQ_NUM_DEVS - 1 > IMQ_F_IFMASK);
#endif

	err = imq_init_devs();
	if (err) {
		pr_err("IMQ: Error trying imq_init_devs(net)\n");
		return err;
	}

	err = imq_init_hooks();
	if (err) {
		pr_err("IMQ: Error trying imq_init_hooks()\n");
		rtnl_link_unregister(&imq_link_ops);
		memset(imq_devs_cache, 0, sizeof(imq_devs_cache));
		return err;
	}

	pr_info("IMQ driver loaded successfully. (numdevs = %d, numqueues = %d, imq_dev_accurate_stats = %d)\n",
		numdevs, numqueues, imq_dev_accurate_stats);

#if defined(CONFIG_IMQ_BEHAVIOR_BA) || defined(CONFIG_IMQ_BEHAVIOR_BB)
	pr_info("\tHooking IMQ before NAT on PREROUTING.\n");
#else
	pr_info("\tHooking IMQ after NAT on PREROUTING.\n");
#endif
#if defined(CONFIG_IMQ_BEHAVIOR_AB) || defined(CONFIG_IMQ_BEHAVIOR_BB)
	pr_info("\tHooking IMQ before NAT on POSTROUTING.\n");
#else
	pr_info("\tHooking IMQ after NAT on POSTROUTING.\n");
#endif

	return 0;
}

static void __exit imq_unhook(void)
{
	nf_unregister_hooks(imq_ops, ARRAY_SIZE(imq_ops));
	nf_unregister_queue_imq_handler();
}

static void __exit imq_cleanup_devs(void)
{
	rtnl_link_unregister(&imq_link_ops);
	memset(imq_devs_cache, 0, sizeof(imq_devs_cache));
}

static void __exit imq_exit_module(void)
{
	imq_unhook();
	imq_cleanup_devs();
	pr_info("IMQ driver unloaded successfully.\n");
}

module_init(imq_init_module);
module_exit(imq_exit_module);

module_param(numdevs, int, 0);
module_param(numqueues, int, 0);
module_param(imq_dev_accurate_stats, int, 0);
MODULE_PARM_DESC(numdevs, "number of IMQ devices (how many imq* devices will be created)");
MODULE_PARM_DESC(numqueues, "number of queues per IMQ device");
MODULE_PARM_DESC(imq_dev_accurate_stats, "whether accurate IMQ device stats are needed");

MODULE_AUTHOR("https://github.com/imq/linuximq");
MODULE_DESCRIPTION("Pseudo-driver for the intermediate queue device. See https://github.com/imq/linuximq/wiki for more information.");
MODULE_LICENSE("GPL");
MODULE_ALIAS_RTNL_LINK("imq");
-
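Aside (not part of the patch): both imq_hash() and imq_select_queue() above reduce a 32-bit hash to a queue index with the multiply-shift idiom ((u64)hash * n) >> 32 rather than a modulo. A minimal standalone sketch of just that idiom follows; the queue count of 4 is an arbitrary example value, not something taken from the patch.

/* Hedged sketch of the multiply-shift range reduction used by imq_hash()
 * and imq_select_queue(): maps a 32-bit hash onto [0, n_queues). */
#include <stdint.h>
#include <stdio.h>

static unsigned int pick_queue(uint32_t hash, uint32_t n_queues)
{
	/* (hash / 2^32) * n_queues, computed in 64-bit fixed point */
	return (unsigned int)(((uint64_t)hash * n_queues) >> 32);
}

int main(void)
{
	uint32_t n = 4;	/* example queue count, not from the patch */

	printf("%u %u %u\n",
	       pick_queue(0x00000000u, n),	/* -> 0 */
	       pick_queue(0x80000000u, n),	/* -> n/2 */
	       pick_queue(0xffffffffu, n));	/* -> n-1 */
	return 0;
}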
new file include/linux/imq.h
diff --git a/include/linux/imq.h b/include/linux/imq.h new file mode 100644 index 0000000..1babb09
#ifndef _IMQ_H
#define _IMQ_H

/* IFMASK (16 device indexes, 0 to 15) and flag(s) fit in 5 bits */
#define IMQ_F_BITS	5

#define IMQ_F_IFMASK	0x0f
#define IMQ_F_ENQUEUE	0x10

#define IMQ_MAX_DEVS	(IMQ_F_IFMASK + 1)

#endif /* _IMQ_H */
-
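Aside (not part of the patch): these two masks are the whole contract for skb->imq_flags — the low four bits select the target imq device and bit 4 marks the packet for enqueueing, which is exactly what the xt_IMQ target further down writes. A hedged userspace-style sketch of that bit packing, with the constants copied from this header and a made-up todev value:

/* Hedged sketch: packs/unpacks an imq_flags value as defined by
 * include/linux/imq.h above. `todev` is a hypothetical example. */
#include <stdio.h>

#define IMQ_F_IFMASK	0x0f	/* low 4 bits: device index 0..15 */
#define IMQ_F_ENQUEUE	0x10	/* bit 4: divert packet through imq */

int main(void)
{
	unsigned int todev = 2;	/* hypothetical target: imq2 */
	unsigned int flags = (todev & IMQ_F_IFMASK) | IMQ_F_ENQUEUE;

	printf("imq_flags=0x%02x -> dev=imq%u, enqueue=%u\n",
	       flags, flags & IMQ_F_IFMASK, !!(flags & IMQ_F_ENQUEUE));
	return 0;
}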
include/linux/netdevice.h
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index e16a2a9..4a1090a 100644
@@ static inline void netif_tx_unlock_bh(struct net_device *dev) @@
 	}					\
 }
 
+#define HARD_TX_LOCK_BH(dev, txq) {			\
+	if ((dev->features & NETIF_F_LLTX) == 0) {	\
+		__netif_tx_lock_bh(txq);		\
+	}						\
+}
+
+#define HARD_TX_UNLOCK_BH(dev, txq) {			\
+	if ((dev->features & NETIF_F_LLTX) == 0) {	\
+		__netif_tx_unlock_bh(txq);		\
+	}						\
+}
+
+
 static inline void netif_tx_disable(struct net_device *dev)
 {
 	unsigned int i;
-
new file include/linux/netfilter/xt_IMQ.h
diff --git a/include/linux/netfilter/xt_IMQ.h b/include/linux/netfilter/xt_IMQ.h new file mode 100644 index 0000000..9b07230
#ifndef _XT_IMQ_H
#define _XT_IMQ_H

struct xt_imq_info {
	unsigned int todev;	/* target imq device */
};

#endif /* _XT_IMQ_H */
-
new file include/linux/netfilter_ipv4/ipt_IMQ.h
diff --git a/include/linux/netfilter_ipv4/ipt_IMQ.h b/include/linux/netfilter_ipv4/ipt_IMQ.h new file mode 100644 index 0000000..7af320f
#ifndef _IPT_IMQ_H
#define _IPT_IMQ_H

/* Backwards compatibility for old userspace */
#include <linux/netfilter/xt_IMQ.h>

#define ipt_imq_info xt_imq_info

#endif /* _IPT_IMQ_H */
-
new file include/linux/netfilter_ipv6/ip6t_IMQ.h
diff --git a/include/linux/netfilter_ipv6/ip6t_IMQ.h b/include/linux/netfilter_ipv6/ip6t_IMQ.h new file mode 100644 index 0000000..198ac01
#ifndef _IP6T_IMQ_H
#define _IP6T_IMQ_H

/* Backwards compatibility for old userspace */
#include <linux/netfilter/xt_IMQ.h>

#define ip6t_imq_info xt_imq_info

#endif /* _IP6T_IMQ_H */
-
include/linux/skbuff.h
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index c8f9fa6..6c425c2 100644
 #include <linux/in6.h>
 #include <linux/if_packet.h>
 #include <net/flow.h>
+#if defined(CONFIG_IMQ) || defined(CONFIG_IMQ_MODULE)
+#include <linux/imq.h>
+#endif
+
 
 /* The interface for checksum offload between the stack and networking drivers
  * is as follows...
@@ struct sk_buff { @@
 	 * first. This is owned by whoever has the skb queued ATM.
 	 */
 	char			cb[48] __aligned(8);
+#if defined(CONFIG_IMQ) || defined(CONFIG_IMQ_MODULE)
+	void			*cb_next;
+#endif
 
 	unsigned long		_skb_refdst;
 	void			(*destructor)(struct sk_buff *skb);
@@ struct sk_buff { @@
 #if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
 	struct nf_conntrack	*nfct;
 #endif
+#if defined(CONFIG_IMQ) || defined(CONFIG_IMQ_MODULE)
+	struct nf_queue_entry	*nf_queue_entry;
+#endif
 #if IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
 	struct nf_bridge_info	*nf_bridge;
 #endif
@@ struct sk_buff { @@
 	__u8			offload_fwd_mark:1;
 #endif
 	/* 2, 4 or 5 bit hole */
+#if defined(CONFIG_IMQ) || defined(CONFIG_IMQ_MODULE)
+	__u8			imq_flags:IMQ_F_BITS;
+#endif
 
 #ifdef CONFIG_NET_SCHED
 	__u16			tc_index;	/* traffic control index */
@@ void kfree_skb_list(struct sk_buff *segs); @@
 void skb_tx_error(struct sk_buff *skb);
 void consume_skb(struct sk_buff *skb);
 void __kfree_skb(struct sk_buff *skb);
+
+#if defined(CONFIG_IMQ) || defined(CONFIG_IMQ_MODULE)
+int skb_save_cb(struct sk_buff *skb);
+int skb_restore_cb(struct sk_buff *skb);
+#endif
+
 extern struct kmem_cache *skbuff_head_cache;
 
 void kfree_skb_partial(struct sk_buff *skb, bool head_stolen);
@@ static inline void __nf_copy(struct sk_buff *dst, const struct sk_buff *src, @@
 	if (copy)
 		dst->nfctinfo = src->nfctinfo;
 #endif
+#if defined(CONFIG_IMQ) || defined(CONFIG_IMQ_MODULE)
+	dst->imq_flags = src->imq_flags;
+	dst->nf_queue_entry = src->nf_queue_entry;
+#endif
 #if IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
 	dst->nf_bridge  = src->nf_bridge;
 	nf_bridge_get(src->nf_bridge);
-
include/net/netfilter/nf_queue.h
diff --git a/include/net/netfilter/nf_queue.h b/include/net/netfilter/nf_queue.h index 2280cfe..ec8fa51 100644
@@ struct nf_queue_handler { @@
 void nf_register_queue_handler(struct net *net, const struct nf_queue_handler *qh);
 void nf_unregister_queue_handler(struct net *net);
 void nf_reinject(struct nf_queue_entry *entry, unsigned int verdict);
+void nf_queue_entry_release_refs(struct nf_queue_entry *entry);
+
+#if defined(CONFIG_IMQ) || defined(CONFIG_IMQ_MODULE)
+void nf_register_queue_imq_handler(const struct nf_queue_handler *qh);
+void nf_unregister_queue_imq_handler(void);
+#endif
 
 void nf_queue_entry_get_refs(struct nf_queue_entry *entry);
 void nf_queue_entry_release_refs(struct nf_queue_entry *entry);
-
include/net/pkt_sched.h
diff --git a/include/net/pkt_sched.h b/include/net/pkt_sched.h index cd334c9..6757228 100644
@@ int sch_direct_xmit(struct sk_buff *skb, struct Qdisc *q, @@
 
 void __qdisc_run(struct Qdisc *q);
 
+struct sk_buff *qdisc_dequeue_skb(struct Qdisc *q, bool *validate);
+
 static inline void qdisc_run(struct Qdisc *q)
 {
 	if (qdisc_run_begin(q))
-
include/net/sch_generic.h
diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h index e6aa0a2..08b37dc 100644
@@ static inline int qdisc_enqueue(struct sk_buff *skb, struct Qdisc *sch, @@
 	return sch->enqueue(skb, sch, to_free);
 }
 
+static inline int qdisc_enqueue_root(struct sk_buff *skb, struct Qdisc *sch,
+				     struct sk_buff **to_free)
+{
+	qdisc_skb_cb(skb)->pkt_len = skb->len;
+	return qdisc_enqueue(skb, sch, to_free) & NET_XMIT_MASK;
+}
+
 static inline bool qdisc_is_percpu_stats(const struct Qdisc *q)
 {
 	return q->flags & TCQ_F_CPUSTATS;
-
include/uapi/linux/netfilter.h
diff --git a/include/uapi/linux/netfilter.h b/include/uapi/linux/netfilter.h index d93f949..23fb6d1 100644
 #define NF_QUEUE 3
 #define NF_REPEAT 4
 #define NF_STOP 5
-#define NF_MAX_VERDICT NF_STOP
+#define NF_IMQ_QUEUE 6
+#define NF_MAX_VERDICT NF_IMQ_QUEUE
 
 /* we overload the higher bits for encoding auxiliary data such as the queue
  * number or errno values. Not nice, but better than additional function
-
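Aside (not part of the patch): NF_IMQ_QUEUE travels in the same packed verdict word as NF_QUEUE — the verdict code in the low byte, the queue number in the upper bits — which is why the nf_queue.c hunks below recover them with verdict & NF_VERDICT_MASK and verdict >> NF_VERDICT_QBITS. A hedged sketch of that packing; the NF_VERDICT_MASK and NF_VERDICT_QBITS values are assumed from the mainline uapi header of this era and should be double-checked against your tree.

/* Hedged sketch of netfilter verdict packing as consumed by the IMQ
 * changes in net/netfilter/nf_queue.c. Mask/shift values are assumptions
 * taken from mainline headers, not from this patch. */
#include <stdio.h>

#define NF_IMQ_QUEUE		6
#define NF_VERDICT_MASK		0x000000ff	/* assumed mainline value */
#define NF_VERDICT_QBITS	16		/* assumed mainline value */

int main(void)
{
	unsigned int queue_num = 3;	/* example queue number */
	unsigned int verdict = NF_IMQ_QUEUE | (queue_num << NF_VERDICT_QBITS);

	printf("type=%u queue=%u\n",
	       verdict & NF_VERDICT_MASK, verdict >> NF_VERDICT_QBITS);
	return 0;
}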
net/core/dev.c
diff --git a/net/core/dev.c b/net/core/dev.c index 6666b28..3e12add 100644
 #include <linux/netfilter_ingress.h>
 #include <linux/sctp.h>
 #include <linux/crash_dump.h>
+#if defined(CONFIG_IMQ) || defined(CONFIG_IMQ_MODULE)
+#include <linux/imq.h>
+#endif
 
 #include "net-sysfs.h"
 
@@ static int xmit_one(struct sk_buff *skb, struct net_device *dev, @@
 	unsigned int len;
 	int rc;
 
+#if defined(CONFIG_IMQ) || defined(CONFIG_IMQ_MODULE)
+	if ((!list_empty(&ptype_all) || !list_empty(&dev->ptype_all)) &&
+	    !(skb->imq_flags & IMQ_F_ENQUEUE))
+#else
 	if (!list_empty(&ptype_all) || !list_empty(&dev->ptype_all))
+#endif
 		dev_queue_xmit_nit(skb, dev);
 
 	len = skb->len;
@@ struct sk_buff *dev_hard_start_xmit(struct sk_buff *first, struct net_device *dev, @@
 	return skb;
 }
 
+EXPORT_SYMBOL_GPL(dev_hard_start_xmit);
+
 static struct sk_buff *validate_xmit_vlan(struct sk_buff *skb,
 					  netdev_features_t features)
 {
-
net/core/skbuff.c
diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 9bf1289..e3fcf17 100644
@@ struct kmem_cache *skbuff_head_cache __read_mostly; @@
 static struct kmem_cache *skbuff_fclone_cache __read_mostly;
 int sysctl_max_skb_frags __read_mostly = MAX_SKB_FRAGS;
 EXPORT_SYMBOL(sysctl_max_skb_frags);
+#if defined(CONFIG_IMQ) || defined(CONFIG_IMQ_MODULE)
+static struct kmem_cache *skbuff_cb_store_cache __read_mostly;
+#endif
+
+#if defined(CONFIG_IMQ) || defined(CONFIG_IMQ_MODULE)
+/* Control buffer save/restore for IMQ devices */
+struct skb_cb_table {
+	char		cb[48] __aligned(8);
+	void		*cb_next;
+	atomic_t	refcnt;
+};
+
+static DEFINE_SPINLOCK(skb_cb_store_lock);
+
+int skb_save_cb(struct sk_buff *skb)
+{
+	struct skb_cb_table *next;
+
+	next = kmem_cache_alloc(skbuff_cb_store_cache, GFP_ATOMIC);
+	if (!next)
+		return -ENOMEM;
+
+	BUILD_BUG_ON(sizeof(skb->cb) != sizeof(next->cb));
+
+	memcpy(next->cb, skb->cb, sizeof(skb->cb));
+	next->cb_next = skb->cb_next;
+
+	atomic_set(&next->refcnt, 1);
+
+	skb->cb_next = next;
+	return 0;
+}
+EXPORT_SYMBOL(skb_save_cb);
+
+int skb_restore_cb(struct sk_buff *skb)
+{
+	struct skb_cb_table *next;
+
+	if (!skb->cb_next)
+		return 0;
+
+	next = skb->cb_next;
+
+	BUILD_BUG_ON(sizeof(skb->cb) != sizeof(next->cb));
+
+	memcpy(skb->cb, next->cb, sizeof(skb->cb));
+	skb->cb_next = next->cb_next;
+
+	spin_lock(&skb_cb_store_lock);
+
+	if (atomic_dec_and_test(&next->refcnt))
+		kmem_cache_free(skbuff_cb_store_cache, next);
+
+	spin_unlock(&skb_cb_store_lock);
+
+	return 0;
+}
+EXPORT_SYMBOL(skb_restore_cb);
+
+static void skb_copy_stored_cb(struct sk_buff *, const struct sk_buff *) __attribute__ ((unused));
+static void skb_copy_stored_cb(struct sk_buff *new, const struct sk_buff *__old)
+{
+	struct skb_cb_table *next;
+	struct sk_buff *old;
+
+	if (!__old->cb_next) {
+		new->cb_next = NULL;
+		return;
+	}
+
+	spin_lock(&skb_cb_store_lock);
+
+	old = (struct sk_buff *)__old;
+
+	next = old->cb_next;
+	atomic_inc(&next->refcnt);
+	new->cb_next = next;
+
+	spin_unlock(&skb_cb_store_lock);
+}
+#endif
 
 /**
  *	skb_panic - private function for out-of-line support
@@ static void skb_release_head_state(struct sk_buff *skb) @@
 		WARN_ON(in_irq());
 		skb->destructor(skb);
 	}
+#if defined(CONFIG_IMQ) || defined(CONFIG_IMQ_MODULE)
+	/*
+	 * This should not happen. When it does, avoid memleak by restoring
+	 * the chain of cb-backups.
+	 */
+	while (skb->cb_next != NULL) {
+		if (net_ratelimit())
+			pr_warn("IMQ: kfree_skb: skb->cb_next: %08x\n",
+				(unsigned int)(uintptr_t)skb->cb_next);
+
+		skb_restore_cb(skb);
+	}
+	/*
+	 * This should not happen either, nf_queue_entry is nullified in
+	 * imq_dev_xmit(). If we have non-NULL nf_queue_entry then we are
+	 * leaking entry pointers, maybe memory. We don't know if this is
+	 * pointer to already freed memory, or should this be freed.
+	 * If this happens we need to add refcounting, etc for nf_queue_entry.
+	 */
+	if (skb->nf_queue_entry && net_ratelimit())
+		pr_warn("%s\n", "IMQ: kfree_skb: skb->nf_queue_entry != NULL");
+#endif
 #if IS_ENABLED(CONFIG_NF_CONNTRACK)
 	nf_conntrack_put(skb->nfct);
 #endif
@@ static void __copy_skb_header(struct sk_buff *new, const struct sk_buff *old) @@
 	new->sp = secpath_get(old->sp);
 #endif
 	__nf_copy(new, old, false);
+#if defined(CONFIG_IMQ) || defined(CONFIG_IMQ_MODULE)
+	new->cb_next = NULL;
+	/*skb_copy_stored_cb(new, old);*/
+#endif
 
 	/* Note : this field could be in headers_start/headers_end section
 	 * It is not yet because we do not want to have a 16 bit hole
@@ void __init skb_init(void) @@
 						0,
 						SLAB_HWCACHE_ALIGN|SLAB_PANIC,
 						NULL);
+#if defined(CONFIG_IMQ) || defined(CONFIG_IMQ_MODULE)
+	skbuff_cb_store_cache = kmem_cache_create("skbuff_cb_store_cache",
+						  sizeof(struct skb_cb_table),
+						  0,
+						  SLAB_HWCACHE_ALIGN|SLAB_PANIC,
+						  NULL);
+#endif
 }
 
 /**
-
net/ipv6/ip6_output.c
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index 59eb4ed..8020b07 100644
@@ static int ip6_finish_output2(struct net *net, struct sock *sk, struct sk_buff *skb) @@
 	struct in6_addr *nexthop;
 	int ret;
 
-	skb->protocol = htons(ETH_P_IPV6);
-	skb->dev = dev;
-
 	if (ipv6_addr_is_multicast(&ipv6_hdr(skb)->daddr)) {
 		struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));
 
@@ int ip6_output(struct net *net, struct sock *sk, struct sk_buff *skb) @@
 		return 0;
 	}
 
+	/*
+	 * IMQ-patch: moved setting skb->dev and skb->protocol from
+	 * ip6_finish_output2 to fix crashing at netif_skb_features().
+	 */
+	skb->protocol = htons(ETH_P_IPV6);
+	skb->dev = dev;
+
 	return NF_HOOK_COND(NFPROTO_IPV6, NF_INET_POST_ROUTING,
 			    net, sk, skb, NULL, dev,
 			    ip6_finish_output,
-
net/netfilter/Kconfig
diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig index 4a2e5a3..fc5cc9a 100644
@@ config NETFILTER_XT_TARGET_LOG @@
 
 	  To compile it as a module, choose M here. If unsure, say N.
 
+config NETFILTER_XT_TARGET_IMQ
+	tristate '"IMQ" target support'
+	depends on NETFILTER_XTABLES
+	depends on IP_NF_MANGLE || IP6_NF_MANGLE
+	select IMQ
+	default m if NETFILTER_ADVANCED=n
+	help
+	  This option adds an `IMQ' target which is used to specify if and
+	  to which imq device packets should get enqueued/dequeued.
+
+	  To compile it as a module, choose M here. If unsure, say N.
+
 config NETFILTER_XT_TARGET_MARK
 	tristate '"MARK" target support'
 	depends on NETFILTER_ADVANCED
-
net/netfilter/Makefile
diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile index e5c5e1e..3128bc5 100644
@@ obj-$(CONFIG_NETFILTER_XT_TARGET_CT) += xt_CT.o @@
 obj-$(CONFIG_NETFILTER_XT_TARGET_DSCP) += xt_DSCP.o
 obj-$(CONFIG_NETFILTER_XT_TARGET_HL) += xt_HL.o
 obj-$(CONFIG_NETFILTER_XT_TARGET_HMARK) += xt_HMARK.o
+obj-$(CONFIG_NETFILTER_XT_TARGET_IMQ) += xt_IMQ.o
 obj-$(CONFIG_NETFILTER_XT_TARGET_LED) += xt_LED.o
 obj-$(CONFIG_NETFILTER_XT_TARGET_LOG) += xt_LOG.o
 obj-$(CONFIG_NETFILTER_XT_TARGET_NETMAP) += xt_NETMAP.o
-
net/netfilter/core.c
diff --git a/net/netfilter/core.c b/net/netfilter/core.c index 004af03..768a08b 100644
@@ int nf_hook_slow(struct sk_buff *skb, struct nf_hook_state *state) @@
 		ret = NF_DROP_GETERR(verdict);
 		if (ret == 0)
 			ret = -EPERM;
-	} else if ((verdict & NF_VERDICT_MASK) == NF_QUEUE) {
+	} else if ((verdict & NF_VERDICT_MASK) == NF_QUEUE ||
+		   (verdict & NF_VERDICT_MASK) == NF_IMQ_QUEUE) {
 		ret = nf_queue(skb, state, &entry, verdict);
+		if (ret == -ECANCELED)
+			goto next_hook;
 		if (ret == 1 && entry)
 			goto next_hook;
 	}
-
net/netfilter/nf_queue.c
diff --git a/net/netfilter/nf_queue.c b/net/netfilter/nf_queue.c index 8f08d75..c12c9eb 100644
  * receives, no matter what.
  */
 
+#if defined(CONFIG_IMQ) || defined(CONFIG_IMQ_MODULE)
+static const struct nf_queue_handler __rcu *queue_imq_handler __read_mostly;
+
+void nf_register_queue_imq_handler(const struct nf_queue_handler *qh)
+{
+	rcu_assign_pointer(queue_imq_handler, qh);
+}
+EXPORT_SYMBOL_GPL(nf_register_queue_imq_handler);
+
+void nf_unregister_queue_imq_handler(void)
+{
+	RCU_INIT_POINTER(queue_imq_handler, NULL);
+	synchronize_rcu();
+}
+EXPORT_SYMBOL_GPL(nf_unregister_queue_imq_handler);
+#endif
+
 /* return EBUSY when somebody else is registered, return EEXIST if the
  * same handler is registered, return 0 in case of success. */
 void nf_register_queue_handler(struct net *net, const struct nf_queue_handler *qh)
@@ void nf_queue_nf_hook_drop(struct net *net, const struct nf_hook_entry *entry) @@
 }
 
 static int __nf_queue(struct sk_buff *skb, const struct nf_hook_state *state,
-		      unsigned int queuenum)
+		      unsigned int verdict)
 {
 	int status = -ENOENT;
 	struct nf_queue_entry *entry = NULL;
 	const struct nf_afinfo *afinfo;
 	const struct nf_queue_handler *qh;
 	struct net *net = state->net;
+	unsigned int queuetype = verdict & NF_VERDICT_MASK;
+	unsigned int queuenum = verdict >> NF_VERDICT_QBITS;
 
 	/* QUEUE == DROP if no one is waiting, to be safe. */
-	qh = rcu_dereference(net->nf.queue_handler);
+	if (queuetype == NF_IMQ_QUEUE) {
+#if defined(CONFIG_IMQ) || defined(CONFIG_IMQ_MODULE)
+		qh = rcu_dereference(queue_imq_handler);
+#else
+		BUG();
+		goto err_unlock;
+#endif
+	} else {
+		qh = rcu_dereference(net->nf.queue_handler);
+	}
+
 	if (!qh) {
 		status = -ESRCH;
 		goto err;
@@ int nf_queue(struct sk_buff *skb, struct nf_hook_state *state, @@
 	int ret;
 
 	RCU_INIT_POINTER(state->hook_entries, entry);
-	ret = __nf_queue(skb, state, verdict >> NF_VERDICT_QBITS);
+	ret = __nf_queue(skb, state, verdict);
 	if (ret < 0) {
+#if defined(CONFIG_IMQ) || defined(CONFIG_IMQ_MODULE)
+		if (ret == -ECANCELED && skb->imq_flags == 0) { /* down interface */
+			*entryp = rcu_dereference(entry->next);
+			return 1;
+		}
+#endif
 		if (ret == -ESRCH &&
 		    (verdict & NF_VERDICT_FLAG_QUEUE_BYPASS)) {
 			*entryp = rcu_dereference(entry->next);
@@ void nf_reinject(struct nf_queue_entry *entry, unsigned int verdict) @@
 		local_bh_enable();
 		break;
 	case NF_QUEUE:
+	case NF_IMQ_QUEUE:
 		err = nf_queue(skb, &entry->state, &hook_entry, verdict);
 		if (err == 1) {
 			if (hook_entry)
-
new file net/netfilter/xt_IMQ.c
diff --git a/net/netfilter/xt_IMQ.c b/net/netfilter/xt_IMQ.c new file mode 100644 index 0000000..f9c5817
/*
 * This target marks packets to be enqueued to an imq device
 */
#include <linux/module.h>
#include <linux/skbuff.h>
#include <linux/netfilter/x_tables.h>
#include <linux/netfilter/xt_IMQ.h>
#include <linux/imq.h>

static unsigned int imq_target(struct sk_buff *pskb,
			       const struct xt_action_param *par)
{
	const struct xt_imq_info *mr = par->targinfo;

	pskb->imq_flags = (mr->todev & IMQ_F_IFMASK) | IMQ_F_ENQUEUE;

	return XT_CONTINUE;
}

static int imq_checkentry(const struct xt_tgchk_param *par)
{
	struct xt_imq_info *mr = par->targinfo;

	if (mr->todev > IMQ_MAX_DEVS - 1) {
		pr_warn("IMQ: invalid device specified, highest is %u\n",
			IMQ_MAX_DEVS - 1);
		return -EINVAL;
	}

	return 0;
}

static struct xt_target xt_imq_reg[] __read_mostly = {
	{
		.name		= "IMQ",
		.family		= AF_INET,
		.checkentry	= imq_checkentry,
		.target		= imq_target,
		.targetsize	= sizeof(struct xt_imq_info),
		.table		= "mangle",
		.me		= THIS_MODULE
	},
	{
		.name		= "IMQ",
		.family		= AF_INET6,
		.checkentry	= imq_checkentry,
		.target		= imq_target,
		.targetsize	= sizeof(struct xt_imq_info),
		.table		= "mangle",
		.me		= THIS_MODULE
	},
};

static int __init imq_init(void)
{
	return xt_register_targets(xt_imq_reg, ARRAY_SIZE(xt_imq_reg));
}

static void __exit imq_fini(void)
{
	xt_unregister_targets(xt_imq_reg, ARRAY_SIZE(xt_imq_reg));
}

module_init(imq_init);
module_exit(imq_fini);

MODULE_AUTHOR("https://github.com/imq/linuximq");
MODULE_DESCRIPTION("Pseudo-driver for the intermediate queue device. See https://github.com/imq/linuximq/wiki for more information.");
MODULE_LICENSE("GPL");
MODULE_ALIAS("ipt_IMQ");
MODULE_ALIAS("ip6t_IMQ");
-
net/sched/sch_generic.c
diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c index 6cfb6e9..4c675e9 100644
@@ static struct sk_buff *dequeue_skb(struct Qdisc *q, bool *validate, @@
 	return skb;
 }
 
+struct sk_buff *qdisc_dequeue_skb(struct Qdisc *q, bool *validate)
+{
+	int packets;
+
+	return dequeue_skb(q, validate, &packets);
+}
+EXPORT_SYMBOL(qdisc_dequeue_skb);
+
 /*
  * Transmit possibly several skbs, and handle the return status as
  * required. Owning running seqcount bit guarantees that