Merge branches 'sched/cleanups', 'sched/urgent' and 'linus' into sched/core
[linux-2.6] / net / core / neighbour.c
1 /*
2  *      Generic address resolution entity
3  *
4  *      Authors:
5  *      Pedro Roque             <roque@di.fc.ul.pt>
6  *      Alexey Kuznetsov        <kuznet@ms2.inr.ac.ru>
7  *
8  *      This program is free software; you can redistribute it and/or
9  *      modify it under the terms of the GNU General Public License
10  *      as published by the Free Software Foundation; either version
11  *      2 of the License, or (at your option) any later version.
12  *
13  *      Fixes:
14  *      Vitaly E. Lavrov        releasing NULL neighbor in neigh_add.
15  *      Harald Welte            Add neighbour cache statistics like rtstat
16  */
17
18 #include <linux/types.h>
19 #include <linux/kernel.h>
20 #include <linux/module.h>
21 #include <linux/socket.h>
22 #include <linux/netdevice.h>
23 #include <linux/proc_fs.h>
24 #ifdef CONFIG_SYSCTL
25 #include <linux/sysctl.h>
26 #endif
27 #include <linux/times.h>
28 #include <net/net_namespace.h>
29 #include <net/neighbour.h>
30 #include <net/dst.h>
31 #include <net/sock.h>
32 #include <net/netevent.h>
33 #include <net/netlink.h>
34 #include <linux/rtnetlink.h>
35 #include <linux/random.h>
36 #include <linux/string.h>
37 #include <linux/log2.h>
38
/*
 * Compile-time debug verbosity for this file.
 *
 * NEIGH_PRINTK0 always prints.  NEIGH_PRINTK1 prints when NEIGH_DEBUG >= 1
 * and NEIGH_PRINTK2 prints when NEIGH_DEBUG >= 2; otherwise each of them
 * expands to the empty statement NEIGH_NOPRINTK.
 */
39 #define NEIGH_DEBUG 1
40
/* NEIGH_PRINTK forwards its arguments to printk(); NEIGH_NOPRINTK discards them. */
41 #define NEIGH_PRINTK(x...) printk(x)
42 #define NEIGH_NOPRINTK(x...) do { ; } while(0)
/* Defaults: only level 0 is enabled; levels 1 and 2 are switched on below. */
43 #define NEIGH_PRINTK0 NEIGH_PRINTK
44 #define NEIGH_PRINTK1 NEIGH_NOPRINTK
45 #define NEIGH_PRINTK2 NEIGH_NOPRINTK
46
/* Level 1 messages become real printk() calls when NEIGH_DEBUG >= 1. */
47 #if NEIGH_DEBUG >= 1
48 #undef NEIGH_PRINTK1
49 #define NEIGH_PRINTK1 NEIGH_PRINTK
50 #endif
/* Level 2 messages become real printk() calls when NEIGH_DEBUG >= 2. */
51 #if NEIGH_DEBUG >= 2
52 #undef NEIGH_PRINTK2
53 #define NEIGH_PRINTK2 NEIGH_PRINTK
54 #endif
55
56 #define PNEIGH_HASHMASK         0xF
57
58 static void neigh_timer_handler(unsigned long arg);
59 static void __neigh_notify(struct neighbour *n, int type, int flags);
60 static void neigh_update_notify(struct neighbour *neigh);
61 static int pneigh_ifdown(struct neigh_table *tbl, struct net_device *dev);
62
63 static struct neigh_table *neigh_tables;
64 #ifdef CONFIG_PROC_FS
65 static const struct file_operations neigh_stat_seq_fops;
66 #endif
67
68 /*
69    Neighbour hash table buckets are protected with rwlock tbl->lock.
70
71    - All the scans/updates to hash buckets MUST be made under this lock.
72    - NOTHING clever should be made under this lock: no callbacks
73      to protocol backends, no attempts to send something to network.
74      It will result in deadlocks, if backend/driver wants to use neighbour
75      cache.
76    - If the entry requires some non-trivial actions, increase
77      its reference count and release table lock.
78
79    Neighbour entries are protected:
80    - with reference count.
81    - with rwlock neigh->lock
82
83    Reference count prevents destruction.
84
85    neigh->lock mainly serializes ll address data and its validity state.
86    However, the same lock is used to protect another entry fields:
87     - timer
88     - resolution queue
89
90    Again, nothing clever may be done under neigh->lock;
91    the most complicated procedure that we allow is dev->hard_header.
92    It is assumed that dev->hard_header is simple and does
93    not make callbacks into the neighbour tables.
94
95    The last lock is neigh_tbl_lock. It is a pure SMP lock protecting the
96    list of neighbour tables. This list is used only in process context.
97  */
98
99 static DEFINE_RWLOCK(neigh_tbl_lock);
100
101 static int neigh_blackhole(struct sk_buff *skb)
102 {
103         kfree_skb(skb);
104         return -ENETDOWN;
105 }
106
107 static void neigh_cleanup_and_release(struct neighbour *neigh)
108 {
109         if (neigh->parms->neigh_cleanup)
110                 neigh->parms->neigh_cleanup(neigh);
111
112         __neigh_notify(neigh, RTM_DELNEIGH, 0);
113         neigh_release(neigh);
114 }
115
/*
 * Pick a randomised reachable time for @base.
 *
 * The result is base/2 plus a random offset in [0, base), i.e. a value
 * in the interval (1/2)*base ... (3/2)*base.  This corresponds to the
 * default IPv6 settings and is deliberately not overridable.
 *
 * A @base of zero yields zero (and avoids a modulo by zero).
 */
unsigned long neigh_rand_reach_time(unsigned long base)
{
        if (!base)
                return 0;

        return (base >> 1) + (net_random() % base);
}
EXPORT_SYMBOL(neigh_rand_reach_time);
127
128
129 static int neigh_forced_gc(struct neigh_table *tbl)
130 {
131         int shrunk = 0;
132         int i;
133
134         NEIGH_CACHE_STAT_INC(tbl, forced_gc_runs);
135
136         write_lock_bh(&tbl->lock);
137         for (i = 0; i <= tbl->hash_mask; i++) {
138                 struct neighbour *n, **np;
139
140                 np = &tbl->hash_buckets[i];
141                 while ((n = *np) != NULL) {
142                         /* Neighbour record may be discarded if:
143                          * - nobody refers to it.
144                          * - it is not permanent
145                          */
146                         write_lock(&n->lock);
147                         if (atomic_read(&n->refcnt) == 1 &&
148                             !(n->nud_state & NUD_PERMANENT)) {
149                                 *np     = n->next;
150                                 n->dead = 1;
151                                 shrunk  = 1;
152                                 write_unlock(&n->lock);
153                                 neigh_cleanup_and_release(n);
154                                 continue;
155                         }
156                         write_unlock(&n->lock);
157                         np = &n->next;
158                 }
159         }
160
161         tbl->last_flush = jiffies;
162
163         write_unlock_bh(&tbl->lock);
164
165         return shrunk;
166 }
167
168 static void neigh_add_timer(struct neighbour *n, unsigned long when)
169 {
170         neigh_hold(n);
171         if (unlikely(mod_timer(&n->timer, when))) {
172                 printk("NEIGH: BUG, double timer add, state is %x\n",
173                        n->nud_state);
174                 dump_stack();
175         }
176 }
177
178 static int neigh_del_timer(struct neighbour *n)
179 {
180         if ((n->nud_state & NUD_IN_TIMER) &&
181             del_timer(&n->timer)) {
182                 neigh_release(n);
183                 return 1;
184         }
185         return 0;
186 }
187
188 static void pneigh_queue_purge(struct sk_buff_head *list)
189 {
190         struct sk_buff *skb;
191
192         while ((skb = skb_dequeue(list)) != NULL) {
193                 dev_put(skb->dev);
194                 kfree_skb(skb);
195         }
196 }
197
198 static void neigh_flush_dev(struct neigh_table *tbl, struct net_device *dev)
199 {
200         int i;
201
202         for (i = 0; i <= tbl->hash_mask; i++) {
203                 struct neighbour *n, **np = &tbl->hash_buckets[i];
204
205                 while ((n = *np) != NULL) {
206                         if (dev && n->dev != dev) {
207                                 np = &n->next;
208                                 continue;
209                         }
210                         *np = n->next;
211                         write_lock(&n->lock);
212                         neigh_del_timer(n);
213                         n->dead = 1;
214
215                         if (atomic_read(&n->refcnt) != 1) {
216                                 /* The most unpleasant situation.
217                                    We must destroy neighbour entry,
218                                    but someone still uses it.
219
220                                    The destroy will be delayed until
221                                    the last user releases us, but
222                                    we must kill timers etc. and move
223                                    it to safe state.
224                                  */
225                                 skb_queue_purge(&n->arp_queue);
226                                 n->output = neigh_blackhole;
227                                 if (n->nud_state & NUD_VALID)
228                                         n->nud_state = NUD_NOARP;
229                                 else
230                                         n->nud_state = NUD_NONE;
231                                 NEIGH_PRINTK2("neigh %p is stray.\n", n);
232                         }
233                         write_unlock(&n->lock);
234                         neigh_cleanup_and_release(n);
235                 }
236         }
237 }
238
239 void neigh_changeaddr(struct neigh_table *tbl, struct net_device *dev)
240 {
241         write_lock_bh(&tbl->lock);
242         neigh_flush_dev(tbl, dev);
243         write_unlock_bh(&tbl->lock);
244 }
245 EXPORT_SYMBOL(neigh_changeaddr);
246
247 int neigh_ifdown(struct neigh_table *tbl, struct net_device *dev)
248 {
249         write_lock_bh(&tbl->lock);
250         neigh_flush_dev(tbl, dev);
251         pneigh_ifdown(tbl, dev);
252         write_unlock_bh(&tbl->lock);
253
254         del_timer_sync(&tbl->proxy_timer);
255         pneigh_queue_purge(&tbl->proxy_queue);
256         return 0;
257 }
258 EXPORT_SYMBOL(neigh_ifdown);
259
260 static struct neighbour *neigh_alloc(struct neigh_table *tbl)
261 {
262         struct neighbour *n = NULL;
263         unsigned long now = jiffies;
264         int entries;
265
266         entries = atomic_inc_return(&tbl->entries) - 1;
267         if (entries >= tbl->gc_thresh3 ||
268             (entries >= tbl->gc_thresh2 &&
269              time_after(now, tbl->last_flush + 5 * HZ))) {
270                 if (!neigh_forced_gc(tbl) &&
271                     entries >= tbl->gc_thresh3)
272                         goto out_entries;
273         }
274
275         n = kmem_cache_zalloc(tbl->kmem_cachep, GFP_ATOMIC);
276         if (!n)
277                 goto out_entries;
278
279         skb_queue_head_init(&n->arp_queue);
280         rwlock_init(&n->lock);
281         n->updated        = n->used = now;
282         n->nud_state      = NUD_NONE;
283         n->output         = neigh_blackhole;
284         n->parms          = neigh_parms_clone(&tbl->parms);
285         setup_timer(&n->timer, neigh_timer_handler, (unsigned long)n);
286
287         NEIGH_CACHE_STAT_INC(tbl, allocs);
288         n->tbl            = tbl;
289         atomic_set(&n->refcnt, 1);
290         n->dead           = 1;
291 out:
292         return n;
293
294 out_entries:
295         atomic_dec(&tbl->entries);
296         goto out;
297 }
298
299 static struct neighbour **neigh_hash_alloc(unsigned int entries)
300 {
301         unsigned long size = entries * sizeof(struct neighbour *);
302         struct neighbour **ret;
303
304         if (size <= PAGE_SIZE) {
305                 ret = kzalloc(size, GFP_ATOMIC);
306         } else {
307                 ret = (struct neighbour **)
308                       __get_free_pages(GFP_ATOMIC|__GFP_ZERO, get_order(size));
309         }
310         return ret;
311 }
312
313 static void neigh_hash_free(struct neighbour **hash, unsigned int entries)
314 {
315         unsigned long size = entries * sizeof(struct neighbour *);
316
317         if (size <= PAGE_SIZE)
318                 kfree(hash);
319         else
320                 free_pages((unsigned long)hash, get_order(size));
321 }
322
323 static void neigh_hash_grow(struct neigh_table *tbl, unsigned long new_entries)
324 {
325         struct neighbour **new_hash, **old_hash;
326         unsigned int i, new_hash_mask, old_entries;
327
328         NEIGH_CACHE_STAT_INC(tbl, hash_grows);
329
330         BUG_ON(!is_power_of_2(new_entries));
331         new_hash = neigh_hash_alloc(new_entries);
332         if (!new_hash)
333                 return;
334
335         old_entries = tbl->hash_mask + 1;
336         new_hash_mask = new_entries - 1;
337         old_hash = tbl->hash_buckets;
338
339         get_random_bytes(&tbl->hash_rnd, sizeof(tbl->hash_rnd));
340         for (i = 0; i < old_entries; i++) {
341                 struct neighbour *n, *next;
342
343                 for (n = old_hash[i]; n; n = next) {
344                         unsigned int hash_val = tbl->hash(n->primary_key, n->dev);
345
346                         hash_val &= new_hash_mask;
347                         next = n->next;
348
349                         n->next = new_hash[hash_val];
350                         new_hash[hash_val] = n;
351                 }
352         }
353         tbl->hash_buckets = new_hash;
354         tbl->hash_mask = new_hash_mask;
355
356         neigh_hash_free(old_hash, old_entries);
357 }
358
359 struct neighbour *neigh_lookup(struct neigh_table *tbl, const void *pkey,
360                                struct net_device *dev)
361 {
362         struct neighbour *n;
363         int key_len = tbl->key_len;
364         u32 hash_val;
365
366         NEIGH_CACHE_STAT_INC(tbl, lookups);
367
368         read_lock_bh(&tbl->lock);
369         hash_val = tbl->hash(pkey, dev);
370         for (n = tbl->hash_buckets[hash_val & tbl->hash_mask]; n; n = n->next) {
371                 if (dev == n->dev && !memcmp(n->primary_key, pkey, key_len)) {
372                         neigh_hold(n);
373                         NEIGH_CACHE_STAT_INC(tbl, hits);
374                         break;
375                 }
376         }
377         read_unlock_bh(&tbl->lock);
378         return n;
379 }
380 EXPORT_SYMBOL(neigh_lookup);
381
382 struct neighbour *neigh_lookup_nodev(struct neigh_table *tbl, struct net *net,
383                                      const void *pkey)
384 {
385         struct neighbour *n;
386         int key_len = tbl->key_len;
387         u32 hash_val;
388
389         NEIGH_CACHE_STAT_INC(tbl, lookups);
390
391         read_lock_bh(&tbl->lock);
392         hash_val = tbl->hash(pkey, NULL);
393         for (n = tbl->hash_buckets[hash_val & tbl->hash_mask]; n; n = n->next) {
394                 if (!memcmp(n->primary_key, pkey, key_len) &&
395                     net_eq(dev_net(n->dev), net)) {
396                         neigh_hold(n);
397                         NEIGH_CACHE_STAT_INC(tbl, hits);
398                         break;
399                 }
400         }
401         read_unlock_bh(&tbl->lock);
402         return n;
403 }
404 EXPORT_SYMBOL(neigh_lookup_nodev);
405
406 struct neighbour *neigh_create(struct neigh_table *tbl, const void *pkey,
407                                struct net_device *dev)
408 {
409         u32 hash_val;
410         int key_len = tbl->key_len;
411         int error;
412         struct neighbour *n1, *rc, *n = neigh_alloc(tbl);
413
414         if (!n) {
415                 rc = ERR_PTR(-ENOBUFS);
416                 goto out;
417         }
418
419         memcpy(n->primary_key, pkey, key_len);
420         n->dev = dev;
421         dev_hold(dev);
422
423         /* Protocol specific setup. */
424         if (tbl->constructor && (error = tbl->constructor(n)) < 0) {
425                 rc = ERR_PTR(error);
426                 goto out_neigh_release;
427         }
428
429         /* Device specific setup. */
430         if (n->parms->neigh_setup &&
431             (error = n->parms->neigh_setup(n)) < 0) {
432                 rc = ERR_PTR(error);
433                 goto out_neigh_release;
434         }
435
436         n->confirmed = jiffies - (n->parms->base_reachable_time << 1);
437
438         write_lock_bh(&tbl->lock);
439
440         if (atomic_read(&tbl->entries) > (tbl->hash_mask + 1))
441                 neigh_hash_grow(tbl, (tbl->hash_mask + 1) << 1);
442
443         hash_val = tbl->hash(pkey, dev) & tbl->hash_mask;
444
445         if (n->parms->dead) {
446                 rc = ERR_PTR(-EINVAL);
447                 goto out_tbl_unlock;
448         }
449
450         for (n1 = tbl->hash_buckets[hash_val]; n1; n1 = n1->next) {
451                 if (dev == n1->dev && !memcmp(n1->primary_key, pkey, key_len)) {
452                         neigh_hold(n1);
453                         rc = n1;
454                         goto out_tbl_unlock;
455                 }
456         }
457
458         n->next = tbl->hash_buckets[hash_val];
459         tbl->hash_buckets[hash_val] = n;
460         n->dead = 0;
461         neigh_hold(n);
462         write_unlock_bh(&tbl->lock);
463         NEIGH_PRINTK2("neigh %p is created.\n", n);
464         rc = n;
465 out:
466         return rc;
467 out_tbl_unlock:
468         write_unlock_bh(&tbl->lock);
469 out_neigh_release:
470         neigh_release(n);
471         goto out;
472 }
473 EXPORT_SYMBOL(neigh_create);
474
475 static u32 pneigh_hash(const void *pkey, int key_len)
476 {
477         u32 hash_val = *(u32 *)(pkey + key_len - 4);
478         hash_val ^= (hash_val >> 16);
479         hash_val ^= hash_val >> 8;
480         hash_val ^= hash_val >> 4;
481         hash_val &= PNEIGH_HASHMASK;
482         return hash_val;
483 }
484
485 static struct pneigh_entry *__pneigh_lookup_1(struct pneigh_entry *n,
486                                               struct net *net,
487                                               const void *pkey,
488                                               int key_len,
489                                               struct net_device *dev)
490 {
491         while (n) {
492                 if (!memcmp(n->key, pkey, key_len) &&
493                     net_eq(pneigh_net(n), net) &&
494                     (n->dev == dev || !n->dev))
495                         return n;
496                 n = n->next;
497         }
498         return NULL;
499 }
500
501 struct pneigh_entry *__pneigh_lookup(struct neigh_table *tbl,
502                 struct net *net, const void *pkey, struct net_device *dev)
503 {
504         int key_len = tbl->key_len;
505         u32 hash_val = pneigh_hash(pkey, key_len);
506
507         return __pneigh_lookup_1(tbl->phash_buckets[hash_val],
508                                  net, pkey, key_len, dev);
509 }
510 EXPORT_SYMBOL_GPL(__pneigh_lookup);
511
512 struct pneigh_entry * pneigh_lookup(struct neigh_table *tbl,
513                                     struct net *net, const void *pkey,
514                                     struct net_device *dev, int creat)
515 {
516         struct pneigh_entry *n;
517         int key_len = tbl->key_len;
518         u32 hash_val = pneigh_hash(pkey, key_len);
519
520         read_lock_bh(&tbl->lock);
521         n = __pneigh_lookup_1(tbl->phash_buckets[hash_val],
522                               net, pkey, key_len, dev);
523         read_unlock_bh(&tbl->lock);
524
525         if (n || !creat)
526                 goto out;
527
528         ASSERT_RTNL();
529
530         n = kmalloc(sizeof(*n) + key_len, GFP_KERNEL);
531         if (!n)
532                 goto out;
533
534         write_pnet(&n->net, hold_net(net));
535         memcpy(n->key, pkey, key_len);
536         n->dev = dev;
537         if (dev)
538                 dev_hold(dev);
539
540         if (tbl->pconstructor && tbl->pconstructor(n)) {
541                 if (dev)
542                         dev_put(dev);
543                 release_net(net);
544                 kfree(n);
545                 n = NULL;
546                 goto out;
547         }
548
549         write_lock_bh(&tbl->lock);
550         n->next = tbl->phash_buckets[hash_val];
551         tbl->phash_buckets[hash_val] = n;
552         write_unlock_bh(&tbl->lock);
553 out:
554         return n;
555 }
556 EXPORT_SYMBOL(pneigh_lookup);
557
558
559 int pneigh_delete(struct neigh_table *tbl, struct net *net, const void *pkey,
560                   struct net_device *dev)
561 {
562         struct pneigh_entry *n, **np;
563         int key_len = tbl->key_len;
564         u32 hash_val = pneigh_hash(pkey, key_len);
565
566         write_lock_bh(&tbl->lock);
567         for (np = &tbl->phash_buckets[hash_val]; (n = *np) != NULL;
568              np = &n->next) {
569                 if (!memcmp(n->key, pkey, key_len) && n->dev == dev &&
570                     net_eq(pneigh_net(n), net)) {
571                         *np = n->next;
572                         write_unlock_bh(&tbl->lock);
573                         if (tbl->pdestructor)
574                                 tbl->pdestructor(n);
575                         if (n->dev)
576                                 dev_put(n->dev);
577                         release_net(pneigh_net(n));
578                         kfree(n);
579                         return 0;
580                 }
581         }
582         write_unlock_bh(&tbl->lock);
583         return -ENOENT;
584 }
585
586 static int pneigh_ifdown(struct neigh_table *tbl, struct net_device *dev)
587 {
588         struct pneigh_entry *n, **np;
589         u32 h;
590
591         for (h = 0; h <= PNEIGH_HASHMASK; h++) {
592                 np = &tbl->phash_buckets[h];
593                 while ((n = *np) != NULL) {
594                         if (!dev || n->dev == dev) {
595                                 *np = n->next;
596                                 if (tbl->pdestructor)
597                                         tbl->pdestructor(n);
598                                 if (n->dev)
599                                         dev_put(n->dev);
600                                 release_net(pneigh_net(n));
601                                 kfree(n);
602                                 continue;
603                         }
604                         np = &n->next;
605                 }
606         }
607         return -ENOENT;
608 }
609
610 static void neigh_parms_destroy(struct neigh_parms *parms);
611
612 static inline void neigh_parms_put(struct neigh_parms *parms)
613 {
614         if (atomic_dec_and_test(&parms->refcnt))
615                 neigh_parms_destroy(parms);
616 }
617
618 /*
619  *      neighbour must already be out of the table;
620  *
621  */
622 void neigh_destroy(struct neighbour *neigh)
623 {
624         struct hh_cache *hh;
625
626         NEIGH_CACHE_STAT_INC(neigh->tbl, destroys);
627
628         if (!neigh->dead) {
629                 printk(KERN_WARNING
630                        "Destroying alive neighbour %p\n", neigh);
631                 dump_stack();
632                 return;
633         }
634
635         if (neigh_del_timer(neigh))
636                 printk(KERN_WARNING "Impossible event.\n");
637
638         while ((hh = neigh->hh) != NULL) {
639                 neigh->hh = hh->hh_next;
640                 hh->hh_next = NULL;
641
642                 write_seqlock_bh(&hh->hh_lock);
643                 hh->hh_output = neigh_blackhole;
644                 write_sequnlock_bh(&hh->hh_lock);
645                 if (atomic_dec_and_test(&hh->hh_refcnt))
646                         kfree(hh);
647         }
648
649         skb_queue_purge(&neigh->arp_queue);
650
651         dev_put(neigh->dev);
652         neigh_parms_put(neigh->parms);
653
654         NEIGH_PRINTK2("neigh %p is destroyed.\n", neigh);
655
656         atomic_dec(&neigh->tbl->entries);
657         kmem_cache_free(neigh->tbl->kmem_cachep, neigh);
658 }
659 EXPORT_SYMBOL(neigh_destroy);
660
661 /* Neighbour state is suspicious;
662    disable fast path.
663
664    Called with write_locked neigh.
665  */
666 static void neigh_suspect(struct neighbour *neigh)
667 {
668         struct hh_cache *hh;
669
670         NEIGH_PRINTK2("neigh %p is suspected.\n", neigh);
671
672         neigh->output = neigh->ops->output;
673
674         for (hh = neigh->hh; hh; hh = hh->hh_next)
675                 hh->hh_output = neigh->ops->output;
676 }
677
678 /* Neighbour state is OK;
679    enable fast path.
680
681    Called with write_locked neigh.
682  */
683 static void neigh_connect(struct neighbour *neigh)
684 {
685         struct hh_cache *hh;
686
687         NEIGH_PRINTK2("neigh %p is connected.\n", neigh);
688
689         neigh->output = neigh->ops->connected_output;
690
691         for (hh = neigh->hh; hh; hh = hh->hh_next)
692                 hh->hh_output = neigh->ops->hh_output;
693 }
694
695 static void neigh_periodic_timer(unsigned long arg)
696 {
697         struct neigh_table *tbl = (struct neigh_table *)arg;
698         struct neighbour *n, **np;
699         unsigned long expire, now = jiffies;
700
701         NEIGH_CACHE_STAT_INC(tbl, periodic_gc_runs);
702
703         write_lock(&tbl->lock);
704
705         /*
706          *      periodically recompute ReachableTime from random function
707          */
708
709         if (time_after(now, tbl->last_rand + 300 * HZ)) {
710                 struct neigh_parms *p;
711                 tbl->last_rand = now;
712                 for (p = &tbl->parms; p; p = p->next)
713                         p->reachable_time =
714                                 neigh_rand_reach_time(p->base_reachable_time);
715         }
716
717         np = &tbl->hash_buckets[tbl->hash_chain_gc];
718         tbl->hash_chain_gc = ((tbl->hash_chain_gc + 1) & tbl->hash_mask);
719
720         while ((n = *np) != NULL) {
721                 unsigned int state;
722
723                 write_lock(&n->lock);
724
725                 state = n->nud_state;
726                 if (state & (NUD_PERMANENT | NUD_IN_TIMER)) {
727                         write_unlock(&n->lock);
728                         goto next_elt;
729                 }
730
731                 if (time_before(n->used, n->confirmed))
732                         n->used = n->confirmed;
733
734                 if (atomic_read(&n->refcnt) == 1 &&
735                     (state == NUD_FAILED ||
736                      time_after(now, n->used + n->parms->gc_staletime))) {
737                         *np = n->next;
738                         n->dead = 1;
739                         write_unlock(&n->lock);
740                         neigh_cleanup_and_release(n);
741                         continue;
742                 }
743                 write_unlock(&n->lock);
744
745 next_elt:
746                 np = &n->next;
747         }
748
749         /* Cycle through all hash buckets every base_reachable_time/2 ticks.
750          * ARP entry timeouts range from 1/2 base_reachable_time to 3/2
751          * base_reachable_time.
752          */
753         expire = tbl->parms.base_reachable_time >> 1;
754         expire /= (tbl->hash_mask + 1);
755         if (!expire)
756                 expire = 1;
757
758         if (expire>HZ)
759                 mod_timer(&tbl->gc_timer, round_jiffies(now + expire));
760         else
761                 mod_timer(&tbl->gc_timer, now + expire);
762
763         write_unlock(&tbl->lock);
764 }
765
766 static __inline__ int neigh_max_probes(struct neighbour *n)
767 {
768         struct neigh_parms *p = n->parms;
769         return (n->nud_state & NUD_PROBE ?
770                 p->ucast_probes :
771                 p->ucast_probes + p->app_probes + p->mcast_probes);
772 }
773
774 /* Called when a timer expires for a neighbour entry. */
/*
 * @arg is the struct neighbour pointer registered with setup_timer() in
 * neigh_alloc().
 *
 * Under neigh->lock (write) the handler advances the NUD state machine:
 *   REACHABLE -> stays REACHABLE, or DELAY, or STALE
 *   DELAY     -> REACHABLE or PROBE
 *   INCOMPLETE/PROBE -> FAILED once the probe budget is used up
 * It then re-arms the timer if the new state still needs one and, for
 * INCOMPLETE/PROBE, sends another solicitation.
 *
 * The final neigh_release() drops the reference that was taken when the
 * timer was armed (see neigh_add_timer()).
 */
775
776 static void neigh_timer_handler(unsigned long arg)
777 {
778         unsigned long now, next;
779         struct neighbour *neigh = (struct neighbour *)arg;
780         unsigned state;
781         int notify = 0;
782
783         write_lock(&neigh->lock);
784
785         state = neigh->nud_state;
786         now = jiffies;
        /* Default re-arm time; every state branch below may override it. */
787         next = now + HZ;
788
        /* Timer fired although the state does not use a timer: do nothing. */
789         if (!(state & NUD_IN_TIMER)) {
790 #ifndef CONFIG_SMP
791                 printk(KERN_WARNING "neigh: timer & !nud_in_timer\n");
792 #endif
793                 goto out;
794         }
795
796         if (state & NUD_REACHABLE) {
797                 if (time_before_eq(now,
798                                    neigh->confirmed + neigh->parms->reachable_time)) {
                        /* Confirmed recently enough: wait until the confirmation expires. */
799                         NEIGH_PRINTK2("neigh %p is still alive.\n", neigh);
800                         next = neigh->confirmed + neigh->parms->reachable_time;
801                 } else if (time_before_eq(now,
802                                           neigh->used + neigh->parms->delay_probe_time)) {
                        /* Confirmation expired but the entry was used recently: go to DELAY. */
803                         NEIGH_PRINTK2("neigh %p is delayed.\n", neigh);
804                         neigh->nud_state = NUD_DELAY;
805                         neigh->updated = jiffies;
806                         neigh_suspect(neigh);
807                         next = now + neigh->parms->delay_probe_time;
808                 } else {
                        /* Neither confirmed nor used recently: STALE (no timer needed). */
809                         NEIGH_PRINTK2("neigh %p is suspected.\n", neigh);
810                         neigh->nud_state = NUD_STALE;
811                         neigh->updated = jiffies;
812                         neigh_suspect(neigh);
813                         notify = 1;
814                 }
815         } else if (state & NUD_DELAY) {
816                 if (time_before_eq(now,
817                                    neigh->confirmed + neigh->parms->delay_probe_time)) {
                        /* A confirmation arrived during the delay: back to REACHABLE. */
818                         NEIGH_PRINTK2("neigh %p is now reachable.\n", neigh);
819                         neigh->nud_state = NUD_REACHABLE;
820                         neigh->updated = jiffies;
821                         neigh_connect(neigh);
822                         notify = 1;
823                         next = neigh->confirmed + neigh->parms->reachable_time;
824                 } else {
                        /* No confirmation: start probing with a fresh probe counter. */
825                         NEIGH_PRINTK2("neigh %p is probed.\n", neigh);
826                         neigh->nud_state = NUD_PROBE;
827                         neigh->updated = jiffies;
828                         atomic_set(&neigh->probes, 0);
829                         next = now + neigh->parms->retrans_time;
830                 }
831         } else {
832                 /* NUD_PROBE|NUD_INCOMPLETE */
833                 next = now + neigh->parms->retrans_time;
834         }
835
        /* Probe budget exhausted: declare the neighbour FAILED. */
836         if ((neigh->nud_state & (NUD_INCOMPLETE | NUD_PROBE)) &&
837             atomic_read(&neigh->probes) >= neigh_max_probes(neigh)) {
838                 struct sk_buff *skb;
839
840                 neigh->nud_state = NUD_FAILED;
841                 neigh->updated = jiffies;
842                 notify = 1;
843                 NEIGH_CACHE_STAT_INC(neigh->tbl, res_failed);
844                 NEIGH_PRINTK2("neigh %p is failed.\n", neigh);
845
846                 /* It is very thin place. report_unreachable is very complicated
847                    routine. Particularly, it can hit the same neighbour entry!
848
849                    So that, we try to be accurate and avoid dead loop. --ANK
850                  */
                /*
                 * The lock is dropped around each error_report() call and
                 * the state is re-checked after re-locking, so the loop
                 * stops if the entry left NUD_FAILED in the meantime.
                 */
851                 while (neigh->nud_state == NUD_FAILED &&
852                        (skb = __skb_dequeue(&neigh->arp_queue)) != NULL) {
853                         write_unlock(&neigh->lock);
854                         neigh->ops->error_report(neigh, skb);
855                         write_lock(&neigh->lock);
856                 }
857                 skb_queue_purge(&neigh->arp_queue);
858         }
859
        /*
         * Re-arm the timer, never sooner than HZ/2 from now.  A zero return
         * from mod_timer() means the timer was not pending, so a reference
         * is taken for the newly pending timer.
         */
860         if (neigh->nud_state & NUD_IN_TIMER) {
861                 if (time_before(next, jiffies + HZ/2))
862                         next = jiffies + HZ/2;
863                 if (!mod_timer(&neigh->timer, next))
864                         neigh_hold(neigh);
865         }
866         if (neigh->nud_state & (NUD_INCOMPLETE | NUD_PROBE)) {
867                 struct sk_buff *skb = skb_peek(&neigh->arp_queue);
868                 /* keep skb alive even if arp_queue overflows */
869                 if (skb)
870                         skb = skb_copy(skb, GFP_ATOMIC);
                /* solicit() is called with neigh->lock released. */
871                 write_unlock(&neigh->lock);
872                 neigh->ops->solicit(neigh, skb);
873                 atomic_inc(&neigh->probes);
874                 if (skb)
875                         kfree_skb(skb);
876         } else {
        /* The early "no timer state" exit jumps here just to unlock. */
877 out:
878                 write_unlock(&neigh->lock);
879         }
880
        /* Notification is sent only after neigh->lock has been released. */
881         if (notify)
882                 neigh_update_notify(neigh);
883
884         neigh_release(neigh);
885 }
886
/*
 * Kick address resolution for @neigh on behalf of an outgoing packet.
 *
 * @skb may be NULL.  Under neigh->lock (write, BH disabled):
 *  - if any of the NUD_CONNECTED, NUD_DELAY or NUD_PROBE bits is set,
 *    nothing is done;
 *  - if the state is neither STALE nor INCOMPLETE, resolution is started
 *    (state becomes INCOMPLETE and the timer is armed) or, when both
 *    mcast_probes and app_probes are zero, the entry goes straight to
 *    FAILED and @skb is freed;
 *  - a STALE entry moves to DELAY with the timer armed for
 *    delay_probe_time.
 * While the entry is INCOMPLETE a non-NULL @skb is appended to arp_queue,
 * dropping the oldest queued packet first if the queue is full.
 *
 * Returns 1 if the entry is INCOMPLETE or has just become FAILED (in both
 * cases a non-NULL @skb has been queued or freed here), 0 otherwise.
 */
887 int __neigh_event_send(struct neighbour *neigh, struct sk_buff *skb)
888 {
889         int rc;
890         unsigned long now;
891
892         write_lock_bh(&neigh->lock);
893
894         rc = 0;
895         if (neigh->nud_state & (NUD_CONNECTED | NUD_DELAY | NUD_PROBE))
896                 goto out_unlock_bh;
897
898         now = jiffies;
899
900         if (!(neigh->nud_state & (NUD_STALE | NUD_INCOMPLETE))) {
901                 if (neigh->parms->mcast_probes + neigh->parms->app_probes) {
                        /*
                         * The probe counter starts at ucast_probes rather
                         * than 0.  NOTE(review): presumably so that, with
                         * the limit from neigh_max_probes(), only the
                         * app + mcast share of probes is sent - confirm.
                         */
902                         atomic_set(&neigh->probes, neigh->parms->ucast_probes);
903                         neigh->nud_state     = NUD_INCOMPLETE;
904                         neigh->updated = jiffies;
                        /* Fire the timer on the very next jiffy. */
905                         neigh_add_timer(neigh, now + 1);
906                 } else {
                        /* No mcast/app probes configured: fail immediately. */
907                         neigh->nud_state = NUD_FAILED;
908                         neigh->updated = jiffies;
909                         write_unlock_bh(&neigh->lock);
910
911                         if (skb)
912                                 kfree_skb(skb);
913                         return 1;
914                 }
915         } else if (neigh->nud_state & NUD_STALE) {
916                 NEIGH_PRINTK2("neigh %p is delayed.\n", neigh);
917                 neigh->nud_state = NUD_DELAY;
918                 neigh->updated = jiffies;
919                 neigh_add_timer(neigh,
920                                 jiffies + neigh->parms->delay_probe_time);
921         }
922
923         if (neigh->nud_state == NUD_INCOMPLETE) {
924                 if (skb) {
                        /* Queue full: drop the oldest packet to make room. */
925                         if (skb_queue_len(&neigh->arp_queue) >=
926                             neigh->parms->queue_len) {
927                                 struct sk_buff *buff;
928                                 buff = __skb_dequeue(&neigh->arp_queue);
929                                 kfree_skb(buff);
930                                 NEIGH_CACHE_STAT_INC(neigh->tbl, unres_discards);
931                         }
932                         __skb_queue_tail(&neigh->arp_queue, skb);
933                 }
934                 rc = 1;
935         }
936 out_unlock_bh:
937         write_unlock_bh(&neigh->lock);
938         return rc;
939 }
940 EXPORT_SYMBOL(__neigh_event_send);
941
942 static void neigh_update_hhs(struct neighbour *neigh)
943 {
944         struct hh_cache *hh;
945         void (*update)(struct hh_cache*, const struct net_device*, const unsigned char *)
946                 = neigh->dev->header_ops->cache_update;
947
948         if (update) {
949                 for (hh = neigh->hh; hh; hh = hh->hh_next) {
950                         write_seqlock_bh(&hh->hh_lock);
951                         update(hh, neigh->dev, neigh->ha);
952                         write_sequnlock_bh(&hh->hh_lock);
953                 }
954         }
955 }
956
957
958
959 /* Generic update routine.
960    -- lladdr is new lladdr or NULL, if it is not supplied.
961    -- new    is new state.
962    -- flags
963         NEIGH_UPDATE_F_OVERRIDE allows to override existing lladdr,
964                                 if it is different.
965         NEIGH_UPDATE_F_WEAK_OVERRIDE will suspect existing "connected"
966                                 lladdr instead of overriding it
967                                 if it is different.
968                                 It also allows to retain current state
969                                 if lladdr is unchanged.
970         NEIGH_UPDATE_F_ADMIN    means that the change is administrative.
971
972         NEIGH_UPDATE_F_OVERRIDE_ISROUTER allows to override existing
973                                 NTF_ROUTER flag.
974         NEIGH_UPDATE_F_ISROUTER indicates if the neighbour is known as
975                                 a router.
976
977    Caller MUST hold reference count on the entry.
978  */
979
/*
 * Return values, as assigned in the body below:
 *   -EPERM   the entry is NUD_NOARP or NUD_PERMANENT and
 *            NEIGH_UPDATE_F_ADMIN is not set;
 *   -EINVAL  the new state is a valid one, the device uses link-layer
 *            addresses, no lladdr was supplied and the entry holds no
 *            valid address to fall back on;
 *   0        in every other case, including the one where a differing
 *            lladdr is ignored because no override flag permits it.
 */
980 int neigh_update(struct neighbour *neigh, const u8 *lladdr, u8 new,
981                  u32 flags)
982 {
983         u8 old;
984         int err;
985         int notify = 0;
986         struct net_device *dev;
987         int update_isrouter = 0;
988
989         write_lock_bh(&neigh->lock);
990
991         dev    = neigh->dev;
992         old    = neigh->nud_state;
993         err    = -EPERM;
994
995         if (!(flags & NEIGH_UPDATE_F_ADMIN) &&
996             (old & (NUD_NOARP | NUD_PERMANENT)))
997                 goto out;
998
            /* New state is not a valid one: stop the timer, store the state
             * and request a notification only if the entry used to be valid.
             */
999         if (!(new & NUD_VALID)) {
1000                 neigh_del_timer(neigh);
1001                 if (old & NUD_CONNECTED)
1002                         neigh_suspect(neigh);
1003                 neigh->nud_state = new;
1004                 err = 0;
1005                 notify = old & NUD_VALID;
1006                 goto out;
1007         }
1008
            /* From here on "lladdr == neigh->ha" (pointer equality) is used
             * to mean "the address does not change".
             */
1009         /* Compare new lladdr with cached one */
1010         if (!dev->addr_len) {
1011                 /* First case: device needs no address. */
1012                 lladdr = neigh->ha;
1013         } else if (lladdr) {
1014                 /* The second case: if something is already cached
1015                    and a new address is proposed:
1016                    - compare new & old
1017                    - if they are different, check override flag
1018                  */
1019                 if ((old & NUD_VALID) &&
1020                     !memcmp(lladdr, neigh->ha, dev->addr_len))
1021                         lladdr = neigh->ha;
1022         } else {
1023                 /* No address is supplied; if we know something,
1024                    use it, otherwise discard the request.
1025                  */
1026                 err = -EINVAL;
1027                 if (!(old & NUD_VALID))
1028                         goto out;
1029                 lladdr = neigh->ha;
1030         }
1031
1032         if (new & NUD_CONNECTED)
1033                 neigh->confirmed = jiffies;
1034         neigh->updated = jiffies;
1035
1036         /* If entry was valid and address is not changed,
1037            do not change entry state, if new one is STALE.
1038          */
1039         err = 0;
1040         update_isrouter = flags & NEIGH_UPDATE_F_OVERRIDE_ISROUTER;
1041         if (old & NUD_VALID) {
1042                 if (lladdr != neigh->ha && !(flags & NEIGH_UPDATE_F_OVERRIDE)) {
                            /* Address differs but may not be overridden: the
                             * router flag is left alone; with WEAK_OVERRIDE on
                             * a connected entry the old address is kept and
                             * the entry becomes STALE, otherwise the request
                             * is dropped with err == 0.
                             */
1043                         update_isrouter = 0;
1044                         if ((flags & NEIGH_UPDATE_F_WEAK_OVERRIDE) &&
1045                             (old & NUD_CONNECTED)) {
1046                                 lladdr = neigh->ha;
1047                                 new = NUD_STALE;
1048                         } else
1049                                 goto out;
1050                 } else {
1051                         if (lladdr == neigh->ha && new == NUD_STALE &&
1052                             ((flags & NEIGH_UPDATE_F_WEAK_OVERRIDE) ||
1053                              (old & NUD_CONNECTED))
1054                             )
1055                                 new = old;
1056                 }
1057         }
1058
1059         if (new != old) {
1060                 neigh_del_timer(neigh);
1061                 if (new & NUD_IN_TIMER)
1062                         neigh_add_timer(neigh, (jiffies +
1063                                                 ((new & NUD_REACHABLE) ?
1064                                                  neigh->parms->reachable_time :
1065                                                  0)));
1066                 neigh->nud_state = new;
1067         }
1068
1069         if (lladdr != neigh->ha) {
1070                 memcpy(&neigh->ha, lladdr, dev->addr_len);
1071                 neigh_update_hhs(neigh);
                    /* Back-date "confirmed" by twice base_reachable_time when
                     * the new state is not a connected one.
                     */
1072                 if (!(new & NUD_CONNECTED))
1073                         neigh->confirmed = jiffies -
1074                                       (neigh->parms->base_reachable_time << 1);
1075                 notify = 1;
1076         }
1077         if (new == old)
1078                 goto out;
1079         if (new & NUD_CONNECTED)
1080                 neigh_connect(neigh);
1081         else
1082                 neigh_suspect(neigh);
            /* Entry has just become valid: flush packets parked on arp_queue.
             * The lock is dropped around each output() call and nud_state is
             * re-checked after it is re-taken; whatever is left is purged.
             */
1083         if (!(old & NUD_VALID)) {
1084                 struct sk_buff *skb;
1085
1086                 /* Again: avoid dead loop if something went wrong */
1087
1088                 while (neigh->nud_state & NUD_VALID &&
1089                        (skb = __skb_dequeue(&neigh->arp_queue)) != NULL) {
1090                         struct neighbour *n1 = neigh;
1091                         write_unlock_bh(&neigh->lock);
1092                         /* On shaper/eql skb->dst->neighbour != neigh :( */
1093                         if (skb->dst && skb->dst->neighbour)
1094                                 n1 = skb->dst->neighbour;
1095                         n1->output(skb);
1096                         write_lock_bh(&neigh->lock);
1097                 }
1098                 skb_queue_purge(&neigh->arp_queue);
1099         }
1100 out:
            /* update_isrouter is only non-zero once the flags were examined
             * above, so the early exits never touch NTF_ROUTER.
             */
1101         if (update_isrouter) {
1102                 neigh->flags = (flags & NEIGH_UPDATE_F_ISROUTER) ?
1103                         (neigh->flags | NTF_ROUTER) :
1104                         (neigh->flags & ~NTF_ROUTER);
1105         }
1106         write_unlock_bh(&neigh->lock);
1107
            /* Notification is sent after the lock has been released. */
1108         if (notify)
1109                 neigh_update_notify(neigh);
1110
1111         return err;
1112 }
1113 EXPORT_SYMBOL(neigh_update);
1114
1115 struct neighbour *neigh_event_ns(struct neigh_table *tbl,
1116                                  u8 *lladdr, void *saddr,
1117                                  struct net_device *dev)
1118 {
1119         struct neighbour *neigh = __neigh_lookup(tbl, saddr, dev,
1120                                                  lladdr || !dev->addr_len);
1121         if (neigh)
1122                 neigh_update(neigh, lladdr, NUD_STALE,
1123                              NEIGH_UPDATE_F_OVERRIDE);
1124         return neigh;
1125 }
1126 EXPORT_SYMBOL(neigh_event_ns);
1127
1128 static void neigh_hh_init(struct neighbour *n, struct dst_entry *dst,
1129                           __be16 protocol)
1130 {
1131         struct hh_cache *hh;
1132         struct net_device *dev = dst->dev;
1133
1134         for (hh = n->hh; hh; hh = hh->hh_next)
1135                 if (hh->hh_type == protocol)
1136                         break;
1137
1138         if (!hh && (hh = kzalloc(sizeof(*hh), GFP_ATOMIC)) != NULL) {
1139                 seqlock_init(&hh->hh_lock);
1140                 hh->hh_type = protocol;
1141                 atomic_set(&hh->hh_refcnt, 0);
1142                 hh->hh_next = NULL;
1143
1144                 if (dev->header_ops->cache(n, hh)) {
1145                         kfree(hh);
1146                         hh = NULL;
1147                 } else {
1148                         atomic_inc(&hh->hh_refcnt);
1149                         hh->hh_next = n->hh;
1150                         n->hh       = hh;
1151                         if (n->nud_state & NUD_CONNECTED)
1152                                 hh->hh_output = n->ops->hh_output;
1153                         else
1154                                 hh->hh_output = n->ops->output;
1155                 }
1156         }
1157         if (hh) {
1158                 atomic_inc(&hh->hh_refcnt);
1159                 dst->hh = hh;
1160         }
1161 }
1162
1163 /* This function can be used in contexts, where only old dev_queue_xmit
1164    worked, f.e. if you want to override normal output path (eql, shaper),
1165    but resolution is not made yet.
1166  */
1167
1168 int neigh_compat_output(struct sk_buff *skb)
1169 {
1170         struct net_device *dev = skb->dev;
1171
1172         __skb_pull(skb, skb_network_offset(skb));
1173
1174         if (dev_hard_header(skb, dev, ntohs(skb->protocol), NULL, NULL,
1175                             skb->len) < 0 &&
1176             dev->header_ops->rebuild(skb))
1177                 return 0;
1178
1179         return dev_queue_xmit(skb);
1180 }
1181 EXPORT_SYMBOL(neigh_compat_output);
1182
1183 /* Slow and careful. */
1184
/*
 * Output path for a neighbour that may still need resolution.
 *
 * Returns 0 when neigh_event_send() returns non-zero (nothing is
 * transmitted here), the result of ops->queue_xmit() when the hardware
 * header was built, or -EINVAL after freeing skb when there is no dst or
 * neighbour, or when dev_hard_header() fails.
 */
1185 int neigh_resolve_output(struct sk_buff *skb)
1186 {
1187         struct dst_entry *dst = skb->dst;
1188         struct neighbour *neigh;
1189         int rc = 0;
1190
1191         if (!dst || !(neigh = dst->neighbour))
1192                 goto discard;
1193
1194         __skb_pull(skb, skb_network_offset(skb));
1195
1196         if (!neigh_event_send(neigh, skb)) {
1197                 int err;
1198                 struct net_device *dev = neigh->dev;
                    /* The device can cache headers and dst has none yet: take
                     * the write lock and re-check dst->hh, since the first
                     * test was done without the lock.
                     */
1199                 if (dev->header_ops->cache && !dst->hh) {
1200                         write_lock_bh(&neigh->lock);
1201                         if (!dst->hh)
1202                                 neigh_hh_init(neigh, dst, dst->ops->protocol);
1203                         err = dev_hard_header(skb, dev, ntohs(skb->protocol),
1204                                               neigh->ha, NULL, skb->len);
1205                         write_unlock_bh(&neigh->lock);
1206                 } else {
                            /* Only neigh->ha is read here, so the read lock
                             * is taken instead.
                             */
1207                         read_lock_bh(&neigh->lock);
1208                         err = dev_hard_header(skb, dev, ntohs(skb->protocol),
1209                                               neigh->ha, NULL, skb->len);
1210                         read_unlock_bh(&neigh->lock);
1211                 }
1212                 if (err >= 0)
1213                         rc = neigh->ops->queue_xmit(skb);
1214                 else
1215                         goto out_kfree_skb;
1216         }
1217 out:
1218         return rc;
     /* Error exits live below the normal return and jump back up to "out". */
1219 discard:
1220         NEIGH_PRINTK1("neigh_resolve_output: dst=%p neigh=%p\n",
1221                       dst, dst ? dst->neighbour : NULL);
1222 out_kfree_skb:
1223         rc = -EINVAL;
1224         kfree_skb(skb);
1225         goto out;
1226 }
1227 EXPORT_SYMBOL(neigh_resolve_output);
1228
1229 /* As fast as possible without hh cache */
1230
1231 int neigh_connected_output(struct sk_buff *skb)
1232 {
1233         int err;
1234         struct dst_entry *dst = skb->dst;
1235         struct neighbour *neigh = dst->neighbour;
1236         struct net_device *dev = neigh->dev;
1237
1238         __skb_pull(skb, skb_network_offset(skb));
1239
1240         read_lock_bh(&neigh->lock);
1241         err = dev_hard_header(skb, dev, ntohs(skb->protocol),
1242                               neigh->ha, NULL, skb->len);
1243         read_unlock_bh(&neigh->lock);
1244         if (err >= 0)
1245                 err = neigh->ops->queue_xmit(skb);
1246         else {
1247                 err = -EINVAL;
1248                 kfree_skb(skb);
1249         }
1250         return err;
1251 }
1252 EXPORT_SYMBOL(neigh_connected_output);
1253
/*
 * Timer callback for tbl->proxy_timer; @arg is the neigh_table pointer.
 *
 * Walks tbl->proxy_queue under its lock. Every skb whose sched_next time
 * has been reached is unlinked and either passed to tbl->proxy_redo (when
 * that hook is set and the device is running) or freed. The timer is then
 * re-armed for the smallest remaining delay, if any skb is still queued.
 */
1254 static void neigh_proxy_process(unsigned long arg)
1255 {
1256         struct neigh_table *tbl = (struct neigh_table *)arg;
1257         long sched_next = 0;
1258         unsigned long now = jiffies;
1259         struct sk_buff *skb, *n;
1260
1261         spin_lock(&tbl->proxy_queue.lock);
1262
1263         skb_queue_walk_safe(&tbl->proxy_queue, skb, n) {
                    /* Signed difference: <= 0 means the deadline is reached. */
1264                 long tdif = NEIGH_CB(skb)->sched_next - now;
1265
1266                 if (tdif <= 0) {
                            /* skb->dev is read before the skb is handed off
                             * or freed; it is needed for dev_put() below.
                             */
1267                         struct net_device *dev = skb->dev;
1268                         __skb_unlink(skb, &tbl->proxy_queue);
1269                         if (tbl->proxy_redo && netif_running(dev))
1270                                 tbl->proxy_redo(skb);
1271                         else
1272                                 kfree_skb(skb);
1273
                            /* Pairs with the dev_hold() in pneigh_enqueue(). */
1274                         dev_put(dev);
1275                 } else if (!sched_next || tdif < sched_next)
1276                         sched_next = tdif;
1277         }
1278         del_timer(&tbl->proxy_timer);
1279         if (sched_next)
1280                 mod_timer(&tbl->proxy_timer, jiffies + sched_next);
1281         spin_unlock(&tbl->proxy_queue.lock);
1282 }
1283
/*
 * Park @skb on tbl->proxy_queue so that neigh_proxy_process() handles it
 * after a random delay of fewer than p->proxy_delay jiffies.
 *
 * The skb is freed instead when the queue already holds more than
 * p->proxy_qlen packets. On success the skb's dst reference is released
 * and a reference on skb->dev is taken for as long as the skb is queued.
 */
1284 void pneigh_enqueue(struct neigh_table *tbl, struct neigh_parms *p,
1285                     struct sk_buff *skb)
1286 {
1287         unsigned long now = jiffies;
            /* NOTE(review): modulo by p->proxy_delay assumes it is non-zero;
             * confirm that every caller guarantees this.
             */
1288         unsigned long sched_next = now + (net_random() % p->proxy_delay);
1289
            /* Queue length is read here without holding proxy_queue.lock. */
1290         if (tbl->proxy_queue.qlen > p->proxy_qlen) {
1291                 kfree_skb(skb);
1292                 return;
1293         }
1294
1295         NEIGH_CB(skb)->sched_next = sched_next;
1296         NEIGH_CB(skb)->flags |= LOCALLY_ENQUEUED;
1297
1298         spin_lock(&tbl->proxy_queue.lock);
            /* If a timer was pending and would fire earlier than this skb's
             * deadline, keep that earlier expiry when re-arming below.
             */
1299         if (del_timer(&tbl->proxy_timer)) {
1300                 if (time_before(tbl->proxy_timer.expires, sched_next))
1301                         sched_next = tbl->proxy_timer.expires;
1302         }
1303         dst_release(skb->dst);
1304         skb->dst = NULL;
1305         dev_hold(skb->dev);
1306         __skb_queue_tail(&tbl->proxy_queue, skb);
1307         mod_timer(&tbl->proxy_timer, sched_next);
1308         spin_unlock(&tbl->proxy_queue.lock);
1309 }
1310 EXPORT_SYMBOL(pneigh_enqueue);
1311
1312 static inline struct neigh_parms *lookup_neigh_params(struct neigh_table *tbl,
1313                                                       struct net *net, int ifindex)
1314 {
1315         struct neigh_parms *p;
1316
1317         for (p = &tbl->parms; p; p = p->next) {
1318                 if ((p->dev && p->dev->ifindex == ifindex && net_eq(neigh_parms_net(p), net)) ||
1319                     (!p->dev && !ifindex))
1320                         return p;
1321         }
1322
1323         return NULL;
1324 }
1325
1326 struct neigh_parms *neigh_parms_alloc(struct net_device *dev,
1327                                       struct neigh_table *tbl)
1328 {
1329         struct neigh_parms *p, *ref;
1330         struct net *net = dev_net(dev);
1331         const struct net_device_ops *ops = dev->netdev_ops;
1332
1333         ref = lookup_neigh_params(tbl, net, 0);
1334         if (!ref)
1335                 return NULL;
1336
1337         p = kmemdup(ref, sizeof(*p), GFP_KERNEL);
1338         if (p) {
1339                 p->tbl            = tbl;
1340                 atomic_set(&p->refcnt, 1);
1341                 p->reachable_time =
1342                                 neigh_rand_reach_time(p->base_reachable_time);
1343
1344                 if (ops->ndo_neigh_setup && ops->ndo_neigh_setup(dev, p)) {
1345                         kfree(p);
1346                         return NULL;
1347                 }
1348
1349                 dev_hold(dev);
1350                 p->dev = dev;
1351                 write_pnet(&p->net, hold_net(net));
1352                 p->sysctl_table = NULL;
1353                 write_lock_bh(&tbl->lock);
1354                 p->next         = tbl->parms.next;
1355                 tbl->parms.next = p;
1356                 write_unlock_bh(&tbl->lock);
1357         }
1358         return p;
1359 }
1360 EXPORT_SYMBOL(neigh_parms_alloc);
1361
1362 static void neigh_rcu_free_parms(struct rcu_head *head)
1363 {
1364         struct neigh_parms *parms =
1365                 container_of(head, struct neigh_parms, rcu_head);
1366
1367         neigh_parms_put(parms);
1368 }
1369
1370 void neigh_parms_release(struct neigh_table *tbl, struct neigh_parms *parms)
1371 {
1372         struct neigh_parms **p;
1373
1374         if (!parms || parms == &tbl->parms)
1375                 return;
1376         write_lock_bh(&tbl->lock);
1377         for (p = &tbl->parms.next; *p; p = &(*p)->next) {
1378                 if (*p == parms) {
1379                         *p = parms->next;
1380                         parms->dead = 1;
1381                         write_unlock_bh(&tbl->lock);
1382                         if (parms->dev)
1383                                 dev_put(parms->dev);
1384                         call_rcu(&parms->rcu_head, neigh_rcu_free_parms);
1385                         return;
1386                 }
1387         }
1388         write_unlock_bh(&tbl->lock);
1389         NEIGH_PRINTK1("neigh_parms_release: not found\n");
1390 }
1391 EXPORT_SYMBOL(neigh_parms_release);
1392
/*
 * Final teardown of a neigh_parms: release its network namespace, then
 * free the structure itself.
 */
static void neigh_parms_destroy(struct neigh_parms *parms)
{
	struct net *net = neigh_parms_net(parms);

	release_net(net);
	kfree(parms);
}
1398
/* Lock class key passed to skb_queue_head_init_class() for every table's
 * proxy_queue.
 */
1399 static struct lock_class_key neigh_table_proxy_queue_class;
1400
/*
 * Initialise a neighbour table without adding it to the global
 * neigh_tables list (neigh_table_init() does that on top of this).
 *
 * Sets up the default parms, the entry slab cache (unless the caller
 * already supplied one), per-cpu statistics, the /proc statistics entry,
 * both hash tables, the gc and proxy timers and the proxy queue.
 * Failures to allocate statistics, the proc entry or the hash tables are
 * fatal: the function panics instead of returning an error.
 */
1401 void neigh_table_init_no_netlink(struct neigh_table *tbl)
1402 {
1403         unsigned long now = jiffies;
1404         unsigned long phsize;
1405
1406         write_pnet(&tbl->parms.net, &init_net);
1407         atomic_set(&tbl->parms.refcnt, 1);
1408         tbl->parms.reachable_time =
1409                           neigh_rand_reach_time(tbl->parms.base_reachable_time);
1410
            /* No NULL check follows; SLAB_PANIC is passed, presumably making
             * a failed cache creation fatal inside kmem_cache_create().
             */
1411         if (!tbl->kmem_cachep)
1412                 tbl->kmem_cachep =
1413                         kmem_cache_create(tbl->id, tbl->entry_size, 0,
1414                                           SLAB_HWCACHE_ALIGN|SLAB_PANIC,
1415                                           NULL);
1416         tbl->stats = alloc_percpu(struct neigh_statistics);
1417         if (!tbl->stats)
1418                 panic("cannot create neighbour cache statistics");
1419
1420 #ifdef CONFIG_PROC_FS
1421         if (!proc_create_data(tbl->id, 0, init_net.proc_net_stat,
1422                               &neigh_stat_seq_fops, tbl))
1423                 panic("cannot create neighbour proc dir entry");
1424 #endif
1425
            /* Start with hash_mask + 1 == 2 buckets in the main hash. */
1426         tbl->hash_mask = 1;
1427         tbl->hash_buckets = neigh_hash_alloc(tbl->hash_mask + 1);
1428
1429         phsize = (PNEIGH_HASHMASK + 1) * sizeof(struct pneigh_entry *);
1430         tbl->phash_buckets = kzalloc(phsize, GFP_KERNEL);
1431
1432         if (!tbl->hash_buckets || !tbl->phash_buckets)
1433                 panic("cannot allocate neighbour cache hashes");
1434
1435         get_random_bytes(&tbl->hash_rnd, sizeof(tbl->hash_rnd));
1436
1437         rwlock_init(&tbl->lock);
            /* The gc timer is armed immediately, to fire at now + 1. */
1438         setup_timer(&tbl->gc_timer, neigh_periodic_timer, (unsigned long)tbl);
1439         tbl->gc_timer.expires  = now + 1;
1440         add_timer(&tbl->gc_timer);
1441
            /* The proxy timer is only set up here, not started. */
1442         setup_timer(&tbl->proxy_timer, neigh_proxy_process, (unsigned long)tbl);
1443         skb_queue_head_init_class(&tbl->proxy_queue,
1444                         &neigh_table_proxy_queue_class);
1445
1446         tbl->last_flush = now;
1447         tbl->last_rand  = now + tbl->parms.reachable_time * 20;
1448 }
1449 EXPORT_SYMBOL(neigh_table_init_no_netlink);
1450
1451 void neigh_table_init(struct neigh_table *tbl)
1452 {
1453         struct neigh_table *tmp;
1454
1455         neigh_table_init_no_netlink(tbl);
1456         write_lock(&neigh_tbl_lock);
1457         for (tmp = neigh_tables; tmp; tmp = tmp->next) {
1458                 if (tmp->family == tbl->family)
1459                         break;
1460         }
1461         tbl->next       = neigh_tables;
1462         neigh_tables    = tbl;
1463         write_unlock(&neigh_tbl_lock);
1464
1465         if (unlikely(tmp)) {
1466                 printk(KERN_ERR "NEIGH: Registering multiple tables for "
1467                        "family %d\n", tbl->family);
1468                 dump_stack();
1469         }
1470 }
1471 EXPORT_SYMBOL(neigh_table_init);
1472
1473 int neigh_table_clear(struct neigh_table *tbl)
1474 {
1475         struct neigh_table **tp;
1476
1477         /* It is not clean... Fix it to unload IPv6 module safely */
1478         del_timer_sync(&tbl->gc_timer);
1479         del_timer_sync(&tbl->proxy_timer);
1480         pneigh_queue_purge(&tbl->proxy_queue);
1481         neigh_ifdown(tbl, NULL);
1482         if (atomic_read(&tbl->entries))
1483                 printk(KERN_CRIT "neighbour leakage\n");
1484         write_lock(&neigh_tbl_lock);
1485         for (tp = &neigh_tables; *tp; tp = &(*tp)->next) {
1486                 if (*tp == tbl) {
1487                         *tp = tbl->next;
1488                         break;
1489                 }
1490         }
1491         write_unlock(&neigh_tbl_lock);
1492
1493         neigh_hash_free(tbl->hash_buckets, tbl->hash_mask + 1);
1494         tbl->hash_buckets = NULL;
1495
1496         kfree(tbl->phash_buckets);
1497         tbl->phash_buckets = NULL;
1498
1499         remove_proc_entry(tbl->id, init_net.proc_net_stat);
1500
1501         free_percpu(tbl->stats);
1502         tbl->stats = NULL;
1503
1504         kmem_cache_destroy(tbl->kmem_cachep);
1505         tbl->kmem_cachep = NULL;
1506
1507         return 0;
1508 }
1509 EXPORT_SYMBOL(neigh_table_clear);
1510
/*
 * Netlink handler that deletes a neighbour (or proxy neighbour) entry.
 *
 * Returns:
 *   -EINVAL        message shorter than struct ndmsg, NDA_DST missing or
 *                  shorter than the table's key_len, or no device given
 *                  for a non-proxy entry;
 *   -ENODEV        ndm_ifindex does not name a device in this namespace;
 *   -ENOENT        no matching neighbour entry;
 *   -EAFNOSUPPORT  no table registered for ndm_family;
 *   otherwise the result of pneigh_delete() or neigh_update().
 */
1511 static int neigh_delete(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
1512 {
1513         struct net *net = sock_net(skb->sk);
1514         struct ndmsg *ndm;
1515         struct nlattr *dst_attr;
1516         struct neigh_table *tbl;
1517         struct net_device *dev = NULL;
1518         int err = -EINVAL;
1519
1520         if (nlmsg_len(nlh) < sizeof(*ndm))
1521                 goto out;
1522
1523         dst_attr = nlmsg_find_attr(nlh, sizeof(*ndm), NDA_DST);
1524         if (dst_attr == NULL)
1525                 goto out;
1526
1527         ndm = nlmsg_data(nlh);
1528         if (ndm->ndm_ifindex) {
1529                 dev = dev_get_by_index(net, ndm->ndm_ifindex);
1530                 if (dev == NULL) {
1531                         err = -ENODEV;
1532                         goto out;
1533                 }
1534         }
1535
1536         read_lock(&neigh_tbl_lock);
1537         for (tbl = neigh_tables; tbl; tbl = tbl->next) {
1538                 struct neighbour *neigh;
1539
1540                 if (tbl->family != ndm->ndm_family)
1541                         continue;
                    /* Matching table found: the list lock is dropped here and
                     * every path below leaves the loop through a goto.
                     */
1542                 read_unlock(&neigh_tbl_lock);
1543
1544                 if (nla_len(dst_attr) < tbl->key_len)
1545                         goto out_dev_put;
1546
1547                 if (ndm->ndm_flags & NTF_PROXY) {
1548                         err = pneigh_delete(tbl, net, nla_data(dst_attr), dev);
1549                         goto out_dev_put;
1550                 }
1551
1552                 if (dev == NULL)
1553                         goto out_dev_put;
1554
1555                 neigh = neigh_lookup(tbl, nla_data(dst_attr), dev);
1556                 if (neigh == NULL) {
1557                         err = -ENOENT;
1558                         goto out_dev_put;
1559                 }
1560
                    /* Deletion is an administrative update of the entry to
                     * NUD_FAILED; the lookup reference is dropped afterwards.
                     */
1561                 err = neigh_update(neigh, NULL, NUD_FAILED,
1562                                    NEIGH_UPDATE_F_OVERRIDE |
1563                                    NEIGH_UPDATE_F_ADMIN);
1564                 neigh_release(neigh);
1565                 goto out_dev_put;
1566         }
            /* Reached only when no table matched the requested family. */
1567         read_unlock(&neigh_tbl_lock);
1568         err = -EAFNOSUPPORT;
1569
1570 out_dev_put:
1571         if (dev)
1572                 dev_put(dev);
1573 out:
1574         return err;
1575 }
1576
/*
 * Netlink handler that creates or updates a neighbour (or proxy
 * neighbour) entry.
 *
 * Returns:
 *   the nlmsg_parse() error when attribute parsing fails;
 *   -EINVAL        NDA_DST missing or shorter than the table's key_len,
 *                  NDA_LLADDR shorter than dev->addr_len, or no device
 *                  given for a non-proxy entry;
 *   -ENODEV        ndm_ifindex does not name a device in this namespace;
 *   -ENOBUFS       pneigh_lookup() returned no proxy entry;
 *   -ENOENT        entry does not exist and NLM_F_CREATE is not set;
 *   -EEXIST        entry exists and NLM_F_EXCL is set;
 *   the PTR_ERR() of __neigh_lookup_errno() when creation fails;
 *   -EAFNOSUPPORT  no table registered for ndm_family;
 *   otherwise the result of neigh_update() (0 for a proxy entry).
 */
1577 static int neigh_add(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
1578 {
1579         struct net *net = sock_net(skb->sk);
1580         struct ndmsg *ndm;
1581         struct nlattr *tb[NDA_MAX+1];
1582         struct neigh_table *tbl;
1583         struct net_device *dev = NULL;
1584         int err;
1585
1586         err = nlmsg_parse(nlh, sizeof(*ndm), tb, NDA_MAX, NULL);
1587         if (err < 0)
1588                 goto out;
1589
            /* err stays -EINVAL for all validation failures further down
             * that jump out without assigning it.
             */
1590         err = -EINVAL;
1591         if (tb[NDA_DST] == NULL)
1592                 goto out;
1593
1594         ndm = nlmsg_data(nlh);
1595         if (ndm->ndm_ifindex) {
1596                 dev = dev_get_by_index(net, ndm->ndm_ifindex);
1597                 if (dev == NULL) {
1598                         err = -ENODEV;
1599                         goto out;
1600                 }
1601
1602                 if (tb[NDA_LLADDR] && nla_len(tb[NDA_LLADDR]) < dev->addr_len)
1603                         goto out_dev_put;
1604         }
1605
1606         read_lock(&neigh_tbl_lock);
1607         for (tbl = neigh_tables; tbl; tbl = tbl->next) {
1608                 int flags = NEIGH_UPDATE_F_ADMIN | NEIGH_UPDATE_F_OVERRIDE;
1609                 struct neighbour *neigh;
1610                 void *dst, *lladdr;
1611
1612                 if (tbl->family != ndm->ndm_family)
1613                         continue;
                    /* Matching table found: the list lock is dropped here and
                     * every path below leaves the loop through a goto.
                     */
1614                 read_unlock(&neigh_tbl_lock);
1615
1616                 if (nla_len(tb[NDA_DST]) < tbl->key_len)
1617                         goto out_dev_put;
1618                 dst = nla_data(tb[NDA_DST]);
1619                 lladdr = tb[NDA_LLADDR] ? nla_data(tb[NDA_LLADDR]) : NULL;
1620
1621                 if (ndm->ndm_flags & NTF_PROXY) {
1622                         struct pneigh_entry *pn;
1623
1624                         err = -ENOBUFS;
1625                         pn = pneigh_lookup(tbl, net, dst, dev, 1);
1626                         if (pn) {
1627                                 pn->flags = ndm->ndm_flags;
1628                                 err = 0;
1629                         }
1630                         goto out_dev_put;
1631                 }
1632
1633                 if (dev == NULL)
1634                         goto out_dev_put;
1635
1636                 neigh = neigh_lookup(tbl, dst, dev);
1637                 if (neigh == NULL) {
1638                         if (!(nlh->nlmsg_flags & NLM_F_CREATE)) {
1639                                 err = -ENOENT;
1640                                 goto out_dev_put;
1641                         }
1642
1643                         neigh = __neigh_lookup_errno(tbl, dst, dev);
1644                         if (IS_ERR(neigh)) {
1645                                 err = PTR_ERR(neigh);
1646                                 goto out_dev_put;
1647                         }
1648                 } else {
1649                         if (nlh->nlmsg_flags & NLM_F_EXCL) {
1650                                 err = -EEXIST;
1651                                 neigh_release(neigh);
1652                                 goto out_dev_put;
1653                         }
1654
                            /* Without NLM_F_REPLACE an existing link-layer
                             * address must not be overridden.
                             */
1655                         if (!(nlh->nlmsg_flags & NLM_F_REPLACE))
1656                                 flags &= ~NEIGH_UPDATE_F_OVERRIDE;
1657                 }
1658
1659                 err = neigh_update(neigh, lladdr, ndm->ndm_state, flags);
1660                 neigh_release(neigh);
1661                 goto out_dev_put;
1662         }
1663
            /* Reached only when no table matched the requested family. */
1664         read_unlock(&neigh_tbl_lock);
1665         err = -EAFNOSUPPORT;
1666
1667 out_dev_put:
1668         if (dev)
1669                 dev_put(dev);
1670 out:
1671         return err;
1672 }
1673
/*
 * Append one nested NDTA_PARMS attribute describing @parms to @skb.
 *
 * NDTPA_IFINDEX is emitted only for parms bound to a device.
 *
 * Returns the result of nla_nest_end() on success, -ENOBUFS when the nest
 * cannot be started, or -EMSGSIZE (after cancelling the nest) when the
 * nla_put_failure label is reached.
 */
1674 static int neightbl_fill_parms(struct sk_buff *skb, struct neigh_parms *parms)
1675 {
1676         struct nlattr *nest;
1677
1678         nest = nla_nest_start(skb, NDTA_PARMS);
1679         if (nest == NULL)
1680                 return -ENOBUFS;
1681
            /* NOTE(review): the NLA_PUT_* macros presumably jump to
             * nla_put_failure when the attribute does not fit; the label has
             * no explicit goto in this function. Confirm against the macro
             * definitions.
             */
1682         if (parms->dev)
1683                 NLA_PUT_U32(skb, NDTPA_IFINDEX, parms->dev->ifindex);
1684
1685         NLA_PUT_U32(skb, NDTPA_REFCNT, atomic_read(&parms->refcnt));
1686         NLA_PUT_U32(skb, NDTPA_QUEUE_LEN, parms->queue_len);
1687         NLA_PUT_U32(skb, NDTPA_PROXY_QLEN, parms->proxy_qlen);
1688         NLA_PUT_U32(skb, NDTPA_APP_PROBES, parms->app_probes);
1689         NLA_PUT_U32(skb, NDTPA_UCAST_PROBES, parms->ucast_probes);
1690         NLA_PUT_U32(skb, NDTPA_MCAST_PROBES, parms->mcast_probes);
1691         NLA_PUT_MSECS(skb, NDTPA_REACHABLE_TIME, parms->reachable_time);
1692         NLA_PUT_MSECS(skb, NDTPA_BASE_REACHABLE_TIME,
1693                       parms->base_reachable_time);
1694         NLA_PUT_MSECS(skb, NDTPA_GC_STALETIME, parms->gc_staletime);
1695         NLA_PUT_MSECS(skb, NDTPA_DELAY_PROBE_TIME, parms->delay_probe_time);
1696         NLA_PUT_MSECS(skb, NDTPA_RETRANS_TIME, parms->retrans_time);
1697         NLA_PUT_MSECS(skb, NDTPA_ANYCAST_DELAY, parms->anycast_delay);
1698         NLA_PUT_MSECS(skb, NDTPA_PROXY_DELAY, parms->proxy_delay);
1699         NLA_PUT_MSECS(skb, NDTPA_LOCKTIME, parms->locktime);
1700
1701         return nla_nest_end(skb, nest);
1702
1703 nla_put_failure:
1704         nla_nest_cancel(skb, nest);
1705         return -EMSGSIZE;
1706 }
1707
1708 static int neightbl_fill_info(struct sk_buff *skb, struct neigh_table *tbl,
1709                               u32 pid, u32 seq, int type, int flags)
1710 {
1711         struct nlmsghdr *nlh;
1712         struct ndtmsg *ndtmsg;
1713
1714         nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ndtmsg), flags);
1715         if (nlh == NULL)
1716                 return -EMSGSIZE;
1717
1718         ndtmsg = nlmsg_data(nlh);
1719
1720         read_lock_bh(&tbl->lock);
1721         ndtmsg->ndtm_family = tbl->family;
1722         ndtmsg->ndtm_pad1   = 0;
1723         ndtmsg->ndtm_pad2   = 0;
1724
1725         NLA_PUT_STRING(skb, NDTA_NAME, tbl->id);
1726         NLA_PUT_MSECS(skb, NDTA_GC_INTERVAL, tbl->gc_interval);
1727         NLA_PUT_U32(skb, NDTA_THRESH1, tbl->gc_thresh1);
1728         NLA_PUT_U32(skb, NDTA_THRESH2, tbl->gc_thresh2);
1729         NLA_PUT_U32(skb, NDTA_THRESH3, tbl->gc_thresh3);
1730
1731         {
1732                 unsigned long now = jiffies;
1733                 unsigned int flush_delta = now - tbl->last_flush;
1734                 unsigned int rand_delta = now - tbl->last_rand;
1735
1736                 struct ndt_config ndc = {
1737                         .ndtc_key_len           = tbl->key_len,
1738                         .ndtc_entry_size        = tbl->entry_size,
1739                         .ndtc_entries           = atomic_read(&tbl->entries),
1740                         .ndtc_last_flush        = jiffies_to_msecs(flush_delta),
1741                         .ndtc_last_rand         = jiffies_to_msecs(rand_delta),
1742                         .ndtc_hash_rnd          = tbl->hash_rnd,
1743                         .ndtc_hash_mask         = tbl->hash_mask,
1744                         .ndtc_hash_chain_gc     = tbl->hash_chain_gc,
1745                         .ndtc_proxy_qlen        = tbl->proxy_queue.qlen,
1746                 };
1747
1748                 NLA_PUT(skb, NDTA_CONFIG, sizeof(ndc), &ndc);
1749         }
1750
1751         {
1752                 int cpu;
1753                 struct ndt_stats ndst;
1754
1755                 memset(&ndst, 0, sizeof(ndst));
1756
1757                 for_each_possible_cpu(cpu) {
1758                         struct neigh_statistics *st;
1759
1760                         st = per_cpu_ptr(tbl->stats, cpu);
1761                         ndst.ndts_allocs                += st->allocs;
1762                         ndst.ndts_destroys              += st->destroys;
1763                         ndst.ndts_hash_grows            += st->hash_grows;
1764                         ndst.ndts_res_failed            += st->res_failed;
1765                         ndst.ndts_lookups               += st->lookups;
1766                         ndst.ndts_hits                  += st->hits;
1767                         ndst.ndts_rcv_probes_mcast      += st->rcv_probes_mcast;
1768                         ndst.ndts_rcv_probes_ucast      += st->rcv_probes_ucast;
1769                         ndst.ndts_periodic_gc_runs      += st->periodic_gc_runs;
1770                         ndst.ndts_forced_gc_runs        += st->forced_gc_runs;
1771                 }
1772
1773                 NLA_PUT(skb, NDTA_STATS, sizeof(ndst), &ndst);
1774         }
1775
1776         BUG_ON(tbl->parms.dev);
1777         if (neightbl_fill_parms(skb, &tbl->parms) < 0)
1778                 goto nla_put_failure;
1779
1780         read_unlock_bh(&tbl->lock);
1781         return nlmsg_end(skb, nlh);
1782
1783 nla_put_failure:
1784         read_unlock_bh(&tbl->lock);
1785         nlmsg_cancel(skb, nlh);
1786         return -EMSGSIZE;
1787 }
1788
1789 static int neightbl_fill_param_info(struct sk_buff *skb,
1790                                     struct neigh_table *tbl,
1791                                     struct neigh_parms *parms,
1792                                     u32 pid, u32 seq, int type,
1793                                     unsigned int flags)
1794 {
1795         struct ndtmsg *ndtmsg;
1796         struct nlmsghdr *nlh;
1797
1798         nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ndtmsg), flags);
1799         if (nlh == NULL)
1800                 return -EMSGSIZE;
1801
1802         ndtmsg = nlmsg_data(nlh);
1803
1804         read_lock_bh(&tbl->lock);
1805         ndtmsg->ndtm_family = tbl->family;
1806         ndtmsg->ndtm_pad1   = 0;
1807         ndtmsg->ndtm_pad2   = 0;
1808
1809         if (nla_put_string(skb, NDTA_NAME, tbl->id) < 0 ||
1810             neightbl_fill_parms(skb, parms) < 0)
1811                 goto errout;
1812
1813         read_unlock_bh(&tbl->lock);
1814         return nlmsg_end(skb, nlh);
1815 errout:
1816         read_unlock_bh(&tbl->lock);
1817         nlmsg_cancel(skb, nlh);
1818         return -EMSGSIZE;
1819 }
1820
1821 static const struct nla_policy nl_neightbl_policy[NDTA_MAX+1] = {
1822         [NDTA_NAME]             = { .type = NLA_STRING },
1823         [NDTA_THRESH1]          = { .type = NLA_U32 },
1824         [NDTA_THRESH2]          = { .type = NLA_U32 },
1825         [NDTA_THRESH3]          = { .type = NLA_U32 },
1826         [NDTA_GC_INTERVAL]      = { .type = NLA_U64 },
1827         [NDTA_PARMS]            = { .type = NLA_NESTED },
1828 };
1829
1830 static const struct nla_policy nl_ntbl_parm_policy[NDTPA_MAX+1] = {
1831         [NDTPA_IFINDEX]                 = { .type = NLA_U32 },
1832         [NDTPA_QUEUE_LEN]               = { .type = NLA_U32 },
1833         [NDTPA_PROXY_QLEN]              = { .type = NLA_U32 },
1834         [NDTPA_APP_PROBES]              = { .type = NLA_U32 },
1835         [NDTPA_UCAST_PROBES]            = { .type = NLA_U32 },
1836         [NDTPA_MCAST_PROBES]            = { .type = NLA_U32 },
1837         [NDTPA_BASE_REACHABLE_TIME]     = { .type = NLA_U64 },
1838         [NDTPA_GC_STALETIME]            = { .type = NLA_U64 },
1839         [NDTPA_DELAY_PROBE_TIME]        = { .type = NLA_U64 },
1840         [NDTPA_RETRANS_TIME]            = { .type = NLA_U64 },
1841         [NDTPA_ANYCAST_DELAY]           = { .type = NLA_U64 },
1842         [NDTPA_PROXY_DELAY]             = { .type = NLA_U64 },
1843         [NDTPA_LOCKTIME]                = { .type = NLA_U64 },
1844 };
1845
1846 static int neightbl_set(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
1847 {
1848         struct net *net = sock_net(skb->sk);
1849         struct neigh_table *tbl;
1850         struct ndtmsg *ndtmsg;
1851         struct nlattr *tb[NDTA_MAX+1];
1852         int err;
1853
1854         err = nlmsg_parse(nlh, sizeof(*ndtmsg), tb, NDTA_MAX,
1855                           nl_neightbl_policy);
1856         if (err < 0)
1857                 goto errout;
1858
1859         if (tb[NDTA_NAME] == NULL) {
1860                 err = -EINVAL;
1861                 goto errout;
1862         }
1863
1864         ndtmsg = nlmsg_data(nlh);
1865         read_lock(&neigh_tbl_lock);
1866         for (tbl = neigh_tables; tbl; tbl = tbl->next) {
1867                 if (ndtmsg->ndtm_family && tbl->family != ndtmsg->ndtm_family)
1868                         continue;
1869
1870                 if (nla_strcmp(tb[NDTA_NAME], tbl->id) == 0)
1871                         break;
1872         }
1873
1874         if (tbl == NULL) {
1875                 err = -ENOENT;
1876                 goto errout_locked;
1877         }
1878
1879         /*
1880          * We acquire tbl->lock to be nice to the periodic timers and
1881          * make sure they always see a consistent set of values.
1882          */
1883         write_lock_bh(&tbl->lock);
1884
1885         if (tb[NDTA_PARMS]) {
1886                 struct nlattr *tbp[NDTPA_MAX+1];
1887                 struct neigh_parms *p;
1888                 int i, ifindex = 0;
1889
1890                 err = nla_parse_nested(tbp, NDTPA_MAX, tb[NDTA_PARMS],
1891                                        nl_ntbl_parm_policy);
1892                 if (err < 0)
1893                         goto errout_tbl_lock;
1894
1895                 if (tbp[NDTPA_IFINDEX])
1896                         ifindex = nla_get_u32(tbp[NDTPA_IFINDEX]);
1897
1898                 p = lookup_neigh_params(tbl, net, ifindex);
1899                 if (p == NULL) {
1900                         err = -ENOENT;
1901                         goto errout_tbl_lock;
1902                 }
1903
1904                 for (i = 1; i <= NDTPA_MAX; i++) {
1905                         if (tbp[i] == NULL)
1906                                 continue;
1907
1908                         switch (i) {
1909                         case NDTPA_QUEUE_LEN:
1910                                 p->queue_len = nla_get_u32(tbp[i]);
1911                                 break;
1912                         case NDTPA_PROXY_QLEN:
1913                                 p->proxy_qlen = nla_get_u32(tbp[i]);
1914                                 break;
1915                         case NDTPA_APP_PROBES:
1916                                 p->app_probes = nla_get_u32(tbp[i]);
1917                                 break;
1918                         case NDTPA_UCAST_PROBES:
1919                                 p->ucast_probes = nla_get_u32(tbp[i]);
1920                                 break;
1921                         case NDTPA_MCAST_PROBES:
1922                                 p->mcast_probes = nla_get_u32(tbp[i]);
1923                                 break;
1924                         case NDTPA_BASE_REACHABLE_TIME:
1925                                 p->base_reachable_time = nla_get_msecs(tbp[i]);
1926                                 break;
1927                         case NDTPA_GC_STALETIME:
1928                                 p->gc_staletime = nla_get_msecs(tbp[i]);
1929                                 break;
1930                         case NDTPA_DELAY_PROBE_TIME:
1931                                 p->delay_probe_time = nla_get_msecs(tbp[i]);
1932                                 break;
1933                         case NDTPA_RETRANS_TIME:
1934                                 p->retrans_time = nla_get_msecs(tbp[i]);
1935                                 break;
1936                         case NDTPA_ANYCAST_DELAY:
1937                                 p->anycast_delay = nla_get_msecs(tbp[i]);
1938                                 break;
1939                         case NDTPA_PROXY_DELAY:
1940                                 p->proxy_delay = nla_get_msecs(tbp[i]);
1941                                 break;
1942                         case NDTPA_LOCKTIME:
1943                                 p->locktime = nla_get_msecs(tbp[i]);
1944                                 break;
1945                         }
1946                 }
1947         }
1948
1949         if (tb[NDTA_THRESH1])
1950                 tbl->gc_thresh1 = nla_get_u32(tb[NDTA_THRESH1]);
1951
1952         if (tb[NDTA_THRESH2])
1953                 tbl->gc_thresh2 = nla_get_u32(tb[NDTA_THRESH2]);
1954
1955         if (tb[NDTA_THRESH3])
1956                 tbl->gc_thresh3 = nla_get_u32(tb[NDTA_THRESH3]);
1957
1958         if (tb[NDTA_GC_INTERVAL])
1959                 tbl->gc_interval = nla_get_msecs(tb[NDTA_GC_INTERVAL]);
1960
1961         err = 0;
1962
1963 errout_tbl_lock:
1964         write_unlock_bh(&tbl->lock);
1965 errout_locked:
1966         read_unlock(&neigh_tbl_lock);
1967 errout:
1968         return err;
1969 }
1970
1971 static int neightbl_dump_info(struct sk_buff *skb, struct netlink_callback *cb)
1972 {
1973         struct net *net = sock_net(skb->sk);
1974         int family, tidx, nidx = 0;
1975         int tbl_skip = cb->args[0];
1976         int neigh_skip = cb->args[1];
1977         struct neigh_table *tbl;
1978
1979         family = ((struct rtgenmsg *) nlmsg_data(cb->nlh))->rtgen_family;
1980
1981         read_lock(&neigh_tbl_lock);
1982         for (tbl = neigh_tables, tidx = 0; tbl; tbl = tbl->next, tidx++) {
1983                 struct neigh_parms *p;
1984
1985                 if (tidx < tbl_skip || (family && tbl->family != family))
1986                         continue;
1987
1988                 if (neightbl_fill_info(skb, tbl, NETLINK_CB(cb->skb).pid,
1989                                        cb->nlh->nlmsg_seq, RTM_NEWNEIGHTBL,
1990                                        NLM_F_MULTI) <= 0)
1991                         break;
1992
1993                 for (nidx = 0, p = tbl->parms.next; p; p = p->next) {
1994                         if (!net_eq(neigh_parms_net(p), net))
1995                                 continue;
1996
1997                         if (nidx < neigh_skip)
1998                                 goto next;
1999
2000                         if (neightbl_fill_param_info(skb, tbl, p,
2001                                                      NETLINK_CB(cb->skb).pid,
2002                                                      cb->nlh->nlmsg_seq,
2003                                                      RTM_NEWNEIGHTBL,
2004                                                      NLM_F_MULTI) <= 0)
2005                                 goto out;
2006                 next:
2007                         nidx++;
2008                 }
2009
2010                 neigh_skip = 0;
2011         }
2012 out:
2013         read_unlock(&neigh_tbl_lock);
2014         cb->args[0] = tidx;
2015         cb->args[1] = nidx;
2016
2017         return skb->len;
2018 }
2019
2020 static int neigh_fill_info(struct sk_buff *skb, struct neighbour *neigh,
2021                            u32 pid, u32 seq, int type, unsigned int flags)
2022 {
2023         unsigned long now = jiffies;
2024         struct nda_cacheinfo ci;
2025         struct nlmsghdr *nlh;
2026         struct ndmsg *ndm;
2027
2028         nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ndm), flags);
2029         if (nlh == NULL)
2030                 return -EMSGSIZE;
2031
2032         ndm = nlmsg_data(nlh);
2033         ndm->ndm_family  = neigh->ops->family;
2034         ndm->ndm_pad1    = 0;
2035         ndm->ndm_pad2    = 0;
2036         ndm->ndm_flags   = neigh->flags;
2037         ndm->ndm_type    = neigh->type;
2038         ndm->ndm_ifindex = neigh->dev->ifindex;
2039
2040         NLA_PUT(skb, NDA_DST, neigh->tbl->key_len, neigh->primary_key);
2041
2042         read_lock_bh(&neigh->lock);
2043         ndm->ndm_state   = neigh->nud_state;
2044         if ((neigh->nud_state & NUD_VALID) &&
2045             nla_put(skb, NDA_LLADDR, neigh->dev->addr_len, neigh->ha) < 0) {
2046                 read_unlock_bh(&neigh->lock);
2047                 goto nla_put_failure;
2048         }
2049
2050         ci.ndm_used      = jiffies_to_clock_t(now - neigh->used);
2051         ci.ndm_confirmed = jiffies_to_clock_t(now - neigh->confirmed);
2052         ci.ndm_updated   = jiffies_to_clock_t(now - neigh->updated);
2053         ci.ndm_refcnt    = atomic_read(&neigh->refcnt) - 1;
2054         read_unlock_bh(&neigh->lock);
2055
2056         NLA_PUT_U32(skb, NDA_PROBES, atomic_read(&neigh->probes));
2057         NLA_PUT(skb, NDA_CACHEINFO, sizeof(ci), &ci);
2058
2059         return nlmsg_end(skb, nlh);
2060
2061 nla_put_failure:
2062         nlmsg_cancel(skb, nlh);
2063         return -EMSGSIZE;
2064 }
2065
2066 static void neigh_update_notify(struct neighbour *neigh)
2067 {
2068         call_netevent_notifiers(NETEVENT_NEIGH_UPDATE, neigh);
2069         __neigh_notify(neigh, RTM_NEWNEIGH, 0);
2070 }
2071
2072 static int neigh_dump_table(struct neigh_table *tbl, struct sk_buff *skb,
2073                             struct netlink_callback *cb)
2074 {
2075         struct net * net = sock_net(skb->sk);
2076         struct neighbour *n;
2077         int rc, h, s_h = cb->args[1];
2078         int idx, s_idx = idx = cb->args[2];
2079
2080         read_lock_bh(&tbl->lock);
2081         for (h = 0; h <= tbl->hash_mask; h++) {
2082                 if (h < s_h)
2083                         continue;
2084                 if (h > s_h)
2085                         s_idx = 0;
2086                 for (n = tbl->hash_buckets[h], idx = 0; n; n = n->next) {
2087                         if (dev_net(n->dev) != net)
2088                                 continue;
2089                         if (idx < s_idx)
2090                                 goto next;
2091                         if (neigh_fill_info(skb, n, NETLINK_CB(cb->skb).pid,
2092                                             cb->nlh->nlmsg_seq,
2093                                             RTM_NEWNEIGH,
2094                                             NLM_F_MULTI) <= 0) {
2095                                 read_unlock_bh(&tbl->lock);
2096                                 rc = -1;
2097                                 goto out;
2098                         }
2099                 next:
2100                         idx++;
2101                 }
2102         }
2103         read_unlock_bh(&tbl->lock);
2104         rc = skb->len;
2105 out:
2106         cb->args[1] = h;
2107         cb->args[2] = idx;
2108         return rc;
2109 }
2110
2111 static int neigh_dump_info(struct sk_buff *skb, struct netlink_callback *cb)
2112 {
2113         struct neigh_table *tbl;
2114         int t, family, s_t;
2115
2116         read_lock(&neigh_tbl_lock);
2117         family = ((struct rtgenmsg *) nlmsg_data(cb->nlh))->rtgen_family;
2118         s_t = cb->args[0];
2119
2120         for (tbl = neigh_tables, t = 0; tbl; tbl = tbl->next, t++) {
2121                 if (t < s_t || (family && tbl->family != family))
2122                         continue;
2123                 if (t > s_t)
2124                         memset(&cb->args[1], 0, sizeof(cb->args) -
2125                                                 sizeof(cb->args[0]));
2126                 if (neigh_dump_table(tbl, skb, cb) < 0)
2127                         break;
2128         }
2129         read_unlock(&neigh_tbl_lock);
2130
2131         cb->args[0] = t;
2132         return skb->len;
2133 }
2134
2135 void neigh_for_each(struct neigh_table *tbl, void (*cb)(struct neighbour *, void *), void *cookie)
2136 {
2137         int chain;
2138
2139         read_lock_bh(&tbl->lock);
2140         for (chain = 0; chain <= tbl->hash_mask; chain++) {
2141                 struct neighbour *n;
2142
2143                 for (n = tbl->hash_buckets[chain]; n; n = n->next)
2144                         cb(n, cookie);
2145         }
2146         read_unlock_bh(&tbl->lock);
2147 }
2148 EXPORT_SYMBOL(neigh_for_each);
2149
2150 /* The tbl->lock must be held as a writer and BH disabled. */
2151 void __neigh_for_each_release(struct neigh_table *tbl,
2152                               int (*cb)(struct neighbour *))
2153 {
2154         int chain;
2155
2156         for (chain = 0; chain <= tbl->hash_mask; chain++) {
2157                 struct neighbour *n, **np;
2158
2159                 np = &tbl->hash_buckets[chain];
2160                 while ((n = *np) != NULL) {
2161                         int release;
2162
2163                         write_lock(&n->lock);
2164                         release = cb(n);
2165                         if (release) {
2166                                 *np = n->next;
2167                                 n->dead = 1;
2168                         } else
2169                                 np = &n->next;
2170                         write_unlock(&n->lock);
2171                         if (release)
2172                                 neigh_cleanup_and_release(n);
2173                 }
2174         }
2175 }
2176 EXPORT_SYMBOL(__neigh_for_each_release);
2177
2178 #ifdef CONFIG_PROC_FS
2179
2180 static struct neighbour *neigh_get_first(struct seq_file *seq)
2181 {
2182         struct neigh_seq_state *state = seq->private;
2183         struct net *net = seq_file_net(seq);
2184         struct neigh_table *tbl = state->tbl;
2185         struct neighbour *n = NULL;
2186         int bucket = state->bucket;
2187
2188         state->flags &= ~NEIGH_SEQ_IS_PNEIGH;
2189         for (bucket = 0; bucket <= tbl->hash_mask; bucket++) {
2190                 n = tbl->hash_buckets[bucket];
2191
2192                 while (n) {
2193                         if (!net_eq(dev_net(n->dev), net))
2194                                 goto next;
2195                         if (state->neigh_sub_iter) {
2196                                 loff_t fakep = 0;
2197                                 void *v;
2198
2199                                 v = state->neigh_sub_iter(state, n, &fakep);
2200                                 if (!v)
2201                                         goto next;
2202                         }
2203                         if (!(state->flags & NEIGH_SEQ_SKIP_NOARP))
2204                                 break;
2205                         if (n->nud_state & ~NUD_NOARP)
2206                                 break;
2207                 next:
2208                         n = n->next;
2209                 }
2210
2211                 if (n)
2212                         break;
2213         }
2214         state->bucket = bucket;
2215
2216         return n;
2217 }
2218
2219 static struct neighbour *neigh_get_next(struct seq_file *seq,
2220                                         struct neighbour *n,
2221                                         loff_t *pos)
2222 {
2223         struct neigh_seq_state *state = seq->private;
2224         struct net *net = seq_file_net(seq);
2225         struct neigh_table *tbl = state->tbl;
2226
2227         if (state->neigh_sub_iter) {
2228                 void *v = state->neigh_sub_iter(state, n, pos);
2229                 if (v)
2230                         return n;
2231         }
2232         n = n->next;
2233
2234         while (1) {
2235                 while (n) {
2236                         if (!net_eq(dev_net(n->dev), net))
2237                                 goto next;
2238                         if (state->neigh_sub_iter) {
2239                                 void *v = state->neigh_sub_iter(state, n, pos);
2240                                 if (v)
2241                                         return n;
2242                                 goto next;
2243                         }
2244                         if (!(state->flags & NEIGH_SEQ_SKIP_NOARP))
2245                                 break;
2246
2247                         if (n->nud_state & ~NUD_NOARP)
2248                                 break;
2249                 next:
2250                         n = n->next;
2251                 }
2252
2253                 if (n)
2254                         break;
2255
2256                 if (++state->bucket > tbl->hash_mask)
2257                         break;
2258
2259                 n = tbl->hash_buckets[state->bucket];
2260         }
2261
2262         if (n && pos)
2263                 --(*pos);
2264         return n;
2265 }
2266
2267 static struct neighbour *neigh_get_idx(struct seq_file *seq, loff_t *pos)
2268 {
2269         struct neighbour *n = neigh_get_first(seq);
2270
2271         if (n) {
2272                 --(*pos);
2273                 while (*pos) {
2274                         n = neigh_get_next(seq, n, pos);
2275                         if (!n)
2276                                 break;
2277                 }
2278         }
2279         return *pos ? NULL : n;
2280 }
2281
2282 static struct pneigh_entry *pneigh_get_first(struct seq_file *seq)
2283 {
2284         struct neigh_seq_state *state = seq->private;
2285         struct net *net = seq_file_net(seq);
2286         struct neigh_table *tbl = state->tbl;
2287         struct pneigh_entry *pn = NULL;
2288         int bucket = state->bucket;
2289
2290         state->flags |= NEIGH_SEQ_IS_PNEIGH;
2291         for (bucket = 0; bucket <= PNEIGH_HASHMASK; bucket++) {
2292                 pn = tbl->phash_buckets[bucket];
2293                 while (pn && !net_eq(pneigh_net(pn), net))
2294                         pn = pn->next;
2295                 if (pn)
2296                         break;
2297         }
2298         state->bucket = bucket;
2299
2300         return pn;
2301 }
2302
2303 static struct pneigh_entry *pneigh_get_next(struct seq_file *seq,
2304                                             struct pneigh_entry *pn,
2305                                             loff_t *pos)
2306 {
2307         struct neigh_seq_state *state = seq->private;
2308         struct net *net = seq_file_net(seq);
2309         struct neigh_table *tbl = state->tbl;
2310
2311         pn = pn->next;
2312         while (!pn) {
2313                 if (++state->bucket > PNEIGH_HASHMASK)
2314                         break;
2315                 pn = tbl->phash_buckets[state->bucket];
2316                 while (pn && !net_eq(pneigh_net(pn), net))
2317                         pn = pn->next;
2318                 if (pn)
2319                         break;
2320         }
2321
2322         if (pn && pos)
2323                 --(*pos);
2324
2325         return pn;
2326 }
2327
2328 static struct pneigh_entry *pneigh_get_idx(struct seq_file *seq, loff_t *pos)
2329 {
2330         struct pneigh_entry *pn = pneigh_get_first(seq);
2331
2332         if (pn) {
2333                 --(*pos);
2334                 while (*pos) {
2335                         pn = pneigh_get_next(seq, pn, pos);
2336                         if (!pn)
2337                                 break;
2338                 }
2339         }
2340         return *pos ? NULL : pn;
2341 }
2342
2343 static void *neigh_get_idx_any(struct seq_file *seq, loff_t *pos)
2344 {
2345         struct neigh_seq_state *state = seq->private;
2346         void *rc;
2347         loff_t idxpos = *pos;
2348
2349         rc = neigh_get_idx(seq, &idxpos);
2350         if (!rc && !(state->flags & NEIGH_SEQ_NEIGH_ONLY))
2351                 rc = pneigh_get_idx(seq, &idxpos);
2352
2353         return rc;
2354 }
2355
2356 void *neigh_seq_start(struct seq_file *seq, loff_t *pos, struct neigh_table *tbl, unsigned int neigh_seq_flags)
2357         __acquires(tbl->lock)
2358 {
2359         struct neigh_seq_state *state = seq->private;
2360
2361         state->tbl = tbl;
2362         state->bucket = 0;
2363         state->flags = (neigh_seq_flags & ~NEIGH_SEQ_IS_PNEIGH);
2364
2365         read_lock_bh(&tbl->lock);
2366
2367         return *pos ? neigh_get_idx_any(seq, pos) : SEQ_START_TOKEN;
2368 }
2369 EXPORT_SYMBOL(neigh_seq_start);
2370
2371 void *neigh_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2372 {
2373         struct neigh_seq_state *state;
2374         void *rc;
2375
2376         if (v == SEQ_START_TOKEN) {
2377                 rc = neigh_get_first(seq);
2378                 goto out;
2379         }
2380
2381         state = seq->private;
2382         if (!(state->flags & NEIGH_SEQ_IS_PNEIGH)) {
2383                 rc = neigh_get_next(seq, v, NULL);
2384                 if (rc)
2385                         goto out;
2386                 if (!(state->flags & NEIGH_SEQ_NEIGH_ONLY))
2387                         rc = pneigh_get_first(seq);
2388         } else {
2389                 BUG_ON(state->flags & NEIGH_SEQ_NEIGH_ONLY);
2390                 rc = pneigh_get_next(seq, v, NULL);
2391         }
2392 out:
2393         ++(*pos);
2394         return rc;
2395 }
2396 EXPORT_SYMBOL(neigh_seq_next);
2397
2398 void neigh_seq_stop(struct seq_file *seq, void *v)
2399         __releases(tbl->lock)
2400 {
2401         struct neigh_seq_state *state = seq->private;
2402         struct neigh_table *tbl = state->tbl;
2403
2404         read_unlock_bh(&tbl->lock);
2405 }
2406 EXPORT_SYMBOL(neigh_seq_stop);
2407
2408 /* statistics via seq_file */
2409
2410 static void *neigh_stat_seq_start(struct seq_file *seq, loff_t *pos)
2411 {
2412         struct proc_dir_entry *pde = seq->private;
2413         struct neigh_table *tbl = pde->data;
2414         int cpu;
2415
2416         if (*pos == 0)
2417                 return SEQ_START_TOKEN;
2418
2419         for (cpu = *pos-1; cpu < nr_cpu_ids; ++cpu) {
2420                 if (!cpu_possible(cpu))
2421                         continue;
2422                 *pos = cpu+1;
2423                 return per_cpu_ptr(tbl->stats, cpu);
2424         }
2425         return NULL;
2426 }
2427
2428 static void *neigh_stat_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2429 {
2430         struct proc_dir_entry *pde = seq->private;
2431         struct neigh_table *tbl = pde->data;
2432         int cpu;
2433
2434         for (cpu = *pos; cpu < nr_cpu_ids; ++cpu) {
2435                 if (!cpu_possible(cpu))
2436                         continue;
2437                 *pos = cpu+1;
2438                 return per_cpu_ptr(tbl->stats, cpu);
2439         }
2440         return NULL;
2441 }
2442
/* seq_file ->stop for the statistics file: intentionally does nothing. */
static void neigh_stat_seq_stop(struct seq_file *seq, void *v)
{
}
2447
2448 static int neigh_stat_seq_show(struct seq_file *seq, void *v)
2449 {
2450         struct proc_dir_entry *pde = seq->private;
2451         struct neigh_table *tbl = pde->data;
2452         struct neigh_statistics *st = v;
2453
2454         if (v == SEQ_START_TOKEN) {
2455                 seq_printf(seq, "entries  allocs destroys hash_grows  lookups hits  res_failed  rcv_probes_mcast rcv_probes_ucast  periodic_gc_runs forced_gc_runs unresolved_discards\n");
2456                 return 0;
2457         }
2458
2459         seq_printf(seq, "%08x  %08lx %08lx %08lx  %08lx %08lx  %08lx  "
2460                         "%08lx %08lx  %08lx %08lx %08lx\n",
2461                    atomic_read(&tbl->entries),
2462
2463                    st->allocs,
2464                    st->destroys,
2465                    st->hash_grows,
2466
2467                    st->lookups,
2468                    st->hits,
2469
2470                    st->res_failed,
2471
2472                    st->rcv_probes_mcast,
2473                    st->rcv_probes_ucast,
2474
2475                    st->periodic_gc_runs,
2476                    st->forced_gc_runs,
2477                    st->unres_discards
2478                    );
2479
2480         return 0;
2481 }
2482
2483 static const struct seq_operations neigh_stat_seq_ops = {
2484         .start  = neigh_stat_seq_start,
2485         .next   = neigh_stat_seq_next,
2486         .stop   = neigh_stat_seq_stop,
2487         .show   = neigh_stat_seq_show,
2488 };
2489
2490 static int neigh_stat_seq_open(struct inode *inode, struct file *file)
2491 {
2492         int ret = seq_open(file, &neigh_stat_seq_ops);
2493
2494         if (!ret) {
2495                 struct seq_file *sf = file->private_data;
2496                 sf->private = PDE(inode);
2497         }
2498         return ret;
2499 };
2500
2501 static const struct file_operations neigh_stat_seq_fops = {
2502         .owner   = THIS_MODULE,
2503         .open    = neigh_stat_seq_open,
2504         .read    = seq_read,
2505         .llseek  = seq_lseek,
2506         .release = seq_release,
2507 };
2508
2509 #endif /* CONFIG_PROC_FS */
2510
2511 static inline size_t neigh_nlmsg_size(void)
2512 {
2513         return NLMSG_ALIGN(sizeof(struct ndmsg))
2514                + nla_total_size(MAX_ADDR_LEN) /* NDA_DST */
2515                + nla_total_size(MAX_ADDR_LEN) /* NDA_LLADDR */
2516                + nla_total_size(sizeof(struct nda_cacheinfo))
2517                + nla_total_size(4); /* NDA_PROBES */
2518 }
2519
2520 static void __neigh_notify(struct neighbour *n, int type, int flags)
2521 {
2522         struct net *net = dev_net(n->dev);
2523         struct sk_buff *skb;
2524         int err = -ENOBUFS;
2525
2526         skb = nlmsg_new(neigh_nlmsg_size(), GFP_ATOMIC);
2527         if (skb == NULL)
2528                 goto errout;
2529
2530         err = neigh_fill_info(skb, n, 0, 0, type, flags);
2531         if (err < 0) {
2532                 /* -EMSGSIZE implies BUG in neigh_nlmsg_size() */
2533                 WARN_ON(err == -EMSGSIZE);
2534                 kfree_skb(skb);
2535                 goto errout;
2536         }
2537         err = rtnl_notify(skb, net, 0, RTNLGRP_NEIGH, NULL, GFP_ATOMIC);
2538 errout:
2539         if (err < 0)
2540                 rtnl_set_sk_err(net, RTNLGRP_NEIGH, err);
2541 }
2542
2543 #ifdef CONFIG_ARPD
2544 void neigh_app_ns(struct neighbour *n)
2545 {
2546         __neigh_notify(n, RTM_GETNEIGH, NLM_F_REQUEST);
2547 }
2548 EXPORT_SYMBOL(neigh_app_ns);
2549 #endif /* CONFIG_ARPD */
2550
2551 #ifdef CONFIG_SYSCTL
2552
2553 static struct neigh_sysctl_table {
2554         struct ctl_table_header *sysctl_header;
2555         struct ctl_table neigh_vars[__NET_NEIGH_MAX];
2556         char *dev_name;
2557 } neigh_sysctl_template __read_mostly = {
2558         .neigh_vars = {
2559                 {
2560                         .ctl_name       = NET_NEIGH_MCAST_SOLICIT,
2561                         .procname       = "mcast_solicit",
2562                         .maxlen         = sizeof(int),
2563                         .mode           = 0644,
2564                         .proc_handler   = proc_dointvec,
2565                 },
2566                 {
2567                         .ctl_name       = NET_NEIGH_UCAST_SOLICIT,
2568                         .procname       = "ucast_solicit",
2569                         .maxlen         = sizeof(int),
2570                         .mode           = 0644,
2571                         .proc_handler   = proc_dointvec,
2572                 },
2573                 {
2574                         .ctl_name       = NET_NEIGH_APP_SOLICIT,
2575                         .procname       = "app_solicit",
2576                         .maxlen         = sizeof(int),
2577                         .mode           = 0644,
2578                         .proc_handler   = proc_dointvec,
2579                 },
2580                 {
2581                         .procname       = "retrans_time",
2582                         .maxlen         = sizeof(int),
2583                         .mode           = 0644,
2584                         .proc_handler   = proc_dointvec_userhz_jiffies,
2585                 },
2586                 {
2587                         .ctl_name       = NET_NEIGH_REACHABLE_TIME,
2588                         .procname       = "base_reachable_time",
2589                         .maxlen         = sizeof(int),
2590                         .mode           = 0644,
2591                         .proc_handler   = proc_dointvec_jiffies,
2592                         .strategy       = sysctl_jiffies,
2593                 },
2594                 {
2595                         .ctl_name       = NET_NEIGH_DELAY_PROBE_TIME,
2596                         .procname       = "delay_first_probe_time",
2597                         .maxlen         = sizeof(int),
2598                         .mode           = 0644,
2599                         .proc_handler   = proc_dointvec_jiffies,
2600                         .strategy       = sysctl_jiffies,
2601                 },
2602                 {
2603                         .ctl_name       = NET_NEIGH_GC_STALE_TIME,
2604                         .procname       = "gc_stale_time",
2605                         .maxlen         = sizeof(int),
2606                         .mode           = 0644,
2607                         .proc_handler   = proc_dointvec_jiffies,
2608                         .strategy       = sysctl_jiffies,
2609                 },
2610                 {
2611                         .ctl_name       = NET_NEIGH_UNRES_QLEN,
2612                         .procname       = "unres_qlen",
2613                         .maxlen         = sizeof(int),
2614                         .mode           = 0644,
2615                         .proc_handler   = proc_dointvec,
2616                 },
2617                 {
2618                         .ctl_name       = NET_NEIGH_PROXY_QLEN,
2619                         .procname       = "proxy_qlen",
2620                         .maxlen         = sizeof(int),
2621                         .mode           = 0644,
2622                         .proc_handler   = proc_dointvec,
2623                 },
2624                 {
2625                         .procname       = "anycast_delay",
2626                         .maxlen         = sizeof(int),
2627                         .mode           = 0644,
2628                         .proc_handler   = proc_dointvec_userhz_jiffies,
2629                 },
2630                 {
2631                         .procname       = "proxy_delay",
2632                         .maxlen         = sizeof(int),
2633                         .mode           = 0644,
2634                         .proc_handler   = proc_dointvec_userhz_jiffies,
2635                 },
2636                 {
2637                         .procname       = "locktime",
2638                         .maxlen         = sizeof(int),
2639                         .mode           = 0644,
2640                         .proc_handler   = proc_dointvec_userhz_jiffies,
2641                 },
2642                 {
2643                         .ctl_name       = NET_NEIGH_RETRANS_TIME_MS,
2644                         .procname       = "retrans_time_ms",
2645                         .maxlen         = sizeof(int),
2646                         .mode           = 0644,
2647                         .proc_handler   = proc_dointvec_ms_jiffies,
2648                         .strategy       = sysctl_ms_jiffies,
2649                 },
2650                 {
2651                         .ctl_name       = NET_NEIGH_REACHABLE_TIME_MS,
2652                         .procname       = "base_reachable_time_ms",
2653                         .maxlen         = sizeof(int),
2654                         .mode           = 0644,
2655                         .proc_handler   = proc_dointvec_ms_jiffies,
2656                         .strategy       = sysctl_ms_jiffies,
2657                 },
2658                 {
2659                         .ctl_name       = NET_NEIGH_GC_INTERVAL,
2660                         .procname       = "gc_interval",
2661                         .maxlen         = sizeof(int),
2662                         .mode           = 0644,
2663                         .proc_handler   = proc_dointvec_jiffies,
2664                         .strategy       = sysctl_jiffies,
2665                 },
2666                 {
2667                         .ctl_name       = NET_NEIGH_GC_THRESH1,
2668                         .procname       = "gc_thresh1",
2669                         .maxlen         = sizeof(int),
2670                         .mode           = 0644,
2671                         .proc_handler   = proc_dointvec,
2672                 },
2673                 {
2674                         .ctl_name       = NET_NEIGH_GC_THRESH2,
2675                         .procname       = "gc_thresh2",
2676                         .maxlen         = sizeof(int),
2677                         .mode           = 0644,
2678                         .proc_handler   = proc_dointvec,
2679                 },
2680                 {
2681                         .ctl_name       = NET_NEIGH_GC_THRESH3,
2682                         .procname       = "gc_thresh3",
2683                         .maxlen         = sizeof(int),
2684                         .mode           = 0644,
2685                         .proc_handler   = proc_dointvec,
2686                 },
2687                 {},
2688         },
2689 };
2690
2691 int neigh_sysctl_register(struct net_device *dev, struct neigh_parms *p,
2692                           int p_id, int pdev_id, char *p_name,
2693                           proc_handler *handler, ctl_handler *strategy)
2694 {
2695         struct neigh_sysctl_table *t;
2696         const char *dev_name_source = NULL;
2697
2698 #define NEIGH_CTL_PATH_ROOT     0
2699 #define NEIGH_CTL_PATH_PROTO    1
2700 #define NEIGH_CTL_PATH_NEIGH    2
2701 #define NEIGH_CTL_PATH_DEV      3
2702
2703         struct ctl_path neigh_path[] = {
2704                 { .procname = "net",     .ctl_name = CTL_NET, },
2705                 { .procname = "proto",   .ctl_name = 0, },
2706                 { .procname = "neigh",   .ctl_name = 0, },
2707                 { .procname = "default", .ctl_name = NET_PROTO_CONF_DEFAULT, },
2708                 { },
2709         };
2710
2711         t = kmemdup(&neigh_sysctl_template, sizeof(*t), GFP_KERNEL);
2712         if (!t)
2713                 goto err;
2714
2715         t->neigh_vars[0].data  = &p->mcast_probes;
2716         t->neigh_vars[1].data  = &p->ucast_probes;
2717         t->neigh_vars[2].data  = &p->app_probes;
2718         t->neigh_vars[3].data  = &p->retrans_time;
2719         t->neigh_vars[4].data  = &p->base_reachable_time;
2720         t->neigh_vars[5].data  = &p->delay_probe_time;
2721         t->neigh_vars[6].data  = &p->gc_staletime;
2722         t->neigh_vars[7].data  = &p->queue_len;
2723         t->neigh_vars[8].data  = &p->proxy_qlen;
2724         t->neigh_vars[9].data  = &p->anycast_delay;
2725         t->neigh_vars[10].data = &p->proxy_delay;
2726         t->neigh_vars[11].data = &p->locktime;
2727         t->neigh_vars[12].data  = &p->retrans_time;
2728         t->neigh_vars[13].data  = &p->base_reachable_time;
2729
2730         if (dev) {
2731                 dev_name_source = dev->name;
2732                 neigh_path[NEIGH_CTL_PATH_DEV].ctl_name = dev->ifindex;
2733                 /* Terminate the table early */
2734                 memset(&t->neigh_vars[14], 0, sizeof(t->neigh_vars[14]));
2735         } else {
2736                 dev_name_source = neigh_path[NEIGH_CTL_PATH_DEV].procname;
2737                 t->neigh_vars[14].data = (int *)(p + 1);
2738                 t->neigh_vars[15].data = (int *)(p + 1) + 1;
2739                 t->neigh_vars[16].data = (int *)(p + 1) + 2;
2740                 t->neigh_vars[17].data = (int *)(p + 1) + 3;
2741         }
2742
2743
2744         if (handler || strategy) {
2745                 /* RetransTime */
2746                 t->neigh_vars[3].proc_handler = handler;
2747                 t->neigh_vars[3].strategy = strategy;
2748                 t->neigh_vars[3].extra1 = dev;
2749                 if (!strategy)
2750                         t->neigh_vars[3].ctl_name = CTL_UNNUMBERED;
2751                 /* ReachableTime */
2752                 t->neigh_vars[4].proc_handler = handler;
2753                 t->neigh_vars[4].strategy = strategy;
2754                 t->neigh_vars[4].extra1 = dev;
2755                 if (!strategy)
2756                         t->neigh_vars[4].ctl_name = CTL_UNNUMBERED;
2757                 /* RetransTime (in milliseconds)*/
2758                 t->neigh_vars[12].proc_handler = handler;
2759                 t->neigh_vars[12].strategy = strategy;
2760                 t->neigh_vars[12].extra1 = dev;
2761                 if (!strategy)
2762                         t->neigh_vars[12].ctl_name = CTL_UNNUMBERED;
2763                 /* ReachableTime (in milliseconds) */
2764                 t->neigh_vars[13].proc_handler = handler;
2765                 t->neigh_vars[13].strategy = strategy;
2766                 t->neigh_vars[13].extra1 = dev;
2767                 if (!strategy)
2768                         t->neigh_vars[13].ctl_name = CTL_UNNUMBERED;
2769         }
2770
2771         t->dev_name = kstrdup(dev_name_source, GFP_KERNEL);
2772         if (!t->dev_name)
2773                 goto free;
2774
2775         neigh_path[NEIGH_CTL_PATH_DEV].procname = t->dev_name;
2776         neigh_path[NEIGH_CTL_PATH_NEIGH].ctl_name = pdev_id;
2777         neigh_path[NEIGH_CTL_PATH_PROTO].procname = p_name;
2778         neigh_path[NEIGH_CTL_PATH_PROTO].ctl_name = p_id;
2779
2780         t->sysctl_header =
2781                 register_net_sysctl_table(neigh_parms_net(p), neigh_path, t->neigh_vars);
2782         if (!t->sysctl_header)
2783                 goto free_procname;
2784
2785         p->sysctl_table = t;
2786         return 0;
2787
2788 free_procname:
2789         kfree(t->dev_name);
2790 free:
2791         kfree(t);
2792 err:
2793         return -ENOBUFS;
2794 }
2795 EXPORT_SYMBOL(neigh_sysctl_register);
2796
2797 void neigh_sysctl_unregister(struct neigh_parms *p)
2798 {
2799         if (p->sysctl_table) {
2800                 struct neigh_sysctl_table *t = p->sysctl_table;
2801                 p->sysctl_table = NULL;
2802                 unregister_sysctl_table(t->sysctl_header);
2803                 kfree(t->dev_name);
2804                 kfree(t);
2805         }
2806 }
2807 EXPORT_SYMBOL(neigh_sysctl_unregister);
2808
2809 #endif  /* CONFIG_SYSCTL */
2810
2811 static int __init neigh_init(void)
2812 {
2813         rtnl_register(PF_UNSPEC, RTM_NEWNEIGH, neigh_add, NULL);
2814         rtnl_register(PF_UNSPEC, RTM_DELNEIGH, neigh_delete, NULL);
2815         rtnl_register(PF_UNSPEC, RTM_GETNEIGH, NULL, neigh_dump_info);
2816
2817         rtnl_register(PF_UNSPEC, RTM_GETNEIGHTBL, NULL, neightbl_dump_info);
2818         rtnl_register(PF_UNSPEC, RTM_SETNEIGHTBL, neightbl_set, NULL);
2819
2820         return 0;
2821 }
2822
2823 subsys_initcall(neigh_init);
2824