[PATCH] (1/2) inode trimming
[opensuse:kernel.git] / net / ipv6 / ndisc.c
1 /*
2  *      Neighbour Discovery for IPv6
3  *      Linux INET6 implementation 
4  *
5  *      Authors:
6  *      Pedro Roque             <roque@di.fc.ul.pt>     
7  *      Mike Shaver             <shaver@ingenia.com>
8  *
9  *      This program is free software; you can redistribute it and/or
10  *      modify it under the terms of the GNU General Public License
11  *      as published by the Free Software Foundation; either version
12  *      2 of the License, or (at your option) any later version.
13  */
14
15 /*
16  *      Changes:
17  *
18  *      Lars Fenneberg                  :       fixed MTU setting on receipt
19  *                                              of an RA.
20  *
21  *      Janos Farkas                    :       kmalloc failure checks
22  *      Alexey Kuznetsov                :       state machine reworked
23  *                                              and moved to net/core.
24  *      Pekka Savola                    :       RFC2461 validation
25  */
26
/*
 * Debug print levels used throughout this file.
 *
 * ND_PRINTK0 always expands to printk(KERN_DEBUG ...).
 * ND_PRINTK1 prints only when ND_DEBUG >= 1, ND_PRINTK2 only when
 * ND_DEBUG >= 2; otherwise they expand to an empty statement.
 * NOTE(review): only levels 1 and 2 are tested here, so the "3" in the
 * comment below looks stale - confirm against the rest of the file.
 */
27 /* Set to 3 to get tracing... */
28 #define ND_DEBUG 1
29
30 #define ND_PRINTK(x...) printk(KERN_DEBUG x)
31 #define ND_NOPRINTK(x...) do { ; } while(0)
32 #define ND_PRINTK0 ND_PRINTK
33 #define ND_PRINTK1 ND_NOPRINTK
34 #define ND_PRINTK2 ND_NOPRINTK
35 #if ND_DEBUG >= 1
36 #undef ND_PRINTK1
37 #define ND_PRINTK1 ND_PRINTK
38 #endif
39 #if ND_DEBUG >= 2
40 #undef ND_PRINTK2
41 #define ND_PRINTK2 ND_PRINTK
42 #endif
43
44 #define __NO_VERSION__
45 #include <linux/module.h>
46 #include <linux/config.h>
47 #include <linux/errno.h>
48 #include <linux/types.h>
49 #include <linux/socket.h>
50 #include <linux/sockios.h>
51 #include <linux/sched.h>
52 #include <linux/net.h>
53 #include <linux/in6.h>
54 #include <linux/route.h>
55 #include <linux/init.h>
56 #ifdef CONFIG_SYSCTL
57 #include <linux/sysctl.h>
58 #endif
59
60 #include <linux/if_arp.h>
61 #include <linux/ipv6.h>
62 #include <linux/icmpv6.h>
63
64 #include <net/sock.h>
65 #include <net/snmp.h>
66
67 #include <net/ipv6.h>
68 #include <net/protocol.h>
69 #include <net/ndisc.h>
70 #include <net/ip6_route.h>
71 #include <net/addrconf.h>
72 #include <net/icmp.h>
73
74 #include <net/checksum.h>
75 #include <linux/proc_fs.h>
76
/* Socket whose ->sk is used as the sending socket by the ndisc_send_*() routines. */
77 static struct socket *ndisc_socket;
78
/*
 * Forward declarations of the callbacks referenced by the neigh_ops
 * tables and by nd_tbl below.
 */
79 static u32 ndisc_hash(const void *pkey, const struct net_device *dev);
80 static int ndisc_constructor(struct neighbour *neigh);
81 static void ndisc_solicit(struct neighbour *neigh, struct sk_buff *skb);
82 static void ndisc_error_report(struct neighbour *neigh, struct sk_buff *skb);
83 static int pndisc_constructor(struct pneigh_entry *n);
84 static void pndisc_destructor(struct pneigh_entry *n);
85 static void pndisc_redo(struct sk_buff *skb);
86
/*
 * Neighbour operations installed by ndisc_constructor() for devices that
 * have a hard_header routine but no hard_header_cache.
 *
 * NOTE(review): positional initializer - the order must match the field
 * order of struct neigh_ops (declared outside this file); presumably
 * family, destructor, solicit, error_report, output, connected_output,
 * hh_output, queue_xmit - confirm against the header.
 */
87 static struct neigh_ops ndisc_generic_ops =
88 {
89         AF_INET6,
90         NULL,
91         ndisc_solicit,
92         ndisc_error_report,
93         neigh_resolve_output,
94         neigh_connected_output,
95         dev_queue_xmit,
96         dev_queue_xmit
97 };
98
/*
 * Neighbour operations installed by ndisc_constructor() for devices that
 * provide hard_header_cache.  Differs from ndisc_generic_ops only in the
 * sixth slot, which is neigh_resolve_output here instead of
 * neigh_connected_output.
 *
 * NOTE(review): positional initializer - order must match struct
 * neigh_ops as declared in the neighbour header; confirm.
 */
99 static struct neigh_ops ndisc_hh_ops =
100 {
101         AF_INET6,
102         NULL,
103         ndisc_solicit,
104         ndisc_error_report,
105         neigh_resolve_output,
106         neigh_resolve_output,
107         dev_queue_xmit,
108         dev_queue_xmit
109 };
110
111
/*
 * Neighbour operations installed by ndisc_constructor() for devices with
 * no hard_header routine: no solicit/error callbacks, and every output
 * slot goes straight to dev_queue_xmit.
 *
 * NOTE(review): positional initializer - order must match struct
 * neigh_ops as declared in the neighbour header; confirm.
 */
112 static struct neigh_ops ndisc_direct_ops =
113 {
114         AF_INET6,
115         NULL,
116         NULL,
117         NULL,
118         dev_queue_xmit,
119         dev_queue_xmit,
120         dev_queue_xmit,
121         dev_queue_xmit
122 };
123
/*
 * The IPv6 neighbour table ("ndisc_cache").  Keys are struct in6_addr;
 * hashing, neighbour construction and proxy-entry handling are delegated
 * to the ndisc_ and pndisc_ callbacks defined in this file.
 *
 * NOTE(review): positional initializer - the meaning of each numeric
 * value (the HZ-scaled timers, probe counts and the 128/512/1024
 * thresholds) depends on the field order of struct neigh_table and its
 * embedded parameter block, which are declared outside this file;
 * verify against the neighbour header before changing any value.
 */
124 struct neigh_table nd_tbl =
125 {
126         NULL,
127         AF_INET6,
128         sizeof(struct neighbour) + sizeof(struct in6_addr),
129         sizeof(struct in6_addr),
130         ndisc_hash,
131         ndisc_constructor,
132         pndisc_constructor,
133         pndisc_destructor,
134         pndisc_redo,
135         "ndisc_cache",
136         { NULL, NULL, &nd_tbl, 0, NULL, NULL,
137                   30*HZ, 1*HZ, 60*HZ, 30*HZ, 5*HZ, 3, 3, 0, 3, 1*HZ, (8*HZ)/10, 64, 0 },
138         30*HZ, 128, 512, 1024,
139 };
140
141 #define NDISC_OPT_SPACE(len) (((len)+2+7)&~7)
142
143 static u8 *ndisc_fill_option(u8 *opt, int type, void *data, int data_len)
144 {
145         int space = NDISC_OPT_SPACE(data_len);
146
147         opt[0] = type;
148         opt[1] = space>>3;
149         memcpy(opt+2, data, data_len);
150         data_len += 2;
151         opt += data_len;
152         if ((space -= data_len) > 0)
153                 memset(opt, 0, space);
154         return opt + space;
155 }
156
157 int ndisc_mc_map(struct in6_addr *addr, char *buf, struct net_device *dev, int dir)
158 {
159         switch (dev->type) {
160         case ARPHRD_ETHER:
161         case ARPHRD_IEEE802:    /* Not sure. Check it later. --ANK */
162         case ARPHRD_FDDI:
163                 ipv6_eth_mc_map(addr, buf);
164                 return 0;
165         case ARPHRD_IEEE802_TR:
166                 ipv6_tr_mc_map(addr,buf);
167                 return 0;
168         default:
169                 if (dir) {
170                         memcpy(buf, dev->broadcast, dev->addr_len);
171                         return 0;
172                 }
173         }
174         return -EINVAL;
175 }
176
177 static u32 ndisc_hash(const void *pkey, const struct net_device *dev)
178 {
179         u32 hash_val;
180
181         hash_val = *(u32*)(pkey + sizeof(struct in6_addr) - 4);
182         hash_val ^= (hash_val>>16);
183         hash_val ^= hash_val>>8;
184         hash_val ^= hash_val>>3;
185         hash_val = (hash_val^dev->ifindex)&NEIGH_HASHMASK;
186
187         return hash_val;
188 }
189
190 static int ndisc_constructor(struct neighbour *neigh)
191 {
192         struct in6_addr *addr = (struct in6_addr*)&neigh->primary_key;
193         struct net_device *dev = neigh->dev;
194         struct inet6_dev *in6_dev = in6_dev_get(dev);
195         int addr_type;
196
197         if (in6_dev == NULL)
198                 return -EINVAL;
199
200         addr_type = ipv6_addr_type(addr);
201         if (in6_dev->nd_parms)
202                 neigh->parms = in6_dev->nd_parms;
203
204         if (addr_type&IPV6_ADDR_MULTICAST)
205                 neigh->type = RTN_MULTICAST;
206         else
207                 neigh->type = RTN_UNICAST;
208         if (dev->hard_header == NULL) {
209                 neigh->nud_state = NUD_NOARP;
210                 neigh->ops = &ndisc_direct_ops;
211                 neigh->output = neigh->ops->queue_xmit;
212         } else {
213                 if (addr_type&IPV6_ADDR_MULTICAST) {
214                         neigh->nud_state = NUD_NOARP;
215                         ndisc_mc_map(addr, neigh->ha, dev, 1);
216                 } else if (dev->flags&(IFF_NOARP|IFF_LOOPBACK)) {
217                         neigh->nud_state = NUD_NOARP;
218                         memcpy(neigh->ha, dev->dev_addr, dev->addr_len);
219                         if (dev->flags&IFF_LOOPBACK)
220                                 neigh->type = RTN_LOCAL;
221                 } else if (dev->flags&IFF_POINTOPOINT) {
222                         neigh->nud_state = NUD_NOARP;
223                         memcpy(neigh->ha, dev->broadcast, dev->addr_len);
224                 }
225                 if (dev->hard_header_cache)
226                         neigh->ops = &ndisc_hh_ops;
227                 else
228                         neigh->ops = &ndisc_generic_ops;
229                 if (neigh->nud_state&NUD_VALID)
230                         neigh->output = neigh->ops->connected_output;
231                 else
232                         neigh->output = neigh->ops->output;
233         }
234         in6_dev_put(in6_dev);
235         return 0;
236 }
237
238 static int pndisc_constructor(struct pneigh_entry *n)
239 {
240         struct in6_addr *addr = (struct in6_addr*)&n->key;
241         struct in6_addr maddr;
242         struct net_device *dev = n->dev;
243
244         if (dev == NULL || __in6_dev_get(dev) == NULL)
245                 return -EINVAL;
246         addrconf_addr_solict_mult(addr, &maddr);
247         ipv6_dev_mc_inc(dev, &maddr);
248         return 0;
249 }
250
251 static void pndisc_destructor(struct pneigh_entry *n)
252 {
253         struct in6_addr *addr = (struct in6_addr*)&n->key;
254         struct in6_addr maddr;
255         struct net_device *dev = n->dev;
256
257         if (dev == NULL || __in6_dev_get(dev) == NULL)
258                 return;
259         addrconf_addr_solict_mult(addr, &maddr);
260         ipv6_dev_mc_dec(dev, &maddr);
261 }
262
263
264
265 static int
266 ndisc_build_ll_hdr(struct sk_buff *skb, struct net_device *dev,
267                    struct in6_addr *daddr, struct neighbour *neigh, int len)
268 {
269         unsigned char ha[MAX_ADDR_LEN];
270         unsigned char *h_dest = NULL;
271
272         skb_reserve(skb, (dev->hard_header_len + 15) & ~15);
273
274         if (dev->hard_header) {
275                 if (ipv6_addr_type(daddr) & IPV6_ADDR_MULTICAST) {
276                         ndisc_mc_map(daddr, ha, dev, 1);
277                         h_dest = ha;
278                 } else if (neigh) {
279                         read_lock_bh(&neigh->lock);
280                         if (neigh->nud_state&NUD_VALID) {
281                                 memcpy(ha, neigh->ha, dev->addr_len);
282                                 h_dest = ha;
283                         }
284                         read_unlock_bh(&neigh->lock);
285                 } else {
286                         neigh = neigh_lookup(&nd_tbl, daddr, dev);
287                         if (neigh) {
288                                 read_lock_bh(&neigh->lock);
289                                 if (neigh->nud_state&NUD_VALID) {
290                                         memcpy(ha, neigh->ha, dev->addr_len);
291                                         h_dest = ha;
292                                 }
293                                 read_unlock_bh(&neigh->lock);
294                                 neigh_release(neigh);
295                         }
296                 }
297
298                 if (dev->hard_header(skb, dev, ETH_P_IPV6, h_dest, NULL, len) < 0)
299                         return 0;
300         }
301
302         return 1;
303 }
304
305
306 /*
307  *      Send a Neighbour Advertisement
308  */
309
310 void ndisc_send_na(struct net_device *dev, struct neighbour *neigh,
311                    struct in6_addr *daddr, struct in6_addr *solicited_addr,
312                    int router, int solicited, int override, int inc_opt) 
313 {
314         struct sock *sk = ndisc_socket->sk;
315         struct nd_msg *msg;
316         int len;
317         struct sk_buff *skb;
318         int err;
319
320         len = sizeof(struct icmp6hdr) + sizeof(struct in6_addr);
321
322         if (inc_opt) {
323                 if (dev->addr_len)
324                         len += NDISC_OPT_SPACE(dev->addr_len);
325                 else
326                         inc_opt = 0;
327         }
328
329         skb = sock_alloc_send_skb(sk, MAX_HEADER + len + dev->hard_header_len + 15,
330                                   0, &err);
331
332         if (skb == NULL) {
333                 ND_PRINTK1("send_na: alloc skb failed\n");
334                 return;
335         }
336
337         if (ndisc_build_ll_hdr(skb, dev, daddr, neigh, len) == 0) {
338                 kfree_skb(skb);
339                 return;
340         }
341
342         ip6_nd_hdr(sk, skb, dev, solicited_addr, daddr, IPPROTO_ICMPV6, len);
343
344         msg = (struct nd_msg *) skb_put(skb, len);
345
346         msg->icmph.icmp6_type = NDISC_NEIGHBOUR_ADVERTISEMENT;
347         msg->icmph.icmp6_code = 0;
348         msg->icmph.icmp6_cksum = 0;
349
350         msg->icmph.icmp6_unused = 0;
351         msg->icmph.icmp6_router    = router;
352         msg->icmph.icmp6_solicited = solicited;
353         msg->icmph.icmp6_override  = !!override;
354
355         /* Set the target address. */
356         ipv6_addr_copy(&msg->target, solicited_addr);
357
358         if (inc_opt)
359                 ndisc_fill_option((void*)&msg->opt, ND_OPT_TARGET_LL_ADDR, dev->dev_addr, dev->addr_len);
360
361         /* checksum */
362         msg->icmph.icmp6_cksum = csum_ipv6_magic(solicited_addr, daddr, len, 
363                                                  IPPROTO_ICMPV6,
364                                                  csum_partial((__u8 *) msg, 
365                                                               len, 0));
366
367         dev_queue_xmit(skb);
368
369         ICMP6_INC_STATS(Icmp6OutNeighborAdvertisements);
370         ICMP6_INC_STATS(Icmp6OutMsgs);
371 }        
372
373 void ndisc_send_ns(struct net_device *dev, struct neighbour *neigh,
374                    struct in6_addr *solicit,
375                    struct in6_addr *daddr, struct in6_addr *saddr) 
376 {
377         struct sock *sk = ndisc_socket->sk;
378         struct sk_buff *skb;
379         struct nd_msg *msg;
380         struct in6_addr addr_buf;
381         int len;
382         int err;
383         int send_llinfo;
384
385         if (saddr == NULL) {
386                 if (ipv6_get_lladdr(dev, &addr_buf))
387                         return;
388                 saddr = &addr_buf;
389         }
390
391         len = sizeof(struct icmp6hdr) + sizeof(struct in6_addr);
392         send_llinfo = dev->addr_len && ipv6_addr_type(saddr) != IPV6_ADDR_ANY;
393         if (send_llinfo)
394                 len += NDISC_OPT_SPACE(dev->addr_len);
395
396         skb = sock_alloc_send_skb(sk, MAX_HEADER + len + dev->hard_header_len + 15,
397                                   0, &err);
398         if (skb == NULL) {
399                 ND_PRINTK1("send_ns: alloc skb failed\n");
400                 return;
401         }
402
403         if (ndisc_build_ll_hdr(skb, dev, daddr, neigh, len) == 0) {
404                 kfree_skb(skb);
405                 return;
406         }
407
408         ip6_nd_hdr(sk, skb, dev, saddr, daddr, IPPROTO_ICMPV6, len);
409
410         msg = (struct nd_msg *)skb_put(skb, len);
411         msg->icmph.icmp6_type = NDISC_NEIGHBOUR_SOLICITATION;
412         msg->icmph.icmp6_code = 0;
413         msg->icmph.icmp6_cksum = 0;
414         msg->icmph.icmp6_unused = 0;
415
416         /* Set the target address. */
417         ipv6_addr_copy(&msg->target, solicit);
418
419         if (send_llinfo)
420                 ndisc_fill_option((void*)&msg->opt, ND_OPT_SOURCE_LL_ADDR, dev->dev_addr, dev->addr_len);
421
422         /* checksum */
423         msg->icmph.icmp6_cksum = csum_ipv6_magic(&skb->nh.ipv6h->saddr,
424                                                  daddr, len, 
425                                                  IPPROTO_ICMPV6,
426                                                  csum_partial((__u8 *) msg, 
427                                                               len, 0));
428         /* send it! */
429         dev_queue_xmit(skb);
430
431         ICMP6_INC_STATS(Icmp6OutNeighborSolicits);
432         ICMP6_INC_STATS(Icmp6OutMsgs);
433 }
434
435 void ndisc_send_rs(struct net_device *dev, struct in6_addr *saddr,
436                    struct in6_addr *daddr)
437 {
438         struct sock *sk = ndisc_socket->sk;
439         struct sk_buff *skb;
440         struct icmp6hdr *hdr;
441         __u8 * opt;
442         int len;
443         int err;
444
445         len = sizeof(struct icmp6hdr);
446         if (dev->addr_len)
447                 len += NDISC_OPT_SPACE(dev->addr_len);
448
449         skb = sock_alloc_send_skb(sk, MAX_HEADER + len + dev->hard_header_len + 15,
450                                   0, &err);
451         if (skb == NULL) {
452                 ND_PRINTK1("send_ns: alloc skb failed\n");
453                 return;
454         }
455
456         if (ndisc_build_ll_hdr(skb, dev, daddr, NULL, len) == 0) {
457                 kfree_skb(skb);
458                 return;
459         }
460
461         ip6_nd_hdr(sk, skb, dev, saddr, daddr, IPPROTO_ICMPV6, len);
462
463         hdr = (struct icmp6hdr *) skb_put(skb, len);
464         hdr->icmp6_type = NDISC_ROUTER_SOLICITATION;
465         hdr->icmp6_code = 0;
466         hdr->icmp6_cksum = 0;
467         hdr->icmp6_unused = 0;
468
469         opt = (u8*) (hdr + 1);
470
471         if (dev->addr_len)
472                 ndisc_fill_option(opt, ND_OPT_SOURCE_LL_ADDR, dev->dev_addr, dev->addr_len);
473
474         /* checksum */
475         hdr->icmp6_cksum = csum_ipv6_magic(&skb->nh.ipv6h->saddr, daddr, len,
476                                            IPPROTO_ICMPV6,
477                                            csum_partial((__u8 *) hdr, len, 0));
478
479         /* send it! */
480         dev_queue_xmit(skb);
481
482         ICMP6_INC_STATS(Icmp6OutRouterSolicits);
483         ICMP6_INC_STATS(Icmp6OutMsgs);
484 }
485                    
486
487 static u8 * ndisc_find_option(u8 *opt, int opt_len, int len, int option)
488 {
489         while (opt_len <= len) {
490                 int l = opt[1]<<3;
491
492                 if (opt[0] == option && l >= opt_len)
493                         return opt + 2;
494
495                 if (l == 0) {
496                         if (net_ratelimit())
497                             printk(KERN_WARNING "ndisc: option has 0 len\n");
498                         return NULL;
499                 }
500
501                 opt += l;
502                 len -= l;
503         }
504         return NULL;
505 }
506
507
/*
 *	Neighbour error callback: signal a link failure for the packet's
 *	destination and then drop the packet.  Per the specification,
 *	"The sender MUST return an ICMP destination unreachable".
 */
static void ndisc_error_report(struct neighbour *neigh, struct sk_buff *skb)
{
	dst_link_failure(skb);

	/* The skb is consumed here. */
	kfree_skb(skb);
}
517
518 /* Called with locked neigh: either read or both */
519
520 static void ndisc_solicit(struct neighbour *neigh, struct sk_buff *skb)
521 {
522         struct in6_addr *saddr = NULL;
523         struct in6_addr mcaddr;
524         struct net_device *dev = neigh->dev;
525         struct in6_addr *target = (struct in6_addr *)&neigh->primary_key;
526         int probes = atomic_read(&neigh->probes);
527
528         if (skb && ipv6_chk_addr(&skb->nh.ipv6h->saddr, dev))
529                 saddr = &skb->nh.ipv6h->saddr;
530
531         if ((probes -= neigh->parms->ucast_probes) < 0) {
532                 if (!(neigh->nud_state&NUD_VALID))
533                         ND_PRINTK1("trying to ucast probe in NUD_INVALID\n");
534                 ndisc_send_ns(dev, neigh, target, target, saddr);
535         } else if ((probes -= neigh->parms->app_probes) < 0) {
536 #ifdef CONFIG_ARPD
537                 neigh_app_ns(neigh);
538 #endif
539         } else {
540                 addrconf_addr_solict_mult(target, &mcaddr);
541                 ndisc_send_ns(dev, NULL, target, &mcaddr, saddr);
542         }
543 }
544
545
546 static void ndisc_update(struct neighbour *neigh, u8* opt, int len, int type)
547 {
548         opt = ndisc_find_option(opt, neigh->dev->addr_len+2, len, type);
549         neigh_update(neigh, opt, NUD_STALE, 1, 1);
550 }
551
552 static void ndisc_router_discovery(struct sk_buff *skb)
553 {
554         struct ra_msg *ra_msg = (struct ra_msg *) skb->h.raw;
555         struct neighbour *neigh;
556         struct inet6_dev *in6_dev;
557         struct rt6_info *rt;
558         int lifetime;
559         int optlen;
560
561         __u8 * opt = (__u8 *)(ra_msg + 1);
562
563         optlen = (skb->tail - skb->h.raw) - sizeof(struct ra_msg);
564
565         if (!(ipv6_addr_type(&skb->nh.ipv6h->saddr) & IPV6_ADDR_LINKLOCAL)) {
566                 if (net_ratelimit())
567                         printk(KERN_WARNING "ICMP RA: source address is not linklocal\n");
568                 return;
569         }
570
571         /*
572          *      set the RA_RECV flag in the interface
573          */
574
575         in6_dev = in6_dev_get(skb->dev);
576         if (in6_dev == NULL) {
577                 ND_PRINTK1("RA: can't find in6 device\n");
578                 return;
579         }
580         if (in6_dev->cnf.forwarding || !in6_dev->cnf.accept_ra) {
581                 in6_dev_put(in6_dev);
582                 return;
583         }
584
585         if (in6_dev->if_flags & IF_RS_SENT) {
586                 /*
587                  *      flag that an RA was received after an RS was sent
588                  *      out on this interface.
589                  */
590                 in6_dev->if_flags |= IF_RA_RCVD;
591         }
592
593         lifetime = ntohs(ra_msg->icmph.icmp6_rt_lifetime);
594
595         rt = rt6_get_dflt_router(&skb->nh.ipv6h->saddr, skb->dev);
596
597         if (rt && lifetime == 0) {
598                 ip6_del_rt(rt);
599                 rt = NULL;
600         }
601
602         if (rt == NULL && lifetime) {
603                 ND_PRINTK2("ndisc_rdisc: adding default router\n");
604
605                 rt = rt6_add_dflt_router(&skb->nh.ipv6h->saddr, skb->dev);
606                 if (rt == NULL) {
607                         ND_PRINTK1("route_add failed\n");
608                         in6_dev_put(in6_dev);
609                         return;
610                 }
611
612                 neigh = rt->rt6i_nexthop;
613                 if (neigh == NULL) {
614                         ND_PRINTK1("nd: add default router: null neighbour\n");
615                         dst_release(&rt->u.dst);
616                         in6_dev_put(in6_dev);
617                         return;
618                 }
619                 neigh->flags |= NTF_ROUTER;
620
621                 /*
622                  *      If we where using an "all destinations on link" route
623                  *      delete it
624                  */
625
626                 rt6_purge_dflt_routers(RTF_ALLONLINK);
627         }
628
629         if (rt)
630                 rt->rt6i_expires = jiffies + (HZ * lifetime);
631
632         if (ra_msg->icmph.icmp6_hop_limit)
633                 in6_dev->cnf.hop_limit = ra_msg->icmph.icmp6_hop_limit;
634
635         /*
636          *      Update Reachable Time and Retrans Timer
637          */
638
639         if (in6_dev->nd_parms) {
640                 __u32 rtime = ntohl(ra_msg->retrans_timer);
641
642                 if (rtime && rtime/1000 < MAX_SCHEDULE_TIMEOUT/HZ) {
643                         rtime = (rtime*HZ)/1000;
644                         if (rtime < HZ/10)
645                                 rtime = HZ/10;
646                         in6_dev->nd_parms->retrans_time = rtime;
647                 }
648
649                 rtime = ntohl(ra_msg->reachable_time);
650                 if (rtime && rtime/1000 < MAX_SCHEDULE_TIMEOUT/(3*HZ)) {
651                         rtime = (rtime*HZ)/1000;
652
653                         if (rtime < HZ/10)
654                                 rtime = HZ/10;
655
656                         if (rtime != in6_dev->nd_parms->base_reachable_time) {
657                                 in6_dev->nd_parms->base_reachable_time = rtime;
658                                 in6_dev->nd_parms->gc_staletime = 3 * rtime;
659                                 in6_dev->nd_parms->reachable_time = neigh_rand_reach_time(rtime);
660                         }
661                 }
662         }
663
664         /*
665          *      Process options.
666          */
667
668         while (optlen > 0) {
669                 int len = (opt[1] << 3);
670
671                 if (len == 0) {
672                         ND_PRINTK0("RA: opt has 0 len\n");
673                         break;
674                 }
675
676                 switch(*opt) {
677                 case ND_OPT_SOURCE_LL_ADDR:
678
679                         if (rt == NULL)
680                                 break;
681                         
682                         if ((neigh = rt->rt6i_nexthop) != NULL &&
683                             skb->dev->addr_len + 2 >= len)
684                                 neigh_update(neigh, opt+2, NUD_STALE, 1, 1);
685                         break;
686
687                 case ND_OPT_PREFIX_INFO:
688                         addrconf_prefix_rcv(skb->dev, opt, len);
689                         break;
690
691                 case ND_OPT_MTU:
692                         {
693                                 int mtu;
694                                 
695                                 mtu = htonl(*(__u32 *)(opt+4));
696
697                                 if (mtu < IPV6_MIN_MTU || mtu > skb->dev->mtu) {
698                                         ND_PRINTK0("NDISC: router "
699                                                    "announcement with mtu = %d\n",
700                                                    mtu);
701                                         break;
702                                 }
703
704                                 if (in6_dev->cnf.mtu6 != mtu) {
705                                         in6_dev->cnf.mtu6 = mtu;
706
707                                         if (rt)
708                                                 rt->u.dst.pmtu = mtu;
709
710                                         rt6_mtu_change(skb->dev, mtu);
711                                 }
712                         }
713                         break;
714
715                 case ND_OPT_TARGET_LL_ADDR:
716                 case ND_OPT_REDIRECT_HDR:
717                         ND_PRINTK0("got illegal option with RA");
718                         break;
719                 default:
720                         ND_PRINTK0("unkown option in RA\n");
721                 };
722                 optlen -= len;
723                 opt += len;
724         }
725         if (rt)
726                 dst_release(&rt->u.dst);
727         in6_dev_put(in6_dev);
728 }
729
730 static void ndisc_redirect_rcv(struct sk_buff *skb)
731 {
732         struct inet6_dev *in6_dev;
733         struct icmp6hdr *icmph;
734         struct in6_addr *dest;
735         struct in6_addr *target;        /* new first hop to destination */
736         struct neighbour *neigh;
737         int on_link = 0;
738         int optlen;
739
740         if (!(ipv6_addr_type(&skb->nh.ipv6h->saddr) & IPV6_ADDR_LINKLOCAL)) {
741                 if (net_ratelimit())
742                         printk(KERN_WARNING "ICMP redirect: source address is not linklocal\n");
743                 return;
744         }
745
746         optlen = skb->tail - skb->h.raw;
747         optlen -= sizeof(struct icmp6hdr) + 2 * sizeof(struct in6_addr);
748
749         if (optlen < 0) {
750                 if (net_ratelimit())
751                         printk(KERN_WARNING "ICMP redirect: packet too small\n");
752                 return;
753         }
754
755         icmph = (struct icmp6hdr *) skb->h.raw;
756         target = (struct in6_addr *) (icmph + 1);
757         dest = target + 1;
758
759         if (ipv6_addr_type(dest) & IPV6_ADDR_MULTICAST) {
760                 if (net_ratelimit())
761                         printk(KERN_WARNING "ICMP redirect for multicast addr\n");
762                 return;
763         }
764
765         if (ipv6_addr_cmp(dest, target) == 0) {
766                 on_link = 1;
767         } else if (!(ipv6_addr_type(target) & IPV6_ADDR_LINKLOCAL)) {
768                 if (net_ratelimit())
769                         printk(KERN_WARNING "ICMP redirect: target address is not linklocal\n");
770                 return;
771         }
772
773         in6_dev = in6_dev_get(skb->dev);
774         if (!in6_dev)
775                 return;
776         if (in6_dev->cnf.forwarding || !in6_dev->cnf.accept_redirects) {
777                 in6_dev_put(in6_dev);
778                 return;
779         }
780
781         /* XXX: RFC2461 8.1: 
782          *      The IP source address of the Redirect MUST be the same as the current
783          *      first-hop router for the specified ICMP Destination Address.
784          */
785                 
786         /* passed validation tests */
787
788         /*
789            We install redirect only if nexthop state is valid.
790          */
791
792         neigh = __neigh_lookup(&nd_tbl, target, skb->dev, 1);
793         if (neigh) {
794                 ndisc_update(neigh, (u8*)(dest + 1), optlen, ND_OPT_TARGET_LL_ADDR);
795                 if (neigh->nud_state&NUD_VALID)
796                         rt6_redirect(dest, &skb->nh.ipv6h->saddr, neigh, on_link);
797                 else
798                         __neigh_event_send(neigh, NULL);
799                 neigh_release(neigh);
800         }
801         in6_dev_put(in6_dev);
802 }
803
804 void ndisc_send_redirect(struct sk_buff *skb, struct neighbour *neigh,
805                          struct in6_addr *target)
806 {
807         struct sock *sk = ndisc_socket->sk;
808         int len = sizeof(struct icmp6hdr) + 2 * sizeof(struct in6_addr);
809         struct sk_buff *buff;
810         struct icmp6hdr *icmph;
811         struct in6_addr saddr_buf;
812         struct in6_addr *addrp;
813         struct net_device *dev;
814         struct rt6_info *rt;
815         u8 *opt;
816         int rd_len;
817         int err;
818         int hlen;
819
820         dev = skb->dev;
821         rt = rt6_lookup(&skb->nh.ipv6h->saddr, NULL, dev->ifindex, 1);
822
823         if (rt == NULL)
824                 return;
825
826         if (rt->rt6i_flags & RTF_GATEWAY) {
827                 ND_PRINTK1("ndisc_send_redirect: not a neighbour\n");
828                 dst_release(&rt->u.dst);
829                 return;
830         }
831         if (!xrlim_allow(&rt->u.dst, 1*HZ)) {
832                 dst_release(&rt->u.dst);
833                 return;
834         }
835         dst_release(&rt->u.dst);
836
837         if (dev->addr_len) {
838                 if (neigh->nud_state&NUD_VALID) {
839                         len  += NDISC_OPT_SPACE(dev->addr_len);
840                 } else {
841                         /* If nexthop is not valid, do not redirect!
842                            We will make it later, when will be sure,
843                            that it is alive.
844                          */
845                         return;
846                 }
847         }
848
849         rd_len = min_t(unsigned int,
850                      IPV6_MIN_MTU-sizeof(struct ipv6hdr)-len, skb->len + 8);
851         rd_len &= ~0x7;
852         len += rd_len;
853
854         if (ipv6_get_lladdr(dev, &saddr_buf)) {
855                 ND_PRINTK1("redirect: no link_local addr for dev\n");
856                 return;
857         }
858
859         buff = sock_alloc_send_skb(sk, MAX_HEADER + len + dev->hard_header_len + 15,
860                                    0, &err);
861         if (buff == NULL) {
862                 ND_PRINTK1("ndisc_send_redirect: alloc_skb failed\n");
863                 return;
864         }
865
866         hlen = 0;
867
868         if (ndisc_build_ll_hdr(buff, dev, &skb->nh.ipv6h->saddr, NULL, len) == 0) {
869                 kfree_skb(buff);
870                 return;
871         }
872
873         ip6_nd_hdr(sk, buff, dev, &saddr_buf, &skb->nh.ipv6h->saddr,
874                    IPPROTO_ICMPV6, len);
875
876         icmph = (struct icmp6hdr *) skb_put(buff, len);
877
878         memset(icmph, 0, sizeof(struct icmp6hdr));
879         icmph->icmp6_type = NDISC_REDIRECT;
880
881         /*
882          *      copy target and destination addresses
883          */
884
885         addrp = (struct in6_addr *)(icmph + 1);
886         ipv6_addr_copy(addrp, target);
887         addrp++;
888         ipv6_addr_copy(addrp, &skb->nh.ipv6h->daddr);
889
890         opt = (u8*) (addrp + 1);
891
892         /*
893          *      include target_address option
894          */
895
896         if (dev->addr_len)
897                 opt = ndisc_fill_option(opt, ND_OPT_TARGET_LL_ADDR, neigh->ha, dev->addr_len);
898
899         /*
900          *      build redirect option and copy skb over to the new packet.
901          */
902
903         memset(opt, 0, 8);      
904         *(opt++) = ND_OPT_REDIRECT_HDR;
905         *(opt++) = (rd_len >> 3);
906         opt += 6;
907
908         memcpy(opt, skb->nh.ipv6h, rd_len - 8);
909
910         icmph->icmp6_cksum = csum_ipv6_magic(&saddr_buf, &skb->nh.ipv6h->saddr,
911                                              len, IPPROTO_ICMPV6,
912                                              csum_partial((u8 *) icmph, len, 0));
913
914         dev_queue_xmit(buff);
915
916         ICMP6_INC_STATS(Icmp6OutRedirects);
917         ICMP6_INC_STATS(Icmp6OutMsgs);
918 }
919
920 static __inline__ struct neighbour *
921 ndisc_recv_ns(struct in6_addr *saddr, struct sk_buff *skb)
922 {
923         u8 *opt;
924
925         opt = skb->h.raw;
926         opt += sizeof(struct icmp6hdr) + sizeof(struct in6_addr);
927         opt = ndisc_find_option(opt, skb->dev->addr_len+2, skb->tail - opt, ND_OPT_SOURCE_LL_ADDR);
928
929         return neigh_event_ns(&nd_tbl, opt, saddr, skb->dev);
930 }
931
932 static __inline__ int ndisc_recv_na(struct neighbour *neigh, struct sk_buff *skb)
933 {
934         struct nd_msg *msg = (struct nd_msg *) skb->h.raw;
935         u8 *opt;
936
937         opt = skb->h.raw;
938         opt += sizeof(struct icmp6hdr) + sizeof(struct in6_addr);
939         opt = ndisc_find_option(opt, skb->dev->addr_len+2, skb->tail - opt, ND_OPT_TARGET_LL_ADDR);
940
941         return neigh_update(neigh, opt,
942                             msg->icmph.icmp6_solicited ? NUD_REACHABLE : NUD_STALE,
943                             msg->icmph.icmp6_override, 1);
944 }
945
/*
 *	Re-run neighbour discovery input processing on a buffer and then
 *	free it.  The return value of ndisc_rcv() is ignored.
 *
 *	NOTE(review): presumably installed as the redo hook for buffers
 *	queued via pneigh_enqueue() -- confirm at the nd_tbl definition.
 */
static void pndisc_redo(struct sk_buff *skb)
{
	(void) ndisc_rcv(skb);

	/* This function owns the buffer: release it once processed. */
	kfree_skb(skb);
}
951
952 int ndisc_rcv(struct sk_buff *skb)
953 {
954         struct net_device *dev = skb->dev;
955         struct in6_addr *saddr = &skb->nh.ipv6h->saddr;
956         struct in6_addr *daddr = &skb->nh.ipv6h->daddr;
957         struct nd_msg *msg = (struct nd_msg *) skb->h.raw;
958         struct neighbour *neigh;
959         struct inet6_ifaddr *ifp;
960         unsigned int payload_len;
961
962         __skb_push(skb, skb->data-skb->h.raw);
963
964         if (skb->nh.ipv6h->hop_limit != 255) {
965                 if (net_ratelimit())
966                         printk(KERN_WARNING
967                                "ICMP NDISC: fake message with non-255 Hop Limit received: %d\n",
968                                         skb->nh.ipv6h->hop_limit);
969                 return 0;
970         }
971
972         if (msg->icmph.icmp6_code != 0) {
973                 if (net_ratelimit())
974                         printk(KERN_WARNING "ICMP NDISC: code is not zero\n");
975                 return 0;
976         }
977
978         /* XXX: RFC2461 Validation of [all ndisc messages]:
979          *      All included ndisc options MUST be of non-zero length
980          *      (Some checking in ndisc_find_option)
981          */
982
983         payload_len = ntohs(skb->nh.ipv6h->payload_len);
984         switch (msg->icmph.icmp6_type) {
985         case NDISC_NEIGHBOUR_SOLICITATION:
986                 /* XXX: import nd_neighbor_solicit from glibc netinet/icmp6.h */
987                 if (payload_len < 8+16) {
988                         if (net_ratelimit())
989                                 printk(KERN_WARNING "ICMP NS: packet too short\n");
990                         return 0;
991                 }
992
993                 if (ipv6_addr_type(&msg->target)&IPV6_ADDR_MULTICAST) {
994                         if (net_ratelimit())
995                                 printk(KERN_WARNING "ICMP NS: target address is multicast\n");
996                         return 0;
997                 }
998
999                 /* XXX: RFC2461 7.1.1:
1000                  *      If the IP source address is the unspecified address, there
1001                  *      MUST NOT be source link-layer address option in the message.
1002                  *
1003                  *      NOTE! Linux kernel < 2.4.4 broke this rule.
1004                  */
1005                         
1006                 /* XXX: RFC2461 7.1.1:
1007                  *      If the IP source address is the unspecified address, the IP
1008                  *      destination address MUST be a solicited-node multicast address.
1009                  */
1010
1011                 if ((ifp = ipv6_get_ifaddr(&msg->target, dev)) != NULL) {
1012                         int addr_type = ipv6_addr_type(saddr);
1013
1014                         if (ifp->flags & IFA_F_TENTATIVE) {
1015                                 /* Address is tentative. If the source
1016                                    is unspecified address, it is someone
1017                                    does DAD, otherwise we ignore solicitations
1018                                    until DAD timer expires.
1019                                  */
1020                                 if (addr_type == IPV6_ADDR_ANY) {
1021                                         if (dev->type == ARPHRD_IEEE802_TR) { 
1022                                                 unsigned char *sadr = skb->mac.raw ;
1023                                                 if (((sadr[8] &0x7f) != (dev->dev_addr[0] & 0x7f)) ||
1024                                                 (sadr[9] != dev->dev_addr[1]) ||
1025                                                 (sadr[10] != dev->dev_addr[2]) ||
1026                                                 (sadr[11] != dev->dev_addr[3]) ||
1027                                                 (sadr[12] != dev->dev_addr[4]) ||
1028                                                 (sadr[13] != dev->dev_addr[5])) 
1029                                                 {
1030                                                         addrconf_dad_failure(ifp) ; 
1031                                                 }
1032                                         } else {
1033                                                 addrconf_dad_failure(ifp);
1034                                         }
1035                                 } else
1036                                         in6_ifa_put(ifp);
1037                                 return 0;
1038                         }
1039
1040                         if (addr_type == IPV6_ADDR_ANY) {
1041                                 struct in6_addr maddr;
1042
1043                                 ipv6_addr_all_nodes(&maddr);
1044                                 ndisc_send_na(dev, NULL, &maddr, &ifp->addr, 
1045                                               ifp->idev->cnf.forwarding, 0, 
1046                                               ipv6_addr_type(&ifp->addr)&IPV6_ADDR_ANYCAST ? 0 : 1, 
1047                                               1);
1048                                 in6_ifa_put(ifp);
1049                                 return 0;
1050                         }
1051
1052                         if (addr_type & IPV6_ADDR_UNICAST) {
1053                                 int inc = ipv6_addr_type(daddr)&IPV6_ADDR_MULTICAST;
1054
1055                                 if (inc)
1056                                         nd_tbl.stats.rcv_probes_mcast++;
1057                                 else
1058                                         nd_tbl.stats.rcv_probes_ucast++;
1059
1060                                 /* 
1061                                  *      update / create cache entry
1062                                  *      for the source adddress
1063                                  */
1064
1065                                 neigh = ndisc_recv_ns(saddr, skb);
1066
1067                                 if (neigh) {
1068                                         ndisc_send_na(dev, neigh, saddr, &ifp->addr, 
1069                                                       ifp->idev->cnf.forwarding, 1, 
1070                                                       ipv6_addr_type(&ifp->addr)&IPV6_ADDR_ANYCAST ? 0 : 1, 
1071                                                       1);
1072                                         neigh_release(neigh);
1073                                 }
1074                         }
1075                         in6_ifa_put(ifp);
1076                 } else {
1077                         struct inet6_dev *in6_dev = in6_dev_get(dev);
1078                         int addr_type = ipv6_addr_type(saddr);
1079
1080                         if (in6_dev && in6_dev->cnf.forwarding &&
1081                             (addr_type & IPV6_ADDR_UNICAST) &&
1082                             pneigh_lookup(&nd_tbl, &msg->target, dev, 0)) {
1083                                 int inc = ipv6_addr_type(daddr)&IPV6_ADDR_MULTICAST;
1084
1085                                 if (skb->stamp.tv_sec == 0 ||
1086                                     skb->pkt_type == PACKET_HOST ||
1087                                     inc == 0 ||
1088                                     in6_dev->nd_parms->proxy_delay == 0) {
1089                                         if (inc)
1090                                                 nd_tbl.stats.rcv_probes_mcast++;
1091                                         else
1092                                                 nd_tbl.stats.rcv_probes_ucast++;
1093
1094                                         neigh = ndisc_recv_ns(saddr, skb);
1095
1096                                         if (neigh) {
1097                                                 ndisc_send_na(dev, neigh, saddr, &msg->target,
1098                                                               0, 1, 0, 1);
1099                                                 neigh_release(neigh);
1100                                         }
1101                                 } else {
1102                                         struct sk_buff *n = skb_clone(skb, GFP_ATOMIC);
1103                                         if (n)
1104                                                 pneigh_enqueue(&nd_tbl, in6_dev->nd_parms, n);
1105                                         in6_dev_put(in6_dev);
1106                                         return 0;
1107                                 }
1108                         }
1109                         if (in6_dev)
1110                                 in6_dev_put(in6_dev);
1111                         
1112                 }
1113                 return 0;
1114
1115         case NDISC_NEIGHBOUR_ADVERTISEMENT:
1116                 /* XXX: import nd_neighbor_advert from glibc netinet/icmp6.h */
1117                 if (payload_len < 16+8 ) {
1118                         if (net_ratelimit())
1119                                 printk(KERN_WARNING "ICMP NA: packet too short\n");
1120                         return 0;
1121                 }
1122
1123                 if (ipv6_addr_type(&msg->target)&IPV6_ADDR_MULTICAST) {
1124                         if (net_ratelimit())
1125                                 printk(KERN_WARNING "NDISC NA: target address is multicast\n");
1126                         return 0;
1127                 }
1128
1129                 if ((ipv6_addr_type(daddr)&IPV6_ADDR_MULTICAST) &&
1130                     msg->icmph.icmp6_solicited) {
1131                         ND_PRINTK0("NDISC: solicited NA is multicasted\n");
1132                         return 0;
1133                 }
1134                 
1135                 if ((ifp = ipv6_get_ifaddr(&msg->target, dev))) {
1136                         if (ifp->flags & IFA_F_TENTATIVE) {
1137                                 addrconf_dad_failure(ifp);
1138                                 return 0;
1139                         }
1140                         /* What should we make now? The advertisement
1141                            is invalid, but ndisc specs say nothing
1142                            about it. It could be misconfiguration, or
1143                            an smart proxy agent tries to help us :-)
1144                          */
1145                         ND_PRINTK0("%s: someone advertises our address!\n",
1146                                    ifp->idev->dev->name);
1147                         in6_ifa_put(ifp);
1148                         return 0;
1149                 }
1150                 neigh = neigh_lookup(&nd_tbl, &msg->target, skb->dev);
1151
1152                 if (neigh) {
1153                         if (neigh->flags & NTF_ROUTER) {
1154                                 if (msg->icmph.icmp6_router == 0) {
1155                                         /*
1156                                          *      Change: router to host
1157                                          */
1158                                         struct rt6_info *rt;
1159                                         rt = rt6_get_dflt_router(saddr, skb->dev);
1160                                         if (rt) {
1161                                                 /* It is safe only because
1162                                                    we aer in BH */
1163                                                 dst_release(&rt->u.dst);
1164                                                 ip6_del_rt(rt);
1165                                         }
1166                                 }
1167                         } else {
1168                                 if (msg->icmph.icmp6_router)
1169                                         neigh->flags |= NTF_ROUTER;
1170                         }
1171
1172                         ndisc_recv_na(neigh, skb);
1173                         neigh_release(neigh);
1174                 }
1175                 break;
1176
1177         case NDISC_ROUTER_ADVERTISEMENT:
1178                 /* XXX: import nd_router_advert from glibc netinet/icmp6.h */
1179                 if (payload_len < 8+4+4) {
1180                         if (net_ratelimit())
1181                                 printk(KERN_WARNING "ICMP RA: packet too short\n");
1182                         return 0;
1183                 }
1184                 ndisc_router_discovery(skb);
1185                 break;
1186
1187         case NDISC_REDIRECT:
1188                 /* XXX: import nd_redirect from glibc netinet/icmp6.h */
1189                 if (payload_len < 8+16+16) {
1190                         if (net_ratelimit())
1191                                 printk(KERN_WARNING "ICMP redirect: packet too short\n");
1192                         return 0;
1193                 }
1194                 ndisc_redirect_rcv(skb);
1195                 break;
1196
1197         case NDISC_ROUTER_SOLICITATION:
1198                 /* No RS support in the kernel, but we do some required checks */
1199
1200                 /* XXX: import nd_router_solicit from glibc netinet/icmp6.h */
1201                 if (payload_len < 8) {
1202                         if (net_ratelimit())
1203                                 printk(KERN_WARNING "ICMP RS: packet too short\n");
1204                         return 0;
1205                 }
1206                 break;
1207         };
1208
1209         return 0;
1210 }
1211
1212 int __init ndisc_init(struct net_proto_family *ops)
1213 {
1214         struct sock *sk;
1215         int err;
1216
1217         err = sock_create(PF_INET6, SOCK_RAW, IPPROTO_ICMPV6, &ndisc_socket);
1218         if (err < 0) {
1219                 printk(KERN_ERR
1220                        "Failed to initialize the NDISC control socket (err %d).\n",
1221                        err);
1222                 ndisc_socket = NULL; /* For safety. */
1223                 return err;
1224         }
1225
1226         sk = ndisc_socket->sk;
1227         sk->allocation = GFP_ATOMIC;
1228         sk->net_pinfo.af_inet6.hop_limit = 255;
1229         /* Do not loopback ndisc messages */
1230         sk->net_pinfo.af_inet6.mc_loop = 0;
1231         sk->prot->unhash(sk);
1232
1233         /*
1234          * Initialize the neighbour table
1235          */
1236         
1237         neigh_table_init(&nd_tbl);
1238
1239 #ifdef CONFIG_SYSCTL
1240         neigh_sysctl_register(NULL, &nd_tbl.parms, NET_IPV6, NET_IPV6_NEIGH, "ipv6");
1241 #endif
1242
1243         return 0;
1244 }
1245
1246 void ndisc_cleanup(void)
1247 {
1248         neigh_table_clear(&nd_tbl);
1249         sock_release(ndisc_socket);
1250         ndisc_socket = NULL; /* For safety. */
1251 }