some kmalloc/memset ->kzalloc (tree wide)
[linux-2.6] / net / ipv4 / ip_options.c
1 /*
2  * INET         An implementation of the TCP/IP protocol suite for the LINUX
3  *              operating system.  INET is implemented using the  BSD Socket
4  *              interface as the means of communication with the user level.
5  *
6  *              The options processing module for ip.c
7  *
8  * Version:     $Id: ip_options.c,v 1.21 2001/09/01 00:31:50 davem Exp $
9  *
10  * Authors:     A.N.Kuznetsov
11  *
12  */
13
14 #include <linux/capability.h>
15 #include <linux/module.h>
16 #include <linux/types.h>
17 #include <asm/uaccess.h>
18 #include <linux/skbuff.h>
19 #include <linux/ip.h>
20 #include <linux/icmp.h>
21 #include <linux/netdevice.h>
22 #include <linux/rtnetlink.h>
23 #include <net/sock.h>
24 #include <net/ip.h>
25 #include <net/icmp.h>
26 #include <net/route.h>
27 #include <net/cipso_ipv4.h>
28
29 /*
30  * Write options to IP header, record destination address to
31  * source route option, address of outgoing interface
32  * (we should already know it, so that this  function is allowed be
33  * called only after routing decision) and timestamp,
34  * if we originate this datagram.
35  *
36  * daddr is real destination address, next hop is recorded in IP header.
37  * saddr is address of outgoing interface.
38  */
39
40 void ip_options_build(struct sk_buff * skb, struct ip_options * opt,
41                             __be32 daddr, struct rtable *rt, int is_frag)
42 {
43         unsigned char *iph = skb_network_header(skb);
44
45         memcpy(&(IPCB(skb)->opt), opt, sizeof(struct ip_options));
46         memcpy(iph+sizeof(struct iphdr), opt->__data, opt->optlen);
47         opt = &(IPCB(skb)->opt);
48         opt->is_data = 0;
49
50         if (opt->srr)
51                 memcpy(iph+opt->srr+iph[opt->srr+1]-4, &daddr, 4);
52
53         if (!is_frag) {
54                 if (opt->rr_needaddr)
55                         ip_rt_get_source(iph+opt->rr+iph[opt->rr+2]-5, rt);
56                 if (opt->ts_needaddr)
57                         ip_rt_get_source(iph+opt->ts+iph[opt->ts+2]-9, rt);
58                 if (opt->ts_needtime) {
59                         struct timeval tv;
60                         __be32 midtime;
61                         do_gettimeofday(&tv);
62                         midtime = htonl((tv.tv_sec % 86400) * 1000 + tv.tv_usec / 1000);
63                         memcpy(iph+opt->ts+iph[opt->ts+2]-5, &midtime, 4);
64                 }
65                 return;
66         }
67         if (opt->rr) {
68                 memset(iph+opt->rr, IPOPT_NOP, iph[opt->rr+1]);
69                 opt->rr = 0;
70                 opt->rr_needaddr = 0;
71         }
72         if (opt->ts) {
73                 memset(iph+opt->ts, IPOPT_NOP, iph[opt->ts+1]);
74                 opt->ts = 0;
75                 opt->ts_needaddr = opt->ts_needtime = 0;
76         }
77 }
78
79 /*
80  * Provided (sopt, skb) points to received options,
81  * build in dopt compiled option set appropriate for answering.
82  * i.e. invert SRR option, copy anothers,
83  * and grab room in RR/TS options.
84  *
85  * NOTE: dopt cannot point to skb.
86  */
87
88 int ip_options_echo(struct ip_options * dopt, struct sk_buff * skb)
89 {
90         struct ip_options *sopt;
91         unsigned char *sptr, *dptr;
92         int soffset, doffset;
93         int     optlen;
94         __be32  daddr;
95
96         memset(dopt, 0, sizeof(struct ip_options));
97
98         dopt->is_data = 1;
99
100         sopt = &(IPCB(skb)->opt);
101
102         if (sopt->optlen == 0) {
103                 dopt->optlen = 0;
104                 return 0;
105         }
106
107         sptr = skb_network_header(skb);
108         dptr = dopt->__data;
109
110         if (skb->dst)
111                 daddr = ((struct rtable*)skb->dst)->rt_spec_dst;
112         else
113                 daddr = ip_hdr(skb)->daddr;
114
115         if (sopt->rr) {
116                 optlen  = sptr[sopt->rr+1];
117                 soffset = sptr[sopt->rr+2];
118                 dopt->rr = dopt->optlen + sizeof(struct iphdr);
119                 memcpy(dptr, sptr+sopt->rr, optlen);
120                 if (sopt->rr_needaddr && soffset <= optlen) {
121                         if (soffset + 3 > optlen)
122                                 return -EINVAL;
123                         dptr[2] = soffset + 4;
124                         dopt->rr_needaddr = 1;
125                 }
126                 dptr += optlen;
127                 dopt->optlen += optlen;
128         }
129         if (sopt->ts) {
130                 optlen = sptr[sopt->ts+1];
131                 soffset = sptr[sopt->ts+2];
132                 dopt->ts = dopt->optlen + sizeof(struct iphdr);
133                 memcpy(dptr, sptr+sopt->ts, optlen);
134                 if (soffset <= optlen) {
135                         if (sopt->ts_needaddr) {
136                                 if (soffset + 3 > optlen)
137                                         return -EINVAL;
138                                 dopt->ts_needaddr = 1;
139                                 soffset += 4;
140                         }
141                         if (sopt->ts_needtime) {
142                                 if (soffset + 3 > optlen)
143                                         return -EINVAL;
144                                 if ((dptr[3]&0xF) != IPOPT_TS_PRESPEC) {
145                                         dopt->ts_needtime = 1;
146                                         soffset += 4;
147                                 } else {
148                                         dopt->ts_needtime = 0;
149
150                                         if (soffset + 8 <= optlen) {
151                                                 __be32 addr;
152
153                                                 memcpy(&addr, sptr+soffset-1, 4);
154                                                 if (inet_addr_type(addr) != RTN_LOCAL) {
155                                                         dopt->ts_needtime = 1;
156                                                         soffset += 8;
157                                                 }
158                                         }
159                                 }
160                         }
161                         dptr[2] = soffset;
162                 }
163                 dptr += optlen;
164                 dopt->optlen += optlen;
165         }
166         if (sopt->srr) {
167                 unsigned char * start = sptr+sopt->srr;
168                 __be32 faddr;
169
170                 optlen  = start[1];
171                 soffset = start[2];
172                 doffset = 0;
173                 if (soffset > optlen)
174                         soffset = optlen + 1;
175                 soffset -= 4;
176                 if (soffset > 3) {
177                         memcpy(&faddr, &start[soffset-1], 4);
178                         for (soffset-=4, doffset=4; soffset > 3; soffset-=4, doffset+=4)
179                                 memcpy(&dptr[doffset-1], &start[soffset-1], 4);
180                         /*
181                          * RFC1812 requires to fix illegal source routes.
182                          */
183                         if (memcmp(&ip_hdr(skb)->saddr,
184                                    &start[soffset + 3], 4) == 0)
185                                 doffset -= 4;
186                 }
187                 if (doffset > 3) {
188                         memcpy(&start[doffset-1], &daddr, 4);
189                         dopt->faddr = faddr;
190                         dptr[0] = start[0];
191                         dptr[1] = doffset+3;
192                         dptr[2] = 4;
193                         dptr += doffset+3;
194                         dopt->srr = dopt->optlen + sizeof(struct iphdr);
195                         dopt->optlen += doffset+3;
196                         dopt->is_strictroute = sopt->is_strictroute;
197                 }
198         }
199         if (sopt->cipso) {
200                 optlen  = sptr[sopt->cipso+1];
201                 dopt->cipso = dopt->optlen+sizeof(struct iphdr);
202                 memcpy(dptr, sptr+sopt->cipso, optlen);
203                 dptr += optlen;
204                 dopt->optlen += optlen;
205         }
206         while (dopt->optlen & 3) {
207                 *dptr++ = IPOPT_END;
208                 dopt->optlen++;
209         }
210         return 0;
211 }
212
213 /*
214  *      Options "fragmenting", just fill options not
215  *      allowed in fragments with NOOPs.
216  *      Simple and stupid 8), but the most efficient way.
217  */
218
219 void ip_options_fragment(struct sk_buff * skb)
220 {
221         unsigned char *optptr = skb_network_header(skb) + sizeof(struct iphdr);
222         struct ip_options * opt = &(IPCB(skb)->opt);
223         int  l = opt->optlen;
224         int  optlen;
225
226         while (l > 0) {
227                 switch (*optptr) {
228                 case IPOPT_END:
229                         return;
230                 case IPOPT_NOOP:
231                         l--;
232                         optptr++;
233                         continue;
234                 }
235                 optlen = optptr[1];
236                 if (optlen<2 || optlen>l)
237                   return;
238                 if (!IPOPT_COPIED(*optptr))
239                         memset(optptr, IPOPT_NOOP, optlen);
240                 l -= optlen;
241                 optptr += optlen;
242         }
243         opt->ts = 0;
244         opt->rr = 0;
245         opt->rr_needaddr = 0;
246         opt->ts_needaddr = 0;
247         opt->ts_needtime = 0;
248         return;
249 }
250
251 /*
252  * Verify options and fill pointers in struct options.
253  * Caller should clear *opt, and set opt->data.
254  * If opt == NULL, then skb->data should point to IP header.
255  */
256
257 int ip_options_compile(struct ip_options * opt, struct sk_buff * skb)
258 {
259         int l;
260         unsigned char * iph;
261         unsigned char * optptr;
262         int optlen;
263         unsigned char * pp_ptr = NULL;
264         struct rtable *rt = skb ? (struct rtable*)skb->dst : NULL;
265
266         if (!opt) {
267                 opt = &(IPCB(skb)->opt);
268                 iph = skb_network_header(skb);
269                 opt->optlen = ((struct iphdr *)iph)->ihl*4 - sizeof(struct iphdr);
270                 optptr = iph + sizeof(struct iphdr);
271                 opt->is_data = 0;
272         } else {
273                 optptr = opt->is_data ? opt->__data :
274                                         (unsigned char *)&(ip_hdr(skb)[1]);
275                 iph = optptr - sizeof(struct iphdr);
276         }
277
278         for (l = opt->optlen; l > 0; ) {
279                 switch (*optptr) {
280                       case IPOPT_END:
281                         for (optptr++, l--; l>0; optptr++, l--) {
282                                 if (*optptr != IPOPT_END) {
283                                         *optptr = IPOPT_END;
284                                         opt->is_changed = 1;
285                                 }
286                         }
287                         goto eol;
288                       case IPOPT_NOOP:
289                         l--;
290                         optptr++;
291                         continue;
292                 }
293                 optlen = optptr[1];
294                 if (optlen<2 || optlen>l) {
295                         pp_ptr = optptr;
296                         goto error;
297                 }
298                 switch (*optptr) {
299                       case IPOPT_SSRR:
300                       case IPOPT_LSRR:
301                         if (optlen < 3) {
302                                 pp_ptr = optptr + 1;
303                                 goto error;
304                         }
305                         if (optptr[2] < 4) {
306                                 pp_ptr = optptr + 2;
307                                 goto error;
308                         }
309                         /* NB: cf RFC-1812 5.2.4.1 */
310                         if (opt->srr) {
311                                 pp_ptr = optptr;
312                                 goto error;
313                         }
314                         if (!skb) {
315                                 if (optptr[2] != 4 || optlen < 7 || ((optlen-3) & 3)) {
316                                         pp_ptr = optptr + 1;
317                                         goto error;
318                                 }
319                                 memcpy(&opt->faddr, &optptr[3], 4);
320                                 if (optlen > 7)
321                                         memmove(&optptr[3], &optptr[7], optlen-7);
322                         }
323                         opt->is_strictroute = (optptr[0] == IPOPT_SSRR);
324                         opt->srr = optptr - iph;
325                         break;
326                       case IPOPT_RR:
327                         if (opt->rr) {
328                                 pp_ptr = optptr;
329                                 goto error;
330                         }
331                         if (optlen < 3) {
332                                 pp_ptr = optptr + 1;
333                                 goto error;
334                         }
335                         if (optptr[2] < 4) {
336                                 pp_ptr = optptr + 2;
337                                 goto error;
338                         }
339                         if (optptr[2] <= optlen) {
340                                 if (optptr[2]+3 > optlen) {
341                                         pp_ptr = optptr + 2;
342                                         goto error;
343                                 }
344                                 if (skb) {
345                                         memcpy(&optptr[optptr[2]-1], &rt->rt_spec_dst, 4);
346                                         opt->is_changed = 1;
347                                 }
348                                 optptr[2] += 4;
349                                 opt->rr_needaddr = 1;
350                         }
351                         opt->rr = optptr - iph;
352                         break;
353                       case IPOPT_TIMESTAMP:
354                         if (opt->ts) {
355                                 pp_ptr = optptr;
356                                 goto error;
357                         }
358                         if (optlen < 4) {
359                                 pp_ptr = optptr + 1;
360                                 goto error;
361                         }
362                         if (optptr[2] < 5) {
363                                 pp_ptr = optptr + 2;
364                                 goto error;
365                         }
366                         if (optptr[2] <= optlen) {
367                                 __be32 *timeptr = NULL;
368                                 if (optptr[2]+3 > optptr[1]) {
369                                         pp_ptr = optptr + 2;
370                                         goto error;
371                                 }
372                                 switch (optptr[3]&0xF) {
373                                       case IPOPT_TS_TSONLY:
374                                         opt->ts = optptr - iph;
375                                         if (skb)
376                                                 timeptr = (__be32*)&optptr[optptr[2]-1];
377                                         opt->ts_needtime = 1;
378                                         optptr[2] += 4;
379                                         break;
380                                       case IPOPT_TS_TSANDADDR:
381                                         if (optptr[2]+7 > optptr[1]) {
382                                                 pp_ptr = optptr + 2;
383                                                 goto error;
384                                         }
385                                         opt->ts = optptr - iph;
386                                         if (skb) {
387                                                 memcpy(&optptr[optptr[2]-1], &rt->rt_spec_dst, 4);
388                                                 timeptr = (__be32*)&optptr[optptr[2]+3];
389                                         }
390                                         opt->ts_needaddr = 1;
391                                         opt->ts_needtime = 1;
392                                         optptr[2] += 8;
393                                         break;
394                                       case IPOPT_TS_PRESPEC:
395                                         if (optptr[2]+7 > optptr[1]) {
396                                                 pp_ptr = optptr + 2;
397                                                 goto error;
398                                         }
399                                         opt->ts = optptr - iph;
400                                         {
401                                                 __be32 addr;
402                                                 memcpy(&addr, &optptr[optptr[2]-1], 4);
403                                                 if (inet_addr_type(addr) == RTN_UNICAST)
404                                                         break;
405                                                 if (skb)
406                                                         timeptr = (__be32*)&optptr[optptr[2]+3];
407                                         }
408                                         opt->ts_needtime = 1;
409                                         optptr[2] += 8;
410                                         break;
411                                       default:
412                                         if (!skb && !capable(CAP_NET_RAW)) {
413                                                 pp_ptr = optptr + 3;
414                                                 goto error;
415                                         }
416                                         break;
417                                 }
418                                 if (timeptr) {
419                                         struct timeval tv;
420                                         __be32  midtime;
421                                         do_gettimeofday(&tv);
422                                         midtime = htonl((tv.tv_sec % 86400) * 1000 + tv.tv_usec / 1000);
423                                         memcpy(timeptr, &midtime, sizeof(__be32));
424                                         opt->is_changed = 1;
425                                 }
426                         } else {
427                                 unsigned overflow = optptr[3]>>4;
428                                 if (overflow == 15) {
429                                         pp_ptr = optptr + 3;
430                                         goto error;
431                                 }
432                                 opt->ts = optptr - iph;
433                                 if (skb) {
434                                         optptr[3] = (optptr[3]&0xF)|((overflow+1)<<4);
435                                         opt->is_changed = 1;
436                                 }
437                         }
438                         break;
439                       case IPOPT_RA:
440                         if (optlen < 4) {
441                                 pp_ptr = optptr + 1;
442                                 goto error;
443                         }
444                         if (optptr[2] == 0 && optptr[3] == 0)
445                                 opt->router_alert = optptr - iph;
446                         break;
447                       case IPOPT_CIPSO:
448                         if ((!skb && !capable(CAP_NET_RAW)) || opt->cipso) {
449                                 pp_ptr = optptr;
450                                 goto error;
451                         }
452                         opt->cipso = optptr - iph;
453                         if (cipso_v4_validate(&optptr)) {
454                                 pp_ptr = optptr;
455                                 goto error;
456                         }
457                         break;
458                       case IPOPT_SEC:
459                       case IPOPT_SID:
460                       default:
461                         if (!skb && !capable(CAP_NET_RAW)) {
462                                 pp_ptr = optptr;
463                                 goto error;
464                         }
465                         break;
466                 }
467                 l -= optlen;
468                 optptr += optlen;
469         }
470
471 eol:
472         if (!pp_ptr)
473                 return 0;
474
475 error:
476         if (skb) {
477                 icmp_send(skb, ICMP_PARAMETERPROB, 0, htonl((pp_ptr-iph)<<24));
478         }
479         return -EINVAL;
480 }
481
482
483 /*
484  *      Undo all the changes done by ip_options_compile().
485  */
486
487 void ip_options_undo(struct ip_options * opt)
488 {
489         if (opt->srr) {
490                 unsigned  char * optptr = opt->__data+opt->srr-sizeof(struct  iphdr);
491                 memmove(optptr+7, optptr+3, optptr[1]-7);
492                 memcpy(optptr+3, &opt->faddr, 4);
493         }
494         if (opt->rr_needaddr) {
495                 unsigned  char * optptr = opt->__data+opt->rr-sizeof(struct  iphdr);
496                 optptr[2] -= 4;
497                 memset(&optptr[optptr[2]-1], 0, 4);
498         }
499         if (opt->ts) {
500                 unsigned  char * optptr = opt->__data+opt->ts-sizeof(struct  iphdr);
501                 if (opt->ts_needtime) {
502                         optptr[2] -= 4;
503                         memset(&optptr[optptr[2]-1], 0, 4);
504                         if ((optptr[3]&0xF) == IPOPT_TS_PRESPEC)
505                                 optptr[2] -= 4;
506                 }
507                 if (opt->ts_needaddr) {
508                         optptr[2] -= 4;
509                         memset(&optptr[optptr[2]-1], 0, 4);
510                 }
511         }
512 }
513
514 static struct ip_options *ip_options_get_alloc(const int optlen)
515 {
516         struct ip_options *opt = kmalloc(sizeof(*opt) + ((optlen + 3) & ~3),
517                                          GFP_KERNEL);
518         if (opt)
519                 memset(opt, 0, sizeof(*opt));
520         return opt;
521 }
522
523 static int ip_options_get_finish(struct ip_options **optp,
524                                  struct ip_options *opt, int optlen)
525 {
526         while (optlen & 3)
527                 opt->__data[optlen++] = IPOPT_END;
528         opt->optlen = optlen;
529         opt->is_data = 1;
530         if (optlen && ip_options_compile(opt, NULL)) {
531                 kfree(opt);
532                 return -EINVAL;
533         }
534         kfree(*optp);
535         *optp = opt;
536         return 0;
537 }
538
539 int ip_options_get_from_user(struct ip_options **optp, unsigned char __user *data, int optlen)
540 {
541         struct ip_options *opt = ip_options_get_alloc(optlen);
542
543         if (!opt)
544                 return -ENOMEM;
545         if (optlen && copy_from_user(opt->__data, data, optlen)) {
546                 kfree(opt);
547                 return -EFAULT;
548         }
549         return ip_options_get_finish(optp, opt, optlen);
550 }
551
552 int ip_options_get(struct ip_options **optp, unsigned char *data, int optlen)
553 {
554         struct ip_options *opt = ip_options_get_alloc(optlen);
555
556         if (!opt)
557                 return -ENOMEM;
558         if (optlen)
559                 memcpy(opt->__data, data, optlen);
560         return ip_options_get_finish(optp, opt, optlen);
561 }
562
563 void ip_forward_options(struct sk_buff *skb)
564 {
565         struct   ip_options * opt       = &(IPCB(skb)->opt);
566         unsigned char * optptr;
567         struct rtable *rt = (struct rtable*)skb->dst;
568         unsigned char *raw = skb_network_header(skb);
569
570         if (opt->rr_needaddr) {
571                 optptr = (unsigned char *)raw + opt->rr;
572                 ip_rt_get_source(&optptr[optptr[2]-5], rt);
573                 opt->is_changed = 1;
574         }
575         if (opt->srr_is_hit) {
576                 int srrptr, srrspace;
577
578                 optptr = raw + opt->srr;
579
580                 for ( srrptr=optptr[2], srrspace = optptr[1];
581                      srrptr <= srrspace;
582                      srrptr += 4
583                      ) {
584                         if (srrptr + 3 > srrspace)
585                                 break;
586                         if (memcmp(&rt->rt_dst, &optptr[srrptr-1], 4) == 0)
587                                 break;
588                 }
589                 if (srrptr + 3 <= srrspace) {
590                         opt->is_changed = 1;
591                         ip_rt_get_source(&optptr[srrptr-1], rt);
592                         ip_hdr(skb)->daddr = rt->rt_dst;
593                         optptr[2] = srrptr+4;
594                 } else if (net_ratelimit())
595                         printk(KERN_CRIT "ip_forward(): Argh! Destination lost!\n");
596                 if (opt->ts_needaddr) {
597                         optptr = raw + opt->ts;
598                         ip_rt_get_source(&optptr[optptr[2]-9], rt);
599                         opt->is_changed = 1;
600                 }
601         }
602         if (opt->is_changed) {
603                 opt->is_changed = 0;
604                 ip_send_check(ip_hdr(skb));
605         }
606 }
607
608 int ip_options_rcv_srr(struct sk_buff *skb)
609 {
610         struct ip_options *opt = &(IPCB(skb)->opt);
611         int srrspace, srrptr;
612         __be32 nexthop;
613         struct iphdr *iph = ip_hdr(skb);
614         unsigned char *optptr = skb_network_header(skb) + opt->srr;
615         struct rtable *rt = (struct rtable*)skb->dst;
616         struct rtable *rt2;
617         int err;
618
619         if (!opt->srr)
620                 return 0;
621
622         if (skb->pkt_type != PACKET_HOST)
623                 return -EINVAL;
624         if (rt->rt_type == RTN_UNICAST) {
625                 if (!opt->is_strictroute)
626                         return 0;
627                 icmp_send(skb, ICMP_PARAMETERPROB, 0, htonl(16<<24));
628                 return -EINVAL;
629         }
630         if (rt->rt_type != RTN_LOCAL)
631                 return -EINVAL;
632
633         for (srrptr=optptr[2], srrspace = optptr[1]; srrptr <= srrspace; srrptr += 4) {
634                 if (srrptr + 3 > srrspace) {
635                         icmp_send(skb, ICMP_PARAMETERPROB, 0, htonl((opt->srr+2)<<24));
636                         return -EINVAL;
637                 }
638                 memcpy(&nexthop, &optptr[srrptr-1], 4);
639
640                 rt = (struct rtable*)skb->dst;
641                 skb->dst = NULL;
642                 err = ip_route_input(skb, nexthop, iph->saddr, iph->tos, skb->dev);
643                 rt2 = (struct rtable*)skb->dst;
644                 if (err || (rt2->rt_type != RTN_UNICAST && rt2->rt_type != RTN_LOCAL)) {
645                         ip_rt_put(rt2);
646                         skb->dst = &rt->u.dst;
647                         return -EINVAL;
648                 }
649                 ip_rt_put(rt);
650                 if (rt2->rt_type != RTN_LOCAL)
651                         break;
652                 /* Superfast 8) loopback forward */
653                 memcpy(&iph->daddr, &optptr[srrptr-1], 4);
654                 opt->is_changed = 1;
655         }
656         if (srrptr <= srrspace) {
657                 opt->srr_is_hit = 1;
658                 opt->is_changed = 1;
659         }
660         return 0;
661 }