[IPV4]: inet_diag annotations
[linux-2.6] / net / ipv4 / ip_options.c
1 /*
2  * INET         An implementation of the TCP/IP protocol suite for the LINUX
3  *              operating system.  INET is implemented using the  BSD Socket
4  *              interface as the means of communication with the user level.
5  *
6  *              The options processing module for ip.c
7  *
8  * Version:     $Id: ip_options.c,v 1.21 2001/09/01 00:31:50 davem Exp $
9  *
10  * Authors:     A.N.Kuznetsov
11  *              
12  */
13
14 #include <linux/capability.h>
15 #include <linux/module.h>
16 #include <linux/types.h>
17 #include <asm/uaccess.h>
18 #include <linux/skbuff.h>
19 #include <linux/ip.h>
20 #include <linux/icmp.h>
21 #include <linux/netdevice.h>
22 #include <linux/rtnetlink.h>
23 #include <net/sock.h>
24 #include <net/ip.h>
25 #include <net/icmp.h>
26 #include <net/route.h>
27 #include <net/cipso_ipv4.h>
28
29 /* 
30  * Write options to IP header, record destination address to
31  * source route option, address of outgoing interface
32  * (we should already know it, so that this  function is allowed be
33  * called only after routing decision) and timestamp,
34  * if we originate this datagram.
35  *
36  * daddr is real destination address, next hop is recorded in IP header.
37  * saddr is address of outgoing interface.
38  */
39
40 void ip_options_build(struct sk_buff * skb, struct ip_options * opt,
41                             __be32 daddr, struct rtable *rt, int is_frag)
42 {
43         unsigned char * iph = skb->nh.raw;
44
45         memcpy(&(IPCB(skb)->opt), opt, sizeof(struct ip_options));
46         memcpy(iph+sizeof(struct iphdr), opt->__data, opt->optlen);
47         opt = &(IPCB(skb)->opt);
48         opt->is_data = 0;
49
50         if (opt->srr)
51                 memcpy(iph+opt->srr+iph[opt->srr+1]-4, &daddr, 4);
52
53         if (!is_frag) {
54                 if (opt->rr_needaddr)
55                         ip_rt_get_source(iph+opt->rr+iph[opt->rr+2]-5, rt);
56                 if (opt->ts_needaddr)
57                         ip_rt_get_source(iph+opt->ts+iph[opt->ts+2]-9, rt);
58                 if (opt->ts_needtime) {
59                         struct timeval tv;
60                         __be32 midtime;
61                         do_gettimeofday(&tv);
62                         midtime = htonl((tv.tv_sec % 86400) * 1000 + tv.tv_usec / 1000);
63                         memcpy(iph+opt->ts+iph[opt->ts+2]-5, &midtime, 4);
64                 }
65                 return;
66         }
67         if (opt->rr) {
68                 memset(iph+opt->rr, IPOPT_NOP, iph[opt->rr+1]);
69                 opt->rr = 0;
70                 opt->rr_needaddr = 0;
71         }
72         if (opt->ts) {
73                 memset(iph+opt->ts, IPOPT_NOP, iph[opt->ts+1]);
74                 opt->ts = 0;
75                 opt->ts_needaddr = opt->ts_needtime = 0;
76         }
77 }
78
79 /* 
80  * Provided (sopt, skb) points to received options,
81  * build in dopt compiled option set appropriate for answering.
82  * i.e. invert SRR option, copy anothers,
83  * and grab room in RR/TS options.
84  *
85  * NOTE: dopt cannot point to skb.
86  */
87
88 int ip_options_echo(struct ip_options * dopt, struct sk_buff * skb) 
89 {
90         struct ip_options *sopt;
91         unsigned char *sptr, *dptr;
92         int soffset, doffset;
93         int     optlen;
94         __be32  daddr;
95
96         memset(dopt, 0, sizeof(struct ip_options));
97
98         dopt->is_data = 1;
99
100         sopt = &(IPCB(skb)->opt);
101
102         if (sopt->optlen == 0) {
103                 dopt->optlen = 0;
104                 return 0;
105         }
106
107         sptr = skb->nh.raw;
108         dptr = dopt->__data;
109
110         if (skb->dst)
111                 daddr = ((struct rtable*)skb->dst)->rt_spec_dst;
112         else
113                 daddr = skb->nh.iph->daddr;
114
115         if (sopt->rr) {
116                 optlen  = sptr[sopt->rr+1];
117                 soffset = sptr[sopt->rr+2];
118                 dopt->rr = dopt->optlen + sizeof(struct iphdr);
119                 memcpy(dptr, sptr+sopt->rr, optlen);
120                 if (sopt->rr_needaddr && soffset <= optlen) {
121                         if (soffset + 3 > optlen)
122                                 return -EINVAL;
123                         dptr[2] = soffset + 4;
124                         dopt->rr_needaddr = 1;
125                 }
126                 dptr += optlen;
127                 dopt->optlen += optlen;
128         }
129         if (sopt->ts) {
130                 optlen = sptr[sopt->ts+1];
131                 soffset = sptr[sopt->ts+2];
132                 dopt->ts = dopt->optlen + sizeof(struct iphdr);
133                 memcpy(dptr, sptr+sopt->ts, optlen);
134                 if (soffset <= optlen) {
135                         if (sopt->ts_needaddr) {
136                                 if (soffset + 3 > optlen)
137                                         return -EINVAL;
138                                 dopt->ts_needaddr = 1;
139                                 soffset += 4;
140                         }
141                         if (sopt->ts_needtime) {
142                                 if (soffset + 3 > optlen)
143                                         return -EINVAL;
144                                 if ((dptr[3]&0xF) != IPOPT_TS_PRESPEC) {
145                                         dopt->ts_needtime = 1;
146                                         soffset += 4;
147                                 } else {
148                                         dopt->ts_needtime = 0;
149
150                                         if (soffset + 8 <= optlen) {
151                                                 __be32 addr;
152
153                                                 memcpy(&addr, sptr+soffset-1, 4);
154                                                 if (inet_addr_type(addr) != RTN_LOCAL) {
155                                                         dopt->ts_needtime = 1;
156                                                         soffset += 8;
157                                                 }
158                                         }
159                                 }
160                         }
161                         dptr[2] = soffset;
162                 }
163                 dptr += optlen;
164                 dopt->optlen += optlen;
165         }
166         if (sopt->srr) {
167                 unsigned char * start = sptr+sopt->srr;
168                 __be32 faddr;
169
170                 optlen  = start[1];
171                 soffset = start[2];
172                 doffset = 0;
173                 if (soffset > optlen)
174                         soffset = optlen + 1;
175                 soffset -= 4;
176                 if (soffset > 3) {
177                         memcpy(&faddr, &start[soffset-1], 4);
178                         for (soffset-=4, doffset=4; soffset > 3; soffset-=4, doffset+=4)
179                                 memcpy(&dptr[doffset-1], &start[soffset-1], 4);
180                         /*
181                          * RFC1812 requires to fix illegal source routes.
182                          */
183                         if (memcmp(&skb->nh.iph->saddr, &start[soffset+3], 4) == 0)
184                                 doffset -= 4;
185                 }
186                 if (doffset > 3) {
187                         memcpy(&start[doffset-1], &daddr, 4);
188                         dopt->faddr = faddr;
189                         dptr[0] = start[0];
190                         dptr[1] = doffset+3;
191                         dptr[2] = 4;
192                         dptr += doffset+3;
193                         dopt->srr = dopt->optlen + sizeof(struct iphdr);
194                         dopt->optlen += doffset+3;
195                         dopt->is_strictroute = sopt->is_strictroute;
196                 }
197         }
198         if (sopt->cipso) {
199                 optlen  = sptr[sopt->cipso+1];
200                 dopt->cipso = dopt->optlen+sizeof(struct iphdr);
201                 memcpy(dptr, sptr+sopt->cipso, optlen);
202                 dptr += optlen;
203                 dopt->optlen += optlen;
204         }
205         while (dopt->optlen & 3) {
206                 *dptr++ = IPOPT_END;
207                 dopt->optlen++;
208         }
209         return 0;
210 }
211
212 /*
213  *      Options "fragmenting", just fill options not
214  *      allowed in fragments with NOOPs.
215  *      Simple and stupid 8), but the most efficient way.
216  */
217
218 void ip_options_fragment(struct sk_buff * skb) 
219 {
220         unsigned char * optptr = skb->nh.raw + sizeof(struct iphdr);
221         struct ip_options * opt = &(IPCB(skb)->opt);
222         int  l = opt->optlen;
223         int  optlen;
224
225         while (l > 0) {
226                 switch (*optptr) {
227                 case IPOPT_END:
228                         return;
229                 case IPOPT_NOOP:
230                         l--;
231                         optptr++;
232                         continue;
233                 }
234                 optlen = optptr[1];
235                 if (optlen<2 || optlen>l)
236                   return;
237                 if (!IPOPT_COPIED(*optptr))
238                         memset(optptr, IPOPT_NOOP, optlen);
239                 l -= optlen;
240                 optptr += optlen;
241         }
242         opt->ts = 0;
243         opt->rr = 0;
244         opt->rr_needaddr = 0;
245         opt->ts_needaddr = 0;
246         opt->ts_needtime = 0;
247         return;
248 }
249
250 /*
251  * Verify options and fill pointers in struct options.
252  * Caller should clear *opt, and set opt->data.
253  * If opt == NULL, then skb->data should point to IP header.
254  */
255
256 int ip_options_compile(struct ip_options * opt, struct sk_buff * skb)
257 {
258         int l;
259         unsigned char * iph;
260         unsigned char * optptr;
261         int optlen;
262         unsigned char * pp_ptr = NULL;
263         struct rtable *rt = skb ? (struct rtable*)skb->dst : NULL;
264
265         if (!opt) {
266                 opt = &(IPCB(skb)->opt);
267                 iph = skb->nh.raw;
268                 opt->optlen = ((struct iphdr *)iph)->ihl*4 - sizeof(struct iphdr);
269                 optptr = iph + sizeof(struct iphdr);
270                 opt->is_data = 0;
271         } else {
272                 optptr = opt->is_data ? opt->__data : (unsigned char*)&(skb->nh.iph[1]);
273                 iph = optptr - sizeof(struct iphdr);
274         }
275
276         for (l = opt->optlen; l > 0; ) {
277                 switch (*optptr) {
278                       case IPOPT_END:
279                         for (optptr++, l--; l>0; optptr++, l--) {
280                                 if (*optptr != IPOPT_END) {
281                                         *optptr = IPOPT_END;
282                                         opt->is_changed = 1;
283                                 }
284                         }
285                         goto eol;
286                       case IPOPT_NOOP:
287                         l--;
288                         optptr++;
289                         continue;
290                 }
291                 optlen = optptr[1];
292                 if (optlen<2 || optlen>l) {
293                         pp_ptr = optptr;
294                         goto error;
295                 }
296                 switch (*optptr) {
297                       case IPOPT_SSRR:
298                       case IPOPT_LSRR:
299                         if (optlen < 3) {
300                                 pp_ptr = optptr + 1;
301                                 goto error;
302                         }
303                         if (optptr[2] < 4) {
304                                 pp_ptr = optptr + 2;
305                                 goto error;
306                         }
307                         /* NB: cf RFC-1812 5.2.4.1 */
308                         if (opt->srr) {
309                                 pp_ptr = optptr;
310                                 goto error;
311                         }
312                         if (!skb) {
313                                 if (optptr[2] != 4 || optlen < 7 || ((optlen-3) & 3)) {
314                                         pp_ptr = optptr + 1;
315                                         goto error;
316                                 }
317                                 memcpy(&opt->faddr, &optptr[3], 4);
318                                 if (optlen > 7)
319                                         memmove(&optptr[3], &optptr[7], optlen-7);
320                         }
321                         opt->is_strictroute = (optptr[0] == IPOPT_SSRR);
322                         opt->srr = optptr - iph;
323                         break;
324                       case IPOPT_RR:
325                         if (opt->rr) {
326                                 pp_ptr = optptr;
327                                 goto error;
328                         }
329                         if (optlen < 3) {
330                                 pp_ptr = optptr + 1;
331                                 goto error;
332                         }
333                         if (optptr[2] < 4) {
334                                 pp_ptr = optptr + 2;
335                                 goto error;
336                         }
337                         if (optptr[2] <= optlen) {
338                                 if (optptr[2]+3 > optlen) {
339                                         pp_ptr = optptr + 2;
340                                         goto error;
341                                 }
342                                 if (skb) {
343                                         memcpy(&optptr[optptr[2]-1], &rt->rt_spec_dst, 4);
344                                         opt->is_changed = 1;
345                                 }
346                                 optptr[2] += 4;
347                                 opt->rr_needaddr = 1;
348                         }
349                         opt->rr = optptr - iph;
350                         break;
351                       case IPOPT_TIMESTAMP:
352                         if (opt->ts) {
353                                 pp_ptr = optptr;
354                                 goto error;
355                         }
356                         if (optlen < 4) {
357                                 pp_ptr = optptr + 1;
358                                 goto error;
359                         }
360                         if (optptr[2] < 5) {
361                                 pp_ptr = optptr + 2;
362                                 goto error;
363                         }
364                         if (optptr[2] <= optlen) {
365                                 __be32 *timeptr = NULL;
366                                 if (optptr[2]+3 > optptr[1]) {
367                                         pp_ptr = optptr + 2;
368                                         goto error;
369                                 }
370                                 switch (optptr[3]&0xF) {
371                                       case IPOPT_TS_TSONLY:
372                                         opt->ts = optptr - iph;
373                                         if (skb) 
374                                                 timeptr = (__be32*)&optptr[optptr[2]-1];
375                                         opt->ts_needtime = 1;
376                                         optptr[2] += 4;
377                                         break;
378                                       case IPOPT_TS_TSANDADDR:
379                                         if (optptr[2]+7 > optptr[1]) {
380                                                 pp_ptr = optptr + 2;
381                                                 goto error;
382                                         }
383                                         opt->ts = optptr - iph;
384                                         if (skb) {
385                                                 memcpy(&optptr[optptr[2]-1], &rt->rt_spec_dst, 4);
386                                                 timeptr = (__be32*)&optptr[optptr[2]+3];
387                                         }
388                                         opt->ts_needaddr = 1;
389                                         opt->ts_needtime = 1;
390                                         optptr[2] += 8;
391                                         break;
392                                       case IPOPT_TS_PRESPEC:
393                                         if (optptr[2]+7 > optptr[1]) {
394                                                 pp_ptr = optptr + 2;
395                                                 goto error;
396                                         }
397                                         opt->ts = optptr - iph;
398                                         {
399                                                 __be32 addr;
400                                                 memcpy(&addr, &optptr[optptr[2]-1], 4);
401                                                 if (inet_addr_type(addr) == RTN_UNICAST)
402                                                         break;
403                                                 if (skb)
404                                                         timeptr = (__be32*)&optptr[optptr[2]+3];
405                                         }
406                                         opt->ts_needtime = 1;
407                                         optptr[2] += 8;
408                                         break;
409                                       default:
410                                         if (!skb && !capable(CAP_NET_RAW)) {
411                                                 pp_ptr = optptr + 3;
412                                                 goto error;
413                                         }
414                                         break;
415                                 }
416                                 if (timeptr) {
417                                         struct timeval tv;
418                                         __be32  midtime;
419                                         do_gettimeofday(&tv);
420                                         midtime = htonl((tv.tv_sec % 86400) * 1000 + tv.tv_usec / 1000);
421                                         memcpy(timeptr, &midtime, sizeof(__be32));
422                                         opt->is_changed = 1;
423                                 }
424                         } else {
425                                 unsigned overflow = optptr[3]>>4;
426                                 if (overflow == 15) {
427                                         pp_ptr = optptr + 3;
428                                         goto error;
429                                 }
430                                 opt->ts = optptr - iph;
431                                 if (skb) {
432                                         optptr[3] = (optptr[3]&0xF)|((overflow+1)<<4);
433                                         opt->is_changed = 1;
434                                 }
435                         }
436                         break;
437                       case IPOPT_RA:
438                         if (optlen < 4) {
439                                 pp_ptr = optptr + 1;
440                                 goto error;
441                         }
442                         if (optptr[2] == 0 && optptr[3] == 0)
443                                 opt->router_alert = optptr - iph;
444                         break;
445                       case IPOPT_CIPSO:
446                         if (opt->cipso) {
447                                 pp_ptr = optptr;
448                                 goto error;
449                         }
450                         opt->cipso = optptr - iph;
451                         if (cipso_v4_validate(&optptr)) {
452                                 pp_ptr = optptr;
453                                 goto error;
454                         }
455                         break;
456                       case IPOPT_SEC:
457                       case IPOPT_SID:
458                       default:
459                         if (!skb && !capable(CAP_NET_RAW)) {
460                                 pp_ptr = optptr;
461                                 goto error;
462                         }
463                         break;
464                 }
465                 l -= optlen;
466                 optptr += optlen;
467         }
468
469 eol:
470         if (!pp_ptr)
471                 return 0;
472
473 error:
474         if (skb) {
475                 icmp_send(skb, ICMP_PARAMETERPROB, 0, htonl((pp_ptr-iph)<<24));
476         }
477         return -EINVAL;
478 }
479
480
481 /*
482  *      Undo all the changes done by ip_options_compile().
483  */
484
485 void ip_options_undo(struct ip_options * opt)
486 {
487         if (opt->srr) {
488                 unsigned  char * optptr = opt->__data+opt->srr-sizeof(struct  iphdr);
489                 memmove(optptr+7, optptr+3, optptr[1]-7);
490                 memcpy(optptr+3, &opt->faddr, 4);
491         }
492         if (opt->rr_needaddr) {
493                 unsigned  char * optptr = opt->__data+opt->rr-sizeof(struct  iphdr);
494                 optptr[2] -= 4;
495                 memset(&optptr[optptr[2]-1], 0, 4);
496         }
497         if (opt->ts) {
498                 unsigned  char * optptr = opt->__data+opt->ts-sizeof(struct  iphdr);
499                 if (opt->ts_needtime) {
500                         optptr[2] -= 4;
501                         memset(&optptr[optptr[2]-1], 0, 4);
502                         if ((optptr[3]&0xF) == IPOPT_TS_PRESPEC)
503                                 optptr[2] -= 4;
504                 }
505                 if (opt->ts_needaddr) {
506                         optptr[2] -= 4;
507                         memset(&optptr[optptr[2]-1], 0, 4);
508                 }
509         }
510 }
511
512 static struct ip_options *ip_options_get_alloc(const int optlen)
513 {
514         struct ip_options *opt = kmalloc(sizeof(*opt) + ((optlen + 3) & ~3),
515                                          GFP_KERNEL);
516         if (opt)
517                 memset(opt, 0, sizeof(*opt));
518         return opt;
519 }
520
521 static int ip_options_get_finish(struct ip_options **optp,
522                                  struct ip_options *opt, int optlen)
523 {
524         while (optlen & 3)
525                 opt->__data[optlen++] = IPOPT_END;
526         opt->optlen = optlen;
527         opt->is_data = 1;
528         if (optlen && ip_options_compile(opt, NULL)) {
529                 kfree(opt);
530                 return -EINVAL;
531         }
532         kfree(*optp);
533         *optp = opt;
534         return 0;
535 }
536
537 int ip_options_get_from_user(struct ip_options **optp, unsigned char __user *data, int optlen)
538 {
539         struct ip_options *opt = ip_options_get_alloc(optlen);
540
541         if (!opt)
542                 return -ENOMEM;
543         if (optlen && copy_from_user(opt->__data, data, optlen)) {
544                 kfree(opt);
545                 return -EFAULT;
546         }
547         return ip_options_get_finish(optp, opt, optlen);
548 }
549
550 int ip_options_get(struct ip_options **optp, unsigned char *data, int optlen)
551 {
552         struct ip_options *opt = ip_options_get_alloc(optlen);
553
554         if (!opt)
555                 return -ENOMEM;
556         if (optlen)
557                 memcpy(opt->__data, data, optlen);
558         return ip_options_get_finish(optp, opt, optlen);
559 }
560
561 void ip_forward_options(struct sk_buff *skb)
562 {
563         struct   ip_options * opt       = &(IPCB(skb)->opt);
564         unsigned char * optptr;
565         struct rtable *rt = (struct rtable*)skb->dst;
566         unsigned char *raw = skb->nh.raw;
567
568         if (opt->rr_needaddr) {
569                 optptr = (unsigned char *)raw + opt->rr;
570                 ip_rt_get_source(&optptr[optptr[2]-5], rt);
571                 opt->is_changed = 1;
572         }
573         if (opt->srr_is_hit) {
574                 int srrptr, srrspace;
575
576                 optptr = raw + opt->srr;
577
578                 for ( srrptr=optptr[2], srrspace = optptr[1];
579                      srrptr <= srrspace;
580                      srrptr += 4
581                      ) {
582                         if (srrptr + 3 > srrspace)
583                                 break;
584                         if (memcmp(&rt->rt_dst, &optptr[srrptr-1], 4) == 0)
585                                 break;
586                 }
587                 if (srrptr + 3 <= srrspace) {
588                         opt->is_changed = 1;
589                         ip_rt_get_source(&optptr[srrptr-1], rt);
590                         skb->nh.iph->daddr = rt->rt_dst;
591                         optptr[2] = srrptr+4;
592                 } else if (net_ratelimit())
593                         printk(KERN_CRIT "ip_forward(): Argh! Destination lost!\n");
594                 if (opt->ts_needaddr) {
595                         optptr = raw + opt->ts;
596                         ip_rt_get_source(&optptr[optptr[2]-9], rt);
597                         opt->is_changed = 1;
598                 }
599         }
600         if (opt->is_changed) {
601                 opt->is_changed = 0;
602                 ip_send_check(skb->nh.iph);
603         }
604 }
605
606 int ip_options_rcv_srr(struct sk_buff *skb)
607 {
608         struct ip_options *opt = &(IPCB(skb)->opt);
609         int srrspace, srrptr;
610         __be32 nexthop;
611         struct iphdr *iph = skb->nh.iph;
612         unsigned char * optptr = skb->nh.raw + opt->srr;
613         struct rtable *rt = (struct rtable*)skb->dst;
614         struct rtable *rt2;
615         int err;
616
617         if (!opt->srr)
618                 return 0;
619
620         if (skb->pkt_type != PACKET_HOST)
621                 return -EINVAL;
622         if (rt->rt_type == RTN_UNICAST) {
623                 if (!opt->is_strictroute)
624                         return 0;
625                 icmp_send(skb, ICMP_PARAMETERPROB, 0, htonl(16<<24));
626                 return -EINVAL;
627         }
628         if (rt->rt_type != RTN_LOCAL)
629                 return -EINVAL;
630
631         for (srrptr=optptr[2], srrspace = optptr[1]; srrptr <= srrspace; srrptr += 4) {
632                 if (srrptr + 3 > srrspace) {
633                         icmp_send(skb, ICMP_PARAMETERPROB, 0, htonl((opt->srr+2)<<24));
634                         return -EINVAL;
635                 }
636                 memcpy(&nexthop, &optptr[srrptr-1], 4);
637
638                 rt = (struct rtable*)skb->dst;
639                 skb->dst = NULL;
640                 err = ip_route_input(skb, nexthop, iph->saddr, iph->tos, skb->dev);
641                 rt2 = (struct rtable*)skb->dst;
642                 if (err || (rt2->rt_type != RTN_UNICAST && rt2->rt_type != RTN_LOCAL)) {
643                         ip_rt_put(rt2);
644                         skb->dst = &rt->u.dst;
645                         return -EINVAL;
646                 }
647                 ip_rt_put(rt);
648                 if (rt2->rt_type != RTN_LOCAL)
649                         break;
650                 /* Superfast 8) loopback forward */
651                 memcpy(&iph->daddr, &optptr[srrptr-1], 4);
652                 opt->is_changed = 1;
653         }
654         if (srrptr <= srrspace) {
655                 opt->srr_is_hit = 1;
656                 opt->is_changed = 1;
657         }
658         return 0;
659 }