[PATCH] ramfs needs to update directory m/ctime on symlink
[linux-2.6] / net / packet / af_packet.c
1 /*
2  * INET         An implementation of the TCP/IP protocol suite for the LINUX
3  *              operating system.  INET is implemented using the  BSD Socket
4  *              interface as the means of communication with the user level.
5  *
6  *              PACKET - implements raw packet sockets.
7  *
8  * Version:     $Id: af_packet.c,v 1.61 2002/02/08 03:57:19 davem Exp $
9  *
10  * Authors:     Ross Biro
11  *              Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
12  *              Alan Cox, <gw4pts@gw4pts.ampr.org>
13  *
14  * Fixes:       
15  *              Alan Cox        :       verify_area() now used correctly
16  *              Alan Cox        :       new skbuff lists, look ma no backlogs!
17  *              Alan Cox        :       tidied skbuff lists.
18  *              Alan Cox        :       Now uses generic datagram routines I
19  *                                      added. Also fixed the peek/read crash
20  *                                      from all old Linux datagram code.
21  *              Alan Cox        :       Uses the improved datagram code.
22  *              Alan Cox        :       Added NULL's for socket options.
23  *              Alan Cox        :       Re-commented the code.
24  *              Alan Cox        :       Use new kernel side addressing
25  *              Rob Janssen     :       Correct MTU usage.
26  *              Dave Platt      :       Counter leaks caused by incorrect
27  *                                      interrupt locking and some slightly
28  *                                      dubious gcc output. Can you read
29  *                                      compiler: it said _VOLATILE_
30  *      Richard Kooijman        :       Timestamp fixes.
31  *              Alan Cox        :       New buffers. Use sk->mac.raw.
32  *              Alan Cox        :       sendmsg/recvmsg support.
33  *              Alan Cox        :       Protocol setting support
34  *      Alexey Kuznetsov        :       Untied from IPv4 stack.
35  *      Cyrus Durgin            :       Fixed kerneld for kmod.
36  *      Michal Ostrowski        :       Module initialization cleanup.
37  *         Ulises Alonso        :       Frame number limit removal and 
38  *                                      packet_set_ring memory leak.
39  *              Eric Biederman  :       Allow for > 8 byte hardware addresses.
40  *                                      The convention is that longer addresses
41  *                                      will simply extend the hardware address
42  *                                      byte arrays at the end of sockaddr_ll 
43  *                                      and packet_mreq.
44  *
45  *              This program is free software; you can redistribute it and/or
46  *              modify it under the terms of the GNU General Public License
47  *              as published by the Free Software Foundation; either version
48  *              2 of the License, or (at your option) any later version.
49  *
50  */
51  
52 #include <linux/config.h>
53 #include <linux/types.h>
54 #include <linux/sched.h>
55 #include <linux/mm.h>
56 #include <linux/capability.h>
57 #include <linux/fcntl.h>
58 #include <linux/socket.h>
59 #include <linux/in.h>
60 #include <linux/inet.h>
61 #include <linux/netdevice.h>
62 #include <linux/if_packet.h>
63 #include <linux/wireless.h>
64 #include <linux/kmod.h>
65 #include <net/ip.h>
66 #include <net/protocol.h>
67 #include <linux/skbuff.h>
68 #include <net/sock.h>
69 #include <linux/errno.h>
70 #include <linux/timer.h>
71 #include <asm/system.h>
72 #include <asm/uaccess.h>
73 #include <asm/ioctls.h>
74 #include <asm/page.h>
75 #include <asm/io.h>
76 #include <linux/proc_fs.h>
77 #include <linux/seq_file.h>
78 #include <linux/poll.h>
79 #include <linux/module.h>
80 #include <linux/init.h>
81
82 #ifdef CONFIG_INET
83 #include <net/inet_common.h>
84 #endif
85
/* Defined unconditionally here; turns on the SOCK_PACKET code guarded by #ifdef CONFIG_SOCK_PACKET in this file. */
86 #define CONFIG_SOCK_PACKET      1
87
88 /*
89    Proposed replacement for SIOC{ADD,DEL}MULTI and
90    IFF_PROMISC, IFF_ALLMULTI flags.
91
92    It is more expensive, but I believe
93    it is the really correct solution: reentrant, safe and fault tolerant.
94
95    IFF_PROMISC/IFF_ALLMULTI/SIOC{ADD/DEL}MULTI are faked by keeping
96    a reference count and a global flag, so that the real status is
97    (gflag|(count != 0)), so that we can use the obsolete faulty interface
98    without harming clever users.
99  */
100 #define CONFIG_PACKET_MULTICAST 1
101
102 /*
103    Assumptions:
104    - if device has no dev->hard_header routine, it adds and removes ll header
105      inside itself. In this case ll header is invisible outside of device,
106      but higher levels still should reserve dev->hard_header_len.
107      Some devices are clever enough to reallocate the skb when the header
108      will not fit into the reserved space (tunnel); others are silly
109      (PPP).
110    - packet socket receives packets with pulled ll header,
111      so that SOCK_RAW should push it back.
112
113 On receive:
114 -----------
115
116 Incoming, dev->hard_header!=NULL
117    mac.raw -> ll header
118    data    -> data
119
120 Outgoing, dev->hard_header!=NULL
121    mac.raw -> ll header
122    data    -> ll header
123
124 Incoming, dev->hard_header==NULL
125    mac.raw -> UNKNOWN position. It is very likely that it points to the ll header.
126               PPP does this, which is wrong, because it introduces asymmetry
127               between rx and tx paths.
128    data    -> data
129
130 Outgoing, dev->hard_header==NULL
131    mac.raw -> data. ll header is still not built!
132    data    -> data
133
134 Resume
135   If dev->hard_header==NULL we are unlikely to restore sensible ll header.
136
137
138 On transmit:
139 ------------
140
141 dev->hard_header != NULL
142    mac.raw -> ll header
143    data    -> ll header
144
145 dev->hard_header == NULL (ll header is added by device, we cannot control it)
146    mac.raw -> data
147    data -> data
148
149    We should set nh.raw on output to the correct position;
150    the packet classifier depends on it.
151  */
152
153 /* List of all packet sockets. */
154 static HLIST_HEAD(packet_sklist);
/* Lock for packet_sklist; packet_release() takes it for writing (BH-disabled) while unlinking a socket. */
155 static DEFINE_RWLOCK(packet_sklist_lock);
156
/* Count of packet sockets; decremented in packet_sock_destruct(), which can also print it as the number still alive. */
157 static atomic_t packet_socks_nr;
158
159
160 /* Private packet socket structures. */
161
162 #ifdef CONFIG_PACKET_MULTICAST
/*
 * One node of a per-socket list; nodes are chained through 'next' and
 * the list head is packet_sock.mclist.
 * NOTE(review): ifindex/type/alen/addr presumably mirror the fields of
 * struct packet_mreq_max, and 'count' looks like a per-entry reference
 * count -- confirm against the code that manipulates the list.
 */
163 struct packet_mclist
164 {
165         struct packet_mclist    *next;
166         int                     ifindex;
167         int                     count;
168         unsigned short          type;
169         unsigned short          alen;
170         unsigned char           addr[MAX_ADDR_LEN];     /* sized for the longest hardware address */
171 };
172 /* Identical to struct packet_mreq except that it has
173  * a longer address field: mr_address holds MAX_ADDR_LEN bytes.
174  */
175 struct packet_mreq_max
176 {
177         int             mr_ifindex;
178         unsigned short  mr_type;
179         unsigned short  mr_alen;
180         unsigned char   mr_address[MAX_ADDR_LEN];
181 };
182 #endif
183 #ifdef CONFIG_PACKET_MMAP
/* Forward declaration; packet_release() calls this with a zeroed request and closing == 1. */
184 static int packet_set_ring(struct sock *sk, struct tpacket_req *req, int closing);
185 #endif
186
/* Forward declaration; packet_release() calls this when CONFIG_PACKET_MULTICAST is defined. */
187 static void packet_flush_mclist(struct sock *sk);
188
/*
 * Per-socket state of a packet socket.  pkt_sk() obtains it by casting
 * a struct sock pointer, which is why 'sk' must stay the first member.
 */
189 struct packet_sock {
190         /* struct sock has to be the first member of packet_sock */
191         struct sock             sk;
192         struct tpacket_stats    stats;          /* tp_packets/tp_drops, updated under sk_receive_queue.lock */
193 #ifdef CONFIG_PACKET_MMAP
194         char *                  *pg_vec;        /* block base pointers, indexed by packet_lookup_frame() */
195         unsigned int            head;           /* index of the next ring frame tpacket_rcv() will try */
196         unsigned int            frames_per_block;
197         unsigned int            frame_size;     /* bytes per ring frame */
198         unsigned int            frame_max;      /* head wraps to 0 after reaching this index */
199         int                     copy_thresh;    /* non-zero: tpacket_rcv() may also queue oversized packets */
200 #endif
201         struct packet_type      prot_hook;      /* passed to dev_add_pack()/dev_remove_pack() */
202         spinlock_t              bind_lock;      /* held by packet_do_bind() while changing the hook state */
203         char                    running;        /* prot_hook is attached*/
204         int                     ifindex;        /* bound device         */
205         unsigned short          num;            /* bound protocol; default proto in packet_sendmsg() */
206 #ifdef CONFIG_PACKET_MULTICAST
207         struct packet_mclist    *mclist;        /* head of the packet_mclist chain */
208 #endif
209 #ifdef CONFIG_PACKET_MMAP
        /* NOTE(review): the fields below are not touched by the receive/send paths here; presumably ring bookkeeping -- confirm. */
210         atomic_t                mapped;
211         unsigned int            pg_vec_order;
212         unsigned int            pg_vec_pages;
213         unsigned int            pg_vec_len;
214 #endif
215 };
216
217 #ifdef CONFIG_PACKET_MMAP
218
219 static inline char *packet_lookup_frame(struct packet_sock *po, unsigned int position)
220 {
221         unsigned int pg_vec_pos, frame_offset;
222         char *frame;
223
224         pg_vec_pos = position / po->frames_per_block;
225         frame_offset = position % po->frames_per_block;
226
227         frame = po->pg_vec[pg_vec_pos] + (frame_offset * po->frame_size);
228         
229         return frame;
230 }
231 #endif
232
/*
 *	View a struct sock as the packet_sock that contains it.  This is a
 *	plain pointer cast, valid because 'sk' is the first member of
 *	struct packet_sock.
 */
static inline struct packet_sock *pkt_sk(struct sock *sk)
{
	struct packet_sock *po = (struct packet_sock *)sk;

	return po;
}
237
238 static void packet_sock_destruct(struct sock *sk)
239 {
240         BUG_TRAP(!atomic_read(&sk->sk_rmem_alloc));
241         BUG_TRAP(!atomic_read(&sk->sk_wmem_alloc));
242
243         if (!sock_flag(sk, SOCK_DEAD)) {
244                 printk("Attempt to release alive packet socket: %p\n", sk);
245                 return;
246         }
247
248         atomic_dec(&packet_socks_nr);
249 #ifdef PACKET_REFCNT_DEBUG
250         printk(KERN_DEBUG "PACKET socket %p is free, %d are alive\n", sk, atomic_read(&packet_socks_nr));
251 #endif
252 }
253
254
/* Forward declaration of a socket operations table; presumably defined further down in this file. */
255 static const struct proto_ops packet_ops;
256
257 #ifdef CONFIG_SOCK_PACKET
/* Forward declaration of the SOCK_PACKET operations table; presumably defined further down in this file. */
258 static const struct proto_ops packet_ops_spkt;
259
260 static int packet_rcv_spkt(struct sk_buff *skb, struct net_device *dev,  struct packet_type *pt, struct net_device *orig_dev)
261 {
262         struct sock *sk;
263         struct sockaddr_pkt *spkt;
264
265         /*
266          *      When we registered the protocol we saved the socket in the data
267          *      field for just this event.
268          */
269
270         sk = pt->af_packet_priv;
271         
272         /*
273          *      Yank back the headers [hope the device set this
274          *      right or kerboom...]
275          *
276          *      Incoming packets have ll header pulled,
277          *      push it back.
278          *
279          *      For outgoing ones skb->data == skb->mac.raw
280          *      so that this procedure is noop.
281          */
282
283         if (skb->pkt_type == PACKET_LOOPBACK)
284                 goto out;
285
286         if ((skb = skb_share_check(skb, GFP_ATOMIC)) == NULL)
287                 goto oom;
288
289         /* drop any routing info */
290         dst_release(skb->dst);
291         skb->dst = NULL;
292
293         /* drop conntrack reference */
294         nf_reset(skb);
295
296         spkt = (struct sockaddr_pkt*)skb->cb;
297
298         skb_push(skb, skb->data-skb->mac.raw);
299
300         /*
301          *      The SOCK_PACKET socket receives _all_ frames.
302          */
303
304         spkt->spkt_family = dev->type;
305         strlcpy(spkt->spkt_device, dev->name, sizeof(spkt->spkt_device));
306         spkt->spkt_protocol = skb->protocol;
307
308         /*
309          *      Charge the memory to the socket. This is done specifically
310          *      to prevent sockets using all the memory up.
311          */
312
313         if (sock_queue_rcv_skb(sk,skb) == 0)
314                 return 0;
315
316 out:
317         kfree_skb(skb);
318 oom:
319         return 0;
320 }
321
322
323 /*
324  *      Output a raw packet to a device layer. This bypasses all the other
325  *      protocol layers and you must therefore supply it with a complete frame
326  */
327  
328 static int packet_sendmsg_spkt(struct kiocb *iocb, struct socket *sock,
329                                struct msghdr *msg, size_t len)
330 {
331         struct sock *sk = sock->sk;
332         struct sockaddr_pkt *saddr=(struct sockaddr_pkt *)msg->msg_name;
333         struct sk_buff *skb;
334         struct net_device *dev;
335         unsigned short proto=0;
336         int err;
337         
338         /*
339          *      Get and verify the address. 
340          */
341
342         if (saddr)
343         {
344                 if (msg->msg_namelen < sizeof(struct sockaddr))
345                         return(-EINVAL);
346                 if (msg->msg_namelen==sizeof(struct sockaddr_pkt))
347                         proto=saddr->spkt_protocol;
348         }
349         else
350                 return(-ENOTCONN);      /* SOCK_PACKET must be sent giving an address */
351
352         /*
353          *      Find the device first to size check it 
354          */
355
356         saddr->spkt_device[13] = 0;
357         dev = dev_get_by_name(saddr->spkt_device);
358         err = -ENODEV;
359         if (dev == NULL)
360                 goto out_unlock;
361         
362         /*
363          *      You may not queue a frame bigger than the mtu. This is the lowest level
364          *      raw protocol and you must do your own fragmentation at this level.
365          */
366          
367         err = -EMSGSIZE;
368         if (len > dev->mtu + dev->hard_header_len)
369                 goto out_unlock;
370
371         err = -ENOBUFS;
372         skb = sock_wmalloc(sk, len + LL_RESERVED_SPACE(dev), 0, GFP_KERNEL);
373
374         /*
375          *      If the write buffer is full, then tough. At this level the user gets to
376          *      deal with the problem - do your own algorithmic backoffs. That's far
377          *      more flexible.
378          */
379          
380         if (skb == NULL) 
381                 goto out_unlock;
382
383         /*
384          *      Fill it in 
385          */
386          
387         /* FIXME: Save some space for broken drivers that write a
388          * hard header at transmission time by themselves. PPP is the
389          * notable one here. This should really be fixed at the driver level.
390          */
391         skb_reserve(skb, LL_RESERVED_SPACE(dev));
392         skb->nh.raw = skb->data;
393
394         /* Try to align data part correctly */
395         if (dev->hard_header) {
396                 skb->data -= dev->hard_header_len;
397                 skb->tail -= dev->hard_header_len;
398                 if (len < dev->hard_header_len)
399                         skb->nh.raw = skb->data;
400         }
401
402         /* Returns -EFAULT on error */
403         err = memcpy_fromiovec(skb_put(skb,len), msg->msg_iov, len);
404         skb->protocol = proto;
405         skb->dev = dev;
406         skb->priority = sk->sk_priority;
407         if (err)
408                 goto out_free;
409
410         err = -ENETDOWN;
411         if (!(dev->flags & IFF_UP))
412                 goto out_free;
413
414         /*
415          *      Now send it
416          */
417
418         dev_queue_xmit(skb);
419         dev_put(dev);
420         return(len);
421
422 out_free:
423         kfree_skb(skb);
424 out_unlock:
425         if (dev)
426                 dev_put(dev);
427         return err;
428 }
429 #endif
430
431 static inline unsigned run_filter(struct sk_buff *skb, struct sock *sk, unsigned res)
432 {
433         struct sk_filter *filter;
434
435         bh_lock_sock(sk);
436         filter = sk->sk_filter;
437         /*
438          * Our caller already checked that filter != NULL but we need to
439          * verify that under bh_lock_sock() to be safe
440          */
441         if (likely(filter != NULL))
442                 res = sk_run_filter(skb, filter->insns, filter->len);
443         bh_unlock_sock(sk);
444
445         return res;
446 }
447
448 /*
449    This function makes lazy skb cloning in hope that most of packets
450    are discarded by BPF.
451
452    Note tricky part: we DO mangle shared skb! skb->data, skb->len
453    and skb->cb are mangled. It works because (and until) packets
454    falling here are owned by current CPU. Output packets are cloned
455    by dev_queue_xmit_nit(), input packets are processed by net_bh
456    sequentially, so that if we return skb to original state on exit,
457    we will not harm anyone.

       Receive hook: the owning socket is read from pt->af_packet_priv.
       The packet is either queued on sk->sk_receive_queue (as a clone
       when the skb is shared) or released with kfree_skb().  The
       function always returns 0; orig_dev is not used.
458  */
459
460 static int packet_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt, struct net_device *orig_dev)
461 {
462         struct sock *sk;
463         struct sockaddr_ll *sll;
464         struct packet_sock *po;
465         u8 * skb_head = skb->data;      /* saved so a shared skb can be put back on exit */
466         int skb_len = skb->len;         /* saved together with skb_head */
467         unsigned snaplen;               /* number of bytes that will be delivered */
468
469         if (skb->pkt_type == PACKET_LOOPBACK)
470                 goto drop;
471
472         sk = pt->af_packet_priv;
473         po = pkt_sk(sk);
474
475         skb->dev = dev;
476
477         if (dev->hard_header) {
478                 /* The device has an explicit notion of ll header,
479                    exported to higher levels.
480
481                    Otherwise, the device hides details of its frame
482                    structure, so that the corresponding packet head
483                    is never delivered to the user.
484                  */
485                 if (sk->sk_type != SOCK_DGRAM)
486                         skb_push(skb, skb->data - skb->mac.raw);
487                 else if (skb->pkt_type == PACKET_OUTGOING) {
488                         /* Special case: outgoing packets have ll header at head */
489                         skb_pull(skb, skb->nh.raw - skb->data);
490                 }
491         }
492
493         snaplen = skb->len;
494
            /* A filter result of 0 drops the packet; a smaller non-zero result truncates it. */
495         if (sk->sk_filter) {
496                 unsigned res = run_filter(skb, sk, snaplen);
497                 if (res == 0)
498                         goto drop_n_restore;
499                 if (snaplen > res)
500                         snaplen = res;
501         }
502
            /* Drop, and count the drop, once this skb would bring receive memory up to sk_rcvbuf. */
503         if (atomic_read(&sk->sk_rmem_alloc) + skb->truesize >=
504             (unsigned)sk->sk_rcvbuf)
505                 goto drop_n_acct;
506
            /*
             * Shared skb: continue with a clone.  The original is first
             * returned to its saved data/len and then released.
             */
507         if (skb_shared(skb)) {
508                 struct sk_buff *nskb = skb_clone(skb, GFP_ATOMIC);
509                 if (nskb == NULL)
510                         goto drop_n_acct;
511
512                 if (skb_head != skb->data) {
513                         skb->data = skb_head;
514                         skb->len = skb_len;
515                 }
516                 kfree_skb(skb);
517                 skb = nskb;
518         }
519
            /* Build the source address in the skb control block. */
520         sll = (struct sockaddr_ll*)skb->cb;
521         sll->sll_family = AF_PACKET;
522         sll->sll_hatype = dev->type;
523         sll->sll_protocol = skb->protocol;
524         sll->sll_pkttype = skb->pkt_type;
525         sll->sll_ifindex = dev->ifindex;
526         sll->sll_halen = 0;
527
528         if (dev->hard_header_parse)
529                 sll->sll_halen = dev->hard_header_parse(skb, sll->sll_addr);
530
            /* Cut the packet down to snaplen; a failure is counted as a drop. */
531         if (pskb_trim(skb, snaplen))
532                 goto drop_n_acct;
533
534         skb_set_owner_r(skb, sk);
535         skb->dev = NULL;
536         dst_release(skb->dst);
537         skb->dst = NULL;
538
539         /* drop conntrack reference */
540         nf_reset(skb);
541
            /* Statistics and the queue are both updated under the receive queue lock. */
542         spin_lock(&sk->sk_receive_queue.lock);
543         po->stats.tp_packets++;
544         __skb_queue_tail(&sk->sk_receive_queue, skb);
545         spin_unlock(&sk->sk_receive_queue.lock);
546         sk->sk_data_ready(sk, skb->len);
547         return 0;
548
    /* Count the drop, then fall through to restore and free. */
549 drop_n_acct:
550         spin_lock(&sk->sk_receive_queue.lock);
551         po->stats.tp_drops++;
552         spin_unlock(&sk->sk_receive_queue.lock);
553
    /* Undo the push/pull above if the skb is still shared with someone else. */
554 drop_n_restore:
555         if (skb_head != skb->data && skb_shared(skb)) {
556                 skb->data = skb_head;
557                 skb->len = skb_len;
558         }
559 drop:
560         kfree_skb(skb);
561         return 0;
562 }
563
564 #ifdef CONFIG_PACKET_MMAP
/*
 *      Receive hook that copies packets into the socket's frame ring
 *      (po->pg_vec) instead of queueing the skb itself.
 *
 *      Each frame starts with a struct tpacket_hdr, followed by a
 *      sockaddr_ll at TPACKET_ALIGN(sizeof(hdr)) and the packet bytes at
 *      offset tp_mac.  The skb passed in is always released with
 *      kfree_skb() and the function always returns 0; orig_dev is not
 *      used.
 */
565 static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt, struct net_device *orig_dev)
566 {
567         struct sock *sk;
568         struct packet_sock *po;
569         struct sockaddr_ll *sll;
570         struct tpacket_hdr *h;
571         u8 * skb_head = skb->data;      /* saved so a shared skb can be put back on exit */
572         int skb_len = skb->len;         /* saved together with skb_head */
573         unsigned snaplen;
            /* TP_STATUS_LOSING is cleared further down when no drops have been recorded. */
574         unsigned long status = TP_STATUS_LOSING|TP_STATUS_USER;
575         unsigned short macoff, netoff;  /* offsets of the packet data and of the network header in the frame */
576         struct sk_buff *copy_skb = NULL;        /* set when an oversized packet is also queued on the socket */
577
578         if (skb->pkt_type == PACKET_LOOPBACK)
579                 goto drop;
580
581         sk = pt->af_packet_priv;
582         po = pkt_sk(sk);
583
584         if (dev->hard_header) {
585                 if (sk->sk_type != SOCK_DGRAM)
586                         skb_push(skb, skb->data - skb->mac.raw);
587                 else if (skb->pkt_type == PACKET_OUTGOING) {
588                         /* Special case: outgoing packets have ll header at head */
589                         skb_pull(skb, skb->nh.raw - skb->data);
590                         if (skb->ip_summed == CHECKSUM_HW)
591                                 status |= TP_STATUS_CSUMNOTREADY;
592                 }
593         }
594
595         snaplen = skb->len;
596
            /* A filter result of 0 drops the packet; a smaller non-zero result truncates it. */
597         if (sk->sk_filter) {
598                 unsigned res = run_filter(skb, sk, snaplen);
599                 if (res == 0)
600                         goto drop_n_restore;
601                 if (snaplen > res)
602                         snaplen = res;
603         }
604
            /*
             * SOCK_DGRAM: no ll header is copied, so both offsets coincide.
             * Otherwise room is left for the ll header (at least 16 bytes)
             * and the data starts maclen bytes before the network header.
             */
605         if (sk->sk_type == SOCK_DGRAM) {
606                 macoff = netoff = TPACKET_ALIGN(TPACKET_HDRLEN) + 16;
607         } else {
608                 unsigned maclen = skb->nh.raw - skb->data;
609                 netoff = TPACKET_ALIGN(TPACKET_HDRLEN + (maclen < 16 ? 16 : maclen));
610                 macoff = netoff - maclen;
611         }
612
            /*
             * The packet does not fit into one frame.  If copy_thresh is set
             * and the receive buffer has room, keep a clone or an extra
             * reference to queue on the socket; either way the ring copy is
             * truncated to what fits.
             */
613         if (macoff + snaplen > po->frame_size) {
614                 if (po->copy_thresh &&
615                     atomic_read(&sk->sk_rmem_alloc) + skb->truesize <
616                     (unsigned)sk->sk_rcvbuf) {
617                         if (skb_shared(skb)) {
618                                 copy_skb = skb_clone(skb, GFP_ATOMIC);
619                         } else {
620                                 copy_skb = skb_get(skb);
                                    /* makes the skb_head != skb->data test at drop_n_restore false */
621                                 skb_head = skb->data;
622                         }
623                         if (copy_skb)
624                                 skb_set_owner_r(copy_skb, sk);
625                 }
626                 snaplen = po->frame_size - macoff;
627                 if ((int)snaplen < 0)
628                         snaplen = 0;
629         }
            /* Never copy more than skb->len - skb->data_len (presumably the linear part -- the memcpy below reads from skb->data only). */
630         if (snaplen > skb->len-skb->data_len)
631                 snaplen = skb->len-skb->data_len;
632
            /* Claim the frame at po->head under the receive queue lock. */
633         spin_lock(&sk->sk_receive_queue.lock);
634         h = (struct tpacket_hdr *)packet_lookup_frame(po, po->head);
635
            /* A non-zero status means the frame has not been handed back yet. */
636         if (h->tp_status)
637                 goto ring_is_full;
            /* Advance head, wrapping to 0 after frame_max. */
638         po->head = po->head != po->frame_max ? po->head+1 : 0;
639         po->stats.tp_packets++;
640         if (copy_skb) {
641                 status |= TP_STATUS_COPY;
642                 __skb_queue_tail(&sk->sk_receive_queue, copy_skb);
643         }
644         if (!po->stats.tp_drops)
645                 status &= ~TP_STATUS_LOSING;
646         spin_unlock(&sk->sk_receive_queue.lock);
647
            /* The frame is filled in after the lock has been released. */
648         memcpy((u8*)h + macoff, skb->data, snaplen);
649
650         h->tp_len = skb->len;
651         h->tp_snaplen = snaplen;
652         h->tp_mac = macoff;
653         h->tp_net = netoff;
            /* No timestamp on the skb yet: take one now and enable timestamping on the socket. */
654         if (skb->tstamp.off_sec == 0) {
655                 __net_timestamp(skb);
656                 sock_enable_timestamp(sk);
657         }
658         h->tp_sec = skb->tstamp.off_sec;
659         h->tp_usec = skb->tstamp.off_usec;
660
            /* The source address follows the aligned frame header. */
661         sll = (struct sockaddr_ll*)((u8*)h + TPACKET_ALIGN(sizeof(*h)));
662         sll->sll_halen = 0;
663         if (dev->hard_header_parse)
664                 sll->sll_halen = dev->hard_header_parse(skb, sll->sll_addr);
665         sll->sll_family = AF_PACKET;
666         sll->sll_hatype = dev->type;
667         sll->sll_protocol = skb->protocol;
668         sll->sll_pkttype = skb->pkt_type;
669         sll->sll_ifindex = dev->ifindex;
670
            /* The status word is written last, followed by a full memory barrier. */
671         h->tp_status = status;
672         mb();
673
            /* Flush every page from the one holding h to the one holding the last copied byte. */
674         {
675                 struct page *p_start, *p_end;
676                 u8 *h_end = (u8 *)h + macoff + snaplen - 1;
677
678                 p_start = virt_to_page(h);
679                 p_end = virt_to_page(h_end);
680                 while (p_start <= p_end) {
681                         flush_dcache_page(p_start);
682                         p_start++;
683                 }
684         }
685
686         sk->sk_data_ready(sk, 0);
687
    /* Reached on success as well: undo the push/pull if the skb is still shared, then free it. */
688 drop_n_restore:
689         if (skb_head != skb->data && skb_shared(skb)) {
690                 skb->data = skb_head;
691                 skb->len = skb_len;
692         }
693 drop:
694         kfree_skb(skb);
695         return 0;
696
    /* Entered with the receive queue lock held: count the drop, unlock, signal the socket, discard any copy. */
697 ring_is_full:
698         po->stats.tp_drops++;
699         spin_unlock(&sk->sk_receive_queue.lock);
700
701         sk->sk_data_ready(sk, 0);
702         if (copy_skb)
703                 kfree_skb(copy_skb);
704         goto drop_n_restore;
705 }
706
707 #endif
708
709
710 static int packet_sendmsg(struct kiocb *iocb, struct socket *sock,
711                           struct msghdr *msg, size_t len)
712 {
713         struct sock *sk = sock->sk;
714         struct sockaddr_ll *saddr=(struct sockaddr_ll *)msg->msg_name;
715         struct sk_buff *skb;
716         struct net_device *dev;
717         unsigned short proto;
718         unsigned char *addr;
719         int ifindex, err, reserve = 0;
720
721         /*
722          *      Get and verify the address. 
723          */
724          
725         if (saddr == NULL) {
726                 struct packet_sock *po = pkt_sk(sk);
727
728                 ifindex = po->ifindex;
729                 proto   = po->num;
730                 addr    = NULL;
731         } else {
732                 err = -EINVAL;
733                 if (msg->msg_namelen < sizeof(struct sockaddr_ll))
734                         goto out;
735                 if (msg->msg_namelen < (saddr->sll_halen + offsetof(struct sockaddr_ll, sll_addr)))
736                         goto out;
737                 ifindex = saddr->sll_ifindex;
738                 proto   = saddr->sll_protocol;
739                 addr    = saddr->sll_addr;
740         }
741
742
743         dev = dev_get_by_index(ifindex);
744         err = -ENXIO;
745         if (dev == NULL)
746                 goto out_unlock;
747         if (sock->type == SOCK_RAW)
748                 reserve = dev->hard_header_len;
749
750         err = -EMSGSIZE;
751         if (len > dev->mtu+reserve)
752                 goto out_unlock;
753
754         skb = sock_alloc_send_skb(sk, len + LL_RESERVED_SPACE(dev),
755                                 msg->msg_flags & MSG_DONTWAIT, &err);
756         if (skb==NULL)
757                 goto out_unlock;
758
759         skb_reserve(skb, LL_RESERVED_SPACE(dev));
760         skb->nh.raw = skb->data;
761
762         if (dev->hard_header) {
763                 int res;
764                 err = -EINVAL;
765                 res = dev->hard_header(skb, dev, ntohs(proto), addr, NULL, len);
766                 if (sock->type != SOCK_DGRAM) {
767                         skb->tail = skb->data;
768                         skb->len = 0;
769                 } else if (res < 0)
770                         goto out_free;
771         }
772
773         /* Returns -EFAULT on error */
774         err = memcpy_fromiovec(skb_put(skb,len), msg->msg_iov, len);
775         if (err)
776                 goto out_free;
777
778         skb->protocol = proto;
779         skb->dev = dev;
780         skb->priority = sk->sk_priority;
781
782         err = -ENETDOWN;
783         if (!(dev->flags & IFF_UP))
784                 goto out_free;
785
786         /*
787          *      Now send it
788          */
789
790         err = dev_queue_xmit(skb);
791         if (err > 0 && (err = net_xmit_errno(err)) != 0)
792                 goto out_unlock;
793
794         dev_put(dev);
795
796         return(len);
797
798 out_free:
799         kfree_skb(skb);
800 out_unlock:
801         if (dev)
802                 dev_put(dev);
803 out:
804         return err;
805 }
806
807 /*
808  *      Close a PACKET socket. This is fairly simple. We immediately go
809  *      to 'closed' state and remove our protocol entry in the device list.
810  */
811
812 static int packet_release(struct socket *sock)
813 {
814         struct sock *sk = sock->sk;
815         struct packet_sock *po;
816
817         if (!sk)
818                 return 0;
819
820         po = pkt_sk(sk);
821
822         write_lock_bh(&packet_sklist_lock);
823         sk_del_node_init(sk);
824         write_unlock_bh(&packet_sklist_lock);
825
826         /*
827          *      Unhook packet receive handler.
828          */
829
830         if (po->running) {
831                 /*
832                  *      Remove the protocol hook
833                  */
834                 dev_remove_pack(&po->prot_hook);
835                 po->running = 0;
836                 po->num = 0;
837                 __sock_put(sk);
838         }
839
840 #ifdef CONFIG_PACKET_MULTICAST
841         packet_flush_mclist(sk);
842 #endif
843
844 #ifdef CONFIG_PACKET_MMAP
845         if (po->pg_vec) {
846                 struct tpacket_req req;
847                 memset(&req, 0, sizeof(req));
848                 packet_set_ring(sk, &req, 1);
849         }
850 #endif
851
852         /*
853          *      Now the socket is dead. No more input will appear.
854          */
855
856         sock_orphan(sk);
857         sock->sk = NULL;
858
859         /* Purge queues */
860
861         skb_queue_purge(&sk->sk_receive_queue);
862
863         sock_put(sk);
864         return 0;
865 }
866
867 /*
868  *      Attach a packet hook.
869  */
870
871 static int packet_do_bind(struct sock *sk, struct net_device *dev, int protocol)
872 {
873         struct packet_sock *po = pkt_sk(sk);
874         /*
875          *      Detach an existing hook if present.
876          */
877
878         lock_sock(sk);
879
880         spin_lock(&po->bind_lock);
881         if (po->running) {
882                 __sock_put(sk);
883                 po->running = 0;
884                 po->num = 0;
885                 spin_unlock(&po->bind_lock);
886                 dev_remove_pack(&po->prot_hook);
887                 spin_lock(&po->bind_lock);
888         }
889
890         po->num = protocol;
891         po->prot_hook.type = protocol;
892         po->prot_hook.dev = dev;
893
894         po->ifindex = dev ? dev->ifindex : 0;
895
896         if (protocol == 0)
897                 goto out_unlock;
898
899         if (dev) {
900                 if (dev->flags&IFF_UP) {
901                         dev_add_pack(&po->prot_hook);
902                         sock_hold(sk);
903                         po->running = 1;
904                 } else {
905                         sk->sk_err = ENETDOWN;
906                         if (!sock_flag(sk, SOCK_DEAD))
907                                 sk->sk_error_report(sk);
908                 }
909         } else {
910                 dev_add_pack(&po->prot_hook);
911                 sock_hold(sk);
912                 po->running = 1;
913         }
914
915 out_unlock:
916         spin_unlock(&po->bind_lock);
917         release_sock(sk);
918         return 0;
919 }
920
921 /*
922  *      Bind a packet socket to a device
923  */
924
925 #ifdef CONFIG_SOCK_PACKET
926
927 static int packet_bind_spkt(struct socket *sock, struct sockaddr *uaddr, int addr_len)
928 {
929         struct sock *sk=sock->sk;
930         char name[15];
931         struct net_device *dev;
932         int err = -ENODEV;
933         
934         /*
935          *      Check legality
936          */
937          
938         if (addr_len != sizeof(struct sockaddr))
939                 return -EINVAL;
940         strlcpy(name,uaddr->sa_data,sizeof(name));
941
942         dev = dev_get_by_name(name);
943         if (dev) {
944                 err = packet_do_bind(sk, dev, pkt_sk(sk)->num);
945                 dev_put(dev);
946         }
947         return err;
948 }
949 #endif
950
951 static int packet_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
952 {
953         struct sockaddr_ll *sll = (struct sockaddr_ll*)uaddr;
954         struct sock *sk=sock->sk;
955         struct net_device *dev = NULL;
956         int err;
957
958
959         /*
960          *      Check legality
961          */
962          
963         if (addr_len < sizeof(struct sockaddr_ll))
964                 return -EINVAL;
965         if (sll->sll_family != AF_PACKET)
966                 return -EINVAL;
967
968         if (sll->sll_ifindex) {
969                 err = -ENODEV;
970                 dev = dev_get_by_index(sll->sll_ifindex);
971                 if (dev == NULL)
972                         goto out;
973         }
974         err = packet_do_bind(sk, dev, sll->sll_protocol ? : pkt_sk(sk)->num);
975         if (dev)
976                 dev_put(dev);
977
978 out:
979         return err;
980 }
981
982 static struct proto packet_proto = {
983         .name     = "PACKET",
984         .owner    = THIS_MODULE,
985         .obj_size = sizeof(struct packet_sock),
986 };
987
988 /*
989  *      Create a packet of type SOCK_PACKET. 
990  */
991
992 static int packet_create(struct socket *sock, int protocol)
993 {
994         struct sock *sk;
995         struct packet_sock *po;
996         int err;
997
998         if (!capable(CAP_NET_RAW))
999                 return -EPERM;
1000         if (sock->type != SOCK_DGRAM && sock->type != SOCK_RAW
1001 #ifdef CONFIG_SOCK_PACKET
1002             && sock->type != SOCK_PACKET
1003 #endif
1004             )
1005                 return -ESOCKTNOSUPPORT;
1006
1007         sock->state = SS_UNCONNECTED;
1008
1009         err = -ENOBUFS;
1010         sk = sk_alloc(PF_PACKET, GFP_KERNEL, &packet_proto, 1);
1011         if (sk == NULL)
1012                 goto out;
1013
1014         sock->ops = &packet_ops;
1015 #ifdef CONFIG_SOCK_PACKET
1016         if (sock->type == SOCK_PACKET)
1017                 sock->ops = &packet_ops_spkt;
1018 #endif
1019         sock_init_data(sock, sk);
1020
1021         po = pkt_sk(sk);
1022         sk->sk_family = PF_PACKET;
1023         po->num = protocol;
1024
1025         sk->sk_destruct = packet_sock_destruct;
1026         atomic_inc(&packet_socks_nr);
1027
1028         /*
1029          *      Attach a protocol block
1030          */
1031
1032         spin_lock_init(&po->bind_lock);
1033         po->prot_hook.func = packet_rcv;
1034 #ifdef CONFIG_SOCK_PACKET
1035         if (sock->type == SOCK_PACKET)
1036                 po->prot_hook.func = packet_rcv_spkt;
1037 #endif
1038         po->prot_hook.af_packet_priv = sk;
1039
1040         if (protocol) {
1041                 po->prot_hook.type = protocol;
1042                 dev_add_pack(&po->prot_hook);
1043                 sock_hold(sk);
1044                 po->running = 1;
1045         }
1046
1047         write_lock_bh(&packet_sklist_lock);
1048         sk_add_node(sk, &packet_sklist);
1049         write_unlock_bh(&packet_sklist_lock);
1050         return(0);
1051 out:
1052         return err;
1053 }
1054
1055 /*
1056  *      Pull a packet from our receive queue and hand it to the user.
1057  *      If necessary we block.
1058  */
1059
1060 static int packet_recvmsg(struct kiocb *iocb, struct socket *sock,
1061                           struct msghdr *msg, size_t len, int flags)
1062 {
1063         struct sock *sk = sock->sk;
1064         struct sk_buff *skb;
1065         int copied, err;
1066         struct sockaddr_ll *sll;
1067
1068         err = -EINVAL;
1069         if (flags & ~(MSG_PEEK|MSG_DONTWAIT|MSG_TRUNC|MSG_CMSG_COMPAT))
1070                 goto out;
1071
1072 #if 0
1073         /* What error should we return now? EUNATTACH? */
1074         if (pkt_sk(sk)->ifindex < 0)
1075                 return -ENODEV;
1076 #endif
1077
1078         /*
1079          *      Call the generic datagram receiver. This handles all sorts
1080          *      of horrible races and re-entrancy so we can forget about it
1081          *      in the protocol layers.
1082          *
1083          *      Now it will return ENETDOWN, if device have just gone down,
1084          *      but then it will block.
1085          */
1086
1087         skb=skb_recv_datagram(sk,flags,flags&MSG_DONTWAIT,&err);
1088
1089         /*
1090          *      An error occurred so return it. Because skb_recv_datagram() 
1091          *      handles the blocking we don't see and worry about blocking
1092          *      retries.
1093          */
1094
1095         if (skb == NULL)
1096                 goto out;
1097
1098         /*
1099          *      If the address length field is there to be filled in, we fill
1100          *      it in now.
1101          */
1102
1103         sll = (struct sockaddr_ll*)skb->cb;
1104         if (sock->type == SOCK_PACKET)
1105                 msg->msg_namelen = sizeof(struct sockaddr_pkt);
1106         else
1107                 msg->msg_namelen = sll->sll_halen + offsetof(struct sockaddr_ll, sll_addr);
1108
1109         /*
1110          *      You lose any data beyond the buffer you gave. If it worries a
1111          *      user program they can ask the device for its MTU anyway.
1112          */
1113
1114         copied = skb->len;
1115         if (copied > len)
1116         {
1117                 copied=len;
1118                 msg->msg_flags|=MSG_TRUNC;
1119         }
1120
1121         err = skb_copy_datagram_iovec(skb, 0, msg->msg_iov, copied);
1122         if (err)
1123                 goto out_free;
1124
1125         sock_recv_timestamp(msg, sk, skb);
1126
1127         if (msg->msg_name)
1128                 memcpy(msg->msg_name, skb->cb, msg->msg_namelen);
1129
1130         /*
1131          *      Free or return the buffer as appropriate. Again this
1132          *      hides all the races and re-entrancy issues from us.
1133          */
1134         err = (flags&MSG_TRUNC) ? skb->len : copied;
1135
1136 out_free:
1137         skb_free_datagram(sk, skb);
1138 out:
1139         return err;
1140 }
1141
1142 #ifdef CONFIG_SOCK_PACKET
1143 static int packet_getname_spkt(struct socket *sock, struct sockaddr *uaddr,
1144                                int *uaddr_len, int peer)
1145 {
1146         struct net_device *dev;
1147         struct sock *sk = sock->sk;
1148
1149         if (peer)
1150                 return -EOPNOTSUPP;
1151
1152         uaddr->sa_family = AF_PACKET;
1153         dev = dev_get_by_index(pkt_sk(sk)->ifindex);
1154         if (dev) {
1155                 strlcpy(uaddr->sa_data, dev->name, 15);
1156                 dev_put(dev);
1157         } else
1158                 memset(uaddr->sa_data, 0, 14);
1159         *uaddr_len = sizeof(*uaddr);
1160
1161         return 0;
1162 }
1163 #endif
1164
1165 static int packet_getname(struct socket *sock, struct sockaddr *uaddr,
1166                           int *uaddr_len, int peer)
1167 {
1168         struct net_device *dev;
1169         struct sock *sk = sock->sk;
1170         struct packet_sock *po = pkt_sk(sk);
1171         struct sockaddr_ll *sll = (struct sockaddr_ll*)uaddr;
1172
1173         if (peer)
1174                 return -EOPNOTSUPP;
1175
1176         sll->sll_family = AF_PACKET;
1177         sll->sll_ifindex = po->ifindex;
1178         sll->sll_protocol = po->num;
1179         dev = dev_get_by_index(po->ifindex);
1180         if (dev) {
1181                 sll->sll_hatype = dev->type;
1182                 sll->sll_halen = dev->addr_len;
1183                 memcpy(sll->sll_addr, dev->dev_addr, dev->addr_len);
1184                 dev_put(dev);
1185         } else {
1186                 sll->sll_hatype = 0;    /* Bad: we have no ARPHRD_UNSPEC */
1187                 sll->sll_halen = 0;
1188         }
1189         *uaddr_len = offsetof(struct sockaddr_ll, sll_addr) + sll->sll_halen;
1190
1191         return 0;
1192 }
1193
1194 #ifdef CONFIG_PACKET_MULTICAST
1195 static void packet_dev_mc(struct net_device *dev, struct packet_mclist *i, int what)
1196 {
1197         switch (i->type) {
1198         case PACKET_MR_MULTICAST:
1199                 if (what > 0)
1200                         dev_mc_add(dev, i->addr, i->alen, 0);
1201                 else
1202                         dev_mc_delete(dev, i->addr, i->alen, 0);
1203                 break;
1204         case PACKET_MR_PROMISC:
1205                 dev_set_promiscuity(dev, what);
1206                 break;
1207         case PACKET_MR_ALLMULTI:
1208                 dev_set_allmulti(dev, what);
1209                 break;
1210         default:;
1211         }
1212 }
1213
1214 static void packet_dev_mclist(struct net_device *dev, struct packet_mclist *i, int what)
1215 {
1216         for ( ; i; i=i->next) {
1217                 if (i->ifindex == dev->ifindex)
1218                         packet_dev_mc(dev, i, what);
1219         }
1220 }
1221
1222 static int packet_mc_add(struct sock *sk, struct packet_mreq_max *mreq)
1223 {
1224         struct packet_sock *po = pkt_sk(sk);
1225         struct packet_mclist *ml, *i;
1226         struct net_device *dev;
1227         int err;
1228
1229         rtnl_lock();
1230
1231         err = -ENODEV;
1232         dev = __dev_get_by_index(mreq->mr_ifindex);
1233         if (!dev)
1234                 goto done;
1235
1236         err = -EINVAL;
1237         if (mreq->mr_alen > dev->addr_len)
1238                 goto done;
1239
1240         err = -ENOBUFS;
1241         i = kmalloc(sizeof(*i), GFP_KERNEL);
1242         if (i == NULL)
1243                 goto done;
1244
1245         err = 0;
1246         for (ml = po->mclist; ml; ml = ml->next) {
1247                 if (ml->ifindex == mreq->mr_ifindex &&
1248                     ml->type == mreq->mr_type &&
1249                     ml->alen == mreq->mr_alen &&
1250                     memcmp(ml->addr, mreq->mr_address, ml->alen) == 0) {
1251                         ml->count++;
1252                         /* Free the new element ... */
1253                         kfree(i);
1254                         goto done;
1255                 }
1256         }
1257
1258         i->type = mreq->mr_type;
1259         i->ifindex = mreq->mr_ifindex;
1260         i->alen = mreq->mr_alen;
1261         memcpy(i->addr, mreq->mr_address, i->alen);
1262         i->count = 1;
1263         i->next = po->mclist;
1264         po->mclist = i;
1265         packet_dev_mc(dev, i, +1);
1266
1267 done:
1268         rtnl_unlock();
1269         return err;
1270 }
1271
1272 static int packet_mc_drop(struct sock *sk, struct packet_mreq_max *mreq)
1273 {
1274         struct packet_mclist *ml, **mlp;
1275
1276         rtnl_lock();
1277
1278         for (mlp = &pkt_sk(sk)->mclist; (ml = *mlp) != NULL; mlp = &ml->next) {
1279                 if (ml->ifindex == mreq->mr_ifindex &&
1280                     ml->type == mreq->mr_type &&
1281                     ml->alen == mreq->mr_alen &&
1282                     memcmp(ml->addr, mreq->mr_address, ml->alen) == 0) {
1283                         if (--ml->count == 0) {
1284                                 struct net_device *dev;
1285                                 *mlp = ml->next;
1286                                 dev = dev_get_by_index(ml->ifindex);
1287                                 if (dev) {
1288                                         packet_dev_mc(dev, ml, -1);
1289                                         dev_put(dev);
1290                                 }
1291                                 kfree(ml);
1292                         }
1293                         rtnl_unlock();
1294                         return 0;
1295                 }
1296         }
1297         rtnl_unlock();
1298         return -EADDRNOTAVAIL;
1299 }
1300
1301 static void packet_flush_mclist(struct sock *sk)
1302 {
1303         struct packet_sock *po = pkt_sk(sk);
1304         struct packet_mclist *ml;
1305
1306         if (!po->mclist)
1307                 return;
1308
1309         rtnl_lock();
1310         while ((ml = po->mclist) != NULL) {
1311                 struct net_device *dev;
1312
1313                 po->mclist = ml->next;
1314                 if ((dev = dev_get_by_index(ml->ifindex)) != NULL) {
1315                         packet_dev_mc(dev, ml, -1);
1316                         dev_put(dev);
1317                 }
1318                 kfree(ml);
1319         }
1320         rtnl_unlock();
1321 }
1322 #endif
1323
1324 static int
1325 packet_setsockopt(struct socket *sock, int level, int optname, char __user *optval, int optlen)
1326 {
1327         struct sock *sk = sock->sk;
1328         int ret;
1329
1330         if (level != SOL_PACKET)
1331                 return -ENOPROTOOPT;
1332
1333         switch(optname) {
1334 #ifdef CONFIG_PACKET_MULTICAST
1335         case PACKET_ADD_MEMBERSHIP:     
1336         case PACKET_DROP_MEMBERSHIP:
1337         {
1338                 struct packet_mreq_max mreq;
1339                 int len = optlen;
1340                 memset(&mreq, 0, sizeof(mreq));
1341                 if (len < sizeof(struct packet_mreq))
1342                         return -EINVAL;
1343                 if (len > sizeof(mreq))
1344                         len = sizeof(mreq);
1345                 if (copy_from_user(&mreq,optval,len))
1346                         return -EFAULT;
1347                 if (len < (mreq.mr_alen + offsetof(struct packet_mreq, mr_address)))
1348                         return -EINVAL;
1349                 if (optname == PACKET_ADD_MEMBERSHIP)
1350                         ret = packet_mc_add(sk, &mreq);
1351                 else
1352                         ret = packet_mc_drop(sk, &mreq);
1353                 return ret;
1354         }
1355 #endif
1356 #ifdef CONFIG_PACKET_MMAP
1357         case PACKET_RX_RING:
1358         {
1359                 struct tpacket_req req;
1360
1361                 if (optlen<sizeof(req))
1362                         return -EINVAL;
1363                 if (copy_from_user(&req,optval,sizeof(req)))
1364                         return -EFAULT;
1365                 return packet_set_ring(sk, &req, 0);
1366         }
1367         case PACKET_COPY_THRESH:
1368         {
1369                 int val;
1370
1371                 if (optlen!=sizeof(val))
1372                         return -EINVAL;
1373                 if (copy_from_user(&val,optval,sizeof(val)))
1374                         return -EFAULT;
1375
1376                 pkt_sk(sk)->copy_thresh = val;
1377                 return 0;
1378         }
1379 #endif
1380         default:
1381                 return -ENOPROTOOPT;
1382         }
1383 }
1384
1385 static int packet_getsockopt(struct socket *sock, int level, int optname,
1386                              char __user *optval, int __user *optlen)
1387 {
1388         int len;
1389         struct sock *sk = sock->sk;
1390         struct packet_sock *po = pkt_sk(sk);
1391
1392         if (level != SOL_PACKET)
1393                 return -ENOPROTOOPT;
1394
1395         if (get_user(len, optlen))
1396                 return -EFAULT;
1397
1398         if (len < 0)
1399                 return -EINVAL;
1400                 
1401         switch(optname) {
1402         case PACKET_STATISTICS:
1403         {
1404                 struct tpacket_stats st;
1405
1406                 if (len > sizeof(struct tpacket_stats))
1407                         len = sizeof(struct tpacket_stats);
1408                 spin_lock_bh(&sk->sk_receive_queue.lock);
1409                 st = po->stats;
1410                 memset(&po->stats, 0, sizeof(st));
1411                 spin_unlock_bh(&sk->sk_receive_queue.lock);
1412                 st.tp_packets += st.tp_drops;
1413
1414                 if (copy_to_user(optval, &st, len))
1415                         return -EFAULT;
1416                 break;
1417         }
1418         default:
1419                 return -ENOPROTOOPT;
1420         }
1421
1422         if (put_user(len, optlen))
1423                 return -EFAULT;
1424         return 0;
1425 }
1426
1427
1428 static int packet_notifier(struct notifier_block *this, unsigned long msg, void *data)
1429 {
1430         struct sock *sk;
1431         struct hlist_node *node;
1432         struct net_device *dev = (struct net_device*)data;
1433
1434         read_lock(&packet_sklist_lock);
1435         sk_for_each(sk, node, &packet_sklist) {
1436                 struct packet_sock *po = pkt_sk(sk);
1437
1438                 switch (msg) {
1439                 case NETDEV_UNREGISTER:
1440 #ifdef CONFIG_PACKET_MULTICAST
1441                         if (po->mclist)
1442                                 packet_dev_mclist(dev, po->mclist, -1);
1443                         // fallthrough
1444 #endif
1445                 case NETDEV_DOWN:
1446                         if (dev->ifindex == po->ifindex) {
1447                                 spin_lock(&po->bind_lock);
1448                                 if (po->running) {
1449                                         __dev_remove_pack(&po->prot_hook);
1450                                         __sock_put(sk);
1451                                         po->running = 0;
1452                                         sk->sk_err = ENETDOWN;
1453                                         if (!sock_flag(sk, SOCK_DEAD))
1454                                                 sk->sk_error_report(sk);
1455                                 }
1456                                 if (msg == NETDEV_UNREGISTER) {
1457                                         po->ifindex = -1;
1458                                         po->prot_hook.dev = NULL;
1459                                 }
1460                                 spin_unlock(&po->bind_lock);
1461                         }
1462                         break;
1463                 case NETDEV_UP:
1464                         spin_lock(&po->bind_lock);
1465                         if (dev->ifindex == po->ifindex && po->num &&
1466                             !po->running) {
1467                                 dev_add_pack(&po->prot_hook);
1468                                 sock_hold(sk);
1469                                 po->running = 1;
1470                         }
1471                         spin_unlock(&po->bind_lock);
1472                         break;
1473                 }
1474         }
1475         read_unlock(&packet_sklist_lock);
1476         return NOTIFY_DONE;
1477 }
1478
1479
1480 static int packet_ioctl(struct socket *sock, unsigned int cmd,
1481                         unsigned long arg)
1482 {
1483         struct sock *sk = sock->sk;
1484
1485         switch(cmd) {
1486                 case SIOCOUTQ:
1487                 {
1488                         int amount = atomic_read(&sk->sk_wmem_alloc);
1489                         return put_user(amount, (int __user *)arg);
1490                 }
1491                 case SIOCINQ:
1492                 {
1493                         struct sk_buff *skb;
1494                         int amount = 0;
1495
1496                         spin_lock_bh(&sk->sk_receive_queue.lock);
1497                         skb = skb_peek(&sk->sk_receive_queue);
1498                         if (skb)
1499                                 amount = skb->len;
1500                         spin_unlock_bh(&sk->sk_receive_queue.lock);
1501                         return put_user(amount, (int __user *)arg);
1502                 }
1503                 case SIOCGSTAMP:
1504                         return sock_get_timestamp(sk, (struct timeval __user *)arg);
1505                         
1506 #ifdef CONFIG_INET
1507                 case SIOCADDRT:
1508                 case SIOCDELRT:
1509                 case SIOCDARP:
1510                 case SIOCGARP:
1511                 case SIOCSARP:
1512                 case SIOCGIFADDR:
1513                 case SIOCSIFADDR:
1514                 case SIOCGIFBRDADDR:
1515                 case SIOCSIFBRDADDR:
1516                 case SIOCGIFNETMASK:
1517                 case SIOCSIFNETMASK:
1518                 case SIOCGIFDSTADDR:
1519                 case SIOCSIFDSTADDR:
1520                 case SIOCSIFFLAGS:
1521                         return inet_dgram_ops.ioctl(sock, cmd, arg);
1522 #endif
1523
1524                 default:
1525                         return -ENOIOCTLCMD;
1526         }
1527         return 0;
1528 }
1529
1530 #ifndef CONFIG_PACKET_MMAP
1531 #define packet_mmap sock_no_mmap
1532 #define packet_poll datagram_poll
1533 #else
1534
1535 static unsigned int packet_poll(struct file * file, struct socket *sock,
1536                                 poll_table *wait)
1537 {
1538         struct sock *sk = sock->sk;
1539         struct packet_sock *po = pkt_sk(sk);
1540         unsigned int mask = datagram_poll(file, sock, wait);
1541
1542         spin_lock_bh(&sk->sk_receive_queue.lock);
1543         if (po->pg_vec) {
1544                 unsigned last = po->head ? po->head-1 : po->frame_max;
1545                 struct tpacket_hdr *h;
1546
1547                 h = (struct tpacket_hdr *)packet_lookup_frame(po, last);
1548
1549                 if (h->tp_status)
1550                         mask |= POLLIN | POLLRDNORM;
1551         }
1552         spin_unlock_bh(&sk->sk_receive_queue.lock);
1553         return mask;
1554 }
1555
1556
1557 /* Dirty? Well, I still did not learn better way to account
1558  * for user mmaps.
1559  */
1560
1561 static void packet_mm_open(struct vm_area_struct *vma)
1562 {
1563         struct file *file = vma->vm_file;
1564         struct socket * sock = file->private_data;
1565         struct sock *sk = sock->sk;
1566         
1567         if (sk)
1568                 atomic_inc(&pkt_sk(sk)->mapped);
1569 }
1570
1571 static void packet_mm_close(struct vm_area_struct *vma)
1572 {
1573         struct file *file = vma->vm_file;
1574         struct socket * sock = file->private_data;
1575         struct sock *sk = sock->sk;
1576         
1577         if (sk)
1578                 atomic_dec(&pkt_sk(sk)->mapped);
1579 }
1580
1581 static struct vm_operations_struct packet_mmap_ops = {
1582         .open = packet_mm_open,
1583         .close =packet_mm_close,
1584 };
1585
1586 static inline struct page *pg_vec_endpage(char *one_pg_vec, unsigned int order)
1587 {
1588         return virt_to_page(one_pg_vec + (PAGE_SIZE << order) - 1);
1589 }
1590
/*
 * Free a block vector: every non-NULL block of the given order, then
 * the pointer array itself.  NULL slots are skipped, so a partially
 * filled vector can be passed in.
 */
static void free_pg_vec(char **pg_vec, unsigned int order, unsigned int len)
{
	unsigned int idx;

	for (idx = 0; idx < len; idx++) {
		char *block = pg_vec[idx];

		if (likely(block))
			free_pages((unsigned long) block, order);
	}
	kfree(pg_vec);
}
1601
1602 static inline char *alloc_one_pg_vec_page(unsigned long order)
1603 {
1604         return (char *) __get_free_pages(GFP_KERNEL | __GFP_COMP | __GFP_ZERO,
1605                                          order);
1606 }
1607
1608 static char **alloc_pg_vec(struct tpacket_req *req, int order)
1609 {
1610         unsigned int block_nr = req->tp_block_nr;
1611         char **pg_vec;
1612         int i;
1613
1614         pg_vec = kzalloc(block_nr * sizeof(char *), GFP_KERNEL);
1615         if (unlikely(!pg_vec))
1616                 goto out;
1617
1618         for (i = 0; i < block_nr; i++) {
1619                 pg_vec[i] = alloc_one_pg_vec_page(order);
1620                 if (unlikely(!pg_vec[i]))
1621                         goto out_free_pgvec;
1622         }
1623
1624 out:
1625         return pg_vec;
1626
1627 out_free_pgvec:
1628         free_pg_vec(pg_vec, order, block_nr);
1629         pg_vec = NULL;
1630         goto out;
1631 }
1632
1633 static int packet_set_ring(struct sock *sk, struct tpacket_req *req, int closing)
1634 {
1635         char **pg_vec = NULL;
1636         struct packet_sock *po = pkt_sk(sk);
1637         int was_running, num, order = 0;
1638         int err = 0;
1639         
1640         if (req->tp_block_nr) {
1641                 int i, l;
1642
1643                 /* Sanity tests and some calculations */
1644
1645                 if (unlikely(po->pg_vec))
1646                         return -EBUSY;
1647
1648                 if (unlikely((int)req->tp_block_size <= 0))
1649                         return -EINVAL;
1650                 if (unlikely(req->tp_block_size & (PAGE_SIZE - 1)))
1651                         return -EINVAL;
1652                 if (unlikely(req->tp_frame_size < TPACKET_HDRLEN))
1653                         return -EINVAL;
1654                 if (unlikely(req->tp_frame_size & (TPACKET_ALIGNMENT - 1)))
1655                         return -EINVAL;
1656
1657                 po->frames_per_block = req->tp_block_size/req->tp_frame_size;
1658                 if (unlikely(po->frames_per_block <= 0))
1659                         return -EINVAL;
1660                 if (unlikely((po->frames_per_block * req->tp_block_nr) !=
1661                              req->tp_frame_nr))
1662                         return -EINVAL;
1663
1664                 err = -ENOMEM;
1665                 order = get_order(req->tp_block_size);
1666                 pg_vec = alloc_pg_vec(req, order);
1667                 if (unlikely(!pg_vec))
1668                         goto out;
1669
1670                 l = 0;
1671                 for (i = 0; i < req->tp_block_nr; i++) {
1672                         char *ptr = pg_vec[i];
1673                         struct tpacket_hdr *header;
1674                         int k;
1675
1676                         for (k = 0; k < po->frames_per_block; k++) {
1677                                 header = (struct tpacket_hdr *) ptr;
1678                                 header->tp_status = TP_STATUS_KERNEL;
1679                                 ptr += req->tp_frame_size;
1680                         }
1681                 }
1682                 /* Done */
1683         } else {
1684                 if (unlikely(req->tp_frame_nr))
1685                         return -EINVAL;
1686         }
1687
1688         lock_sock(sk);
1689
1690         /* Detach socket from network */
1691         spin_lock(&po->bind_lock);
1692         was_running = po->running;
1693         num = po->num;
1694         if (was_running) {
1695                 __dev_remove_pack(&po->prot_hook);
1696                 po->num = 0;
1697                 po->running = 0;
1698                 __sock_put(sk);
1699         }
1700         spin_unlock(&po->bind_lock);
1701                 
1702         synchronize_net();
1703
1704         err = -EBUSY;
1705         if (closing || atomic_read(&po->mapped) == 0) {
1706                 err = 0;
1707 #define XC(a, b) ({ __typeof__ ((a)) __t; __t = (a); (a) = (b); __t; })
1708
1709                 spin_lock_bh(&sk->sk_receive_queue.lock);
1710                 pg_vec = XC(po->pg_vec, pg_vec);
1711                 po->frame_max = (req->tp_frame_nr - 1);
1712                 po->head = 0;
1713                 po->frame_size = req->tp_frame_size;
1714                 spin_unlock_bh(&sk->sk_receive_queue.lock);
1715
1716                 order = XC(po->pg_vec_order, order);
1717                 req->tp_block_nr = XC(po->pg_vec_len, req->tp_block_nr);
1718
1719                 po->pg_vec_pages = req->tp_block_size/PAGE_SIZE;
1720                 po->prot_hook.func = po->pg_vec ? tpacket_rcv : packet_rcv;
1721                 skb_queue_purge(&sk->sk_receive_queue);
1722 #undef XC
1723                 if (atomic_read(&po->mapped))
1724                         printk(KERN_DEBUG "packet_mmap: vma is busy: %d\n", atomic_read(&po->mapped));
1725         }
1726
1727         spin_lock(&po->bind_lock);
1728         if (was_running && !po->running) {
1729                 sock_hold(sk);
1730                 po->running = 1;
1731                 po->num = num;
1732                 dev_add_pack(&po->prot_hook);
1733         }
1734         spin_unlock(&po->bind_lock);
1735
1736         release_sock(sk);
1737
1738         if (pg_vec)
1739                 free_pg_vec(pg_vec, order, req->tp_block_nr);
1740 out:
1741         return err;
1742 }
1743
1744 static int packet_mmap(struct file *file, struct socket *sock, struct vm_area_struct *vma)
1745 {
1746         struct sock *sk = sock->sk;
1747         struct packet_sock *po = pkt_sk(sk);
1748         unsigned long size;
1749         unsigned long start;
1750         int err = -EINVAL;
1751         int i;
1752
1753         if (vma->vm_pgoff)
1754                 return -EINVAL;
1755
1756         size = vma->vm_end - vma->vm_start;
1757
1758         lock_sock(sk);
1759         if (po->pg_vec == NULL)
1760                 goto out;
1761         if (size != po->pg_vec_len*po->pg_vec_pages*PAGE_SIZE)
1762                 goto out;
1763
1764         start = vma->vm_start;
1765         for (i = 0; i < po->pg_vec_len; i++) {
1766                 struct page *page = virt_to_page(po->pg_vec[i]);
1767                 int pg_num;
1768
1769                 for (pg_num = 0; pg_num < po->pg_vec_pages; pg_num++, page++) {
1770                         err = vm_insert_page(vma, start, page);
1771                         if (unlikely(err))
1772                                 goto out;
1773                         start += PAGE_SIZE;
1774                 }
1775         }
1776         atomic_inc(&po->mapped);
1777         vma->vm_ops = &packet_mmap_ops;
1778         err = 0;
1779
1780 out:
1781         release_sock(sk);
1782         return err;
1783 }
1784 #endif
1785
1786
1787 #ifdef CONFIG_SOCK_PACKET
1788 static const struct proto_ops packet_ops_spkt = {
1789         .family =       PF_PACKET,
1790         .owner =        THIS_MODULE,
1791         .release =      packet_release,
1792         .bind =         packet_bind_spkt,
1793         .connect =      sock_no_connect,
1794         .socketpair =   sock_no_socketpair,
1795         .accept =       sock_no_accept,
1796         .getname =      packet_getname_spkt,
1797         .poll =         datagram_poll,
1798         .ioctl =        packet_ioctl,
1799         .listen =       sock_no_listen,
1800         .shutdown =     sock_no_shutdown,
1801         .setsockopt =   sock_no_setsockopt,
1802         .getsockopt =   sock_no_getsockopt,
1803         .sendmsg =      packet_sendmsg_spkt,
1804         .recvmsg =      packet_recvmsg,
1805         .mmap =         sock_no_mmap,
1806         .sendpage =     sock_no_sendpage,
1807 };
1808 #endif
1809
1810 static const struct proto_ops packet_ops = {
1811         .family =       PF_PACKET,
1812         .owner =        THIS_MODULE,
1813         .release =      packet_release,
1814         .bind =         packet_bind,
1815         .connect =      sock_no_connect,
1816         .socketpair =   sock_no_socketpair,
1817         .accept =       sock_no_accept,
1818         .getname =      packet_getname, 
1819         .poll =         packet_poll,
1820         .ioctl =        packet_ioctl,
1821         .listen =       sock_no_listen,
1822         .shutdown =     sock_no_shutdown,
1823         .setsockopt =   packet_setsockopt,
1824         .getsockopt =   packet_getsockopt,
1825         .sendmsg =      packet_sendmsg,
1826         .recvmsg =      packet_recvmsg,
1827         .mmap =         packet_mmap,
1828         .sendpage =     sock_no_sendpage,
1829 };
1830
1831 static struct net_proto_family packet_family_ops = {
1832         .family =       PF_PACKET,
1833         .create =       packet_create,
1834         .owner  =       THIS_MODULE,
1835 };
1836
1837 static struct notifier_block packet_netdev_notifier = {
1838         .notifier_call =packet_notifier,
1839 };
1840
1841 #ifdef CONFIG_PROC_FS
1842 static inline struct sock *packet_seq_idx(loff_t off)
1843 {
1844         struct sock *s;
1845         struct hlist_node *node;
1846
1847         sk_for_each(s, node, &packet_sklist) {
1848                 if (!off--)
1849                         return s;
1850         }
1851         return NULL;
1852 }
1853
1854 static void *packet_seq_start(struct seq_file *seq, loff_t *pos)
1855 {
1856         read_lock(&packet_sklist_lock);
1857         return *pos ? packet_seq_idx(*pos - 1) : SEQ_START_TOKEN;
1858 }
1859
1860 static void *packet_seq_next(struct seq_file *seq, void *v, loff_t *pos)
1861 {
1862         ++*pos;
1863         return  (v == SEQ_START_TOKEN) 
1864                 ? sk_head(&packet_sklist) 
1865                 : sk_next((struct sock*)v) ;
1866 }
1867
1868 static void packet_seq_stop(struct seq_file *seq, void *v)
1869 {
1870         read_unlock(&packet_sklist_lock);               
1871 }
1872
1873 static int packet_seq_show(struct seq_file *seq, void *v) 
1874 {
1875         if (v == SEQ_START_TOKEN)
1876                 seq_puts(seq, "sk       RefCnt Type Proto  Iface R Rmem   User   Inode\n");
1877         else {
1878                 struct sock *s = v;
1879                 const struct packet_sock *po = pkt_sk(s);
1880
1881                 seq_printf(seq,
1882                            "%p %-6d %-4d %04x   %-5d %1d %-6u %-6u %-6lu\n",
1883                            s,
1884                            atomic_read(&s->sk_refcnt),
1885                            s->sk_type,
1886                            ntohs(po->num),
1887                            po->ifindex,
1888                            po->running,
1889                            atomic_read(&s->sk_rmem_alloc),
1890                            sock_i_uid(s),
1891                            sock_i_ino(s) );
1892         }
1893
1894         return 0;
1895 }
1896
1897 static struct seq_operations packet_seq_ops = {
1898         .start  = packet_seq_start,
1899         .next   = packet_seq_next,
1900         .stop   = packet_seq_stop,
1901         .show   = packet_seq_show,
1902 };
1903
1904 static int packet_seq_open(struct inode *inode, struct file *file)
1905 {
1906         return seq_open(file, &packet_seq_ops);
1907 }
1908
1909 static struct file_operations packet_seq_fops = {
1910         .owner          = THIS_MODULE,
1911         .open           = packet_seq_open,
1912         .read           = seq_read,
1913         .llseek         = seq_lseek,
1914         .release        = seq_release,
1915 };
1916
1917 #endif
1918
1919 static void __exit packet_exit(void)
1920 {
1921         proc_net_remove("packet");
1922         unregister_netdevice_notifier(&packet_netdev_notifier);
1923         sock_unregister(PF_PACKET);
1924         proto_unregister(&packet_proto);
1925 }
1926
1927 static int __init packet_init(void)
1928 {
1929         int rc = proto_register(&packet_proto, 0);
1930
1931         if (rc != 0)
1932                 goto out;
1933
1934         sock_register(&packet_family_ops);
1935         register_netdevice_notifier(&packet_netdev_notifier);
1936         proc_net_fops_create("packet", 0, &packet_seq_fops);
1937 out:
1938         return rc;
1939 }
1940
1941 module_init(packet_init);
1942 module_exit(packet_exit);
1943 MODULE_LICENSE("GPL");
1944 MODULE_ALIAS_NETPROTO(PF_PACKET);