/*
 *  linux/fs/pipe.c
 *
 *  Copyright (C) 1991, 1992, 1999  Linus Torvalds
 */

#include <linux/mm.h>
#include <linux/file.h>
#include <linux/poll.h>
#include <linux/slab.h>
#include <linux/module.h>
#include <linux/init.h>
#include <linux/fs.h>
#include <linux/mount.h>
#include <linux/pipe_fs_i.h>
#include <linux/uio.h>
#include <linux/highmem.h>

#include <asm/uaccess.h>
#include <asm/ioctls.h>

/*
 * We use a start+len construction, which provides full use of the
 * allocated memory.
 * -- Florian Coosmann (FGC)
 *
 * Reads with count = 0 should always return 0.
 * -- Julian Bradfield 1999-06-07.
 *
 * FIFOs and Pipes now generate SIGIO for both readers and writers.
 * -- Jeremy Elson <jelson@circlemud.org> 2001-08-16
 *
 * pipe_read & write cleanup
 * -- Manfred Spraul <manfred@colorfullife.com> 2002-05-09
 */

/* Drop the pipe mutex and wait for a pipe event, atomically */
void pipe_wait(struct inode * inode)
{
        DEFINE_WAIT(wait);

        /*
         * Pipes are system-local resources, so sleeping on them
         * is considered a noninteractive wait:
         */
        prepare_to_wait(PIPE_WAIT(*inode), &wait, TASK_INTERRUPTIBLE|TASK_NONINTERACTIVE);
        mutex_unlock(PIPE_MUTEX(*inode));
        schedule();
        finish_wait(PIPE_WAIT(*inode), &wait);
        mutex_lock(PIPE_MUTEX(*inode));
}

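/*
 * Copy "len" bytes from the user iovec into the kernel buffer "to",
 * advancing the iovec as data is consumed.  Returns 0 on success or
 * -EFAULT if the user memory could not be accessed.
 */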
static int
pipe_iov_copy_from_user(void *to, struct iovec *iov, unsigned long len)
{
        unsigned long copy;

        while (len > 0) {
                while (!iov->iov_len)
                        iov++;
                copy = min_t(unsigned long, len, iov->iov_len);

                if (copy_from_user(to, iov->iov_base, copy))
                        return -EFAULT;
                to += copy;
                len -= copy;
                iov->iov_base += copy;
                iov->iov_len -= copy;
        }
        return 0;
}

static int
pipe_iov_copy_to_user(struct iovec *iov, const void *from, unsigned long len)
{
        unsigned long copy;

        while (len > 0) {
                while (!iov->iov_len)
                        iov++;
                copy = min_t(unsigned long, len, iov->iov_len);

                if (copy_to_user(iov->iov_base, from, copy))
                        return -EFAULT;
                from += copy;
                len -= copy;
                iov->iov_base += copy;
                iov->iov_len -= copy;
        }
        return 0;
}

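/*
 * When an anonymous pipe buffer is drained, keep one page cached in
 * info->tmp_page for reuse by the next write; any further pages are
 * freed immediately.
 */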
static void anon_pipe_buf_release(struct pipe_inode_info *info, struct pipe_buffer *buf)
{
        struct page *page = buf->page;

        if (info->tmp_page) {
                __free_page(page);
                return;
        }
        info->tmp_page = page;
}

static void *anon_pipe_buf_map(struct file *file, struct pipe_inode_info *info, struct pipe_buffer *buf)
{
        return kmap(buf->page);
}

static void anon_pipe_buf_unmap(struct pipe_inode_info *info, struct pipe_buffer *buf)
{
        kunmap(buf->page);
}

static struct pipe_buf_operations anon_pipe_buf_ops = {
        .can_merge = 1,
        .map = anon_pipe_buf_map,
        .unmap = anon_pipe_buf_unmap,
        .release = anon_pipe_buf_release,
};

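/*
 * Read from the pipe: copy data out of the circular buffer ring,
 * releasing buffers as they empty, and block (unless O_NONBLOCK) while
 * the pipe is empty and writers remain.
 */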
static ssize_t
pipe_readv(struct file *filp, const struct iovec *_iov,
           unsigned long nr_segs, loff_t *ppos)
{
        struct inode *inode = filp->f_dentry->d_inode;
        struct pipe_inode_info *info;
        int do_wakeup;
        ssize_t ret;
        struct iovec *iov = (struct iovec *)_iov;
        size_t total_len;

        total_len = iov_length(iov, nr_segs);
        /* Null read succeeds. */
        if (unlikely(total_len == 0))
                return 0;

        do_wakeup = 0;
        ret = 0;
        mutex_lock(PIPE_MUTEX(*inode));
        info = inode->i_pipe;
        for (;;) {
                int bufs = info->nrbufs;
                if (bufs) {
                        int curbuf = info->curbuf;
                        struct pipe_buffer *buf = info->bufs + curbuf;
                        struct pipe_buf_operations *ops = buf->ops;
                        void *addr;
                        size_t chars = buf->len;
                        int error;

                        if (chars > total_len)
                                chars = total_len;

                        addr = ops->map(filp, info, buf);
                        error = pipe_iov_copy_to_user(iov, addr + buf->offset, chars);
                        ops->unmap(info, buf);
                        if (unlikely(error)) {
                                if (!ret) ret = -EFAULT;
                                break;
                        }
                        ret += chars;
                        buf->offset += chars;
                        buf->len -= chars;
                        if (!buf->len) {
                                buf->ops = NULL;
                                ops->release(info, buf);
                                curbuf = (curbuf + 1) & (PIPE_BUFFERS-1);
                                info->curbuf = curbuf;
                                info->nrbufs = --bufs;
                                do_wakeup = 1;
                        }
                        total_len -= chars;
                        if (!total_len)
                                break;  /* common path: read succeeded */
                }
                if (bufs)       /* More to do? */
                        continue;
                if (!PIPE_WRITERS(*inode))
                        break;
                if (!PIPE_WAITING_WRITERS(*inode)) {
                        /* syscall merging: Usually we must not sleep
                         * if O_NONBLOCK is set, or if we got some data.
                         * But if a writer sleeps in kernel space, then
                         * we can wait for that data without violating POSIX.
                         */
                        if (ret)
                                break;
                        if (filp->f_flags & O_NONBLOCK) {
                                ret = -EAGAIN;
                                break;
                        }
                }
                if (signal_pending(current)) {
                        if (!ret) ret = -ERESTARTSYS;
                        break;
                }
                if (do_wakeup) {
                        wake_up_interruptible_sync(PIPE_WAIT(*inode));
                        kill_fasync(PIPE_FASYNC_WRITERS(*inode), SIGIO, POLL_OUT);
                }
                pipe_wait(inode);
        }
        mutex_unlock(PIPE_MUTEX(*inode));
        /* Signal writers asynchronously that there is more room.  */
        if (do_wakeup) {
                wake_up_interruptible(PIPE_WAIT(*inode));
                kill_fasync(PIPE_FASYNC_WRITERS(*inode), SIGIO, POLL_OUT);
        }
        if (ret > 0)
                file_accessed(filp);
        return ret;
}

static ssize_t
pipe_read(struct file *filp, char __user *buf, size_t count, loff_t *ppos)
{
        struct iovec iov = { .iov_base = buf, .iov_len = count };
        return pipe_readv(filp, &iov, 1, ppos);
}

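/*
 * Write to the pipe: append to the last partially filled buffer when
 * possible, otherwise fill fresh pages, blocking (unless O_NONBLOCK)
 * while the ring is full and readers remain.
 */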
static ssize_t
pipe_writev(struct file *filp, const struct iovec *_iov,
            unsigned long nr_segs, loff_t *ppos)
{
        struct inode *inode = filp->f_dentry->d_inode;
        struct pipe_inode_info *info;
        ssize_t ret;
        int do_wakeup;
        struct iovec *iov = (struct iovec *)_iov;
        size_t total_len;
        ssize_t chars;

        total_len = iov_length(iov, nr_segs);
        /* Null write succeeds. */
        if (unlikely(total_len == 0))
                return 0;

        do_wakeup = 0;
        ret = 0;
        mutex_lock(PIPE_MUTEX(*inode));
        info = inode->i_pipe;

        if (!PIPE_READERS(*inode)) {
                send_sig(SIGPIPE, current, 0);
                ret = -EPIPE;
                goto out;
        }

        /* We try to merge small writes */
        chars = total_len & (PAGE_SIZE-1); /* size of the last buffer */
        if (info->nrbufs && chars != 0) {
                int lastbuf = (info->curbuf + info->nrbufs - 1) & (PIPE_BUFFERS-1);
                struct pipe_buffer *buf = info->bufs + lastbuf;
                struct pipe_buf_operations *ops = buf->ops;
                int offset = buf->offset + buf->len;
                if (ops->can_merge && offset + chars <= PAGE_SIZE) {
                        void *addr = ops->map(filp, info, buf);
                        int error = pipe_iov_copy_from_user(offset + addr, iov, chars);
                        ops->unmap(info, buf);
                        ret = error;
                        do_wakeup = 1;
                        if (error)
                                goto out;
                        buf->len += chars;
                        total_len -= chars;
                        ret = chars;
                        if (!total_len)
                                goto out;
                }
        }

        for (;;) {
                int bufs;
                if (!PIPE_READERS(*inode)) {
                        send_sig(SIGPIPE, current, 0);
                        if (!ret) ret = -EPIPE;
                        break;
                }
                bufs = info->nrbufs;
                if (bufs < PIPE_BUFFERS) {
                        int newbuf = (info->curbuf + bufs) & (PIPE_BUFFERS-1);
                        struct pipe_buffer *buf = info->bufs + newbuf;
                        struct page *page = info->tmp_page;
                        int error;

                        if (!page) {
                                page = alloc_page(GFP_HIGHUSER);
                                if (unlikely(!page)) {
                                        ret = ret ? : -ENOMEM;
                                        break;
                                }
                                info->tmp_page = page;
                        }
                        /* Always wakeup, even if the copy fails. Otherwise
                         * we lock up (O_NONBLOCK-)readers that sleep due to
                         * syscall merging.
                         * FIXME! Is this really true?
                         */
                        do_wakeup = 1;
                        chars = PAGE_SIZE;
                        if (chars > total_len)
                                chars = total_len;

                        error = pipe_iov_copy_from_user(kmap(page), iov, chars);
                        kunmap(page);
                        if (unlikely(error)) {
                                if (!ret) ret = -EFAULT;
                                break;
                        }
                        ret += chars;

                        /* Insert it into the buffer array */
                        buf->page = page;
                        buf->ops = &anon_pipe_buf_ops;
                        buf->offset = 0;
                        buf->len = chars;
                        info->nrbufs = ++bufs;
                        info->tmp_page = NULL;

                        total_len -= chars;
                        if (!total_len)
                                break;
                }
                if (bufs < PIPE_BUFFERS)
                        continue;
                if (filp->f_flags & O_NONBLOCK) {
                        if (!ret) ret = -EAGAIN;
                        break;
                }
                if (signal_pending(current)) {
                        if (!ret) ret = -ERESTARTSYS;
                        break;
                }
                if (do_wakeup) {
                        wake_up_interruptible_sync(PIPE_WAIT(*inode));
                        kill_fasync(PIPE_FASYNC_READERS(*inode), SIGIO, POLL_IN);
                        do_wakeup = 0;
                }
                PIPE_WAITING_WRITERS(*inode)++;
                pipe_wait(inode);
                PIPE_WAITING_WRITERS(*inode)--;
        }
out:
        mutex_unlock(PIPE_MUTEX(*inode));
        if (do_wakeup) {
                wake_up_interruptible(PIPE_WAIT(*inode));
                kill_fasync(PIPE_FASYNC_READERS(*inode), SIGIO, POLL_IN);
        }
        if (ret > 0)
                file_update_time(filp);
        return ret;
}

static ssize_t
pipe_write(struct file *filp, const char __user *buf,
           size_t count, loff_t *ppos)
{
        struct iovec iov = { .iov_base = (void __user *)buf, .iov_len = count };
        return pipe_writev(filp, &iov, 1, ppos);
}

static ssize_t
bad_pipe_r(struct file *filp, char __user *buf, size_t count, loff_t *ppos)
{
        return -EBADF;
}

static ssize_t
bad_pipe_w(struct file *filp, const char __user *buf, size_t count, loff_t *ppos)
{
        return -EBADF;
}

static int
pipe_ioctl(struct inode *pino, struct file *filp,
           unsigned int cmd, unsigned long arg)
{
        struct inode *inode = filp->f_dentry->d_inode;
        struct pipe_inode_info *info;
        int count, buf, nrbufs;

        switch (cmd) {
                case FIONREAD:
                        mutex_lock(PIPE_MUTEX(*inode));
                        info = inode->i_pipe;
                        count = 0;
                        buf = info->curbuf;
                        nrbufs = info->nrbufs;
                        while (--nrbufs >= 0) {
                                count += info->bufs[buf].len;
                                buf = (buf+1) & (PIPE_BUFFERS-1);
                        }
                        mutex_unlock(PIPE_MUTEX(*inode));
                        return put_user(count, (int __user *)arg);
                default:
                        return -EINVAL;
        }
}

/* No kernel lock held - fine */
static unsigned int
pipe_poll(struct file *filp, poll_table *wait)
{
        unsigned int mask;
        struct inode *inode = filp->f_dentry->d_inode;
        struct pipe_inode_info *info = inode->i_pipe;
        int nrbufs;

        poll_wait(filp, PIPE_WAIT(*inode), wait);

        /* Reading only -- no need for acquiring the mutex.  */
        nrbufs = info->nrbufs;
        mask = 0;
        if (filp->f_mode & FMODE_READ) {
                mask = (nrbufs > 0) ? POLLIN | POLLRDNORM : 0;
                if (!PIPE_WRITERS(*inode) && filp->f_version != PIPE_WCOUNTER(*inode))
                        mask |= POLLHUP;
        }

        if (filp->f_mode & FMODE_WRITE) {
                mask |= (nrbufs < PIPE_BUFFERS) ? POLLOUT | POLLWRNORM : 0;
                /*
                 * Most Unices do not set POLLERR for FIFOs but on Linux they
                 * behave exactly like pipes for poll().
                 */
                if (!PIPE_READERS(*inode))
                        mask |= POLLERR;
        }

        return mask;
}

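/*
 * Common close path: drop the reader and/or writer counts and either
 * free the pipe when both reach zero or wake up the remaining side.
 */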
static int
pipe_release(struct inode *inode, int decr, int decw)
{
        mutex_lock(PIPE_MUTEX(*inode));
        PIPE_READERS(*inode) -= decr;
        PIPE_WRITERS(*inode) -= decw;
        if (!PIPE_READERS(*inode) && !PIPE_WRITERS(*inode)) {
                free_pipe_info(inode);
        } else {
                wake_up_interruptible(PIPE_WAIT(*inode));
                kill_fasync(PIPE_FASYNC_READERS(*inode), SIGIO, POLL_IN);
                kill_fasync(PIPE_FASYNC_WRITERS(*inode), SIGIO, POLL_OUT);
        }
        mutex_unlock(PIPE_MUTEX(*inode));

        return 0;
}

static int
pipe_read_fasync(int fd, struct file *filp, int on)
{
        struct inode *inode = filp->f_dentry->d_inode;
        int retval;

        mutex_lock(PIPE_MUTEX(*inode));
        retval = fasync_helper(fd, filp, on, PIPE_FASYNC_READERS(*inode));
        mutex_unlock(PIPE_MUTEX(*inode));

        if (retval < 0)
                return retval;

        return 0;
}


static int
pipe_write_fasync(int fd, struct file *filp, int on)
{
        struct inode *inode = filp->f_dentry->d_inode;
        int retval;

        mutex_lock(PIPE_MUTEX(*inode));
        retval = fasync_helper(fd, filp, on, PIPE_FASYNC_WRITERS(*inode));
        mutex_unlock(PIPE_MUTEX(*inode));

        if (retval < 0)
                return retval;

        return 0;
}


static int
pipe_rdwr_fasync(int fd, struct file *filp, int on)
{
        struct inode *inode = filp->f_dentry->d_inode;
        int retval;

        mutex_lock(PIPE_MUTEX(*inode));

        retval = fasync_helper(fd, filp, on, PIPE_FASYNC_READERS(*inode));

        if (retval >= 0)
                retval = fasync_helper(fd, filp, on, PIPE_FASYNC_WRITERS(*inode));

        mutex_unlock(PIPE_MUTEX(*inode));

        if (retval < 0)
                return retval;

        return 0;
}


static int
pipe_read_release(struct inode *inode, struct file *filp)
{
        pipe_read_fasync(-1, filp, 0);
        return pipe_release(inode, 1, 0);
}

static int
pipe_write_release(struct inode *inode, struct file *filp)
{
        pipe_write_fasync(-1, filp, 0);
        return pipe_release(inode, 0, 1);
}

static int
pipe_rdwr_release(struct inode *inode, struct file *filp)
{
        int decr, decw;

        pipe_rdwr_fasync(-1, filp, 0);
        decr = (filp->f_mode & FMODE_READ) != 0;
        decw = (filp->f_mode & FMODE_WRITE) != 0;
        return pipe_release(inode, decr, decw);
}

static int
pipe_read_open(struct inode *inode, struct file *filp)
{
        /* We could have perhaps used atomic_t, but this and friends
           below are the only places.  So it doesn't seem worthwhile.  */
        mutex_lock(PIPE_MUTEX(*inode));
        PIPE_READERS(*inode)++;
        mutex_unlock(PIPE_MUTEX(*inode));

        return 0;
}

static int
pipe_write_open(struct inode *inode, struct file *filp)
{
        mutex_lock(PIPE_MUTEX(*inode));
        PIPE_WRITERS(*inode)++;
        mutex_unlock(PIPE_MUTEX(*inode));

        return 0;
}

static int
pipe_rdwr_open(struct inode *inode, struct file *filp)
{
        mutex_lock(PIPE_MUTEX(*inode));
        if (filp->f_mode & FMODE_READ)
                PIPE_READERS(*inode)++;
        if (filp->f_mode & FMODE_WRITE)
                PIPE_WRITERS(*inode)++;
        mutex_unlock(PIPE_MUTEX(*inode));

        return 0;
}

/*
 * The file_operations structs are not static because they
 * are also used in linux/fs/fifo.c to do operations on FIFOs.
 */
struct file_operations read_fifo_fops = {
        .llseek         = no_llseek,
        .read           = pipe_read,
        .readv          = pipe_readv,
        .write          = bad_pipe_w,
        .poll           = pipe_poll,
        .ioctl          = pipe_ioctl,
        .open           = pipe_read_open,
        .release        = pipe_read_release,
        .fasync         = pipe_read_fasync,
};

struct file_operations write_fifo_fops = {
        .llseek         = no_llseek,
        .read           = bad_pipe_r,
        .write          = pipe_write,
        .writev         = pipe_writev,
        .poll           = pipe_poll,
        .ioctl          = pipe_ioctl,
        .open           = pipe_write_open,
        .release        = pipe_write_release,
        .fasync         = pipe_write_fasync,
};

struct file_operations rdwr_fifo_fops = {
        .llseek         = no_llseek,
        .read           = pipe_read,
        .readv          = pipe_readv,
        .write          = pipe_write,
        .writev         = pipe_writev,
        .poll           = pipe_poll,
        .ioctl          = pipe_ioctl,
        .open           = pipe_rdwr_open,
        .release        = pipe_rdwr_release,
        .fasync         = pipe_rdwr_fasync,
};

struct file_operations read_pipe_fops = {
        .llseek         = no_llseek,
        .read           = pipe_read,
        .readv          = pipe_readv,
        .write          = bad_pipe_w,
        .poll           = pipe_poll,
        .ioctl          = pipe_ioctl,
        .open           = pipe_read_open,
        .release        = pipe_read_release,
        .fasync         = pipe_read_fasync,
};

struct file_operations write_pipe_fops = {
        .llseek         = no_llseek,
        .read           = bad_pipe_r,
        .write          = pipe_write,
        .writev         = pipe_writev,
        .poll           = pipe_poll,
        .ioctl          = pipe_ioctl,
        .open           = pipe_write_open,
        .release        = pipe_write_release,
        .fasync         = pipe_write_fasync,
};

struct file_operations rdwr_pipe_fops = {
        .llseek         = no_llseek,
        .read           = pipe_read,
        .readv          = pipe_readv,
        .write          = pipe_write,
        .writev         = pipe_writev,
        .poll           = pipe_poll,
        .ioctl          = pipe_ioctl,
        .open           = pipe_rdwr_open,
        .release        = pipe_rdwr_release,
        .fasync         = pipe_rdwr_fasync,
};

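/*
 * Tear down the pipe: release any buffers still holding data, free the
 * cached spare page and the pipe_inode_info itself.
 */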
void free_pipe_info(struct inode *inode)
{
        int i;
        struct pipe_inode_info *info = inode->i_pipe;

        inode->i_pipe = NULL;
        for (i = 0; i < PIPE_BUFFERS; i++) {
                struct pipe_buffer *buf = info->bufs + i;
                if (buf->ops)
                        buf->ops->release(info, buf);
        }
        if (info->tmp_page)
                __free_page(info->tmp_page);
        kfree(info);
}

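/*
 * Allocate and initialise the per-inode pipe state.  Returns the inode
 * on success, NULL if the allocation fails.
 */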
struct inode* pipe_new(struct inode* inode)
{
        struct pipe_inode_info *info;

        info = kmalloc(sizeof(struct pipe_inode_info), GFP_KERNEL);
        if (!info)
                goto fail_page;
        memset(info, 0, sizeof(*info));
        inode->i_pipe = info;

        init_waitqueue_head(PIPE_WAIT(*inode));
        PIPE_RCOUNTER(*inode) = PIPE_WCOUNTER(*inode) = 1;

        return inode;
fail_page:
        return NULL;
}

static struct vfsmount *pipe_mnt;
static int pipefs_delete_dentry(struct dentry *dentry)
{
        return 1;
}
static struct dentry_operations pipefs_dentry_operations = {
        .d_delete       = pipefs_delete_dentry,
};

static struct inode * get_pipe_inode(void)
{
        struct inode *inode = new_inode(pipe_mnt->mnt_sb);

        if (!inode)
                goto fail_inode;

        if (!pipe_new(inode))
                goto fail_iput;
        PIPE_READERS(*inode) = PIPE_WRITERS(*inode) = 1;
        inode->i_fop = &rdwr_pipe_fops;

        /*
         * Mark the inode dirty from the very beginning,
         * that way it will never be moved to the dirty
         * list because "mark_inode_dirty()" will think
         * that it already _is_ on the dirty list.
         */
        inode->i_state = I_DIRTY;
        inode->i_mode = S_IFIFO | S_IRUSR | S_IWUSR;
        inode->i_uid = current->fsuid;
        inode->i_gid = current->fsgid;
        inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;
        inode->i_blksize = PAGE_SIZE;
        return inode;

fail_iput:
        iput(inode);
fail_inode:
        return NULL;
}

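/*
 * Back end of the pipe(2) system call: create the pipe inode, a
 * read-only and a write-only struct file for it, and install both
 * file descriptors into the caller's table.
 */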
int do_pipe(int *fd)
{
        struct qstr this;
        char name[32];
        struct dentry *dentry;
        struct inode * inode;
        struct file *f1, *f2;
        int error;
        int i, j;

        error = -ENFILE;
        f1 = get_empty_filp();
        if (!f1)
                goto no_files;

        f2 = get_empty_filp();
        if (!f2)
                goto close_f1;

        inode = get_pipe_inode();
        if (!inode)
                goto close_f12;

        error = get_unused_fd();
        if (error < 0)
                goto close_f12_inode;
        i = error;

        error = get_unused_fd();
        if (error < 0)
                goto close_f12_inode_i;
        j = error;

        error = -ENOMEM;
        sprintf(name, "[%lu]", inode->i_ino);
        this.name = name;
        this.len = strlen(name);
        this.hash = inode->i_ino; /* will go */
        dentry = d_alloc(pipe_mnt->mnt_sb->s_root, &this);
        if (!dentry)
                goto close_f12_inode_i_j;
        dentry->d_op = &pipefs_dentry_operations;
        d_add(dentry, inode);
        f1->f_vfsmnt = f2->f_vfsmnt = mntget(mntget(pipe_mnt));
        f1->f_dentry = f2->f_dentry = dget(dentry);
        f1->f_mapping = f2->f_mapping = inode->i_mapping;

        /* read file */
        f1->f_pos = f2->f_pos = 0;
        f1->f_flags = O_RDONLY;
        f1->f_op = &read_pipe_fops;
        f1->f_mode = FMODE_READ;
        f1->f_version = 0;

        /* write file */
        f2->f_flags = O_WRONLY;
        f2->f_op = &write_pipe_fops;
        f2->f_mode = FMODE_WRITE;
        f2->f_version = 0;

        fd_install(i, f1);
        fd_install(j, f2);
        fd[0] = i;
        fd[1] = j;
        return 0;

close_f12_inode_i_j:
        put_unused_fd(j);
close_f12_inode_i:
        put_unused_fd(i);
close_f12_inode:
        free_pipe_info(inode);
        iput(inode);
close_f12:
        put_filp(f2);
close_f1:
        put_filp(f1);
no_files:
        return error;
}

/*
 * pipefs should _never_ be mounted by userland - too much of a security
 * hassle, no real gain from having the whole whorehouse mounted. So we
 * don't need any operations on the root directory. However, we need a
 * non-trivial d_name - pipe: will go nicely and kill the special-casing
 * in procfs.
 */

static struct super_block *pipefs_get_sb(struct file_system_type *fs_type,
        int flags, const char *dev_name, void *data)
{
        return get_sb_pseudo(fs_type, "pipe:", NULL, PIPEFS_MAGIC);
}

static struct file_system_type pipe_fs_type = {
        .name           = "pipefs",
        .get_sb         = pipefs_get_sb,
        .kill_sb        = kill_anon_super,
};

static int __init init_pipe_fs(void)
{
        int err = register_filesystem(&pipe_fs_type);
        if (!err) {
                pipe_mnt = kern_mount(&pipe_fs_type);
                if (IS_ERR(pipe_mnt)) {
                        err = PTR_ERR(pipe_mnt);
                        unregister_filesystem(&pipe_fs_type);
                }
        }
        return err;
}

static void __exit exit_pipe_fs(void)
{
        unregister_filesystem(&pipe_fs_type);
        mntput(pipe_mnt);
}

fs_initcall(init_pipe_fs);
module_exit(exit_pipe_fs);