[PATCH] splice: fix bugs in pipe_to_file()
fs/pipe.c (linux-2.6)
/*
 *  linux/fs/pipe.c
 *
 *  Copyright (C) 1991, 1992, 1999  Linus Torvalds
 */

#include <linux/mm.h>
#include <linux/file.h>
#include <linux/poll.h>
#include <linux/slab.h>
#include <linux/module.h>
#include <linux/init.h>
#include <linux/fs.h>
#include <linux/mount.h>
#include <linux/pipe_fs_i.h>
#include <linux/uio.h>
#include <linux/highmem.h>
#include <linux/pagemap.h>

#include <asm/uaccess.h>
#include <asm/ioctls.h>

/*
 * We use a start+len construction, which provides full use of the
 * allocated memory.
 * -- Florian Coosmann (FGC)
 *
 * Reads with count = 0 should always return 0.
 * -- Julian Bradfield 1999-06-07.
 *
 * FIFOs and Pipes now generate SIGIO for both readers and writers.
 * -- Jeremy Elson <jelson@circlemud.org> 2001-08-16
 *
 * pipe_read & write cleanup
 * -- Manfred Spraul <manfred@colorfullife.com> 2002-05-09
 */

/* Drop the inode semaphore and wait for a pipe event, atomically */
void pipe_wait(struct pipe_inode_info *pipe)
{
	DEFINE_WAIT(wait);

	/*
	 * Pipes are system-local resources, so sleeping on them
	 * is considered a noninteractive wait:
	 */
	prepare_to_wait(&pipe->wait, &wait,
			TASK_INTERRUPTIBLE | TASK_NONINTERACTIVE);
	if (pipe->inode)
		mutex_unlock(&pipe->inode->i_mutex);
	schedule();
	finish_wait(&pipe->wait, &wait);
	if (pipe->inode)
		mutex_lock(&pipe->inode->i_mutex);
}

static int
pipe_iov_copy_from_user(void *to, struct iovec *iov, unsigned long len)
{
	unsigned long copy;

	while (len > 0) {
		while (!iov->iov_len)
			iov++;
		copy = min_t(unsigned long, len, iov->iov_len);

		if (copy_from_user(to, iov->iov_base, copy))
			return -EFAULT;
		to += copy;
		len -= copy;
		iov->iov_base += copy;
		iov->iov_len -= copy;
	}
	return 0;
}

static int
pipe_iov_copy_to_user(struct iovec *iov, const void *from, unsigned long len)
{
	unsigned long copy;

	while (len > 0) {
		while (!iov->iov_len)
			iov++;
		copy = min_t(unsigned long, len, iov->iov_len);

		if (copy_to_user(iov->iov_base, from, copy))
			return -EFAULT;
		from += copy;
		len -= copy;
		iov->iov_base += copy;
		iov->iov_len -= copy;
	}
	return 0;
}
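
/*
 * Illustrative sketch, not part of this file or the kernel build (hence the
 * "#if 0" guard): the iovec walking done by pipe_iov_copy_from_user() and
 * pipe_iov_copy_to_user() above is what services vectored I/O such as
 * writev(2) on a pipe. A minimal userspace example, assuming an ordinary
 * POSIX environment; the demo function name is made up for illustration.
 */
#if 0
#include <sys/uio.h>

static ssize_t pipe_writev_demo(int pipe_wr_fd)
{
	/* Two scattered userspace segments, gathered into one pipe write. */
	struct iovec iov[2] = {
		{ .iov_base = (void *)"hello ", .iov_len = 6 },
		{ .iov_base = (void *)"world\n", .iov_len = 6 },
	};

	return writev(pipe_wr_fd, iov, 2);
}
#endif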

static void anon_pipe_buf_release(struct pipe_inode_info *pipe,
				  struct pipe_buffer *buf)
{
	struct page *page = buf->page;

	/*
	 * If nobody else uses this page, and we don't already have a
	 * temporary page, let's keep track of it as a one-deep
	 * allocation cache. (Otherwise just release our reference to it)
	 */
	if (page_count(page) == 1 && !pipe->tmp_page)
		pipe->tmp_page = page;
	else
		page_cache_release(page);
}

static void * anon_pipe_buf_map(struct file *file, struct pipe_inode_info *pipe,
				struct pipe_buffer *buf)
{
	return kmap(buf->page);
}

static void anon_pipe_buf_unmap(struct pipe_inode_info *pipe,
				struct pipe_buffer *buf)
{
	kunmap(buf->page);
}

static int anon_pipe_buf_steal(struct pipe_inode_info *pipe,
			       struct pipe_buffer *buf)
{
	struct page *page = buf->page;

	if (page_count(page) == 1) {
		lock_page(page);
		return 0;
	}

	return 1;
}

static void anon_pipe_buf_get(struct pipe_inode_info *info,
			      struct pipe_buffer *buf)
{
	page_cache_get(buf->page);
}

static struct pipe_buf_operations anon_pipe_buf_ops = {
	.can_merge = 1,
	.map = anon_pipe_buf_map,
	.unmap = anon_pipe_buf_unmap,
	.release = anon_pipe_buf_release,
	.steal = anon_pipe_buf_steal,
	.get = anon_pipe_buf_get,
};

static ssize_t
pipe_readv(struct file *filp, const struct iovec *_iov,
	   unsigned long nr_segs, loff_t *ppos)
{
	struct inode *inode = filp->f_dentry->d_inode;
	struct pipe_inode_info *pipe;
	int do_wakeup;
	ssize_t ret;
	struct iovec *iov = (struct iovec *)_iov;
	size_t total_len;

	total_len = iov_length(iov, nr_segs);
	/* Null read succeeds. */
	if (unlikely(total_len == 0))
		return 0;

	do_wakeup = 0;
	ret = 0;
	mutex_lock(&inode->i_mutex);
	pipe = inode->i_pipe;
	for (;;) {
		int bufs = pipe->nrbufs;
		if (bufs) {
			int curbuf = pipe->curbuf;
			struct pipe_buffer *buf = pipe->bufs + curbuf;
			struct pipe_buf_operations *ops = buf->ops;
			void *addr;
			size_t chars = buf->len;
			int error;

			if (chars > total_len)
				chars = total_len;

			addr = ops->map(filp, pipe, buf);
			if (IS_ERR(addr)) {
				if (!ret)
					ret = PTR_ERR(addr);
				break;
			}
			error = pipe_iov_copy_to_user(iov, addr + buf->offset, chars);
			ops->unmap(pipe, buf);
			if (unlikely(error)) {
				if (!ret)
					ret = -EFAULT;
				break;
			}
			ret += chars;
			buf->offset += chars;
			buf->len -= chars;
			if (!buf->len) {
				buf->ops = NULL;
				ops->release(pipe, buf);
				curbuf = (curbuf + 1) & (PIPE_BUFFERS-1);
				pipe->curbuf = curbuf;
				pipe->nrbufs = --bufs;
				do_wakeup = 1;
			}
			total_len -= chars;
			if (!total_len)
				break;	/* common path: read succeeded */
		}
		if (bufs)	/* More to do? */
			continue;
		if (!pipe->writers)
			break;
		if (!pipe->waiting_writers) {
			/* syscall merging: Usually we must not sleep
			 * if O_NONBLOCK is set, or if we got some data.
			 * But if a writer sleeps in kernel space, then
			 * we can wait for that data without violating POSIX.
			 */
			if (ret)
				break;
			if (filp->f_flags & O_NONBLOCK) {
				ret = -EAGAIN;
				break;
			}
		}
		if (signal_pending(current)) {
			if (!ret)
				ret = -ERESTARTSYS;
			break;
		}
		if (do_wakeup) {
			wake_up_interruptible_sync(&pipe->wait);
			kill_fasync(&pipe->fasync_writers, SIGIO, POLL_OUT);
		}
		pipe_wait(pipe);
	}
	mutex_unlock(&inode->i_mutex);

	/* Signal writers asynchronously that there is more room. */
	if (do_wakeup) {
		wake_up_interruptible(&pipe->wait);
		kill_fasync(&pipe->fasync_writers, SIGIO, POLL_OUT);
	}
	if (ret > 0)
		file_accessed(filp);
	return ret;
}

static ssize_t
pipe_read(struct file *filp, char __user *buf, size_t count, loff_t *ppos)
{
	struct iovec iov = { .iov_base = buf, .iov_len = count };

	return pipe_readv(filp, &iov, 1, ppos);
}

static ssize_t
pipe_writev(struct file *filp, const struct iovec *_iov,
	    unsigned long nr_segs, loff_t *ppos)
{
	struct inode *inode = filp->f_dentry->d_inode;
	struct pipe_inode_info *pipe;
	ssize_t ret;
	int do_wakeup;
	struct iovec *iov = (struct iovec *)_iov;
	size_t total_len;
	ssize_t chars;

	total_len = iov_length(iov, nr_segs);
	/* Null write succeeds. */
	if (unlikely(total_len == 0))
		return 0;

	do_wakeup = 0;
	ret = 0;
	mutex_lock(&inode->i_mutex);
	pipe = inode->i_pipe;

	if (!pipe->readers) {
		send_sig(SIGPIPE, current, 0);
		ret = -EPIPE;
		goto out;
	}

	/* We try to merge small writes */
	chars = total_len & (PAGE_SIZE-1); /* size of the last buffer */
	if (pipe->nrbufs && chars != 0) {
		int lastbuf = (pipe->curbuf + pipe->nrbufs - 1) &
							(PIPE_BUFFERS-1);
		struct pipe_buffer *buf = pipe->bufs + lastbuf;
		struct pipe_buf_operations *ops = buf->ops;
		int offset = buf->offset + buf->len;

		if (ops->can_merge && offset + chars <= PAGE_SIZE) {
			void *addr;
			int error;

			addr = ops->map(filp, pipe, buf);
			if (IS_ERR(addr)) {
				error = PTR_ERR(addr);
				goto out;
			}
			error = pipe_iov_copy_from_user(offset + addr, iov,
							chars);
			ops->unmap(pipe, buf);
			ret = error;
			do_wakeup = 1;
			if (error)
				goto out;
			buf->len += chars;
			total_len -= chars;
			ret = chars;
			if (!total_len)
				goto out;
		}
	}

	for (;;) {
		int bufs;

		if (!pipe->readers) {
			send_sig(SIGPIPE, current, 0);
			if (!ret)
				ret = -EPIPE;
			break;
		}
		bufs = pipe->nrbufs;
		if (bufs < PIPE_BUFFERS) {
			int newbuf = (pipe->curbuf + bufs) & (PIPE_BUFFERS-1);
			struct pipe_buffer *buf = pipe->bufs + newbuf;
			struct page *page = pipe->tmp_page;
			int error;

			if (!page) {
				page = alloc_page(GFP_HIGHUSER);
				if (unlikely(!page)) {
					ret = ret ? : -ENOMEM;
					break;
				}
				pipe->tmp_page = page;
			}
			/* Always wake up, even if the copy fails. Otherwise
			 * we lock up (O_NONBLOCK-)readers that sleep due to
			 * syscall merging.
			 * FIXME! Is this really true?
			 */
			do_wakeup = 1;
			chars = PAGE_SIZE;
			if (chars > total_len)
				chars = total_len;

			error = pipe_iov_copy_from_user(kmap(page), iov, chars);
			kunmap(page);
			if (unlikely(error)) {
				if (!ret)
					ret = -EFAULT;
				break;
			}
			ret += chars;

			/* Insert it into the buffer array */
			buf->page = page;
			buf->ops = &anon_pipe_buf_ops;
			buf->offset = 0;
			buf->len = chars;
			pipe->nrbufs = ++bufs;
			pipe->tmp_page = NULL;

			total_len -= chars;
			if (!total_len)
				break;
		}
		if (bufs < PIPE_BUFFERS)
			continue;
		if (filp->f_flags & O_NONBLOCK) {
			if (!ret)
				ret = -EAGAIN;
			break;
		}
		if (signal_pending(current)) {
			if (!ret)
				ret = -ERESTARTSYS;
			break;
		}
		if (do_wakeup) {
			wake_up_interruptible_sync(&pipe->wait);
			kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN);
			do_wakeup = 0;
		}
		pipe->waiting_writers++;
		pipe_wait(pipe);
		pipe->waiting_writers--;
	}
out:
	mutex_unlock(&inode->i_mutex);
	if (do_wakeup) {
		wake_up_interruptible(&pipe->wait);
		kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN);
	}
	if (ret > 0)
		file_update_time(filp);
	return ret;
}

static ssize_t
pipe_write(struct file *filp, const char __user *buf,
	   size_t count, loff_t *ppos)
{
	struct iovec iov = { .iov_base = (void __user *)buf, .iov_len = count };

	return pipe_writev(filp, &iov, 1, ppos);
}

static ssize_t
bad_pipe_r(struct file *filp, char __user *buf, size_t count, loff_t *ppos)
{
	return -EBADF;
}

static ssize_t
bad_pipe_w(struct file *filp, const char __user *buf, size_t count,
	   loff_t *ppos)
{
	return -EBADF;
}

static int
pipe_ioctl(struct inode *pino, struct file *filp,
	   unsigned int cmd, unsigned long arg)
{
	struct inode *inode = filp->f_dentry->d_inode;
	struct pipe_inode_info *pipe;
	int count, buf, nrbufs;

	switch (cmd) {
		case FIONREAD:
			mutex_lock(&inode->i_mutex);
			pipe = inode->i_pipe;
			count = 0;
			buf = pipe->curbuf;
			nrbufs = pipe->nrbufs;
			while (--nrbufs >= 0) {
				count += pipe->bufs[buf].len;
				buf = (buf+1) & (PIPE_BUFFERS-1);
			}
			mutex_unlock(&inode->i_mutex);

			return put_user(count, (int __user *)arg);
		default:
			return -EINVAL;
	}
}
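
/*
 * Illustrative sketch, not part of this file or the kernel build (hence the
 * "#if 0" guard): FIONREAD on a pipe file descriptor returns the number of
 * bytes currently buffered, i.e. the sum over buf->len that pipe_ioctl()
 * computes above. Assumes an ordinary POSIX userspace; the demo function
 * name is made up for illustration.
 */
#if 0
#include <sys/ioctl.h>
#include <unistd.h>

static int pipe_fionread_demo(void)
{
	int fds[2], queued = 0;

	if (pipe(fds))
		return -1;
	write(fds[1], "hello", 5);
	ioctl(fds[0], FIONREAD, &queued);	/* queued is now 5 */
	close(fds[0]);
	close(fds[1]);
	return queued;
}
#endif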

/* No kernel lock held - fine */
static unsigned int
pipe_poll(struct file *filp, poll_table *wait)
{
	unsigned int mask;
	struct inode *inode = filp->f_dentry->d_inode;
	struct pipe_inode_info *pipe = inode->i_pipe;
	int nrbufs;

	poll_wait(filp, &pipe->wait, wait);

	/* Reading only -- no need for acquiring the semaphore.  */
	nrbufs = pipe->nrbufs;
	mask = 0;
	if (filp->f_mode & FMODE_READ) {
		mask = (nrbufs > 0) ? POLLIN | POLLRDNORM : 0;
		if (!pipe->writers && filp->f_version != pipe->w_counter)
			mask |= POLLHUP;
	}

	if (filp->f_mode & FMODE_WRITE) {
		mask |= (nrbufs < PIPE_BUFFERS) ? POLLOUT | POLLWRNORM : 0;
		/*
		 * Most Unices do not set POLLERR for FIFOs but on Linux they
		 * behave exactly like pipes for poll().
		 */
		if (!pipe->readers)
			mask |= POLLERR;
	}

	return mask;
}
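
/*
 * Illustrative sketch, not part of this file or the kernel build (hence the
 * "#if 0" guard): polling the read end of a pipe reports POLLIN while data
 * is buffered and POLLHUP once every writer has gone away, matching the
 * mask built in pipe_poll() above. Assumes an ordinary POSIX userspace; the
 * demo function name is made up for illustration.
 */
#if 0
#include <poll.h>
#include <unistd.h>

static short pipe_poll_demo(void)
{
	int fds[2];
	struct pollfd pfd;

	if (pipe(fds))
		return -1;
	write(fds[1], "x", 1);
	close(fds[1]);			/* no writers left */

	pfd.fd = fds[0];
	pfd.events = POLLIN;
	poll(&pfd, 1, 0);
	return pfd.revents;		/* POLLIN | POLLHUP expected */
}
#endif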

static int
pipe_release(struct inode *inode, int decr, int decw)
{
	struct pipe_inode_info *pipe;

	mutex_lock(&inode->i_mutex);
	pipe = inode->i_pipe;
	pipe->readers -= decr;
	pipe->writers -= decw;

	if (!pipe->readers && !pipe->writers) {
		free_pipe_info(inode);
	} else {
		wake_up_interruptible(&pipe->wait);
		kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN);
		kill_fasync(&pipe->fasync_writers, SIGIO, POLL_OUT);
	}
	mutex_unlock(&inode->i_mutex);

	return 0;
}

static int
pipe_read_fasync(int fd, struct file *filp, int on)
{
	struct inode *inode = filp->f_dentry->d_inode;
	int retval;

	mutex_lock(&inode->i_mutex);
	retval = fasync_helper(fd, filp, on, &inode->i_pipe->fasync_readers);
	mutex_unlock(&inode->i_mutex);

	if (retval < 0)
		return retval;

	return 0;
}


static int
pipe_write_fasync(int fd, struct file *filp, int on)
{
	struct inode *inode = filp->f_dentry->d_inode;
	int retval;

	mutex_lock(&inode->i_mutex);
	retval = fasync_helper(fd, filp, on, &inode->i_pipe->fasync_writers);
	mutex_unlock(&inode->i_mutex);

	if (retval < 0)
		return retval;

	return 0;
}


static int
pipe_rdwr_fasync(int fd, struct file *filp, int on)
{
	struct inode *inode = filp->f_dentry->d_inode;
	struct pipe_inode_info *pipe = inode->i_pipe;
	int retval;

	mutex_lock(&inode->i_mutex);

	retval = fasync_helper(fd, filp, on, &pipe->fasync_readers);

	if (retval >= 0)
		retval = fasync_helper(fd, filp, on, &pipe->fasync_writers);

	mutex_unlock(&inode->i_mutex);

	if (retval < 0)
		return retval;

	return 0;
}


static int
pipe_read_release(struct inode *inode, struct file *filp)
{
	pipe_read_fasync(-1, filp, 0);
	return pipe_release(inode, 1, 0);
}

static int
pipe_write_release(struct inode *inode, struct file *filp)
{
	pipe_write_fasync(-1, filp, 0);
	return pipe_release(inode, 0, 1);
}

static int
pipe_rdwr_release(struct inode *inode, struct file *filp)
{
	int decr, decw;

	pipe_rdwr_fasync(-1, filp, 0);
	decr = (filp->f_mode & FMODE_READ) != 0;
	decw = (filp->f_mode & FMODE_WRITE) != 0;
	return pipe_release(inode, decr, decw);
}

static int
pipe_read_open(struct inode *inode, struct file *filp)
{
	/* We could have perhaps used atomic_t, but this and friends
	   below are the only places.  So it doesn't seem worthwhile.  */
	mutex_lock(&inode->i_mutex);
	inode->i_pipe->readers++;
	mutex_unlock(&inode->i_mutex);

	return 0;
}

static int
pipe_write_open(struct inode *inode, struct file *filp)
{
	mutex_lock(&inode->i_mutex);
	inode->i_pipe->writers++;
	mutex_unlock(&inode->i_mutex);

	return 0;
}

static int
pipe_rdwr_open(struct inode *inode, struct file *filp)
{
	mutex_lock(&inode->i_mutex);
	if (filp->f_mode & FMODE_READ)
		inode->i_pipe->readers++;
	if (filp->f_mode & FMODE_WRITE)
		inode->i_pipe->writers++;
	mutex_unlock(&inode->i_mutex);

	return 0;
}

/*
 * The file_operations structs are not static because they
 * are also used in linux/fs/fifo.c to do operations on FIFOs.
 */
const struct file_operations read_fifo_fops = {
	.llseek		= no_llseek,
	.read		= pipe_read,
	.readv		= pipe_readv,
	.write		= bad_pipe_w,
	.poll		= pipe_poll,
	.ioctl		= pipe_ioctl,
	.open		= pipe_read_open,
	.release	= pipe_read_release,
	.fasync		= pipe_read_fasync,
};

const struct file_operations write_fifo_fops = {
	.llseek		= no_llseek,
	.read		= bad_pipe_r,
	.write		= pipe_write,
	.writev		= pipe_writev,
	.poll		= pipe_poll,
	.ioctl		= pipe_ioctl,
	.open		= pipe_write_open,
	.release	= pipe_write_release,
	.fasync		= pipe_write_fasync,
};

const struct file_operations rdwr_fifo_fops = {
	.llseek		= no_llseek,
	.read		= pipe_read,
	.readv		= pipe_readv,
	.write		= pipe_write,
	.writev		= pipe_writev,
	.poll		= pipe_poll,
	.ioctl		= pipe_ioctl,
	.open		= pipe_rdwr_open,
	.release	= pipe_rdwr_release,
	.fasync		= pipe_rdwr_fasync,
};
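
/*
 * Illustrative sketch, not part of this file or the kernel build (hence the
 * "#if 0" guard): a FIFO created with mkfifo(3) is driven through the
 * *_fifo_fops tables above, so it reads and writes through the same
 * pipe_read()/pipe_write() paths as an anonymous pipe. Assumes an ordinary
 * POSIX userspace; the path and function name are made up for illustration.
 */
#if 0
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <unistd.h>

static int fifo_demo(void)
{
	char buf[16];
	int rfd, wfd;

	mkfifo("/tmp/demo.fifo", 0600);
	/* Open the reader non-blocking first so it does not wait for a writer. */
	rfd = open("/tmp/demo.fifo", O_RDONLY | O_NONBLOCK);
	wfd = open("/tmp/demo.fifo", O_WRONLY);

	write(wfd, "ping", 4);
	return read(rfd, buf, sizeof(buf));	/* 4 bytes */
}
#endif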

static struct file_operations read_pipe_fops = {
	.llseek		= no_llseek,
	.read		= pipe_read,
	.readv		= pipe_readv,
	.write		= bad_pipe_w,
	.poll		= pipe_poll,
	.ioctl		= pipe_ioctl,
	.open		= pipe_read_open,
	.release	= pipe_read_release,
	.fasync		= pipe_read_fasync,
};

static struct file_operations write_pipe_fops = {
	.llseek		= no_llseek,
	.read		= bad_pipe_r,
	.write		= pipe_write,
	.writev		= pipe_writev,
	.poll		= pipe_poll,
	.ioctl		= pipe_ioctl,
	.open		= pipe_write_open,
	.release	= pipe_write_release,
	.fasync		= pipe_write_fasync,
};

static struct file_operations rdwr_pipe_fops = {
	.llseek		= no_llseek,
	.read		= pipe_read,
	.readv		= pipe_readv,
	.write		= pipe_write,
	.writev		= pipe_writev,
	.poll		= pipe_poll,
	.ioctl		= pipe_ioctl,
	.open		= pipe_rdwr_open,
	.release	= pipe_rdwr_release,
	.fasync		= pipe_rdwr_fasync,
};

struct pipe_inode_info * alloc_pipe_info(struct inode *inode)
{
	struct pipe_inode_info *pipe;

	pipe = kzalloc(sizeof(struct pipe_inode_info), GFP_KERNEL);
	if (pipe) {
		init_waitqueue_head(&pipe->wait);
		pipe->r_counter = pipe->w_counter = 1;
		pipe->inode = inode;
	}

	return pipe;
}

void __free_pipe_info(struct pipe_inode_info *pipe)
{
	int i;

	for (i = 0; i < PIPE_BUFFERS; i++) {
		struct pipe_buffer *buf = pipe->bufs + i;
		if (buf->ops)
			buf->ops->release(pipe, buf);
	}
	if (pipe->tmp_page)
		__free_page(pipe->tmp_page);
	kfree(pipe);
}

void free_pipe_info(struct inode *inode)
{
	__free_pipe_info(inode->i_pipe);
	inode->i_pipe = NULL;
}

static struct vfsmount *pipe_mnt __read_mostly;
static int pipefs_delete_dentry(struct dentry *dentry)
{
	return 1;
}

static struct dentry_operations pipefs_dentry_operations = {
	.d_delete	= pipefs_delete_dentry,
};

static struct inode * get_pipe_inode(void)
{
	struct inode *inode = new_inode(pipe_mnt->mnt_sb);
	struct pipe_inode_info *pipe;

	if (!inode)
		goto fail_inode;

	pipe = alloc_pipe_info(inode);
	if (!pipe)
		goto fail_iput;
	inode->i_pipe = pipe;

	pipe->readers = pipe->writers = 1;
	inode->i_fop = &rdwr_pipe_fops;

	/*
	 * Mark the inode dirty from the very beginning,
	 * that way it will never be moved to the dirty
	 * list because "mark_inode_dirty()" will think
	 * that it already _is_ on the dirty list.
	 */
	inode->i_state = I_DIRTY;
	inode->i_mode = S_IFIFO | S_IRUSR | S_IWUSR;
	inode->i_uid = current->fsuid;
	inode->i_gid = current->fsgid;
	inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;
	inode->i_blksize = PAGE_SIZE;

	return inode;

fail_iput:
	iput(inode);

fail_inode:
	return NULL;
}

int do_pipe(int *fd)
{
	struct qstr this;
	char name[32];
	struct dentry *dentry;
	struct inode * inode;
	struct file *f1, *f2;
	int error;
	int i, j;

	error = -ENFILE;
	f1 = get_empty_filp();
	if (!f1)
		goto no_files;

	f2 = get_empty_filp();
	if (!f2)
		goto close_f1;

	inode = get_pipe_inode();
	if (!inode)
		goto close_f12;

	error = get_unused_fd();
	if (error < 0)
		goto close_f12_inode;
	i = error;

	error = get_unused_fd();
	if (error < 0)
		goto close_f12_inode_i;
	j = error;

	error = -ENOMEM;
	sprintf(name, "[%lu]", inode->i_ino);
	this.name = name;
	this.len = strlen(name);
	this.hash = inode->i_ino; /* will go */
	dentry = d_alloc(pipe_mnt->mnt_sb->s_root, &this);
	if (!dentry)
		goto close_f12_inode_i_j;

	dentry->d_op = &pipefs_dentry_operations;
	d_add(dentry, inode);
	f1->f_vfsmnt = f2->f_vfsmnt = mntget(mntget(pipe_mnt));
	f1->f_dentry = f2->f_dentry = dget(dentry);
	f1->f_mapping = f2->f_mapping = inode->i_mapping;

	/* read file */
	f1->f_pos = f2->f_pos = 0;
	f1->f_flags = O_RDONLY;
	f1->f_op = &read_pipe_fops;
	f1->f_mode = FMODE_READ;
	f1->f_version = 0;

	/* write file */
	f2->f_flags = O_WRONLY;
	f2->f_op = &write_pipe_fops;
	f2->f_mode = FMODE_WRITE;
	f2->f_version = 0;

	fd_install(i, f1);
	fd_install(j, f2);
	fd[0] = i;
	fd[1] = j;

	return 0;

close_f12_inode_i_j:
	put_unused_fd(j);
close_f12_inode_i:
	put_unused_fd(i);
close_f12_inode:
	free_pipe_info(inode);
	iput(inode);
close_f12:
	put_filp(f2);
close_f1:
	put_filp(f1);
no_files:
	return error;
}
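
/*
 * Illustrative sketch, not part of this file or the kernel build (hence the
 * "#if 0" guard): do_pipe() is the kernel side of the pipe(2) system call.
 * fd[0] comes back wired to read_pipe_fops and fd[1] to write_pipe_fops, so
 * data written to fd[1] is read back from fd[0]. Assumes an ordinary POSIX
 * userspace; the demo function name is made up for illustration.
 */
#if 0
#include <unistd.h>

static int pipe_syscall_demo(void)
{
	int fds[2];
	char buf[8];

	if (pipe(fds))
		return -1;
	write(fds[1], "ok", 2);
	read(fds[0], buf, sizeof(buf));		/* buf now holds "ok" */
	close(fds[0]);
	close(fds[1]);
	return 0;
}
#endif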

/*
 * pipefs should _never_ be mounted by userland - too much of security hassle,
 * no real gain from having the whole whorehouse mounted. So we don't need
 * any operations on the root directory. However, we need a non-trivial
 * d_name - pipe: will go nicely and kill the special-casing in procfs.
 */

static struct super_block *
pipefs_get_sb(struct file_system_type *fs_type, int flags,
	      const char *dev_name, void *data)
{
	return get_sb_pseudo(fs_type, "pipe:", NULL, PIPEFS_MAGIC);
}

static struct file_system_type pipe_fs_type = {
	.name		= "pipefs",
	.get_sb		= pipefs_get_sb,
	.kill_sb	= kill_anon_super,
};

static int __init init_pipe_fs(void)
{
	int err = register_filesystem(&pipe_fs_type);

	if (!err) {
		pipe_mnt = kern_mount(&pipe_fs_type);
		if (IS_ERR(pipe_mnt)) {
			err = PTR_ERR(pipe_mnt);
			unregister_filesystem(&pipe_fs_type);
		}
	}
	return err;
}

static void __exit exit_pipe_fs(void)
{
	unregister_filesystem(&pipe_fs_type);
	mntput(pipe_mnt);
}

fs_initcall(init_pipe_fs);
module_exit(exit_pipe_fs);