Merge branch 'master'
[linux-2.6] / net / core / stream.c
1 /*
2  *     SUCS NET3:
3  *
4  *     Generic stream handling routines. These are generic for most
5  *     protocols. Even IP. Tonight 8-).
6  *     This is used because TCP, LLC (others too) layer all have mostly
7  *     identical sendmsg() and recvmsg() code.
8  *     So we (will) share it here.
9  *
10  *     Authors:        Arnaldo Carvalho de Melo <acme@conectiva.com.br>
11  *                     (from old tcp.c code)
12  *                     Alan Cox <alan@redhat.com> (Borrowed comments 8-))
13  */
14
15 #include <linux/module.h>
16 #include <linux/net.h>
17 #include <linux/signal.h>
18 #include <linux/tcp.h>
19 #include <linux/wait.h>
20 #include <net/sock.h>
21
22 /**
23  * sk_stream_write_space - stream socket write_space callback.
24  * @sk: socket
25  *
26  * FIXME: write proper description
27  */
28 void sk_stream_write_space(struct sock *sk)
29 {
30         struct socket *sock = sk->sk_socket;
31
32         if (sk_stream_wspace(sk) >= sk_stream_min_wspace(sk) && sock) {
33                 clear_bit(SOCK_NOSPACE, &sock->flags);
34
35                 if (sk->sk_sleep && waitqueue_active(sk->sk_sleep))
36                         wake_up_interruptible(sk->sk_sleep);
37                 if (sock->fasync_list && !(sk->sk_shutdown & SEND_SHUTDOWN))
38                         sock_wake_async(sock, 2, POLL_OUT);
39         }
40 }
41
42 EXPORT_SYMBOL(sk_stream_write_space);
43
44 /**
45  * sk_stream_wait_connect - Wait for a socket to get into the connected state
46  * @sk: sock to wait on
47  * @timeo_p: for how long to wait
48  *
49  * Must be called with the socket locked.
50  */
51 int sk_stream_wait_connect(struct sock *sk, long *timeo_p)
52 {
53         struct task_struct *tsk = current;
54         DEFINE_WAIT(wait);
55         int done;
56
57         do {
58                 if (sk->sk_err)
59                         return sock_error(sk);
60                 if ((1 << sk->sk_state) & ~(TCPF_SYN_SENT | TCPF_SYN_RECV))
61                         return -EPIPE;
62                 if (!*timeo_p)
63                         return -EAGAIN;
64                 if (signal_pending(tsk))
65                         return sock_intr_errno(*timeo_p);
66
67                 prepare_to_wait(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE);
68                 sk->sk_write_pending++;
69                 done = sk_wait_event(sk, timeo_p,
70                                      !((1 << sk->sk_state) & 
71                                        ~(TCPF_ESTABLISHED | TCPF_CLOSE_WAIT)));
72                 finish_wait(sk->sk_sleep, &wait);
73                 sk->sk_write_pending--;
74         } while (!done);
75         return 0;
76 }
77
78 EXPORT_SYMBOL(sk_stream_wait_connect);
79
80 /**
81  * sk_stream_closing - Return 1 if we still have things to send in our buffers.
82  * @sk: socket to verify
83  */
84 static inline int sk_stream_closing(struct sock *sk)
85 {
86         return (1 << sk->sk_state) &
87                (TCPF_FIN_WAIT1 | TCPF_CLOSING | TCPF_LAST_ACK);
88 }
89
90 void sk_stream_wait_close(struct sock *sk, long timeout)
91 {
92         if (timeout) {
93                 DEFINE_WAIT(wait);
94
95                 do {
96                         prepare_to_wait(sk->sk_sleep, &wait,
97                                         TASK_INTERRUPTIBLE);
98                         if (sk_wait_event(sk, &timeout, !sk_stream_closing(sk)))
99                                 break;
100                 } while (!signal_pending(current) && timeout);
101
102                 finish_wait(sk->sk_sleep, &wait);
103         }
104 }
105
106 EXPORT_SYMBOL(sk_stream_wait_close);
107
108 /**
109  * sk_stream_wait_memory - Wait for more memory for a socket
110  * @sk: socket to wait for memory
111  * @timeo_p: for how long
112  */
113 int sk_stream_wait_memory(struct sock *sk, long *timeo_p)
114 {
115         int err = 0;
116         long vm_wait = 0;
117         long current_timeo = *timeo_p;
118         DEFINE_WAIT(wait);
119
120         if (sk_stream_memory_free(sk))
121                 current_timeo = vm_wait = (net_random() % (HZ / 5)) + 2;
122
123         while (1) {
124                 set_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags);
125
126                 prepare_to_wait(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE);
127
128                 if (sk->sk_err || (sk->sk_shutdown & SEND_SHUTDOWN))
129                         goto do_error;
130                 if (!*timeo_p)
131                         goto do_nonblock;
132                 if (signal_pending(current))
133                         goto do_interrupted;
134                 clear_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags);
135                 if (sk_stream_memory_free(sk) && !vm_wait)
136                         break;
137
138                 set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
139                 sk->sk_write_pending++;
140                 sk_wait_event(sk, &current_timeo, sk_stream_memory_free(sk) &&
141                                                   vm_wait);
142                 sk->sk_write_pending--;
143
144                 if (vm_wait) {
145                         vm_wait -= current_timeo;
146                         current_timeo = *timeo_p;
147                         if (current_timeo != MAX_SCHEDULE_TIMEOUT &&
148                             (current_timeo -= vm_wait) < 0)
149                                 current_timeo = 0;
150                         vm_wait = 0;
151                 }
152                 *timeo_p = current_timeo;
153         }
154 out:
155         finish_wait(sk->sk_sleep, &wait);
156         return err;
157
158 do_error:
159         err = -EPIPE;
160         goto out;
161 do_nonblock:
162         err = -EAGAIN;
163         goto out;
164 do_interrupted:
165         err = sock_intr_errno(*timeo_p);
166         goto out;
167 }
168
169 EXPORT_SYMBOL(sk_stream_wait_memory);
170
171 void sk_stream_rfree(struct sk_buff *skb)
172 {
173         struct sock *sk = skb->sk;
174
175         atomic_sub(skb->truesize, &sk->sk_rmem_alloc);
176         sk->sk_forward_alloc += skb->truesize;
177 }
178
179 EXPORT_SYMBOL(sk_stream_rfree);
180
181 int sk_stream_error(struct sock *sk, int flags, int err)
182 {
183         if (err == -EPIPE)
184                 err = sock_error(sk) ? : -EPIPE;
185         if (err == -EPIPE && !(flags & MSG_NOSIGNAL))
186                 send_sig(SIGPIPE, current, 0);
187         return err;
188 }
189
190 EXPORT_SYMBOL(sk_stream_error);
191
192 void __sk_stream_mem_reclaim(struct sock *sk)
193 {
194         if (sk->sk_forward_alloc >= SK_STREAM_MEM_QUANTUM) {
195                 atomic_sub(sk->sk_forward_alloc / SK_STREAM_MEM_QUANTUM,
196                            sk->sk_prot->memory_allocated);
197                 sk->sk_forward_alloc &= SK_STREAM_MEM_QUANTUM - 1;
198                 if (*sk->sk_prot->memory_pressure &&
199                     (atomic_read(sk->sk_prot->memory_allocated) <
200                      sk->sk_prot->sysctl_mem[0]))
201                         *sk->sk_prot->memory_pressure = 0;
202         }
203 }
204
205 EXPORT_SYMBOL(__sk_stream_mem_reclaim);
206
207 int sk_stream_mem_schedule(struct sock *sk, int size, int kind)
208 {
209         int amt = sk_stream_pages(size);
210
211         sk->sk_forward_alloc += amt * SK_STREAM_MEM_QUANTUM;
212         atomic_add(amt, sk->sk_prot->memory_allocated);
213
214         /* Under limit. */
215         if (atomic_read(sk->sk_prot->memory_allocated) < sk->sk_prot->sysctl_mem[0]) {
216                 if (*sk->sk_prot->memory_pressure)
217                         *sk->sk_prot->memory_pressure = 0;
218                 return 1;
219         }
220
221         /* Over hard limit. */
222         if (atomic_read(sk->sk_prot->memory_allocated) > sk->sk_prot->sysctl_mem[2]) {
223                 sk->sk_prot->enter_memory_pressure();
224                 goto suppress_allocation;
225         }
226
227         /* Under pressure. */
228         if (atomic_read(sk->sk_prot->memory_allocated) > sk->sk_prot->sysctl_mem[1])
229                 sk->sk_prot->enter_memory_pressure();
230
231         if (kind) {
232                 if (atomic_read(&sk->sk_rmem_alloc) < sk->sk_prot->sysctl_rmem[0])
233                         return 1;
234         } else if (sk->sk_wmem_queued < sk->sk_prot->sysctl_wmem[0])
235                 return 1;
236
237         if (!*sk->sk_prot->memory_pressure ||
238             sk->sk_prot->sysctl_mem[2] > atomic_read(sk->sk_prot->sockets_allocated) *
239                                 sk_stream_pages(sk->sk_wmem_queued +
240                                                 atomic_read(&sk->sk_rmem_alloc) +
241                                                 sk->sk_forward_alloc))
242                 return 1;
243
244 suppress_allocation:
245
246         if (!kind) {
247                 sk_stream_moderate_sndbuf(sk);
248
249                 /* Fail only if socket is _under_ its sndbuf.
250                  * In this case we cannot block, so that we have to fail.
251                  */
252                 if (sk->sk_wmem_queued + size >= sk->sk_sndbuf)
253                         return 1;
254         }
255
256         /* Alas. Undo changes. */
257         sk->sk_forward_alloc -= amt * SK_STREAM_MEM_QUANTUM;
258         atomic_sub(amt, sk->sk_prot->memory_allocated);
259         return 0;
260 }
261
262 EXPORT_SYMBOL(sk_stream_mem_schedule);
263
264 void sk_stream_kill_queues(struct sock *sk)
265 {
266         /* First the read buffer. */
267         __skb_queue_purge(&sk->sk_receive_queue);
268
269         /* Next, the error queue. */
270         __skb_queue_purge(&sk->sk_error_queue);
271
272         /* Next, the write queue. */
273         BUG_TRAP(skb_queue_empty(&sk->sk_write_queue));
274
275         /* Account for returned memory. */
276         sk_stream_mem_reclaim(sk);
277
278         BUG_TRAP(!sk->sk_wmem_queued);
279         BUG_TRAP(!sk->sk_forward_alloc);
280
281         /* It is _impossible_ for the backlog to contain anything
282          * when we get here.  All user references to this socket
283          * have gone away, only the net layer knows can touch it.
284          */
285 }
286
287 EXPORT_SYMBOL(sk_stream_kill_queues);