Merge master.kernel.org:/pub/scm/linux/kernel/git/davem/sparc-2.6
[linux-2.6] / net / core / stream.c
1 /*
2  *     SUCS NET3:
3  *
4  *     Generic stream handling routines. These are generic for most
5  *     protocols. Even IP. Tonight 8-).
6  *     This is used because TCP, LLC (others too) layer all have mostly
7  *     identical sendmsg() and recvmsg() code.
8  *     So we (will) share it here.
9  *
10  *     Authors:        Arnaldo Carvalho de Melo <acme@conectiva.com.br>
11  *                     (from old tcp.c code)
12  *                     Alan Cox <alan@redhat.com> (Borrowed comments 8-))
13  */
14
15 #include <linux/module.h>
16 #include <linux/net.h>
17 #include <linux/signal.h>
18 #include <linux/tcp.h>
19 #include <linux/wait.h>
20 #include <net/sock.h>
21
22 /**
23  * sk_stream_write_space - stream socket write_space callback.
24  * @sk: socket
25  *
26  * FIXME: write proper description
27  */
28 void sk_stream_write_space(struct sock *sk)
29 {
30         struct socket *sock = sk->sk_socket;
31
32         if (sk_stream_wspace(sk) >= sk_stream_min_wspace(sk) && sock) {
33                 clear_bit(SOCK_NOSPACE, &sock->flags);
34
35                 if (sk->sk_sleep && waitqueue_active(sk->sk_sleep))
36                         wake_up_interruptible(sk->sk_sleep);
37                 if (sock->fasync_list && !(sk->sk_shutdown & SEND_SHUTDOWN))
38                         sock_wake_async(sock, 2, POLL_OUT);
39         }
40 }
41
42 EXPORT_SYMBOL(sk_stream_write_space);
43
44 /**
45  * sk_stream_wait_connect - Wait for a socket to get into the connected state
46  * @sk: sock to wait on
47  * @timeo_p: for how long to wait
48  *
49  * Must be called with the socket locked.
50  */
51 int sk_stream_wait_connect(struct sock *sk, long *timeo_p)
52 {
53         struct task_struct *tsk = current;
54         DEFINE_WAIT(wait);
55         int done;
56
57         do {
58                 int err = sock_error(sk);
59                 if (err)
60                         return err;
61                 if ((1 << sk->sk_state) & ~(TCPF_SYN_SENT | TCPF_SYN_RECV))
62                         return -EPIPE;
63                 if (!*timeo_p)
64                         return -EAGAIN;
65                 if (signal_pending(tsk))
66                         return sock_intr_errno(*timeo_p);
67
68                 prepare_to_wait(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE);
69                 sk->sk_write_pending++;
70                 done = sk_wait_event(sk, timeo_p,
71                                      !sk->sk_err &&
72                                      !((1 << sk->sk_state) &
73                                        ~(TCPF_ESTABLISHED | TCPF_CLOSE_WAIT)));
74                 finish_wait(sk->sk_sleep, &wait);
75                 sk->sk_write_pending--;
76         } while (!done);
77         return 0;
78 }
79
80 EXPORT_SYMBOL(sk_stream_wait_connect);
81
82 /**
83  * sk_stream_closing - Return 1 if we still have things to send in our buffers.
84  * @sk: socket to verify
85  */
86 static inline int sk_stream_closing(struct sock *sk)
87 {
88         return (1 << sk->sk_state) &
89                (TCPF_FIN_WAIT1 | TCPF_CLOSING | TCPF_LAST_ACK);
90 }
91
92 void sk_stream_wait_close(struct sock *sk, long timeout)
93 {
94         if (timeout) {
95                 DEFINE_WAIT(wait);
96
97                 do {
98                         prepare_to_wait(sk->sk_sleep, &wait,
99                                         TASK_INTERRUPTIBLE);
100                         if (sk_wait_event(sk, &timeout, !sk_stream_closing(sk)))
101                                 break;
102                 } while (!signal_pending(current) && timeout);
103
104                 finish_wait(sk->sk_sleep, &wait);
105         }
106 }
107
108 EXPORT_SYMBOL(sk_stream_wait_close);
109
110 /**
111  * sk_stream_wait_memory - Wait for more memory for a socket
112  * @sk: socket to wait for memory
113  * @timeo_p: for how long
114  */
115 int sk_stream_wait_memory(struct sock *sk, long *timeo_p)
116 {
117         int err = 0;
118         long vm_wait = 0;
119         long current_timeo = *timeo_p;
120         DEFINE_WAIT(wait);
121
122         if (sk_stream_memory_free(sk))
123                 current_timeo = vm_wait = (net_random() % (HZ / 5)) + 2;
124
125         while (1) {
126                 set_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags);
127
128                 prepare_to_wait(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE);
129
130                 if (sk->sk_err || (sk->sk_shutdown & SEND_SHUTDOWN))
131                         goto do_error;
132                 if (!*timeo_p)
133                         goto do_nonblock;
134                 if (signal_pending(current))
135                         goto do_interrupted;
136                 clear_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags);
137                 if (sk_stream_memory_free(sk) && !vm_wait)
138                         break;
139
140                 set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
141                 sk->sk_write_pending++;
142                 sk_wait_event(sk, &current_timeo, !sk->sk_err &&
143                                                   !(sk->sk_shutdown & SEND_SHUTDOWN) &&
144                                                   sk_stream_memory_free(sk) &&
145                                                   vm_wait);
146                 sk->sk_write_pending--;
147
148                 if (vm_wait) {
149                         vm_wait -= current_timeo;
150                         current_timeo = *timeo_p;
151                         if (current_timeo != MAX_SCHEDULE_TIMEOUT &&
152                             (current_timeo -= vm_wait) < 0)
153                                 current_timeo = 0;
154                         vm_wait = 0;
155                 }
156                 *timeo_p = current_timeo;
157         }
158 out:
159         finish_wait(sk->sk_sleep, &wait);
160         return err;
161
162 do_error:
163         err = -EPIPE;
164         goto out;
165 do_nonblock:
166         err = -EAGAIN;
167         goto out;
168 do_interrupted:
169         err = sock_intr_errno(*timeo_p);
170         goto out;
171 }
172
173 EXPORT_SYMBOL(sk_stream_wait_memory);
174
175 void sk_stream_rfree(struct sk_buff *skb)
176 {
177         struct sock *sk = skb->sk;
178
179         skb_truesize_check(skb);
180         atomic_sub(skb->truesize, &sk->sk_rmem_alloc);
181         sk->sk_forward_alloc += skb->truesize;
182 }
183
184 EXPORT_SYMBOL(sk_stream_rfree);
185
186 int sk_stream_error(struct sock *sk, int flags, int err)
187 {
188         if (err == -EPIPE)
189                 err = sock_error(sk) ? : -EPIPE;
190         if (err == -EPIPE && !(flags & MSG_NOSIGNAL))
191                 send_sig(SIGPIPE, current, 0);
192         return err;
193 }
194
195 EXPORT_SYMBOL(sk_stream_error);
196
197 void __sk_stream_mem_reclaim(struct sock *sk)
198 {
199         atomic_sub(sk->sk_forward_alloc / SK_STREAM_MEM_QUANTUM,
200                    sk->sk_prot->memory_allocated);
201         sk->sk_forward_alloc &= SK_STREAM_MEM_QUANTUM - 1;
202         if (*sk->sk_prot->memory_pressure &&
203             (atomic_read(sk->sk_prot->memory_allocated) <
204              sk->sk_prot->sysctl_mem[0]))
205                 *sk->sk_prot->memory_pressure = 0;
206 }
207
208 EXPORT_SYMBOL(__sk_stream_mem_reclaim);
209
210 int sk_stream_mem_schedule(struct sock *sk, int size, int kind)
211 {
212         int amt = sk_stream_pages(size);
213
214         sk->sk_forward_alloc += amt * SK_STREAM_MEM_QUANTUM;
215         atomic_add(amt, sk->sk_prot->memory_allocated);
216
217         /* Under limit. */
218         if (atomic_read(sk->sk_prot->memory_allocated) < sk->sk_prot->sysctl_mem[0]) {
219                 if (*sk->sk_prot->memory_pressure)
220                         *sk->sk_prot->memory_pressure = 0;
221                 return 1;
222         }
223
224         /* Over hard limit. */
225         if (atomic_read(sk->sk_prot->memory_allocated) > sk->sk_prot->sysctl_mem[2]) {
226                 sk->sk_prot->enter_memory_pressure();
227                 goto suppress_allocation;
228         }
229
230         /* Under pressure. */
231         if (atomic_read(sk->sk_prot->memory_allocated) > sk->sk_prot->sysctl_mem[1])
232                 sk->sk_prot->enter_memory_pressure();
233
234         if (kind) {
235                 if (atomic_read(&sk->sk_rmem_alloc) < sk->sk_prot->sysctl_rmem[0])
236                         return 1;
237         } else if (sk->sk_wmem_queued < sk->sk_prot->sysctl_wmem[0])
238                 return 1;
239
240         if (!*sk->sk_prot->memory_pressure ||
241             sk->sk_prot->sysctl_mem[2] > atomic_read(sk->sk_prot->sockets_allocated) *
242                                 sk_stream_pages(sk->sk_wmem_queued +
243                                                 atomic_read(&sk->sk_rmem_alloc) +
244                                                 sk->sk_forward_alloc))
245                 return 1;
246
247 suppress_allocation:
248
249         if (!kind) {
250                 sk_stream_moderate_sndbuf(sk);
251
252                 /* Fail only if socket is _under_ its sndbuf.
253                  * In this case we cannot block, so that we have to fail.
254                  */
255                 if (sk->sk_wmem_queued + size >= sk->sk_sndbuf)
256                         return 1;
257         }
258
259         /* Alas. Undo changes. */
260         sk->sk_forward_alloc -= amt * SK_STREAM_MEM_QUANTUM;
261         atomic_sub(amt, sk->sk_prot->memory_allocated);
262         return 0;
263 }
264
265 EXPORT_SYMBOL(sk_stream_mem_schedule);
266
267 void sk_stream_kill_queues(struct sock *sk)
268 {
269         /* First the read buffer. */
270         __skb_queue_purge(&sk->sk_receive_queue);
271
272         /* Next, the error queue. */
273         __skb_queue_purge(&sk->sk_error_queue);
274
275         /* Next, the write queue. */
276         BUG_TRAP(skb_queue_empty(&sk->sk_write_queue));
277
278         /* Account for returned memory. */
279         sk_stream_mem_reclaim(sk);
280
281         BUG_TRAP(!sk->sk_wmem_queued);
282         BUG_TRAP(!sk->sk_forward_alloc);
283
284         /* It is _impossible_ for the backlog to contain anything
285          * when we get here.  All user references to this socket
286          * have gone away, only the net layer knows can touch it.
287          */
288 }
289
290 EXPORT_SYMBOL(sk_stream_kill_queues);