sky2: ethtool speed report bug
[linux-2.6] / kernel / sysctl.c
1 /*
2  * sysctl.c: General linux system control interface
3  *
4  * Begun 24 March 1995, Stephen Tweedie
5  * Added /proc support, Dec 1995
6  * Added bdflush entry and intvec min/max checking, 2/23/96, Tom Dyas.
7  * Added hooks for /proc/sys/net (minor, minor patch), 96/4/1, Mike Shaver.
8  * Added kernel/java-{interpreter,appletviewer}, 96/5/10, Mike Shaver.
9  * Dynamic registration fixes, Stephen Tweedie.
10  * Added kswapd-interval, ctrl-alt-del, printk stuff, 1/8/97, Chris Horn.
11  * Made sysctl support optional via CONFIG_SYSCTL, 1/10/97, Chris
12  *  Horn.
13  * Added proc_doulongvec_ms_jiffies_minmax, 09/08/99, Carlos H. Bauer.
14  * Added proc_doulongvec_minmax, 09/08/99, Carlos H. Bauer.
15  * Changed linked lists to use list.h instead of lists.h, 02/24/00, Bill
16  *  Wendling.
17  * The list_for_each() macro wasn't appropriate for the sysctl loop.
18  *  Removed it and replaced it with older style, 03/23/00, Bill Wendling
19  */
20
21 #include <linux/module.h>
22 #include <linux/mm.h>
23 #include <linux/swap.h>
24 #include <linux/slab.h>
25 #include <linux/sysctl.h>
26 #include <linux/proc_fs.h>
27 #include <linux/capability.h>
28 #include <linux/ctype.h>
29 #include <linux/utsname.h>
30 #include <linux/smp_lock.h>
31 #include <linux/fs.h>
32 #include <linux/init.h>
33 #include <linux/kernel.h>
34 #include <linux/kobject.h>
35 #include <linux/net.h>
36 #include <linux/sysrq.h>
37 #include <linux/highuid.h>
38 #include <linux/writeback.h>
39 #include <linux/hugetlb.h>
40 #include <linux/security.h>
41 #include <linux/initrd.h>
42 #include <linux/times.h>
43 #include <linux/limits.h>
44 #include <linux/dcache.h>
45 #include <linux/syscalls.h>
46 #include <linux/nfs_fs.h>
47 #include <linux/acpi.h>
48 #include <linux/reboot.h>
49
50 #include <asm/uaccess.h>
51 #include <asm/processor.h>
52
53 #ifdef CONFIG_X86
54 #include <asm/nmi.h>
55 #include <asm/stacktrace.h>
56 #endif
57
58 #if defined(CONFIG_SYSCTL)
59
60 /* External variables not in a header file. */
/*
 * These are declared by hand here so the sysctl tables in this file can
 * take their addresses.  In the part of the file visible in this chunk they
 * are used as the .data target of a ctl_table entry (e.g. C_A_D backs
 * "ctrl-alt-del", core_pattern backs "core_pattern"), except pid_max_min
 * and pid_max_max, which serve as the .extra1/.extra2 bounds of "pid_max".
 * NOTE(review): suid_dumpable, min_free_kbytes, sysctl_drop_caches,
 * percpu_pagelist_fraction and sysctl_stat_interval are not referenced in
 * this chunk; presumably they are used by tables further down - confirm.
 */
61 extern int C_A_D;
62 extern int print_fatal_signals;
63 extern int sysctl_overcommit_memory;
64 extern int sysctl_overcommit_ratio;
65 extern int sysctl_panic_on_oom;
66 extern int max_threads;
67 extern int core_uses_pid;
68 extern int suid_dumpable;
69 extern char core_pattern[];
70 extern int pid_max;
71 extern int min_free_kbytes;
72 extern int printk_ratelimit_jiffies;
73 extern int printk_ratelimit_burst;
74 extern int pid_max_min, pid_max_max;
75 extern int sysctl_drop_caches;
76 extern int percpu_pagelist_fraction;
77 extern int compat_log;
78 extern int maps_protect;
79 extern int sysctl_stat_interval;
80 extern int audit_argv_kb;
81
82 /* this is needed for the proc_dointvec_minmax for [fs_]overflow UID and GID */
/*
 * minolduid (implicitly 0) and maxolduid (65535) are passed as
 * .extra1/.extra2 of the "overflowuid" and "overflowgid" entries in
 * kern_table, i.e. they are the lower and upper bound for those values.
 */
83 static int maxolduid = 65535;
84 static int minolduid;
/*
 * NOTE(review): not referenced in this chunk; presumably the lower bound for
 * a percpu_pagelist_fraction entry defined further down - confirm.
 */
85 static int min_percpu_pagelist_fract = 8;
86
/*
 * Copy of NGROUPS_MAX in a variable so that the "ngroups_max" entry in
 * kern_table (mode 0444) has an object to point its .data at.
 */
87 static int ngroups_max = NGROUPS_MAX;
88
/*
 * Further hand-written extern declarations, each compiled in only when the
 * guarding configuration or architecture macro is defined.  Where they are
 * referenced in kern_table, the referencing entry sits under a matching
 * #ifdef.  sysctl_hz_timer is the exception: it is declared unconditionally
 * here while its only use in this chunk is inside
 * CONFIG_S390 / CONFIG_NO_IDLE_HZ.
 */
89 #ifdef CONFIG_KMOD
90 extern char modprobe_path[];
91 #endif
92 #ifdef CONFIG_CHR_DEV_SG
93 extern int sg_big_buff;
94 #endif
95
96 #ifdef __sparc__
97 extern char reboot_command [];
98 extern int stop_a_enabled;
99 extern int scons_pwroff;
100 #endif
101
102 #ifdef __hppa__
103 extern int pwrsw_enabled;
104 extern int unaligned_enabled;
105 #endif
106
107 #ifdef CONFIG_S390
108 #ifdef CONFIG_MATHEMU
109 extern int sysctl_ieee_emulation_warnings;
110 #endif
111 extern int sysctl_userprocess_debug;
112 extern int spin_retry;
113 #endif
114
115 extern int sysctl_hz_timer;
116
117 #ifdef CONFIG_BSD_PROCESS_ACCT
118 extern int acct_parm[];
119 #endif
120
121 #ifdef CONFIG_IA64
122 extern int no_unaligned_warning;
123 #endif
124
125 #ifdef CONFIG_RT_MUTEXES
126 extern int max_lock_depth;
127 #endif
128
/*
 * Forward declarations of file-local helpers whose definitions are not in
 * this chunk.  parse_table() only exists when CONFIG_SYSCTL_SYSCALL is set.
 */
129 #ifdef CONFIG_SYSCTL_SYSCALL
130 static int parse_table(int __user *, int, void __user *, size_t __user *,
131                 void __user *, size_t, ctl_table *);
132 #endif
133
134
/*
 * Custom .proc_handler routines: proc_do_cad_pid serves the "cad_pid" entry
 * and proc_dointvec_taint the "tainted" entry of kern_table.  Both entries
 * are likewise guarded by CONFIG_PROC_SYSCTL.
 */
135 #ifdef CONFIG_PROC_SYSCTL
136 static int proc_do_cad_pid(ctl_table *table, int write, struct file *filp,
137                   void __user *buffer, size_t *lenp, loff_t *ppos);
138 static int proc_dointvec_taint(ctl_table *table, int write, struct file *filp,
139                                void __user *buffer, size_t *lenp, loff_t *ppos);
140 #endif
141
/*
 * root_table is declared ahead of its definition so that root_table_header
 * can be statically initialised with it.  The header's list node is
 * initialised with LIST_HEAD_INIT on its own ctl_entry member.
 */
142 static ctl_table root_table[];
143 static struct ctl_table_header root_table_header =
144         { root_table, LIST_HEAD_INIT(root_table_header.ctl_entry) };
145
/*
 * Forward declarations of the child tables that root_table and kern_table
 * point at through .child.  The static ones are defined in this file; the
 * extern ones (random_table, pty_table, inotify_table) are not defined in
 * this chunk.
 */
146 static ctl_table kern_table[];
147 static ctl_table vm_table[];
148 static ctl_table fs_table[];
149 static ctl_table debug_table[];
150 static ctl_table dev_table[];
151 extern ctl_table random_table[];
152 #ifdef CONFIG_UNIX98_PTYS
153 extern ctl_table pty_table[];
154 #endif
155 #ifdef CONFIG_INOTIFY_USER
156 extern ctl_table inotify_table[];
157 #endif
158
/* Defined (not merely declared) here; no initialiser, so it starts as 0. */
159 #ifdef HAVE_ARCH_PICK_MMAP_LAYOUT
160 int sysctl_legacy_va_layout;
161 #endif
162
/*
 * Backing variables for the "prove_locking" and "lock_stat" entries of
 * kern_table.  The declarations are unconditional; the entries themselves
 * are under CONFIG_PROVE_LOCKING / CONFIG_LOCK_STAT.
 */
163 extern int prove_locking;
164 extern int lock_stat;
165
166 /* The default sysctl tables: */
167
/*
 * Top-level sysctl table.  Every entry here is a directory: it has a
 * numeric .ctl_name, a .procname, mode 0555 and a .child table, and no
 * .data or .proc_handler of its own.  The "net" directory is only present
 * when CONFIG_NET is set.
 */
168 static ctl_table root_table[] = {
169         {
170                 .ctl_name       = CTL_KERN,
171                 .procname       = "kernel",
172                 .mode           = 0555,
173                 .child          = kern_table,
174         },
175         {
176                 .ctl_name       = CTL_VM,
177                 .procname       = "vm",
178                 .mode           = 0555,
179                 .child          = vm_table,
180         },
181 #ifdef CONFIG_NET
182         {
183                 .ctl_name       = CTL_NET,
184                 .procname       = "net",
185                 .mode           = 0555,
            /* net_table is not declared in this file's visible part;
             * presumably it comes from an included header - confirm. */
186                 .child          = net_table,
187         },
188 #endif
189         {
190                 .ctl_name       = CTL_FS,
191                 .procname       = "fs",
192                 .mode           = 0555,
193                 .child          = fs_table,
194         },
195         {
196                 .ctl_name       = CTL_DEBUG,
197                 .procname       = "debug",
198                 .mode           = 0555,
199                 .child          = debug_table,
200         },
201         {
202                 .ctl_name       = CTL_DEV,
203                 .procname       = "dev",
204                 .mode           = 0555,
205                 .child          = dev_table,
206         },
207 /*
208  * NOTE: do not add new entries to this table unless you have read
209  * Documentation/sysctl/ctl_unnumbered.txt
210  */
            /* Last entry: .ctl_name = 0 (presumably the end-of-table marker). */
211         { .ctl_name = 0 }
212 };
213
214 #ifdef CONFIG_SCHED_DEBUG
/*
 * Lower/upper bounds for the scheduler tunables that kern_table exposes
 * under CONFIG_SCHED_DEBUG.  Their addresses are stored in the .extra1 and
 * .extra2 members of those entries, which use proc_dointvec_minmax() and
 * the sysctl_intvec strategy.
 *
 * Both helpers read the bounds through an 'int *', so the objects must be
 * of type 'int'.  Declaring them 'unsigned long' only works by accident on
 * little-endian machines: on a 64-bit big-endian machine the int-sized read
 * sees the (zero) upper half, turning every bound into 0 and rejecting any
 * non-zero write.  All values below fit comfortably in an int.
 */
static int min_sched_granularity_ns = 100000;           /* 100 usecs */
static int max_sched_granularity_ns = 1000000000;       /* 1 second */
static int min_wakeup_granularity_ns;                   /* 0 usecs */
static int max_wakeup_granularity_ns = 1000000000;      /* 1 second */
219 #endif
220
/*
 * Table behind the "kernel" directory of root_table (root_table's CTL_KERN
 * entry has .child = kern_table).
 *
 * Apart from the two sub-directories ("random", "pty"), each entry names a
 * variable through .data, gives its size in .maxlen, the permission bits in
 * .mode and the routine that services reads/writes in .proc_handler.
 * Entries handled by proc_dointvec_minmax additionally carry a lower and an
 * upper bound in .extra1/.extra2.  Many entries are compiled in only for a
 * particular configuration option or architecture.
 */
221 static ctl_table kern_table[] = {
            /* Scheduler tunables; bounds are the min_/max_*_ns statics
             * defined just above this table. */
222 #ifdef CONFIG_SCHED_DEBUG
223         {
224                 .ctl_name       = CTL_UNNUMBERED,
225                 .procname       = "sched_min_granularity_ns",
226                 .data           = &sysctl_sched_min_granularity,
227                 .maxlen         = sizeof(unsigned int),
228                 .mode           = 0644,
229                 .proc_handler   = &proc_dointvec_minmax,
230                 .strategy       = &sysctl_intvec,
231                 .extra1         = &min_sched_granularity_ns,
232                 .extra2         = &max_sched_granularity_ns,
233         },
234         {
235                 .ctl_name       = CTL_UNNUMBERED,
236                 .procname       = "sched_latency_ns",
237                 .data           = &sysctl_sched_latency,
238                 .maxlen         = sizeof(unsigned int),
239                 .mode           = 0644,
240                 .proc_handler   = &proc_dointvec_minmax,
241                 .strategy       = &sysctl_intvec,
242                 .extra1         = &min_sched_granularity_ns,
243                 .extra2         = &max_sched_granularity_ns,
244         },
245         {
246                 .ctl_name       = CTL_UNNUMBERED,
247                 .procname       = "sched_wakeup_granularity_ns",
248                 .data           = &sysctl_sched_wakeup_granularity,
249                 .maxlen         = sizeof(unsigned int),
250                 .mode           = 0644,
251                 .proc_handler   = &proc_dointvec_minmax,
252                 .strategy       = &sysctl_intvec,
253                 .extra1         = &min_wakeup_granularity_ns,
254                 .extra2         = &max_wakeup_granularity_ns,
255         },
256         {
257                 .ctl_name       = CTL_UNNUMBERED,
258                 .procname       = "sched_batch_wakeup_granularity_ns",
259                 .data           = &sysctl_sched_batch_wakeup_granularity,
260                 .maxlen         = sizeof(unsigned int),
261                 .mode           = 0644,
262                 .proc_handler   = &proc_dointvec_minmax,
263                 .strategy       = &sysctl_intvec,
264                 .extra1         = &min_wakeup_granularity_ns,
265                 .extra2         = &max_wakeup_granularity_ns,
266         },
267         {
268                 .ctl_name       = CTL_UNNUMBERED,
269                 .procname       = "sched_stat_granularity_ns",
270                 .data           = &sysctl_sched_stat_granularity,
271                 .maxlen         = sizeof(unsigned int),
272                 .mode           = 0644,
273                 .proc_handler   = &proc_dointvec_minmax,
274                 .strategy       = &sysctl_intvec,
275                 .extra1         = &min_wakeup_granularity_ns,
276                 .extra2         = &max_wakeup_granularity_ns,
277         },
278         {
279                 .ctl_name       = CTL_UNNUMBERED,
280                 .procname       = "sched_runtime_limit_ns",
281                 .data           = &sysctl_sched_runtime_limit,
282                 .maxlen         = sizeof(unsigned int),
283                 .mode           = 0644,
284                 .proc_handler   = &proc_dointvec_minmax,
285                 .strategy       = &sysctl_intvec,
286                 .extra1         = &min_sched_granularity_ns,
287                 .extra2         = &max_sched_granularity_ns,
288         },
289         {
290                 .ctl_name       = CTL_UNNUMBERED,
291                 .procname       = "sched_child_runs_first",
292                 .data           = &sysctl_sched_child_runs_first,
293                 .maxlen         = sizeof(unsigned int),
294                 .mode           = 0644,
295                 .proc_handler   = &proc_dointvec,
296         },
297         {
298                 .ctl_name       = CTL_UNNUMBERED,
299                 .procname       = "sched_features",
300                 .data           = &sysctl_sched_features,
301                 .maxlen         = sizeof(unsigned int),
302                 .mode           = 0644,
303                 .proc_handler   = &proc_dointvec,
304         },
305 #endif
306 #ifdef CONFIG_PROVE_LOCKING
307         {
308                 .ctl_name       = CTL_UNNUMBERED,
309                 .procname       = "prove_locking",
310                 .data           = &prove_locking,
311                 .maxlen         = sizeof(int),
312                 .mode           = 0644,
313                 .proc_handler   = &proc_dointvec,
314         },
315 #endif
316 #ifdef CONFIG_LOCK_STAT
317         {
318                 .ctl_name       = CTL_UNNUMBERED,
319                 .procname       = "lock_stat",
320                 .data           = &lock_stat,
321                 .maxlen         = sizeof(int),
322                 .mode           = 0644,
323                 .proc_handler   = &proc_dointvec,
324         },
325 #endif
326         {
327                 .ctl_name       = KERN_PANIC,
328                 .procname       = "panic",
329                 .data           = &panic_timeout,
330                 .maxlen         = sizeof(int),
331                 .mode           = 0644,
332                 .proc_handler   = &proc_dointvec,
333         },
334         {
335                 .ctl_name       = KERN_CORE_USES_PID,
336                 .procname       = "core_uses_pid",
337                 .data           = &core_uses_pid,
338                 .maxlen         = sizeof(int),
339                 .mode           = 0644,
340                 .proc_handler   = &proc_dointvec,
341         },
342 #ifdef CONFIG_AUDITSYSCALL
343         {
344                 .ctl_name       = CTL_UNNUMBERED,
345                 .procname       = "audit_argv_kb",
346                 .data           = &audit_argv_kb,
347                 .maxlen         = sizeof(int),
348                 .mode           = 0644,
349                 .proc_handler   = &proc_dointvec,
350         },
351 #endif
            /* String-valued entry: .maxlen is the buffer size constant and
             * the string handler/strategy pair is used. */
352         {
353                 .ctl_name       = KERN_CORE_PATTERN,
354                 .procname       = "core_pattern",
355                 .data           = core_pattern,
356                 .maxlen         = CORENAME_MAX_SIZE,
357                 .mode           = 0644,
358                 .proc_handler   = &proc_dostring,
359                 .strategy       = &sysctl_string,
360         },
            /* Uses the file-local proc_dointvec_taint handler, which is only
             * declared under the same CONFIG_PROC_SYSCTL guard. */
361 #ifdef CONFIG_PROC_SYSCTL
362         {
363                 .ctl_name       = KERN_TAINTED,
364                 .procname       = "tainted",
365                 .data           = &tainted,
366                 .maxlen         = sizeof(int),
367                 .mode           = 0644,
368                 .proc_handler   = &proc_dointvec_taint,
369         },
370 #endif
371         {
372                 .ctl_name       = KERN_CAP_BSET,
373                 .procname       = "cap-bound",
374                 .data           = &cap_bset,
375                 .maxlen         = sizeof(kernel_cap_t),
376                 .mode           = 0600,
377                 .proc_handler   = &proc_dointvec_bset,
378         },
379 #ifdef CONFIG_BLK_DEV_INITRD
380         {
381                 .ctl_name       = KERN_REALROOTDEV,
382                 .procname       = "real-root-dev",
383                 .data           = &real_root_dev,
384                 .maxlen         = sizeof(int),
385                 .mode           = 0644,
386                 .proc_handler   = &proc_dointvec,
387         },
388 #endif
389         {
390                 .ctl_name       = CTL_UNNUMBERED,
391                 .procname       = "print-fatal-signals",
392                 .data           = &print_fatal_signals,
393                 .maxlen         = sizeof(int),
394                 .mode           = 0644,
395                 .proc_handler   = &proc_dointvec,
396         },
397 #ifdef __sparc__
398         {
399                 .ctl_name       = KERN_SPARC_REBOOT,
400                 .procname       = "reboot-cmd",
401                 .data           = reboot_command,
402                 .maxlen         = 256,
403                 .mode           = 0644,
404                 .proc_handler   = &proc_dostring,
405                 .strategy       = &sysctl_string,
406         },
407         {
408                 .ctl_name       = KERN_SPARC_STOP_A,
409                 .procname       = "stop-a",
410                 .data           = &stop_a_enabled,
411                 .maxlen         = sizeof (int),
412                 .mode           = 0644,
413                 .proc_handler   = &proc_dointvec,
414         },
415         {
416                 .ctl_name       = KERN_SPARC_SCONS_PWROFF,
417                 .procname       = "scons-poweroff",
418                 .data           = &scons_pwroff,
419                 .maxlen         = sizeof (int),
420                 .mode           = 0644,
421                 .proc_handler   = &proc_dointvec,
422         },
423 #endif
424 #ifdef __hppa__
425         {
426                 .ctl_name       = KERN_HPPA_PWRSW,
427                 .procname       = "soft-power",
428                 .data           = &pwrsw_enabled,
429                 .maxlen         = sizeof (int),
430                 .mode           = 0644,
431                 .proc_handler   = &proc_dointvec,
432         },
433         {
434                 .ctl_name       = KERN_HPPA_UNALIGNED,
435                 .procname       = "unaligned-trap",
436                 .data           = &unaligned_enabled,
437                 .maxlen         = sizeof (int),
438                 .mode           = 0644,
439                 .proc_handler   = &proc_dointvec,
440         },
441 #endif
442         {
443                 .ctl_name       = KERN_CTLALTDEL,
444                 .procname       = "ctrl-alt-del",
445                 .data           = &C_A_D,
446                 .maxlen         = sizeof(int),
447                 .mode           = 0644,
448                 .proc_handler   = &proc_dointvec,
449         },
            /* .maxlen covers four ints starting at console_loglevel. */
450         {
451                 .ctl_name       = KERN_PRINTK,
452                 .procname       = "printk",
453                 .data           = &console_loglevel,
454                 .maxlen         = 4*sizeof(int),
455                 .mode           = 0644,
456                 .proc_handler   = &proc_dointvec,
457         },
458 #ifdef CONFIG_KMOD
459         {
460                 .ctl_name       = KERN_MODPROBE,
461                 .procname       = "modprobe",
462                 .data           = &modprobe_path,
463                 .maxlen         = KMOD_PATH_LEN,
464                 .mode           = 0644,
465                 .proc_handler   = &proc_dostring,
466                 .strategy       = &sysctl_string,
467         },
468 #endif
469 #if defined(CONFIG_HOTPLUG) && defined(CONFIG_NET)
470         {
471                 .ctl_name       = KERN_HOTPLUG,
472                 .procname       = "hotplug",
473                 .data           = &uevent_helper,
474                 .maxlen         = UEVENT_HELPER_PATH_LEN,
475                 .mode           = 0644,
476                 .proc_handler   = &proc_dostring,
477                 .strategy       = &sysctl_string,
478         },
479 #endif
480 #ifdef CONFIG_CHR_DEV_SG
481         {
482                 .ctl_name       = KERN_SG_BIG_BUFF,
483                 .procname       = "sg-big-buff",
484                 .data           = &sg_big_buff,
485                 .maxlen         = sizeof (int),
486                 .mode           = 0444,
487                 .proc_handler   = &proc_dointvec,
488         },
489 #endif
            /* .maxlen covers three ints of the acct_parm array. */
490 #ifdef CONFIG_BSD_PROCESS_ACCT
491         {
492                 .ctl_name       = KERN_ACCT,
493                 .procname       = "acct",
494                 .data           = &acct_parm,
495                 .maxlen         = 3*sizeof(int),
496                 .mode           = 0644,
497                 .proc_handler   = &proc_dointvec,
498         },
499 #endif
500 #ifdef CONFIG_MAGIC_SYSRQ
501         {
502                 .ctl_name       = KERN_SYSRQ,
503                 .procname       = "sysrq",
504                 .data           = &__sysrq_enabled,
505                 .maxlen         = sizeof (int),
506                 .mode           = 0644,
507                 .proc_handler   = &proc_dointvec,
508         },
509 #endif
            /* No backing variable (.data is NULL); everything is done by the
             * file-local proc_do_cad_pid handler. */
510 #ifdef CONFIG_PROC_SYSCTL
511         {
512                 .ctl_name       = KERN_CADPID,
513                 .procname       = "cad_pid",
514                 .data           = NULL,
515                 .maxlen         = sizeof (int),
516                 .mode           = 0600,
517                 .proc_handler   = &proc_do_cad_pid,
518         },
519 #endif
520         {
521                 .ctl_name       = KERN_MAX_THREADS,
522                 .procname       = "threads-max",
523                 .data           = &max_threads,
524                 .maxlen         = sizeof(int),
525                 .mode           = 0644,
526                 .proc_handler   = &proc_dointvec,
527         },
            /* Sub-directories: only .child is set, pointing at tables that
             * are declared extern near the top of this file. */
528         {
529                 .ctl_name       = KERN_RANDOM,
530                 .procname       = "random",
531                 .mode           = 0555,
532                 .child          = random_table,
533         },
534 #ifdef CONFIG_UNIX98_PTYS
535         {
536                 .ctl_name       = KERN_PTY,
537                 .procname       = "pty",
538                 .mode           = 0555,
539                 .child          = pty_table,
540         },
541 #endif
            /* Both overflow IDs share the minolduid..maxolduid bounds. */
542         {
543                 .ctl_name       = KERN_OVERFLOWUID,
544                 .procname       = "overflowuid",
545                 .data           = &overflowuid,
546                 .maxlen         = sizeof(int),
547                 .mode           = 0644,
548                 .proc_handler   = &proc_dointvec_minmax,
549                 .strategy       = &sysctl_intvec,
550                 .extra1         = &minolduid,
551                 .extra2         = &maxolduid,
552         },
553         {
554                 .ctl_name       = KERN_OVERFLOWGID,
555                 .procname       = "overflowgid",
556                 .data           = &overflowgid,
557                 .maxlen         = sizeof(int),
558                 .mode           = 0644,
559                 .proc_handler   = &proc_dointvec_minmax,
560                 .strategy       = &sysctl_intvec,
561                 .extra1         = &minolduid,
562                 .extra2         = &maxolduid,
563         },
564 #ifdef CONFIG_S390
565 #ifdef CONFIG_MATHEMU
566         {
567                 .ctl_name       = KERN_IEEE_EMULATION_WARNINGS,
568                 .procname       = "ieee_emulation_warnings",
569                 .data           = &sysctl_ieee_emulation_warnings,
570                 .maxlen         = sizeof(int),
571                 .mode           = 0644,
572                 .proc_handler   = &proc_dointvec,
573         },
574 #endif
575 #ifdef CONFIG_NO_IDLE_HZ
576         {
577                 .ctl_name       = KERN_HZ_TIMER,
578                 .procname       = "hz_timer",
579                 .data           = &sysctl_hz_timer,
580                 .maxlen         = sizeof(int),
581                 .mode           = 0644,
582                 .proc_handler   = &proc_dointvec,
583         },
584 #endif
585         {
586                 .ctl_name       = KERN_S390_USER_DEBUG_LOGGING,
587                 .procname       = "userprocess_debug",
588                 .data           = &sysctl_userprocess_debug,
589                 .maxlen         = sizeof(int),
590                 .mode           = 0644,
591                 .proc_handler   = &proc_dointvec,
592         },
593 #endif
            /* Bounded by the extern pid_max_min / pid_max_max variables.
             * .strategy is written without '&' here, unlike the other
             * entries; for a function name that is the same pointer. */
594         {
595                 .ctl_name       = KERN_PIDMAX,
596                 .procname       = "pid_max",
597                 .data           = &pid_max,
598                 .maxlen         = sizeof (int),
599                 .mode           = 0644,
600                 .proc_handler   = &proc_dointvec_minmax,
601                 .strategy       = sysctl_intvec,
602                 .extra1         = &pid_max_min,
603                 .extra2         = &pid_max_max,
604         },
605         {
606                 .ctl_name       = KERN_PANIC_ON_OOPS,
607                 .procname       = "panic_on_oops",
608                 .data           = &panic_on_oops,
609                 .maxlen         = sizeof(int),
610                 .mode           = 0644,
611                 .proc_handler   = &proc_dointvec,
612         },
            /* Backed by printk_ratelimit_jiffies and served by the jiffies
             * handler/strategy pair rather than plain proc_dointvec. */
613         {
614                 .ctl_name       = KERN_PRINTK_RATELIMIT,
615                 .procname       = "printk_ratelimit",
616                 .data           = &printk_ratelimit_jiffies,
617                 .maxlen         = sizeof(int),
618                 .mode           = 0644,
619                 .proc_handler   = &proc_dointvec_jiffies,
620                 .strategy       = &sysctl_jiffies,
621         },
622         {
623                 .ctl_name       = KERN_PRINTK_RATELIMIT_BURST,
624                 .procname       = "printk_ratelimit_burst",
625                 .data           = &printk_ratelimit_burst,
626                 .maxlen         = sizeof(int),
627                 .mode           = 0644,
628                 .proc_handler   = &proc_dointvec,
629         },
            /* Mode 0444: no write permission bits. */
630         {
631                 .ctl_name       = KERN_NGROUPS_MAX,
632                 .procname       = "ngroups_max",
633                 .data           = &ngroups_max,
634                 .maxlen         = sizeof (int),
635                 .mode           = 0444,
636                 .proc_handler   = &proc_dointvec,
637         },
638 #if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_X86)
639         {
640                 .ctl_name       = KERN_UNKNOWN_NMI_PANIC,
641                 .procname       = "unknown_nmi_panic",
642                 .data           = &unknown_nmi_panic,
643                 .maxlen         = sizeof (int),
644                 .mode           = 0644,
645                 .proc_handler   = &proc_dointvec,
646         },
647         {
648                 .ctl_name       = KERN_NMI_WATCHDOG,
649                 .procname       = "nmi_watchdog",
650                 .data           = &nmi_watchdog_enabled,
651                 .maxlen         = sizeof (int),
652                 .mode           = 0644,
653                 .proc_handler   = &proc_nmi_enabled,
654         },
655 #endif
656 #if defined(CONFIG_X86)
657         {
658                 .ctl_name       = KERN_PANIC_ON_NMI,
659                 .procname       = "panic_on_unrecovered_nmi",
660                 .data           = &panic_on_unrecovered_nmi,
661                 .maxlen         = sizeof(int),
662                 .mode           = 0644,
663                 .proc_handler   = &proc_dointvec,
664         },
665         {
666                 .ctl_name       = KERN_BOOTLOADER_TYPE,
667                 .procname       = "bootloader_type",
668                 .data           = &bootloader_type,
669                 .maxlen         = sizeof (int),
670                 .mode           = 0444,
671                 .proc_handler   = &proc_dointvec,
672         },
673         {
674                 .ctl_name       = CTL_UNNUMBERED,
675                 .procname       = "kstack_depth_to_print",
676                 .data           = &kstack_depth_to_print,
677                 .maxlen         = sizeof(int),
678                 .mode           = 0644,
679                 .proc_handler   = &proc_dointvec,
680         },
681 #endif
682 #if defined(CONFIG_MMU)
683         {
684                 .ctl_name       = KERN_RANDOMIZE,
685                 .procname       = "randomize_va_space",
686                 .data           = &randomize_va_space,
687                 .maxlen         = sizeof(int),
688                 .mode           = 0644,
689                 .proc_handler   = &proc_dointvec,
690         },
691 #endif
692 #if defined(CONFIG_S390) && defined(CONFIG_SMP)
693         {
694                 .ctl_name       = KERN_SPIN_RETRY,
695                 .procname       = "spin_retry",
696                 .data           = &spin_retry,
697                 .maxlen         = sizeof (int),
698                 .mode           = 0644,
699                 .proc_handler   = &proc_dointvec,
700         },
701 #endif
            /* The only unsigned-long-sized entry in this table; it uses the
             * ulong handler and sets no .extra1/.extra2 bounds. */
702 #if     defined(CONFIG_ACPI_SLEEP) && defined(CONFIG_X86)
703         {
704                 .ctl_name       = KERN_ACPI_VIDEO_FLAGS,
705                 .procname       = "acpi_video_flags",
706                 .data           = &acpi_realmode_flags,
707                 .maxlen         = sizeof (unsigned long),
708                 .mode           = 0644,
709                 .proc_handler   = &proc_doulongvec_minmax,
710         },
711 #endif
712 #ifdef CONFIG_IA64
713         {
714                 .ctl_name       = KERN_IA64_UNALIGNED,
715                 .procname       = "ignore-unaligned-usertrap",
716                 .data           = &no_unaligned_warning,
717                 .maxlen         = sizeof (int),
718                 .mode           = 0644,
719                 .proc_handler   = &proc_dointvec,
720         },
721 #endif
722 #ifdef CONFIG_COMPAT
723         {
724                 .ctl_name       = KERN_COMPAT_LOG,
725                 .procname       = "compat-log",
726                 .data           = &compat_log,
727                 .maxlen         = sizeof (int),
728                 .mode           = 0644,
729                 .proc_handler   = &proc_dointvec,
730         },
731 #endif
732 #ifdef CONFIG_RT_MUTEXES
733         {
734                 .ctl_name       = KERN_MAX_LOCK_DEPTH,
735                 .procname       = "max_lock_depth",
736                 .data           = &max_lock_depth,
737                 .maxlen         = sizeof(int),
738                 .mode           = 0644,
739                 .proc_handler   = &proc_dointvec,
740         },
741 #endif
742 #ifdef CONFIG_PROC_FS
743         {
744                 .ctl_name       = CTL_UNNUMBERED,
745                 .procname       = "maps_protect",
746                 .data           = &maps_protect,
747                 .maxlen         = sizeof(int),
748                 .mode           = 0644,
749                 .proc_handler   = &proc_dointvec,
750         },
751 #endif
752         {
753                 .ctl_name       = CTL_UNNUMBERED,
754                 .procname       = "poweroff_cmd",
755                 .data           = &poweroff_cmd,
756                 .maxlen         = POWEROFF_CMD_PATH_LEN,
757                 .mode           = 0644,
758                 .proc_handler   = &proc_dostring,
759                 .strategy       = &sysctl_string,
760         },
761 /*
762  * NOTE: do not add new entries to this table unless you have read
763  * Documentation/sysctl/ctl_unnumbered.txt
764  */
            /* Last entry: .ctl_name = 0 (presumably the end-of-table marker). */
765         { .ctl_name = 0 }
766 };
767
768 /* Constants for minimum and maximum testing in vm_table.
769    We use these as one-element integer vectors. */
/*
 * zero and one_hundred are the .extra1/.extra2 bounds of the
 * "dirty_background_ratio" and "dirty_ratio" entries of vm_table.
 * NOTE(review): 'two' is not referenced in this chunk; presumably a later
 * vm_table entry uses it as an upper bound - confirm.
 */
770 static int zero;
771 static int two = 2;
772 static int one_hundred = 100;
773
774
775 static ctl_table vm_table[] = {
776         {
777                 .ctl_name       = VM_OVERCOMMIT_MEMORY,
778                 .procname       = "overcommit_memory",
779                 .data           = &sysctl_overcommit_memory,
780                 .maxlen         = sizeof(sysctl_overcommit_memory),
781                 .mode           = 0644,
782                 .proc_handler   = &proc_dointvec,
783         },
784         {
785                 .ctl_name       = VM_PANIC_ON_OOM,
786                 .procname       = "panic_on_oom",
787                 .data           = &sysctl_panic_on_oom,
788                 .maxlen         = sizeof(sysctl_panic_on_oom),
789                 .mode           = 0644,
790                 .proc_handler   = &proc_dointvec,
791         },
792         {
793                 .ctl_name       = VM_OVERCOMMIT_RATIO,
794                 .procname       = "overcommit_ratio",
795                 .data           = &sysctl_overcommit_ratio,
796                 .maxlen         = sizeof(sysctl_overcommit_ratio),
797                 .mode           = 0644,
798                 .proc_handler   = &proc_dointvec,
799         },
800         {
801                 .ctl_name       = VM_PAGE_CLUSTER,
802                 .procname       = "page-cluster", 
803                 .data           = &page_cluster,
804                 .maxlen         = sizeof(int),
805                 .mode           = 0644,
806                 .proc_handler   = &proc_dointvec,
807         },
808         {
809                 .ctl_name       = VM_DIRTY_BACKGROUND,
810                 .procname       = "dirty_background_ratio",
811                 .data           = &dirty_background_ratio,
812                 .maxlen         = sizeof(dirty_background_ratio),
813                 .mode           = 0644,
814                 .proc_handler   = &proc_dointvec_minmax,
815                 .strategy       = &sysctl_intvec,
816                 .extra1         = &zero,
817                 .extra2         = &one_hundred,
818         },
819         {
820                 .ctl_name       = VM_DIRTY_RATIO,
821                 .procname       = "dirty_ratio",
822                 .data           = &vm_dirty_ratio,
823                 .maxlen         = sizeof(vm_dirty_ratio),
824                 .mode           = 0644,
825                 .proc_handler   = &proc_dointvec_minmax,
826                 .strategy       = &sysctl_intvec,
827                 .extra1         = &zero,
828                 .extra2         = &one_hundred,
829         },
830         {
831                 .ctl_name       = VM_DIRTY_WB_CS,
832                 .procname       = "dirty_writeback_centisecs",
833                 .data           = &dirty_writeback_interval,
834                 .maxlen         = sizeof(dirty_writeback_interval),
835                 .mode           = 0644,
836                 .proc_handler   = &dirty_writeback_centisecs_handler,
837         },
838         {
839                 .ctl_name       = VM_DIRTY_EXPIRE_CS,
840                 .procname       = "dirty_expire_centisecs",
841                 .data           = &dirty_expire_interval,
842                 .maxlen         = sizeof(dirty_expire_interval),
843                 .mode           = 0644,
844                 .proc_handler   = &proc_dointvec_userhz_jiffies,
845         },
846         {
847                 .ctl_name       = VM_NR_PDFLUSH_THREADS,
848                 .procname       = "nr_pdflush_threads",
849                 .data           = &nr_pdflush_threads,
850                 .maxlen         = sizeof nr_pdflush_threads,
851                 .mode           = 0444 /* read-only*/,
852                 .proc_handler   = &proc_dointvec,
853         },
854         {
855                 .ctl_name       = VM_SWAPPINESS,
856                 .procname       = "swappiness",
857                 .data           = &vm_swappiness,
858                 .maxlen         = sizeof(vm_swappiness),
859                 .mode           = 0644,
860                 .proc_handler   = &proc_dointvec_minmax,
861                 .strategy       = &sysctl_intvec,
862                 .extra1         = &zero,
863                 .extra2         = &one_hundred,
864         },
865 #ifdef CONFIG_HUGETLB_PAGE
866          {
867                 .ctl_name       = VM_HUGETLB_PAGES,
868                 .procname       = "nr_hugepages",
869                 .data           = &max_huge_pages,
870                 .maxlen         = sizeof(unsigned long),
871                 .mode           = 0644,
872                 .proc_handler   = &hugetlb_sysctl_handler,
873                 .extra1         = (void *)&hugetlb_zero,
874                 .extra2         = (void *)&hugetlb_infinity,
875          },
876          {
877                 .ctl_name       = VM_HUGETLB_GROUP,
878                 .procname       = "hugetlb_shm_group",
879                 .data           = &sysctl_hugetlb_shm_group,
880                 .maxlen         = sizeof(gid_t),
881                 .mode           = 0644,
882                 .proc_handler   = &proc_dointvec,
883          },
884          {
885                 .ctl_name       = CTL_UNNUMBERED,
886                 .procname       = "hugepages_treat_as_movable",
887                 .data           = &hugepages_treat_as_movable,
888                 .maxlen         = sizeof(int),
889                 .mode           = 0644,
890                 .proc_handler   = &hugetlb_treat_movable_handler,
891         },
892 #endif
893         {
894                 .ctl_name       = VM_LOWMEM_RESERVE_RATIO,
895                 .procname       = "lowmem_reserve_ratio",
896                 .data           = &sysctl_lowmem_reserve_ratio,
897                 .maxlen         = sizeof(sysctl_lowmem_reserve_ratio),
898                 .mode           = 0644,
899                 .proc_handler   = &lowmem_reserve_ratio_sysctl_handler,
900                 .strategy       = &sysctl_intvec,
901         },
902         {
903                 .ctl_name       = VM_DROP_PAGECACHE,
904                 .procname       = "drop_caches",
905                 .data           = &sysctl_drop_caches,
906                 .maxlen         = sizeof(int),
907                 .mode           = 0644,
908                 .proc_handler   = drop_caches_sysctl_handler,
909                 .strategy       = &sysctl_intvec,
910         },
911         {
912                 .ctl_name       = VM_MIN_FREE_KBYTES,
913                 .procname       = "min_free_kbytes",
914                 .data           = &min_free_kbytes,
915                 .maxlen         = sizeof(min_free_kbytes),
916                 .mode           = 0644,
917                 .proc_handler   = &min_free_kbytes_sysctl_handler,
918                 .strategy       = &sysctl_intvec,
919                 .extra1         = &zero,
920         },
921         {
922                 .ctl_name       = VM_PERCPU_PAGELIST_FRACTION,
923                 .procname       = "percpu_pagelist_fraction",
924                 .data           = &percpu_pagelist_fraction,
925                 .maxlen         = sizeof(percpu_pagelist_fraction),
926                 .mode           = 0644,
927                 .proc_handler   = &percpu_pagelist_fraction_sysctl_handler,
928                 .strategy       = &sysctl_intvec,
929                 .extra1         = &min_percpu_pagelist_fract,
930         },
931 #ifdef CONFIG_MMU
932         {
933                 .ctl_name       = VM_MAX_MAP_COUNT,
934                 .procname       = "max_map_count",
935                 .data           = &sysctl_max_map_count,
936                 .maxlen         = sizeof(sysctl_max_map_count),
937                 .mode           = 0644,
938                 .proc_handler   = &proc_dointvec
939         },
940 #endif
941         {
942                 .ctl_name       = VM_LAPTOP_MODE,
943                 .procname       = "laptop_mode",
944                 .data           = &laptop_mode,
945                 .maxlen         = sizeof(laptop_mode),
946                 .mode           = 0644,
947                 .proc_handler   = &proc_dointvec_jiffies,
948                 .strategy       = &sysctl_jiffies,
949         },
950         {
951                 .ctl_name       = VM_BLOCK_DUMP,
952                 .procname       = "block_dump",
953                 .data           = &block_dump,
954                 .maxlen         = sizeof(block_dump),
955                 .mode           = 0644,
956                 .proc_handler   = &proc_dointvec,
957                 .strategy       = &sysctl_intvec,
958                 .extra1         = &zero,
959         },
960         {
961                 .ctl_name       = VM_VFS_CACHE_PRESSURE,
962                 .procname       = "vfs_cache_pressure",
963                 .data           = &sysctl_vfs_cache_pressure,
964                 .maxlen         = sizeof(sysctl_vfs_cache_pressure),
965                 .mode           = 0644,
966                 .proc_handler   = &proc_dointvec,
967                 .strategy       = &sysctl_intvec,
968                 .extra1         = &zero,
969         },
970 #ifdef HAVE_ARCH_PICK_MMAP_LAYOUT
971         {
972                 .ctl_name       = VM_LEGACY_VA_LAYOUT,
973                 .procname       = "legacy_va_layout",
974                 .data           = &sysctl_legacy_va_layout,
975                 .maxlen         = sizeof(sysctl_legacy_va_layout),
976                 .mode           = 0644,
977                 .proc_handler   = &proc_dointvec,
978                 .strategy       = &sysctl_intvec,
979                 .extra1         = &zero,
980         },
981 #endif
982 #ifdef CONFIG_NUMA
983         {
984                 .ctl_name       = VM_ZONE_RECLAIM_MODE,
985                 .procname       = "zone_reclaim_mode",
986                 .data           = &zone_reclaim_mode,
987                 .maxlen         = sizeof(zone_reclaim_mode),
988                 .mode           = 0644,
989                 .proc_handler   = &proc_dointvec,
990                 .strategy       = &sysctl_intvec,
991                 .extra1         = &zero,
992         },
993         {
994                 .ctl_name       = VM_MIN_UNMAPPED,
995                 .procname       = "min_unmapped_ratio",
996                 .data           = &sysctl_min_unmapped_ratio,
997                 .maxlen         = sizeof(sysctl_min_unmapped_ratio),
998                 .mode           = 0644,
999                 .proc_handler   = &sysctl_min_unmapped_ratio_sysctl_handler,
1000                 .strategy       = &sysctl_intvec,
1001                 .extra1         = &zero,
1002                 .extra2         = &one_hundred,
1003         },
1004         {
1005                 .ctl_name       = VM_MIN_SLAB,
1006                 .procname       = "min_slab_ratio",
1007                 .data           = &sysctl_min_slab_ratio,
1008                 .maxlen         = sizeof(sysctl_min_slab_ratio),
1009                 .mode           = 0644,
1010                 .proc_handler   = &sysctl_min_slab_ratio_sysctl_handler,
1011                 .strategy       = &sysctl_intvec,
1012                 .extra1         = &zero,
1013                 .extra2         = &one_hundred,
1014         },
1015 #endif
1016 #ifdef CONFIG_SMP
1017         {
1018                 .ctl_name       = CTL_UNNUMBERED,
1019                 .procname       = "stat_interval",
1020                 .data           = &sysctl_stat_interval,
1021                 .maxlen         = sizeof(sysctl_stat_interval),
1022                 .mode           = 0644,
1023                 .proc_handler   = &proc_dointvec_jiffies,
1024                 .strategy       = &sysctl_jiffies,
1025         },
1026 #endif
1027 #ifdef CONFIG_SECURITY
1028         {
1029                 .ctl_name       = CTL_UNNUMBERED,
1030                 .procname       = "mmap_min_addr",
1031                 .data           = &mmap_min_addr,
1032                 .maxlen         = sizeof(unsigned long),
1033                 .mode           = 0644,
1034                 .proc_handler   = &proc_doulongvec_minmax,
1035         },
1036 #endif
1037 #ifdef CONFIG_NUMA
1038         {
1039                 .ctl_name       = CTL_UNNUMBERED,
1040                 .procname       = "numa_zonelist_order",
1041                 .data           = &numa_zonelist_order,
1042                 .maxlen         = NUMA_ZONELIST_ORDER_LEN,
1043                 .mode           = 0644,
1044                 .proc_handler   = &numa_zonelist_order_handler,
1045                 .strategy       = &sysctl_string,
1046         },
1047 #endif
1048 #if defined(CONFIG_X86_32) || \
1049    (defined(CONFIG_SUPERH) && defined(CONFIG_VSYSCALL))
1050         {
1051                 .ctl_name       = VM_VDSO_ENABLED,
1052                 .procname       = "vdso_enabled",
1053                 .data           = &vdso_enabled,
1054                 .maxlen         = sizeof(vdso_enabled),
1055                 .mode           = 0644,
1056                 .proc_handler   = &proc_dointvec,
1057                 .strategy       = &sysctl_intvec,
1058                 .extra1         = &zero,
1059         },
1060 #endif
1061 /*
1062  * NOTE: do not add new entries to this table unless you have read
1063  * Documentation/sysctl/ctl_unnumbered.txt
1064  */
1065         { .ctl_name = 0 }
1066 };
1067
#if defined(CONFIG_BINFMT_MISC) || defined(CONFIG_BINFMT_MISC_MODULE)
/* Empty child table backing the "binfmt_misc" directory entry in fs_table. */
static ctl_table binfmt_misc_table[] = {
	{ .ctl_name = 0 }	/* terminator only */
};
#endif
1073
1074 static ctl_table fs_table[] = {
1075         {
1076                 .ctl_name       = FS_NRINODE,
1077                 .procname       = "inode-nr",
1078                 .data           = &inodes_stat,
1079                 .maxlen         = 2*sizeof(int),
1080                 .mode           = 0444,
1081                 .proc_handler   = &proc_dointvec,
1082         },
1083         {
1084                 .ctl_name       = FS_STATINODE,
1085                 .procname       = "inode-state",
1086                 .data           = &inodes_stat,
1087                 .maxlen         = 7*sizeof(int),
1088                 .mode           = 0444,
1089                 .proc_handler   = &proc_dointvec,
1090         },
1091         {
1092                 .ctl_name       = FS_NRFILE,
1093                 .procname       = "file-nr",
1094                 .data           = &files_stat,
1095                 .maxlen         = 3*sizeof(int),
1096                 .mode           = 0444,
1097                 .proc_handler   = &proc_nr_files,
1098         },
1099         {
1100                 .ctl_name       = FS_MAXFILE,
1101                 .procname       = "file-max",
1102                 .data           = &files_stat.max_files,
1103                 .maxlen         = sizeof(int),
1104                 .mode           = 0644,
1105                 .proc_handler   = &proc_dointvec,
1106         },
1107         {
1108                 .ctl_name       = FS_DENTRY,
1109                 .procname       = "dentry-state",
1110                 .data           = &dentry_stat,
1111                 .maxlen         = 6*sizeof(int),
1112                 .mode           = 0444,
1113                 .proc_handler   = &proc_dointvec,
1114         },
1115         {
1116                 .ctl_name       = FS_OVERFLOWUID,
1117                 .procname       = "overflowuid",
1118                 .data           = &fs_overflowuid,
1119                 .maxlen         = sizeof(int),
1120                 .mode           = 0644,
1121                 .proc_handler   = &proc_dointvec_minmax,
1122                 .strategy       = &sysctl_intvec,
1123                 .extra1         = &minolduid,
1124                 .extra2         = &maxolduid,
1125         },
1126         {
1127                 .ctl_name       = FS_OVERFLOWGID,
1128                 .procname       = "overflowgid",
1129                 .data           = &fs_overflowgid,
1130                 .maxlen         = sizeof(int),
1131                 .mode           = 0644,
1132                 .proc_handler   = &proc_dointvec_minmax,
1133                 .strategy       = &sysctl_intvec,
1134                 .extra1         = &minolduid,
1135                 .extra2         = &maxolduid,
1136         },
1137         {
1138                 .ctl_name       = FS_LEASES,
1139                 .procname       = "leases-enable",
1140                 .data           = &leases_enable,
1141                 .maxlen         = sizeof(int),
1142                 .mode           = 0644,
1143                 .proc_handler   = &proc_dointvec,
1144         },
1145 #ifdef CONFIG_DNOTIFY
1146         {
1147                 .ctl_name       = FS_DIR_NOTIFY,
1148                 .procname       = "dir-notify-enable",
1149                 .data           = &dir_notify_enable,
1150                 .maxlen         = sizeof(int),
1151                 .mode           = 0644,
1152                 .proc_handler   = &proc_dointvec,
1153         },
1154 #endif
1155 #ifdef CONFIG_MMU
1156         {
1157                 .ctl_name       = FS_LEASE_TIME,
1158                 .procname       = "lease-break-time",
1159                 .data           = &lease_break_time,
1160                 .maxlen         = sizeof(int),
1161                 .mode           = 0644,
1162                 .proc_handler   = &proc_dointvec_minmax,
1163                 .strategy       = &sysctl_intvec,
1164                 .extra1         = &zero,
1165                 .extra2         = &two,
1166         },
1167         {
1168                 .ctl_name       = FS_AIO_NR,
1169                 .procname       = "aio-nr",
1170                 .data           = &aio_nr,
1171                 .maxlen         = sizeof(aio_nr),
1172                 .mode           = 0444,
1173                 .proc_handler   = &proc_doulongvec_minmax,
1174         },
1175         {
1176                 .ctl_name       = FS_AIO_MAX_NR,
1177                 .procname       = "aio-max-nr",
1178                 .data           = &aio_max_nr,
1179                 .maxlen         = sizeof(aio_max_nr),
1180                 .mode           = 0644,
1181                 .proc_handler   = &proc_doulongvec_minmax,
1182         },
1183 #ifdef CONFIG_INOTIFY_USER
1184         {
1185                 .ctl_name       = FS_INOTIFY,
1186                 .procname       = "inotify",
1187                 .mode           = 0555,
1188                 .child          = inotify_table,
1189         },
1190 #endif  
1191 #endif
1192         {
1193                 .ctl_name       = KERN_SETUID_DUMPABLE,
1194                 .procname       = "suid_dumpable",
1195                 .data           = &suid_dumpable,
1196                 .maxlen         = sizeof(int),
1197                 .mode           = 0644,
1198                 .proc_handler   = &proc_dointvec,
1199         },
1200 #if defined(CONFIG_BINFMT_MISC) || defined(CONFIG_BINFMT_MISC_MODULE)
1201         {
1202                 .ctl_name       = CTL_UNNUMBERED,
1203                 .procname       = "binfmt_misc",
1204                 .mode           = 0555,
1205                 .child          = binfmt_misc_table,
1206         },
1207 #endif
1208 /*
1209  * NOTE: do not add new entries to this table unless you have read
1210  * Documentation/sysctl/ctl_unnumbered.txt
1211  */
1212         { .ctl_name = 0 }
1213 };
1214
1215 static ctl_table debug_table[] = {
1216 #ifdef CONFIG_X86
1217         {
1218                 .ctl_name       = CTL_UNNUMBERED,
1219                 .procname       = "exception-trace",
1220                 .data           = &show_unhandled_signals,
1221                 .maxlen         = sizeof(int),
1222                 .mode           = 0644,
1223                 .proc_handler   = proc_dointvec
1224         },
1225 #endif
1226         { .ctl_name = 0 }
1227 };
1228
1229 static ctl_table dev_table[] = {
1230         { .ctl_name = 0 }
1231 };
1232
1233 static DEFINE_SPINLOCK(sysctl_lock);
1234
1235 /* called under sysctl_lock */
1236 static int use_table(struct ctl_table_header *p)
1237 {
1238         if (unlikely(p->unregistering))
1239                 return 0;
1240         p->used++;
1241         return 1;
1242 }
1243
1244 /* called under sysctl_lock */
1245 static void unuse_table(struct ctl_table_header *p)
1246 {
1247         if (!--p->used)
1248                 if (unlikely(p->unregistering))
1249                         complete(p->unregistering);
1250 }
1251
1252 /* called under sysctl_lock, will reacquire if has to wait */
1253 static void start_unregistering(struct ctl_table_header *p)
1254 {
1255         /*
1256          * if p->used is 0, nobody will ever touch that entry again;
1257          * we'll eliminate all paths to it before dropping sysctl_lock
1258          */
1259         if (unlikely(p->used)) {
1260                 struct completion wait;
1261                 init_completion(&wait);
1262                 p->unregistering = &wait;
1263                 spin_unlock(&sysctl_lock);
1264                 wait_for_completion(&wait);
1265                 spin_lock(&sysctl_lock);
1266         }
1267         /*
1268          * do not remove from the list until nobody holds it; walking the
1269          * list in do_sysctl() relies on that.
1270          */
1271         list_del_init(&p->ctl_entry);
1272 }
1273
1274 void sysctl_head_finish(struct ctl_table_header *head)
1275 {
1276         if (!head)
1277                 return;
1278         spin_lock(&sysctl_lock);
1279         unuse_table(head);
1280         spin_unlock(&sysctl_lock);
1281 }
1282
1283 struct ctl_table_header *sysctl_head_next(struct ctl_table_header *prev)
1284 {
1285         struct ctl_table_header *head;
1286         struct list_head *tmp;
1287         spin_lock(&sysctl_lock);
1288         if (prev) {
1289                 tmp = &prev->ctl_entry;
1290                 unuse_table(prev);
1291                 goto next;
1292         }
1293         tmp = &root_table_header.ctl_entry;
1294         for (;;) {
1295                 head = list_entry(tmp, struct ctl_table_header, ctl_entry);
1296
1297                 if (!use_table(head))
1298                         goto next;
1299                 spin_unlock(&sysctl_lock);
1300                 return head;
1301         next:
1302                 tmp = tmp->next;
1303                 if (tmp == &root_table_header.ctl_entry)
1304                         break;
1305         }
1306         spin_unlock(&sysctl_lock);
1307         return NULL;
1308 }
1309
1310 #ifdef CONFIG_SYSCTL_SYSCALL
1311 int do_sysctl(int __user *name, int nlen, void __user *oldval, size_t __user *oldlenp,
1312                void __user *newval, size_t newlen)
1313 {
1314         struct ctl_table_header *head;
1315         int error = -ENOTDIR;
1316
1317         if (nlen <= 0 || nlen >= CTL_MAXNAME)
1318                 return -ENOTDIR;
1319         if (oldval) {
1320                 int old_len;
1321                 if (!oldlenp || get_user(old_len, oldlenp))
1322                         return -EFAULT;
1323         }
1324
1325         for (head = sysctl_head_next(NULL); head;
1326                         head = sysctl_head_next(head)) {
1327                 error = parse_table(name, nlen, oldval, oldlenp, 
1328                                         newval, newlen, head->ctl_table);
1329                 if (error != -ENOTDIR) {
1330                         sysctl_head_finish(head);
1331                         break;
1332                 }
1333         }
1334         return error;
1335 }
1336
1337 asmlinkage long sys_sysctl(struct __sysctl_args __user *args)
1338 {
1339         struct __sysctl_args tmp;
1340         int error;
1341
1342         if (copy_from_user(&tmp, args, sizeof(tmp)))
1343                 return -EFAULT;
1344
1345         lock_kernel();
1346         error = do_sysctl(tmp.name, tmp.nlen, tmp.oldval, tmp.oldlenp,
1347                           tmp.newval, tmp.newlen);
1348         unlock_kernel();
1349         return error;
1350 }
1351 #endif /* CONFIG_SYSCTL_SYSCALL */
1352
1353 /*
1354  * sysctl_perm does NOT grant the superuser all rights automatically, because
1355  * some sysctl variables are readonly even to root.
1356  */
1357
1358 static int test_perm(int mode, int op)
1359 {
1360         if (!current->euid)
1361                 mode >>= 6;
1362         else if (in_egroup_p(0))
1363                 mode >>= 3;
1364         if ((mode & op & 0007) == op)
1365                 return 0;
1366         return -EACCES;
1367 }
1368
1369 int sysctl_perm(ctl_table *table, int op)
1370 {
1371         int error;
1372         error = security_sysctl(table, op);
1373         if (error)
1374                 return error;
1375         return test_perm(table->mode, op);
1376 }
1377
1378 #ifdef CONFIG_SYSCTL_SYSCALL
1379 static int parse_table(int __user *name, int nlen,
1380                        void __user *oldval, size_t __user *oldlenp,
1381                        void __user *newval, size_t newlen,
1382                        ctl_table *table)
1383 {
1384         int n;
1385 repeat:
1386         if (!nlen)
1387                 return -ENOTDIR;
1388         if (get_user(n, name))
1389                 return -EFAULT;
1390         for ( ; table->ctl_name || table->procname; table++) {
1391                 if (!table->ctl_name)
1392                         continue;
1393                 if (n == table->ctl_name) {
1394                         int error;
1395                         if (table->child) {
1396                                 if (sysctl_perm(table, 001))
1397                                         return -EPERM;
1398                                 name++;
1399                                 nlen--;
1400                                 table = table->child;
1401                                 goto repeat;
1402                         }
1403                         error = do_sysctl_strategy(table, name, nlen,
1404                                                    oldval, oldlenp,
1405                                                    newval, newlen);
1406                         return error;
1407                 }
1408         }
1409         return -ENOTDIR;
1410 }
1411
1412 /* Perform the actual read/write of a sysctl table entry. */
1413 int do_sysctl_strategy (ctl_table *table, 
1414                         int __user *name, int nlen,
1415                         void __user *oldval, size_t __user *oldlenp,
1416                         void __user *newval, size_t newlen)
1417 {
1418         int op = 0, rc;
1419         size_t len;
1420
1421         if (oldval)
1422                 op |= 004;
1423         if (newval) 
1424                 op |= 002;
1425         if (sysctl_perm(table, op))
1426                 return -EPERM;
1427
1428         if (table->strategy) {
1429                 rc = table->strategy(table, name, nlen, oldval, oldlenp,
1430                                      newval, newlen);
1431                 if (rc < 0)
1432                         return rc;
1433                 if (rc > 0)
1434                         return 0;
1435         }
1436
1437         /* If there is no strategy routine, or if the strategy returns
1438          * zero, proceed with automatic r/w */
1439         if (table->data && table->maxlen) {
1440                 if (oldval && oldlenp) {
1441                         if (get_user(len, oldlenp))
1442                                 return -EFAULT;
1443                         if (len) {
1444                                 if (len > table->maxlen)
1445                                         len = table->maxlen;
1446                                 if(copy_to_user(oldval, table->data, len))
1447                                         return -EFAULT;
1448                                 if(put_user(len, oldlenp))
1449                                         return -EFAULT;
1450                         }
1451                 }
1452                 if (newval && newlen) {
1453                         len = newlen;
1454                         if (len > table->maxlen)
1455                                 len = table->maxlen;
1456                         if(copy_from_user(table->data, newval, len))
1457                                 return -EFAULT;
1458                 }
1459         }
1460         return 0;
1461 }
1462 #endif /* CONFIG_SYSCTL_SYSCALL */
1463
1464 static void sysctl_set_parent(struct ctl_table *parent, struct ctl_table *table)
1465 {
1466         for (; table->ctl_name || table->procname; table++) {
1467                 table->parent = parent;
1468                 if (table->child)
1469                         sysctl_set_parent(table, table->child);
1470         }
1471 }
1472
1473 static __init int sysctl_init(void)
1474 {
1475         sysctl_set_parent(NULL, root_table);
1476         return 0;
1477 }
1478
1479 core_initcall(sysctl_init);
1480
1481 /**
1482  * register_sysctl_table - register a sysctl hierarchy
1483  * @table: the top-level table structure
1484  *
1485  * Register a sysctl table hierarchy. @table should be a filled in ctl_table
1486  * array. An entry with a ctl_name of 0 terminates the table. 
1487  *
1488  * The members of the &ctl_table structure are used as follows:
1489  *
1490  * ctl_name - This is the numeric sysctl value used by sysctl(2). The number
1491  *            must be unique within that level of sysctl
1492  *
1493  * procname - the name of the sysctl file under /proc/sys. Set to %NULL to not
1494  *            enter a sysctl file
1495  *
1496  * data - a pointer to data for use by proc_handler
1497  *
1498  * maxlen - the maximum size in bytes of the data
1499  *
1500  * mode - the file permissions for the /proc/sys file, and for sysctl(2)
1501  *
1502  * child - a pointer to the child sysctl table if this entry is a directory, or
1503  *         %NULL.
1504  *
1505  * proc_handler - the text handler routine (described below)
1506  *
1507  * strategy - the strategy routine (described below)
1508  *
1509  * de - for internal use by the sysctl routines
1510  *
1511  * extra1, extra2 - extra pointers usable by the proc handler routines
1512  *
1513  * Leaf nodes in the sysctl tree will be represented by a single file
1514  * under /proc; non-leaf nodes will be represented by directories.
1515  *
1516  * sysctl(2) can automatically manage read and write requests through
1517  * the sysctl table.  The data and maxlen fields of the ctl_table
1518  * struct enable minimal validation of the values being written to be
1519  * performed, and the mode field allows minimal authentication.
1520  *
1521  * More sophisticated management can be enabled by the provision of a
1522  * strategy routine with the table entry.  This will be called before
1523  * any automatic read or write of the data is performed.
1524  *
1525  * The strategy routine may return
1526  *
1527  * < 0 - Error occurred (error is passed to user process)
1528  *
1529  * 0   - OK - proceed with automatic read or write.
1530  *
1531  * > 0 - OK - read or write has been done by the strategy routine, so
1532  *       return immediately.
1533  *
1534  * There must be a proc_handler routine for any terminal nodes
1535  * mirrored under /proc/sys (non-terminals are handled by a built-in
1536  * directory handler).  Several default handlers are available to
1537  * cover common cases -
1538  *
1539  * proc_dostring(), proc_dointvec(), proc_dointvec_jiffies(),
1540  * proc_dointvec_userhz_jiffies(), proc_dointvec_minmax(), 
1541  * proc_doulongvec_ms_jiffies_minmax(), proc_doulongvec_minmax()
1542  *
1543  * It is the handler's job to read the input buffer from user memory
1544  * and process it. The handler should return 0 on success.
1545  *
1546  * This routine returns %NULL on a failure to register, and a pointer
1547  * to the table header on success.
1548  */
1549 struct ctl_table_header *register_sysctl_table(ctl_table * table)
1550 {
1551         struct ctl_table_header *tmp;
1552         tmp = kmalloc(sizeof(struct ctl_table_header), GFP_KERNEL);
1553         if (!tmp)
1554                 return NULL;
1555         tmp->ctl_table = table;
1556         INIT_LIST_HEAD(&tmp->ctl_entry);
1557         tmp->used = 0;
1558         tmp->unregistering = NULL;
1559         sysctl_set_parent(NULL, table);
1560         spin_lock(&sysctl_lock);
1561         list_add_tail(&tmp->ctl_entry, &root_table_header.ctl_entry);
1562         spin_unlock(&sysctl_lock);
1563         return tmp;
1564 }
1565
1566 /**
1567  * unregister_sysctl_table - unregister a sysctl table hierarchy
1568  * @header: the header returned from register_sysctl_table
1569  *
1570  * Unregisters the sysctl table and all children. proc entries may not
1571  * actually be removed until they are no longer used by anyone.
1572  */
1573 void unregister_sysctl_table(struct ctl_table_header * header)
1574 {
1575         might_sleep();
1576         spin_lock(&sysctl_lock);
1577         start_unregistering(header);
1578         spin_unlock(&sysctl_lock);
1579         kfree(header);
1580 }
1581
1582 #else /* !CONFIG_SYSCTL */
1583 struct ctl_table_header *register_sysctl_table(ctl_table * table)
1584 {
1585         return NULL;
1586 }
1587
1588 void unregister_sysctl_table(struct ctl_table_header * table)
1589 {
1590 }
1591
1592 #endif /* CONFIG_SYSCTL */
1593
1594 /*
1595  * /proc/sys support
1596  */
1597
1598 #ifdef CONFIG_PROC_SYSCTL
1599
1600 static int _proc_do_string(void* data, int maxlen, int write,
1601                            struct file *filp, void __user *buffer,
1602                            size_t *lenp, loff_t *ppos)
1603 {
1604         size_t len;
1605         char __user *p;
1606         char c;
1607
1608         if (!data || !maxlen || !*lenp) {
1609                 *lenp = 0;
1610                 return 0;
1611         }
1612
1613         if (write) {
1614                 len = 0;
1615                 p = buffer;
1616                 while (len < *lenp) {
1617                         if (get_user(c, p++))
1618                                 return -EFAULT;
1619                         if (c == 0 || c == '\n')
1620                                 break;
1621                         len++;
1622                 }
1623                 if (len >= maxlen)
1624                         len = maxlen-1;
1625                 if(copy_from_user(data, buffer, len))
1626                         return -EFAULT;
1627                 ((char *) data)[len] = 0;
1628                 *ppos += *lenp;
1629         } else {
1630                 len = strlen(data);
1631                 if (len > maxlen)
1632                         len = maxlen;
1633
1634                 if (*ppos > len) {
1635                         *lenp = 0;
1636                         return 0;
1637                 }
1638
1639                 data += *ppos;
1640                 len  -= *ppos;
1641
1642                 if (len > *lenp)
1643                         len = *lenp;
1644                 if (len)
1645                         if(copy_to_user(buffer, data, len))
1646                                 return -EFAULT;
1647                 if (len < *lenp) {
1648                         if(put_user('\n', ((char __user *) buffer) + len))
1649                                 return -EFAULT;
1650                         len++;
1651                 }
1652                 *lenp = len;
1653                 *ppos += len;
1654         }
1655         return 0;
1656 }
1657
1658 /**
1659  * proc_dostring - read a string sysctl
1660  * @table: the sysctl table
1661  * @write: %TRUE if this is a write to the sysctl file
1662  * @filp: the file structure
1663  * @buffer: the user buffer
1664  * @lenp: the size of the user buffer
1665  * @ppos: file position
1666  *
1667  * Reads/writes a string from/to the user buffer. If the kernel
1668  * buffer provided is not large enough to hold the string, the
1669  * string is truncated. The copied string is %NULL-terminated.
1670  * If the string is being read by the user process, it is copied
1671  * and a newline '\n' is added. It is truncated if the buffer is
1672  * not large enough.
1673  *
1674  * Returns 0 on success.
1675  */
1676 int proc_dostring(ctl_table *table, int write, struct file *filp,
1677                   void __user *buffer, size_t *lenp, loff_t *ppos)
1678 {
1679         return _proc_do_string(table->data, table->maxlen, write, filp,
1680                                buffer, lenp, ppos);
1681 }
1682
1683
/*
 * do_proc_dointvec_conv - default int <-> sign/magnitude conversion
 * @negp: sign flag; consumed on write, set to -1 or 0 on read
 * @lvalp: magnitude; consumed on write, produced on read
 * @valp: the stored integer
 * @write: non-zero to store @negp and @lvalp into @valp, zero for the reverse
 * @data: not used
 *
 * Always returns 0.
 */
static int do_proc_dointvec_conv(int *negp, unsigned long *lvalp,
                                 int *valp,
                                 int write, void *data)
{
        if (write) {
                *valp = *negp ? -*lvalp : *lvalp;
        } else {
                int val = *valp;
                if (val < 0) {
                        *negp = -1;
                        /*
                         * Negate after widening to unsigned: "-val" is
                         * signed overflow when val is INT_MIN and would
                         * sign-extend to a bogus magnitude on 64-bit.
                         */
                        *lvalp = -(unsigned long)val;
                } else {
                        *negp = 0;
                        *lvalp = (unsigned long)val;
                }
        }
        return 0;
}
1702
1703 static int __do_proc_dointvec(void *tbl_data, ctl_table *table,
1704                   int write, struct file *filp, void __user *buffer,
1705                   size_t *lenp, loff_t *ppos,
1706                   int (*conv)(int *negp, unsigned long *lvalp, int *valp,
1707                               int write, void *data),
1708                   void *data)
1709 {
1710 #define TMPBUFLEN 21
1711         int *i, vleft, first=1, neg, val;
1712         unsigned long lval;
1713         size_t left, len;
1714         
1715         char buf[TMPBUFLEN], *p;
1716         char __user *s = buffer;
1717         
1718         if (!tbl_data || !table->maxlen || !*lenp ||
1719             (*ppos && !write)) {
1720                 *lenp = 0;
1721                 return 0;
1722         }
1723         
1724         i = (int *) tbl_data;
1725         vleft = table->maxlen / sizeof(*i);
1726         left = *lenp;
1727
1728         if (!conv)
1729                 conv = do_proc_dointvec_conv;
1730
1731         for (; left && vleft--; i++, first=0) {
1732                 if (write) {
1733                         while (left) {
1734                                 char c;
1735                                 if (get_user(c, s))
1736                                         return -EFAULT;
1737                                 if (!isspace(c))
1738                                         break;
1739                                 left--;
1740                                 s++;
1741                         }
1742                         if (!left)
1743                                 break;
1744                         neg = 0;
1745                         len = left;
1746                         if (len > sizeof(buf) - 1)
1747                                 len = sizeof(buf) - 1;
1748                         if (copy_from_user(buf, s, len))
1749                                 return -EFAULT;
1750                         buf[len] = 0;
1751                         p = buf;
1752                         if (*p == '-' && left > 1) {
1753                                 neg = 1;
1754                                 p++;
1755                         }
1756                         if (*p < '0' || *p > '9')
1757                                 break;
1758
1759                         lval = simple_strtoul(p, &p, 0);
1760
1761                         len = p-buf;
1762                         if ((len < left) && *p && !isspace(*p))
1763                                 break;
1764                         if (neg)
1765                                 val = -val;
1766                         s += len;
1767                         left -= len;
1768
1769                         if (conv(&neg, &lval, i, 1, data))
1770                                 break;
1771                 } else {
1772                         p = buf;
1773                         if (!first)
1774                                 *p++ = '\t';
1775         
1776                         if (conv(&neg, &lval, i, 0, data))
1777                                 break;
1778
1779                         sprintf(p, "%s%lu", neg ? "-" : "", lval);
1780                         len = strlen(buf);
1781                         if (len > left)
1782                                 len = left;
1783                         if(copy_to_user(s, buf, len))
1784                                 return -EFAULT;
1785                         left -= len;
1786                         s += len;
1787                 }
1788         }
1789
1790         if (!write && !first && left) {
1791                 if(put_user('\n', s))
1792                         return -EFAULT;
1793                 left--, s++;
1794         }
1795         if (write) {
1796                 while (left) {
1797                         char c;
1798                         if (get_user(c, s++))
1799                                 return -EFAULT;
1800                         if (!isspace(c))
1801                                 break;
1802                         left--;
1803                 }
1804         }
1805         if (write && first)
1806                 return -EINVAL;
1807         *lenp -= left;
1808         *ppos += *lenp;
1809         return 0;
1810 #undef TMPBUFLEN
1811 }
1812
1813 static int do_proc_dointvec(ctl_table *table, int write, struct file *filp,
1814                   void __user *buffer, size_t *lenp, loff_t *ppos,
1815                   int (*conv)(int *negp, unsigned long *lvalp, int *valp,
1816                               int write, void *data),
1817                   void *data)
1818 {
1819         return __do_proc_dointvec(table->data, table, write, filp,
1820                         buffer, lenp, ppos, conv, data);
1821 }
1822
1823 /**
1824  * proc_dointvec - read a vector of integers
1825  * @table: the sysctl table
1826  * @write: %TRUE if this is a write to the sysctl file
1827  * @filp: the file structure
1828  * @buffer: the user buffer
1829  * @lenp: the size of the user buffer
1830  * @ppos: file position
1831  *
1832  * Reads/writes up to table->maxlen/sizeof(unsigned int) integer
1833  * values from/to the user buffer, treated as an ASCII string. 
1834  *
1835  * Returns 0 on success.
1836  */
1837 int proc_dointvec(ctl_table *table, int write, struct file *filp,
1838                      void __user *buffer, size_t *lenp, loff_t *ppos)
1839 {
1840     return do_proc_dointvec(table,write,filp,buffer,lenp,ppos,
1841                             NULL,NULL);
1842 }
1843
/* How do_proc_dointvec_bset_conv() combines a written value with the old one */
#define OP_SET  0
#define OP_AND  1
#define OP_OR   2

/*
 * do_proc_dointvec_bset_conv - conversion that sets, ANDs or ORs on write
 * @negp: sign flag; consumed on write, set to -1 or 0 on read
 * @lvalp: magnitude; consumed on write, produced on read
 * @valp: the stored integer
 * @write: non-zero to update @valp, zero to export it
 * @data: points to an int holding one of OP_SET, OP_AND or OP_OR
 *
 * Always returns 0.  An unrecognised operation leaves @valp untouched.
 */
static int do_proc_dointvec_bset_conv(int *negp, unsigned long *lvalp,
                                      int *valp,
                                      int write, void *data)
{
        int op = *(int *)data;
        if (write) {
                int val = *negp ? -*lvalp : *lvalp;
                switch(op) {
                case OP_SET:    *valp = val; break;
                case OP_AND:    *valp &= val; break;
                case OP_OR:     *valp |= val; break;
                default:        break;
                }
        } else {
                int val = *valp;
                if (val < 0) {
                        *negp = -1;
                        /*
                         * Negate after widening to unsigned: "-val" is
                         * signed overflow when val is INT_MIN and would
                         * sign-extend to a bogus magnitude on 64-bit.
                         */
                        *lvalp = -(unsigned long)val;
                } else {
                        *negp = 0;
                        *lvalp = (unsigned long)val;
                }
        }
        return 0;
}
1872
1873 /*
1874  *      init may raise the set.
1875  */
1876  
1877 int proc_dointvec_bset(ctl_table *table, int write, struct file *filp,
1878                         void __user *buffer, size_t *lenp, loff_t *ppos)
1879 {
1880         int op;
1881
1882         if (write && !capable(CAP_SYS_MODULE)) {
1883                 return -EPERM;
1884         }
1885
1886         op = is_init(current) ? OP_SET : OP_AND;
1887         return do_proc_dointvec(table,write,filp,buffer,lenp,ppos,
1888                                 do_proc_dointvec_bset_conv,&op);
1889 }
1890
1891 /*
1892  *      Taint values can only be increased
1893  */
1894 static int proc_dointvec_taint(ctl_table *table, int write, struct file *filp,
1895                                void __user *buffer, size_t *lenp, loff_t *ppos)
1896 {
1897         int op;
1898
1899         if (write && !capable(CAP_SYS_ADMIN))
1900                 return -EPERM;
1901
1902         op = OP_OR;
1903         return do_proc_dointvec(table,write,filp,buffer,lenp,ppos,
1904                                 do_proc_dointvec_bset_conv,&op);
1905 }
1906
/* Optional bounds for do_proc_dointvec_minmax_conv(); NULL means no bound */
struct do_proc_dointvec_minmax_conv_param {
        int *min;
        int *max;
};

/*
 * do_proc_dointvec_minmax_conv - conversion with range checking on write
 * @negp: sign flag; consumed on write, set to -1 or 0 on read
 * @lvalp: magnitude; consumed on write, produced on read
 * @valp: the stored integer
 * @write: non-zero to update @valp, zero to export it
 * @data: points to a struct do_proc_dointvec_minmax_conv_param
 *
 * Returns 0 on success, or -EINVAL (leaving @valp untouched) when a
 * written value is below *min or above *max.
 */
static int do_proc_dointvec_minmax_conv(int *negp, unsigned long *lvalp,
                                        int *valp,
                                        int write, void *data)
{
        struct do_proc_dointvec_minmax_conv_param *param = data;
        if (write) {
                int val = *negp ? -*lvalp : *lvalp;
                if ((param->min && *param->min > val) ||
                    (param->max && *param->max < val))
                        return -EINVAL;
                *valp = val;
        } else {
                int val = *valp;
                if (val < 0) {
                        *negp = -1;
                        /*
                         * Negate after widening to unsigned: "-val" is
                         * signed overflow when val is INT_MIN and would
                         * sign-extend to a bogus magnitude on 64-bit.
                         */
                        *lvalp = -(unsigned long)val;
                } else {
                        *negp = 0;
                        *lvalp = (unsigned long)val;
                }
        }
        return 0;
}
1935
1936 /**
1937  * proc_dointvec_minmax - read a vector of integers with min/max values
1938  * @table: the sysctl table
1939  * @write: %TRUE if this is a write to the sysctl file
1940  * @filp: the file structure
1941  * @buffer: the user buffer
1942  * @lenp: the size of the user buffer
1943  * @ppos: file position
1944  *
1945  * Reads/writes up to table->maxlen/sizeof(unsigned int) integer
1946  * values from/to the user buffer, treated as an ASCII string.
1947  *
1948  * This routine will ensure the values are within the range specified by
1949  * table->extra1 (min) and table->extra2 (max).
1950  *
1951  * Returns 0 on success.
1952  */
1953 int proc_dointvec_minmax(ctl_table *table, int write, struct file *filp,
1954                   void __user *buffer, size_t *lenp, loff_t *ppos)
1955 {
1956         struct do_proc_dointvec_minmax_conv_param param = {
1957                 .min = (int *) table->extra1,
1958                 .max = (int *) table->extra2,
1959         };
1960         return do_proc_dointvec(table, write, filp, buffer, lenp, ppos,
1961                                 do_proc_dointvec_minmax_conv, &param);
1962 }
1963
1964 static int __do_proc_doulongvec_minmax(void *data, ctl_table *table, int write,
1965                                      struct file *filp,
1966                                      void __user *buffer,
1967                                      size_t *lenp, loff_t *ppos,
1968                                      unsigned long convmul,
1969                                      unsigned long convdiv)
1970 {
1971 #define TMPBUFLEN 21
1972         unsigned long *i, *min, *max, val;
1973         int vleft, first=1, neg;
1974         size_t len, left;
1975         char buf[TMPBUFLEN], *p;
1976         char __user *s = buffer;
1977         
1978         if (!data || !table->maxlen || !*lenp ||
1979             (*ppos && !write)) {
1980                 *lenp = 0;
1981                 return 0;
1982         }
1983         
1984         i = (unsigned long *) data;
1985         min = (unsigned long *) table->extra1;
1986         max = (unsigned long *) table->extra2;
1987         vleft = table->maxlen / sizeof(unsigned long);
1988         left = *lenp;
1989         
1990         for (; left && vleft--; i++, min++, max++, first=0) {
1991                 if (write) {
1992                         while (left) {
1993                                 char c;
1994                                 if (get_user(c, s))
1995                                         return -EFAULT;
1996                                 if (!isspace(c))
1997                                         break;
1998                                 left--;
1999                                 s++;
2000                         }
2001                         if (!left)
2002                                 break;
2003                         neg = 0;
2004                         len = left;
2005                         if (len > TMPBUFLEN-1)
2006                                 len = TMPBUFLEN-1;
2007                         if (copy_from_user(buf, s, len))
2008                                 return -EFAULT;
2009                         buf[len] = 0;
2010                         p = buf;
2011                         if (*p == '-' && left > 1) {
2012                                 neg = 1;
2013                                 p++;
2014                         }
2015                         if (*p < '0' || *p > '9')
2016                                 break;
2017                         val = simple_strtoul(p, &p, 0) * convmul / convdiv ;
2018                         len = p-buf;
2019                         if ((len < left) && *p && !isspace(*p))
2020                                 break;
2021                         if (neg)
2022                                 val = -val;
2023                         s += len;
2024                         left -= len;
2025
2026                         if(neg)
2027                                 continue;
2028                         if ((min && val < *min) || (max && val > *max))
2029                                 continue;
2030                         *i = val;
2031                 } else {
2032                         p = buf;
2033                         if (!first)
2034                                 *p++ = '\t';
2035                         sprintf(p, "%lu", convdiv * (*i) / convmul);
2036                         len = strlen(buf);
2037                         if (len > left)
2038                                 len = left;
2039                         if(copy_to_user(s, buf, len))
2040                                 return -EFAULT;
2041                         left -= len;
2042                         s += len;
2043                 }
2044         }
2045
2046         if (!write && !first && left) {
2047                 if(put_user('\n', s))
2048                         return -EFAULT;
2049                 left--, s++;
2050         }
2051         if (write) {
2052                 while (left) {
2053                         char c;
2054                         if (get_user(c, s++))
2055                                 return -EFAULT;
2056                         if (!isspace(c))
2057                                 break;
2058                         left--;
2059                 }
2060         }
2061         if (write && first)
2062                 return -EINVAL;
2063         *lenp -= left;
2064         *ppos += *lenp;
2065         return 0;
2066 #undef TMPBUFLEN
2067 }
2068
2069 static int do_proc_doulongvec_minmax(ctl_table *table, int write,
2070                                      struct file *filp,
2071                                      void __user *buffer,
2072                                      size_t *lenp, loff_t *ppos,
2073                                      unsigned long convmul,
2074                                      unsigned long convdiv)
2075 {
2076         return __do_proc_doulongvec_minmax(table->data, table, write,
2077                         filp, buffer, lenp, ppos, convmul, convdiv);
2078 }
2079
2080 /**
2081  * proc_doulongvec_minmax - read a vector of long integers with min/max values
2082  * @table: the sysctl table
2083  * @write: %TRUE if this is a write to the sysctl file
2084  * @filp: the file structure
2085  * @buffer: the user buffer
2086  * @lenp: the size of the user buffer
2087  * @ppos: file position
2088  *
2089  * Reads/writes up to table->maxlen/sizeof(unsigned long) unsigned long
2090  * values from/to the user buffer, treated as an ASCII string.
2091  *
2092  * This routine will ensure the values are within the range specified by
2093  * table->extra1 (min) and table->extra2 (max).
2094  *
2095  * Returns 0 on success.
2096  */
2097 int proc_doulongvec_minmax(ctl_table *table, int write, struct file *filp,
2098                            void __user *buffer, size_t *lenp, loff_t *ppos)
2099 {
2100     return do_proc_doulongvec_minmax(table, write, filp, buffer, lenp, ppos, 1l, 1l);
2101 }
2102
2103 /**
2104  * proc_doulongvec_ms_jiffies_minmax - read a vector of millisecond values with min/max values
2105  * @table: the sysctl table
2106  * @write: %TRUE if this is a write to the sysctl file
2107  * @filp: the file structure
2108  * @buffer: the user buffer
2109  * @lenp: the size of the user buffer
2110  * @ppos: file position
2111  *
2112  * Reads/writes up to table->maxlen/sizeof(unsigned long) unsigned long
2113  * values from/to the user buffer, treated as an ASCII string. The values
2114  * are treated as milliseconds, and converted to jiffies when they are stored.
2115  *
2116  * This routine will ensure the values are within the range specified by
2117  * table->extra1 (min) and table->extra2 (max).
2118  *
2119  * Returns 0 on success.
2120  */
2121 int proc_doulongvec_ms_jiffies_minmax(ctl_table *table, int write,
2122                                       struct file *filp,
2123                                       void __user *buffer,
2124                                       size_t *lenp, loff_t *ppos)
2125 {
2126     return do_proc_doulongvec_minmax(table, write, filp, buffer,
2127                                      lenp, ppos, HZ, 1000l);
2128 }
2129
2130
2131 static int do_proc_dointvec_jiffies_conv(int *negp, unsigned long *lvalp,
2132                                          int *valp,
2133                                          int write, void *data)
2134 {
2135         if (write) {
2136                 if (*lvalp > LONG_MAX / HZ)
2137                         return 1;
2138                 *valp = *negp ? -(*lvalp*HZ) : (*lvalp*HZ);
2139         } else {
2140                 int val = *valp;
2141                 unsigned long lval;
2142                 if (val < 0) {
2143                         *negp = -1;
2144                         lval = (unsigned long)-val;
2145                 } else {
2146                         *negp = 0;
2147                         lval = (unsigned long)val;
2148                 }
2149                 *lvalp = lval / HZ;
2150         }
2151         return 0;
2152 }
2153
2154 static int do_proc_dointvec_userhz_jiffies_conv(int *negp, unsigned long *lvalp,
2155                                                 int *valp,
2156                                                 int write, void *data)
2157 {
2158         if (write) {
2159                 if (USER_HZ < HZ && *lvalp > (LONG_MAX / HZ) * USER_HZ)
2160                         return 1;
2161                 *valp = clock_t_to_jiffies(*negp ? -*lvalp : *lvalp);
2162         } else {
2163                 int val = *valp;
2164                 unsigned long lval;
2165                 if (val < 0) {
2166                         *negp = -1;
2167                         lval = (unsigned long)-val;
2168                 } else {
2169                         *negp = 0;
2170                         lval = (unsigned long)val;
2171                 }
2172                 *lvalp = jiffies_to_clock_t(lval);
2173         }
2174         return 0;
2175 }
2176
/*
 * do_proc_dointvec_ms_jiffies_conv - convert between milliseconds and
 * stored jiffies
 * @negp: sign flag; consumed on write, set to -1 or 0 on read
 * @lvalp: magnitude in milliseconds; consumed on write, produced on read
 * @valp: the stored value, in jiffies
 * @write: non-zero to update @valp, zero to export it
 * @data: not used
 *
 * Always returns 0.
 */
static int do_proc_dointvec_ms_jiffies_conv(int *negp, unsigned long *lvalp,
                                            int *valp,
                                            int write, void *data)
{
        if (write) {
                *valp = msecs_to_jiffies(*negp ? -*lvalp : *lvalp);
        } else {
                int val = *valp;
                unsigned long lval;
                if (val < 0) {
                        *negp = -1;
                        /*
                         * Negate after widening to unsigned: "-val" is
                         * signed overflow when val is INT_MIN and would
                         * sign-extend to a bogus magnitude on 64-bit.
                         */
                        lval = -(unsigned long)val;
                } else {
                        *negp = 0;
                        lval = (unsigned long)val;
                }
                *lvalp = jiffies_to_msecs(lval);
        }
        return 0;
}
2197
2198 /**
2199  * proc_dointvec_jiffies - read a vector of integers as seconds
2200  * @table: the sysctl table
2201  * @write: %TRUE if this is a write to the sysctl file
2202  * @filp: the file structure
2203  * @buffer: the user buffer
2204  * @lenp: the size of the user buffer
2205  * @ppos: file position
2206  *
2207  * Reads/writes up to table->maxlen/sizeof(unsigned int) integer
2208  * values from/to the user buffer, treated as an ASCII string. 
2209  * The values read are assumed to be in seconds, and are converted into
2210  * jiffies.
2211  *
2212  * Returns 0 on success.
2213  */
2214 int proc_dointvec_jiffies(ctl_table *table, int write, struct file *filp,
2215                           void __user *buffer, size_t *lenp, loff_t *ppos)
2216 {
2217     return do_proc_dointvec(table,write,filp,buffer,lenp,ppos,
2218                             do_proc_dointvec_jiffies_conv,NULL);
2219 }
2220
2221 /**
2222  * proc_dointvec_userhz_jiffies - read a vector of integers as 1/USER_HZ seconds
2223  * @table: the sysctl table
2224  * @write: %TRUE if this is a write to the sysctl file
2225  * @filp: the file structure
2226  * @buffer: the user buffer
2227  * @lenp: the size of the user buffer
2228  * @ppos: pointer to the file position
2229  *
2230  * Reads/writes up to table->maxlen/sizeof(unsigned int) integer
2231  * values from/to the user buffer, treated as an ASCII string. 
2232  * The values read are assumed to be in 1/USER_HZ seconds, and 
2233  * are converted into jiffies.
2234  *
2235  * Returns 0 on success.
2236  */
2237 int proc_dointvec_userhz_jiffies(ctl_table *table, int write, struct file *filp,
2238                                  void __user *buffer, size_t *lenp, loff_t *ppos)
2239 {
2240     return do_proc_dointvec(table,write,filp,buffer,lenp,ppos,
2241                             do_proc_dointvec_userhz_jiffies_conv,NULL);
2242 }
2243
2244 /**
2245  * proc_dointvec_ms_jiffies - read a vector of integers as 1 milliseconds
2246  * @table: the sysctl table
2247  * @write: %TRUE if this is a write to the sysctl file
2248  * @filp: the file structure
2249  * @buffer: the user buffer
2250  * @lenp: the size of the user buffer
2251  * @ppos: file position
2252  * @ppos: the current position in the file
2253  *
2254  * Reads/writes up to table->maxlen/sizeof(unsigned int) integer
2255  * values from/to the user buffer, treated as an ASCII string. 
2256  * The values read are assumed to be in 1/1000 seconds, and 
2257  * are converted into jiffies.
2258  *
2259  * Returns 0 on success.
2260  */
2261 int proc_dointvec_ms_jiffies(ctl_table *table, int write, struct file *filp,
2262                              void __user *buffer, size_t *lenp, loff_t *ppos)
2263 {
2264         return do_proc_dointvec(table, write, filp, buffer, lenp, ppos,
2265                                 do_proc_dointvec_ms_jiffies_conv, NULL);
2266 }
2267
2268 static int proc_do_cad_pid(ctl_table *table, int write, struct file *filp,
2269                            void __user *buffer, size_t *lenp, loff_t *ppos)
2270 {
2271         struct pid *new_pid;
2272         pid_t tmp;
2273         int r;
2274
2275         tmp = pid_nr(cad_pid);
2276
2277         r = __do_proc_dointvec(&tmp, table, write, filp, buffer,
2278                                lenp, ppos, NULL, NULL);
2279         if (r || !write)
2280                 return r;
2281
2282         new_pid = find_get_pid(tmp);
2283         if (!new_pid)
2284                 return -ESRCH;
2285
2286         put_pid(xchg(&cad_pid, new_pid));
2287         return 0;
2288 }
2289
2290 #else /* CONFIG_PROC_FS */
2291
2292 int proc_dostring(ctl_table *table, int write, struct file *filp,
2293                   void __user *buffer, size_t *lenp, loff_t *ppos)
2294 {
2295         return -ENOSYS;
2296 }
2297
2298 int proc_dointvec(ctl_table *table, int write, struct file *filp,
2299                   void __user *buffer, size_t *lenp, loff_t *ppos)
2300 {
2301         return -ENOSYS;
2302 }
2303
2304 int proc_dointvec_bset(ctl_table *table, int write, struct file *filp,
2305                         void __user *buffer, size_t *lenp, loff_t *ppos)
2306 {
2307         return -ENOSYS;
2308 }
2309
2310 int proc_dointvec_minmax(ctl_table *table, int write, struct file *filp,
2311                     void __user *buffer, size_t *lenp, loff_t *ppos)
2312 {
2313         return -ENOSYS;
2314 }
2315
2316 int proc_dointvec_jiffies(ctl_table *table, int write, struct file *filp,
2317                     void __user *buffer, size_t *lenp, loff_t *ppos)
2318 {
2319         return -ENOSYS;
2320 }
2321
2322 int proc_dointvec_userhz_jiffies(ctl_table *table, int write, struct file *filp,
2323                     void __user *buffer, size_t *lenp, loff_t *ppos)
2324 {
2325         return -ENOSYS;
2326 }
2327
2328 int proc_dointvec_ms_jiffies(ctl_table *table, int write, struct file *filp,
2329                              void __user *buffer, size_t *lenp, loff_t *ppos)
2330 {
2331         return -ENOSYS;
2332 }
2333
2334 int proc_doulongvec_minmax(ctl_table *table, int write, struct file *filp,
2335                     void __user *buffer, size_t *lenp, loff_t *ppos)
2336 {
2337         return -ENOSYS;
2338 }
2339
2340 int proc_doulongvec_ms_jiffies_minmax(ctl_table *table, int write,
2341                                       struct file *filp,
2342                                       void __user *buffer,
2343                                       size_t *lenp, loff_t *ppos)
2344 {
2345     return -ENOSYS;
2346 }
2347
2348
2349 #endif /* CONFIG_PROC_FS */
2350
2351
2352 #ifdef CONFIG_SYSCTL_SYSCALL
2353 /*
2354  * General sysctl support routines 
2355  */
2356
2357 /* The generic string strategy routine: */
2358 int sysctl_string(ctl_table *table, int __user *name, int nlen,
2359                   void __user *oldval, size_t __user *oldlenp,
2360                   void __user *newval, size_t newlen)
2361 {
2362         if (!table->data || !table->maxlen) 
2363                 return -ENOTDIR;
2364         
2365         if (oldval && oldlenp) {
2366                 size_t bufsize;
2367                 if (get_user(bufsize, oldlenp))
2368                         return -EFAULT;
2369                 if (bufsize) {
2370                         size_t len = strlen(table->data), copied;
2371
2372                         /* This shouldn't trigger for a well-formed sysctl */
2373                         if (len > table->maxlen)
2374                                 len = table->maxlen;
2375
2376                         /* Copy up to a max of bufsize-1 bytes of the string */
2377                         copied = (len >= bufsize) ? bufsize - 1 : len;
2378
2379                         if (copy_to_user(oldval, table->data, copied) ||
2380                             put_user(0, (char __user *)(oldval + copied)))
2381                                 return -EFAULT;
2382                         if (put_user(len, oldlenp))
2383                                 return -EFAULT;
2384                 }
2385         }
2386         if (newval && newlen) {
2387                 size_t len = newlen;
2388                 if (len > table->maxlen)
2389                         len = table->maxlen;
2390                 if(copy_from_user(table->data, newval, len))
2391                         return -EFAULT;
2392                 if (len == table->maxlen)
2393                         len--;
2394                 ((char *) table->data)[len] = 0;
2395         }
2396         return 1;
2397 }
2398
2399 /*
2400  * This function makes sure that all of the integers in the vector
2401  * are between the minimum and maximum values given in the arrays
2402  * table->extra1 and table->extra2, respectively.
2403  */
2404 int sysctl_intvec(ctl_table *table, int __user *name, int nlen,
2405                 void __user *oldval, size_t __user *oldlenp,
2406                 void __user *newval, size_t newlen)
2407 {
2408
2409         if (newval && newlen) {
2410                 int __user *vec = (int __user *) newval;
2411                 int *min = (int *) table->extra1;
2412                 int *max = (int *) table->extra2;
2413                 size_t length;
2414                 int i;
2415
2416                 if (newlen % sizeof(int) != 0)
2417                         return -EINVAL;
2418
2419                 if (!table->extra1 && !table->extra2)
2420                         return 0;
2421
2422                 if (newlen > table->maxlen)
2423                         newlen = table->maxlen;
2424                 length = newlen / sizeof(int);
2425
2426                 for (i = 0; i < length; i++) {
2427                         int value;
2428                         if (get_user(value, vec + i))
2429                                 return -EFAULT;
2430                         if (min && value < min[i])
2431                                 return -EINVAL;
2432                         if (max && value > max[i])
2433                                 return -EINVAL;
2434                 }
2435         }
2436         return 0;
2437 }
2438
2439 /* Strategy function to convert jiffies to seconds */ 
2440 int sysctl_jiffies(ctl_table *table, int __user *name, int nlen,
2441                 void __user *oldval, size_t __user *oldlenp,
2442                 void __user *newval, size_t newlen)
2443 {
2444         if (oldval && oldlenp) {
2445                 size_t olen;
2446
2447                 if (get_user(olen, oldlenp))
2448                         return -EFAULT;
2449                 if (olen) {
2450                         int val;
2451
2452                         if (olen < sizeof(int))
2453                                 return -EINVAL;
2454
2455                         val = *(int *)(table->data) / HZ;
2456                         if (put_user(val, (int __user *)oldval))
2457                                 return -EFAULT;
2458                         if (put_user(sizeof(int), oldlenp))
2459                                 return -EFAULT;
2460                 }
2461         }
2462         if (newval && newlen) { 
2463                 int new;
2464                 if (newlen != sizeof(int))
2465                         return -EINVAL; 
2466                 if (get_user(new, (int __user *)newval))
2467                         return -EFAULT;
2468                 *(int *)(table->data) = new*HZ; 
2469         }
2470         return 1;
2471 }
2472
2473 /* Strategy function to convert jiffies to seconds */ 
2474 int sysctl_ms_jiffies(ctl_table *table, int __user *name, int nlen,
2475                 void __user *oldval, size_t __user *oldlenp,
2476                 void __user *newval, size_t newlen)
2477 {
2478         if (oldval && oldlenp) {
2479                 size_t olen;
2480
2481                 if (get_user(olen, oldlenp))
2482                         return -EFAULT;
2483                 if (olen) {
2484                         int val;
2485
2486                         if (olen < sizeof(int))
2487                                 return -EINVAL;
2488
2489                         val = jiffies_to_msecs(*(int *)(table->data));
2490                         if (put_user(val, (int __user *)oldval))
2491                                 return -EFAULT;
2492                         if (put_user(sizeof(int), oldlenp))
2493                                 return -EFAULT;
2494                 }
2495         }
2496         if (newval && newlen) { 
2497                 int new;
2498                 if (newlen != sizeof(int))
2499                         return -EINVAL; 
2500                 if (get_user(new, (int __user *)newval))
2501                         return -EFAULT;
2502                 *(int *)(table->data) = msecs_to_jiffies(new);
2503         }
2504         return 1;
2505 }
2506
2507
2508
2509 #else /* CONFIG_SYSCTL_SYSCALL */
2510
2511
2512 asmlinkage long sys_sysctl(struct __sysctl_args __user *args)
2513 {
2514         static int msg_count;
2515         struct __sysctl_args tmp;
2516         int name[CTL_MAXNAME];
2517         int i;
2518
2519         /* Read in the sysctl name for better debug message logging */
2520         if (copy_from_user(&tmp, args, sizeof(tmp)))
2521                 return -EFAULT;
2522         if (tmp.nlen <= 0 || tmp.nlen >= CTL_MAXNAME)
2523                 return -ENOTDIR;
2524         for (i = 0; i < tmp.nlen; i++)
2525                 if (get_user(name[i], tmp.name + i))
2526                         return -EFAULT;
2527
2528         /* Ignore accesses to kernel.version */
2529         if ((tmp.nlen == 2) && (name[0] == CTL_KERN) && (name[1] == KERN_VERSION))
2530                 goto out;
2531
2532         if (msg_count < 5) {
2533                 msg_count++;
2534                 printk(KERN_INFO
2535                         "warning: process `%s' used the removed sysctl "
2536                         "system call with ", current->comm);
2537                 for (i = 0; i < tmp.nlen; i++)
2538                         printk("%d.", name[i]);
2539                 printk("\n");
2540         }
2541 out:
2542         return -ENOSYS;
2543 }
2544
2545 int sysctl_string(ctl_table *table, int __user *name, int nlen,
2546                   void __user *oldval, size_t __user *oldlenp,
2547                   void __user *newval, size_t newlen)
2548 {
2549         return -ENOSYS;
2550 }
2551
2552 int sysctl_intvec(ctl_table *table, int __user *name, int nlen,
2553                 void __user *oldval, size_t __user *oldlenp,
2554                 void __user *newval, size_t newlen)
2555 {
2556         return -ENOSYS;
2557 }
2558
2559 int sysctl_jiffies(ctl_table *table, int __user *name, int nlen,
2560                 void __user *oldval, size_t __user *oldlenp,
2561                 void __user *newval, size_t newlen)
2562 {
2563         return -ENOSYS;
2564 }
2565
2566 int sysctl_ms_jiffies(ctl_table *table, int __user *name, int nlen,
2567                 void __user *oldval, size_t __user *oldlenp,
2568                 void __user *newval, size_t newlen)
2569 {
2570         return -ENOSYS;
2571 }
2572
2573 #endif /* CONFIG_SYSCTL_SYSCALL */
2574
/*
 * The exports are gathered here instead of following each definition:
 * some of these symbols (the sysctl_* strategy routines, for one) have
 * two definitions selected by preprocessor conditionals, and repeating
 * the export after both would be pointless.
 */

/* /proc handlers */
EXPORT_SYMBOL(proc_dostring);
EXPORT_SYMBOL(proc_dointvec);
EXPORT_SYMBOL(proc_dointvec_minmax);
EXPORT_SYMBOL(proc_dointvec_jiffies);
EXPORT_SYMBOL(proc_dointvec_userhz_jiffies);
EXPORT_SYMBOL(proc_dointvec_ms_jiffies);
EXPORT_SYMBOL(proc_doulongvec_minmax);
EXPORT_SYMBOL(proc_doulongvec_ms_jiffies_minmax);

/* table registration */
EXPORT_SYMBOL(register_sysctl_table);
EXPORT_SYMBOL(unregister_sysctl_table);

/* strategy routines */
EXPORT_SYMBOL(sysctl_string);
EXPORT_SYMBOL(sysctl_intvec);
EXPORT_SYMBOL(sysctl_jiffies);
EXPORT_SYMBOL(sysctl_ms_jiffies);