qla1280: sg chaining fixes
[linux-2.6] / kernel / sysctl.c
1 /*
2  * sysctl.c: General linux system control interface
3  *
4  * Begun 24 March 1995, Stephen Tweedie
5  * Added /proc support, Dec 1995
6  * Added bdflush entry and intvec min/max checking, 2/23/96, Tom Dyas.
7  * Added hooks for /proc/sys/net (minor, minor patch), 96/4/1, Mike Shaver.
8  * Added kernel/java-{interpreter,appletviewer}, 96/5/10, Mike Shaver.
9  * Dynamic registration fixes, Stephen Tweedie.
10  * Added kswapd-interval, ctrl-alt-del, printk stuff, 1/8/97, Chris Horn.
11  * Made sysctl support optional via CONFIG_SYSCTL, 1/10/97, Chris
12  *  Horn.
13  * Added proc_doulongvec_ms_jiffies_minmax, 09/08/99, Carlos H. Bauer.
14  * Added proc_doulongvec_minmax, 09/08/99, Carlos H. Bauer.
15  * Changed linked lists to use list.h instead of lists.h, 02/24/00, Bill
16  *  Wendling.
17  * The list_for_each() macro wasn't appropriate for the sysctl loop.
18  *  Removed it and replaced it with older style, 03/23/00, Bill Wendling
19  */
20
21 #include <linux/module.h>
22 #include <linux/mm.h>
23 #include <linux/swap.h>
24 #include <linux/slab.h>
25 #include <linux/sysctl.h>
26 #include <linux/proc_fs.h>
27 #include <linux/capability.h>
28 #include <linux/ctype.h>
29 #include <linux/utsname.h>
30 #include <linux/smp_lock.h>
31 #include <linux/fs.h>
32 #include <linux/init.h>
33 #include <linux/kernel.h>
34 #include <linux/kobject.h>
35 #include <linux/net.h>
36 #include <linux/sysrq.h>
37 #include <linux/highuid.h>
38 #include <linux/writeback.h>
39 #include <linux/hugetlb.h>
40 #include <linux/security.h>
41 #include <linux/initrd.h>
42 #include <linux/times.h>
43 #include <linux/limits.h>
44 #include <linux/dcache.h>
45 #include <linux/syscalls.h>
46 #include <linux/nfs_fs.h>
47 #include <linux/acpi.h>
48 #include <linux/reboot.h>
49
50 #include <asm/uaccess.h>
51 #include <asm/processor.h>
52
53 #ifdef CONFIG_X86
54 #include <asm/nmi.h>
55 #include <asm/stacktrace.h>
56 #endif
57
58 #if defined(CONFIG_SYSCTL)
59
60 /* External variables not in a header file. */
61 extern int C_A_D;
62 extern int print_fatal_signals;
63 extern int sysctl_overcommit_memory;
64 extern int sysctl_overcommit_ratio;
65 extern int sysctl_panic_on_oom;
66 extern int max_threads;
67 extern int core_uses_pid;
68 extern int suid_dumpable;
69 extern char core_pattern[];
70 extern int pid_max;
71 extern int min_free_kbytes;
72 extern int printk_ratelimit_jiffies;
73 extern int printk_ratelimit_burst;
74 extern int pid_max_min, pid_max_max;
75 extern int sysctl_drop_caches;
76 extern int percpu_pagelist_fraction;
77 extern int compat_log;
78 extern int maps_protect;
79 extern int sysctl_stat_interval;
80 extern int audit_argv_kb;
81
/*
 * Limits passed through .extra1/.extra2 to proc_dointvec_minmax for the
 * [fs_]overflow UID and GID entries.
 */
static int minolduid;
static int maxolduid = 65535;

static int min_percpu_pagelist_fract = 8;
86
87 static int ngroups_max = NGROUPS_MAX;
88
89 #ifdef CONFIG_KMOD
90 extern char modprobe_path[];
91 #endif
92 #ifdef CONFIG_CHR_DEV_SG
93 extern int sg_big_buff;
94 #endif
95
96 #ifdef __sparc__
97 extern char reboot_command [];
98 extern int stop_a_enabled;
99 extern int scons_pwroff;
100 #endif
101
102 #ifdef __hppa__
103 extern int pwrsw_enabled;
104 extern int unaligned_enabled;
105 #endif
106
107 #ifdef CONFIG_S390
108 #ifdef CONFIG_MATHEMU
109 extern int sysctl_ieee_emulation_warnings;
110 #endif
111 extern int sysctl_userprocess_debug;
112 extern int spin_retry;
113 #endif
114
115 extern int sysctl_hz_timer;
116
117 #ifdef CONFIG_BSD_PROCESS_ACCT
118 extern int acct_parm[];
119 #endif
120
121 #ifdef CONFIG_IA64
122 extern int no_unaligned_warning;
123 #endif
124
125 #ifdef CONFIG_RT_MUTEXES
126 extern int max_lock_depth;
127 #endif
128
129 #ifdef CONFIG_SYSCTL_SYSCALL
130 static int parse_table(int __user *, int, void __user *, size_t __user *,
131                 void __user *, size_t, ctl_table *);
132 #endif
133
134
135 #ifdef CONFIG_PROC_SYSCTL
136 static int proc_do_cad_pid(ctl_table *table, int write, struct file *filp,
137                   void __user *buffer, size_t *lenp, loff_t *ppos);
138 static int proc_dointvec_taint(ctl_table *table, int write, struct file *filp,
139                                void __user *buffer, size_t *lenp, loff_t *ppos);
140 #endif
141
142 static ctl_table root_table[];
143 static struct ctl_table_header root_table_header =
144         { root_table, LIST_HEAD_INIT(root_table_header.ctl_entry) };
145
146 static ctl_table kern_table[];
147 static ctl_table vm_table[];
148 static ctl_table fs_table[];
149 static ctl_table debug_table[];
150 static ctl_table dev_table[];
151 extern ctl_table random_table[];
152 #ifdef CONFIG_UNIX98_PTYS
153 extern ctl_table pty_table[];
154 #endif
155 #ifdef CONFIG_INOTIFY_USER
156 extern ctl_table inotify_table[];
157 #endif
158
159 #ifdef HAVE_ARCH_PICK_MMAP_LAYOUT
160 int sysctl_legacy_va_layout;
161 #endif
162
163 extern int prove_locking;
164 extern int lock_stat;
165
166 /* The default sysctl tables: */
167
168 static ctl_table root_table[] = {
169         {
170                 .ctl_name       = CTL_KERN,
171                 .procname       = "kernel",
172                 .mode           = 0555,
173                 .child          = kern_table,
174         },
175         {
176                 .ctl_name       = CTL_VM,
177                 .procname       = "vm",
178                 .mode           = 0555,
179                 .child          = vm_table,
180         },
181 #ifdef CONFIG_NET
182         {
183                 .ctl_name       = CTL_NET,
184                 .procname       = "net",
185                 .mode           = 0555,
186                 .child          = net_table,
187         },
188 #endif
189         {
190                 .ctl_name       = CTL_FS,
191                 .procname       = "fs",
192                 .mode           = 0555,
193                 .child          = fs_table,
194         },
195         {
196                 .ctl_name       = CTL_DEBUG,
197                 .procname       = "debug",
198                 .mode           = 0555,
199                 .child          = debug_table,
200         },
201         {
202                 .ctl_name       = CTL_DEV,
203                 .procname       = "dev",
204                 .mode           = 0555,
205                 .child          = dev_table,
206         },
207 /*
208  * NOTE: do not add new entries to this table unless you have read
209  * Documentation/sysctl/ctl_unnumbered.txt
210  */
211         { .ctl_name = 0 }
212 };
213
214 #ifdef CONFIG_SCHED_DEBUG
/*
 * Lower/upper bounds for the sched_latency_ns and sched_*wakeup_granularity_ns
 * entries in kern_table.  They are handed to proc_dointvec_minmax and
 * sysctl_intvec through .extra1/.extra2, and those handlers read the bounds
 * as int.  They therefore have to be declared int: with unsigned long, a
 * 64-bit big-endian kernel would read the (zero) upper half of each value.
 * All values fit comfortably in an int.
 */
static int min_sched_granularity_ns = 100000;           /* 100 usecs */
static int max_sched_granularity_ns = 1000000000;       /* 1 second */
static int min_wakeup_granularity_ns;                   /* 0 usecs */
static int max_wakeup_granularity_ns = 1000000000;      /* 1 second */
219 #endif
220
221 static ctl_table kern_table[] = {
222 #ifdef CONFIG_SCHED_DEBUG
223         {
224                 .ctl_name       = CTL_UNNUMBERED,
225                 .procname       = "sched_nr_latency",
226                 .data           = &sysctl_sched_nr_latency,
227                 .maxlen         = sizeof(unsigned int),
228                 .mode           = 0644,
229                 .proc_handler   = &proc_dointvec,
230         },
231         {
232                 .ctl_name       = CTL_UNNUMBERED,
233                 .procname       = "sched_latency_ns",
234                 .data           = &sysctl_sched_latency,
235                 .maxlen         = sizeof(unsigned int),
236                 .mode           = 0644,
237                 .proc_handler   = &proc_dointvec_minmax,
238                 .strategy       = &sysctl_intvec,
239                 .extra1         = &min_sched_granularity_ns,
240                 .extra2         = &max_sched_granularity_ns,
241         },
242         {
243                 .ctl_name       = CTL_UNNUMBERED,
244                 .procname       = "sched_wakeup_granularity_ns",
245                 .data           = &sysctl_sched_wakeup_granularity,
246                 .maxlen         = sizeof(unsigned int),
247                 .mode           = 0644,
248                 .proc_handler   = &proc_dointvec_minmax,
249                 .strategy       = &sysctl_intvec,
250                 .extra1         = &min_wakeup_granularity_ns,
251                 .extra2         = &max_wakeup_granularity_ns,
252         },
253         {
254                 .ctl_name       = CTL_UNNUMBERED,
255                 .procname       = "sched_batch_wakeup_granularity_ns",
256                 .data           = &sysctl_sched_batch_wakeup_granularity,
257                 .maxlen         = sizeof(unsigned int),
258                 .mode           = 0644,
259                 .proc_handler   = &proc_dointvec_minmax,
260                 .strategy       = &sysctl_intvec,
261                 .extra1         = &min_wakeup_granularity_ns,
262                 .extra2         = &max_wakeup_granularity_ns,
263         },
264         {
265                 .ctl_name       = CTL_UNNUMBERED,
266                 .procname       = "sched_child_runs_first",
267                 .data           = &sysctl_sched_child_runs_first,
268                 .maxlen         = sizeof(unsigned int),
269                 .mode           = 0644,
270                 .proc_handler   = &proc_dointvec,
271         },
272         {
273                 .ctl_name       = CTL_UNNUMBERED,
274                 .procname       = "sched_features",
275                 .data           = &sysctl_sched_features,
276                 .maxlen         = sizeof(unsigned int),
277                 .mode           = 0644,
278                 .proc_handler   = &proc_dointvec,
279         },
280         {
281                 .ctl_name       = CTL_UNNUMBERED,
282                 .procname       = "sched_migration_cost",
283                 .data           = &sysctl_sched_migration_cost,
284                 .maxlen         = sizeof(unsigned int),
285                 .mode           = 0644,
286                 .proc_handler   = &proc_dointvec,
287         },
288 #endif
289         {
290                 .ctl_name       = CTL_UNNUMBERED,
291                 .procname       = "sched_compat_yield",
292                 .data           = &sysctl_sched_compat_yield,
293                 .maxlen         = sizeof(unsigned int),
294                 .mode           = 0644,
295                 .proc_handler   = &proc_dointvec,
296         },
297 #ifdef CONFIG_PROVE_LOCKING
298         {
299                 .ctl_name       = CTL_UNNUMBERED,
300                 .procname       = "prove_locking",
301                 .data           = &prove_locking,
302                 .maxlen         = sizeof(int),
303                 .mode           = 0644,
304                 .proc_handler   = &proc_dointvec,
305         },
306 #endif
307 #ifdef CONFIG_LOCK_STAT
308         {
309                 .ctl_name       = CTL_UNNUMBERED,
310                 .procname       = "lock_stat",
311                 .data           = &lock_stat,
312                 .maxlen         = sizeof(int),
313                 .mode           = 0644,
314                 .proc_handler   = &proc_dointvec,
315         },
316 #endif
317         {
318                 .ctl_name       = KERN_PANIC,
319                 .procname       = "panic",
320                 .data           = &panic_timeout,
321                 .maxlen         = sizeof(int),
322                 .mode           = 0644,
323                 .proc_handler   = &proc_dointvec,
324         },
325         {
326                 .ctl_name       = KERN_CORE_USES_PID,
327                 .procname       = "core_uses_pid",
328                 .data           = &core_uses_pid,
329                 .maxlen         = sizeof(int),
330                 .mode           = 0644,
331                 .proc_handler   = &proc_dointvec,
332         },
333 #ifdef CONFIG_AUDITSYSCALL
334         {
335                 .ctl_name       = CTL_UNNUMBERED,
336                 .procname       = "audit_argv_kb",
337                 .data           = &audit_argv_kb,
338                 .maxlen         = sizeof(int),
339                 .mode           = 0644,
340                 .proc_handler   = &proc_dointvec,
341         },
342 #endif
343         {
344                 .ctl_name       = KERN_CORE_PATTERN,
345                 .procname       = "core_pattern",
346                 .data           = core_pattern,
347                 .maxlen         = CORENAME_MAX_SIZE,
348                 .mode           = 0644,
349                 .proc_handler   = &proc_dostring,
350                 .strategy       = &sysctl_string,
351         },
352 #ifdef CONFIG_PROC_SYSCTL
353         {
354                 .ctl_name       = KERN_TAINTED,
355                 .procname       = "tainted",
356                 .data           = &tainted,
357                 .maxlen         = sizeof(int),
358                 .mode           = 0644,
359                 .proc_handler   = &proc_dointvec_taint,
360         },
361 #endif
362         {
363                 .ctl_name       = KERN_CAP_BSET,
364                 .procname       = "cap-bound",
365                 .data           = &cap_bset,
366                 .maxlen         = sizeof(kernel_cap_t),
367                 .mode           = 0600,
368                 .proc_handler   = &proc_dointvec_bset,
369         },
370 #ifdef CONFIG_BLK_DEV_INITRD
371         {
372                 .ctl_name       = KERN_REALROOTDEV,
373                 .procname       = "real-root-dev",
374                 .data           = &real_root_dev,
375                 .maxlen         = sizeof(int),
376                 .mode           = 0644,
377                 .proc_handler   = &proc_dointvec,
378         },
379 #endif
380         {
381                 .ctl_name       = CTL_UNNUMBERED,
382                 .procname       = "print-fatal-signals",
383                 .data           = &print_fatal_signals,
384                 .maxlen         = sizeof(int),
385                 .mode           = 0644,
386                 .proc_handler   = &proc_dointvec,
387         },
388 #ifdef __sparc__
389         {
390                 .ctl_name       = KERN_SPARC_REBOOT,
391                 .procname       = "reboot-cmd",
392                 .data           = reboot_command,
393                 .maxlen         = 256,
394                 .mode           = 0644,
395                 .proc_handler   = &proc_dostring,
396                 .strategy       = &sysctl_string,
397         },
398         {
399                 .ctl_name       = KERN_SPARC_STOP_A,
400                 .procname       = "stop-a",
401                 .data           = &stop_a_enabled,
402                 .maxlen         = sizeof (int),
403                 .mode           = 0644,
404                 .proc_handler   = &proc_dointvec,
405         },
406         {
407                 .ctl_name       = KERN_SPARC_SCONS_PWROFF,
408                 .procname       = "scons-poweroff",
409                 .data           = &scons_pwroff,
410                 .maxlen         = sizeof (int),
411                 .mode           = 0644,
412                 .proc_handler   = &proc_dointvec,
413         },
414 #endif
415 #ifdef __hppa__
416         {
417                 .ctl_name       = KERN_HPPA_PWRSW,
418                 .procname       = "soft-power",
419                 .data           = &pwrsw_enabled,
420                 .maxlen         = sizeof (int),
421                 .mode           = 0644,
422                 .proc_handler   = &proc_dointvec,
423         },
424         {
425                 .ctl_name       = KERN_HPPA_UNALIGNED,
426                 .procname       = "unaligned-trap",
427                 .data           = &unaligned_enabled,
428                 .maxlen         = sizeof (int),
429                 .mode           = 0644,
430                 .proc_handler   = &proc_dointvec,
431         },
432 #endif
433         {
434                 .ctl_name       = KERN_CTLALTDEL,
435                 .procname       = "ctrl-alt-del",
436                 .data           = &C_A_D,
437                 .maxlen         = sizeof(int),
438                 .mode           = 0644,
439                 .proc_handler   = &proc_dointvec,
440         },
441         {
442                 .ctl_name       = KERN_PRINTK,
443                 .procname       = "printk",
444                 .data           = &console_loglevel,
445                 .maxlen         = 4*sizeof(int),
446                 .mode           = 0644,
447                 .proc_handler   = &proc_dointvec,
448         },
449 #ifdef CONFIG_KMOD
450         {
451                 .ctl_name       = KERN_MODPROBE,
452                 .procname       = "modprobe",
453                 .data           = &modprobe_path,
454                 .maxlen         = KMOD_PATH_LEN,
455                 .mode           = 0644,
456                 .proc_handler   = &proc_dostring,
457                 .strategy       = &sysctl_string,
458         },
459 #endif
460 #if defined(CONFIG_HOTPLUG) && defined(CONFIG_NET)
461         {
462                 .ctl_name       = KERN_HOTPLUG,
463                 .procname       = "hotplug",
464                 .data           = &uevent_helper,
465                 .maxlen         = UEVENT_HELPER_PATH_LEN,
466                 .mode           = 0644,
467                 .proc_handler   = &proc_dostring,
468                 .strategy       = &sysctl_string,
469         },
470 #endif
471 #ifdef CONFIG_CHR_DEV_SG
472         {
473                 .ctl_name       = KERN_SG_BIG_BUFF,
474                 .procname       = "sg-big-buff",
475                 .data           = &sg_big_buff,
476                 .maxlen         = sizeof (int),
477                 .mode           = 0444,
478                 .proc_handler   = &proc_dointvec,
479         },
480 #endif
481 #ifdef CONFIG_BSD_PROCESS_ACCT
482         {
483                 .ctl_name       = KERN_ACCT,
484                 .procname       = "acct",
485                 .data           = &acct_parm,
486                 .maxlen         = 3*sizeof(int),
487                 .mode           = 0644,
488                 .proc_handler   = &proc_dointvec,
489         },
490 #endif
491 #ifdef CONFIG_MAGIC_SYSRQ
492         {
493                 .ctl_name       = KERN_SYSRQ,
494                 .procname       = "sysrq",
495                 .data           = &__sysrq_enabled,
496                 .maxlen         = sizeof (int),
497                 .mode           = 0644,
498                 .proc_handler   = &proc_dointvec,
499         },
500 #endif
501 #ifdef CONFIG_PROC_SYSCTL
502         {
503                 .ctl_name       = KERN_CADPID,
504                 .procname       = "cad_pid",
505                 .data           = NULL,
506                 .maxlen         = sizeof (int),
507                 .mode           = 0600,
508                 .proc_handler   = &proc_do_cad_pid,
509         },
510 #endif
511         {
512                 .ctl_name       = KERN_MAX_THREADS,
513                 .procname       = "threads-max",
514                 .data           = &max_threads,
515                 .maxlen         = sizeof(int),
516                 .mode           = 0644,
517                 .proc_handler   = &proc_dointvec,
518         },
519         {
520                 .ctl_name       = KERN_RANDOM,
521                 .procname       = "random",
522                 .mode           = 0555,
523                 .child          = random_table,
524         },
525 #ifdef CONFIG_UNIX98_PTYS
526         {
527                 .ctl_name       = KERN_PTY,
528                 .procname       = "pty",
529                 .mode           = 0555,
530                 .child          = pty_table,
531         },
532 #endif
533         {
534                 .ctl_name       = KERN_OVERFLOWUID,
535                 .procname       = "overflowuid",
536                 .data           = &overflowuid,
537                 .maxlen         = sizeof(int),
538                 .mode           = 0644,
539                 .proc_handler   = &proc_dointvec_minmax,
540                 .strategy       = &sysctl_intvec,
541                 .extra1         = &minolduid,
542                 .extra2         = &maxolduid,
543         },
544         {
545                 .ctl_name       = KERN_OVERFLOWGID,
546                 .procname       = "overflowgid",
547                 .data           = &overflowgid,
548                 .maxlen         = sizeof(int),
549                 .mode           = 0644,
550                 .proc_handler   = &proc_dointvec_minmax,
551                 .strategy       = &sysctl_intvec,
552                 .extra1         = &minolduid,
553                 .extra2         = &maxolduid,
554         },
555 #ifdef CONFIG_S390
556 #ifdef CONFIG_MATHEMU
557         {
558                 .ctl_name       = KERN_IEEE_EMULATION_WARNINGS,
559                 .procname       = "ieee_emulation_warnings",
560                 .data           = &sysctl_ieee_emulation_warnings,
561                 .maxlen         = sizeof(int),
562                 .mode           = 0644,
563                 .proc_handler   = &proc_dointvec,
564         },
565 #endif
566 #ifdef CONFIG_NO_IDLE_HZ
567         {
568                 .ctl_name       = KERN_HZ_TIMER,
569                 .procname       = "hz_timer",
570                 .data           = &sysctl_hz_timer,
571                 .maxlen         = sizeof(int),
572                 .mode           = 0644,
573                 .proc_handler   = &proc_dointvec,
574         },
575 #endif
576         {
577                 .ctl_name       = KERN_S390_USER_DEBUG_LOGGING,
578                 .procname       = "userprocess_debug",
579                 .data           = &sysctl_userprocess_debug,
580                 .maxlen         = sizeof(int),
581                 .mode           = 0644,
582                 .proc_handler   = &proc_dointvec,
583         },
584 #endif
585         {
586                 .ctl_name       = KERN_PIDMAX,
587                 .procname       = "pid_max",
588                 .data           = &pid_max,
589                 .maxlen         = sizeof (int),
590                 .mode           = 0644,
591                 .proc_handler   = &proc_dointvec_minmax,
592                 .strategy       = sysctl_intvec,
593                 .extra1         = &pid_max_min,
594                 .extra2         = &pid_max_max,
595         },
596         {
597                 .ctl_name       = KERN_PANIC_ON_OOPS,
598                 .procname       = "panic_on_oops",
599                 .data           = &panic_on_oops,
600                 .maxlen         = sizeof(int),
601                 .mode           = 0644,
602                 .proc_handler   = &proc_dointvec,
603         },
604         {
605                 .ctl_name       = KERN_PRINTK_RATELIMIT,
606                 .procname       = "printk_ratelimit",
607                 .data           = &printk_ratelimit_jiffies,
608                 .maxlen         = sizeof(int),
609                 .mode           = 0644,
610                 .proc_handler   = &proc_dointvec_jiffies,
611                 .strategy       = &sysctl_jiffies,
612         },
613         {
614                 .ctl_name       = KERN_PRINTK_RATELIMIT_BURST,
615                 .procname       = "printk_ratelimit_burst",
616                 .data           = &printk_ratelimit_burst,
617                 .maxlen         = sizeof(int),
618                 .mode           = 0644,
619                 .proc_handler   = &proc_dointvec,
620         },
621         {
622                 .ctl_name       = KERN_NGROUPS_MAX,
623                 .procname       = "ngroups_max",
624                 .data           = &ngroups_max,
625                 .maxlen         = sizeof (int),
626                 .mode           = 0444,
627                 .proc_handler   = &proc_dointvec,
628         },
629 #if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_X86)
630         {
631                 .ctl_name       = KERN_UNKNOWN_NMI_PANIC,
632                 .procname       = "unknown_nmi_panic",
633                 .data           = &unknown_nmi_panic,
634                 .maxlen         = sizeof (int),
635                 .mode           = 0644,
636                 .proc_handler   = &proc_dointvec,
637         },
638         {
639                 .ctl_name       = KERN_NMI_WATCHDOG,
640                 .procname       = "nmi_watchdog",
641                 .data           = &nmi_watchdog_enabled,
642                 .maxlen         = sizeof (int),
643                 .mode           = 0644,
644                 .proc_handler   = &proc_nmi_enabled,
645         },
646 #endif
647 #if defined(CONFIG_X86)
648         {
649                 .ctl_name       = KERN_PANIC_ON_NMI,
650                 .procname       = "panic_on_unrecovered_nmi",
651                 .data           = &panic_on_unrecovered_nmi,
652                 .maxlen         = sizeof(int),
653                 .mode           = 0644,
654                 .proc_handler   = &proc_dointvec,
655         },
656         {
657                 .ctl_name       = KERN_BOOTLOADER_TYPE,
658                 .procname       = "bootloader_type",
659                 .data           = &bootloader_type,
660                 .maxlen         = sizeof (int),
661                 .mode           = 0444,
662                 .proc_handler   = &proc_dointvec,
663         },
664         {
665                 .ctl_name       = CTL_UNNUMBERED,
666                 .procname       = "kstack_depth_to_print",
667                 .data           = &kstack_depth_to_print,
668                 .maxlen         = sizeof(int),
669                 .mode           = 0644,
670                 .proc_handler   = &proc_dointvec,
671         },
672 #endif
673 #if defined(CONFIG_MMU)
674         {
675                 .ctl_name       = KERN_RANDOMIZE,
676                 .procname       = "randomize_va_space",
677                 .data           = &randomize_va_space,
678                 .maxlen         = sizeof(int),
679                 .mode           = 0644,
680                 .proc_handler   = &proc_dointvec,
681         },
682 #endif
683 #if defined(CONFIG_S390) && defined(CONFIG_SMP)
684         {
685                 .ctl_name       = KERN_SPIN_RETRY,
686                 .procname       = "spin_retry",
687                 .data           = &spin_retry,
688                 .maxlen         = sizeof (int),
689                 .mode           = 0644,
690                 .proc_handler   = &proc_dointvec,
691         },
692 #endif
693 #if     defined(CONFIG_ACPI_SLEEP) && defined(CONFIG_X86)
694         {
695                 .ctl_name       = KERN_ACPI_VIDEO_FLAGS,
696                 .procname       = "acpi_video_flags",
697                 .data           = &acpi_realmode_flags,
698                 .maxlen         = sizeof (unsigned long),
699                 .mode           = 0644,
700                 .proc_handler   = &proc_doulongvec_minmax,
701         },
702 #endif
703 #ifdef CONFIG_IA64
704         {
705                 .ctl_name       = KERN_IA64_UNALIGNED,
706                 .procname       = "ignore-unaligned-usertrap",
707                 .data           = &no_unaligned_warning,
708                 .maxlen         = sizeof (int),
709                 .mode           = 0644,
710                 .proc_handler   = &proc_dointvec,
711         },
712 #endif
713 #ifdef CONFIG_COMPAT
714         {
715                 .ctl_name       = KERN_COMPAT_LOG,
716                 .procname       = "compat-log",
717                 .data           = &compat_log,
718                 .maxlen         = sizeof (int),
719                 .mode           = 0644,
720                 .proc_handler   = &proc_dointvec,
721         },
722 #endif
723 #ifdef CONFIG_RT_MUTEXES
724         {
725                 .ctl_name       = KERN_MAX_LOCK_DEPTH,
726                 .procname       = "max_lock_depth",
727                 .data           = &max_lock_depth,
728                 .maxlen         = sizeof(int),
729                 .mode           = 0644,
730                 .proc_handler   = &proc_dointvec,
731         },
732 #endif
733 #ifdef CONFIG_PROC_FS
734         {
735                 .ctl_name       = CTL_UNNUMBERED,
736                 .procname       = "maps_protect",
737                 .data           = &maps_protect,
738                 .maxlen         = sizeof(int),
739                 .mode           = 0644,
740                 .proc_handler   = &proc_dointvec,
741         },
742 #endif
743         {
744                 .ctl_name       = CTL_UNNUMBERED,
745                 .procname       = "poweroff_cmd",
746                 .data           = &poweroff_cmd,
747                 .maxlen         = POWEROFF_CMD_PATH_LEN,
748                 .mode           = 0644,
749                 .proc_handler   = &proc_dostring,
750                 .strategy       = &sysctl_string,
751         },
752 /*
753  * NOTE: do not add new entries to this table unless you have read
754  * Documentation/sysctl/ctl_unnumbered.txt
755  */
756         { .ctl_name = 0 }
757 };
758
/*
 * Minimum/maximum bounds for the range-checked entries of vm_table.
 * Each constant is used as a one-element integer vector.
 */
static int one_hundred = 100;
static int two = 2;
static int zero;

764
765
766 static ctl_table vm_table[] = {
767         {
768                 .ctl_name       = VM_OVERCOMMIT_MEMORY,
769                 .procname       = "overcommit_memory",
770                 .data           = &sysctl_overcommit_memory,
771                 .maxlen         = sizeof(sysctl_overcommit_memory),
772                 .mode           = 0644,
773                 .proc_handler   = &proc_dointvec,
774         },
775         {
776                 .ctl_name       = VM_PANIC_ON_OOM,
777                 .procname       = "panic_on_oom",
778                 .data           = &sysctl_panic_on_oom,
779                 .maxlen         = sizeof(sysctl_panic_on_oom),
780                 .mode           = 0644,
781                 .proc_handler   = &proc_dointvec,
782         },
783         {
784                 .ctl_name       = VM_OVERCOMMIT_RATIO,
785                 .procname       = "overcommit_ratio",
786                 .data           = &sysctl_overcommit_ratio,
787                 .maxlen         = sizeof(sysctl_overcommit_ratio),
788                 .mode           = 0644,
789                 .proc_handler   = &proc_dointvec,
790         },
791         {
792                 .ctl_name       = VM_PAGE_CLUSTER,
793                 .procname       = "page-cluster", 
794                 .data           = &page_cluster,
795                 .maxlen         = sizeof(int),
796                 .mode           = 0644,
797                 .proc_handler   = &proc_dointvec,
798         },
799         {
800                 .ctl_name       = VM_DIRTY_BACKGROUND,
801                 .procname       = "dirty_background_ratio",
802                 .data           = &dirty_background_ratio,
803                 .maxlen         = sizeof(dirty_background_ratio),
804                 .mode           = 0644,
805                 .proc_handler   = &proc_dointvec_minmax,
806                 .strategy       = &sysctl_intvec,
807                 .extra1         = &zero,
808                 .extra2         = &one_hundred,
809         },
810         {
811                 .ctl_name       = VM_DIRTY_RATIO,
812                 .procname       = "dirty_ratio",
813                 .data           = &vm_dirty_ratio,
814                 .maxlen         = sizeof(vm_dirty_ratio),
815                 .mode           = 0644,
816                 .proc_handler   = &proc_dointvec_minmax,
817                 .strategy       = &sysctl_intvec,
818                 .extra1         = &zero,
819                 .extra2         = &one_hundred,
820         },
821         {
822                 .ctl_name       = VM_DIRTY_WB_CS,
823                 .procname       = "dirty_writeback_centisecs",
824                 .data           = &dirty_writeback_interval,
825                 .maxlen         = sizeof(dirty_writeback_interval),
826                 .mode           = 0644,
827                 .proc_handler   = &dirty_writeback_centisecs_handler,
828         },
829         {
830                 .ctl_name       = VM_DIRTY_EXPIRE_CS,
831                 .procname       = "dirty_expire_centisecs",
832                 .data           = &dirty_expire_interval,
833                 .maxlen         = sizeof(dirty_expire_interval),
834                 .mode           = 0644,
835                 .proc_handler   = &proc_dointvec_userhz_jiffies,
836         },
837         {
838                 .ctl_name       = VM_NR_PDFLUSH_THREADS,
839                 .procname       = "nr_pdflush_threads",
840                 .data           = &nr_pdflush_threads,
841                 .maxlen         = sizeof nr_pdflush_threads,
842                 .mode           = 0444 /* read-only*/,
843                 .proc_handler   = &proc_dointvec,
844         },
845         {
846                 .ctl_name       = VM_SWAPPINESS,
847                 .procname       = "swappiness",
848                 .data           = &vm_swappiness,
849                 .maxlen         = sizeof(vm_swappiness),
850                 .mode           = 0644,
851                 .proc_handler   = &proc_dointvec_minmax,
852                 .strategy       = &sysctl_intvec,
853                 .extra1         = &zero,
854                 .extra2         = &one_hundred,
855         },
856 #ifdef CONFIG_HUGETLB_PAGE
857          {
858                 .ctl_name       = VM_HUGETLB_PAGES,
859                 .procname       = "nr_hugepages",
860                 .data           = &max_huge_pages,
861                 .maxlen         = sizeof(unsigned long),
862                 .mode           = 0644,
863                 .proc_handler   = &hugetlb_sysctl_handler,
864                 .extra1         = (void *)&hugetlb_zero,
865                 .extra2         = (void *)&hugetlb_infinity,
866          },
867          {
868                 .ctl_name       = VM_HUGETLB_GROUP,
869                 .procname       = "hugetlb_shm_group",
870                 .data           = &sysctl_hugetlb_shm_group,
871                 .maxlen         = sizeof(gid_t),
872                 .mode           = 0644,
873                 .proc_handler   = &proc_dointvec,
874          },
875          {
876                 .ctl_name       = CTL_UNNUMBERED,
877                 .procname       = "hugepages_treat_as_movable",
878                 .data           = &hugepages_treat_as_movable,
879                 .maxlen         = sizeof(int),
880                 .mode           = 0644,
881                 .proc_handler   = &hugetlb_treat_movable_handler,
882         },
883 #endif
884         {
885                 .ctl_name       = VM_LOWMEM_RESERVE_RATIO,
886                 .procname       = "lowmem_reserve_ratio",
887                 .data           = &sysctl_lowmem_reserve_ratio,
888                 .maxlen         = sizeof(sysctl_lowmem_reserve_ratio),
889                 .mode           = 0644,
890                 .proc_handler   = &lowmem_reserve_ratio_sysctl_handler,
891                 .strategy       = &sysctl_intvec,
892         },
893         {
894                 .ctl_name       = VM_DROP_PAGECACHE,
895                 .procname       = "drop_caches",
896                 .data           = &sysctl_drop_caches,
897                 .maxlen         = sizeof(int),
898                 .mode           = 0644,
899                 .proc_handler   = drop_caches_sysctl_handler,
900                 .strategy       = &sysctl_intvec,
901         },
902         {
903                 .ctl_name       = VM_MIN_FREE_KBYTES,
904                 .procname       = "min_free_kbytes",
905                 .data           = &min_free_kbytes,
906                 .maxlen         = sizeof(min_free_kbytes),
907                 .mode           = 0644,
908                 .proc_handler   = &min_free_kbytes_sysctl_handler,
909                 .strategy       = &sysctl_intvec,
910                 .extra1         = &zero,
911         },
912         {
913                 .ctl_name       = VM_PERCPU_PAGELIST_FRACTION,
914                 .procname       = "percpu_pagelist_fraction",
915                 .data           = &percpu_pagelist_fraction,
916                 .maxlen         = sizeof(percpu_pagelist_fraction),
917                 .mode           = 0644,
918                 .proc_handler   = &percpu_pagelist_fraction_sysctl_handler,
919                 .strategy       = &sysctl_intvec,
920                 .extra1         = &min_percpu_pagelist_fract,
921         },
922 #ifdef CONFIG_MMU
923         {
924                 .ctl_name       = VM_MAX_MAP_COUNT,
925                 .procname       = "max_map_count",
926                 .data           = &sysctl_max_map_count,
927                 .maxlen         = sizeof(sysctl_max_map_count),
928                 .mode           = 0644,
929                 .proc_handler   = &proc_dointvec
930         },
931 #endif
932         {
933                 .ctl_name       = VM_LAPTOP_MODE,
934                 .procname       = "laptop_mode",
935                 .data           = &laptop_mode,
936                 .maxlen         = sizeof(laptop_mode),
937                 .mode           = 0644,
938                 .proc_handler   = &proc_dointvec_jiffies,
939                 .strategy       = &sysctl_jiffies,
940         },
941         {
942                 .ctl_name       = VM_BLOCK_DUMP,
943                 .procname       = "block_dump",
944                 .data           = &block_dump,
945                 .maxlen         = sizeof(block_dump),
946                 .mode           = 0644,
947                 .proc_handler   = &proc_dointvec,
948                 .strategy       = &sysctl_intvec,
949                 .extra1         = &zero,
950         },
951         {
952                 .ctl_name       = VM_VFS_CACHE_PRESSURE,
953                 .procname       = "vfs_cache_pressure",
954                 .data           = &sysctl_vfs_cache_pressure,
955                 .maxlen         = sizeof(sysctl_vfs_cache_pressure),
956                 .mode           = 0644,
957                 .proc_handler   = &proc_dointvec,
958                 .strategy       = &sysctl_intvec,
959                 .extra1         = &zero,
960         },
961 #ifdef HAVE_ARCH_PICK_MMAP_LAYOUT
962         {
963                 .ctl_name       = VM_LEGACY_VA_LAYOUT,
964                 .procname       = "legacy_va_layout",
965                 .data           = &sysctl_legacy_va_layout,
966                 .maxlen         = sizeof(sysctl_legacy_va_layout),
967                 .mode           = 0644,
968                 .proc_handler   = &proc_dointvec,
969                 .strategy       = &sysctl_intvec,
970                 .extra1         = &zero,
971         },
972 #endif
973 #ifdef CONFIG_NUMA
974         {
975                 .ctl_name       = VM_ZONE_RECLAIM_MODE,
976                 .procname       = "zone_reclaim_mode",
977                 .data           = &zone_reclaim_mode,
978                 .maxlen         = sizeof(zone_reclaim_mode),
979                 .mode           = 0644,
980                 .proc_handler   = &proc_dointvec,
981                 .strategy       = &sysctl_intvec,
982                 .extra1         = &zero,
983         },
984         {
985                 .ctl_name       = VM_MIN_UNMAPPED,
986                 .procname       = "min_unmapped_ratio",
987                 .data           = &sysctl_min_unmapped_ratio,
988                 .maxlen         = sizeof(sysctl_min_unmapped_ratio),
989                 .mode           = 0644,
990                 .proc_handler   = &sysctl_min_unmapped_ratio_sysctl_handler,
991                 .strategy       = &sysctl_intvec,
992                 .extra1         = &zero,
993                 .extra2         = &one_hundred,
994         },
995         {
996                 .ctl_name       = VM_MIN_SLAB,
997                 .procname       = "min_slab_ratio",
998                 .data           = &sysctl_min_slab_ratio,
999                 .maxlen         = sizeof(sysctl_min_slab_ratio),
1000                 .mode           = 0644,
1001                 .proc_handler   = &sysctl_min_slab_ratio_sysctl_handler,
1002                 .strategy       = &sysctl_intvec,
1003                 .extra1         = &zero,
1004                 .extra2         = &one_hundred,
1005         },
1006 #endif
1007 #ifdef CONFIG_SMP
1008         {
1009                 .ctl_name       = CTL_UNNUMBERED,
1010                 .procname       = "stat_interval",
1011                 .data           = &sysctl_stat_interval,
1012                 .maxlen         = sizeof(sysctl_stat_interval),
1013                 .mode           = 0644,
1014                 .proc_handler   = &proc_dointvec_jiffies,
1015                 .strategy       = &sysctl_jiffies,
1016         },
1017 #endif
1018 #ifdef CONFIG_SECURITY
1019         {
1020                 .ctl_name       = CTL_UNNUMBERED,
1021                 .procname       = "mmap_min_addr",
1022                 .data           = &mmap_min_addr,
1023                 .maxlen         = sizeof(unsigned long),
1024                 .mode           = 0644,
1025                 .proc_handler   = &proc_doulongvec_minmax,
1026         },
1027 #endif
1028 #ifdef CONFIG_NUMA
1029         {
1030                 .ctl_name       = CTL_UNNUMBERED,
1031                 .procname       = "numa_zonelist_order",
1032                 .data           = &numa_zonelist_order,
1033                 .maxlen         = NUMA_ZONELIST_ORDER_LEN,
1034                 .mode           = 0644,
1035                 .proc_handler   = &numa_zonelist_order_handler,
1036                 .strategy       = &sysctl_string,
1037         },
1038 #endif
1039 #if (defined(CONFIG_X86_32) && !defined(CONFIG_UML))|| \
1040    (defined(CONFIG_SUPERH) && defined(CONFIG_VSYSCALL))
1041         {
1042                 .ctl_name       = VM_VDSO_ENABLED,
1043                 .procname       = "vdso_enabled",
1044                 .data           = &vdso_enabled,
1045                 .maxlen         = sizeof(vdso_enabled),
1046                 .mode           = 0644,
1047                 .proc_handler   = &proc_dointvec,
1048                 .strategy       = &sysctl_intvec,
1049                 .extra1         = &zero,
1050         },
1051 #endif
1052 /*
1053  * NOTE: do not add new entries to this table unless you have read
1054  * Documentation/sysctl/ctl_unnumbered.txt
1055  */
1056         { .ctl_name = 0 }
1057 };
1058
1059 #if defined(CONFIG_BINFMT_MISC) || defined(CONFIG_BINFMT_MISC_MODULE)
1060 static ctl_table binfmt_misc_table[] = {
1061         { .ctl_name = 0 }
1062 };
1063 #endif
1064
1065 static ctl_table fs_table[] = {
1066         {
1067                 .ctl_name       = FS_NRINODE,
1068                 .procname       = "inode-nr",
1069                 .data           = &inodes_stat,
1070                 .maxlen         = 2*sizeof(int),
1071                 .mode           = 0444,
1072                 .proc_handler   = &proc_dointvec,
1073         },
1074         {
1075                 .ctl_name       = FS_STATINODE,
1076                 .procname       = "inode-state",
1077                 .data           = &inodes_stat,
1078                 .maxlen         = 7*sizeof(int),
1079                 .mode           = 0444,
1080                 .proc_handler   = &proc_dointvec,
1081         },
1082         {
1083                 .ctl_name       = FS_NRFILE,
1084                 .procname       = "file-nr",
1085                 .data           = &files_stat,
1086                 .maxlen         = 3*sizeof(int),
1087                 .mode           = 0444,
1088                 .proc_handler   = &proc_nr_files,
1089         },
1090         {
1091                 .ctl_name       = FS_MAXFILE,
1092                 .procname       = "file-max",
1093                 .data           = &files_stat.max_files,
1094                 .maxlen         = sizeof(int),
1095                 .mode           = 0644,
1096                 .proc_handler   = &proc_dointvec,
1097         },
1098         {
1099                 .ctl_name       = FS_DENTRY,
1100                 .procname       = "dentry-state",
1101                 .data           = &dentry_stat,
1102                 .maxlen         = 6*sizeof(int),
1103                 .mode           = 0444,
1104                 .proc_handler   = &proc_dointvec,
1105         },
1106         {
1107                 .ctl_name       = FS_OVERFLOWUID,
1108                 .procname       = "overflowuid",
1109                 .data           = &fs_overflowuid,
1110                 .maxlen         = sizeof(int),
1111                 .mode           = 0644,
1112                 .proc_handler   = &proc_dointvec_minmax,
1113                 .strategy       = &sysctl_intvec,
1114                 .extra1         = &minolduid,
1115                 .extra2         = &maxolduid,
1116         },
1117         {
1118                 .ctl_name       = FS_OVERFLOWGID,
1119                 .procname       = "overflowgid",
1120                 .data           = &fs_overflowgid,
1121                 .maxlen         = sizeof(int),
1122                 .mode           = 0644,
1123                 .proc_handler   = &proc_dointvec_minmax,
1124                 .strategy       = &sysctl_intvec,
1125                 .extra1         = &minolduid,
1126                 .extra2         = &maxolduid,
1127         },
1128         {
1129                 .ctl_name       = FS_LEASES,
1130                 .procname       = "leases-enable",
1131                 .data           = &leases_enable,
1132                 .maxlen         = sizeof(int),
1133                 .mode           = 0644,
1134                 .proc_handler   = &proc_dointvec,
1135         },
1136 #ifdef CONFIG_DNOTIFY
1137         {
1138                 .ctl_name       = FS_DIR_NOTIFY,
1139                 .procname       = "dir-notify-enable",
1140                 .data           = &dir_notify_enable,
1141                 .maxlen         = sizeof(int),
1142                 .mode           = 0644,
1143                 .proc_handler   = &proc_dointvec,
1144         },
1145 #endif
1146 #ifdef CONFIG_MMU
1147         {
1148                 .ctl_name       = FS_LEASE_TIME,
1149                 .procname       = "lease-break-time",
1150                 .data           = &lease_break_time,
1151                 .maxlen         = sizeof(int),
1152                 .mode           = 0644,
1153                 .proc_handler   = &proc_dointvec_minmax,
1154                 .strategy       = &sysctl_intvec,
1155                 .extra1         = &zero,
1156                 .extra2         = &two,
1157         },
1158         {
1159                 .ctl_name       = FS_AIO_NR,
1160                 .procname       = "aio-nr",
1161                 .data           = &aio_nr,
1162                 .maxlen         = sizeof(aio_nr),
1163                 .mode           = 0444,
1164                 .proc_handler   = &proc_doulongvec_minmax,
1165         },
1166         {
1167                 .ctl_name       = FS_AIO_MAX_NR,
1168                 .procname       = "aio-max-nr",
1169                 .data           = &aio_max_nr,
1170                 .maxlen         = sizeof(aio_max_nr),
1171                 .mode           = 0644,
1172                 .proc_handler   = &proc_doulongvec_minmax,
1173         },
1174 #ifdef CONFIG_INOTIFY_USER
1175         {
1176                 .ctl_name       = FS_INOTIFY,
1177                 .procname       = "inotify",
1178                 .mode           = 0555,
1179                 .child          = inotify_table,
1180         },
1181 #endif  
1182 #endif
1183         {
1184                 .ctl_name       = KERN_SETUID_DUMPABLE,
1185                 .procname       = "suid_dumpable",
1186                 .data           = &suid_dumpable,
1187                 .maxlen         = sizeof(int),
1188                 .mode           = 0644,
1189                 .proc_handler   = &proc_dointvec,
1190         },
1191 #if defined(CONFIG_BINFMT_MISC) || defined(CONFIG_BINFMT_MISC_MODULE)
1192         {
1193                 .ctl_name       = CTL_UNNUMBERED,
1194                 .procname       = "binfmt_misc",
1195                 .mode           = 0555,
1196                 .child          = binfmt_misc_table,
1197         },
1198 #endif
1199 /*
1200  * NOTE: do not add new entries to this table unless you have read
1201  * Documentation/sysctl/ctl_unnumbered.txt
1202  */
1203         { .ctl_name = 0 }
1204 };
1205
1206 static ctl_table debug_table[] = {
1207 #if defined(CONFIG_X86) || defined(CONFIG_PPC)
1208         {
1209                 .ctl_name       = CTL_UNNUMBERED,
1210                 .procname       = "exception-trace",
1211                 .data           = &show_unhandled_signals,
1212                 .maxlen         = sizeof(int),
1213                 .mode           = 0644,
1214                 .proc_handler   = proc_dointvec
1215         },
1216 #endif
1217         { .ctl_name = 0 }
1218 };
1219
1220 static ctl_table dev_table[] = {
1221         { .ctl_name = 0 }
1222 };
1223
1224 static DEFINE_SPINLOCK(sysctl_lock);
1225
1226 /* called under sysctl_lock */
1227 static int use_table(struct ctl_table_header *p)
1228 {
1229         if (unlikely(p->unregistering))
1230                 return 0;
1231         p->used++;
1232         return 1;
1233 }
1234
1235 /* called under sysctl_lock */
1236 static void unuse_table(struct ctl_table_header *p)
1237 {
1238         if (!--p->used)
1239                 if (unlikely(p->unregistering))
1240                         complete(p->unregistering);
1241 }
1242
1243 /* called under sysctl_lock, will reacquire if has to wait */
1244 static void start_unregistering(struct ctl_table_header *p)
1245 {
1246         /*
1247          * if p->used is 0, nobody will ever touch that entry again;
1248          * we'll eliminate all paths to it before dropping sysctl_lock
1249          */
1250         if (unlikely(p->used)) {
1251                 struct completion wait;
1252                 init_completion(&wait);
1253                 p->unregistering = &wait;
1254                 spin_unlock(&sysctl_lock);
1255                 wait_for_completion(&wait);
1256                 spin_lock(&sysctl_lock);
1257         }
1258         /*
1259          * do not remove from the list until nobody holds it; walking the
1260          * list in do_sysctl() relies on that.
1261          */
1262         list_del_init(&p->ctl_entry);
1263 }
1264
1265 void sysctl_head_finish(struct ctl_table_header *head)
1266 {
1267         if (!head)
1268                 return;
1269         spin_lock(&sysctl_lock);
1270         unuse_table(head);
1271         spin_unlock(&sysctl_lock);
1272 }
1273
1274 struct ctl_table_header *sysctl_head_next(struct ctl_table_header *prev)
1275 {
1276         struct ctl_table_header *head;
1277         struct list_head *tmp;
1278         spin_lock(&sysctl_lock);
1279         if (prev) {
1280                 tmp = &prev->ctl_entry;
1281                 unuse_table(prev);
1282                 goto next;
1283         }
1284         tmp = &root_table_header.ctl_entry;
1285         for (;;) {
1286                 head = list_entry(tmp, struct ctl_table_header, ctl_entry);
1287
1288                 if (!use_table(head))
1289                         goto next;
1290                 spin_unlock(&sysctl_lock);
1291                 return head;
1292         next:
1293                 tmp = tmp->next;
1294                 if (tmp == &root_table_header.ctl_entry)
1295                         break;
1296         }
1297         spin_unlock(&sysctl_lock);
1298         return NULL;
1299 }
1300
1301 #ifdef CONFIG_SYSCTL_SYSCALL
1302 int do_sysctl(int __user *name, int nlen, void __user *oldval, size_t __user *oldlenp,
1303                void __user *newval, size_t newlen)
1304 {
1305         struct ctl_table_header *head;
1306         int error = -ENOTDIR;
1307
1308         if (nlen <= 0 || nlen >= CTL_MAXNAME)
1309                 return -ENOTDIR;
1310         if (oldval) {
1311                 int old_len;
1312                 if (!oldlenp || get_user(old_len, oldlenp))
1313                         return -EFAULT;
1314         }
1315
1316         for (head = sysctl_head_next(NULL); head;
1317                         head = sysctl_head_next(head)) {
1318                 error = parse_table(name, nlen, oldval, oldlenp, 
1319                                         newval, newlen, head->ctl_table);
1320                 if (error != -ENOTDIR) {
1321                         sysctl_head_finish(head);
1322                         break;
1323                 }
1324         }
1325         return error;
1326 }
1327
1328 asmlinkage long sys_sysctl(struct __sysctl_args __user *args)
1329 {
1330         struct __sysctl_args tmp;
1331         int error;
1332
1333         if (copy_from_user(&tmp, args, sizeof(tmp)))
1334                 return -EFAULT;
1335
1336         lock_kernel();
1337         error = do_sysctl(tmp.name, tmp.nlen, tmp.oldval, tmp.oldlenp,
1338                           tmp.newval, tmp.newlen);
1339         unlock_kernel();
1340         return error;
1341 }
1342 #endif /* CONFIG_SYSCTL_SYSCALL */
1343
1344 /*
1345  * sysctl_perm does NOT grant the superuser all rights automatically, because
1346  * some sysctl variables are readonly even to root.
1347  */
1348
1349 static int test_perm(int mode, int op)
1350 {
1351         if (!current->euid)
1352                 mode >>= 6;
1353         else if (in_egroup_p(0))
1354                 mode >>= 3;
1355         if ((mode & op & 0007) == op)
1356                 return 0;
1357         return -EACCES;
1358 }
1359
1360 int sysctl_perm(ctl_table *table, int op)
1361 {
1362         int error;
1363         error = security_sysctl(table, op);
1364         if (error)
1365                 return error;
1366         return test_perm(table->mode, op);
1367 }
1368
1369 #ifdef CONFIG_SYSCTL_SYSCALL
1370 static int parse_table(int __user *name, int nlen,
1371                        void __user *oldval, size_t __user *oldlenp,
1372                        void __user *newval, size_t newlen,
1373                        ctl_table *table)
1374 {
1375         int n;
1376 repeat:
1377         if (!nlen)
1378                 return -ENOTDIR;
1379         if (get_user(n, name))
1380                 return -EFAULT;
1381         for ( ; table->ctl_name || table->procname; table++) {
1382                 if (!table->ctl_name)
1383                         continue;
1384                 if (n == table->ctl_name) {
1385                         int error;
1386                         if (table->child) {
1387                                 if (sysctl_perm(table, 001))
1388                                         return -EPERM;
1389                                 name++;
1390                                 nlen--;
1391                                 table = table->child;
1392                                 goto repeat;
1393                         }
1394                         error = do_sysctl_strategy(table, name, nlen,
1395                                                    oldval, oldlenp,
1396                                                    newval, newlen);
1397                         return error;
1398                 }
1399         }
1400         return -ENOTDIR;
1401 }
1402
1403 /* Perform the actual read/write of a sysctl table entry. */
1404 int do_sysctl_strategy (ctl_table *table, 
1405                         int __user *name, int nlen,
1406                         void __user *oldval, size_t __user *oldlenp,
1407                         void __user *newval, size_t newlen)
1408 {
1409         int op = 0, rc;
1410         size_t len;
1411
1412         if (oldval)
1413                 op |= 004;
1414         if (newval) 
1415                 op |= 002;
1416         if (sysctl_perm(table, op))
1417                 return -EPERM;
1418
1419         if (table->strategy) {
1420                 rc = table->strategy(table, name, nlen, oldval, oldlenp,
1421                                      newval, newlen);
1422                 if (rc < 0)
1423                         return rc;
1424                 if (rc > 0)
1425                         return 0;
1426         }
1427
1428         /* If there is no strategy routine, or if the strategy returns
1429          * zero, proceed with automatic r/w */
1430         if (table->data && table->maxlen) {
1431                 if (oldval && oldlenp) {
1432                         if (get_user(len, oldlenp))
1433                                 return -EFAULT;
1434                         if (len) {
1435                                 if (len > table->maxlen)
1436                                         len = table->maxlen;
1437                                 if(copy_to_user(oldval, table->data, len))
1438                                         return -EFAULT;
1439                                 if(put_user(len, oldlenp))
1440                                         return -EFAULT;
1441                         }
1442                 }
1443                 if (newval && newlen) {
1444                         len = newlen;
1445                         if (len > table->maxlen)
1446                                 len = table->maxlen;
1447                         if(copy_from_user(table->data, newval, len))
1448                                 return -EFAULT;
1449                 }
1450         }
1451         return 0;
1452 }
1453 #endif /* CONFIG_SYSCTL_SYSCALL */
1454
1455 static void sysctl_set_parent(struct ctl_table *parent, struct ctl_table *table)
1456 {
1457         for (; table->ctl_name || table->procname; table++) {
1458                 table->parent = parent;
1459                 if (table->child)
1460                         sysctl_set_parent(table, table->child);
1461         }
1462 }
1463
1464 static __init int sysctl_init(void)
1465 {
1466         sysctl_set_parent(NULL, root_table);
1467         return 0;
1468 }
1469
1470 core_initcall(sysctl_init);
1471
1472 /**
1473  * register_sysctl_table - register a sysctl hierarchy
1474  * @table: the top-level table structure
1475  *
 * Register a sysctl table hierarchy. @table should be a filled in ctl_table
 * array. An entry with a ctl_name of 0 and a %NULL procname terminates the
 * table.
1478  *
1479  * The members of the &ctl_table structure are used as follows:
1480  *
1481  * ctl_name - This is the numeric sysctl value used by sysctl(2). The number
1482  *            must be unique within that level of sysctl
1483  *
1484  * procname - the name of the sysctl file under /proc/sys. Set to %NULL to not
1485  *            enter a sysctl file
1486  *
1487  * data - a pointer to data for use by proc_handler
1488  *
1489  * maxlen - the maximum size in bytes of the data
1490  *
1491  * mode - the file permissions for the /proc/sys file, and for sysctl(2)
1492  *
1493  * child - a pointer to the child sysctl table if this entry is a directory, or
1494  *         %NULL.
1495  *
1496  * proc_handler - the text handler routine (described below)
1497  *
1498  * strategy - the strategy routine (described below)
1499  *
1500  * de - for internal use by the sysctl routines
1501  *
1502  * extra1, extra2 - extra pointers usable by the proc handler routines
1503  *
1504  * Leaf nodes in the sysctl tree will be represented by a single file
1505  * under /proc; non-leaf nodes will be represented by directories.
1506  *
1507  * sysctl(2) can automatically manage read and write requests through
1508  * the sysctl table.  The data and maxlen fields of the ctl_table
1509  * struct enable minimal validation of the values being written to be
1510  * performed, and the mode field allows minimal authentication.
1511  *
1512  * More sophisticated management can be enabled by the provision of a
1513  * strategy routine with the table entry.  This will be called before
1514  * any automatic read or write of the data is performed.
1515  *
1516  * The strategy routine may return
1517  *
1518  * < 0 - Error occurred (error is passed to user process)
1519  *
1520  * 0   - OK - proceed with automatic read or write.
1521  *
1522  * > 0 - OK - read or write has been done by the strategy routine, so
1523  *       return immediately.
1524  *
1525  * There must be a proc_handler routine for any terminal nodes
1526  * mirrored under /proc/sys (non-terminals are handled by a built-in
1527  * directory handler).  Several default handlers are available to
1528  * cover common cases -
1529  *
1530  * proc_dostring(), proc_dointvec(), proc_dointvec_jiffies(),
1531  * proc_dointvec_userhz_jiffies(), proc_dointvec_minmax(), 
1532  * proc_doulongvec_ms_jiffies_minmax(), proc_doulongvec_minmax()
1533  *
1534  * It is the handler's job to read the input buffer from user memory
1535  * and process it. The handler should return 0 on success.
1536  *
1537  * This routine returns %NULL on a failure to register, and a pointer
1538  * to the table header on success.
1539  */
1540 struct ctl_table_header *register_sysctl_table(ctl_table * table)
1541 {
1542         struct ctl_table_header *tmp;
1543         tmp = kmalloc(sizeof(struct ctl_table_header), GFP_KERNEL);
1544         if (!tmp)
1545                 return NULL;
1546         tmp->ctl_table = table;
1547         INIT_LIST_HEAD(&tmp->ctl_entry);
1548         tmp->used = 0;
1549         tmp->unregistering = NULL;
1550         sysctl_set_parent(NULL, table);
1551         spin_lock(&sysctl_lock);
1552         list_add_tail(&tmp->ctl_entry, &root_table_header.ctl_entry);
1553         spin_unlock(&sysctl_lock);
1554         return tmp;
1555 }
1556
1557 /**
1558  * unregister_sysctl_table - unregister a sysctl table hierarchy
1559  * @header: the header returned from register_sysctl_table
1560  *
1561  * Unregisters the sysctl table and all children. proc entries may not
1562  * actually be removed until they are no longer used by anyone.
1563  */
1564 void unregister_sysctl_table(struct ctl_table_header * header)
1565 {
1566         might_sleep();
1567         spin_lock(&sysctl_lock);
1568         start_unregistering(header);
1569         spin_unlock(&sysctl_lock);
1570         kfree(header);
1571 }
1572
1573 #else /* !CONFIG_SYSCTL */
1574 struct ctl_table_header *register_sysctl_table(ctl_table * table)
1575 {
1576         return NULL;
1577 }
1578
/* Stub for kernels built without CONFIG_SYSCTL: does nothing. */
void unregister_sysctl_table(struct ctl_table_header *table)
{
}
1582
1583 #endif /* CONFIG_SYSCTL */
1584
1585 /*
1586  * /proc/sys support
1587  */
1588
1589 #ifdef CONFIG_PROC_SYSCTL
1590
1591 static int _proc_do_string(void* data, int maxlen, int write,
1592                            struct file *filp, void __user *buffer,
1593                            size_t *lenp, loff_t *ppos)
1594 {
1595         size_t len;
1596         char __user *p;
1597         char c;
1598
1599         if (!data || !maxlen || !*lenp) {
1600                 *lenp = 0;
1601                 return 0;
1602         }
1603
1604         if (write) {
1605                 len = 0;
1606                 p = buffer;
1607                 while (len < *lenp) {
1608                         if (get_user(c, p++))
1609                                 return -EFAULT;
1610                         if (c == 0 || c == '\n')
1611                                 break;
1612                         len++;
1613                 }
1614                 if (len >= maxlen)
1615                         len = maxlen-1;
1616                 if(copy_from_user(data, buffer, len))
1617                         return -EFAULT;
1618                 ((char *) data)[len] = 0;
1619                 *ppos += *lenp;
1620         } else {
1621                 len = strlen(data);
1622                 if (len > maxlen)
1623                         len = maxlen;
1624
1625                 if (*ppos > len) {
1626                         *lenp = 0;
1627                         return 0;
1628                 }
1629
1630                 data += *ppos;
1631                 len  -= *ppos;
1632
1633                 if (len > *lenp)
1634                         len = *lenp;
1635                 if (len)
1636                         if(copy_to_user(buffer, data, len))
1637                                 return -EFAULT;
1638                 if (len < *lenp) {
1639                         if(put_user('\n', ((char __user *) buffer) + len))
1640                                 return -EFAULT;
1641                         len++;
1642                 }
1643                 *lenp = len;
1644                 *ppos += len;
1645         }
1646         return 0;
1647 }
1648
1649 /**
1650  * proc_dostring - read a string sysctl
1651  * @table: the sysctl table
1652  * @write: %TRUE if this is a write to the sysctl file
1653  * @filp: the file structure
1654  * @buffer: the user buffer
1655  * @lenp: the size of the user buffer
1656  * @ppos: file position
1657  *
1658  * Reads/writes a string from/to the user buffer. If the kernel
1659  * buffer provided is not large enough to hold the string, the
1660  * string is truncated. The copied string is %NULL-terminated.
1661  * If the string is being read by the user process, it is copied
1662  * and a newline '\n' is added. It is truncated if the buffer is
1663  * not large enough.
1664  *
1665  * Returns 0 on success.
1666  */
1667 int proc_dostring(ctl_table *table, int write, struct file *filp,
1668                   void __user *buffer, size_t *lenp, loff_t *ppos)
1669 {
1670         return _proc_do_string(table->data, table->maxlen, write, filp,
1671                                buffer, lenp, ppos);
1672 }
1673
1674
/*
 * Default converter for the integer-vector handlers.
 *
 * Write (@write != 0): store the parsed magnitude *@lvalp, negated when
 * *@negp is set, into *@valp.
 * Read (@write == 0): split *@valp into a sign flag (*@negp) and an
 * unsigned magnitude (*@lvalp).
 *
 * @data is unused. Always returns 0.
 */
static int do_proc_dointvec_conv(int *negp, unsigned long *lvalp,
				 int *valp,
				 int write, void *data)
{
	if (write) {
		*valp = *negp ? -*lvalp : *lvalp;
	} else {
		int val = *valp;
		if (val < 0) {
			*negp = -1;
			/*
			 * Widen first, then negate: negating INT_MIN as an
			 * int overflows and yields a bogus magnitude.
			 */
			*lvalp = -(unsigned long)val;
		} else {
			*negp = 0;
			*lvalp = (unsigned long)val;
		}
	}
	return 0;
}
1693
1694 static int __do_proc_dointvec(void *tbl_data, ctl_table *table,
1695                   int write, struct file *filp, void __user *buffer,
1696                   size_t *lenp, loff_t *ppos,
1697                   int (*conv)(int *negp, unsigned long *lvalp, int *valp,
1698                               int write, void *data),
1699                   void *data)
1700 {
1701 #define TMPBUFLEN 21
1702         int *i, vleft, first=1, neg, val;
1703         unsigned long lval;
1704         size_t left, len;
1705         
1706         char buf[TMPBUFLEN], *p;
1707         char __user *s = buffer;
1708         
1709         if (!tbl_data || !table->maxlen || !*lenp ||
1710             (*ppos && !write)) {
1711                 *lenp = 0;
1712                 return 0;
1713         }
1714         
1715         i = (int *) tbl_data;
1716         vleft = table->maxlen / sizeof(*i);
1717         left = *lenp;
1718
1719         if (!conv)
1720                 conv = do_proc_dointvec_conv;
1721
1722         for (; left && vleft--; i++, first=0) {
1723                 if (write) {
1724                         while (left) {
1725                                 char c;
1726                                 if (get_user(c, s))
1727                                         return -EFAULT;
1728                                 if (!isspace(c))
1729                                         break;
1730                                 left--;
1731                                 s++;
1732                         }
1733                         if (!left)
1734                                 break;
1735                         neg = 0;
1736                         len = left;
1737                         if (len > sizeof(buf) - 1)
1738                                 len = sizeof(buf) - 1;
1739                         if (copy_from_user(buf, s, len))
1740                                 return -EFAULT;
1741                         buf[len] = 0;
1742                         p = buf;
1743                         if (*p == '-' && left > 1) {
1744                                 neg = 1;
1745                                 p++;
1746                         }
1747                         if (*p < '0' || *p > '9')
1748                                 break;
1749
1750                         lval = simple_strtoul(p, &p, 0);
1751
1752                         len = p-buf;
1753                         if ((len < left) && *p && !isspace(*p))
1754                                 break;
1755                         if (neg)
1756                                 val = -val;
1757                         s += len;
1758                         left -= len;
1759
1760                         if (conv(&neg, &lval, i, 1, data))
1761                                 break;
1762                 } else {
1763                         p = buf;
1764                         if (!first)
1765                                 *p++ = '\t';
1766         
1767                         if (conv(&neg, &lval, i, 0, data))
1768                                 break;
1769
1770                         sprintf(p, "%s%lu", neg ? "-" : "", lval);
1771                         len = strlen(buf);
1772                         if (len > left)
1773                                 len = left;
1774                         if(copy_to_user(s, buf, len))
1775                                 return -EFAULT;
1776                         left -= len;
1777                         s += len;
1778                 }
1779         }
1780
1781         if (!write && !first && left) {
1782                 if(put_user('\n', s))
1783                         return -EFAULT;
1784                 left--, s++;
1785         }
1786         if (write) {
1787                 while (left) {
1788                         char c;
1789                         if (get_user(c, s++))
1790                                 return -EFAULT;
1791                         if (!isspace(c))
1792                                 break;
1793                         left--;
1794                 }
1795         }
1796         if (write && first)
1797                 return -EINVAL;
1798         *lenp -= left;
1799         *ppos += *lenp;
1800         return 0;
1801 #undef TMPBUFLEN
1802 }
1803
1804 static int do_proc_dointvec(ctl_table *table, int write, struct file *filp,
1805                   void __user *buffer, size_t *lenp, loff_t *ppos,
1806                   int (*conv)(int *negp, unsigned long *lvalp, int *valp,
1807                               int write, void *data),
1808                   void *data)
1809 {
1810         return __do_proc_dointvec(table->data, table, write, filp,
1811                         buffer, lenp, ppos, conv, data);
1812 }
1813
1814 /**
1815  * proc_dointvec - read a vector of integers
1816  * @table: the sysctl table
1817  * @write: %TRUE if this is a write to the sysctl file
1818  * @filp: the file structure
1819  * @buffer: the user buffer
1820  * @lenp: the size of the user buffer
1821  * @ppos: file position
1822  *
1823  * Reads/writes up to table->maxlen/sizeof(unsigned int) integer
1824  * values from/to the user buffer, treated as an ASCII string. 
1825  *
1826  * Returns 0 on success.
1827  */
1828 int proc_dointvec(ctl_table *table, int write, struct file *filp,
1829                      void __user *buffer, size_t *lenp, loff_t *ppos)
1830 {
1831     return do_proc_dointvec(table,write,filp,buffer,lenp,ppos,
1832                             NULL,NULL);
1833 }
1834
#define OP_SET	0
#define OP_AND	1
#define OP_OR	2

/*
 * Converter that combines a written value with the stored one.
 *
 * @data points to an int holding one of OP_SET, OP_AND or OP_OR.
 * Write: the parsed value (negated when *@negp is set) replaces, is
 * AND-ed into, or is OR-ed into *@valp according to that operation.
 * Read: *@valp is split into a sign flag and an unsigned magnitude.
 *
 * Always returns 0.
 */
static int do_proc_dointvec_bset_conv(int *negp, unsigned long *lvalp,
				      int *valp,
				      int write, void *data)
{
	int op = *(int *)data;
	if (write) {
		int val = *negp ? -*lvalp : *lvalp;
		switch(op) {
		case OP_SET:	*valp = val; break;
		case OP_AND:	*valp &= val; break;
		case OP_OR:	*valp |= val; break;
		}
	} else {
		int val = *valp;
		if (val < 0) {
			*negp = -1;
			/*
			 * Widen first, then negate: negating INT_MIN as an
			 * int overflows and yields a bogus magnitude.
			 */
			*lvalp = -(unsigned long)val;
		} else {
			*negp = 0;
			*lvalp = (unsigned long)val;
		}
	}
	return 0;
}
1863
1864 /*
1865  *      init may raise the set.
1866  */
1867  
1868 int proc_dointvec_bset(ctl_table *table, int write, struct file *filp,
1869                         void __user *buffer, size_t *lenp, loff_t *ppos)
1870 {
1871         int op;
1872
1873         if (write && !capable(CAP_SYS_MODULE)) {
1874                 return -EPERM;
1875         }
1876
1877         op = is_init(current) ? OP_SET : OP_AND;
1878         return do_proc_dointvec(table,write,filp,buffer,lenp,ppos,
1879                                 do_proc_dointvec_bset_conv,&op);
1880 }
1881
1882 /*
1883  *      Taint values can only be increased
1884  */
1885 static int proc_dointvec_taint(ctl_table *table, int write, struct file *filp,
1886                                void __user *buffer, size_t *lenp, loff_t *ppos)
1887 {
1888         int op;
1889
1890         if (write && !capable(CAP_SYS_ADMIN))
1891                 return -EPERM;
1892
1893         op = OP_OR;
1894         return do_proc_dointvec(table,write,filp,buffer,lenp,ppos,
1895                                 do_proc_dointvec_bset_conv,&op);
1896 }
1897
/* Optional bounds for do_proc_dointvec_minmax_conv(); NULL means "no limit". */
struct do_proc_dointvec_minmax_conv_param {
	int *min;
	int *max;
};

/*
 * Range-checking converter.
 *
 * @data points to a struct do_proc_dointvec_minmax_conv_param.
 * Write: the parsed value (negated when *@negp is set) is stored in *@valp
 * only if it lies within the configured bounds; otherwise -EINVAL is
 * returned and *@valp is left untouched.
 * Read: *@valp is split into a sign flag and an unsigned magnitude.
 *
 * Returns 0 on success or -EINVAL for an out-of-range write.
 */
static int do_proc_dointvec_minmax_conv(int *negp, unsigned long *lvalp,
					int *valp,
					int write, void *data)
{
	struct do_proc_dointvec_minmax_conv_param *param = data;
	if (write) {
		int val = *negp ? -*lvalp : *lvalp;
		if ((param->min && *param->min > val) ||
		    (param->max && *param->max < val))
			return -EINVAL;
		*valp = val;
	} else {
		int val = *valp;
		if (val < 0) {
			*negp = -1;
			/*
			 * Widen first, then negate: negating INT_MIN as an
			 * int overflows and yields a bogus magnitude.
			 */
			*lvalp = -(unsigned long)val;
		} else {
			*negp = 0;
			*lvalp = (unsigned long)val;
		}
	}
	return 0;
}
1926
1927 /**
1928  * proc_dointvec_minmax - read a vector of integers with min/max values
1929  * @table: the sysctl table
1930  * @write: %TRUE if this is a write to the sysctl file
1931  * @filp: the file structure
1932  * @buffer: the user buffer
1933  * @lenp: the size of the user buffer
1934  * @ppos: file position
1935  *
1936  * Reads/writes up to table->maxlen/sizeof(unsigned int) integer
1937  * values from/to the user buffer, treated as an ASCII string.
1938  *
1939  * This routine will ensure the values are within the range specified by
1940  * table->extra1 (min) and table->extra2 (max).
1941  *
1942  * Returns 0 on success.
1943  */
1944 int proc_dointvec_minmax(ctl_table *table, int write, struct file *filp,
1945                   void __user *buffer, size_t *lenp, loff_t *ppos)
1946 {
1947         struct do_proc_dointvec_minmax_conv_param param = {
1948                 .min = (int *) table->extra1,
1949                 .max = (int *) table->extra2,
1950         };
1951         return do_proc_dointvec(table, write, filp, buffer, lenp, ppos,
1952                                 do_proc_dointvec_minmax_conv, &param);
1953 }
1954
1955 static int __do_proc_doulongvec_minmax(void *data, ctl_table *table, int write,
1956                                      struct file *filp,
1957                                      void __user *buffer,
1958                                      size_t *lenp, loff_t *ppos,
1959                                      unsigned long convmul,
1960                                      unsigned long convdiv)
1961 {
1962 #define TMPBUFLEN 21
1963         unsigned long *i, *min, *max, val;
1964         int vleft, first=1, neg;
1965         size_t len, left;
1966         char buf[TMPBUFLEN], *p;
1967         char __user *s = buffer;
1968         
1969         if (!data || !table->maxlen || !*lenp ||
1970             (*ppos && !write)) {
1971                 *lenp = 0;
1972                 return 0;
1973         }
1974         
1975         i = (unsigned long *) data;
1976         min = (unsigned long *) table->extra1;
1977         max = (unsigned long *) table->extra2;
1978         vleft = table->maxlen / sizeof(unsigned long);
1979         left = *lenp;
1980         
1981         for (; left && vleft--; i++, min++, max++, first=0) {
1982                 if (write) {
1983                         while (left) {
1984                                 char c;
1985                                 if (get_user(c, s))
1986                                         return -EFAULT;
1987                                 if (!isspace(c))
1988                                         break;
1989                                 left--;
1990                                 s++;
1991                         }
1992                         if (!left)
1993                                 break;
1994                         neg = 0;
1995                         len = left;
1996                         if (len > TMPBUFLEN-1)
1997                                 len = TMPBUFLEN-1;
1998                         if (copy_from_user(buf, s, len))
1999                                 return -EFAULT;
2000                         buf[len] = 0;
2001                         p = buf;
2002                         if (*p == '-' && left > 1) {
2003                                 neg = 1;
2004                                 p++;
2005                         }
2006                         if (*p < '0' || *p > '9')
2007                                 break;
2008                         val = simple_strtoul(p, &p, 0) * convmul / convdiv ;
2009                         len = p-buf;
2010                         if ((len < left) && *p && !isspace(*p))
2011                                 break;
2012                         if (neg)
2013                                 val = -val;
2014                         s += len;
2015                         left -= len;
2016
2017                         if(neg)
2018                                 continue;
2019                         if ((min && val < *min) || (max && val > *max))
2020                                 continue;
2021                         *i = val;
2022                 } else {
2023                         p = buf;
2024                         if (!first)
2025                                 *p++ = '\t';
2026                         sprintf(p, "%lu", convdiv * (*i) / convmul);
2027                         len = strlen(buf);
2028                         if (len > left)
2029                                 len = left;
2030                         if(copy_to_user(s, buf, len))
2031                                 return -EFAULT;
2032                         left -= len;
2033                         s += len;
2034                 }
2035         }
2036
2037         if (!write && !first && left) {
2038                 if(put_user('\n', s))
2039                         return -EFAULT;
2040                 left--, s++;
2041         }
2042         if (write) {
2043                 while (left) {
2044                         char c;
2045                         if (get_user(c, s++))
2046                                 return -EFAULT;
2047                         if (!isspace(c))
2048                                 break;
2049                         left--;
2050                 }
2051         }
2052         if (write && first)
2053                 return -EINVAL;
2054         *lenp -= left;
2055         *ppos += *lenp;
2056         return 0;
2057 #undef TMPBUFLEN
2058 }
2059
2060 static int do_proc_doulongvec_minmax(ctl_table *table, int write,
2061                                      struct file *filp,
2062                                      void __user *buffer,
2063                                      size_t *lenp, loff_t *ppos,
2064                                      unsigned long convmul,
2065                                      unsigned long convdiv)
2066 {
2067         return __do_proc_doulongvec_minmax(table->data, table, write,
2068                         filp, buffer, lenp, ppos, convmul, convdiv);
2069 }
2070
2071 /**
2072  * proc_doulongvec_minmax - read a vector of long integers with min/max values
2073  * @table: the sysctl table
2074  * @write: %TRUE if this is a write to the sysctl file
2075  * @filp: the file structure
2076  * @buffer: the user buffer
2077  * @lenp: the size of the user buffer
2078  * @ppos: file position
2079  *
2080  * Reads/writes up to table->maxlen/sizeof(unsigned long) unsigned long
2081  * values from/to the user buffer, treated as an ASCII string.
2082  *
2083  * This routine will ensure the values are within the range specified by
2084  * table->extra1 (min) and table->extra2 (max).
2085  *
2086  * Returns 0 on success.
2087  */
2088 int proc_doulongvec_minmax(ctl_table *table, int write, struct file *filp,
2089                            void __user *buffer, size_t *lenp, loff_t *ppos)
2090 {
2091     return do_proc_doulongvec_minmax(table, write, filp, buffer, lenp, ppos, 1l, 1l);
2092 }
2093
2094 /**
2095  * proc_doulongvec_ms_jiffies_minmax - read a vector of millisecond values with min/max values
2096  * @table: the sysctl table
2097  * @write: %TRUE if this is a write to the sysctl file
2098  * @filp: the file structure
2099  * @buffer: the user buffer
2100  * @lenp: the size of the user buffer
2101  * @ppos: file position
2102  *
2103  * Reads/writes up to table->maxlen/sizeof(unsigned long) unsigned long
2104  * values from/to the user buffer, treated as an ASCII string. The values
2105  * are treated as milliseconds, and converted to jiffies when they are stored.
2106  *
2107  * This routine will ensure the values are within the range specified by
2108  * table->extra1 (min) and table->extra2 (max).
2109  *
2110  * Returns 0 on success.
2111  */
2112 int proc_doulongvec_ms_jiffies_minmax(ctl_table *table, int write,
2113                                       struct file *filp,
2114                                       void __user *buffer,
2115                                       size_t *lenp, loff_t *ppos)
2116 {
2117     return do_proc_doulongvec_minmax(table, write, filp, buffer,
2118                                      lenp, ppos, HZ, 1000l);
2119 }
2120
2121
2122 static int do_proc_dointvec_jiffies_conv(int *negp, unsigned long *lvalp,
2123                                          int *valp,
2124                                          int write, void *data)
2125 {
2126         if (write) {
2127                 if (*lvalp > LONG_MAX / HZ)
2128                         return 1;
2129                 *valp = *negp ? -(*lvalp*HZ) : (*lvalp*HZ);
2130         } else {
2131                 int val = *valp;
2132                 unsigned long lval;
2133                 if (val < 0) {
2134                         *negp = -1;
2135                         lval = (unsigned long)-val;
2136                 } else {
2137                         *negp = 0;
2138                         lval = (unsigned long)val;
2139                 }
2140                 *lvalp = lval / HZ;
2141         }
2142         return 0;
2143 }
2144
2145 static int do_proc_dointvec_userhz_jiffies_conv(int *negp, unsigned long *lvalp,
2146                                                 int *valp,
2147                                                 int write, void *data)
2148 {
2149         if (write) {
2150                 if (USER_HZ < HZ && *lvalp > (LONG_MAX / HZ) * USER_HZ)
2151                         return 1;
2152                 *valp = clock_t_to_jiffies(*negp ? -*lvalp : *lvalp);
2153         } else {
2154                 int val = *valp;
2155                 unsigned long lval;
2156                 if (val < 0) {
2157                         *negp = -1;
2158                         lval = (unsigned long)-val;
2159                 } else {
2160                         *negp = 0;
2161                         lval = (unsigned long)val;
2162                 }
2163                 *lvalp = jiffies_to_clock_t(lval);
2164         }
2165         return 0;
2166 }
2167
/*
 * Converter between user-visible milliseconds and stored jiffies.
 *
 * Write: stores msecs_to_jiffies() of the (possibly negated) value.
 * Read: splits *@valp into a sign flag and a magnitude converted with
 * jiffies_to_msecs().
 *
 * @data is unused. Always returns 0.
 */
static int do_proc_dointvec_ms_jiffies_conv(int *negp, unsigned long *lvalp,
					    int *valp,
					    int write, void *data)
{
	if (write) {
		*valp = msecs_to_jiffies(*negp ? -*lvalp : *lvalp);
	} else {
		int val = *valp;
		unsigned long lval;
		if (val < 0) {
			*negp = -1;
			/*
			 * Widen first, then negate: negating INT_MIN as an
			 * int overflows and yields a bogus magnitude.
			 */
			lval = -(unsigned long)val;
		} else {
			*negp = 0;
			lval = (unsigned long)val;
		}
		*lvalp = jiffies_to_msecs(lval);
	}
	return 0;
}
2188
2189 /**
2190  * proc_dointvec_jiffies - read a vector of integers as seconds
2191  * @table: the sysctl table
2192  * @write: %TRUE if this is a write to the sysctl file
2193  * @filp: the file structure
2194  * @buffer: the user buffer
2195  * @lenp: the size of the user buffer
2196  * @ppos: file position
2197  *
2198  * Reads/writes up to table->maxlen/sizeof(unsigned int) integer
2199  * values from/to the user buffer, treated as an ASCII string. 
2200  * The values read are assumed to be in seconds, and are converted into
2201  * jiffies.
2202  *
2203  * Returns 0 on success.
2204  */
2205 int proc_dointvec_jiffies(ctl_table *table, int write, struct file *filp,
2206                           void __user *buffer, size_t *lenp, loff_t *ppos)
2207 {
2208     return do_proc_dointvec(table,write,filp,buffer,lenp,ppos,
2209                             do_proc_dointvec_jiffies_conv,NULL);
2210 }
2211
2212 /**
2213  * proc_dointvec_userhz_jiffies - read a vector of integers as 1/USER_HZ seconds
2214  * @table: the sysctl table
2215  * @write: %TRUE if this is a write to the sysctl file
2216  * @filp: the file structure
2217  * @buffer: the user buffer
2218  * @lenp: the size of the user buffer
2219  * @ppos: pointer to the file position
2220  *
2221  * Reads/writes up to table->maxlen/sizeof(unsigned int) integer
2222  * values from/to the user buffer, treated as an ASCII string. 
2223  * The values read are assumed to be in 1/USER_HZ seconds, and 
2224  * are converted into jiffies.
2225  *
2226  * Returns 0 on success.
2227  */
2228 int proc_dointvec_userhz_jiffies(ctl_table *table, int write, struct file *filp,
2229                                  void __user *buffer, size_t *lenp, loff_t *ppos)
2230 {
2231     return do_proc_dointvec(table,write,filp,buffer,lenp,ppos,
2232                             do_proc_dointvec_userhz_jiffies_conv,NULL);
2233 }
2234
2235 /**
2236  * proc_dointvec_ms_jiffies - read a vector of integers as 1 milliseconds
2237  * @table: the sysctl table
2238  * @write: %TRUE if this is a write to the sysctl file
2239  * @filp: the file structure
2240  * @buffer: the user buffer
2241  * @lenp: the size of the user buffer
2242  * @ppos: file position
2243  * @ppos: the current position in the file
2244  *
2245  * Reads/writes up to table->maxlen/sizeof(unsigned int) integer
2246  * values from/to the user buffer, treated as an ASCII string. 
2247  * The values read are assumed to be in 1/1000 seconds, and 
2248  * are converted into jiffies.
2249  *
2250  * Returns 0 on success.
2251  */
2252 int proc_dointvec_ms_jiffies(ctl_table *table, int write, struct file *filp,
2253                              void __user *buffer, size_t *lenp, loff_t *ppos)
2254 {
2255         return do_proc_dointvec(table, write, filp, buffer, lenp, ppos,
2256                                 do_proc_dointvec_ms_jiffies_conv, NULL);
2257 }
2258
2259 static int proc_do_cad_pid(ctl_table *table, int write, struct file *filp,
2260                            void __user *buffer, size_t *lenp, loff_t *ppos)
2261 {
2262         struct pid *new_pid;
2263         pid_t tmp;
2264         int r;
2265
2266         tmp = pid_nr(cad_pid);
2267
2268         r = __do_proc_dointvec(&tmp, table, write, filp, buffer,
2269                                lenp, ppos, NULL, NULL);
2270         if (r || !write)
2271                 return r;
2272
2273         new_pid = find_get_pid(tmp);
2274         if (!new_pid)
2275                 return -ESRCH;
2276
2277         put_pid(xchg(&cad_pid, new_pid));
2278         return 0;
2279 }
2280
2281 #else /* CONFIG_PROC_FS */
2282
2283 int proc_dostring(ctl_table *table, int write, struct file *filp,
2284                   void __user *buffer, size_t *lenp, loff_t *ppos)
2285 {
2286         return -ENOSYS;
2287 }
2288
2289 int proc_dointvec(ctl_table *table, int write, struct file *filp,
2290                   void __user *buffer, size_t *lenp, loff_t *ppos)
2291 {
2292         return -ENOSYS;
2293 }
2294
2295 int proc_dointvec_bset(ctl_table *table, int write, struct file *filp,
2296                         void __user *buffer, size_t *lenp, loff_t *ppos)
2297 {
2298         return -ENOSYS;
2299 }
2300
2301 int proc_dointvec_minmax(ctl_table *table, int write, struct file *filp,
2302                     void __user *buffer, size_t *lenp, loff_t *ppos)
2303 {
2304         return -ENOSYS;
2305 }
2306
2307 int proc_dointvec_jiffies(ctl_table *table, int write, struct file *filp,
2308                     void __user *buffer, size_t *lenp, loff_t *ppos)
2309 {
2310         return -ENOSYS;
2311 }
2312
2313 int proc_dointvec_userhz_jiffies(ctl_table *table, int write, struct file *filp,
2314                     void __user *buffer, size_t *lenp, loff_t *ppos)
2315 {
2316         return -ENOSYS;
2317 }
2318
2319 int proc_dointvec_ms_jiffies(ctl_table *table, int write, struct file *filp,
2320                              void __user *buffer, size_t *lenp, loff_t *ppos)
2321 {
2322         return -ENOSYS;
2323 }
2324
2325 int proc_doulongvec_minmax(ctl_table *table, int write, struct file *filp,
2326                     void __user *buffer, size_t *lenp, loff_t *ppos)
2327 {
2328         return -ENOSYS;
2329 }
2330
2331 int proc_doulongvec_ms_jiffies_minmax(ctl_table *table, int write,
2332                                       struct file *filp,
2333                                       void __user *buffer,
2334                                       size_t *lenp, loff_t *ppos)
2335 {
2336     return -ENOSYS;
2337 }
2338
2339
2340 #endif /* CONFIG_PROC_FS */
2341
2342
2343 #ifdef CONFIG_SYSCTL_SYSCALL
2344 /*
2345  * General sysctl support routines 
2346  */
2347
2348 /* The generic string strategy routine: */
2349 int sysctl_string(ctl_table *table, int __user *name, int nlen,
2350                   void __user *oldval, size_t __user *oldlenp,
2351                   void __user *newval, size_t newlen)
2352 {
2353         if (!table->data || !table->maxlen) 
2354                 return -ENOTDIR;
2355         
2356         if (oldval && oldlenp) {
2357                 size_t bufsize;
2358                 if (get_user(bufsize, oldlenp))
2359                         return -EFAULT;
2360                 if (bufsize) {
2361                         size_t len = strlen(table->data), copied;
2362
2363                         /* This shouldn't trigger for a well-formed sysctl */
2364                         if (len > table->maxlen)
2365                                 len = table->maxlen;
2366
2367                         /* Copy up to a max of bufsize-1 bytes of the string */
2368                         copied = (len >= bufsize) ? bufsize - 1 : len;
2369
2370                         if (copy_to_user(oldval, table->data, copied) ||
2371                             put_user(0, (char __user *)(oldval + copied)))
2372                                 return -EFAULT;
2373                         if (put_user(len, oldlenp))
2374                                 return -EFAULT;
2375                 }
2376         }
2377         if (newval && newlen) {
2378                 size_t len = newlen;
2379                 if (len > table->maxlen)
2380                         len = table->maxlen;
2381                 if(copy_from_user(table->data, newval, len))
2382                         return -EFAULT;
2383                 if (len == table->maxlen)
2384                         len--;
2385                 ((char *) table->data)[len] = 0;
2386         }
2387         return 1;
2388 }
2389
2390 /*
2391  * This function makes sure that all of the integers in the vector
2392  * are between the minimum and maximum values given in the arrays
2393  * table->extra1 and table->extra2, respectively.
2394  */
2395 int sysctl_intvec(ctl_table *table, int __user *name, int nlen,
2396                 void __user *oldval, size_t __user *oldlenp,
2397                 void __user *newval, size_t newlen)
2398 {
2399
2400         if (newval && newlen) {
2401                 int __user *vec = (int __user *) newval;
2402                 int *min = (int *) table->extra1;
2403                 int *max = (int *) table->extra2;
2404                 size_t length;
2405                 int i;
2406
2407                 if (newlen % sizeof(int) != 0)
2408                         return -EINVAL;
2409
2410                 if (!table->extra1 && !table->extra2)
2411                         return 0;
2412
2413                 if (newlen > table->maxlen)
2414                         newlen = table->maxlen;
2415                 length = newlen / sizeof(int);
2416
2417                 for (i = 0; i < length; i++) {
2418                         int value;
2419                         if (get_user(value, vec + i))
2420                                 return -EFAULT;
2421                         if (min && value < min[i])
2422                                 return -EINVAL;
2423                         if (max && value > max[i])
2424                                 return -EINVAL;
2425                 }
2426         }
2427         return 0;
2428 }
2429
2430 /* Strategy function to convert jiffies to seconds */ 
2431 int sysctl_jiffies(ctl_table *table, int __user *name, int nlen,
2432                 void __user *oldval, size_t __user *oldlenp,
2433                 void __user *newval, size_t newlen)
2434 {
2435         if (oldval && oldlenp) {
2436                 size_t olen;
2437
2438                 if (get_user(olen, oldlenp))
2439                         return -EFAULT;
2440                 if (olen) {
2441                         int val;
2442
2443                         if (olen < sizeof(int))
2444                                 return -EINVAL;
2445
2446                         val = *(int *)(table->data) / HZ;
2447                         if (put_user(val, (int __user *)oldval))
2448                                 return -EFAULT;
2449                         if (put_user(sizeof(int), oldlenp))
2450                                 return -EFAULT;
2451                 }
2452         }
2453         if (newval && newlen) { 
2454                 int new;
2455                 if (newlen != sizeof(int))
2456                         return -EINVAL; 
2457                 if (get_user(new, (int __user *)newval))
2458                         return -EFAULT;
2459                 *(int *)(table->data) = new*HZ; 
2460         }
2461         return 1;
2462 }
2463
2464 /* Strategy function to convert jiffies to seconds */ 
2465 int sysctl_ms_jiffies(ctl_table *table, int __user *name, int nlen,
2466                 void __user *oldval, size_t __user *oldlenp,
2467                 void __user *newval, size_t newlen)
2468 {
2469         if (oldval && oldlenp) {
2470                 size_t olen;
2471
2472                 if (get_user(olen, oldlenp))
2473                         return -EFAULT;
2474                 if (olen) {
2475                         int val;
2476
2477                         if (olen < sizeof(int))
2478                                 return -EINVAL;
2479
2480                         val = jiffies_to_msecs(*(int *)(table->data));
2481                         if (put_user(val, (int __user *)oldval))
2482                                 return -EFAULT;
2483                         if (put_user(sizeof(int), oldlenp))
2484                                 return -EFAULT;
2485                 }
2486         }
2487         if (newval && newlen) { 
2488                 int new;
2489                 if (newlen != sizeof(int))
2490                         return -EINVAL; 
2491                 if (get_user(new, (int __user *)newval))
2492                         return -EFAULT;
2493                 *(int *)(table->data) = msecs_to_jiffies(new);
2494         }
2495         return 1;
2496 }
2497
2498
2499
2500 #else /* CONFIG_SYSCTL_SYSCALL */
2501
2502
2503 asmlinkage long sys_sysctl(struct __sysctl_args __user *args)
2504 {
2505         static int msg_count;
2506         struct __sysctl_args tmp;
2507         int name[CTL_MAXNAME];
2508         int i;
2509
2510         /* Read in the sysctl name for better debug message logging */
2511         if (copy_from_user(&tmp, args, sizeof(tmp)))
2512                 return -EFAULT;
2513         if (tmp.nlen <= 0 || tmp.nlen >= CTL_MAXNAME)
2514                 return -ENOTDIR;
2515         for (i = 0; i < tmp.nlen; i++)
2516                 if (get_user(name[i], tmp.name + i))
2517                         return -EFAULT;
2518
2519         /* Ignore accesses to kernel.version */
2520         if ((tmp.nlen == 2) && (name[0] == CTL_KERN) && (name[1] == KERN_VERSION))
2521                 goto out;
2522
2523         if (msg_count < 5) {
2524                 msg_count++;
2525                 printk(KERN_INFO
2526                         "warning: process `%s' used the removed sysctl "
2527                         "system call with ", current->comm);
2528                 for (i = 0; i < tmp.nlen; i++)
2529                         printk("%d.", name[i]);
2530                 printk("\n");
2531         }
2532 out:
2533         return -ENOSYS;
2534 }
2535
2536 int sysctl_string(ctl_table *table, int __user *name, int nlen,
2537                   void __user *oldval, size_t __user *oldlenp,
2538                   void __user *newval, size_t newlen)
2539 {
2540         return -ENOSYS;
2541 }
2542
2543 int sysctl_intvec(ctl_table *table, int __user *name, int nlen,
2544                 void __user *oldval, size_t __user *oldlenp,
2545                 void __user *newval, size_t newlen)
2546 {
2547         return -ENOSYS;
2548 }
2549
2550 int sysctl_jiffies(ctl_table *table, int __user *name, int nlen,
2551                 void __user *oldval, size_t __user *oldlenp,
2552                 void __user *newval, size_t newlen)
2553 {
2554         return -ENOSYS;
2555 }
2556
2557 int sysctl_ms_jiffies(ctl_table *table, int __user *name, int nlen,
2558                 void __user *oldval, size_t __user *oldlenp,
2559                 void __user *newval, size_t newlen)
2560 {
2561         return -ENOSYS;
2562 }
2563
2564 #endif /* CONFIG_SYSCTL_SYSCALL */
2565
/*
 * The exports are gathered in one place rather than following each
 * definition: some of these symbols are defined twice (once per
 * preprocessor branch), so a per-definition export would have to be
 * repeated as well.
 */

/* /proc handlers */
EXPORT_SYMBOL(proc_dointvec);
EXPORT_SYMBOL(proc_dointvec_jiffies);
EXPORT_SYMBOL(proc_dointvec_minmax);
EXPORT_SYMBOL(proc_dointvec_ms_jiffies);
EXPORT_SYMBOL(proc_dointvec_userhz_jiffies);
EXPORT_SYMBOL(proc_dostring);
EXPORT_SYMBOL(proc_doulongvec_minmax);
EXPORT_SYMBOL(proc_doulongvec_ms_jiffies_minmax);

/* sysctl strategy functions */
EXPORT_SYMBOL(sysctl_intvec);
EXPORT_SYMBOL(sysctl_jiffies);
EXPORT_SYMBOL(sysctl_ms_jiffies);
EXPORT_SYMBOL(sysctl_string);

/* table registration */
EXPORT_SYMBOL(register_sysctl_table);
EXPORT_SYMBOL(unregister_sysctl_table);