gcov: add gcov profiling infrastructure
[linux-2.6] / kernel / sysctl.c
1 /*
2  * sysctl.c: General linux system control interface
3  *
4  * Begun 24 March 1995, Stephen Tweedie
5  * Added /proc support, Dec 1995
6  * Added bdflush entry and intvec min/max checking, 2/23/96, Tom Dyas.
7  * Added hooks for /proc/sys/net (minor, minor patch), 96/4/1, Mike Shaver.
8  * Added kernel/java-{interpreter,appletviewer}, 96/5/10, Mike Shaver.
9  * Dynamic registration fixes, Stephen Tweedie.
10  * Added kswapd-interval, ctrl-alt-del, printk stuff, 1/8/97, Chris Horn.
11  * Made sysctl support optional via CONFIG_SYSCTL, 1/10/97, Chris
12  *  Horn.
13  * Added proc_doulongvec_ms_jiffies_minmax, 09/08/99, Carlos H. Bauer.
14  * Added proc_doulongvec_minmax, 09/08/99, Carlos H. Bauer.
15  * Changed linked lists to use list.h instead of lists.h, 02/24/00, Bill
16  *  Wendling.
17  * The list_for_each() macro wasn't appropriate for the sysctl loop.
18  *  Removed it and replaced it with older style, 03/23/00, Bill Wendling
19  */
20
21 #include <linux/module.h>
22 #include <linux/mm.h>
23 #include <linux/swap.h>
24 #include <linux/slab.h>
25 #include <linux/sysctl.h>
26 #include <linux/proc_fs.h>
27 #include <linux/security.h>
28 #include <linux/ctype.h>
29 #include <linux/utsname.h>
30 #include <linux/kmemcheck.h>
31 #include <linux/smp_lock.h>
32 #include <linux/fs.h>
33 #include <linux/init.h>
34 #include <linux/kernel.h>
35 #include <linux/kobject.h>
36 #include <linux/net.h>
37 #include <linux/sysrq.h>
38 #include <linux/highuid.h>
39 #include <linux/writeback.h>
40 #include <linux/hugetlb.h>
41 #include <linux/initrd.h>
42 #include <linux/key.h>
43 #include <linux/times.h>
44 #include <linux/limits.h>
45 #include <linux/dcache.h>
46 #include <linux/syscalls.h>
47 #include <linux/vmstat.h>
48 #include <linux/nfs_fs.h>
49 #include <linux/acpi.h>
50 #include <linux/reboot.h>
51 #include <linux/ftrace.h>
52 #include <linux/slow-work.h>
53 #include <linux/perf_counter.h>
54
55 #include <asm/uaccess.h>
56 #include <asm/processor.h>
57
58 #ifdef CONFIG_X86
59 #include <asm/nmi.h>
60 #include <asm/stacktrace.h>
61 #include <asm/io.h>
62 #endif
63
64 static int deprecated_sysctl_warning(struct __sysctl_args *args);
65
66 #if defined(CONFIG_SYSCTL)
67
68 /* External variables not in a header file. */
69 extern int C_A_D;
70 extern int print_fatal_signals;
71 extern int sysctl_overcommit_memory;
72 extern int sysctl_overcommit_ratio;
73 extern int sysctl_panic_on_oom;
74 extern int sysctl_oom_kill_allocating_task;
75 extern int sysctl_oom_dump_tasks;
76 extern int max_threads;
77 extern int core_uses_pid;
78 extern int suid_dumpable;
79 extern char core_pattern[];
80 extern int pid_max;
81 extern int min_free_kbytes;
82 extern int pid_max_min, pid_max_max;
83 extern int sysctl_drop_caches;
84 extern int percpu_pagelist_fraction;
85 extern int compat_log;
86 extern int latencytop_enabled;
87 extern int sysctl_nr_open_min, sysctl_nr_open_max;
88 #ifndef CONFIG_MMU
89 extern int sysctl_nr_trim_pages;
90 #endif
91 #ifdef CONFIG_RCU_TORTURE_TEST
92 extern int rcutorture_runnable;
93 #endif /* #ifdef CONFIG_RCU_TORTURE_TEST */
94
95 /* Constants used for minimum and maximum */
96 #ifdef CONFIG_DETECT_SOFTLOCKUP
97 static int sixty = 60;  /* passed as .extra2 of the softlockup_thresh entry */
98 static int neg_one = -1;  /* passed as .extra1 of the softlockup_thresh entry */
99 #endif
100
101 static int zero;  /* static storage, so implicitly 0; used as .extra1 by several kern_table entries */
102 static int __maybe_unused one = 1;  /* used as .extra1 and/or .extra2 by kern_table entries, depending on config */
103 static int __maybe_unused two = 2;
104 static unsigned long one_ul = 1;
105 static int one_hundred = 100;
106
107 /* This is needed for the proc_doulongvec_minmax handler of vm_dirty_bytes. */
108 static unsigned long dirty_bytes_min = 2 * PAGE_SIZE;
109
110 /* These are needed for the proc_dointvec_minmax handler of the [fs_]overflow UID and GID entries. */
111 static int maxolduid = 65535;  /* .extra2 of the overflowuid and overflowgid entries */
112 static int minolduid;  /* .extra1 of the same entries; static storage, so implicitly 0 */
113 static int min_percpu_pagelist_fract = 8;
114
115 static int ngroups_max = NGROUPS_MAX;  /* .data of the mode-0444 ngroups_max entry in kern_table */
116
117 #ifdef CONFIG_MODULES
118 extern char modprobe_path[];
119 extern int modules_disabled;
120 #endif
121 #ifdef CONFIG_CHR_DEV_SG
122 extern int sg_big_buff;
123 #endif
124
125 #ifdef CONFIG_SPARC
126 #include <asm/system.h>
127 #endif
128
129 #ifdef CONFIG_SPARC64
130 extern int sysctl_tsb_ratio;
131 #endif
132
133 #ifdef __hppa__
134 extern int pwrsw_enabled;
135 extern int unaligned_enabled;
136 #endif
137
138 #ifdef CONFIG_S390
139 #ifdef CONFIG_MATHEMU
140 extern int sysctl_ieee_emulation_warnings;
141 #endif
142 extern int sysctl_userprocess_debug;
143 extern int spin_retry;
144 #endif
145
146 #ifdef CONFIG_BSD_PROCESS_ACCT
147 extern int acct_parm[];
148 #endif
149
150 #ifdef CONFIG_IA64
151 extern int no_unaligned_warning;
152 extern int unaligned_dump_stack;
153 #endif
154
155 #ifdef CONFIG_RT_MUTEXES
156 extern int max_lock_depth;
157 #endif
158
159 #ifdef CONFIG_PROC_SYSCTL
160 static int proc_do_cad_pid(struct ctl_table *table, int write, struct file *filp,
161                   void __user *buffer, size_t *lenp, loff_t *ppos);
162 static int proc_taint(struct ctl_table *table, int write, struct file *filp,
163                                void __user *buffer, size_t *lenp, loff_t *ppos);
164 #endif
165
166 static struct ctl_table root_table[];
167 static struct ctl_table_root sysctl_table_root;
168 static struct ctl_table_header root_table_header = {  /* statically initialized header for root_table */
169         .count = 1,
170         .ctl_table = root_table,  /* only tentatively declared at this point; defined further down */
171         .ctl_entry = LIST_HEAD_INIT(sysctl_table_root.default_set.list),  /* cross-linked: default_set.list is in turn initialized from this ctl_entry */
172         .root = &sysctl_table_root,
173         .set = &sysctl_table_root.default_set,
174 };
175 static struct ctl_table_root sysctl_table_root = {  /* root object that root_table_header.root points to */
176         .root_list = LIST_HEAD_INIT(sysctl_table_root.root_list),  /* initialized from its own address */
177         .default_set.list = LIST_HEAD_INIT(root_table_header.ctl_entry),  /* initialized from root_table_header's list node (the header links back here) */
178 };
179
180 static struct ctl_table kern_table[];
181 static struct ctl_table vm_table[];
182 static struct ctl_table fs_table[];
183 static struct ctl_table debug_table[];
184 static struct ctl_table dev_table[];
185 extern struct ctl_table random_table[];
186 #ifdef CONFIG_INOTIFY_USER
187 extern struct ctl_table inotify_table[];
188 #endif
189 #ifdef CONFIG_EPOLL
190 extern struct ctl_table epoll_table[];
191 #endif
192
193 #ifdef HAVE_ARCH_PICK_MMAP_LAYOUT
194 int sysctl_legacy_va_layout;
195 #endif
196
197 extern int prove_locking;
198 extern int lock_stat;
199
200 /* The default sysctl tables: */
201
202 static struct ctl_table root_table[] = {  /* top-level entries: each one only sets a name, a mode and a child table */
203         {
204                 .ctl_name       = CTL_KERN,
205                 .procname       = "kernel",
206                 .mode           = 0555,  /* read and execute for all, no write bit */
207                 .child          = kern_table,
208         },
209         {
210                 .ctl_name       = CTL_VM,
211                 .procname       = "vm",
212                 .mode           = 0555,
213                 .child          = vm_table,
214         },
215         {
216                 .ctl_name       = CTL_FS,
217                 .procname       = "fs",
218                 .mode           = 0555,
219                 .child          = fs_table,
220         },
221         {
222                 .ctl_name       = CTL_DEBUG,
223                 .procname       = "debug",
224                 .mode           = 0555,
225                 .child          = debug_table,
226         },
227         {
228                 .ctl_name       = CTL_DEV,
229                 .procname       = "dev",
230                 .mode           = 0555,
231                 .child          = dev_table,
232         },
233 /*
234  * NOTE: do not add new entries to this table unless you have read
235  * Documentation/sysctl/ctl_unnumbered.txt
236  */
237         { .ctl_name = 0 }  /* final entry: ctl_name 0, all other fields zero; presumably the table terminator */
238 };
239
240 #ifdef CONFIG_SCHED_DEBUG
241 static int min_sched_granularity_ns = 100000;           /* 100 usecs; .extra1 of sched_min_granularity_ns and sched_latency_ns */
242 static int max_sched_granularity_ns = NSEC_PER_SEC;     /* 1 second; .extra2 of the same two entries */
243 static int min_wakeup_granularity_ns;                   /* 0 usecs (implicitly zero); .extra1 of sched_wakeup_granularity_ns */
244 static int max_wakeup_granularity_ns = NSEC_PER_SEC;    /* 1 second; .extra2 of sched_wakeup_granularity_ns */
245 #endif
246
247 static struct ctl_table kern_table[] = {
248 #ifdef CONFIG_SCHED_DEBUG
249         {
250                 .ctl_name       = CTL_UNNUMBERED,
251                 .procname       = "sched_min_granularity_ns",
252                 .data           = &sysctl_sched_min_granularity,
253                 .maxlen         = sizeof(unsigned int),
254                 .mode           = 0644,
255                 .proc_handler   = &sched_nr_latency_handler,
256                 .strategy       = &sysctl_intvec,
257                 .extra1         = &min_sched_granularity_ns,
258                 .extra2         = &max_sched_granularity_ns,
259         },
260         {
261                 .ctl_name       = CTL_UNNUMBERED,
262                 .procname       = "sched_latency_ns",
263                 .data           = &sysctl_sched_latency,
264                 .maxlen         = sizeof(unsigned int),
265                 .mode           = 0644,
266                 .proc_handler   = &sched_nr_latency_handler,
267                 .strategy       = &sysctl_intvec,
268                 .extra1         = &min_sched_granularity_ns,
269                 .extra2         = &max_sched_granularity_ns,
270         },
271         {
272                 .ctl_name       = CTL_UNNUMBERED,
273                 .procname       = "sched_wakeup_granularity_ns",
274                 .data           = &sysctl_sched_wakeup_granularity,
275                 .maxlen         = sizeof(unsigned int),
276                 .mode           = 0644,
277                 .proc_handler   = &proc_dointvec_minmax,
278                 .strategy       = &sysctl_intvec,
279                 .extra1         = &min_wakeup_granularity_ns,
280                 .extra2         = &max_wakeup_granularity_ns,
281         },
282         {
283                 .ctl_name       = CTL_UNNUMBERED,
284                 .procname       = "sched_shares_ratelimit",
285                 .data           = &sysctl_sched_shares_ratelimit,
286                 .maxlen         = sizeof(unsigned int),
287                 .mode           = 0644,
288                 .proc_handler   = &proc_dointvec,
289         },
290         {
291                 .ctl_name       = CTL_UNNUMBERED,
292                 .procname       = "sched_shares_thresh",
293                 .data           = &sysctl_sched_shares_thresh,
294                 .maxlen         = sizeof(unsigned int),
295                 .mode           = 0644,
296                 .proc_handler   = &proc_dointvec_minmax,
297                 .strategy       = &sysctl_intvec,
298                 .extra1         = &zero,
299         },
300         {
301                 .ctl_name       = CTL_UNNUMBERED,
302                 .procname       = "sched_child_runs_first",
303                 .data           = &sysctl_sched_child_runs_first,
304                 .maxlen         = sizeof(unsigned int),
305                 .mode           = 0644,
306                 .proc_handler   = &proc_dointvec,
307         },
308         {
309                 .ctl_name       = CTL_UNNUMBERED,
310                 .procname       = "sched_features",
311                 .data           = &sysctl_sched_features,
312                 .maxlen         = sizeof(unsigned int),
313                 .mode           = 0644,
314                 .proc_handler   = &proc_dointvec,
315         },
316         {
317                 .ctl_name       = CTL_UNNUMBERED,
318                 .procname       = "sched_migration_cost",
319                 .data           = &sysctl_sched_migration_cost,
320                 .maxlen         = sizeof(unsigned int),
321                 .mode           = 0644,
322                 .proc_handler   = &proc_dointvec,
323         },
324         {
325                 .ctl_name       = CTL_UNNUMBERED,
326                 .procname       = "sched_nr_migrate",
327                 .data           = &sysctl_sched_nr_migrate,
328                 .maxlen         = sizeof(unsigned int),
329                 .mode           = 0644,
330                 .proc_handler   = &proc_dointvec,
331         },
332         {
333                 .ctl_name       = CTL_UNNUMBERED,
334                 .procname       = "timer_migration",
335                 .data           = &sysctl_timer_migration,
336                 .maxlen         = sizeof(unsigned int),
337                 .mode           = 0644,
338                 .proc_handler   = &proc_dointvec,
339         },
340 #endif
341         {
342                 .ctl_name       = CTL_UNNUMBERED,
343                 .procname       = "sched_rt_period_us",
344                 .data           = &sysctl_sched_rt_period,
345                 .maxlen         = sizeof(unsigned int),
346                 .mode           = 0644,
347                 .proc_handler   = &sched_rt_handler,
348         },
349         {
350                 .ctl_name       = CTL_UNNUMBERED,
351                 .procname       = "sched_rt_runtime_us",
352                 .data           = &sysctl_sched_rt_runtime,
353                 .maxlen         = sizeof(int),
354                 .mode           = 0644,
355                 .proc_handler   = &sched_rt_handler,
356         },
357         {
358                 .ctl_name       = CTL_UNNUMBERED,
359                 .procname       = "sched_compat_yield",
360                 .data           = &sysctl_sched_compat_yield,
361                 .maxlen         = sizeof(unsigned int),
362                 .mode           = 0644,
363                 .proc_handler   = &proc_dointvec,
364         },
365 #ifdef CONFIG_PROVE_LOCKING
366         {
367                 .ctl_name       = CTL_UNNUMBERED,
368                 .procname       = "prove_locking",
369                 .data           = &prove_locking,
370                 .maxlen         = sizeof(int),
371                 .mode           = 0644,
372                 .proc_handler   = &proc_dointvec,
373         },
374 #endif
375 #ifdef CONFIG_LOCK_STAT
376         {
377                 .ctl_name       = CTL_UNNUMBERED,
378                 .procname       = "lock_stat",
379                 .data           = &lock_stat,
380                 .maxlen         = sizeof(int),
381                 .mode           = 0644,
382                 .proc_handler   = &proc_dointvec,
383         },
384 #endif
385         {
386                 .ctl_name       = KERN_PANIC,
387                 .procname       = "panic",
388                 .data           = &panic_timeout,
389                 .maxlen         = sizeof(int),
390                 .mode           = 0644,
391                 .proc_handler   = &proc_dointvec,
392         },
393         {
394                 .ctl_name       = KERN_CORE_USES_PID,
395                 .procname       = "core_uses_pid",
396                 .data           = &core_uses_pid,
397                 .maxlen         = sizeof(int),
398                 .mode           = 0644,
399                 .proc_handler   = &proc_dointvec,
400         },
401         {
402                 .ctl_name       = KERN_CORE_PATTERN,
403                 .procname       = "core_pattern",
404                 .data           = core_pattern,
405                 .maxlen         = CORENAME_MAX_SIZE,
406                 .mode           = 0644,
407                 .proc_handler   = &proc_dostring,
408                 .strategy       = &sysctl_string,
409         },
410 #ifdef CONFIG_PROC_SYSCTL
411         {
412                 .procname       = "tainted",
413                 .maxlen         = sizeof(long),
414                 .mode           = 0644,
415                 .proc_handler   = &proc_taint,
416         },
417 #endif
418 #ifdef CONFIG_LATENCYTOP
419         {
420                 .procname       = "latencytop",
421                 .data           = &latencytop_enabled,
422                 .maxlen         = sizeof(int),
423                 .mode           = 0644,
424                 .proc_handler   = &proc_dointvec,
425         },
426 #endif
427 #ifdef CONFIG_BLK_DEV_INITRD
428         {
429                 .ctl_name       = KERN_REALROOTDEV,
430                 .procname       = "real-root-dev",
431                 .data           = &real_root_dev,
432                 .maxlen         = sizeof(int),
433                 .mode           = 0644,
434                 .proc_handler   = &proc_dointvec,
435         },
436 #endif
437         {
438                 .ctl_name       = CTL_UNNUMBERED,
439                 .procname       = "print-fatal-signals",
440                 .data           = &print_fatal_signals,
441                 .maxlen         = sizeof(int),
442                 .mode           = 0644,
443                 .proc_handler   = &proc_dointvec,
444         },
445 #ifdef CONFIG_SPARC
446         {
447                 .ctl_name       = KERN_SPARC_REBOOT,
448                 .procname       = "reboot-cmd",
449                 .data           = reboot_command,
450                 .maxlen         = 256,
451                 .mode           = 0644,
452                 .proc_handler   = &proc_dostring,
453                 .strategy       = &sysctl_string,
454         },
455         {
456                 .ctl_name       = KERN_SPARC_STOP_A,
457                 .procname       = "stop-a",
458                 .data           = &stop_a_enabled,
459                 .maxlen         = sizeof (int),
460                 .mode           = 0644,
461                 .proc_handler   = &proc_dointvec,
462         },
463         {
464                 .ctl_name       = KERN_SPARC_SCONS_PWROFF,
465                 .procname       = "scons-poweroff",
466                 .data           = &scons_pwroff,
467                 .maxlen         = sizeof (int),
468                 .mode           = 0644,
469                 .proc_handler   = &proc_dointvec,
470         },
471 #endif
472 #ifdef CONFIG_SPARC64
473         {
474                 .ctl_name       = CTL_UNNUMBERED,
475                 .procname       = "tsb-ratio",
476                 .data           = &sysctl_tsb_ratio,
477                 .maxlen         = sizeof (int),
478                 .mode           = 0644,
479                 .proc_handler   = &proc_dointvec,
480         },
481 #endif
482 #ifdef __hppa__
483         {
484                 .ctl_name       = KERN_HPPA_PWRSW,
485                 .procname       = "soft-power",
486                 .data           = &pwrsw_enabled,
487                 .maxlen         = sizeof (int),
488                 .mode           = 0644,
489                 .proc_handler   = &proc_dointvec,
490         },
491         {
492                 .ctl_name       = KERN_HPPA_UNALIGNED,
493                 .procname       = "unaligned-trap",
494                 .data           = &unaligned_enabled,
495                 .maxlen         = sizeof (int),
496                 .mode           = 0644,
497                 .proc_handler   = &proc_dointvec,
498         },
499 #endif
500         {
501                 .ctl_name       = KERN_CTLALTDEL,
502                 .procname       = "ctrl-alt-del",
503                 .data           = &C_A_D,
504                 .maxlen         = sizeof(int),
505                 .mode           = 0644,
506                 .proc_handler   = &proc_dointvec,
507         },
508 #ifdef CONFIG_FUNCTION_TRACER
509         {
510                 .ctl_name       = CTL_UNNUMBERED,
511                 .procname       = "ftrace_enabled",
512                 .data           = &ftrace_enabled,
513                 .maxlen         = sizeof(int),
514                 .mode           = 0644,
515                 .proc_handler   = &ftrace_enable_sysctl,
516         },
517 #endif
518 #ifdef CONFIG_STACK_TRACER
519         {
520                 .ctl_name       = CTL_UNNUMBERED,
521                 .procname       = "stack_tracer_enabled",
522                 .data           = &stack_tracer_enabled,
523                 .maxlen         = sizeof(int),
524                 .mode           = 0644,
525                 .proc_handler   = &stack_trace_sysctl,
526         },
527 #endif
528 #ifdef CONFIG_TRACING
529         {
530                 .ctl_name       = CTL_UNNUMBERED,
531                 .procname       = "ftrace_dump_on_oops",
532                 .data           = &ftrace_dump_on_oops,
533                 .maxlen         = sizeof(int),
534                 .mode           = 0644,
535                 .proc_handler   = &proc_dointvec,
536         },
537 #endif
538 #ifdef CONFIG_MODULES
539         {
540                 .ctl_name       = KERN_MODPROBE,
541                 .procname       = "modprobe",
542                 .data           = &modprobe_path,
543                 .maxlen         = KMOD_PATH_LEN,
544                 .mode           = 0644,
545                 .proc_handler   = &proc_dostring,
546                 .strategy       = &sysctl_string,
547         },
548         {
549                 .ctl_name       = CTL_UNNUMBERED,
550                 .procname       = "modules_disabled",
551                 .data           = &modules_disabled,
552                 .maxlen         = sizeof(int),
553                 .mode           = 0644,
554                 /* only handle a transition from default "0" to "1" */
555                 .proc_handler   = &proc_dointvec_minmax,
556                 .extra1         = &one,
557                 .extra2         = &one,
558         },
559 #endif
560 #if defined(CONFIG_HOTPLUG) && defined(CONFIG_NET)
561         {
562                 .ctl_name       = KERN_HOTPLUG,
563                 .procname       = "hotplug",
564                 .data           = &uevent_helper,
565                 .maxlen         = UEVENT_HELPER_PATH_LEN,
566                 .mode           = 0644,
567                 .proc_handler   = &proc_dostring,
568                 .strategy       = &sysctl_string,
569         },
570 #endif
571 #ifdef CONFIG_CHR_DEV_SG
572         {
573                 .ctl_name       = KERN_SG_BIG_BUFF,
574                 .procname       = "sg-big-buff",
575                 .data           = &sg_big_buff,
576                 .maxlen         = sizeof (int),
577                 .mode           = 0444,
578                 .proc_handler   = &proc_dointvec,
579         },
580 #endif
581 #ifdef CONFIG_BSD_PROCESS_ACCT
582         {
583                 .ctl_name       = KERN_ACCT,
584                 .procname       = "acct",
585                 .data           = &acct_parm,
586                 .maxlen         = 3*sizeof(int),
587                 .mode           = 0644,
588                 .proc_handler   = &proc_dointvec,
589         },
590 #endif
591 #ifdef CONFIG_MAGIC_SYSRQ
592         {
593                 .ctl_name       = KERN_SYSRQ,
594                 .procname       = "sysrq",
595                 .data           = &__sysrq_enabled,
596                 .maxlen         = sizeof (int),
597                 .mode           = 0644,
598                 .proc_handler   = &proc_dointvec,
599         },
600 #endif
601 #ifdef CONFIG_PROC_SYSCTL
602         {
603                 .procname       = "cad_pid",
604                 .data           = NULL,
605                 .maxlen         = sizeof (int),
606                 .mode           = 0600,
607                 .proc_handler   = &proc_do_cad_pid,
608         },
609 #endif
610         {
611                 .ctl_name       = KERN_MAX_THREADS,
612                 .procname       = "threads-max",
613                 .data           = &max_threads,
614                 .maxlen         = sizeof(int),
615                 .mode           = 0644,
616                 .proc_handler   = &proc_dointvec,
617         },
618         {
619                 .ctl_name       = KERN_RANDOM,
620                 .procname       = "random",
621                 .mode           = 0555,
622                 .child          = random_table,
623         },
624         {
625                 .ctl_name       = KERN_OVERFLOWUID,
626                 .procname       = "overflowuid",
627                 .data           = &overflowuid,
628                 .maxlen         = sizeof(int),
629                 .mode           = 0644,
630                 .proc_handler   = &proc_dointvec_minmax,
631                 .strategy       = &sysctl_intvec,
632                 .extra1         = &minolduid,
633                 .extra2         = &maxolduid,
634         },
635         {
636                 .ctl_name       = KERN_OVERFLOWGID,
637                 .procname       = "overflowgid",
638                 .data           = &overflowgid,
639                 .maxlen         = sizeof(int),
640                 .mode           = 0644,
641                 .proc_handler   = &proc_dointvec_minmax,
642                 .strategy       = &sysctl_intvec,
643                 .extra1         = &minolduid,
644                 .extra2         = &maxolduid,
645         },
646 #ifdef CONFIG_S390
647 #ifdef CONFIG_MATHEMU
648         {
649                 .ctl_name       = KERN_IEEE_EMULATION_WARNINGS,
650                 .procname       = "ieee_emulation_warnings",
651                 .data           = &sysctl_ieee_emulation_warnings,
652                 .maxlen         = sizeof(int),
653                 .mode           = 0644,
654                 .proc_handler   = &proc_dointvec,
655         },
656 #endif
657         {
658                 .ctl_name       = KERN_S390_USER_DEBUG_LOGGING,
659                 .procname       = "userprocess_debug",
660                 .data           = &sysctl_userprocess_debug,
661                 .maxlen         = sizeof(int),
662                 .mode           = 0644,
663                 .proc_handler   = &proc_dointvec,
664         },
665 #endif
666         {
667                 .ctl_name       = KERN_PIDMAX,
668                 .procname       = "pid_max",
669                 .data           = &pid_max,
670                 .maxlen         = sizeof (int),
671                 .mode           = 0644,
672                 .proc_handler   = &proc_dointvec_minmax,
673                 .strategy       = sysctl_intvec,
674                 .extra1         = &pid_max_min,
675                 .extra2         = &pid_max_max,
676         },
677         {
678                 .ctl_name       = KERN_PANIC_ON_OOPS,
679                 .procname       = "panic_on_oops",
680                 .data           = &panic_on_oops,
681                 .maxlen         = sizeof(int),
682                 .mode           = 0644,
683                 .proc_handler   = &proc_dointvec,
684         },
685 #if defined CONFIG_PRINTK
686         {
687                 .ctl_name       = KERN_PRINTK,
688                 .procname       = "printk",
689                 .data           = &console_loglevel,
690                 .maxlen         = 4*sizeof(int),
691                 .mode           = 0644,
692                 .proc_handler   = &proc_dointvec,
693         },
694         {
695                 .ctl_name       = KERN_PRINTK_RATELIMIT,
696                 .procname       = "printk_ratelimit",
697                 .data           = &printk_ratelimit_state.interval,
698                 .maxlen         = sizeof(int),
699                 .mode           = 0644,
700                 .proc_handler   = &proc_dointvec_jiffies,
701                 .strategy       = &sysctl_jiffies,
702         },
703         {
704                 .ctl_name       = KERN_PRINTK_RATELIMIT_BURST,
705                 .procname       = "printk_ratelimit_burst",
706                 .data           = &printk_ratelimit_state.burst,
707                 .maxlen         = sizeof(int),
708                 .mode           = 0644,
709                 .proc_handler   = &proc_dointvec,
710         },
711 #endif
712         {
713                 .ctl_name       = KERN_NGROUPS_MAX,
714                 .procname       = "ngroups_max",
715                 .data           = &ngroups_max,
716                 .maxlen         = sizeof (int),
717                 .mode           = 0444,
718                 .proc_handler   = &proc_dointvec,
719         },
720 #if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_X86)
721         {
722                 .ctl_name       = KERN_UNKNOWN_NMI_PANIC,
723                 .procname       = "unknown_nmi_panic",
724                 .data           = &unknown_nmi_panic,
725                 .maxlen         = sizeof (int),
726                 .mode           = 0644,
727                 .proc_handler   = &proc_dointvec,
728         },
729         {
730                 .procname       = "nmi_watchdog",
731                 .data           = &nmi_watchdog_enabled,
732                 .maxlen         = sizeof (int),
733                 .mode           = 0644,
734                 .proc_handler   = &proc_nmi_enabled,
735         },
736 #endif
737 #if defined(CONFIG_X86)
738         {
739                 .ctl_name       = KERN_PANIC_ON_NMI,
740                 .procname       = "panic_on_unrecovered_nmi",
741                 .data           = &panic_on_unrecovered_nmi,
742                 .maxlen         = sizeof(int),
743                 .mode           = 0644,
744                 .proc_handler   = &proc_dointvec,
745         },
746         {
747                 .ctl_name       = KERN_BOOTLOADER_TYPE,
748                 .procname       = "bootloader_type",
749                 .data           = &bootloader_type,
750                 .maxlen         = sizeof (int),
751                 .mode           = 0444,
752                 .proc_handler   = &proc_dointvec,
753         },
754         {
755                 .ctl_name       = CTL_UNNUMBERED,
756                 .procname       = "bootloader_version",
757                 .data           = &bootloader_version,
758                 .maxlen         = sizeof (int),
759                 .mode           = 0444,
760                 .proc_handler   = &proc_dointvec,
761         },
762         {
763                 .ctl_name       = CTL_UNNUMBERED,
764                 .procname       = "kstack_depth_to_print",
765                 .data           = &kstack_depth_to_print,
766                 .maxlen         = sizeof(int),
767                 .mode           = 0644,
768                 .proc_handler   = &proc_dointvec,
769         },
770         {
771                 .ctl_name       = CTL_UNNUMBERED,
772                 .procname       = "io_delay_type",
773                 .data           = &io_delay_type,
774                 .maxlen         = sizeof(int),
775                 .mode           = 0644,
776                 .proc_handler   = &proc_dointvec,
777         },
778 #endif
779 #if defined(CONFIG_MMU)
780         {
781                 .ctl_name       = KERN_RANDOMIZE,
782                 .procname       = "randomize_va_space",
783                 .data           = &randomize_va_space,
784                 .maxlen         = sizeof(int),
785                 .mode           = 0644,
786                 .proc_handler   = &proc_dointvec,
787         },
788 #endif
789 #if defined(CONFIG_S390) && defined(CONFIG_SMP)
790         {
791                 .ctl_name       = KERN_SPIN_RETRY,
792                 .procname       = "spin_retry",
793                 .data           = &spin_retry,
794                 .maxlen         = sizeof (int),
795                 .mode           = 0644,
796                 .proc_handler   = &proc_dointvec,
797         },
798 #endif
799 #if     defined(CONFIG_ACPI_SLEEP) && defined(CONFIG_X86)
800         {
801                 .procname       = "acpi_video_flags",
802                 .data           = &acpi_realmode_flags,
803                 .maxlen         = sizeof (unsigned long),
804                 .mode           = 0644,
805                 .proc_handler   = &proc_doulongvec_minmax,
806         },
807 #endif
808 #ifdef CONFIG_IA64
809         {
810                 .ctl_name       = KERN_IA64_UNALIGNED,
811                 .procname       = "ignore-unaligned-usertrap",
812                 .data           = &no_unaligned_warning,
813                 .maxlen         = sizeof (int),
814                 .mode           = 0644,
815                 .proc_handler   = &proc_dointvec,
816         },
817         {
818                 .ctl_name       = CTL_UNNUMBERED,
819                 .procname       = "unaligned-dump-stack",
820                 .data           = &unaligned_dump_stack,
821                 .maxlen         = sizeof (int),
822                 .mode           = 0644,
823                 .proc_handler   = &proc_dointvec,
824         },
825 #endif
826 #ifdef CONFIG_DETECT_SOFTLOCKUP
827         {
828                 .ctl_name       = CTL_UNNUMBERED,
829                 .procname       = "softlockup_panic",
830                 .data           = &softlockup_panic,
831                 .maxlen         = sizeof(int),
832                 .mode           = 0644,
833                 .proc_handler   = &proc_dointvec_minmax,
834                 .strategy       = &sysctl_intvec,
835                 .extra1         = &zero,
836                 .extra2         = &one,
837         },
838         {
839                 .ctl_name       = CTL_UNNUMBERED,
840                 .procname       = "softlockup_thresh",
841                 .data           = &softlockup_thresh,
842                 .maxlen         = sizeof(int),
843                 .mode           = 0644,
844                 .proc_handler   = &proc_dosoftlockup_thresh,
845                 .strategy       = &sysctl_intvec,
846                 .extra1         = &neg_one,
847                 .extra2         = &sixty,
848         },
849 #endif
850 #ifdef CONFIG_DETECT_HUNG_TASK
851         {
852                 .ctl_name       = CTL_UNNUMBERED,
853                 .procname       = "hung_task_panic",
854                 .data           = &sysctl_hung_task_panic,
855                 .maxlen         = sizeof(int),
856                 .mode           = 0644,
857                 .proc_handler   = &proc_dointvec_minmax,
858                 .strategy       = &sysctl_intvec,
859                 .extra1         = &zero,
860                 .extra2         = &one,
861         },
862         {
863                 .ctl_name       = CTL_UNNUMBERED,
864                 .procname       = "hung_task_check_count",
865                 .data           = &sysctl_hung_task_check_count,
866                 .maxlen         = sizeof(unsigned long),
867                 .mode           = 0644,
868                 .proc_handler   = &proc_doulongvec_minmax,
869                 .strategy       = &sysctl_intvec,
870         },
871         {
872                 .ctl_name       = CTL_UNNUMBERED,
873                 .procname       = "hung_task_timeout_secs",
874                 .data           = &sysctl_hung_task_timeout_secs,
875                 .maxlen         = sizeof(unsigned long),
876                 .mode           = 0644,
877                 .proc_handler   = &proc_dohung_task_timeout_secs,
878                 .strategy       = &sysctl_intvec,
879         },
880         {
881                 .ctl_name       = CTL_UNNUMBERED,
882                 .procname       = "hung_task_warnings",
883                 .data           = &sysctl_hung_task_warnings,
884                 .maxlen         = sizeof(unsigned long),
885                 .mode           = 0644,
886                 .proc_handler   = &proc_doulongvec_minmax,
887                 .strategy       = &sysctl_intvec,
888         },
889 #endif
890 #ifdef CONFIG_COMPAT
891         {
892                 .ctl_name       = KERN_COMPAT_LOG,
893                 .procname       = "compat-log",
894                 .data           = &compat_log,
895                 .maxlen         = sizeof (int),
896                 .mode           = 0644,
897                 .proc_handler   = &proc_dointvec,
898         },
899 #endif
900 #ifdef CONFIG_RT_MUTEXES
901         {
902                 .ctl_name       = KERN_MAX_LOCK_DEPTH,
903                 .procname       = "max_lock_depth",
904                 .data           = &max_lock_depth,
905                 .maxlen         = sizeof(int),
906                 .mode           = 0644,
907                 .proc_handler   = &proc_dointvec,
908         },
909 #endif
910         {
911                 .ctl_name       = CTL_UNNUMBERED,
912                 .procname       = "poweroff_cmd",
913                 .data           = &poweroff_cmd,
914                 .maxlen         = POWEROFF_CMD_PATH_LEN,
915                 .mode           = 0644,
916                 .proc_handler   = &proc_dostring,
917                 .strategy       = &sysctl_string,
918         },
919 #ifdef CONFIG_KEYS
920         {
921                 .ctl_name       = CTL_UNNUMBERED,
922                 .procname       = "keys",
923                 .mode           = 0555,
924                 .child          = key_sysctls,
925         },
926 #endif
927 #ifdef CONFIG_RCU_TORTURE_TEST
928         {
929                 .ctl_name       = CTL_UNNUMBERED,
930                 .procname       = "rcutorture_runnable",
931                 .data           = &rcutorture_runnable,
932                 .maxlen         = sizeof(int),
933                 .mode           = 0644,
934                 .proc_handler   = &proc_dointvec,
935         },
936 #endif
937 #ifdef CONFIG_SLOW_WORK
938         {
939                 .ctl_name       = CTL_UNNUMBERED,
940                 .procname       = "slow-work",
941                 .mode           = 0555,
942                 .child          = slow_work_sysctls,
943         },
944 #endif
945 #ifdef CONFIG_PERF_COUNTERS
946         {
947                 .ctl_name       = CTL_UNNUMBERED,
948                 .procname       = "perf_counter_paranoid",
949                 .data           = &sysctl_perf_counter_paranoid,
950                 .maxlen         = sizeof(sysctl_perf_counter_paranoid),
951                 .mode           = 0644,
952                 .proc_handler   = &proc_dointvec,
953         },
954         {
955                 .ctl_name       = CTL_UNNUMBERED,
956                 .procname       = "perf_counter_mlock_kb",
957                 .data           = &sysctl_perf_counter_mlock,
958                 .maxlen         = sizeof(sysctl_perf_counter_mlock),
959                 .mode           = 0644,
960                 .proc_handler   = &proc_dointvec,
961         },
962         {
963                 .ctl_name       = CTL_UNNUMBERED,
964                 .procname       = "perf_counter_max_sample_rate",
965                 .data           = &sysctl_perf_counter_sample_rate,
966                 .maxlen         = sizeof(sysctl_perf_counter_sample_rate),
967                 .mode           = 0644,
968                 .proc_handler   = &proc_dointvec,
969         },
970 #endif
971 #ifdef CONFIG_KMEMCHECK
972         {
973                 .ctl_name       = CTL_UNNUMBERED,
974                 .procname       = "kmemcheck",
975                 .data           = &kmemcheck_enabled,
976                 .maxlen         = sizeof(int),
977                 .mode           = 0644,
978                 .proc_handler   = &proc_dointvec,
979         },
980 #endif
981
982 /*
983  * NOTE: do not add new entries to this table unless you have read
984  * Documentation/sysctl/ctl_unnumbered.txt
985  */
986         { .ctl_name = 0 }
987 };
988
/*
 * vm_table - sysctl entries for memory-management tunables.
 *
 * Every entry pairs a name (.procname) with the variable it exposes
 * (.data, .maxlen), a permission mask (.mode) and the handler that
 * services accesses (.proc_handler).  Some entries additionally carry a
 * .strategy handler and .extra1/.extra2 pointers; judging by the names
 * of the objects they point at (zero, one_hundred, hugetlb_zero,
 * hugetlb_infinity, ...) these appear to be lower/upper bounds.
 * Entries inside #if/#ifdef blocks exist only when that configuration
 * is selected.  The array ends with a { .ctl_name = 0 } entry.
 *
 * Entries that set no .ctl_name (dirty_writeback_centisecs,
 * dirty_expire_centisecs, nr_hugepages) leave that field
 * zero-initialised.
 *
 * NOTE(review): the parent table that links to this array is outside
 * this view - presumably it is published as the "vm" directory; confirm.
 *
 * NOTE(review): block_dump, vfs_cache_pressure, legacy_va_layout,
 * zone_reclaim_mode and vdso_enabled set .extra1 = &zero but use
 * proc_dointvec, whereas swappiness and highmem_is_dirtyable use
 * proc_dointvec_minmax.  Confirm whether the lower bound is honoured
 * on the proc_dointvec path.
 */
989 static struct ctl_table vm_table[] = {
/* Overcommit and out-of-memory policy knobs. */
990         {
991                 .ctl_name       = VM_OVERCOMMIT_MEMORY,
992                 .procname       = "overcommit_memory",
993                 .data           = &sysctl_overcommit_memory,
994                 .maxlen         = sizeof(sysctl_overcommit_memory),
995                 .mode           = 0644,
996                 .proc_handler   = &proc_dointvec,
997         },
998         {
999                 .ctl_name       = VM_PANIC_ON_OOM,
1000                 .procname       = "panic_on_oom",
1001                 .data           = &sysctl_panic_on_oom,
1002                 .maxlen         = sizeof(sysctl_panic_on_oom),
1003                 .mode           = 0644,
1004                 .proc_handler   = &proc_dointvec,
1005         },
1006         {
1007                 .ctl_name       = CTL_UNNUMBERED,
1008                 .procname       = "oom_kill_allocating_task",
1009                 .data           = &sysctl_oom_kill_allocating_task,
1010                 .maxlen         = sizeof(sysctl_oom_kill_allocating_task),
1011                 .mode           = 0644,
1012                 .proc_handler   = &proc_dointvec,
1013         },
1014         {
1015                 .ctl_name       = CTL_UNNUMBERED,
1016                 .procname       = "oom_dump_tasks",
1017                 .data           = &sysctl_oom_dump_tasks,
1018                 .maxlen         = sizeof(sysctl_oom_dump_tasks),
1019                 .mode           = 0644,
1020                 .proc_handler   = &proc_dointvec,
1021         },
1022         {
1023                 .ctl_name       = VM_OVERCOMMIT_RATIO,
1024                 .procname       = "overcommit_ratio",
1025                 .data           = &sysctl_overcommit_ratio,
1026                 .maxlen         = sizeof(sysctl_overcommit_ratio),
1027                 .mode           = 0644,
1028                 .proc_handler   = &proc_dointvec,
1029         },
1030         {
1031                 .ctl_name       = VM_PAGE_CLUSTER,
1032                 .procname       = "page-cluster", 
1033                 .data           = &page_cluster,
1034                 .maxlen         = sizeof(int),
1035                 .mode           = 0644,
1036                 .proc_handler   = &proc_dointvec,
1037         },
/*
 * Dirty-page thresholds.  Each limit exists in a *_ratio form (with
 * &zero / &one_hundred in extra1/extra2) and a *_bytes form (extra1
 * only), and each of the four has its own dedicated handler.
 */
1038         {
1039                 .ctl_name       = VM_DIRTY_BACKGROUND,
1040                 .procname       = "dirty_background_ratio",
1041                 .data           = &dirty_background_ratio,
1042                 .maxlen         = sizeof(dirty_background_ratio),
1043                 .mode           = 0644,
1044                 .proc_handler   = &dirty_background_ratio_handler,
1045                 .strategy       = &sysctl_intvec,
1046                 .extra1         = &zero,
1047                 .extra2         = &one_hundred,
1048         },
1049         {
1050                 .ctl_name       = CTL_UNNUMBERED,
1051                 .procname       = "dirty_background_bytes",
1052                 .data           = &dirty_background_bytes,
1053                 .maxlen         = sizeof(dirty_background_bytes),
1054                 .mode           = 0644,
1055                 .proc_handler   = &dirty_background_bytes_handler,
1056                 .strategy       = &sysctl_intvec,
1057                 .extra1         = &one_ul,
1058         },
1059         {
1060                 .ctl_name       = VM_DIRTY_RATIO,
1061                 .procname       = "dirty_ratio",
1062                 .data           = &vm_dirty_ratio,
1063                 .maxlen         = sizeof(vm_dirty_ratio),
1064                 .mode           = 0644,
1065                 .proc_handler   = &dirty_ratio_handler,
1066                 .strategy       = &sysctl_intvec,
1067                 .extra1         = &zero,
1068                 .extra2         = &one_hundred,
1069         },
1070         {
1071                 .ctl_name       = CTL_UNNUMBERED,
1072                 .procname       = "dirty_bytes",
1073                 .data           = &vm_dirty_bytes,
1074                 .maxlen         = sizeof(vm_dirty_bytes),
1075                 .mode           = 0644,
1076                 .proc_handler   = &dirty_bytes_handler,
1077                 .strategy       = &sysctl_intvec,
1078                 .extra1         = &dirty_bytes_min,
1079         },
1080         {
1081                 .procname       = "dirty_writeback_centisecs",
1082                 .data           = &dirty_writeback_interval,
1083                 .maxlen         = sizeof(dirty_writeback_interval),
1084                 .mode           = 0644,
1085                 .proc_handler   = &dirty_writeback_centisecs_handler,
1086         },
1087         {
1088                 .procname       = "dirty_expire_centisecs",
1089                 .data           = &dirty_expire_interval,
1090                 .maxlen         = sizeof(dirty_expire_interval),
1091                 .mode           = 0644,
1092                 .proc_handler   = &proc_dointvec,
1093         },
1094         {
1095                 .ctl_name       = VM_NR_PDFLUSH_THREADS,
1096                 .procname       = "nr_pdflush_threads",
1097                 .data           = &nr_pdflush_threads,
1098                 .maxlen         = sizeof nr_pdflush_threads,
1099                 .mode           = 0444 /* read-only*/,
1100                 .proc_handler   = &proc_dointvec,
1101         },
1102         {
1103                 .ctl_name       = VM_SWAPPINESS,
1104                 .procname       = "swappiness",
1105                 .data           = &vm_swappiness,
1106                 .maxlen         = sizeof(vm_swappiness),
1107                 .mode           = 0644,
1108                 .proc_handler   = &proc_dointvec_minmax,
1109                 .strategy       = &sysctl_intvec,
1110                 .extra1         = &zero,
1111                 .extra2         = &one_hundred,
1112         },
/*
 * Huge-page controls.  nr_hugepages and nr_overcommit_hugepages have
 * .data = NULL, so their handlers must locate the value themselves.
 */
1113 #ifdef CONFIG_HUGETLB_PAGE
1114          {
1115                 .procname       = "nr_hugepages",
1116                 .data           = NULL,
1117                 .maxlen         = sizeof(unsigned long),
1118                 .mode           = 0644,
1119                 .proc_handler   = &hugetlb_sysctl_handler,
1120                 .extra1         = (void *)&hugetlb_zero,
1121                 .extra2         = (void *)&hugetlb_infinity,
1122          },
1123          {
1124                 .ctl_name       = VM_HUGETLB_GROUP,
1125                 .procname       = "hugetlb_shm_group",
1126                 .data           = &sysctl_hugetlb_shm_group,
1127                 .maxlen         = sizeof(gid_t),
1128                 .mode           = 0644,
1129                 .proc_handler   = &proc_dointvec,
1130          },
1131          {
1132                 .ctl_name       = CTL_UNNUMBERED,
1133                 .procname       = "hugepages_treat_as_movable",
1134                 .data           = &hugepages_treat_as_movable,
1135                 .maxlen         = sizeof(int),
1136                 .mode           = 0644,
1137                 .proc_handler   = &hugetlb_treat_movable_handler,
1138         },
1139         {
1140                 .ctl_name       = CTL_UNNUMBERED,
1141                 .procname       = "nr_overcommit_hugepages",
1142                 .data           = NULL,
1143                 .maxlen         = sizeof(unsigned long),
1144                 .mode           = 0644,
1145                 .proc_handler   = &hugetlb_overcommit_handler,
1146                 .extra1         = (void *)&hugetlb_zero,
1147                 .extra2         = (void *)&hugetlb_infinity,
1148         },
1149 #endif
1150         {
1151                 .ctl_name       = VM_LOWMEM_RESERVE_RATIO,
1152                 .procname       = "lowmem_reserve_ratio",
1153                 .data           = &sysctl_lowmem_reserve_ratio,
1154                 .maxlen         = sizeof(sysctl_lowmem_reserve_ratio),
1155                 .mode           = 0644,
1156                 .proc_handler   = &lowmem_reserve_ratio_sysctl_handler,
1157                 .strategy       = &sysctl_intvec,
1158         },
1159         {
1160                 .ctl_name       = VM_DROP_PAGECACHE,
1161                 .procname       = "drop_caches",
1162                 .data           = &sysctl_drop_caches,
1163                 .maxlen         = sizeof(int),
1164                 .mode           = 0644,
1165                 .proc_handler   = drop_caches_sysctl_handler,
1166                 .strategy       = &sysctl_intvec,
1167         },
1168         {
1169                 .ctl_name       = VM_MIN_FREE_KBYTES,
1170                 .procname       = "min_free_kbytes",
1171                 .data           = &min_free_kbytes,
1172                 .maxlen         = sizeof(min_free_kbytes),
1173                 .mode           = 0644,
1174                 .proc_handler   = &min_free_kbytes_sysctl_handler,
1175                 .strategy       = &sysctl_intvec,
1176                 .extra1         = &zero,
1177         },
1178         {
1179                 .ctl_name       = VM_PERCPU_PAGELIST_FRACTION,
1180                 .procname       = "percpu_pagelist_fraction",
1181                 .data           = &percpu_pagelist_fraction,
1182                 .maxlen         = sizeof(percpu_pagelist_fraction),
1183                 .mode           = 0644,
1184                 .proc_handler   = &percpu_pagelist_fraction_sysctl_handler,
1185                 .strategy       = &sysctl_intvec,
1186                 .extra1         = &min_percpu_pagelist_fract,
1187         },
/* Exactly one of max_map_count (MMU) or nr_trim_pages (!MMU) is built. */
1188 #ifdef CONFIG_MMU
1189         {
1190                 .ctl_name       = VM_MAX_MAP_COUNT,
1191                 .procname       = "max_map_count",
1192                 .data           = &sysctl_max_map_count,
1193                 .maxlen         = sizeof(sysctl_max_map_count),
1194                 .mode           = 0644,
1195                 .proc_handler   = &proc_dointvec
1196         },
1197 #else
1198         {
1199                 .ctl_name       = CTL_UNNUMBERED,
1200                 .procname       = "nr_trim_pages",
1201                 .data           = &sysctl_nr_trim_pages,
1202                 .maxlen         = sizeof(sysctl_nr_trim_pages),
1203                 .mode           = 0644,
1204                 .proc_handler   = &proc_dointvec_minmax,
1205                 .strategy       = &sysctl_intvec,
1206                 .extra1         = &zero,
1207         },
1208 #endif
1209         {
1210                 .ctl_name       = VM_LAPTOP_MODE,
1211                 .procname       = "laptop_mode",
1212                 .data           = &laptop_mode,
1213                 .maxlen         = sizeof(laptop_mode),
1214                 .mode           = 0644,
1215                 .proc_handler   = &proc_dointvec_jiffies,
1216                 .strategy       = &sysctl_jiffies,
1217         },
1218         {
1219                 .ctl_name       = VM_BLOCK_DUMP,
1220                 .procname       = "block_dump",
1221                 .data           = &block_dump,
1222                 .maxlen         = sizeof(block_dump),
1223                 .mode           = 0644,
1224                 .proc_handler   = &proc_dointvec,
1225                 .strategy       = &sysctl_intvec,
1226                 .extra1         = &zero,
1227         },
1228         {
1229                 .ctl_name       = VM_VFS_CACHE_PRESSURE,
1230                 .procname       = "vfs_cache_pressure",
1231                 .data           = &sysctl_vfs_cache_pressure,
1232                 .maxlen         = sizeof(sysctl_vfs_cache_pressure),
1233                 .mode           = 0644,
1234                 .proc_handler   = &proc_dointvec,
1235                 .strategy       = &sysctl_intvec,
1236                 .extra1         = &zero,
1237         },
1238 #ifdef HAVE_ARCH_PICK_MMAP_LAYOUT
1239         {
1240                 .ctl_name       = VM_LEGACY_VA_LAYOUT,
1241                 .procname       = "legacy_va_layout",
1242                 .data           = &sysctl_legacy_va_layout,
1243                 .maxlen         = sizeof(sysctl_legacy_va_layout),
1244                 .mode           = 0644,
1245                 .proc_handler   = &proc_dointvec,
1246                 .strategy       = &sysctl_intvec,
1247                 .extra1         = &zero,
1248         },
1249 #endif
/* NUMA zone-reclaim tuning; a second CONFIG_NUMA block follows further down. */
1250 #ifdef CONFIG_NUMA
1251         {
1252                 .ctl_name       = VM_ZONE_RECLAIM_MODE,
1253                 .procname       = "zone_reclaim_mode",
1254                 .data           = &zone_reclaim_mode,
1255                 .maxlen         = sizeof(zone_reclaim_mode),
1256                 .mode           = 0644,
1257                 .proc_handler   = &proc_dointvec,
1258                 .strategy       = &sysctl_intvec,
1259                 .extra1         = &zero,
1260         },
1261         {
1262                 .ctl_name       = VM_MIN_UNMAPPED,
1263                 .procname       = "min_unmapped_ratio",
1264                 .data           = &sysctl_min_unmapped_ratio,
1265                 .maxlen         = sizeof(sysctl_min_unmapped_ratio),
1266                 .mode           = 0644,
1267                 .proc_handler   = &sysctl_min_unmapped_ratio_sysctl_handler,
1268                 .strategy       = &sysctl_intvec,
1269                 .extra1         = &zero,
1270                 .extra2         = &one_hundred,
1271         },
1272         {
1273                 .ctl_name       = VM_MIN_SLAB,
1274                 .procname       = "min_slab_ratio",
1275                 .data           = &sysctl_min_slab_ratio,
1276                 .maxlen         = sizeof(sysctl_min_slab_ratio),
1277                 .mode           = 0644,
1278                 .proc_handler   = &sysctl_min_slab_ratio_sysctl_handler,
1279                 .strategy       = &sysctl_intvec,
1280                 .extra1         = &zero,
1281                 .extra2         = &one_hundred,
1282         },
1283 #endif
1284 #ifdef CONFIG_SMP
1285         {
1286                 .ctl_name       = CTL_UNNUMBERED,
1287                 .procname       = "stat_interval",
1288                 .data           = &sysctl_stat_interval,
1289                 .maxlen         = sizeof(sysctl_stat_interval),
1290                 .mode           = 0644,
1291                 .proc_handler   = &proc_dointvec_jiffies,
1292                 .strategy       = &sysctl_jiffies,
1293         },
1294 #endif
1295         {
1296                 .ctl_name       = CTL_UNNUMBERED,
1297                 .procname       = "mmap_min_addr",
1298                 .data           = &mmap_min_addr,
1299                 .maxlen         = sizeof(unsigned long),
1300                 .mode           = 0644,
1301                 .proc_handler   = &proc_doulongvec_minmax,
1302         },
1303 #ifdef CONFIG_NUMA
1304         {
1305                 .ctl_name       = CTL_UNNUMBERED,
1306                 .procname       = "numa_zonelist_order",
1307                 .data           = &numa_zonelist_order,
1308                 .maxlen         = NUMA_ZONELIST_ORDER_LEN,
1309                 .mode           = 0644,
1310                 .proc_handler   = &numa_zonelist_order_handler,
1311                 .strategy       = &sysctl_string,
1312         },
1313 #endif
1314 #if (defined(CONFIG_X86_32) && !defined(CONFIG_UML))|| \
1315    (defined(CONFIG_SUPERH) && defined(CONFIG_VSYSCALL))
1316         {
1317                 .ctl_name       = VM_VDSO_ENABLED,
1318                 .procname       = "vdso_enabled",
1319                 .data           = &vdso_enabled,
1320                 .maxlen         = sizeof(vdso_enabled),
1321                 .mode           = 0644,
1322                 .proc_handler   = &proc_dointvec,
1323                 .strategy       = &sysctl_intvec,
1324                 .extra1         = &zero,
1325         },
1326 #endif
1327 #ifdef CONFIG_HIGHMEM
1328         {
1329                 .ctl_name       = CTL_UNNUMBERED,
1330                 .procname       = "highmem_is_dirtyable",
1331                 .data           = &vm_highmem_is_dirtyable,
1332                 .maxlen         = sizeof(vm_highmem_is_dirtyable),
1333                 .mode           = 0644,
1334                 .proc_handler   = &proc_dointvec_minmax,
1335                 .strategy       = &sysctl_intvec,
1336                 .extra1         = &zero,
1337                 .extra2         = &one,
1338         },
1339 #endif
1340         {
1341                 .ctl_name       = CTL_UNNUMBERED,
1342                 .procname       = "scan_unevictable_pages",
1343                 .data           = &scan_unevictable_pages,
1344                 .maxlen         = sizeof(scan_unevictable_pages),
1345                 .mode           = 0644,
1346                 .proc_handler   = &scan_unevictable_handler,
1347         },
1348 /*
1349  * NOTE: do not add new entries to this table unless you have read
1350  * Documentation/sysctl/ctl_unnumbered.txt
1351  */
1352         { .ctl_name = 0 }
1353 };
1354
/*
 * binfmt_misc_table - entry-less table holding only the terminating
 * sentinel.  It is built when binfmt_misc is configured (built-in or as
 * a module) and is referenced as the .child of the "binfmt_misc" entry
 * in fs_table.
 */
1355 #if defined(CONFIG_BINFMT_MISC) || defined(CONFIG_BINFMT_MISC_MODULE)
1356 static struct ctl_table binfmt_misc_table[] = {
1357         { .ctl_name = 0 }
1358 };
1359 #endif
1360
/*
 * fs_table - sysctl entries for filesystem-wide counters and limits.
 *
 * Mode 0444 entries only export state: "inode-nr" and "inode-state"
 * both start at inodes_stat (covering 2 and 7 ints respectively),
 * "file-nr" covers 3 ints of files_stat, "dentry-state" covers 6 ints
 * of dentry_stat, and "aio-nr" exports aio_nr.  Mode 0644 entries are
 * writable tunables.  Mode 0555 entries ("inotify", "epoll",
 * "binfmt_misc") carry no .data and instead point at a .child table.
 *
 * Entries inside #if/#ifdef blocks exist only when that configuration
 * is selected.  The array ends with a { .ctl_name = 0 } entry.
 *
 * NOTE(review): the CONFIG_MMU block opened before "lease-break-time"
 * is not closed until after the "epoll" entry, so the aio, inotify and
 * epoll entries are also compiled out when CONFIG_MMU is unset -
 * confirm that this is intended.
 *
 * NOTE(review): "suid_dumpable" lives in this table but uses a KERN_*
 * ctl_name (KERN_SETUID_DUMPABLE) rather than an FS_* one; presumably
 * historical - verify before renumbering anything.
 */
1361 static struct ctl_table fs_table[] = {
1362         {
1363                 .ctl_name       = FS_NRINODE,
1364                 .procname       = "inode-nr",
1365                 .data           = &inodes_stat,
1366                 .maxlen         = 2*sizeof(int),
1367                 .mode           = 0444,
1368                 .proc_handler   = &proc_dointvec,
1369         },
1370         {
1371                 .ctl_name       = FS_STATINODE,
1372                 .procname       = "inode-state",
1373                 .data           = &inodes_stat,
1374                 .maxlen         = 7*sizeof(int),
1375                 .mode           = 0444,
1376                 .proc_handler   = &proc_dointvec,
1377         },
1378         {
1379                 .procname       = "file-nr",
1380                 .data           = &files_stat,
1381                 .maxlen         = 3*sizeof(int),
1382                 .mode           = 0444,
1383                 .proc_handler   = &proc_nr_files,
1384         },
1385         {
1386                 .ctl_name       = FS_MAXFILE,
1387                 .procname       = "file-max",
1388                 .data           = &files_stat.max_files,
1389                 .maxlen         = sizeof(int),
1390                 .mode           = 0644,
1391                 .proc_handler   = &proc_dointvec,
1392         },
1393         {
1394                 .ctl_name       = CTL_UNNUMBERED,
1395                 .procname       = "nr_open",
1396                 .data           = &sysctl_nr_open,
1397                 .maxlen         = sizeof(int),
1398                 .mode           = 0644,
1399                 .proc_handler   = &proc_dointvec_minmax,
1400                 .extra1         = &sysctl_nr_open_min,
1401                 .extra2         = &sysctl_nr_open_max,
1402         },
1403         {
1404                 .ctl_name       = FS_DENTRY,
1405                 .procname       = "dentry-state",
1406                 .data           = &dentry_stat,
1407                 .maxlen         = 6*sizeof(int),
1408                 .mode           = 0444,
1409                 .proc_handler   = &proc_dointvec,
1410         },
/* overflowuid and overflowgid share the same minolduid/maxolduid pair. */
1411         {
1412                 .ctl_name       = FS_OVERFLOWUID,
1413                 .procname       = "overflowuid",
1414                 .data           = &fs_overflowuid,
1415                 .maxlen         = sizeof(int),
1416                 .mode           = 0644,
1417                 .proc_handler   = &proc_dointvec_minmax,
1418                 .strategy       = &sysctl_intvec,
1419                 .extra1         = &minolduid,
1420                 .extra2         = &maxolduid,
1421         },
1422         {
1423                 .ctl_name       = FS_OVERFLOWGID,
1424                 .procname       = "overflowgid",
1425                 .data           = &fs_overflowgid,
1426                 .maxlen         = sizeof(int),
1427                 .mode           = 0644,
1428                 .proc_handler   = &proc_dointvec_minmax,
1429                 .strategy       = &sysctl_intvec,
1430                 .extra1         = &minolduid,
1431                 .extra2         = &maxolduid,
1432         },
1433 #ifdef CONFIG_FILE_LOCKING
1434         {
1435                 .ctl_name       = FS_LEASES,
1436                 .procname       = "leases-enable",
1437                 .data           = &leases_enable,
1438                 .maxlen         = sizeof(int),
1439                 .mode           = 0644,
1440                 .proc_handler   = &proc_dointvec,
1441         },
1442 #endif
1443 #ifdef CONFIG_DNOTIFY
1444         {
1445                 .ctl_name       = FS_DIR_NOTIFY,
1446                 .procname       = "dir-notify-enable",
1447                 .data           = &dir_notify_enable,
1448                 .maxlen         = sizeof(int),
1449                 .mode           = 0644,
1450                 .proc_handler   = &proc_dointvec,
1451         },
1452 #endif
/* Everything from here to the matching #endif requires CONFIG_MMU. */
1453 #ifdef CONFIG_MMU
1454 #ifdef CONFIG_FILE_LOCKING
1455         {
1456                 .ctl_name       = FS_LEASE_TIME,
1457                 .procname       = "lease-break-time",
1458                 .data           = &lease_break_time,
1459                 .maxlen         = sizeof(int),
1460                 .mode           = 0644,
1461                 .proc_handler   = &proc_dointvec,
1462         },
1463 #endif
1464 #ifdef CONFIG_AIO
1465         {
1466                 .procname       = "aio-nr",
1467                 .data           = &aio_nr,
1468                 .maxlen         = sizeof(aio_nr),
1469                 .mode           = 0444,
1470                 .proc_handler   = &proc_doulongvec_minmax,
1471         },
1472         {
1473                 .procname       = "aio-max-nr",
1474                 .data           = &aio_max_nr,
1475                 .maxlen         = sizeof(aio_max_nr),
1476                 .mode           = 0644,
1477                 .proc_handler   = &proc_doulongvec_minmax,
1478         },
1479 #endif /* CONFIG_AIO */
1480 #ifdef CONFIG_INOTIFY_USER
1481         {
1482                 .ctl_name       = FS_INOTIFY,
1483                 .procname       = "inotify",
1484                 .mode           = 0555,
1485                 .child          = inotify_table,
1486         },
1487 #endif  
1488 #ifdef CONFIG_EPOLL
1489         {
1490                 .procname       = "epoll",
1491                 .mode           = 0555,
1492                 .child          = epoll_table,
1493         },
1494 #endif
1495 #endif
1496         {
1497                 .ctl_name       = KERN_SETUID_DUMPABLE,
1498                 .procname       = "suid_dumpable",
1499                 .data           = &suid_dumpable,
1500                 .maxlen         = sizeof(int),
1501                 .mode           = 0644,
1502                 .proc_handler   = &proc_dointvec_minmax,
1503                 .strategy       = &sysctl_intvec,
1504                 .extra1         = &zero,
1505                 .extra2         = &two,
1506         },
1507 #if defined(CONFIG_BINFMT_MISC) || defined(CONFIG_BINFMT_MISC_MODULE)
1508         {
1509                 .ctl_name       = CTL_UNNUMBERED,
1510                 .procname       = "binfmt_misc",
1511                 .mode           = 0555,
1512                 .child          = binfmt_misc_table,
1513         },
1514 #endif
1515 /*
1516  * NOTE: do not add new entries to this table unless you have read
1517  * Documentation/sysctl/ctl_unnumbered.txt
1518  */
1519         { .ctl_name = 0 }
1520 };
1521
/*
 * debug_table - debugging sysctls.
 *
 * On x86 and PowerPC builds it holds a single writable integer,
 * "exception-trace", backed by show_unhandled_signals.  On every other
 * configuration the table consists of the terminating sentinel only.
 */
1522 static struct ctl_table debug_table[] = {
1523 #if defined(CONFIG_X86) || defined(CONFIG_PPC)
1524         {
1525                 .ctl_name       = CTL_UNNUMBERED,
1526                 .procname       = "exception-trace",
1527                 .data           = &show_unhandled_signals,
1528                 .maxlen         = sizeof(int),
1529                 .mode           = 0644,
1530                 .proc_handler   = proc_dointvec
1531         },
1532 #endif
1533         { .ctl_name = 0 }
1534 };
1535
/*
 * dev_table - contains no entries of its own, only the terminating
 * sentinel.
 * NOTE(review): presumably exists so a directory is present for tables
 * registered elsewhere - confirm against the code that references it.
 */
1536 static struct ctl_table dev_table[] = {
1537         { .ctl_name = 0 }
1538 };
1539
/*
 * Spinlock guarding ctl_table_header bookkeeping: the helpers below
 * touch ->used, ->unregistering, ->count and the ->ctl_entry linkage
 * either while holding it or with the caller documented as holding it.
 */
1540 static DEFINE_SPINLOCK(sysctl_lock);
1541
1542 /* called under sysctl_lock */
1543 static int use_table(struct ctl_table_header *p)
1544 {
1545         if (unlikely(p->unregistering))
1546                 return 0;
1547         p->used++;
1548         return 1;
1549 }
1550
1551 /* called under sysctl_lock */
1552 static void unuse_table(struct ctl_table_header *p)
1553 {
1554         if (!--p->used)
1555                 if (unlikely(p->unregistering))
1556                         complete(p->unregistering);
1557 }
1558
1559 /* called under sysctl_lock, will reacquire if has to wait */
1560 static void start_unregistering(struct ctl_table_header *p)
1561 {
1562         /*
1563          * if p->used is 0, nobody will ever touch that entry again;
1564          * we'll eliminate all paths to it before dropping sysctl_lock
1565          */
1566         if (unlikely(p->used)) {
1567                 struct completion wait;
1568                 init_completion(&wait);
1569                 p->unregistering = &wait;
1570                 spin_unlock(&sysctl_lock);
1571                 wait_for_completion(&wait);
1572                 spin_lock(&sysctl_lock);
1573         } else {
1574                 /* anything non-NULL; we'll never dereference it */
1575                 p->unregistering = ERR_PTR(-EINVAL);
1576         }
1577         /*
1578          * do not remove from the list until nobody holds it; walking the
1579          * list in do_sysctl() relies on that.
1580          */
1581         list_del_init(&p->ctl_entry);
1582 }
1583
1584 void sysctl_head_get(struct ctl_table_header *head)
1585 {
1586         spin_lock(&sysctl_lock);
1587         head->count++;
1588         spin_unlock(&sysctl_lock);
1589 }
1590
1591 void sysctl_head_put(struct ctl_table_header *head)
1592 {
1593         spin_lock(&sysctl_lock);
1594         if (!--head->count)
1595                 kfree(head);
1596         spin_unlock(&sysctl_lock);
1597 }
1598
1599 struct ctl_table_header *sysctl_head_grab(struct ctl_table_header *head)
1600 {
1601         if (!head)
1602                 BUG();
1603         spin_lock(&sysctl_lock);
1604         if (!use_table(head))
1605                 head = ERR_PTR(-ENOENT);
1606         spin_unlock(&sysctl_lock);
1607         return head;
1608 }
1609
1610 void sysctl_head_finish(struct ctl_table_header *head)
1611 {
1612         if (!head)
1613                 return;
1614         spin_lock(&sysctl_lock);
1615         unuse_table(head);
1616         spin_unlock(&sysctl_lock);
1617 }
1618
1619 static struct ctl_table_set *
1620 lookup_header_set(struct ctl_table_root *root, struct nsproxy *namespaces)
1621 {
1622         struct ctl_table_set *set = &root->default_set;
1623         if (root->lookup)
1624                 set = root->lookup(root, namespaces);
1625         return set;
1626 }
1627
1628 static struct list_head *
1629 lookup_header_list(struct ctl_table_root *root, struct nsproxy *namespaces)
1630 {
1631         struct ctl_table_set *set = lookup_header_set(root, namespaces);
1632         return &set->list;
1633 }
1634
1635 struct ctl_table_header *__sysctl_head_next(struct nsproxy *namespaces,
1636                                             struct ctl_table_header *prev)
1637 {
1638         struct ctl_table_root *root;
1639         struct list_head *header_list;
1640         struct ctl_table_header *head;
1641         struct list_head *tmp;
1642
1643         spin_lock(&sysctl_lock);
1644         if (prev) {
1645                 head = prev;
1646                 tmp = &prev->ctl_entry;
1647                 unuse_table(prev);
1648                 goto next;
1649         }
1650         tmp = &root_table_header.ctl_entry;
1651         for (;;) {
1652                 head = list_entry(tmp, struct ctl_table_header, ctl_entry);
1653
1654                 if (!use_table(head))
1655                         goto next;
1656                 spin_unlock(&sysctl_lock);
1657                 return head;
1658         next:
1659                 root = head->root;
1660                 tmp = tmp->next;
1661                 header_list = lookup_header_list(root, namespaces);
1662                 if (tmp != header_list)
1663                         continue;
1664
1665                 do {
1666                         root = list_entry(root->root_list.next,
1667                                         struct ctl_table_root, root_list);
1668                         if (root == &sysctl_table_root)
1669                                 goto out;
1670                         header_list = lookup_header_list(root, namespaces);
1671                 } while (list_empty(header_list));
1672                 tmp = header_list->next;
1673         }
1674 out:
1675         spin_unlock(&sysctl_lock);
1676         return NULL;
1677 }
1678
1679 struct ctl_table_header *sysctl_head_next(struct ctl_table_header *prev)
1680 {
1681         return __sysctl_head_next(current->nsproxy, prev);
1682 }
1683
1684 void register_sysctl_root(struct ctl_table_root *root)
1685 {
1686         spin_lock(&sysctl_lock);
1687         list_add_tail(&root->root_list, &sysctl_table_root.root_list);
1688         spin_unlock(&sysctl_lock);
1689 }
1690
1691 #ifdef CONFIG_SYSCTL_SYSCALL
1692 /* Perform the actual read/write of a sysctl table entry. */
1693 static int do_sysctl_strategy(struct ctl_table_root *root,
1694                         struct ctl_table *table,
1695                         void __user *oldval, size_t __user *oldlenp,
1696                         void __user *newval, size_t newlen)
1697 {
1698         int op = 0, rc;
1699
1700         if (oldval)
1701                 op |= MAY_READ;
1702         if (newval)
1703                 op |= MAY_WRITE;
1704         if (sysctl_perm(root, table, op))
1705                 return -EPERM;
1706
1707         if (table->strategy) {
1708                 rc = table->strategy(table, oldval, oldlenp, newval, newlen);
1709                 if (rc < 0)
1710                         return rc;
1711                 if (rc > 0)
1712                         return 0;
1713         }
1714
1715         /* If there is no strategy routine, or if the strategy returns
1716          * zero, proceed with automatic r/w */
1717         if (table->data && table->maxlen) {
1718                 rc = sysctl_data(table, oldval, oldlenp, newval, newlen);
1719                 if (rc < 0)
1720                         return rc;
1721         }
1722         return 0;
1723 }
1724
1725 static int parse_table(int __user *name, int nlen,
1726                        void __user *oldval, size_t __user *oldlenp,
1727                        void __user *newval, size_t newlen,
1728                        struct ctl_table_root *root,
1729                        struct ctl_table *table)
1730 {
1731         int n;
1732 repeat:
1733         if (!nlen)
1734                 return -ENOTDIR;
1735         if (get_user(n, name))
1736                 return -EFAULT;
1737         for ( ; table->ctl_name || table->procname; table++) {
1738                 if (!table->ctl_name)
1739                         continue;
1740                 if (n == table->ctl_name) {
1741                         int error;
1742                         if (table->child) {
1743                                 if (sysctl_perm(root, table, MAY_EXEC))
1744                                         return -EPERM;
1745                                 name++;
1746                                 nlen--;
1747                                 table = table->child;
1748                                 goto repeat;
1749                         }
1750                         error = do_sysctl_strategy(root, table,
1751                                                    oldval, oldlenp,
1752                                                    newval, newlen);
1753                         return error;
1754                 }
1755         }
1756         return -ENOTDIR;
1757 }
1758
1759 int do_sysctl(int __user *name, int nlen, void __user *oldval, size_t __user *oldlenp,
1760                void __user *newval, size_t newlen)
1761 {
1762         struct ctl_table_header *head;
1763         int error = -ENOTDIR;
1764
1765         if (nlen <= 0 || nlen >= CTL_MAXNAME)
1766                 return -ENOTDIR;
1767         if (oldval) {
1768                 int old_len;
1769                 if (!oldlenp || get_user(old_len, oldlenp))
1770                         return -EFAULT;
1771         }
1772
1773         for (head = sysctl_head_next(NULL); head;
1774                         head = sysctl_head_next(head)) {
1775                 error = parse_table(name, nlen, oldval, oldlenp, 
1776                                         newval, newlen,
1777                                         head->root, head->ctl_table);
1778                 if (error != -ENOTDIR) {
1779                         sysctl_head_finish(head);
1780                         break;
1781                 }
1782         }
1783         return error;
1784 }
1785
1786 SYSCALL_DEFINE1(sysctl, struct __sysctl_args __user *, args)
1787 {
1788         struct __sysctl_args tmp;
1789         int error;
1790
1791         if (copy_from_user(&tmp, args, sizeof(tmp)))
1792                 return -EFAULT;
1793
1794         error = deprecated_sysctl_warning(&tmp);
1795         if (error)
1796                 goto out;
1797
1798         lock_kernel();
1799         error = do_sysctl(tmp.name, tmp.nlen, tmp.oldval, tmp.oldlenp,
1800                           tmp.newval, tmp.newlen);
1801         unlock_kernel();
1802 out:
1803         return error;
1804 }
1805 #endif /* CONFIG_SYSCTL_SYSCALL */
1806
1807 /*
1808  * sysctl_perm does NOT grant the superuser all rights automatically, because
1809  * some sysctl variables are readonly even to root.
1810  */
1811
1812 static int test_perm(int mode, int op)
1813 {
1814         if (!current_euid())
1815                 mode >>= 6;
1816         else if (in_egroup_p(0))
1817                 mode >>= 3;
1818         if ((op & ~mode & (MAY_READ|MAY_WRITE|MAY_EXEC)) == 0)
1819                 return 0;
1820         return -EACCES;
1821 }
1822
1823 int sysctl_perm(struct ctl_table_root *root, struct ctl_table *table, int op)
1824 {
1825         int error;
1826         int mode;
1827
1828         error = security_sysctl(table, op & (MAY_READ | MAY_WRITE | MAY_EXEC));
1829         if (error)
1830                 return error;
1831
1832         if (root->permissions)
1833                 mode = root->permissions(root, current->nsproxy, table);
1834         else
1835                 mode = table->mode;
1836
1837         return test_perm(mode, op);
1838 }
1839
1840 static void sysctl_set_parent(struct ctl_table *parent, struct ctl_table *table)
1841 {
1842         for (; table->ctl_name || table->procname; table++) {
1843                 table->parent = parent;
1844                 if (table->child)
1845                         sysctl_set_parent(table, table->child);
1846         }
1847 }
1848
1849 static __init int sysctl_init(void)
1850 {
1851         sysctl_set_parent(NULL, root_table);
1852 #ifdef CONFIG_SYSCTL_SYSCALL_CHECK
1853         {
1854                 int err;
1855                 err = sysctl_check_table(current->nsproxy, root_table);
1856         }
1857 #endif
1858         return 0;
1859 }
1860
1861 core_initcall(sysctl_init);
1862
1863 static struct ctl_table *is_branch_in(struct ctl_table *branch,
1864                                       struct ctl_table *table)
1865 {
1866         struct ctl_table *p;
1867         const char *s = branch->procname;
1868
1869         /* branch should have named subdirectory as its first element */
1870         if (!s || !branch->child)
1871                 return NULL;
1872
1873         /* ... and nothing else */
1874         if (branch[1].procname || branch[1].ctl_name)
1875                 return NULL;
1876
1877         /* table should contain subdirectory with the same name */
1878         for (p = table; p->procname || p->ctl_name; p++) {
1879                 if (!p->child)
1880                         continue;
1881                 if (p->procname && strcmp(p->procname, s) == 0)
1882                         return p;
1883         }
1884         return NULL;
1885 }
1886
1887 /* see if attaching q to p would be an improvement */
1888 static void try_attach(struct ctl_table_header *p, struct ctl_table_header *q)
1889 {
1890         struct ctl_table *to = p->ctl_table, *by = q->ctl_table;
1891         struct ctl_table *next;
1892         int is_better = 0;
1893         int not_in_parent = !p->attached_by;
1894
1895         while ((next = is_branch_in(by, to)) != NULL) {
1896                 if (by == q->attached_by)
1897                         is_better = 1;
1898                 if (to == p->attached_by)
1899                         not_in_parent = 1;
1900                 by = by->child;
1901                 to = next->child;
1902         }
1903
1904         if (is_better && not_in_parent) {
1905                 q->attached_by = by;
1906                 q->attached_to = to;
1907                 q->parent = p;
1908         }
1909 }
1910
1911 /**
1912  * __register_sysctl_paths - register a sysctl hierarchy
1913  * @root: List of sysctl headers to register on
1914  * @namespaces: Data to compute which lists of sysctl entries are visible
1915  * @path: The path to the directory the sysctl table is in.
1916  * @table: the top-level table structure
1917  *
1918  * Register a sysctl table hierarchy. @table should be a filled in ctl_table
1919  * array. A completely 0 filled entry terminates the table.
1920  *
1921  * The members of the &struct ctl_table structure are used as follows:
1922  *
1923  * ctl_name - This is the numeric sysctl value used by sysctl(2). The number
1924  *            must be unique within that level of sysctl
1925  *
1926  * procname - the name of the sysctl file under /proc/sys. Set to %NULL to not
1927  *            enter a sysctl file
1928  *
1929  * data - a pointer to data for use by proc_handler
1930  *
1931  * maxlen - the maximum size in bytes of the data
1932  *
1933  * mode - the file permissions for the /proc/sys file, and for sysctl(2)
1934  *
1935  * child - a pointer to the child sysctl table if this entry is a directory, or
1936  *         %NULL.
1937  *
1938  * proc_handler - the text handler routine (described below)
1939  *
1940  * strategy - the strategy routine (described below)
1941  *
1942  * de - for internal use by the sysctl routines
1943  *
1944  * extra1, extra2 - extra pointers usable by the proc handler routines
1945  *
1946  * Leaf nodes in the sysctl tree will be represented by a single file
1947  * under /proc; non-leaf nodes will be represented by directories.
1948  *
1949  * sysctl(2) can automatically manage read and write requests through
1950  * the sysctl table.  The data and maxlen fields of the ctl_table
1951  * struct enable minimal validation of the values being written to be
1952  * performed, and the mode field allows minimal authentication.
1953  *
1954  * More sophisticated management can be enabled by the provision of a
1955  * strategy routine with the table entry.  This will be called before
1956  * any automatic read or write of the data is performed.
1957  *
1958  * The strategy routine may return
1959  *
1960  * < 0 - Error occurred (error is passed to user process)
1961  *
1962  * 0   - OK - proceed with automatic read or write.
1963  *
1964  * > 0 - OK - read or write has been done by the strategy routine, so
1965  *       return immediately.
1966  *
1967  * There must be a proc_handler routine for any terminal nodes
1968  * mirrored under /proc/sys (non-terminals are handled by a built-in
1969  * directory handler).  Several default handlers are available to
1970  * cover common cases -
1971  *
1972  * proc_dostring(), proc_dointvec(), proc_dointvec_jiffies(),
1973  * proc_dointvec_userhz_jiffies(), proc_dointvec_minmax(), 
1974  * proc_doulongvec_ms_jiffies_minmax(), proc_doulongvec_minmax()
1975  *
1976  * It is the handler's job to read the input buffer from user memory
1977  * and process it. The handler should return 0 on success.
1978  *
1979  * This routine returns %NULL on a failure to register, and a pointer
1980  * to the table header on success.
1981  */
1982 struct ctl_table_header *__register_sysctl_paths(
1983         struct ctl_table_root *root,
1984         struct nsproxy *namespaces,
1985         const struct ctl_path *path, struct ctl_table *table)
1986 {
1987         struct ctl_table_header *header;
1988         struct ctl_table *new, **prevp;
1989         unsigned int n, npath;
1990         struct ctl_table_set *set;
1991
1992         /* Count the path components */
1993         for (npath = 0; path[npath].ctl_name || path[npath].procname; ++npath)
1994                 ;
1995
1996         /*
1997          * For each path component, allocate a 2-element ctl_table array.
1998          * The first array element will be filled with the sysctl entry
1999          * for this, the second will be the sentinel (ctl_name == 0).
2000          *
2001          * We allocate everything in one go so that we don't have to
2002          * worry about freeing additional memory in unregister_sysctl_table.
2003          */
2004         header = kzalloc(sizeof(struct ctl_table_header) +
2005                          (2 * npath * sizeof(struct ctl_table)), GFP_KERNEL);
2006         if (!header)
2007                 return NULL;
2008
2009         new = (struct ctl_table *) (header + 1);
2010
2011         /* Now connect the dots */
2012         prevp = &header->ctl_table;
2013         for (n = 0; n < npath; ++n, ++path) {
2014                 /* Copy the procname */
2015                 new->procname = path->procname;
2016                 new->ctl_name = path->ctl_name;
2017                 new->mode     = 0555;
2018
2019                 *prevp = new;
2020                 prevp = &new->child;
2021
2022                 new += 2;
2023         }
2024         *prevp = table;
2025         header->ctl_table_arg = table;
2026
2027         INIT_LIST_HEAD(&header->ctl_entry);
2028         header->used = 0;
2029         header->unregistering = NULL;
2030         header->root = root;
2031         sysctl_set_parent(NULL, header->ctl_table);
2032         header->count = 1;
2033 #ifdef CONFIG_SYSCTL_SYSCALL_CHECK
2034         if (sysctl_check_table(namespaces, header->ctl_table)) {
2035                 kfree(header);
2036                 return NULL;
2037         }
2038 #endif
2039         spin_lock(&sysctl_lock);
2040         header->set = lookup_header_set(root, namespaces);
2041         header->attached_by = header->ctl_table;
2042         header->attached_to = root_table;
2043         header->parent = &root_table_header;
2044         for (set = header->set; set; set = set->parent) {
2045                 struct ctl_table_header *p;
2046                 list_for_each_entry(p, &set->list, ctl_entry) {
2047                         if (p->unregistering)
2048                                 continue;
2049                         try_attach(p, header);
2050                 }
2051         }
2052         header->parent->count++;
2053         list_add_tail(&header->ctl_entry, &header->set->list);
2054         spin_unlock(&sysctl_lock);
2055
2056         return header;
2057 }
2058
2059 /**
2060  * register_sysctl_table_path - register a sysctl table hierarchy
2061  * @path: The path to the directory the sysctl table is in.
2062  * @table: the top-level table structure
2063  *
2064  * Register a sysctl table hierarchy. @table should be a filled in ctl_table
2065  * array. A completely 0 filled entry terminates the table.
2066  *
2067  * See __register_sysctl_paths for more details.
2068  */
2069 struct ctl_table_header *register_sysctl_paths(const struct ctl_path *path,
2070                                                 struct ctl_table *table)
2071 {
2072         return __register_sysctl_paths(&sysctl_table_root, current->nsproxy,
2073                                         path, table);
2074 }
2075
2076 /**
2077  * register_sysctl_table - register a sysctl table hierarchy
2078  * @table: the top-level table structure
2079  *
2080  * Register a sysctl table hierarchy. @table should be a filled in ctl_table
2081  * array. A completely 0 filled entry terminates the table.
2082  *
2083  * See register_sysctl_paths for more details.
2084  */
2085 struct ctl_table_header *register_sysctl_table(struct ctl_table *table)
2086 {
2087         static const struct ctl_path null_path[] = { {} };
2088
2089         return register_sysctl_paths(null_path, table);
2090 }
2091
2092 /**
2093  * unregister_sysctl_table - unregister a sysctl table hierarchy
2094  * @header: the header returned from register_sysctl_table
2095  *
2096  * Unregisters the sysctl table and all children. proc entries may not
2097  * actually be removed until they are no longer used by anyone.
2098  */
2099 void unregister_sysctl_table(struct ctl_table_header * header)
2100 {
2101         might_sleep();
2102
2103         if (header == NULL)
2104                 return;
2105
2106         spin_lock(&sysctl_lock);
2107         start_unregistering(header);
2108         if (!--header->parent->count) {
2109                 WARN_ON(1);
2110                 kfree(header->parent);
2111         }
2112         if (!--header->count)
2113                 kfree(header);
2114         spin_unlock(&sysctl_lock);
2115 }
2116
2117 int sysctl_is_seen(struct ctl_table_header *p)
2118 {
2119         struct ctl_table_set *set = p->set;
2120         int res;
2121         spin_lock(&sysctl_lock);
2122         if (p->unregistering)
2123                 res = 0;
2124         else if (!set->is_seen)
2125                 res = 1;
2126         else
2127                 res = set->is_seen(set);
2128         spin_unlock(&sysctl_lock);
2129         return res;
2130 }
2131
2132 void setup_sysctl_set(struct ctl_table_set *p,
2133         struct ctl_table_set *parent,
2134         int (*is_seen)(struct ctl_table_set *))
2135 {
2136         INIT_LIST_HEAD(&p->list);
2137         p->parent = parent ? parent : &sysctl_table_root.default_set;
2138         p->is_seen = is_seen;
2139 }
2140
2141 #else /* !CONFIG_SYSCTL */
2142 struct ctl_table_header *register_sysctl_table(struct ctl_table * table)
2143 {
2144         return NULL;
2145 }
2146
2147 struct ctl_table_header *register_sysctl_paths(const struct ctl_path *path,
2148                                                     struct ctl_table *table)
2149 {
2150         return NULL;
2151 }
2152
2153 void unregister_sysctl_table(struct ctl_table_header * table)
2154 {
2155 }
2156
2157 void setup_sysctl_set(struct ctl_table_set *p,
2158         struct ctl_table_set *parent,
2159         int (*is_seen)(struct ctl_table_set *))
2160 {
2161 }
2162
2163 void sysctl_head_put(struct ctl_table_header *head)
2164 {
2165 }
2166
2167 #endif /* CONFIG_SYSCTL */
2168
2169 /*
2170  * /proc/sys support
2171  */
2172
2173 #ifdef CONFIG_PROC_SYSCTL
2174
2175 static int _proc_do_string(void* data, int maxlen, int write,
2176                            struct file *filp, void __user *buffer,
2177                            size_t *lenp, loff_t *ppos)
2178 {
2179         size_t len;
2180         char __user *p;
2181         char c;
2182
2183         if (!data || !maxlen || !*lenp) {
2184                 *lenp = 0;
2185                 return 0;
2186         }
2187
2188         if (write) {
2189                 len = 0;
2190                 p = buffer;
2191                 while (len < *lenp) {
2192                         if (get_user(c, p++))
2193                                 return -EFAULT;
2194                         if (c == 0 || c == '\n')
2195                                 break;
2196                         len++;
2197                 }
2198                 if (len >= maxlen)
2199                         len = maxlen-1;
2200                 if(copy_from_user(data, buffer, len))
2201                         return -EFAULT;
2202                 ((char *) data)[len] = 0;
2203                 *ppos += *lenp;
2204         } else {
2205                 len = strlen(data);
2206                 if (len > maxlen)
2207                         len = maxlen;
2208
2209                 if (*ppos > len) {
2210                         *lenp = 0;
2211                         return 0;
2212                 }
2213
2214                 data += *ppos;
2215                 len  -= *ppos;
2216
2217                 if (len > *lenp)
2218                         len = *lenp;
2219                 if (len)
2220                         if(copy_to_user(buffer, data, len))
2221                                 return -EFAULT;
2222                 if (len < *lenp) {
2223                         if(put_user('\n', ((char __user *) buffer) + len))
2224                                 return -EFAULT;
2225                         len++;
2226                 }
2227                 *lenp = len;
2228                 *ppos += len;
2229         }
2230         return 0;
2231 }
2232
2233 /**
2234  * proc_dostring - read a string sysctl
2235  * @table: the sysctl table
2236  * @write: %TRUE if this is a write to the sysctl file
2237  * @filp: the file structure
2238  * @buffer: the user buffer
2239  * @lenp: the size of the user buffer
2240  * @ppos: file position
2241  *
2242  * Reads/writes a string from/to the user buffer. If the kernel
2243  * buffer provided is not large enough to hold the string, the
2244  * string is truncated. The copied string is %NULL-terminated.
2245  * If the string is being read by the user process, it is copied
2246  * and a newline '\n' is added. It is truncated if the buffer is
2247  * not large enough.
2248  *
2249  * Returns 0 on success.
2250  */
2251 int proc_dostring(struct ctl_table *table, int write, struct file *filp,
2252                   void __user *buffer, size_t *lenp, loff_t *ppos)
2253 {
2254         return _proc_do_string(table->data, table->maxlen, write, filp,
2255                                buffer, lenp, ppos);
2256 }
2257
2258
/*
 * Default conversion between a sign flag/magnitude pair and an int.
 *
 * @negp:  sign flag (non-zero means negative)
 * @lvalp: magnitude
 * @valp:  the int being read or written
 * @write: non-zero to store (*@negp, *@lvalp) into *@valp, zero to split
 *         *@valp into *@negp and *@lvalp
 * @data:  unused
 *
 * Returns 0 on success, or -EINVAL on write when the magnitude does not
 * fit in an int (in which case *@valp is left untouched).
 */
static int do_proc_dointvec_conv(int *negp, unsigned long *lvalp,
				 int *valp,
				 int write, void *data)
{
	if (write) {
		if (*negp) {
			/* the most negative int has magnitude INT_MAX + 1 */
			if (*lvalp > (unsigned long) INT_MAX + 1)
				return -EINVAL;
			*valp = -*lvalp;
		} else {
			if (*lvalp > (unsigned long) INT_MAX)
				return -EINVAL;
			*valp = *lvalp;
		}
	} else {
		int val = *valp;
		if (val < 0) {
			*negp = -1;
			/* negate as unsigned: -val would overflow for INT_MIN */
			*lvalp = -(unsigned long)val;
		} else {
			*negp = 0;
			*lvalp = (unsigned long)val;
		}
	}
	return 0;
}
2277
2278 static int __do_proc_dointvec(void *tbl_data, struct ctl_table *table,
2279                   int write, struct file *filp, void __user *buffer,
2280                   size_t *lenp, loff_t *ppos,
2281                   int (*conv)(int *negp, unsigned long *lvalp, int *valp,
2282                               int write, void *data),
2283                   void *data)
2284 {
2285 #define TMPBUFLEN 21
2286         int *i, vleft, first = 1, neg;
2287         unsigned long lval;
2288         size_t left, len;
2289         
2290         char buf[TMPBUFLEN], *p;
2291         char __user *s = buffer;
2292         
2293         if (!tbl_data || !table->maxlen || !*lenp ||
2294             (*ppos && !write)) {
2295                 *lenp = 0;
2296                 return 0;
2297         }
2298         
2299         i = (int *) tbl_data;
2300         vleft = table->maxlen / sizeof(*i);
2301         left = *lenp;
2302
2303         if (!conv)
2304                 conv = do_proc_dointvec_conv;
2305
2306         for (; left && vleft--; i++, first=0) {
2307                 if (write) {
2308                         while (left) {
2309                                 char c;
2310                                 if (get_user(c, s))
2311                                         return -EFAULT;
2312                                 if (!isspace(c))
2313                                         break;
2314                                 left--;
2315                                 s++;
2316                         }
2317                         if (!left)
2318                                 break;
2319                         neg = 0;
2320                         len = left;
2321                         if (len > sizeof(buf) - 1)
2322                                 len = sizeof(buf) - 1;
2323                         if (copy_from_user(buf, s, len))
2324                                 return -EFAULT;
2325                         buf[len] = 0;
2326                         p = buf;
2327                         if (*p == '-' && left > 1) {
2328                                 neg = 1;
2329                                 p++;
2330                         }
2331                         if (*p < '0' || *p > '9')
2332                                 break;
2333
2334                         lval = simple_strtoul(p, &p, 0);
2335
2336                         len = p-buf;
2337                         if ((len < left) && *p && !isspace(*p))
2338                                 break;
2339                         s += len;
2340                         left -= len;
2341
2342                         if (conv(&neg, &lval, i, 1, data))
2343                                 break;
2344                 } else {
2345                         p = buf;
2346                         if (!first)
2347                                 *p++ = '\t';
2348         
2349                         if (conv(&neg, &lval, i, 0, data))
2350                                 break;
2351
2352                         sprintf(p, "%s%lu", neg ? "-" : "", lval);
2353                         len = strlen(buf);
2354                         if (len > left)
2355                                 len = left;
2356                         if(copy_to_user(s, buf, len))
2357                                 return -EFAULT;
2358                         left -= len;
2359                         s += len;
2360                 }
2361         }
2362
2363         if (!write && !first && left) {
2364                 if(put_user('\n', s))
2365                         return -EFAULT;
2366                 left--, s++;
2367         }
2368         if (write) {
2369                 while (left) {
2370                         char c;
2371                         if (get_user(c, s++))
2372                                 return -EFAULT;
2373                         if (!isspace(c))
2374                                 break;
2375                         left--;
2376                 }
2377         }
2378         if (write && first)
2379                 return -EINVAL;
2380         *lenp -= left;
2381         *ppos += *lenp;
2382         return 0;
2383 #undef TMPBUFLEN
2384 }
2385
2386 static int do_proc_dointvec(struct ctl_table *table, int write, struct file *filp,
2387                   void __user *buffer, size_t *lenp, loff_t *ppos,
2388                   int (*conv)(int *negp, unsigned long *lvalp, int *valp,
2389                               int write, void *data),
2390                   void *data)
2391 {
2392         return __do_proc_dointvec(table->data, table, write, filp,
2393                         buffer, lenp, ppos, conv, data);
2394 }
2395
2396 /**
2397  * proc_dointvec - read a vector of integers
2398  * @table: the sysctl table
2399  * @write: %TRUE if this is a write to the sysctl file
2400  * @filp: the file structure
2401  * @buffer: the user buffer
2402  * @lenp: the size of the user buffer
2403  * @ppos: file position
2404  *
2405  * Reads/writes up to table->maxlen/sizeof(unsigned int) integer
2406  * values from/to the user buffer, treated as an ASCII string. 
2407  *
2408  * Returns 0 on success.
2409  */
2410 int proc_dointvec(struct ctl_table *table, int write, struct file *filp,
2411                      void __user *buffer, size_t *lenp, loff_t *ppos)
2412 {
2413     return do_proc_dointvec(table,write,filp,buffer,lenp,ppos,
2414                             NULL,NULL);
2415 }
2416
2417 /*
2418  * Taint values can only be increased
2419  * This means we can safely use a temporary.
2420  */
2421 static int proc_taint(struct ctl_table *table, int write, struct file *filp,
2422                                void __user *buffer, size_t *lenp, loff_t *ppos)
2423 {
2424         struct ctl_table t;
2425         unsigned long tmptaint = get_taint();
2426         int err;
2427
2428         if (write && !capable(CAP_SYS_ADMIN))
2429                 return -EPERM;
2430
2431         t = *table;
2432         t.data = &tmptaint;
2433         err = proc_doulongvec_minmax(&t, write, filp, buffer, lenp, ppos);
2434         if (err < 0)
2435                 return err;
2436
2437         if (write) {
2438                 /*
2439                  * Poor man's atomic or. Not worth adding a primitive
2440                  * to everyone's atomic.h for this
2441                  */
2442                 int i;
2443                 for (i = 0; i < BITS_PER_LONG && tmptaint >> i; i++) {
2444                         if ((tmptaint >> i) & 1)
2445                                 add_taint(i);
2446                 }
2447         }
2448
2449         return err;
2450 }
2451
/*
 * Optional bounds handed to do_proc_dointvec_minmax_conv() through its
 * opaque @data argument.  A NULL pointer means "no bound on that side".
 */
struct do_proc_dointvec_minmax_conv_param {
        int *min;
        int *max;
};

/*
 * Conversion callback for the min/max integer handler.
 *
 * @negp:  sign flag; non-zero means the value is negative
 * @lvalp: magnitude of the value
 * @valp:  kernel-side int being read or written
 * @write: non-zero when converting user input (sign + magnitude) to *valp,
 *         zero when converting *valp to sign + magnitude
 * @data:  struct do_proc_dointvec_minmax_conv_param with the bounds
 *
 * On write, -EINVAL is returned (and *valp left untouched) when the
 * magnitude does not fit in an int or the resulting value lies outside
 * the configured bounds.  Returns 0 otherwise.
 */
static int do_proc_dointvec_minmax_conv(int *negp, unsigned long *lvalp,
                                        int *valp,
                                        int write, void *data)
{
        struct do_proc_dointvec_minmax_conv_param *param = data;
        if (write) {
                int val;

                /*
                 * Refuse magnitudes that cannot be represented as an int.
                 * Without this, the unsigned long would be silently
                 * truncated and could wrap into the permitted range.
                 */
                if (*negp) {
                        if (*lvalp > (unsigned long)INT_MAX + 1)
                                return -EINVAL;
                        /* INT_MAX + 1 is the magnitude of INT_MIN. */
                        val = *lvalp > (unsigned long)INT_MAX ?
                                INT_MIN : -(int)*lvalp;
                } else {
                        if (*lvalp > (unsigned long)INT_MAX)
                                return -EINVAL;
                        val = (int)*lvalp;
                }
                if ((param->min && *param->min > val) ||
                    (param->max && *param->max < val))
                        return -EINVAL;
                *valp = val;
        } else {
                int val = *valp;
                if (val < 0) {
                        *negp = -1;
                        /* Negate as unsigned so INT_MIN does not overflow. */
                        *lvalp = -(unsigned long)val;
                } else {
                        *negp = 0;
                        *lvalp = (unsigned long)val;
                }
        }
        return 0;
}
2480
2481 /**
2482  * proc_dointvec_minmax - read a vector of integers with min/max values
2483  * @table: the sysctl table
2484  * @write: %TRUE if this is a write to the sysctl file
2485  * @filp: the file structure
2486  * @buffer: the user buffer
2487  * @lenp: the size of the user buffer
2488  * @ppos: file position
2489  *
2490  * Reads/writes up to table->maxlen/sizeof(unsigned int) integer
2491  * values from/to the user buffer, treated as an ASCII string.
2492  *
2493  * This routine will ensure the values are within the range specified by
2494  * table->extra1 (min) and table->extra2 (max).
2495  *
2496  * Returns 0 on success.
2497  */
2498 int proc_dointvec_minmax(struct ctl_table *table, int write, struct file *filp,
2499                   void __user *buffer, size_t *lenp, loff_t *ppos)
2500 {
2501         struct do_proc_dointvec_minmax_conv_param param = {
2502                 .min = (int *) table->extra1,
2503                 .max = (int *) table->extra2,
2504         };
2505         return do_proc_dointvec(table, write, filp, buffer, lenp, ppos,
2506                                 do_proc_dointvec_minmax_conv, &param);
2507 }
2508
2509 static int __do_proc_doulongvec_minmax(void *data, struct ctl_table *table, int write,
2510                                      struct file *filp,
2511                                      void __user *buffer,
2512                                      size_t *lenp, loff_t *ppos,
2513                                      unsigned long convmul,
2514                                      unsigned long convdiv)
2515 {
2516 #define TMPBUFLEN 21
2517         unsigned long *i, *min, *max, val;
2518         int vleft, first=1, neg;
2519         size_t len, left;
2520         char buf[TMPBUFLEN], *p;
2521         char __user *s = buffer;
2522         
2523         if (!data || !table->maxlen || !*lenp ||
2524             (*ppos && !write)) {
2525                 *lenp = 0;
2526                 return 0;
2527         }
2528         
2529         i = (unsigned long *) data;
2530         min = (unsigned long *) table->extra1;
2531         max = (unsigned long *) table->extra2;
2532         vleft = table->maxlen / sizeof(unsigned long);
2533         left = *lenp;
2534         
2535         for (; left && vleft--; i++, min++, max++, first=0) {
2536                 if (write) {
2537                         while (left) {
2538                                 char c;
2539                                 if (get_user(c, s))
2540                                         return -EFAULT;
2541                                 if (!isspace(c))
2542                                         break;
2543                                 left--;
2544                                 s++;
2545                         }
2546                         if (!left)
2547                                 break;
2548                         neg = 0;
2549                         len = left;
2550                         if (len > TMPBUFLEN-1)
2551                                 len = TMPBUFLEN-1;
2552                         if (copy_from_user(buf, s, len))
2553                                 return -EFAULT;
2554                         buf[len] = 0;
2555                         p = buf;
2556                         if (*p == '-' && left > 1) {
2557                                 neg = 1;
2558                                 p++;
2559                         }
2560                         if (*p < '0' || *p > '9')
2561                                 break;
2562                         val = simple_strtoul(p, &p, 0) * convmul / convdiv ;
2563                         len = p-buf;
2564                         if ((len < left) && *p && !isspace(*p))
2565                                 break;
2566                         if (neg)
2567                                 val = -val;
2568                         s += len;
2569                         left -= len;
2570
2571                         if(neg)
2572                                 continue;
2573                         if ((min && val < *min) || (max && val > *max))
2574                                 continue;
2575                         *i = val;
2576                 } else {
2577                         p = buf;
2578                         if (!first)
2579                                 *p++ = '\t';
2580                         sprintf(p, "%lu", convdiv * (*i) / convmul);
2581                         len = strlen(buf);
2582                         if (len > left)
2583                                 len = left;
2584                         if(copy_to_user(s, buf, len))
2585                                 return -EFAULT;
2586                         left -= len;
2587                         s += len;
2588                 }
2589         }
2590
2591         if (!write && !first && left) {
2592                 if(put_user('\n', s))
2593                         return -EFAULT;
2594                 left--, s++;
2595         }
2596         if (write) {
2597                 while (left) {
2598                         char c;
2599                         if (get_user(c, s++))
2600                                 return -EFAULT;
2601                         if (!isspace(c))
2602                                 break;
2603                         left--;
2604                 }
2605         }
2606         if (write && first)
2607                 return -EINVAL;
2608         *lenp -= left;
2609         *ppos += *lenp;
2610         return 0;
2611 #undef TMPBUFLEN
2612 }
2613
2614 static int do_proc_doulongvec_minmax(struct ctl_table *table, int write,
2615                                      struct file *filp,
2616                                      void __user *buffer,
2617                                      size_t *lenp, loff_t *ppos,
2618                                      unsigned long convmul,
2619                                      unsigned long convdiv)
2620 {
2621         return __do_proc_doulongvec_minmax(table->data, table, write,
2622                         filp, buffer, lenp, ppos, convmul, convdiv);
2623 }
2624
2625 /**
2626  * proc_doulongvec_minmax - read a vector of long integers with min/max values
2627  * @table: the sysctl table
2628  * @write: %TRUE if this is a write to the sysctl file
2629  * @filp: the file structure
2630  * @buffer: the user buffer
2631  * @lenp: the size of the user buffer
2632  * @ppos: file position
2633  *
2634  * Reads/writes up to table->maxlen/sizeof(unsigned long) unsigned long
2635  * values from/to the user buffer, treated as an ASCII string.
2636  *
2637  * This routine will ensure the values are within the range specified by
2638  * table->extra1 (min) and table->extra2 (max).
2639  *
2640  * Returns 0 on success.
2641  */
2642 int proc_doulongvec_minmax(struct ctl_table *table, int write, struct file *filp,
2643                            void __user *buffer, size_t *lenp, loff_t *ppos)
2644 {
2645     return do_proc_doulongvec_minmax(table, write, filp, buffer, lenp, ppos, 1l, 1l);
2646 }
2647
2648 /**
2649  * proc_doulongvec_ms_jiffies_minmax - read a vector of millisecond values with min/max values
2650  * @table: the sysctl table
2651  * @write: %TRUE if this is a write to the sysctl file
2652  * @filp: the file structure
2653  * @buffer: the user buffer
2654  * @lenp: the size of the user buffer
2655  * @ppos: file position
2656  *
2657  * Reads/writes up to table->maxlen/sizeof(unsigned long) unsigned long
2658  * values from/to the user buffer, treated as an ASCII string. The values
2659  * are treated as milliseconds, and converted to jiffies when they are stored.
2660  *
2661  * This routine will ensure the values are within the range specified by
2662  * table->extra1 (min) and table->extra2 (max).
2663  *
2664  * Returns 0 on success.
2665  */
2666 int proc_doulongvec_ms_jiffies_minmax(struct ctl_table *table, int write,
2667                                       struct file *filp,
2668                                       void __user *buffer,
2669                                       size_t *lenp, loff_t *ppos)
2670 {
2671     return do_proc_doulongvec_minmax(table, write, filp, buffer,
2672                                      lenp, ppos, HZ, 1000l);
2673 }
2674
2675
2676 static int do_proc_dointvec_jiffies_conv(int *negp, unsigned long *lvalp,
2677                                          int *valp,
2678                                          int write, void *data)
2679 {
2680         if (write) {
2681                 if (*lvalp > LONG_MAX / HZ)
2682                         return 1;
2683                 *valp = *negp ? -(*lvalp*HZ) : (*lvalp*HZ);
2684         } else {
2685                 int val = *valp;
2686                 unsigned long lval;
2687                 if (val < 0) {
2688                         *negp = -1;
2689                         lval = (unsigned long)-val;
2690                 } else {
2691                         *negp = 0;
2692                         lval = (unsigned long)val;
2693                 }
2694                 *lvalp = lval / HZ;
2695         }
2696         return 0;
2697 }
2698
2699 static int do_proc_dointvec_userhz_jiffies_conv(int *negp, unsigned long *lvalp,
2700                                                 int *valp,
2701                                                 int write, void *data)
2702 {
2703         if (write) {
2704                 if (USER_HZ < HZ && *lvalp > (LONG_MAX / HZ) * USER_HZ)
2705                         return 1;
2706                 *valp = clock_t_to_jiffies(*negp ? -*lvalp : *lvalp);
2707         } else {
2708                 int val = *valp;
2709                 unsigned long lval;
2710                 if (val < 0) {
2711                         *negp = -1;
2712                         lval = (unsigned long)-val;
2713                 } else {
2714                         *negp = 0;
2715                         lval = (unsigned long)val;
2716                 }
2717                 *lvalp = jiffies_to_clock_t(lval);
2718         }
2719         return 0;
2720 }
2721
/*
 * Conversion callback between milliseconds (user side) and jiffies
 * (kernel side) for the integer-vector handler.
 *
 * @negp:  sign flag; non-zero means the value is negative
 * @lvalp: magnitude, in milliseconds
 * @valp:  kernel-side int holding jiffies
 * @write: non-zero to convert milliseconds to jiffies, zero for the reverse
 * @data:  unused
 *
 * Always returns 0.
 */
static int do_proc_dointvec_ms_jiffies_conv(int *negp, unsigned long *lvalp,
                                            int *valp,
                                            int write, void *data)
{
        if (write) {
                *valp = msecs_to_jiffies(*negp ? -*lvalp : *lvalp);
        } else {
                int val = *valp;
                unsigned long lval;
                if (val < 0) {
                        *negp = -1;
                        /* Negate as unsigned so INT_MIN does not overflow. */
                        lval = -(unsigned long)val;
                } else {
                        *negp = 0;
                        lval = (unsigned long)val;
                }
                *lvalp = jiffies_to_msecs(lval);
        }
        return 0;
}
2742
2743 /**
2744  * proc_dointvec_jiffies - read a vector of integers as seconds
2745  * @table: the sysctl table
2746  * @write: %TRUE if this is a write to the sysctl file
2747  * @filp: the file structure
2748  * @buffer: the user buffer
2749  * @lenp: the size of the user buffer
2750  * @ppos: file position
2751  *
2752  * Reads/writes up to table->maxlen/sizeof(unsigned int) integer
2753  * values from/to the user buffer, treated as an ASCII string. 
2754  * The values read are assumed to be in seconds, and are converted into
2755  * jiffies.
2756  *
2757  * Returns 0 on success.
2758  */
2759 int proc_dointvec_jiffies(struct ctl_table *table, int write, struct file *filp,
2760                           void __user *buffer, size_t *lenp, loff_t *ppos)
2761 {
2762     return do_proc_dointvec(table,write,filp,buffer,lenp,ppos,
2763                             do_proc_dointvec_jiffies_conv,NULL);
2764 }
2765
2766 /**
2767  * proc_dointvec_userhz_jiffies - read a vector of integers as 1/USER_HZ seconds
2768  * @table: the sysctl table
2769  * @write: %TRUE if this is a write to the sysctl file
2770  * @filp: the file structure
2771  * @buffer: the user buffer
2772  * @lenp: the size of the user buffer
2773  * @ppos: pointer to the file position
2774  *
2775  * Reads/writes up to table->maxlen/sizeof(unsigned int) integer
2776  * values from/to the user buffer, treated as an ASCII string. 
2777  * The values read are assumed to be in 1/USER_HZ seconds, and 
2778  * are converted into jiffies.
2779  *
2780  * Returns 0 on success.
2781  */
2782 int proc_dointvec_userhz_jiffies(struct ctl_table *table, int write, struct file *filp,
2783                                  void __user *buffer, size_t *lenp, loff_t *ppos)
2784 {
2785     return do_proc_dointvec(table,write,filp,buffer,lenp,ppos,
2786                             do_proc_dointvec_userhz_jiffies_conv,NULL);
2787 }
2788
2789 /**
2790  * proc_dointvec_ms_jiffies - read a vector of integers as 1 milliseconds
2791  * @table: the sysctl table
2792  * @write: %TRUE if this is a write to the sysctl file
2793  * @filp: the file structure
2794  * @buffer: the user buffer
2795  * @lenp: the size of the user buffer
2796  * @ppos: file position
2797  * @ppos: the current position in the file
2798  *
2799  * Reads/writes up to table->maxlen/sizeof(unsigned int) integer
2800  * values from/to the user buffer, treated as an ASCII string. 
2801  * The values read are assumed to be in 1/1000 seconds, and 
2802  * are converted into jiffies.
2803  *
2804  * Returns 0 on success.
2805  */
2806 int proc_dointvec_ms_jiffies(struct ctl_table *table, int write, struct file *filp,
2807                              void __user *buffer, size_t *lenp, loff_t *ppos)
2808 {
2809         return do_proc_dointvec(table, write, filp, buffer, lenp, ppos,
2810                                 do_proc_dointvec_ms_jiffies_conv, NULL);
2811 }
2812
2813 static int proc_do_cad_pid(struct ctl_table *table, int write, struct file *filp,
2814                            void __user *buffer, size_t *lenp, loff_t *ppos)
2815 {
2816         struct pid *new_pid;
2817         pid_t tmp;
2818         int r;
2819
2820         tmp = pid_vnr(cad_pid);
2821
2822         r = __do_proc_dointvec(&tmp, table, write, filp, buffer,
2823                                lenp, ppos, NULL, NULL);
2824         if (r || !write)
2825                 return r;
2826
2827         new_pid = find_get_pid(tmp);
2828         if (!new_pid)
2829                 return -ESRCH;
2830
2831         put_pid(xchg(&cad_pid, new_pid));
2832         return 0;
2833 }
2834
2835 #else /* CONFIG_PROC_FS */
2836
2837 int proc_dostring(struct ctl_table *table, int write, struct file *filp,
2838                   void __user *buffer, size_t *lenp, loff_t *ppos)
2839 {
2840         return -ENOSYS;
2841 }
2842
2843 int proc_dointvec(struct ctl_table *table, int write, struct file *filp,
2844                   void __user *buffer, size_t *lenp, loff_t *ppos)
2845 {
2846         return -ENOSYS;
2847 }
2848
2849 int proc_dointvec_minmax(struct ctl_table *table, int write, struct file *filp,
2850                     void __user *buffer, size_t *lenp, loff_t *ppos)
2851 {
2852         return -ENOSYS;
2853 }
2854
2855 int proc_dointvec_jiffies(struct ctl_table *table, int write, struct file *filp,
2856                     void __user *buffer, size_t *lenp, loff_t *ppos)
2857 {
2858         return -ENOSYS;
2859 }
2860
2861 int proc_dointvec_userhz_jiffies(struct ctl_table *table, int write, struct file *filp,
2862                     void __user *buffer, size_t *lenp, loff_t *ppos)
2863 {
2864         return -ENOSYS;
2865 }
2866
2867 int proc_dointvec_ms_jiffies(struct ctl_table *table, int write, struct file *filp,
2868                              void __user *buffer, size_t *lenp, loff_t *ppos)
2869 {
2870         return -ENOSYS;
2871 }
2872
2873 int proc_doulongvec_minmax(struct ctl_table *table, int write, struct file *filp,
2874                     void __user *buffer, size_t *lenp, loff_t *ppos)
2875 {
2876         return -ENOSYS;
2877 }
2878
2879 int proc_doulongvec_ms_jiffies_minmax(struct ctl_table *table, int write,
2880                                       struct file *filp,
2881                                       void __user *buffer,
2882                                       size_t *lenp, loff_t *ppos)
2883 {
2884     return -ENOSYS;
2885 }
2886
2887
2888 #endif /* CONFIG_PROC_FS */
2889
2890
2891 #ifdef CONFIG_SYSCTL_SYSCALL
2892 /*
2893  * General sysctl support routines 
2894  */
2895
2896 /* The generic sysctl data routine (used if no strategy routine supplied) */
2897 int sysctl_data(struct ctl_table *table,
2898                 void __user *oldval, size_t __user *oldlenp,
2899                 void __user *newval, size_t newlen)
2900 {
2901         size_t len;
2902
2903         /* Get out of I don't have a variable */
2904         if (!table->data || !table->maxlen)
2905                 return -ENOTDIR;
2906
2907         if (oldval && oldlenp) {
2908                 if (get_user(len, oldlenp))
2909                         return -EFAULT;
2910                 if (len) {
2911                         if (len > table->maxlen)
2912                                 len = table->maxlen;
2913                         if (copy_to_user(oldval, table->data, len))
2914                                 return -EFAULT;
2915                         if (put_user(len, oldlenp))
2916                                 return -EFAULT;
2917                 }
2918         }
2919
2920         if (newval && newlen) {
2921                 if (newlen > table->maxlen)
2922                         newlen = table->maxlen;
2923
2924                 if (copy_from_user(table->data, newval, newlen))
2925                         return -EFAULT;
2926         }
2927         return 1;
2928 }
2929
2930 /* The generic string strategy routine: */
2931 int sysctl_string(struct ctl_table *table,
2932                   void __user *oldval, size_t __user *oldlenp,
2933                   void __user *newval, size_t newlen)
2934 {
2935         if (!table->data || !table->maxlen) 
2936                 return -ENOTDIR;
2937         
2938         if (oldval && oldlenp) {
2939                 size_t bufsize;
2940                 if (get_user(bufsize, oldlenp))
2941                         return -EFAULT;
2942                 if (bufsize) {
2943                         size_t len = strlen(table->data), copied;
2944
2945                         /* This shouldn't trigger for a well-formed sysctl */
2946                         if (len > table->maxlen)
2947                                 len = table->maxlen;
2948
2949                         /* Copy up to a max of bufsize-1 bytes of the string */
2950                         copied = (len >= bufsize) ? bufsize - 1 : len;
2951
2952                         if (copy_to_user(oldval, table->data, copied) ||
2953                             put_user(0, (char __user *)(oldval + copied)))
2954                                 return -EFAULT;
2955                         if (put_user(len, oldlenp))
2956                                 return -EFAULT;
2957                 }
2958         }
2959         if (newval && newlen) {
2960                 size_t len = newlen;
2961                 if (len > table->maxlen)
2962                         len = table->maxlen;
2963                 if(copy_from_user(table->data, newval, len))
2964                         return -EFAULT;
2965                 if (len == table->maxlen)
2966                         len--;
2967                 ((char *) table->data)[len] = 0;
2968         }
2969         return 1;
2970 }
2971
2972 /*
2973  * This function makes sure that all of the integers in the vector
2974  * are between the minimum and maximum values given in the arrays
2975  * table->extra1 and table->extra2, respectively.
2976  */
2977 int sysctl_intvec(struct ctl_table *table,
2978                 void __user *oldval, size_t __user *oldlenp,
2979                 void __user *newval, size_t newlen)
2980 {
2981
2982         if (newval && newlen) {
2983                 int __user *vec = (int __user *) newval;
2984                 int *min = (int *) table->extra1;
2985                 int *max = (int *) table->extra2;
2986                 size_t length;
2987                 int i;
2988
2989                 if (newlen % sizeof(int) != 0)
2990                         return -EINVAL;
2991
2992                 if (!table->extra1 && !table->extra2)
2993                         return 0;
2994
2995                 if (newlen > table->maxlen)
2996                         newlen = table->maxlen;
2997                 length = newlen / sizeof(int);
2998
2999                 for (i = 0; i < length; i++) {
3000                         int value;
3001                         if (get_user(value, vec + i))
3002                                 return -EFAULT;
3003                         if (min && value < min[i])
3004                                 return -EINVAL;
3005                         if (max && value > max[i])
3006                                 return -EINVAL;
3007                 }
3008         }
3009         return 0;
3010 }
3011
3012 /* Strategy function to convert jiffies to seconds */ 
3013 int sysctl_jiffies(struct ctl_table *table,
3014                 void __user *oldval, size_t __user *oldlenp,
3015                 void __user *newval, size_t newlen)
3016 {
3017         if (oldval && oldlenp) {
3018                 size_t olen;
3019
3020                 if (get_user(olen, oldlenp))
3021                         return -EFAULT;
3022                 if (olen) {
3023                         int val;
3024
3025                         if (olen < sizeof(int))
3026                                 return -EINVAL;
3027
3028                         val = *(int *)(table->data) / HZ;
3029                         if (put_user(val, (int __user *)oldval))
3030                                 return -EFAULT;
3031                         if (put_user(sizeof(int), oldlenp))
3032                                 return -EFAULT;
3033                 }
3034         }
3035         if (newval && newlen) { 
3036                 int new;
3037                 if (newlen != sizeof(int))
3038                         return -EINVAL; 
3039                 if (get_user(new, (int __user *)newval))
3040                         return -EFAULT;
3041                 *(int *)(table->data) = new*HZ; 
3042         }
3043         return 1;
3044 }
3045
3046 /* Strategy function to convert jiffies to seconds */ 
3047 int sysctl_ms_jiffies(struct ctl_table *table,
3048                 void __user *oldval, size_t __user *oldlenp,
3049                 void __user *newval, size_t newlen)
3050 {
3051         if (oldval && oldlenp) {
3052                 size_t olen;
3053
3054                 if (get_user(olen, oldlenp))
3055                         return -EFAULT;
3056                 if (olen) {
3057                         int val;
3058
3059                         if (olen < sizeof(int))
3060                                 return -EINVAL;
3061
3062                         val = jiffies_to_msecs(*(int *)(table->data));
3063                         if (put_user(val, (int __user *)oldval))
3064                                 return -EFAULT;
3065                         if (put_user(sizeof(int), oldlenp))
3066                                 return -EFAULT;
3067                 }
3068         }
3069         if (newval && newlen) { 
3070                 int new;
3071                 if (newlen != sizeof(int))
3072                         return -EINVAL; 
3073                 if (get_user(new, (int __user *)newval))
3074                         return -EFAULT;
3075                 *(int *)(table->data) = msecs_to_jiffies(new);
3076         }
3077         return 1;
3078 }
3079
3080
3081
3082 #else /* CONFIG_SYSCTL_SYSCALL */
3083
3084
3085 SYSCALL_DEFINE1(sysctl, struct __sysctl_args __user *, args)
3086 {
3087         struct __sysctl_args tmp;
3088         int error;
3089
3090         if (copy_from_user(&tmp, args, sizeof(tmp)))
3091                 return -EFAULT;
3092
3093         error = deprecated_sysctl_warning(&tmp);
3094
3095         /* If no error reading the parameters then just -ENOSYS ... */
3096         if (!error)
3097                 error = -ENOSYS;
3098
3099         return error;
3100 }
3101
3102 int sysctl_data(struct ctl_table *table,
3103                   void __user *oldval, size_t __user *oldlenp,
3104                   void __user *newval, size_t newlen)
3105 {
3106         return -ENOSYS;
3107 }
3108
3109 int sysctl_string(struct ctl_table *table,
3110                   void __user *oldval, size_t __user *oldlenp,
3111                   void __user *newval, size_t newlen)
3112 {
3113         return -ENOSYS;
3114 }
3115
3116 int sysctl_intvec(struct ctl_table *table,
3117                 void __user *oldval, size_t __user *oldlenp,
3118                 void __user *newval, size_t newlen)
3119 {
3120         return -ENOSYS;
3121 }
3122
3123 int sysctl_jiffies(struct ctl_table *table,
3124                 void __user *oldval, size_t __user *oldlenp,
3125                 void __user *newval, size_t newlen)
3126 {
3127         return -ENOSYS;
3128 }
3129
3130 int sysctl_ms_jiffies(struct ctl_table *table,
3131                 void __user *oldval, size_t __user *oldlenp,
3132                 void __user *newval, size_t newlen)
3133 {
3134         return -ENOSYS;
3135 }
3136
3137 #endif /* CONFIG_SYSCTL_SYSCALL */
3138
3139 static int deprecated_sysctl_warning(struct __sysctl_args *args)
3140 {
3141         static int msg_count;
3142         int name[CTL_MAXNAME];
3143         int i;
3144
3145         /* Check args->nlen. */
3146         if (args->nlen < 0 || args->nlen > CTL_MAXNAME)
3147                 return -ENOTDIR;
3148
3149         /* Read in the sysctl name for better debug message logging */
3150         for (i = 0; i < args->nlen; i++)
3151                 if (get_user(name[i], args->name + i))
3152                         return -EFAULT;
3153
3154         /* Ignore accesses to kernel.version */
3155         if ((args->nlen == 2) && (name[0] == CTL_KERN) && (name[1] == KERN_VERSION))
3156                 return 0;
3157
3158         if (msg_count < 5) {
3159                 msg_count++;
3160                 printk(KERN_INFO
3161                         "warning: process `%s' used the deprecated sysctl "
3162                         "system call with ", current->comm);
3163                 for (i = 0; i < args->nlen; i++)
3164                         printk("%d.", name[i]);
3165                 printk("\n");
3166         }
3167         return 0;
3168 }
3169
/*
 * Symbol exports, gathered in one place.
 *
 * No sense putting this after each symbol definition, twice (the proc_*
 * and sysctl_* handlers each have a real implementation and an -ENOSYS
 * stub in the opposite configuration branch), exception granted :-)
 */
EXPORT_SYMBOL(proc_dointvec);
EXPORT_SYMBOL(proc_dointvec_jiffies);
EXPORT_SYMBOL(proc_dointvec_minmax);
EXPORT_SYMBOL(proc_dointvec_userhz_jiffies);
EXPORT_SYMBOL(proc_dointvec_ms_jiffies);
EXPORT_SYMBOL(proc_dostring);
EXPORT_SYMBOL(proc_doulongvec_minmax);
EXPORT_SYMBOL(proc_doulongvec_ms_jiffies_minmax);
EXPORT_SYMBOL(register_sysctl_table);
EXPORT_SYMBOL(register_sysctl_paths);
EXPORT_SYMBOL(sysctl_intvec);
EXPORT_SYMBOL(sysctl_jiffies);
EXPORT_SYMBOL(sysctl_ms_jiffies);
EXPORT_SYMBOL(sysctl_string);
EXPORT_SYMBOL(sysctl_data);
EXPORT_SYMBOL(unregister_sysctl_table);