x86: Add sysctl to allow panic on IOCK NMI error
[linux-2.6] / kernel / sysctl.c
1 /*
2  * sysctl.c: General linux system control interface
3  *
4  * Begun 24 March 1995, Stephen Tweedie
5  * Added /proc support, Dec 1995
6  * Added bdflush entry and intvec min/max checking, 2/23/96, Tom Dyas.
7  * Added hooks for /proc/sys/net (minor, minor patch), 96/4/1, Mike Shaver.
8  * Added kernel/java-{interpreter,appletviewer}, 96/5/10, Mike Shaver.
9  * Dynamic registration fixes, Stephen Tweedie.
10  * Added kswapd-interval, ctrl-alt-del, printk stuff, 1/8/97, Chris Horn.
11  * Made sysctl support optional via CONFIG_SYSCTL, 1/10/97, Chris
12  *  Horn.
13  * Added proc_doulongvec_ms_jiffies_minmax, 09/08/99, Carlos H. Bauer.
14  * Added proc_doulongvec_minmax, 09/08/99, Carlos H. Bauer.
15  * Changed linked lists to use list.h instead of lists.h, 02/24/00, Bill
16  *  Wendling.
17  * The list_for_each() macro wasn't appropriate for the sysctl loop.
18  *  Removed it and replaced it with older style, 03/23/00, Bill Wendling
19  */
20
21 #include <linux/module.h>
22 #include <linux/mm.h>
23 #include <linux/swap.h>
24 #include <linux/slab.h>
25 #include <linux/sysctl.h>
26 #include <linux/proc_fs.h>
27 #include <linux/security.h>
28 #include <linux/ctype.h>
29 #include <linux/utsname.h>
30 #include <linux/kmemcheck.h>
31 #include <linux/smp_lock.h>
32 #include <linux/fs.h>
33 #include <linux/init.h>
34 #include <linux/kernel.h>
35 #include <linux/kobject.h>
36 #include <linux/net.h>
37 #include <linux/sysrq.h>
38 #include <linux/highuid.h>
39 #include <linux/writeback.h>
40 #include <linux/hugetlb.h>
41 #include <linux/initrd.h>
42 #include <linux/key.h>
43 #include <linux/times.h>
44 #include <linux/limits.h>
45 #include <linux/dcache.h>
46 #include <linux/syscalls.h>
47 #include <linux/vmstat.h>
48 #include <linux/nfs_fs.h>
49 #include <linux/acpi.h>
50 #include <linux/reboot.h>
51 #include <linux/ftrace.h>
52 #include <linux/slow-work.h>
53 #include <linux/perf_counter.h>
54
55 #include <asm/uaccess.h>
56 #include <asm/processor.h>
57
58 #ifdef CONFIG_X86
59 #include <asm/nmi.h>
60 #include <asm/stacktrace.h>
61 #include <asm/io.h>
62 #endif
63
64 static int deprecated_sysctl_warning(struct __sysctl_args *args); /* forward declaration only; the definition is not in this part of the file */
65
66 #if defined(CONFIG_SYSCTL)
67
68 /* External variables not declared in any header file; several of them back entries of kern_table below. */
69 extern int C_A_D; /* .data of the "ctrl-alt-del" entry */
70 extern int print_fatal_signals; /* .data of the "print-fatal-signals" entry */
71 extern int sysctl_overcommit_memory;
72 extern int sysctl_overcommit_ratio;
73 extern int sysctl_panic_on_oom;
74 extern int sysctl_oom_kill_allocating_task;
75 extern int sysctl_oom_dump_tasks;
76 extern int max_threads; /* .data of the "threads-max" entry */
77 extern int core_uses_pid; /* .data of the "core_uses_pid" entry */
78 extern int suid_dumpable;
79 extern char core_pattern[]; /* string buffer behind "core_pattern" (maxlen CORENAME_MAX_SIZE) */
80 extern int pid_max; /* .data of the "pid_max" entry */
81 extern int min_free_kbytes;
82 extern int pid_max_min, pid_max_max; /* .extra1/.extra2 bounds of the "pid_max" entry */
83 extern int sysctl_drop_caches;
84 extern int percpu_pagelist_fraction;
85 extern int compat_log;
86 extern int latencytop_enabled; /* .data of "latencytop" (entry exists only with CONFIG_LATENCYTOP) */
87 extern int sysctl_nr_open_min, sysctl_nr_open_max;
88 #ifndef CONFIG_MMU
89 extern int sysctl_nr_trim_pages;
90 #endif
91 #ifdef CONFIG_RCU_TORTURE_TEST
92 extern int rcutorture_runnable;
93 #endif /* #ifdef CONFIG_RCU_TORTURE_TEST */
94
95 /* Constants used as minimum and maximum bounds (the .extra1/.extra2 fields of table entries) */
96 #ifdef CONFIG_DETECT_SOFTLOCKUP
97 static int sixty = 60; /* .extra2 (upper bound) of "softlockup_thresh" */
98 static int neg_one = -1; /* .extra1 (lower bound) of "softlockup_thresh" */
99 #endif
100
101 static int zero; /* no initializer: static storage, so the value is 0 */
102 static int __maybe_unused one = 1; /* referenced by the config-dependent "modules_disabled" and "softlockup_panic" entries */
103 static int __maybe_unused two = 2;
104 static unsigned long one_ul = 1;
105 static int one_hundred = 100;
106
107 /* this is needed for the proc_doulongvec_minmax of vm_dirty_bytes */
108 static unsigned long dirty_bytes_min = 2 * PAGE_SIZE;
109
110 /* this is needed for the proc_dointvec_minmax for [fs_]overflow UID and GID */
111 static int maxolduid = 65535; /* .extra2 of "overflowuid" and "overflowgid" */
112 static int minolduid; /* .extra1 of the same entries; no initializer, so 0 */
113 static int min_percpu_pagelist_fract = 8;
114
115 static int ngroups_max = NGROUPS_MAX; /* exposed read-only (mode 0444) as "ngroups_max" */
116
117 #ifdef CONFIG_MODULES
118 extern char modprobe_path[]; /* string buffer behind "modprobe" (maxlen KMOD_PATH_LEN) */
119 extern int modules_disabled; /* .data of "modules_disabled"; that entry bounds writes to [1, 1] */
120 #endif
121 #ifdef CONFIG_CHR_DEV_SG
122 extern int sg_big_buff; /* .data of the read-only "sg-big-buff" entry */
123 #endif
124
125 #ifdef CONFIG_SPARC
126 #include <asm/system.h>
127 #endif
128
129 #ifdef CONFIG_SPARC64
130 extern int sysctl_tsb_ratio; /* .data of the "tsb-ratio" entry */
131 #endif
132
133 #ifdef __hppa__
134 extern int pwrsw_enabled; /* .data of the "soft-power" entry */
135 extern int unaligned_enabled; /* .data of the "unaligned-trap" entry */
136 #endif
137
138 #ifdef CONFIG_S390
139 #ifdef CONFIG_MATHEMU
140 extern int sysctl_ieee_emulation_warnings; /* .data of "ieee_emulation_warnings" */
141 #endif
142 extern int sysctl_userprocess_debug; /* .data of "userprocess_debug" */
143 extern int spin_retry; /* .data of "spin_retry" (entry additionally requires CONFIG_SMP) */
144 #endif
145
146 #ifdef CONFIG_BSD_PROCESS_ACCT
147 extern int acct_parm[]; /* the "acct" entry exposes 3 ints of this array (maxlen 3*sizeof(int)) */
148 #endif
149
150 #ifdef CONFIG_IA64
151 extern int no_unaligned_warning; /* .data of "ignore-unaligned-usertrap" */
152 extern int unaligned_dump_stack; /* .data of "unaligned-dump-stack" */
153 #endif
154
155 #ifdef CONFIG_RT_MUTEXES
156 extern int max_lock_depth;
157 #endif
158
159 #ifdef CONFIG_PROC_SYSCTL
160 static int proc_do_cad_pid(struct ctl_table *table, int write, struct file *filp,
161                   void __user *buffer, size_t *lenp, loff_t *ppos); /* .proc_handler of the "cad_pid" entry; declared here so kern_table can take its address */
162 static int proc_taint(struct ctl_table *table, int write, struct file *filp,
163                                void __user *buffer, size_t *lenp, loff_t *ppos); /* .proc_handler of the "tainted" entry */
164 #endif
165
166 static struct ctl_table root_table[]; /* forward declaration; the initializer of root_table_header refers to it */
167 static struct ctl_table_root sysctl_table_root; /* declared early because root_table_header takes addresses inside it */
168 static struct ctl_table_header root_table_header = { /* statically initialized header for root_table */
169         .count = 1,
170         .ctl_table = root_table,
171         .ctl_entry = LIST_HEAD_INIT(sysctl_table_root.default_set.list), /* NOTE(review): with the usual LIST_HEAD_INIT this points the entry at the default set's list head - confirm in linux/list.h */
172         .root = &sysctl_table_root,
173         .set = &sysctl_table_root.default_set,
174 };
175 static struct ctl_table_root sysctl_table_root = {
176         .root_list = LIST_HEAD_INIT(sysctl_table_root.root_list), /* initialized from its own address */
177         .default_set.list = LIST_HEAD_INIT(root_table_header.ctl_entry), /* mirror of .ctl_entry above: the two list nodes reference each other */
178 };
179
180 static struct ctl_table kern_table[]; /* forward declarations: root_table names these five tables as .child before they are defined */
181 static struct ctl_table vm_table[];
182 static struct ctl_table fs_table[];
183 static struct ctl_table debug_table[];
184 static struct ctl_table dev_table[];
185 extern struct ctl_table random_table[]; /* defined in another file; .child of the "random" entry in kern_table */
186 #ifdef CONFIG_INOTIFY_USER
187 extern struct ctl_table inotify_table[];
188 #endif
189 #ifdef CONFIG_EPOLL
190 extern struct ctl_table epoll_table[];
191 #endif
192
193 #ifdef HAVE_ARCH_PICK_MMAP_LAYOUT
194 int sysctl_legacy_va_layout; /* definition (not extern) with external linkage; no initializer, so 0 */
195 #endif
196
197 extern int prove_locking; /* .data of "prove_locking" (entry exists only with CONFIG_PROVE_LOCKING) */
198 extern int lock_stat; /* .data of "lock_stat" (entry exists only with CONFIG_LOCK_STAT) */
199
200 /* The default sysctl tables: */
201
202 static struct ctl_table root_table[] = { /* top-level table: each entry only names a child table */
203         {
204                 .ctl_name       = CTL_KERN,
205                 .procname       = "kernel",
206                 .mode           = 0555, /* octal r-xr-xr-x: no write bit on any of these parent entries */
207                 .child          = kern_table,
208         },
209         {
210                 .ctl_name       = CTL_VM,
211                 .procname       = "vm",
212                 .mode           = 0555,
213                 .child          = vm_table,
214         },
215         {
216                 .ctl_name       = CTL_FS,
217                 .procname       = "fs",
218                 .mode           = 0555,
219                 .child          = fs_table,
220         },
221         {
222                 .ctl_name       = CTL_DEBUG,
223                 .procname       = "debug",
224                 .mode           = 0555,
225                 .child          = debug_table,
226         },
227         {
228                 .ctl_name       = CTL_DEV,
229                 .procname       = "dev",
230                 .mode           = 0555,
231                 .child          = dev_table,
232         },
233 /*
234  * NOTE: do not add new entries to this table unless you have read
235  * Documentation/sysctl/ctl_unnumbered.txt
236  */
237         { .ctl_name = 0 } /* last entry, all other fields zero; presumably the end-of-table marker - confirm against the table walker */
238 };
239
240 #ifdef CONFIG_SCHED_DEBUG
241 static int min_sched_granularity_ns = 100000;           /* 100 usecs; .extra1 of "sched_min_granularity_ns" and "sched_latency_ns" */
242 static int max_sched_granularity_ns = NSEC_PER_SEC;     /* 1 second; .extra2 of the same two entries */
243 static int min_wakeup_granularity_ns;                   /* 0 usecs (no initializer); .extra1 of "sched_wakeup_granularity_ns" */
244 static int max_wakeup_granularity_ns = NSEC_PER_SEC;    /* 1 second; .extra2 of "sched_wakeup_granularity_ns" */
245 #endif
246
247 static struct ctl_table kern_table[] = {
248 #ifdef CONFIG_SCHED_DEBUG
249         {
250                 .ctl_name       = CTL_UNNUMBERED,
251                 .procname       = "sched_min_granularity_ns",
252                 .data           = &sysctl_sched_min_granularity,
253                 .maxlen         = sizeof(unsigned int),
254                 .mode           = 0644,
255                 .proc_handler   = &sched_nr_latency_handler,
256                 .strategy       = &sysctl_intvec,
257                 .extra1         = &min_sched_granularity_ns,
258                 .extra2         = &max_sched_granularity_ns,
259         },
260         {
261                 .ctl_name       = CTL_UNNUMBERED,
262                 .procname       = "sched_latency_ns",
263                 .data           = &sysctl_sched_latency,
264                 .maxlen         = sizeof(unsigned int),
265                 .mode           = 0644,
266                 .proc_handler   = &sched_nr_latency_handler,
267                 .strategy       = &sysctl_intvec,
268                 .extra1         = &min_sched_granularity_ns,
269                 .extra2         = &max_sched_granularity_ns,
270         },
271         {
272                 .ctl_name       = CTL_UNNUMBERED,
273                 .procname       = "sched_wakeup_granularity_ns",
274                 .data           = &sysctl_sched_wakeup_granularity,
275                 .maxlen         = sizeof(unsigned int),
276                 .mode           = 0644,
277                 .proc_handler   = &proc_dointvec_minmax,
278                 .strategy       = &sysctl_intvec,
279                 .extra1         = &min_wakeup_granularity_ns,
280                 .extra2         = &max_wakeup_granularity_ns,
281         },
282         {
283                 .ctl_name       = CTL_UNNUMBERED,
284                 .procname       = "sched_shares_ratelimit",
285                 .data           = &sysctl_sched_shares_ratelimit,
286                 .maxlen         = sizeof(unsigned int),
287                 .mode           = 0644,
288                 .proc_handler   = &proc_dointvec,
289         },
290         {
291                 .ctl_name       = CTL_UNNUMBERED,
292                 .procname       = "sched_shares_thresh",
293                 .data           = &sysctl_sched_shares_thresh,
294                 .maxlen         = sizeof(unsigned int),
295                 .mode           = 0644,
296                 .proc_handler   = &proc_dointvec_minmax,
297                 .strategy       = &sysctl_intvec,
298                 .extra1         = &zero,
299         },
300         {
301                 .ctl_name       = CTL_UNNUMBERED,
302                 .procname       = "sched_child_runs_first",
303                 .data           = &sysctl_sched_child_runs_first,
304                 .maxlen         = sizeof(unsigned int),
305                 .mode           = 0644,
306                 .proc_handler   = &proc_dointvec,
307         },
308         {
309                 .ctl_name       = CTL_UNNUMBERED,
310                 .procname       = "sched_features",
311                 .data           = &sysctl_sched_features,
312                 .maxlen         = sizeof(unsigned int),
313                 .mode           = 0644,
314                 .proc_handler   = &proc_dointvec,
315         },
316         {
317                 .ctl_name       = CTL_UNNUMBERED,
318                 .procname       = "sched_migration_cost",
319                 .data           = &sysctl_sched_migration_cost,
320                 .maxlen         = sizeof(unsigned int),
321                 .mode           = 0644,
322                 .proc_handler   = &proc_dointvec,
323         },
324         {
325                 .ctl_name       = CTL_UNNUMBERED,
326                 .procname       = "sched_nr_migrate",
327                 .data           = &sysctl_sched_nr_migrate,
328                 .maxlen         = sizeof(unsigned int),
329                 .mode           = 0644,
330                 .proc_handler   = &proc_dointvec,
331         },
332         {
333                 .ctl_name       = CTL_UNNUMBERED,
334                 .procname       = "timer_migration",
335                 .data           = &sysctl_timer_migration,
336                 .maxlen         = sizeof(unsigned int),
337                 .mode           = 0644,
338                 .proc_handler   = &proc_dointvec,
339         },
340 #endif
341         {
342                 .ctl_name       = CTL_UNNUMBERED,
343                 .procname       = "sched_rt_period_us",
344                 .data           = &sysctl_sched_rt_period,
345                 .maxlen         = sizeof(unsigned int),
346                 .mode           = 0644,
347                 .proc_handler   = &sched_rt_handler,
348         },
349         {
350                 .ctl_name       = CTL_UNNUMBERED,
351                 .procname       = "sched_rt_runtime_us",
352                 .data           = &sysctl_sched_rt_runtime,
353                 .maxlen         = sizeof(int),
354                 .mode           = 0644,
355                 .proc_handler   = &sched_rt_handler,
356         },
357         {
358                 .ctl_name       = CTL_UNNUMBERED,
359                 .procname       = "sched_compat_yield",
360                 .data           = &sysctl_sched_compat_yield,
361                 .maxlen         = sizeof(unsigned int),
362                 .mode           = 0644,
363                 .proc_handler   = &proc_dointvec,
364         },
365 #ifdef CONFIG_PROVE_LOCKING
366         {
367                 .ctl_name       = CTL_UNNUMBERED,
368                 .procname       = "prove_locking",
369                 .data           = &prove_locking,
370                 .maxlen         = sizeof(int),
371                 .mode           = 0644,
372                 .proc_handler   = &proc_dointvec,
373         },
374 #endif
375 #ifdef CONFIG_LOCK_STAT
376         {
377                 .ctl_name       = CTL_UNNUMBERED,
378                 .procname       = "lock_stat",
379                 .data           = &lock_stat,
380                 .maxlen         = sizeof(int),
381                 .mode           = 0644,
382                 .proc_handler   = &proc_dointvec,
383         },
384 #endif
385         {
386                 .ctl_name       = KERN_PANIC,
387                 .procname       = "panic",
388                 .data           = &panic_timeout,
389                 .maxlen         = sizeof(int),
390                 .mode           = 0644,
391                 .proc_handler   = &proc_dointvec,
392         },
393         {
394                 .ctl_name       = KERN_CORE_USES_PID,
395                 .procname       = "core_uses_pid",
396                 .data           = &core_uses_pid,
397                 .maxlen         = sizeof(int),
398                 .mode           = 0644,
399                 .proc_handler   = &proc_dointvec,
400         },
401         {
402                 .ctl_name       = KERN_CORE_PATTERN,
403                 .procname       = "core_pattern",
404                 .data           = core_pattern,
405                 .maxlen         = CORENAME_MAX_SIZE,
406                 .mode           = 0644,
407                 .proc_handler   = &proc_dostring,
408                 .strategy       = &sysctl_string,
409         },
410 #ifdef CONFIG_PROC_SYSCTL
411         {
412                 .procname       = "tainted",
413                 .maxlen         = sizeof(long),
414                 .mode           = 0644,
415                 .proc_handler   = &proc_taint,
416         },
417 #endif
418 #ifdef CONFIG_LATENCYTOP
419         {
420                 .procname       = "latencytop",
421                 .data           = &latencytop_enabled,
422                 .maxlen         = sizeof(int),
423                 .mode           = 0644,
424                 .proc_handler   = &proc_dointvec,
425         },
426 #endif
427 #ifdef CONFIG_BLK_DEV_INITRD
428         {
429                 .ctl_name       = KERN_REALROOTDEV,
430                 .procname       = "real-root-dev",
431                 .data           = &real_root_dev,
432                 .maxlen         = sizeof(int),
433                 .mode           = 0644,
434                 .proc_handler   = &proc_dointvec,
435         },
436 #endif
437         {
438                 .ctl_name       = CTL_UNNUMBERED,
439                 .procname       = "print-fatal-signals",
440                 .data           = &print_fatal_signals,
441                 .maxlen         = sizeof(int),
442                 .mode           = 0644,
443                 .proc_handler   = &proc_dointvec,
444         },
445 #ifdef CONFIG_SPARC
446         {
447                 .ctl_name       = KERN_SPARC_REBOOT,
448                 .procname       = "reboot-cmd",
449                 .data           = reboot_command,
450                 .maxlen         = 256,
451                 .mode           = 0644,
452                 .proc_handler   = &proc_dostring,
453                 .strategy       = &sysctl_string,
454         },
455         {
456                 .ctl_name       = KERN_SPARC_STOP_A,
457                 .procname       = "stop-a",
458                 .data           = &stop_a_enabled,
459                 .maxlen         = sizeof (int),
460                 .mode           = 0644,
461                 .proc_handler   = &proc_dointvec,
462         },
463         {
464                 .ctl_name       = KERN_SPARC_SCONS_PWROFF,
465                 .procname       = "scons-poweroff",
466                 .data           = &scons_pwroff,
467                 .maxlen         = sizeof (int),
468                 .mode           = 0644,
469                 .proc_handler   = &proc_dointvec,
470         },
471 #endif
472 #ifdef CONFIG_SPARC64
473         {
474                 .ctl_name       = CTL_UNNUMBERED,
475                 .procname       = "tsb-ratio",
476                 .data           = &sysctl_tsb_ratio,
477                 .maxlen         = sizeof (int),
478                 .mode           = 0644,
479                 .proc_handler   = &proc_dointvec,
480         },
481 #endif
482 #ifdef __hppa__
483         {
484                 .ctl_name       = KERN_HPPA_PWRSW,
485                 .procname       = "soft-power",
486                 .data           = &pwrsw_enabled,
487                 .maxlen         = sizeof (int),
488                 .mode           = 0644,
489                 .proc_handler   = &proc_dointvec,
490         },
491         {
492                 .ctl_name       = KERN_HPPA_UNALIGNED,
493                 .procname       = "unaligned-trap",
494                 .data           = &unaligned_enabled,
495                 .maxlen         = sizeof (int),
496                 .mode           = 0644,
497                 .proc_handler   = &proc_dointvec,
498         },
499 #endif
500         {
501                 .ctl_name       = KERN_CTLALTDEL,
502                 .procname       = "ctrl-alt-del",
503                 .data           = &C_A_D,
504                 .maxlen         = sizeof(int),
505                 .mode           = 0644,
506                 .proc_handler   = &proc_dointvec,
507         },
508 #ifdef CONFIG_FUNCTION_TRACER
509         {
510                 .ctl_name       = CTL_UNNUMBERED,
511                 .procname       = "ftrace_enabled",
512                 .data           = &ftrace_enabled,
513                 .maxlen         = sizeof(int),
514                 .mode           = 0644,
515                 .proc_handler   = &ftrace_enable_sysctl,
516         },
517 #endif
518 #ifdef CONFIG_STACK_TRACER
519         {
520                 .ctl_name       = CTL_UNNUMBERED,
521                 .procname       = "stack_tracer_enabled",
522                 .data           = &stack_tracer_enabled,
523                 .maxlen         = sizeof(int),
524                 .mode           = 0644,
525                 .proc_handler   = &stack_trace_sysctl,
526         },
527 #endif
528 #ifdef CONFIG_TRACING
529         {
530                 .ctl_name       = CTL_UNNUMBERED,
531                 .procname       = "ftrace_dump_on_oops",
532                 .data           = &ftrace_dump_on_oops,
533                 .maxlen         = sizeof(int),
534                 .mode           = 0644,
535                 .proc_handler   = &proc_dointvec,
536         },
537 #endif
538 #ifdef CONFIG_MODULES
539         {
540                 .ctl_name       = KERN_MODPROBE,
541                 .procname       = "modprobe",
542                 .data           = &modprobe_path,
543                 .maxlen         = KMOD_PATH_LEN,
544                 .mode           = 0644,
545                 .proc_handler   = &proc_dostring,
546                 .strategy       = &sysctl_string,
547         },
548         {
549                 .ctl_name       = CTL_UNNUMBERED,
550                 .procname       = "modules_disabled",
551                 .data           = &modules_disabled,
552                 .maxlen         = sizeof(int),
553                 .mode           = 0644,
554                 /* only handle a transition from default "0" to "1" */
555                 .proc_handler   = &proc_dointvec_minmax,
556                 .extra1         = &one,
557                 .extra2         = &one,
558         },
559 #endif
560 #if defined(CONFIG_HOTPLUG) && defined(CONFIG_NET)
561         {
562                 .ctl_name       = KERN_HOTPLUG,
563                 .procname       = "hotplug",
564                 .data           = &uevent_helper,
565                 .maxlen         = UEVENT_HELPER_PATH_LEN,
566                 .mode           = 0644,
567                 .proc_handler   = &proc_dostring,
568                 .strategy       = &sysctl_string,
569         },
570 #endif
571 #ifdef CONFIG_CHR_DEV_SG
572         {
573                 .ctl_name       = KERN_SG_BIG_BUFF,
574                 .procname       = "sg-big-buff",
575                 .data           = &sg_big_buff,
576                 .maxlen         = sizeof (int),
577                 .mode           = 0444,
578                 .proc_handler   = &proc_dointvec,
579         },
580 #endif
581 #ifdef CONFIG_BSD_PROCESS_ACCT
582         {
583                 .ctl_name       = KERN_ACCT,
584                 .procname       = "acct",
585                 .data           = &acct_parm,
586                 .maxlen         = 3*sizeof(int),
587                 .mode           = 0644,
588                 .proc_handler   = &proc_dointvec,
589         },
590 #endif
591 #ifdef CONFIG_MAGIC_SYSRQ
592         {
593                 .ctl_name       = KERN_SYSRQ,
594                 .procname       = "sysrq",
595                 .data           = &__sysrq_enabled,
596                 .maxlen         = sizeof (int),
597                 .mode           = 0644,
598                 .proc_handler   = &proc_dointvec,
599         },
600 #endif
601 #ifdef CONFIG_PROC_SYSCTL
602         {
603                 .procname       = "cad_pid",
604                 .data           = NULL,
605                 .maxlen         = sizeof (int),
606                 .mode           = 0600,
607                 .proc_handler   = &proc_do_cad_pid,
608         },
609 #endif
610         {
611                 .ctl_name       = KERN_MAX_THREADS,
612                 .procname       = "threads-max",
613                 .data           = &max_threads,
614                 .maxlen         = sizeof(int),
615                 .mode           = 0644,
616                 .proc_handler   = &proc_dointvec,
617         },
618         {
619                 .ctl_name       = KERN_RANDOM,
620                 .procname       = "random",
621                 .mode           = 0555,
622                 .child          = random_table,
623         },
624         {
625                 .ctl_name       = KERN_OVERFLOWUID,
626                 .procname       = "overflowuid",
627                 .data           = &overflowuid,
628                 .maxlen         = sizeof(int),
629                 .mode           = 0644,
630                 .proc_handler   = &proc_dointvec_minmax,
631                 .strategy       = &sysctl_intvec,
632                 .extra1         = &minolduid,
633                 .extra2         = &maxolduid,
634         },
635         {
636                 .ctl_name       = KERN_OVERFLOWGID,
637                 .procname       = "overflowgid",
638                 .data           = &overflowgid,
639                 .maxlen         = sizeof(int),
640                 .mode           = 0644,
641                 .proc_handler   = &proc_dointvec_minmax,
642                 .strategy       = &sysctl_intvec,
643                 .extra1         = &minolduid,
644                 .extra2         = &maxolduid,
645         },
646 #ifdef CONFIG_S390
647 #ifdef CONFIG_MATHEMU
648         {
649                 .ctl_name       = KERN_IEEE_EMULATION_WARNINGS,
650                 .procname       = "ieee_emulation_warnings",
651                 .data           = &sysctl_ieee_emulation_warnings,
652                 .maxlen         = sizeof(int),
653                 .mode           = 0644,
654                 .proc_handler   = &proc_dointvec,
655         },
656 #endif
657         {
658                 .ctl_name       = KERN_S390_USER_DEBUG_LOGGING,
659                 .procname       = "userprocess_debug",
660                 .data           = &sysctl_userprocess_debug,
661                 .maxlen         = sizeof(int),
662                 .mode           = 0644,
663                 .proc_handler   = &proc_dointvec,
664         },
665 #endif
666         {
667                 .ctl_name       = KERN_PIDMAX,
668                 .procname       = "pid_max",
669                 .data           = &pid_max,
670                 .maxlen         = sizeof (int),
671                 .mode           = 0644,
672                 .proc_handler   = &proc_dointvec_minmax,
673                 .strategy       = sysctl_intvec,
674                 .extra1         = &pid_max_min,
675                 .extra2         = &pid_max_max,
676         },
677         {
678                 .ctl_name       = KERN_PANIC_ON_OOPS,
679                 .procname       = "panic_on_oops",
680                 .data           = &panic_on_oops,
681                 .maxlen         = sizeof(int),
682                 .mode           = 0644,
683                 .proc_handler   = &proc_dointvec,
684         },
685 #if defined CONFIG_PRINTK
686         {
687                 .ctl_name       = KERN_PRINTK,
688                 .procname       = "printk",
689                 .data           = &console_loglevel,
690                 .maxlen         = 4*sizeof(int),
691                 .mode           = 0644,
692                 .proc_handler   = &proc_dointvec,
693         },
694         {
695                 .ctl_name       = KERN_PRINTK_RATELIMIT,
696                 .procname       = "printk_ratelimit",
697                 .data           = &printk_ratelimit_state.interval,
698                 .maxlen         = sizeof(int),
699                 .mode           = 0644,
700                 .proc_handler   = &proc_dointvec_jiffies,
701                 .strategy       = &sysctl_jiffies,
702         },
703         {
704                 .ctl_name       = KERN_PRINTK_RATELIMIT_BURST,
705                 .procname       = "printk_ratelimit_burst",
706                 .data           = &printk_ratelimit_state.burst,
707                 .maxlen         = sizeof(int),
708                 .mode           = 0644,
709                 .proc_handler   = &proc_dointvec,
710         },
711 #endif
712         {
713                 .ctl_name       = KERN_NGROUPS_MAX,
714                 .procname       = "ngroups_max",
715                 .data           = &ngroups_max,
716                 .maxlen         = sizeof (int),
717                 .mode           = 0444,
718                 .proc_handler   = &proc_dointvec,
719         },
720 #if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_X86)
721         {
722                 .ctl_name       = KERN_UNKNOWN_NMI_PANIC,
723                 .procname       = "unknown_nmi_panic",
724                 .data           = &unknown_nmi_panic,
725                 .maxlen         = sizeof (int),
726                 .mode           = 0644,
727                 .proc_handler   = &proc_dointvec,
728         },
729         {
730                 .procname       = "nmi_watchdog",
731                 .data           = &nmi_watchdog_enabled,
732                 .maxlen         = sizeof (int),
733                 .mode           = 0644,
734                 .proc_handler   = &proc_nmi_enabled,
735         },
736 #endif
737 #if defined(CONFIG_X86)
738         {
739                 .ctl_name       = KERN_PANIC_ON_NMI,
740                 .procname       = "panic_on_unrecovered_nmi",
741                 .data           = &panic_on_unrecovered_nmi,
742                 .maxlen         = sizeof(int),
743                 .mode           = 0644,
744                 .proc_handler   = &proc_dointvec,
745         },
746         {
747                 .ctl_name       = CTL_UNNUMBERED,
748                 .procname       = "panic_on_io_nmi",
749                 .data           = &panic_on_io_nmi,
750                 .maxlen         = sizeof(int),
751                 .mode           = 0644,
752                 .proc_handler   = &proc_dointvec,
753         },
754         {
755                 .ctl_name       = KERN_BOOTLOADER_TYPE,
756                 .procname       = "bootloader_type",
757                 .data           = &bootloader_type,
758                 .maxlen         = sizeof (int),
759                 .mode           = 0444,
760                 .proc_handler   = &proc_dointvec,
761         },
762         {
763                 .ctl_name       = CTL_UNNUMBERED,
764                 .procname       = "bootloader_version",
765                 .data           = &bootloader_version,
766                 .maxlen         = sizeof (int),
767                 .mode           = 0444,
768                 .proc_handler   = &proc_dointvec,
769         },
770         {
771                 .ctl_name       = CTL_UNNUMBERED,
772                 .procname       = "kstack_depth_to_print",
773                 .data           = &kstack_depth_to_print,
774                 .maxlen         = sizeof(int),
775                 .mode           = 0644,
776                 .proc_handler   = &proc_dointvec,
777         },
778         {
779                 .ctl_name       = CTL_UNNUMBERED,
780                 .procname       = "io_delay_type",
781                 .data           = &io_delay_type,
782                 .maxlen         = sizeof(int),
783                 .mode           = 0644,
784                 .proc_handler   = &proc_dointvec,
785         },
786 #endif
787 #if defined(CONFIG_MMU)
788         {
789                 .ctl_name       = KERN_RANDOMIZE,
790                 .procname       = "randomize_va_space",
791                 .data           = &randomize_va_space,
792                 .maxlen         = sizeof(int),
793                 .mode           = 0644,
794                 .proc_handler   = &proc_dointvec,
795         },
796 #endif
797 #if defined(CONFIG_S390) && defined(CONFIG_SMP)
798         {
799                 .ctl_name       = KERN_SPIN_RETRY,
800                 .procname       = "spin_retry",
801                 .data           = &spin_retry,
802                 .maxlen         = sizeof (int),
803                 .mode           = 0644,
804                 .proc_handler   = &proc_dointvec,
805         },
806 #endif
807 #if     defined(CONFIG_ACPI_SLEEP) && defined(CONFIG_X86)
808         {
809                 .procname       = "acpi_video_flags",
810                 .data           = &acpi_realmode_flags,
811                 .maxlen         = sizeof (unsigned long),
812                 .mode           = 0644,
813                 .proc_handler   = &proc_doulongvec_minmax,
814         },
815 #endif
816 #ifdef CONFIG_IA64
817         {
818                 .ctl_name       = KERN_IA64_UNALIGNED,
819                 .procname       = "ignore-unaligned-usertrap",
820                 .data           = &no_unaligned_warning,
821                 .maxlen         = sizeof (int),
822                 .mode           = 0644,
823                 .proc_handler   = &proc_dointvec,
824         },
825         {
826                 .ctl_name       = CTL_UNNUMBERED,
827                 .procname       = "unaligned-dump-stack",
828                 .data           = &unaligned_dump_stack,
829                 .maxlen         = sizeof (int),
830                 .mode           = 0644,
831                 .proc_handler   = &proc_dointvec,
832         },
833 #endif
834 #ifdef CONFIG_DETECT_SOFTLOCKUP
835         {
836                 .ctl_name       = CTL_UNNUMBERED,
837                 .procname       = "softlockup_panic",
838                 .data           = &softlockup_panic,
839                 .maxlen         = sizeof(int),
840                 .mode           = 0644,
841                 .proc_handler   = &proc_dointvec_minmax,
842                 .strategy       = &sysctl_intvec,
843                 .extra1         = &zero,
844                 .extra2         = &one,
845         },
846         {
847                 .ctl_name       = CTL_UNNUMBERED,
848                 .procname       = "softlockup_thresh",
849                 .data           = &softlockup_thresh,
850                 .maxlen         = sizeof(int),
851                 .mode           = 0644,
852                 .proc_handler   = &proc_dosoftlockup_thresh,
853                 .strategy       = &sysctl_intvec,
854                 .extra1         = &neg_one,
855                 .extra2         = &sixty,
856         },
857 #endif
858 #ifdef CONFIG_DETECT_HUNG_TASK
859         {
860                 .ctl_name       = CTL_UNNUMBERED,
861                 .procname       = "hung_task_panic",
862                 .data           = &sysctl_hung_task_panic,
863                 .maxlen         = sizeof(int),
864                 .mode           = 0644,
865                 .proc_handler   = &proc_dointvec_minmax,
866                 .strategy       = &sysctl_intvec,
867                 .extra1         = &zero,
868                 .extra2         = &one,
869         },
870         {
871                 .ctl_name       = CTL_UNNUMBERED,
872                 .procname       = "hung_task_check_count",
873                 .data           = &sysctl_hung_task_check_count,
874                 .maxlen         = sizeof(unsigned long),
875                 .mode           = 0644,
876                 .proc_handler   = &proc_doulongvec_minmax,
877                 .strategy       = &sysctl_intvec,
878         },
879         {
880                 .ctl_name       = CTL_UNNUMBERED,
881                 .procname       = "hung_task_timeout_secs",
882                 .data           = &sysctl_hung_task_timeout_secs,
883                 .maxlen         = sizeof(unsigned long),
884                 .mode           = 0644,
885                 .proc_handler   = &proc_dohung_task_timeout_secs,
886                 .strategy       = &sysctl_intvec,
887         },
888         {
889                 .ctl_name       = CTL_UNNUMBERED,
890                 .procname       = "hung_task_warnings",
891                 .data           = &sysctl_hung_task_warnings,
892                 .maxlen         = sizeof(unsigned long),
893                 .mode           = 0644,
894                 .proc_handler   = &proc_doulongvec_minmax,
895                 .strategy       = &sysctl_intvec,
896         },
897 #endif
898 #ifdef CONFIG_COMPAT
899         {
900                 .ctl_name       = KERN_COMPAT_LOG,
901                 .procname       = "compat-log",
902                 .data           = &compat_log,
903                 .maxlen         = sizeof (int),
904                 .mode           = 0644,
905                 .proc_handler   = &proc_dointvec,
906         },
907 #endif
908 #ifdef CONFIG_RT_MUTEXES
909         {
910                 .ctl_name       = KERN_MAX_LOCK_DEPTH,
911                 .procname       = "max_lock_depth",
912                 .data           = &max_lock_depth,
913                 .maxlen         = sizeof(int),
914                 .mode           = 0644,
915                 .proc_handler   = &proc_dointvec,
916         },
917 #endif
918         {
919                 .ctl_name       = CTL_UNNUMBERED,
920                 .procname       = "poweroff_cmd",
921                 .data           = &poweroff_cmd,
922                 .maxlen         = POWEROFF_CMD_PATH_LEN,
923                 .mode           = 0644,
924                 .proc_handler   = &proc_dostring,
925                 .strategy       = &sysctl_string,
926         },
927 #ifdef CONFIG_KEYS
928         {
929                 .ctl_name       = CTL_UNNUMBERED,
930                 .procname       = "keys",
931                 .mode           = 0555,
932                 .child          = key_sysctls,
933         },
934 #endif
935 #ifdef CONFIG_RCU_TORTURE_TEST
936         {
937                 .ctl_name       = CTL_UNNUMBERED,
938                 .procname       = "rcutorture_runnable",
939                 .data           = &rcutorture_runnable,
940                 .maxlen         = sizeof(int),
941                 .mode           = 0644,
942                 .proc_handler   = &proc_dointvec,
943         },
944 #endif
945 #ifdef CONFIG_SLOW_WORK
946         {
947                 .ctl_name       = CTL_UNNUMBERED,
948                 .procname       = "slow-work",
949                 .mode           = 0555,
950                 .child          = slow_work_sysctls,
951         },
952 #endif
953 #ifdef CONFIG_PERF_COUNTERS
954         {
955                 .ctl_name       = CTL_UNNUMBERED,
956                 .procname       = "perf_counter_paranoid",
957                 .data           = &sysctl_perf_counter_paranoid,
958                 .maxlen         = sizeof(sysctl_perf_counter_paranoid),
959                 .mode           = 0644,
960                 .proc_handler   = &proc_dointvec,
961         },
962         {
963                 .ctl_name       = CTL_UNNUMBERED,
964                 .procname       = "perf_counter_mlock_kb",
965                 .data           = &sysctl_perf_counter_mlock,
966                 .maxlen         = sizeof(sysctl_perf_counter_mlock),
967                 .mode           = 0644,
968                 .proc_handler   = &proc_dointvec,
969         },
970         {
971                 .ctl_name       = CTL_UNNUMBERED,
972                 .procname       = "perf_counter_max_sample_rate",
973                 .data           = &sysctl_perf_counter_sample_rate,
974                 .maxlen         = sizeof(sysctl_perf_counter_sample_rate),
975                 .mode           = 0644,
976                 .proc_handler   = &proc_dointvec,
977         },
978 #endif
979 #ifdef CONFIG_KMEMCHECK
980         {
981                 .ctl_name       = CTL_UNNUMBERED,
982                 .procname       = "kmemcheck",
983                 .data           = &kmemcheck_enabled,
984                 .maxlen         = sizeof(int),
985                 .mode           = 0644,
986                 .proc_handler   = &proc_dointvec,
987         },
988 #endif
989
990 /*
991  * NOTE: do not add new entries to this table unless you have read
992  * Documentation/sysctl/ctl_unnumbered.txt
993  */
994         { .ctl_name = 0 }
995 };
996
997 static struct ctl_table vm_table[] = {
998         {
999                 .ctl_name       = VM_OVERCOMMIT_MEMORY,
1000                 .procname       = "overcommit_memory",
1001                 .data           = &sysctl_overcommit_memory,
1002                 .maxlen         = sizeof(sysctl_overcommit_memory),
1003                 .mode           = 0644,
1004                 .proc_handler   = &proc_dointvec,
1005         },
1006         {
1007                 .ctl_name       = VM_PANIC_ON_OOM,
1008                 .procname       = "panic_on_oom",
1009                 .data           = &sysctl_panic_on_oom,
1010                 .maxlen         = sizeof(sysctl_panic_on_oom),
1011                 .mode           = 0644,
1012                 .proc_handler   = &proc_dointvec,
1013         },
1014         {
1015                 .ctl_name       = CTL_UNNUMBERED,
1016                 .procname       = "oom_kill_allocating_task",
1017                 .data           = &sysctl_oom_kill_allocating_task,
1018                 .maxlen         = sizeof(sysctl_oom_kill_allocating_task),
1019                 .mode           = 0644,
1020                 .proc_handler   = &proc_dointvec,
1021         },
1022         {
1023                 .ctl_name       = CTL_UNNUMBERED,
1024                 .procname       = "oom_dump_tasks",
1025                 .data           = &sysctl_oom_dump_tasks,
1026                 .maxlen         = sizeof(sysctl_oom_dump_tasks),
1027                 .mode           = 0644,
1028                 .proc_handler   = &proc_dointvec,
1029         },
1030         {
1031                 .ctl_name       = VM_OVERCOMMIT_RATIO,
1032                 .procname       = "overcommit_ratio",
1033                 .data           = &sysctl_overcommit_ratio,
1034                 .maxlen         = sizeof(sysctl_overcommit_ratio),
1035                 .mode           = 0644,
1036                 .proc_handler   = &proc_dointvec,
1037         },
1038         {
1039                 .ctl_name       = VM_PAGE_CLUSTER,
1040                 .procname       = "page-cluster", 
1041                 .data           = &page_cluster,
1042                 .maxlen         = sizeof(int),
1043                 .mode           = 0644,
1044                 .proc_handler   = &proc_dointvec,
1045         },
1046         {
1047                 .ctl_name       = VM_DIRTY_BACKGROUND,
1048                 .procname       = "dirty_background_ratio",
1049                 .data           = &dirty_background_ratio,
1050                 .maxlen         = sizeof(dirty_background_ratio),
1051                 .mode           = 0644,
1052                 .proc_handler   = &dirty_background_ratio_handler,
1053                 .strategy       = &sysctl_intvec,
1054                 .extra1         = &zero,
1055                 .extra2         = &one_hundred,
1056         },
1057         {
1058                 .ctl_name       = CTL_UNNUMBERED,
1059                 .procname       = "dirty_background_bytes",
1060                 .data           = &dirty_background_bytes,
1061                 .maxlen         = sizeof(dirty_background_bytes),
1062                 .mode           = 0644,
1063                 .proc_handler   = &dirty_background_bytes_handler,
1064                 .strategy       = &sysctl_intvec,
1065                 .extra1         = &one_ul,
1066         },
1067         {
1068                 .ctl_name       = VM_DIRTY_RATIO,
1069                 .procname       = "dirty_ratio",
1070                 .data           = &vm_dirty_ratio,
1071                 .maxlen         = sizeof(vm_dirty_ratio),
1072                 .mode           = 0644,
1073                 .proc_handler   = &dirty_ratio_handler,
1074                 .strategy       = &sysctl_intvec,
1075                 .extra1         = &zero,
1076                 .extra2         = &one_hundred,
1077         },
1078         {
1079                 .ctl_name       = CTL_UNNUMBERED,
1080                 .procname       = "dirty_bytes",
1081                 .data           = &vm_dirty_bytes,
1082                 .maxlen         = sizeof(vm_dirty_bytes),
1083                 .mode           = 0644,
1084                 .proc_handler   = &dirty_bytes_handler,
1085                 .strategy       = &sysctl_intvec,
1086                 .extra1         = &dirty_bytes_min,
1087         },
1088         {
1089                 .procname       = "dirty_writeback_centisecs",
1090                 .data           = &dirty_writeback_interval,
1091                 .maxlen         = sizeof(dirty_writeback_interval),
1092                 .mode           = 0644,
1093                 .proc_handler   = &dirty_writeback_centisecs_handler,
1094         },
1095         {
1096                 .procname       = "dirty_expire_centisecs",
1097                 .data           = &dirty_expire_interval,
1098                 .maxlen         = sizeof(dirty_expire_interval),
1099                 .mode           = 0644,
1100                 .proc_handler   = &proc_dointvec,
1101         },
1102         {
1103                 .ctl_name       = VM_NR_PDFLUSH_THREADS,
1104                 .procname       = "nr_pdflush_threads",
1105                 .data           = &nr_pdflush_threads,
1106                 .maxlen         = sizeof nr_pdflush_threads,
1107                 .mode           = 0444 /* read-only*/,
1108                 .proc_handler   = &proc_dointvec,
1109         },
1110         {
1111                 .ctl_name       = VM_SWAPPINESS,
1112                 .procname       = "swappiness",
1113                 .data           = &vm_swappiness,
1114                 .maxlen         = sizeof(vm_swappiness),
1115                 .mode           = 0644,
1116                 .proc_handler   = &proc_dointvec_minmax,
1117                 .strategy       = &sysctl_intvec,
1118                 .extra1         = &zero,
1119                 .extra2         = &one_hundred,
1120         },
1121 #ifdef CONFIG_HUGETLB_PAGE
1122          {
1123                 .procname       = "nr_hugepages",
1124                 .data           = NULL,
1125                 .maxlen         = sizeof(unsigned long),
1126                 .mode           = 0644,
1127                 .proc_handler   = &hugetlb_sysctl_handler,
1128                 .extra1         = (void *)&hugetlb_zero,
1129                 .extra2         = (void *)&hugetlb_infinity,
1130          },
1131          {
1132                 .ctl_name       = VM_HUGETLB_GROUP,
1133                 .procname       = "hugetlb_shm_group",
1134                 .data           = &sysctl_hugetlb_shm_group,
1135                 .maxlen         = sizeof(gid_t),
1136                 .mode           = 0644,
1137                 .proc_handler   = &proc_dointvec,
1138          },
1139          {
1140                 .ctl_name       = CTL_UNNUMBERED,
1141                 .procname       = "hugepages_treat_as_movable",
1142                 .data           = &hugepages_treat_as_movable,
1143                 .maxlen         = sizeof(int),
1144                 .mode           = 0644,
1145                 .proc_handler   = &hugetlb_treat_movable_handler,
1146         },
1147         {
1148                 .ctl_name       = CTL_UNNUMBERED,
1149                 .procname       = "nr_overcommit_hugepages",
1150                 .data           = NULL,
1151                 .maxlen         = sizeof(unsigned long),
1152                 .mode           = 0644,
1153                 .proc_handler   = &hugetlb_overcommit_handler,
1154                 .extra1         = (void *)&hugetlb_zero,
1155                 .extra2         = (void *)&hugetlb_infinity,
1156         },
1157 #endif
1158         {
1159                 .ctl_name       = VM_LOWMEM_RESERVE_RATIO,
1160                 .procname       = "lowmem_reserve_ratio",
1161                 .data           = &sysctl_lowmem_reserve_ratio,
1162                 .maxlen         = sizeof(sysctl_lowmem_reserve_ratio),
1163                 .mode           = 0644,
1164                 .proc_handler   = &lowmem_reserve_ratio_sysctl_handler,
1165                 .strategy       = &sysctl_intvec,
1166         },
1167         {
1168                 .ctl_name       = VM_DROP_PAGECACHE,
1169                 .procname       = "drop_caches",
1170                 .data           = &sysctl_drop_caches,
1171                 .maxlen         = sizeof(int),
1172                 .mode           = 0644,
1173                 .proc_handler   = drop_caches_sysctl_handler,
1174                 .strategy       = &sysctl_intvec,
1175         },
1176         {
1177                 .ctl_name       = VM_MIN_FREE_KBYTES,
1178                 .procname       = "min_free_kbytes",
1179                 .data           = &min_free_kbytes,
1180                 .maxlen         = sizeof(min_free_kbytes),
1181                 .mode           = 0644,
1182                 .proc_handler   = &min_free_kbytes_sysctl_handler,
1183                 .strategy       = &sysctl_intvec,
1184                 .extra1         = &zero,
1185         },
1186         {
1187                 .ctl_name       = VM_PERCPU_PAGELIST_FRACTION,
1188                 .procname       = "percpu_pagelist_fraction",
1189                 .data           = &percpu_pagelist_fraction,
1190                 .maxlen         = sizeof(percpu_pagelist_fraction),
1191                 .mode           = 0644,
1192                 .proc_handler   = &percpu_pagelist_fraction_sysctl_handler,
1193                 .strategy       = &sysctl_intvec,
1194                 .extra1         = &min_percpu_pagelist_fract,
1195         },
1196 #ifdef CONFIG_MMU
1197         {
1198                 .ctl_name       = VM_MAX_MAP_COUNT,
1199                 .procname       = "max_map_count",
1200                 .data           = &sysctl_max_map_count,
1201                 .maxlen         = sizeof(sysctl_max_map_count),
1202                 .mode           = 0644,
1203                 .proc_handler   = &proc_dointvec
1204         },
1205 #else
1206         {
1207                 .ctl_name       = CTL_UNNUMBERED,
1208                 .procname       = "nr_trim_pages",
1209                 .data           = &sysctl_nr_trim_pages,
1210                 .maxlen         = sizeof(sysctl_nr_trim_pages),
1211                 .mode           = 0644,
1212                 .proc_handler   = &proc_dointvec_minmax,
1213                 .strategy       = &sysctl_intvec,
1214                 .extra1         = &zero,
1215         },
1216 #endif
1217         {
1218                 .ctl_name       = VM_LAPTOP_MODE,
1219                 .procname       = "laptop_mode",
1220                 .data           = &laptop_mode,
1221                 .maxlen         = sizeof(laptop_mode),
1222                 .mode           = 0644,
1223                 .proc_handler   = &proc_dointvec_jiffies,
1224                 .strategy       = &sysctl_jiffies,
1225         },
1226         {
1227                 .ctl_name       = VM_BLOCK_DUMP,
1228                 .procname       = "block_dump",
1229                 .data           = &block_dump,
1230                 .maxlen         = sizeof(block_dump),
1231                 .mode           = 0644,
1232                 .proc_handler   = &proc_dointvec,
1233                 .strategy       = &sysctl_intvec,
1234                 .extra1         = &zero,
1235         },
1236         {
1237                 .ctl_name       = VM_VFS_CACHE_PRESSURE,
1238                 .procname       = "vfs_cache_pressure",
1239                 .data           = &sysctl_vfs_cache_pressure,
1240                 .maxlen         = sizeof(sysctl_vfs_cache_pressure),
1241                 .mode           = 0644,
1242                 .proc_handler   = &proc_dointvec,
1243                 .strategy       = &sysctl_intvec,
1244                 .extra1         = &zero,
1245         },
1246 #ifdef HAVE_ARCH_PICK_MMAP_LAYOUT
1247         {
1248                 .ctl_name       = VM_LEGACY_VA_LAYOUT,
1249                 .procname       = "legacy_va_layout",
1250                 .data           = &sysctl_legacy_va_layout,
1251                 .maxlen         = sizeof(sysctl_legacy_va_layout),
1252                 .mode           = 0644,
1253                 .proc_handler   = &proc_dointvec,
1254                 .strategy       = &sysctl_intvec,
1255                 .extra1         = &zero,
1256         },
1257 #endif
1258 #ifdef CONFIG_NUMA
1259         {
1260                 .ctl_name       = VM_ZONE_RECLAIM_MODE,
1261                 .procname       = "zone_reclaim_mode",
1262                 .data           = &zone_reclaim_mode,
1263                 .maxlen         = sizeof(zone_reclaim_mode),
1264                 .mode           = 0644,
1265                 .proc_handler   = &proc_dointvec,
1266                 .strategy       = &sysctl_intvec,
1267                 .extra1         = &zero,
1268         },
1269         {
1270                 .ctl_name       = VM_MIN_UNMAPPED,
1271                 .procname       = "min_unmapped_ratio",
1272                 .data           = &sysctl_min_unmapped_ratio,
1273                 .maxlen         = sizeof(sysctl_min_unmapped_ratio),
1274                 .mode           = 0644,
1275                 .proc_handler   = &sysctl_min_unmapped_ratio_sysctl_handler,
1276                 .strategy       = &sysctl_intvec,
1277                 .extra1         = &zero,
1278                 .extra2         = &one_hundred,
1279         },
1280         {
1281                 .ctl_name       = VM_MIN_SLAB,
1282                 .procname       = "min_slab_ratio",
1283                 .data           = &sysctl_min_slab_ratio,
1284                 .maxlen         = sizeof(sysctl_min_slab_ratio),
1285                 .mode           = 0644,
1286                 .proc_handler   = &sysctl_min_slab_ratio_sysctl_handler,
1287                 .strategy       = &sysctl_intvec,
1288                 .extra1         = &zero,
1289                 .extra2         = &one_hundred,
1290         },
1291 #endif
1292 #ifdef CONFIG_SMP
1293         {
1294                 .ctl_name       = CTL_UNNUMBERED,
1295                 .procname       = "stat_interval",
1296                 .data           = &sysctl_stat_interval,
1297                 .maxlen         = sizeof(sysctl_stat_interval),
1298                 .mode           = 0644,
1299                 .proc_handler   = &proc_dointvec_jiffies,
1300                 .strategy       = &sysctl_jiffies,
1301         },
1302 #endif
1303         {
1304                 .ctl_name       = CTL_UNNUMBERED,
1305                 .procname       = "mmap_min_addr",
1306                 .data           = &mmap_min_addr,
1307                 .maxlen         = sizeof(unsigned long),
1308                 .mode           = 0644,
1309                 .proc_handler   = &proc_doulongvec_minmax,
1310         },
1311 #ifdef CONFIG_NUMA
1312         {
1313                 .ctl_name       = CTL_UNNUMBERED,
1314                 .procname       = "numa_zonelist_order",
1315                 .data           = &numa_zonelist_order,
1316                 .maxlen         = NUMA_ZONELIST_ORDER_LEN,
1317                 .mode           = 0644,
1318                 .proc_handler   = &numa_zonelist_order_handler,
1319                 .strategy       = &sysctl_string,
1320         },
1321 #endif
1322 #if (defined(CONFIG_X86_32) && !defined(CONFIG_UML))|| \
1323    (defined(CONFIG_SUPERH) && defined(CONFIG_VSYSCALL))
1324         {
1325                 .ctl_name       = VM_VDSO_ENABLED,
1326                 .procname       = "vdso_enabled",
1327                 .data           = &vdso_enabled,
1328                 .maxlen         = sizeof(vdso_enabled),
1329                 .mode           = 0644,
1330                 .proc_handler   = &proc_dointvec,
1331                 .strategy       = &sysctl_intvec,
1332                 .extra1         = &zero,
1333         },
1334 #endif
1335 #ifdef CONFIG_HIGHMEM
1336         {
1337                 .ctl_name       = CTL_UNNUMBERED,
1338                 .procname       = "highmem_is_dirtyable",
1339                 .data           = &vm_highmem_is_dirtyable,
1340                 .maxlen         = sizeof(vm_highmem_is_dirtyable),
1341                 .mode           = 0644,
1342                 .proc_handler   = &proc_dointvec_minmax,
1343                 .strategy       = &sysctl_intvec,
1344                 .extra1         = &zero,
1345                 .extra2         = &one,
1346         },
1347 #endif
1348         {
1349                 .ctl_name       = CTL_UNNUMBERED,
1350                 .procname       = "scan_unevictable_pages",
1351                 .data           = &scan_unevictable_pages,
1352                 .maxlen         = sizeof(scan_unevictable_pages),
1353                 .mode           = 0644,
1354                 .proc_handler   = &scan_unevictable_handler,
1355         },
1356 /*
1357  * NOTE: do not add new entries to this table unless you have read
1358  * Documentation/sysctl/ctl_unnumbered.txt
1359  */
1360         { .ctl_name = 0 }
1361 };
1362
#if defined(CONFIG_BINFMT_MISC) || defined(CONFIG_BINFMT_MISC_MODULE)
/*
 * Empty table (terminator only), built when binfmt_misc is configured
 * either built-in or as a module.  fs_table hangs it off its
 * "binfmt_misc" entry as .child.
 */
static struct ctl_table binfmt_misc_table[] = {
	{ .ctl_name = 0 }	/* end-of-table marker */
};
#endif
1368
1369 static struct ctl_table fs_table[] = {
1370         {
1371                 .ctl_name       = FS_NRINODE,
1372                 .procname       = "inode-nr",
1373                 .data           = &inodes_stat,
1374                 .maxlen         = 2*sizeof(int),
1375                 .mode           = 0444,
1376                 .proc_handler   = &proc_dointvec,
1377         },
1378         {
1379                 .ctl_name       = FS_STATINODE,
1380                 .procname       = "inode-state",
1381                 .data           = &inodes_stat,
1382                 .maxlen         = 7*sizeof(int),
1383                 .mode           = 0444,
1384                 .proc_handler   = &proc_dointvec,
1385         },
1386         {
1387                 .procname       = "file-nr",
1388                 .data           = &files_stat,
1389                 .maxlen         = 3*sizeof(int),
1390                 .mode           = 0444,
1391                 .proc_handler   = &proc_nr_files,
1392         },
1393         {
1394                 .ctl_name       = FS_MAXFILE,
1395                 .procname       = "file-max",
1396                 .data           = &files_stat.max_files,
1397                 .maxlen         = sizeof(int),
1398                 .mode           = 0644,
1399                 .proc_handler   = &proc_dointvec,
1400         },
1401         {
1402                 .ctl_name       = CTL_UNNUMBERED,
1403                 .procname       = "nr_open",
1404                 .data           = &sysctl_nr_open,
1405                 .maxlen         = sizeof(int),
1406                 .mode           = 0644,
1407                 .proc_handler   = &proc_dointvec_minmax,
1408                 .extra1         = &sysctl_nr_open_min,
1409                 .extra2         = &sysctl_nr_open_max,
1410         },
1411         {
1412                 .ctl_name       = FS_DENTRY,
1413                 .procname       = "dentry-state",
1414                 .data           = &dentry_stat,
1415                 .maxlen         = 6*sizeof(int),
1416                 .mode           = 0444,
1417                 .proc_handler   = &proc_dointvec,
1418         },
1419         {
1420                 .ctl_name       = FS_OVERFLOWUID,
1421                 .procname       = "overflowuid",
1422                 .data           = &fs_overflowuid,
1423                 .maxlen         = sizeof(int),
1424                 .mode           = 0644,
1425                 .proc_handler   = &proc_dointvec_minmax,
1426                 .strategy       = &sysctl_intvec,
1427                 .extra1         = &minolduid,
1428                 .extra2         = &maxolduid,
1429         },
1430         {
1431                 .ctl_name       = FS_OVERFLOWGID,
1432                 .procname       = "overflowgid",
1433                 .data           = &fs_overflowgid,
1434                 .maxlen         = sizeof(int),
1435                 .mode           = 0644,
1436                 .proc_handler   = &proc_dointvec_minmax,
1437                 .strategy       = &sysctl_intvec,
1438                 .extra1         = &minolduid,
1439                 .extra2         = &maxolduid,
1440         },
1441 #ifdef CONFIG_FILE_LOCKING
1442         {
1443                 .ctl_name       = FS_LEASES,
1444                 .procname       = "leases-enable",
1445                 .data           = &leases_enable,
1446                 .maxlen         = sizeof(int),
1447                 .mode           = 0644,
1448                 .proc_handler   = &proc_dointvec,
1449         },
1450 #endif
1451 #ifdef CONFIG_DNOTIFY
1452         {
1453                 .ctl_name       = FS_DIR_NOTIFY,
1454                 .procname       = "dir-notify-enable",
1455                 .data           = &dir_notify_enable,
1456                 .maxlen         = sizeof(int),
1457                 .mode           = 0644,
1458                 .proc_handler   = &proc_dointvec,
1459         },
1460 #endif
1461 #ifdef CONFIG_MMU
1462 #ifdef CONFIG_FILE_LOCKING
1463         {
1464                 .ctl_name       = FS_LEASE_TIME,
1465                 .procname       = "lease-break-time",
1466                 .data           = &lease_break_time,
1467                 .maxlen         = sizeof(int),
1468                 .mode           = 0644,
1469                 .proc_handler   = &proc_dointvec,
1470         },
1471 #endif
1472 #ifdef CONFIG_AIO
1473         {
1474                 .procname       = "aio-nr",
1475                 .data           = &aio_nr,
1476                 .maxlen         = sizeof(aio_nr),
1477                 .mode           = 0444,
1478                 .proc_handler   = &proc_doulongvec_minmax,
1479         },
1480         {
1481                 .procname       = "aio-max-nr",
1482                 .data           = &aio_max_nr,
1483                 .maxlen         = sizeof(aio_max_nr),
1484                 .mode           = 0644,
1485                 .proc_handler   = &proc_doulongvec_minmax,
1486         },
1487 #endif /* CONFIG_AIO */
1488 #ifdef CONFIG_INOTIFY_USER
1489         {
1490                 .ctl_name       = FS_INOTIFY,
1491                 .procname       = "inotify",
1492                 .mode           = 0555,
1493                 .child          = inotify_table,
1494         },
1495 #endif  
1496 #ifdef CONFIG_EPOLL
1497         {
1498                 .procname       = "epoll",
1499                 .mode           = 0555,
1500                 .child          = epoll_table,
1501         },
1502 #endif
1503 #endif
1504         {
1505                 .ctl_name       = KERN_SETUID_DUMPABLE,
1506                 .procname       = "suid_dumpable",
1507                 .data           = &suid_dumpable,
1508                 .maxlen         = sizeof(int),
1509                 .mode           = 0644,
1510                 .proc_handler   = &proc_dointvec_minmax,
1511                 .strategy       = &sysctl_intvec,
1512                 .extra1         = &zero,
1513                 .extra2         = &two,
1514         },
1515 #if defined(CONFIG_BINFMT_MISC) || defined(CONFIG_BINFMT_MISC_MODULE)
1516         {
1517                 .ctl_name       = CTL_UNNUMBERED,
1518                 .procname       = "binfmt_misc",
1519                 .mode           = 0555,
1520                 .child          = binfmt_misc_table,
1521         },
1522 #endif
1523 /*
1524  * NOTE: do not add new entries to this table unless you have read
1525  * Documentation/sysctl/ctl_unnumbered.txt
1526  */
1527         { .ctl_name = 0 }
1528 };
1529
1530 static struct ctl_table debug_table[] = {
1531 #if defined(CONFIG_X86) || defined(CONFIG_PPC)
1532         {
1533                 .ctl_name       = CTL_UNNUMBERED,
1534                 .procname       = "exception-trace",
1535                 .data           = &show_unhandled_signals,
1536                 .maxlen         = sizeof(int),
1537                 .mode           = 0644,
1538                 .proc_handler   = proc_dointvec
1539         },
1540 #endif
1541         { .ctl_name = 0 }
1542 };
1543
/* dev_table has no entries of its own here: it holds only the sentinel. */
static struct ctl_table dev_table[] = {
        { .ctl_name = 0 }
};
1547
/*
 * Spinlock held by the functions in this file while they touch the use and
 * reference counts of a ctl_table_header, its ->unregistering marker, and
 * the lists of table headers and table roots.
 */
static DEFINE_SPINLOCK(sysctl_lock);
1549
1550 /* called under sysctl_lock */
1551 static int use_table(struct ctl_table_header *p)
1552 {
1553         if (unlikely(p->unregistering))
1554                 return 0;
1555         p->used++;
1556         return 1;
1557 }
1558
1559 /* called under sysctl_lock */
1560 static void unuse_table(struct ctl_table_header *p)
1561 {
1562         if (!--p->used)
1563                 if (unlikely(p->unregistering))
1564                         complete(p->unregistering);
1565 }
1566
1567 /* called under sysctl_lock, will reacquire if has to wait */
1568 static void start_unregistering(struct ctl_table_header *p)
1569 {
1570         /*
1571          * if p->used is 0, nobody will ever touch that entry again;
1572          * we'll eliminate all paths to it before dropping sysctl_lock
1573          */
1574         if (unlikely(p->used)) {
1575                 struct completion wait;
1576                 init_completion(&wait);
1577                 p->unregistering = &wait;
1578                 spin_unlock(&sysctl_lock);
1579                 wait_for_completion(&wait);
1580                 spin_lock(&sysctl_lock);
1581         } else {
1582                 /* anything non-NULL; we'll never dereference it */
1583                 p->unregistering = ERR_PTR(-EINVAL);
1584         }
1585         /*
1586          * do not remove from the list until nobody holds it; walking the
1587          * list in do_sysctl() relies on that.
1588          */
1589         list_del_init(&p->ctl_entry);
1590 }
1591
1592 void sysctl_head_get(struct ctl_table_header *head)
1593 {
1594         spin_lock(&sysctl_lock);
1595         head->count++;
1596         spin_unlock(&sysctl_lock);
1597 }
1598
1599 void sysctl_head_put(struct ctl_table_header *head)
1600 {
1601         spin_lock(&sysctl_lock);
1602         if (!--head->count)
1603                 kfree(head);
1604         spin_unlock(&sysctl_lock);
1605 }
1606
1607 struct ctl_table_header *sysctl_head_grab(struct ctl_table_header *head)
1608 {
1609         if (!head)
1610                 BUG();
1611         spin_lock(&sysctl_lock);
1612         if (!use_table(head))
1613                 head = ERR_PTR(-ENOENT);
1614         spin_unlock(&sysctl_lock);
1615         return head;
1616 }
1617
1618 void sysctl_head_finish(struct ctl_table_header *head)
1619 {
1620         if (!head)
1621                 return;
1622         spin_lock(&sysctl_lock);
1623         unuse_table(head);
1624         spin_unlock(&sysctl_lock);
1625 }
1626
1627 static struct ctl_table_set *
1628 lookup_header_set(struct ctl_table_root *root, struct nsproxy *namespaces)
1629 {
1630         struct ctl_table_set *set = &root->default_set;
1631         if (root->lookup)
1632                 set = root->lookup(root, namespaces);
1633         return set;
1634 }
1635
1636 static struct list_head *
1637 lookup_header_list(struct ctl_table_root *root, struct nsproxy *namespaces)
1638 {
1639         struct ctl_table_set *set = lookup_header_set(root, namespaces);
1640         return &set->list;
1641 }
1642
/*
 * Step to the next sysctl table header for @namespaces.
 *
 * @prev is NULL to start the walk at root_table_header.  Otherwise it is the
 * header handed out by the previous call; its use reference is dropped here
 * and the walk resumes with the entry after it.
 *
 * Headers on which use_table() fails (they are being unregistered) are
 * skipped.  When the header list of the current root is exhausted the walk
 * moves to the following roots, skipping those whose header list for
 * @namespaces is empty, and ends when it wraps around to sysctl_table_root.
 *
 * Returns the next header with a use reference taken, or NULL when the walk
 * is over.  sysctl_lock is taken and released internally.
 */
struct ctl_table_header *__sysctl_head_next(struct nsproxy *namespaces,
                                            struct ctl_table_header *prev)
{
        struct ctl_table_root *root;
        struct list_head *header_list;
        struct ctl_table_header *head;
        struct list_head *tmp;

        spin_lock(&sysctl_lock);
        if (prev) {
                /* resume after @prev: release it, then jump into the loop */
                head = prev;
                tmp = &prev->ctl_entry;
                unuse_table(prev);
                goto next;
        }
        tmp = &root_table_header.ctl_entry;
        for (;;) {
                head = list_entry(tmp, struct ctl_table_header, ctl_entry);

                if (!use_table(head))
                        goto next;
                spin_unlock(&sysctl_lock);
                return head;
        next:
                root = head->root;
                tmp = tmp->next;
                header_list = lookup_header_list(root, namespaces);
                /* not back at the list head yet: more headers in this root */
                if (tmp != header_list)
                        continue;

                /* this root is done; find the next root with headers */
                do {
                        root = list_entry(root->root_list.next,
                                        struct ctl_table_root, root_list);
                        if (root == &sysctl_table_root)
                                goto out;
                        header_list = lookup_header_list(root, namespaces);
                } while (list_empty(header_list));
                tmp = header_list->next;
        }
out:
        spin_unlock(&sysctl_lock);
        return NULL;
}
1686
1687 struct ctl_table_header *sysctl_head_next(struct ctl_table_header *prev)
1688 {
1689         return __sysctl_head_next(current->nsproxy, prev);
1690 }
1691
1692 void register_sysctl_root(struct ctl_table_root *root)
1693 {
1694         spin_lock(&sysctl_lock);
1695         list_add_tail(&root->root_list, &sysctl_table_root.root_list);
1696         spin_unlock(&sysctl_lock);
1697 }
1698
1699 #ifdef CONFIG_SYSCTL_SYSCALL
1700 /* Perform the actual read/write of a sysctl table entry. */
1701 static int do_sysctl_strategy(struct ctl_table_root *root,
1702                         struct ctl_table *table,
1703                         void __user *oldval, size_t __user *oldlenp,
1704                         void __user *newval, size_t newlen)
1705 {
1706         int op = 0, rc;
1707
1708         if (oldval)
1709                 op |= MAY_READ;
1710         if (newval)
1711                 op |= MAY_WRITE;
1712         if (sysctl_perm(root, table, op))
1713                 return -EPERM;
1714
1715         if (table->strategy) {
1716                 rc = table->strategy(table, oldval, oldlenp, newval, newlen);
1717                 if (rc < 0)
1718                         return rc;
1719                 if (rc > 0)
1720                         return 0;
1721         }
1722
1723         /* If there is no strategy routine, or if the strategy returns
1724          * zero, proceed with automatic r/w */
1725         if (table->data && table->maxlen) {
1726                 rc = sysctl_data(table, oldval, oldlenp, newval, newlen);
1727                 if (rc < 0)
1728                         return rc;
1729         }
1730         return 0;
1731 }
1732
1733 static int parse_table(int __user *name, int nlen,
1734                        void __user *oldval, size_t __user *oldlenp,
1735                        void __user *newval, size_t newlen,
1736                        struct ctl_table_root *root,
1737                        struct ctl_table *table)
1738 {
1739         int n;
1740 repeat:
1741         if (!nlen)
1742                 return -ENOTDIR;
1743         if (get_user(n, name))
1744                 return -EFAULT;
1745         for ( ; table->ctl_name || table->procname; table++) {
1746                 if (!table->ctl_name)
1747                         continue;
1748                 if (n == table->ctl_name) {
1749                         int error;
1750                         if (table->child) {
1751                                 if (sysctl_perm(root, table, MAY_EXEC))
1752                                         return -EPERM;
1753                                 name++;
1754                                 nlen--;
1755                                 table = table->child;
1756                                 goto repeat;
1757                         }
1758                         error = do_sysctl_strategy(root, table,
1759                                                    oldval, oldlenp,
1760                                                    newval, newlen);
1761                         return error;
1762                 }
1763         }
1764         return -ENOTDIR;
1765 }
1766
1767 int do_sysctl(int __user *name, int nlen, void __user *oldval, size_t __user *oldlenp,
1768                void __user *newval, size_t newlen)
1769 {
1770         struct ctl_table_header *head;
1771         int error = -ENOTDIR;
1772
1773         if (nlen <= 0 || nlen >= CTL_MAXNAME)
1774                 return -ENOTDIR;
1775         if (oldval) {
1776                 int old_len;
1777                 if (!oldlenp || get_user(old_len, oldlenp))
1778                         return -EFAULT;
1779         }
1780
1781         for (head = sysctl_head_next(NULL); head;
1782                         head = sysctl_head_next(head)) {
1783                 error = parse_table(name, nlen, oldval, oldlenp, 
1784                                         newval, newlen,
1785                                         head->root, head->ctl_table);
1786                 if (error != -ENOTDIR) {
1787                         sysctl_head_finish(head);
1788                         break;
1789                 }
1790         }
1791         return error;
1792 }
1793
1794 SYSCALL_DEFINE1(sysctl, struct __sysctl_args __user *, args)
1795 {
1796         struct __sysctl_args tmp;
1797         int error;
1798
1799         if (copy_from_user(&tmp, args, sizeof(tmp)))
1800                 return -EFAULT;
1801
1802         error = deprecated_sysctl_warning(&tmp);
1803         if (error)
1804                 goto out;
1805
1806         lock_kernel();
1807         error = do_sysctl(tmp.name, tmp.nlen, tmp.oldval, tmp.oldlenp,
1808                           tmp.newval, tmp.newlen);
1809         unlock_kernel();
1810 out:
1811         return error;
1812 }
1813 #endif /* CONFIG_SYSCTL_SYSCALL */
1814
1815 /*
1816  * sysctl_perm does NOT grant the superuser all rights automatically, because
1817  * some sysctl variables are readonly even to root.
1818  */
1819
1820 static int test_perm(int mode, int op)
1821 {
1822         if (!current_euid())
1823                 mode >>= 6;
1824         else if (in_egroup_p(0))
1825                 mode >>= 3;
1826         if ((op & ~mode & (MAY_READ|MAY_WRITE|MAY_EXEC)) == 0)
1827                 return 0;
1828         return -EACCES;
1829 }
1830
1831 int sysctl_perm(struct ctl_table_root *root, struct ctl_table *table, int op)
1832 {
1833         int error;
1834         int mode;
1835
1836         error = security_sysctl(table, op & (MAY_READ | MAY_WRITE | MAY_EXEC));
1837         if (error)
1838                 return error;
1839
1840         if (root->permissions)
1841                 mode = root->permissions(root, current->nsproxy, table);
1842         else
1843                 mode = table->mode;
1844
1845         return test_perm(mode, op);
1846 }
1847
1848 static void sysctl_set_parent(struct ctl_table *parent, struct ctl_table *table)
1849 {
1850         for (; table->ctl_name || table->procname; table++) {
1851                 table->parent = parent;
1852                 if (table->child)
1853                         sysctl_set_parent(table, table->child);
1854         }
1855 }
1856
1857 static __init int sysctl_init(void)
1858 {
1859         sysctl_set_parent(NULL, root_table);
1860 #ifdef CONFIG_SYSCTL_SYSCALL_CHECK
1861         {
1862                 int err;
1863                 err = sysctl_check_table(current->nsproxy, root_table);
1864         }
1865 #endif
1866         return 0;
1867 }
1868
1869 core_initcall(sysctl_init);
1870
1871 static struct ctl_table *is_branch_in(struct ctl_table *branch,
1872                                       struct ctl_table *table)
1873 {
1874         struct ctl_table *p;
1875         const char *s = branch->procname;
1876
1877         /* branch should have named subdirectory as its first element */
1878         if (!s || !branch->child)
1879                 return NULL;
1880
1881         /* ... and nothing else */
1882         if (branch[1].procname || branch[1].ctl_name)
1883                 return NULL;
1884
1885         /* table should contain subdirectory with the same name */
1886         for (p = table; p->procname || p->ctl_name; p++) {
1887                 if (!p->child)
1888                         continue;
1889                 if (p->procname && strcmp(p->procname, s) == 0)
1890                         return p;
1891         }
1892         return NULL;
1893 }
1894
1895 /* see if attaching q to p would be an improvement */
1896 static void try_attach(struct ctl_table_header *p, struct ctl_table_header *q)
1897 {
1898         struct ctl_table *to = p->ctl_table, *by = q->ctl_table;
1899         struct ctl_table *next;
1900         int is_better = 0;
1901         int not_in_parent = !p->attached_by;
1902
1903         while ((next = is_branch_in(by, to)) != NULL) {
1904                 if (by == q->attached_by)
1905                         is_better = 1;
1906                 if (to == p->attached_by)
1907                         not_in_parent = 1;
1908                 by = by->child;
1909                 to = next->child;
1910         }
1911
1912         if (is_better && not_in_parent) {
1913                 q->attached_by = by;
1914                 q->attached_to = to;
1915                 q->parent = p;
1916         }
1917 }
1918
1919 /**
1920  * __register_sysctl_paths - register a sysctl hierarchy
1921  * @root: List of sysctl headers to register on
1922  * @namespaces: Data to compute which lists of sysctl entries are visible
1923  * @path: The path to the directory the sysctl table is in.
1924  * @table: the top-level table structure
1925  *
1926  * Register a sysctl table hierarchy. @table should be a filled in ctl_table
1927  * array. A completely 0 filled entry terminates the table.
1928  *
1929  * The members of the &struct ctl_table structure are used as follows:
1930  *
1931  * ctl_name - This is the numeric sysctl value used by sysctl(2). The number
1932  *            must be unique within that level of sysctl
1933  *
1934  * procname - the name of the sysctl file under /proc/sys. Set to %NULL to not
1935  *            enter a sysctl file
1936  *
1937  * data - a pointer to data for use by proc_handler
1938  *
1939  * maxlen - the maximum size in bytes of the data
1940  *
1941  * mode - the file permissions for the /proc/sys file, and for sysctl(2)
1942  *
1943  * child - a pointer to the child sysctl table if this entry is a directory, or
1944  *         %NULL.
1945  *
1946  * proc_handler - the text handler routine (described below)
1947  *
1948  * strategy - the strategy routine (described below)
1949  *
1950  * de - for internal use by the sysctl routines
1951  *
1952  * extra1, extra2 - extra pointers usable by the proc handler routines
1953  *
1954  * Leaf nodes in the sysctl tree will be represented by a single file
1955  * under /proc; non-leaf nodes will be represented by directories.
1956  *
1957  * sysctl(2) can automatically manage read and write requests through
1958  * the sysctl table.  The data and maxlen fields of the ctl_table
1959  * struct enable minimal validation of the values being written to be
1960  * performed, and the mode field allows minimal authentication.
1961  *
1962  * More sophisticated management can be enabled by the provision of a
1963  * strategy routine with the table entry.  This will be called before
1964  * any automatic read or write of the data is performed.
1965  *
1966  * The strategy routine may return
1967  *
1968  * < 0 - Error occurred (error is passed to user process)
1969  *
1970  * 0   - OK - proceed with automatic read or write.
1971  *
1972  * > 0 - OK - read or write has been done by the strategy routine, so
1973  *       return immediately.
1974  *
1975  * There must be a proc_handler routine for any terminal nodes
1976  * mirrored under /proc/sys (non-terminals are handled by a built-in
1977  * directory handler).  Several default handlers are available to
1978  * cover common cases -
1979  *
1980  * proc_dostring(), proc_dointvec(), proc_dointvec_jiffies(),
1981  * proc_dointvec_userhz_jiffies(), proc_dointvec_minmax(), 
1982  * proc_doulongvec_ms_jiffies_minmax(), proc_doulongvec_minmax()
1983  *
1984  * It is the handler's job to read the input buffer from user memory
1985  * and process it. The handler should return 0 on success.
1986  *
1987  * This routine returns %NULL on a failure to register, and a pointer
1988  * to the table header on success.
1989  */
1990 struct ctl_table_header *__register_sysctl_paths(
1991         struct ctl_table_root *root,
1992         struct nsproxy *namespaces,
1993         const struct ctl_path *path, struct ctl_table *table)
1994 {
1995         struct ctl_table_header *header;
1996         struct ctl_table *new, **prevp;
1997         unsigned int n, npath;
1998         struct ctl_table_set *set;
1999
2000         /* Count the path components */
2001         for (npath = 0; path[npath].ctl_name || path[npath].procname; ++npath)
2002                 ;
2003
2004         /*
2005          * For each path component, allocate a 2-element ctl_table array.
2006          * The first array element will be filled with the sysctl entry
2007          * for this, the second will be the sentinel (ctl_name == 0).
2008          *
2009          * We allocate everything in one go so that we don't have to
2010          * worry about freeing additional memory in unregister_sysctl_table.
2011          */
2012         header = kzalloc(sizeof(struct ctl_table_header) +
2013                          (2 * npath * sizeof(struct ctl_table)), GFP_KERNEL);
2014         if (!header)
2015                 return NULL;
2016
2017         new = (struct ctl_table *) (header + 1);
2018
2019         /* Now connect the dots */
2020         prevp = &header->ctl_table;
2021         for (n = 0; n < npath; ++n, ++path) {
2022                 /* Copy the procname */
2023                 new->procname = path->procname;
2024                 new->ctl_name = path->ctl_name;
2025                 new->mode     = 0555;
2026
2027                 *prevp = new;
2028                 prevp = &new->child;
2029
2030                 new += 2;
2031         }
2032         *prevp = table;
2033         header->ctl_table_arg = table;
2034
2035         INIT_LIST_HEAD(&header->ctl_entry);
2036         header->used = 0;
2037         header->unregistering = NULL;
2038         header->root = root;
2039         sysctl_set_parent(NULL, header->ctl_table);
2040         header->count = 1;
2041 #ifdef CONFIG_SYSCTL_SYSCALL_CHECK
2042         if (sysctl_check_table(namespaces, header->ctl_table)) {
2043                 kfree(header);
2044                 return NULL;
2045         }
2046 #endif
2047         spin_lock(&sysctl_lock);
2048         header->set = lookup_header_set(root, namespaces);
2049         header->attached_by = header->ctl_table;
2050         header->attached_to = root_table;
2051         header->parent = &root_table_header;
2052         for (set = header->set; set; set = set->parent) {
2053                 struct ctl_table_header *p;
2054                 list_for_each_entry(p, &set->list, ctl_entry) {
2055                         if (p->unregistering)
2056                                 continue;
2057                         try_attach(p, header);
2058                 }
2059         }
2060         header->parent->count++;
2061         list_add_tail(&header->ctl_entry, &header->set->list);
2062         spin_unlock(&sysctl_lock);
2063
2064         return header;
2065 }
2066
/**
 * register_sysctl_paths - register a sysctl table hierarchy
 * @path: The path to the directory the sysctl table is in.
 * @table: the top-level table structure
 *
 * Register a sysctl table hierarchy. @table should be a filled in ctl_table
 * array. A completely 0 filled entry terminates the table.
 *
 * The table is registered on sysctl_table_root using the namespaces of the
 * current task.
 *
 * See __register_sysctl_paths for more details.
 */
struct ctl_table_header *register_sysctl_paths(const struct ctl_path *path,
                                                struct ctl_table *table)
{
        return __register_sysctl_paths(&sysctl_table_root, current->nsproxy,
                                        path, table);
}
2083
/**
 * register_sysctl_table - register a sysctl table hierarchy
 * @table: the top-level table structure
 *
 * Register a sysctl table hierarchy. @table should be a filled in ctl_table
 * array. A completely 0 filled entry terminates the table.
 *
 * This is register_sysctl_paths() with an empty path.
 *
 * See register_sysctl_paths for more details.
 */
struct ctl_table_header *register_sysctl_table(struct ctl_table *table)
{
        /* a path made of the terminating (all-zero) entry only */
        static const struct ctl_path null_path[] = { {} };

        return register_sysctl_paths(null_path, table);
}
2099
2100 /**
2101  * unregister_sysctl_table - unregister a sysctl table hierarchy
2102  * @header: the header returned from register_sysctl_table
2103  *
2104  * Unregisters the sysctl table and all children. proc entries may not
2105  * actually be removed until they are no longer used by anyone.
2106  */
2107 void unregister_sysctl_table(struct ctl_table_header * header)
2108 {
2109         might_sleep();
2110
2111         if (header == NULL)
2112                 return;
2113
2114         spin_lock(&sysctl_lock);
2115         start_unregistering(header);
2116         if (!--header->parent->count) {
2117                 WARN_ON(1);
2118                 kfree(header->parent);
2119         }
2120         if (!--header->count)
2121                 kfree(header);
2122         spin_unlock(&sysctl_lock);
2123 }
2124
2125 int sysctl_is_seen(struct ctl_table_header *p)
2126 {
2127         struct ctl_table_set *set = p->set;
2128         int res;
2129         spin_lock(&sysctl_lock);
2130         if (p->unregistering)
2131                 res = 0;
2132         else if (!set->is_seen)
2133                 res = 1;
2134         else
2135                 res = set->is_seen(set);
2136         spin_unlock(&sysctl_lock);
2137         return res;
2138 }
2139
2140 void setup_sysctl_set(struct ctl_table_set *p,
2141         struct ctl_table_set *parent,
2142         int (*is_seen)(struct ctl_table_set *))
2143 {
2144         INIT_LIST_HEAD(&p->list);
2145         p->parent = parent ? parent : &sysctl_table_root.default_set;
2146         p->is_seen = is_seen;
2147 }
2148
2149 #else /* !CONFIG_SYSCTL */
/* Stub for builds without CONFIG_SYSCTL: registers nothing, returns NULL. */
struct ctl_table_header *register_sysctl_table(struct ctl_table * table)
{
        return NULL;
}
2154
/* Stub for builds without CONFIG_SYSCTL: registers nothing, returns NULL. */
struct ctl_table_header *register_sysctl_paths(const struct ctl_path *path,
                                                    struct ctl_table *table)
{
        return NULL;
}
2160
/* Stub for builds without CONFIG_SYSCTL: nothing to unregister. */
void unregister_sysctl_table(struct ctl_table_header * table)
{
}
2164
/* Stub for builds without CONFIG_SYSCTL: leaves @p untouched. */
void setup_sysctl_set(struct ctl_table_set *p,
        struct ctl_table_set *parent,
        int (*is_seen)(struct ctl_table_set *))
{
}
2170
/* Stub for builds without CONFIG_SYSCTL: does nothing. */
void sysctl_head_put(struct ctl_table_header *head)
{
}
2174
2175 #endif /* CONFIG_SYSCTL */
2176
2177 /*
2178  * /proc/sys support
2179  */
2180
2181 #ifdef CONFIG_PROC_SYSCTL
2182
2183 static int _proc_do_string(void* data, int maxlen, int write,
2184                            struct file *filp, void __user *buffer,
2185                            size_t *lenp, loff_t *ppos)
2186 {
2187         size_t len;
2188         char __user *p;
2189         char c;
2190
2191         if (!data || !maxlen || !*lenp) {
2192                 *lenp = 0;
2193                 return 0;
2194         }
2195
2196         if (write) {
2197                 len = 0;
2198                 p = buffer;
2199                 while (len < *lenp) {
2200                         if (get_user(c, p++))
2201                                 return -EFAULT;
2202                         if (c == 0 || c == '\n')
2203                                 break;
2204                         len++;
2205                 }
2206                 if (len >= maxlen)
2207                         len = maxlen-1;
2208                 if(copy_from_user(data, buffer, len))
2209                         return -EFAULT;
2210                 ((char *) data)[len] = 0;
2211                 *ppos += *lenp;
2212         } else {
2213                 len = strlen(data);
2214                 if (len > maxlen)
2215                         len = maxlen;
2216
2217                 if (*ppos > len) {
2218                         *lenp = 0;
2219                         return 0;
2220                 }
2221
2222                 data += *ppos;
2223                 len  -= *ppos;
2224
2225                 if (len > *lenp)
2226                         len = *lenp;
2227                 if (len)
2228                         if(copy_to_user(buffer, data, len))
2229                                 return -EFAULT;
2230                 if (len < *lenp) {
2231                         if(put_user('\n', ((char __user *) buffer) + len))
2232                                 return -EFAULT;
2233                         len++;
2234                 }
2235                 *lenp = len;
2236                 *ppos += len;
2237         }
2238         return 0;
2239 }
2240
/**
 * proc_dostring - read a string sysctl
 * @table: the sysctl table
 * @write: %TRUE if this is a write to the sysctl file
 * @filp: the file structure
 * @buffer: the user buffer
 * @lenp: the size of the user buffer
 * @ppos: file position
 *
 * Reads/writes a string from/to the user buffer. If the kernel
 * buffer provided is not large enough to hold the string, the
 * string is truncated. The copied string is NUL-terminated.
 * If the string is being read by the user process, it is copied
 * and a newline '\n' is added. It is truncated if the buffer is
 * not large enough.
 *
 * The kernel buffer and its size are taken from @table->data and
 * @table->maxlen.
 *
 * Returns 0 on success.
 */
int proc_dostring(struct ctl_table *table, int write, struct file *filp,
                  void __user *buffer, size_t *lenp, loff_t *ppos)
{
        return _proc_do_string(table->data, table->maxlen, write, filp,
                               buffer, lenp, ppos);
}
2265
2266
/*
 * Default converter between an int and its sign/magnitude form.
 *
 * @negp:  sign flag; non-zero means negative
 * @lvalp: magnitude
 * @valp:  the int being read or written
 * @write: non-zero to store (*@negp, *@lvalp) into *@valp, zero to split
 *         *@valp into *@negp and *@lvalp
 * @data:  unused
 *
 * On the read side the magnitude is obtained by negating in unsigned long,
 * so that INT_MIN yields its true magnitude without negating a signed int
 * (which would overflow).
 *
 * Always returns 0.
 */
static int do_proc_dointvec_conv(int *negp, unsigned long *lvalp,
                                 int *valp,
                                 int write, void *data)
{
        if (write) {
                *valp = *negp ? -*lvalp : *lvalp;
        } else {
                int val = *valp;
                if (val < 0) {
                        *negp = -1;
                        /* negate after widening: -INT_MIN does not fit an int */
                        *lvalp = -(unsigned long)val;
                } else {
                        *negp = 0;
                        *lvalp = (unsigned long)val;
                }
        }
        return 0;
}
2285
/*
 * Common worker for the integer-vector proc handlers.
 *
 * @tbl_data: the int array to read from or write to
 * @table:    supplies maxlen; the array holds maxlen / sizeof(int) values
 * @write:    non-zero to parse values from user space, zero to print them
 * @filp:     not used here
 * @buffer:   the user buffer
 * @lenp:     in: size of the user buffer; out: number of bytes consumed
 *            or produced
 * @ppos:     file position; advanced by the final *@lenp
 * @conv:     converter between an int element and its sign/magnitude
 *            pair; do_proc_dointvec_conv is used when this is NULL
 * @data:     passed through to @conv
 *
 * On a write, whitespace separated numbers are parsed (optional leading
 * '-', base auto-detected by simple_strtoul) and stored through @conv until
 * the input, the array or the parser's patience runs out.  On a read the
 * values are printed separated by tabs and followed by a newline if there
 * is room.  A read at a non-zero *@ppos yields nothing.
 *
 * Returns 0 on success, -EFAULT when user memory cannot be accessed and
 * -EINVAL when a write did not get through even one element.
 */
static int __do_proc_dointvec(void *tbl_data, struct ctl_table *table,
                  int write, struct file *filp, void __user *buffer,
                  size_t *lenp, loff_t *ppos,
                  int (*conv)(int *negp, unsigned long *lvalp, int *valp,
                              int write, void *data),
                  void *data)
{
/* size of the scratch buffer used to parse or format one number */
#define TMPBUFLEN 21
        int *i, vleft, first = 1, neg;
        unsigned long lval;
        size_t left, len;
        
        char buf[TMPBUFLEN], *p;
        char __user *s = buffer;
        
        /* nothing to do, or a read that does not start at offset 0 */
        if (!tbl_data || !table->maxlen || !*lenp ||
            (*ppos && !write)) {
                *lenp = 0;
                return 0;
        }
        
        i = (int *) tbl_data;
        vleft = table->maxlen / sizeof(*i);     /* elements in the array */
        left = *lenp;                           /* user bytes still available */

        if (!conv)
                conv = do_proc_dointvec_conv;

        /* one element per iteration; "first" is cleared after the first one */
        for (; left && vleft--; i++, first=0) {
                if (write) {
                        /* skip leading whitespace */
                        while (left) {
                                char c;
                                if (get_user(c, s))
                                        return -EFAULT;
                                if (!isspace(c))
                                        break;
                                left--;
                                s++;
                        }
                        if (!left)
                                break;
                        neg = 0;
                        /* pull at most one scratch buffer's worth of input */
                        len = left;
                        if (len > sizeof(buf) - 1)
                                len = sizeof(buf) - 1;
                        if (copy_from_user(buf, s, len))
                                return -EFAULT;
                        buf[len] = 0;
                        p = buf;
                        if (*p == '-' && left > 1) {
                                neg = 1;
                                p++;
                        }
                        /* must start with a digit, else stop parsing */
                        if (*p < '0' || *p > '9')
                                break;

                        lval = simple_strtoul(p, &p, 0);

                        /* the number must be followed by whitespace or the end */
                        len = p-buf;
                        if ((len < left) && *p && !isspace(*p))
                                break;
                        s += len;
                        left -= len;

                        if (conv(&neg, &lval, i, 1, data))
                                break;
                } else {
                        p = buf;
                        /* tab separates this value from the previous one */
                        if (!first)
                                *p++ = '\t';
        
                        if (conv(&neg, &lval, i, 0, data))
                                break;

                        sprintf(p, "%s%lu", neg ? "-" : "", lval);
                        /* output is cut short if the user buffer is too small */
                        len = strlen(buf);
                        if (len > left)
                                len = left;
                        if(copy_to_user(s, buf, len))
                                return -EFAULT;
                        left -= len;
                        s += len;
                }
        }

        /* read: terminate the output with a newline when there is room */
        if (!write && !first && left) {
                if(put_user('\n', s))
                        return -EFAULT;
                left--, s++;
        }
        /* write: swallow trailing whitespace so it counts as consumed */
        if (write) {
                while (left) {
                        char c;
                        if (get_user(c, s++))
                                return -EFAULT;
                        if (!isspace(c))
                                break;
                        left--;
                }
        }
        /* write that never completed its first element */
        if (write && first)
                return -EINVAL;
        *lenp -= left;
        *ppos += *lenp;
        return 0;
#undef TMPBUFLEN
}
2393
/*
 * Run __do_proc_dointvec() on the array that @table->data points to.  All
 * other arguments, including the converter @conv and its private @data,
 * are passed through unchanged.
 */
static int do_proc_dointvec(struct ctl_table *table, int write, struct file *filp,
                  void __user *buffer, size_t *lenp, loff_t *ppos,
                  int (*conv)(int *negp, unsigned long *lvalp, int *valp,
                              int write, void *data),
                  void *data)
{
        return __do_proc_dointvec(table->data, table, write, filp,
                        buffer, lenp, ppos, conv, data);
}
2403
/**
 * proc_dointvec - read a vector of integers
 * @table: the sysctl table
 * @write: %TRUE if this is a write to the sysctl file
 * @filp: the file structure
 * @buffer: the user buffer
 * @lenp: the size of the user buffer
 * @ppos: file position
 *
 * Reads/writes up to table->maxlen/sizeof(unsigned int) integer
 * values from/to the user buffer, treated as an ASCII string. 
 *
 * Returns 0 on success.
 */
int proc_dointvec(struct ctl_table *table, int write, struct file *filp,
                     void __user *buffer, size_t *lenp, loff_t *ppos)
{
    /* no converter given: the default plain-int conversion is used */
    return do_proc_dointvec(table,write,filp,buffer,lenp,ppos,
                            NULL,NULL);
}
2424
2425 /*
2426  * Taint values can only be increased
2427  * This means we can safely use a temporary.
2428  */
2429 static int proc_taint(struct ctl_table *table, int write, struct file *filp,
2430                                void __user *buffer, size_t *lenp, loff_t *ppos)
2431 {
2432         struct ctl_table t;
2433         unsigned long tmptaint = get_taint();
2434         int err;
2435
2436         if (write && !capable(CAP_SYS_ADMIN))
2437                 return -EPERM;
2438
2439         t = *table;
2440         t.data = &tmptaint;
2441         err = proc_doulongvec_minmax(&t, write, filp, buffer, lenp, ppos);
2442         if (err < 0)
2443                 return err;
2444
2445         if (write) {
2446                 /*
2447                  * Poor man's atomic or. Not worth adding a primitive
2448                  * to everyone's atomic.h for this
2449                  */
2450                 int i;
2451                 for (i = 0; i < BITS_PER_LONG && tmptaint >> i; i++) {
2452                         if ((tmptaint >> i) & 1)
2453                                 add_taint(i);
2454                 }
2455         }
2456
2457         return err;
2458 }
2459
/*
 * Optional bounds for do_proc_dointvec_minmax_conv().  A NULL pointer
 * means that side of the range is not checked.
 */
struct do_proc_dointvec_minmax_conv_param {
        int *min;
        int *max;
};

/*
 * Conversion callback that range-checks integers.
 *
 * @negp:  sign flag (non-zero means negative)
 * @lvalp: magnitude of the value
 * @valp:  the kernel integer being read or written
 * @write: non-zero to store sign/magnitude into *valp, zero to split
 *         *valp into sign/magnitude
 * @data:  a struct do_proc_dointvec_minmax_conv_param
 *
 * On write, returns -EINVAL (leaving *valp untouched) when the value does
 * not fit in an int or lies outside [*min, *max].  Returns 0 otherwise.
 */
static int do_proc_dointvec_minmax_conv(int *negp, unsigned long *lvalp,
                                        int *valp,
                                        int write, void *data)
{
        struct do_proc_dointvec_minmax_conv_param *param = data;

        if (write) {
                int val;

                /*
                 * Reject magnitudes that cannot be represented as an int
                 * before narrowing; otherwise a huge input would wrap
                 * around and could slip through the range check below.
                 */
                if (*negp) {
                        if (*lvalp > (unsigned long)INT_MAX + 1)
                                return -EINVAL;
                        val = (*lvalp > (unsigned long)INT_MAX) ?
                                INT_MIN : -(int)*lvalp;
                } else {
                        if (*lvalp > (unsigned long)INT_MAX)
                                return -EINVAL;
                        val = (int)*lvalp;
                }

                if ((param->min && *param->min > val) ||
                    (param->max && *param->max < val))
                        return -EINVAL;
                *valp = val;
        } else {
                int val = *valp;

                if (val < 0) {
                        *negp = -1;
                        /* Negate as unsigned so INT_MIN does not overflow. */
                        *lvalp = -(unsigned long)val;
                } else {
                        *negp = 0;
                        *lvalp = (unsigned long)val;
                }
        }
        return 0;
}
2488
2489 /**
2490  * proc_dointvec_minmax - read a vector of integers with min/max values
2491  * @table: the sysctl table
2492  * @write: %TRUE if this is a write to the sysctl file
2493  * @filp: the file structure
2494  * @buffer: the user buffer
2495  * @lenp: the size of the user buffer
2496  * @ppos: file position
2497  *
2498  * Reads/writes up to table->maxlen/sizeof(unsigned int) integer
2499  * values from/to the user buffer, treated as an ASCII string.
2500  *
2501  * This routine will ensure the values are within the range specified by
2502  * table->extra1 (min) and table->extra2 (max).
2503  *
2504  * Returns 0 on success.
2505  */
2506 int proc_dointvec_minmax(struct ctl_table *table, int write, struct file *filp,
2507                   void __user *buffer, size_t *lenp, loff_t *ppos)
2508 {
2509         struct do_proc_dointvec_minmax_conv_param param = {
2510                 .min = (int *) table->extra1,
2511                 .max = (int *) table->extra2,
2512         };
2513         return do_proc_dointvec(table, write, filp, buffer, lenp, ppos,
2514                                 do_proc_dointvec_minmax_conv, &param);
2515 }
2516
2517 static int __do_proc_doulongvec_minmax(void *data, struct ctl_table *table, int write,
2518                                      struct file *filp,
2519                                      void __user *buffer,
2520                                      size_t *lenp, loff_t *ppos,
2521                                      unsigned long convmul,
2522                                      unsigned long convdiv)
2523 {
2524 #define TMPBUFLEN 21
2525         unsigned long *i, *min, *max, val;
2526         int vleft, first=1, neg;
2527         size_t len, left;
2528         char buf[TMPBUFLEN], *p;
2529         char __user *s = buffer;
2530         
2531         if (!data || !table->maxlen || !*lenp ||
2532             (*ppos && !write)) {
2533                 *lenp = 0;
2534                 return 0;
2535         }
2536         
2537         i = (unsigned long *) data;
2538         min = (unsigned long *) table->extra1;
2539         max = (unsigned long *) table->extra2;
2540         vleft = table->maxlen / sizeof(unsigned long);
2541         left = *lenp;
2542         
2543         for (; left && vleft--; i++, min++, max++, first=0) {
2544                 if (write) {
2545                         while (left) {
2546                                 char c;
2547                                 if (get_user(c, s))
2548                                         return -EFAULT;
2549                                 if (!isspace(c))
2550                                         break;
2551                                 left--;
2552                                 s++;
2553                         }
2554                         if (!left)
2555                                 break;
2556                         neg = 0;
2557                         len = left;
2558                         if (len > TMPBUFLEN-1)
2559                                 len = TMPBUFLEN-1;
2560                         if (copy_from_user(buf, s, len))
2561                                 return -EFAULT;
2562                         buf[len] = 0;
2563                         p = buf;
2564                         if (*p == '-' && left > 1) {
2565                                 neg = 1;
2566                                 p++;
2567                         }
2568                         if (*p < '0' || *p > '9')
2569                                 break;
2570                         val = simple_strtoul(p, &p, 0) * convmul / convdiv ;
2571                         len = p-buf;
2572                         if ((len < left) && *p && !isspace(*p))
2573                                 break;
2574                         if (neg)
2575                                 val = -val;
2576                         s += len;
2577                         left -= len;
2578
2579                         if(neg)
2580                                 continue;
2581                         if ((min && val < *min) || (max && val > *max))
2582                                 continue;
2583                         *i = val;
2584                 } else {
2585                         p = buf;
2586                         if (!first)
2587                                 *p++ = '\t';
2588                         sprintf(p, "%lu", convdiv * (*i) / convmul);
2589                         len = strlen(buf);
2590                         if (len > left)
2591                                 len = left;
2592                         if(copy_to_user(s, buf, len))
2593                                 return -EFAULT;
2594                         left -= len;
2595                         s += len;
2596                 }
2597         }
2598
2599         if (!write && !first && left) {
2600                 if(put_user('\n', s))
2601                         return -EFAULT;
2602                 left--, s++;
2603         }
2604         if (write) {
2605                 while (left) {
2606                         char c;
2607                         if (get_user(c, s++))
2608                                 return -EFAULT;
2609                         if (!isspace(c))
2610                                 break;
2611                         left--;
2612                 }
2613         }
2614         if (write && first)
2615                 return -EINVAL;
2616         *lenp -= left;
2617         *ppos += *lenp;
2618         return 0;
2619 #undef TMPBUFLEN
2620 }
2621
2622 static int do_proc_doulongvec_minmax(struct ctl_table *table, int write,
2623                                      struct file *filp,
2624                                      void __user *buffer,
2625                                      size_t *lenp, loff_t *ppos,
2626                                      unsigned long convmul,
2627                                      unsigned long convdiv)
2628 {
2629         return __do_proc_doulongvec_minmax(table->data, table, write,
2630                         filp, buffer, lenp, ppos, convmul, convdiv);
2631 }
2632
2633 /**
2634  * proc_doulongvec_minmax - read a vector of long integers with min/max values
2635  * @table: the sysctl table
2636  * @write: %TRUE if this is a write to the sysctl file
2637  * @filp: the file structure
2638  * @buffer: the user buffer
2639  * @lenp: the size of the user buffer
2640  * @ppos: file position
2641  *
2642  * Reads/writes up to table->maxlen/sizeof(unsigned long) unsigned long
2643  * values from/to the user buffer, treated as an ASCII string.
2644  *
2645  * This routine will ensure the values are within the range specified by
2646  * table->extra1 (min) and table->extra2 (max).
2647  *
2648  * Returns 0 on success.
2649  */
2650 int proc_doulongvec_minmax(struct ctl_table *table, int write, struct file *filp,
2651                            void __user *buffer, size_t *lenp, loff_t *ppos)
2652 {
2653     return do_proc_doulongvec_minmax(table, write, filp, buffer, lenp, ppos, 1l, 1l);
2654 }
2655
2656 /**
2657  * proc_doulongvec_ms_jiffies_minmax - read a vector of millisecond values with min/max values
2658  * @table: the sysctl table
2659  * @write: %TRUE if this is a write to the sysctl file
2660  * @filp: the file structure
2661  * @buffer: the user buffer
2662  * @lenp: the size of the user buffer
2663  * @ppos: file position
2664  *
2665  * Reads/writes up to table->maxlen/sizeof(unsigned long) unsigned long
2666  * values from/to the user buffer, treated as an ASCII string. The values
2667  * are treated as milliseconds, and converted to jiffies when they are stored.
2668  *
2669  * This routine will ensure the values are within the range specified by
2670  * table->extra1 (min) and table->extra2 (max).
2671  *
2672  * Returns 0 on success.
2673  */
2674 int proc_doulongvec_ms_jiffies_minmax(struct ctl_table *table, int write,
2675                                       struct file *filp,
2676                                       void __user *buffer,
2677                                       size_t *lenp, loff_t *ppos)
2678 {
2679     return do_proc_doulongvec_minmax(table, write, filp, buffer,
2680                                      lenp, ppos, HZ, 1000l);
2681 }
2682
2683
2684 static int do_proc_dointvec_jiffies_conv(int *negp, unsigned long *lvalp,
2685                                          int *valp,
2686                                          int write, void *data)
2687 {
2688         if (write) {
2689                 if (*lvalp > LONG_MAX / HZ)
2690                         return 1;
2691                 *valp = *negp ? -(*lvalp*HZ) : (*lvalp*HZ);
2692         } else {
2693                 int val = *valp;
2694                 unsigned long lval;
2695                 if (val < 0) {
2696                         *negp = -1;
2697                         lval = (unsigned long)-val;
2698                 } else {
2699                         *negp = 0;
2700                         lval = (unsigned long)val;
2701                 }
2702                 *lvalp = lval / HZ;
2703         }
2704         return 0;
2705 }
2706
2707 static int do_proc_dointvec_userhz_jiffies_conv(int *negp, unsigned long *lvalp,
2708                                                 int *valp,
2709                                                 int write, void *data)
2710 {
2711         if (write) {
2712                 if (USER_HZ < HZ && *lvalp > (LONG_MAX / HZ) * USER_HZ)
2713                         return 1;
2714                 *valp = clock_t_to_jiffies(*negp ? -*lvalp : *lvalp);
2715         } else {
2716                 int val = *valp;
2717                 unsigned long lval;
2718                 if (val < 0) {
2719                         *negp = -1;
2720                         lval = (unsigned long)-val;
2721                 } else {
2722                         *negp = 0;
2723                         lval = (unsigned long)val;
2724                 }
2725                 *lvalp = jiffies_to_clock_t(lval);
2726         }
2727         return 0;
2728 }
2729
/*
 * Conversion callback: written values pass through msecs_to_jiffies()
 * before being stored, and stored values pass through
 * jiffies_to_msecs() before being reported.
 *
 * @negp:  sign flag (non-zero means negative)
 * @lvalp: magnitude as seen by user space
 * @valp:  the kernel integer being read or written
 * @write: non-zero to convert *negp / *lvalp into *valp, zero for the
 *         opposite direction
 * @data:  unused
 *
 * Returns 0 on success, or 1 on write when the converted value does not
 * fit in the int that *valp points at.
 */
static int do_proc_dointvec_ms_jiffies_conv(int *negp, unsigned long *lvalp,
                                            int *valp,
                                            int write, void *data)
{
        if (write) {
                unsigned long jif = msecs_to_jiffies(*negp ? -*lvalp : *lvalp);

                /* Refuse to silently truncate into the int destination. */
                if (jif > INT_MAX)
                        return 1;
                *valp = (int)jif;
        } else {
                int val = *valp;
                unsigned long lval;

                if (val < 0) {
                        *negp = -1;
                        /* Negate as unsigned so INT_MIN does not overflow. */
                        lval = -(unsigned long)val;
                } else {
                        *negp = 0;
                        lval = (unsigned long)val;
                }
                *lvalp = jiffies_to_msecs(lval);
        }
        return 0;
}
2750
2751 /**
2752  * proc_dointvec_jiffies - read a vector of integers as seconds
2753  * @table: the sysctl table
2754  * @write: %TRUE if this is a write to the sysctl file
2755  * @filp: the file structure
2756  * @buffer: the user buffer
2757  * @lenp: the size of the user buffer
2758  * @ppos: file position
2759  *
2760  * Reads/writes up to table->maxlen/sizeof(unsigned int) integer
2761  * values from/to the user buffer, treated as an ASCII string. 
2762  * The values read are assumed to be in seconds, and are converted into
2763  * jiffies.
2764  *
2765  * Returns 0 on success.
2766  */
2767 int proc_dointvec_jiffies(struct ctl_table *table, int write, struct file *filp,
2768                           void __user *buffer, size_t *lenp, loff_t *ppos)
2769 {
2770     return do_proc_dointvec(table,write,filp,buffer,lenp,ppos,
2771                             do_proc_dointvec_jiffies_conv,NULL);
2772 }
2773
2774 /**
2775  * proc_dointvec_userhz_jiffies - read a vector of integers as 1/USER_HZ seconds
2776  * @table: the sysctl table
2777  * @write: %TRUE if this is a write to the sysctl file
2778  * @filp: the file structure
2779  * @buffer: the user buffer
2780  * @lenp: the size of the user buffer
2781  * @ppos: pointer to the file position
2782  *
2783  * Reads/writes up to table->maxlen/sizeof(unsigned int) integer
2784  * values from/to the user buffer, treated as an ASCII string. 
2785  * The values read are assumed to be in 1/USER_HZ seconds, and 
2786  * are converted into jiffies.
2787  *
2788  * Returns 0 on success.
2789  */
2790 int proc_dointvec_userhz_jiffies(struct ctl_table *table, int write, struct file *filp,
2791                                  void __user *buffer, size_t *lenp, loff_t *ppos)
2792 {
2793     return do_proc_dointvec(table,write,filp,buffer,lenp,ppos,
2794                             do_proc_dointvec_userhz_jiffies_conv,NULL);
2795 }
2796
2797 /**
2798  * proc_dointvec_ms_jiffies - read a vector of integers as 1 milliseconds
2799  * @table: the sysctl table
2800  * @write: %TRUE if this is a write to the sysctl file
2801  * @filp: the file structure
2802  * @buffer: the user buffer
2803  * @lenp: the size of the user buffer
2804  * @ppos: file position
2805  * @ppos: the current position in the file
2806  *
2807  * Reads/writes up to table->maxlen/sizeof(unsigned int) integer
2808  * values from/to the user buffer, treated as an ASCII string. 
2809  * The values read are assumed to be in 1/1000 seconds, and 
2810  * are converted into jiffies.
2811  *
2812  * Returns 0 on success.
2813  */
2814 int proc_dointvec_ms_jiffies(struct ctl_table *table, int write, struct file *filp,
2815                              void __user *buffer, size_t *lenp, loff_t *ppos)
2816 {
2817         return do_proc_dointvec(table, write, filp, buffer, lenp, ppos,
2818                                 do_proc_dointvec_ms_jiffies_conv, NULL);
2819 }
2820
2821 static int proc_do_cad_pid(struct ctl_table *table, int write, struct file *filp,
2822                            void __user *buffer, size_t *lenp, loff_t *ppos)
2823 {
2824         struct pid *new_pid;
2825         pid_t tmp;
2826         int r;
2827
2828         tmp = pid_vnr(cad_pid);
2829
2830         r = __do_proc_dointvec(&tmp, table, write, filp, buffer,
2831                                lenp, ppos, NULL, NULL);
2832         if (r || !write)
2833                 return r;
2834
2835         new_pid = find_get_pid(tmp);
2836         if (!new_pid)
2837                 return -ESRCH;
2838
2839         put_pid(xchg(&cad_pid, new_pid));
2840         return 0;
2841 }
2842
2843 #else /* CONFIG_PROC_FS */
2844
/* Stub for builds without CONFIG_PROC_FS: always fails with -ENOSYS. */
int proc_dostring(struct ctl_table *table, int write, struct file *filp,
                  void __user *buffer, size_t *lenp, loff_t *ppos)
{
        return -ENOSYS;
}
2850
/* Stub for builds without CONFIG_PROC_FS: always fails with -ENOSYS. */
int proc_dointvec(struct ctl_table *table, int write, struct file *filp,
                  void __user *buffer, size_t *lenp, loff_t *ppos)
{
        return -ENOSYS;
}
2856
/* Stub for builds without CONFIG_PROC_FS: always fails with -ENOSYS. */
int proc_dointvec_minmax(struct ctl_table *table, int write, struct file *filp,
                    void __user *buffer, size_t *lenp, loff_t *ppos)
{
        return -ENOSYS;
}
2862
/* Stub for builds without CONFIG_PROC_FS: always fails with -ENOSYS. */
int proc_dointvec_jiffies(struct ctl_table *table, int write, struct file *filp,
                    void __user *buffer, size_t *lenp, loff_t *ppos)
{
        return -ENOSYS;
}
2868
/* Stub for builds without CONFIG_PROC_FS: always fails with -ENOSYS. */
int proc_dointvec_userhz_jiffies(struct ctl_table *table, int write, struct file *filp,
                    void __user *buffer, size_t *lenp, loff_t *ppos)
{
        return -ENOSYS;
}
2874
/* Stub for builds without CONFIG_PROC_FS: always fails with -ENOSYS. */
int proc_dointvec_ms_jiffies(struct ctl_table *table, int write, struct file *filp,
                             void __user *buffer, size_t *lenp, loff_t *ppos)
{
        return -ENOSYS;
}
2880
/* Stub for builds without CONFIG_PROC_FS: always fails with -ENOSYS. */
int proc_doulongvec_minmax(struct ctl_table *table, int write, struct file *filp,
                    void __user *buffer, size_t *lenp, loff_t *ppos)
{
        return -ENOSYS;
}
2886
/* Stub for builds without CONFIG_PROC_FS: always fails with -ENOSYS. */
int proc_doulongvec_ms_jiffies_minmax(struct ctl_table *table, int write,
                                      struct file *filp,
                                      void __user *buffer,
                                      size_t *lenp, loff_t *ppos)
{
    return -ENOSYS;
}
2894
2895
2896 #endif /* CONFIG_PROC_FS */
2897
2898
2899 #ifdef CONFIG_SYSCTL_SYSCALL
2900 /*
2901  * General sysctl support routines 
2902  */
2903
2904 /* The generic sysctl data routine (used if no strategy routine supplied) */
2905 int sysctl_data(struct ctl_table *table,
2906                 void __user *oldval, size_t __user *oldlenp,
2907                 void __user *newval, size_t newlen)
2908 {
2909         size_t len;
2910
2911         /* Get out of I don't have a variable */
2912         if (!table->data || !table->maxlen)
2913                 return -ENOTDIR;
2914
2915         if (oldval && oldlenp) {
2916                 if (get_user(len, oldlenp))
2917                         return -EFAULT;
2918                 if (len) {
2919                         if (len > table->maxlen)
2920                                 len = table->maxlen;
2921                         if (copy_to_user(oldval, table->data, len))
2922                                 return -EFAULT;
2923                         if (put_user(len, oldlenp))
2924                                 return -EFAULT;
2925                 }
2926         }
2927
2928         if (newval && newlen) {
2929                 if (newlen > table->maxlen)
2930                         newlen = table->maxlen;
2931
2932                 if (copy_from_user(table->data, newval, newlen))
2933                         return -EFAULT;
2934         }
2935         return 1;
2936 }
2937
2938 /* The generic string strategy routine: */
2939 int sysctl_string(struct ctl_table *table,
2940                   void __user *oldval, size_t __user *oldlenp,
2941                   void __user *newval, size_t newlen)
2942 {
2943         if (!table->data || !table->maxlen) 
2944                 return -ENOTDIR;
2945         
2946         if (oldval && oldlenp) {
2947                 size_t bufsize;
2948                 if (get_user(bufsize, oldlenp))
2949                         return -EFAULT;
2950                 if (bufsize) {
2951                         size_t len = strlen(table->data), copied;
2952
2953                         /* This shouldn't trigger for a well-formed sysctl */
2954                         if (len > table->maxlen)
2955                                 len = table->maxlen;
2956
2957                         /* Copy up to a max of bufsize-1 bytes of the string */
2958                         copied = (len >= bufsize) ? bufsize - 1 : len;
2959
2960                         if (copy_to_user(oldval, table->data, copied) ||
2961                             put_user(0, (char __user *)(oldval + copied)))
2962                                 return -EFAULT;
2963                         if (put_user(len, oldlenp))
2964                                 return -EFAULT;
2965                 }
2966         }
2967         if (newval && newlen) {
2968                 size_t len = newlen;
2969                 if (len > table->maxlen)
2970                         len = table->maxlen;
2971                 if(copy_from_user(table->data, newval, len))
2972                         return -EFAULT;
2973                 if (len == table->maxlen)
2974                         len--;
2975                 ((char *) table->data)[len] = 0;
2976         }
2977         return 1;
2978 }
2979
2980 /*
2981  * This function makes sure that all of the integers in the vector
2982  * are between the minimum and maximum values given in the arrays
2983  * table->extra1 and table->extra2, respectively.
2984  */
2985 int sysctl_intvec(struct ctl_table *table,
2986                 void __user *oldval, size_t __user *oldlenp,
2987                 void __user *newval, size_t newlen)
2988 {
2989
2990         if (newval && newlen) {
2991                 int __user *vec = (int __user *) newval;
2992                 int *min = (int *) table->extra1;
2993                 int *max = (int *) table->extra2;
2994                 size_t length;
2995                 int i;
2996
2997                 if (newlen % sizeof(int) != 0)
2998                         return -EINVAL;
2999
3000                 if (!table->extra1 && !table->extra2)
3001                         return 0;
3002
3003                 if (newlen > table->maxlen)
3004                         newlen = table->maxlen;
3005                 length = newlen / sizeof(int);
3006
3007                 for (i = 0; i < length; i++) {
3008                         int value;
3009                         if (get_user(value, vec + i))
3010                                 return -EFAULT;
3011                         if (min && value < min[i])
3012                                 return -EINVAL;
3013                         if (max && value > max[i])
3014                                 return -EINVAL;
3015                 }
3016         }
3017         return 0;
3018 }
3019
3020 /* Strategy function to convert jiffies to seconds */ 
3021 int sysctl_jiffies(struct ctl_table *table,
3022                 void __user *oldval, size_t __user *oldlenp,
3023                 void __user *newval, size_t newlen)
3024 {
3025         if (oldval && oldlenp) {
3026                 size_t olen;
3027
3028                 if (get_user(olen, oldlenp))
3029                         return -EFAULT;
3030                 if (olen) {
3031                         int val;
3032
3033                         if (olen < sizeof(int))
3034                                 return -EINVAL;
3035
3036                         val = *(int *)(table->data) / HZ;
3037                         if (put_user(val, (int __user *)oldval))
3038                                 return -EFAULT;
3039                         if (put_user(sizeof(int), oldlenp))
3040                                 return -EFAULT;
3041                 }
3042         }
3043         if (newval && newlen) { 
3044                 int new;
3045                 if (newlen != sizeof(int))
3046                         return -EINVAL; 
3047                 if (get_user(new, (int __user *)newval))
3048                         return -EFAULT;
3049                 *(int *)(table->data) = new*HZ; 
3050         }
3051         return 1;
3052 }
3053
3054 /* Strategy function to convert jiffies to seconds */ 
3055 int sysctl_ms_jiffies(struct ctl_table *table,
3056                 void __user *oldval, size_t __user *oldlenp,
3057                 void __user *newval, size_t newlen)
3058 {
3059         if (oldval && oldlenp) {
3060                 size_t olen;
3061
3062                 if (get_user(olen, oldlenp))
3063                         return -EFAULT;
3064                 if (olen) {
3065                         int val;
3066
3067                         if (olen < sizeof(int))
3068                                 return -EINVAL;
3069
3070                         val = jiffies_to_msecs(*(int *)(table->data));
3071                         if (put_user(val, (int __user *)oldval))
3072                                 return -EFAULT;
3073                         if (put_user(sizeof(int), oldlenp))
3074                                 return -EFAULT;
3075                 }
3076         }
3077         if (newval && newlen) { 
3078                 int new;
3079                 if (newlen != sizeof(int))
3080                         return -EINVAL; 
3081                 if (get_user(new, (int __user *)newval))
3082                         return -EFAULT;
3083                 *(int *)(table->data) = msecs_to_jiffies(new);
3084         }
3085         return 1;
3086 }
3087
3088
3089
3090 #else /* CONFIG_SYSCTL_SYSCALL */
3091
3092
3093 SYSCALL_DEFINE1(sysctl, struct __sysctl_args __user *, args)
3094 {
3095         struct __sysctl_args tmp;
3096         int error;
3097
3098         if (copy_from_user(&tmp, args, sizeof(tmp)))
3099                 return -EFAULT;
3100
3101         error = deprecated_sysctl_warning(&tmp);
3102
3103         /* If no error reading the parameters then just -ENOSYS ... */
3104         if (!error)
3105                 error = -ENOSYS;
3106
3107         return error;
3108 }
3109
/* Stub for builds without CONFIG_SYSCTL_SYSCALL: always fails with -ENOSYS. */
int sysctl_data(struct ctl_table *table,
                  void __user *oldval, size_t __user *oldlenp,
                  void __user *newval, size_t newlen)
{
        return -ENOSYS;
}
3116
/* Stub for builds without CONFIG_SYSCTL_SYSCALL: always fails with -ENOSYS. */
int sysctl_string(struct ctl_table *table,
                  void __user *oldval, size_t __user *oldlenp,
                  void __user *newval, size_t newlen)
{
        return -ENOSYS;
}
3123
/* Stub for builds without CONFIG_SYSCTL_SYSCALL: always fails with -ENOSYS. */
int sysctl_intvec(struct ctl_table *table,
                void __user *oldval, size_t __user *oldlenp,
                void __user *newval, size_t newlen)
{
        return -ENOSYS;
}
3130
/* Stub for builds without CONFIG_SYSCTL_SYSCALL: always fails with -ENOSYS. */
int sysctl_jiffies(struct ctl_table *table,
                void __user *oldval, size_t __user *oldlenp,
                void __user *newval, size_t newlen)
{
        return -ENOSYS;
}
3137
/* Stub for builds without CONFIG_SYSCTL_SYSCALL: always fails with -ENOSYS. */
int sysctl_ms_jiffies(struct ctl_table *table,
                void __user *oldval, size_t __user *oldlenp,
                void __user *newval, size_t newlen)
{
        return -ENOSYS;
}
3144
3145 #endif /* CONFIG_SYSCTL_SYSCALL */
3146
3147 static int deprecated_sysctl_warning(struct __sysctl_args *args)
3148 {
3149         static int msg_count;
3150         int name[CTL_MAXNAME];
3151         int i;
3152
3153         /* Check args->nlen. */
3154         if (args->nlen < 0 || args->nlen > CTL_MAXNAME)
3155                 return -ENOTDIR;
3156
3157         /* Read in the sysctl name for better debug message logging */
3158         for (i = 0; i < args->nlen; i++)
3159                 if (get_user(name[i], args->name + i))
3160                         return -EFAULT;
3161
3162         /* Ignore accesses to kernel.version */
3163         if ((args->nlen == 2) && (name[0] == CTL_KERN) && (name[1] == KERN_VERSION))
3164                 return 0;
3165
3166         if (msg_count < 5) {
3167                 msg_count++;
3168                 printk(KERN_INFO
3169                         "warning: process `%s' used the deprecated sysctl "
3170                         "system call with ", current->comm);
3171                 for (i = 0; i < args->nlen; i++)
3172                         printk("%d.", name[i]);
3173                 printk("\n");
3174         }
3175         return 0;
3176 }
3177
/*
 * The exports are collected here rather than placed after each symbol
 * definition: several of these functions are defined twice (a real
 * implementation and an -ENOSYS stub selected by the preprocessor), so
 * per-definition exports would have to be duplicated as well.
 */
EXPORT_SYMBOL(proc_dointvec);
EXPORT_SYMBOL(proc_dointvec_jiffies);
EXPORT_SYMBOL(proc_dointvec_minmax);
EXPORT_SYMBOL(proc_dointvec_userhz_jiffies);
EXPORT_SYMBOL(proc_dointvec_ms_jiffies);
EXPORT_SYMBOL(proc_dostring);
EXPORT_SYMBOL(proc_doulongvec_minmax);
EXPORT_SYMBOL(proc_doulongvec_ms_jiffies_minmax);
EXPORT_SYMBOL(register_sysctl_table);
EXPORT_SYMBOL(register_sysctl_paths);
EXPORT_SYMBOL(sysctl_intvec);
EXPORT_SYMBOL(sysctl_jiffies);
EXPORT_SYMBOL(sysctl_ms_jiffies);
EXPORT_SYMBOL(sysctl_string);
EXPORT_SYMBOL(sysctl_data);
EXPORT_SYMBOL(unregister_sysctl_table);