/*
 *  Copyright (C) 1995  Linus Torvalds
 *  Copyright (C) 2001, 2002 Andi Kleen, SuSE Labs.
 */
#include <linux/signal.h>
#include <linux/sched.h>
#include <linux/kernel.h>
#include <linux/errno.h>
#include <linux/string.h>
#include <linux/types.h>
#include <linux/ptrace.h>
#include <linux/mman.h>
#include <linux/smp.h>
#include <linux/interrupt.h>
#include <linux/init.h>
#include <linux/tty.h>
#include <linux/vt_kern.h>		/* For unblank_screen() */
#include <linux/compiler.h>
#include <linux/vmalloc.h>
#include <linux/module.h>
#include <linux/kprobes.h>
#include <linux/uaccess.h>
#include <linux/kdebug.h>

#include <asm/system.h>
#include <asm/pgalloc.h>
#include <asm/tlbflush.h>
#include <asm/proto.h>
#include <asm-generic/sections.h>
/*
 * Page fault error code bits:
 *	bit 0 == 0 means no page found, 1 means protection fault
 *	bit 1 == 0 means read, 1 means write
 *	bit 2 == 0 means kernel, 1 means user-mode
 *	bit 3 == 1 means use of reserved bit detected
 *	bit 4 == 1 means fault was an instruction fetch
 */
#define PF_PROT		(1<<0)
#define PF_WRITE	(1<<1)
#define PF_USER		(1<<2)
#define PF_RSVD		(1<<3)
#define PF_INSTR	(1<<4)
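
/*
 * These flags mirror the hardware error code that the CPU pushes on the
 * stack for a page fault (exception vector 14), so error_code can be
 * tested directly against the PF_* masks throughout this file.
 */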
static inline int notify_page_fault(struct pt_regs *regs)
{
	/* kprobe_running() needs smp_processor_id() */
	if (!user_mode(regs)) {
		if (kprobe_running() && kprobe_fault_handler(regs, 14))
/*
 * X86_32
 * Sometimes AMD Athlon/Opteron CPUs report invalid exceptions on prefetch.
 * Check that here and ignore it.
 *
 * X86_64
 * Sometimes the CPU reports invalid exceptions on prefetch.
 * Check that here and ignore it.
 *
 * Opcode checker based on code by Richard Brunner.
 */
static int is_prefetch(struct pt_regs *regs, unsigned long addr,
		       unsigned long error_code)
{
	unsigned char *max_instr;

	if (unlikely(boot_cpu_data.x86_vendor == X86_VENDOR_AMD &&
		     boot_cpu_data.x86 >= 6)) {
		/* Catch an obscure case of prefetch inside an NX page. */
		if (nx_enabled && (error_code & PF_INSTR))
			return 0;
	/* If it was an exec fault, ignore it. */
	if (error_code & PF_INSTR)
		return 0;

	instr = (unsigned char *)convert_ip_to_linear(current, regs);
	max_instr = instr + 15;

	if (user_mode(regs) && instr >= (unsigned char *)TASK_SIZE)
		return 0;
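
	/*
	 * Scan at most 15 bytes (the architectural limit on x86 instruction
	 * length) starting at the faulting instruction and stop as soon as a
	 * byte is seen that cannot be an instruction prefix.
	 */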
	while (scan_more && instr < max_instr) {
		unsigned char opcode;
		unsigned char instr_hi;
		unsigned char instr_lo;

		if (probe_kernel_address(instr, opcode))
			break;

		instr_hi = opcode & 0xf0;
		instr_lo = opcode & 0x0f;

		/*
		 * Values 0x26, 0x2E, 0x36, 0x3E are valid x86 prefixes.
		 * In X86_64 long mode, the CPU will signal invalid
		 * opcode if some of these prefixes are present, so
		 * X86_64 will never get here anyway.
		 */
		scan_more = ((instr_lo & 7) == 0x6);

		/*
		 * In AMD64 long mode 0x40..0x4F are valid REX prefixes.
		 * Need to figure out under what instruction mode the
		 * instruction was issued.  Could check the LDT for lm,
		 * but for now it's good enough to assume that long
		 * mode only uses well known segments or kernel.
		 */
		scan_more = (!user_mode(regs)) || (regs->cs == __USER_CS);

		/* 0x64 thru 0x67 are valid prefixes in all modes. */
		scan_more = (instr_lo & 0xC) == 0x4;

		/* 0xF0, 0xF2, 0xF3 are valid prefixes in all modes. */
		scan_more = !instr_lo || (instr_lo>>1) == 1;

		/* Prefetch instruction is 0x0F0D or 0x0F18 */
		if (probe_kernel_address(instr, opcode))
			break;
		prefetch = (instr_lo == 0xF) &&
			(opcode == 0x0D || opcode == 0x18);
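		/*
		 * 0x0F 0x0D is the AMD 3DNow! PREFETCH/PREFETCHW opcode and
		 * 0x0F 0x18 is the SSE prefetch-hint group, so only faults
		 * raised by software prefetches are treated as spurious.
		 */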
static void force_sig_info_fault(int si_signo, int si_code,
				 unsigned long address, struct task_struct *tsk)
{
	siginfo_t info;

	info.si_signo = si_signo;
	info.si_code = si_code;
	info.si_addr = (void __user *)address;
	force_sig_info(si_signo, &info, tsk);
}

static int bad_address(void *p)
{
	unsigned long dummy;
	return probe_kernel_address((unsigned long *)p, dummy);
}
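
/*
 * Dump the page-table entries that map 'address': walk the PGD, PUD, PMD
 * and PTE in turn, printing each entry, and stop early when an entry is
 * not present (or maps a large page at the PMD level).
 */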
void dump_pagetable(unsigned long address)
{
	pgd_t *pgd;
	pud_t *pud;
	pmd_t *pmd;
	pte_t *pte;

	pgd = (pgd_t *)read_cr3();
	pgd = __va((unsigned long)pgd & PHYSICAL_PAGE_MASK);
	pgd += pgd_index(address);
	if (bad_address(pgd)) goto bad;
	printk("PGD %lx ", pgd_val(*pgd));
	if (!pgd_present(*pgd)) goto ret;

	pud = pud_offset(pgd, address);
	if (bad_address(pud)) goto bad;
	printk("PUD %lx ", pud_val(*pud));
	if (!pud_present(*pud)) goto ret;

	pmd = pmd_offset(pud, address);
	if (bad_address(pmd)) goto bad;
	printk("PMD %lx ", pmd_val(*pmd));
	if (!pmd_present(*pmd) || pmd_large(*pmd)) goto ret;

	pte = pte_offset_kernel(pmd, address);
	if (bad_address(pte)) goto bad;
	printk("PTE %lx", pte_val(*pte));
ret:
	printk("\n");
	return;
bad:
	printk("BAD\n");
}
static const char errata93_warning[] =
KERN_ERR "******* Your BIOS seems to not contain a fix for K8 errata #93\n"
KERN_ERR "******* Working around it, but it may cause SEGVs or burn power.\n"
KERN_ERR "******* Please consider a BIOS update.\n"
KERN_ERR "******* Disabling USB legacy in the BIOS may also help.\n";
/* Workaround for K8 erratum #93 & buggy BIOS.
   BIOS SMM functions are required to use a specific workaround
   to avoid corruption of the 64-bit RIP register on C stepping K8.
   A lot of BIOSes that didn't get tested properly miss this.
   The OS sees this as a page fault with the upper 32 bits of RIP cleared.
   Try to work around it here.
   Note that we only handle kernel-mode faults here.
   This does nothing on X86_32. */
static int is_errata93(struct pt_regs *regs, unsigned long address)
{
	if (address != regs->ip)
		return 0;
	if ((address >> 32) != 0)
		return 0;
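	/*
	 * The fault address equals RIP and its upper 32 bits are zero, which
	 * is the signature of the erratum: restore the upper bits and see
	 * whether the full address would land in kernel or module text.
	 */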
	address |= 0xffffffffUL << 32;
	if ((address >= (u64)_stext && address <= (u64)_etext) ||
	    (address >= MODULES_VADDR && address <= MODULES_END)) {
		printk(errata93_warning);

static noinline void pgtable_bad(unsigned long address, struct pt_regs *regs,
				 unsigned long error_code)
{
	unsigned long flags = oops_begin();
	struct task_struct *tsk;

	printk(KERN_ALERT "%s: Corrupted page table at address %lx\n",
	       current->comm, address);
	dump_pagetable(address);
	tsk = current;
	tsk->thread.cr2 = address;
	tsk->thread.trap_no = 14;
	tsk->thread.error_code = error_code;
	if (__die("Bad pagetable", regs, error_code))
		regs = NULL;
	oops_end(flags, regs, SIGKILL);
}

/*
 * Handle a fault on the vmalloc area.
 *
 * This assumes no large pages in there.
 */
static int vmalloc_fault(unsigned long address)
{
	unsigned long pgd_paddr;

	/*
	 * Synchronize this task's top level page-table
	 * with the 'reference' page table.
	 *
	 * Do _not_ use "current" here. We might be inside
	 * an interrupt in the middle of a task switch..
	 */
	pgd_paddr = read_cr3();
	pmd_k = vmalloc_sync_one(__va(pgd_paddr), address);
	pte_k = pte_offset_kernel(pmd_k, address);
	if (!pte_present(*pte_k))
		return -1;
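
	/*
	 * The variant below (the 64-bit path) instead walks the 'reference'
	 * kernel page table in init_mm level by level: a missing PGD entry
	 * is copied from the reference table, while the lower levels are
	 * shared, so any mismatch there is a bug rather than a missing sync.
	 */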
	pgd_t *pgd, *pgd_ref;
	pud_t *pud, *pud_ref;
	pmd_t *pmd, *pmd_ref;
	pte_t *pte, *pte_ref;

	/* Copy kernel mappings over when needed. This can also
	   happen within a race in page table update. In the latter
	   case just flush. */
	pgd = pgd_offset(current->mm ?: &init_mm, address);
	pgd_ref = pgd_offset_k(address);
	if (pgd_none(*pgd_ref))
		return -1;
	if (pgd_none(*pgd))
		set_pgd(pgd, *pgd_ref);
	else
		BUG_ON(pgd_page_vaddr(*pgd) != pgd_page_vaddr(*pgd_ref));

	/* Below here mismatches are bugs because these lower tables
	   are shared */
	pud = pud_offset(pgd, address);
	pud_ref = pud_offset(pgd_ref, address);
	if (pud_none(*pud_ref))
		return -1;
	if (pud_none(*pud) || pud_page_vaddr(*pud) != pud_page_vaddr(*pud_ref))
		BUG();
	pmd = pmd_offset(pud, address);
	pmd_ref = pmd_offset(pud_ref, address);
	if (pmd_none(*pmd_ref))
		return -1;
	if (pmd_none(*pmd) || pmd_page(*pmd) != pmd_page(*pmd_ref))
		BUG();
	pte_ref = pte_offset_kernel(pmd_ref, address);
	if (!pte_present(*pte_ref))
		return -1;
	pte = pte_offset_kernel(pmd, address);
	/* Don't use pte_page here, because the mappings can point
	   outside mem_map, and the NUMA hash lookup cannot handle
	   that. */
	if (!pte_present(*pte) || pte_pfn(*pte) != pte_pfn(*pte_ref))
		BUG();
int show_unhandled_signals = 1;

/*
 * This routine handles page faults.  It determines the address,
 * and the problem, and then passes it off to one of the appropriate
 * routines.
 */
asmlinkage void __kprobes do_page_fault(struct pt_regs *regs,
					unsigned long error_code)
{
	struct task_struct *tsk;
	struct mm_struct *mm;
	struct vm_area_struct *vma;
	unsigned long address;

	/*
	 * We can fault from pretty much anywhere, with unknown IRQ state.
	 */
	trace_hardirqs_fixup();

	tsk = current;
	mm = tsk->mm;
	prefetchw(&mm->mmap_sem);

	/* get the address */
	address = read_cr2();

	si_code = SEGV_MAPERR;

	if (notify_page_fault(regs))
		return;
	/*
	 * We fault-in kernel-space virtual memory on-demand. The
	 * 'reference' page table is init_mm.pgd.
	 *
	 * NOTE! We MUST NOT take any locks for this case. We may
	 * be in an interrupt or a critical region, and should
	 * only copy the information from the master page table,
	 * nothing more.
	 *
	 * This verifies that the fault happens in kernel space
	 * ((error_code & PF_USER) == 0) and that the fault was not a
	 * protection error ((error_code & (PF_PROT|PF_RSVD)) == 0).
	 */
	if (unlikely(address >= TASK_SIZE64)) {
		/*
		 * Don't check for the module range here: its PML4
		 * is always initialized because it's shared with the main
		 * kernel text. Only vmalloc may need PML4 syncups.
		 */
		if (!(error_code & (PF_RSVD|PF_USER|PF_PROT)) &&
		    ((address >= VMALLOC_START && address < VMALLOC_END))) {
			if (vmalloc_fault(address) >= 0)
				return;
		}
		/*
		 * Don't take the mm semaphore here. If we fixup a prefetch
		 * fault we could otherwise deadlock.
		 */
		goto bad_area_nosemaphore;
	}

	if (likely(regs->flags & X86_EFLAGS_IF))
		local_irq_enable();

	if (unlikely(error_code & PF_RSVD))
		pgtable_bad(address, regs, error_code);

	/*
	 * If we're in an interrupt, have no user context or are running in an
	 * atomic region then we must not take the fault.
	 */
	if (unlikely(in_atomic() || !mm))
		goto bad_area_nosemaphore;

	/*
	 * User-mode registers count as a user access even for any
	 * potential system fault or CPU buglet.
	 */
	if (user_mode_vm(regs))
		error_code |= PF_USER;
	/* When running in the kernel we expect faults to occur only to
	 * addresses in user space.  All other faults represent errors in the
	 * kernel and should generate an OOPS.  Unfortunately, in the case of an
	 * erroneous fault occurring in a code path which already holds mmap_sem
	 * we will deadlock attempting to validate the fault against the
	 * address space.  Luckily the kernel only validly references user
	 * space from well defined areas of code, which are listed in the
	 * exceptions table.
	 *
	 * As the vast majority of faults will be valid we will only perform
	 * the source reference check when there is a possibility of a deadlock.
	 * Attempt to lock the address space, if we cannot we then validate the
	 * source.  If this is invalid we can skip the address space check,
	 * thus avoiding the deadlock.
	 */
	if (!down_read_trylock(&mm->mmap_sem)) {
		if ((error_code & PF_USER) == 0 &&
		    !search_exception_tables(regs->ip))
			goto bad_area_nosemaphore;
		down_read(&mm->mmap_sem);
	}
	vma = find_vma(mm, address);
	if (likely(vma->vm_start <= address))
		goto good_area;
	if (!(vma->vm_flags & VM_GROWSDOWN))
		goto bad_area;
	if (error_code & PF_USER) {
		/*
		 * Accessing the stack below %sp is always a bug.
		 * The large cushion allows instructions like enter
		 * and pusha to work. ("enter $65535,$31" pushes
		 * 32 pointers and then decrements %sp by 65535.)
		 */
		if (address + 65536 + 32 * sizeof(unsigned long) < regs->sp)
			goto bad_area;
	}
	if (expand_stack(vma, address))
		goto bad_area;
	/*
	 * Ok, we have a good vm_area for this memory access, so
	 * we can handle it..
	 */
good_area:
	si_code = SEGV_ACCERR;
	switch (error_code & (PF_PROT|PF_WRITE)) {
	default:	/* 3: write, present */
	case PF_WRITE:	/* write, not present */
		if (!(vma->vm_flags & VM_WRITE))
			goto bad_area;
	case PF_PROT:	/* read, present */
	case 0:		/* read, not present */
		if (!(vma->vm_flags & (VM_READ | VM_EXEC | VM_WRITE)))
			goto bad_area;
	}
	/*
	 * If for any reason at all we couldn't handle the fault,
	 * make sure we exit gracefully rather than endlessly redo
	 * the fault.
	 */
	fault = handle_mm_fault(mm, vma, address, write);
	if (unlikely(fault & VM_FAULT_ERROR)) {
		if (fault & VM_FAULT_OOM)
			goto out_of_memory;
		else if (fault & VM_FAULT_SIGBUS)
			goto do_sigbus;
	}
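	/*
	 * Account the fault against the task: a major fault is one that had
	 * to block (e.g. wait for I/O); everything else counts as minor.
	 */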
	if (fault & VM_FAULT_MAJOR)
		tsk->maj_flt++;
	else
		tsk->min_flt++;
	/*
	 * Did it hit the DOS screen memory VA from vm86 mode?
	 */
	if (v8086_mode(regs)) {
		unsigned long bit = (address - 0xA0000) >> PAGE_SHIFT;
		if (bit < 32)
			tsk->thread.screen_bitmap |= 1 << bit;
	}

	up_read(&mm->mmap_sem);
	return;

	/*
	 * Something tried to access memory that isn't in our memory map..
	 * Fix it, but check if it's kernel or user first..
	 */
bad_area:
	up_read(&mm->mmap_sem);

bad_area_nosemaphore:
	/* User mode accesses just cause a SIGSEGV */
	if (error_code & PF_USER) {
		/*
		 * It's possible to have interrupts off here.
		 */
		if (is_prefetch(regs, address, error_code))
			return;
		/* Work around K8 erratum #100: K8 in compat mode
		   occasionally jumps to illegal addresses >4GB.  We
		   catch this here in the page fault handler because
		   these addresses are not reachable.  Just detect this
		   case and return.  Any code segment in LDT is
		   compatibility mode. */
		if ((regs->cs == __USER32_CS || (regs->cs & (1<<2))) &&
		    (address >> 32))
			return;
		if (show_unhandled_signals && unhandled_signal(tsk, SIGSEGV) &&
		    printk_ratelimit()) {
			printk(
#ifdef CONFIG_X86_32
			"%s%s[%d]: segfault at %lx ip %08lx sp %08lx error %lx",
#else
			"%s%s[%d]: segfault at %lx ip %lx sp %lx error %lx",
#endif
			task_pid_nr(tsk) > 1 ? KERN_INFO : KERN_EMERG,
			tsk->comm, task_pid_nr(tsk), address, regs->ip,
			regs->sp, error_code);
			print_vma_addr(" in ", regs->ip);
		}

		tsk->thread.cr2 = address;
		/* Kernel addresses are always protection faults */
		tsk->thread.error_code = error_code | (address >= TASK_SIZE);
		tsk->thread.trap_no = 14;
		force_sig_info_fault(SIGSEGV, si_code, address, tsk);
		return;
	}

no_context:
	/* Are we prepared to handle this kernel fault? */
	if (fixup_exception(regs))
		return;
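	/*
	 * No fixup entry for regs->ip means the kernel really did a bad
	 * access: fall through to the CPU/BIOS workarounds and, failing
	 * those, to the oops path below.
	 */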
	/*
	 * Hall of shame of CPU/BIOS bugs.
	 */
	if (is_prefetch(regs, address, error_code))
		return;
	if (is_errata93(regs, address))
		return;
	/*
	 * Oops. The kernel tried to access some bad page. We'll have to
	 * terminate things with extreme prejudice.
	 */
	flags = oops_begin();
	if (address < PAGE_SIZE)
		printk(KERN_ALERT "Unable to handle kernel NULL pointer dereference");
	else
		printk(KERN_ALERT "Unable to handle kernel paging request");
	printk(" at %016lx RIP: \n" KERN_ALERT, address);
	printk_address(regs->ip, 1);
	dump_pagetable(address);
	tsk->thread.cr2 = address;
	tsk->thread.trap_no = 14;
	tsk->thread.error_code = error_code;
	if (__die("Oops", regs, error_code))
		regs = NULL;
	/* Executive summary in case the body of the oops scrolled away */
	printk(KERN_EMERG "CR2: %016lx\n", address);
	oops_end(flags, regs, SIGKILL);
	/*
	 * We ran out of memory, or some other thing happened to us that made
	 * us unable to handle the page fault gracefully.
	 */
out_of_memory:
	up_read(&mm->mmap_sem);
	if (is_global_init(current)) {
	printk("VM: killing process %s\n", tsk->comm);
	if (error_code & PF_USER)
		do_group_exit(SIGKILL);

do_sigbus:
	up_read(&mm->mmap_sem);

	/* Kernel mode? Handle exceptions or die */
	if (!(error_code & PF_USER))
		goto no_context;

	tsk->thread.cr2 = address;
	tsk->thread.error_code = error_code;
	tsk->thread.trap_no = 14;
	force_sig_info_fault(SIGBUS, BUS_ADRERR, address, tsk);
}

DEFINE_SPINLOCK(pgd_lock);
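
/*
 * Ensure that the vmalloc portion of every process's PGD is in sync with
 * the reference kernel page table: any populated kernel PGD entry in that
 * range is copied into each PGD on pgd_list that is still missing it.
 */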
void vmalloc_sync_all(void)
{
	/*
	 * Note that races in the updates of insync and start aren't
	 * problematic: insync can only get set bits added, and updates to
	 * start are only improving performance (without affecting correctness
	 * of course).
	 */
	static DECLARE_BITMAP(insync, PTRS_PER_PGD);
	static unsigned long start = VMALLOC_START & PGDIR_MASK;
	unsigned long address;
	for (address = start; address <= VMALLOC_END; address += PGDIR_SIZE) {
		if (!test_bit(pgd_index(address), insync)) {
			const pgd_t *pgd_ref = pgd_offset_k(address);

			if (pgd_none(*pgd_ref))
				continue;
			spin_lock(&pgd_lock);
			list_for_each_entry(page, &pgd_list, lru) {
				pgd = (pgd_t *)page_address(page) + pgd_index(address);
				if (pgd_none(*pgd))
					set_pgd(pgd, *pgd_ref);
				else
					BUG_ON(pgd_page_vaddr(*pgd) != pgd_page_vaddr(*pgd_ref));
			}
			spin_unlock(&pgd_lock);
			set_bit(pgd_index(address), insync);
		}
		if (address == start)
			start = address + PGDIR_SIZE;
	}

	/* Check that there is no need to do the same for the modules area. */
	BUILD_BUG_ON(!(MODULES_VADDR > __START_KERNEL));
	BUILD_BUG_ON(!(((MODULES_END - 1) & PGDIR_MASK) ==
		       (__START_KERNEL & PGDIR_MASK)));
}