git.oblomov.eu Git - linux-2.6/blob - arch/x86/mm/numa_64.c

   1 /*
   2  * Generic VM initialization for x86-64 NUMA setups.
   3  * Copyright 2002,2003 Andi Kleen, SuSE Labs.
   4  */
   5 #include <linux/kernel.h>
   6 #include <linux/mm.h>
   7 #include <linux/string.h>
   8 #include <linux/init.h>
   9 #include <linux/bootmem.h>
  10 #include <linux/mmzone.h>
  11 #include <linux/ctype.h>
  12 #include <linux/module.h>
  13 #include <linux/nodemask.h>
  14
  15 #include <asm/e820.h>
  16 #include <asm/proto.h>
  17 #include <asm/dma.h>
  18 #include <asm/numa.h>
  19 #include <asm/acpi.h>
  20
  21 #ifndef Dprintk
  22 #define Dprintk(x...)
  23 #endif
  24
  25 struct pglist_data *node_data[MAX_NUMNODES] __read_mostly;
  26 bootmem_data_t plat_node_bdata[MAX_NUMNODES];
  27
  28 struct memnode memnode;
  29
  30 unsigned char cpu_to_node[NR_CPUS] __read_mostly = {
  31         [0 ... NR_CPUS-1] = NUMA_NO_NODE
  32 };
  33 unsigned char apicid_to_node[MAX_LOCAL_APIC] __cpuinitdata = {
  34         [0 ... MAX_LOCAL_APIC-1] = NUMA_NO_NODE
  35 };
  36 cpumask_t node_to_cpumask[MAX_NUMNODES] __read_mostly;
  37
  38 int numa_off __initdata;
  39 unsigned long __initdata nodemap_addr;
  40 unsigned long __initdata nodemap_size;
  41
  42
  43 /*
  44  * Given a shift value, try to populate memnodemap[]
  45  * Returns :
  46  * 1 if OK
  47  * 0 if memnodmap[] too small (of shift too small)
  48  * -1 if node overlap or lost ram (shift too big)
  49  */
  50 static int __init
  51 populate_memnodemap(const struct bootnode *nodes, int numnodes, int shift)
  52 {
  53         int i;
  54         int res = -1;
  55         unsigned long addr, end;
  56
  57         memset(memnodemap, 0xff, memnodemapsize);
  58         for (i = 0; i < numnodes; i++) {
  59                 addr = nodes[i].start;
  60                 end = nodes[i].end;
  61                 if (addr >= end)
  62                         continue;
  63                 if ((end >> shift) >= memnodemapsize)
  64                         return 0;
  65                 do {
  66                         if (memnodemap[addr >> shift] != 0xff)
  67                                 return -1;
  68                         memnodemap[addr >> shift] = i;
  69                         addr += (1UL << shift);
  70                 } while (addr < end);
  71                 res = 1;
  72         }
  73         return res;
  74 }
  75
  76 static int __init allocate_cachealigned_memnodemap(void)
  77 {
  78         unsigned long pad, pad_addr;
  79
  80         memnodemap = memnode.embedded_map;
  81         if (memnodemapsize <= 48)
  82                 return 0;
  83
  84         pad = L1_CACHE_BYTES - 1;
  85         pad_addr = 0x8000;
  86         nodemap_size = pad + memnodemapsize;
  87         nodemap_addr = find_e820_area(pad_addr, end_pfn<<PAGE_SHIFT,
  88                                       nodemap_size);
  89         if (nodemap_addr == -1UL) {
  90                 printk(KERN_ERR
  91                        "NUMA: Unable to allocate Memory to Node hash map\n");
  92                 nodemap_addr = nodemap_size = 0;
  93                 return -1;
  94         }
  95         pad_addr = (nodemap_addr + pad) & ~pad;
  96         memnodemap = phys_to_virt(pad_addr);
  97
  98         printk(KERN_DEBUG "NUMA: Allocated memnodemap from %lx - %lx\n",
  99                nodemap_addr, nodemap_addr + nodemap_size);
 100         return 0;
 101 }
 102
 103 /*
 104  * The LSB of all start and end addresses in the node map is the value of the
 105  * maximum possible shift.
 106  */
 107 static int __init
 108 extract_lsb_from_nodes (const struct bootnode *nodes, int numnodes)
 109 {
 110         int i, nodes_used = 0;
 111         unsigned long start, end;
 112         unsigned long bitfield = 0, memtop = 0;
 113
 114         for (i = 0; i < numnodes; i++) {
 115                 start = nodes[i].start;
 116                 end = nodes[i].end;
 117                 if (start >= end)
 118                         continue;
 119                 bitfield |= start;
 120                 nodes_used++;
 121                 if (end > memtop)
 122                         memtop = end;
 123         }
 124         if (nodes_used <= 1)
 125                 i = 63;
 126         else
 127                 i = find_first_bit(&bitfield, sizeof(unsigned long)*8);
 128         memnodemapsize = (memtop >> i)+1;
 129         return i;
 130 }
 131
 132 int __init compute_hash_shift(struct bootnode *nodes, int numnodes)
 133 {
 134         int shift;
 135
 136         shift = extract_lsb_from_nodes(nodes, numnodes);
 137         if (allocate_cachealigned_memnodemap())
 138                 return -1;
 139         printk(KERN_DEBUG "NUMA: Using %d for the hash shift.\n",
 140                 shift);
 141
 142         if (populate_memnodemap(nodes, numnodes, shift) != 1) {
 143                 printk(KERN_INFO
 144         "Your memory is not aligned you need to rebuild your kernel "
 145         "with a bigger NODEMAPSIZE shift=%d\n",
 146                         shift);
 147                 return -1;
 148         }
 149         return shift;
 150 }
 151
 152 #ifdef CONFIG_SPARSEMEM
 153 int early_pfn_to_nid(unsigned long pfn)
 154 {
 155         return phys_to_nid(pfn << PAGE_SHIFT);
 156 }
 157 #endif
 158
 159 static void * __init
 160 early_node_mem(int nodeid, unsigned long start, unsigned long end,
 161               unsigned long size)
 162 {
 163         unsigned long mem = find_e820_area(start, end, size);
 164         void *ptr;
 165         if (mem != -1L)
 166                 return __va(mem);
 167         ptr = __alloc_bootmem_nopanic(size,
 168                                 SMP_CACHE_BYTES, __pa(MAX_DMA_ADDRESS));
 169         if (ptr == NULL) {
 170                 printk(KERN_ERR "Cannot find %lu bytes in node %d\n",
 171                         size, nodeid);
 172                 return NULL;
 173         }
 174         return ptr;
 175 }
 176
 177 /* Initialize bootmem allocator for a node */
 178 void __init setup_node_bootmem(int nodeid, unsigned long start, unsigned long end)
 179 {
 180         unsigned long start_pfn, end_pfn, bootmap_pages, bootmap_size, bootmap_start;
 181         unsigned long nodedata_phys;
 182         void *bootmap;
 183         const int pgdat_size = round_up(sizeof(pg_data_t), PAGE_SIZE);
 184
 185         start = round_up(start, ZONE_ALIGN);
 186
 187         printk(KERN_INFO "Bootmem setup node %d %016lx-%016lx\n", nodeid, start, end);
 188
 189         start_pfn = start >> PAGE_SHIFT;
 190         end_pfn = end >> PAGE_SHIFT;
 191
 192         node_data[nodeid] = early_node_mem(nodeid, start, end, pgdat_size);
 193         if (node_data[nodeid] == NULL)
 194                 return;
 195         nodedata_phys = __pa(node_data[nodeid]);
 196
 197         memset(NODE_DATA(nodeid), 0, sizeof(pg_data_t));
 198         NODE_DATA(nodeid)->bdata = &plat_node_bdata[nodeid];
 199         NODE_DATA(nodeid)->node_start_pfn = start_pfn;
 200         NODE_DATA(nodeid)->node_spanned_pages = end_pfn - start_pfn;
 201
 202         /* Find a place for the bootmem map */
 203         bootmap_pages = bootmem_bootmap_pages(end_pfn - start_pfn);
 204         bootmap_start = round_up(nodedata_phys + pgdat_size, PAGE_SIZE);
 205         bootmap = early_node_mem(nodeid, bootmap_start, end,
 206                                         bootmap_pages<<PAGE_SHIFT);
 207         if (bootmap == NULL)  {
 208                 if (nodedata_phys < start || nodedata_phys >= end)
 209                         free_bootmem((unsigned long)node_data[nodeid],pgdat_size);
 210                 node_data[nodeid] = NULL;
 211                 return;
 212         }
 213         bootmap_start = __pa(bootmap);
 214         Dprintk("bootmap start %lu pages %lu\n", bootmap_start, bootmap_pages);
 215
 216         bootmap_size = init_bootmem_node(NODE_DATA(nodeid),
 217                                          bootmap_start >> PAGE_SHIFT,
 218                                          start_pfn, end_pfn);
 219
 220         free_bootmem_with_active_regions(nodeid, end);
 221
 222         reserve_bootmem_node(NODE_DATA(nodeid), nodedata_phys, pgdat_size);
 223         reserve_bootmem_node(NODE_DATA(nodeid), bootmap_start, bootmap_pages<<PAGE_SHIFT);
 224 #ifdef CONFIG_ACPI_NUMA
 225         srat_reserve_add_area(nodeid);
 226 #endif
 227         node_set_online(nodeid);
 228 }
 229
 230 /* Initialize final allocator for a zone */
 231 void __init setup_node_zones(int nodeid)
 232 {
 233         unsigned long start_pfn, end_pfn, memmapsize, limit;
 234
 235         start_pfn = node_start_pfn(nodeid);
 236         end_pfn = node_end_pfn(nodeid);
 237
 238         Dprintk(KERN_INFO "Setting up memmap for node %d %lx-%lx\n",
 239                 nodeid, start_pfn, end_pfn);
 240
 241         /* Try to allocate mem_map at end to not fill up precious <4GB
 242            memory. */
 243         memmapsize = sizeof(struct page) * (end_pfn-start_pfn);
 244         limit = end_pfn << PAGE_SHIFT;
 245 #ifdef CONFIG_FLAT_NODE_MEM_MAP
 246         NODE_DATA(nodeid)->node_mem_map =
 247                 __alloc_bootmem_core(NODE_DATA(nodeid)->bdata,
 248                                 memmapsize, SMP_CACHE_BYTES,
 249                                 round_down(limit - memmapsize, PAGE_SIZE),
 250                                 limit);
 251 #endif
 252 }
 253
 254 void __init numa_init_array(void)
 255 {
 256         int rr, i;
 257         /* There are unfortunately some poorly designed mainboards around
 258            that only connect memory to a single CPU. This breaks the 1:1 cpu->node
 259            mapping. To avoid this fill in the mapping for all possible
 260            CPUs, as the number of CPUs is not known yet.
 261            We round robin the existing nodes. */
 262         rr = first_node(node_online_map);
 263         for (i = 0; i < NR_CPUS; i++) {
 264                 if (cpu_to_node(i) != NUMA_NO_NODE)
 265                         continue;
 266                 numa_set_node(i, rr);
 267                 rr = next_node(rr, node_online_map);
 268                 if (rr == MAX_NUMNODES)
 269                         rr = first_node(node_online_map);
 270         }
 271
 272 }
 273
 274 #ifdef CONFIG_NUMA_EMU
 275 /* Numa emulation */
 276 char *cmdline __initdata;
 277
 278 /*
 279  * Setups up nid to range from addr to addr + size.  If the end boundary is
 280  * greater than max_addr, then max_addr is used instead.  The return value is 0
 281  * if there is additional memory left for allocation past addr and -1 otherwise.
 282  * addr is adjusted to be at the end of the node.
 283  */
 284 static int __init setup_node_range(int nid, struct bootnode *nodes, u64 *addr,
 285                                    u64 size, u64 max_addr)
 286 {
 287         int ret = 0;
 288         nodes[nid].start = *addr;
 289         *addr += size;
 290         if (*addr >= max_addr) {
 291                 *addr = max_addr;
 292                 ret = -1;
 293         }
 294         nodes[nid].end = *addr;
 295         node_set(nid, node_possible_map);
 296         printk(KERN_INFO "Faking node %d at %016Lx-%016Lx (%LuMB)\n", nid,
 297                nodes[nid].start, nodes[nid].end,
 298                (nodes[nid].end - nodes[nid].start) >> 20);
 299         return ret;
 300 }
 301
 302 /*
 303  * Splits num_nodes nodes up equally starting at node_start.  The return value
 304  * is the number of nodes split up and addr is adjusted to be at the end of the
 305  * last node allocated.
 306  */
 307 static int __init split_nodes_equally(struct bootnode *nodes, u64 *addr,
 308                                       u64 max_addr, int node_start,
 309                                       int num_nodes)
 310 {
 311         unsigned int big;
 312         u64 size;
 313         int i;
 314
 315         if (num_nodes <= 0)
 316                 return -1;
 317         if (num_nodes > MAX_NUMNODES)
 318                 num_nodes = MAX_NUMNODES;
 319         size = (max_addr - *addr - e820_hole_size(*addr, max_addr)) /
 320                num_nodes;
 321         /*
 322          * Calculate the number of big nodes that can be allocated as a result
 323          * of consolidating the leftovers.
 324          */
 325         big = ((size & ~FAKE_NODE_MIN_HASH_MASK) * num_nodes) /
 326               FAKE_NODE_MIN_SIZE;
 327
 328         /* Round down to nearest FAKE_NODE_MIN_SIZE. */
 329         size &= FAKE_NODE_MIN_HASH_MASK;
 330         if (!size) {
 331                 printk(KERN_ERR "Not enough memory for each node.  "
 332                        "NUMA emulation disabled.\n");
 333                 return -1;
 334         }
 335
 336         for (i = node_start; i < num_nodes + node_start; i++) {
 337                 u64 end = *addr + size;
 338                 if (i < big)
 339                         end += FAKE_NODE_MIN_SIZE;
 340                 /*
 341                  * The final node can have the remaining system RAM.  Other
 342                  * nodes receive roughly the same amount of available pages.
 343                  */
 344                 if (i == num_nodes + node_start - 1)
 345                         end = max_addr;
 346                 else
 347                         while (end - *addr - e820_hole_size(*addr, end) <
 348                                size) {
 349                                 end += FAKE_NODE_MIN_SIZE;
 350                                 if (end > max_addr) {
 351                                         end = max_addr;
 352                                         break;
 353                                 }
 354                         }
 355                 if (setup_node_range(i, nodes, addr, end - *addr, max_addr) < 0)
 356                         break;
 357         }
 358         return i - node_start + 1;
 359 }
 360
 361 /*
 362  * Splits the remaining system RAM into chunks of size.  The remaining memory is
 363  * always assigned to a final node and can be asymmetric.  Returns the number of
 364  * nodes split.
 365  */
 366 static int __init split_nodes_by_size(struct bootnode *nodes, u64 *addr,
 367                                       u64 max_addr, int node_start, u64 size)
 368 {
 369         int i = node_start;
 370         size = (size << 20) & FAKE_NODE_MIN_HASH_MASK;
 371         while (!setup_node_range(i++, nodes, addr, size, max_addr))
 372                 ;
 373         return i - node_start;
 374 }
 375
 376 /*
 377  * Sets up the system RAM area from start_pfn to end_pfn according to the
 378  * numa=fake command-line option.
 379  */
 380 static int __init numa_emulation(unsigned long start_pfn, unsigned long end_pfn)
 381 {
 382         struct bootnode nodes[MAX_NUMNODES];
 383         u64 addr = start_pfn << PAGE_SHIFT;
 384         u64 max_addr = end_pfn << PAGE_SHIFT;
 385         int num_nodes = 0;
 386         int coeff_flag;
 387         int coeff = -1;
 388         int num = 0;
 389         u64 size;
 390         int i;
 391
 392         memset(&nodes, 0, sizeof(nodes));
 393         /*
 394          * If the numa=fake command-line is just a single number N, split the
 395          * system RAM into N fake nodes.
 396          */
 397         if (!strchr(cmdline, '*') && !strchr(cmdline, ',')) {
 398                 num_nodes = split_nodes_equally(nodes, &addr, max_addr, 0,
 399                                                 simple_strtol(cmdline, NULL, 0));
 400                 if (num_nodes < 0)
 401                         return num_nodes;
 402                 goto out;
 403         }
 404
 405         /* Parse the command line. */
 406         for (coeff_flag = 0; ; cmdline++) {
 407                 if (*cmdline && isdigit(*cmdline)) {
 408                         num = num * 10 + *cmdline - '0';
 409                         continue;
 410                 }
 411                 if (*cmdline == '*') {
 412                         if (num > 0)
 413                                 coeff = num;
 414                         coeff_flag = 1;
 415                 }
 416                 if (!*cmdline || *cmdline == ',') {
 417                         if (!coeff_flag)
 418                                 coeff = 1;
 419                         /*
 420                          * Round down to the nearest FAKE_NODE_MIN_SIZE.
 421                          * Command-line coefficients are in megabytes.
 422                          */
 423                         size = ((u64)num << 20) & FAKE_NODE_MIN_HASH_MASK;
 424                         if (size)
 425                                 for (i = 0; i < coeff; i++, num_nodes++)
 426                                         if (setup_node_range(num_nodes, nodes,
 427                                                 &addr, size, max_addr) < 0)
 428                                                 goto done;
 429                         if (!*cmdline)
 430                                 break;
 431                         coeff_flag = 0;
 432                         coeff = -1;
 433                 }
 434                 num = 0;
 435         }
 436 done:
 437         if (!num_nodes)
 438                 return -1;
 439         /* Fill remainder of system RAM, if appropriate. */
 440         if (addr < max_addr) {
 441                 if (coeff_flag && coeff < 0) {
 442                         /* Split remaining nodes into num-sized chunks */
 443                         num_nodes += split_nodes_by_size(nodes, &addr, max_addr,
 444                                                          num_nodes, num);
 445                         goto out;
 446                 }
 447                 switch (*(cmdline - 1)) {
 448                 case '*':
 449                         /* Split remaining nodes into coeff chunks */
 450                         if (coeff <= 0)
 451                                 break;
 452                         num_nodes += split_nodes_equally(nodes, &addr, max_addr,
 453                                                          num_nodes, coeff);
 454                         break;
 455                 case ',':
 456                         /* Do not allocate remaining system RAM */
 457                         break;
 458                 default:
 459                         /* Give one final node */
 460                         setup_node_range(num_nodes, nodes, &addr,
 461                                          max_addr - addr, max_addr);
 462                         num_nodes++;
 463                 }
 464         }
 465 out:
 466         memnode_shift = compute_hash_shift(nodes, num_nodes);
 467         if (memnode_shift < 0) {
 468                 memnode_shift = 0;
 469                 printk(KERN_ERR "No NUMA hash function found.  NUMA emulation "
 470                        "disabled.\n");
 471                 return -1;
 472         }
 473
 474         /*
 475          * We need to vacate all active ranges that may have been registered by
 476          * SRAT and set acpi_numa to -1 so that srat_disabled() always returns
 477          * true.  NUMA emulation has succeeded so we will not scan ACPI nodes.
 478          */
 479         remove_all_active_ranges();
 480 #ifdef CONFIG_ACPI_NUMA
 481         acpi_numa = -1;
 482 #endif
 483         for_each_node_mask(i, node_possible_map) {
 484                 e820_register_active_regions(i, nodes[i].start >> PAGE_SHIFT,
 485                                                 nodes[i].end >> PAGE_SHIFT);
 486                 setup_node_bootmem(i, nodes[i].start, nodes[i].end);
 487         }
 488         acpi_fake_nodes(nodes, num_nodes);
 489         numa_init_array();
 490         return 0;
 491 }
 492 #endif /* CONFIG_NUMA_EMU */
 493
 494 void __init numa_initmem_init(unsigned long start_pfn, unsigned long end_pfn)
 495 {
 496         int i;
 497
 498         nodes_clear(node_possible_map);
 499
 500 #ifdef CONFIG_NUMA_EMU
 501         if (cmdline && !numa_emulation(start_pfn, end_pfn))
 502                 return;
 503         nodes_clear(node_possible_map);
 504 #endif
 505
 506 #ifdef CONFIG_ACPI_NUMA
 507         if (!numa_off && !acpi_scan_nodes(start_pfn << PAGE_SHIFT,
 508                                           end_pfn << PAGE_SHIFT))
 509                 return;
 510         nodes_clear(node_possible_map);
 511 #endif
 512
 513 #ifdef CONFIG_K8_NUMA
 514         if (!numa_off && !k8_scan_nodes(start_pfn<<PAGE_SHIFT, end_pfn<<PAGE_SHIFT))
 515                 return;
 516         nodes_clear(node_possible_map);
 517 #endif
 518         printk(KERN_INFO "%s\n",
 519                numa_off ? "NUMA turned off" : "No NUMA configuration found");
 520
 521         printk(KERN_INFO "Faking a node at %016lx-%016lx\n",
 522                start_pfn << PAGE_SHIFT,
 523                end_pfn << PAGE_SHIFT);
 524                 /* setup dummy node covering all memory */
 525         memnode_shift = 63;
 526         memnodemap = memnode.embedded_map;
 527         memnodemap[0] = 0;
 528         nodes_clear(node_online_map);
 529         node_set_online(0);
 530         node_set(0, node_possible_map);
 531         for (i = 0; i < NR_CPUS; i++)
 532                 numa_set_node(i, 0);
 533         node_to_cpumask[0] = cpumask_of_cpu(0);
 534         e820_register_active_regions(0, start_pfn, end_pfn);
 535         setup_node_bootmem(0, start_pfn << PAGE_SHIFT, end_pfn << PAGE_SHIFT);
 536 }
 537
 538 __cpuinit void numa_add_cpu(int cpu)
 539 {
 540         set_bit(cpu, &node_to_cpumask[cpu_to_node(cpu)]);
 541 }
 542
 543 void __cpuinit numa_set_node(int cpu, int node)
 544 {
 545         cpu_pda(cpu)->nodenumber = node;
 546         cpu_to_node(cpu) = node;
 547 }
 548
 549 unsigned long __init numa_free_all_bootmem(void)
 550 {
 551         int i;
 552         unsigned long pages = 0;
 553         for_each_online_node(i) {
 554                 pages += free_all_bootmem_node(NODE_DATA(i));
 555         }
 556         return pages;
 557 }
 558
 559 void __init paging_init(void)
 560 {
 561         int i;
 562         unsigned long max_zone_pfns[MAX_NR_ZONES];
 563         memset(max_zone_pfns, 0, sizeof(max_zone_pfns));
 564         max_zone_pfns[ZONE_DMA] = MAX_DMA_PFN;
 565         max_zone_pfns[ZONE_DMA32] = MAX_DMA32_PFN;
 566         max_zone_pfns[ZONE_NORMAL] = end_pfn;
 567
 568         sparse_memory_present_with_active_regions(MAX_NUMNODES);
 569         sparse_init();
 570
 571         for_each_online_node(i) {
 572                 setup_node_zones(i);
 573         }
 574
 575         free_area_init_nodes(max_zone_pfns);
 576 }
 577
 578 static __init int numa_setup(char *opt)
 579 {
 580         if (!opt)
 581                 return -EINVAL;
 582         if (!strncmp(opt,"off",3))
 583                 numa_off = 1;
 584 #ifdef CONFIG_NUMA_EMU
 585         if (!strncmp(opt, "fake=", 5))
 586                 cmdline = opt + 5;
 587 #endif
 588 #ifdef CONFIG_ACPI_NUMA
 589         if (!strncmp(opt,"noacpi",6))
 590                 acpi_numa = -1;
 591         if (!strncmp(opt,"hotadd=", 7))
 592                 hotadd_percent = simple_strtoul(opt+7, NULL, 10);
 593 #endif
 594         return 0;
 595 }
 596
 597 early_param("numa", numa_setup);
 598
 599 /*
 600  * Setup early cpu_to_node.
 601  *
 602  * Populate cpu_to_node[] only if x86_cpu_to_apicid[],
 603  * and apicid_to_node[] tables have valid entries for a CPU.
 604  * This means we skip cpu_to_node[] initialisation for NUMA
 605  * emulation and faking node case (when running a kernel compiled
 606  * for NUMA on a non NUMA box), which is OK as cpu_to_node[]
 607  * is already initialized in a round robin manner at numa_init_array,
 608  * prior to this call, and this initialization is good enough
 609  * for the fake NUMA cases.
 610  */
 611 void __init init_cpu_to_node(void)
 612 {
 613         int i;
 614         for (i = 0; i < NR_CPUS; i++) {
 615                 u8 apicid = x86_cpu_to_apicid[i];
 616                 if (apicid == BAD_APICID)
 617                         continue;
 618                 if (apicid_to_node[apicid] == NUMA_NO_NODE)
 619                         continue;
 620                 numa_set_node(i,apicid_to_node[apicid]);
 621         }
 622 }
 623
 624 EXPORT_SYMBOL(cpu_to_node);
 625 EXPORT_SYMBOL(node_to_cpumask);
 626 EXPORT_SYMBOL(memnode);
 627 EXPORT_SYMBOL(node_data);
 628
 629 #ifdef CONFIG_DISCONTIGMEM
 630 /*
 631  * Functions to convert PFNs from/to per node page addresses.
 632  * These are out of line because they are quite big.
 633  * They could be all tuned by pre caching more state.
 634  * Should do that.
 635  */
 636
 637 int pfn_valid(unsigned long pfn)
 638 {
 639         unsigned nid;
 640         if (pfn >= num_physpages)
 641                 return 0;
 642         nid = pfn_to_nid(pfn);
 643         if (nid == 0xff)
 644                 return 0;
 645         return pfn >= node_start_pfn(nid) && (pfn) < node_end_pfn(nid);
 646 }
 647 EXPORT_SYMBOL(pfn_valid);
 648 #endif