git.oblomov.eu Git - linux-2.6/blob - arch/arm/mm/dma-mapping.c

   1 /*
   2  *  linux/arch/arm/mm/dma-mapping.c
   3  *
   4  *  Copyright (C) 2000-2004 Russell King
   5  *
   6  * This program is free software; you can redistribute it and/or modify
   7  * it under the terms of the GNU General Public License version 2 as
   8  * published by the Free Software Foundation.
   9  *
  10  *  DMA uncached mapping support.
  11  */
  12 #include <linux/module.h>
  13 #include <linux/mm.h>
  14 #include <linux/slab.h>
  15 #include <linux/errno.h>
  16 #include <linux/list.h>
  17 #include <linux/init.h>
  18 #include <linux/device.h>
  19 #include <linux/dma-mapping.h>
  20
  21 #include <asm/memory.h>
  22 #include <asm/cacheflush.h>
  23 #include <asm/tlbflush.h>
  24 #include <asm/sizes.h>
  25
  26 /* Sanity check size */
  27 #if (CONSISTENT_DMA_SIZE % SZ_2M)
  28 #error "CONSISTENT_DMA_SIZE must be multiple of 2MiB"
  29 #endif
  30
  31 #define CONSISTENT_END  (0xffe00000)
  32 #define CONSISTENT_BASE (CONSISTENT_END - CONSISTENT_DMA_SIZE)
  33
  34 #define CONSISTENT_OFFSET(x)    (((unsigned long)(x) - CONSISTENT_BASE) >> PAGE_SHIFT)
  35 #define CONSISTENT_PTE_INDEX(x) (((unsigned long)(x) - CONSISTENT_BASE) >> PGDIR_SHIFT)
  36 #define NUM_CONSISTENT_PTES (CONSISTENT_DMA_SIZE >> PGDIR_SHIFT)
  37
  38
  39 /*
  40  * These are the page tables (2MB each) covering uncached, DMA consistent allocations
  41  */
  42 static pte_t *consistent_pte[NUM_CONSISTENT_PTES];
  43 static DEFINE_SPINLOCK(consistent_lock);
  44
  45 /*
  46  * VM region handling support.
  47  *
  48  * This should become something generic, handling VM region allocations for
  49  * vmalloc and similar (ioremap, module space, etc).
  50  *
  51  * I envisage vmalloc()'s supporting vm_struct becoming:
  52  *
  53  *  struct vm_struct {
  54  *    struct vm_region  region;
  55  *    unsigned long     flags;
  56  *    struct page       **pages;
  57  *    unsigned int      nr_pages;
  58  *    unsigned long     phys_addr;
  59  *  };
  60  *
  61  * get_vm_area() would then call vm_region_alloc with an appropriate
  62  * struct vm_region head (eg):
  63  *
  64  *  struct vm_region vmalloc_head = {
  65  *      .vm_list        = LIST_HEAD_INIT(vmalloc_head.vm_list),
  66  *      .vm_start       = VMALLOC_START,
  67  *      .vm_end         = VMALLOC_END,
  68  *  };
  69  *
  70  * However, vmalloc_head.vm_start is variable (typically, it is dependent on
  71  * the amount of RAM found at boot time.)  I would imagine that get_vm_area()
  72  * would have to initialise this each time prior to calling vm_region_alloc().
  73  */
  74 struct vm_region {
  75         struct list_head        vm_list;
  76         unsigned long           vm_start;
  77         unsigned long           vm_end;
  78         struct page             *vm_pages;
  79         int                     vm_active;
  80 };
  81
  82 static struct vm_region consistent_head = {
  83         .vm_list        = LIST_HEAD_INIT(consistent_head.vm_list),
  84         .vm_start       = CONSISTENT_BASE,
  85         .vm_end         = CONSISTENT_END,
  86 };
  87
  88 static struct vm_region *
  89 vm_region_alloc(struct vm_region *head, size_t size, gfp_t gfp)
  90 {
  91         unsigned long addr = head->vm_start, end = head->vm_end - size;
  92         unsigned long flags;
  93         struct vm_region *c, *new;
  94
  95         new = kmalloc(sizeof(struct vm_region), gfp);
  96         if (!new)
  97                 goto out;
  98
  99         spin_lock_irqsave(&consistent_lock, flags);
 100
 101         list_for_each_entry(c, &head->vm_list, vm_list) {
 102                 if ((addr + size) < addr)
 103                         goto nospc;
 104                 if ((addr + size) <= c->vm_start)
 105                         goto found;
 106                 addr = c->vm_end;
 107                 if (addr > end)
 108                         goto nospc;
 109         }
 110
 111  found:
 112         /*
 113          * Insert this entry _before_ the one we found.
 114          */
 115         list_add_tail(&new->vm_list, &c->vm_list);
 116         new->vm_start = addr;
 117         new->vm_end = addr + size;
 118         new->vm_active = 1;
 119
 120         spin_unlock_irqrestore(&consistent_lock, flags);
 121         return new;
 122
 123  nospc:
 124         spin_unlock_irqrestore(&consistent_lock, flags);
 125         kfree(new);
 126  out:
 127         return NULL;
 128 }
 129
 130 static struct vm_region *vm_region_find(struct vm_region *head, unsigned long addr)
 131 {
 132         struct vm_region *c;
 133
 134         list_for_each_entry(c, &head->vm_list, vm_list) {
 135                 if (c->vm_active && c->vm_start == addr)
 136                         goto out;
 137         }
 138         c = NULL;
 139  out:
 140         return c;
 141 }
 142
 143 #ifdef CONFIG_HUGETLB_PAGE
 144 #error ARM Coherent DMA allocator does not (yet) support huge TLB
 145 #endif
 146
 147 static void *
 148 __dma_alloc(struct device *dev, size_t size, dma_addr_t *handle, gfp_t gfp,
 149             pgprot_t prot)
 150 {
 151         struct page *page;
 152         struct vm_region *c;
 153         unsigned long order;
 154         u64 mask = ISA_DMA_THRESHOLD, limit;
 155
 156         if (!consistent_pte[0]) {
 157                 printk(KERN_ERR "%s: not initialised\n", __func__);
 158                 dump_stack();
 159                 return NULL;
 160         }
 161
 162         if (dev) {
 163                 mask = dev->coherent_dma_mask;
 164
 165                 /*
 166                  * Sanity check the DMA mask - it must be non-zero, and
 167                  * must be able to be satisfied by a DMA allocation.
 168                  */
 169                 if (mask == 0) {
 170                         dev_warn(dev, "coherent DMA mask is unset\n");
 171                         goto no_page;
 172                 }
 173
 174                 if ((~mask) & ISA_DMA_THRESHOLD) {
 175                         dev_warn(dev, "coherent DMA mask %#llx is smaller "
 176                                  "than system GFP_DMA mask %#llx\n",
 177                                  mask, (unsigned long long)ISA_DMA_THRESHOLD);
 178                         goto no_page;
 179                 }
 180         }
 181
 182         /*
 183          * Sanity check the allocation size.
 184          */
 185         size = PAGE_ALIGN(size);
 186         limit = (mask + 1) & ~mask;
 187         if ((limit && size >= limit) ||
 188             size >= (CONSISTENT_END - CONSISTENT_BASE)) {
 189                 printk(KERN_WARNING "coherent allocation too big "
 190                        "(requested %#x mask %#llx)\n", size, mask);
 191                 goto no_page;
 192         }
 193
 194         order = get_order(size);
 195
 196         if (mask != 0xffffffff)
 197                 gfp |= GFP_DMA;
 198
 199         page = alloc_pages(gfp, order);
 200         if (!page)
 201                 goto no_page;
 202
 203         /*
 204          * Invalidate any data that might be lurking in the
 205          * kernel direct-mapped region for device DMA.
 206          */
 207         {
 208                 void *ptr = page_address(page);
 209                 memset(ptr, 0, size);
 210                 dmac_flush_range(ptr, ptr + size);
 211                 outer_flush_range(__pa(ptr), __pa(ptr) + size);
 212         }
 213
 214         /*
 215          * Allocate a virtual address in the consistent mapping region.
 216          */
 217         c = vm_region_alloc(&consistent_head, size,
 218                             gfp & ~(__GFP_DMA | __GFP_HIGHMEM));
 219         if (c) {
 220                 pte_t *pte;
 221                 struct page *end = page + (1 << order);
 222                 int idx = CONSISTENT_PTE_INDEX(c->vm_start);
 223                 u32 off = CONSISTENT_OFFSET(c->vm_start) & (PTRS_PER_PTE-1);
 224
 225                 pte = consistent_pte[idx] + off;
 226                 c->vm_pages = page;
 227
 228                 split_page(page, order);
 229
 230                 /*
 231                  * Set the "dma handle"
 232                  */
 233                 *handle = page_to_dma(dev, page);
 234
 235                 do {
 236                         BUG_ON(!pte_none(*pte));
 237
 238                         /*
 239                          * x86 does not mark the pages reserved...
 240                          */
 241                         SetPageReserved(page);
 242                         set_pte_ext(pte, mk_pte(page, prot), 0);
 243                         page++;
 244                         pte++;
 245                         off++;
 246                         if (off >= PTRS_PER_PTE) {
 247                                 off = 0;
 248                                 pte = consistent_pte[++idx];
 249                         }
 250                 } while (size -= PAGE_SIZE);
 251
 252                 /*
 253                  * Free the otherwise unused pages.
 254                  */
 255                 while (page < end) {
 256                         __free_page(page);
 257                         page++;
 258                 }
 259
 260                 return (void *)c->vm_start;
 261         }
 262
 263         if (page)
 264                 __free_pages(page, order);
 265  no_page:
 266         *handle = ~0;
 267         return NULL;
 268 }
 269
 270 /*
 271  * Allocate DMA-coherent memory space and return both the kernel remapped
 272  * virtual and bus address for that space.
 273  */
 274 void *
 275 dma_alloc_coherent(struct device *dev, size_t size, dma_addr_t *handle, gfp_t gfp)
 276 {
 277         void *memory;
 278
 279         if (dma_alloc_from_coherent(dev, size, handle, &memory))
 280                 return memory;
 281
 282         if (arch_is_coherent()) {
 283                 void *virt;
 284
 285                 virt = kmalloc(size, gfp);
 286                 if (!virt)
 287                         return NULL;
 288                 *handle =  virt_to_dma(dev, virt);
 289
 290                 return virt;
 291         }
 292
 293         return __dma_alloc(dev, size, handle, gfp,
 294                            pgprot_noncached(pgprot_kernel));
 295 }
 296 EXPORT_SYMBOL(dma_alloc_coherent);
 297
 298 /*
 299  * Allocate a writecombining region, in much the same way as
 300  * dma_alloc_coherent above.
 301  */
 302 void *
 303 dma_alloc_writecombine(struct device *dev, size_t size, dma_addr_t *handle, gfp_t gfp)
 304 {
 305         return __dma_alloc(dev, size, handle, gfp,
 306                            pgprot_writecombine(pgprot_kernel));
 307 }
 308 EXPORT_SYMBOL(dma_alloc_writecombine);
 309
 310 static int dma_mmap(struct device *dev, struct vm_area_struct *vma,
 311                     void *cpu_addr, dma_addr_t dma_addr, size_t size)
 312 {
 313         unsigned long flags, user_size, kern_size;
 314         struct vm_region *c;
 315         int ret = -ENXIO;
 316
 317         user_size = (vma->vm_end - vma->vm_start) >> PAGE_SHIFT;
 318
 319         spin_lock_irqsave(&consistent_lock, flags);
 320         c = vm_region_find(&consistent_head, (unsigned long)cpu_addr);
 321         spin_unlock_irqrestore(&consistent_lock, flags);
 322
 323         if (c) {
 324                 unsigned long off = vma->vm_pgoff;
 325
 326                 kern_size = (c->vm_end - c->vm_start) >> PAGE_SHIFT;
 327
 328                 if (off < kern_size &&
 329                     user_size <= (kern_size - off)) {
 330                         ret = remap_pfn_range(vma, vma->vm_start,
 331                                               page_to_pfn(c->vm_pages) + off,
 332                                               user_size << PAGE_SHIFT,
 333                                               vma->vm_page_prot);
 334                 }
 335         }
 336
 337         return ret;
 338 }
 339
 340 int dma_mmap_coherent(struct device *dev, struct vm_area_struct *vma,
 341                       void *cpu_addr, dma_addr_t dma_addr, size_t size)
 342 {
 343         vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
 344         return dma_mmap(dev, vma, cpu_addr, dma_addr, size);
 345 }
 346 EXPORT_SYMBOL(dma_mmap_coherent);
 347
 348 int dma_mmap_writecombine(struct device *dev, struct vm_area_struct *vma,
 349                           void *cpu_addr, dma_addr_t dma_addr, size_t size)
 350 {
 351         vma->vm_page_prot = pgprot_writecombine(vma->vm_page_prot);
 352         return dma_mmap(dev, vma, cpu_addr, dma_addr, size);
 353 }
 354 EXPORT_SYMBOL(dma_mmap_writecombine);
 355
 356 /*
 357  * free a page as defined by the above mapping.
 358  * Must not be called with IRQs disabled.
 359  */
 360 void dma_free_coherent(struct device *dev, size_t size, void *cpu_addr, dma_addr_t handle)
 361 {
 362         struct vm_region *c;
 363         unsigned long flags, addr;
 364         pte_t *ptep;
 365         int idx;
 366         u32 off;
 367
 368         WARN_ON(irqs_disabled());
 369
 370         if (dma_release_from_coherent(dev, get_order(size), cpu_addr))
 371                 return;
 372
 373         if (arch_is_coherent()) {
 374                 kfree(cpu_addr);
 375                 return;
 376         }
 377
 378         size = PAGE_ALIGN(size);
 379
 380         spin_lock_irqsave(&consistent_lock, flags);
 381         c = vm_region_find(&consistent_head, (unsigned long)cpu_addr);
 382         if (!c)
 383                 goto no_area;
 384
 385         c->vm_active = 0;
 386         spin_unlock_irqrestore(&consistent_lock, flags);
 387
 388         if ((c->vm_end - c->vm_start) != size) {
 389                 printk(KERN_ERR "%s: freeing wrong coherent size (%ld != %d)\n",
 390                        __func__, c->vm_end - c->vm_start, size);
 391                 dump_stack();
 392                 size = c->vm_end - c->vm_start;
 393         }
 394
 395         idx = CONSISTENT_PTE_INDEX(c->vm_start);
 396         off = CONSISTENT_OFFSET(c->vm_start) & (PTRS_PER_PTE-1);
 397         ptep = consistent_pte[idx] + off;
 398         addr = c->vm_start;
 399         do {
 400                 pte_t pte = ptep_get_and_clear(&init_mm, addr, ptep);
 401                 unsigned long pfn;
 402
 403                 ptep++;
 404                 addr += PAGE_SIZE;
 405                 off++;
 406                 if (off >= PTRS_PER_PTE) {
 407                         off = 0;
 408                         ptep = consistent_pte[++idx];
 409                 }
 410
 411                 if (!pte_none(pte) && pte_present(pte)) {
 412                         pfn = pte_pfn(pte);
 413
 414                         if (pfn_valid(pfn)) {
 415                                 struct page *page = pfn_to_page(pfn);
 416
 417                                 /*
 418                                  * x86 does not mark the pages reserved...
 419                                  */
 420                                 ClearPageReserved(page);
 421
 422                                 __free_page(page);
 423                                 continue;
 424                         }
 425                 }
 426
 427                 printk(KERN_CRIT "%s: bad page in kernel page table\n",
 428                        __func__);
 429         } while (size -= PAGE_SIZE);
 430
 431         flush_tlb_kernel_range(c->vm_start, c->vm_end);
 432
 433         spin_lock_irqsave(&consistent_lock, flags);
 434         list_del(&c->vm_list);
 435         spin_unlock_irqrestore(&consistent_lock, flags);
 436
 437         kfree(c);
 438         return;
 439
 440  no_area:
 441         spin_unlock_irqrestore(&consistent_lock, flags);
 442         printk(KERN_ERR "%s: trying to free invalid coherent area: %p\n",
 443                __func__, cpu_addr);
 444         dump_stack();
 445 }
 446 EXPORT_SYMBOL(dma_free_coherent);
 447
 448 /*
 449  * Initialise the consistent memory allocation.
 450  */
 451 static int __init consistent_init(void)
 452 {
 453         pgd_t *pgd;
 454         pmd_t *pmd;
 455         pte_t *pte;
 456         int ret = 0, i = 0;
 457         u32 base = CONSISTENT_BASE;
 458
 459         do {
 460                 pgd = pgd_offset(&init_mm, base);
 461                 pmd = pmd_alloc(&init_mm, pgd, base);
 462                 if (!pmd) {
 463                         printk(KERN_ERR "%s: no pmd tables\n", __func__);
 464                         ret = -ENOMEM;
 465                         break;
 466                 }
 467                 WARN_ON(!pmd_none(*pmd));
 468
 469                 pte = pte_alloc_kernel(pmd, base);
 470                 if (!pte) {
 471                         printk(KERN_ERR "%s: no pte tables\n", __func__);
 472                         ret = -ENOMEM;
 473                         break;
 474                 }
 475
 476                 consistent_pte[i++] = pte;
 477                 base += (1 << PGDIR_SHIFT);
 478         } while (base < CONSISTENT_END);
 479
 480         return ret;
 481 }
 482
 483 core_initcall(consistent_init);
 484
 485 /*
 486  * Make an area consistent for devices.
 487  * Note: Drivers should NOT use this function directly, as it will break
 488  * platforms with CONFIG_DMABOUNCE.
 489  * Use the driver DMA support - see dma-mapping.h (dma_sync_*)
 490  */
 491 void dma_cache_maint(const void *start, size_t size, int direction)
 492 {
 493         const void *end = start + size;
 494
 495         BUG_ON(!virt_addr_valid(start) || !virt_addr_valid(end - 1));
 496
 497         switch (direction) {
 498         case DMA_FROM_DEVICE:           /* invalidate only */
 499                 dmac_inv_range(start, end);
 500                 outer_inv_range(__pa(start), __pa(end));
 501                 break;
 502         case DMA_TO_DEVICE:             /* writeback only */
 503                 dmac_clean_range(start, end);
 504                 outer_clean_range(__pa(start), __pa(end));
 505                 break;
 506         case DMA_BIDIRECTIONAL:         /* writeback and invalidate */
 507                 dmac_flush_range(start, end);
 508                 outer_flush_range(__pa(start), __pa(end));
 509                 break;
 510         default:
 511                 BUG();
 512         }
 513 }
 514 EXPORT_SYMBOL(dma_cache_maint);
 515
 516 /**
 517  * dma_map_sg - map a set of SG buffers for streaming mode DMA
 518  * @dev: valid struct device pointer, or NULL for ISA and EISA-like devices
 519  * @sg: list of buffers
 520  * @nents: number of buffers to map
 521  * @dir: DMA transfer direction
 522  *
 523  * Map a set of buffers described by scatterlist in streaming mode for DMA.
 524  * This is the scatter-gather version of the dma_map_single interface.
 525  * Here the scatter gather list elements are each tagged with the
 526  * appropriate dma address and length.  They are obtained via
 527  * sg_dma_{address,length}.
 528  *
 529  * Device ownership issues as mentioned for dma_map_single are the same
 530  * here.
 531  */
 532 int dma_map_sg(struct device *dev, struct scatterlist *sg, int nents,
 533                 enum dma_data_direction dir)
 534 {
 535         struct scatterlist *s;
 536         int i, j;
 537
 538         for_each_sg(sg, s, nents, i) {
 539                 s->dma_address = dma_map_page(dev, sg_page(s), s->offset,
 540                                                 s->length, dir);
 541                 if (dma_mapping_error(dev, s->dma_address))
 542                         goto bad_mapping;
 543         }
 544         return nents;
 545
 546  bad_mapping:
 547         for_each_sg(sg, s, i, j)
 548                 dma_unmap_page(dev, sg_dma_address(s), sg_dma_len(s), dir);
 549         return 0;
 550 }
 551 EXPORT_SYMBOL(dma_map_sg);
 552
 553 /**
 554  * dma_unmap_sg - unmap a set of SG buffers mapped by dma_map_sg
 555  * @dev: valid struct device pointer, or NULL for ISA and EISA-like devices
 556  * @sg: list of buffers
 557  * @nents: number of buffers to unmap (returned from dma_map_sg)
 558  * @dir: DMA transfer direction (same as was passed to dma_map_sg)
 559  *
 560  * Unmap a set of streaming mode DMA translations.  Again, CPU access
 561  * rules concerning calls here are the same as for dma_unmap_single().
 562  */
 563 void dma_unmap_sg(struct device *dev, struct scatterlist *sg, int nents,
 564                 enum dma_data_direction dir)
 565 {
 566         struct scatterlist *s;
 567         int i;
 568
 569         for_each_sg(sg, s, nents, i)
 570                 dma_unmap_page(dev, sg_dma_address(s), sg_dma_len(s), dir);
 571 }
 572 EXPORT_SYMBOL(dma_unmap_sg);
 573
 574 /**
 575  * dma_sync_sg_for_cpu
 576  * @dev: valid struct device pointer, or NULL for ISA and EISA-like devices
 577  * @sg: list of buffers
 578  * @nents: number of buffers to map (returned from dma_map_sg)
 579  * @dir: DMA transfer direction (same as was passed to dma_map_sg)
 580  */
 581 void dma_sync_sg_for_cpu(struct device *dev, struct scatterlist *sg,
 582                         int nents, enum dma_data_direction dir)
 583 {
 584         struct scatterlist *s;
 585         int i;
 586
 587         for_each_sg(sg, s, nents, i) {
 588                 dmabounce_sync_for_cpu(dev, sg_dma_address(s), 0,
 589                                         sg_dma_len(s), dir);
 590         }
 591 }
 592 EXPORT_SYMBOL(dma_sync_sg_for_cpu);
 593
 594 /**
 595  * dma_sync_sg_for_device
 596  * @dev: valid struct device pointer, or NULL for ISA and EISA-like devices
 597  * @sg: list of buffers
 598  * @nents: number of buffers to map (returned from dma_map_sg)
 599  * @dir: DMA transfer direction (same as was passed to dma_map_sg)
 600  */
 601 void dma_sync_sg_for_device(struct device *dev, struct scatterlist *sg,
 602                         int nents, enum dma_data_direction dir)
 603 {
 604         struct scatterlist *s;
 605         int i;
 606
 607         for_each_sg(sg, s, nents, i) {
 608                 if (!dmabounce_sync_for_device(dev, sg_dma_address(s), 0,
 609                                         sg_dma_len(s), dir))
 610                         continue;
 611
 612                 if (!arch_is_coherent())
 613                         dma_cache_maint(sg_virt(s), s->length, dir);
 614         }
 615 }
 616 EXPORT_SYMBOL(dma_sync_sg_for_device);