git.oblomov.eu Git - linux-2.6/blob - arch/powerpc/platforms/cell/spufs/sched.c

   1 /* sched.c - SPU scheduler.
   2  *
   3  * Copyright (C) IBM 2005
   4  * Author: Mark Nutter <mnutter@us.ibm.com>
   5  *
   6  * 2006-03-31   NUMA domains added.
   7  *
   8  * This program is free software; you can redistribute it and/or modify
   9  * it under the terms of the GNU General Public License as published by
  10  * the Free Software Foundation; either version 2, or (at your option)
  11  * any later version.
  12  *
  13  * This program is distributed in the hope that it will be useful,
  14  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  15  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  16  * GNU General Public License for more details.
  17  *
  18  * You should have received a copy of the GNU General Public License
  19  * along with this program; if not, write to the Free Software
  20  * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  21  */
  22
  23 #undef DEBUG
  24
  25 #include <linux/module.h>
  26 #include <linux/errno.h>
  27 #include <linux/sched.h>
  28 #include <linux/kernel.h>
  29 #include <linux/mm.h>
  30 #include <linux/completion.h>
  31 #include <linux/vmalloc.h>
  32 #include <linux/smp.h>
  33 #include <linux/smp_lock.h>
  34 #include <linux/stddef.h>
  35 #include <linux/unistd.h>
  36 #include <linux/numa.h>
  37 #include <linux/mutex.h>
  38 #include <linux/notifier.h>
  39
  40 #include <asm/io.h>
  41 #include <asm/mmu_context.h>
  42 #include <asm/spu.h>
  43 #include <asm/spu_csa.h>
  44 #include <asm/spu_priv1.h>
  45 #include "spufs.h"
  46
  47 #define SPU_MIN_TIMESLICE       (100 * HZ / 1000)
  48
  49 struct spu_prio_array {
  50         DECLARE_BITMAP(bitmap, MAX_PRIO);
  51         struct list_head runq[MAX_PRIO];
  52         spinlock_t runq_lock;
  53         struct list_head active_list[MAX_NUMNODES];
  54         struct mutex active_mutex[MAX_NUMNODES];
  55 };
  56
  57 static struct spu_prio_array *spu_prio;
  58
  59 static inline int node_allowed(int node)
  60 {
  61         cpumask_t mask;
  62
  63         if (!nr_cpus_node(node))
  64                 return 0;
  65         mask = node_to_cpumask(node);
  66         if (!cpus_intersects(mask, current->cpus_allowed))
  67                 return 0;
  68         return 1;
  69 }
  70
  71 /**
  72  * spu_add_to_active_list - add spu to active list
  73  * @spu:        spu to add to the active list
  74  */
  75 static void spu_add_to_active_list(struct spu *spu)
  76 {
  77         mutex_lock(&spu_prio->active_mutex[spu->node]);
  78         list_add_tail(&spu->list, &spu_prio->active_list[spu->node]);
  79         mutex_unlock(&spu_prio->active_mutex[spu->node]);
  80 }
  81
  82 /**
  83  * spu_remove_from_active_list - remove spu from active list
  84  * @spu:       spu to remove from the active list
  85  */
  86 static void spu_remove_from_active_list(struct spu *spu)
  87 {
  88         int node = spu->node;
  89
  90         mutex_lock(&spu_prio->active_mutex[node]);
  91         list_del_init(&spu->list);
  92         mutex_unlock(&spu_prio->active_mutex[node]);
  93 }
  94
  95 static inline void mm_needs_global_tlbie(struct mm_struct *mm)
  96 {
  97         int nr = (NR_CPUS > 1) ? NR_CPUS : NR_CPUS + 1;
  98
  99         /* Global TLBIE broadcast required with SPEs. */
 100         __cpus_setall(&mm->cpu_vm_mask, nr);
 101 }
 102
 103 static BLOCKING_NOTIFIER_HEAD(spu_switch_notifier);
 104
 105 static void spu_switch_notify(struct spu *spu, struct spu_context *ctx)
 106 {
 107         blocking_notifier_call_chain(&spu_switch_notifier,
 108                             ctx ? ctx->object_id : 0, spu);
 109 }
 110
 111 int spu_switch_event_register(struct notifier_block * n)
 112 {
 113         return blocking_notifier_chain_register(&spu_switch_notifier, n);
 114 }
 115
 116 int spu_switch_event_unregister(struct notifier_block * n)
 117 {
 118         return blocking_notifier_chain_unregister(&spu_switch_notifier, n);
 119 }
 120
/**
 * spu_bind_context - bind spu context to physical spu
 * @spu:        physical spu to bind to
 * @ctx:        context to bind
 *
 * Cross-links @spu and @ctx, switches @ctx to the hardware ops, installs
 * the spufs callbacks on @spu, restores the saved context state, puts @spu
 * on the active list of its node and finally marks @ctx as
 * SPU_STATE_RUNNABLE.
 *
 * NOTE(review): callers in this file appear to hold ctx->state_mutex when
 * calling this; the function itself does not take it -- confirm.
 */
static void spu_bind_context(struct spu *spu, struct spu_context *ctx)
{
        pr_debug("%s: pid=%d SPU=%d NODE=%d\n", __FUNCTION__, current->pid,
                 spu->number, spu->node);
        /* Link the physical spu and the context to each other. */
        spu->ctx = ctx;
        spu->flags = 0;
        ctx->spu = spu;
        /* From now on context operations go through spu_hw_ops. */
        ctx->ops = &spu_hw_ops;
        /* Record the binding task and the address space of the context. */
        spu->pid = current->pid;
        spu->mm = ctx->owner;
        mm_needs_global_tlbie(spu->mm);
        /* Install the spufs callbacks on the spu. */
        spu->ibox_callback = spufs_ibox_callback;
        spu->wbox_callback = spufs_wbox_callback;
        spu->stop_callback = spufs_stop_callback;
        spu->mfc_callback = spufs_mfc_callback;
        spu->dma_callback = spufs_dma_callback;
        /*
         * Presumably orders the stores above before the mappings are torn
         * down and the context is restored -- TODO confirm.
         */
        mb();
        spu_unmap_mappings(ctx);
        spu_restore(&ctx->csa, spu);
        spu->timestamp = jiffies;
        spu_cpu_affinity_set(spu, raw_smp_processor_id());
        /* Notify listeners with the object_id of the new context. */
        spu_switch_notify(spu, ctx);
        spu_add_to_active_list(spu);
        ctx->state = SPU_STATE_RUNNABLE;
}
 151
/**
 * spu_unbind_context - unbind spu context from physical spu
 * @spu:        physical spu to unbind from
 * @ctx:        context to unbind
 *
 * Reverse of spu_bind_context(): takes @spu off the active list, notifies
 * switch listeners, saves the context state, marks @ctx as
 * SPU_STATE_SAVED, clears the callbacks and owner information on @spu and
 * switches @ctx to the backing ops.  The links between @spu and @ctx are
 * cleared last.
 *
 * NOTE(review): callers in this file appear to hold ctx->state_mutex when
 * calling this; the function itself does not take it -- confirm.
 */
static void spu_unbind_context(struct spu *spu, struct spu_context *ctx)
{
        pr_debug("%s: unbind pid=%d SPU=%d NODE=%d\n", __FUNCTION__,
                 spu->pid, spu->number, spu->node);

        /* Off the active list first, so find_victim() stops seeing it. */
        spu_remove_from_active_list(spu);
        /* A NULL context makes the notifier event value 0. */
        spu_switch_notify(spu, NULL);
        spu_unmap_mappings(ctx);
        spu_save(&ctx->csa, spu);
        spu->timestamp = jiffies;
        ctx->state = SPU_STATE_SAVED;
        /* Drop all callbacks that were installed at bind time. */
        spu->ibox_callback = NULL;
        spu->wbox_callback = NULL;
        spu->stop_callback = NULL;
        spu->mfc_callback = NULL;
        spu->dma_callback = NULL;
        spu->mm = NULL;
        spu->pid = 0;
        /* From now on context operations go through spu_backing_ops. */
        ctx->ops = &spu_backing_ops;
        ctx->spu = NULL;
        spu->flags = 0;
        spu->ctx = NULL;
}
 180
 181 /**
 182  * spu_add_to_rq - add a context to the runqueue
 183  * @ctx:       context to add
 184  */
 185 static void spu_add_to_rq(struct spu_context *ctx)
 186 {
 187         spin_lock(&spu_prio->runq_lock);
 188         list_add_tail(&ctx->rq, &spu_prio->runq[ctx->prio]);
 189         set_bit(ctx->prio, spu_prio->bitmap);
 190         spin_unlock(&spu_prio->runq_lock);
 191 }
 192
 193 /**
 194  * spu_del_from_rq - remove a context from the runqueue
 195  * @ctx:       context to remove
 196  */
 197 static void spu_del_from_rq(struct spu_context *ctx)
 198 {
 199         spin_lock(&spu_prio->runq_lock);
 200         list_del_init(&ctx->rq);
 201         if (list_empty(&spu_prio->runq[ctx->prio]))
 202                 clear_bit(ctx->prio, spu_prio->bitmap);
 203         spin_unlock(&spu_prio->runq_lock);
 204 }
 205
 206 /**
 207  * spu_grab_context - remove one context from the runqueue
 208  * @prio:      priority of the context to be removed
 209  *
 210  * This function removes one context from the runqueue for priority @prio.
 211  * If there is more than one context with the given priority the first
 212  * task on the runqueue will be taken.
 213  *
 214  * Returns the spu_context it just removed.
 215  *
 216  * Must be called with spu_prio->runq_lock held.
 217  */
 218 static struct spu_context *spu_grab_context(int prio)
 219 {
 220         struct list_head *rq = &spu_prio->runq[prio];
 221
 222         if (list_empty(rq))
 223                 return NULL;
 224         return list_entry(rq->next, struct spu_context, rq);
 225 }
 226
 227 static void spu_prio_wait(struct spu_context *ctx)
 228 {
 229         DEFINE_WAIT(wait);
 230
 231         set_bit(SPU_SCHED_WAKE, &ctx->sched_flags);
 232         prepare_to_wait_exclusive(&ctx->stop_wq, &wait, TASK_INTERRUPTIBLE);
 233         if (!signal_pending(current)) {
 234                 mutex_unlock(&ctx->state_mutex);
 235                 schedule();
 236                 mutex_lock(&ctx->state_mutex);
 237         }
 238         __set_current_state(TASK_RUNNING);
 239         remove_wait_queue(&ctx->stop_wq, &wait);
 240         clear_bit(SPU_SCHED_WAKE, &ctx->sched_flags);
 241 }
 242
 243 /**
 244  * spu_reschedule - try to find a runnable context for a spu
 245  * @spu:       spu available
 246  *
 247  * This function is called whenever a spu becomes idle.  It looks for the
 248  * most suitable runnable spu context and schedules it for execution.
 249  */
 250 static void spu_reschedule(struct spu *spu)
 251 {
 252         int best;
 253
 254         spu_free(spu);
 255
 256         spin_lock(&spu_prio->runq_lock);
 257         best = sched_find_first_bit(spu_prio->bitmap);
 258         if (best < MAX_PRIO) {
 259                 struct spu_context *ctx = spu_grab_context(best);
 260                 if (ctx && test_bit(SPU_SCHED_WAKE, &ctx->sched_flags))
 261                         wake_up(&ctx->stop_wq);
 262         }
 263         spin_unlock(&spu_prio->runq_lock);
 264 }
 265
 266 static struct spu *spu_get_idle(struct spu_context *ctx)
 267 {
 268         struct spu *spu = NULL;
 269         int node = cpu_to_node(raw_smp_processor_id());
 270         int n;
 271
 272         for (n = 0; n < MAX_NUMNODES; n++, node++) {
 273                 node = (node < MAX_NUMNODES) ? node : 0;
 274                 if (!node_allowed(node))
 275                         continue;
 276                 spu = spu_alloc_node(node);
 277                 if (spu)
 278                         break;
 279         }
 280         return spu;
 281 }
 282
/**
 * find_victim - find a lower priority context to preempt
 * @ctx:        candidate context for running
 *
 * Scans the active lists, node by node, for the bound context with the
 * lowest rt_priority that is also lower than @ctx->rt_priority, and
 * unbinds it from its spu.
 *
 * Returns the freed physical spu to run the new context on, or NULL if no
 * allowed node has a suitable victim.
 */
static struct spu *find_victim(struct spu_context *ctx)
{
        struct spu_context *victim = NULL;
        struct spu *spu;
        int node, n;

        /*
         * Look for a possible preemption candidate on the local node first.
         * If there is no candidate look at the other nodes.  This isn't
         * exactly fair, but so far the whole spu schedule tries to keep
         * a strong node affinity.  We might want to fine-tune this in
         * the future.
         */
 restart:
        node = cpu_to_node(raw_smp_processor_id());
        for (n = 0; n < MAX_NUMNODES; n++, node++) {
                /* Wrap around once the last node id has been passed. */
                node = (node < MAX_NUMNODES) ? node : 0;
                if (!node_allowed(node))
                        continue;

                /*
                 * victim is always NULL here: a non-NULL victim either
                 * leads to a return or is reset before the restart.
                 */
                mutex_lock(&spu_prio->active_mutex[node]);
                list_for_each_entry(spu, &spu_prio->active_list[node], list) {
                        struct spu_context *tmp = spu->ctx;

                        /* Keep the lowest rt_priority below our own. */
                        if (tmp->rt_priority < ctx->rt_priority &&
                            (!victim || tmp->rt_priority < victim->rt_priority))
                                victim = spu->ctx;
                }
                mutex_unlock(&spu_prio->active_mutex[node]);

                if (victim) {
                        /*
                         * This nests ctx->state_mutex, but we always lock
                         * higher priority contexts before lower priority
                         * ones, so this is safe until we introduce
                         * priority inheritance schemes.
                         *
                         * NOTE(review): nothing bounds the number of
                         * restarts when the trylock keeps failing.
                         */
                        if (!mutex_trylock(&victim->state_mutex)) {
                                victim = NULL;
                                goto restart;
                        }

                        spu = victim->spu;
                        if (!spu) {
                                /*
                                 * This race can happen because we've dropped
                                 * the active list mutex.  Not a problem, just
                                 * restart the search.
                                 */
                                mutex_unlock(&victim->state_mutex);
                                victim = NULL;
                                goto restart;
                        }
                        /* Take the spu away from the victim and hand it out. */
                        spu_unbind_context(spu, victim);
                        mutex_unlock(&victim->state_mutex);
                        return spu;
                }
        }

        return NULL;
}
 350
 351 /**
 352  * spu_activate - find a free spu for a context and execute it
 353  * @ctx:        spu context to schedule
 354  * @flags:      flags (currently ignored)
 355  *
 356  * Tries to find a free spu to run @ctx.  If no free spu is availble
 357  * add the context to the runqueue so it gets woken up once an spu
 358  * is available.
 359  */
 360 int spu_activate(struct spu_context *ctx, unsigned long flags)
 361 {
 362
 363         if (ctx->spu)
 364                 return 0;
 365
 366         do {
 367                 struct spu *spu;
 368
 369                 spu = spu_get_idle(ctx);
 370                 /*
 371                  * If this is a realtime thread we try to get it running by
 372                  * preempting a lower priority thread.
 373                  */
 374                 if (!spu && ctx->rt_priority)
 375                         spu = find_victim(ctx);
 376                 if (spu) {
 377                         spu_bind_context(spu, ctx);
 378                         return 0;
 379                 }
 380
 381                 spu_add_to_rq(ctx);
 382                 if (!(flags & SPU_ACTIVATE_NOWAKE))
 383                         spu_prio_wait(ctx);
 384                 spu_del_from_rq(ctx);
 385         } while (!signal_pending(current));
 386
 387         return -ERESTARTSYS;
 388 }
 389
 390 /**
 391  * spu_deactivate - unbind a context from it's physical spu
 392  * @ctx:        spu context to unbind
 393  *
 394  * Unbind @ctx from the physical spu it is running on and schedule
 395  * the highest priority context to run on the freed physical spu.
 396  */
 397 void spu_deactivate(struct spu_context *ctx)
 398 {
 399         struct spu *spu = ctx->spu;
 400
 401         if (spu) {
 402                 spu_unbind_context(spu, ctx);
 403                 spu_reschedule(spu);
 404         }
 405 }
 406
 407 /**
 408  * spu_yield -  yield a physical spu if others are waiting
 409  * @ctx:        spu context to yield
 410  *
 411  * Check if there is a higher priority context waiting and if yes
 412  * unbind @ctx from the physical spu and schedule the highest
 413  * priority context to run on the freed physical spu instead.
 414  */
 415 void spu_yield(struct spu_context *ctx)
 416 {
 417         struct spu *spu;
 418         int need_yield = 0;
 419
 420         if (mutex_trylock(&ctx->state_mutex)) {
 421                 if ((spu = ctx->spu) != NULL) {
 422                         int best = sched_find_first_bit(spu_prio->bitmap);
 423                         if (best < MAX_PRIO) {
 424                                 pr_debug("%s: yielding SPU %d NODE %d\n",
 425                                          __FUNCTION__, spu->number, spu->node);
 426                                 spu_deactivate(ctx);
 427                                 need_yield = 1;
 428                         }
 429                 }
 430                 mutex_unlock(&ctx->state_mutex);
 431         }
 432         if (unlikely(need_yield))
 433                 yield();
 434 }
 435
 436 int __init spu_sched_init(void)
 437 {
 438         int i;
 439
 440         spu_prio = kzalloc(sizeof(struct spu_prio_array), GFP_KERNEL);
 441         if (!spu_prio) {
 442                 printk(KERN_WARNING "%s: Unable to allocate priority queue.\n",
 443                        __FUNCTION__);
 444                 return 1;
 445         }
 446         for (i = 0; i < MAX_PRIO; i++) {
 447                 INIT_LIST_HEAD(&spu_prio->runq[i]);
 448                 __clear_bit(i, spu_prio->bitmap);
 449         }
 450         __set_bit(MAX_PRIO, spu_prio->bitmap);
 451         for (i = 0; i < MAX_NUMNODES; i++) {
 452                 mutex_init(&spu_prio->active_mutex[i]);
 453                 INIT_LIST_HEAD(&spu_prio->active_list[i]);
 454         }
 455         spin_lock_init(&spu_prio->runq_lock);
 456         return 0;
 457 }
 458
 459 void __exit spu_sched_exit(void)
 460 {
 461         struct spu *spu, *tmp;
 462         int node;
 463
 464         for (node = 0; node < MAX_NUMNODES; node++) {
 465                 mutex_lock(&spu_prio->active_mutex[node]);
 466                 list_for_each_entry_safe(spu, tmp, &spu_prio->active_list[node],
 467                                          list) {
 468                         list_del_init(&spu->list);
 469                         spu_free(spu);
 470                 }
 471                 mutex_unlock(&spu_prio->active_mutex[node]);
 472         }
 473         kfree(spu_prio);
 474 }