2 * IBM eServer eHCA Infiniband device driver for Linux on POWER
4 * Functions for EQs, NEQs and interrupts
6 * Authors: Heiko J Schick <schickhj@de.ibm.com>
7 * Khadija Souissi <souissi@de.ibm.com>
9 * Copyright (c) 2005 IBM Corporation
11 * All rights reserved.
13 * This source code is distributed under a dual license of GPL v2.0 and OpenIB
18 * Redistribution and use in source and binary forms, with or without
19 * modification, are permitted provided that the following conditions are met:
21 * Redistributions of source code must retain the above copyright notice, this
22 * list of conditions and the following disclaimer.
24 * Redistributions in binary form must reproduce the above copyright notice,
25 * this list of conditions and the following disclaimer in the documentation
26 * and/or other materials
27 * provided with the distribution.
29 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
30 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
31 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
32 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
33 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
34 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
35 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
36 * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
37 * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
38 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
39 * POSSIBILITY OF SUCH DAMAGE.
42 #include "ehca_classes.h"
44 #include "ehca_iverbs.h"
45 #include "ehca_tools.h"
48 #include "ipz_pt_fn.h"
/* Fields extracted from an event queue entry (EQE) with EHCA_BMASK_GET(). */
50 #define EQE_COMPLETION_EVENT EHCA_BMASK_IBM(1,1)
51 #define EQE_CQ_QP_NUMBER EHCA_BMASK_IBM(8,31)
52 #define EQE_EE_IDENTIFIER EHCA_BMASK_IBM(2,7)
53 #define EQE_CQ_NUMBER EHCA_BMASK_IBM(8,31)
54 #define EQE_QP_NUMBER EHCA_BMASK_IBM(8,31)
/* the token fields are used as idr keys to look up the QP / CQ */
55 #define EQE_QP_TOKEN EHCA_BMASK_IBM(32,63)
56 #define EQE_CQ_TOKEN EHCA_BMASK_IBM(32,63)
/* Fields extracted from a notification event queue entry (NEQE). */
58 #define NEQE_COMPLETION_EVENT EHCA_BMASK_IBM(1,1)
59 #define NEQE_EVENT_CODE EHCA_BMASK_IBM(2,7)
60 #define NEQE_PORT_NUMBER EHCA_BMASK_IBM(8,15)
61 #define NEQE_PORT_AVAILABILITY EHCA_BMASK_IBM(16,16)
/*
 * Fields of the error data block: the length is read from rblock[0] and the
 * type from rblock[2].
 */
63 #define ERROR_DATA_LENGTH EHCA_BMASK_IBM(52,63)
64 #define ERROR_DATA_TYPE EHCA_BMASK_IBM(0,7)
/* Declarations needed only when completion-task scaling is configured. */
66 #ifdef CONFIG_INFINIBAND_EHCA_SCALING
68 static void queue_comp_task(struct ehca_cq *__cq);
/* completion pool shared by all functions in this file */
70 static struct ehca_comp_pool* pool;
/* CPU notifier block; filled in and registered by ehca_create_comp_pool() */
71 static struct notifier_block comp_pool_callback_nb;
/*
 * comp_event_callback - run the completion handler registered on a CQ
 * @cq: completion queue whose ib_cq.comp_handler is to be invoked
 *
 * The handler is called with &cq->ib_cq and cq->ib_cq.cq_context while
 * cq->cb_lock is held.
 */
75 static inline void comp_event_callback(struct ehca_cq *cq)
/* a CQ without a registered completion handler is checked for first */
77 if (!cq->ib_cq.comp_handler)
80 spin_lock(&cq->cb_lock);
81 cq->ib_cq.comp_handler(&cq->ib_cq, cq->ib_cq.cq_context);
82 spin_unlock(&cq->cb_lock);
/*
 * print_error_data - log an error data block belonging to a QP or CQ
 * @shca:   adapter the error was reported on (used for log output)
 * @data:   the affected object; interpreted as struct ehca_qp for type 0x1
 *          and as struct ehca_cq for type 0x4
 * @rblock: error data block; rblock[1] is logged as the resource and
 *          rblock[2] carries the ERROR_DATA_TYPE field
 * @length: number of bytes of @rblock handed to ehca_dmp()
 */
87 static void print_error_data(struct ehca_shca * shca, void* data,
88 u64* rblock, int length)
90 u64 type = EHCA_BMASK_GET(ERROR_DATA_TYPE, rblock[2]);
91 u64 resource = rblock[1];
94 case 0x1: /* Queue Pair */
96 struct ehca_qp *qp = (struct ehca_qp*)data;
98 /* only print error data if AER is set */
102 ehca_err(&shca->ib_device,
103 "QP 0x%x (resource=%lx) has errors.",
104 qp->ib_qp.qp_num, resource);
107 case 0x4: /* Completion Queue */
109 struct ehca_cq *cq = (struct ehca_cq*)data;
111 ehca_err(&shca->ib_device,
112 "CQ 0x%x (resource=%lx) has errors.",
113 cq->cq_number, resource);
/* any other type value is only reported, together with the device name */
117 ehca_err(&shca->ib_device,
118 "Unknown errror type: %lx on %s.",
119 type, shca->ib_device.name);
/* dump the raw block between begin/end marker lines */
123 ehca_err(&shca->ib_device, "Error data is available: %lx.", resource);
124 ehca_err(&shca->ib_device, "EHCA ----- error data begin "
125 "---------------------------------------------------");
126 ehca_dmp(rblock, length, "resource=%lx", resource);
127 ehca_err(&shca->ib_device, "EHCA ----- error data end "
128 "----------------------------------------------------");
/*
 * ehca_error_data - fetch and print the error data of a resource
 * @shca: adapter to query
 * @data: object the resource belongs to; passed through to
 *        print_error_data()
 *
 * A firmware control block is allocated with GFP_ATOMIC, filled through
 * hipz_h_error_data() and released again with ehca_free_fw_ctrlblock().
 * H_R_STATE is logged as "no error data available"; on H_SUCCESS the length
 * taken from rblock[0] is capped at EHCA_PAGESIZE before the block is printed.
 */
133 int ehca_error_data(struct ehca_shca *shca, void *data,
139 unsigned long block_count;
141 rblock = ehca_alloc_fw_ctrlblock(GFP_ATOMIC);
143 ehca_err(&shca->ib_device, "Cannot allocate rblock memory.");
148 /* rblock must be 4K aligned and should be 4K large */
149 ret = hipz_h_error_data(shca->ipz_hca_handle,
154 if (ret == H_R_STATE)
155 ehca_err(&shca->ib_device,
156 "No error data is available: %lx.", resource);
157 else if (ret == H_SUCCESS) {
160 length = EHCA_BMASK_GET(ERROR_DATA_LENGTH, rblock[0]);
/* never dump more than one control block page */
162 if (length > EHCA_PAGESIZE)
163 length = EHCA_PAGESIZE;
165 print_error_data(shca, data, rblock, length);
167 ehca_err(&shca->ib_device,
168 "Error data could not be fetched: %lx", resource);
170 ehca_free_fw_ctrlblock(rblock);
/*
 * qp_event_callback - deliver an asynchronous event to a queue pair
 * @shca:       adapter that raised the event
 * @event_type: IB event type to report to the QP's event handler
 *
 * The QP is looked up in ehca_qp_idr by the EQE_QP_TOKEN field of the event
 * entry (under ehca_qp_idr_lock), its error data is fetched with
 * ehca_error_data(), and the QP's event_handler, when one is registered, is
 * called with an ib_event describing the device, event type and QP.
 */
177 static void qp_event_callback(struct ehca_shca *shca,
179 enum ib_event_type event_type)
181 struct ib_event event;
184 u32 token = EHCA_BMASK_GET(EQE_QP_TOKEN, eqe);
/* the idr lock is held only for the lookup itself */
186 spin_lock_irqsave(&ehca_qp_idr_lock, flags);
187 qp = idr_find(&ehca_qp_idr, token);
188 spin_unlock_irqrestore(&ehca_qp_idr_lock, flags);
194 ehca_error_data(shca, qp, qp->ipz_qp_handle.handle);
196 if (!qp->ib_qp.event_handler)
199 event.device = &shca->ib_device;
200 event.event = event_type;
201 event.element.qp = &qp->ib_qp;
203 qp->ib_qp.event_handler(&event, qp->ib_qp.qp_context);
/*
 * cq_event_callback - handle an asynchronous event reported for a CQ
 * @shca: adapter that raised the event
 *
 * The CQ is looked up in ehca_cq_idr by the EQE_CQ_TOKEN field of the event
 * entry (under ehca_cq_idr_lock) and its error data is fetched and printed
 * through ehca_error_data().
 */
208 static void cq_event_callback(struct ehca_shca *shca,
213 u32 token = EHCA_BMASK_GET(EQE_CQ_TOKEN, eqe);
/* the idr lock is held only for the lookup itself */
215 spin_lock_irqsave(&ehca_cq_idr_lock, flags);
216 cq = idr_find(&ehca_cq_idr, token);
217 spin_unlock_irqrestore(&ehca_cq_idr_lock, flags);
222 ehca_error_data(shca, cq, cq->ipz_cq_handle.handle);
/*
 * parse_identifier - dispatch a non-completion event queue entry
 * @shca: adapter that raised the event
 * @eqe:  raw event queue entry value
 *
 * The EQE_EE_IDENTIFIER field selects the action: identifiers 0x02-0x06 are
 * forwarded to qp_event_callback() with the matching IB event type, 0x07 and
 * 0x08 go to cq_event_callback(), and the remaining known identifiers are
 * only logged.  Unrecognised identifiers are logged with the device name.
 */
227 static void parse_identifier(struct ehca_shca *shca, u64 eqe)
229 u8 identifier = EHCA_BMASK_GET(EQE_EE_IDENTIFIER, eqe);
231 switch (identifier) {
232 case 0x02: /* path migrated */
233 qp_event_callback(shca, eqe, IB_EVENT_PATH_MIG);
235 case 0x03: /* communication established */
236 qp_event_callback(shca, eqe, IB_EVENT_COMM_EST);
238 case 0x04: /* send queue drained */
239 qp_event_callback(shca, eqe, IB_EVENT_SQ_DRAINED);
/* both QP error identifiers are reported as a fatal QP event */
241 case 0x05: /* QP error */
242 case 0x06: /* QP error */
243 qp_event_callback(shca, eqe, IB_EVENT_QP_FATAL);
245 case 0x07: /* CQ error */
246 case 0x08: /* CQ error */
247 cq_event_callback(shca, eqe);
/* the identifiers below are logged only */
249 case 0x09: /* MRMWPTE error */
250 ehca_err(&shca->ib_device, "MRMWPTE error.");
252 case 0x0A: /* port event */
253 ehca_err(&shca->ib_device, "Port event.");
255 case 0x0B: /* MR access error */
256 ehca_err(&shca->ib_device, "MR access error.");
258 case 0x0C: /* EQ error */
259 ehca_err(&shca->ib_device, "EQ error.");
261 case 0x0D: /* P/Q_Key mismatch */
262 ehca_err(&shca->ib_device, "P/Q_Key mismatch.");
264 case 0x10: /* sampling complete */
265 ehca_err(&shca->ib_device, "Sampling complete.");
267 case 0x11: /* unaffiliated access error */
268 ehca_err(&shca->ib_device, "Unaffiliated access error.");
270 case 0x12: /* path migrating error */
271 ehca_err(&shca->ib_device, "Path migration error.");
273 case 0x13: /* interface trace stopped */
274 ehca_err(&shca->ib_device, "Interface trace stopped.");
276 case 0x14: /* first error capture info available */
278 ehca_err(&shca->ib_device, "Unknown identifier: %x on %s.",
279 identifier, shca->ib_device.name);
/*
 * parse_ec - handle a notification event by its event code
 * @shca: adapter that raised the event
 * @eqe:  raw notification event queue entry value
 *
 * The event code and port number are taken from the NEQE_EVENT_CODE and
 * NEQE_PORT_NUMBER fields.  Port state changes update
 * shca->sport[port - 1].port_state and are announced through
 * ib_dispatch_event(); adapter malfunction, trace stopped and unknown codes
 * are only logged.
 *
 * NOTE(review): port comes straight from the event entry and is used as
 * sport[port - 1] without a range check - confirm firmware never reports
 * port 0 or a port beyond the sport array.
 */
286 static void parse_ec(struct ehca_shca *shca, u64 eqe)
288 struct ib_event event;
289 u8 ec = EHCA_BMASK_GET(NEQE_EVENT_CODE, eqe);
290 u8 port = EHCA_BMASK_GET(NEQE_PORT_NUMBER, eqe);
293 case 0x30: /* port availability change */
/* the availability bit selects between PORT_ACTIVE and PORT_ERR */
294 if (EHCA_BMASK_GET(NEQE_PORT_AVAILABILITY, eqe)) {
295 ehca_info(&shca->ib_device,
296 "port %x is active.", port);
297 event.device = &shca->ib_device;
298 event.event = IB_EVENT_PORT_ACTIVE;
299 event.element.port_num = port;
300 shca->sport[port - 1].port_state = IB_PORT_ACTIVE;
301 ib_dispatch_event(&event);
303 ehca_info(&shca->ib_device,
304 "port %x is inactive.", port);
305 event.device = &shca->ib_device;
306 event.event = IB_EVENT_PORT_ERR;
307 event.element.port_num = port;
308 shca->sport[port - 1].port_state = IB_PORT_DOWN;
309 ib_dispatch_event(&event);
313 /* port configuration change
314 * disruptive change is caused by
315 * LID, PKEY or SM change
317 ehca_warn(&shca->ib_device,
318 "disruptive port %x configuration change", port);
/* the port is reported down ... */
320 ehca_info(&shca->ib_device,
321 "port %x is inactive.", port);
322 event.device = &shca->ib_device;
323 event.event = IB_EVENT_PORT_ERR;
324 event.element.port_num = port;
325 shca->sport[port - 1].port_state = IB_PORT_DOWN;
326 ib_dispatch_event(&event);
/* ... and then reported active again */
328 ehca_info(&shca->ib_device,
329 "port %x is active.", port);
330 event.device = &shca->ib_device;
331 event.event = IB_EVENT_PORT_ACTIVE;
332 event.element.port_num = port;
333 shca->sport[port - 1].port_state = IB_PORT_ACTIVE;
334 ib_dispatch_event(&event);
336 case 0x32: /* adapter malfunction */
337 ehca_err(&shca->ib_device, "Adapter malfunction.");
339 case 0x33: /* trace stopped */
340 ehca_err(&shca->ib_device, "Traced stopped.");
343 ehca_err(&shca->ib_device, "Unknown event code: %x on %s.",
344 ec, shca->ib_device.name);
/*
 * reset_eq_pending - clear the cqx_ep register of a completion queue
 * @cq: completion queue whose kernel galpa is used for the access
 *
 * Stores 0 to cqx_ep and then loads the register again.
 * NOTE(review): the load presumably exists to make sure the store has
 * reached the adapter - confirm.
 */
351 static inline void reset_eq_pending(struct ehca_cq *cq)
354 struct h_galpa gal = cq->galpas.kernel;
356 hipz_galpa_store_cq(gal, cqx_ep, 0x0);
357 CQx_EP = hipz_galpa_load(gal, CQTEMM_OFFSET(cqx_ep));
/*
 * ehca_interrupt_neq - interrupt handler for the notification event queue
 * @irq:    interrupt number (not used in the visible body)
 * @dev_id: the struct ehca_shca the interrupt was registered for
 *
 * All work is deferred to the NEQ tasklet, scheduled at high priority.
 */
362 irqreturn_t ehca_interrupt_neq(int irq, void *dev_id)
364 struct ehca_shca *shca = (struct ehca_shca*)dev_id;
366 tasklet_hi_schedule(&shca->neq.interrupt_task);
/*
 * ehca_tasklet_neq - tasklet body for the notification event queue
 * @data: the struct ehca_shca, passed as unsigned long
 *
 * Entries are polled from shca->neq with ehca_poll_eq(); every entry that is
 * not flagged as a completion event is handed to parse_ec().  Afterwards the
 * notification events are cleared with hipz_h_reset_event(), and a failure
 * to do so is logged.
 */
371 void ehca_tasklet_neq(unsigned long data)
373 struct ehca_shca *shca = (struct ehca_shca*)data;
374 struct ehca_eqe *eqe;
377 eqe = (struct ehca_eqe *)ehca_poll_eq(shca, &shca->neq);
380 if (!EHCA_BMASK_GET(NEQE_COMPLETION_EVENT, eqe->entry))
381 parse_ec(shca, eqe->entry);
/* fetch the next entry */
383 eqe = (struct ehca_eqe *)ehca_poll_eq(shca, &shca->neq);
/* an all-ones mask is passed as the set of events to reset */
386 ret = hipz_h_reset_event(shca->ipz_hca_handle,
387 shca->neq.ipz_eq_handle, 0xFFFFFFFFFFFFFFFFL);
389 if (ret != H_SUCCESS)
390 ehca_err(&shca->ib_device, "Can't clear notification events.");
/*
 * ehca_interrupt_eq - interrupt handler for the event queue
 * @irq:    interrupt number (not used in the visible body)
 * @dev_id: the struct ehca_shca the interrupt was registered for
 *
 * All work is deferred to the EQ tasklet, scheduled at high priority.
 */
395 irqreturn_t ehca_interrupt_eq(int irq, void *dev_id)
397 struct ehca_shca *shca = (struct ehca_shca*)dev_id;
399 tasklet_hi_schedule(&shca->eq.interrupt_task);
/*
 * ehca_tasklet_eq - tasklet body for the event queue
 * @data: the struct ehca_shca, passed as unsigned long
 *
 * Entries are polled from shca->eq with ehca_poll_eq().  For a completion
 * event the CQ is looked up in ehca_cq_idr by its token under
 * ehca_cq_idr_lock and reset_eq_pending() is called on it before the
 * completion is processed; every other entry goes to parse_identifier().
 * On adapters with hw_level >= 2 the interrupt state is additionally queried
 * through hipz_h_query_int_state(), and the outer loop repeats while
 * int_state is non-zero.
 */
404 void ehca_tasklet_eq(unsigned long data)
406 struct ehca_shca *shca = (struct ehca_shca*)data;
407 struct ehca_eqe *eqe;
412 eqe = (struct ehca_eqe *)ehca_poll_eq(shca, &shca->eq);
414 if ((shca->hw_level >= 2) && eqe)
419 while ((int_state == 1) || eqe) {
421 u64 eqe_value = eqe->entry;
423 ehca_dbg(&shca->ib_device,
424 "eqe_value=%lx", eqe_value);
426 /* TODO: better structure */
427 if (EHCA_BMASK_GET(EQE_COMPLETION_EVENT,
433 ehca_dbg(&shca->ib_device,
434 "... completion event");
436 EHCA_BMASK_GET(EQE_CQ_TOKEN,
/* the CQ lookup and reset_eq_pending() run under the idr lock */
438 spin_lock_irqsave(&ehca_cq_idr_lock,
440 cq = idr_find(&ehca_cq_idr, token);
443 spin_unlock_irqrestore(&ehca_cq_idr_lock,
448 reset_eq_pending(cq);
/* NOTE(review): the scaling build presumably queues the CQ to a completion task here - confirm */
449 #ifdef CONFIG_INFINIBAND_EHCA_SCALING
451 spin_unlock_irqrestore(&ehca_cq_idr_lock,
454 spin_unlock_irqrestore(&ehca_cq_idr_lock,
456 comp_event_callback(cq);
459 ehca_dbg(&shca->ib_device,
460 "... non completion event");
461 parse_identifier(shca, eqe_value);
464 (struct ehca_eqe *)ehca_poll_eq(shca,
468 if (shca->hw_level >= 2) {
470 hipz_h_query_int_state(shca->ipz_hca_handle,
/* the number of interrupt-state queries is bounded at 100 */
474 if (query_cnt >= 100) {
479 eqe = (struct ehca_eqe *)ehca_poll_eq(shca, &shca->eq);
482 } while (int_state != 0);
487 #ifdef CONFIG_INFINIBAND_EHCA_SCALING
489 static inline int find_next_online_cpu(struct ehca_comp_pool* pool)
491 unsigned long flags_last_cpu;
493 if (ehca_debug_level)
494 ehca_dmp(&cpu_online_map, sizeof(cpumask_t), "");
496 spin_lock_irqsave(&pool->last_cpu_lock, flags_last_cpu);
497 pool->last_cpu = next_cpu(pool->last_cpu, cpu_online_map);
498 if (pool->last_cpu == NR_CPUS)
499 pool->last_cpu = first_cpu(cpu_online_map);
500 spin_unlock_irqrestore(&pool->last_cpu_lock, flags_last_cpu);
502 return pool->last_cpu;
/*
 * __queue_comp_task - account a completion for a CQ on a completion task
 * @__cq: completion queue that received a completion event
 * @cct:  per-CPU completion task the CQ is queued on
 *
 * With cct->task_lock and then __cq->task_lock held: a CQ with no
 * outstanding callbacks is appended to cct->cq_list and the task's wait
 * queue is woken; in both cases nr_callbacks is incremented.
 */
505 static void __queue_comp_task(struct ehca_cq *__cq,
506 struct ehca_cpu_comp_task *cct)
508 unsigned long flags_cct;
509 unsigned long flags_cq;
/* lock order: completion task first, then the CQ */
511 spin_lock_irqsave(&cct->task_lock, flags_cct);
512 spin_lock_irqsave(&__cq->task_lock, flags_cq);
/* only a CQ without outstanding callbacks is linked into the list */
514 if (__cq->nr_callbacks == 0) {
515 __cq->nr_callbacks++;
516 list_add_tail(&__cq->entry, &cct->cq_list);
518 wake_up(&cct->wait_queue);
521 __cq->nr_callbacks++;
523 spin_unlock_irqrestore(&__cq->task_lock, flags_cq);
524 spin_unlock_irqrestore(&cct->task_lock, flags_cct);
/*
 * queue_comp_task - hand a CQ completion to a per-CPU completion task
 * @__cq: completion queue that received a completion event
 *
 * A CPU is chosen with find_next_online_cpu(); if the completion task of
 * that CPU already has jobs (cq_jobs > 0) the next online CPU is used
 * instead.  The CQ is then queued with __queue_comp_task().
 */
527 static void queue_comp_task(struct ehca_cq *__cq)
531 struct ehca_cpu_comp_task *cct;
534 cpu_id = find_next_online_cpu(pool);
536 BUG_ON(!cpu_online(cpu_id));
538 cct = per_cpu_ptr(pool->cpu_comp_tasks, cpu_id);
/* busy task: move on once; the second choice is used without further checks */
540 if (cct->cq_jobs > 0) {
541 cpu_id = find_next_online_cpu(pool);
542 cct = per_cpu_ptr(pool->cpu_comp_tasks, cpu_id);
545 __queue_comp_task(__cq, cct);
/*
 * run_comp_task - process the CQs queued on a completion task
 * @cct: per-CPU completion task whose cq_list is drained
 *
 * While cq_list is not empty the first CQ is taken, cct->task_lock is
 * dropped around the call to comp_event_callback() and re-taken afterwards.
 * Under the CQ's task_lock the entry is unlinked from the list once
 * nr_callbacks is 0.
 */
552 static void run_comp_task(struct ehca_cpu_comp_task* cct)
555 unsigned long flags_cct;
556 unsigned long flags_cq;
558 spin_lock_irqsave(&cct->task_lock, flags_cct);
560 while (!list_empty(&cct->cq_list)) {
561 cq = list_entry(cct->cq_list.next, struct ehca_cq, entry);
/* the completion handler runs without the task lock held */
562 spin_unlock_irqrestore(&cct->task_lock, flags_cct);
563 comp_event_callback(cq);
564 spin_lock_irqsave(&cct->task_lock, flags_cct);
566 spin_lock_irqsave(&cq->task_lock, flags_cq);
/* no callbacks left for this CQ: unlink the list head entry */
568 if (cq->nr_callbacks == 0) {
569 list_del_init(cct->cq_list.next);
572 spin_unlock_irqrestore(&cq->task_lock, flags_cq);
576 spin_unlock_irqrestore(&cct->task_lock, flags_cct);
/*
 * comp_task - thread function of a per-CPU completion task
 * @__cct: the struct ehca_cpu_comp_task served by this thread
 *
 * Loops until kthread_should_stop() is true.  In each round the thread adds
 * itself to cct->wait_queue, checks cq_list, removes itself from the wait
 * queue again and calls run_comp_task() when the list is not empty.
 * NOTE(review): the empty-list branch is expected to give up the CPU until
 * __queue_comp_task() wakes the wait queue - confirm.
 */
581 static int comp_task(void *__cct)
583 struct ehca_cpu_comp_task* cct = __cct;
584 DECLARE_WAITQUEUE(wait, current);
/* the state is set before the stop check and again at the end of each round */
586 set_current_state(TASK_INTERRUPTIBLE);
587 while(!kthread_should_stop()) {
588 add_wait_queue(&cct->wait_queue, &wait);
590 if (list_empty(&cct->cq_list))
593 __set_current_state(TASK_RUNNING);
595 remove_wait_queue(&cct->wait_queue, &wait);
597 if (!list_empty(&cct->cq_list))
598 run_comp_task(__cct);
600 set_current_state(TASK_INTERRUPTIBLE);
602 __set_current_state(TASK_RUNNING);
/*
 * create_comp_task - set up the completion task of one CPU
 * @pool: completion pool holding the per-CPU task entries
 *
 * Initialises the task lock, CQ list and wait queue of the CPU's entry and
 * creates the kernel thread "ehca_comp/<cpu>" running comp_task().  The
 * thread is created with kthread_create() only; callers bind it to a CPU
 * and wake it.
 */
607 static struct task_struct *create_comp_task(struct ehca_comp_pool *pool,
610 struct ehca_cpu_comp_task *cct;
612 cct = per_cpu_ptr(pool->cpu_comp_tasks, cpu);
613 spin_lock_init(&cct->task_lock);
614 INIT_LIST_HEAD(&cct->cq_list);
615 init_waitqueue_head(&cct->wait_queue);
616 cct->task = kthread_create(comp_task, cct, "ehca_comp/%d", cpu);
/*
 * destroy_comp_task - tear down the completion task of one CPU
 * @pool: completion pool holding the per-CPU task entries
 *
 * The CPU's task entry is accessed under cct->task_lock.
 * NOTE(review): presumably the task pointer is detached under the lock and
 * the thread stopped afterwards - confirm.
 */
621 static void destroy_comp_task(struct ehca_comp_pool *pool,
624 struct ehca_cpu_comp_task *cct;
625 struct task_struct *task;
626 unsigned long flags_cct;
628 cct = per_cpu_ptr(pool->cpu_comp_tasks, cpu);
630 spin_lock_irqsave(&cct->task_lock, flags_cct);
636 spin_unlock_irqrestore(&cct->task_lock, flags_cct);
644 static void take_over_work(struct ehca_comp_pool *pool,
647 struct ehca_cpu_comp_task *cct = per_cpu_ptr(pool->cpu_comp_tasks, cpu);
650 unsigned long flags_cct;
652 spin_lock_irqsave(&cct->task_lock, flags_cct);
654 list_splice_init(&cct->cq_list, &list);
656 while(!list_empty(&list)) {
657 cq = list_entry(cct->cq_list.next, struct ehca_cq, entry);
659 list_del(&cq->entry);
660 __queue_comp_task(cq, per_cpu_ptr(pool->cpu_comp_tasks,
661 smp_processor_id()));
664 spin_unlock_irqrestore(&cct->task_lock, flags_cct);
/*
 * comp_pool_callback - CPU hotplug notifier for the completion pool
 * @nfb:    notifier block (not used in the visible body)
 * @action: hotplug action being reported
 *
 * Creates a completion task when a CPU is being prepared, binds and wakes
 * it when the CPU is online, and destroys it when bringing the CPU up was
 * cancelled or the CPU is dead.  For a dead CPU the pending work is moved
 * away with take_over_work().  CPU_DOWN_PREPARE and CPU_DOWN_FAILED are
 * only logged.
 */
668 static int comp_pool_callback(struct notifier_block *nfb,
669 unsigned long action,
672 unsigned int cpu = (unsigned long)hcpu;
673 struct ehca_cpu_comp_task *cct;
677 ehca_gen_dbg("CPU: %x (CPU_PREPARE)", cpu);
678 if(!create_comp_task(pool, cpu)) {
679 ehca_gen_err("Can't create comp_task for cpu: %x", cpu);
683 case CPU_UP_CANCELED:
684 ehca_gen_dbg("CPU: %x (CPU_CANCELED)", cpu);
685 cct = per_cpu_ptr(pool->cpu_comp_tasks, cpu);
/* bind the task to some online CPU before destroying it */
686 kthread_bind(cct->task, any_online_cpu(cpu_online_map));
687 destroy_comp_task(pool, cpu);
690 ehca_gen_dbg("CPU: %x (CPU_ONLINE)", cpu);
691 cct = per_cpu_ptr(pool->cpu_comp_tasks, cpu);
692 kthread_bind(cct->task, cpu);
693 wake_up_process(cct->task);
695 case CPU_DOWN_PREPARE:
696 ehca_gen_dbg("CPU: %x (CPU_DOWN_PREPARE)", cpu);
698 case CPU_DOWN_FAILED:
699 ehca_gen_dbg("CPU: %x (CPU_DOWN_FAILED)", cpu);
702 ehca_gen_dbg("CPU: %x (CPU_DEAD)", cpu);
/* stop the dead CPU's task first, then move its queued CQs to this CPU */
703 destroy_comp_task(pool, cpu);
704 take_over_work(pool, cpu);
/*
 * ehca_create_comp_pool - set up the pool of per-CPU completion tasks
 *
 * Only does work when CONFIG_INFINIBAND_EHCA_SCALING is set: allocates the
 * zeroed pool and its per-CPU task entries, creates a completion task for
 * every online CPU, binds each task to its CPU and wakes it, and registers
 * comp_pool_callback() as CPU notifier.
 */
713 int ehca_create_comp_pool(void)
715 #ifdef CONFIG_INFINIBAND_EHCA_SCALING
717 struct task_struct *task;
719 pool = kzalloc(sizeof(struct ehca_comp_pool), GFP_KERNEL);
723 spin_lock_init(&pool->last_cpu_lock);
/* the round-robin cursor starts at an online CPU */
724 pool->last_cpu = any_online_cpu(cpu_online_map);
726 pool->cpu_comp_tasks = alloc_percpu(struct ehca_cpu_comp_task);
727 if (pool->cpu_comp_tasks == NULL) {
732 for_each_online_cpu(cpu) {
733 task = create_comp_task(pool, cpu);
735 kthread_bind(task, cpu);
736 wake_up_process(task);
740 comp_pool_callback_nb.notifier_call = comp_pool_callback;
741 comp_pool_callback_nb.priority =0;
742 register_cpu_notifier(&comp_pool_callback_nb);
748 void ehca_destroy_comp_pool(void)
750 #ifdef CONFIG_INFINIBAND_EHCA_SCALING
753 unregister_cpu_notifier(&comp_pool_callback_nb);
755 for (i = 0; i < NR_CPUS; i++) {
757 destroy_comp_task(pool, i);
759 free_percpu(pool->cpu_comp_tasks);