1 /******************************************************************************
2 *******************************************************************************
4 ** Copyright (C) 2005 Red Hat, Inc. All rights reserved.
6 ** This copyrighted material is made available to anyone wishing to use,
7 ** modify, copy, or redistribute it subject to the terms and conditions
8 ** of the GNU General Public License v.2.
10 *******************************************************************************
11 ******************************************************************************/
/* Central locking logic has four stages:

   dlm_lock()
   dlm_unlock()

   request_lock(ls, lkb)
   convert_lock(ls, lkb)
   unlock_lock(ls, lkb)
   cancel_lock(ls, lkb)

   _request_lock(r, lkb)
   _convert_lock(r, lkb)
   _unlock_lock(r, lkb)
   _cancel_lock(r, lkb)

   do_request(r, lkb)
   do_convert(r, lkb)
   do_unlock(r, lkb)
   do_cancel(r, lkb)

   Stage 1 (lock, unlock) is mainly about checking input args and
   splitting into one of the four main operations:

       dlm_lock          = request_lock
       dlm_lock+CONVERT  = convert_lock
       dlm_unlock        = unlock_lock
       dlm_unlock+CANCEL = cancel_lock

   Stage 2, xxxx_lock(), just finds and locks the relevant rsb which is
   provided to the next stage.

   Stage 3, _xxxx_lock(), determines if the operation is local or remote.
   When remote, it calls send_xxxx(), when local it calls do_xxxx().

   Stage 4, do_xxxx(), is the guts of the operation.  It manipulates the
   given rsb and lkb and queues callbacks.

   For remote operations, send_xxxx() results in the corresponding do_xxxx()
   function being executed on the remote node.  The connecting send/receive
   calls on local (L) and remote (R) nodes:

   L: send_xxxx()              ->  R: receive_xxxx()
                                   R: do_xxxx()
   L: receive_xxxx_reply()     <-  R: send_xxxx_reply()
*/
59 #include "dlm_internal.h"
62 #include "requestqueue.h"
66 #include "lockspace.h"
71 #include "lvb_table.h"
/*
 * Forward declarations.  The definitions live outside this part of the
 * file; several of these (send_bast, send_remove, receive_extralen) are
 * called below before they are defined.
 */
static int send_request(struct dlm_rsb *r, struct dlm_lkb *lkb);
static int send_convert(struct dlm_rsb *r, struct dlm_lkb *lkb);
static int send_unlock(struct dlm_rsb *r, struct dlm_lkb *lkb);
static int send_cancel(struct dlm_rsb *r, struct dlm_lkb *lkb);
static int send_grant(struct dlm_rsb *r, struct dlm_lkb *lkb);
static int send_bast(struct dlm_rsb *r, struct dlm_lkb *lkb, int mode);
static int send_lookup(struct dlm_rsb *r, struct dlm_lkb *lkb);
static int send_remove(struct dlm_rsb *r);
static int _request_lock(struct dlm_rsb *r, struct dlm_lkb *lkb);
static void __receive_convert_reply(struct dlm_rsb *r, struct dlm_lkb *lkb,
				    struct dlm_message *ms);
static int receive_extralen(struct dlm_message *ms);
/*
 * Lock compatibility matrix - thanks Steve
 * UN = Unlocked state. Not really a state, used as a flag
 * PD = Padding. Used to make the matrix a nice power of two in size
 * Other states are the same as the VMS DLM.
 * Usage: matrix[grmode+1][rqmode+1]  (although m[rq+1][gr+1] is the same)
 *
 * An entry of 1 means the two modes are compatible, 0 means they conflict.
 * The "+1" bias lets a mode value of -1 index row/column 0 (UN).
 */

static const int __dlm_compat_matrix[8][8] = {
      /* UN NL CR CW PR PW EX PD */
	{1, 1, 1, 1, 1, 1, 1, 0},	/* UN */
	{1, 1, 1, 1, 1, 1, 1, 0},	/* NL */
	{1, 1, 1, 1, 1, 1, 0, 0},	/* CR */
	{1, 1, 1, 1, 0, 0, 0, 0},	/* CW */
	{1, 1, 1, 0, 1, 0, 0, 0},	/* PR */
	{1, 1, 1, 0, 0, 0, 0, 0},	/* PW */
	{1, 1, 0, 0, 0, 0, 0, 0},	/* EX */
	{0, 0, 0, 0, 0, 0, 0, 0}	/* PD */
};
/*
 * This defines the direction of transfer of LVB data.
 * Granted mode is the row; requested mode is the column.
 * Usage: matrix[grmode+1][rqmode+1]
 * 1 = LVB is returned to the caller
 * 0 = LVB is written to the resource
 * -1 = nothing happens to the LVB
 */

const int dlm_lvb_operations[8][8] = {
	/* UN   NL  CR  CW  PR  PW  EX  PD*/
	{  -1,  1,  1,  1,  1,  1,  1, -1 }, /* UN */
	{  -1,  1,  1,  1,  1,  1,  1,  0 }, /* NL */
	{  -1, -1,  1,  1,  1,  1,  1,  0 }, /* CR */
	{  -1, -1, -1,  1,  1,  1,  1,  0 }, /* CW */
	{  -1, -1, -1, -1,  1,  1,  1,  0 }, /* PR */
	{  -1,  0,  0,  0,  0,  0,  1,  0 }, /* PW */
	{  -1,  0,  0,  0,  0,  0,  0,  0 }, /* EX */
	{  -1,  0,  0,  0,  0,  0,  0,  0 }  /* PD */
};
/* Make the LVB operations table available to other GPL modules. */
EXPORT_SYMBOL_GPL(dlm_lvb_operations);
/*
 * modes_compat(gr, rq) - nonzero when the granted mode of lkb @gr is
 * compatible with the requested mode of lkb @rq.  Both arguments are
 * struct dlm_lkb pointers.
 */
#define modes_compat(gr, rq) \
	__dlm_compat_matrix[(gr)->lkb_grmode + 1][(rq)->lkb_rqmode + 1]
132 int dlm_modes_compat(int mode1, int mode2)
134 return __dlm_compat_matrix[mode1 + 1][mode2 + 1];
/*
 * Compatibility matrix for conversions with QUECVT set.
 * Granted mode is the row; requested mode is the column.
 * Usage: matrix[grmode+1][rqmode+1]
 */

static const int __quecvt_compat_matrix[8][8] = {
      /* UN NL CR CW PR PW EX PD */
	{0, 0, 0, 0, 0, 0, 0, 0},	/* UN */
	{0, 0, 1, 1, 1, 1, 1, 0},	/* NL */
	{0, 0, 0, 1, 1, 1, 1, 0},	/* CR */
	{0, 0, 0, 0, 1, 1, 1, 0},	/* CW */
	{0, 0, 0, 1, 0, 1, 1, 0},	/* PR */
	{0, 0, 0, 0, 0, 0, 1, 0},	/* PW */
	{0, 0, 0, 0, 0, 0, 0, 0},	/* EX */
	{0, 0, 0, 0, 0, 0, 0, 0}	/* PD */
};
155 static void dlm_print_lkb(struct dlm_lkb *lkb)
157 printk(KERN_ERR "lkb: nodeid %d id %x remid %x exflags %x flags %x\n"
158 " status %d rqmode %d grmode %d wait_type %d ast_type %d\n",
159 lkb->lkb_nodeid, lkb->lkb_id, lkb->lkb_remid, lkb->lkb_exflags,
160 lkb->lkb_flags, lkb->lkb_status, lkb->lkb_rqmode,
161 lkb->lkb_grmode, lkb->lkb_wait_type, lkb->lkb_ast_type);
164 void dlm_print_rsb(struct dlm_rsb *r)
166 printk(KERN_ERR "rsb: nodeid %d flags %lx first %x rlc %d name %s\n",
167 r->res_nodeid, r->res_flags, r->res_first_lkid,
168 r->res_recover_locks_count, r->res_name);
171 /* Threads cannot use the lockspace while it's being recovered */
173 static inline void lock_recovery(struct dlm_ls *ls)
175 down_read(&ls->ls_in_recovery);
178 static inline void unlock_recovery(struct dlm_ls *ls)
180 up_read(&ls->ls_in_recovery);
183 static inline int lock_recovery_try(struct dlm_ls *ls)
185 return down_read_trylock(&ls->ls_in_recovery);
188 static inline int can_be_queued(struct dlm_lkb *lkb)
190 return !(lkb->lkb_exflags & DLM_LKF_NOQUEUE);
193 static inline int force_blocking_asts(struct dlm_lkb *lkb)
195 return (lkb->lkb_exflags & DLM_LKF_NOQUEUEBAST);
198 static inline int is_demoted(struct dlm_lkb *lkb)
200 return (lkb->lkb_sbflags & DLM_SBF_DEMOTED);
203 static inline int is_remote(struct dlm_rsb *r)
205 DLM_ASSERT(r->res_nodeid >= 0, dlm_print_rsb(r););
206 return !!r->res_nodeid;
209 static inline int is_process_copy(struct dlm_lkb *lkb)
211 return (lkb->lkb_nodeid && !(lkb->lkb_flags & DLM_IFL_MSTCPY));
214 static inline int is_master_copy(struct dlm_lkb *lkb)
216 if (lkb->lkb_flags & DLM_IFL_MSTCPY)
217 DLM_ASSERT(lkb->lkb_nodeid, dlm_print_lkb(lkb););
218 return (lkb->lkb_flags & DLM_IFL_MSTCPY) ? 1 : 0;
221 static inline int middle_conversion(struct dlm_lkb *lkb)
223 if ((lkb->lkb_grmode==DLM_LOCK_PR && lkb->lkb_rqmode==DLM_LOCK_CW) ||
224 (lkb->lkb_rqmode==DLM_LOCK_PR && lkb->lkb_grmode==DLM_LOCK_CW))
229 static inline int down_conversion(struct dlm_lkb *lkb)
231 return (!middle_conversion(lkb) && lkb->lkb_rqmode < lkb->lkb_grmode);
234 static void queue_cast(struct dlm_rsb *r, struct dlm_lkb *lkb, int rv)
236 if (is_master_copy(lkb))
239 DLM_ASSERT(lkb->lkb_lksb, dlm_print_lkb(lkb););
241 lkb->lkb_lksb->sb_status = rv;
242 lkb->lkb_lksb->sb_flags = lkb->lkb_sbflags;
244 dlm_add_ast(lkb, AST_COMP);
247 static void queue_bast(struct dlm_rsb *r, struct dlm_lkb *lkb, int rqmode)
249 if (is_master_copy(lkb))
250 send_bast(r, lkb, rqmode);
252 lkb->lkb_bastmode = rqmode;
253 dlm_add_ast(lkb, AST_BAST);
258 * Basic operations on rsb's and lkb's
261 static struct dlm_rsb *create_rsb(struct dlm_ls *ls, char *name, int len)
265 r = allocate_rsb(ls, len);
271 memcpy(r->res_name, name, len);
272 mutex_init(&r->res_mutex);
274 INIT_LIST_HEAD(&r->res_lookup);
275 INIT_LIST_HEAD(&r->res_grantqueue);
276 INIT_LIST_HEAD(&r->res_convertqueue);
277 INIT_LIST_HEAD(&r->res_waitqueue);
278 INIT_LIST_HEAD(&r->res_root_list);
279 INIT_LIST_HEAD(&r->res_recover_list);
284 static int search_rsb_list(struct list_head *head, char *name, int len,
285 unsigned int flags, struct dlm_rsb **r_ret)
290 list_for_each_entry(r, head, res_hashchain) {
291 if (len == r->res_length && !memcmp(name, r->res_name, len))
297 if (r->res_nodeid && (flags & R_MASTER))
303 static int _search_rsb(struct dlm_ls *ls, char *name, int len, int b,
304 unsigned int flags, struct dlm_rsb **r_ret)
309 error = search_rsb_list(&ls->ls_rsbtbl[b].list, name, len, flags, &r);
311 kref_get(&r->res_ref);
314 error = search_rsb_list(&ls->ls_rsbtbl[b].toss, name, len, flags, &r);
318 list_move(&r->res_hashchain, &ls->ls_rsbtbl[b].list);
320 if (dlm_no_directory(ls))
323 if (r->res_nodeid == -1) {
324 rsb_clear_flag(r, RSB_MASTER_UNCERTAIN);
325 r->res_first_lkid = 0;
326 } else if (r->res_nodeid > 0) {
327 rsb_set_flag(r, RSB_MASTER_UNCERTAIN);
328 r->res_first_lkid = 0;
330 DLM_ASSERT(r->res_nodeid == 0, dlm_print_rsb(r););
331 DLM_ASSERT(!rsb_flag(r, RSB_MASTER_UNCERTAIN),);
338 static int search_rsb(struct dlm_ls *ls, char *name, int len, int b,
339 unsigned int flags, struct dlm_rsb **r_ret)
342 write_lock(&ls->ls_rsbtbl[b].lock);
343 error = _search_rsb(ls, name, len, b, flags, r_ret);
344 write_unlock(&ls->ls_rsbtbl[b].lock);
349 * Find rsb in rsbtbl and potentially create/add one
351 * Delaying the release of rsb's has a similar benefit to applications keeping
352 * NL locks on an rsb, but without the guarantee that the cached master value
353 * will still be valid when the rsb is reused. Apps aren't always smart enough
354 * to keep NL locks on an rsb that they may lock again shortly; this can lead
355 * to excessive master lookups and removals if we don't delay the release.
357 * Searching for an rsb means looking through both the normal list and toss
358 * list. When found on the toss list the rsb is moved to the normal list with
359 * ref count of 1; when found on normal list the ref count is incremented.
362 static int find_rsb(struct dlm_ls *ls, char *name, int namelen,
363 unsigned int flags, struct dlm_rsb **r_ret)
365 struct dlm_rsb *r, *tmp;
366 uint32_t hash, bucket;
369 if (dlm_no_directory(ls))
372 hash = jhash(name, namelen, 0);
373 bucket = hash & (ls->ls_rsbtbl_size - 1);
375 error = search_rsb(ls, name, namelen, bucket, flags, &r);
379 if (error == -ENOENT && !(flags & R_CREATE))
382 /* the rsb was found but wasn't a master copy */
383 if (error == -ENOTBLK)
387 r = create_rsb(ls, name, namelen);
392 r->res_bucket = bucket;
394 kref_init(&r->res_ref);
396 /* With no directory, the master can be set immediately */
397 if (dlm_no_directory(ls)) {
398 int nodeid = dlm_dir_nodeid(r);
399 if (nodeid == dlm_our_nodeid())
401 r->res_nodeid = nodeid;
404 write_lock(&ls->ls_rsbtbl[bucket].lock);
405 error = _search_rsb(ls, name, namelen, bucket, 0, &tmp);
407 write_unlock(&ls->ls_rsbtbl[bucket].lock);
412 list_add(&r->res_hashchain, &ls->ls_rsbtbl[bucket].list);
413 write_unlock(&ls->ls_rsbtbl[bucket].lock);
/* Non-static entry point; forwards all arguments to find_rsb(). */
int dlm_find_rsb(struct dlm_ls *ls, char *name, int namelen,
		 unsigned int flags, struct dlm_rsb **r_ret)
{
	return find_rsb(ls, name, namelen, flags, r_ret);
}
426 /* This is only called to add a reference when the code already holds
427 a valid reference to the rsb, so there's no need for locking. */
429 static inline void hold_rsb(struct dlm_rsb *r)
431 kref_get(&r->res_ref);
/*
 * Non-static counterpart of hold_rsb().
 * NOTE(review): only the signature survived in the damaged listing; the
 * body is assumed to forward to hold_rsb() -- confirm.
 */
void dlm_hold_rsb(struct dlm_rsb *r)
{
	hold_rsb(r);
}
439 static void toss_rsb(struct kref *kref)
441 struct dlm_rsb *r = container_of(kref, struct dlm_rsb, res_ref);
442 struct dlm_ls *ls = r->res_ls;
444 DLM_ASSERT(list_empty(&r->res_root_list), dlm_print_rsb(r););
445 kref_init(&r->res_ref);
446 list_move(&r->res_hashchain, &ls->ls_rsbtbl[r->res_bucket].toss);
447 r->res_toss_time = jiffies;
449 free_lvb(r->res_lvbptr);
450 r->res_lvbptr = NULL;
454 /* When all references to the rsb are gone it's transfered to
455 the tossed list for later disposal. */
457 static void put_rsb(struct dlm_rsb *r)
459 struct dlm_ls *ls = r->res_ls;
460 uint32_t bucket = r->res_bucket;
462 write_lock(&ls->ls_rsbtbl[bucket].lock);
463 kref_put(&r->res_ref, toss_rsb);
464 write_unlock(&ls->ls_rsbtbl[bucket].lock);
/*
 * Non-static counterpart of put_rsb().
 * NOTE(review): only the signature survived in the damaged listing; the
 * body is assumed to forward to put_rsb() -- confirm.
 */
void dlm_put_rsb(struct dlm_rsb *r)
{
	put_rsb(r);
}
472 /* See comment for unhold_lkb */
474 static void unhold_rsb(struct dlm_rsb *r)
477 rv = kref_put(&r->res_ref, toss_rsb);
478 DLM_ASSERT(!rv, dlm_print_rsb(r););
481 static void kill_rsb(struct kref *kref)
483 struct dlm_rsb *r = container_of(kref, struct dlm_rsb, res_ref);
485 /* All work is done after the return from kref_put() so we
486 can release the write_lock before the remove and free. */
488 DLM_ASSERT(list_empty(&r->res_lookup),);
489 DLM_ASSERT(list_empty(&r->res_grantqueue),);
490 DLM_ASSERT(list_empty(&r->res_convertqueue),);
491 DLM_ASSERT(list_empty(&r->res_waitqueue),);
492 DLM_ASSERT(list_empty(&r->res_root_list),);
493 DLM_ASSERT(list_empty(&r->res_recover_list),);
496 /* Attaching/detaching lkb's from rsb's is for rsb reference counting.
497 The rsb must exist as long as any lkb's for it do. */
499 static void attach_lkb(struct dlm_rsb *r, struct dlm_lkb *lkb)
502 lkb->lkb_resource = r;
505 static void detach_lkb(struct dlm_lkb *lkb)
507 if (lkb->lkb_resource) {
508 put_rsb(lkb->lkb_resource);
509 lkb->lkb_resource = NULL;
513 static int create_lkb(struct dlm_ls *ls, struct dlm_lkb **lkb_ret)
515 struct dlm_lkb *lkb, *tmp;
519 lkb = allocate_lkb(ls);
523 lkb->lkb_nodeid = -1;
524 lkb->lkb_grmode = DLM_LOCK_IV;
525 kref_init(&lkb->lkb_ref);
527 get_random_bytes(&bucket, sizeof(bucket));
528 bucket &= (ls->ls_lkbtbl_size - 1);
530 write_lock(&ls->ls_lkbtbl[bucket].lock);
532 /* counter can roll over so we must verify lkid is not in use */
535 lkid = bucket | (ls->ls_lkbtbl[bucket].counter++ << 16);
537 list_for_each_entry(tmp, &ls->ls_lkbtbl[bucket].list,
539 if (tmp->lkb_id != lkid)
547 list_add(&lkb->lkb_idtbl_list, &ls->ls_lkbtbl[bucket].list);
548 write_unlock(&ls->ls_lkbtbl[bucket].lock);
554 static struct dlm_lkb *__find_lkb(struct dlm_ls *ls, uint32_t lkid)
556 uint16_t bucket = lkid & 0xFFFF;
559 list_for_each_entry(lkb, &ls->ls_lkbtbl[bucket].list, lkb_idtbl_list) {
560 if (lkb->lkb_id == lkid)
566 static int find_lkb(struct dlm_ls *ls, uint32_t lkid, struct dlm_lkb **lkb_ret)
569 uint16_t bucket = lkid & 0xFFFF;
571 if (bucket >= ls->ls_lkbtbl_size)
574 read_lock(&ls->ls_lkbtbl[bucket].lock);
575 lkb = __find_lkb(ls, lkid);
577 kref_get(&lkb->lkb_ref);
578 read_unlock(&ls->ls_lkbtbl[bucket].lock);
581 return lkb ? 0 : -ENOENT;
584 static void kill_lkb(struct kref *kref)
586 struct dlm_lkb *lkb = container_of(kref, struct dlm_lkb, lkb_ref);
588 /* All work is done after the return from kref_put() so we
589 can release the write_lock before the detach_lkb */
591 DLM_ASSERT(!lkb->lkb_status, dlm_print_lkb(lkb););
594 /* __put_lkb() is used when an lkb may not have an rsb attached to
595 it so we need to provide the lockspace explicitly */
597 static int __put_lkb(struct dlm_ls *ls, struct dlm_lkb *lkb)
599 uint16_t bucket = lkb->lkb_id & 0xFFFF;
601 write_lock(&ls->ls_lkbtbl[bucket].lock);
602 if (kref_put(&lkb->lkb_ref, kill_lkb)) {
603 list_del(&lkb->lkb_idtbl_list);
604 write_unlock(&ls->ls_lkbtbl[bucket].lock);
608 /* for local/process lkbs, lvbptr points to caller's lksb */
609 if (lkb->lkb_lvbptr && is_master_copy(lkb))
610 free_lvb(lkb->lkb_lvbptr);
614 write_unlock(&ls->ls_lkbtbl[bucket].lock);
619 int dlm_put_lkb(struct dlm_lkb *lkb)
623 DLM_ASSERT(lkb->lkb_resource, dlm_print_lkb(lkb););
624 DLM_ASSERT(lkb->lkb_resource->res_ls, dlm_print_lkb(lkb););
626 ls = lkb->lkb_resource->res_ls;
627 return __put_lkb(ls, lkb);
630 /* This is only called to add a reference when the code already holds
631 a valid reference to the lkb, so there's no need for locking. */
633 static inline void hold_lkb(struct dlm_lkb *lkb)
635 kref_get(&lkb->lkb_ref);
638 /* This is called when we need to remove a reference and are certain
639 it's not the last ref. e.g. del_lkb is always called between a
640 find_lkb/put_lkb and is always the inverse of a previous add_lkb.
641 put_lkb would work fine, but would involve unnecessary locking */
643 static inline void unhold_lkb(struct dlm_lkb *lkb)
646 rv = kref_put(&lkb->lkb_ref, kill_lkb);
647 DLM_ASSERT(!rv, dlm_print_lkb(lkb););
650 static void lkb_add_ordered(struct list_head *new, struct list_head *head,
653 struct dlm_lkb *lkb = NULL;
655 list_for_each_entry(lkb, head, lkb_statequeue)
656 if (lkb->lkb_rqmode < mode)
660 list_add_tail(new, head);
662 __list_add(new, lkb->lkb_statequeue.prev, &lkb->lkb_statequeue);
665 /* add/remove lkb to rsb's grant/convert/wait queue */
667 static void add_lkb(struct dlm_rsb *r, struct dlm_lkb *lkb, int status)
669 kref_get(&lkb->lkb_ref);
671 DLM_ASSERT(!lkb->lkb_status, dlm_print_lkb(lkb););
673 lkb->lkb_status = status;
676 case DLM_LKSTS_WAITING:
677 if (lkb->lkb_exflags & DLM_LKF_HEADQUE)
678 list_add(&lkb->lkb_statequeue, &r->res_waitqueue);
680 list_add_tail(&lkb->lkb_statequeue, &r->res_waitqueue);
682 case DLM_LKSTS_GRANTED:
683 /* convention says granted locks kept in order of grmode */
684 lkb_add_ordered(&lkb->lkb_statequeue, &r->res_grantqueue,
687 case DLM_LKSTS_CONVERT:
688 if (lkb->lkb_exflags & DLM_LKF_HEADQUE)
689 list_add(&lkb->lkb_statequeue, &r->res_convertqueue);
691 list_add_tail(&lkb->lkb_statequeue,
692 &r->res_convertqueue);
695 DLM_ASSERT(0, dlm_print_lkb(lkb); printk("sts=%d\n", status););
699 static void del_lkb(struct dlm_rsb *r, struct dlm_lkb *lkb)
702 list_del(&lkb->lkb_statequeue);
/*
 * Move @lkb from its current state queue to the one selected by @sts.
 * An extra reference is held across the del/add pair.
 * NOTE(review): only the add_lkb() call survived in the damaged listing;
 * the hold/del/unhold statements are restored from context -- confirm.
 */
static void move_lkb(struct dlm_rsb *r, struct dlm_lkb *lkb, int sts)
{
	hold_lkb(lkb);
	del_lkb(r, lkb);
	add_lkb(r, lkb, sts);
	unhold_lkb(lkb);
}
714 /* add/remove lkb from global waiters list of lkb's waiting for
715 a reply from a remote node */
717 static void add_to_waiters(struct dlm_lkb *lkb, int mstype)
719 struct dlm_ls *ls = lkb->lkb_resource->res_ls;
721 mutex_lock(&ls->ls_waiters_mutex);
722 if (lkb->lkb_wait_type) {
723 log_print("add_to_waiters error %d", lkb->lkb_wait_type);
726 lkb->lkb_wait_type = mstype;
727 kref_get(&lkb->lkb_ref);
728 list_add(&lkb->lkb_wait_reply, &ls->ls_waiters);
730 mutex_unlock(&ls->ls_waiters_mutex);
733 static int _remove_from_waiters(struct dlm_lkb *lkb)
737 if (!lkb->lkb_wait_type) {
738 log_print("remove_from_waiters error");
742 lkb->lkb_wait_type = 0;
743 list_del(&lkb->lkb_wait_reply);
749 static int remove_from_waiters(struct dlm_lkb *lkb)
751 struct dlm_ls *ls = lkb->lkb_resource->res_ls;
754 mutex_lock(&ls->ls_waiters_mutex);
755 error = _remove_from_waiters(lkb);
756 mutex_unlock(&ls->ls_waiters_mutex);
760 static void dir_remove(struct dlm_rsb *r)
764 if (dlm_no_directory(r->res_ls))
767 to_nodeid = dlm_dir_nodeid(r);
768 if (to_nodeid != dlm_our_nodeid())
771 dlm_dir_remove_entry(r->res_ls, to_nodeid,
772 r->res_name, r->res_length);
775 /* FIXME: shouldn't this be able to exit as soon as one non-due rsb is
776 found since they are in order of newest to oldest? */
778 static int shrink_bucket(struct dlm_ls *ls, int b)
781 int count = 0, found;
785 write_lock(&ls->ls_rsbtbl[b].lock);
786 list_for_each_entry_reverse(r, &ls->ls_rsbtbl[b].toss,
788 if (!time_after_eq(jiffies, r->res_toss_time +
789 dlm_config.toss_secs * HZ))
796 write_unlock(&ls->ls_rsbtbl[b].lock);
800 if (kref_put(&r->res_ref, kill_rsb)) {
801 list_del(&r->res_hashchain);
802 write_unlock(&ls->ls_rsbtbl[b].lock);
809 write_unlock(&ls->ls_rsbtbl[b].lock);
810 log_error(ls, "tossed rsb in use %s", r->res_name);
817 void dlm_scan_rsbs(struct dlm_ls *ls)
821 if (dlm_locking_stopped(ls))
824 for (i = 0; i < ls->ls_rsbtbl_size; i++) {
825 shrink_bucket(ls, i);
830 /* lkb is master or local copy */
832 static void set_lvb_lock(struct dlm_rsb *r, struct dlm_lkb *lkb)
834 int b, len = r->res_ls->ls_lvblen;
836 /* b=1 lvb returned to caller
837 b=0 lvb written to rsb or invalidated
840 b = dlm_lvb_operations[lkb->lkb_grmode + 1][lkb->lkb_rqmode + 1];
843 if (!lkb->lkb_lvbptr)
846 if (!(lkb->lkb_exflags & DLM_LKF_VALBLK))
852 memcpy(lkb->lkb_lvbptr, r->res_lvbptr, len);
853 lkb->lkb_lvbseq = r->res_lvbseq;
856 if (lkb->lkb_exflags & DLM_LKF_IVVALBLK) {
857 rsb_set_flag(r, RSB_VALNOTVALID);
861 if (!lkb->lkb_lvbptr)
864 if (!(lkb->lkb_exflags & DLM_LKF_VALBLK))
868 r->res_lvbptr = allocate_lvb(r->res_ls);
873 memcpy(r->res_lvbptr, lkb->lkb_lvbptr, len);
875 lkb->lkb_lvbseq = r->res_lvbseq;
876 rsb_clear_flag(r, RSB_VALNOTVALID);
879 if (rsb_flag(r, RSB_VALNOTVALID))
880 lkb->lkb_sbflags |= DLM_SBF_VALNOTVALID;
883 static void set_lvb_unlock(struct dlm_rsb *r, struct dlm_lkb *lkb)
885 if (lkb->lkb_grmode < DLM_LOCK_PW)
888 if (lkb->lkb_exflags & DLM_LKF_IVVALBLK) {
889 rsb_set_flag(r, RSB_VALNOTVALID);
893 if (!lkb->lkb_lvbptr)
896 if (!(lkb->lkb_exflags & DLM_LKF_VALBLK))
900 r->res_lvbptr = allocate_lvb(r->res_ls);
905 memcpy(r->res_lvbptr, lkb->lkb_lvbptr, r->res_ls->ls_lvblen);
907 rsb_clear_flag(r, RSB_VALNOTVALID);
910 /* lkb is process copy (pc) */
912 static void set_lvb_lock_pc(struct dlm_rsb *r, struct dlm_lkb *lkb,
913 struct dlm_message *ms)
917 if (!lkb->lkb_lvbptr)
920 if (!(lkb->lkb_exflags & DLM_LKF_VALBLK))
923 b = dlm_lvb_operations[lkb->lkb_grmode + 1][lkb->lkb_rqmode + 1];
925 int len = receive_extralen(ms);
926 memcpy(lkb->lkb_lvbptr, ms->m_extra, len);
927 lkb->lkb_lvbseq = ms->m_lvbseq;
931 /* Manipulate lkb's on rsb's convert/granted/waiting queues
932 remove_lock -- used for unlock, removes lkb from granted
933 revert_lock -- used for cancel, moves lkb from convert to granted
934 grant_lock -- used for request and convert, adds lkb to granted or
935 moves lkb from convert or waiting to granted
937 Each of these is used for master or local copy lkb's. There is
938 also a _pc() variation used to make the corresponding change on
939 a process copy (pc) lkb. */
941 static void _remove_lock(struct dlm_rsb *r, struct dlm_lkb *lkb)
944 lkb->lkb_grmode = DLM_LOCK_IV;
945 /* this unhold undoes the original ref from create_lkb()
946 so this leads to the lkb being freed */
/* Unlock on a master/local copy: write back the LVB if applicable, then
   remove the lkb from the rsb. */
static void remove_lock(struct dlm_rsb *r, struct dlm_lkb *lkb)
{
	set_lvb_unlock(r, lkb);
	_remove_lock(r, lkb);
}
/* Unlock on a process copy: remove the lkb without touching the LVB. */
static void remove_lock_pc(struct dlm_rsb *r, struct dlm_lkb *lkb)
{
	_remove_lock(r, lkb);
}
961 static void revert_lock(struct dlm_rsb *r, struct dlm_lkb *lkb)
963 lkb->lkb_rqmode = DLM_LOCK_IV;
965 switch (lkb->lkb_status) {
966 case DLM_LKSTS_CONVERT:
967 move_lkb(r, lkb, DLM_LKSTS_GRANTED);
969 case DLM_LKSTS_WAITING:
971 lkb->lkb_grmode = DLM_LOCK_IV;
972 /* this unhold undoes the original ref from create_lkb()
973 so this leads to the lkb being freed */
977 log_print("invalid status for revert %d", lkb->lkb_status);
/*
 * Process-copy variant of revert_lock().
 * NOTE(review): only the signature survived in the damaged listing; the
 * body is assumed to forward to revert_lock() -- confirm.
 */
static void revert_lock_pc(struct dlm_rsb *r, struct dlm_lkb *lkb)
{
	revert_lock(r, lkb);
}
986 static void _grant_lock(struct dlm_rsb *r, struct dlm_lkb *lkb)
988 if (lkb->lkb_grmode != lkb->lkb_rqmode) {
989 lkb->lkb_grmode = lkb->lkb_rqmode;
991 move_lkb(r, lkb, DLM_LKSTS_GRANTED);
993 add_lkb(r, lkb, DLM_LKSTS_GRANTED);
996 lkb->lkb_rqmode = DLM_LOCK_IV;
999 static void grant_lock(struct dlm_rsb *r, struct dlm_lkb *lkb)
1001 set_lvb_lock(r, lkb);
1002 _grant_lock(r, lkb);
1003 lkb->lkb_highbast = 0;
/* Grant on a process copy: take the LVB from the reply message @ms,
   then grant. */
static void grant_lock_pc(struct dlm_rsb *r, struct dlm_lkb *lkb,
			  struct dlm_message *ms)
{
	set_lvb_lock_pc(r, lkb, ms);
	_grant_lock(r, lkb);
}
/* called by grant_pending_locks() which means an async grant message must
   be sent to the requesting node in addition to granting the lock if the
   lkb belongs to a remote node. */

/* NOTE(review): the grant_lock() and send_grant() calls were absent from
   the damaged listing and are restored from the comment above. */
static void grant_lock_pending(struct dlm_rsb *r, struct dlm_lkb *lkb)
{
	grant_lock(r, lkb);
	if (is_master_copy(lkb))
		send_grant(r, lkb);
	else
		queue_cast(r, lkb, 0);
}
1026 static inline int first_in_list(struct dlm_lkb *lkb, struct list_head *head)
1028 struct dlm_lkb *first = list_entry(head->next, struct dlm_lkb,
1030 if (lkb->lkb_id == first->lkb_id)
1036 /* Check if the given lkb conflicts with another lkb on the queue. */
1038 static int queue_conflict(struct list_head *head, struct dlm_lkb *lkb)
1040 struct dlm_lkb *this;
1042 list_for_each_entry(this, head, lkb_statequeue) {
1045 if (!modes_compat(this, lkb))
1052 * "A conversion deadlock arises with a pair of lock requests in the converting
1053 * queue for one resource. The granted mode of each lock blocks the requested
1054 * mode of the other lock."
1056 * Part 2: if the granted mode of lkb is preventing the first lkb in the
1057 * convert queue from being granted, then demote lkb (set grmode to NL).
1058 * This second form requires that we check for conv-deadlk even when
1059 * now == 0 in _can_be_granted().
1062 * Granted Queue: empty
1063 * Convert Queue: NL->EX (first lock)
1064 * PR->EX (second lock)
1066 * The first lock can't be granted because of the granted mode of the second
1067 * lock and the second lock can't be granted because it's not first in the
1068 * list. We demote the granted mode of the second lock (the lkb passed to this
1071 * After the resolution, the "grant pending" function needs to go back and try
1072 * to grant locks on the convert queue again since the first lock can now be
1076 static int conversion_deadlock_detect(struct dlm_rsb *rsb, struct dlm_lkb *lkb)
1078 struct dlm_lkb *this, *first = NULL, *self = NULL;
1080 list_for_each_entry(this, &rsb->res_convertqueue, lkb_statequeue) {
1088 if (!modes_compat(this, lkb) && !modes_compat(lkb, this))
1092 /* if lkb is on the convert queue and is preventing the first
1093 from being granted, then there's deadlock and we demote lkb.
1094 multiple converting locks may need to do this before the first
1095 converting lock can be granted. */
1097 if (self && self != first) {
1098 if (!modes_compat(lkb, first) &&
1099 !queue_conflict(&rsb->res_grantqueue, first))
1107 * Return 1 if the lock can be granted, 0 otherwise.
1108 * Also detect and resolve conversion deadlocks.
1110 * lkb is the lock to be granted
1112 * now is 1 if the function is being called in the context of the
1113 * immediate request, it is 0 if called later, after the lock has been
1116 * References are from chapter 6 of "VAXcluster Principles" by Roy Davis
1119 static int _can_be_granted(struct dlm_rsb *r, struct dlm_lkb *lkb, int now)
1121 int8_t conv = (lkb->lkb_grmode != DLM_LOCK_IV);
1124 * 6-10: Version 5.4 introduced an option to address the phenomenon of
1125 * a new request for a NL mode lock being blocked.
1127 * 6-11: If the optional EXPEDITE flag is used with the new NL mode
1128 * request, then it would be granted. In essence, the use of this flag
1129 * tells the Lock Manager to expedite theis request by not considering
1130 * what may be in the CONVERTING or WAITING queues... As of this
1131 * writing, the EXPEDITE flag can be used only with new requests for NL
1132 * mode locks. This flag is not valid for conversion requests.
1134 * A shortcut. Earlier checks return an error if EXPEDITE is used in a
1135 * conversion or used with a non-NL requested mode. We also know an
1136 * EXPEDITE request is always granted immediately, so now must always
1137 * be 1. The full condition to grant an expedite request: (now &&
1138 * !conv && lkb->rqmode == DLM_LOCK_NL && (flags & EXPEDITE)) can
1139 * therefore be shortened to just checking the flag.
1142 if (lkb->lkb_exflags & DLM_LKF_EXPEDITE)
1146 * A shortcut. Without this, !queue_conflict(grantqueue, lkb) would be
1147 * added to the remaining conditions.
1150 if (queue_conflict(&r->res_grantqueue, lkb))
1154 * 6-3: By default, a conversion request is immediately granted if the
1155 * requested mode is compatible with the modes of all other granted
1159 if (queue_conflict(&r->res_convertqueue, lkb))
1163 * 6-5: But the default algorithm for deciding whether to grant or
1164 * queue conversion requests does not by itself guarantee that such
1165 * requests are serviced on a "first come first serve" basis. This, in
1166 * turn, can lead to a phenomenon known as "indefinate postponement".
1168 * 6-7: This issue is dealt with by using the optional QUECVT flag with
1169 * the system service employed to request a lock conversion. This flag
1170 * forces certain conversion requests to be queued, even if they are
1171 * compatible with the granted modes of other locks on the same
1172 * resource. Thus, the use of this flag results in conversion requests
1173 * being ordered on a "first come first servce" basis.
1175 * DCT: This condition is all about new conversions being able to occur
1176 * "in place" while the lock remains on the granted queue (assuming
1177 * nothing else conflicts.) IOW if QUECVT isn't set, a conversion
1178 * doesn't _have_ to go onto the convert queue where it's processed in
1179 * order. The "now" variable is necessary to distinguish converts
1180 * being received and processed for the first time now, because once a
1181 * convert is moved to the conversion queue the condition below applies
1182 * requiring fifo granting.
1185 if (now && conv && !(lkb->lkb_exflags & DLM_LKF_QUECVT))
1189 * The NOORDER flag is set to avoid the standard vms rules on grant
1193 if (lkb->lkb_exflags & DLM_LKF_NOORDER)
1197 * 6-3: Once in that queue [CONVERTING], a conversion request cannot be
1198 * granted until all other conversion requests ahead of it are granted
1202 if (!now && conv && first_in_list(lkb, &r->res_convertqueue))
1206 * 6-4: By default, a new request is immediately granted only if all
1207 * three of the following conditions are satisfied when the request is
1209 * - The queue of ungranted conversion requests for the resource is
1211 * - The queue of ungranted new requests for the resource is empty.
1212 * - The mode of the new request is compatible with the most
1213 * restrictive mode of all granted locks on the resource.
1216 if (now && !conv && list_empty(&r->res_convertqueue) &&
1217 list_empty(&r->res_waitqueue))
1221 * 6-4: Once a lock request is in the queue of ungranted new requests,
1222 * it cannot be granted until the queue of ungranted conversion
1223 * requests is empty, all ungranted new requests ahead of it are
1224 * granted and/or canceled, and it is compatible with the granted mode
1225 * of the most restrictive lock granted on the resource.
1228 if (!now && !conv && list_empty(&r->res_convertqueue) &&
1229 first_in_list(lkb, &r->res_waitqueue))
1234 * The following, enabled by CONVDEADLK, departs from VMS.
1237 if (conv && (lkb->lkb_exflags & DLM_LKF_CONVDEADLK) &&
1238 conversion_deadlock_detect(r, lkb)) {
1239 lkb->lkb_grmode = DLM_LOCK_NL;
1240 lkb->lkb_sbflags |= DLM_SBF_DEMOTED;
1247 * The ALTPR and ALTCW flags aren't traditional lock manager flags, but are a
1248 * simple way to provide a big optimization to applications that can use them.
1251 static int can_be_granted(struct dlm_rsb *r, struct dlm_lkb *lkb, int now)
1253 uint32_t flags = lkb->lkb_exflags;
1255 int8_t alt = 0, rqmode = lkb->lkb_rqmode;
1257 rv = _can_be_granted(r, lkb, now);
1261 if (lkb->lkb_sbflags & DLM_SBF_DEMOTED)
1264 if (rqmode != DLM_LOCK_PR && flags & DLM_LKF_ALTPR)
1266 else if (rqmode != DLM_LOCK_CW && flags & DLM_LKF_ALTCW)
1270 lkb->lkb_rqmode = alt;
1271 rv = _can_be_granted(r, lkb, now);
1273 lkb->lkb_sbflags |= DLM_SBF_ALTMODE;
1275 lkb->lkb_rqmode = rqmode;
1281 static int grant_pending_convert(struct dlm_rsb *r, int high)
1283 struct dlm_lkb *lkb, *s;
1284 int hi, demoted, quit, grant_restart, demote_restart;
1292 list_for_each_entry_safe(lkb, s, &r->res_convertqueue, lkb_statequeue) {
1293 demoted = is_demoted(lkb);
1294 if (can_be_granted(r, lkb, 0)) {
1295 grant_lock_pending(r, lkb);
1298 hi = max_t(int, lkb->lkb_rqmode, hi);
1299 if (!demoted && is_demoted(lkb))
1306 if (demote_restart && !quit) {
1311 return max_t(int, high, hi);
1314 static int grant_pending_wait(struct dlm_rsb *r, int high)
1316 struct dlm_lkb *lkb, *s;
1318 list_for_each_entry_safe(lkb, s, &r->res_waitqueue, lkb_statequeue) {
1319 if (can_be_granted(r, lkb, 0))
1320 grant_lock_pending(r, lkb);
1322 high = max_t(int, lkb->lkb_rqmode, high);
/* On a resource we master (asserted below): process the convert queue, then
   the wait queue, and finally queue blocking ASTs to granted locks that are
   incompatible with the highest mode tracked in 'high'. */
1328 static void grant_pending_locks(struct dlm_rsb *r)
1330 struct dlm_lkb *lkb, *s;
/* DLM_LOCK_IV is the starting value; the test below checks whether the two
   passes left it unchanged */
1331 int high = DLM_LOCK_IV;
1333 DLM_ASSERT(is_master(r), dlm_print_rsb(r););
/* conversions are processed before new requests */
1335 high = grant_pending_convert(r, high);
1336 high = grant_pending_wait(r, high);
1338 if (high == DLM_LOCK_IV)
1342 * If there are locks left on the wait/convert queue then send blocking
1343 * ASTs to granted locks based on the largest requested mode (high)
1344 * found above. FIXME: highbast < high comparison not valid for PR/CW.
1347 list_for_each_entry_safe(lkb, s, &r->res_grantqueue, lkb_statequeue) {
/* only locks with a bast callback that have not yet been sent a bast for a
   mode >= high, and whose granted mode conflicts with 'high'.  The +1 on
   both indices presumably offsets DLM_LOCK_IV to row/column 0 -- confirm
   against __dlm_compat_matrix */
1348 if (lkb->lkb_bastaddr && (lkb->lkb_highbast < high) &&
1349 !__dlm_compat_matrix[lkb->lkb_grmode+1][high+1]) {
1350 queue_bast(r, lkb, high);
/* remember the mode so the same bast is not queued again */
1351 lkb->lkb_highbast = high;
/* For every lkb 'gr' on the list 'head' that has a bast callback, has
   lkb_highbast below lkb's requested mode, and is not mode-compatible with
   'lkb' (modes_compat), queue a blocking AST carrying lkb->lkb_rqmode and
   record that mode in gr->lkb_highbast. */
1356 static void send_bast_queue(struct dlm_rsb *r, struct list_head *head,
1357 struct dlm_lkb *lkb)
1361 list_for_each_entry(gr, head, lkb_statequeue) {
1362 if (gr->lkb_bastaddr &&
1363 gr->lkb_highbast < lkb->lkb_rqmode &&
1364 !modes_compat(gr, lkb)) {
1365 queue_bast(r, gr, lkb->lkb_rqmode);
/* recorded so the lkb_highbast test above filters this mode next time */
1366 gr->lkb_highbast = lkb->lkb_rqmode;
/* Queue blocking ASTs, on behalf of 'lkb', to conflicting locks on the
   resource's grant queue only. */
1371 static void send_blocking_asts(struct dlm_rsb *r, struct dlm_lkb *lkb)
1373 send_bast_queue(r, &r->res_grantqueue, lkb);
/* Like send_blocking_asts(), but covers the convert queue in addition to
   the grant queue. */
1376 static void send_blocking_asts_all(struct dlm_rsb *r, struct dlm_lkb *lkb)
1378 send_bast_queue(r, &r->res_grantqueue, lkb);
1379 send_bast_queue(r, &r->res_convertqueue, lkb);
1382 /* set_master(r, lkb) -- set the master nodeid of a resource
1384 The purpose of this function is to set the nodeid field in the given
1385 lkb using the nodeid field in the given rsb. If the rsb's nodeid is
1386 known, it can just be copied to the lkb and the function will return
1387 0. If the rsb's nodeid is _not_ known, it needs to be looked up
1388 before it can be copied to the lkb.
1390 When the rsb nodeid is being looked up remotely, the initial lkb
1391 causing the lookup is kept on the ls_waiters list waiting for the
1392 lookup reply. Other lkb's waiting for the same rsb lookup are kept
1393 on the rsb's res_lookup list until the master is verified.
1396 0: nodeid is set in rsb/lkb and the caller should go ahead and use it
1397 1: the rsb master is not available and the lkb has been placed on
/* Fill in lkb->lkb_nodeid from r->res_nodeid, starting a directory lookup
   (remote via send_lookup(), or local via dlm_dir_lookup()) when
   r->res_nodeid is still -1.  The return statements are not visible in this
   extract. */
1401 static int set_master(struct dlm_rsb *r, struct dlm_lkb *lkb)
1403 struct dlm_ls *ls = r->res_ls;
1404 int error, dir_nodeid, ret_nodeid, our_nodeid = dlm_our_nodeid();
/* recorded master is uncertain: clear the flag, make this lkb the
   first_lkid and use the recorded nodeid for it */
1406 if (rsb_flag(r, RSB_MASTER_UNCERTAIN)) {
1407 rsb_clear_flag(r, RSB_MASTER_UNCERTAIN);
1408 r->res_first_lkid = lkb->lkb_id;
1409 lkb->lkb_nodeid = r->res_nodeid;
/* a different lkb is already registered as first_lkid: park this one on
   the rsb's res_lookup list */
1413 if (r->res_first_lkid && r->res_first_lkid != lkb->lkb_id) {
1414 list_add_tail(&lkb->lkb_rsb_lookup, &r->res_lookup);
/* res_nodeid 0 and positive res_nodeid are both "known" cases: copy to the
   lkb */
1418 if (r->res_nodeid == 0) {
1419 lkb->lkb_nodeid = 0;
1423 if (r->res_nodeid > 0) {
1424 lkb->lkb_nodeid = r->res_nodeid;
/* from here on the master is unknown */
1428 DLM_ASSERT(r->res_nodeid == -1, dlm_print_rsb(r););
1430 dir_nodeid = dlm_dir_nodeid(r);
/* directory node is another node: this lkb becomes first_lkid and a lookup
   message is sent */
1432 if (dir_nodeid != our_nodeid) {
1433 r->res_first_lkid = lkb->lkb_id;
1434 send_lookup(r, lkb);
1439 /* It's possible for dlm_scand to remove an old rsb for
1440 this same resource from the toss list, us to create
1441 a new one, look up the master locally, and find it
1442 already exists just before dlm_scand does the
1443 dir_remove() on the previous rsb. */
/* we are the directory node: look the master up locally */
1445 error = dlm_dir_lookup(ls, our_nodeid, r->res_name,
1446 r->res_length, &ret_nodeid);
1449 log_debug(ls, "dir_lookup error %d %s", error, r->res_name);
/* lookup says we are the master: no first_lkid, lkb nodeid 0 */
1453 if (ret_nodeid == our_nodeid) {
1454 r->res_first_lkid = 0;
1456 lkb->lkb_nodeid = 0;
/* otherwise: record the returned master in both rsb and lkb, with this lkb
   as first_lkid */
1458 r->res_first_lkid = lkb->lkb_id;
1459 r->res_nodeid = ret_nodeid;
1460 lkb->lkb_nodeid = ret_nodeid;
/* Take every lkb off the rsb's res_lookup list and pass it to
   _request_lock(). */
1465 static void process_lookup_list(struct dlm_rsb *r)
1467 struct dlm_lkb *lkb, *safe;
/* _safe variant because each entry is unlinked inside the loop */
1469 list_for_each_entry_safe(lkb, safe, &r->res_lookup, lkb_rsb_lookup) {
1470 list_del(&lkb->lkb_rsb_lookup);
1471 _request_lock(r, lkb);
1476 /* confirm_master -- confirm (or deny) an rsb's master nodeid */
/* Acts on 'error', the outcome of a request, for an rsb that has a
   first_lkid registered.  Visible branches: clear first_lkid and release
   all lkbs parked on res_lookup; or clear first_lkid, promote the first
   parked lkb to first_lkid and submit it; otherwise log the unknown error.
   (NOTE(review): the case labels selecting these branches are not visible
   in this extract.) */
1478 static void confirm_master(struct dlm_rsb *r, int error)
1480 struct dlm_lkb *lkb;
/* no first_lkid registered on this rsb */
1482 if (!r->res_first_lkid)
1488 r->res_first_lkid = 0;
1489 process_lookup_list(r);
1493 /* the remote master didn't queue our NOQUEUE request;
1494 make a waiting lkb the first_lkid */
1496 r->res_first_lkid = 0;
1498 if (!list_empty(&r->res_lookup)) {
/* head of the res_lookup list */
1499 lkb = list_entry(r->res_lookup.next, struct dlm_lkb,
1501 list_del(&lkb->lkb_rsb_lookup);
1502 r->res_first_lkid = lkb->lkb_id;
1503 _request_lock(r, lkb);
1509 log_error(r->res_ls, "confirm_master unknown error %d", error);
/* Validate the arguments given to dlm_lock() and stash them in *args.
   Each test below detects one invalid mode/flag combination; the statements
   executed when a test fires are not visible in this extract. */
1513 static int set_lock_args(int mode, struct dlm_lksb *lksb, uint32_t flags,
1514 int namelen, uint32_t parent_lkid, void *ast,
1515 void *astarg, void *bast, struct dlm_args *args)
1519 /* check for invalid arg usage */
/* mode must lie in [0, DLM_LOCK_EX] */
1521 if (mode < 0 || mode > DLM_LOCK_EX)
/* the name length limit is only checked for non-convert calls */
1524 if (!(flags & DLM_LKF_CONVERT) && (namelen > DLM_RESNAME_MAXLEN))
/* CANCEL is an unlock flag, see set_unlock_args() */
1527 if (flags & DLM_LKF_CANCEL)
/* QUECVT and CONVDEADLK are only meaningful together with CONVERT */
1530 if (flags & DLM_LKF_QUECVT && !(flags & DLM_LKF_CONVERT))
1533 if (flags & DLM_LKF_CONVDEADLK && !(flags & DLM_LKF_CONVERT))
1536 if (flags & DLM_LKF_CONVDEADLK && flags & DLM_LKF_NOQUEUE)
/* EXPEDITE excludes CONVERT, QUECVT and NOQUEUE and requires mode NL */
1539 if (flags & DLM_LKF_EXPEDITE && flags & DLM_LKF_CONVERT)
1542 if (flags & DLM_LKF_EXPEDITE && flags & DLM_LKF_QUECVT)
1545 if (flags & DLM_LKF_EXPEDITE && flags & DLM_LKF_NOQUEUE)
1548 if (flags & DLM_LKF_EXPEDITE && mode != DLM_LOCK_NL)
/* VALBLK needs a value block pointer in the lksb */
1554 if (flags & DLM_LKF_VALBLK && !lksb->sb_lvbptr)
1557 /* parent/child locks not yet supported */
/* a conversion must name an existing lock id */
1561 if (flags & DLM_LKF_CONVERT && !lksb->sb_lkid)
1564 /* these args will be copied to the lkb in validate_lock_args,
1565 it cannot be done now because when converting locks, fields in
1566 an active lkb cannot be modified before locking the rsb */
1568 args->flags = flags;
1569 args->astaddr = ast;
1570 args->astparam = (long) astarg;
1571 args->bastaddr = bast;
/* Validate dlm_unlock() flags and stash flags/astarg in *args.  The test
   fires when any flag other than CANCEL, VALBLK, IVVALBLK or FORCEUNLOCK is
   set (the statement it guards is not visible in this extract). */
1579 static int set_unlock_args(uint32_t flags, void *astarg, struct dlm_args *args)
1581 if (flags & ~(DLM_LKF_CANCEL | DLM_LKF_VALBLK | DLM_LKF_IVVALBLK |
1582 DLM_LKF_FORCEUNLOCK))
1585 args->flags = flags;
1586 args->astparam = (long) astarg;
/* Check the state of the lkb against the arguments saved by
   set_lock_args() and then copy those arguments into the lkb.  The extra
   state checks apply to conversions only. */
1590 static int validate_lock_args(struct dlm_ls *ls, struct dlm_lkb *lkb,
1591 struct dlm_args *args)
1595 if (args->flags & DLM_LKF_CONVERT) {
/* DLM_IFL_MSTCPY marks a master-copy lkb (see receive_request()) */
1596 if (lkb->lkb_flags & DLM_IFL_MSTCPY)
/* QUECVT is checked against its own matrix, indexed by granted mode and
   new mode (both +1) */
1599 if (args->flags & DLM_LKF_QUECVT &&
1600 !__quecvt_compat_matrix[lkb->lkb_grmode+1][args->mode+1])
/* the lock being converted must currently be granted ... */
1604 if (lkb->lkb_status != DLM_LKSTS_GRANTED)
/* ... and must not have an operation waiting for a reply */
1607 if (lkb->lkb_wait_type)
/* checks done: transfer the saved arguments; status flags start from zero
   and the caller's pid is recorded as owner */
1611 lkb->lkb_exflags = args->flags;
1612 lkb->lkb_sbflags = 0;
1613 lkb->lkb_astaddr = args->astaddr;
1614 lkb->lkb_astparam = args->astparam;
1615 lkb->lkb_bastaddr = args->bastaddr;
1616 lkb->lkb_rqmode = args->mode;
1617 lkb->lkb_lksb = args->lksb;
1618 lkb->lkb_lvbptr = args->lksb->sb_lvbptr;
1619 lkb->lkb_ownpid = (int) current->pid;
/* Check the state of the lkb against the unlock/cancel arguments saved by
   set_unlock_args() and then copy flags and astparam into the lkb.  The
   statements guarded by the tests are not visible in this extract. */
1625 static int validate_unlock_args(struct dlm_lkb *lkb, struct dlm_args *args)
1629 if (lkb->lkb_flags & DLM_IFL_MSTCPY)
1632 if (args->flags & DLM_LKF_FORCEUNLOCK)
/* CANCEL requested on a lock that is already granted */
1635 if (args->flags & DLM_LKF_CANCEL &&
1636 lkb->lkb_status == DLM_LKSTS_GRANTED)
/* plain unlock requested on a lock that is not granted */
1639 if (!(args->flags & DLM_LKF_CANCEL) &&
1640 lkb->lkb_status != DLM_LKSTS_GRANTED)
/* an operation on this lkb is still waiting for a reply */
1644 if (lkb->lkb_wait_type)
1648 lkb->lkb_exflags = args->flags;
1649 lkb->lkb_sbflags = 0;
1650 lkb->lkb_astparam = args->astparam;
1658 * Four stage 4 varieties:
1659 * do_request(), do_convert(), do_unlock(), do_cancel()
1660 * These are called on the master node for the given lock and
1661 * from the central locking logic.
/* Stage 4 for a new request.  Three outcomes are visible: granted now
   (completion AST with 0); queued on the wait queue (-EINPROGRESS, blocking
   ASTs sent to the grant queue); or refused (completion AST with -EAGAIN,
   optionally preceded by blocking ASTs to grant and convert queues). */
1664 static int do_request(struct dlm_rsb *r, struct dlm_lkb *lkb)
/* now=1: the request is being evaluated immediately, not from a queue scan */
1668 if (can_be_granted(r, lkb, 1)) {
1670 queue_cast(r, lkb, 0);
1674 if (can_be_queued(lkb)) {
1675 error = -EINPROGRESS;
1676 add_lkb(r, lkb, DLM_LKSTS_WAITING);
1677 send_blocking_asts(r, lkb);
/* neither grantable nor queueable */
1682 if (force_blocking_asts(lkb))
1683 send_blocking_asts_all(r, lkb);
1684 queue_cast(r, lkb, -EAGAIN);
/* Stage 4 for a conversion.  Mirrors do_request(), except that a successful
   grant (or a demotion while queueing) is followed by grant_pending_locks()
   and a queued conversion goes on the convert queue. */
1690 static int do_convert(struct dlm_rsb *r, struct dlm_lkb *lkb)
1694 /* changing an existing lock may allow others to be granted */
1696 if (can_be_granted(r, lkb, 1)) {
1698 queue_cast(r, lkb, 0);
1699 grant_pending_locks(r);
1703 if (can_be_queued(lkb)) {
/* the lkb was demoted, so other queued locks are re-evaluated */
1704 if (is_demoted(lkb))
1705 grant_pending_locks(r);
1706 error = -EINPROGRESS;
1708 add_lkb(r, lkb, DLM_LKSTS_CONVERT);
1709 send_blocking_asts(r, lkb);
/* neither grantable nor queueable */
1714 if (force_blocking_asts(lkb))
1715 send_blocking_asts_all(r, lkb);
1716 queue_cast(r, lkb, -EAGAIN);
/* Stage 4 for an unlock: remove the lock, queue a completion AST with
   -DLM_EUNLOCK, re-evaluate the pending queues, and return -DLM_EUNLOCK. */
1722 static int do_unlock(struct dlm_rsb *r, struct dlm_lkb *lkb)
1724 remove_lock(r, lkb);
1725 queue_cast(r, lkb, -DLM_EUNLOCK);
1726 grant_pending_locks(r);
1727 return -DLM_EUNLOCK;
/* Stage 4 for a cancel: revert the lock, queue a completion AST with
   -DLM_ECANCEL, re-evaluate the pending queues, and return -DLM_ECANCEL. */
1730 static int do_cancel(struct dlm_rsb *r, struct dlm_lkb *lkb)
1732 revert_lock(r, lkb);
1733 queue_cast(r, lkb, -DLM_ECANCEL);
1734 grant_pending_locks(r);
1735 return -DLM_ECANCEL;
1739 * Four stage 3 varieties:
1740 * _request_lock(), _convert_lock(), _unlock_lock(), _cancel_lock()
1743 /* add a new lkb to a possibly new rsb, called by requesting process */
/* Stage 3 for a request: resolve the master with set_master(), then either
   send the request to the master or run do_request() here.  (The tests that
   pick between the two are not visible in this extract.) */
1745 static int _request_lock(struct dlm_rsb *r, struct dlm_lkb *lkb)
1749 /* set_master: sets lkb nodeid from r */
1751 error = set_master(r, lkb);
1760 /* receive_request() calls do_request() on remote node */
1761 error = send_request(r, lkb);
1763 error = do_request(r, lkb);
1768 /* change some property of an existing lkb, e.g. mode */
/* Stage 3 for a conversion: either send_convert() to the master or
   do_convert() here.  (The selecting test is not visible in this extract.) */
1770 static int _convert_lock(struct dlm_rsb *r, struct dlm_lkb *lkb)
1775 /* receive_convert() calls do_convert() on remote node */
1776 error = send_convert(r, lkb);
1778 error = do_convert(r, lkb);
1783 /* remove an existing lkb from the granted queue */
/* Stage 3 for an unlock: either send_unlock() to the master or do_unlock()
   here.  (The selecting test is not visible in this extract.) */
1785 static int _unlock_lock(struct dlm_rsb *r, struct dlm_lkb *lkb)
1790 /* receive_unlock() calls do_unlock() on remote node */
1791 error = send_unlock(r, lkb);
1793 error = do_unlock(r, lkb);
1798 /* remove an existing lkb from the convert or wait queue */
/* Stage 3 for a cancel: either send_cancel() to the master or do_cancel()
   here.  (The selecting test is not visible in this extract.) */
1800 static int _cancel_lock(struct dlm_rsb *r, struct dlm_lkb *lkb)
1805 /* receive_cancel() calls do_cancel() on remote node */
1806 error = send_cancel(r, lkb);
1808 error = do_cancel(r, lkb);
1814 * Four stage 2 varieties:
1815 * request_lock(), convert_lock(), unlock_lock(), cancel_lock()
/* Stage 2 for a request: validate the args into the lkb, find (R_CREATE)
   the rsb for 'name', publish the lock id in the caller's lksb and hand
   over to _request_lock(). */
1818 static int request_lock(struct dlm_ls *ls, struct dlm_lkb *lkb, char *name,
1819 int len, struct dlm_args *args)
1824 error = validate_lock_args(ls, lkb, args);
1828 error = find_rsb(ls, name, len, R_CREATE, &r);
/* the caller learns the new lock's id through its status block */
1835 lkb->lkb_lksb->sb_lkid = lkb->lkb_id;
1837 error = _request_lock(r, lkb);
/* Stage 2 for a conversion: take the rsb from the lkb, validate the args
   into the lkb and hand over to _convert_lock(). */
1846 static int convert_lock(struct dlm_ls *ls, struct dlm_lkb *lkb,
1847 struct dlm_args *args)
1852 r = lkb->lkb_resource;
1857 error = validate_lock_args(ls, lkb, args);
1861 error = _convert_lock(r, lkb);
/* Stage 2 for an unlock: take the rsb from the lkb, validate the unlock
   args into the lkb and hand over to _unlock_lock(). */
1868 static int unlock_lock(struct dlm_ls *ls, struct dlm_lkb *lkb,
1869 struct dlm_args *args)
1874 r = lkb->lkb_resource;
1879 error = validate_unlock_args(lkb, args);
1883 error = _unlock_lock(r, lkb);
/* Stage 2 for a cancel: take the rsb from the lkb, validate the unlock
   args into the lkb and hand over to _cancel_lock(). */
1890 static int cancel_lock(struct dlm_ls *ls, struct dlm_lkb *lkb,
1891 struct dlm_args *args)
1896 r = lkb->lkb_resource;
1901 error = validate_unlock_args(lkb, args);
1905 error = _cancel_lock(r, lkb);
1913 * Two stage 1 varieties: dlm_lock() and dlm_unlock()
/* Stage 1 entry point for acquiring or converting a lock.  Looks up the
   lockspace, finds the existing lkb (by lksb->sb_lkid) or creates a new
   one, checks the arguments with set_lock_args() and dispatches to
   convert_lock() or request_lock() depending on DLM_LKF_CONVERT.  Some
   parameter lines are not visible in this extract. */
1916 int dlm_lock(dlm_lockspace_t *lockspace,
1918 struct dlm_lksb *lksb,
1921 unsigned int namelen,
1922 uint32_t parent_lkid,
1923 void (*ast) (void *astarg),
1925 void (*bast) (void *astarg, int mode))
1928 struct dlm_lkb *lkb;
1929 struct dlm_args args;
/* 'convert' is non-zero when the caller asked for a conversion */
1930 int error, convert = flags & DLM_LKF_CONVERT;
1932 ls = dlm_find_lockspace_local(lockspace);
1939 error = find_lkb(ls, lksb->sb_lkid, &lkb);
1941 error = create_lkb(ls, &lkb);
1946 error = set_lock_args(mode, lksb, flags, namelen, parent_lkid, ast,
1947 astarg, bast, &args);
1952 error = convert_lock(ls, lkb, &args);
1954 error = request_lock(ls, lkb, name, namelen, &args);
/* post-processing of the result code; the statements guarded by these
   tests are not visible in this extract */
1956 if (error == -EINPROGRESS)
1959 if (convert || error)
1961 if (error == -EAGAIN)
/* release what was taken on entry */
1964 unlock_recovery(ls);
1965 dlm_put_lockspace(ls);
/* Stage 1 entry point for releasing a lock or cancelling a pending
   operation.  Looks up the lockspace and the lkb (by lkid), checks the
   flags with set_unlock_args() and dispatches to cancel_lock() when
   DLM_LKF_CANCEL is set, otherwise to unlock_lock().  Some parameter lines
   are not visible in this extract. */
1969 int dlm_unlock(dlm_lockspace_t *lockspace,
1972 struct dlm_lksb *lksb,
1976 struct dlm_lkb *lkb;
1977 struct dlm_args args;
1980 ls = dlm_find_lockspace_local(lockspace);
1986 error = find_lkb(ls, lkid, &lkb);
1990 error = set_unlock_args(flags, astarg, &args);
1994 if (flags & DLM_LKF_CANCEL)
1995 error = cancel_lock(ls, lkb, &args);
1997 error = unlock_lock(ls, lkb, &args);
/* -DLM_EUNLOCK / -DLM_ECANCEL are what do_unlock() / do_cancel() return
   (the statement guarded here is not visible in this extract) */
1999 if (error == -DLM_EUNLOCK || error == -DLM_ECANCEL)
2004 unlock_recovery(ls);
2005 dlm_put_lockspace(ls);
2010 * send/receive routines for remote operations and replies
2014 * send_request receive_request
2015 * send_convert receive_convert
2016 * send_unlock receive_unlock
2017 * send_cancel receive_cancel
2018 * send_grant receive_grant
2019 * send_bast receive_bast
2020 * send_lookup receive_lookup
2021 * send_remove receive_remove
2024 * receive_request_reply send_request_reply
2025 * receive_convert_reply send_convert_reply
2026 * receive_unlock_reply send_unlock_reply
2027 * receive_cancel_reply send_cancel_reply
2028 * receive_lookup_reply send_lookup_reply
/* Obtain a lowcomms buffer for a message of type 'mstype' addressed to
   'to_nodeid', zero it and fill in the common header.  The buffer is
   sizeof(struct dlm_message) plus a type-dependent payload: the resource
   name for REQUEST/LOOKUP/REMOVE, or the LVB for the listed lock messages
   when an lkb with an LVB pointer was supplied.  'lkb' may be NULL. */
2031 static int create_message(struct dlm_rsb *r, struct dlm_lkb *lkb,
2032 int to_nodeid, int mstype,
2033 struct dlm_message **ms_ret,
2034 struct dlm_mhandle **mh_ret)
2036 struct dlm_message *ms;
2037 struct dlm_mhandle *mh;
2039 int mb_len = sizeof(struct dlm_message);
/* these types carry the resource name */
2042 case DLM_MSG_REQUEST:
2043 case DLM_MSG_LOOKUP:
2044 case DLM_MSG_REMOVE:
2045 mb_len += r->res_length;
/* these types carry the lock value block, if the lkb has one */
2047 case DLM_MSG_CONVERT:
2048 case DLM_MSG_UNLOCK:
2049 case DLM_MSG_REQUEST_REPLY:
2050 case DLM_MSG_CONVERT_REPLY:
2052 if (lkb && lkb->lkb_lvbptr)
2053 mb_len += r->res_ls->ls_lvblen;
2057 /* get_buffer gives us a message handle (mh) that we need to
2058 pass into lowcomms_commit and a message buffer (mb) that we
2059 write our data into */
2061 mh = dlm_lowcomms_get_buffer(to_nodeid, mb_len, GFP_KERNEL, &mb);
/* fields not set explicitly below start out as zero */
2065 memset(mb, 0, mb_len);
2067 ms = (struct dlm_message *) mb;
2069 ms->m_header.h_version = (DLM_HEADER_MAJOR | DLM_HEADER_MINOR);
2070 ms->m_header.h_lockspace = r->res_ls->ls_global_id;
2071 ms->m_header.h_nodeid = dlm_our_nodeid();
/* h_length includes the payload; receive_extralen() relies on this */
2072 ms->m_header.h_length = mb_len;
2073 ms->m_header.h_cmd = DLM_MSG;
2075 ms->m_type = mstype;
2082 /* further lowcomms enhancements or alternate implementations may make
2083 the return value from this function useful at some point */
/* Run dlm_message_out() on the message and commit the buffer to lowcomms.
   (NOTE(review): dlm_message_out() presumably converts to wire byte order
   -- confirm.) */
2085 static int send_message(struct dlm_mhandle *mh, struct dlm_message *ms)
2087 dlm_message_out(ms);
2088 dlm_lowcomms_commit_buffer(mh);
2092 static void send_args(struct dlm_rsb *r, struct dlm_lkb *lkb,
2093 struct dlm_message *ms)
2095 ms->m_nodeid = lkb->lkb_nodeid;
2096 ms->m_pid = lkb->lkb_ownpid;
2097 ms->m_lkid = lkb->lkb_id;
2098 ms->m_remid = lkb->lkb_remid;
2099 ms->m_exflags = lkb->lkb_exflags;
2100 ms->m_sbflags = lkb->lkb_sbflags;
2101 ms->m_flags = lkb->lkb_flags;
2102 ms->m_lvbseq = lkb->lkb_lvbseq;
2103 ms->m_status = lkb->lkb_status;
2104 ms->m_grmode = lkb->lkb_grmode;
2105 ms->m_rqmode = lkb->lkb_rqmode;
2106 ms->m_hash = r->res_hash;
2108 /* m_result and m_bastmode are set from function args,
2109 not from lkb fields */
2111 if (lkb->lkb_bastaddr)
2112 ms->m_asts |= AST_BAST;
2113 if (lkb->lkb_astaddr)
2114 ms->m_asts |= AST_COMP;
2116 if (ms->m_type == DLM_MSG_REQUEST || ms->m_type == DLM_MSG_LOOKUP)
2117 memcpy(ms->m_extra, r->res_name, r->res_length);
2119 else if (lkb->lkb_lvbptr)
2120 memcpy(ms->m_extra, lkb->lkb_lvbptr, r->res_ls->ls_lvblen);
/* Shared send path for request/convert/unlock/cancel: put the lkb on the
   waiters list for 'mstype', build the message addressed to the rsb's
   master (r->res_nodeid), fill it from the lkb and send it.  The lkb is
   taken off the waiters list again on the failure path (the label/tests
   leading there are not visible in this extract). */
2124 static int send_common(struct dlm_rsb *r, struct dlm_lkb *lkb, int mstype)
2126 struct dlm_message *ms;
2127 struct dlm_mhandle *mh;
2128 int to_nodeid, error;
/* registered before sending; presumably so the reply cannot arrive before
   the lkb is on the list -- confirm */
2130 add_to_waiters(lkb, mstype);
2132 to_nodeid = r->res_nodeid;
2134 error = create_message(r, lkb, to_nodeid, mstype, &ms, &mh);
2138 send_args(r, lkb, ms);
2140 error = send_message(mh, ms);
2146 remove_from_waiters(lkb);
/* Send a DLM_MSG_REQUEST for lkb to the rsb's master via send_common(). */
2150 static int send_request(struct dlm_rsb *r, struct dlm_lkb *lkb)
2152 return send_common(r, lkb, DLM_MSG_REQUEST);
/* Send a DLM_MSG_CONVERT via send_common().  For a down-conversion that was
   sent successfully, no reply is awaited: the lkb is taken off the waiters
   list and a reply with m_result 0 is synthesised from the lockspace's stub
   message. */
2155 static int send_convert(struct dlm_rsb *r, struct dlm_lkb *lkb)
2159 error = send_common(r, lkb, DLM_MSG_CONVERT);
2161 /* down conversions go without a reply from the master */
2162 if (!error && down_conversion(lkb)) {
2163 remove_from_waiters(lkb);
2164 r->res_ls->ls_stub_ms.m_result = 0;
2165 __receive_convert_reply(r, lkb, &r->res_ls->ls_stub_ms);
2171 /* FIXME: if this lkb is the only lock we hold on the rsb, then set
2172 MASTER_UNCERTAIN to force the next request on the rsb to confirm
2173 that the master is still correct. */
/* Send a DLM_MSG_UNLOCK for lkb to the rsb's master via send_common(). */
2175 static int send_unlock(struct dlm_rsb *r, struct dlm_lkb *lkb)
2177 return send_common(r, lkb, DLM_MSG_UNLOCK);
/* Send a DLM_MSG_CANCEL for lkb to the rsb's master via send_common(). */
2180 static int send_cancel(struct dlm_rsb *r, struct dlm_lkb *lkb)
2182 return send_common(r, lkb, DLM_MSG_CANCEL);
/* Send a DLM_MSG_GRANT for lkb to the node recorded in lkb->lkb_nodeid.
   Unlike send_common(), the lkb is not put on the waiters list. */
2185 static int send_grant(struct dlm_rsb *r, struct dlm_lkb *lkb)
2187 struct dlm_message *ms;
2188 struct dlm_mhandle *mh;
2189 int to_nodeid, error;
2191 to_nodeid = lkb->lkb_nodeid;
2193 error = create_message(r, lkb, to_nodeid, DLM_MSG_GRANT, &ms, &mh);
2197 send_args(r, lkb, ms);
2201 error = send_message(mh, ms);
/* Send a DLM_MSG_BAST for lkb, carrying the blocked 'mode' in m_bastmode,
   to the node recorded in lkb->lkb_nodeid. */
2206 static int send_bast(struct dlm_rsb *r, struct dlm_lkb *lkb, int mode)
2208 struct dlm_message *ms;
2209 struct dlm_mhandle *mh;
2210 int to_nodeid, error;
2212 to_nodeid = lkb->lkb_nodeid;
/* NULL lkb: create_message() reserves no LVB space for this message */
2214 error = create_message(r, NULL, to_nodeid, DLM_MSG_BAST, &ms, &mh);
2218 send_args(r, lkb, ms);
/* set from the function argument, not from an lkb field */
2220 ms->m_bastmode = mode;
2222 error = send_message(mh, ms);
/* Ask the resource's directory node (dlm_dir_nodeid(r)) who the master is.
   The lkb waits on the waiters list for the reply and is taken off again on
   the failure path (the label/tests leading there are not visible in this
   extract). */
2227 static int send_lookup(struct dlm_rsb *r, struct dlm_lkb *lkb)
2229 struct dlm_message *ms;
2230 struct dlm_mhandle *mh;
2231 int to_nodeid, error;
2233 add_to_waiters(lkb, DLM_MSG_LOOKUP);
2235 to_nodeid = dlm_dir_nodeid(r);
2237 error = create_message(r, NULL, to_nodeid, DLM_MSG_LOOKUP, &ms, &mh);
2241 send_args(r, lkb, ms);
2243 error = send_message(mh, ms);
2249 remove_from_waiters(lkb);
/* Send a DLM_MSG_REMOVE for the resource to its directory node.  Only the
   resource name and hash are filled in; send_args() is not used because no
   lkb is involved. */
2253 static int send_remove(struct dlm_rsb *r)
2255 struct dlm_message *ms;
2256 struct dlm_mhandle *mh;
2257 int to_nodeid, error;
2259 to_nodeid = dlm_dir_nodeid(r);
2261 error = create_message(r, NULL, to_nodeid, DLM_MSG_REMOVE, &ms, &mh);
2265 memcpy(ms->m_extra, r->res_name, r->res_length);
2266 ms->m_hash = r->res_hash;
2268 error = send_message(mh, ms);
/* Shared send path for the four *_reply messages: build a message of type
   'mstype' addressed to lkb->lkb_nodeid, fill it from the lkb and send it.
   (The remaining parameter line and the use of the result value are not
   visible in this extract.) */
2273 static int send_common_reply(struct dlm_rsb *r, struct dlm_lkb *lkb,
2276 struct dlm_message *ms;
2277 struct dlm_mhandle *mh;
2278 int to_nodeid, error;
2280 to_nodeid = lkb->lkb_nodeid;
2282 error = create_message(r, lkb, to_nodeid, mstype, &ms, &mh);
2286 send_args(r, lkb, ms);
2290 error = send_message(mh, ms);
/* Reply to a request with result 'rv' (DLM_MSG_REQUEST_REPLY). */
2295 static int send_request_reply(struct dlm_rsb *r, struct dlm_lkb *lkb, int rv)
2297 return send_common_reply(r, lkb, DLM_MSG_REQUEST_REPLY, rv);
/* Reply to a conversion with result 'rv' (DLM_MSG_CONVERT_REPLY). */
2300 static int send_convert_reply(struct dlm_rsb *r, struct dlm_lkb *lkb, int rv)
2302 return send_common_reply(r, lkb, DLM_MSG_CONVERT_REPLY, rv);
/* Reply to an unlock with result 'rv' (DLM_MSG_UNLOCK_REPLY). */
2305 static int send_unlock_reply(struct dlm_rsb *r, struct dlm_lkb *lkb, int rv)
2307 return send_common_reply(r, lkb, DLM_MSG_UNLOCK_REPLY, rv);
/* Reply to a cancel with result 'rv' (DLM_MSG_CANCEL_REPLY). */
2310 static int send_cancel_reply(struct dlm_rsb *r, struct dlm_lkb *lkb, int rv)
2312 return send_common_reply(r, lkb, DLM_MSG_CANCEL_REPLY, rv);
/* Answer a lookup: send a DLM_MSG_LOOKUP_REPLY back to the node that sent
   'ms_in', echoing its m_lkid and reporting 'ret_nodeid' in m_nodeid.  The
   lockspace's stub rsb stands in for a real resource when building the
   message. */
2315 static int send_lookup_reply(struct dlm_ls *ls, struct dlm_message *ms_in,
2316 int ret_nodeid, int rv)
2318 struct dlm_rsb *r = &ls->ls_stub_rsb;
2319 struct dlm_message *ms;
2320 struct dlm_mhandle *mh;
/* the reply goes back to the sender named in the incoming header */
2321 int error, nodeid = ms_in->m_header.h_nodeid;
2323 error = create_message(r, NULL, nodeid, DLM_MSG_LOOKUP_REPLY, &ms, &mh);
/* receive_lookup_reply() finds the waiting lkb through this id */
2327 ms->m_lkid = ms_in->m_lkid;
2329 ms->m_nodeid = ret_nodeid;
2331 error = send_message(mh, ms);
2336 /* which args we save from a received message depends heavily on the type
2337 of message, unlike the send side where we can safely send everything about
2338 the lkb for any type of message */
/* Take the external flags from the message and merge the low 16 bits of
   m_flags into lkb_flags, keeping the lkb's own high 16 bits. */
2340 static void receive_flags(struct dlm_lkb *lkb, struct dlm_message *ms)
2342 lkb->lkb_exflags = ms->m_exflags;
2343 lkb->lkb_flags = (lkb->lkb_flags & 0xFFFF0000) |
2344 (ms->m_flags & 0x0000FFFF);
/* Reply-side counterpart of receive_flags(): take the status-block flags
   (not the external flags) from the message and merge the low 16 bits of
   m_flags into lkb_flags, keeping the lkb's own high 16 bits. */
2347 static void receive_flags_reply(struct dlm_lkb *lkb, struct dlm_message *ms)
2349 lkb->lkb_sbflags = ms->m_sbflags;
2350 lkb->lkb_flags = (lkb->lkb_flags & 0xFFFF0000) |
2351 (ms->m_flags & 0x0000FFFF);
/* Number of payload bytes following the fixed struct dlm_message, derived
   from the header's h_length (set by create_message() on the sender).
   NOTE(review): h_length comes off the wire; a value smaller than the
   struct size would not yield a sensible length here. */
2354 static int receive_extralen(struct dlm_message *ms)
2356 return (ms->m_header.h_length - sizeof(struct dlm_message));
2359 static int receive_lvb(struct dlm_ls *ls, struct dlm_lkb *lkb,
2360 struct dlm_message *ms)
2364 if (lkb->lkb_exflags & DLM_LKF_VALBLK) {
2365 if (!lkb->lkb_lvbptr)
2366 lkb->lkb_lvbptr = allocate_lvb(ls);
2367 if (!lkb->lkb_lvbptr)
2369 len = receive_extralen(ms);
2370 memcpy(lkb->lkb_lvbptr, ms->m_extra, len);
/* Initialise a freshly created master-copy lkb from a received request
   message, including the LVB if one is carried.  (The statements after the
   receive_lvb() test are not visible in this extract.) */
2375 static int receive_request_args(struct dlm_ls *ls, struct dlm_lkb *lkb,
2376 struct dlm_message *ms)
/* the owner is the sending node; its lock id becomes our remote id */
2378 lkb->lkb_nodeid = ms->m_header.h_nodeid;
2379 lkb->lkb_ownpid = ms->m_pid;
2380 lkb->lkb_remid = ms->m_lkid;
2381 lkb->lkb_grmode = DLM_LOCK_IV;
2382 lkb->lkb_rqmode = ms->m_rqmode;
/* the callback "addresses" are just the non-zero/zero presence bits sent by
   send_args(), not callable pointers */
2383 lkb->lkb_bastaddr = (void *) (long) (ms->m_asts & AST_BAST);
2384 lkb->lkb_astaddr = (void *) (long) (ms->m_asts & AST_COMP);
2386 DLM_ASSERT(is_master_copy(lkb), dlm_print_lkb(lkb););
2388 if (receive_lvb(ls, lkb, ms))
/* Validate a received conversion against the master-copy lkb and take over
   the new requested mode and lvbseq.  Checks visible here: the sender must
   be the lkb's owner node, the lkb must be a master copy and currently
   granted, and the LVB must be receivable.  (The statements guarded by the
   tests are not visible in this extract.) */
2394 static int receive_convert_args(struct dlm_ls *ls, struct dlm_lkb *lkb,
2395 struct dlm_message *ms)
2397 if (lkb->lkb_nodeid != ms->m_header.h_nodeid) {
2398 log_error(ls, "convert_args nodeid %d %d lkid %x %x",
2399 lkb->lkb_nodeid, ms->m_header.h_nodeid,
2400 lkb->lkb_id, lkb->lkb_remid);
2404 if (!is_master_copy(lkb))
2407 if (lkb->lkb_status != DLM_LKSTS_GRANTED)
2410 if (receive_lvb(ls, lkb, ms))
2413 lkb->lkb_rqmode = ms->m_rqmode;
2414 lkb->lkb_lvbseq = ms->m_lvbseq;
/* Validate a received unlock: the lkb must be a master copy and the LVB, if
   any, must be receivable.  (The statements guarded by the tests are not
   visible in this extract.) */
2419 static int receive_unlock_args(struct dlm_ls *ls, struct dlm_lkb *lkb,
2420 struct dlm_message *ms)
2422 if (!is_master_copy(lkb))
2424 if (receive_lvb(ls, lkb, ms))
2429 /* We fill in the stub-lkb fields with the info that send_xxxx_reply()
2430 uses to send a reply and that the remote end uses to process the reply. */
/* Point the lockspace's stub lkb at the sender of 'ms' (node id and the
   sender's lock id) so that a reply can be addressed without a real lkb. */
2432 static void setup_stub_lkb(struct dlm_ls *ls, struct dlm_message *ms)
2434 struct dlm_lkb *lkb = &ls->ls_stub_lkb;
2435 lkb->lkb_nodeid = ms->m_header.h_nodeid;
2436 lkb->lkb_remid = ms->m_lkid;
/* Master-side handler for DLM_MSG_REQUEST: create a master-copy lkb from
   the message, find the rsb (R_MASTER) named in the payload, run
   do_request() and reply with its result.  On the failure path the reply is
   sent through the stub rsb/lkb. */
2439 static void receive_request(struct dlm_ls *ls, struct dlm_message *ms)
2441 struct dlm_lkb *lkb;
2445 error = create_lkb(ls, &lkb);
2449 receive_flags(lkb, ms);
/* mark as master copy before the args are taken; receive_request_args()
   asserts is_master_copy() */
2450 lkb->lkb_flags |= DLM_IFL_MSTCPY;
2451 error = receive_request_args(ls, lkb, ms);
/* for a request the payload is the resource name */
2457 namelen = receive_extralen(ms);
2459 error = find_rsb(ls, ms->m_extra, namelen, R_MASTER, &r);
2468 error = do_request(r, lkb);
2469 send_request_reply(r, lkb, error);
2474 if (error == -EINPROGRESS)
/* failure path: no usable lkb/rsb, so reply through the stubs */
2481 setup_stub_lkb(ls, ms);
2482 send_request_reply(&ls->ls_stub_rsb, &ls->ls_stub_lkb, error);
/* Master-side handler for DLM_MSG_CONVERT: find the lkb by the message's
   m_remid, validate the args, run do_convert() and reply.  'reply' is
   cleared for down-conversions, matching send_convert() which does not wait
   for one.  On the failure path the reply is sent through the stub
   rsb/lkb. */
2485 static void receive_convert(struct dlm_ls *ls, struct dlm_message *ms)
2487 struct dlm_lkb *lkb;
2489 int error, reply = 1;
2491 error = find_lkb(ls, ms->m_remid, &lkb);
2495 r = lkb->lkb_resource;
2500 receive_flags(lkb, ms);
2501 error = receive_convert_args(ls, lkb, ms);
/* evaluated before do_convert() changes the lkb's modes */
2504 reply = !down_conversion(lkb);
2506 error = do_convert(r, lkb);
2509 send_convert_reply(r, lkb, error);
2517 setup_stub_lkb(ls, ms);
2518 send_convert_reply(&ls->ls_stub_rsb, &ls->ls_stub_lkb, error);
/* Master-side handler for DLM_MSG_UNLOCK: find the lkb by the message's
   m_remid, validate the args, run do_unlock() and reply with its result.
   On the failure path the reply is sent through the stub rsb/lkb. */
2521 static void receive_unlock(struct dlm_ls *ls, struct dlm_message *ms)
2523 struct dlm_lkb *lkb;
2527 error = find_lkb(ls, ms->m_remid, &lkb);
2531 r = lkb->lkb_resource;
2536 receive_flags(lkb, ms);
2537 error = receive_unlock_args(ls, lkb, ms);
2541 error = do_unlock(r, lkb);
2543 send_unlock_reply(r, lkb, error);
2551 setup_stub_lkb(ls, ms);
2552 send_unlock_reply(&ls->ls_stub_rsb, &ls->ls_stub_lkb, error);
/* Master-side handler for DLM_MSG_CANCEL: find the lkb by the message's
   m_remid, run do_cancel() and reply with its result.  On the failure path
   the reply is sent through the stub rsb/lkb. */
2555 static void receive_cancel(struct dlm_ls *ls, struct dlm_message *ms)
2557 struct dlm_lkb *lkb;
2561 error = find_lkb(ls, ms->m_remid, &lkb);
2565 receive_flags(lkb, ms);
2567 r = lkb->lkb_resource;
2572 error = do_cancel(r, lkb);
2573 send_cancel_reply(r, lkb, error);
2581 setup_stub_lkb(ls, ms);
2582 send_cancel_reply(&ls->ls_stub_rsb, &ls->ls_stub_lkb, error);
/* Handler for an asynchronous DLM_MSG_GRANT from the master: find our
   process-copy lkb by m_remid, take over the reply flags, grant the lock
   locally and queue a completion AST with status 0. */
2585 static void receive_grant(struct dlm_ls *ls, struct dlm_message *ms)
2587 struct dlm_lkb *lkb;
2591 error = find_lkb(ls, ms->m_remid, &lkb);
2593 log_error(ls, "receive_grant no lkb");
2596 DLM_ASSERT(is_process_copy(lkb), dlm_print_lkb(lkb););
2598 r = lkb->lkb_resource;
2603 receive_flags_reply(lkb, ms);
2604 grant_lock_pc(r, lkb, ms);
2605 queue_cast(r, lkb, 0);
/* Handler for an asynchronous DLM_MSG_BAST from the master: find our
   process-copy lkb by m_remid and queue a blocking AST for the mode carried
   in m_bastmode. */
2612 static void receive_bast(struct dlm_ls *ls, struct dlm_message *ms)
2614 struct dlm_lkb *lkb;
2618 error = find_lkb(ls, ms->m_remid, &lkb);
2620 log_error(ls, "receive_bast no lkb");
2623 DLM_ASSERT(is_process_copy(lkb), dlm_print_lkb(lkb););
2625 r = lkb->lkb_resource;
2630 queue_bast(r, lkb, ms->m_bastmode);
/* Directory-side handler for DLM_MSG_LOOKUP.  Verifies that this node is
   the directory node for the message's hash, looks the name up in the
   directory and either treats the lookup as a request (when we are also the
   master) or sends a lookup reply. */
2637 static void receive_lookup(struct dlm_ls *ls, struct dlm_message *ms)
2639 int len, error, ret_nodeid, dir_nodeid, from_nodeid, our_nodeid;
2641 from_nodeid = ms->m_header.h_nodeid;
2642 our_nodeid = dlm_our_nodeid();
/* for a lookup the payload is the resource name */
2644 len = receive_extralen(ms);
2646 dir_nodeid = dlm_hash2nodeid(ls, ms->m_hash);
/* the sender thinks we are the directory node but the hash maps elsewhere */
2647 if (dir_nodeid != our_nodeid) {
2648 log_error(ls, "lookup dir_nodeid %d from %d",
2649 dir_nodeid, from_nodeid);
2655 error = dlm_dir_lookup(ls, from_nodeid, ms->m_extra, len, &ret_nodeid);
2657 /* Optimization: we're master so treat lookup as a request */
2658 if (!error && ret_nodeid == our_nodeid) {
2659 receive_request(ls, ms);
2663 send_lookup_reply(ls, ms, ret_nodeid, error);
/* Directory-side handler for DLM_MSG_REMOVE: verify that this node is the
   directory node for the message's hash, then remove the directory entry
   for the name in the payload on behalf of the sending node.  No reply is
   sent. */
2666 static void receive_remove(struct dlm_ls *ls, struct dlm_message *ms)
2668 int len, dir_nodeid, from_nodeid;
2670 from_nodeid = ms->m_header.h_nodeid;
2672 len = receive_extralen(ms);
2674 dir_nodeid = dlm_hash2nodeid(ls, ms->m_hash);
2675 if (dir_nodeid != dlm_our_nodeid()) {
2676 log_error(ls, "remove dir entry dir_nodeid %d from %d",
2677 dir_nodeid, from_nodeid);
2681 dlm_dir_remove_entry(ls, from_nodeid, ms->m_extra, len);
/* Requester-side handler for DLM_MSG_REQUEST_REPLY.  Finds our process-copy
   lkb, takes it off the waiters list and acts on m_result, the value
   do_request() returned on the master.  (The case labels selecting the
   branches below are not visible in this extract.) */
2684 static void receive_request_reply(struct dlm_ls *ls, struct dlm_message *ms)
2686 struct dlm_lkb *lkb;
2690 error = find_lkb(ls, ms->m_remid, &lkb);
2692 log_error(ls, "receive_request_reply no lkb");
2695 DLM_ASSERT(is_process_copy(lkb), dlm_print_lkb(lkb););
/* remember what we were waiting for before the wait state is cleared */
2697 mstype = lkb->lkb_wait_type;
2698 error = remove_from_waiters(lkb);
2700 log_error(ls, "receive_request_reply not on waiters");
2704 /* this is the value returned from do_request() on the master */
2705 error = ms->m_result;
2707 r = lkb->lkb_resource;
2711 /* Optimization: the dir node was also the master, so it took our
2712 lookup as a request and sent request reply instead of lookup reply */
2713 if (mstype == DLM_MSG_LOOKUP) {
/* the replying node is therefore the master */
2714 r->res_nodeid = ms->m_header.h_nodeid;
2715 lkb->lkb_nodeid = r->res_nodeid;
2720 /* request would block (be queued) on remote master;
2721 the unhold undoes the original ref from create_lkb()
2722 so it leads to the lkb being freed */
2723 queue_cast(r, lkb, -EAGAIN);
2724 confirm_master(r, -EAGAIN);
2730 /* request was queued or granted on remote master */
2731 receive_flags_reply(lkb, ms);
/* the master's lock id, used as m_remid in later messages */
2732 lkb->lkb_remid = ms->m_lkid;
2734 add_lkb(r, lkb, DLM_LKSTS_WAITING);
2736 grant_lock_pc(r, lkb, ms);
2737 queue_cast(r, lkb, 0);
2739 confirm_master(r, error);
2744 /* find_rsb failed to find rsb or rsb wasn't master */
/* forget the master on the lkb and run the request again */
2746 lkb->lkb_nodeid = -1;
2747 _request_lock(r, lkb);
2751 log_error(ls, "receive_request_reply error %d", error);
/* Act on the result of a conversion (m_result, the value do_convert()
   returned on the master): would-block queues a completion AST with
   -EAGAIN, queued moves the lkb to the convert state, granted applies the
   grant locally and queues a completion AST with 0; anything else is
   logged.  Also called with a stub message by send_convert().  (The case
   labels are not visible in this extract.) */
2760 static void __receive_convert_reply(struct dlm_rsb *r, struct dlm_lkb *lkb,
2761 struct dlm_message *ms)
2763 int error = ms->m_result;
2765 /* this is the value returned from do_convert() on the master */
2769 /* convert would block (be queued) on remote master */
2770 queue_cast(r, lkb, -EAGAIN);
2774 /* convert was queued on remote master */
2776 add_lkb(r, lkb, DLM_LKSTS_CONVERT);
2780 /* convert was granted on remote master */
2781 receive_flags_reply(lkb, ms);
2782 grant_lock_pc(r, lkb, ms);
2783 queue_cast(r, lkb, 0);
2787 log_error(r->res_ls, "receive_convert_reply error %d", error);
/* Resolve the lkb's rsb and pass the reply to __receive_convert_reply().
   (Statements around the call are not visible in this extract.) */
2791 static void _receive_convert_reply(struct dlm_lkb *lkb, struct dlm_message *ms)
2793 struct dlm_rsb *r = lkb->lkb_resource;
2798 __receive_convert_reply(r, lkb, ms);
/* Requester-side handler for DLM_MSG_CONVERT_REPLY: find our process-copy
   lkb by m_remid, take it off the waiters list and process the reply.  A
   missing lkb or an lkb that is not on the waiters list is logged. */
2804 static void receive_convert_reply(struct dlm_ls *ls, struct dlm_message *ms)
2806 struct dlm_lkb *lkb;
2809 error = find_lkb(ls, ms->m_remid, &lkb);
2811 log_error(ls, "receive_convert_reply no lkb");
2814 DLM_ASSERT(is_process_copy(lkb), dlm_print_lkb(lkb););
2816 error = remove_from_waiters(lkb);
2818 log_error(ls, "receive_convert_reply not on waiters");
2822 _receive_convert_reply(lkb, ms);
/* Act on the result of an unlock (m_result, the value do_unlock() returned
   on the master): the visible success branch takes over the reply flags,
   removes the process-copy lock and queues a completion AST with
   -DLM_EUNLOCK; other results are logged.  (The case labels are not visible
   in this extract.) */
2827 static void _receive_unlock_reply(struct dlm_lkb *lkb, struct dlm_message *ms)
2829 struct dlm_rsb *r = lkb->lkb_resource;
2830 int error = ms->m_result;
2835 /* this is the value returned from do_unlock() on the master */
2839 receive_flags_reply(lkb, ms);
2840 remove_lock_pc(r, lkb);
2841 queue_cast(r, lkb, -DLM_EUNLOCK);
2844 log_error(r->res_ls, "receive_unlock_reply error %d", error);
/* Requester-side handler for DLM_MSG_UNLOCK_REPLY: find our process-copy
   lkb by m_remid, take it off the waiters list and process the reply.  A
   missing lkb or an lkb that is not on the waiters list is logged. */
2851 static void receive_unlock_reply(struct dlm_ls *ls, struct dlm_message *ms)
2853 struct dlm_lkb *lkb;
2856 error = find_lkb(ls, ms->m_remid, &lkb);
2858 log_error(ls, "receive_unlock_reply no lkb");
2861 DLM_ASSERT(is_process_copy(lkb), dlm_print_lkb(lkb););
2863 error = remove_from_waiters(lkb);
2865 log_error(ls, "receive_unlock_reply not on waiters");
2869 _receive_unlock_reply(lkb, ms);
/* Act on the result of a cancel (m_result, the value do_cancel() returned
   on the master): the visible success branch takes over the reply flags,
   reverts the process-copy lock and queues a completion AST with
   -DLM_ECANCEL; other results are logged.  (The case labels are not visible
   in this extract.) */
2874 static void _receive_cancel_reply(struct dlm_lkb *lkb, struct dlm_message *ms)
2876 struct dlm_rsb *r = lkb->lkb_resource;
2877 int error = ms->m_result;
2882 /* this is the value returned from do_cancel() on the master */
2886 receive_flags_reply(lkb, ms);
2887 revert_lock_pc(r, lkb);
2888 queue_cast(r, lkb, -DLM_ECANCEL);
2891 log_error(r->res_ls, "receive_cancel_reply error %d", error);
/* Requester-side handler for DLM_MSG_CANCEL_REPLY: find our process-copy
   lkb by m_remid, take it off the waiters list and process the reply.  A
   missing lkb or an lkb that is not on the waiters list is logged. */
2898 static void receive_cancel_reply(struct dlm_ls *ls, struct dlm_message *ms)
2900 struct dlm_lkb *lkb;
2903 error = find_lkb(ls, ms->m_remid, &lkb);
2905 log_error(ls, "receive_cancel_reply no lkb");
2908 DLM_ASSERT(is_process_copy(lkb), dlm_print_lkb(lkb););
2910 error = remove_from_waiters(lkb);
2912 log_error(ls, "receive_cancel_reply not on waiters");
2916 _receive_cancel_reply(lkb, ms);
/* Requester-side handler for DLM_MSG_LOOKUP_REPLY: find the lkb that
   started the lookup (by m_lkid, which send_lookup_reply() echoes), take it
   off the waiters list, record the master reported in m_nodeid on the rsb,
   then resubmit this lkb and the lkbs parked on the rsb's lookup list.
   (Some branch lines are not visible in this extract.) */
2921 static void receive_lookup_reply(struct dlm_ls *ls, struct dlm_message *ms)
2923 struct dlm_lkb *lkb;
2925 int error, ret_nodeid;
2927 error = find_lkb(ls, ms->m_lkid, &lkb);
2929 log_error(ls, "receive_lookup_reply no lkb");
2933 error = remove_from_waiters(lkb);
2935 log_error(ls, "receive_lookup_reply not on waiters");
2939 /* this is the value returned by dlm_dir_lookup on dir node
2940 FIXME: will a non-zero error ever be returned? */
2941 error = ms->m_result;
2943 r = lkb->lkb_resource;
2947 ret_nodeid = ms->m_nodeid;
/* the directory says we are the master ourselves: no first_lkid needed */
2948 if (ret_nodeid == dlm_our_nodeid()) {
2951 r->res_first_lkid = 0;
2953 /* set_master() will copy res_nodeid to lkb_nodeid */
2954 r->res_nodeid = ret_nodeid;
2957 _request_lock(r, lkb);
2960 process_lookup_list(r);
/* Entry point for a received DLM message.  Resolves the lockspace from
   hd->h_lockspace (dropping the message with a log line if unknown), deals
   with recovery (waiting for / adding to the request queue, taking the
   recovery lock) and then dispatches on ms->m_type to the matching
   receive_*() handler.  Several control-flow lines, including two case
   labels, are not visible in this extract. */
2968 int dlm_receive_message(struct dlm_header *hd, int nodeid, int recovery)
/* the header is the first member of the message, so the same buffer is
   viewed as a full dlm_message */
2970 struct dlm_message *ms = (struct dlm_message *) hd;
2977 ls = dlm_find_lockspace_global(hd->h_lockspace);
2979 log_print("drop message %d from %d for unknown lockspace %d",
2980 ms->m_type, nodeid, hd->h_lockspace);
2984 /* recovery may have just ended leaving a bunch of backed-up requests
2985 in the requestqueue; wait while dlm_recoverd clears them */
2988 dlm_wait_requestqueue(ls);
2990 /* recovery may have just started while there were a bunch of
2991 in-flight requests -- save them in requestqueue to be processed
2992 after recovery. we can't let dlm_recvd block on the recovery
2993 lock. if dlm_recoverd is calling this function to clear the
2994 requestqueue, it needs to be interrupted (-EINTR) if another
2995 recovery operation is starting. */
2998 if (dlm_locking_stopped(ls)) {
3000 dlm_add_requestqueue(ls, nodeid, hd);
3005 if (lock_recovery_try(ls))
3010 switch (ms->m_type) {
3012 /* messages sent to a master node */
3014 case DLM_MSG_REQUEST:
3015 receive_request(ls, ms);
3018 case DLM_MSG_CONVERT:
3019 receive_convert(ls, ms);
3022 case DLM_MSG_UNLOCK:
3023 receive_unlock(ls, ms);
3026 case DLM_MSG_CANCEL:
3027 receive_cancel(ls, ms);
3030 /* messages sent from a master node (replies to above) */
3032 case DLM_MSG_REQUEST_REPLY:
3033 receive_request_reply(ls, ms);
3036 case DLM_MSG_CONVERT_REPLY:
3037 receive_convert_reply(ls, ms);
3040 case DLM_MSG_UNLOCK_REPLY:
3041 receive_unlock_reply(ls, ms);
3044 case DLM_MSG_CANCEL_REPLY:
3045 receive_cancel_reply(ls, ms);
3048 /* messages sent from a master node (only two types of async msg) */
3051 receive_grant(ls, ms);
3055 receive_bast(ls, ms);
3058 /* messages sent to a dir node */
3060 case DLM_MSG_LOOKUP:
3061 receive_lookup(ls, ms);
3064 case DLM_MSG_REMOVE:
3065 receive_remove(ls, ms);
3068 /* messages sent from a dir node (remove has no reply) */
3070 case DLM_MSG_LOOKUP_REPLY:
3071 receive_lookup_reply(ls, ms);
3075 log_error(ls, "unknown message type %d", ms->m_type);
/* release the recovery lock and the lockspace reference taken above */
3078 unlock_recovery(ls);
3080 dlm_put_lockspace(ls);
/* Recover an lkb that was waiting for a reply to a convert request.
   Two cases are acted on here:
   - middle_conversion(lkb) is true: a stub reply carrying -EINPROGRESS is
     pushed through _receive_convert_reply() after the lkb is taken off the
     waiters list, then the granted mode is reset to DLM_LOCK_IV and the
     resource is flagged RSB_RECOVER_CONVERT.
   - otherwise, if lkb_rqmode >= lkb_grmode: the lkb is flagged
     DLM_IFL_RESEND.
   ls:  lockspace; its ls_stub_ms message is used as the fake reply.
   lkb: the waiting lock. */
3090 static void recover_convert_waiter(struct dlm_ls *ls, struct dlm_lkb *lkb)
3092 if (middle_conversion(lkb)) {
/* fake a convert reply using the lockspace's stub message */
3094 ls->ls_stub_ms.m_result = -EINPROGRESS;
3095 _remove_from_waiters(lkb);
3096 _receive_convert_reply(lkb, &ls->ls_stub_ms);
3098 /* Same special case as in receive_rcom_lock_args() */
3099 lkb->lkb_grmode = DLM_LOCK_IV;
3100 rsb_set_flag(lkb->lkb_resource, RSB_RECOVER_CONVERT);
3103 } else if (lkb->lkb_rqmode >= lkb->lkb_grmode) {
/* requested mode is not lower than the granted mode: mark for resend */
3104 lkb->lkb_flags |= DLM_IFL_RESEND;
3107 /* lkb->lkb_rqmode < lkb->lkb_grmode shouldn't happen since down
3108 conversions are async; there's no reply from the remote master */
3111 /* A waiting lkb needs recovery if the master node has failed, or
3112 the master node is changing (only when no directory is used) */
/* Decide whether a waiting lkb must be handled by waiter recovery.
   The checks, in order:
   1. dlm_is_removed(ls, lkb->lkb_nodeid) - the node the lkb refers to is
      reported as removed from the lockspace;
   2. dlm_no_directory(ls) - whether the lockspace runs without a directory;
   3. dlm_dir_nodeid(lkb->lkb_resource) != lkb->lkb_nodeid - the directory
      nodeid of the resource differs from the nodeid recorded in the lkb.
   Returns an int that the caller tests for zero / non-zero.
   NOTE(review): presumably non-zero means "needs recovery"; confirm the
   exact value returned for each check. */
3114 static int waiter_needs_recovery(struct dlm_ls *ls, struct dlm_lkb *lkb)
3116 if (dlm_is_removed(ls, lkb->lkb_nodeid))
3119 if (!dlm_no_directory(ls))
3122 if (dlm_dir_nodeid(lkb->lkb_resource) != lkb->lkb_nodeid)
3128 /* Recovery for locks that are waiting for replies from nodes that are now
3129 gone. We can just complete unlocks and cancels by faking a reply from the
3130 dead node. Requests and up-conversions we flag to be resent after
3131 recovery. Down-conversions can just be completed with a fake reply like
3132 unlocks. Conversions between PR and CW need special attention. */
/* Walk ls->ls_waiters with ls_waiters_mutex held and treat each waiting lkb
   according to lkb_wait_type:
   - DLM_MSG_LOOKUP:  flagged DLM_IFL_RESEND;
   - DLM_MSG_REQUEST: flagged DLM_IFL_RESEND;
   - DLM_MSG_CONVERT: handed to recover_convert_waiter();
   - DLM_MSG_UNLOCK:  completed with a stub reply carrying -DLM_EUNLOCK;
   - DLM_MSG_CANCEL:  completed with a stub reply carrying -DLM_ECANCEL;
   - anything else:   logged as an invalid wait type.
   Non-lookup entries are first tested with waiter_needs_recovery().
   ls: lockspace whose waiters list is processed. */
3134 void dlm_recover_waiters_pre(struct dlm_ls *ls)
3136 struct dlm_lkb *lkb, *safe;
3138 mutex_lock(&ls->ls_waiters_mutex);
/* the _safe iterator is used because the unlock/cancel cases below call
   _remove_from_waiters() on the entry currently being visited */
3140 list_for_each_entry_safe(lkb, safe, &ls->ls_waiters, lkb_wait_reply) {
3141 log_debug(ls, "pre recover waiter lkid %x type %d flags %x",
3142 lkb->lkb_id, lkb->lkb_wait_type, lkb->lkb_flags);
3144 /* all outstanding lookups, regardless of destination will be
3145 resent after recovery is done */
3147 if (lkb->lkb_wait_type == DLM_MSG_LOOKUP) {
3148 lkb->lkb_flags |= DLM_IFL_RESEND;
/* NOTE(review): presumably entries that do not need recovery are skipped
   at this point; confirm */
3152 if (!waiter_needs_recovery(ls, lkb))
3155 switch (lkb->lkb_wait_type) {
3157 case DLM_MSG_REQUEST:
3158 lkb->lkb_flags |= DLM_IFL_RESEND;
3161 case DLM_MSG_CONVERT:
3162 recover_convert_waiter(ls, lkb);
3165 case DLM_MSG_UNLOCK:
/* fake an unlock reply through the lockspace's stub message */
3167 ls->ls_stub_ms.m_result = -DLM_EUNLOCK;
3168 _remove_from_waiters(lkb);
3169 _receive_unlock_reply(lkb, &ls->ls_stub_ms);
3173 case DLM_MSG_CANCEL:
/* fake a cancel reply through the lockspace's stub message */
3175 ls->ls_stub_ms.m_result = -DLM_ECANCEL;
3176 _remove_from_waiters(lkb);
3177 _receive_cancel_reply(lkb, &ls->ls_stub_ms);
3182 log_error(ls, "invalid lkb wait_type %d",
3183 lkb->lkb_wait_type);
3186 mutex_unlock(&ls->ls_waiters_mutex);
/* Look on ls->ls_waiters, with ls_waiters_mutex held, for an lkb flagged
   DLM_IFL_RESEND. For a matching lkb the wait type is captured in rv, the
   lkb is taken off the waiters list and the RESEND flag is cleared.
   ls:      lockspace to search.
   lkb_ret: out-parameter; the caller passes the address of an lkb pointer
            and uses that pointer afterwards.
   Returns an int that the caller treats as the message type of the removed
   lkb.
   NOTE(review): confirm how rv is initialised when nothing matches and
   where *lkb_ret is assigned. */
3189 static int remove_resend_waiter(struct dlm_ls *ls, struct dlm_lkb **lkb_ret)
3191 struct dlm_lkb *lkb;
3194 mutex_lock(&ls->ls_waiters_mutex);
3195 list_for_each_entry(lkb, &ls->ls_waiters, lkb_wait_reply) {
3196 if (lkb->lkb_flags & DLM_IFL_RESEND) {
/* remember the wait type before the lkb leaves the waiters list */
3197 rv = lkb->lkb_wait_type;
3198 _remove_from_waiters(lkb);
3199 lkb->lkb_flags &= ~DLM_IFL_RESEND;
3203 mutex_unlock(&ls->ls_waiters_mutex);
3211 /* Deal with lookups and lkb's marked RESEND from _pre. We may now be the
3212 master or dir-node for r. Processing the lkb may result in it being placed back on the waiters list. */
/* Re-drive operations for lkbs that were flagged DLM_IFL_RESEND.
   An lkb and its message type are obtained from remove_resend_waiter() and
   dispatched on that type:
   - DLM_MSG_LOOKUP:  _request_lock(r, lkb); confirm_master(r, 0) also
                      appears in this case;
   - DLM_MSG_REQUEST: _request_lock(r, lkb);
   - DLM_MSG_CONVERT: _convert_lock(r, lkb);
   - anything else:   logged as an error.
   If dlm_locking_stopped(ls) is true, "recover_waiters_post aborted" is
   logged.
   ls: lockspace being recovered.
   Returns an int; the local 'error' starts at 0.
   NOTE(review): confirm the loop/termination structure and the value
   returned on the aborted path. */
3215 int dlm_recover_waiters_post(struct dlm_ls *ls)
3217 struct dlm_lkb *lkb;
3219 int error = 0, mstype;
3222 if (dlm_locking_stopped(ls)) {
3223 log_debug(ls, "recover_waiters_post aborted");
/* fetch an lkb flagged for resend together with its original wait type */
3228 mstype = remove_resend_waiter(ls, &lkb);
3232 r = lkb->lkb_resource;
3234 log_debug(ls, "recover_waiters_post %x type %d flags %x %s",
3235 lkb->lkb_id, mstype, lkb->lkb_flags, r->res_name);
3239 case DLM_MSG_LOOKUP:
3242 _request_lock(r, lkb);
3244 confirm_master(r, 0);
3249 case DLM_MSG_REQUEST:
3252 _request_lock(r, lkb);
3257 case DLM_MSG_CONVERT:
3260 _convert_lock(r, lkb);
3266 log_error(ls, "recover_waiters_post type %d", mstype);
/* Purge lkbs from one queue of an rsb.
   Every lkb on 'queue' (linked through lkb_statequeue) is passed to
   test(ls, lkb); for each one where test returns non-zero, the rsb is
   flagged RSB_LOCKS_PURGED and a reference is dropped with dlm_put_lkb().
   An error is logged if dlm_put_lkb() returns 0.
   r:     resource owning the queue; r->res_ls supplies the lockspace.
   queue: list head of the queue to scan.
   test:  predicate selecting the lkbs to purge. */
3273 static void purge_queue(struct dlm_rsb *r, struct list_head *queue,
3274 int (*test)(struct dlm_ls *ls, struct dlm_lkb *lkb))
3276 struct dlm_ls *ls = r->res_ls;
3277 struct dlm_lkb *lkb, *safe;
/* _safe iteration: the current lkb is expected to be released below */
3279 list_for_each_entry_safe(lkb, safe, queue, lkb_statequeue) {
3280 if (test(ls, lkb)) {
3281 rsb_set_flag(r, RSB_LOCKS_PURGED);
3283 /* this put should free the lkb */
3284 if (!dlm_put_lkb(lkb))
3285 log_error(ls, "purged lkb not released");
/* purge_queue() predicate: non-zero when lkb is a master copy
   (is_master_copy) and dlm_is_removed() reports its lkb_nodeid as removed
   from lockspace ls. */
3290 static int purge_dead_test(struct dlm_ls *ls, struct dlm_lkb *lkb)
3292 return (is_master_copy(lkb) && dlm_is_removed(ls, lkb->lkb_nodeid));
/* purge_queue() predicate: returns is_master_copy(lkb), i.e. selects every
   master-copy lkb. The ls argument is unused; it is present to match the
   predicate signature purge_queue() expects. */
3295 static int purge_mstcpy_test(struct dlm_ls *ls, struct dlm_lkb *lkb)
3297 return is_master_copy(lkb);
/* Run purge_queue() with purge_dead_test over the grant, convert and wait
   queues of rsb r, in that order. */
3300 static void purge_dead_locks(struct dlm_rsb *r)
3302 purge_queue(r, &r->res_grantqueue, &purge_dead_test);
3303 purge_queue(r, &r->res_convertqueue, &purge_dead_test);
3304 purge_queue(r, &r->res_waitqueue, &purge_dead_test);
/* Run purge_queue() with purge_mstcpy_test over the grant, convert and wait
   queues of rsb r, in that order, so every master-copy lkb on r is
   selected for purging. */
3307 void dlm_purge_mstcpy_locks(struct dlm_rsb *r)
3309 purge_queue(r, &r->res_grantqueue, &purge_mstcpy_test);
3310 purge_queue(r, &r->res_convertqueue, &purge_mstcpy_test);
3311 purge_queue(r, &r->res_waitqueue, &purge_mstcpy_test);
3314 /* Get rid of locks held by nodes that are gone. */
/* Iterate ls->ls_root_list (linked through res_root_list) with ls_root_sem
   held for writing and apply purge_dead_locks() to rsbs on it.
   ls: lockspace to purge.
   Returns an int.
   NOTE(review): confirm any per-rsb locking or filtering around the
   purge_dead_locks() call, and the value returned. */
3316 int dlm_purge_locks(struct dlm_ls *ls)
3320 log_debug(ls, "dlm_purge_locks");
3322 down_write(&ls->ls_root_sem);
3323 list_for_each_entry(r, &ls->ls_root_list, res_root_list) {
3327 purge_dead_locks(r);
3333 up_write(&ls->ls_root_sem);
/* Scan hash bucket 'bucket' of ls->ls_rsbtbl, holding that bucket's lock
   for reading, for an rsb that has RSB_LOCKS_PURGED set. rsbs without the
   flag are passed over; the flag is cleared on the rsb that is picked.
   ls:     lockspace owning the rsb table.
   bucket: index into ls->ls_rsbtbl.
   Returns an rsb pointer; r_ret starts as NULL.
   NOTE(review): presumably NULL is returned when no flagged rsb is found,
   and a reference may be taken on the rsb returned; confirm. */
3338 static struct dlm_rsb *find_purged_rsb(struct dlm_ls *ls, int bucket)
3340 struct dlm_rsb *r, *r_ret = NULL;
3342 read_lock(&ls->ls_rsbtbl[bucket].lock);
3343 list_for_each_entry(r, &ls->ls_rsbtbl[bucket].list, res_hashchain) {
3344 if (!rsb_flag(r, RSB_LOCKS_PURGED))
/* clear the flag on the rsb being handed out */
3347 rsb_clear_flag(r, RSB_LOCKS_PURGED);
3351 read_unlock(&ls->ls_rsbtbl[bucket].lock);
/* For each bucket index i in [0, ls->ls_rsbtbl_size), obtain an rsb from
   find_purged_rsb(ls, i) and call grant_pending_locks() and
   confirm_master(r, 0) on it.
   ls: lockspace whose purged rsbs are processed.
   NOTE(review): confirm how a bucket with several purged rsbs, or with
   none, is handled. */
3355 void dlm_grant_after_purge(struct dlm_ls *ls)
3360 for (i = 0; i < ls->ls_rsbtbl_size; i++) {
3361 r = find_purged_rsb(ls, i);
3366 grant_pending_locks(r);
3367 confirm_master(r, 0);
/* Search one lkb queue (entries linked through lkb_statequeue) for an lkb
   whose lkb_nodeid equals nodeid and whose lkb_remid equals remid.
   head:   list head of the queue to search.
   nodeid: node id to match.
   remid:  remote lock id to match.
   Returns an lkb pointer.
   NOTE(review): presumably the matching lkb, or NULL when there is no
   match; confirm. */
3374 static struct dlm_lkb *search_remid_list(struct list_head *head, int nodeid,
3377 struct dlm_lkb *lkb;
3379 list_for_each_entry(lkb, head, lkb_statequeue) {
3380 if (lkb->lkb_nodeid == nodeid && lkb->lkb_remid == remid)
/* Search the queues of rsb r for an lkb matching (nodeid, remid) using
   search_remid_list(); the grant queue is tried first, then the convert
   queue, then the wait queue.
   Returns an lkb pointer.
   NOTE(review): presumably the search stops at the first queue that yields
   a match; confirm. */
3386 static struct dlm_lkb *search_remid(struct dlm_rsb *r, int nodeid,
3389 struct dlm_lkb *lkb;
3391 lkb = search_remid_list(&r->res_grantqueue, nodeid, remid);
3394 lkb = search_remid_list(&r->res_convertqueue, nodeid, remid);
3397 lkb = search_remid_list(&r->res_waitqueue, nodeid, remid);
/* Fill in lkb from the rcom_lock payload carried in rc->rc_buf.
   ls:  lockspace, used to allocate the lvb.
   lkb: lock being populated; it is marked DLM_IFL_MSTCPY.
   r:   resource; flagged RSB_RECOVER_CONVERT for middle conversions.
   rc:  received message; rc_header supplies the sender nodeid and the
        total message length.
   Returns an int status.
   Side effect on the message: for a middle conversion rl->rl_status is
   overwritten with DLM_LKSTS_CONVERT. */
3403 static int receive_rcom_lock_args(struct dlm_ls *ls, struct dlm_lkb *lkb,
3404 struct dlm_rsb *r, struct dlm_rcom *rc)
3406 struct rcom_lock *rl = (struct rcom_lock *) rc->rc_buf;
3409 lkb->lkb_nodeid = rc->rc_header.h_nodeid;
3410 lkb->lkb_ownpid = rl->rl_ownpid;
/* the sender's lock id becomes this lkb's remote id */
3411 lkb->lkb_remid = rl->rl_lkid;
3412 lkb->lkb_exflags = rl->rl_exflags;
/* only the low 16 bits of the sender's flags are kept */
3413 lkb->lkb_flags = rl->rl_flags & 0x0000FFFF;
3414 lkb->lkb_flags |= DLM_IFL_MSTCPY;
3415 lkb->lkb_lvbseq = rl->rl_lvbseq;
3416 lkb->lkb_rqmode = rl->rl_rqmode;
3417 lkb->lkb_grmode = rl->rl_grmode;
3418 /* don't set lkb_status because add_lkb wants to itself */
/* the callback fields hold only the masked AST_BAST / AST_COMP bits from
   rl_asts, so each is non-NULL exactly when the matching bit was set */
3420 lkb->lkb_bastaddr = (void *) (long) (rl->rl_asts & AST_BAST);
3421 lkb->lkb_astaddr = (void *) (long) (rl->rl_asts & AST_COMP);
3423 if (lkb->lkb_exflags & DLM_LKF_VALBLK) {
3424 lkb->lkb_lvbptr = allocate_lvb(ls);
3425 if (!lkb->lkb_lvbptr)
/* lvb length = message length minus the dlm_rcom and rcom_lock headers.
   NOTE(review): lvblen comes straight from the received header and is not
   range-checked before the memcpy below; verify it cannot be negative or
   larger than the buffer returned by allocate_lvb(). */
3427 lvblen = rc->rc_header.h_length - sizeof(struct dlm_rcom) -
3428 sizeof(struct rcom_lock);
3429 memcpy(lkb->lkb_lvbptr, rl->rl_lvb, lvblen);
3432 /* Conversions between PR and CW (middle modes) need special handling.
3433 The real granted mode of these converting locks cannot be determined
3434 until all locks have been rebuilt on the rsb (recover_conversion) */
3436 if (rl->rl_wait_type == DLM_MSG_CONVERT && middle_conversion(lkb)) {
3437 rl->rl_status = DLM_LKSTS_CONVERT;
3438 lkb->lkb_grmode = DLM_LOCK_IV;
3439 rsb_set_flag(r, RSB_RECOVER_CONVERT);
3445 /* This lkb may have been recovered in a previous aborted recovery so we need
3446 to check if the rsb already has an lkb with the given remote nodeid/lkid.
3447 If so we just send back a standard reply. If not, we create a new lkb with
3448 the given values and send back our lkid. We send back our lkid by sending
3449 back the rcom_lock struct we got but with the remid field filled in. */
/* Build (or find) the master-copy lkb described by the rcom_lock payload
   in rc->rc_buf. Visible steps:
   - a non-zero rl_parent_lkid yields -EOPNOTSUPP;
   - the rsb is looked up by name with find_rsb(..., R_MASTER, &r);
   - search_remid() looks for an lkb already matching the sender's
     nodeid and lkid;
   - create_lkb() and receive_rcom_lock_args() create and fill a new lkb,
     which add_lkb() places on r with status rl->rl_status;
   - the local lkb id is written to rl->rl_remid and the outcome to
     rl->rl_result.
   ls: lockspace; rc: received message, modified in place.
   Returns an int.
   NOTE(review): confirm which steps are skipped when an existing lkb is
   found and what value is returned. */
3451 int dlm_recover_master_copy(struct dlm_ls *ls, struct dlm_rcom *rc)
3453 struct rcom_lock *rl = (struct rcom_lock *) rc->rc_buf;
3455 struct dlm_lkb *lkb;
3458 if (rl->rl_parent_lkid) {
3459 error = -EOPNOTSUPP;
3463 error = find_rsb(ls, rl->rl_name, rl->rl_namelen, R_MASTER, &r);
/* was this lock already recovered from the same node with the same lkid? */
3469 lkb = search_remid(r, rc->rc_header.h_nodeid, rl->rl_lkid);
3475 error = create_lkb(ls, &lkb);
3479 error = receive_rcom_lock_args(ls, lkb, r, rc);
3486 add_lkb(r, lkb, rl->rl_status);
3490 /* this is the new value returned to the lock holder for
3491 saving in its process-copy lkb */
3492 rl->rl_remid = lkb->lkb_id;
3499 log_print("recover_master_copy %d %x", error, rl->rl_lkid);
/* the outcome travels back inside the rcom_lock structure */
3500 rl->rl_result = error;
/* Process the rcom_lock payload in rc->rc_buf on the side that holds the
   process-copy lkb. Visible steps:
   - find_lkb() looks up the local lkb by rl->rl_lkid; a failure is logged;
   - the lkb is asserted to be a process copy;
   - rl->rl_result is read into 'error';
   - rl->rl_remid is saved as lkb->lkb_remid;
   - dlm_recovered_lock(r) is called on the lkb's resource.
   ls: lockspace; rc: received message.
   Returns an int.
   NOTE(review): confirm which values of 'error' select the
   "master copy exists" and "unknown error" log paths, and the return
   value. */
3504 int dlm_recover_process_copy(struct dlm_ls *ls, struct dlm_rcom *rc)
3506 struct rcom_lock *rl = (struct rcom_lock *) rc->rc_buf;
3508 struct dlm_lkb *lkb;
3511 error = find_lkb(ls, rl->rl_lkid, &lkb);
3513 log_error(ls, "recover_process_copy no lkid %x", rl->rl_lkid);
3517 DLM_ASSERT(is_process_copy(lkb), dlm_print_lkb(lkb););
/* result reported by the remote side for this lock */
3519 error = rl->rl_result;
3521 r = lkb->lkb_resource;
3527 log_debug(ls, "master copy exists %x", lkb->lkb_id);
/* remember the lock id used by the remote side */
3530 lkb->lkb_remid = rl->rl_remid;
3533 log_error(ls, "dlm_recover_process_copy unknown error %d %x",
3534 error, lkb->lkb_id);
3537 /* an ack for dlm_recover_locks() which waits for replies from
3538 all the locks it sends to new masters */
3539 dlm_recovered_lock(r);