/******************************************************************************
*******************************************************************************
**
**  Copyright (C) Sistina Software, Inc.  1997-2003  All rights reserved.
**  Copyright (C) 2004-2007 Red Hat, Inc.  All rights reserved.
**
**  This copyrighted material is made available to anyone wishing to use,
**  modify, copy, or redistribute it subject to the terms and conditions
**  of the GNU General Public License v.2.
**
*******************************************************************************
******************************************************************************/

#include "dlm_internal.h"
#include "lockspace.h"
#include "member.h"
#include "recoverd.h"
#include "ast.h"
#include "dir.h"
#include "lowcomms.h"
#include "config.h"
#include "memory.h"
#include "lock.h"
#include "recover.h"
#include "requestqueue.h"

#ifdef CONFIG_DLM_DEBUG
int dlm_create_debug_file(struct dlm_ls *ls);
void dlm_delete_debug_file(struct dlm_ls *ls);
#else
static inline int dlm_create_debug_file(struct dlm_ls *ls) { return 0; }
static inline void dlm_delete_debug_file(struct dlm_ls *ls) { }
#endif

static int			ls_count;
static struct mutex		ls_lock;
static struct list_head		lslist;
static spinlock_t		lslist_lock;
static struct task_struct *	scand_task;
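
/* lslist holds every lockspace on this node.  lslist_lock protects the list
   and the per-lockspace ls_count references taken by the
   dlm_find_lockspace_*() helpers and dropped by dlm_put_lockspace(). */
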
static ssize_t dlm_control_store(struct dlm_ls *ls, const char *buf, size_t len)
{
	ssize_t ret = len;
	int n = simple_strtol(buf, NULL, 0);

	ls = dlm_find_lockspace_local(ls->ls_local_handle);
	if (!ls)
		return -EINVAL;

	/* writing 0 stops the lockspace, writing 1 starts it */
	switch (n) {
	case 0:
		dlm_ls_stop(ls);
		break;
	case 1:
		dlm_ls_start(ls);
		break;
	default:
		ret = -EINVAL;
	}
	dlm_put_lockspace(ls);
	return ret;
}

static ssize_t dlm_event_store(struct dlm_ls *ls, const char *buf, size_t len)
{
	ls->ls_uevent_result = simple_strtol(buf, NULL, 0);
	set_bit(LSFL_UEVENT_WAIT, &ls->ls_flags);
	wake_up(&ls->ls_uevent_wait);
	return len;
}

static ssize_t dlm_id_show(struct dlm_ls *ls, char *buf)
{
	return snprintf(buf, PAGE_SIZE, "%u\n", ls->ls_global_id);
}

static ssize_t dlm_id_store(struct dlm_ls *ls, const char *buf, size_t len)
{
	ls->ls_global_id = simple_strtoul(buf, NULL, 0);
	return len;
}

static ssize_t dlm_recover_status_show(struct dlm_ls *ls, char *buf)
{
	uint32_t status = dlm_recover_status(ls);
	return snprintf(buf, PAGE_SIZE, "%x\n", status);
}

static ssize_t dlm_recover_nodeid_show(struct dlm_ls *ls, char *buf)
{
	return snprintf(buf, PAGE_SIZE, "%d\n", ls->ls_recover_nodeid);
}

struct dlm_attr {
	struct attribute attr;
	ssize_t (*show)(struct dlm_ls *, char *);
	ssize_t (*store)(struct dlm_ls *, const char *, size_t);
};

static struct dlm_attr dlm_attr_control = {
	.attr  = {.name = "control", .mode = S_IWUSR},
	.store = dlm_control_store
};

static struct dlm_attr dlm_attr_event = {
	.attr  = {.name = "event_done", .mode = S_IWUSR},
	.store = dlm_event_store
};

static struct dlm_attr dlm_attr_id = {
	.attr  = {.name = "id", .mode = S_IRUGO | S_IWUSR},
	.show  = dlm_id_show,
	.store = dlm_id_store
};

static struct dlm_attr dlm_attr_recover_status = {
	.attr  = {.name = "recover_status", .mode = S_IRUGO},
	.show  = dlm_recover_status_show
};

static struct dlm_attr dlm_attr_recover_nodeid = {
	.attr  = {.name = "recover_nodeid", .mode = S_IRUGO},
	.show  = dlm_recover_nodeid_show
};

static struct attribute *dlm_attrs[] = {
	&dlm_attr_control.attr,
	&dlm_attr_event.attr,
	&dlm_attr_id.attr,
	&dlm_attr_recover_status.attr,
	&dlm_attr_recover_nodeid.attr,
	NULL,
};

static ssize_t dlm_attr_show(struct kobject *kobj, struct attribute *attr,
			     char *buf)
{
	struct dlm_ls *ls = container_of(kobj, struct dlm_ls, ls_kobj);
	struct dlm_attr *a = container_of(attr, struct dlm_attr, attr);
	return a->show ? a->show(ls, buf) : 0;
}

static ssize_t dlm_attr_store(struct kobject *kobj, struct attribute *attr,
			      const char *buf, size_t len)
{
	struct dlm_ls *ls = container_of(kobj, struct dlm_ls, ls_kobj);
	struct dlm_attr *a = container_of(attr, struct dlm_attr, attr);
	return a->store ? a->store(ls, buf, len) : len;
}

static void lockspace_kobj_release(struct kobject *k)
{
	struct dlm_ls *ls = container_of(k, struct dlm_ls, ls_kobj);
	kfree(ls);
}

static struct sysfs_ops dlm_attr_ops = {
	.show  = dlm_attr_show,
	.store = dlm_attr_store,
};

static struct kobj_type dlm_ktype = {
	.default_attrs = dlm_attrs,
	.sysfs_ops     = &dlm_attr_ops,
	.release       = lockspace_kobj_release,
};

static struct kset dlm_kset = {
	.kobj   = {.name = "dlm",},
	.ktype  = &dlm_ktype,
};
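
/* The dlm kset is registered under the kernel kset in dlm_lockspace_init(),
   so each lockspace kobject appears as /sys/kernel/dlm/<name>/ with the
   attributes defined above; dlm_controld drives lockspace startup from
   userspace through the "control", "event_done" and "id" files. */
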
static int kobject_setup(struct dlm_ls *ls)
{
	char lsname[DLM_LOCKSPACE_LEN];
	int error;

	memset(lsname, 0, DLM_LOCKSPACE_LEN);
	snprintf(lsname, DLM_LOCKSPACE_LEN, "%s", ls->ls_name);

	error = kobject_set_name(&ls->ls_kobj, "%s", lsname);
	if (error)
		return error;

	ls->ls_kobj.kset = &dlm_kset;
	ls->ls_kobj.ktype = &dlm_ktype;
	return 0;
}

static int do_uevent(struct dlm_ls *ls, int in)
{
	int error;

	if (in)
		kobject_uevent(&ls->ls_kobj, KOBJ_ONLINE);
	else
		kobject_uevent(&ls->ls_kobj, KOBJ_OFFLINE);

	log_debug(ls, "%s the lockspace group...", in ? "joining" : "leaving");

	/* dlm_controld will see the uevent, do the necessary group management
	   and then write to sysfs to wake us */

	error = wait_event_interruptible(ls->ls_uevent_wait,
			test_and_clear_bit(LSFL_UEVENT_WAIT, &ls->ls_flags));

	log_debug(ls, "group event done %d %d", error, ls->ls_uevent_result);

	if (error)
		goto out;

	error = ls->ls_uevent_result;
 out:
	if (error)
		log_error(ls, "group %s failed %d %d", in ? "join" : "leave",
			  error, ls->ls_uevent_result);
	return error;
}

int dlm_lockspace_init(void)
{
	int error;

	ls_count = 0;
	mutex_init(&ls_lock);
	INIT_LIST_HEAD(&lslist);
	spin_lock_init(&lslist_lock);

	kobj_set_kset_s(&dlm_kset, kernel_subsys);
	error = kset_register(&dlm_kset);
	if (error)
		printk("dlm_lockspace_init: cannot register kset %d\n", error);
	return error;
}

void dlm_lockspace_exit(void)
{
	kset_unregister(&dlm_kset);
}
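
/* dlm_scand is a kernel thread that periodically visits each lockspace;
   when it can take the recovery lock without blocking it scans for unused
   rsbs to toss and for locks that have exceeded their timeout, then sleeps
   for ci_scan_secs seconds. */
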
static int dlm_scand(void *data)
{
	struct dlm_ls *ls;

	while (!kthread_should_stop()) {
		list_for_each_entry(ls, &lslist, ls_list) {
			if (dlm_lock_recovery_try(ls)) {
				dlm_scan_rsbs(ls);
				dlm_scan_timeout(ls);
				dlm_unlock_recovery(ls);
			}
		}
		schedule_timeout_interruptible(dlm_config.ci_scan_secs * HZ);
	}
	return 0;
}

static int dlm_scand_start(void)
{
	struct task_struct *p;
	int error = 0;

	p = kthread_run(dlm_scand, NULL, "dlm_scand");
	if (IS_ERR(p))
		error = PTR_ERR(p);
	else
		scand_task = p;
	return error;
}

static void dlm_scand_stop(void)
{
	kthread_stop(scand_task);
}
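
/* Lockspace lookups: each helper searches lslist under lslist_lock.  The
   global/local/device variants also take a reference (ls_count++) which the
   caller must drop with dlm_put_lockspace(); remove_lockspace() below waits
   for that count to reach zero before unlinking the lockspace. */
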
static struct dlm_ls *dlm_find_lockspace_name(char *name, int namelen)
{
	struct dlm_ls *ls;

	spin_lock(&lslist_lock);

	list_for_each_entry(ls, &lslist, ls_list) {
		if (ls->ls_namelen == namelen &&
		    memcmp(ls->ls_name, name, namelen) == 0)
			goto out;
	}
	ls = NULL;
 out:
	spin_unlock(&lslist_lock);
	return ls;
}

struct dlm_ls *dlm_find_lockspace_global(uint32_t id)
{
	struct dlm_ls *ls;

	spin_lock(&lslist_lock);

	list_for_each_entry(ls, &lslist, ls_list) {
		if (ls->ls_global_id == id) {
			ls->ls_count++;
			goto out;
		}
	}
	ls = NULL;
 out:
	spin_unlock(&lslist_lock);
	return ls;
}

struct dlm_ls *dlm_find_lockspace_local(dlm_lockspace_t *lockspace)
{
	struct dlm_ls *ls;

	spin_lock(&lslist_lock);
	list_for_each_entry(ls, &lslist, ls_list) {
		if (ls->ls_local_handle == lockspace) {
			ls->ls_count++;
			goto out;
		}
	}
	ls = NULL;
 out:
	spin_unlock(&lslist_lock);
	return ls;
}

struct dlm_ls *dlm_find_lockspace_device(int minor)
{
	struct dlm_ls *ls;

	spin_lock(&lslist_lock);
	list_for_each_entry(ls, &lslist, ls_list) {
		if (ls->ls_device.minor == minor) {
			ls->ls_count++;
			goto out;
		}
	}
	ls = NULL;
 out:
	spin_unlock(&lslist_lock);
	return ls;
}

void dlm_put_lockspace(struct dlm_ls *ls)
{
	spin_lock(&lslist_lock);
	ls->ls_count--;
	spin_unlock(&lslist_lock);
}

static void remove_lockspace(struct dlm_ls *ls)
{
	for (;;) {
		spin_lock(&lslist_lock);
		if (ls->ls_count == 0) {
			list_del(&ls->ls_list);
			spin_unlock(&lslist_lock);
			return;
		}
		spin_unlock(&lslist_lock);
		ssleep(1);
	}
}

static int threads_start(void)
{
	int error;

	/* Thread which processes lock requests for all lockspaces */
	error = dlm_astd_start();
	if (error) {
		log_print("cannot start dlm_astd thread %d", error);
		goto fail;
	}

	error = dlm_scand_start();
	if (error) {
		log_print("cannot start dlm_scand thread %d", error);
		goto astd_fail;
	}

	/* Thread for sending/receiving messages for all lockspaces */
	error = dlm_lowcomms_start();
	if (error) {
		log_print("cannot start dlm lowcomms %d", error);
		goto scand_fail;
	}

	return 0;

 scand_fail:
	dlm_scand_stop();
 astd_fail:
	dlm_astd_stop();
 fail:
	return error;
}

static void threads_stop(void)
{
	dlm_scand_stop();
	dlm_lowcomms_stop();
	dlm_astd_stop();
}
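
/* new_lockspace() allocates and initializes the dlm_ls, links it into
   lslist, starts dlm_recoverd, registers the sysfs kobject, and then waits
   for dlm_controld (via the uevent) and ls_members_done to complete the join
   before returning. */
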
static int new_lockspace(char *name, int namelen, void **lockspace,
			 uint32_t flags, int lvblen)
{
	struct dlm_ls *ls;
	int i, size, error = -ENOMEM;
	int do_unreg = 0;

	if (namelen > DLM_LOCKSPACE_LEN)
		return -EINVAL;

	if (!lvblen || (lvblen % 8))
		return -EINVAL;

	if (!try_module_get(THIS_MODULE))
		return -EINVAL;

	ls = dlm_find_lockspace_name(name, namelen);
	if (ls) {
		*lockspace = ls;
		module_put(THIS_MODULE);
		return -EEXIST;
	}

	ls = kzalloc(sizeof(struct dlm_ls) + namelen, GFP_KERNEL);
	if (!ls)
		goto out;
	memcpy(ls->ls_name, name, namelen);
	ls->ls_namelen = namelen;
	ls->ls_lvblen = lvblen;
	ls->ls_count = 0;
	ls->ls_flags = 0;

	if (flags & DLM_LSFL_TIMEWARN)
		set_bit(LSFL_TIMEWARN, &ls->ls_flags);

	if (flags & DLM_LSFL_FS)
		ls->ls_allocation = GFP_NOFS;
	else
		ls->ls_allocation = GFP_KERNEL;

	/* ls_exflags are forced to match among nodes, and we don't
	   need to require all nodes to have TIMEWARN or FS set */
	ls->ls_exflags = (flags & ~(DLM_LSFL_TIMEWARN | DLM_LSFL_FS));

	size = dlm_config.ci_rsbtbl_size;
	ls->ls_rsbtbl_size = size;

	ls->ls_rsbtbl = kmalloc(sizeof(struct dlm_rsbtable) * size, GFP_KERNEL);
	if (!ls->ls_rsbtbl)
		goto out_lsfree;
	for (i = 0; i < size; i++) {
		INIT_LIST_HEAD(&ls->ls_rsbtbl[i].list);
		INIT_LIST_HEAD(&ls->ls_rsbtbl[i].toss);
		rwlock_init(&ls->ls_rsbtbl[i].lock);
	}

	size = dlm_config.ci_lkbtbl_size;
	ls->ls_lkbtbl_size = size;

	ls->ls_lkbtbl = kmalloc(sizeof(struct dlm_lkbtable) * size, GFP_KERNEL);
	if (!ls->ls_lkbtbl)
		goto out_rsbfree;
	for (i = 0; i < size; i++) {
		INIT_LIST_HEAD(&ls->ls_lkbtbl[i].list);
		rwlock_init(&ls->ls_lkbtbl[i].lock);
		ls->ls_lkbtbl[i].counter = 1;
	}

	size = dlm_config.ci_dirtbl_size;
	ls->ls_dirtbl_size = size;

	ls->ls_dirtbl = kmalloc(sizeof(struct dlm_dirtable) * size, GFP_KERNEL);
	if (!ls->ls_dirtbl)
		goto out_lkbfree;
	for (i = 0; i < size; i++) {
		INIT_LIST_HEAD(&ls->ls_dirtbl[i].list);
		rwlock_init(&ls->ls_dirtbl[i].lock);
	}

	INIT_LIST_HEAD(&ls->ls_waiters);
	mutex_init(&ls->ls_waiters_mutex);
	INIT_LIST_HEAD(&ls->ls_orphans);
	mutex_init(&ls->ls_orphans_mutex);
	INIT_LIST_HEAD(&ls->ls_timeout);
	mutex_init(&ls->ls_timeout_mutex);

	INIT_LIST_HEAD(&ls->ls_nodes);
	INIT_LIST_HEAD(&ls->ls_nodes_gone);
	ls->ls_num_nodes = 0;
	ls->ls_low_nodeid = 0;
	ls->ls_total_weight = 0;
	ls->ls_node_array = NULL;

	memset(&ls->ls_stub_rsb, 0, sizeof(struct dlm_rsb));
	ls->ls_stub_rsb.res_ls = ls;

	ls->ls_debug_rsb_dentry = NULL;
	ls->ls_debug_waiters_dentry = NULL;

	init_waitqueue_head(&ls->ls_uevent_wait);
	ls->ls_uevent_result = 0;
	init_completion(&ls->ls_members_done);
	ls->ls_members_result = -1;

	ls->ls_recoverd_task = NULL;
	mutex_init(&ls->ls_recoverd_active);
	spin_lock_init(&ls->ls_recover_lock);
	spin_lock_init(&ls->ls_rcom_spin);
	get_random_bytes(&ls->ls_rcom_seq, sizeof(uint64_t));
	ls->ls_recover_status = 0;
	ls->ls_recover_seq = 0;
	ls->ls_recover_args = NULL;
	init_rwsem(&ls->ls_in_recovery);
	INIT_LIST_HEAD(&ls->ls_requestqueue);
	mutex_init(&ls->ls_requestqueue_mutex);
	mutex_init(&ls->ls_clear_proc_locks);

	ls->ls_recover_buf = kmalloc(dlm_config.ci_buffer_size, GFP_KERNEL);
	if (!ls->ls_recover_buf)
		goto out_dirfree;

	INIT_LIST_HEAD(&ls->ls_recover_list);
	spin_lock_init(&ls->ls_recover_list_lock);
	ls->ls_recover_list_count = 0;
	ls->ls_local_handle = ls;
	init_waitqueue_head(&ls->ls_wait_general);
	INIT_LIST_HEAD(&ls->ls_root_list);
	init_rwsem(&ls->ls_root_sem);

	down_write(&ls->ls_in_recovery);

	spin_lock(&lslist_lock);
	list_add(&ls->ls_list, &lslist);
	spin_unlock(&lslist_lock);

	/* needs to find ls in lslist */
	error = dlm_recoverd_start(ls);
	if (error) {
		log_error(ls, "can't start dlm_recoverd %d", error);
		goto out_rcomfree;
	}

	error = kobject_setup(ls);
	if (error)
		goto out_stop;

	error = kobject_register(&ls->ls_kobj);
	if (error)
		goto out_stop;

	/* let kobject handle freeing of ls if there's an error */
	do_unreg = 1;

	/* This uevent triggers dlm_controld in userspace to add us to the
	   group of nodes that are members of this lockspace (managed by the
	   cluster infrastructure).  Once it's done that, it tells us who the
	   current lockspace members are (via configfs) and then tells the
	   lockspace to start running (via sysfs) in dlm_ls_start(). */

	error = do_uevent(ls, 1);
	if (error)
		goto out_stop;

	wait_for_completion(&ls->ls_members_done);
	error = ls->ls_members_result;
	if (error)
		goto out_members;

	dlm_create_debug_file(ls);

	log_debug(ls, "join complete");

	*lockspace = ls;
	return 0;

 out_members:
	do_uevent(ls, 0);
	dlm_clear_members(ls);
	kfree(ls->ls_node_array);
 out_stop:
	dlm_recoverd_stop(ls);
 out_rcomfree:
	spin_lock(&lslist_lock);
	list_del(&ls->ls_list);
	spin_unlock(&lslist_lock);
	kfree(ls->ls_recover_buf);
 out_dirfree:
	kfree(ls->ls_dirtbl);
 out_lkbfree:
	kfree(ls->ls_lkbtbl);
 out_rsbfree:
	kfree(ls->ls_rsbtbl);
 out_lsfree:
	if (do_unreg)
		kobject_unregister(&ls->ls_kobj);
	else
		kfree(ls);
 out:
	module_put(THIS_MODULE);
	return error;
}
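
/* dlm_new_lockspace() is the exported entry point.  The first lockspace
   created also starts the shared dlm threads (astd, scand, lowcomms); the
   last one released stops them again in release_lockspace(). */
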
int dlm_new_lockspace(char *name, int namelen, void **lockspace,
		      uint32_t flags, int lvblen)
{
	int error = 0;

	mutex_lock(&ls_lock);
	if (!ls_count)
		error = threads_start();
	if (error)
		goto out;

	error = new_lockspace(name, namelen, lockspace, flags, lvblen);
	if (!error)
		ls_count++;
	else if (!ls_count)
		threads_stop();
 out:
	mutex_unlock(&ls_lock);
	return error;
}

/* Return 1 if the lockspace still has active remote locks,
 *        2 if the lockspace still has active local locks.
 */
static int lockspace_busy(struct dlm_ls *ls)
{
	int i, lkb_found = 0;
	struct dlm_lkb *lkb;

	/* NOTE: We check the lockidtbl here rather than the resource table.
	   This is because there may be LKBs queued as ASTs that have been
	   unlinked from their RSBs and are pending deletion once the AST has
	   been delivered */

	for (i = 0; i < ls->ls_lkbtbl_size; i++) {
		read_lock(&ls->ls_lkbtbl[i].lock);
		if (!list_empty(&ls->ls_lkbtbl[i].list)) {
			lkb_found = 1;
			list_for_each_entry(lkb, &ls->ls_lkbtbl[i].list,
					    lkb_idtbl_list) {
				if (!lkb->lkb_nodeid) {
					read_unlock(&ls->ls_lkbtbl[i].lock);
					return 2;
				}
			}
		}
		read_unlock(&ls->ls_lkbtbl[i].lock);
	}
	return lkb_found;
}
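
/* release_lockspace() undoes new_lockspace(): stop recovery, unlink the
   lockspace from lslist, free the dir/lkb/rsb tables and any other lists,
   and finally drop the kobject, whose release callback frees the dlm_ls. */
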
static int release_lockspace(struct dlm_ls *ls, int force)
{
	struct dlm_lkb *lkb;
	struct dlm_rsb *rsb;
	struct list_head *head;
	int i;
	int busy = lockspace_busy(ls);

	if (busy > force)
		return -EBUSY;

	if (force < 3)
		do_uevent(ls, 0);

	dlm_recoverd_stop(ls);

	remove_lockspace(ls);

	dlm_delete_debug_file(ls);

	dlm_astd_suspend();

	kfree(ls->ls_recover_buf);

	/*
	 * Free direntry structs.
	 */

	dlm_dir_clear(ls);
	kfree(ls->ls_dirtbl);

	/*
	 * Free all lkb's on lkbtbl[] lists.
	 */

	for (i = 0; i < ls->ls_lkbtbl_size; i++) {
		head = &ls->ls_lkbtbl[i].list;
		while (!list_empty(head)) {
			lkb = list_entry(head->next, struct dlm_lkb,
					 lkb_idtbl_list);

			list_del(&lkb->lkb_idtbl_list);

			dlm_del_ast(lkb);

			if (lkb->lkb_lvbptr && lkb->lkb_flags & DLM_IFL_MSTCPY)
				free_lvb(lkb->lkb_lvbptr);

			free_lkb(lkb);
		}
	}
	dlm_astd_resume();

	kfree(ls->ls_lkbtbl);

	/*
	 * Free all rsb's on rsbtbl[] lists
	 */

	for (i = 0; i < ls->ls_rsbtbl_size; i++) {
		head = &ls->ls_rsbtbl[i].list;
		while (!list_empty(head)) {
			rsb = list_entry(head->next, struct dlm_rsb,
					 res_hashchain);

			list_del(&rsb->res_hashchain);
			free_rsb(rsb);
		}

		head = &ls->ls_rsbtbl[i].toss;
		while (!list_empty(head)) {
			rsb = list_entry(head->next, struct dlm_rsb,
					 res_hashchain);
			list_del(&rsb->res_hashchain);
			free_rsb(rsb);
		}
	}

	kfree(ls->ls_rsbtbl);

	/*
	 * Free structures on any other lists
	 */

	dlm_purge_requestqueue(ls);
	kfree(ls->ls_recover_args);
	dlm_clear_free_entries(ls);
	dlm_clear_members(ls);
	dlm_clear_members_gone(ls);
	kfree(ls->ls_node_array);
	kobject_unregister(&ls->ls_kobj);
	/* The ls structure will be freed when the kobject is done with it */

	mutex_lock(&ls_lock);
	ls_count--;
	if (!ls_count)
		threads_stop();
	mutex_unlock(&ls_lock);

	module_put(THIS_MODULE);
	return 0;
}

/*
 * Called when a system has released all its locks and is not going to use the
 * lockspace any longer.  We free everything we're managing for this lockspace.
 * Remaining nodes will go through the recovery process as if we'd died.  The
 * lockspace must continue to function as usual, participating in recoveries,
 * until this returns.
 *
 * Force has 4 possible values:
 * 0 - don't destroy lockspace if it has any LKBs
 * 1 - destroy lockspace if it has remote LKBs but not if it has local LKBs
 * 2 - destroy lockspace regardless of LKBs
 * 3 - destroy lockspace as part of a forced shutdown
 */

int dlm_release_lockspace(void *lockspace, int force)
{
	struct dlm_ls *ls;

	ls = dlm_find_lockspace_local(lockspace);
	if (!ls)
		return -EINVAL;

	dlm_put_lockspace(ls);
	return release_lockspace(ls, force);
}
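
/*
 * Illustrative only (not part of the original file): a kernel caller such as
 * a cluster filesystem would typically create and tear down a lockspace
 * roughly like this; the "example" name and 32-byte LVB length are arbitrary.
 *
 *	dlm_lockspace_t *ls;
 *	int error;
 *
 *	error = dlm_new_lockspace("example", strlen("example"), &ls, 0, 32);
 *	if (!error)
 *		dlm_release_lockspace(ls, 0);
 */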