1 /* -*- mode: c; c-basic-offset: 8; -*-
2 * vim: noexpandtab sw=8 ts=8 sts=0:
4 * Copyright (C) 2004, 2005 Oracle. All rights reserved.
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public
8 * License as published by the Free Software Foundation; either
9 * version 2 of the License, or (at your option) any later version.
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * General Public License for more details.
16 * You should have received a copy of the GNU General Public
17 * License along with this program; if not, write to the
18 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
19 * Boston, MA 02111-1307, USA.
22 #include <linux/kernel.h>
23 #include <linux/module.h>
24 #include <linux/sysctl.h>
25 #include <linux/configfs.h>
29 #include "nodemanager.h"
30 #include "heartbeat.h"
35 /* for now we operate under the assertion that there can be only one
36 * cluster active at a time. Changing this will require trickling
37 * cluster references throughout where nodes are looked up */
38 static struct o2nm_cluster *o2nm_single_cluster = NULL;
/* Size, in bytes, of the buffer that holds the hb_ctl helper path. */
40 #define OCFS2_MAX_HB_CTL_PATH 256
/* Path returned by o2nm_get_hb_ctl_path().  The same buffer is the backing
 * store (.data) of the "hb_ctl_path" sysctl entry; it starts out as
 * "/sbin/ocfs2_hb_ctl". */
41 static char ocfs2_hb_ctl_path[OCFS2_MAX_HB_CTL_PATH] = "/sbin/ocfs2_hb_ctl";
/* Leaf sysctl table: a string entry named "hb_ctl_path" that is backed by
 * ocfs2_hb_ctl_path and limited to OCFS2_MAX_HB_CTL_PATH bytes.  It is
 * handled by the generic string helpers proc_dostring / sysctl_string. */
43 static ctl_table ocfs2_nm_table[] = {
46 .procname = "hb_ctl_path",
47 .data = ocfs2_hb_ctl_path,
48 .maxlen = OCFS2_MAX_HB_CTL_PATH,
50 .proc_handler = &proc_dostring,
51 .strategy = &sysctl_string,
/* Intermediate sysctl level identified by KERN_OCFS2_NM; its child table is
 * ocfs2_nm_table. */
56 static ctl_table ocfs2_mod_table[] = {
58 .ctl_name = KERN_OCFS2_NM,
63 .child = ocfs2_nm_table
/* Intermediate sysctl level identified by KERN_OCFS2; its child table is
 * ocfs2_mod_table. */
68 static ctl_table ocfs2_kern_table[] = {
70 .ctl_name = KERN_OCFS2,
75 .child = ocfs2_mod_table
/* Top of the sysctl hierarchy; this is the table init_o2nm() passes to
 * register_sysctl_table().  Its child table is ocfs2_kern_table. */
80 static ctl_table ocfs2_root_table[] = {
87 .child = ocfs2_kern_table
/* Handle returned by register_sysctl_table() in init_o2nm(); NULL until
 * then.  It is handed back to unregister_sysctl_table() on module exit and
 * on the init error path. */
92 static struct ctl_table_header *ocfs2_table_header = NULL;
/*
 * o2nm_get_hb_ctl_path - return the path of the hb_ctl helper.
 *
 * Returns a pointer to the static ocfs2_hb_ctl_path buffer itself, not a
 * copy, so the caller must neither modify nor free it.
 */
94 const char *o2nm_get_hb_ctl_path(void)
96 return ocfs2_hb_ctl_path;
98 EXPORT_SYMBOL_GPL(o2nm_get_hb_ctl_path);
/* Per-cluster state.  The structure embeds the configfs group that
 * represents the cluster, which is how to_o2nm_cluster() recovers it from a
 * config_item. */
100 struct o2nm_cluster {
101 struct config_group cl_group;
/* non-zero while one of this cluster's nodes is marked as the local node */
102 unsigned cl_has_local:1;
/* taken around accesses to cl_nodes, cl_node_ip_tree and cl_nodes_bitmap */
104 rwlock_t cl_nodes_lock;
/* nodes indexed by node number; an empty slot is NULL */
105 struct o2nm_node *cl_nodes[O2NM_MAX_NODES];
/* nodes keyed by IPv4 address and linked through o2nm_node.nd_ip_node */
106 struct rb_root cl_node_ip_tree;
/* bit N is set when cl_nodes[N] is filled in and cleared when it is emptied */
107 /* this bitmap is part of a hack for disk bitmap.. will go eventually. - zab */
108 unsigned long cl_nodes_bitmap[BITS_TO_LONGS(O2NM_MAX_NODES)];
/*
 * o2nm_get_node_by_num - look up a node of the single active cluster by its
 * node number.
 *
 * The lookup is skipped when node_num is out of range (>= O2NM_MAX_NODES)
 * or when no cluster is active; node is initialised to NULL for that case.
 * Otherwise the cl_nodes slot is read under cl_nodes_lock and a configfs
 * item reference is taken on the node before the lock is released.  The
 * caller drops that reference with o2nm_node_put().
 *
 * NOTE(review): the reference is presumably only taken when the slot is
 * non-NULL, and the node pointer is presumably the return value - confirm.
 */
111 struct o2nm_node *o2nm_get_node_by_num(u8 node_num)
113 struct o2nm_node *node = NULL;
115 if (node_num >= O2NM_MAX_NODES || o2nm_single_cluster == NULL)
118 read_lock(&o2nm_single_cluster->cl_nodes_lock);
119 node = o2nm_single_cluster->cl_nodes[node_num];
121 config_item_get(&node->nd_item);
122 read_unlock(&o2nm_single_cluster->cl_nodes_lock);
126 EXPORT_SYMBOL_GPL(o2nm_get_node_by_num);
/*
 * o2nm_configured_node_map - copy the bitmap of configured node numbers.
 *
 * @map:   destination buffer
 * @bytes: size of @map in bytes; it is a BUG if this is smaller than the
 *         cluster's cl_nodes_bitmap
 *
 * Exactly sizeof(cl_nodes_bitmap) bytes are copied from the single active
 * cluster while holding cl_nodes_lock for reading.
 *
 * NOTE(review): the return value and the handling of a NULL cluster are
 * assumed to be decided by statements next to the ones below - confirm.
 */
128 int o2nm_configured_node_map(unsigned long *map, unsigned bytes)
130 struct o2nm_cluster *cluster = o2nm_single_cluster;
132 BUG_ON(bytes < (sizeof(cluster->cl_nodes_bitmap)));
137 read_lock(&cluster->cl_nodes_lock);
138 memcpy(map, cluster->cl_nodes_bitmap, sizeof(cluster->cl_nodes_bitmap));
139 read_unlock(&cluster->cl_nodes_lock);
143 EXPORT_SYMBOL_GPL(o2nm_configured_node_map);
/*
 * o2nm_node_ip_tree_lookup - search a cluster's IP tree for an address.
 *
 * Walks cluster->cl_node_ip_tree starting at its root.  Each visited
 * rb_node is converted to its o2nm_node through the nd_ip_node member, and
 * the needle is compared with that node's nd_ipv4_address using memcmp().
 *
 * @ret_p and @ret_parent are optional outputs; o2nm_get_node_by_ip() passes
 * NULL for both.  o2nm_node_ipv4_address_write() passes the values it gets
 * back straight to rb_link_node() to insert a new node.
 *
 * Both callers in this file hold cl_nodes_lock around the call.
 *
 * NOTE(review): ret is presumably the matching node, or NULL when the
 * address is not in the tree - confirm.
 */
145 static struct o2nm_node *o2nm_node_ip_tree_lookup(struct o2nm_cluster *cluster,
147 struct rb_node ***ret_p,
148 struct rb_node **ret_parent)
150 struct rb_node **p = &cluster->cl_node_ip_tree.rb_node;
151 struct rb_node *parent = NULL;
152 struct o2nm_node *node, *ret = NULL;
158 node = rb_entry(parent, struct o2nm_node, nd_ip_node);
160 cmp = memcmp(&ip_needle, &node->nd_ipv4_address,
174 if (ret_parent != NULL)
175 *ret_parent = parent;
/*
 * o2nm_get_node_by_ip - look up a node of the single active cluster by its
 * IPv4 address.
 *
 * @addr: address in network byte order (__be32)
 *
 * The IP tree is searched under cl_nodes_lock (read side) and a configfs
 * item reference is taken on the node before the lock is released.  The
 * caller drops that reference with o2nm_node_put().
 *
 * NOTE(review): the reference is presumably only taken when a node was
 * found, and NULL is presumably returned when there is no cluster or no
 * match - confirm.
 */
180 struct o2nm_node *o2nm_get_node_by_ip(__be32 addr)
182 struct o2nm_node *node = NULL;
183 struct o2nm_cluster *cluster = o2nm_single_cluster;
188 read_lock(&cluster->cl_nodes_lock);
189 node = o2nm_node_ip_tree_lookup(cluster, addr, NULL, NULL);
191 config_item_get(&node->nd_item);
192 read_unlock(&cluster->cl_nodes_lock);
197 EXPORT_SYMBOL_GPL(o2nm_get_node_by_ip);
/* o2nm_node_put - drop one reference on @node's embedded configfs item.
 * @node must not be NULL: it is dereferenced unconditionally. */
199 void o2nm_node_put(struct o2nm_node *node)
201 config_item_put(&node->nd_item);
203 EXPORT_SYMBOL_GPL(o2nm_node_put);
/* o2nm_node_get - take one reference on @node's embedded configfs item;
 * balance it with o2nm_node_put().  @node must not be NULL: it is
 * dereferenced unconditionally. */
205 void o2nm_node_get(struct o2nm_node *node)
207 config_item_get(&node->nd_item);
209 EXPORT_SYMBOL_GPL(o2nm_node_get);
/*
 * o2nm_this_node - report the node number of the local node.
 *
 * node_num starts as O2NM_MAX_NODES, which is not a valid index into
 * cl_nodes, and is replaced by the cluster's cl_local_node only when a
 * cluster exists and has a local node set.
 *
 * NOTE(review): node_num is presumably the return value - confirm.
 */
211 u8 o2nm_this_node(void)
213 u8 node_num = O2NM_MAX_NODES;
215 if (o2nm_single_cluster && o2nm_single_cluster->cl_has_local)
216 node_num = o2nm_single_cluster->cl_local_node;
220 EXPORT_SYMBOL_GPL(o2nm_this_node);
222 /* node configfs bits */
/* Convert a configfs item into the o2nm_cluster that contains it: the item
 * is first turned into its config_group, then container_of() yields the
 * enclosing cluster.
 * NOTE(review): the member used is presumably cl_group - confirm. */
224 static struct o2nm_cluster *to_o2nm_cluster(struct config_item *item)
227 container_of(to_config_group(item), struct o2nm_cluster,
/* Convert a configfs item into the o2nm_node that embeds it as nd_item.
 * A NULL item yields NULL. */
232 static struct o2nm_node *to_o2nm_node(struct config_item *item)
234 return item ? container_of(item, struct o2nm_node, nd_item) : NULL;
/* configfs .release callback for node items (installed through
 * o2nm_node_item_ops).
 * NOTE(review): presumably frees the o2nm_node obtained below - confirm. */
237 static void o2nm_node_release(struct config_item *item)
239 struct o2nm_node *node = to_o2nm_node(item);
/* Show handler for the node-number attribute: writes nd_num as a decimal
 * number followed by a newline into @page and returns the number of
 * characters written. */
243 static ssize_t o2nm_node_num_read(struct o2nm_node *node, char *page)
245 return sprintf(page, "%d\n", node->nd_num);
/* Find the cluster a node belongs to by walking two levels up the configfs
 * hierarchy from the node's item: node -> node group -> cluster. */
248 static struct o2nm_cluster *to_o2nm_cluster_from_node(struct o2nm_node *node)
250 /* through the first node_set .parent
251 * mycluster/nodes/mynode == o2nm_cluster->o2nm_node_group->o2nm_node */
252 return to_o2nm_cluster(node->nd_item.ci_parent->ci_parent);
/* Per-node attribute identifiers.  Each value is used both as an index into
 * o2nm_node_attrs[] and as a bit number in o2nm_node.nd_set_attributes,
 * which records the attributes that have been written. */
256 O2NM_NODE_ATTR_NUM = 0,
258 O2NM_NODE_ATTR_ADDRESS,
259 O2NM_NODE_ATTR_LOCAL,
/*
 * Store handler for the node-number attribute.
 *
 * The text in @page is parsed with simple_strtoul() using base 0, so
 * decimal, octal and hex are accepted.  The parse must end at the end of
 * the string or at a newline, and the value must be below O2NM_MAX_NODES.
 * The address and port attributes must already have been set.
 *
 * Under cl_nodes_lock (write side) the cl_nodes slot for the number is
 * checked for an existing occupant; the node is then stored in the slot and
 * the matching bit is set in cl_nodes_bitmap.
 *
 * NOTE(review): an occupied slot and the failed validations presumably
 * produce an error return - only the -EINVAL below is spelled out here.
 */
262 static ssize_t o2nm_node_num_write(struct o2nm_node *node, const char *page,
265 struct o2nm_cluster *cluster = to_o2nm_cluster_from_node(node);
267 char *p = (char *)page;
269 tmp = simple_strtoul(p, &p, 0);
270 if (!p || (*p && (*p != '\n')))
273 if (tmp >= O2NM_MAX_NODES)
276 /* once we're in the cl_nodes tree networking can look us up by
277 * node number and try to use our address and port attributes
278 * to connect to this node.. make sure that they've been set
279 * before writing the node attribute? */
280 if (!test_bit(O2NM_NODE_ATTR_ADDRESS, &node->nd_set_attributes) ||
281 !test_bit(O2NM_NODE_ATTR_PORT, &node->nd_set_attributes))
282 return -EINVAL; /* XXX */
284 write_lock(&cluster->cl_nodes_lock);
285 if (cluster->cl_nodes[tmp])
288 cluster->cl_nodes[tmp] = node;
290 set_bit(tmp, cluster->cl_nodes_bitmap);
292 write_unlock(&cluster->cl_nodes_lock);
/* Show handler for "ipv4_port": converts the stored port from network to
 * host byte order and writes it as an unsigned decimal plus newline into
 * @page.  Returns the number of characters written. */
298 static ssize_t o2nm_node_ipv4_port_read(struct o2nm_node *node, char *page)
300 return sprintf(page, "%u\n", ntohs(node->nd_ipv4_port));
/*
 * Store handler for "ipv4_port".
 *
 * The text in @page is parsed with simple_strtoul() (base 0); the parse
 * must end at the end of the string or at a newline.  The value is stored
 * in nd_ipv4_port in network byte order.
 *
 * NOTE(review): range checks on the parsed value, if any, are assumed to
 * sit between the parse and the assignment - confirm.
 */
303 static ssize_t o2nm_node_ipv4_port_write(struct o2nm_node *node,
304 const char *page, size_t count)
307 char *p = (char *)page;
309 tmp = simple_strtoul(p, &p, 0);
310 if (!p || (*p && (*p != '\n')))
318 node->nd_ipv4_port = htons(tmp);
/* Show handler for "ipv4_address": writes the stored address in dotted-quad
 * form followed by a newline into @page.  Returns the number of characters
 * written. */
323 static ssize_t o2nm_node_ipv4_address_read(struct o2nm_node *node, char *page)
325 return sprintf(page, "%u.%u.%u.%u\n", NIPQUAD(node->nd_ipv4_address));
/*
 * Store handler for "ipv4_address".
 *
 * @page is parsed as a dotted quad, with at most three digits per octet.
 * The first octet in the text lands in octets[3] and the last in octets[0].
 * The loop then folds octets[i] << (i * 8) into a big-endian 32-bit value
 * with be32_add_cpu().
 *
 * Under cl_nodes_lock (write side) the cluster's IP tree is searched for
 * the address; the same lookup provides the link position that is used to
 * insert this node into the tree.  The address is copied into
 * nd_ipv4_address after the lock has been released.
 *
 * NOTE(review): a malformed string, an octet above 255 and an address that
 * is already in the tree presumably each produce an error return - confirm.
 */
328 static ssize_t o2nm_node_ipv4_address_write(struct o2nm_node *node,
332 struct o2nm_cluster *cluster = to_o2nm_cluster_from_node(node);
334 struct rb_node **p, *parent;
335 unsigned int octets[4];
336 __be32 ipv4_addr = 0;
338 ret = sscanf(page, "%3u.%3u.%3u.%3u", &octets[3], &octets[2],
339 &octets[1], &octets[0]);
343 for (i = 0; i < ARRAY_SIZE(octets); i++) {
346 be32_add_cpu(&ipv4_addr, octets[i] << (i * 8));
350 write_lock(&cluster->cl_nodes_lock);
351 if (o2nm_node_ip_tree_lookup(cluster, ipv4_addr, &p, &parent))
354 rb_link_node(&node->nd_ip_node, parent, p);
355 rb_insert_color(&node->nd_ip_node, &cluster->cl_node_ip_tree);
357 write_unlock(&cluster->cl_nodes_lock);
361 memcpy(&node->nd_ipv4_address, &ipv4_addr, sizeof(ipv4_addr));
/* Show handler for the "local" flag: writes nd_local as a decimal number
 * followed by a newline into @page.  Returns the number of characters
 * written. */
366 static ssize_t o2nm_node_local_read(struct o2nm_node *node, char *page)
368 return sprintf(page, "%d\n", node->nd_local);
/*
 * Store handler for the "local" flag.
 *
 * The text in @page is parsed with simple_strtoul() (base 0) and collapsed
 * to 0 or 1.  The address, number and port attributes must already have
 * been set, otherwise -EINVAL is returned.
 *
 * Setting the flag when the cluster has no local node yet starts the
 * network listener for this node.  Clearing it on the node that currently
 * is the local node stops the listener and resets cl_local_node to
 * O2NM_INVALID_NODE_NUM.  Finally nd_local is updated and, when it is set,
 * the cluster's cl_has_local and cl_local_node are pointed at this node.
 *
 * NOTE(review): cl_has_local and cl_local_node are read and written here
 * without taking cl_nodes_lock.
 */
371 static ssize_t o2nm_node_local_write(struct o2nm_node *node, const char *page,
374 struct o2nm_cluster *cluster = to_o2nm_cluster_from_node(node);
376 char *p = (char *)page;
379 tmp = simple_strtoul(p, &p, 0);
380 if (!p || (*p && (*p != '\n')))
383 tmp = !!tmp; /* boolean of whether this node wants to be local */
385 /* setting local turns on networking rx for now so we require having
386 * set everything else first */
387 if (!test_bit(O2NM_NODE_ATTR_ADDRESS, &node->nd_set_attributes) ||
388 !test_bit(O2NM_NODE_ATTR_NUM, &node->nd_set_attributes) ||
389 !test_bit(O2NM_NODE_ATTR_PORT, &node->nd_set_attributes))
390 return -EINVAL; /* XXX */
392 /* the only failure case is trying to set a new local node
393 * when a different one is already set */
394 if (tmp && tmp == cluster->cl_has_local &&
395 cluster->cl_local_node != node->nd_num)
398 /* bring up the rx thread if we're setting the new local node. */
399 if (tmp && !cluster->cl_has_local) {
400 ret = o2net_start_listening(node);
405 if (!tmp && cluster->cl_has_local &&
406 cluster->cl_local_node == node->nd_num) {
407 o2net_stop_listening(node);
408 cluster->cl_local_node = O2NM_INVALID_NODE_NUM;
411 node->nd_local = tmp;
412 if (node->nd_local) {
413 cluster->cl_has_local = tmp;
414 cluster->cl_local_node = node->nd_num;
/* A configfs attribute of a node together with its typed handlers.
 * o2nm_node_show() and o2nm_node_store() recover this wrapper from the
 * embedded configfs_attribute with container_of() and dispatch to it. */
420 struct o2nm_node_attribute {
421 struct configfs_attribute attr;
/* formats the attribute's value into the page buffer; returns the length */
422 ssize_t (*show)(struct o2nm_node *, char *);
/* parses the page buffer (with its byte count) and applies the new value */
423 ssize_t (*store)(struct o2nm_node *, const char *, size_t);
/* Node-number attribute: readable by everyone, writable by the owner;
 * served by o2nm_node_num_read() and o2nm_node_num_write(). */
426 static struct o2nm_node_attribute o2nm_node_attr_num = {
427 .attr = { .ca_owner = THIS_MODULE,
429 .ca_mode = S_IRUGO | S_IWUSR },
430 .show = o2nm_node_num_read,
431 .store = o2nm_node_num_write,
/* "ipv4_port" attribute: readable by everyone, writable by the owner;
 * served by o2nm_node_ipv4_port_read() and o2nm_node_ipv4_port_write(). */
434 static struct o2nm_node_attribute o2nm_node_attr_ipv4_port = {
435 .attr = { .ca_owner = THIS_MODULE,
436 .ca_name = "ipv4_port",
437 .ca_mode = S_IRUGO | S_IWUSR },
438 .show = o2nm_node_ipv4_port_read,
439 .store = o2nm_node_ipv4_port_write,
/* "ipv4_address" attribute: readable by everyone, writable by the owner;
 * served by o2nm_node_ipv4_address_read() and
 * o2nm_node_ipv4_address_write(). */
442 static struct o2nm_node_attribute o2nm_node_attr_ipv4_address = {
443 .attr = { .ca_owner = THIS_MODULE,
444 .ca_name = "ipv4_address",
445 .ca_mode = S_IRUGO | S_IWUSR },
446 .show = o2nm_node_ipv4_address_read,
447 .store = o2nm_node_ipv4_address_write,
/* Local-node flag attribute: readable by everyone, writable by the owner;
 * served by o2nm_node_local_read() and o2nm_node_local_write(). */
450 static struct o2nm_node_attribute o2nm_node_attr_local = {
451 .attr = { .ca_owner = THIS_MODULE,
453 .ca_mode = S_IRUGO | S_IWUSR },
454 .show = o2nm_node_local_read,
455 .store = o2nm_node_local_write,
/* Attribute list of a node item, indexed by the O2NM_NODE_ATTR_* values so
 * that o2nm_attr_index() can map an attribute pointer back to its bit
 * number.  o2nm_node_type hands this array to configfs as ct_attrs. */
458 static struct configfs_attribute *o2nm_node_attrs[] = {
459 [O2NM_NODE_ATTR_NUM] = &o2nm_node_attr_num.attr,
460 [O2NM_NODE_ATTR_PORT] = &o2nm_node_attr_ipv4_port.attr,
461 [O2NM_NODE_ATTR_ADDRESS] = &o2nm_node_attr_ipv4_address.attr,
462 [O2NM_NODE_ATTR_LOCAL] = &o2nm_node_attr_local.attr,
/* Find @attr in o2nm_node_attrs[] by pointer comparison.
 * NOTE(review): presumably returns the matching index (an O2NM_NODE_ATTR_*
 * value); what happens when nothing matches needs to be confirmed. */
466 static int o2nm_attr_index(struct configfs_attribute *attr)
469 for (i = 0; i < ARRAY_SIZE(o2nm_node_attrs); i++) {
470 if (attr == o2nm_node_attrs[i])
/* configfs show_attribute callback for node items: recovers the o2nm_node
 * and the o2nm_node_attribute wrapper from the generic arguments and, when
 * the attribute has a show handler, lets it fill the page buffer. */
477 static ssize_t o2nm_node_show(struct config_item *item,
478 struct configfs_attribute *attr,
481 struct o2nm_node *node = to_o2nm_node(item);
482 struct o2nm_node_attribute *o2nm_node_attr =
483 container_of(attr, struct o2nm_node_attribute, attr);
486 if (o2nm_node_attr->show)
487 ret = o2nm_node_attr->show(node, page);
/*
 * configfs store_attribute callback for node items.
 *
 * Recovers the o2nm_node and the o2nm_node_attribute wrapper from the
 * generic arguments and looks up the attribute's bit number with
 * o2nm_attr_index().  An attribute without a store handler is treated
 * specially.  The attribute's bit in nd_set_attributes is tested before the
 * handler is called and set afterwards, so the node remembers which
 * attributes have been written.
 *
 * NOTE(review): a bit that is already set presumably rejects the write
 * (attributes appear to be write-once), and the bit is presumably only set
 * when the handler succeeded - confirm.
 */
491 static ssize_t o2nm_node_store(struct config_item *item,
492 struct configfs_attribute *attr,
493 const char *page, size_t count)
495 struct o2nm_node *node = to_o2nm_node(item);
496 struct o2nm_node_attribute *o2nm_node_attr =
497 container_of(attr, struct o2nm_node_attribute, attr);
499 int attr_index = o2nm_attr_index(attr);
501 if (o2nm_node_attr->store == NULL) {
506 if (test_bit(attr_index, &node->nd_set_attributes))
509 ret = o2nm_node_attr->store(node, page, count);
513 set_bit(attr_index, &node->nd_set_attributes);
/* configfs item callbacks for a node: release plus the generic attribute
 * show/store dispatchers. */
518 static struct configfs_item_operations o2nm_node_item_ops = {
519 .release = o2nm_node_release,
520 .show_attribute = o2nm_node_show,
521 .store_attribute = o2nm_node_store,
/* configfs item type of a node; o2nm_node_group_make_item() initialises
 * every new node item with it. */
524 static struct config_item_type o2nm_node_type = {
525 .ct_item_ops = &o2nm_node_item_ops,
526 .ct_attrs = o2nm_node_attrs,
527 .ct_owner = THIS_MODULE,
/* Container for a cluster's nodes; embeds the configfs group that
 * o2nm_cluster_group_make_group() names "node" and installs as the
 * cluster's first default group. */
532 struct o2nm_node_group {
533 struct config_group ns_group;
/* Convert a config_group into the o2nm_node_group that embeds it as
 * ns_group. */
538 static struct o2nm_node_group *to_o2nm_node_group(struct config_group *group)
541 container_of(group, struct o2nm_node_group, ns_group)
/*
 * configfs make_item callback of the node group: create a new node.
 *
 * Names longer than O2NM_MAX_NAME_LEN are refused.  The node is allocated
 * zero-filled and the name is copied into nd_name, which the length check
 * above keeps within bounds provided the buffer holds O2NM_MAX_NAME_LEN
 * characters plus the terminator.  The embedded configfs item is
 * initialised with the same name and o2nm_node_type, and nd_lock is
 * initialised.
 *
 * Returns the node's config_item.  ret stays NULL on the two failure paths,
 * so the caller cannot tell "name too long" from "out of memory".
 */
546 static struct config_item *o2nm_node_group_make_item(struct config_group *group,
549 struct o2nm_node *node = NULL;
550 struct config_item *ret = NULL;
552 if (strlen(name) > O2NM_MAX_NAME_LEN)
553 goto out; /* ENAMETOOLONG */
555 node = kcalloc(1, sizeof(struct o2nm_node), GFP_KERNEL);
557 goto out; /* ENOMEM */
559 strcpy(node->nd_name, name); /* use item.ci_namebuf instead? */
560 config_item_init_type_name(&node->nd_item, name, &o2nm_node_type);
561 spin_lock_init(&node->nd_lock);
563 ret = &node->nd_item;
/*
 * configfs drop_item callback of the node group: remove a node.
 *
 * The node is first disconnected from the network layer.  If it is the
 * cluster's local node, the local-node state is reset and the listener is
 * stopped.  Then, under cl_nodes_lock (write side), the node is erased from
 * the IP tree when it has a non-zero address, and removed from cl_nodes and
 * cl_nodes_bitmap when its slot still points at it.  Finally one reference
 * on the item is dropped.
 *
 * The slot comparison (rather than trusting nd_num) matters because nd_num
 * is 0 for a node whose number was never written, and slot 0 may then
 * belong to a different node.
 */
572 static void o2nm_node_group_drop_item(struct config_group *group,
573 struct config_item *item)
575 struct o2nm_node *node = to_o2nm_node(item);
576 struct o2nm_cluster *cluster = to_o2nm_cluster(group->cg_item.ci_parent);
578 o2net_disconnect_node(node);
580 if (cluster->cl_has_local &&
581 (cluster->cl_local_node == node->nd_num)) {
582 cluster->cl_has_local = 0;
583 cluster->cl_local_node = O2NM_INVALID_NODE_NUM;
584 o2net_stop_listening(node);
587 /* XXX call into net to stop this node from trading messages */
589 write_lock(&cluster->cl_nodes_lock);
592 if (node->nd_ipv4_address)
593 rb_erase(&node->nd_ip_node, &cluster->cl_node_ip_tree);
595 /* nd_num might be 0 if the node number hasn't been set.. */
596 if (cluster->cl_nodes[node->nd_num] == node) {
597 cluster->cl_nodes[node->nd_num] = NULL;
598 clear_bit(node->nd_num, cluster->cl_nodes_bitmap);
600 write_unlock(&cluster->cl_nodes_lock);
602 config_item_put(item);
/* configfs group callbacks of the node group: create and remove node
 * items. */
605 static struct configfs_group_operations o2nm_node_group_group_ops = {
606 .make_item = o2nm_node_group_make_item,
607 .drop_item = o2nm_node_group_drop_item,
/* configfs item type of the node group; used when the group is initialised
 * in o2nm_cluster_group_make_group(). */
610 static struct config_item_type o2nm_node_group_type = {
611 .ct_group_ops = &o2nm_node_group_group_ops,
612 .ct_owner = THIS_MODULE,
/* configfs .release callback for a cluster item: frees the default_groups
 * array that o2nm_cluster_group_make_group() allocated.
 * NOTE(review): presumably the cluster itself is freed as well - confirm. */
617 static void o2nm_cluster_release(struct config_item *item)
619 struct o2nm_cluster *cluster = to_o2nm_cluster(item);
621 kfree(cluster->cl_group.default_groups);
/* configfs item callbacks for a cluster: only a release handler. */
625 static struct configfs_item_operations o2nm_cluster_item_ops = {
626 .release = o2nm_cluster_release,
/* configfs item type of a cluster; it only carries the release handler from
 * o2nm_cluster_item_ops. */
629 static struct config_item_type o2nm_cluster_type = {
630 .ct_item_ops = &o2nm_cluster_item_ops,
631 .ct_owner = THIS_MODULE,
/* Top-level container for clusters; embeds the configfs subsystem that
 * init_o2nm() registers. */
636 struct o2nm_cluster_group {
637 struct configfs_subsystem cs_subsys;
/* Convert a config_group into the o2nm_cluster_group whose cs_subsys it
 * belongs to: the group is first turned into its configfs_subsystem, then
 * container_of() yields the enclosing structure. */
642 static struct o2nm_cluster_group *to_o2nm_cluster_group(struct config_group *group)
645 container_of(to_configfs_subsystem(group), struct o2nm_cluster_group, cs_subsys)
/*
 * configfs make_group callback of the subsystem: create a cluster.
 *
 * Only one cluster may exist at a time; a second request bails out early.
 * Four objects are allocated up front and checked together: the cluster,
 * its node group, a three-entry default-group array and the heartbeat
 * group.
 *
 * The cluster's default groups become { node group named "node", heartbeat
 * group, NULL terminator }.  The node lock and the empty IP tree are
 * initialised, and the cluster is published in o2nm_single_cluster.
 *
 * Returns the cluster's config_group; ret stays NULL when nothing was
 * created.  On failure the heartbeat group is released with
 * o2hb_free_hb_set().
 *
 * NOTE(review): the remaining allocations are presumably freed on the same
 * failure path - confirm.
 */
650 static struct config_group *o2nm_cluster_group_make_group(struct config_group *group,
653 struct o2nm_cluster *cluster = NULL;
654 struct o2nm_node_group *ns = NULL;
655 struct config_group *o2hb_group = NULL, *ret = NULL;
658 /* this runs under the parent dir's i_mutex; there can be only
659 * one caller in here at a time */
660 if (o2nm_single_cluster)
661 goto out; /* ENOSPC */
663 cluster = kcalloc(1, sizeof(struct o2nm_cluster), GFP_KERNEL);
664 ns = kcalloc(1, sizeof(struct o2nm_node_group), GFP_KERNEL);
665 defs = kcalloc(3, sizeof(struct config_group *), GFP_KERNEL);
666 o2hb_group = o2hb_alloc_hb_set();
667 if (cluster == NULL || ns == NULL || o2hb_group == NULL || defs == NULL)
670 config_group_init_type_name(&cluster->cl_group, name,
672 config_group_init_type_name(&ns->ns_group, "node",
673 &o2nm_node_group_type);
675 cluster->cl_group.default_groups = defs;
676 cluster->cl_group.default_groups[0] = &ns->ns_group;
677 cluster->cl_group.default_groups[1] = o2hb_group;
678 cluster->cl_group.default_groups[2] = NULL;
679 rwlock_init(&cluster->cl_nodes_lock);
680 cluster->cl_node_ip_tree = RB_ROOT;
682 ret = &cluster->cl_group;
683 o2nm_single_cluster = cluster;
689 o2hb_free_hb_set(o2hb_group);
/*
 * configfs drop_item callback of the subsystem: remove a cluster.
 *
 * The cluster being dropped must be the single active one (BUG otherwise).
 * The global pointer is cleared first, so that the lookup functions stop
 * finding the cluster.  Each default group then has its slot cleared and
 * one reference dropped, walking the array up to its NULL terminator.
 * Finally one reference on the cluster's own item is dropped.
 */
696 static void o2nm_cluster_group_drop_item(struct config_group *group, struct config_item *item)
698 struct o2nm_cluster *cluster = to_o2nm_cluster(item);
700 struct config_item *killme;
702 BUG_ON(o2nm_single_cluster != cluster);
703 o2nm_single_cluster = NULL;
705 for (i = 0; cluster->cl_group.default_groups[i]; i++) {
706 killme = &cluster->cl_group.default_groups[i]->cg_item;
707 cluster->cl_group.default_groups[i] = NULL;
708 config_item_put(killme);
711 config_item_put(item);
/* configfs group callbacks of the subsystem root: create and remove
 * clusters. */
714 static struct configfs_group_operations o2nm_cluster_group_group_ops = {
715 .make_group = o2nm_cluster_group_make_group,
716 .drop_item = o2nm_cluster_group_drop_item,
/* configfs item type of the subsystem root; referenced as ci_type by the
 * static o2nm_cluster_group instance. */
719 static struct config_item_type o2nm_cluster_group_type = {
720 .ct_group_ops = &o2nm_cluster_group_group_ops,
721 .ct_owner = THIS_MODULE,
/* The one subsystem instance that init_o2nm() registers with configfs.  Its
 * root item is named "cluster" and uses o2nm_cluster_group_type. */
724 static struct o2nm_cluster_group o2nm_cluster_group = {
728 .ci_namebuf = "cluster",
729 .ci_type = &o2nm_cluster_group_type,
/* Module exit: unregisters the sysctl tables (if they were registered), the
 * heartbeat callbacks of the network layer and the configfs subsystem, in
 * that order. */
735 static void __exit exit_o2nm(void)
737 if (ocfs2_table_header)
738 unregister_sysctl_table(ocfs2_table_header);
740 /* XXX sync with hb callbacks and shut down hb? */
741 o2net_unregister_hb_callbacks();
742 configfs_unregister_subsystem(&o2nm_cluster_group.cs_subsys);
/*
 * Module init.
 *
 * Steps, in order: print the version banner, register the sysctl tables
 * (-ENOMEM if that fails), register the heartbeat callbacks of the network
 * layer, initialise and register the configfs subsystem, then call
 * o2cb_sys_init().
 *
 * The unregister calls at the end undo the steps in reverse order:
 * configfs subsystem, heartbeat callbacks, sysctl tables.
 *
 * NOTE(review): each unregister call is presumably reached only when a
 * later step failed - confirm.
 */
748 static int __init init_o2nm(void)
752 cluster_print_version();
757 ocfs2_table_header = register_sysctl_table(ocfs2_root_table, 0);
758 if (!ocfs2_table_header) {
759 printk(KERN_ERR "nodemanager: unable to register sysctl\n");
760 ret = -ENOMEM; /* or something. */
764 ret = o2net_register_hb_callbacks();
768 config_group_init(&o2nm_cluster_group.cs_subsys.su_group);
769 init_MUTEX(&o2nm_cluster_group.cs_subsys.su_sem);
770 ret = configfs_register_subsystem(&o2nm_cluster_group.cs_subsys);
772 printk(KERN_ERR "nodemanager: Registration returned %d\n", ret);
776 ret = o2cb_sys_init();
780 configfs_unregister_subsystem(&o2nm_cluster_group.cs_subsys);
782 o2net_unregister_hb_callbacks();
784 unregister_sysctl_table(ocfs2_table_header);
791 MODULE_AUTHOR("Oracle");
792 MODULE_LICENSE("GPL");
794 module_init(init_o2nm)
795 module_exit(exit_o2nm)