git.oblomov.eu Git - linux-2.6/blob - net/ipv4/ipvs/ip_vs_sh.c

   1 /*
   2  * IPVS:        Source Hashing scheduling module
   3  *
   4  * Authors:     Wensong Zhang <wensong@gnuchina.org>
   5  *
   6  *              This program is free software; you can redistribute it and/or
   7  *              modify it under the terms of the GNU General Public License
   8  *              as published by the Free Software Foundation; either version
   9  *              2 of the License, or (at your option) any later version.
  10  *
  11  * Changes:
  12  *
  13  */
  14
/*
 * The sh algorithm selects a server by the hash key of the source IP
 * address. The pseudo code is as follows:
 *
 *       n <- servernode[src_ip];
 *       if (n is dead) OR
 *          (n is overloaded) OR (n.weight <= 0) then
 *                 return NULL;
 *
 *       return n;
 *
 * Note that servernode is a 256-bucket hash table that maps the hash
 * index derived from the packet source IP address to the current server
 * array. If the sh scheduler is used in a cache cluster, it is good to
 * combine it with the cache_bypass feature. When the statically assigned
 * server is dead or overloaded, the load balancer can bypass the cache
 * server and send requests to the original server directly.
 *
 */
  34
  35 #include <linux/ip.h>
  36 #include <linux/module.h>
  37 #include <linux/kernel.h>
  38 #include <linux/skbuff.h>
  39
  40 #include <net/ip_vs.h>
  41
  42
  43 /*
  44  *      IPVS SH bucket
  45  */
  46 struct ip_vs_sh_bucket {
  47         struct ip_vs_dest       *dest;          /* real server (cache) */
  48 };
  49
  50 /*
  51  *     for IPVS SH entry hash table
  52  */
  53 #ifndef CONFIG_IP_VS_SH_TAB_BITS
  54 #define CONFIG_IP_VS_SH_TAB_BITS        8
  55 #endif
  56 #define IP_VS_SH_TAB_BITS               CONFIG_IP_VS_SH_TAB_BITS
  57 #define IP_VS_SH_TAB_SIZE               (1 << IP_VS_SH_TAB_BITS)
  58 #define IP_VS_SH_TAB_MASK               (IP_VS_SH_TAB_SIZE - 1)
  59
  60
  61 /*
  62  *      Returns hash value for IPVS SH entry
  63  */
  64 static inline unsigned ip_vs_sh_hashkey(__be32 addr)
  65 {
  66         return (ntohl(addr)*2654435761UL) & IP_VS_SH_TAB_MASK;
  67 }
  68
  69
  70 /*
  71  *      Get ip_vs_dest associated with supplied parameters.
  72  */
  73 static inline struct ip_vs_dest *
  74 ip_vs_sh_get(struct ip_vs_sh_bucket *tbl, __be32 addr)
  75 {
  76         return (tbl[ip_vs_sh_hashkey(addr)]).dest;
  77 }
  78
  79
  80 /*
  81  *      Assign all the hash buckets of the specified table with the service.
  82  */
  83 static int
  84 ip_vs_sh_assign(struct ip_vs_sh_bucket *tbl, struct ip_vs_service *svc)
  85 {
  86         int i;
  87         struct ip_vs_sh_bucket *b;
  88         struct list_head *p;
  89         struct ip_vs_dest *dest;
  90
  91         b = tbl;
  92         p = &svc->destinations;
  93         for (i=0; i<IP_VS_SH_TAB_SIZE; i++) {
  94                 if (list_empty(p)) {
  95                         b->dest = NULL;
  96                 } else {
  97                         if (p == &svc->destinations)
  98                                 p = p->next;
  99
 100                         dest = list_entry(p, struct ip_vs_dest, n_list);
 101                         atomic_inc(&dest->refcnt);
 102                         b->dest = dest;
 103
 104                         p = p->next;
 105                 }
 106                 b++;
 107         }
 108         return 0;
 109 }
 110
 111
 112 /*
 113  *      Flush all the hash buckets of the specified table.
 114  */
 115 static void ip_vs_sh_flush(struct ip_vs_sh_bucket *tbl)
 116 {
 117         int i;
 118         struct ip_vs_sh_bucket *b;
 119
 120         b = tbl;
 121         for (i=0; i<IP_VS_SH_TAB_SIZE; i++) {
 122                 if (b->dest) {
 123                         atomic_dec(&b->dest->refcnt);
 124                         b->dest = NULL;
 125                 }
 126                 b++;
 127         }
 128 }
 129
 130
 131 static int ip_vs_sh_init_svc(struct ip_vs_service *svc)
 132 {
 133         struct ip_vs_sh_bucket *tbl;
 134
 135         /* allocate the SH table for this service */
 136         tbl = kmalloc(sizeof(struct ip_vs_sh_bucket)*IP_VS_SH_TAB_SIZE,
 137                       GFP_ATOMIC);
 138         if (tbl == NULL) {
 139                 IP_VS_ERR("ip_vs_sh_init_svc(): no memory\n");
 140                 return -ENOMEM;
 141         }
 142         svc->sched_data = tbl;
 143         IP_VS_DBG(6, "SH hash table (memory=%Zdbytes) allocated for "
 144                   "current service\n",
 145                   sizeof(struct ip_vs_sh_bucket)*IP_VS_SH_TAB_SIZE);
 146
 147         /* assign the hash buckets with the updated service */
 148         ip_vs_sh_assign(tbl, svc);
 149
 150         return 0;
 151 }
 152
 153
 154 static int ip_vs_sh_done_svc(struct ip_vs_service *svc)
 155 {
 156         struct ip_vs_sh_bucket *tbl = svc->sched_data;
 157
 158         /* got to clean up hash buckets here */
 159         ip_vs_sh_flush(tbl);
 160
 161         /* release the table itself */
 162         kfree(svc->sched_data);
 163         IP_VS_DBG(6, "SH hash table (memory=%Zdbytes) released\n",
 164                   sizeof(struct ip_vs_sh_bucket)*IP_VS_SH_TAB_SIZE);
 165
 166         return 0;
 167 }
 168
 169
 170 static int ip_vs_sh_update_svc(struct ip_vs_service *svc)
 171 {
 172         struct ip_vs_sh_bucket *tbl = svc->sched_data;
 173
 174         /* got to clean up hash buckets here */
 175         ip_vs_sh_flush(tbl);
 176
 177         /* assign the hash buckets with the updated service */
 178         ip_vs_sh_assign(tbl, svc);
 179
 180         return 0;
 181 }
 182
 183
 184 /*
 185  *      If the dest flags is set with IP_VS_DEST_F_OVERLOAD,
 186  *      consider that the server is overloaded here.
 187  */
 188 static inline int is_overloaded(struct ip_vs_dest *dest)
 189 {
 190         return dest->flags & IP_VS_DEST_F_OVERLOAD;
 191 }
 192
 193
 194 /*
 195  *      Source Hashing scheduling
 196  */
 197 static struct ip_vs_dest *
 198 ip_vs_sh_schedule(struct ip_vs_service *svc, const struct sk_buff *skb)
 199 {
 200         struct ip_vs_dest *dest;
 201         struct ip_vs_sh_bucket *tbl;
 202         struct iphdr *iph = ip_hdr(skb);
 203
 204         IP_VS_DBG(6, "ip_vs_sh_schedule(): Scheduling...\n");
 205
 206         tbl = (struct ip_vs_sh_bucket *)svc->sched_data;
 207         dest = ip_vs_sh_get(tbl, iph->saddr);
 208         if (!dest
 209             || !(dest->flags & IP_VS_DEST_F_AVAILABLE)
 210             || atomic_read(&dest->weight) <= 0
 211             || is_overloaded(dest)) {
 212                 return NULL;
 213         }
 214
 215         IP_VS_DBG(6, "SH: source IP address %u.%u.%u.%u "
 216                   "--> server %u.%u.%u.%u:%d\n",
 217                   NIPQUAD(iph->saddr),
 218                   NIPQUAD(dest->addr),
 219                   ntohs(dest->port));
 220
 221         return dest;
 222 }
 223
 224
 225 /*
 226  *      IPVS SH Scheduler structure
 227  */
 228 static struct ip_vs_scheduler ip_vs_sh_scheduler =
 229 {
 230         .name =                 "sh",
 231         .refcnt =               ATOMIC_INIT(0),
 232         .module =               THIS_MODULE,
 233         .n_list  =              LIST_HEAD_INIT(ip_vs_sh_scheduler.n_list),
 234         .init_service =         ip_vs_sh_init_svc,
 235         .done_service =         ip_vs_sh_done_svc,
 236         .update_service =       ip_vs_sh_update_svc,
 237         .schedule =             ip_vs_sh_schedule,
 238 };
 239
 240
 241 static int __init ip_vs_sh_init(void)
 242 {
 243         return register_ip_vs_scheduler(&ip_vs_sh_scheduler);
 244 }
 245
 246
 247 static void __exit ip_vs_sh_cleanup(void)
 248 {
 249         unregister_ip_vs_scheduler(&ip_vs_sh_scheduler);
 250 }
 251
 252
 253 module_init(ip_vs_sh_init);
 254 module_exit(ip_vs_sh_cleanup);
 255 MODULE_LICENSE("GPL");