[AF_RXRPC]: Make the in-kernel AFS filesystem use AF_RXRPC.
[linux-2.6] / fs / afs / vlocation.c
1 /* AFS volume location management
2  *
3  * Copyright (C) 2002, 2007 Red Hat, Inc. All Rights Reserved.
4  * Written by David Howells (dhowells@redhat.com)
5  *
6  * This program is free software; you can redistribute it and/or
7  * modify it under the terms of the GNU General Public License
8  * as published by the Free Software Foundation; either version
9  * 2 of the License, or (at your option) any later version.
10  */
11
12 #include <linux/kernel.h>
13 #include <linux/module.h>
14 #include <linux/init.h>
15 #include "internal.h"
16
/*
 * Tunables (both in seconds):
 *  - afs_vlocation_timeout: how long an unused record stays in the graveyard
 *    before the reaper may destroy it
 *  - afs_vlocation_update_timeout: interval between refreshes of a record from
 *    the VL servers
 */
unsigned int afs_vlocation_timeout = 10;
unsigned int afs_vlocation_update_timeout = 10 * 60;

static void afs_vlocation_reaper(struct work_struct *work);
static void afs_vlocation_updater(struct work_struct *work);

/* records awaiting a periodic refresh, linked through ->update */
static LIST_HEAD(afs_vlocation_updates);
static DEFINE_SPINLOCK(afs_vlocation_updates_lock);
static DECLARE_DELAYED_WORK(afs_vlocation_update, afs_vlocation_updater);
static struct workqueue_struct *afs_vlocation_update_worker;

/* records nobody is using any more, linked through ->grave */
static LIST_HEAD(afs_vlocation_graveyard);
static DEFINE_SPINLOCK(afs_vlocation_graveyard_lock);
static DECLARE_DELAYED_WORK(afs_vlocation_reap, afs_vlocation_reaper);
30
/*
 * Look a volume up by name on the cell's VL servers.
 *
 * Starting at the cell's current server index, each VL server is asked in turn
 * for the volume named in vl->vldb.name; @vldb is handed to
 * afs_vl_get_entry_by_name() to receive the answer.  The cell's vl_sem is held
 * for writing for the duration.
 *
 * Returns 0 on success.  -ENOMEM, -ENONET and -ENOMEDIUM from a server end the
 * search immediately.  Unreachable/refused servers are skipped, and any other
 * error is turned into -EIO before moving on, so the result after trying every
 * server is the (possibly translated) error from the last one.  If the cell
 * has no VL addresses at all, -ENOMEDIUM is returned.
 */
static int afs_vlocation_access_vl_by_name(struct afs_vlocation *vl,
                                           struct afs_cache_vlocation *vldb)
{
        struct afs_cell *cell = vl->cell;
        struct in_addr server;
        int remaining, ret;

        _enter("%s,%s", cell->name, vl->vldb.name);

        down_write(&cell->vl_sem);

        ret = -ENOMEDIUM;
        remaining = cell->vl_naddrs;
        while (remaining > 0) {
                server = cell->vl_addrs[cell->vl_curr_svix];

                _debug("CellServ[%hu]: %08x", cell->vl_curr_svix, server.s_addr);

                /* attempt to access the VL server */
                ret = afs_vl_get_entry_by_name(&server, vl->vldb.name, vldb,
                                               &afs_sync_call);

                /* success and fatal errors stop the search right here */
                if (ret == 0 || ret == -ENOMEM || ret == -ENONET ||
                    ret == -ENOMEDIUM)
                        break;

                /* anything other than "can't reach that server" is reported
                 * as a generic I/O error */
                if (ret != -ENETUNREACH && ret != -EHOSTUNREACH &&
                    ret != -ECONNREFUSED)
                        ret = -EIO;

                /* rotate the server records upon lookup failure */
                cell->vl_curr_svix++;
                cell->vl_curr_svix %= cell->vl_naddrs;
                remaining--;
        }

        up_write(&cell->vl_sem);
        _leave(" = %d", ret);
        return ret;
}
83
84 /*
85  * iterate through the VL servers in a cell until one of them admits knowing
86  * about the volume in question
87  */
88 static int afs_vlocation_access_vl_by_id(struct afs_vlocation *vl,
89                                          afs_volid_t volid,
90                                          afs_voltype_t voltype,
91                                          struct afs_cache_vlocation *vldb)
92 {
93         struct afs_cell *cell = vl->cell;
94         struct in_addr addr;
95         int count, ret;
96
97         _enter("%s,%x,%d,", cell->name, volid, voltype);
98
99         down_write(&vl->cell->vl_sem);
100         ret = -ENOMEDIUM;
101         for (count = cell->vl_naddrs; count > 0; count--) {
102                 addr = cell->vl_addrs[cell->vl_curr_svix];
103
104                 _debug("CellServ[%hu]: %08x", cell->vl_curr_svix, addr.s_addr);
105
106                 /* attempt to access the VL server */
107                 ret = afs_vl_get_entry_by_id(&addr, volid, voltype, vldb,
108                                              &afs_sync_call);
109                 switch (ret) {
110                 case 0:
111                         goto out;
112                 case -ENOMEM:
113                 case -ENONET:
114                 case -ENETUNREACH:
115                 case -EHOSTUNREACH:
116                 case -ECONNREFUSED:
117                         if (ret == -ENOMEM || ret == -ENONET)
118                                 goto out;
119                         goto rotate;
120                 case -EBUSY:
121                         vl->upd_busy_cnt++;
122                         if (vl->upd_busy_cnt <= 3) {
123                                 if (vl->upd_busy_cnt > 1) {
124                                         /* second+ BUSY - sleep a little bit */
125                                         set_current_state(TASK_UNINTERRUPTIBLE);
126                                         schedule_timeout(1);
127                                         __set_current_state(TASK_RUNNING);
128                                 }
129                                 continue;
130                         }
131                         break;
132                 case -ENOMEDIUM:
133                         vl->upd_rej_cnt++;
134                         goto rotate;
135                 default:
136                         ret = -EIO;
137                         goto rotate;
138                 }
139
140                 /* rotate the server records upon lookup failure */
141         rotate:
142                 cell->vl_curr_svix++;
143                 cell->vl_curr_svix %= cell->vl_naddrs;
144                 vl->upd_busy_cnt = 0;
145         }
146
147 out:
148         if (ret < 0 && vl->upd_rej_cnt > 0) {
149                 printk(KERN_NOTICE "kAFS:"
150                        " Active volume no longer valid '%s'\n",
151                        vl->vldb.name);
152                 vl->valid = 0;
153                 ret = -ENOMEDIUM;
154         }
155
156         up_write(&vl->cell->vl_sem);
157         _leave(" = %d", ret);
158         return ret;
159 }
160
161 /*
162  * allocate a volume location record
163  */
164 static struct afs_vlocation *afs_vlocation_alloc(struct afs_cell *cell,
165                                                  const char *name,
166                                                  size_t namesz)
167 {
168         struct afs_vlocation *vl;
169
170         vl = kzalloc(sizeof(struct afs_vlocation), GFP_KERNEL);
171         if (vl) {
172                 vl->cell = cell;
173                 vl->state = AFS_VL_NEW;
174                 atomic_set(&vl->usage, 1);
175                 INIT_LIST_HEAD(&vl->link);
176                 INIT_LIST_HEAD(&vl->grave);
177                 INIT_LIST_HEAD(&vl->update);
178                 init_waitqueue_head(&vl->waitq);
179                 rwlock_init(&vl->lock);
180                 memcpy(vl->vldb.name, name, namesz);
181         }
182
183         _leave(" = %p", vl);
184         return vl;
185 }
186
187 /*
188  * update record if we found it in the cache
189  */
190 static int afs_vlocation_update_record(struct afs_vlocation *vl,
191                                        struct afs_cache_vlocation *vldb)
192 {
193         afs_voltype_t voltype;
194         afs_volid_t vid;
195         int ret;
196
197         /* try to look up a cached volume in the cell VL databases by ID */
198         _debug("Locally Cached: %s %02x { %08x(%x) %08x(%x) %08x(%x) }",
199                vl->vldb.name,
200                vl->vldb.vidmask,
201                ntohl(vl->vldb.servers[0].s_addr),
202                vl->vldb.srvtmask[0],
203                ntohl(vl->vldb.servers[1].s_addr),
204                vl->vldb.srvtmask[1],
205                ntohl(vl->vldb.servers[2].s_addr),
206                vl->vldb.srvtmask[2]);
207
208         _debug("Vids: %08x %08x %08x",
209                vl->vldb.vid[0],
210                vl->vldb.vid[1],
211                vl->vldb.vid[2]);
212
213         if (vl->vldb.vidmask & AFS_VOL_VTM_RW) {
214                 vid = vl->vldb.vid[0];
215                 voltype = AFSVL_RWVOL;
216         } else if (vl->vldb.vidmask & AFS_VOL_VTM_RO) {
217                 vid = vl->vldb.vid[1];
218                 voltype = AFSVL_ROVOL;
219         } else if (vl->vldb.vidmask & AFS_VOL_VTM_BAK) {
220                 vid = vl->vldb.vid[2];
221                 voltype = AFSVL_BACKVOL;
222         } else {
223                 BUG();
224                 vid = 0;
225                 voltype = 0;
226         }
227
228         /* contact the server to make sure the volume is still available
229          * - TODO: need to handle disconnected operation here
230          */
231         ret = afs_vlocation_access_vl_by_id(vl, vid, voltype, vldb);
232         switch (ret) {
233                 /* net error */
234         default:
235                 printk(KERN_WARNING "kAFS:"
236                        " failed to update volume '%s' (%x) up in '%s': %d\n",
237                        vl->vldb.name, vid, vl->cell->name, ret);
238                 _leave(" = %d", ret);
239                 return ret;
240
241                 /* pulled from local cache into memory */
242         case 0:
243                 _leave(" = 0");
244                 return 0;
245
246                 /* uh oh... looks like the volume got deleted */
247         case -ENOMEDIUM:
248                 printk(KERN_ERR "kAFS:"
249                        " volume '%s' (%x) does not exist '%s'\n",
250                        vl->vldb.name, vid, vl->cell->name);
251
252                 /* TODO: make existing record unavailable */
253                 _leave(" = %d", ret);
254                 return ret;
255         }
256 }
257
258 /*
259  * apply the update to a VL record
260  */
261 static void afs_vlocation_apply_update(struct afs_vlocation *vl,
262                                        struct afs_cache_vlocation *vldb)
263 {
264         _debug("Done VL Lookup: %s %02x { %08x(%x) %08x(%x) %08x(%x) }",
265                vldb->name, vldb->vidmask,
266                ntohl(vldb->servers[0].s_addr), vldb->srvtmask[0],
267                ntohl(vldb->servers[1].s_addr), vldb->srvtmask[1],
268                ntohl(vldb->servers[2].s_addr), vldb->srvtmask[2]);
269
270         _debug("Vids: %08x %08x %08x",
271                vldb->vid[0], vldb->vid[1], vldb->vid[2]);
272
273         if (strcmp(vldb->name, vl->vldb.name) != 0)
274                 printk(KERN_NOTICE "kAFS:"
275                        " name of volume '%s' changed to '%s' on server\n",
276                        vl->vldb.name, vldb->name);
277
278         vl->vldb = *vldb;
279
280 #ifdef AFS_CACHING_SUPPORT
281         /* update volume entry in local cache */
282         cachefs_update_cookie(vl->cache);
283 #endif
284 }
285
286 /*
287  * fill in a volume location record, consulting the cache and the VL server
288  * both
289  */
290 static int afs_vlocation_fill_in_record(struct afs_vlocation *vl)
291 {
292         struct afs_cache_vlocation vldb;
293         int ret;
294
295         _enter("");
296
297         ASSERTCMP(vl->valid, ==, 0);
298
299         memset(&vldb, 0, sizeof(vldb));
300
301         /* see if we have an in-cache copy (will set vl->valid if there is) */
302 #ifdef AFS_CACHING_SUPPORT
303         cachefs_acquire_cookie(cell->cache,
304                                &afs_volume_cache_index_def,
305                                vlocation,
306                                &vl->cache);
307 #endif
308
309         if (vl->valid) {
310                 /* try to update a known volume in the cell VL databases by
311                  * ID as the name may have changed */
312                 _debug("found in cache");
313                 ret = afs_vlocation_update_record(vl, &vldb);
314         } else {
315                 /* try to look up an unknown volume in the cell VL databases by
316                  * name */
317                 ret = afs_vlocation_access_vl_by_name(vl, &vldb);
318                 if (ret < 0) {
319                         printk("kAFS: failed to locate '%s' in cell '%s'\n",
320                                vl->vldb.name, vl->cell->name);
321                         return ret;
322                 }
323         }
324
325         afs_vlocation_apply_update(vl, &vldb);
326         _leave(" = 0");
327         return 0;
328 }
329
330 /*
331  * queue a vlocation record for updates
332  */
333 void afs_vlocation_queue_for_updates(struct afs_vlocation *vl)
334 {
335         struct afs_vlocation *xvl;
336
337         /* wait at least 10 minutes before updating... */
338         vl->update_at = get_seconds() + afs_vlocation_update_timeout;
339
340         spin_lock(&afs_vlocation_updates_lock);
341
342         if (!list_empty(&afs_vlocation_updates)) {
343                 /* ... but wait at least 1 second more than the newest record
344                  * already queued so that we don't spam the VL server suddenly
345                  * with lots of requests
346                  */
347                 xvl = list_entry(afs_vlocation_updates.prev,
348                                  struct afs_vlocation, update);
349                 if (vl->update_at <= xvl->update_at)
350                         vl->update_at = xvl->update_at + 1;
351         } else {
352                 queue_delayed_work(afs_vlocation_update_worker,
353                                    &afs_vlocation_update,
354                                    afs_vlocation_update_timeout * HZ);
355         }
356
357         list_add_tail(&vl->update, &afs_vlocation_updates);
358         spin_unlock(&afs_vlocation_updates_lock);
359 }
360
361 /*
362  * lookup volume location
363  * - iterate through the VL servers in a cell until one of them admits knowing
364  *   about the volume in question
365  * - lookup in the local cache if not able to find on the VL server
366  * - insert/update in the local cache if did get a VL response
367  */
368 struct afs_vlocation *afs_vlocation_lookup(struct afs_cell *cell,
369                                            const char *name,
370                                            size_t namesz)
371 {
372         struct afs_vlocation *vl;
373         int ret;
374
375         _enter("{%s},%*.*s,%zu",
376                cell->name, (int) namesz, (int) namesz, name, namesz);
377
378         if (namesz > sizeof(vl->vldb.name)) {
379                 _leave(" = -ENAMETOOLONG");
380                 return ERR_PTR(-ENAMETOOLONG);
381         }
382
383         /* see if we have an in-memory copy first */
384         down_write(&cell->vl_sem);
385         spin_lock(&cell->vl_lock);
386         list_for_each_entry(vl, &cell->vl_list, link) {
387                 if (vl->vldb.name[namesz] != '\0')
388                         continue;
389                 if (memcmp(vl->vldb.name, name, namesz) == 0)
390                         goto found_in_memory;
391         }
392         spin_unlock(&cell->vl_lock);
393
394         /* not in the cell's in-memory lists - create a new record */
395         vl = afs_vlocation_alloc(cell, name, namesz);
396         if (!vl) {
397                 up_write(&cell->vl_sem);
398                 return ERR_PTR(-ENOMEM);
399         }
400
401         afs_get_cell(cell);
402
403         list_add_tail(&vl->link, &cell->vl_list);
404         vl->state = AFS_VL_CREATING;
405         up_write(&cell->vl_sem);
406
407 fill_in_record:
408         ret = afs_vlocation_fill_in_record(vl);
409         if (ret < 0)
410                 goto error_abandon;
411         vl->state = AFS_VL_VALID;
412         wake_up(&vl->waitq);
413
414         /* schedule for regular updates */
415         afs_vlocation_queue_for_updates(vl);
416         goto success;
417
418 found_in_memory:
419         /* found in memory */
420         _debug("found in memory");
421         atomic_inc(&vl->usage);
422         spin_unlock(&cell->vl_lock);
423         if (!list_empty(&vl->grave)) {
424                 spin_lock(&afs_vlocation_graveyard_lock);
425                 list_del_init(&vl->grave);
426                 spin_unlock(&afs_vlocation_graveyard_lock);
427         }
428         up_write(&cell->vl_sem);
429
430         /* see if it was an abandoned record that we might try filling in */
431         while (vl->state != AFS_VL_VALID) {
432                 afs_vlocation_state_t state = vl->state;
433
434                 _debug("invalid [state %d]", state);
435
436                 if ((state == AFS_VL_NEW || state == AFS_VL_NO_VOLUME)) {
437                         if (cmpxchg(&vl->state, state, AFS_VL_CREATING) ==
438                             state)
439                                 goto fill_in_record;
440                         continue;
441                 }
442
443                 /* must now wait for creation or update by someone else to
444                  * complete */
445                 _debug("wait");
446
447                 ret = wait_event_interruptible(
448                         vl->waitq,
449                         vl->state == AFS_VL_NEW ||
450                         vl->state == AFS_VL_VALID ||
451                         vl->state == AFS_VL_NO_VOLUME);
452                 if (ret < 0)
453                         goto error;
454         }
455
456 success:
457         _leave(" = %p",vl);
458         return vl;
459
460 error_abandon:
461         vl->state = AFS_VL_NEW;
462         wake_up(&vl->waitq);
463 error:
464         ASSERT(vl != NULL);
465         afs_put_vlocation(vl);
466         _leave(" = %d", ret);
467         return ERR_PTR(ret);
468 }
469
470 /*
471  * finish using a volume location record
472  */
473 void afs_put_vlocation(struct afs_vlocation *vl)
474 {
475         if (!vl)
476                 return;
477
478         _enter("%s", vl->vldb.name);
479
480         ASSERTCMP(atomic_read(&vl->usage), >, 0);
481
482         if (likely(!atomic_dec_and_test(&vl->usage))) {
483                 _leave("");
484                 return;
485         }
486
487         spin_lock(&afs_vlocation_graveyard_lock);
488         if (atomic_read(&vl->usage) == 0) {
489                 _debug("buried");
490                 list_move_tail(&vl->grave, &afs_vlocation_graveyard);
491                 vl->time_of_death = get_seconds();
492                 schedule_delayed_work(&afs_vlocation_reap,
493                                       afs_vlocation_timeout * HZ);
494
495                 /* suspend updates on this record */
496                 if (!list_empty(&vl->update)) {
497                         spin_lock(&afs_vlocation_updates_lock);
498                         list_del_init(&vl->update);
499                         spin_unlock(&afs_vlocation_updates_lock);
500                 }
501         }
502         spin_unlock(&afs_vlocation_graveyard_lock);
503         _leave(" [killed?]");
504 }
505
506 /*
507  * destroy a dead volume location record
508  */
509 static void afs_vlocation_destroy(struct afs_vlocation *vl)
510 {
511         _enter("%p", vl);
512
513 #ifdef AFS_CACHING_SUPPORT
514         cachefs_relinquish_cookie(vl->cache, 0);
515 #endif
516
517         afs_put_cell(vl->cell);
518         kfree(vl);
519 }
520
521 /*
522  * reap dead volume location records
523  */
524 static void afs_vlocation_reaper(struct work_struct *work)
525 {
526         LIST_HEAD(corpses);
527         struct afs_vlocation *vl;
528         unsigned long delay, expiry;
529         time_t now;
530
531         _enter("");
532
533         now = get_seconds();
534         spin_lock(&afs_vlocation_graveyard_lock);
535
536         while (!list_empty(&afs_vlocation_graveyard)) {
537                 vl = list_entry(afs_vlocation_graveyard.next,
538                                 struct afs_vlocation, grave);
539
540                 _debug("check %p", vl);
541
542                 /* the queue is ordered most dead first */
543                 expiry = vl->time_of_death + afs_vlocation_timeout;
544                 if (expiry > now) {
545                         delay = (expiry - now) * HZ;
546                         _debug("delay %lu", delay);
547                         if (!schedule_delayed_work(&afs_vlocation_reap,
548                                                    delay)) {
549                                 cancel_delayed_work(&afs_vlocation_reap);
550                                 schedule_delayed_work(&afs_vlocation_reap,
551                                                       delay);
552                         }
553                         break;
554                 }
555
556                 spin_lock(&vl->cell->vl_lock);
557                 if (atomic_read(&vl->usage) > 0) {
558                         _debug("no reap");
559                         list_del_init(&vl->grave);
560                 } else {
561                         _debug("reap");
562                         list_move_tail(&vl->grave, &corpses);
563                         list_del_init(&vl->link);
564                 }
565                 spin_unlock(&vl->cell->vl_lock);
566         }
567
568         spin_unlock(&afs_vlocation_graveyard_lock);
569
570         /* now reap the corpses we've extracted */
571         while (!list_empty(&corpses)) {
572                 vl = list_entry(corpses.next, struct afs_vlocation, grave);
573                 list_del(&vl->grave);
574                 afs_vlocation_destroy(vl);
575         }
576
577         _leave("");
578 }
579
580 /*
581  * initialise the VL update process
582  */
583 int __init afs_vlocation_update_init(void)
584 {
585         afs_vlocation_update_worker =
586                 create_singlethread_workqueue("kafs_vlupdated");
587         return afs_vlocation_update_worker ? 0 : -ENOMEM;
588 }
589
590 /*
591  * discard all the volume location records for rmmod
592  */
593 void __exit afs_vlocation_purge(void)
594 {
595         afs_vlocation_timeout = 0;
596
597         spin_lock(&afs_vlocation_updates_lock);
598         list_del_init(&afs_vlocation_updates);
599         spin_unlock(&afs_vlocation_updates_lock);
600         cancel_delayed_work(&afs_vlocation_update);
601         queue_delayed_work(afs_vlocation_update_worker,
602                            &afs_vlocation_update, 0);
603         destroy_workqueue(afs_vlocation_update_worker);
604
605         cancel_delayed_work(&afs_vlocation_reap);
606         schedule_delayed_work(&afs_vlocation_reap, 0);
607 }
608
609 /*
610  * update a volume location
611  */
612 static void afs_vlocation_updater(struct work_struct *work)
613 {
614         struct afs_cache_vlocation vldb;
615         struct afs_vlocation *vl, *xvl;
616         time_t now;
617         long timeout;
618         int ret;
619
620         _enter("");
621
622         now = get_seconds();
623
624         /* find a record to update */
625         spin_lock(&afs_vlocation_updates_lock);
626         for (;;) {
627                 if (list_empty(&afs_vlocation_updates)) {
628                         spin_unlock(&afs_vlocation_updates_lock);
629                         _leave(" [nothing]");
630                         return;
631                 }
632
633                 vl = list_entry(afs_vlocation_updates.next,
634                                 struct afs_vlocation, update);
635                 if (atomic_read(&vl->usage) > 0)
636                         break;
637                 list_del_init(&vl->update);
638         }
639
640         timeout = vl->update_at - now;
641         if (timeout > 0) {
642                 queue_delayed_work(afs_vlocation_update_worker,
643                                    &afs_vlocation_update, timeout * HZ);
644                 spin_unlock(&afs_vlocation_updates_lock);
645                 _leave(" [nothing]");
646                 return;
647         }
648
649         list_del_init(&vl->update);
650         atomic_inc(&vl->usage);
651         spin_unlock(&afs_vlocation_updates_lock);
652
653         /* we can now perform the update */
654         _debug("update %s", vl->vldb.name);
655         vl->state = AFS_VL_UPDATING;
656         vl->upd_rej_cnt = 0;
657         vl->upd_busy_cnt = 0;
658
659         ret = afs_vlocation_update_record(vl, &vldb);
660         switch (ret) {
661         case 0:
662                 afs_vlocation_apply_update(vl, &vldb);
663                 vl->state = AFS_VL_VALID;
664                 break;
665         case -ENOMEDIUM:
666                 vl->state = AFS_VL_VOLUME_DELETED;
667                 break;
668         default:
669                 vl->state = AFS_VL_UNCERTAIN;
670                 break;
671         }
672
673         /* and then reschedule */
674         _debug("reschedule");
675         vl->update_at = get_seconds() + afs_vlocation_update_timeout;
676
677         spin_lock(&afs_vlocation_updates_lock);
678
679         if (!list_empty(&afs_vlocation_updates)) {
680                 /* next update in 10 minutes, but wait at least 1 second more
681                  * than the newest record already queued so that we don't spam
682                  * the VL server suddenly with lots of requests
683                  */
684                 xvl = list_entry(afs_vlocation_updates.prev,
685                                  struct afs_vlocation, update);
686                 if (vl->update_at <= xvl->update_at)
687                         vl->update_at = xvl->update_at + 1;
688                 xvl = list_entry(afs_vlocation_updates.next,
689                                  struct afs_vlocation, update);
690                 timeout = xvl->update_at - now;
691                 if (timeout < 0)
692                         timeout = 0;
693         } else {
694                 timeout = afs_vlocation_update_timeout;
695         }
696
697         ASSERT(list_empty(&vl->update));
698
699         list_add_tail(&vl->update, &afs_vlocation_updates);
700
701         _debug("timeout %ld", timeout);
702         queue_delayed_work(afs_vlocation_update_worker,
703                            &afs_vlocation_update, timeout * HZ);
704         spin_unlock(&afs_vlocation_updates_lock);
705         afs_put_vlocation(vl);
706 }