1 /* Copyright (c) 2004 Coraid, Inc. See COPYING for GPL terms. */
4 * Filesystem request handling methods
7 #include <linux/hdreg.h>
8 #include <linux/blkdev.h>
9 #include <linux/skbuff.h>
10 #include <linux/netdevice.h>
11 #include <asm/unaligned.h>
14 #define TIMERTICK (HZ / 10)
15 #define MINTIMER (2 * TIMERTICK)
16 #define MAXTIMER (HZ << 1)
17 #define MAXWAIT (60 * 3) /* After MAXWAIT seconds, give up and fail dev */
/*
 * new_skb: allocate a socket buffer of 'len' bytes for transmitting an
 * AoE packet on 'if_dev'.  GFP_ATOMIC is used because callers may run
 * under spinlocks.  NOTE(review): the post-alloc failure check is in
 * lines not visible in this view -- presumably returns NULL on failure,
 * which callers (e.g. skb_prepare) test for.
 */
19 static struct sk_buff *
20 new_skb(struct net_device *if_dev, ulong len)
24 skb = alloc_skb(len, GFP_ATOMIC);
/* point the network- and MAC-layer header pointers at the data start */
26 skb->nh.raw = skb->mac.raw = skb->data;
28 skb->protocol = __constant_htons(ETH_P_AOE);
/* make sure the new skb is not linked into any list */
31 skb->next = skb->prev = NULL;
33 /* tell the network layer not to perform IP checksums
34 * or to get the NIC to do it
36 skb->ip_summed = CHECKSUM_NONE;
/*
 * skb_prepare: build an sk_buff for frame 'f' on device 'd'.  The skb
 * holds the prebuilt headers (f->ndata bytes copied from f->data)
 * followed, for writes, by f->writedatalen bytes of payload copied
 * from f->bufaddr.  Logs and bails if skb allocation fails.
 */
41 static struct sk_buff *
42 skb_prepare(struct aoedev *d, struct frame *f)
/* room for the AoE/ATA headers plus any write payload */
47 skb = new_skb(d->ifp, f->ndata + f->writedatalen);
49 printk(KERN_INFO "aoe: skb_prepare: failure to allocate skb\n");
/* copy in the headers built earlier by the caller */
54 memcpy(p, f->data, f->ndata);
56 if (f->writedatalen) {
/* write payload starts immediately after the two headers */
57 p += sizeof(struct aoe_hdr) + sizeof(struct aoe_atahdr);
58 memcpy(p, f->bufaddr, f->writedatalen);
/* getframe: find the frame on 'd' whose tag matches 'tag' (FREETAG
 * locates an unused frame).  Body not visible in this view. */
65 getframe(struct aoedev *d, int tag)
78 * Leave the top bit clear so we have tagspace for userland.
79 * The bottom 16 bits are the xmit tick for rexmit/rttavg processing.
80 * This driver reserves tag -1 to mean "unused frame."
/* newtag: generate a fresh 32-bit tag for device 'd'.  Per the comment
 * above: the top bit stays clear (tagspace reserved for userland),
 * bits 16-30 come from the incrementing lasttag counter, and the low
 * 16 bits carry the xmit tick in 'n' -- set in a line not shown here,
 * presumably jiffies & 0xffff as used elsewhere in this file. */
83 newtag(struct aoedev *d)
88 return n |= (++d->lasttag & 0x7fff) << 16;
/* aoehdr_atainit: fill in the Ethernet/AoE header 'h' for device 'd':
 * source MAC from the bound interface, destination from the device
 * address, AoE ethertype, shelf major/slot minor, and a freshly
 * generated tag.  Callers store the result as the frame tag
 * (f->tag = aoehdr_atainit(...)), so the not-shown return value is
 * presumably host_tag. */
92 aoehdr_atainit(struct aoedev *d, struct aoe_hdr *h)
94 u32 host_tag = newtag(d);
96 memcpy(h->src, d->ifp->dev_addr, sizeof h->src);
97 memcpy(h->dst, d->addr, sizeof h->dst);
98 h->type = __constant_cpu_to_be16(ETH_P_AOE);
100 h->major = cpu_to_be16(d->aoemajor);
101 h->minor = d->aoeminor;
/* tag goes on the wire big-endian; host_tag stays in host order */
103 h->tag = cpu_to_be32(host_tag);
/* aoecmd_ata_rw: build and queue one ATA read/write frame for the
 * buffer currently being processed on 'd'.  Takes up to MAXATADATA
 * bytes of the current bio_vec, fills in the AoE and ATA headers
 * (LBA28 or LBA48 depending on DEVFL_EXT), advances the buffer
 * bookkeeping, and chains the prepared skb onto the device send
 * queue.  NOTE(review): presumably called with d->lock held, cf. the
 * "enters with d->lock held" note on aoecmd_work -- confirm. */
109 aoecmd_ata_rw(struct aoedev *d, struct frame *f)
112 struct aoe_atahdr *ah;
116 register sector_t sector;
117 char writebit, extbit;
124 sector = buf->sector;
125 bcnt = buf->bv_resid;
/* clamp each frame's payload to the per-frame AoE data maximum */
126 if (bcnt > MAXATADATA)
129 /* initialize the headers & frame */
130 h = (struct aoe_hdr *) f->data;
131 ah = (struct aoe_atahdr *) (h+1);
132 f->ndata = sizeof *h + sizeof *ah;
133 memset(h, 0, f->ndata);
134 f->tag = aoehdr_atainit(d, h);
137 f->bufaddr = buf->bufaddr;
139 /* set up ata header */
/* sector count: bcnt is in bytes, 512 bytes per sector */
140 ah->scnt = bcnt >> 9;
/* peel off the LBA bytes low-to-high by shifting 'sector' down */
142 ah->lba1 = sector >>= 8;
143 ah->lba2 = sector >>= 8;
144 ah->lba3 = sector >>= 8;
145 if (d->flags & DEVFL_EXT) {
/* LBA48 device: two more address bytes */
146 ah->aflags |= AOEAFL_EXT;
147 ah->lba4 = sector >>= 8;
148 ah->lba5 = sector >>= 8;
152 ah->lba3 |= 0xe0; /* LBA bit + obsolete 0xa0 */
155 if (bio_data_dir(buf->bio) == WRITE) {
156 ah->aflags |= AOEAFL_WRITE;
157 f->writedatalen = bcnt;
/* base opcode plus the write/ext modifier bits computed above */
163 ah->cmdstat = WIN_READ | writebit | extbit;
165 /* mark all tracking fields and load out */
166 buf->nframesout += 1;
167 buf->bufaddr += bcnt;
168 buf->bv_resid -= bcnt;
169 /* printk(KERN_INFO "aoe: bv_resid=%ld\n", buf->bv_resid); */
171 buf->sector += bcnt >> 9;
172 if (buf->resid == 0) {
/* current bio_vec fully consumed: step to the next vec's page */
174 } else if (buf->bv_resid == 0) {
176 buf->bv_resid = buf->bv->bv_len;
177 buf->bufaddr = page_address(buf->bv->bv_page) + buf->bv->bv_offset;
/* build the skb and chain it onto the send queue tail */
180 skb = skb_prepare(d, f);
184 d->sendq_tl->next = skb;
191 /* enters with d->lock held */
/* aoecmd_work: issue as much queued I/O as free frames allow.  When no
 * buf is currently in process, dequeue the oldest one from d->bufq. */
193 aoecmd_work(struct aoedev *d)
/* grab an unused frame; FREETAG marks a frame as unused */
198 f = getframe(d, FREETAG);
201 if (d->inprocess == NULL) {
202 if (list_empty(&d->bufq))
/* pop the head of the buf queue and make it the in-process buf */
204 buf = container_of(d->bufq.next, struct buf, bufs);
205 list_del(d->bufq.next);
206 /*printk(KERN_INFO "aoecmd_work: bi_size=%ld\n", buf->bio->bi_size); */
/* rexmit: retransmit frame 'f' on device 'd' under a freshly generated
 * tag 'n', logging the old and new tags, then rebuild the skb and
 * chain it onto the device send queue. */
214 rexmit(struct aoedev *d, struct frame *f)
223 snprintf(buf, sizeof buf,
224 "%15s e%ld.%ld oldtag=%08x@%08lx newtag=%08x\n",
226 d->aoemajor, d->aoeminor, f->tag, jiffies, n);
/* stamp the replacement tag into the outgoing header (big-endian) */
229 h = (struct aoe_hdr *) f->data;
231 h->tag = cpu_to_be32(n);
233 skb = skb_prepare(d, f);
237 d->sendq_tl->next = skb;
249 n = jiffies & 0xffff;
/* rexmit_timer: periodic timer callback (rearmed every TIMERTICK).
 * Retransmits frames whose outstanding time exceeds ~150% of the
 * moving RTT average; a frame whose accumulated wait passes MAXWAIT
 * fails the device (per the MAXWAIT define: "give up and fail dev").
 * 'vp' is the aoedev pointer cast to ulong via timer.data. */
257 rexmit_timer(ulong vp)
262 register long timeout;
265 d = (struct aoedev *) vp;
268 /* timeout is always ~150% of the moving average */
270 timeout += timeout >> 1;
272 spin_lock_irqsave(&d->lock, flags);
/* device is being torn down: drop the lock and do not rearm */
274 if (d->flags & DEVFL_TKILL) {
275 tdie: spin_unlock_irqrestore(&d->lock, flags);
/* scan in-use frames for ones outstanding longer than the timeout */
281 if (f->tag != FREETAG && tsince(f->tag) >= timeout) {
282 n = f->waited += timeout;
284 if (n > MAXWAIT) { /* waited too long. device failure. */
/* flush the pending send queue */
293 d->sendq_hd = d->sendq_tl = NULL;
/* reset the RTT estimate to its ceiling */
297 d->rttavg = MAXTIMER;
/* rearm for the next tick */
300 d->timer.expires = jiffies + TIMERTICK;
301 add_timer(&d->timer);
303 spin_unlock_irqrestore(&d->lock, flags);
/* ataid_complete: parse an ATA IDENTIFY DEVICE response ('id', the raw
 * little-endian word array) for device 'd': choose LBA48 vs LBA28
 * capacity, fill in the legacy CHS geometry, set the gendisk capacity,
 * mark the device up, and schedule gendisk allocation work unless a
 * work item is already pending. */
309 ataid_complete(struct aoedev *d, unsigned char *id)
314 /* word 83: command set supported */
315 n = le16_to_cpu(get_unaligned((__le16 *) &id[83<<1]));
317 /* word 86: command set/feature enabled */
318 n |= le16_to_cpu(get_unaligned((__le16 *) &id[86<<1]));
320 if (n & (1<<10)) { /* bit 10: LBA 48 */
321 d->flags |= DEVFL_EXT;
323 /* word 100: number lba48 sectors */
324 ssize = le64_to_cpu(get_unaligned((__le64 *) &id[100<<1]));
326 /* set as in ide-disk.c:init_idedisk_capacity */
327 d->geo.cylinders = ssize;
328 d->geo.cylinders /= (255 * 63);
332 d->flags &= ~DEVFL_EXT;
334 /* number lba28 sectors */
335 ssize = le32_to_cpu(get_unaligned((__le32 *) &id[60<<1]));
337 /* NOTE: obsolete in ATA 6 */
338 d->geo.cylinders = le16_to_cpu(get_unaligned((__le16 *) &id[54<<1]));
339 d->geo.heads = le16_to_cpu(get_unaligned((__le16 *) &id[55<<1]));
340 d->geo.sectors = le16_to_cpu(get_unaligned((__le16 *) &id[56<<1]));
345 d->gd->capacity = ssize;
346 d->flags |= DEVFL_UP;
/* defer gendisk allocation to a work item (presumably because it
 * needs process context -- confirm); DEVFL_WORKON guards against
 * scheduling it twice */
349 if (d->flags & DEVFL_WORKON) {
350 printk(KERN_INFO "aoe: ataid_complete: can't schedule work, it's already on! "
351 "(This really shouldn't happen).\n");
354 INIT_WORK(&d->work, aoeblk_gdalloc, d);
355 schedule_work(&d->work);
356 d->flags |= DEVFL_WORKON;
/* calc_rttavg: fold one round-trip-time sample 'rtt' into the moving
 * average d->rttavg.  The sample is clamped (the MAXTIMER upper bound
 * is visible here; the lower clamp is in lines not shown) before the
 * Jacobson/Karels smoothing step referenced below. */
360 calc_rttavg(struct aoedev *d, int rtt)
367 else if (n > MAXTIMER)
370 /* g == .25; cf. Congestion Avoidance and Control, Jacobson & Karels; 1988 */
/* aoecmd_ata_rsp: handle an incoming ATA response skb.  Looks up the
 * aoedev by shelf(major)/slot(minor) address, matches the response tag
 * to an outstanding frame, feeds the round trip into the RTT average,
 * then dispatches on the command originally sent: copy read data back,
 * complete an IDENTIFY, or log an unrecognized command.  When the
 * whole buf is finished, account disk statistics and complete the
 * bio. */
376 aoecmd_ata_rsp(struct sk_buff *skb)
380 struct aoe_atahdr *ahin, *ahout;
389 hin = (struct aoe_hdr *) skb->mac.raw;
390 aoemajor = be16_to_cpu(hin->major);
391 d = aoedev_by_aoeaddr(aoemajor, hin->minor);
/* response for a device we never configured: log and drop */
393 snprintf(ebuf, sizeof ebuf, "aoecmd_ata_rsp: ata response "
394 "for unknown device %d.%d\n",
395 aoemajor, hin->minor);
400 spin_lock_irqsave(&d->lock, flags);
/* match the response tag against an outstanding frame */
402 f = getframe(d, be32_to_cpu(hin->tag));
404 spin_unlock_irqrestore(&d->lock, flags);
405 snprintf(ebuf, sizeof ebuf,
406 "%15s e%d.%d tag=%08x@%08lx\n",
408 be16_to_cpu(hin->major),
410 be32_to_cpu(hin->tag),
/* fold this frame's round trip into the moving RTT average */
416 calc_rttavg(d, tsince(f->tag));
418 ahin = (struct aoe_atahdr *) (hin+1);
419 ahout = (struct aoe_atahdr *) (f->data + sizeof(struct aoe_hdr));
422 if (ahin->cmdstat & 0xa9) { /* these bits cleared on success */
423 printk(KERN_CRIT "aoe: aoecmd_ata_rsp: ata error cmd=%2.2Xh "
424 "stat=%2.2Xh from e%ld.%ld\n",
425 ahout->cmdstat, ahin->cmdstat,
426 d->aoemajor, d->aoeminor);
/* mark the whole buf failed; completion below reports -EIO */
428 buf->flags |= BUFFL_FAIL;
/* dispatch on the command we originally sent out */
430 switch (ahout->cmdstat) {
/* read completion: payload must cover scnt 512-byte sectors */
433 n = ahout->scnt << 9;
434 if (skb->len - sizeof *hin - sizeof *ahin < n) {
435 printk(KERN_CRIT "aoe: aoecmd_ata_rsp: runt "
436 "ata data size in read. skb->len=%d\n",
438 /* fail frame f? just returning will rexmit. */
439 spin_unlock_irqrestore(&d->lock, flags);
442 memcpy(f->bufaddr, ahin+1, n);
/* IDENTIFY completion: needs the full 512-byte id page */
447 if (skb->len - sizeof *hin - sizeof *ahin < 512) {
448 printk(KERN_INFO "aoe: aoecmd_ata_rsp: runt data size "
449 "in ataid. skb->len=%d\n", skb->len);
450 spin_unlock_irqrestore(&d->lock, flags);
453 ataid_complete(d, (char *) (ahin+1));
454 /* d->flags |= DEVFL_WC_UPDATE; */
457 printk(KERN_INFO "aoe: aoecmd_ata_rsp: unrecognized "
458 "outbound ata command %2.2Xh for %d.%d\n",
460 be16_to_cpu(hin->major),
/* one less frame in flight; when all frames are back and nothing
 * remains to send, the buf is complete */
466 buf->nframesout -= 1;
467 if (buf->nframesout == 0 && buf->resid == 0) {
468 unsigned long duration = jiffies - buf->start_time;
469 unsigned long n_sect = buf->bio->bi_size >> 9;
470 struct gendisk *disk = d->gd;
472 if (bio_data_dir(buf->bio) == WRITE) {
473 disk_stat_inc(disk, writes);
474 disk_stat_add(disk, write_ticks, duration);
475 disk_stat_add(disk, write_sectors, n_sect);
477 disk_stat_inc(disk, reads);
478 disk_stat_add(disk, read_ticks, duration);
479 disk_stat_add(disk, read_sectors, n_sect);
481 disk_stat_add(disk, io_ticks, duration);
/* complete the bio: -EIO if any frame failed, else success */
482 n = (buf->flags & BUFFL_FAIL) ? -EIO : 0;
483 bio_endio(buf->bio, buf->bio->bi_size, n);
484 mempool_free(buf, d->bufpool);
/* flush the send queue accumulated while holding the lock */
494 d->sendq_hd = d->sendq_tl = NULL;
496 spin_unlock_irqrestore(&d->lock, flags);
/* aoecmd_cfg: broadcast an AoE config query for shelf 'aoemajor',
 * slot 'aoeminor' on every network interface accepted by
 * is_aoe_netif().  Walks the dev_base list under dev_base_lock,
 * building one broadcast skb per eligible interface. */
502 aoecmd_cfg(ushort aoemajor, unsigned char aoeminor)
505 struct aoe_cfghdr *ch;
506 struct sk_buff *skb, *sl;
507 struct net_device *ifp;
511 read_lock(&dev_base_lock);
/* dev_put drops the reference on the previous iteration's interface;
 * the matching dev_hold is in a line not shown -- confirm */
512 for (ifp = dev_base; ifp; dev_put(ifp), ifp = ifp->next) {
514 if (!is_aoe_netif(ifp))
517 skb = new_skb(ifp, sizeof *h + sizeof *ch);
519 printk(KERN_INFO "aoe: aoecmd_cfg: skb alloc failure\n");
522 h = (struct aoe_hdr *) skb->mac.raw;
523 memset(h, 0, sizeof *h + sizeof *ch);
/* destination: Ethernet broadcast address */
525 memset(h->dst, 0xff, sizeof h->dst);
526 memcpy(h->src, ifp->dev_addr, sizeof h->src);
527 h->type = __constant_cpu_to_be16(ETH_P_AOE);
529 h->major = cpu_to_be16(aoemajor);
536 read_unlock(&dev_base_lock);
542 * Since we only call this in one place (and it only prepares one frame)
543 * we just return the skb. Usually we'd chain it up to the aoedev sendq.
/* aoecmd_ata_id: build an ATA IDENTIFY DEVICE frame for 'd', reset the
 * RTT estimate, and arm the retransmit timer for the new device. */
545 static struct sk_buff *
546 aoecmd_ata_id(struct aoedev *d)
549 struct aoe_atahdr *ah;
/* a frame should always be free when the device is first set up */
553 f = getframe(d, FREETAG);
555 printk(KERN_CRIT "aoe: aoecmd_ata_id: can't get a frame. "
556 "This shouldn't happen.\n");
560 /* initialize the headers & frame */
561 h = (struct aoe_hdr *) f->data;
562 ah = (struct aoe_atahdr *) (h+1);
563 f->ndata = sizeof *h + sizeof *ah;
564 memset(h, 0, f->ndata);
565 f->tag = aoehdr_atainit(d, h);
569 /* this message initializes the device, so we reset the rttavg */
570 d->rttavg = MAXTIMER;
572 /* set up ata header */
574 ah->cmdstat = WIN_IDENTIFY;
577 skb = skb_prepare(d, f);
579 /* we now want to start the rexmit tracking */
580 d->flags &= ~DEVFL_TKILL;
581 d->timer.data = (ulong) d;
582 d->timer.function = rexmit_timer;
583 d->timer.expires = jiffies + TIMERTICK;
584 add_timer(&d->timer);
/* aoecmd_cfg_rsp: handle an AoE config response skb: warn on the
 * all-ones shelf address, derive and range-check the system minor,
 * clamp the target's advertised buffer count, create/look up the
 * aoedev, and -- for a device not already up -- record the firmware
 * version and send an ATA identify.  (The function continues past the
 * end of this view.) */
590 aoecmd_cfg_rsp(struct sk_buff *skb)
594 struct aoe_cfghdr *ch;
595 ulong flags, sysminor, aoemajor;
598 enum { MAXFRAMES = 8 };
600 h = (struct aoe_hdr *) skb->mac.raw;
601 ch = (struct aoe_cfghdr *) (h+1);
604 * Enough people have their dip switches set backwards to
605 * warrant a loud message for this special case.
607 aoemajor = be16_to_cpu(h->major);
608 if (aoemajor == 0xfff) {
609 printk(KERN_CRIT "aoe: aoecmd_cfg_rsp: Warning: shelf "
610 "address is all ones. Check shelf dip switches\n");
/* make sure this device's minors plus its partitions fit */
614 sysminor = SYSMINOR(aoemajor, h->minor);
615 if (sysminor * AOE_PARTITIONS + AOE_PARTITIONS > MINORMASK) {
617 "aoe: e%ld.%d: minor number too large\n",
618 aoemajor, (int) h->minor);
/* cap the target's advertised outstanding-buffer count */
622 bufcnt = be16_to_cpu(ch->bufcnt);
623 if (bufcnt > MAXFRAMES) /* keep it reasonable */
626 d = aoedev_set(sysminor, h->src, skb->dev, bufcnt);
628 printk(KERN_INFO "aoe: aoecmd_cfg_rsp: device set failure\n");
632 spin_lock_irqsave(&d->lock, flags);
/* device already up (or closing): nothing more to do here */
634 if (d->flags & (DEVFL_UP | DEVFL_CLOSEWAIT)) {
635 spin_unlock_irqrestore(&d->lock, flags);
639 d->fw_ver = be16_to_cpu(ch->fwver);
641 /* we get here only if the device is new */
642 sl = aoecmd_ata_id(d);
644 spin_unlock_irqrestore(&d->lock, flags);