[PATCH] iseries_veth: Try to avoid pathological reset behaviour
[linux-2.6] / drivers / net / iseries_veth.c
1 /* File veth.c created by Kyle A. Lucke on Mon Aug  7 2000. */
2 /*
3  * IBM eServer iSeries Virtual Ethernet Device Driver
4  * Copyright (C) 2001 Kyle A. Lucke (klucke@us.ibm.com), IBM Corp.
5  * Substantially cleaned up by:
6  * Copyright (C) 2003 David Gibson <dwg@au1.ibm.com>, IBM Corporation.
7  *
8  * This program is free software; you can redistribute it and/or
9  * modify it under the terms of the GNU General Public License as
10  * published by the Free Software Foundation; either version 2 of the
11  * License, or (at your option) any later version.
12  *
13  * This program is distributed in the hope that it will be useful, but
14  * WITHOUT ANY WARRANTY; without even the implied warranty of
15  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
16  * General Public License for more details.
17  *
18  * You should have received a copy of the GNU General Public License
19  * along with this program; if not, write to the Free Software
20  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
21  * USA
22  *
23  *
24  * This module implements the virtual ethernet device for iSeries LPAR
25  * Linux.  It uses hypervisor message passing to implement an
26  * ethernet-like network device communicating between partitions on
27  * the iSeries.
28  *
29  * The iSeries LPAR hypervisor currently allows for up to 16 different
30  * virtual ethernets.  These are all dynamically configurable on
31  * OS/400 partitions, but dynamic configuration is not supported under
32  * Linux yet.  An ethXX network device will be created for each
33  * virtual ethernet this partition is connected to.
34  *
35  * - This driver is responsible for routing packets to and from other
36  *   partitions.  The MAC addresses used by the virtual ethernets
37  *   contain meaning and must not be modified.
38  *
39  * - Having 2 virtual ethernets to the same remote partition DOES NOT
40  *   double the available bandwidth.  The 2 devices will share the
41  *   available hypervisor bandwidth.
42  *
43  * - If you send a packet to your own mac address, it will just be
44  *   dropped, you won't get it on the receive side.
45  *
46  * - Multicast is implemented by sending the frame to every
47  *   other partition.  It is the responsibility of the receiving
48  *   partition to filter the addresses desired.
49  *
50  * Tunable parameters:
51  *
52  * VETH_NUMBUFFERS: This compile time option defaults to 120.  It
53  * controls how much memory Linux will allocate per remote partition
54  * it is communicating with.  It can be thought of as the maximum
55  * number of packets outstanding to a remote partition at a time.
56  */
57
58 #include <linux/config.h>
59 #include <linux/module.h>
60 #include <linux/version.h>
61 #include <linux/types.h>
62 #include <linux/errno.h>
63 #include <linux/ioport.h>
64 #include <linux/kernel.h>
65 #include <linux/netdevice.h>
66 #include <linux/etherdevice.h>
67 #include <linux/skbuff.h>
68 #include <linux/init.h>
69 #include <linux/delay.h>
70 #include <linux/mm.h>
71 #include <linux/ethtool.h>
72 #include <asm/iSeries/mf.h>
73 #include <asm/iSeries/iSeries_pci.h>
74 #include <asm/uaccess.h>
75
76 #include <asm/iSeries/HvLpConfig.h>
77 #include <asm/iSeries/HvTypes.h>
78 #include <asm/iSeries/HvLpEvent.h>
79 #include <asm/iommu.h>
80 #include <asm/vio.h>
81
82 #undef DEBUG
83
84 #include "iseries_veth.h"
85
86 MODULE_AUTHOR("Kyle Lucke <klucke@us.ibm.com>");
87 MODULE_DESCRIPTION("iSeries Virtual ethernet driver");
88 MODULE_LICENSE("GPL");
89
/*
 * Compile-time tunables.
 *
 * VETH_NUMBUFFERS - number of struct veth_msg buffers allocated for each
 *                   remote partition (see veth_init_connection()).
 * VETH_ACKTIMEOUT - ack timeout advertised in our capabilities.
 * VETH_MAX_MCAST  - number of slots in the per-port multicast table.
 * VETH_MAX_MTU    - largest MTU veth_change_mtu() will accept.
 */
#define VETH_NUMBUFFERS         (120)
#define VETH_ACKTIMEOUT         (1000000) /* microseconds */
#define VETH_MAX_MCAST          (12)

#define VETH_MAX_MTU            (9000)

/*
 * ACK_THRESHOLD is advertised to the remote end as
 * local_caps.ack_threshold and is scaled to the buffer count.
 */
#if VETH_NUMBUFFERS >= 40
#define ACK_THRESHOLD           (20)
#elif VETH_NUMBUFFERS >= 20
#define ACK_THRESHOLD           (10)
#elif VETH_NUMBUFFERS >= 10
#define ACK_THRESHOLD           (4)
#else
#define ACK_THRESHOLD           (1)
#endif
105
/*
 * Bits of veth_lpar_connection.state.  Values are unchanged
 * (0x0001 .. 0x0100); they are simply written as shifts.
 */
#define VETH_STATE_SHUTDOWN     (1 << 0) /* 0x0001: connection is dead for good */
#define VETH_STATE_OPEN         (1 << 1) /* 0x0002: LP event path has been opened */
#define VETH_STATE_RESET        (1 << 2) /* 0x0004: a reset has been requested */
#define VETH_STATE_SENTMON      (1 << 3) /* 0x0008: monitor event sent */
#define VETH_STATE_SENTCAPS     (1 << 4) /* 0x0010: our capabilities sent */
#define VETH_STATE_GOTCAPACK    (1 << 5) /* 0x0020: ack of our capabilities received */
#define VETH_STATE_GOTCAPS      (1 << 6) /* 0x0040: remote capabilities received */
#define VETH_STATE_SENTCAPACK   (1 << 7) /* 0x0080: remote capabilities acked by us */
#define VETH_STATE_READY        (1 << 8) /* 0x0100: handshake done, ack timer running */
115
116 struct veth_msg {
117         struct veth_msg *next;
118         struct VethFramesData data;
119         int token;
120         unsigned long in_use;
121         struct sk_buff *skb;
122         struct device *dev;
123 };
124
125 struct veth_lpar_connection {
126         HvLpIndex remote_lp;
127         struct work_struct statemachine_wq;
128         struct veth_msg *msgs;
129         int num_events;
130         struct VethCapData local_caps;
131
132         struct timer_list ack_timer;
133
134         spinlock_t lock;
135         unsigned long state;
136         HvLpInstanceId src_inst;
137         HvLpInstanceId dst_inst;
138         struct VethLpEvent cap_event, cap_ack_event;
139         u16 pending_acks[VETH_MAX_ACKS_PER_MSG];
140         u32 num_pending_acks;
141
142         int num_ack_events;
143         struct VethCapData remote_caps;
144         u32 ack_timeout;
145
146         spinlock_t msg_stack_lock;
147         struct veth_msg *msg_stack_head;
148 };
149
150 struct veth_port {
151         struct device *dev;
152         struct net_device_stats stats;
153         u64 mac_addr;
154         HvLpIndexMap lpar_map;
155
156         spinlock_t pending_gate;
157         struct sk_buff *pending_skb;
158         HvLpIndexMap pending_lpmask;
159
160         rwlock_t mcast_gate;
161         int promiscuous;
162         int all_mcast;
163         int num_mcast;
164         u64 mcast_addr[VETH_MAX_MCAST];
165 };
166
167 static HvLpIndex this_lp;
168 static struct veth_lpar_connection *veth_cnx[HVMAXARCHITECTEDLPS]; /* = 0 */
169 static struct net_device *veth_dev[HVMAXARCHITECTEDVIRTUALLANS]; /* = 0 */
170
171 static int veth_start_xmit(struct sk_buff *skb, struct net_device *dev);
172 static void veth_recycle_msg(struct veth_lpar_connection *, struct veth_msg *);
173 static void veth_flush_pending(struct veth_lpar_connection *cnx);
174 static void veth_receive(struct veth_lpar_connection *, struct VethLpEvent *);
175 static void veth_timed_ack(unsigned long connectionPtr);
176
177 /*
178  * Utility functions
179  */
180
/* Informational message, prefixed with the driver name. */
#define veth_info(fmt, ...) \
        printk(KERN_INFO "iseries_veth: " fmt, ##__VA_ARGS__)

/* Error message, prefixed with the driver name. */
#define veth_error(fmt, ...) \
        printk(KERN_ERR "iseries_veth: Error: " fmt, ##__VA_ARGS__)

/* Debug message; expands to nothing unless DEBUG is defined. */
#ifndef DEBUG
#define veth_debug(fmt, ...) do {} while (0)
#else
#define veth_debug(fmt, ...) \
        printk(KERN_DEBUG "iseries_veth: " fmt, ##__VA_ARGS__)
#endif
193
194 static inline void veth_stack_push(struct veth_lpar_connection *cnx,
195                                    struct veth_msg *msg)
196 {
197         unsigned long flags;
198
199         spin_lock_irqsave(&cnx->msg_stack_lock, flags);
200         msg->next = cnx->msg_stack_head;
201         cnx->msg_stack_head = msg;
202         spin_unlock_irqrestore(&cnx->msg_stack_lock, flags);
203 }
204
205 static inline struct veth_msg *veth_stack_pop(struct veth_lpar_connection *cnx)
206 {
207         unsigned long flags;
208         struct veth_msg *msg;
209
210         spin_lock_irqsave(&cnx->msg_stack_lock, flags);
211         msg = cnx->msg_stack_head;
212         if (msg)
213                 cnx->msg_stack_head = cnx->msg_stack_head->next;
214         spin_unlock_irqrestore(&cnx->msg_stack_lock, flags);
215         return msg;
216 }
217
218 static inline HvLpEvent_Rc
219 veth_signalevent(struct veth_lpar_connection *cnx, u16 subtype,
220                  HvLpEvent_AckInd ackind, HvLpEvent_AckType acktype,
221                  u64 token,
222                  u64 data1, u64 data2, u64 data3, u64 data4, u64 data5)
223 {
224         return HvCallEvent_signalLpEventFast(cnx->remote_lp,
225                                              HvLpEvent_Type_VirtualLan,
226                                              subtype, ackind, acktype,
227                                              cnx->src_inst,
228                                              cnx->dst_inst,
229                                              token, data1, data2, data3,
230                                              data4, data5);
231 }
232
233 static inline HvLpEvent_Rc veth_signaldata(struct veth_lpar_connection *cnx,
234                                            u16 subtype, u64 token, void *data)
235 {
236         u64 *p = (u64 *) data;
237
238         return veth_signalevent(cnx, subtype, HvLpEvent_AckInd_NoAck,
239                                 HvLpEvent_AckType_ImmediateAck,
240                                 token, p[0], p[1], p[2], p[3], p[4]);
241 }
242
243 struct veth_allocation {
244         struct completion c;
245         int num;
246 };
247
248 static void veth_complete_allocation(void *parm, int number)
249 {
250         struct veth_allocation *vc = (struct veth_allocation *)parm;
251
252         vc->num = number;
253         complete(&vc->c);
254 }
255
256 static int veth_allocate_events(HvLpIndex rlp, int number)
257 {
258         struct veth_allocation vc = { COMPLETION_INITIALIZER(vc.c), 0 };
259
260         mf_allocate_lp_events(rlp, HvLpEvent_Type_VirtualLan,
261                             sizeof(struct VethLpEvent), number,
262                             &veth_complete_allocation, &vc);
263         wait_for_completion(&vc.c);
264
265         return vc.num;
266 }
267
268 /*
269  * LPAR connection code
270  */
271
272 static inline void veth_kick_statemachine(struct veth_lpar_connection *cnx)
273 {
274         schedule_work(&cnx->statemachine_wq);
275 }
276
277 static void veth_take_cap(struct veth_lpar_connection *cnx,
278                           struct VethLpEvent *event)
279 {
280         unsigned long flags;
281
282         spin_lock_irqsave(&cnx->lock, flags);
283         /* Receiving caps may mean the other end has just come up, so
284          * we need to reload the instance ID of the far end */
285         cnx->dst_inst =
286                 HvCallEvent_getTargetLpInstanceId(cnx->remote_lp,
287                                                   HvLpEvent_Type_VirtualLan);
288
289         if (cnx->state & VETH_STATE_GOTCAPS) {
290                 veth_error("Received a second capabilities from LPAR %d.\n",
291                            cnx->remote_lp);
292                 event->base_event.xRc = HvLpEvent_Rc_BufferNotAvailable;
293                 HvCallEvent_ackLpEvent((struct HvLpEvent *) event);
294         } else {
295                 memcpy(&cnx->cap_event, event, sizeof(cnx->cap_event));
296                 cnx->state |= VETH_STATE_GOTCAPS;
297                 veth_kick_statemachine(cnx);
298         }
299         spin_unlock_irqrestore(&cnx->lock, flags);
300 }
301
302 static void veth_take_cap_ack(struct veth_lpar_connection *cnx,
303                               struct VethLpEvent *event)
304 {
305         unsigned long flags;
306
307         spin_lock_irqsave(&cnx->lock, flags);
308         if (cnx->state & VETH_STATE_GOTCAPACK) {
309                 veth_error("Received a second capabilities ack from LPAR %d.\n",
310                            cnx->remote_lp);
311         } else {
312                 memcpy(&cnx->cap_ack_event, event,
313                        sizeof(&cnx->cap_ack_event));
314                 cnx->state |= VETH_STATE_GOTCAPACK;
315                 veth_kick_statemachine(cnx);
316         }
317         spin_unlock_irqrestore(&cnx->lock, flags);
318 }
319
320 static void veth_take_monitor_ack(struct veth_lpar_connection *cnx,
321                                   struct VethLpEvent *event)
322 {
323         unsigned long flags;
324
325         spin_lock_irqsave(&cnx->lock, flags);
326         veth_debug("cnx %d: lost connection.\n", cnx->remote_lp);
327
328         /* Avoid kicking the statemachine once we're shutdown.
329          * It's unnecessary and it could break veth_stop_connection(). */
330
331         if (! (cnx->state & VETH_STATE_SHUTDOWN)) {
332                 cnx->state |= VETH_STATE_RESET;
333                 veth_kick_statemachine(cnx);
334         }
335         spin_unlock_irqrestore(&cnx->lock, flags);
336 }
337
338 static void veth_handle_ack(struct VethLpEvent *event)
339 {
340         HvLpIndex rlp = event->base_event.xTargetLp;
341         struct veth_lpar_connection *cnx = veth_cnx[rlp];
342
343         BUG_ON(! cnx);
344
345         switch (event->base_event.xSubtype) {
346         case VethEventTypeCap:
347                 veth_take_cap_ack(cnx, event);
348                 break;
349         case VethEventTypeMonitor:
350                 veth_take_monitor_ack(cnx, event);
351                 break;
352         default:
353                 veth_error("Unknown ack type %d from LPAR %d.\n",
354                                 event->base_event.xSubtype, rlp);
355         };
356 }
357
358 static void veth_handle_int(struct VethLpEvent *event)
359 {
360         HvLpIndex rlp = event->base_event.xSourceLp;
361         struct veth_lpar_connection *cnx = veth_cnx[rlp];
362         unsigned long flags;
363         int i;
364
365         BUG_ON(! cnx);
366
367         switch (event->base_event.xSubtype) {
368         case VethEventTypeCap:
369                 veth_take_cap(cnx, event);
370                 break;
371         case VethEventTypeMonitor:
372                 /* do nothing... this'll hang out here til we're dead,
373                  * and the hypervisor will return it for us. */
374                 break;
375         case VethEventTypeFramesAck:
376                 spin_lock_irqsave(&cnx->lock, flags);
377                 for (i = 0; i < VETH_MAX_ACKS_PER_MSG; ++i) {
378                         u16 msgnum = event->u.frames_ack_data.token[i];
379
380                         if (msgnum < VETH_NUMBUFFERS)
381                                 veth_recycle_msg(cnx, cnx->msgs + msgnum);
382                 }
383                 spin_unlock_irqrestore(&cnx->lock, flags);
384                 veth_flush_pending(cnx);
385                 break;
386         case VethEventTypeFrames:
387                 veth_receive(cnx, event);
388                 break;
389         default:
390                 veth_error("Unknown interrupt type %d from LPAR %d.\n",
391                                 event->base_event.xSubtype, rlp);
392         };
393 }
394
395 static void veth_handle_event(struct HvLpEvent *event, struct pt_regs *regs)
396 {
397         struct VethLpEvent *veth_event = (struct VethLpEvent *)event;
398
399         if (event->xFlags.xFunction == HvLpEvent_Function_Ack)
400                 veth_handle_ack(veth_event);
401         else if (event->xFlags.xFunction == HvLpEvent_Function_Int)
402                 veth_handle_int(veth_event);
403 }
404
405 static int veth_process_caps(struct veth_lpar_connection *cnx)
406 {
407         struct VethCapData *remote_caps = &cnx->remote_caps;
408         int num_acks_needed;
409
410         /* Convert timer to jiffies */
411         cnx->ack_timeout = remote_caps->ack_timeout * HZ / 1000000;
412
413         if ( (remote_caps->num_buffers == 0)
414              || (remote_caps->ack_threshold > VETH_MAX_ACKS_PER_MSG)
415              || (remote_caps->ack_threshold == 0)
416              || (cnx->ack_timeout == 0) ) {
417                 veth_error("Received incompatible capabilities from LPAR %d.\n",
418                                 cnx->remote_lp);
419                 return HvLpEvent_Rc_InvalidSubtypeData;
420         }
421
422         num_acks_needed = (remote_caps->num_buffers
423                            / remote_caps->ack_threshold) + 1;
424
425         /* FIXME: locking on num_ack_events? */
426         if (cnx->num_ack_events < num_acks_needed) {
427                 int num;
428
429                 num = veth_allocate_events(cnx->remote_lp,
430                                            num_acks_needed-cnx->num_ack_events);
431                 if (num > 0)
432                         cnx->num_ack_events += num;
433
434                 if (cnx->num_ack_events < num_acks_needed) {
435                         veth_error("Couldn't allocate enough ack events "
436                                         "for LPAR %d.\n", cnx->remote_lp);
437
438                         return HvLpEvent_Rc_BufferNotAvailable;
439                 }
440         }
441
442
443         return HvLpEvent_Rc_Good;
444 }
445
446 /* FIXME: The gotos here are a bit dubious */
447 static void veth_statemachine(void *p)
448 {
449         struct veth_lpar_connection *cnx = (struct veth_lpar_connection *)p;
450         int rlp = cnx->remote_lp;
451         int rc;
452
453         spin_lock_irq(&cnx->lock);
454
455  restart:
456         if (cnx->state & VETH_STATE_RESET) {
457                 int i;
458
459                 if (cnx->state & VETH_STATE_OPEN)
460                         HvCallEvent_closeLpEventPath(cnx->remote_lp,
461                                                      HvLpEvent_Type_VirtualLan);
462
463                 /*
464                  * Reset ack data. This prevents the ack_timer actually
465                  * doing anything, even if it runs one more time when
466                  * we drop the lock below.
467                  */
468                 memset(&cnx->pending_acks, 0xff, sizeof (cnx->pending_acks));
469                 cnx->num_pending_acks = 0;
470
471                 cnx->state &= ~(VETH_STATE_RESET | VETH_STATE_SENTMON
472                                 | VETH_STATE_OPEN | VETH_STATE_SENTCAPS
473                                 | VETH_STATE_GOTCAPACK | VETH_STATE_GOTCAPS
474                                 | VETH_STATE_SENTCAPACK | VETH_STATE_READY);
475
476                 /* Clean up any leftover messages */
477                 if (cnx->msgs)
478                         for (i = 0; i < VETH_NUMBUFFERS; ++i)
479                                 veth_recycle_msg(cnx, cnx->msgs + i);
480
481                 /* Drop the lock so we can do stuff that might sleep or
482                  * take other locks. */
483                 spin_unlock_irq(&cnx->lock);
484
485                 del_timer_sync(&cnx->ack_timer);
486                 veth_flush_pending(cnx);
487
488                 spin_lock_irq(&cnx->lock);
489
490                 if (cnx->state & VETH_STATE_RESET)
491                         goto restart;
492
493                 /* Hack, wait for the other end to reset itself. */
494                 if (! (cnx->state & VETH_STATE_SHUTDOWN)) {
495                         schedule_delayed_work(&cnx->statemachine_wq, 5 * HZ);
496                         goto out;
497                 }
498         }
499
500         if (cnx->state & VETH_STATE_SHUTDOWN)
501                 /* It's all over, do nothing */
502                 goto out;
503
504         if ( !(cnx->state & VETH_STATE_OPEN) ) {
505                 if (! cnx->msgs || (cnx->num_events < (2 + VETH_NUMBUFFERS)) )
506                         goto cant_cope;
507
508                 HvCallEvent_openLpEventPath(rlp, HvLpEvent_Type_VirtualLan);
509                 cnx->src_inst =
510                         HvCallEvent_getSourceLpInstanceId(rlp,
511                                                           HvLpEvent_Type_VirtualLan);
512                 cnx->dst_inst =
513                         HvCallEvent_getTargetLpInstanceId(rlp,
514                                                           HvLpEvent_Type_VirtualLan);
515                 cnx->state |= VETH_STATE_OPEN;
516         }
517
518         if ( (cnx->state & VETH_STATE_OPEN)
519              && !(cnx->state & VETH_STATE_SENTMON) ) {
520                 rc = veth_signalevent(cnx, VethEventTypeMonitor,
521                                       HvLpEvent_AckInd_DoAck,
522                                       HvLpEvent_AckType_DeferredAck,
523                                       0, 0, 0, 0, 0, 0);
524
525                 if (rc == HvLpEvent_Rc_Good) {
526                         cnx->state |= VETH_STATE_SENTMON;
527                 } else {
528                         if ( (rc != HvLpEvent_Rc_PartitionDead)
529                              && (rc != HvLpEvent_Rc_PathClosed) )
530                                 veth_error("Error sending monitor to LPAR %d, "
531                                                 "rc = %d\n", rlp, rc);
532
533                         /* Oh well, hope we get a cap from the other
534                          * end and do better when that kicks us */
535                         goto out;
536                 }
537         }
538
539         if ( (cnx->state & VETH_STATE_OPEN)
540              && !(cnx->state & VETH_STATE_SENTCAPS)) {
541                 u64 *rawcap = (u64 *)&cnx->local_caps;
542
543                 rc = veth_signalevent(cnx, VethEventTypeCap,
544                                       HvLpEvent_AckInd_DoAck,
545                                       HvLpEvent_AckType_ImmediateAck,
546                                       0, rawcap[0], rawcap[1], rawcap[2],
547                                       rawcap[3], rawcap[4]);
548
549                 if (rc == HvLpEvent_Rc_Good) {
550                         cnx->state |= VETH_STATE_SENTCAPS;
551                 } else {
552                         if ( (rc != HvLpEvent_Rc_PartitionDead)
553                              && (rc != HvLpEvent_Rc_PathClosed) )
554                                 veth_error("Error sending caps to LPAR %d, "
555                                                 "rc = %d\n", rlp, rc);
556
557                         /* Oh well, hope we get a cap from the other
558                          * end and do better when that kicks us */
559                         goto out;
560                 }
561         }
562
563         if ((cnx->state & VETH_STATE_GOTCAPS)
564             && !(cnx->state & VETH_STATE_SENTCAPACK)) {
565                 struct VethCapData *remote_caps = &cnx->remote_caps;
566
567                 memcpy(remote_caps, &cnx->cap_event.u.caps_data,
568                        sizeof(*remote_caps));
569
570                 spin_unlock_irq(&cnx->lock);
571                 rc = veth_process_caps(cnx);
572                 spin_lock_irq(&cnx->lock);
573
574                 /* We dropped the lock, so recheck for anything which
575                  * might mess us up */
576                 if (cnx->state & (VETH_STATE_RESET|VETH_STATE_SHUTDOWN))
577                         goto restart;
578
579                 cnx->cap_event.base_event.xRc = rc;
580                 HvCallEvent_ackLpEvent((struct HvLpEvent *)&cnx->cap_event);
581                 if (rc == HvLpEvent_Rc_Good)
582                         cnx->state |= VETH_STATE_SENTCAPACK;
583                 else
584                         goto cant_cope;
585         }
586
587         if ((cnx->state & VETH_STATE_GOTCAPACK)
588             && (cnx->state & VETH_STATE_GOTCAPS)
589             && !(cnx->state & VETH_STATE_READY)) {
590                 if (cnx->cap_ack_event.base_event.xRc == HvLpEvent_Rc_Good) {
591                         /* Start the ACK timer */
592                         cnx->ack_timer.expires = jiffies + cnx->ack_timeout;
593                         add_timer(&cnx->ack_timer);
594                         cnx->state |= VETH_STATE_READY;
595                 } else {
596                         veth_error("Caps rejected by LPAR %d, rc = %d\n",
597                                         rlp, cnx->cap_ack_event.base_event.xRc);
598                         goto cant_cope;
599                 }
600         }
601
602  out:
603         spin_unlock_irq(&cnx->lock);
604         return;
605
606  cant_cope:
607         /* FIXME: we get here if something happens we really can't
608          * cope with.  The link will never work once we get here, and
609          * all we can do is not lock the rest of the system up */
610         veth_error("Unrecoverable error on connection to LPAR %d, shutting down"
611                         " (state = 0x%04lx)\n", rlp, cnx->state);
612         cnx->state |= VETH_STATE_SHUTDOWN;
613         spin_unlock_irq(&cnx->lock);
614 }
615
616 static int veth_init_connection(u8 rlp)
617 {
618         struct veth_lpar_connection *cnx;
619         struct veth_msg *msgs;
620         int i;
621
622         if ( (rlp == this_lp)
623              || ! HvLpConfig_doLpsCommunicateOnVirtualLan(this_lp, rlp) )
624                 return 0;
625
626         cnx = kmalloc(sizeof(*cnx), GFP_KERNEL);
627         if (! cnx)
628                 return -ENOMEM;
629         memset(cnx, 0, sizeof(*cnx));
630
631         cnx->remote_lp = rlp;
632         spin_lock_init(&cnx->lock);
633         INIT_WORK(&cnx->statemachine_wq, veth_statemachine, cnx);
634         init_timer(&cnx->ack_timer);
635         cnx->ack_timer.function = veth_timed_ack;
636         cnx->ack_timer.data = (unsigned long) cnx;
637         memset(&cnx->pending_acks, 0xff, sizeof (cnx->pending_acks));
638
639         veth_cnx[rlp] = cnx;
640
641         msgs = kmalloc(VETH_NUMBUFFERS * sizeof(struct veth_msg), GFP_KERNEL);
642         if (! msgs) {
643                 veth_error("Can't allocate buffers for LPAR %d.\n", rlp);
644                 return -ENOMEM;
645         }
646
647         cnx->msgs = msgs;
648         memset(msgs, 0, VETH_NUMBUFFERS * sizeof(struct veth_msg));
649         spin_lock_init(&cnx->msg_stack_lock);
650
651         for (i = 0; i < VETH_NUMBUFFERS; i++) {
652                 msgs[i].token = i;
653                 veth_stack_push(cnx, msgs + i);
654         }
655
656         cnx->num_events = veth_allocate_events(rlp, 2 + VETH_NUMBUFFERS);
657
658         if (cnx->num_events < (2 + VETH_NUMBUFFERS)) {
659                 veth_error("Can't allocate enough events for LPAR %d.\n", rlp);
660                 return -ENOMEM;
661         }
662
663         cnx->local_caps.num_buffers = VETH_NUMBUFFERS;
664         cnx->local_caps.ack_threshold = ACK_THRESHOLD;
665         cnx->local_caps.ack_timeout = VETH_ACKTIMEOUT;
666
667         return 0;
668 }
669
670 static void veth_stop_connection(u8 rlp)
671 {
672         struct veth_lpar_connection *cnx = veth_cnx[rlp];
673
674         if (! cnx)
675                 return;
676
677         spin_lock_irq(&cnx->lock);
678         cnx->state |= VETH_STATE_RESET | VETH_STATE_SHUTDOWN;
679         veth_kick_statemachine(cnx);
680         spin_unlock_irq(&cnx->lock);
681
682         /* There's a slim chance the reset code has just queued the
683          * statemachine to run in five seconds. If so we need to cancel
684          * that and requeue the work to run now. */
685         if (cancel_delayed_work(&cnx->statemachine_wq)) {
686                 spin_lock_irq(&cnx->lock);
687                 veth_kick_statemachine(cnx);
688                 spin_unlock_irq(&cnx->lock);
689         }
690
691         /* Wait for the state machine to run. */
692         flush_scheduled_work();
693
694         if (cnx->num_events > 0)
695                 mf_deallocate_lp_events(cnx->remote_lp,
696                                       HvLpEvent_Type_VirtualLan,
697                                       cnx->num_events,
698                                       NULL, NULL);
699         if (cnx->num_ack_events > 0)
700                 mf_deallocate_lp_events(cnx->remote_lp,
701                                       HvLpEvent_Type_VirtualLan,
702                                       cnx->num_ack_events,
703                                       NULL, NULL);
704 }
705
706 static void veth_destroy_connection(u8 rlp)
707 {
708         struct veth_lpar_connection *cnx = veth_cnx[rlp];
709
710         if (! cnx)
711                 return;
712
713         kfree(cnx->msgs);
714         kfree(cnx);
715         veth_cnx[rlp] = NULL;
716 }
717
718 /*
719  * net_device code
720  */
721
722 static int veth_open(struct net_device *dev)
723 {
724         struct veth_port *port = (struct veth_port *) dev->priv;
725
726         memset(&port->stats, 0, sizeof (port->stats));
727         netif_start_queue(dev);
728         return 0;
729 }
730
/* Close handler: stop the transmit queue.  Always returns 0. */
static int veth_close(struct net_device *dev)
{
        netif_stop_queue(dev);

        return 0;
}
736
737 static struct net_device_stats *veth_get_stats(struct net_device *dev)
738 {
739         struct veth_port *port = (struct veth_port *) dev->priv;
740
741         return &port->stats;
742 }
743
744 static int veth_change_mtu(struct net_device *dev, int new_mtu)
745 {
746         if ((new_mtu < 68) || (new_mtu > VETH_MAX_MTU))
747                 return -EINVAL;
748         dev->mtu = new_mtu;
749         return 0;
750 }
751
752 static void veth_set_multicast_list(struct net_device *dev)
753 {
754         struct veth_port *port = (struct veth_port *) dev->priv;
755         unsigned long flags;
756
757         write_lock_irqsave(&port->mcast_gate, flags);
758
759         if (dev->flags & IFF_PROMISC) { /* set promiscuous mode */
760                 printk(KERN_INFO "%s: Promiscuous mode enabled.\n",
761                        dev->name);
762                 port->promiscuous = 1;
763         } else if ( (dev->flags & IFF_ALLMULTI)
764                     || (dev->mc_count > VETH_MAX_MCAST) ) {
765                 port->all_mcast = 1;
766         } else {
767                 struct dev_mc_list *dmi = dev->mc_list;
768                 int i;
769
770                 /* Update table */
771                 port->num_mcast = 0;
772
773                 for (i = 0; i < dev->mc_count; i++) {
774                         u8 *addr = dmi->dmi_addr;
775                         u64 xaddr = 0;
776
777                         if (addr[0] & 0x01) {/* multicast address? */
778                                 memcpy(&xaddr, addr, ETH_ALEN);
779                                 port->mcast_addr[port->num_mcast] = xaddr;
780                                 port->num_mcast++;
781                         }
782                         dmi = dmi->next;
783                 }
784         }
785
786         write_unlock_irqrestore(&port->mcast_gate, flags);
787 }
788
789 static void veth_get_drvinfo(struct net_device *dev, struct ethtool_drvinfo *info)
790 {
791         strncpy(info->driver, "veth", sizeof(info->driver) - 1);
792         info->driver[sizeof(info->driver) - 1] = '\0';
793         strncpy(info->version, "1.0", sizeof(info->version) - 1);
794 }
795
796 static int veth_get_settings(struct net_device *dev, struct ethtool_cmd *ecmd)
797 {
798         ecmd->supported = (SUPPORTED_1000baseT_Full
799                           | SUPPORTED_Autoneg | SUPPORTED_FIBRE);
800         ecmd->advertising = (SUPPORTED_1000baseT_Full
801                             | SUPPORTED_Autoneg | SUPPORTED_FIBRE);
802         ecmd->port = PORT_FIBRE;
803         ecmd->transceiver = XCVR_INTERNAL;
804         ecmd->phy_address = 0;
805         ecmd->speed = SPEED_1000;
806         ecmd->duplex = DUPLEX_FULL;
807         ecmd->autoneg = AUTONEG_ENABLE;
808         ecmd->maxtxpkt = 120;
809         ecmd->maxrxpkt = 120;
810         return 0;
811 }
812
813 static u32 veth_get_link(struct net_device *dev)
814 {
815         return 1;
816 }
817
818 static struct ethtool_ops ops = {
819         .get_drvinfo = veth_get_drvinfo,
820         .get_settings = veth_get_settings,
821         .get_link = veth_get_link,
822 };
823
824 static void veth_tx_timeout(struct net_device *dev)
825 {
826         struct veth_port *port = (struct veth_port *)dev->priv;
827         struct net_device_stats *stats = &port->stats;
828         unsigned long flags;
829         int i;
830
831         stats->tx_errors++;
832
833         spin_lock_irqsave(&port->pending_gate, flags);
834
835         if (!port->pending_lpmask) {
836                 spin_unlock_irqrestore(&port->pending_gate, flags);
837                 return;
838         }
839
840         printk(KERN_WARNING "%s: Tx timeout!  Resetting lp connections: %08x\n",
841                dev->name, port->pending_lpmask);
842
843         for (i = 0; i < HVMAXARCHITECTEDLPS; i++) {
844                 struct veth_lpar_connection *cnx = veth_cnx[i];
845
846                 if (! (port->pending_lpmask & (1<<i)))
847                         continue;
848
849                 /* If we're pending on it, we must be connected to it,
850                  * so we should certainly have a structure for it. */
851                 BUG_ON(! cnx);
852
853                 /* Theoretically we could be kicking a connection
854                  * which doesn't deserve it, but in practice if we've
855                  * had a Tx timeout, the pending_lpmask will have
856                  * exactly one bit set - the connection causing the
857                  * problem. */
858                 spin_lock(&cnx->lock);
859                 cnx->state |= VETH_STATE_RESET;
860                 veth_kick_statemachine(cnx);
861                 spin_unlock(&cnx->lock);
862         }
863
864         spin_unlock_irqrestore(&port->pending_gate, flags);
865 }
866
867 static struct net_device * __init veth_probe_one(int vlan, struct device *vdev)
868 {
869         struct net_device *dev;
870         struct veth_port *port;
871         int i, rc;
872
873         dev = alloc_etherdev(sizeof (struct veth_port));
874         if (! dev) {
875                 veth_error("Unable to allocate net_device structure!\n");
876                 return NULL;
877         }
878
879         port = (struct veth_port *) dev->priv;
880
881         spin_lock_init(&port->pending_gate);
882         rwlock_init(&port->mcast_gate);
883
884         for (i = 0; i < HVMAXARCHITECTEDLPS; i++) {
885                 HvLpVirtualLanIndexMap map;
886
887                 if (i == this_lp)
888                         continue;
889                 map = HvLpConfig_getVirtualLanIndexMapForLp(i);
890                 if (map & (0x8000 >> vlan))
891                         port->lpar_map |= (1 << i);
892         }
893         port->dev = vdev;
894
895         dev->dev_addr[0] = 0x02;
896         dev->dev_addr[1] = 0x01;
897         dev->dev_addr[2] = 0xff;
898         dev->dev_addr[3] = vlan;
899         dev->dev_addr[4] = 0xff;
900         dev->dev_addr[5] = this_lp;
901
902         dev->mtu = VETH_MAX_MTU;
903
904         memcpy(&port->mac_addr, dev->dev_addr, 6);
905
906         dev->open = veth_open;
907         dev->hard_start_xmit = veth_start_xmit;
908         dev->stop = veth_close;
909         dev->get_stats = veth_get_stats;
910         dev->change_mtu = veth_change_mtu;
911         dev->set_mac_address = NULL;
912         dev->set_multicast_list = veth_set_multicast_list;
913         SET_ETHTOOL_OPS(dev, &ops);
914
915         dev->watchdog_timeo = 2 * (VETH_ACKTIMEOUT * HZ / 1000000);
916         dev->tx_timeout = veth_tx_timeout;
917
918         SET_NETDEV_DEV(dev, vdev);
919
920         rc = register_netdev(dev);
921         if (rc != 0) {
922                 veth_error("Failed registering net device for vlan%d.\n", vlan);
923                 free_netdev(dev);
924                 return NULL;
925         }
926
927         veth_info("%s attached to iSeries vlan %d (LPAR map = 0x%.4X)\n",
928                         dev->name, vlan, port->lpar_map);
929
930         return dev;
931 }
932
933 /*
934  * Tx path
935  */
936
937 static int veth_transmit_to_one(struct sk_buff *skb, HvLpIndex rlp,
938                                 struct net_device *dev)
939 {
940         struct veth_lpar_connection *cnx = veth_cnx[rlp];
941         struct veth_port *port = (struct veth_port *) dev->priv;
942         HvLpEvent_Rc rc;
943         u32 dma_address, dma_length;
944         struct veth_msg *msg = NULL;
945         int err = 0;
946         unsigned long flags;
947
948         if (! cnx) {
949                 port->stats.tx_errors++;
950                 dev_kfree_skb(skb);
951                 return 0;
952         }
953
954         spin_lock_irqsave(&cnx->lock, flags);
955
956         if (! (cnx->state & VETH_STATE_READY))
957                 goto drop;
958
959         if ((skb->len - 14) > VETH_MAX_MTU)
960                 goto drop;
961
962         msg = veth_stack_pop(cnx);
963
964         if (! msg) {
965                 err = 1;
966                 goto drop;
967         }
968
969         dma_length = skb->len;
970         dma_address = dma_map_single(port->dev, skb->data,
971                                      dma_length, DMA_TO_DEVICE);
972
973         if (dma_mapping_error(dma_address))
974                 goto recycle_and_drop;
975
976         /* Is it really necessary to check the length and address
977          * fields of the first entry here? */
978         msg->skb = skb;
979         msg->dev = port->dev;
980         msg->data.addr[0] = dma_address;
981         msg->data.len[0] = dma_length;
982         msg->data.eofmask = 1 << VETH_EOF_SHIFT;
983         set_bit(0, &(msg->in_use));
984         rc = veth_signaldata(cnx, VethEventTypeFrames, msg->token, &msg->data);
985
986         if (rc != HvLpEvent_Rc_Good)
987                 goto recycle_and_drop;
988
989         spin_unlock_irqrestore(&cnx->lock, flags);
990         return 0;
991
992  recycle_and_drop:
993         msg->skb = NULL;
994         /* need to set in use to make veth_recycle_msg in case this
995          * was a mapping failure */
996         set_bit(0, &msg->in_use);
997         veth_recycle_msg(cnx, msg);
998  drop:
999         port->stats.tx_errors++;
1000         dev_kfree_skb(skb);
1001         spin_unlock_irqrestore(&cnx->lock, flags);
1002         return err;
1003 }
1004
1005 static HvLpIndexMap veth_transmit_to_many(struct sk_buff *skb,
1006                                           HvLpIndexMap lpmask,
1007                                           struct net_device *dev)
1008 {
1009         struct veth_port *port = (struct veth_port *) dev->priv;
1010         int i;
1011         int rc;
1012
1013         for (i = 0; i < HVMAXARCHITECTEDLPS; i++) {
1014                 if ((lpmask & (1 << i)) == 0)
1015                         continue;
1016
1017                 rc = veth_transmit_to_one(skb_get(skb), i, dev);
1018                 if (! rc)
1019                         lpmask &= ~(1<<i);
1020         }
1021
1022         if (! lpmask) {
1023                 port->stats.tx_packets++;
1024                 port->stats.tx_bytes += skb->len;
1025         }
1026
1027         return lpmask;
1028 }
1029
1030 static int veth_start_xmit(struct sk_buff *skb, struct net_device *dev)
1031 {
1032         unsigned char *frame = skb->data;
1033         struct veth_port *port = (struct veth_port *) dev->priv;
1034         unsigned long flags;
1035         HvLpIndexMap lpmask;
1036
1037         if (! (frame[0] & 0x01)) {
1038                 /* unicast packet */
1039                 HvLpIndex rlp = frame[5];
1040
1041                 if ( ! ((1 << rlp) & port->lpar_map) ) {
1042                         dev_kfree_skb(skb);
1043                         return 0;
1044                 }
1045
1046                 lpmask = 1 << rlp;
1047         } else {
1048                 lpmask = port->lpar_map;
1049         }
1050
1051         spin_lock_irqsave(&port->pending_gate, flags);
1052
1053         lpmask = veth_transmit_to_many(skb, lpmask, dev);
1054
1055         dev->trans_start = jiffies;
1056
1057         if (! lpmask) {
1058                 dev_kfree_skb(skb);
1059         } else {
1060                 if (port->pending_skb) {
1061                         veth_error("%s: TX while skb was pending!\n",
1062                                    dev->name);
1063                         dev_kfree_skb(skb);
1064                         spin_unlock_irqrestore(&port->pending_gate, flags);
1065                         return 1;
1066                 }
1067
1068                 port->pending_skb = skb;
1069                 port->pending_lpmask = lpmask;
1070                 netif_stop_queue(dev);
1071         }
1072
1073         spin_unlock_irqrestore(&port->pending_gate, flags);
1074
1075         return 0;
1076 }
1077
1078 static void veth_recycle_msg(struct veth_lpar_connection *cnx,
1079                              struct veth_msg *msg)
1080 {
1081         u32 dma_address, dma_length;
1082
1083         if (test_and_clear_bit(0, &msg->in_use)) {
1084                 dma_address = msg->data.addr[0];
1085                 dma_length = msg->data.len[0];
1086
1087                 dma_unmap_single(msg->dev, dma_address, dma_length,
1088                                  DMA_TO_DEVICE);
1089
1090                 if (msg->skb) {
1091                         dev_kfree_skb_any(msg->skb);
1092                         msg->skb = NULL;
1093                 }
1094
1095                 memset(&msg->data, 0, sizeof(msg->data));
1096                 veth_stack_push(cnx, msg);
1097         } else if (cnx->state & VETH_STATE_OPEN) {
1098                 veth_error("Non-pending frame (# %d) acked by LPAR %d.\n",
1099                                 cnx->remote_lp, msg->token);
1100         }
1101 }
1102
1103 static void veth_flush_pending(struct veth_lpar_connection *cnx)
1104 {
1105         int i;
1106         for (i = 0; i < HVMAXARCHITECTEDVIRTUALLANS; i++) {
1107                 struct net_device *dev = veth_dev[i];
1108                 struct veth_port *port;
1109                 unsigned long flags;
1110
1111                 if (! dev)
1112                         continue;
1113
1114                 port = (struct veth_port *)dev->priv;
1115
1116                 if (! (port->lpar_map & (1<<cnx->remote_lp)))
1117                         continue;
1118
1119                 spin_lock_irqsave(&port->pending_gate, flags);
1120                 if (port->pending_skb) {
1121                         port->pending_lpmask =
1122                                 veth_transmit_to_many(port->pending_skb,
1123                                                       port->pending_lpmask,
1124                                                       dev);
1125                         if (! port->pending_lpmask) {
1126                                 dev_kfree_skb_any(port->pending_skb);
1127                                 port->pending_skb = NULL;
1128                                 netif_wake_queue(dev);
1129                         }
1130                 }
1131                 spin_unlock_irqrestore(&port->pending_gate, flags);
1132         }
1133 }
1134
1135 /*
1136  * Rx path
1137  */
1138
1139 static inline int veth_frame_wanted(struct veth_port *port, u64 mac_addr)
1140 {
1141         int wanted = 0;
1142         int i;
1143         unsigned long flags;
1144
1145         if ( (mac_addr == port->mac_addr) || (mac_addr == 0xffffffffffff0000) )
1146                 return 1;
1147
1148         if (! (((char *) &mac_addr)[0] & 0x01))
1149                 return 0;
1150
1151         read_lock_irqsave(&port->mcast_gate, flags);
1152
1153         if (port->promiscuous || port->all_mcast) {
1154                 wanted = 1;
1155                 goto out;
1156         }
1157
1158         for (i = 0; i < port->num_mcast; ++i) {
1159                 if (port->mcast_addr[i] == mac_addr) {
1160                         wanted = 1;
1161                         break;
1162                 }
1163         }
1164
1165  out:
1166         read_unlock_irqrestore(&port->mcast_gate, flags);
1167
1168         return wanted;
1169 }
1170
1171 struct dma_chunk {
1172         u64 addr;
1173         u64 size;
1174 };
1175
/* Number of dma_chunk entries reserved for the local side of one frame. */
#define VETH_MAX_PAGES_PER_FRAME \
	(((VETH_MAX_MTU + PAGE_SIZE - 2) / PAGE_SIZE) + 1)
1177
1178 static inline void veth_build_dma_list(struct dma_chunk *list,
1179                                        unsigned char *p, unsigned long length)
1180 {
1181         unsigned long done;
1182         int i = 1;
1183
1184         /* FIXME: skbs are continguous in real addresses.  Do we
1185          * really need to break it into PAGE_SIZE chunks, or can we do
1186          * it just at the granularity of iSeries real->absolute
1187          * mapping?  Indeed, given the way the allocator works, can we
1188          * count on them being absolutely contiguous? */
1189         list[0].addr = ISERIES_HV_ADDR(p);
1190         list[0].size = min(length,
1191                            PAGE_SIZE - ((unsigned long)p & ~PAGE_MASK));
1192
1193         done = list[0].size;
1194         while (done < length) {
1195                 list[i].addr = ISERIES_HV_ADDR(p + done);
1196                 list[i].size = min(length-done, PAGE_SIZE);
1197                 done += list[i].size;
1198                 i++;
1199         }
1200 }
1201
1202 static void veth_flush_acks(struct veth_lpar_connection *cnx)
1203 {
1204         HvLpEvent_Rc rc;
1205
1206         rc = veth_signaldata(cnx, VethEventTypeFramesAck,
1207                              0, &cnx->pending_acks);
1208
1209         if (rc != HvLpEvent_Rc_Good)
1210                 veth_error("Failed acking frames from LPAR %d, rc = %d\n",
1211                                 cnx->remote_lp, (int)rc);
1212
1213         cnx->num_pending_acks = 0;
1214         memset(&cnx->pending_acks, 0xff, sizeof(cnx->pending_acks));
1215 }
1216
1217 static void veth_receive(struct veth_lpar_connection *cnx,
1218                          struct VethLpEvent *event)
1219 {
1220         struct VethFramesData *senddata = &event->u.frames_data;
1221         int startchunk = 0;
1222         int nchunks;
1223         unsigned long flags;
1224         HvLpDma_Rc rc;
1225
1226         do {
1227                 u16 length = 0;
1228                 struct sk_buff *skb;
1229                 struct dma_chunk local_list[VETH_MAX_PAGES_PER_FRAME];
1230                 struct dma_chunk remote_list[VETH_MAX_FRAMES_PER_MSG];
1231                 u64 dest;
1232                 HvLpVirtualLanIndex vlan;
1233                 struct net_device *dev;
1234                 struct veth_port *port;
1235
1236                 /* FIXME: do we need this? */
1237                 memset(local_list, 0, sizeof(local_list));
1238                 memset(remote_list, 0, sizeof(VETH_MAX_FRAMES_PER_MSG));
1239
1240                 /* a 0 address marks the end of the valid entries */
1241                 if (senddata->addr[startchunk] == 0)
1242                         break;
1243
1244                 /* make sure that we have at least 1 EOF entry in the
1245                  * remaining entries */
1246                 if (! (senddata->eofmask >> (startchunk + VETH_EOF_SHIFT))) {
1247                         veth_error("Missing EOF fragment in event "
1248                                         "eofmask = 0x%x startchunk = %d\n",
1249                                         (unsigned)senddata->eofmask,
1250                                         startchunk);
1251                         break;
1252                 }
1253
1254                 /* build list of chunks in this frame */
1255                 nchunks = 0;
1256                 do {
1257                         remote_list[nchunks].addr =
1258                                 (u64) senddata->addr[startchunk+nchunks] << 32;
1259                         remote_list[nchunks].size =
1260                                 senddata->len[startchunk+nchunks];
1261                         length += remote_list[nchunks].size;
1262                 } while (! (senddata->eofmask &
1263                             (1 << (VETH_EOF_SHIFT + startchunk + nchunks++))));
1264
1265                 /* length == total length of all chunks */
1266                 /* nchunks == # of chunks in this frame */
1267
1268                 if ((length - ETH_HLEN) > VETH_MAX_MTU) {
1269                         veth_error("Received oversize frame from LPAR %d "
1270                                         "(length = %d)\n",
1271                                         cnx->remote_lp, length);
1272                         continue;
1273                 }
1274
1275                 skb = alloc_skb(length, GFP_ATOMIC);
1276                 if (!skb)
1277                         continue;
1278
1279                 veth_build_dma_list(local_list, skb->data, length);
1280
1281                 rc = HvCallEvent_dmaBufList(HvLpEvent_Type_VirtualLan,
1282                                             event->base_event.xSourceLp,
1283                                             HvLpDma_Direction_RemoteToLocal,
1284                                             cnx->src_inst,
1285                                             cnx->dst_inst,
1286                                             HvLpDma_AddressType_RealAddress,
1287                                             HvLpDma_AddressType_TceIndex,
1288                                             ISERIES_HV_ADDR(&local_list),
1289                                             ISERIES_HV_ADDR(&remote_list),
1290                                             length);
1291                 if (rc != HvLpDma_Rc_Good) {
1292                         dev_kfree_skb_irq(skb);
1293                         continue;
1294                 }
1295
1296                 vlan = skb->data[9];
1297                 dev = veth_dev[vlan];
1298                 if (! dev) {
1299                         /*
1300                          * Some earlier versions of the driver sent
1301                          * broadcasts down all connections, even to lpars
1302                          * that weren't on the relevant vlan. So ignore
1303                          * packets belonging to a vlan we're not on.
1304                          * We can also be here if we receive packets while
1305                          * the driver is going down, because then dev is NULL.
1306                          */
1307                         dev_kfree_skb_irq(skb);
1308                         continue;
1309                 }
1310
1311                 port = (struct veth_port *)dev->priv;
1312                 dest = *((u64 *) skb->data) & 0xFFFFFFFFFFFF0000;
1313
1314                 if ((vlan > HVMAXARCHITECTEDVIRTUALLANS) || !port) {
1315                         dev_kfree_skb_irq(skb);
1316                         continue;
1317                 }
1318                 if (! veth_frame_wanted(port, dest)) {
1319                         dev_kfree_skb_irq(skb);
1320                         continue;
1321                 }
1322
1323                 skb_put(skb, length);
1324                 skb->dev = dev;
1325                 skb->protocol = eth_type_trans(skb, dev);
1326                 skb->ip_summed = CHECKSUM_NONE;
1327                 netif_rx(skb);  /* send it up */
1328                 port->stats.rx_packets++;
1329                 port->stats.rx_bytes += length;
1330         } while (startchunk += nchunks, startchunk < VETH_MAX_FRAMES_PER_MSG);
1331
1332         /* Ack it */
1333         spin_lock_irqsave(&cnx->lock, flags);
1334         BUG_ON(cnx->num_pending_acks > VETH_MAX_ACKS_PER_MSG);
1335
1336         cnx->pending_acks[cnx->num_pending_acks++] =
1337                 event->base_event.xCorrelationToken;
1338
1339         if ( (cnx->num_pending_acks >= cnx->remote_caps.ack_threshold)
1340              || (cnx->num_pending_acks >= VETH_MAX_ACKS_PER_MSG) )
1341                 veth_flush_acks(cnx);
1342
1343         spin_unlock_irqrestore(&cnx->lock, flags);
1344 }
1345
1346 static void veth_timed_ack(unsigned long ptr)
1347 {
1348         struct veth_lpar_connection *cnx = (struct veth_lpar_connection *) ptr;
1349         unsigned long flags;
1350
1351         /* Ack all the events */
1352         spin_lock_irqsave(&cnx->lock, flags);
1353         if (cnx->num_pending_acks > 0)
1354                 veth_flush_acks(cnx);
1355
1356         /* Reschedule the timer */
1357         cnx->ack_timer.expires = jiffies + cnx->ack_timeout;
1358         add_timer(&cnx->ack_timer);
1359         spin_unlock_irqrestore(&cnx->lock, flags);
1360 }
1361
1362 static int veth_remove(struct vio_dev *vdev)
1363 {
1364         int i = vdev->unit_address;
1365         struct net_device *dev;
1366
1367         dev = veth_dev[i];
1368         if (dev != NULL) {
1369                 veth_dev[i] = NULL;
1370                 unregister_netdev(dev);
1371                 free_netdev(dev);
1372         }
1373         return 0;
1374 }
1375
1376 static int veth_probe(struct vio_dev *vdev, const struct vio_device_id *id)
1377 {
1378         int i = vdev->unit_address;
1379         struct net_device *dev;
1380
1381         dev = veth_probe_one(i, &vdev->dev);
1382         if (dev == NULL) {
1383                 veth_remove(vdev);
1384                 return 1;
1385         }
1386         veth_dev[i] = dev;
1387
1388         /* Start the state machine on each connection, to commence
1389          * link negotiation */
1390         for (i = 0; i < HVMAXARCHITECTEDLPS; i++)
1391                 if (veth_cnx[i])
1392                         veth_kick_statemachine(veth_cnx[i]);
1393
1394         return 0;
1395 }
1396
1397 /**
1398  * veth_device_table: Used by vio.c to match devices that we
1399  * support.
1400  */
1401 static struct vio_device_id veth_device_table[] __devinitdata = {
1402         { "vlan", "" },
1403         { "", "" }
1404 };
1405 MODULE_DEVICE_TABLE(vio, veth_device_table);
1406
1407 static struct vio_driver veth_driver = {
1408         .name = "iseries_veth",
1409         .id_table = veth_device_table,
1410         .probe = veth_probe,
1411         .remove = veth_remove
1412 };
1413
1414 /*
1415  * Module initialization/cleanup
1416  */
1417
1418 void __exit veth_module_cleanup(void)
1419 {
1420         int i;
1421
1422         /* Stop the queues first to stop any new packets being sent. */
1423         for (i = 0; i < HVMAXARCHITECTEDVIRTUALLANS; i++)
1424                 if (veth_dev[i])
1425                         netif_stop_queue(veth_dev[i]);
1426
1427         /* Stop the connections before we unregister the driver. This
1428          * ensures there's no skbs lying around holding the device open. */
1429         for (i = 0; i < HVMAXARCHITECTEDLPS; ++i)
1430                 veth_stop_connection(i);
1431
1432         HvLpEvent_unregisterHandler(HvLpEvent_Type_VirtualLan);
1433
1434         /* Hypervisor callbacks may have scheduled more work while we
1435          * were stoping connections. Now that we've disconnected from
1436          * the hypervisor make sure everything's finished. */
1437         flush_scheduled_work();
1438
1439         vio_unregister_driver(&veth_driver);
1440
1441         for (i = 0; i < HVMAXARCHITECTEDLPS; ++i)
1442                 veth_destroy_connection(i);
1443
1444 }
1445 module_exit(veth_module_cleanup);
1446
1447 int __init veth_module_init(void)
1448 {
1449         int i;
1450         int rc;
1451
1452         this_lp = HvLpConfig_getLpIndex_outline();
1453
1454         for (i = 0; i < HVMAXARCHITECTEDLPS; ++i) {
1455                 rc = veth_init_connection(i);
1456                 if (rc != 0) {
1457                         veth_module_cleanup();
1458                         return rc;
1459                 }
1460         }
1461
1462         HvLpEvent_registerHandler(HvLpEvent_Type_VirtualLan,
1463                                   &veth_handle_event);
1464
1465         return vio_register_driver(&veth_driver);
1466 }
1467 module_init(veth_module_init);