source: bootcd/isolinux/syslinux-6.03/gpxe/src/drivers/net/myri10ge.c

Last change on this file was e16e8f2, checked in by Edwin Eefting <edwin@datux.nl>, 3 years ago

bootstuff

  • Property mode set to 100644
File size: 26.6 KB
Line 
1/************************************************* -*- linux-c -*-
2 * Myricom 10Gb Network Interface Card Software
3 * Copyright 2009, Myricom, Inc.
4 *
5 * This program is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU General Public License,
7 * version 2, as published by the Free Software Foundation.
8 *
9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12 * GNU General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write to the Free Software
16 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
17 ****************************************************************/
18
19FILE_LICENCE ( GPL2_ONLY );
20
21/*
22 * Author: Glenn Brown <glenn@myri.com>
23 */
24
25/*
26 * General Theory of Operation
27 *
28 * This is a minimal Myricom 10 gigabit Ethernet driver for network
29 * boot.
30 *
31 * Initialization
32 *
33 * myri10ge_pci_probe() is called by gPXE during initialization.
34 * Minimal NIC initialization is performed to minimize resources
35 * consumed when the driver is resident but unused.
36 *
37 * Network Boot
38 *
39 * myri10ge_net_open() is called by gPXE before attempting to network
40 * boot from the card.  Packet buffers are allocated and the NIC
41 * interface is initialized.
42 *
43 * Transmit
44 *
45 * myri10ge_net_transmit() enqueues frames for transmission by writing
46 * descriptors to the NIC's tx ring.  For simplicity and to avoid
47 * copies, we always have the NIC DMA up the packet.  The sent I/O
48 * buffer is released once the NIC signals myri10ge_interrupt_handler()
49 * that the send has completed.
50 *
51 * Receive
52 *
53 * Receives are posted to the NIC's receive ring.  The NIC fills a
54 * DMAable receive_completion ring with completion notifications.
55 * myri10ge_net_poll() polls for these receive notifications, posts
56 * replacement receive buffers to the NIC, and passes received frames
57 * to netdev_rx().
58 */
59
60/*
61 * Debugging levels:
62 *      - DBG() is for any errors, i.e. failed alloc_iob(), malloc_dma(),
63 *        TX overflow, corrupted packets, ...
64 *      - DBG2() is for successful events, like packet received,
65 *        packet transmitted, and other general notifications.
66 *      - DBGP() prints the name of each called function on entry
67 */
68
69#include <stdint.h>
70
71#include <byteswap.h>
72#include <errno.h>
73#include <gpxe/ethernet.h>
74#include <gpxe/if_ether.h>
75#include <gpxe/iobuf.h>
76#include <gpxe/malloc.h>
77#include <gpxe/netdevice.h>
78#include <gpxe/pci.h>
79#include <gpxe/timer.h>
80
81#include "myri10ge_mcp.h"
82
83/****************************************************************
84 * Forward declarations
85 ****************************************************************/
86
/* Entry points called by the PCI bus layer. */

static int myri10ge_pci_probe ( struct pci_device *pci,
                                const struct pci_device_id *id );
static void myri10ge_pci_remove ( struct pci_device *pci );

/* Entry points installed in the net_device_operations table. */

static void myri10ge_net_close ( struct net_device *netdev );
static void myri10ge_net_irq ( struct net_device *netdev, int enable );
static int myri10ge_net_open ( struct net_device *netdev );
static void myri10ge_net_poll ( struct net_device *netdev );
static int myri10ge_net_transmit ( struct net_device *netdev,
                                   struct io_buffer *iobuf );
100
101/****************************************************************
102 * Constants
103 ****************************************************************/
104
/*
 * Largest valid index of each host-side ring.  A running counter is
 * turned into a ring index by ANDing it with the matching value, so
 * each of these must have the form 2**N-1.
 */

#define MYRI10GE_TRANSMIT_WRAP                  1U
#define MYRI10GE_RECEIVE_WRAP                   7U
#define MYRI10GE_RECEIVE_COMPLETION_WRAP        31U
110
111/****************************************************************
112 * Driver internal data types.
113 ****************************************************************/
114
115/* Structure holding all DMA buffers for a NIC, which we will
116   allocated as contiguous read/write DMAable memory when the NIC is
117   initialized. */
118
119struct myri10ge_dma_buffers
120{
121        /* The NIC DMAs receive completion notifications into this ring */
122
123        mcp_slot_t receive_completion[1+MYRI10GE_RECEIVE_COMPLETION_WRAP];
124
125        /* Interrupt details are DMAd here before interrupting. */
126
127        mcp_irq_data_t irq_data; /* 64B */
128
129        /* NIC command completion status is DMAd here. */
130
131        mcp_cmd_response_t command_response; /* 8B */
132};
133
134struct myri10ge_private
135{
136        /* Interrupt support */
137
138        uint32  *irq_claim;     /* in NIC SRAM */
139        uint32  *irq_deassert;  /* in NIC SRAM */
140
141        /* DMA buffers. */
142
143        struct myri10ge_dma_buffers     *dma;
144
145        /*
146         * Transmit state.
147         *
148         * The counts here are uint32 for easy comparison with
149         * priv->dma->irq_data.send_done_count and with each other.
150         */
151
152        mcp_kreq_ether_send_t   *transmit_ring; /* in NIC SRAM */
153        uint32                   transmit_ring_wrap;
154        uint32                   transmits_posted;
155        uint32                   transmits_done;
156        struct io_buffer        *transmit_iob[1 + MYRI10GE_TRANSMIT_WRAP];
157
158        /*
159         * Receive state.
160         */
161
162        mcp_kreq_ether_recv_t   *receive_post_ring;     /* in NIC SRAM */
163        unsigned int             receive_post_ring_wrap;
164        unsigned int             receives_posted;
165        unsigned int             receives_done;
166        struct io_buffer        *receive_iob[1 + MYRI10GE_RECEIVE_WRAP];
167
168        /* Address for writing commands to the firmware.
169           BEWARE: the value must be written 32 bits at a time. */
170
171        mcp_cmd_t       *command;
172};
173
174/****************************************************************
175 * Driver internal functions.
176 ****************************************************************/
177
/*
 * Debug aid: print the transmit and receive ring counters as
 * done/posted pairs, followed by the name of the calling function.
 * Use it only right after one of the printed values has changed.
 */

#define DBG2_RINGS( priv )                                              \
        DBG2 ( "tx %x/%x rx %x/%x in %s() \n",                          \
               ( priv )->transmits_done,                                \
               ( priv )->transmits_posted,                              \
               ( priv )->receives_done,                                 \
               ( priv )->receives_posted,                               \
               __FUNCTION__ )
186
187/*
188 * Return a pointer to the driver private data for a network device.
189 *
190 * @v netdev    Network device created by this driver.
191 * @ret priv    The corresponding driver private data.
192 */
193static inline struct myri10ge_private *myri10ge_priv ( struct net_device *nd )
194{
195        /* Our private data always follows the network device in memory,
196           since we use alloc_netdev() to allocate the storage. */
197
198        return ( struct myri10ge_private * ) ( nd + 1 );
199}
200
201/*
202 * Pass a receive buffer to the NIC to be filled.
203 *
204 * @v priv      The network device to receive the buffer.
205 * @v iob       The I/O buffer to fill.
206 *
207 * Receive buffers are filled in FIFO order.
208 */
209static void myri10ge_post_receive ( struct myri10ge_private *priv,
210                                    struct io_buffer *iob )
211{
212        unsigned int             receives_posted;
213        mcp_kreq_ether_recv_t   *request;
214
215        /* Record the posted I/O buffer, to be passed to netdev_rx() on
216           receive. */
217
218        receives_posted = priv->receives_posted;
219        priv->receive_iob[receives_posted & MYRI10GE_RECEIVE_WRAP] = iob;
220
221        /* Post the receive. */
222
223        request = &priv->receive_post_ring[receives_posted
224                                           & priv->receive_post_ring_wrap];
225        request->addr_high = 0;
226        wmb();
227        request->addr_low = htonl ( virt_to_bus ( iob->data ) );
228        priv->receives_posted = ++receives_posted;
229}
230
231/*
232 * Execute a command on the NIC.
233 *
234 * @v priv      NIC to perform the command.
235 * @v cmd       The command to perform.
236 * @v data      I/O copy buffer for parameters/results
237 * @ret rc      0 on success, else an error code.
238 */
239static int myri10ge_command ( struct myri10ge_private *priv,
240                              uint32 cmd,
241                              uint32 data[3] )
242{
243        int                              i;
244        mcp_cmd_t                       *command;
245        uint32                           result;
246        unsigned int                     slept_ms;
247        volatile mcp_cmd_response_t     *response;
248
249        DBGP ( "myri10ge_command ( ,%d, ) \n", cmd );
250        command = priv->command;
251        response = &priv->dma->command_response;
252
253        /* Mark the command as incomplete. */
254
255        response->result = 0xFFFFFFFF;
256
257        /* Pass the command to the NIC. */
258
259        command->cmd                = htonl ( cmd );
260        command->data0              = htonl ( data[0] );
261        command->data1              = htonl ( data[1] );
262        command->data2              = htonl ( data[2] );
263        command->response_addr.high = 0;
264        command->response_addr.low
265                = htonl ( virt_to_bus ( &priv->dma->command_response ) );
266        for ( i=0; i<36; i+=4 )
267                * ( uint32 * ) &command->pad[i] = 0;
268        wmb();
269        * ( uint32 * ) &command->pad[36] = 0;
270
271        /* Wait up to 2 seconds for a response. */
272
273        for ( slept_ms=0; slept_ms<2000; slept_ms++ ) {
274                result = response->result;
275                if ( result == 0 ) {
276                        data[0] = ntohl ( response->data );
277                        return 0;
278                } else if ( result != 0xFFFFFFFF ) {
279                        DBG ( "cmd%d:0x%x\n",
280                              cmd,
281                              ntohl ( response->result ) );
282                        return -EIO;
283                }
284                udelay ( 1000 );
285                rmb();
286        }
287        DBG ( "cmd%d:timed out\n", cmd );
288        return -ETIMEDOUT;
289}
290
291/*
292 * Handle any pending interrupt.
293 *
294 * @v netdev            Device being polled for interrupts.
295 *
296 * This is called periodically to let the driver check for interrupts.
297 */
298static void myri10ge_interrupt_handler ( struct net_device *netdev )
299{
300        struct myri10ge_private *priv;
301        mcp_irq_data_t          *irq_data;
302        uint8                    valid;
303
304        priv = myri10ge_priv ( netdev );
305        irq_data = &priv->dma->irq_data;
306
307        /* Return if there was no interrupt. */
308
309        rmb();
310        valid = irq_data->valid;
311        if ( !valid )
312                return;
313        DBG2 ( "irq " );
314
315        /* Tell the NIC to deassert the interrupt and clear
316           irq_data->valid.*/
317
318        *priv->irq_deassert = 0;        /* any value is OK. */
319        mb();
320
321        /* Handle any new receives. */
322
323        if ( valid & 1 ) {
324
325                /* Pass the receive interrupt token back to the NIC. */
326
327                DBG2 ( "rx " );
328                *priv->irq_claim = htonl ( 3 );
329                wmb();
330        }
331
332        /* Handle any sent packet by freeing its I/O buffer, now that
333           we know it has been DMAd. */
334
335        if ( valid & 2 ) {
336                unsigned int nic_done_count;
337
338                DBG2 ( "snt " );
339                nic_done_count = ntohl ( priv->dma->irq_data.send_done_count );
340                while ( priv->transmits_done != nic_done_count ) {
341                        struct io_buffer *iob;
342
343                        iob = priv->transmit_iob [priv->transmits_done
344                                                  & MYRI10GE_TRANSMIT_WRAP];
345                        DBG2 ( "%p ", iob );
346                        netdev_tx_complete ( netdev, iob );
347                        ++priv->transmits_done;
348                }
349        }
350
351        /* Record any statistics update. */
352
353        if ( irq_data->stats_updated ) {
354
355                /* Update the link status. */
356
357                DBG2 ( "stats " );
358                if ( ntohl ( irq_data->link_up ) == MXGEFW_LINK_UP )
359                        netdev_link_up ( netdev );
360                else
361                        netdev_link_down ( netdev );
362
363                /* Ignore all error counters from the NIC. */
364        }
365
366        /* Wait for the interrupt to be deasserted, as indicated by
367           irq_data->valid, which is set by the NIC after the deassert. */
368
369        DBG2 ( "wait " );
370        do {
371                mb();
372        } while ( irq_data->valid );
373
374        /* Claim the interrupt to enable future interrupt generation. */
375
376        DBG2 ( "claim\n" );
377        * ( priv->irq_claim + 1 ) = htonl ( 3 );
378        mb();
379}
380
/*
 * Registers and mode values of the Myricom vendor-specific PCI
 * configuration space capability, used to read the STRING_SPECS.
 * The register macros expect the capability offset in a variable
 * named `vs` at the point of use.
 */

#define VS_MODE ( vs + 0x10 )
#define         VS_MODE_READ32 0x3
#define         VS_MODE_LOCATE 0x8
#define                 VS_LOCATE_STRING_SPECS 0x3
#define VS_DATA ( vs + 0x14 )
#define VS_ADDR ( vs + 0x18 )
390
391/*
392 * Read MAC address from its 'string specs' via the vendor-specific
393 * capability.  (This capability allows NIC SRAM and ROM to be read
394 * before it is mapped.)
395 *
396 * @v pci               The device.
397 * @v mac               Buffer to store the MAC address.
398 * @ret rc              Returns 0 on success, else an error code.
399 */
400static int mac_address_from_string_specs ( struct pci_device *pci,
401                                                   uint8 mac[ETH_ALEN] )
402{
403        char string_specs[256];
404        char *ptr, *limit;
405        char *to = string_specs;
406        uint32 addr;
407        uint32 len;
408        unsigned int vs;
409        int mac_set = 0;
410
411        /* Find the "vendor specific" capability. */
412
413        vs = pci_find_capability ( pci, 9 );
414        if ( vs == 0 ) {
415                DBG ( "no VS\n" );
416                return -ENOTSUP;
417        }
418
419        /* Locate the String specs in LANai SRAM. */
420
421        pci_write_config_byte ( pci, VS_MODE, VS_MODE_LOCATE );
422        pci_write_config_dword ( pci, VS_ADDR, VS_LOCATE_STRING_SPECS );
423        pci_read_config_dword ( pci, VS_ADDR, &addr );
424        pci_read_config_dword ( pci, VS_DATA, &len );
425        DBG2 ( "ss@%x,%x\n", addr, len );
426
427        /* Copy in the string specs.  Use 32-bit reads for performance. */
428
429        if ( len > sizeof ( string_specs ) || ( len & 3 ) ) {
430                DBG ( "SS too big\n" );
431                return -ENOTSUP;
432        }
433
434        pci_write_config_byte ( pci, VS_MODE, VS_MODE_READ32 );
435        while ( len >= 4 ) {
436                uint32 tmp;
437
438                pci_write_config_byte ( pci, VS_ADDR, addr );
439                pci_read_config_dword ( pci, VS_DATA, &tmp );
440                tmp = ntohl ( tmp );
441                memcpy ( to, &tmp, 4 );
442                to += 4;
443                addr += 4;
444                len -= 4;
445        }
446        pci_write_config_byte ( pci, VS_MODE, 0 );
447
448        /* Parse the string specs. */
449
450        DBG2 ( "STRING_SPECS:\n" );
451        ptr = string_specs;
452        limit = string_specs + sizeof ( string_specs );
453        while ( *ptr != '\0' && ptr < limit ) {
454                DBG2 ( "%s\n", ptr );
455                if ( memcmp ( ptr, "MAC=", 4 ) == 0 ) {
456                        unsigned int i;
457
458                        ptr += 4;
459                        for ( i=0; i<6; i++ ) {
460                                if ( ( ptr + 2 ) > limit ) {
461                                        DBG ( "bad MAC addr\n" );
462                                        return -ENOTSUP;
463                                }
464                                mac[i] = strtoul ( ptr, &ptr, 16 );
465                                ptr += 1;
466                        }
467                        mac_set = 1;
468                }
469                else
470                        while ( ptr < limit && *ptr++ );
471        }
472
473        /* Verify we parsed all we need. */
474
475        if ( !mac_set ) {
476                DBG ( "no MAC addr\n" );
477                return -ENOTSUP;
478        }
479
480        DBG2 ( "MAC %02x:%02x:%02x:%02x:%02x:%02x\n",
481               mac[0], mac[1], mac[2], mac[3], mac[4], mac[5] );
482
483        return 0;
484}
485
486/****************************************************************
487 * gPXE PCI Device Driver API functions
488 ****************************************************************/
489
490/*
491 * Initialize the PCI device.
492 *
493 * @v pci               The device's associated pci_device structure.
494 * @v id                The PCI device + vendor id.
495 * @ret rc              Returns zero if successfully initialized.
496 *
497 * This function is called very early on, while gPXE is initializing.
498 * This is a gPXE PCI Device Driver API function.
499 */
500static int myri10ge_pci_probe ( struct pci_device *pci,
501                                const struct pci_device_id *id __unused )
502{
503        static struct net_device_operations myri10ge_operations = {
504                .open     = myri10ge_net_open,
505                .close    = myri10ge_net_close,
506                .transmit = myri10ge_net_transmit,
507                .poll     = myri10ge_net_poll,
508                .irq      = myri10ge_net_irq
509        };
510
511        const char *dbg;
512        int rc;
513        struct net_device *netdev;
514        struct myri10ge_private *priv;
515
516        DBGP ( "myri10ge_pci_probe: " );
517
518        netdev = alloc_etherdev ( sizeof ( *priv ) );
519        if ( !netdev ) {
520                rc = -ENOMEM;
521                dbg = "alloc_etherdev";
522                goto abort_with_nothing;
523        }
524
525        netdev_init ( netdev, &myri10ge_operations );
526        priv = myri10ge_priv ( netdev );
527
528        pci_set_drvdata ( pci, netdev );
529        netdev->dev = &pci->dev;
530
531        /* Make sure interrupts are disabled. */
532
533        myri10ge_net_irq ( netdev, 0 );
534
535        /* Read the NIC HW address. */
536
537        rc = mac_address_from_string_specs ( pci, netdev->hw_addr );
538        if ( rc ) {
539                dbg = "mac_from_ss";
540                goto abort_with_netdev_init;
541        }
542        DBGP ( "mac " );
543
544        /* Enable bus master, etc. */
545
546        adjust_pci_device ( pci );
547        DBGP ( "pci " );
548
549        /* Register the initialized network device. */
550
551        rc = register_netdev ( netdev );
552        if ( rc ) {
553                dbg = "register_netdev";
554                goto abort_with_netdev_init;
555        }
556
557        DBGP ( "done\n" );
558
559        return 0;
560
561abort_with_netdev_init:
562        netdev_nullify ( netdev );
563        netdev_put ( netdev );
564abort_with_nothing:
565        DBG ( "%s:%s\n", dbg, strerror ( rc ) );
566        return rc;
567}
568
/*
 * Detach the driver from a PCI device.
 *
 * @v pci               PCI device to remove.
 *
 * Unregisters and releases the network device that
 * myri10ge_pci_probe() attached to the PCI device.
 * This is a PCI Device Driver API function.
 */
static void myri10ge_pci_remove ( struct pci_device *pci )
{
        struct net_device       *nd;

        DBGP ( "myri10ge_pci_remove\n" );

        nd = pci_get_drvdata ( pci );
        unregister_netdev ( nd );
        netdev_nullify ( nd );
        netdev_put ( nd );
}
587
588/****************************************************************
589 * gPXE Network Device Driver Operations
590 ****************************************************************/
591
592/*
593 * Close a network device.
594 *
595 * @v netdev            Device to close.
596 *
597 * This is a gPXE Network Device Driver API function.
598 */
599static void myri10ge_net_close ( struct net_device *netdev )
600{
601        struct myri10ge_private *priv;
602        uint32                   data[3];
603
604        DBGP ( "myri10ge_net_close\n" );
605        priv = myri10ge_priv ( netdev );
606
607        /* disable interrupts */
608
609        myri10ge_net_irq ( netdev, 0 );
610
611        /* Reset the NIC interface, so we won't get any more events from
612           the NIC. */
613
614        myri10ge_command ( priv, MXGEFW_CMD_RESET, data );
615
616        /* Free receive buffers that were never filled. */
617
618        while ( priv->receives_done != priv->receives_posted ) {
619                free_iob ( priv->receive_iob[priv->receives_done
620                                             & MYRI10GE_RECEIVE_WRAP] );
621                ++priv->receives_done;
622        }
623
624        /* Release DMAable memory. */
625
626        free_dma ( priv->dma, sizeof ( *priv->dma ) );
627
628        /* Erase all state from the open. */
629
630        memset ( priv, 0, sizeof ( *priv ) );
631
632        DBG2_RINGS ( priv );
633}
634
635/*
636 * Enable or disable IRQ masking.
637 *
638 * @v netdev            Device to control.
639 * @v enable            Zero to mask off IRQ, non-zero to enable IRQ.
640 *
641 * This is a gPXE Network Driver API function.
642 */
643static void myri10ge_net_irq ( struct net_device *netdev, int enable )
644{
645        struct pci_device       *pci_dev;
646        uint16                   val;
647
648        DBGP ( "myri10ge_net_irq\n" );
649        pci_dev = ( struct pci_device * ) netdev->dev;
650
651        /* Adjust the Interrupt Disable bit in the Command register of the
652           PCI Device. */
653
654        pci_read_config_word ( pci_dev, PCI_COMMAND, &val );
655        if ( enable )
656                val &= ~PCI_COMMAND_INTX_DISABLE;
657        else
658                val |= PCI_COMMAND_INTX_DISABLE;
659        pci_write_config_word ( pci_dev, PCI_COMMAND, val );
660}
661
662/*
663 * Opens a network device.
664 *
665 * @v netdev            Device to be opened.
666 * @ret rc              Non-zero if failed to open.
667 *
668 * This enables tx and rx on the device.
669 * This is a gPXE Network Device Driver API function.
670 */
671static int myri10ge_net_open ( struct net_device *netdev )
672{
673        const char              *dbg;   /* printed upon error return */
674        int                      rc;
675        struct io_buffer        *iob;
676        struct myri10ge_private *priv;
677        uint32                   data[3];
678        struct pci_device       *pci_dev;
679        void                    *membase;
680
681        DBGP ( "myri10ge_net_open\n" );
682        priv    = myri10ge_priv ( netdev );
683        pci_dev = ( struct pci_device * ) netdev->dev;
684        membase = phys_to_virt ( pci_dev->membase );
685
686        /* Compute address for passing commands to the firmware. */
687
688        priv->command = membase + MXGEFW_ETH_CMD;
689
690        /* Ensure interrupts are disabled. */
691
692        myri10ge_net_irq ( netdev, 0 );
693
694        /* Allocate cleared DMAable buffers. */
695
696        priv->dma = malloc_dma ( sizeof ( *priv->dma ) , 128 );
697        if ( !priv->dma ) {
698                rc = -ENOMEM;
699                dbg = "DMA";
700                goto abort_with_nothing;
701        }
702        memset ( priv->dma, 0, sizeof ( *priv->dma ) );
703
704        /* Simplify following code. */
705
706#define TRY( prefix, base, suffix ) do {                \
707                rc = myri10ge_command ( priv,           \
708                                        MXGEFW_         \
709                                        ## prefix       \
710                                        ## base         \
711                                        ## suffix,      \
712                                        data );         \
713                if ( rc ) {                             \
714                        dbg = #base;                    \
715                        goto abort_with_dma;            \
716                }                                       \
717        } while ( 0 )
718
719        /* Send a reset command to the card to see if it is alive,
720           and to reset its queue state. */
721
722        TRY ( CMD_, RESET , );
723
724        /* Set the interrupt queue size. */
725
726        data[0] = ( sizeof ( priv->dma->receive_completion )
727                    | MXGEFW_CMD_SET_INTRQ_SIZE_FLAG_NO_STRICT_SIZE_CHECK );
728        TRY ( CMD_SET_ , INTRQ_SIZE , );
729
730        /* Set the interrupt queue DMA address. */
731
732        data[0] = virt_to_bus ( &priv->dma->receive_completion );
733        data[1] = 0;
734        TRY ( CMD_SET_, INTRQ_DMA, );
735
736        /* Get the NIC interrupt claim address. */
737
738        TRY ( CMD_GET_, IRQ_ACK, _OFFSET );
739        priv->irq_claim = membase + data[0];
740
741        /* Get the NIC interrupt assert address. */
742
743        TRY ( CMD_GET_, IRQ_DEASSERT, _OFFSET );
744        priv->irq_deassert = membase + data[0];
745
746        /* Disable interrupt coalescing, which is inappropriate for the
747           minimal buffering we provide. */
748
749        TRY ( CMD_GET_, INTR_COAL, _DELAY_OFFSET );
750        * ( ( uint32 * ) ( membase + data[0] ) ) = 0;
751
752        /* Set the NIC mac address. */
753
754        data[0] = ( netdev->ll_addr[0] << 24
755                    | netdev->ll_addr[1] << 16
756                    | netdev->ll_addr[2] << 8
757                    | netdev->ll_addr[3] );
758        data[1] = ( ( netdev->ll_addr[4] << 8 )
759                     | netdev->ll_addr[5] );
760        TRY ( SET_ , MAC_ADDRESS , );
761
762        /* Enable multicast receives, because some gPXE clients don't work
763           without multicast. . */
764
765        TRY ( ENABLE_ , ALLMULTI , );
766
767        /* Disable Ethernet flow control, so the NIC cannot deadlock the
768           network under any circumstances. */
769
770        TRY ( DISABLE_ , FLOW , _CONTROL );
771
772        /* Compute transmit ring sizes. */
773
774        data[0] = 0;            /* slice 0 */
775        TRY ( CMD_GET_, SEND_RING, _SIZE );
776        priv->transmit_ring_wrap
777                = data[0] / sizeof ( mcp_kreq_ether_send_t ) - 1;
778        if ( priv->transmit_ring_wrap
779             & ( priv->transmit_ring_wrap + 1 ) ) {
780                rc = -EPROTO;
781                dbg = "TX_RING";
782                goto abort_with_dma;
783        }
784
785        /* Compute receive ring sizes. */
786
787        data[0] = 0;            /* slice 0 */
788        TRY ( CMD_GET_ , RX_RING , _SIZE );
789        priv->receive_post_ring_wrap = data[0] / sizeof ( mcp_dma_addr_t ) - 1;
790        if ( priv->receive_post_ring_wrap
791             & ( priv->receive_post_ring_wrap + 1 ) ) {
792                rc = -EPROTO;
793                dbg = "RX_RING";
794                goto abort_with_dma;
795        }
796
797        /* Get NIC transmit ring address. */
798
799        data[0] = 0;            /* slice 0. */
800        TRY ( CMD_GET_, SEND, _OFFSET );
801        priv->transmit_ring = membase + data[0];
802
803        /* Get the NIC receive ring address. */
804
805        data[0] = 0;            /* slice 0. */
806        TRY ( CMD_GET_, SMALL_RX, _OFFSET );
807        priv->receive_post_ring = membase + data[0];
808
809        /* Set the Nic MTU. */
810
811        data[0] = ETH_FRAME_LEN;
812        TRY ( CMD_SET_, MTU, );
813
814        /* Tell the NIC our buffer sizes. ( We use only small buffers, so we
815           set both buffer sizes to the same value, which will force all
816           received frames to use small buffers. ) */
817
818        data[0] = MXGEFW_PAD + ETH_FRAME_LEN;
819        TRY ( CMD_SET_, SMALL_BUFFER, _SIZE );
820        data[0] = MXGEFW_PAD + ETH_FRAME_LEN;
821        TRY ( CMD_SET_, BIG_BUFFER, _SIZE );
822
823        /* Tell firmware where to DMA IRQ data */
824
825        data[0] = virt_to_bus ( &priv->dma->irq_data );
826        data[1] = 0;
827        data[2] = sizeof ( priv->dma->irq_data );
828        TRY ( CMD_SET_, STATS_DMA_V2, );
829
830        /* Post receives. */
831
832        while ( priv->receives_posted <= MYRI10GE_RECEIVE_WRAP ) {
833
834                /* Reserve 2 extra bytes at the start of packets, since
835                   the firmware always skips the first 2 bytes of the buffer
836                   so TCP headers will be aligned. */
837
838                iob = alloc_iob ( MXGEFW_PAD + ETH_FRAME_LEN );
839                if ( !iob ) {
840                        rc = -ENOMEM;
841                        dbg = "alloc_iob";
842                        goto abort_with_receives_posted;
843                }
844                iob_reserve ( iob, MXGEFW_PAD );
845                myri10ge_post_receive ( priv, iob );
846        }
847
848        /* Bring up the link. */
849
850        TRY ( CMD_, ETHERNET_UP, );
851
852        DBG2_RINGS ( priv );
853        return 0;
854
855abort_with_receives_posted:
856        while ( priv->receives_posted-- )
857                free_iob ( priv->receive_iob[priv->receives_posted] );
858abort_with_dma:
859        /* Because the link is not up, we don't have to reset the NIC here. */
860        free_dma ( priv->dma, sizeof ( *priv->dma ) );
861abort_with_nothing:
862        /* Erase all signs of the failed open. */
863        memset ( priv, 0, sizeof ( *priv ) );
864        DBG ( "%s: %s\n", dbg, strerror ( rc ) );
865        return ( rc );
866}
867
868/*
869 * This function allows a driver to process events during operation.
870 *
871 * @v netdev            Device being polled.
872 *
873 * This is called periodically by gPXE to let the driver check the status of
874 * transmitted packets and to allow the driver to check for received packets.
875 * This is a gPXE Network Device Driver API function.
876 */
877static void myri10ge_net_poll ( struct net_device *netdev )
878{
879        struct io_buffer                *iob;
880        struct io_buffer                *replacement;
881        struct myri10ge_dma_buffers     *dma;
882        struct myri10ge_private         *priv;
883        unsigned int                     length;
884        unsigned int                     orig_receives_posted;
885
886        DBGP ( "myri10ge_net_poll\n" );
887        priv = myri10ge_priv ( netdev );
888        dma  = priv->dma;
889
890        /* Process any pending interrupt. */
891
892        myri10ge_interrupt_handler ( netdev );
893
894        /* Pass up received frames, but limit ourselves to receives posted
895           before this function was called, so we cannot livelock if
896           receives are arriving faster than we process them. */
897
898        orig_receives_posted = priv->receives_posted;
899        while ( priv->receives_done != orig_receives_posted ) {
900
901                /* Stop if there is no pending receive. */
902
903                length = ntohs ( dma->receive_completion
904                                 [priv->receives_done
905                                  & MYRI10GE_RECEIVE_COMPLETION_WRAP]
906                                 .length );
907                if ( length == 0 )
908                        break;
909
910                /* Allocate a replacement buffer.  If none is available,
911                   stop passing up packets until a buffer is available.
912
913                   Reserve 2 extra bytes at the start of packets, since
914                   the firmware always skips the first 2 bytes of the buffer
915                   so TCP headers will be aligned. */
916
917                replacement = alloc_iob ( MXGEFW_PAD + ETH_FRAME_LEN );
918                if ( !replacement ) {
919                        DBG ( "NO RX BUF\n" );
920                        break;
921                }
922                iob_reserve ( replacement, MXGEFW_PAD );
923
924                /* Pass up the received frame. */
925
926                iob = priv->receive_iob[priv->receives_done
927                                        & MYRI10GE_RECEIVE_WRAP];
928                iob_put ( iob, length );
929                netdev_rx ( netdev, iob );
930
931                /* We have consumed the packet, so clear the receive
932                   notification. */
933
934                dma->receive_completion [priv->receives_done
935                                         & MYRI10GE_RECEIVE_COMPLETION_WRAP]
936                        .length = 0;
937                wmb();
938
939                /* Replace the passed-up I/O buffer. */
940
941                myri10ge_post_receive ( priv, replacement );
942                ++priv->receives_done;
943                DBG2_RINGS ( priv );
944        }
945}
946
947/*
948 * This transmits a packet.
949 *
950 * @v netdev            Device to transmit from.
951 * @v iobuf             Data to transmit.
952 * @ret rc              Non-zero if failed to transmit.
953 *
954 * This is a gPXE Network Driver API function.
955 */
956static int myri10ge_net_transmit ( struct net_device *netdev,
957                                   struct io_buffer *iobuf )
958{
959        mcp_kreq_ether_send_t   *kreq;
960        size_t                   len;
961        struct myri10ge_private *priv;
962        uint32                   transmits_posted;
963
964        DBGP ( "myri10ge_net_transmit\n" );
965        priv = myri10ge_priv ( netdev );
966
967        /* Confirm space in the send ring. */
968
969        transmits_posted = priv->transmits_posted;
970        if ( transmits_posted - priv->transmits_done
971             > MYRI10GE_TRANSMIT_WRAP ) {
972                DBG ( "TX ring full\n" );
973                return -ENOBUFS;
974        }
975
976        DBG2 ( "TX %p+%d ", iobuf->data, iob_len ( iobuf ) );
977        DBG2_HD ( iobuf->data, 14 );
978
979        /* Record the packet being transmitted, so we can later report
980           send completion. */
981
982        priv->transmit_iob[transmits_posted & MYRI10GE_TRANSMIT_WRAP] = iobuf;
983
984        /* Copy and pad undersized frames, because the NIC does not pad,
985           and we would rather copy small frames than do a gather. */
986
987        len = iob_len ( iobuf );
988        if ( len < ETH_ZLEN ) {
989                iob_pad ( iobuf, ETH_ZLEN );
990                len = ETH_ZLEN;
991        }
992
993        /* Enqueue the packet by writing a descriptor to the NIC.
994           This is a bit tricky because the HW requires 32-bit writes,
995           but the structure has smaller fields. */
996
997        kreq = &priv->transmit_ring[transmits_posted
998                                    & priv->transmit_ring_wrap];
999        kreq->addr_high = 0;
1000        kreq->addr_low = htonl ( virt_to_bus ( iobuf->data ) );
1001        ( ( uint32 * ) kreq ) [2] = htonl (
1002                0x0000 << 16     /* pseudo_header_offset */
1003                | ( len & 0xFFFF ) /* length */
1004                );
1005        wmb();
1006        ( ( uint32 * ) kreq ) [3] = htonl (
1007                0x00 << 24      /* pad */
1008                | 0x01 << 16    /* rdma_count */
1009                | 0x00 << 8     /* cksum_offset */
1010                | ( MXGEFW_FLAGS_SMALL
1011                    | MXGEFW_FLAGS_FIRST
1012                    | MXGEFW_FLAGS_NO_TSO ) /* flags */
1013                );
1014        wmb();
1015
1016        /* Mark the slot as consumed and return. */
1017
1018        priv->transmits_posted = ++transmits_posted;
1019        DBG2_RINGS ( priv );
1020        return 0;
1021}
1022
1023static struct pci_device_id myri10ge_nics[] = {
1024        /* Each of these macros must be a single line to satisfy a script. */
1025        PCI_ROM ( 0x14c1, 0x0008, "myri10ge", "Myricom 10Gb Ethernet Adapter", 0 ) ,
1026};
1027
1028struct pci_driver myri10ge_driver __pci_driver = {
1029        .ids      = myri10ge_nics,
1030        .id_count = ( sizeof ( myri10ge_nics ) / sizeof ( myri10ge_nics[0] ) ) ,
1031        .probe    = myri10ge_pci_probe,
1032        .remove   = myri10ge_pci_remove
1033};
1034
1035/*
1036 * Local variables:
1037 *  c-basic-offset: 8
1038 *  c-indent-level: 8
1039 *  tab-width: 8
1040 * End:
1041 */
Note: See TracBrowser for help on using the repository browser.