1 | /* |
---|
2 | * Copyright (C) 2007 Michael Brown <mbrown@fensystems.co.uk>. |
---|
3 | * |
---|
4 | * This program is free software; you can redistribute it and/or |
---|
5 | * modify it under the terms of the GNU General Public License as |
---|
6 | * published by the Free Software Foundation; either version 2 of the |
---|
7 | * License, or any later version. |
---|
8 | * |
---|
9 | * This program is distributed in the hope that it will be useful, but |
---|
10 | * WITHOUT ANY WARRANTY; without even the implied warranty of |
---|
11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
---|
12 | * General Public License for more details. |
---|
13 | * |
---|
14 | * You should have received a copy of the GNU General Public License |
---|
15 | * along with this program; if not, write to the Free Software |
---|
16 | * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. |
---|
17 | */ |
---|
18 | |
---|
19 | FILE_LICENCE ( GPL2_OR_LATER ); |
---|
20 | |
---|
21 | #include <stdint.h> |
---|
22 | #include <stdlib.h> |
---|
23 | #include <stdio.h> |
---|
24 | #include <string.h> |
---|
25 | #include <unistd.h> |
---|
26 | #include <byteswap.h> |
---|
27 | #include <errno.h> |
---|
28 | #include <assert.h> |
---|
29 | #include <gpxe/list.h> |
---|
30 | #include <gpxe/errortab.h> |
---|
31 | #include <gpxe/if_arp.h> |
---|
32 | #include <gpxe/netdevice.h> |
---|
33 | #include <gpxe/iobuf.h> |
---|
34 | #include <gpxe/ipoib.h> |
---|
35 | #include <gpxe/process.h> |
---|
36 | #include <gpxe/infiniband.h> |
---|
37 | #include <gpxe/ib_mi.h> |
---|
38 | #include <gpxe/ib_sma.h> |
---|
39 | |
---|
40 | /** @file |
---|
41 | * |
---|
42 | * Infiniband protocol |
---|
43 | * |
---|
44 | */ |
---|
45 | |
---|
/**
 * List of Infiniband devices
 *
 * register_ibdev() appends devices to the tail of this list and
 * unregister_ibdev() removes them again.
 */
struct list_head ib_devices = LIST_HEAD_INIT ( ib_devices );

/**
 * List of open Infiniband devices, in reverse order of opening
 *
 * ib_open() inserts at the head, so the first entry is always the
 * most recently opened device.
 */
static struct list_head open_ib_devices = LIST_HEAD_INIT ( open_ib_devices );

/* Disambiguate the various possible EINPROGRESSes
 *
 * ib_link_rc() reports the INIT and ARMED port states using these
 * two distinct codes.
 */
#define EINPROGRESS_INIT ( EINPROGRESS | EUNIQ_01 )
#define EINPROGRESS_ARMED ( EINPROGRESS | EUNIQ_02 )

/** Human-readable message for the link statuses */
struct errortab infiniband_errors[] __errortab = {
	{ EINPROGRESS_INIT, "Initialising" },
	{ EINPROGRESS_ARMED, "Armed" },
};
---|
61 | |
---|
62 | /*************************************************************************** |
---|
63 | * |
---|
64 | * Completion queues |
---|
65 | * |
---|
66 | *************************************************************************** |
---|
67 | */ |
---|
68 | |
---|
69 | /** |
---|
70 | * Create completion queue |
---|
71 | * |
---|
72 | * @v ibdev Infiniband device |
---|
73 | * @v num_cqes Number of completion queue entries |
---|
74 | * @v op Completion queue operations |
---|
75 | * @ret cq New completion queue |
---|
76 | */ |
---|
77 | struct ib_completion_queue * |
---|
78 | ib_create_cq ( struct ib_device *ibdev, unsigned int num_cqes, |
---|
79 | struct ib_completion_queue_operations *op ) { |
---|
80 | struct ib_completion_queue *cq; |
---|
81 | int rc; |
---|
82 | |
---|
83 | DBGC ( ibdev, "IBDEV %p creating completion queue\n", ibdev ); |
---|
84 | |
---|
85 | /* Allocate and initialise data structure */ |
---|
86 | cq = zalloc ( sizeof ( *cq ) ); |
---|
87 | if ( ! cq ) |
---|
88 | goto err_alloc_cq; |
---|
89 | cq->ibdev = ibdev; |
---|
90 | list_add ( &cq->list, &ibdev->cqs ); |
---|
91 | cq->num_cqes = num_cqes; |
---|
92 | INIT_LIST_HEAD ( &cq->work_queues ); |
---|
93 | cq->op = op; |
---|
94 | |
---|
95 | /* Perform device-specific initialisation and get CQN */ |
---|
96 | if ( ( rc = ibdev->op->create_cq ( ibdev, cq ) ) != 0 ) { |
---|
97 | DBGC ( ibdev, "IBDEV %p could not initialise completion " |
---|
98 | "queue: %s\n", ibdev, strerror ( rc ) ); |
---|
99 | goto err_dev_create_cq; |
---|
100 | } |
---|
101 | |
---|
102 | DBGC ( ibdev, "IBDEV %p created %d-entry completion queue %p (%p) " |
---|
103 | "with CQN %#lx\n", ibdev, num_cqes, cq, |
---|
104 | ib_cq_get_drvdata ( cq ), cq->cqn ); |
---|
105 | return cq; |
---|
106 | |
---|
107 | ibdev->op->destroy_cq ( ibdev, cq ); |
---|
108 | err_dev_create_cq: |
---|
109 | list_del ( &cq->list ); |
---|
110 | free ( cq ); |
---|
111 | err_alloc_cq: |
---|
112 | return NULL; |
---|
113 | } |
---|
114 | |
---|
115 | /** |
---|
116 | * Destroy completion queue |
---|
117 | * |
---|
118 | * @v ibdev Infiniband device |
---|
119 | * @v cq Completion queue |
---|
120 | */ |
---|
121 | void ib_destroy_cq ( struct ib_device *ibdev, |
---|
122 | struct ib_completion_queue *cq ) { |
---|
123 | DBGC ( ibdev, "IBDEV %p destroying completion queue %#lx\n", |
---|
124 | ibdev, cq->cqn ); |
---|
125 | assert ( list_empty ( &cq->work_queues ) ); |
---|
126 | ibdev->op->destroy_cq ( ibdev, cq ); |
---|
127 | list_del ( &cq->list ); |
---|
128 | free ( cq ); |
---|
129 | } |
---|
130 | |
---|
131 | /** |
---|
132 | * Poll completion queue |
---|
133 | * |
---|
134 | * @v ibdev Infiniband device |
---|
135 | * @v cq Completion queue |
---|
136 | */ |
---|
137 | void ib_poll_cq ( struct ib_device *ibdev, |
---|
138 | struct ib_completion_queue *cq ) { |
---|
139 | struct ib_work_queue *wq; |
---|
140 | |
---|
141 | /* Poll completion queue */ |
---|
142 | ibdev->op->poll_cq ( ibdev, cq ); |
---|
143 | |
---|
144 | /* Refill receive work queues */ |
---|
145 | list_for_each_entry ( wq, &cq->work_queues, list ) { |
---|
146 | if ( ! wq->is_send ) |
---|
147 | ib_refill_recv ( ibdev, wq->qp ); |
---|
148 | } |
---|
149 | } |
---|
150 | |
---|
151 | /*************************************************************************** |
---|
152 | * |
---|
153 | * Work queues |
---|
154 | * |
---|
155 | *************************************************************************** |
---|
156 | */ |
---|
157 | |
---|
158 | /** |
---|
159 | * Create queue pair |
---|
160 | * |
---|
161 | * @v ibdev Infiniband device |
---|
162 | * @v type Queue pair type |
---|
163 | * @v num_send_wqes Number of send work queue entries |
---|
164 | * @v send_cq Send completion queue |
---|
165 | * @v num_recv_wqes Number of receive work queue entries |
---|
166 | * @v recv_cq Receive completion queue |
---|
167 | * @ret qp Queue pair |
---|
168 | * |
---|
169 | * The queue pair will be left in the INIT state; you must call |
---|
170 | * ib_modify_qp() before it is ready to use for sending and receiving. |
---|
171 | */ |
---|
172 | struct ib_queue_pair * ib_create_qp ( struct ib_device *ibdev, |
---|
173 | enum ib_queue_pair_type type, |
---|
174 | unsigned int num_send_wqes, |
---|
175 | struct ib_completion_queue *send_cq, |
---|
176 | unsigned int num_recv_wqes, |
---|
177 | struct ib_completion_queue *recv_cq ) { |
---|
178 | struct ib_queue_pair *qp; |
---|
179 | size_t total_size; |
---|
180 | int rc; |
---|
181 | |
---|
182 | DBGC ( ibdev, "IBDEV %p creating queue pair\n", ibdev ); |
---|
183 | |
---|
184 | /* Allocate and initialise data structure */ |
---|
185 | total_size = ( sizeof ( *qp ) + |
---|
186 | ( num_send_wqes * sizeof ( qp->send.iobufs[0] ) ) + |
---|
187 | ( num_recv_wqes * sizeof ( qp->recv.iobufs[0] ) ) ); |
---|
188 | qp = zalloc ( total_size ); |
---|
189 | if ( ! qp ) |
---|
190 | goto err_alloc_qp; |
---|
191 | qp->ibdev = ibdev; |
---|
192 | list_add ( &qp->list, &ibdev->qps ); |
---|
193 | qp->type = type; |
---|
194 | qp->send.qp = qp; |
---|
195 | qp->send.is_send = 1; |
---|
196 | qp->send.cq = send_cq; |
---|
197 | list_add ( &qp->send.list, &send_cq->work_queues ); |
---|
198 | qp->send.psn = ( random() & 0xffffffUL ); |
---|
199 | qp->send.num_wqes = num_send_wqes; |
---|
200 | qp->send.iobufs = ( ( ( void * ) qp ) + sizeof ( *qp ) ); |
---|
201 | qp->recv.qp = qp; |
---|
202 | qp->recv.cq = recv_cq; |
---|
203 | list_add ( &qp->recv.list, &recv_cq->work_queues ); |
---|
204 | qp->recv.psn = ( random() & 0xffffffUL ); |
---|
205 | qp->recv.num_wqes = num_recv_wqes; |
---|
206 | qp->recv.iobufs = ( ( ( void * ) qp ) + sizeof ( *qp ) + |
---|
207 | ( num_send_wqes * sizeof ( qp->send.iobufs[0] ) )); |
---|
208 | INIT_LIST_HEAD ( &qp->mgids ); |
---|
209 | |
---|
210 | /* Perform device-specific initialisation and get QPN */ |
---|
211 | if ( ( rc = ibdev->op->create_qp ( ibdev, qp ) ) != 0 ) { |
---|
212 | DBGC ( ibdev, "IBDEV %p could not initialise queue pair: " |
---|
213 | "%s\n", ibdev, strerror ( rc ) ); |
---|
214 | goto err_dev_create_qp; |
---|
215 | } |
---|
216 | DBGC ( ibdev, "IBDEV %p created queue pair %p (%p) with QPN %#lx\n", |
---|
217 | ibdev, qp, ib_qp_get_drvdata ( qp ), qp->qpn ); |
---|
218 | DBGC ( ibdev, "IBDEV %p QPN %#lx has %d send entries at [%p,%p)\n", |
---|
219 | ibdev, qp->qpn, num_send_wqes, qp->send.iobufs, |
---|
220 | qp->recv.iobufs ); |
---|
221 | DBGC ( ibdev, "IBDEV %p QPN %#lx has %d receive entries at [%p,%p)\n", |
---|
222 | ibdev, qp->qpn, num_recv_wqes, qp->recv.iobufs, |
---|
223 | ( ( ( void * ) qp ) + total_size ) ); |
---|
224 | |
---|
225 | /* Calculate externally-visible QPN */ |
---|
226 | switch ( type ) { |
---|
227 | case IB_QPT_SMI: |
---|
228 | qp->ext_qpn = IB_QPN_SMI; |
---|
229 | break; |
---|
230 | case IB_QPT_GSI: |
---|
231 | qp->ext_qpn = IB_QPN_GSI; |
---|
232 | break; |
---|
233 | default: |
---|
234 | qp->ext_qpn = qp->qpn; |
---|
235 | break; |
---|
236 | } |
---|
237 | if ( qp->ext_qpn != qp->qpn ) { |
---|
238 | DBGC ( ibdev, "IBDEV %p QPN %#lx has external QPN %#lx\n", |
---|
239 | ibdev, qp->qpn, qp->ext_qpn ); |
---|
240 | } |
---|
241 | |
---|
242 | return qp; |
---|
243 | |
---|
244 | ibdev->op->destroy_qp ( ibdev, qp ); |
---|
245 | err_dev_create_qp: |
---|
246 | list_del ( &qp->send.list ); |
---|
247 | list_del ( &qp->recv.list ); |
---|
248 | list_del ( &qp->list ); |
---|
249 | free ( qp ); |
---|
250 | err_alloc_qp: |
---|
251 | return NULL; |
---|
252 | } |
---|
253 | |
---|
254 | /** |
---|
255 | * Modify queue pair |
---|
256 | * |
---|
257 | * @v ibdev Infiniband device |
---|
258 | * @v qp Queue pair |
---|
259 | * @v av New address vector, if applicable |
---|
260 | * @ret rc Return status code |
---|
261 | */ |
---|
262 | int ib_modify_qp ( struct ib_device *ibdev, struct ib_queue_pair *qp ) { |
---|
263 | int rc; |
---|
264 | |
---|
265 | DBGC ( ibdev, "IBDEV %p modifying QPN %#lx\n", ibdev, qp->qpn ); |
---|
266 | |
---|
267 | if ( ( rc = ibdev->op->modify_qp ( ibdev, qp ) ) != 0 ) { |
---|
268 | DBGC ( ibdev, "IBDEV %p could not modify QPN %#lx: %s\n", |
---|
269 | ibdev, qp->qpn, strerror ( rc ) ); |
---|
270 | return rc; |
---|
271 | } |
---|
272 | |
---|
273 | return 0; |
---|
274 | } |
---|
275 | |
---|
276 | /** |
---|
277 | * Destroy queue pair |
---|
278 | * |
---|
279 | * @v ibdev Infiniband device |
---|
280 | * @v qp Queue pair |
---|
281 | */ |
---|
282 | void ib_destroy_qp ( struct ib_device *ibdev, struct ib_queue_pair *qp ) { |
---|
283 | struct io_buffer *iobuf; |
---|
284 | unsigned int i; |
---|
285 | |
---|
286 | DBGC ( ibdev, "IBDEV %p destroying QPN %#lx\n", |
---|
287 | ibdev, qp->qpn ); |
---|
288 | |
---|
289 | assert ( list_empty ( &qp->mgids ) ); |
---|
290 | |
---|
291 | /* Perform device-specific destruction */ |
---|
292 | ibdev->op->destroy_qp ( ibdev, qp ); |
---|
293 | |
---|
294 | /* Complete any remaining I/O buffers with errors */ |
---|
295 | for ( i = 0 ; i < qp->send.num_wqes ; i++ ) { |
---|
296 | if ( ( iobuf = qp->send.iobufs[i] ) != NULL ) |
---|
297 | ib_complete_send ( ibdev, qp, iobuf, -ECANCELED ); |
---|
298 | } |
---|
299 | for ( i = 0 ; i < qp->recv.num_wqes ; i++ ) { |
---|
300 | if ( ( iobuf = qp->recv.iobufs[i] ) != NULL ) { |
---|
301 | ib_complete_recv ( ibdev, qp, NULL, iobuf, |
---|
302 | -ECANCELED ); |
---|
303 | } |
---|
304 | } |
---|
305 | |
---|
306 | /* Remove work queues from completion queue */ |
---|
307 | list_del ( &qp->send.list ); |
---|
308 | list_del ( &qp->recv.list ); |
---|
309 | |
---|
310 | /* Free QP */ |
---|
311 | list_del ( &qp->list ); |
---|
312 | free ( qp ); |
---|
313 | } |
---|
314 | |
---|
315 | /** |
---|
316 | * Find queue pair by QPN |
---|
317 | * |
---|
318 | * @v ibdev Infiniband device |
---|
319 | * @v qpn Queue pair number |
---|
320 | * @ret qp Queue pair, or NULL |
---|
321 | */ |
---|
322 | struct ib_queue_pair * ib_find_qp_qpn ( struct ib_device *ibdev, |
---|
323 | unsigned long qpn ) { |
---|
324 | struct ib_queue_pair *qp; |
---|
325 | |
---|
326 | list_for_each_entry ( qp, &ibdev->qps, list ) { |
---|
327 | if ( ( qpn == qp->qpn ) || ( qpn == qp->ext_qpn ) ) |
---|
328 | return qp; |
---|
329 | } |
---|
330 | return NULL; |
---|
331 | } |
---|
332 | |
---|
333 | /** |
---|
334 | * Find queue pair by multicast GID |
---|
335 | * |
---|
336 | * @v ibdev Infiniband device |
---|
337 | * @v gid Multicast GID |
---|
338 | * @ret qp Queue pair, or NULL |
---|
339 | */ |
---|
340 | struct ib_queue_pair * ib_find_qp_mgid ( struct ib_device *ibdev, |
---|
341 | struct ib_gid *gid ) { |
---|
342 | struct ib_queue_pair *qp; |
---|
343 | struct ib_multicast_gid *mgid; |
---|
344 | |
---|
345 | list_for_each_entry ( qp, &ibdev->qps, list ) { |
---|
346 | list_for_each_entry ( mgid, &qp->mgids, list ) { |
---|
347 | if ( memcmp ( &mgid->gid, gid, |
---|
348 | sizeof ( mgid->gid ) ) == 0 ) { |
---|
349 | return qp; |
---|
350 | } |
---|
351 | } |
---|
352 | } |
---|
353 | return NULL; |
---|
354 | } |
---|
355 | |
---|
356 | /** |
---|
357 | * Find work queue belonging to completion queue |
---|
358 | * |
---|
359 | * @v cq Completion queue |
---|
360 | * @v qpn Queue pair number |
---|
361 | * @v is_send Find send work queue (rather than receive) |
---|
362 | * @ret wq Work queue, or NULL if not found |
---|
363 | */ |
---|
364 | struct ib_work_queue * ib_find_wq ( struct ib_completion_queue *cq, |
---|
365 | unsigned long qpn, int is_send ) { |
---|
366 | struct ib_work_queue *wq; |
---|
367 | |
---|
368 | list_for_each_entry ( wq, &cq->work_queues, list ) { |
---|
369 | if ( ( wq->qp->qpn == qpn ) && ( wq->is_send == is_send ) ) |
---|
370 | return wq; |
---|
371 | } |
---|
372 | return NULL; |
---|
373 | } |
---|
374 | |
---|
375 | /** |
---|
376 | * Post send work queue entry |
---|
377 | * |
---|
378 | * @v ibdev Infiniband device |
---|
379 | * @v qp Queue pair |
---|
380 | * @v av Address vector |
---|
381 | * @v iobuf I/O buffer |
---|
382 | * @ret rc Return status code |
---|
383 | */ |
---|
384 | int ib_post_send ( struct ib_device *ibdev, struct ib_queue_pair *qp, |
---|
385 | struct ib_address_vector *av, |
---|
386 | struct io_buffer *iobuf ) { |
---|
387 | struct ib_address_vector av_copy; |
---|
388 | int rc; |
---|
389 | |
---|
390 | /* Check queue fill level */ |
---|
391 | if ( qp->send.fill >= qp->send.num_wqes ) { |
---|
392 | DBGC ( ibdev, "IBDEV %p QPN %#lx send queue full\n", |
---|
393 | ibdev, qp->qpn ); |
---|
394 | return -ENOBUFS; |
---|
395 | } |
---|
396 | |
---|
397 | /* Use default address vector if none specified */ |
---|
398 | if ( ! av ) |
---|
399 | av = &qp->av; |
---|
400 | |
---|
401 | /* Make modifiable copy of address vector */ |
---|
402 | memcpy ( &av_copy, av, sizeof ( av_copy ) ); |
---|
403 | av = &av_copy; |
---|
404 | |
---|
405 | /* Fill in optional parameters in address vector */ |
---|
406 | if ( ! av->qkey ) |
---|
407 | av->qkey = qp->qkey; |
---|
408 | if ( ! av->rate ) |
---|
409 | av->rate = IB_RATE_2_5; |
---|
410 | |
---|
411 | /* Post to hardware */ |
---|
412 | if ( ( rc = ibdev->op->post_send ( ibdev, qp, av, iobuf ) ) != 0 ) { |
---|
413 | DBGC ( ibdev, "IBDEV %p QPN %#lx could not post send WQE: " |
---|
414 | "%s\n", ibdev, qp->qpn, strerror ( rc ) ); |
---|
415 | return rc; |
---|
416 | } |
---|
417 | |
---|
418 | qp->send.fill++; |
---|
419 | return 0; |
---|
420 | } |
---|
421 | |
---|
422 | /** |
---|
423 | * Post receive work queue entry |
---|
424 | * |
---|
425 | * @v ibdev Infiniband device |
---|
426 | * @v qp Queue pair |
---|
427 | * @v iobuf I/O buffer |
---|
428 | * @ret rc Return status code |
---|
429 | */ |
---|
430 | int ib_post_recv ( struct ib_device *ibdev, struct ib_queue_pair *qp, |
---|
431 | struct io_buffer *iobuf ) { |
---|
432 | int rc; |
---|
433 | |
---|
434 | /* Check packet length */ |
---|
435 | if ( iob_tailroom ( iobuf ) < IB_MAX_PAYLOAD_SIZE ) { |
---|
436 | DBGC ( ibdev, "IBDEV %p QPN %#lx wrong RX buffer size (%zd)\n", |
---|
437 | ibdev, qp->qpn, iob_tailroom ( iobuf ) ); |
---|
438 | return -EINVAL; |
---|
439 | } |
---|
440 | |
---|
441 | /* Check queue fill level */ |
---|
442 | if ( qp->recv.fill >= qp->recv.num_wqes ) { |
---|
443 | DBGC ( ibdev, "IBDEV %p QPN %#lx receive queue full\n", |
---|
444 | ibdev, qp->qpn ); |
---|
445 | return -ENOBUFS; |
---|
446 | } |
---|
447 | |
---|
448 | /* Post to hardware */ |
---|
449 | if ( ( rc = ibdev->op->post_recv ( ibdev, qp, iobuf ) ) != 0 ) { |
---|
450 | DBGC ( ibdev, "IBDEV %p QPN %#lx could not post receive WQE: " |
---|
451 | "%s\n", ibdev, qp->qpn, strerror ( rc ) ); |
---|
452 | return rc; |
---|
453 | } |
---|
454 | |
---|
455 | qp->recv.fill++; |
---|
456 | return 0; |
---|
457 | } |
---|
458 | |
---|
459 | /** |
---|
460 | * Complete send work queue entry |
---|
461 | * |
---|
462 | * @v ibdev Infiniband device |
---|
463 | * @v qp Queue pair |
---|
464 | * @v iobuf I/O buffer |
---|
465 | * @v rc Completion status code |
---|
466 | */ |
---|
467 | void ib_complete_send ( struct ib_device *ibdev, struct ib_queue_pair *qp, |
---|
468 | struct io_buffer *iobuf, int rc ) { |
---|
469 | |
---|
470 | if ( qp->send.cq->op->complete_send ) { |
---|
471 | qp->send.cq->op->complete_send ( ibdev, qp, iobuf, rc ); |
---|
472 | } else { |
---|
473 | free_iob ( iobuf ); |
---|
474 | } |
---|
475 | qp->send.fill--; |
---|
476 | } |
---|
477 | |
---|
478 | /** |
---|
479 | * Complete receive work queue entry |
---|
480 | * |
---|
481 | * @v ibdev Infiniband device |
---|
482 | * @v qp Queue pair |
---|
483 | * @v av Address vector |
---|
484 | * @v iobuf I/O buffer |
---|
485 | * @v rc Completion status code |
---|
486 | */ |
---|
487 | void ib_complete_recv ( struct ib_device *ibdev, struct ib_queue_pair *qp, |
---|
488 | struct ib_address_vector *av, |
---|
489 | struct io_buffer *iobuf, int rc ) { |
---|
490 | |
---|
491 | if ( qp->recv.cq->op->complete_recv ) { |
---|
492 | qp->recv.cq->op->complete_recv ( ibdev, qp, av, iobuf, rc ); |
---|
493 | } else { |
---|
494 | free_iob ( iobuf ); |
---|
495 | } |
---|
496 | qp->recv.fill--; |
---|
497 | } |
---|
498 | |
---|
499 | /** |
---|
500 | * Refill receive work queue |
---|
501 | * |
---|
502 | * @v ibdev Infiniband device |
---|
503 | * @v qp Queue pair |
---|
504 | */ |
---|
505 | void ib_refill_recv ( struct ib_device *ibdev, struct ib_queue_pair *qp ) { |
---|
506 | struct io_buffer *iobuf; |
---|
507 | int rc; |
---|
508 | |
---|
509 | /* Keep filling while unfilled entries remain */ |
---|
510 | while ( qp->recv.fill < qp->recv.num_wqes ) { |
---|
511 | |
---|
512 | /* Allocate I/O buffer */ |
---|
513 | iobuf = alloc_iob ( IB_MAX_PAYLOAD_SIZE ); |
---|
514 | if ( ! iobuf ) { |
---|
515 | /* Non-fatal; we will refill on next attempt */ |
---|
516 | return; |
---|
517 | } |
---|
518 | |
---|
519 | /* Post I/O buffer */ |
---|
520 | if ( ( rc = ib_post_recv ( ibdev, qp, iobuf ) ) != 0 ) { |
---|
521 | DBGC ( ibdev, "IBDEV %p could not refill: %s\n", |
---|
522 | ibdev, strerror ( rc ) ); |
---|
523 | free_iob ( iobuf ); |
---|
524 | /* Give up */ |
---|
525 | return; |
---|
526 | } |
---|
527 | } |
---|
528 | } |
---|
529 | |
---|
530 | /*************************************************************************** |
---|
531 | * |
---|
532 | * Link control |
---|
533 | * |
---|
534 | *************************************************************************** |
---|
535 | */ |
---|
536 | |
---|
537 | /** |
---|
538 | * Open port |
---|
539 | * |
---|
540 | * @v ibdev Infiniband device |
---|
541 | * @ret rc Return status code |
---|
542 | */ |
---|
543 | int ib_open ( struct ib_device *ibdev ) { |
---|
544 | int rc; |
---|
545 | |
---|
546 | /* Increment device open request counter */ |
---|
547 | if ( ibdev->open_count++ > 0 ) { |
---|
548 | /* Device was already open; do nothing */ |
---|
549 | return 0; |
---|
550 | } |
---|
551 | |
---|
552 | /* Create subnet management interface */ |
---|
553 | ibdev->smi = ib_create_mi ( ibdev, IB_QPT_SMI ); |
---|
554 | if ( ! ibdev->smi ) { |
---|
555 | DBGC ( ibdev, "IBDEV %p could not create SMI\n", ibdev ); |
---|
556 | rc = -ENOMEM; |
---|
557 | goto err_create_smi; |
---|
558 | } |
---|
559 | |
---|
560 | /* Create subnet management agent */ |
---|
561 | if ( ( rc = ib_create_sma ( ibdev, ibdev->smi ) ) != 0 ) { |
---|
562 | DBGC ( ibdev, "IBDEV %p could not create SMA: %s\n", |
---|
563 | ibdev, strerror ( rc ) ); |
---|
564 | goto err_create_sma; |
---|
565 | } |
---|
566 | |
---|
567 | /* Create general services interface */ |
---|
568 | ibdev->gsi = ib_create_mi ( ibdev, IB_QPT_GSI ); |
---|
569 | if ( ! ibdev->gsi ) { |
---|
570 | DBGC ( ibdev, "IBDEV %p could not create GSI\n", ibdev ); |
---|
571 | rc = -ENOMEM; |
---|
572 | goto err_create_gsi; |
---|
573 | } |
---|
574 | |
---|
575 | /* Open device */ |
---|
576 | if ( ( rc = ibdev->op->open ( ibdev ) ) != 0 ) { |
---|
577 | DBGC ( ibdev, "IBDEV %p could not open: %s\n", |
---|
578 | ibdev, strerror ( rc ) ); |
---|
579 | goto err_open; |
---|
580 | } |
---|
581 | |
---|
582 | /* Add to head of open devices list */ |
---|
583 | list_add ( &ibdev->open_list, &open_ib_devices ); |
---|
584 | |
---|
585 | assert ( ibdev->open_count == 1 ); |
---|
586 | return 0; |
---|
587 | |
---|
588 | ibdev->op->close ( ibdev ); |
---|
589 | err_open: |
---|
590 | ib_destroy_mi ( ibdev, ibdev->gsi ); |
---|
591 | err_create_gsi: |
---|
592 | ib_destroy_sma ( ibdev, ibdev->smi ); |
---|
593 | err_create_sma: |
---|
594 | ib_destroy_mi ( ibdev, ibdev->smi ); |
---|
595 | err_create_smi: |
---|
596 | assert ( ibdev->open_count == 1 ); |
---|
597 | ibdev->open_count = 0; |
---|
598 | return rc; |
---|
599 | } |
---|
600 | |
---|
601 | /** |
---|
602 | * Close port |
---|
603 | * |
---|
604 | * @v ibdev Infiniband device |
---|
605 | */ |
---|
606 | void ib_close ( struct ib_device *ibdev ) { |
---|
607 | |
---|
608 | /* Decrement device open request counter */ |
---|
609 | ibdev->open_count--; |
---|
610 | |
---|
611 | /* Close device if this was the last remaining requested opening */ |
---|
612 | if ( ibdev->open_count == 0 ) { |
---|
613 | list_del ( &ibdev->open_list ); |
---|
614 | ib_destroy_mi ( ibdev, ibdev->gsi ); |
---|
615 | ib_destroy_sma ( ibdev, ibdev->smi ); |
---|
616 | ib_destroy_mi ( ibdev, ibdev->smi ); |
---|
617 | ibdev->op->close ( ibdev ); |
---|
618 | } |
---|
619 | } |
---|
620 | |
---|
621 | /** |
---|
622 | * Get link state |
---|
623 | * |
---|
624 | * @v ibdev Infiniband device |
---|
625 | * @ret rc Link status code |
---|
626 | */ |
---|
627 | int ib_link_rc ( struct ib_device *ibdev ) { |
---|
628 | switch ( ibdev->port_state ) { |
---|
629 | case IB_PORT_STATE_DOWN: return -ENOTCONN; |
---|
630 | case IB_PORT_STATE_INIT: return -EINPROGRESS_INIT; |
---|
631 | case IB_PORT_STATE_ARMED: return -EINPROGRESS_ARMED; |
---|
632 | case IB_PORT_STATE_ACTIVE: return 0; |
---|
633 | default: return -EINVAL; |
---|
634 | } |
---|
635 | } |
---|
636 | |
---|
637 | /*************************************************************************** |
---|
638 | * |
---|
639 | * Multicast |
---|
640 | * |
---|
641 | *************************************************************************** |
---|
642 | */ |
---|
643 | |
---|
644 | /** |
---|
645 | * Attach to multicast group |
---|
646 | * |
---|
647 | * @v ibdev Infiniband device |
---|
648 | * @v qp Queue pair |
---|
649 | * @v gid Multicast GID |
---|
650 | * @ret rc Return status code |
---|
651 | * |
---|
652 | * Note that this function handles only the local device's attachment |
---|
653 | * to the multicast GID; it does not issue the relevant MADs to join |
---|
654 | * the multicast group on the subnet. |
---|
655 | */ |
---|
656 | int ib_mcast_attach ( struct ib_device *ibdev, struct ib_queue_pair *qp, |
---|
657 | struct ib_gid *gid ) { |
---|
658 | struct ib_multicast_gid *mgid; |
---|
659 | int rc; |
---|
660 | |
---|
661 | /* Add to software multicast GID list */ |
---|
662 | mgid = zalloc ( sizeof ( *mgid ) ); |
---|
663 | if ( ! mgid ) { |
---|
664 | rc = -ENOMEM; |
---|
665 | goto err_alloc_mgid; |
---|
666 | } |
---|
667 | memcpy ( &mgid->gid, gid, sizeof ( mgid->gid ) ); |
---|
668 | list_add ( &mgid->list, &qp->mgids ); |
---|
669 | |
---|
670 | /* Add to hardware multicast GID list */ |
---|
671 | if ( ( rc = ibdev->op->mcast_attach ( ibdev, qp, gid ) ) != 0 ) |
---|
672 | goto err_dev_mcast_attach; |
---|
673 | |
---|
674 | return 0; |
---|
675 | |
---|
676 | err_dev_mcast_attach: |
---|
677 | list_del ( &mgid->list ); |
---|
678 | free ( mgid ); |
---|
679 | err_alloc_mgid: |
---|
680 | return rc; |
---|
681 | } |
---|
682 | |
---|
683 | /** |
---|
684 | * Detach from multicast group |
---|
685 | * |
---|
686 | * @v ibdev Infiniband device |
---|
687 | * @v qp Queue pair |
---|
688 | * @v gid Multicast GID |
---|
689 | */ |
---|
690 | void ib_mcast_detach ( struct ib_device *ibdev, struct ib_queue_pair *qp, |
---|
691 | struct ib_gid *gid ) { |
---|
692 | struct ib_multicast_gid *mgid; |
---|
693 | |
---|
694 | /* Remove from hardware multicast GID list */ |
---|
695 | ibdev->op->mcast_detach ( ibdev, qp, gid ); |
---|
696 | |
---|
697 | /* Remove from software multicast GID list */ |
---|
698 | list_for_each_entry ( mgid, &qp->mgids, list ) { |
---|
699 | if ( memcmp ( &mgid->gid, gid, sizeof ( mgid->gid ) ) == 0 ) { |
---|
700 | list_del ( &mgid->list ); |
---|
701 | free ( mgid ); |
---|
702 | break; |
---|
703 | } |
---|
704 | } |
---|
705 | } |
---|
706 | |
---|
707 | /*************************************************************************** |
---|
708 | * |
---|
709 | * Miscellaneous |
---|
710 | * |
---|
711 | *************************************************************************** |
---|
712 | */ |
---|
713 | |
---|
714 | /** |
---|
715 | * Get Infiniband HCA information |
---|
716 | * |
---|
717 | * @v ibdev Infiniband device |
---|
718 | * @ret hca_guid HCA GUID |
---|
719 | * @ret num_ports Number of ports |
---|
720 | */ |
---|
721 | int ib_get_hca_info ( struct ib_device *ibdev, |
---|
722 | struct ib_gid_half *hca_guid ) { |
---|
723 | struct ib_device *tmp; |
---|
724 | int num_ports = 0; |
---|
725 | |
---|
726 | /* Search for IB devices with the same physical device to |
---|
727 | * identify port count and a suitable Node GUID. |
---|
728 | */ |
---|
729 | for_each_ibdev ( tmp ) { |
---|
730 | if ( tmp->dev != ibdev->dev ) |
---|
731 | continue; |
---|
732 | if ( num_ports == 0 ) { |
---|
733 | memcpy ( hca_guid, &tmp->gid.u.half[1], |
---|
734 | sizeof ( *hca_guid ) ); |
---|
735 | } |
---|
736 | num_ports++; |
---|
737 | } |
---|
738 | return num_ports; |
---|
739 | } |
---|
740 | |
---|
741 | /** |
---|
742 | * Set port information |
---|
743 | * |
---|
744 | * @v ibdev Infiniband device |
---|
745 | * @v mad Set port information MAD |
---|
746 | */ |
---|
747 | int ib_set_port_info ( struct ib_device *ibdev, union ib_mad *mad ) { |
---|
748 | int rc; |
---|
749 | |
---|
750 | /* Adapters with embedded SMAs do not need to support this method */ |
---|
751 | if ( ! ibdev->op->set_port_info ) { |
---|
752 | DBGC ( ibdev, "IBDEV %p does not support setting port " |
---|
753 | "information\n", ibdev ); |
---|
754 | return -ENOTSUP; |
---|
755 | } |
---|
756 | |
---|
757 | if ( ( rc = ibdev->op->set_port_info ( ibdev, mad ) ) != 0 ) { |
---|
758 | DBGC ( ibdev, "IBDEV %p could not set port information: %s\n", |
---|
759 | ibdev, strerror ( rc ) ); |
---|
760 | return rc; |
---|
761 | } |
---|
762 | |
---|
763 | return 0; |
---|
764 | }; |
---|
765 | |
---|
766 | /** |
---|
767 | * Set partition key table |
---|
768 | * |
---|
769 | * @v ibdev Infiniband device |
---|
770 | * @v mad Set partition key table MAD |
---|
771 | */ |
---|
772 | int ib_set_pkey_table ( struct ib_device *ibdev, union ib_mad *mad ) { |
---|
773 | int rc; |
---|
774 | |
---|
775 | /* Adapters with embedded SMAs do not need to support this method */ |
---|
776 | if ( ! ibdev->op->set_pkey_table ) { |
---|
777 | DBGC ( ibdev, "IBDEV %p does not support setting partition " |
---|
778 | "key table\n", ibdev ); |
---|
779 | return -ENOTSUP; |
---|
780 | } |
---|
781 | |
---|
782 | if ( ( rc = ibdev->op->set_pkey_table ( ibdev, mad ) ) != 0 ) { |
---|
783 | DBGC ( ibdev, "IBDEV %p could not set partition key table: " |
---|
784 | "%s\n", ibdev, strerror ( rc ) ); |
---|
785 | return rc; |
---|
786 | } |
---|
787 | |
---|
788 | return 0; |
---|
789 | }; |
---|
790 | |
---|
791 | /*************************************************************************** |
---|
792 | * |
---|
793 | * Event queues |
---|
794 | * |
---|
795 | *************************************************************************** |
---|
796 | */ |
---|
797 | |
---|
/**
 * Handle Infiniband link state change
 *
 * @v ibdev		Infiniband device
 *
 * The only action taken is to forward the notification to IPoIB.
 */
void ib_link_state_changed ( struct ib_device *ibdev ) {

	/* Pass the event on to the IPoIB layer */
	ipoib_link_state_changed ( ibdev );
}
---|
808 | |
---|
809 | /** |
---|
810 | * Poll event queue |
---|
811 | * |
---|
812 | * @v ibdev Infiniband device |
---|
813 | */ |
---|
814 | void ib_poll_eq ( struct ib_device *ibdev ) { |
---|
815 | struct ib_completion_queue *cq; |
---|
816 | |
---|
817 | /* Poll device's event queue */ |
---|
818 | ibdev->op->poll_eq ( ibdev ); |
---|
819 | |
---|
820 | /* Poll all completion queues */ |
---|
821 | list_for_each_entry ( cq, &ibdev->cqs, list ) |
---|
822 | ib_poll_cq ( ibdev, cq ); |
---|
823 | } |
---|
824 | |
---|
825 | /** |
---|
826 | * Single-step the Infiniband event queue |
---|
827 | * |
---|
828 | * @v process Infiniband event queue process |
---|
829 | */ |
---|
830 | static void ib_step ( struct process *process __unused ) { |
---|
831 | struct ib_device *ibdev; |
---|
832 | |
---|
833 | for_each_ibdev ( ibdev ) |
---|
834 | ib_poll_eq ( ibdev ); |
---|
835 | } |
---|
836 | |
---|
/**
 * Infiniband event queue process
 *
 * Its step method is ib_step(), which polls the event queue of every
 * registered Infiniband device.
 */
struct process ib_process __permanent_process = {
	.list = LIST_HEAD_INIT ( ib_process.list ),
	.step = ib_step,
};
---|
842 | |
---|
843 | /*************************************************************************** |
---|
844 | * |
---|
845 | * Infiniband device creation/destruction |
---|
846 | * |
---|
847 | *************************************************************************** |
---|
848 | */ |
---|
849 | |
---|
850 | /** |
---|
851 | * Allocate Infiniband device |
---|
852 | * |
---|
853 | * @v priv_size Size of driver private data area |
---|
854 | * @ret ibdev Infiniband device, or NULL |
---|
855 | */ |
---|
856 | struct ib_device * alloc_ibdev ( size_t priv_size ) { |
---|
857 | struct ib_device *ibdev; |
---|
858 | void *drv_priv; |
---|
859 | size_t total_len; |
---|
860 | |
---|
861 | total_len = ( sizeof ( *ibdev ) + priv_size ); |
---|
862 | ibdev = zalloc ( total_len ); |
---|
863 | if ( ibdev ) { |
---|
864 | drv_priv = ( ( ( void * ) ibdev ) + sizeof ( *ibdev ) ); |
---|
865 | ib_set_drvdata ( ibdev, drv_priv ); |
---|
866 | INIT_LIST_HEAD ( &ibdev->cqs ); |
---|
867 | INIT_LIST_HEAD ( &ibdev->qps ); |
---|
868 | ibdev->port_state = IB_PORT_STATE_DOWN; |
---|
869 | ibdev->lid = IB_LID_NONE; |
---|
870 | ibdev->pkey = IB_PKEY_DEFAULT; |
---|
871 | } |
---|
872 | return ibdev; |
---|
873 | } |
---|
874 | |
---|
875 | /** |
---|
876 | * Register Infiniband device |
---|
877 | * |
---|
878 | * @v ibdev Infiniband device |
---|
879 | * @ret rc Return status code |
---|
880 | */ |
---|
881 | int register_ibdev ( struct ib_device *ibdev ) { |
---|
882 | int rc; |
---|
883 | |
---|
884 | /* Add to device list */ |
---|
885 | ibdev_get ( ibdev ); |
---|
886 | list_add_tail ( &ibdev->list, &ib_devices ); |
---|
887 | |
---|
888 | /* Add IPoIB device */ |
---|
889 | if ( ( rc = ipoib_probe ( ibdev ) ) != 0 ) { |
---|
890 | DBGC ( ibdev, "IBDEV %p could not add IPoIB device: %s\n", |
---|
891 | ibdev, strerror ( rc ) ); |
---|
892 | goto err_ipoib_probe; |
---|
893 | } |
---|
894 | |
---|
895 | DBGC ( ibdev, "IBDEV %p registered (phys %s)\n", ibdev, |
---|
896 | ibdev->dev->name ); |
---|
897 | return 0; |
---|
898 | |
---|
899 | err_ipoib_probe: |
---|
900 | list_del ( &ibdev->list ); |
---|
901 | ibdev_put ( ibdev ); |
---|
902 | return rc; |
---|
903 | } |
---|
904 | |
---|
905 | /** |
---|
906 | * Unregister Infiniband device |
---|
907 | * |
---|
908 | * @v ibdev Infiniband device |
---|
909 | */ |
---|
910 | void unregister_ibdev ( struct ib_device *ibdev ) { |
---|
911 | |
---|
912 | /* Close device */ |
---|
913 | ipoib_remove ( ibdev ); |
---|
914 | |
---|
915 | /* Remove from device list */ |
---|
916 | list_del ( &ibdev->list ); |
---|
917 | ibdev_put ( ibdev ); |
---|
918 | DBGC ( ibdev, "IBDEV %p unregistered\n", ibdev ); |
---|
919 | } |
---|
920 | |
---|
921 | /** |
---|
922 | * Find Infiniband device by GID |
---|
923 | * |
---|
924 | * @v gid GID |
---|
925 | * @ret ibdev Infiniband device, or NULL |
---|
926 | */ |
---|
927 | struct ib_device * find_ibdev ( struct ib_gid *gid ) { |
---|
928 | struct ib_device *ibdev; |
---|
929 | |
---|
930 | for_each_ibdev ( ibdev ) { |
---|
931 | if ( memcmp ( gid, &ibdev->gid, sizeof ( *gid ) ) == 0 ) |
---|
932 | return ibdev; |
---|
933 | } |
---|
934 | return NULL; |
---|
935 | } |
---|
936 | |
---|
937 | /** |
---|
938 | * Get most recently opened Infiniband device |
---|
939 | * |
---|
940 | * @ret ibdev Most recently opened Infiniband device, or NULL |
---|
941 | */ |
---|
942 | struct ib_device * last_opened_ibdev ( void ) { |
---|
943 | struct ib_device *ibdev; |
---|
944 | |
---|
945 | list_for_each_entry ( ibdev, &open_ib_devices, open_list ) { |
---|
946 | assert ( ibdev->open_count != 0 ); |
---|
947 | return ibdev; |
---|
948 | } |
---|
949 | |
---|
950 | return NULL; |
---|
951 | } |
---|