1 | #include <string.h> |
---|
2 | #include <stdlib.h> |
---|
3 | #include <stdio.h> |
---|
4 | #include <assert.h> |
---|
5 | #include <errno.h> |
---|
6 | #include <byteswap.h> |
---|
7 | #include <gpxe/timer.h> |
---|
8 | #include <gpxe/iobuf.h> |
---|
9 | #include <gpxe/malloc.h> |
---|
10 | #include <gpxe/retry.h> |
---|
11 | #include <gpxe/refcnt.h> |
---|
12 | #include <gpxe/xfer.h> |
---|
13 | #include <gpxe/open.h> |
---|
14 | #include <gpxe/uri.h> |
---|
15 | #include <gpxe/tcpip.h> |
---|
16 | #include <gpxe/tcp.h> |
---|
17 | |
---|
18 | /** @file |
---|
19 | * |
---|
20 | * TCP protocol |
---|
21 | * |
---|
22 | */ |
---|
23 | |
---|
24 | FILE_LICENCE ( GPL2_OR_LATER ); |
---|
25 | |
---|
26 | /** A TCP connection */ |
---|
27 | struct tcp_connection { |
---|
28 | /** Reference counter */ |
---|
29 | struct refcnt refcnt; |
---|
30 | /** List of TCP connections */ |
---|
31 | struct list_head list; |
---|
32 | |
---|
33 | /** Data transfer interface */ |
---|
34 | struct xfer_interface xfer; |
---|
35 | /** Data transfer interface closed flag */ |
---|
36 | int xfer_closed; |
---|
37 | |
---|
38 | /** Remote socket address */ |
---|
39 | struct sockaddr_tcpip peer; |
---|
40 | /** Local port, in network byte order */ |
---|
41 | unsigned int local_port; |
---|
42 | |
---|
43 | /** Current TCP state */ |
---|
44 | unsigned int tcp_state; |
---|
45 | /** Previous TCP state |
---|
46 | * |
---|
47 | * Maintained only for debug messages |
---|
48 | */ |
---|
49 | unsigned int prev_tcp_state; |
---|
50 | /** Current sequence number |
---|
51 | * |
---|
52 | * Equivalent to SND.UNA in RFC 793 terminology. |
---|
53 | */ |
---|
54 | uint32_t snd_seq; |
---|
55 | /** Unacknowledged sequence count |
---|
56 | * |
---|
57 | * Equivalent to (SND.NXT-SND.UNA) in RFC 793 terminology. |
---|
58 | */ |
---|
59 | uint32_t snd_sent; |
---|
60 | /** Send window |
---|
61 | * |
---|
62 | * Equivalent to SND.WND in RFC 793 terminology |
---|
63 | */ |
---|
64 | uint32_t snd_win; |
---|
65 | /** Current acknowledgement number |
---|
66 | * |
---|
67 | * Equivalent to RCV.NXT in RFC 793 terminology. |
---|
68 | */ |
---|
69 | uint32_t rcv_ack; |
---|
70 | /** Receive window |
---|
71 | * |
---|
72 | * Equivalent to RCV.WND in RFC 793 terminology. |
---|
73 | */ |
---|
74 | uint32_t rcv_win; |
---|
75 | /** Most recent received timestamp |
---|
76 | * |
---|
77 | * Equivalent to TS.Recent in RFC 1323 terminology. |
---|
78 | */ |
---|
79 | uint32_t ts_recent; |
---|
80 | /** Timestamps enabled */ |
---|
81 | int timestamps; |
---|
82 | |
---|
83 | /** Transmit queue */ |
---|
84 | struct list_head queue; |
---|
85 | /** Retransmission timer */ |
---|
86 | struct retry_timer timer; |
---|
87 | }; |
---|
88 | |
---|
89 | /** |
---|
90 | * List of registered TCP connections |
---|
91 | */ |
---|
92 | static LIST_HEAD ( tcp_conns ); |
---|
93 | |
---|
94 | /* Forward declarations */ |
---|
95 | static struct xfer_interface_operations tcp_xfer_operations; |
---|
96 | static void tcp_expired ( struct retry_timer *timer, int over ); |
---|
97 | static int tcp_rx_ack ( struct tcp_connection *tcp, uint32_t ack, |
---|
98 | uint32_t win ); |
---|
99 | |
---|
100 | /** |
---|
101 | * Name TCP state |
---|
102 | * |
---|
103 | * @v state TCP state |
---|
104 | * @ret name Name of TCP state |
---|
105 | */ |
---|
106 | static inline __attribute__ (( always_inline )) const char * |
---|
107 | tcp_state ( int state ) { |
---|
108 | switch ( state ) { |
---|
109 | case TCP_CLOSED: return "CLOSED"; |
---|
110 | case TCP_LISTEN: return "LISTEN"; |
---|
111 | case TCP_SYN_SENT: return "SYN_SENT"; |
---|
112 | case TCP_SYN_RCVD: return "SYN_RCVD"; |
---|
113 | case TCP_ESTABLISHED: return "ESTABLISHED"; |
---|
114 | case TCP_FIN_WAIT_1: return "FIN_WAIT_1"; |
---|
115 | case TCP_FIN_WAIT_2: return "FIN_WAIT_2"; |
---|
116 | case TCP_CLOSING_OR_LAST_ACK: return "CLOSING/LAST_ACK"; |
---|
117 | case TCP_TIME_WAIT: return "TIME_WAIT"; |
---|
118 | case TCP_CLOSE_WAIT: return "CLOSE_WAIT"; |
---|
119 | default: return "INVALID"; |
---|
120 | } |
---|
121 | } |
---|
122 | |
---|
123 | /** |
---|
124 | * Dump TCP state transition |
---|
125 | * |
---|
126 | * @v tcp TCP connection |
---|
127 | */ |
---|
128 | static inline __attribute__ (( always_inline )) void |
---|
129 | tcp_dump_state ( struct tcp_connection *tcp ) { |
---|
130 | |
---|
131 | if ( tcp->tcp_state != tcp->prev_tcp_state ) { |
---|
132 | DBGC ( tcp, "TCP %p transitioned from %s to %s\n", tcp, |
---|
133 | tcp_state ( tcp->prev_tcp_state ), |
---|
134 | tcp_state ( tcp->tcp_state ) ); |
---|
135 | } |
---|
136 | tcp->prev_tcp_state = tcp->tcp_state; |
---|
137 | } |
---|
138 | |
---|
139 | /** |
---|
140 | * Dump TCP flags |
---|
141 | * |
---|
142 | * @v flags TCP flags |
---|
143 | */ |
---|
144 | static inline __attribute__ (( always_inline )) void |
---|
145 | tcp_dump_flags ( struct tcp_connection *tcp, unsigned int flags ) { |
---|
146 | if ( flags & TCP_RST ) |
---|
147 | DBGC2 ( tcp, " RST" ); |
---|
148 | if ( flags & TCP_SYN ) |
---|
149 | DBGC2 ( tcp, " SYN" ); |
---|
150 | if ( flags & TCP_PSH ) |
---|
151 | DBGC2 ( tcp, " PSH" ); |
---|
152 | if ( flags & TCP_FIN ) |
---|
153 | DBGC2 ( tcp, " FIN" ); |
---|
154 | if ( flags & TCP_ACK ) |
---|
155 | DBGC2 ( tcp, " ACK" ); |
---|
156 | } |
---|
157 | |
---|
158 | /*************************************************************************** |
---|
159 | * |
---|
160 | * Open and close |
---|
161 | * |
---|
162 | *************************************************************************** |
---|
163 | */ |
---|
164 | |
---|
165 | /** |
---|
166 | * Bind TCP connection to local port |
---|
167 | * |
---|
168 | * @v tcp TCP connection |
---|
169 | * @v port Local port number, in network-endian order |
---|
170 | * @ret rc Return status code |
---|
171 | * |
---|
172 | * If the port is 0, the connection is assigned an available port |
---|
173 | * between 1024 and 65535. |
---|
174 | */ |
---|
175 | static int tcp_bind ( struct tcp_connection *tcp, unsigned int port ) { |
---|
176 | struct tcp_connection *existing; |
---|
177 | static uint16_t try_port = 1023; |
---|
178 | |
---|
179 | /* If no port specified, find the first available port */ |
---|
180 | if ( ! port ) { |
---|
181 | while ( try_port ) { |
---|
182 | try_port++; |
---|
183 | if ( try_port < 1024 ) |
---|
184 | continue; |
---|
185 | if ( tcp_bind ( tcp, htons ( try_port ) ) == 0 ) |
---|
186 | return 0; |
---|
187 | } |
---|
188 | DBGC ( tcp, "TCP %p could not bind: no free ports\n", tcp ); |
---|
189 | return -EADDRINUSE; |
---|
190 | } |
---|
191 | |
---|
192 | /* Attempt bind to local port */ |
---|
193 | list_for_each_entry ( existing, &tcp_conns, list ) { |
---|
194 | if ( existing->local_port == port ) { |
---|
195 | DBGC ( tcp, "TCP %p could not bind: port %d in use\n", |
---|
196 | tcp, ntohs ( port ) ); |
---|
197 | return -EADDRINUSE; |
---|
198 | } |
---|
199 | } |
---|
200 | tcp->local_port = port; |
---|
201 | |
---|
202 | DBGC ( tcp, "TCP %p bound to port %d\n", tcp, ntohs ( port ) ); |
---|
203 | return 0; |
---|
204 | } |
---|
205 | |
---|
206 | /** |
---|
207 | * Open a TCP connection |
---|
208 | * |
---|
209 | * @v xfer Data transfer interface |
---|
210 | * @v peer Peer socket address |
---|
211 | * @v local Local socket address, or NULL |
---|
212 | * @ret rc Return status code |
---|
213 | */ |
---|
214 | static int tcp_open ( struct xfer_interface *xfer, struct sockaddr *peer, |
---|
215 | struct sockaddr *local ) { |
---|
216 | struct sockaddr_tcpip *st_peer = ( struct sockaddr_tcpip * ) peer; |
---|
217 | struct sockaddr_tcpip *st_local = ( struct sockaddr_tcpip * ) local; |
---|
218 | struct tcp_connection *tcp; |
---|
219 | unsigned int bind_port; |
---|
220 | int rc; |
---|
221 | |
---|
222 | /* Allocate and initialise structure */ |
---|
223 | tcp = zalloc ( sizeof ( *tcp ) ); |
---|
224 | if ( ! tcp ) |
---|
225 | return -ENOMEM; |
---|
226 | DBGC ( tcp, "TCP %p allocated\n", tcp ); |
---|
227 | xfer_init ( &tcp->xfer, &tcp_xfer_operations, &tcp->refcnt ); |
---|
228 | tcp->prev_tcp_state = TCP_CLOSED; |
---|
229 | tcp->tcp_state = TCP_STATE_SENT ( TCP_SYN ); |
---|
230 | tcp_dump_state ( tcp ); |
---|
231 | tcp->snd_seq = random(); |
---|
232 | INIT_LIST_HEAD ( &tcp->queue ); |
---|
233 | tcp->timer.expired = tcp_expired; |
---|
234 | memcpy ( &tcp->peer, st_peer, sizeof ( tcp->peer ) ); |
---|
235 | |
---|
236 | /* Bind to local port */ |
---|
237 | bind_port = ( st_local ? st_local->st_port : 0 ); |
---|
238 | if ( ( rc = tcp_bind ( tcp, bind_port ) ) != 0 ) |
---|
239 | goto err; |
---|
240 | |
---|
241 | /* Start timer to initiate SYN */ |
---|
242 | start_timer_nodelay ( &tcp->timer ); |
---|
243 | |
---|
244 | /* Attach parent interface, transfer reference to connection |
---|
245 | * list and return |
---|
246 | */ |
---|
247 | xfer_plug_plug ( &tcp->xfer, xfer ); |
---|
248 | list_add ( &tcp->list, &tcp_conns ); |
---|
249 | return 0; |
---|
250 | |
---|
251 | err: |
---|
252 | ref_put ( &tcp->refcnt ); |
---|
253 | return rc; |
---|
254 | } |
---|
255 | |
---|
256 | /** |
---|
257 | * Close TCP connection |
---|
258 | * |
---|
259 | * @v tcp TCP connection |
---|
260 | * @v rc Reason for close |
---|
261 | * |
---|
262 | * Closes the data transfer interface. If the TCP state machine is in |
---|
263 | * a suitable state, the connection will be deleted. |
---|
264 | */ |
---|
265 | static void tcp_close ( struct tcp_connection *tcp, int rc ) { |
---|
266 | struct io_buffer *iobuf; |
---|
267 | struct io_buffer *tmp; |
---|
268 | |
---|
269 | /* Close data transfer interface */ |
---|
270 | xfer_nullify ( &tcp->xfer ); |
---|
271 | xfer_close ( &tcp->xfer, rc ); |
---|
272 | tcp->xfer_closed = 1; |
---|
273 | |
---|
274 | /* If we are in CLOSED, or have otherwise not yet received a |
---|
275 | * SYN (i.e. we are in LISTEN or SYN_SENT), just delete the |
---|
276 | * connection. |
---|
277 | */ |
---|
278 | if ( ! ( tcp->tcp_state & TCP_STATE_RCVD ( TCP_SYN ) ) ) { |
---|
279 | |
---|
280 | /* Transition to CLOSED for the sake of debugging messages */ |
---|
281 | tcp->tcp_state = TCP_CLOSED; |
---|
282 | tcp_dump_state ( tcp ); |
---|
283 | |
---|
284 | /* Free any unsent I/O buffers */ |
---|
285 | list_for_each_entry_safe ( iobuf, tmp, &tcp->queue, list ) { |
---|
286 | list_del ( &iobuf->list ); |
---|
287 | free_iob ( iobuf ); |
---|
288 | } |
---|
289 | |
---|
290 | /* Remove from list and drop reference */ |
---|
291 | stop_timer ( &tcp->timer ); |
---|
292 | list_del ( &tcp->list ); |
---|
293 | ref_put ( &tcp->refcnt ); |
---|
294 | DBGC ( tcp, "TCP %p connection deleted\n", tcp ); |
---|
295 | return; |
---|
296 | } |
---|
297 | |
---|
298 | /* If we have not had our SYN acknowledged (i.e. we are in |
---|
299 | * SYN_RCVD), pretend that it has been acknowledged so that we |
---|
300 | * can send a FIN without breaking things. |
---|
301 | */ |
---|
302 | if ( ! ( tcp->tcp_state & TCP_STATE_ACKED ( TCP_SYN ) ) ) |
---|
303 | tcp_rx_ack ( tcp, ( tcp->snd_seq + 1 ), 0 ); |
---|
304 | |
---|
305 | /* If we have no data remaining to send, start sending FIN */ |
---|
306 | if ( list_empty ( &tcp->queue ) ) { |
---|
307 | tcp->tcp_state |= TCP_STATE_SENT ( TCP_FIN ); |
---|
308 | tcp_dump_state ( tcp ); |
---|
309 | } |
---|
310 | } |
---|
311 | |
---|
312 | /*************************************************************************** |
---|
313 | * |
---|
314 | * Transmit data path |
---|
315 | * |
---|
316 | *************************************************************************** |
---|
317 | */ |
---|
318 | |
---|
319 | /** |
---|
320 | * Calculate transmission window |
---|
321 | * |
---|
322 | * @v tcp TCP connection |
---|
323 | * @ret len Maximum length that can be sent in a single packet |
---|
324 | */ |
---|
325 | static size_t tcp_xmit_win ( struct tcp_connection *tcp ) { |
---|
326 | size_t len; |
---|
327 | |
---|
328 | /* Not ready if we're not in a suitable connection state */ |
---|
329 | if ( ! TCP_CAN_SEND_DATA ( tcp->tcp_state ) ) |
---|
330 | return 0; |
---|
331 | |
---|
332 | /* Length is the minimum of the receiver's window and the path MTU */ |
---|
333 | len = tcp->snd_win; |
---|
334 | if ( len > TCP_PATH_MTU ) |
---|
335 | len = TCP_PATH_MTU; |
---|
336 | |
---|
337 | return len; |
---|
338 | } |
---|
339 | |
---|
340 | /** |
---|
341 | * Process TCP transmit queue |
---|
342 | * |
---|
343 | * @v tcp TCP connection |
---|
344 | * @v max_len Maximum length to process |
---|
345 | * @v dest I/O buffer to fill with data, or NULL |
---|
346 | * @v remove Remove data from queue |
---|
347 | * @ret len Length of data processed |
---|
348 | * |
---|
349 | * This processes at most @c max_len bytes from the TCP connection's |
---|
350 | * transmit queue. Data will be copied into the @c dest I/O buffer |
---|
351 | * (if provided) and, if @c remove is true, removed from the transmit |
---|
352 | * queue. |
---|
353 | */ |
---|
354 | static size_t tcp_process_queue ( struct tcp_connection *tcp, size_t max_len, |
---|
355 | struct io_buffer *dest, int remove ) { |
---|
356 | struct io_buffer *iobuf; |
---|
357 | struct io_buffer *tmp; |
---|
358 | size_t frag_len; |
---|
359 | size_t len = 0; |
---|
360 | |
---|
361 | list_for_each_entry_safe ( iobuf, tmp, &tcp->queue, list ) { |
---|
362 | frag_len = iob_len ( iobuf ); |
---|
363 | if ( frag_len > max_len ) |
---|
364 | frag_len = max_len; |
---|
365 | if ( dest ) { |
---|
366 | memcpy ( iob_put ( dest, frag_len ), iobuf->data, |
---|
367 | frag_len ); |
---|
368 | } |
---|
369 | if ( remove ) { |
---|
370 | iob_pull ( iobuf, frag_len ); |
---|
371 | if ( ! iob_len ( iobuf ) ) { |
---|
372 | list_del ( &iobuf->list ); |
---|
373 | free_iob ( iobuf ); |
---|
374 | } |
---|
375 | } |
---|
376 | len += frag_len; |
---|
377 | max_len -= frag_len; |
---|
378 | } |
---|
379 | return len; |
---|
380 | } |
---|
381 | |
---|
382 | /** |
---|
383 | * Transmit any outstanding data |
---|
384 | * |
---|
385 | * @v tcp TCP connection |
---|
386 | * @v force_send Force sending of packet |
---|
387 | * |
---|
388 | * Transmits any outstanding data on the connection. |
---|
389 | * |
---|
390 | * Note that even if an error is returned, the retransmission timer |
---|
391 | * will have been started if necessary, and so the stack will |
---|
392 | * eventually attempt to retransmit the failed packet. |
---|
393 | */ |
---|
394 | static int tcp_xmit ( struct tcp_connection *tcp, int force_send ) { |
---|
395 | struct io_buffer *iobuf; |
---|
396 | struct tcp_header *tcphdr; |
---|
397 | struct tcp_mss_option *mssopt; |
---|
398 | struct tcp_timestamp_padded_option *tsopt; |
---|
399 | void *payload; |
---|
400 | unsigned int flags; |
---|
401 | size_t len = 0; |
---|
402 | uint32_t seq_len; |
---|
403 | uint32_t app_win; |
---|
404 | uint32_t max_rcv_win; |
---|
405 | int rc; |
---|
406 | |
---|
407 | /* If retransmission timer is already running, do nothing */ |
---|
408 | if ( timer_running ( &tcp->timer ) ) |
---|
409 | return 0; |
---|
410 | |
---|
411 | /* Calculate both the actual (payload) and sequence space |
---|
412 | * lengths that we wish to transmit. |
---|
413 | */ |
---|
414 | if ( TCP_CAN_SEND_DATA ( tcp->tcp_state ) ) { |
---|
415 | len = tcp_process_queue ( tcp, tcp_xmit_win ( tcp ), |
---|
416 | NULL, 0 ); |
---|
417 | } |
---|
418 | seq_len = len; |
---|
419 | flags = TCP_FLAGS_SENDING ( tcp->tcp_state ); |
---|
420 | if ( flags & ( TCP_SYN | TCP_FIN ) ) { |
---|
421 | /* SYN or FIN consume one byte, and we can never send both */ |
---|
422 | assert ( ! ( ( flags & TCP_SYN ) && ( flags & TCP_FIN ) ) ); |
---|
423 | seq_len++; |
---|
424 | } |
---|
425 | tcp->snd_sent = seq_len; |
---|
426 | |
---|
427 | /* If we have nothing to transmit, stop now */ |
---|
428 | if ( ( seq_len == 0 ) && ! force_send ) |
---|
429 | return 0; |
---|
430 | |
---|
431 | /* If we are transmitting anything that requires |
---|
432 | * acknowledgement (i.e. consumes sequence space), start the |
---|
433 | * retransmission timer. Do this before attempting to |
---|
434 | * allocate the I/O buffer, in case allocation itself fails. |
---|
435 | */ |
---|
436 | if ( seq_len ) |
---|
437 | start_timer ( &tcp->timer ); |
---|
438 | |
---|
439 | /* Allocate I/O buffer */ |
---|
440 | iobuf = alloc_iob ( len + MAX_HDR_LEN ); |
---|
441 | if ( ! iobuf ) { |
---|
442 | DBGC ( tcp, "TCP %p could not allocate iobuf for %08x..%08x " |
---|
443 | "%08x\n", tcp, tcp->snd_seq, ( tcp->snd_seq + seq_len ), |
---|
444 | tcp->rcv_ack ); |
---|
445 | return -ENOMEM; |
---|
446 | } |
---|
447 | iob_reserve ( iobuf, MAX_HDR_LEN ); |
---|
448 | |
---|
449 | /* Fill data payload from transmit queue */ |
---|
450 | tcp_process_queue ( tcp, len, iobuf, 0 ); |
---|
451 | |
---|
452 | /* Expand receive window if possible */ |
---|
453 | max_rcv_win = ( ( freemem * 3 ) / 4 ); |
---|
454 | if ( max_rcv_win > TCP_MAX_WINDOW_SIZE ) |
---|
455 | max_rcv_win = TCP_MAX_WINDOW_SIZE; |
---|
456 | app_win = xfer_window ( &tcp->xfer ); |
---|
457 | if ( max_rcv_win > app_win ) |
---|
458 | max_rcv_win = app_win; |
---|
459 | max_rcv_win &= ~0x03; /* Keep everything dword-aligned */ |
---|
460 | if ( tcp->rcv_win < max_rcv_win ) |
---|
461 | tcp->rcv_win = max_rcv_win; |
---|
462 | |
---|
463 | /* Fill up the TCP header */ |
---|
464 | payload = iobuf->data; |
---|
465 | if ( flags & TCP_SYN ) { |
---|
466 | mssopt = iob_push ( iobuf, sizeof ( *mssopt ) ); |
---|
467 | mssopt->kind = TCP_OPTION_MSS; |
---|
468 | mssopt->length = sizeof ( *mssopt ); |
---|
469 | mssopt->mss = htons ( TCP_MSS ); |
---|
470 | } |
---|
471 | if ( ( flags & TCP_SYN ) || tcp->timestamps ) { |
---|
472 | tsopt = iob_push ( iobuf, sizeof ( *tsopt ) ); |
---|
473 | memset ( tsopt->nop, TCP_OPTION_NOP, sizeof ( tsopt->nop ) ); |
---|
474 | tsopt->tsopt.kind = TCP_OPTION_TS; |
---|
475 | tsopt->tsopt.length = sizeof ( tsopt->tsopt ); |
---|
476 | tsopt->tsopt.tsval = ntohl ( currticks() ); |
---|
477 | tsopt->tsopt.tsecr = ntohl ( tcp->ts_recent ); |
---|
478 | } |
---|
479 | if ( ! ( flags & TCP_SYN ) ) |
---|
480 | flags |= TCP_PSH; |
---|
481 | tcphdr = iob_push ( iobuf, sizeof ( *tcphdr ) ); |
---|
482 | memset ( tcphdr, 0, sizeof ( *tcphdr ) ); |
---|
483 | tcphdr->src = tcp->local_port; |
---|
484 | tcphdr->dest = tcp->peer.st_port; |
---|
485 | tcphdr->seq = htonl ( tcp->snd_seq ); |
---|
486 | tcphdr->ack = htonl ( tcp->rcv_ack ); |
---|
487 | tcphdr->hlen = ( ( payload - iobuf->data ) << 2 ); |
---|
488 | tcphdr->flags = flags; |
---|
489 | tcphdr->win = htons ( tcp->rcv_win ); |
---|
490 | tcphdr->csum = tcpip_chksum ( iobuf->data, iob_len ( iobuf ) ); |
---|
491 | |
---|
492 | /* Dump header */ |
---|
493 | DBGC2 ( tcp, "TCP %p TX %d->%d %08x..%08x %08x %4zd", |
---|
494 | tcp, ntohs ( tcphdr->src ), ntohs ( tcphdr->dest ), |
---|
495 | ntohl ( tcphdr->seq ), ( ntohl ( tcphdr->seq ) + seq_len ), |
---|
496 | ntohl ( tcphdr->ack ), len ); |
---|
497 | tcp_dump_flags ( tcp, tcphdr->flags ); |
---|
498 | DBGC2 ( tcp, "\n" ); |
---|
499 | |
---|
500 | /* Transmit packet */ |
---|
501 | if ( ( rc = tcpip_tx ( iobuf, &tcp_protocol, NULL, &tcp->peer, NULL, |
---|
502 | &tcphdr->csum ) ) != 0 ) { |
---|
503 | DBGC ( tcp, "TCP %p could not transmit %08x..%08x %08x: %s\n", |
---|
504 | tcp, tcp->snd_seq, ( tcp->snd_seq + tcp->snd_sent ), |
---|
505 | tcp->rcv_ack, strerror ( rc ) ); |
---|
506 | return rc; |
---|
507 | } |
---|
508 | |
---|
509 | return 0; |
---|
510 | } |
---|
511 | |
---|
512 | /** |
---|
513 | * Retransmission timer expired |
---|
514 | * |
---|
515 | * @v timer Retry timer |
---|
516 | * @v over Failure indicator |
---|
517 | */ |
---|
518 | static void tcp_expired ( struct retry_timer *timer, int over ) { |
---|
519 | struct tcp_connection *tcp = |
---|
520 | container_of ( timer, struct tcp_connection, timer ); |
---|
521 | int graceful_close = TCP_CLOSED_GRACEFULLY ( tcp->tcp_state ); |
---|
522 | |
---|
523 | DBGC ( tcp, "TCP %p timer %s in %s for %08x..%08x %08x\n", tcp, |
---|
524 | ( over ? "expired" : "fired" ), tcp_state ( tcp->tcp_state ), |
---|
525 | tcp->snd_seq, ( tcp->snd_seq + tcp->snd_sent ), tcp->rcv_ack ); |
---|
526 | |
---|
527 | assert ( ( tcp->tcp_state == TCP_SYN_SENT ) || |
---|
528 | ( tcp->tcp_state == TCP_SYN_RCVD ) || |
---|
529 | ( tcp->tcp_state == TCP_ESTABLISHED ) || |
---|
530 | ( tcp->tcp_state == TCP_FIN_WAIT_1 ) || |
---|
531 | ( tcp->tcp_state == TCP_TIME_WAIT ) || |
---|
532 | ( tcp->tcp_state == TCP_CLOSE_WAIT ) || |
---|
533 | ( tcp->tcp_state == TCP_CLOSING_OR_LAST_ACK ) ); |
---|
534 | |
---|
535 | if ( over || graceful_close ) { |
---|
536 | /* If we have finally timed out and given up, or if |
---|
537 | * this is the result of a graceful close, terminate |
---|
538 | * the connection |
---|
539 | */ |
---|
540 | tcp->tcp_state = TCP_CLOSED; |
---|
541 | tcp_dump_state ( tcp ); |
---|
542 | tcp_close ( tcp, -ETIMEDOUT ); |
---|
543 | } else { |
---|
544 | /* Otherwise, retransmit the packet */ |
---|
545 | tcp_xmit ( tcp, 0 ); |
---|
546 | } |
---|
547 | } |
---|
548 | |
---|
549 | /** |
---|
550 | * Send RST response to incoming packet |
---|
551 | * |
---|
552 | * @v in_tcphdr TCP header of incoming packet |
---|
553 | * @ret rc Return status code |
---|
554 | */ |
---|
555 | static int tcp_xmit_reset ( struct tcp_connection *tcp, |
---|
556 | struct sockaddr_tcpip *st_dest, |
---|
557 | struct tcp_header *in_tcphdr ) { |
---|
558 | struct io_buffer *iobuf; |
---|
559 | struct tcp_header *tcphdr; |
---|
560 | int rc; |
---|
561 | |
---|
562 | /* Allocate space for dataless TX buffer */ |
---|
563 | iobuf = alloc_iob ( MAX_HDR_LEN ); |
---|
564 | if ( ! iobuf ) { |
---|
565 | DBGC ( tcp, "TCP %p could not allocate iobuf for RST " |
---|
566 | "%08x..%08x %08x\n", tcp, ntohl ( in_tcphdr->ack ), |
---|
567 | ntohl ( in_tcphdr->ack ), ntohl ( in_tcphdr->seq ) ); |
---|
568 | return -ENOMEM; |
---|
569 | } |
---|
570 | iob_reserve ( iobuf, MAX_HDR_LEN ); |
---|
571 | |
---|
572 | /* Construct RST response */ |
---|
573 | tcphdr = iob_push ( iobuf, sizeof ( *tcphdr ) ); |
---|
574 | memset ( tcphdr, 0, sizeof ( *tcphdr ) ); |
---|
575 | tcphdr->src = in_tcphdr->dest; |
---|
576 | tcphdr->dest = in_tcphdr->src; |
---|
577 | tcphdr->seq = in_tcphdr->ack; |
---|
578 | tcphdr->ack = in_tcphdr->seq; |
---|
579 | tcphdr->hlen = ( ( sizeof ( *tcphdr ) / 4 ) << 4 ); |
---|
580 | tcphdr->flags = ( TCP_RST | TCP_ACK ); |
---|
581 | tcphdr->win = htons ( TCP_MAX_WINDOW_SIZE ); |
---|
582 | tcphdr->csum = tcpip_chksum ( iobuf->data, iob_len ( iobuf ) ); |
---|
583 | |
---|
584 | /* Dump header */ |
---|
585 | DBGC2 ( tcp, "TCP %p TX %d->%d %08x..%08x %08x %4d", |
---|
586 | tcp, ntohs ( tcphdr->src ), ntohs ( tcphdr->dest ), |
---|
587 | ntohl ( tcphdr->seq ), ( ntohl ( tcphdr->seq ) ), |
---|
588 | ntohl ( tcphdr->ack ), 0 ); |
---|
589 | tcp_dump_flags ( tcp, tcphdr->flags ); |
---|
590 | DBGC2 ( tcp, "\n" ); |
---|
591 | |
---|
592 | /* Transmit packet */ |
---|
593 | if ( ( rc = tcpip_tx ( iobuf, &tcp_protocol, NULL, st_dest, |
---|
594 | NULL, &tcphdr->csum ) ) != 0 ) { |
---|
595 | DBGC ( tcp, "TCP %p could not transmit RST %08x..%08x %08x: " |
---|
596 | "%s\n", tcp, ntohl ( in_tcphdr->ack ), |
---|
597 | ntohl ( in_tcphdr->ack ), ntohl ( in_tcphdr->seq ), |
---|
598 | strerror ( rc ) ); |
---|
599 | return rc; |
---|
600 | } |
---|
601 | |
---|
602 | return 0; |
---|
603 | } |
---|
604 | |
---|
605 | /*************************************************************************** |
---|
606 | * |
---|
607 | * Receive data path |
---|
608 | * |
---|
609 | *************************************************************************** |
---|
610 | */ |
---|
611 | |
---|
612 | /** |
---|
613 | * Identify TCP connection by local port number |
---|
614 | * |
---|
615 | * @v local_port Local port (in network-endian order) |
---|
616 | * @ret tcp TCP connection, or NULL |
---|
617 | */ |
---|
618 | static struct tcp_connection * tcp_demux ( unsigned int local_port ) { |
---|
619 | struct tcp_connection *tcp; |
---|
620 | |
---|
621 | list_for_each_entry ( tcp, &tcp_conns, list ) { |
---|
622 | if ( tcp->local_port == local_port ) |
---|
623 | return tcp; |
---|
624 | } |
---|
625 | return NULL; |
---|
626 | } |
---|
627 | |
---|
628 | /** |
---|
629 | * Parse TCP received options |
---|
630 | * |
---|
631 | * @v tcp TCP connection |
---|
632 | * @v data Raw options data |
---|
633 | * @v len Raw options length |
---|
634 | * @v options Options structure to fill in |
---|
635 | */ |
---|
636 | static void tcp_rx_opts ( struct tcp_connection *tcp, const void *data, |
---|
637 | size_t len, struct tcp_options *options ) { |
---|
638 | const void *end = ( data + len ); |
---|
639 | const struct tcp_option *option; |
---|
640 | unsigned int kind; |
---|
641 | |
---|
642 | memset ( options, 0, sizeof ( *options ) ); |
---|
643 | while ( data < end ) { |
---|
644 | option = data; |
---|
645 | kind = option->kind; |
---|
646 | if ( kind == TCP_OPTION_END ) |
---|
647 | return; |
---|
648 | if ( kind == TCP_OPTION_NOP ) { |
---|
649 | data++; |
---|
650 | continue; |
---|
651 | } |
---|
652 | switch ( kind ) { |
---|
653 | case TCP_OPTION_MSS: |
---|
654 | options->mssopt = data; |
---|
655 | break; |
---|
656 | case TCP_OPTION_TS: |
---|
657 | options->tsopt = data; |
---|
658 | break; |
---|
659 | default: |
---|
660 | DBGC ( tcp, "TCP %p received unknown option %d\n", |
---|
661 | tcp, kind ); |
---|
662 | break; |
---|
663 | } |
---|
664 | data += option->length; |
---|
665 | } |
---|
666 | } |
---|
667 | |
---|
668 | /** |
---|
669 | * Consume received sequence space |
---|
670 | * |
---|
671 | * @v tcp TCP connection |
---|
672 | * @v seq_len Sequence space length to consume |
---|
673 | */ |
---|
674 | static void tcp_rx_seq ( struct tcp_connection *tcp, uint32_t seq_len ) { |
---|
675 | tcp->rcv_ack += seq_len; |
---|
676 | if ( tcp->rcv_win > seq_len ) { |
---|
677 | tcp->rcv_win -= seq_len; |
---|
678 | } else { |
---|
679 | tcp->rcv_win = 0; |
---|
680 | } |
---|
681 | } |
---|
682 | |
---|
683 | /** |
---|
684 | * Handle TCP received SYN |
---|
685 | * |
---|
686 | * @v tcp TCP connection |
---|
687 | * @v seq SEQ value (in host-endian order) |
---|
688 | * @v options TCP options |
---|
689 | * @ret rc Return status code |
---|
690 | */ |
---|
691 | static int tcp_rx_syn ( struct tcp_connection *tcp, uint32_t seq, |
---|
692 | struct tcp_options *options ) { |
---|
693 | |
---|
694 | /* Synchronise sequence numbers on first SYN */ |
---|
695 | if ( ! ( tcp->tcp_state & TCP_STATE_RCVD ( TCP_SYN ) ) ) { |
---|
696 | tcp->rcv_ack = seq; |
---|
697 | if ( options->tsopt ) |
---|
698 | tcp->timestamps = 1; |
---|
699 | } |
---|
700 | |
---|
701 | /* Ignore duplicate SYN */ |
---|
702 | if ( ( tcp->rcv_ack - seq ) > 0 ) |
---|
703 | return 0; |
---|
704 | |
---|
705 | /* Mark SYN as received and start sending ACKs with each packet */ |
---|
706 | tcp->tcp_state |= ( TCP_STATE_SENT ( TCP_ACK ) | |
---|
707 | TCP_STATE_RCVD ( TCP_SYN ) ); |
---|
708 | |
---|
709 | /* Acknowledge SYN */ |
---|
710 | tcp_rx_seq ( tcp, 1 ); |
---|
711 | |
---|
712 | return 0; |
---|
713 | } |
---|
714 | |
---|
715 | /** |
---|
716 | * Handle TCP received ACK |
---|
717 | * |
---|
718 | * @v tcp TCP connection |
---|
719 | * @v ack ACK value (in host-endian order) |
---|
720 | * @v win WIN value (in host-endian order) |
---|
721 | * @ret rc Return status code |
---|
722 | */ |
---|
723 | static int tcp_rx_ack ( struct tcp_connection *tcp, uint32_t ack, |
---|
724 | uint32_t win ) { |
---|
725 | uint32_t ack_len = ( ack - tcp->snd_seq ); |
---|
726 | size_t len; |
---|
727 | unsigned int acked_flags; |
---|
728 | |
---|
729 | /* Check for out-of-range or old duplicate ACKs */ |
---|
730 | if ( ack_len > tcp->snd_sent ) { |
---|
731 | DBGC ( tcp, "TCP %p received ACK for %08x..%08x, " |
---|
732 | "sent only %08x..%08x\n", tcp, tcp->snd_seq, |
---|
733 | ( tcp->snd_seq + ack_len ), tcp->snd_seq, |
---|
734 | ( tcp->snd_seq + tcp->snd_sent ) ); |
---|
735 | |
---|
736 | if ( TCP_HAS_BEEN_ESTABLISHED ( tcp->tcp_state ) ) { |
---|
737 | /* Just ignore what might be old duplicate ACKs */ |
---|
738 | return 0; |
---|
739 | } else { |
---|
740 | /* Send RST if an out-of-range ACK is received |
---|
741 | * on a not-yet-established connection, as per |
---|
742 | * RFC 793. |
---|
743 | */ |
---|
744 | return -EINVAL; |
---|
745 | } |
---|
746 | } |
---|
747 | |
---|
748 | /* Ignore ACKs that don't actually acknowledge any new data. |
---|
749 | * (In particular, do not stop the retransmission timer; this |
---|
750 | * avoids creating a sorceror's apprentice syndrome when a |
---|
751 | * duplicate ACK is received and we still have data in our |
---|
752 | * transmit queue.) |
---|
753 | */ |
---|
754 | if ( ack_len == 0 ) |
---|
755 | return 0; |
---|
756 | |
---|
757 | /* Stop the retransmission timer */ |
---|
758 | stop_timer ( &tcp->timer ); |
---|
759 | |
---|
760 | /* Determine acknowledged flags and data length */ |
---|
761 | len = ack_len; |
---|
762 | acked_flags = ( TCP_FLAGS_SENDING ( tcp->tcp_state ) & |
---|
763 | ( TCP_SYN | TCP_FIN ) ); |
---|
764 | if ( acked_flags ) |
---|
765 | len--; |
---|
766 | |
---|
767 | /* Update SEQ and sent counters, and window size */ |
---|
768 | tcp->snd_seq = ack; |
---|
769 | tcp->snd_sent = 0; |
---|
770 | tcp->snd_win = win; |
---|
771 | |
---|
772 | /* Remove any acknowledged data from transmit queue */ |
---|
773 | tcp_process_queue ( tcp, len, NULL, 1 ); |
---|
774 | |
---|
775 | /* Mark SYN/FIN as acknowledged if applicable. */ |
---|
776 | if ( acked_flags ) |
---|
777 | tcp->tcp_state |= TCP_STATE_ACKED ( acked_flags ); |
---|
778 | |
---|
779 | /* Start sending FIN if we've had all possible data ACKed */ |
---|
780 | if ( list_empty ( &tcp->queue ) && tcp->xfer_closed ) |
---|
781 | tcp->tcp_state |= TCP_STATE_SENT ( TCP_FIN ); |
---|
782 | |
---|
783 | return 0; |
---|
784 | } |
---|
785 | |
---|
786 | /** |
---|
787 | * Handle TCP received data |
---|
788 | * |
---|
789 | * @v tcp TCP connection |
---|
790 | * @v seq SEQ value (in host-endian order) |
---|
791 | * @v iobuf I/O buffer |
---|
792 | * @ret rc Return status code |
---|
793 | * |
---|
794 | * This function takes ownership of the I/O buffer. |
---|
795 | */ |
---|
796 | static int tcp_rx_data ( struct tcp_connection *tcp, uint32_t seq, |
---|
797 | struct io_buffer *iobuf ) { |
---|
798 | uint32_t already_rcvd; |
---|
799 | uint32_t len; |
---|
800 | int rc; |
---|
801 | |
---|
802 | /* Ignore duplicate or out-of-order data */ |
---|
803 | already_rcvd = ( tcp->rcv_ack - seq ); |
---|
804 | len = iob_len ( iobuf ); |
---|
805 | if ( already_rcvd >= len ) { |
---|
806 | free_iob ( iobuf ); |
---|
807 | return 0; |
---|
808 | } |
---|
809 | iob_pull ( iobuf, already_rcvd ); |
---|
810 | len -= already_rcvd; |
---|
811 | |
---|
812 | /* Deliver data to application */ |
---|
813 | if ( ( rc = xfer_deliver_iob ( &tcp->xfer, iobuf ) ) != 0 ) { |
---|
814 | DBGC ( tcp, "TCP %p could not deliver %08x..%08x: %s\n", |
---|
815 | tcp, seq, ( seq + len ), strerror ( rc ) ); |
---|
816 | return rc; |
---|
817 | } |
---|
818 | |
---|
819 | /* Acknowledge new data */ |
---|
820 | tcp_rx_seq ( tcp, len ); |
---|
821 | |
---|
822 | return 0; |
---|
823 | } |
---|
824 | |
---|
825 | /** |
---|
826 | * Handle TCP received FIN |
---|
827 | * |
---|
828 | * @v tcp TCP connection |
---|
829 | * @v seq SEQ value (in host-endian order) |
---|
830 | * @ret rc Return status code |
---|
831 | */ |
---|
832 | static int tcp_rx_fin ( struct tcp_connection *tcp, uint32_t seq ) { |
---|
833 | |
---|
834 | /* Ignore duplicate or out-of-order FIN */ |
---|
835 | if ( ( tcp->rcv_ack - seq ) > 0 ) |
---|
836 | return 0; |
---|
837 | |
---|
838 | /* Mark FIN as received and acknowledge it */ |
---|
839 | tcp->tcp_state |= TCP_STATE_RCVD ( TCP_FIN ); |
---|
840 | tcp_rx_seq ( tcp, 1 ); |
---|
841 | |
---|
842 | /* Close connection */ |
---|
843 | tcp_close ( tcp, 0 ); |
---|
844 | |
---|
845 | return 0; |
---|
846 | } |
---|
847 | |
---|
848 | /** |
---|
849 | * Handle TCP received RST |
---|
850 | * |
---|
851 | * @v tcp TCP connection |
---|
852 | * @v seq SEQ value (in host-endian order) |
---|
853 | * @ret rc Return status code |
---|
854 | */ |
---|
855 | static int tcp_rx_rst ( struct tcp_connection *tcp, uint32_t seq ) { |
---|
856 | |
---|
857 | /* Accept RST only if it falls within the window. If we have |
---|
858 | * not yet received a SYN, then we have no window to test |
---|
859 | * against, so fall back to checking that our SYN has been |
---|
860 | * ACKed. |
---|
861 | */ |
---|
862 | if ( tcp->tcp_state & TCP_STATE_RCVD ( TCP_SYN ) ) { |
---|
863 | if ( ( seq - tcp->rcv_ack ) >= tcp->rcv_win ) |
---|
864 | return 0; |
---|
865 | } else { |
---|
866 | if ( ! ( tcp->tcp_state & TCP_STATE_ACKED ( TCP_SYN ) ) ) |
---|
867 | return 0; |
---|
868 | } |
---|
869 | |
---|
870 | /* Abort connection */ |
---|
871 | tcp->tcp_state = TCP_CLOSED; |
---|
872 | tcp_dump_state ( tcp ); |
---|
873 | tcp_close ( tcp, -ECONNRESET ); |
---|
874 | |
---|
875 | DBGC ( tcp, "TCP %p connection reset by peer\n", tcp ); |
---|
876 | return -ECONNRESET; |
---|
877 | } |
---|
878 | |
---|
879 | /** |
---|
880 | * Process received packet |
---|
881 | * |
---|
882 | * @v iobuf I/O buffer |
---|
883 | * @v st_src Partially-filled source address |
---|
884 | * @v st_dest Partially-filled destination address |
---|
885 | * @v pshdr_csum Pseudo-header checksum |
---|
886 | * @ret rc Return status code |
---|
887 | */ |
---|
888 | static int tcp_rx ( struct io_buffer *iobuf, |
---|
889 | struct sockaddr_tcpip *st_src, |
---|
890 | struct sockaddr_tcpip *st_dest __unused, |
---|
891 | uint16_t pshdr_csum ) { |
---|
892 | struct tcp_header *tcphdr = iobuf->data; |
---|
893 | struct tcp_connection *tcp; |
---|
894 | struct tcp_options options; |
---|
895 | size_t hlen; |
---|
896 | uint16_t csum; |
---|
897 | uint32_t start_seq; |
---|
898 | uint32_t seq; |
---|
899 | uint32_t ack; |
---|
900 | uint32_t win; |
---|
901 | unsigned int flags; |
---|
902 | size_t len; |
---|
903 | int rc; |
---|
904 | |
---|
905 | /* Sanity check packet */ |
---|
906 | if ( iob_len ( iobuf ) < sizeof ( *tcphdr ) ) { |
---|
907 | DBG ( "TCP packet too short at %zd bytes (min %zd bytes)\n", |
---|
908 | iob_len ( iobuf ), sizeof ( *tcphdr ) ); |
---|
909 | rc = -EINVAL; |
---|
910 | goto discard; |
---|
911 | } |
---|
912 | hlen = ( ( tcphdr->hlen & TCP_MASK_HLEN ) / 16 ) * 4; |
---|
913 | if ( hlen < sizeof ( *tcphdr ) ) { |
---|
914 | DBG ( "TCP header too short at %zd bytes (min %zd bytes)\n", |
---|
915 | hlen, sizeof ( *tcphdr ) ); |
---|
916 | rc = -EINVAL; |
---|
917 | goto discard; |
---|
918 | } |
---|
919 | if ( hlen > iob_len ( iobuf ) ) { |
---|
920 | DBG ( "TCP header too long at %zd bytes (max %zd bytes)\n", |
---|
921 | hlen, iob_len ( iobuf ) ); |
---|
922 | rc = -EINVAL; |
---|
923 | goto discard; |
---|
924 | } |
---|
925 | csum = tcpip_continue_chksum ( pshdr_csum, iobuf->data, |
---|
926 | iob_len ( iobuf ) ); |
---|
927 | if ( csum != 0 ) { |
---|
928 | DBG ( "TCP checksum incorrect (is %04x including checksum " |
---|
929 | "field, should be 0000)\n", csum ); |
---|
930 | rc = -EINVAL; |
---|
931 | goto discard; |
---|
932 | } |
---|
933 | |
---|
934 | /* Parse parameters from header and strip header */ |
---|
935 | tcp = tcp_demux ( tcphdr->dest ); |
---|
936 | start_seq = seq = ntohl ( tcphdr->seq ); |
---|
937 | ack = ntohl ( tcphdr->ack ); |
---|
938 | win = ntohs ( tcphdr->win ); |
---|
939 | flags = tcphdr->flags; |
---|
940 | tcp_rx_opts ( tcp, ( ( ( void * ) tcphdr ) + sizeof ( *tcphdr ) ), |
---|
941 | ( hlen - sizeof ( *tcphdr ) ), &options ); |
---|
942 | iob_pull ( iobuf, hlen ); |
---|
943 | len = iob_len ( iobuf ); |
---|
944 | |
---|
945 | /* Dump header */ |
---|
946 | DBGC2 ( tcp, "TCP %p RX %d<-%d %08x %08x..%08zx %4zd", |
---|
947 | tcp, ntohs ( tcphdr->dest ), ntohs ( tcphdr->src ), |
---|
948 | ntohl ( tcphdr->ack ), ntohl ( tcphdr->seq ), |
---|
949 | ( ntohl ( tcphdr->seq ) + len + |
---|
950 | ( ( tcphdr->flags & ( TCP_SYN | TCP_FIN ) ) ? 1 : 0 )), len); |
---|
951 | tcp_dump_flags ( tcp, tcphdr->flags ); |
---|
952 | DBGC2 ( tcp, "\n" ); |
---|
953 | |
---|
954 | /* If no connection was found, send RST */ |
---|
955 | if ( ! tcp ) { |
---|
956 | tcp_xmit_reset ( tcp, st_src, tcphdr ); |
---|
957 | rc = -ENOTCONN; |
---|
958 | goto discard; |
---|
959 | } |
---|
960 | |
---|
961 | /* Handle ACK, if present */ |
---|
962 | if ( flags & TCP_ACK ) { |
---|
963 | if ( ( rc = tcp_rx_ack ( tcp, ack, win ) ) != 0 ) { |
---|
964 | tcp_xmit_reset ( tcp, st_src, tcphdr ); |
---|
965 | goto discard; |
---|
966 | } |
---|
967 | } |
---|
968 | |
---|
969 | /* Handle SYN, if present */ |
---|
970 | if ( flags & TCP_SYN ) { |
---|
971 | tcp_rx_syn ( tcp, seq, &options ); |
---|
972 | seq++; |
---|
973 | } |
---|
974 | |
---|
975 | /* Handle RST, if present */ |
---|
976 | if ( flags & TCP_RST ) { |
---|
977 | if ( ( rc = tcp_rx_rst ( tcp, seq ) ) != 0 ) |
---|
978 | goto discard; |
---|
979 | } |
---|
980 | |
---|
981 | /* Handle new data, if any */ |
---|
982 | tcp_rx_data ( tcp, seq, iobuf ); |
---|
983 | seq += len; |
---|
984 | |
---|
985 | /* Handle FIN, if present */ |
---|
986 | if ( flags & TCP_FIN ) { |
---|
987 | tcp_rx_fin ( tcp, seq ); |
---|
988 | seq++; |
---|
989 | } |
---|
990 | |
---|
991 | /* Update timestamp, if present and applicable */ |
---|
992 | if ( ( seq == tcp->rcv_ack ) && options.tsopt ) |
---|
993 | tcp->ts_recent = ntohl ( options.tsopt->tsval ); |
---|
994 | |
---|
995 | /* Dump out any state change as a result of the received packet */ |
---|
996 | tcp_dump_state ( tcp ); |
---|
997 | |
---|
998 | /* Send out any pending data. We force sending a reply if either |
---|
999 | * |
---|
1000 | * a) the peer is expecting an ACK (i.e. consumed sequence space), or |
---|
1001 | * b) either end of the packet was outside the receive window |
---|
1002 | * |
---|
1003 | * Case (b) enables us to support TCP keepalives using |
---|
1004 | * zero-length packets, which we would otherwise ignore. Note |
---|
1005 | * that for case (b), we need *only* consider zero-length |
---|
1006 | * packets, since non-zero-length packets will already be |
---|
1007 | * caught by case (a). |
---|
1008 | */ |
---|
1009 | tcp_xmit ( tcp, ( ( start_seq != seq ) || |
---|
1010 | ( ( seq - tcp->rcv_ack ) > tcp->rcv_win ) ) ); |
---|
1011 | |
---|
1012 | /* If this packet was the last we expect to receive, set up |
---|
1013 | * timer to expire and cause the connection to be freed. |
---|
1014 | */ |
---|
1015 | if ( TCP_CLOSED_GRACEFULLY ( tcp->tcp_state ) ) { |
---|
1016 | tcp->timer.timeout = ( 2 * TCP_MSL ); |
---|
1017 | start_timer ( &tcp->timer ); |
---|
1018 | } |
---|
1019 | |
---|
1020 | return 0; |
---|
1021 | |
---|
1022 | discard: |
---|
1023 | /* Free received packet */ |
---|
1024 | free_iob ( iobuf ); |
---|
1025 | return rc; |
---|
1026 | } |
---|
1027 | |
---|
1028 | /** TCP protocol */ |
---|
1029 | struct tcpip_protocol tcp_protocol __tcpip_protocol = { |
---|
1030 | .name = "TCP", |
---|
1031 | .rx = tcp_rx, |
---|
1032 | .tcpip_proto = IP_TCP, |
---|
1033 | }; |
---|
1034 | |
---|
1035 | /*************************************************************************** |
---|
1036 | * |
---|
1037 | * Data transfer interface |
---|
1038 | * |
---|
1039 | *************************************************************************** |
---|
1040 | */ |
---|
1041 | |
---|
1042 | /** |
---|
1043 | * Close interface |
---|
1044 | * |
---|
1045 | * @v xfer Data transfer interface |
---|
1046 | * @v rc Reason for close |
---|
1047 | */ |
---|
1048 | static void tcp_xfer_close ( struct xfer_interface *xfer, int rc ) { |
---|
1049 | struct tcp_connection *tcp = |
---|
1050 | container_of ( xfer, struct tcp_connection, xfer ); |
---|
1051 | |
---|
1052 | /* Close data transfer interface */ |
---|
1053 | tcp_close ( tcp, rc ); |
---|
1054 | |
---|
1055 | /* Transmit FIN, if possible */ |
---|
1056 | tcp_xmit ( tcp, 0 ); |
---|
1057 | } |
---|
1058 | |
---|
1059 | /** |
---|
1060 | * Check flow control window |
---|
1061 | * |
---|
1062 | * @v xfer Data transfer interface |
---|
1063 | * @ret len Length of window |
---|
1064 | */ |
---|
1065 | static size_t tcp_xfer_window ( struct xfer_interface *xfer ) { |
---|
1066 | struct tcp_connection *tcp = |
---|
1067 | container_of ( xfer, struct tcp_connection, xfer ); |
---|
1068 | |
---|
1069 | /* Not ready if data queue is non-empty. This imposes a limit |
---|
1070 | * of only one unACKed packet in the TX queue at any time; we |
---|
1071 | * do this to conserve memory usage. |
---|
1072 | */ |
---|
1073 | if ( ! list_empty ( &tcp->queue ) ) |
---|
1074 | return 0; |
---|
1075 | |
---|
1076 | /* Return TCP window length */ |
---|
1077 | return tcp_xmit_win ( tcp ); |
---|
1078 | } |
---|
1079 | |
---|
1080 | /** |
---|
1081 | * Deliver datagram as I/O buffer |
---|
1082 | * |
---|
1083 | * @v xfer Data transfer interface |
---|
1084 | * @v iobuf Datagram I/O buffer |
---|
1085 | * @v meta Data transfer metadata |
---|
1086 | * @ret rc Return status code |
---|
1087 | */ |
---|
1088 | static int tcp_xfer_deliver_iob ( struct xfer_interface *xfer, |
---|
1089 | struct io_buffer *iobuf, |
---|
1090 | struct xfer_metadata *meta __unused ) { |
---|
1091 | struct tcp_connection *tcp = |
---|
1092 | container_of ( xfer, struct tcp_connection, xfer ); |
---|
1093 | |
---|
1094 | /* Enqueue packet */ |
---|
1095 | list_add_tail ( &iobuf->list, &tcp->queue ); |
---|
1096 | |
---|
1097 | /* Transmit data, if possible */ |
---|
1098 | tcp_xmit ( tcp, 0 ); |
---|
1099 | |
---|
1100 | return 0; |
---|
1101 | } |
---|
1102 | |
---|
1103 | /** TCP data transfer interface operations */ |
---|
1104 | static struct xfer_interface_operations tcp_xfer_operations = { |
---|
1105 | .close = tcp_xfer_close, |
---|
1106 | .vredirect = ignore_xfer_vredirect, |
---|
1107 | .window = tcp_xfer_window, |
---|
1108 | .alloc_iob = default_xfer_alloc_iob, |
---|
1109 | .deliver_iob = tcp_xfer_deliver_iob, |
---|
1110 | .deliver_raw = xfer_deliver_as_iob, |
---|
1111 | }; |
---|
1112 | |
---|
1113 | /*************************************************************************** |
---|
1114 | * |
---|
1115 | * Openers |
---|
1116 | * |
---|
1117 | *************************************************************************** |
---|
1118 | */ |
---|
1119 | |
---|
1120 | /** TCP socket opener */ |
---|
1121 | struct socket_opener tcp_socket_opener __socket_opener = { |
---|
1122 | .semantics = TCP_SOCK_STREAM, |
---|
1123 | .family = AF_INET, |
---|
1124 | .open = tcp_open, |
---|
1125 | }; |
---|
1126 | |
---|
1127 | /** Linkage hack */ |
---|
1128 | int tcp_sock_stream = TCP_SOCK_STREAM; |
---|
1129 | |
---|
1130 | /** |
---|
1131 | * Open TCP URI |
---|
1132 | * |
---|
1133 | * @v xfer Data transfer interface |
---|
1134 | * @v uri URI |
---|
1135 | * @ret rc Return status code |
---|
1136 | */ |
---|
1137 | static int tcp_open_uri ( struct xfer_interface *xfer, struct uri *uri ) { |
---|
1138 | struct sockaddr_tcpip peer; |
---|
1139 | |
---|
1140 | /* Sanity check */ |
---|
1141 | if ( ! uri->host ) |
---|
1142 | return -EINVAL; |
---|
1143 | |
---|
1144 | memset ( &peer, 0, sizeof ( peer ) ); |
---|
1145 | peer.st_port = htons ( uri_port ( uri, 0 ) ); |
---|
1146 | return xfer_open_named_socket ( xfer, SOCK_STREAM, |
---|
1147 | ( struct sockaddr * ) &peer, |
---|
1148 | uri->host, NULL ); |
---|
1149 | } |
---|
1150 | |
---|
1151 | /** TCP URI opener */ |
---|
1152 | struct uri_opener tcp_uri_opener __uri_opener = { |
---|
1153 | .scheme = "tcp", |
---|
1154 | .open = tcp_open_uri, |
---|
1155 | }; |
---|
1156 | |
---|