source: bootcd/isolinux/syslinux-6.03/gpxe/src/core/uri.c

Last change on this file was e16e8f2, checked in by Edwin Eefting <edwin@datux.nl>, 3 years ago

bootstuff

  • Property mode set to 100644
File size: 12.4 KB
Line 
1/*
2 * Copyright (C) 2007 Michael Brown <mbrown@fensystems.co.uk>.
3 *
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public License as
6 * published by the Free Software Foundation; either version 2 of the
7 * License, or any later version.
8 *
9 * This program is distributed in the hope that it will be useful, but
10 * WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
12 * General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write to the Free Software
16 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
17 */
18
19FILE_LICENCE ( GPL2_OR_LATER );
20
21/** @file
22 *
23 * Uniform Resource Identifiers
24 *
25 */
26
27#include <stdint.h>
28#include <stdlib.h>
29#include <string.h>
30#include <libgen.h>
31#include <ctype.h>
32#include <gpxe/vsprintf.h>
33#include <gpxe/uri.h>
34
35/**
36 * Dump URI for debugging
37 *
38 * @v uri               URI
39 */
40static void dump_uri ( struct uri *uri ) {
41        if ( ! uri )
42                return;
43        if ( uri->scheme )
44                DBG ( " scheme \"%s\"", uri->scheme );
45        if ( uri->opaque )
46                DBG ( " opaque \"%s\"", uri->opaque );
47        if ( uri->user )
48                DBG ( " user \"%s\"", uri->user );
49        if ( uri->password )
50                DBG ( " password \"%s\"", uri->password );
51        if ( uri->host )
52                DBG ( " host \"%s\"", uri->host );
53        if ( uri->port )
54                DBG ( " port \"%s\"", uri->port );
55        if ( uri->path )
56                DBG ( " path \"%s\"", uri->path );
57        if ( uri->query )
58                DBG ( " query \"%s\"", uri->query );
59        if ( uri->fragment )
60                DBG ( " fragment \"%s\"", uri->fragment );
61}
62
63/**
64 * Parse URI
65 *
66 * @v uri_string        URI as a string
67 * @ret uri             URI
68 *
69 * Splits a URI into its component parts.  The return URI structure is
70 * dynamically allocated and must eventually be freed by calling
71 * uri_put().
72 */
73struct uri * parse_uri ( const char *uri_string ) {
74        struct uri *uri;
75        char *raw;
76        char *tmp;
77        char *path = NULL;
78        char *authority = NULL;
79        int i;
80        size_t raw_len;
81
82        /* Allocate space for URI struct and a copy of the string */
83        raw_len = ( strlen ( uri_string ) + 1 /* NUL */ );
84        uri = zalloc ( sizeof ( *uri ) + raw_len );
85        if ( ! uri )
86                return NULL;
87        raw = ( ( ( char * ) uri ) + sizeof ( *uri ) );
88
89        /* Copy in the raw string */
90        memcpy ( raw, uri_string, raw_len );
91
92        /* Start by chopping off the fragment, if it exists */
93        if ( ( tmp = strchr ( raw, '#' ) ) ) {
94                *(tmp++) = '\0';
95                uri->fragment = tmp;
96        }
97
98        /* Identify absolute/relative URI.  We ignore schemes that are
99         * apparently only a single character long, since otherwise we
100         * misinterpret a DOS-style path name ("C:\path\to\file") as a
101         * URI with scheme="C",opaque="\path\to\file".
102         */
103        if ( ( tmp = strchr ( raw, ':' ) ) && ( tmp > ( raw + 1 ) ) ) {
104                /* Absolute URI: identify hierarchical/opaque */
105                uri->scheme = raw;
106                *(tmp++) = '\0';
107                if ( *tmp == '/' ) {
108                        /* Absolute URI with hierarchical part */
109                        path = tmp;
110                } else {
111                        /* Absolute URI with opaque part */
112                        uri->opaque = tmp;
113                }
114        } else {
115                /* Relative URI */
116                path = raw;
117        }
118
119        /* If we don't have a path (i.e. we have an absolute URI with
120         * an opaque portion, we're already finished processing
121         */
122        if ( ! path )
123                goto done;
124
125        /* Chop off the query, if it exists */
126        if ( ( tmp = strchr ( path, '?' ) ) ) {
127                *(tmp++) = '\0';
128                uri->query = tmp;
129        }
130
131        /* Identify net/absolute/relative path */
132        if ( strncmp ( path, "//", 2 ) == 0 ) {
133                /* Net path.  If this is terminated by the first '/'
134                 * of an absolute path, then we have no space for a
135                 * terminator after the authority field, so shuffle
136                 * the authority down by one byte, overwriting one of
137                 * the two slashes.
138                 */
139                authority = ( path + 2 );
140                if ( ( tmp = strchr ( authority, '/' ) ) ) {
141                        /* Shuffle down */
142                        uri->path = tmp;
143                        memmove ( ( authority - 1 ), authority,
144                                  ( tmp - authority ) );
145                        authority--;
146                        *(--tmp) = '\0';
147                }
148        } else {
149                /* Absolute/relative path */
150                uri->path = path;
151        }
152
153        /* Split authority into user[:password] and host[:port] portions */
154        if ( ( tmp = strchr ( authority, '@' ) ) ) {
155                /* Has user[:password] */
156                *(tmp++) = '\0';
157                uri->host = tmp;
158                uri->user = authority;
159                if ( ( tmp = strchr ( authority, ':' ) ) ) {
160                        /* Has password */
161                        *(tmp++) = '\0';
162                        uri->password = tmp;
163                }
164        } else {
165                /* No user:password */
166                uri->host = authority;
167        }
168
169        /* Split host into host[:port] */
170        if ( ( tmp = strchr ( uri->host, ':' ) ) ) {
171                *(tmp++) = '\0';
172                uri->port = tmp;
173        }
174
175        /* Decode fields that should be decoded */
176        for ( i = URI_FIRST_FIELD; i <= URI_LAST_FIELD; i++ ) {
177                const char *field = uri_get_field ( uri, i );
178                if ( field && ( URI_ENCODED & ( 1 << i ) ) )
179                        uri_decode ( field, ( char * ) field,
180                                     strlen ( field ) + 1 /* NUL */ );
181        }
182
183 done:
184        DBG ( "URI \"%s\" split into", uri_string );
185        dump_uri ( uri );
186        DBG ( "\n" );
187
188        return uri;
189}
190
191/**
192 * Get port from URI
193 *
194 * @v uri               URI, or NULL
195 * @v default_port      Default port to use if none specified in URI
196 * @ret port            Port
197 */
198unsigned int uri_port ( struct uri *uri, unsigned int default_port ) {
199        if ( ( ! uri ) || ( ! uri->port ) )
200                return default_port;
201        return ( strtoul ( uri->port, NULL, 0 ) );
202}
203
204/**
205 * Unparse URI
206 *
207 * @v buf               Buffer to fill with URI string
208 * @v size              Size of buffer
209 * @v uri               URI to write into buffer, or NULL
210 * @v fields            Bitmask of fields to include in URI string, or URI_ALL
211 * @ret len             Length of URI string
212 */
213int unparse_uri ( char *buf, size_t size, struct uri *uri,
214                  unsigned int fields ) {
215        /* List of characters that typically go before certain fields */
216        static char separators[] = { /* scheme */ 0, /* opaque */ ':',
217                                     /* user */ 0, /* password */ ':',
218                                     /* host */ '@', /* port */ ':',
219                                     /* path */ 0, /* query */ '?',
220                                     /* fragment */ '#' };
221        int used = 0;
222        int i;
223
224        DBG ( "URI unparsing" );
225        dump_uri ( uri );
226        DBG ( "\n" );
227
228        /* Ensure buffer is NUL-terminated */
229        if ( size )
230                buf[0] = '\0';
231
232        /* Special-case NULL URI */
233        if ( ! uri )
234                return 0;
235
236        /* Iterate through requested fields */
237        for ( i = URI_FIRST_FIELD; i <= URI_LAST_FIELD; i++ ) {
238                const char *field = uri_get_field ( uri, i );
239                char sep = separators[i];
240
241                /* Ensure `fields' only contains bits for fields that exist */
242                if ( ! field )
243                        fields &= ~( 1 << i );
244
245                /* Store this field if we were asked to */
246                if ( fields & ( 1 << i ) ) {
247                        /* Print :// if we're non-opaque and had a scheme */
248                        if ( ( fields & URI_SCHEME_BIT ) &&
249                             ( i > URI_OPAQUE ) ) {
250                                used += ssnprintf ( buf + used, size - used,
251                                                    "://" );
252                                /* Only print :// once */
253                                fields &= ~URI_SCHEME_BIT;
254                        }
255
256                        /* Only print separator if an earlier field exists */
257                        if ( sep && ( fields & ( ( 1 << i ) - 1 ) ) )
258                                used += ssnprintf ( buf + used, size - used,
259                                                    "%c", sep );
260
261                        /* Print contents of field, possibly encoded */
262                        if ( URI_ENCODED & ( 1 << i ) )
263                                used += uri_encode ( field, buf + used,
264                                                     size - used, i );
265                        else
266                                used += ssnprintf ( buf + used, size - used,
267                                                    "%s", field );
268                }
269        }
270
271        return used;
272}
273
274/**
275 * Duplicate URI
276 *
277 * @v uri               URI
278 * @ret uri             Duplicate URI
279 *
280 * Creates a modifiable copy of a URI.
281 */
282struct uri * uri_dup ( struct uri *uri ) {
283        size_t len = ( unparse_uri ( NULL, 0, uri, URI_ALL ) + 1 );
284        char buf[len];
285
286        unparse_uri ( buf, len, uri, URI_ALL );
287        return parse_uri ( buf );
288}
289
/**
 * Resolve base+relative path
 *
 * @v base_path         Base path
 * @v relative_path     Relative path
 * @ret resolved_path   Resolved path, or NULL on allocation failure
 *
 * Takes a base path (e.g. "/var/lib/tftpboot/vmlinuz") and a relative
 * path (e.g. "initrd.gz") and produces a new path
 * (e.g. "/var/lib/tftpboot/initrd.gz").  Note that any non-directory
 * portion of the base path will automatically be stripped; this
 * matches the semantics used when resolving the path component of
 * URIs.
 *
 * The returned string is dynamically allocated and must be freed by
 * the caller.
 */
char * resolve_path ( const char *base_path,
                      const char *relative_path ) {
        size_t base_size = ( strlen ( base_path ) + 1 /* NUL */ );
        char base_copy[base_size];
        const char *rel = relative_path;
        const char *joiner;
        char *dir;
        char *resolved;

        /* An absolute "relative" path simply replaces the base */
        if ( rel[0] == '/' )
                return strdup ( rel );

        /* dirname() may modify its argument, so work on a copy */
        memcpy ( base_copy, base_path, base_size );
        dir = dirname ( base_copy );

        /* Consume leading "./" and "../" elements.  A bare "." or
         * ".." at the very end of the string counts as well; any
         * other name that merely starts with a dot is left alone.
         */
        while ( rel[0] == '.' ) {
                if ( rel[1] == '\0' ) {
                        /* Trailing "." */
                        rel += 1;
                } else if ( rel[1] == '/' ) {
                        /* "./" */
                        rel += 2;
                } else if ( ( rel[1] == '.' ) && ( rel[2] == '\0' ) ) {
                        /* Trailing ".." */
                        dir = dirname ( dir );
                        rel += 2;
                } else if ( ( rel[1] == '.' ) && ( rel[2] == '/' ) ) {
                        /* "../" */
                        dir = dirname ( dir );
                        rel += 3;
                } else {
                        /* Ordinary name such as ".config" or "..x" */
                        break;
                }
        }

        /* Join directory and remainder, avoiding a doubled slash */
        joiner = ( ( dir[ strlen ( dir ) - 1 ] == '/' ) ? "" : "/" );
        if ( asprintf ( &resolved, "%s%s%s", dir, joiner, rel ) < 0 )
                return NULL;

        return resolved;
}
351
352/**
353 * Resolve base+relative URI
354 *
355 * @v base_uri          Base URI, or NULL
356 * @v relative_uri      Relative URI
357 * @ret resolved_uri    Resolved URI
358 *
359 * Takes a base URI (e.g. "http://etherboot.org/kernels/vmlinuz" and a
360 * relative URI (e.g. "../initrds/initrd.gz") and produces a new URI
361 * (e.g. "http://etherboot.org/initrds/initrd.gz").
362 */
363struct uri * resolve_uri ( struct uri *base_uri,
364                           struct uri *relative_uri ) {
365        struct uri tmp_uri;
366        char *tmp_path = NULL;
367        struct uri *new_uri;
368
369        /* If relative URI is absolute, just re-use it */
370        if ( uri_is_absolute ( relative_uri ) || ( ! base_uri ) )
371                return uri_get ( relative_uri );
372
373        /* Mangle URI */
374        memcpy ( &tmp_uri, base_uri, sizeof ( tmp_uri ) );
375        if ( relative_uri->path ) {
376                tmp_path = resolve_path ( ( base_uri->path ?
377                                            base_uri->path : "/" ),
378                                          relative_uri->path );
379                tmp_uri.path = tmp_path;
380                tmp_uri.query = relative_uri->query;
381                tmp_uri.fragment = relative_uri->fragment;
382        } else if ( relative_uri->query ) {
383                tmp_uri.query = relative_uri->query;
384                tmp_uri.fragment = relative_uri->fragment;
385        } else if ( relative_uri->fragment ) {
386                tmp_uri.fragment = relative_uri->fragment;
387        }
388
389        /* Create demangled URI */
390        new_uri = uri_dup ( &tmp_uri );
391        free ( tmp_path );
392        return new_uri;
393}
394
395/**
396 * Test for unreserved URI characters
397 *
398 * @v c                 Character to test
399 * @v field             Field of URI in which character lies
400 * @ret is_unreserved   Character is an unreserved character
401 */
402static int is_unreserved_uri_char ( int c, int field ) {
403        /* According to RFC3986, the unreserved character set is
404         *
405         * A-Z a-z 0-9 - _ . ~
406         *
407         * but we also pass & ; = in queries, / in paths,
408         * and everything in opaques
409         */
410        int ok = ( isupper ( c ) || islower ( c ) || isdigit ( c ) ||
411                    ( c == '-' ) || ( c == '_' ) ||
412                    ( c == '.' ) || ( c == '~' ) );
413
414        if ( field == URI_QUERY )
415                ok = ok || ( c == ';' ) || ( c == '&' ) || ( c == '=' );
416
417        if ( field == URI_PATH )
418                ok = ok || ( c == '/' );
419
420        if ( field == URI_OPAQUE )
421                ok = 1;
422
423        return ok;
424}
425
/**
 * URI-encode string
 *
 * @v raw_string        String to be URI-encoded
 * @v buf               Buffer to contain encoded string
 * @v len               Length of buffer
 * @v field             Field of URI in which string lies
 * @ret len             Length of encoded string (excluding NUL)
 *
 * Characters accepted by is_unreserved_uri_char() for the given field
 * are copied through; every other byte is written as a percent sign
 * followed by two upper-case hexadecimal digits.
 */
size_t uri_encode ( const char *raw_string, char *buf, ssize_t len,
                    int field ) {
        ssize_t remaining = len;
        const char *src;
        size_t used;
        unsigned char c;

        /* Leave an empty string behind if there is nothing to encode */
        if ( len > 0 )
                buf[0] = '\0';

        for ( src = raw_string; *src; src++ ) {
                c = *src;
                if ( ! is_unreserved_uri_char ( c, field ) ) {
                        used = ssnprintf ( buf, remaining, "%%%02X", c );
                } else {
                        used = ssnprintf ( buf, remaining, "%c", c );
                }
                buf += used;
                remaining -= used;
        }

        /* Total consumed is the difference from the starting size */
        return ( len - remaining );
}
456
/**
 * Convert a hexadecimal digit to its numeric value
 *
 * @v digit             Character to convert
 * @ret value           Value in the range 0-15, or -1 if not a hex digit
 */
static int uri_hex_digit ( char digit ) {
        if ( ( digit >= '0' ) && ( digit <= '9' ) )
                return ( digit - '0' );
        if ( ( digit >= 'a' ) && ( digit <= 'f' ) )
                return ( digit - 'a' + 10 );
        if ( ( digit >= 'A' ) && ( digit <= 'F' ) )
                return ( digit - 'A' + 10 );
        return -1;
}

/**
 * Decode URI-encoded string
 *
 * @v encoded_string    URI-encoded string
 * @v buf               Buffer to contain decoded string
 * @v len               Length of buffer
 * @ret len             Length of decoded string (excluding NUL)
 *
 * This function may be used in-place, with @a buf the same as
 * @a encoded_string.
 *
 * A percent sign followed by one or two hexadecimal digits is replaced
 * by the byte that those digits encode.  A percent sign that is not
 * followed by a hexadecimal digit is not an escape sequence and is
 * copied through unchanged.
 *
 * The returned length is that of the complete decoded string, even if
 * the buffer was too small to hold all of it.  Nothing is written to
 * the buffer when @a len is zero or negative.
 */
size_t uri_decode ( const char *encoded_string, char *buf, ssize_t len ) {
        ssize_t remaining;
        unsigned char c;
        int high;
        int low;

        for ( remaining = len; *encoded_string; remaining-- ) {
                c = *(encoded_string++);
                if ( c == '%' ) {
                        /* All look-ahead reads happen before the
                         * output byte is stored, so decoding in place
                         * never reads data it has already overwritten
                         */
                        high = uri_hex_digit ( encoded_string[0] );
                        if ( high >= 0 ) {
                                c = high;
                                encoded_string++;
                                low = uri_hex_digit ( encoded_string[0] );
                                if ( low >= 0 ) {
                                        c = ( ( high << 4 ) | low );
                                        encoded_string++;
                                }
                        }
                        /* else: malformed escape, keep the literal '%' */
                }
                /* Always leave room for the terminating NUL */
                if ( remaining > 1 )
                        *buf++ = c;
        }

        /* Terminate only if the buffer really has space */
        if ( len > 0 )
                *buf = 0;

        return ( len - remaining );
}
Note: See TracBrowser for help on using the repository browser.