Context Navigation

source: bootcd/isolinux/syslinux-6.03/gpxe/src/core/uri.c

Last change on this file was e16e8f2, checked in by Edwin Eefting <edwin@datux.nl>, 3 years ago
bootstuff
Property mode set to `100644`
File size: 12.4 KB

Line
1	/*
2	* Copyright (C) 2007 Michael Brown <mbrown@fensystems.co.uk>.
3	*
4	* This program is free software; you can redistribute it and/or
5	* modify it under the terms of the GNU General Public License as
6	* published by the Free Software Foundation; either version 2 of the
7	* License, or any later version.
8	*
9	* This program is distributed in the hope that it will be useful, but
10	* WITHOUT ANY WARRANTY; without even the implied warranty of
11	* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12	* General Public License for more details.
13	*
14	* You should have received a copy of the GNU General Public License
15	* along with this program; if not, write to the Free Software
16	* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
17	*/
18
19	FILE_LICENCE ( GPL2_OR_LATER );
20
21	/** @file
22	*
23	* Uniform Resource Identifiers
24	*
25	*/
26
27	#include <stdint.h>
28	#include <stdlib.h>
29	#include <string.h>
30	#include <libgen.h>
31	#include <ctype.h>
32	#include <gpxe/vsprintf.h>
33	#include <gpxe/uri.h>
34
35	/**
36	* Dump URI for debugging
37	*
38	* @v uri URI
39	*/
40	static void dump_uri ( struct uri *uri ) {
41	if ( ! uri )
42	return;
43	if ( uri->scheme )
44	DBG ( " scheme \"%s\"", uri->scheme );
45	if ( uri->opaque )
46	DBG ( " opaque \"%s\"", uri->opaque );
47	if ( uri->user )
48	DBG ( " user \"%s\"", uri->user );
49	if ( uri->password )
50	DBG ( " password \"%s\"", uri->password );
51	if ( uri->host )
52	DBG ( " host \"%s\"", uri->host );
53	if ( uri->port )
54	DBG ( " port \"%s\"", uri->port );
55	if ( uri->path )
56	DBG ( " path \"%s\"", uri->path );
57	if ( uri->query )
58	DBG ( " query \"%s\"", uri->query );
59	if ( uri->fragment )
60	DBG ( " fragment \"%s\"", uri->fragment );
61	}
62
63	/**
64	* Parse URI
65	*
66	* @v uri_string URI as a string
67	* @ret uri URI
68	*
69	* Splits a URI into its component parts. The return URI structure is
70	* dynamically allocated and must eventually be freed by calling
71	* uri_put().
72	*/
73	struct uri * parse_uri ( const char *uri_string ) {
74	struct uri *uri;
75	char *raw;
76	char *tmp;
77	char *path = NULL;
78	char *authority = NULL;
79	int i;
80	size_t raw_len;
81
82	/* Allocate space for URI struct and a copy of the string */
83	raw_len = ( strlen ( uri_string ) + 1 /* NUL */ );
84	uri = zalloc ( sizeof ( *uri ) + raw_len );
85	if ( ! uri )
86	return NULL;
87	raw = ( ( ( char * ) uri ) + sizeof ( *uri ) );
88
89	/* Copy in the raw string */
90	memcpy ( raw, uri_string, raw_len );
91
92	/* Start by chopping off the fragment, if it exists */
93	if ( ( tmp = strchr ( raw, '#' ) ) ) {
94	*(tmp++) = '\0';
95	uri->fragment = tmp;
96	}
97
98	/* Identify absolute/relative URI. We ignore schemes that are
99	* apparently only a single character long, since otherwise we
100	* misinterpret a DOS-style path name ("C:\path\to\file") as a
101	* URI with scheme="C",opaque="\path\to\file".
102	*/
103	if ( ( tmp = strchr ( raw, ':' ) ) && ( tmp > ( raw + 1 ) ) ) {
104	/* Absolute URI: identify hierarchical/opaque */
105	uri->scheme = raw;
106	*(tmp++) = '\0';
107	if ( *tmp == '/' ) {
108	/* Absolute URI with hierarchical part */
109	path = tmp;
110	} else {
111	/* Absolute URI with opaque part */
112	uri->opaque = tmp;
113	}
114	} else {
115	/* Relative URI */
116	path = raw;
117	}
118
119	/* If we don't have a path (i.e. we have an absolute URI with
120	* an opaque portion, we're already finished processing
121	*/
122	if ( ! path )
123	goto done;
124
125	/* Chop off the query, if it exists */
126	if ( ( tmp = strchr ( path, '?' ) ) ) {
127	*(tmp++) = '\0';
128	uri->query = tmp;
129	}
130
131	/* Identify net/absolute/relative path */
132	if ( strncmp ( path, "//", 2 ) == 0 ) {
133	/* Net path. If this is terminated by the first '/'
134	* of an absolute path, then we have no space for a
135	* terminator after the authority field, so shuffle
136	* the authority down by one byte, overwriting one of
137	* the two slashes.
138	*/
139	authority = ( path + 2 );
140	if ( ( tmp = strchr ( authority, '/' ) ) ) {
141	/* Shuffle down */
142	uri->path = tmp;
143	memmove ( ( authority - 1 ), authority,
144	( tmp - authority ) );
145	authority--;
146	*(--tmp) = '\0';
147	}
148	} else {
149	/* Absolute/relative path */
150	uri->path = path;
151	}
152
153	/* Split authority into user[:password] and host[:port] portions */
154	if ( ( tmp = strchr ( authority, '@' ) ) ) {
155	/* Has user[:password] */
156	*(tmp++) = '\0';
157	uri->host = tmp;
158	uri->user = authority;
159	if ( ( tmp = strchr ( authority, ':' ) ) ) {
160	/* Has password */
161	*(tmp++) = '\0';
162	uri->password = tmp;
163	}
164	} else {
165	/* No user:password */
166	uri->host = authority;
167	}
168
169	/* Split host into host[:port] */
170	if ( ( tmp = strchr ( uri->host, ':' ) ) ) {
171	*(tmp++) = '\0';
172	uri->port = tmp;
173	}
174
175	/* Decode fields that should be decoded */
176	for ( i = URI_FIRST_FIELD; i <= URI_LAST_FIELD; i++ ) {
177	const char *field = uri_get_field ( uri, i );
178	if ( field && ( URI_ENCODED & ( 1 << i ) ) )
179	uri_decode ( field, ( char * ) field,
180	strlen ( field ) + 1 /* NUL */ );
181	}
182
183	done:
184	DBG ( "URI \"%s\" split into", uri_string );
185	dump_uri ( uri );
186	DBG ( "\n" );
187
188	return uri;
189	}
190
191	/**
192	* Get port from URI
193	*
194	* @v uri URI, or NULL
195	* @v default_port Default port to use if none specified in URI
196	* @ret port Port
197	*/
198	unsigned int uri_port ( struct uri *uri, unsigned int default_port ) {
199	if ( ( ! uri ) \|\| ( ! uri->port ) )
200	return default_port;
201	return ( strtoul ( uri->port, NULL, 0 ) );
202	}
203
204	/**
205	* Unparse URI
206	*
207	* @v buf Buffer to fill with URI string
208	* @v size Size of buffer
209	* @v uri URI to write into buffer, or NULL
210	* @v fields Bitmask of fields to include in URI string, or URI_ALL
211	* @ret len Length of URI string
212	*/
213	int unparse_uri ( char buf, size_t size, struct uri uri,
214	unsigned int fields ) {
215	/* List of characters that typically go before certain fields */
216	static char separators[] = { /* scheme / 0, / opaque */ ':',
217	/* user / 0, / password */ ':',
218	/* host / '@', / port */ ':',
219	/* path / 0, / query */ '?',
220	/* fragment */ '#' };
221	int used = 0;
222	int i;
223
224	DBG ( "URI unparsing" );
225	dump_uri ( uri );
226	DBG ( "\n" );
227
228	/* Ensure buffer is NUL-terminated */
229	if ( size )
230	buf[0] = '\0';
231
232	/* Special-case NULL URI */
233	if ( ! uri )
234	return 0;
235
236	/* Iterate through requested fields */
237	for ( i = URI_FIRST_FIELD; i <= URI_LAST_FIELD; i++ ) {
238	const char *field = uri_get_field ( uri, i );
239	char sep = separators[i];
240
241	/* Ensure `fields' only contains bits for fields that exist */
242	if ( ! field )
243	fields &= ~( 1 << i );
244
245	/* Store this field if we were asked to */
246	if ( fields & ( 1 << i ) ) {
247	/* Print :// if we're non-opaque and had a scheme */
248	if ( ( fields & URI_SCHEME_BIT ) &&
249	( i > URI_OPAQUE ) ) {
250	used += ssnprintf ( buf + used, size - used,
251	"://" );
252	/* Only print :// once */
253	fields &= ~URI_SCHEME_BIT;
254	}
255
256	/* Only print separator if an earlier field exists */
257	if ( sep && ( fields & ( ( 1 << i ) - 1 ) ) )
258	used += ssnprintf ( buf + used, size - used,
259	"%c", sep );
260
261	/* Print contents of field, possibly encoded */
262	if ( URI_ENCODED & ( 1 << i ) )
263	used += uri_encode ( field, buf + used,
264	size - used, i );
265	else
266	used += ssnprintf ( buf + used, size - used,
267	"%s", field );
268	}
269	}
270
271	return used;
272	}
273
274	/**
275	* Duplicate URI
276	*
277	* @v uri URI
278	* @ret uri Duplicate URI
279	*
280	* Creates a modifiable copy of a URI.
281	*/
282	struct uri * uri_dup ( struct uri *uri ) {
283	size_t len = ( unparse_uri ( NULL, 0, uri, URI_ALL ) + 1 );
284	char buf[len];
285
286	unparse_uri ( buf, len, uri, URI_ALL );
287	return parse_uri ( buf );
288	}
289
/**
 * Resolve base+relative path
 *
 * @v base_path		Base path
 * @v relative_path	Relative path
 * @ret resolved_path	Resolved path, or NULL on allocation failure
 *
 * Takes a base path (e.g. "/var/lib/tftpboot/vmlinuz") and a relative
 * path (e.g. "initrd.gz") and produces a new path
 * (e.g. "/var/lib/tftpboot/initrd.gz").  Note that any non-directory
 * portion of the base path will automatically be stripped; this
 * matches the semantics used when resolving the path component of
 * URIs.
 *
 * The result is allocated by strdup() or asprintf() and is owned by
 * the caller.
 */
char * resolve_path ( const char *base_path,
		      const char *relative_path ) {
	size_t copy_len = ( strlen ( base_path ) + 1 /* NUL */ );
	char scratch[copy_len];
	char *dir;
	const char *joiner;
	char *result;

	/* An absolute "relative" path replaces the base entirely */
	if ( relative_path[0] == '/' )
		return strdup ( relative_path );

	/* dirname() may modify its argument, so work on a copy */
	memcpy ( scratch, base_path, copy_len );
	dir = dirname ( scratch );

	/* Consume leading "." and ".." elements; each ".." climbs one
	 * directory.  A leading dot that is part of an ordinary name
	 * (e.g. ".hidden" or "..x") is left untouched.
	 */
	while ( relative_path[0] == '.' ) {
		if ( relative_path[1] == '\0' ) {
			/* Trailing "." */
			relative_path += 1;
		} else if ( relative_path[1] == '/' ) {
			/* "./" */
			relative_path += 2;
		} else if ( ( relative_path[1] == '.' ) &&
			    ( relative_path[2] == '\0' ) ) {
			/* Trailing ".." */
			dir = dirname ( dir );
			relative_path += 2;
		} else if ( ( relative_path[1] == '.' ) &&
			    ( relative_path[2] == '/' ) ) {
			/* "../" */
			dir = dirname ( dir );
			relative_path += 3;
		} else {
			/* Ordinary name that happens to start with '.' */
			break;
		}
	}

	/* Join with a single '/', unless the directory already ends in one */
	joiner = ( ( dir[ strlen ( dir ) - 1 ] == '/' ) ? "" : "/" );
	if ( asprintf ( &result, "%s%s%s", dir, joiner, relative_path ) < 0 )
		return NULL;

	return result;
}
351
352	/**
353	* Resolve base+relative URI
354	*
355	* @v base_uri Base URI, or NULL
356	* @v relative_uri Relative URI
357	* @ret resolved_uri Resolved URI
358	*
359	* Takes a base URI (e.g. "http://etherboot.org/kernels/vmlinuz" and a
360	* relative URI (e.g. "../initrds/initrd.gz") and produces a new URI
361	* (e.g. "http://etherboot.org/initrds/initrd.gz").
362	*/
363	struct uri * resolve_uri ( struct uri *base_uri,
364	struct uri *relative_uri ) {
365	struct uri tmp_uri;
366	char *tmp_path = NULL;
367	struct uri *new_uri;
368
369	/* If relative URI is absolute, just re-use it */
370	if ( uri_is_absolute ( relative_uri ) \|\| ( ! base_uri ) )
371	return uri_get ( relative_uri );
372
373	/* Mangle URI */
374	memcpy ( &tmp_uri, base_uri, sizeof ( tmp_uri ) );
375	if ( relative_uri->path ) {
376	tmp_path = resolve_path ( ( base_uri->path ?
377	base_uri->path : "/" ),
378	relative_uri->path );
379	tmp_uri.path = tmp_path;
380	tmp_uri.query = relative_uri->query;
381	tmp_uri.fragment = relative_uri->fragment;
382	} else if ( relative_uri->query ) {
383	tmp_uri.query = relative_uri->query;
384	tmp_uri.fragment = relative_uri->fragment;
385	} else if ( relative_uri->fragment ) {
386	tmp_uri.fragment = relative_uri->fragment;
387	}
388
389	/* Create demangled URI */
390	new_uri = uri_dup ( &tmp_uri );
391	free ( tmp_path );
392	return new_uri;
393	}
394
395	/**
396	* Test for unreserved URI characters
397	*
398	* @v c Character to test
399	* @v field Field of URI in which character lies
400	* @ret is_unreserved Character is an unreserved character
401	*/
402	static int is_unreserved_uri_char ( int c, int field ) {
403	/* According to RFC3986, the unreserved character set is
404	*
405	* A-Z a-z 0-9 - _ . ~
406	*
407	* but we also pass & ; = in queries, / in paths,
408	* and everything in opaques
409	*/
410	int ok = ( isupper ( c ) \|\| islower ( c ) \|\| isdigit ( c ) \|\|
411	( c == '-' ) \|\| ( c == '_' ) \|\|
412	( c == '.' ) \|\| ( c == '~' ) );
413
414	if ( field == URI_QUERY )
415	ok = ok \|\| ( c == ';' ) \|\| ( c == '&' ) \|\| ( c == '=' );
416
417	if ( field == URI_PATH )
418	ok = ok \|\| ( c == '/' );
419
420	if ( field == URI_OPAQUE )
421	ok = 1;
422
423	return ok;
424	}
425
/**
 * URI-encode string
 *
 * @v raw_string	String to be URI-encoded
 * @v buf		Buffer to contain encoded string
 * @v len		Length of buffer
 * @v field		Field of URI in which string lies
 * @ret len		Length of encoded string (excluding NUL)
 *
 * Characters accepted by is_unreserved_uri_char() for @c field are
 * copied unchanged; every other character is written as "%XX" with
 * two upper-case hexadecimal digits.
 *
 * NOTE(review): the buffer pointer and remaining size keep advancing
 * by whatever ssnprintf() returns, so this relies on ssnprintf()
 * tolerating a non-positive size without writing -- confirm against
 * its definition.
 */
size_t uri_encode ( const char *raw_string, char *buf, ssize_t len,
		    int field ) {
	ssize_t remaining = len;
	size_t used;
	unsigned char c;

	/* Ensure an empty input still yields a terminated string */
	if ( len > 0 )
		buf[0] = '\0';

	while ( ( c = *(raw_string++) ) ) {
		if ( is_unreserved_uri_char ( c, field ) ) {
			used = ssnprintf ( buf, remaining, "%c", c );
		} else {
			used = ssnprintf ( buf, remaining, "%%%02X", c );
		}
		buf += used;
		remaining -= used;
	}

	return ( len - remaining );
}
456
/**
 * Convert a hexadecimal digit to its numeric value
 *
 * @v digit		Character to convert
 * @ret value		Value in the range 0-15, or -1 if not a hex digit
 */
static int uri_hex_digit ( char digit ) {
	if ( ( digit >= '0' ) && ( digit <= '9' ) )
		return ( digit - '0' );
	if ( ( digit >= 'a' ) && ( digit <= 'f' ) )
		return ( digit - 'a' + 10 );
	if ( ( digit >= 'A' ) && ( digit <= 'F' ) )
		return ( digit - 'A' + 10 );
	return -1;
}

/**
 * Decode URI-encoded string
 *
 * @v encoded_string	URI-encoded string
 * @v buf		Buffer to contain decoded string
 * @v len		Length of buffer
 * @ret len		Length of decoded string (excluding NUL)
 *
 * This function may be used in-place, with @a buf the same as
 * @a encoded_string: every step consumes at least one input character
 * and produces at most one output character, so the write position
 * never overtakes the read position.
 *
 * A '%' followed by one or two hexadecimal digits is replaced by the
 * character with that value.  A '%' that is not followed by any
 * hexadecimal digit is copied through as a literal '%' rather than
 * being turned into an embedded NUL.
 *
 * The return value counts every decoded character, including those
 * that did not fit in the buffer.  Nothing is written when @a len is
 * zero or negative.
 */
size_t uri_decode ( const char *encoded_string, char *buf, ssize_t len ) {
	ssize_t remaining;
	unsigned char c;
	int digit;

	for ( remaining = len; *encoded_string; remaining-- ) {
		c = *(encoded_string++);
		if ( ( c == '%' ) &&
		     ( ( digit = uri_hex_digit ( *encoded_string ) ) >= 0 ) ) {
			/* First hex digit */
			c = digit;
			encoded_string++;
			/* Optional second hex digit */
			digit = uri_hex_digit ( *encoded_string );
			if ( digit >= 0 ) {
				c = ( ( c << 4 ) | digit );
				encoded_string++;
			}
		}
		/* Always leave room for the terminating NUL */
		if ( remaining > 1 )
			*buf++ = c;
	}

	/* Terminate only if the buffer genuinely has space */
	if ( len > 0 )
		*buf = 0;

	return ( len - remaining );
}

Note: See TracBrowser for help on using the repository browser.

Download in other formats: