/**
 * uri.c: set of generic URI related routines
 *
 * Reference: RFCs 3986, 2732 and 2373
 *
 * See Copyright for the status of this software.
 *
 * daniel@veillard.com
 */

#define IN_LIBXML
#include "libxml.h"

#include <limits.h>
#include <string.h>

#include <libxml/xmlmemory.h>
#include <libxml/uri.h>
#include <libxml/globals.h>
#include <libxml/xmlerror.h>

static void xmlCleanURI(xmlURIPtr uri);
22
23
/*
 * Character classes inherited from RFC 2396 and still used by the legacy
 * handling code. Unless stated otherwise these macros take a character
 * value; IS_UNWISE() and NEXT() take a pointer into the string.
 */

/*
 * lowalpha = "a" | "b" | "c" | "d" | "e" | "f" | "g" | "h" | "i" | "j" |
 *            "k" | "l" | "m" | "n" | "o" | "p" | "q" | "r" | "s" | "t" |
 *            "u" | "v" | "w" | "x" | "y" | "z"
 */
#define IS_LOWALPHA(x) (((x) >= 'a') && ((x) <= 'z'))

/*
 * upalpha = "A" | "B" | "C" | "D" | "E" | "F" | "G" | "H" | "I" | "J" |
 *           "K" | "L" | "M" | "N" | "O" | "P" | "Q" | "R" | "S" | "T" |
 *           "U" | "V" | "W" | "X" | "Y" | "Z"
 */
#define IS_UPALPHA(x) (((x) >= 'A') && ((x) <= 'Z'))

/*
 * Old rule from 2396 used in legacy handling code
 * alpha    = lowalpha | upalpha
 */
#define IS_ALPHA(x) (IS_LOWALPHA(x) || IS_UPALPHA(x))

#ifdef IS_DIGIT
#undef IS_DIGIT
#endif
/*
 * digit = "0" | "1" | "2" | "3" | "4" | "5" | "6" | "7" | "8" | "9"
 */
#define IS_DIGIT(x) (((x) >= '0') && ((x) <= '9'))

/*
 * alphanum = alpha | digit
 */
#define IS_ALPHANUM(x) (IS_ALPHA(x) || IS_DIGIT(x))

/*
 * mark = "-" | "_" | "." | "!" | "~" | "*" | "'" | "(" | ")"
 */
#define IS_MARK(x) (((x) == '-') || ((x) == '_') || ((x) == '.') ||     \
    ((x) == '!') || ((x) == '~') || ((x) == '*') || ((x) == '\'') ||    \
    ((x) == '(') || ((x) == ')'))

/*
 * unwise = "{" | "}" | "|" | "\" | "^" | "[" | "]" | "`"
 *
 * Takes a pointer to the character to test.
 */
#define IS_UNWISE(p)                                                    \
      (((*(p) == '{')) || ((*(p) == '}')) || ((*(p) == '|')) ||         \
       ((*(p) == '\\')) || ((*(p) == '^')) || ((*(p) == '[')) ||        \
       ((*(p) == ']')) || ((*(p) == '`')))

/*
 * reserved = ";" | "/" | "?" | ":" | "@" | "&" | "=" | "+" | "$" | "," |
 *            "[" | "]"
 */
#define IS_RESERVED(x) (((x) == ';') || ((x) == '/') || ((x) == '?') || \
        ((x) == ':') || ((x) == '@') || ((x) == '&') || ((x) == '=') || \
        ((x) == '+') || ((x) == '$') || ((x) == ',') || ((x) == '[') || \
        ((x) == ']'))

/*
 * unreserved = alphanum | mark
 */
#define IS_UNRESERVED(x) (IS_ALPHANUM(x) || IS_MARK(x))

/*
 * Skip to next pointer char, handle escaped sequences: a '%' advances the
 * pointer by three characters, anything else by one. The argument is fully
 * parenthesized so that any lvalue expression (e.g. *pp) is advanced
 * correctly. The caller must have checked that a '%' is followed by two
 * more characters.
 */
#define NEXT(p) ((*(p) == '%') ? ((p) += 3) : ((p)++))

/*
 * Productions from the spec.
 *
 *    authority     = server | reg_name
 *    reg_name      = 1*( unreserved | escaped | "$" | "," |
 *                        ";" | ":" | "@" | "&" | "=" | "+" )
 *
 * path          = [ abs_path | opaque_part ]
 */

/*
 * Duplicate the first n characters of s through xmlStrndup(), casting the
 * result to char *.
 */
#define STRNDUP(s, n) ((char *) xmlStrndup((const xmlChar *)(s), (n)))

/************************************************************************
 *									*
 *                         RFC 3986 parser				*
 *									*
 ************************************************************************/

/*
 * All ISA_xxx(p) macros take a pointer to the character to classify.
 */
#define ISA_DIGIT(p) ((*(p) >= '0') && (*(p) <= '9'))
#define ISA_ALPHA(p) (((*(p) >= 'a') && (*(p) <= 'z')) ||		\
                      ((*(p) >= 'A') && (*(p) <= 'Z')))
#define ISA_HEXDIG(p)							\
       (ISA_DIGIT(p) || ((*(p) >= 'a') && (*(p) <= 'f')) ||		\
        ((*(p) >= 'A') && (*(p) <= 'F')))

/*
 *    sub-delims    = "!" / "$" / "&" / "'" / "(" / ")"
 *                     / "*" / "+" / "," / ";" / "="
 */
#define ISA_SUB_DELIM(p)						\
      (((*(p) == '!')) || ((*(p) == '$')) || ((*(p) == '&')) ||		\
       ((*(p) == '\'')) || ((*(p) == '(')) || ((*(p) == ')')) ||	\
       ((*(p) == '*')) || ((*(p) == '+')) || ((*(p) == ',')) ||		\
       ((*(p) == ';')) || ((*(p) == '=')))

/*
 *    gen-delims    = ":" / "/" / "?" / "#" / "[" / "]" / "@"
 */
#define ISA_GEN_DELIM(p)						\
      (((*(p) == ':')) || ((*(p) == '/')) || ((*(p) == '?')) ||         \
       ((*(p) == '#')) || ((*(p) == '[')) || ((*(p) == ']')) ||         \
       ((*(p) == '@')))

/*
 *    reserved      = gen-delims / sub-delims
 */
#define ISA_RESERVED(p) (ISA_GEN_DELIM(p) || (ISA_SUB_DELIM(p)))

/*
 *    unreserved    = ALPHA / DIGIT / "-" / "." / "_" / "~"
 */
#define ISA_UNRESERVED(p)						\
      ((ISA_ALPHA(p)) || (ISA_DIGIT(p)) || ((*(p) == '-')) ||		\
       ((*(p) == '.')) || ((*(p) == '_')) || ((*(p) == '~')))

/*
 *    pct-encoded   = "%" HEXDIG HEXDIG
 *
 * The two following characters are only read once the preceding one
 * matched, so a terminating 0 is never stepped over.
 */
#define ISA_PCT_ENCODED(p)						\
     ((*(p) == '%') && (ISA_HEXDIG((p) + 1)) && (ISA_HEXDIG((p) + 2)))

/*
 *    pchar         = unreserved / pct-encoded / sub-delims / ":" / "@"
 */
#define ISA_PCHAR(p)							\
     (ISA_UNRESERVED(p) || ISA_PCT_ENCODED(p) || ISA_SUB_DELIM(p) ||	\
      ((*(p) == ':')) || ((*(p) == '@')))

/**
166
 * xmlParse3986Scheme:
167
 * @uri:  pointer to an URI structure
168
 * @str:  pointer to the string to analyze
169
 *
170
 * Parse an URI scheme
171
 *
172
 * ALPHA *( ALPHA / DIGIT / "+" / "-" / "." )
173
 *
174
 * Returns 0 or the error code
175
 */
176
static int
177
xmlParse3986Scheme(xmlURIPtr uri, const char **str) {
178
    const char *cur;
179
180
    if (str == NULL)
181
	return(-1);
182
183
    cur = *str;
184
    if (!ISA_ALPHA(cur))
185
	return(2);
186
    cur++;
187
    while (ISA_ALPHA(cur) || ISA_DIGIT(cur) ||
188
           (*cur == '+') || (*cur == '-') || (*cur == '.')) cur++;
189
    if (uri != NULL) {
190
	if (uri->scheme != NULL) xmlFree(uri->scheme);
191
	uri->scheme = STRNDUP(*str, cur - *str);
192
    }
193
    *str = cur;
194
    return(0);
195
}
196
197
/**
198
 * xmlParse3986Fragment:
199
 * @uri:  pointer to an URI structure
200
 * @str:  pointer to the string to analyze
201
 *
202
 * Parse the query part of an URI
203
 *
204
 * fragment      = *( pchar / "/" / "?" )
205
 * NOTE: the strict syntax as defined by 3986 does not allow '[' and ']'
206
 *       in the fragment identifier but this is used very broadly for
207
 *       xpointer scheme selection, so we are allowing it here to not break
208
 *       for example all the DocBook processing chains.
209
 *
210
 * Returns 0 or the error code
211
 */
212
static int
213
xmlParse3986Fragment(xmlURIPtr uri, const char **str)
214
{
215
    const char *cur;
216
217
    if (str == NULL)
218
        return (-1);
219
220
    cur = *str;
221
222
    while ((ISA_PCHAR(cur)) || (*cur == '/') || (*cur == '?') ||
223
           (*cur == '[') || (*cur == ']') ||
224
           ((uri != NULL) && (uri->cleanup & 1) && (IS_UNWISE(cur))))
225
        NEXT(cur);
226
    if (uri != NULL) {
227
        if (uri->fragment != NULL)
228
            xmlFree(uri->fragment);
229
	if (uri->cleanup & 2)
230
	    uri->fragment = STRNDUP(*str, cur - *str);
231
	else
232
	    uri->fragment = xmlURIUnescapeString(*str, cur - *str, NULL);
233
    }
234
    *str = cur;
235
    return (0);
236
}
237
238
/**
239
 * xmlParse3986Query:
240
 * @uri:  pointer to an URI structure
241
 * @str:  pointer to the string to analyze
242
 *
243
 * Parse the query part of an URI
244
 *
245
 * query = *uric
246
 *
247
 * Returns 0 or the error code
248
 */
249
static int
250
xmlParse3986Query(xmlURIPtr uri, const char **str)
251
{
252
    const char *cur;
253
254
    if (str == NULL)
255
        return (-1);
256
257
    cur = *str;
258
259
    while ((ISA_PCHAR(cur)) || (*cur == '/') || (*cur == '?') ||
260
           ((uri != NULL) && (uri->cleanup & 1) && (IS_UNWISE(cur))))
261
        NEXT(cur);
262
    if (uri != NULL) {
263
        if (uri->query != NULL)
264
            xmlFree(uri->query);
265
	if (uri->cleanup & 2)
266
	    uri->query = STRNDUP(*str, cur - *str);
267
	else
268
	    uri->query = xmlURIUnescapeString(*str, cur - *str, NULL);
269
270
	/* Save the raw bytes of the query as well.
271
	 * See: http://mail.gnome.org/archives/xml/2007-April/thread.html#00114
272
	 */
273
	if (uri->query_raw != NULL)
274
	    xmlFree (uri->query_raw);
275
	uri->query_raw = STRNDUP (*str, cur - *str);
276
    }
277
    *str = cur;
278
    return (0);
279
}
280
281
/**
282
 * xmlParse3986Port:
283
 * @uri:  pointer to an URI structure
284
 * @str:  the string to analyze
285
 *
286
 * Parse a port  part and fills in the appropriate fields
287
 * of the @uri structure
288
 *
289
 * port          = *DIGIT
290
 *
291
 * Returns 0 or the error code
292
 */
293
static int
294
xmlParse3986Port(xmlURIPtr uri, const char **str)
295
{
296
    const char *cur = *str;
297
298
    if (ISA_DIGIT(cur)) {
299
	if (uri != NULL)
300
	    uri->port = 0;
301
	while (ISA_DIGIT(cur)) {
302
	    if (uri != NULL)
303
		uri->port = uri->port * 10 + (*cur - '0');
304
	    cur++;
305
	}
306
	*str = cur;
307
	return(0);
308
    }
309
    return(1);
310
}
311
312
/**
313
 * xmlParse3986Userinfo:
314
 * @uri:  pointer to an URI structure
315
 * @str:  the string to analyze
316
 *
317
 * Parse an user informations part and fills in the appropriate fields
318
 * of the @uri structure
319
 *
320
 * userinfo      = *( unreserved / pct-encoded / sub-delims / ":" )
321
 *
322
 * Returns 0 or the error code
323
 */
324
static int
325
xmlParse3986Userinfo(xmlURIPtr uri, const char **str)
326
{
327
    const char *cur;
328
329
    cur = *str;
330
    while (ISA_UNRESERVED(cur) || ISA_PCT_ENCODED(cur) ||
331
           ISA_SUB_DELIM(cur) || (*cur == ':'))
332
	NEXT(cur);
333
    if (*cur == '@') {
334
	if (uri != NULL) {
335
	    if (uri->user != NULL) xmlFree(uri->user);
336
	    if (uri->cleanup & 2)
337
		uri->user = STRNDUP(*str, cur - *str);
338
	    else
339
		uri->user = xmlURIUnescapeString(*str, cur - *str, NULL);
340
	}
341
	*str = cur;
342
	return(0);
343
    }
344
    return(1);
345
}
346
347
/**
 * xmlParse3986DecOctet:
 * @str:  the string to analyze
 *
 *    dec-octet     = DIGIT                 ; 0-9
 *                  / %x31-39 DIGIT         ; 10-99
 *                  / "1" 2DIGIT            ; 100-199
 *                  / "2" %x30-34 DIGIT     ; 200-249
 *                  / "25" %x30-35          ; 250-255
 *
 * Skip a dec-octet. At most three digits are consumed; *@str is only
 * updated on success.
 *
 * Returns 0 if found and skipped, 1 otherwise
 */
static int
xmlParse3986DecOctet(const char **str) {
    const char *cur = *str;
    int len;

    if ((cur[0] < '0') || (cur[0] > '9'))
        return(1);

    /*
     * cur[1] is only read after cur[0] was seen to be a digit, and cur[2]
     * only after cur[1] was, so the terminating 0 is never stepped over.
     */
    if ((cur[1] < '0') || (cur[1] > '9')) {
        len = 1;                                /* 0-9 */
    } else if ((cur[2] < '0') || (cur[2] > '9')) {
        if (cur[0] == '0')                      /* no leading zero */
            return(1);
        len = 2;                                /* 10-99 */
    } else if (cur[0] == '1') {
        len = 3;                                /* 100-199 */
    } else if ((cur[0] == '2') && (cur[1] <= '4')) {
        len = 3;                                /* 200-249 */
    } else if ((cur[0] == '2') && (cur[1] == '5') && (cur[2] <= '5')) {
        len = 3;                                /* 250-255 */
    } else {
        return(1);
    }

    *str = cur + len;
    return(0);
}

/**
385
 * xmlParse3986Host:
386
 * @uri:  pointer to an URI structure
387
 * @str:  the string to analyze
388
 *
389
 * Parse an host part and fills in the appropriate fields
390
 * of the @uri structure
391
 *
392
 * host          = IP-literal / IPv4address / reg-name
393
 * IP-literal    = "[" ( IPv6address / IPvFuture  ) "]"
394
 * IPv4address   = dec-octet "." dec-octet "." dec-octet "." dec-octet
395
 * reg-name      = *( unreserved / pct-encoded / sub-delims )
396
 *
397
 * Returns 0 or the error code
398
 */
399
static int
400
xmlParse3986Host(xmlURIPtr uri, const char **str)
401
{
402
    const char *cur = *str;
403
    const char *host;
404
405
    host = cur;
406
    /*
407
     * IPv6 and future adressing scheme are enclosed between brackets
408
     */
409
    if (*cur == '[') {
410
        cur++;
411
	while ((*cur != ']') && (*cur != 0))
412
	    cur++;
413
	if (*cur != ']')
414
	    return(1);
415
	cur++;
416
	goto found;
417
    }
418
    /*
419
     * try to parse an IPv4
420
     */
421
    if (ISA_DIGIT(cur)) {
422
        if (xmlParse3986DecOctet(&cur) != 0)
423
	    goto not_ipv4;
424
	if (*cur != '.')
425
	    goto not_ipv4;
426
	cur++;
427
        if (xmlParse3986DecOctet(&cur) != 0)
428
	    goto not_ipv4;
429
	if (*cur != '.')
430
	    goto not_ipv4;
431
        if (xmlParse3986DecOctet(&cur) != 0)
432
	    goto not_ipv4;
433
	if (*cur != '.')
434
	    goto not_ipv4;
435
        if (xmlParse3986DecOctet(&cur) != 0)
436
	    goto not_ipv4;
437
	goto found;
438
not_ipv4:
439
        cur = *str;
440
    }
441
    /*
442
     * then this should be a hostname which can be empty
443
     */
444
    while (ISA_UNRESERVED(cur) || ISA_PCT_ENCODED(cur) || ISA_SUB_DELIM(cur))
445
        NEXT(cur);
446
found:
447
    if (uri != NULL) {
448
	if (uri->authority != NULL) xmlFree(uri->authority);
449
	uri->authority = NULL;
450
	if (uri->server != NULL) xmlFree(uri->server);
451
	if (cur != host) {
452
	    if (uri->cleanup & 2)
453
		uri->server = STRNDUP(host, cur - host);
454
	    else
455
		uri->server = xmlURIUnescapeString(host, cur - host, NULL);
456
	} else
457
	    uri->server = NULL;
458
    }
459
    *str = cur;
460
    return(0);
461
}
462
463
/**
464
 * xmlParse3986Authority:
465
 * @uri:  pointer to an URI structure
466
 * @str:  the string to analyze
467
 *
468
 * Parse an authority part and fills in the appropriate fields
469
 * of the @uri structure
470
 *
471
 * authority     = [ userinfo "@" ] host [ ":" port ]
472
 *
473
 * Returns 0 or the error code
474
 */
475
static int
476
xmlParse3986Authority(xmlURIPtr uri, const char **str)
477
{
478
    const char *cur;
479
    int ret;
480
481
    cur = *str;
482
    /*
483
     * try to parse an userinfo and check for the trailing @
484
     */
485
    ret = xmlParse3986Userinfo(uri, &cur);
486
    if ((ret != 0) || (*cur != '@'))
487
        cur = *str;
488
    else
489
        cur++;
490
    ret = xmlParse3986Host(uri, &cur);
491
    if (ret != 0) return(ret);
492
    if (*cur == ':') {
493
        cur++;
494
        ret = xmlParse3986Port(uri, &cur);
495
	if (ret != 0) return(ret);
496
    }
497
    *str = cur;
498
    return(0);
499
}
500
501
/**
 * xmlParse3986Segment:
 * @str:  the string to analyze
 * @forbid: an optional forbidden character
 * @empty: allow an empty segment
 *
 * Skip a path segment, stopping at the first character which is not a
 * pchar or which is equal to @forbid.
 *
 * segment       = *pchar
 * segment-nz    = 1*pchar
 * segment-nz-nc = 1*( unreserved / pct-encoded / sub-delims / "@" )
 *               ; non-zero-length segment without any colon ":"
 *
 * Returns 0 on success, 1 if the first character is not a pchar and
 * @empty is 0
 */
static int
xmlParse3986Segment(const char **str, char forbid, int empty)
{
    const char *cur = *str;

    if (!ISA_PCHAR(cur))
        return(empty ? 0 : 1);

    for (; ISA_PCHAR(cur); NEXT(cur)) {
        if (*cur == forbid)
            break;
    }

    *str = cur;
    return (0);
}

/**
535
 * xmlParse3986PathAbEmpty:
536
 * @uri:  pointer to an URI structure
537
 * @str:  the string to analyze
538
 *
539
 * Parse an path absolute or empty and fills in the appropriate fields
540
 * of the @uri structure
541
 *
542
 * path-abempty  = *( "/" segment )
543
 *
544
 * Returns 0 or the error code
545
 */
546
static int
547
xmlParse3986PathAbEmpty(xmlURIPtr uri, const char **str)
548
{
549
    const char *cur;
550
    int ret;
551
552
    cur = *str;
553
554
    while (*cur == '/') {
555
        cur++;
556
	ret = xmlParse3986Segment(&cur, 0, 1);
557
	if (ret != 0) return(ret);
558
    }
559
    if (uri != NULL) {
560
	if (uri->path != NULL) xmlFree(uri->path);
561
	if (uri->cleanup & 2)
562
	    uri->path = STRNDUP(*str, cur - *str);
563
	else
564
	    uri->path = xmlURIUnescapeString(*str, cur - *str, NULL);
565
    }
566
    *str = cur;
567
    return (0);
568
}
569
570
/**
571
 * xmlParse3986PathAbsolute:
572
 * @uri:  pointer to an URI structure
573
 * @str:  the string to analyze
574
 *
575
 * Parse an path absolute and fills in the appropriate fields
576
 * of the @uri structure
577
 *
578
 * path-absolute = "/" [ segment-nz *( "/" segment ) ]
579
 *
580
 * Returns 0 or the error code
581
 */
582
static int
583
xmlParse3986PathAbsolute(xmlURIPtr uri, const char **str)
584
{
585
    const char *cur;
586
    int ret;
587
588
    cur = *str;
589
590
    if (*cur != '/')
591
        return(1);
592
    cur++;
593
    ret = xmlParse3986Segment(&cur, 0, 0);
594
    if (ret == 0) {
595
	while (*cur == '/') {
596
	    cur++;
597
	    ret = xmlParse3986Segment(&cur, 0, 1);
598
	    if (ret != 0) return(ret);
599
	}
600
    }
601
    if (uri != NULL) {
602
	if (uri->path != NULL) xmlFree(uri->path);
603
	if (uri->cleanup & 2)
604
	    uri->path = STRNDUP(*str, cur - *str);
605
	else
606
	    uri->path = xmlURIUnescapeString(*str, cur - *str, NULL);
607
    }
608
    *str = cur;
609
    return (0);
610
}
611
612
/**
613
 * xmlParse3986PathRootless:
614
 * @uri:  pointer to an URI structure
615
 * @str:  the string to analyze
616
 *
617
 * Parse an path without root and fills in the appropriate fields
618
 * of the @uri structure
619
 *
620
 * path-rootless = segment-nz *( "/" segment )
621
 *
622
 * Returns 0 or the error code
623
 */
624
static int
625
xmlParse3986PathRootless(xmlURIPtr uri, const char **str)
626
{
627
    const char *cur;
628
    int ret;
629
630
    cur = *str;
631
632
    ret = xmlParse3986Segment(&cur, 0, 0);
633
    if (ret != 0) return(ret);
634
    while (*cur == '/') {
635
        cur++;
636
	ret = xmlParse3986Segment(&cur, 0, 1);
637
	if (ret != 0) return(ret);
638
    }
639
    if (uri != NULL) {
640
	if (uri->path != NULL) xmlFree(uri->path);
641
	if (uri->cleanup & 2)
642
	    uri->path = STRNDUP(*str, cur - *str);
643
	else
644
	    uri->path = xmlURIUnescapeString(*str, cur - *str, NULL);
645
    }
646
    *str = cur;
647
    return (0);
648
}
649
650
/**
651
 * xmlParse3986PathNoScheme:
652
 * @uri:  pointer to an URI structure
653
 * @str:  the string to analyze
654
 *
655
 * Parse an path which is not a scheme and fills in the appropriate fields
656
 * of the @uri structure
657
 *
658
 * path-noscheme = segment-nz-nc *( "/" segment )
659
 *
660
 * Returns 0 or the error code
661
 */
662
static int
663
xmlParse3986PathNoScheme(xmlURIPtr uri, const char **str)
664
{
665
    const char *cur;
666
    int ret;
667
668
    cur = *str;
669
670
    ret = xmlParse3986Segment(&cur, ':', 0);
671
    if (ret != 0) return(ret);
672
    while (*cur == '/') {
673
        cur++;
674
	ret = xmlParse3986Segment(&cur, 0, 1);
675
	if (ret != 0) return(ret);
676
    }
677
    if (uri != NULL) {
678
	if (uri->path != NULL) xmlFree(uri->path);
679
	if (uri->cleanup & 2)
680
	    uri->path = STRNDUP(*str, cur - *str);
681
	else
682
	    uri->path = xmlURIUnescapeString(*str, cur - *str, NULL);
683
    }
684
    *str = cur;
685
    return (0);
686
}
687
688
/**
689
 * xmlParse3986HierPart:
690
 * @uri:  pointer to an URI structure
691
 * @str:  the string to analyze
692
 *
693
 * Parse an hierarchical part and fills in the appropriate fields
694
 * of the @uri structure
695
 *
696
 * hier-part     = "//" authority path-abempty
697
 *                / path-absolute
698
 *                / path-rootless
699
 *                / path-empty
700
 *
701
 * Returns 0 or the error code
702
 */
703
static int
704
xmlParse3986HierPart(xmlURIPtr uri, const char **str)
705
{
706
    const char *cur;
707
    int ret;
708
709
    cur = *str;
710
711
    if ((*cur == '/') && (*(cur + 1) == '/')) {
712
        cur += 2;
713
	ret = xmlParse3986Authority(uri, &cur);
714
	if (ret != 0) return(ret);
715
	ret = xmlParse3986PathAbEmpty(uri, &cur);
716
	if (ret != 0) return(ret);
717
	*str = cur;
718
	return(0);
719
    } else if (*cur == '/') {
720
        ret = xmlParse3986PathAbsolute(uri, &cur);
721
	if (ret != 0) return(ret);
722
    } else if (ISA_PCHAR(cur)) {
723
        ret = xmlParse3986PathRootless(uri, &cur);
724
	if (ret != 0) return(ret);
725
    } else {
726
	/* path-empty is effectively empty */
727
	if (uri != NULL) {
728
	    if (uri->path != NULL) xmlFree(uri->path);
729
	    uri->path = NULL;
730
	}
731
    }
732
    *str = cur;
733
    return (0);
734
}
735
736
/**
737
 * xmlParse3986RelativeRef:
738
 * @uri:  pointer to an URI structure
739
 * @str:  the string to analyze
740
 *
741
 * Parse an URI string and fills in the appropriate fields
742
 * of the @uri structure
743
 *
744
 * relative-ref  = relative-part [ "?" query ] [ "#" fragment ]
745
 * relative-part = "//" authority path-abempty
746
 *               / path-absolute
747
 *               / path-noscheme
748
 *               / path-empty
749
 *
750
 * Returns 0 or the error code
751
 */
752
static int
753
xmlParse3986RelativeRef(xmlURIPtr uri, const char *str) {
754
    int ret;
755
756
    if ((*str == '/') && (*(str + 1) == '/')) {
757
        str += 2;
758
	ret = xmlParse3986Authority(uri, &str);
759
	if (ret != 0) return(ret);
760
	ret = xmlParse3986PathAbEmpty(uri, &str);
761
	if (ret != 0) return(ret);
762
    } else if (*str == '/') {
763
	ret = xmlParse3986PathAbsolute(uri, &str);
764
	if (ret != 0) return(ret);
765
    } else if (ISA_PCHAR(str)) {
766
        ret = xmlParse3986PathNoScheme(uri, &str);
767
	if (ret != 0) return(ret);
768
    } else {
769
	/* path-empty is effectively empty */
770
	if (uri != NULL) {
771
	    if (uri->path != NULL) xmlFree(uri->path);
772
	    uri->path = NULL;
773
	}
774
    }
775
776
    if (*str == '?') {
777
	str++;
778
	ret = xmlParse3986Query(uri, &str);
779
	if (ret != 0) return(ret);
780
    }
781
    if (*str == '#') {
782
	str++;
783
	ret = xmlParse3986Fragment(uri, &str);
784
	if (ret != 0) return(ret);
785
    }
786
    if (*str != 0) {
787
	xmlCleanURI(uri);
788
	return(1);
789
    }
790
    return(0);
791
}
792
793
794
/**
795
 * xmlParse3986URI:
796
 * @uri:  pointer to an URI structure
797
 * @str:  the string to analyze
798
 *
799
 * Parse an URI string and fills in the appropriate fields
800
 * of the @uri structure
801
 *
802
 * scheme ":" hier-part [ "?" query ] [ "#" fragment ]
803
 *
804
 * Returns 0 or the error code
805
 */
806
static int
807
xmlParse3986URI(xmlURIPtr uri, const char *str) {
808
    int ret;
809
810
    ret = xmlParse3986Scheme(uri, &str);
811
    if (ret != 0) return(ret);
812
    if (*str != ':') {
813
	return(1);
814
    }
815
    str++;
816
    ret = xmlParse3986HierPart(uri, &str);
817
    if (ret != 0) return(ret);
818
    if (*str == '?') {
819
	str++;
820
	ret = xmlParse3986Query(uri, &str);
821
	if (ret != 0) return(ret);
822
    }
823
    if (*str == '#') {
824
	str++;
825
	ret = xmlParse3986Fragment(uri, &str);
826
	if (ret != 0) return(ret);
827
    }
828
    if (*str != 0) {
829
	xmlCleanURI(uri);
830
	return(1);
831
    }
832
    return(0);
833
}
834
835
/**
836
 * xmlParse3986URIReference:
837
 * @uri:  pointer to an URI structure
838
 * @str:  the string to analyze
839
 *
840
 * Parse an URI reference string and fills in the appropriate fields
841
 * of the @uri structure
842
 *
843
 * URI-reference = URI / relative-ref
844
 *
845
 * Returns 0 or the error code
846
 */
847
static int
848
xmlParse3986URIReference(xmlURIPtr uri, const char *str) {
849
    int ret;
850
851
    if (str == NULL)
852
	return(-1);
853
    xmlCleanURI(uri);
854
855
    /*
856
     * Try first to parse absolute refs, then fallback to relative if
857
     * it fails.
858
     */
859
    ret = xmlParse3986URI(uri, str);
860
    if (ret != 0) {
861
	xmlCleanURI(uri);
862
        ret = xmlParse3986RelativeRef(uri, str);
863
	if (ret != 0) {
864
	    xmlCleanURI(uri);
865
	    return(ret);
866
	}
867
    }
868
    return(0);
869
}
870
871
/**
872
 * xmlParseURI:
873
 * @str:  the URI string to analyze
874
 *
875
 * Parse an URI based on RFC 3986
876
 *
877
 * URI-reference = [ absoluteURI | relativeURI ] [ "#" fragment ]
878
 *
879
 * Returns a newly built xmlURIPtr or NULL in case of error
880
 */
881
xmlURIPtr
882
xmlParseURI(const char *str) {
883
    xmlURIPtr uri;
884
    int ret;
885
886
    if (str == NULL)
887
	return(NULL);
888
    uri = xmlCreateURI();
889
    if (uri != NULL) {
890
	ret = xmlParse3986URIReference(uri, str);
891
        if (ret) {
892
	    xmlFreeURI(uri);
893
	    return(NULL);
894
	}
895
    }
896
    return(uri);
897
}
898
899
/**
900
 * xmlParseURIReference:
901
 * @uri:  pointer to an URI structure
902
 * @str:  the string to analyze
903
 *
904
 * Parse an URI reference string based on RFC 3986 and fills in the
905
 * appropriate fields of the @uri structure
906
 *
907
 * URI-reference = URI / relative-ref
908
 *
909
 * Returns 0 or the error code
910
 */
911
int
912
xmlParseURIReference(xmlURIPtr uri, const char *str) {
913
    return(xmlParse3986URIReference(uri, str));
914
}
915
916
/**
917
 * xmlParseURIRaw:
918
 * @str:  the URI string to analyze
919
 * @raw:  if 1 unescaping of URI pieces are disabled
920
 *
921
 * Parse an URI but allows to keep intact the original fragments.
922
 *
923
 * URI-reference = URI / relative-ref
924
 *
925
 * Returns a newly built xmlURIPtr or NULL in case of error
926
 */
927
xmlURIPtr
928
xmlParseURIRaw(const char *str, int raw) {
929
    xmlURIPtr uri;
930
    int ret;
931
932
    if (str == NULL)
933
	return(NULL);
934
    uri = xmlCreateURI();
935
    if (uri != NULL) {
936
        if (raw) {
937
	    uri->cleanup |= 2;
938
	}
939
	ret = xmlParseURIReference(uri, str);
940
        if (ret) {
941
	    xmlFreeURI(uri);
942
	    return(NULL);
943
	}
944
    }
945
    return(uri);
946
}
947
948
/************************************************************************
949
 *									*
950
 *			Generic URI structure functions			*
951
 *									*
952
 ************************************************************************/
953
954
/**
955
 * xmlCreateURI:
956
 *
957
 * Simply creates an empty xmlURI
958
 *
959
 * Returns the new structure or NULL in case of error
960
 */
961
xmlURIPtr
962
xmlCreateURI(void) {
963
    xmlURIPtr ret;
964
965
    ret = (xmlURIPtr) xmlMalloc(sizeof(xmlURI));
966
    if (ret == NULL) {
967
	xmlGenericError(xmlGenericErrorContext,
968
		"xmlCreateURI: out of memory\n");
969
	return(NULL);
970
    }
971
    memset(ret, 0, sizeof(xmlURI));
972
    return(ret);
973
}
974
975
/**
976
 * xmlSaveUri:
977
 * @uri:  pointer to an xmlURI
978
 *
979
 * Save the URI as an escaped string
980
 *
981
 * Returns a new string (to be deallocated by caller)
982
 */
983
xmlChar *
984
xmlSaveUri(xmlURIPtr uri) {
985
    xmlChar *ret = NULL;
986
    xmlChar *temp;
987
    const char *p;
988
    int len;
989
    int max;
990
991
    if (uri == NULL) return(NULL);
992
993
994
    max = 80;
995
    ret = (xmlChar *) xmlMallocAtomic((max + 1) * sizeof(xmlChar));
996
    if (ret == NULL) {
997
	xmlGenericError(xmlGenericErrorContext,
998
		"xmlSaveUri: out of memory\n");
999
	return(NULL);
1000
    }
1001
    len = 0;
1002
1003
    if (uri->scheme != NULL) {
1004
	p = uri->scheme;
1005
	while (*p != 0) {
1006
	    if (len >= max) {
1007
		max *= 2;
1008
		temp = (xmlChar *) xmlRealloc(ret, (max + 1) * sizeof(xmlChar));
1009
		if (temp == NULL) {
1010
		    xmlGenericError(xmlGenericErrorContext,
1011
			    "xmlSaveUri: out of memory\n");
1012
		    xmlFree(ret);
1013
		    return(NULL);
1014
		}
1015
		ret = temp;
1016
	    }
1017
	    ret[len++] = *p++;
1018
	}
1019
	if (len >= max) {
1020
	    max *= 2;
1021
	    temp = (xmlChar *) xmlRealloc(ret, (max + 1) * sizeof(xmlChar));
1022
	    if (temp == NULL) {
1023
		xmlGenericError(xmlGenericErrorContext,
1024
			"xmlSaveUri: out of memory\n");
1025
		xmlFree(ret);
1026
		return(NULL);
1027
	    }
1028
	    ret = temp;
1029
	}
1030
	ret[len++] = ':';
1031
    }
1032
    if (uri->opaque != NULL) {
1033
	p = uri->opaque;
1034
	while (*p != 0) {
1035
	    if (len + 3 >= max) {
1036
		max *= 2;
1037
		temp = (xmlChar *) xmlRealloc(ret, (max + 1) * sizeof(xmlChar));
1038
		if (temp == NULL) {
1039
		    xmlGenericError(xmlGenericErrorContext,
1040
			    "xmlSaveUri: out of memory\n");
1041
		    xmlFree(ret);
1042
		    return(NULL);
1043
		}
1044
		ret = temp;
1045
	    }
1046
	    if (IS_RESERVED(*(p)) || IS_UNRESERVED(*(p)))
1047
		ret[len++] = *p++;
1048
	    else {
1049
		int val = *(unsigned char *)p++;
1050
		int hi = val / 0x10, lo = val % 0x10;
1051
		ret[len++] = '%';
1052
		ret[len++] = hi + (hi > 9? 'A'-10 : '0');
1053
		ret[len++] = lo + (lo > 9? 'A'-10 : '0');
1054
	    }
1055
	}
1056
    } else {
1057
	if (uri->server != NULL) {
1058
	    if (len + 3 >= max) {
1059
		max *= 2;
1060
		temp = (xmlChar *) xmlRealloc(ret, (max + 1) * sizeof(xmlChar));
1061
		if (temp == NULL) {
1062
		    xmlGenericError(xmlGenericErrorContext,
1063
			    "xmlSaveUri: out of memory\n");
1064
                  xmlFree(ret);  
1065
		    return(NULL);
1066
		}
1067
		ret = temp;
1068
	    }
1069
	    ret[len++] = '/';
1070
	    ret[len++] = '/';
1071
	    if (uri->user != NULL) {
1072
		p = uri->user;
1073
		while (*p != 0) {
1074
		    if (len + 3 >= max) {
1075
			max *= 2;
1076
			temp = (xmlChar *) xmlRealloc(ret,
1077
				(max + 1) * sizeof(xmlChar));
1078
			if (temp == NULL) {
1079
			    xmlGenericError(xmlGenericErrorContext,
1080
				    "xmlSaveUri: out of memory\n");
1081
			    xmlFree(ret);
1082
			    return(NULL);
1083
			}
1084
			ret = temp;
1085
		    }
1086
		    if ((IS_UNRESERVED(*(p))) ||
1087
			((*(p) == ';')) || ((*(p) == ':')) ||
1088
			((*(p) == '&')) || ((*(p) == '=')) ||
1089
			((*(p) == '+')) || ((*(p) == '$')) ||
1090
			((*(p) == ',')))
1091
			ret[len++] = *p++;
1092
		    else {
1093
			int val = *(unsigned char *)p++;
1094
			int hi = val / 0x10, lo = val % 0x10;
1095
			ret[len++] = '%';
1096
			ret[len++] = hi + (hi > 9? 'A'-10 : '0');
1097
			ret[len++] = lo + (lo > 9? 'A'-10 : '0');
1098
		    }
1099
		}
1100
		if (len + 3 >= max) {
1101
		    max *= 2;
1102
		    temp = (xmlChar *) xmlRealloc(ret,
1103
			    (max + 1) * sizeof(xmlChar));
1104
		    if (temp == NULL) {
1105
			xmlGenericError(xmlGenericErrorContext,
1106
				"xmlSaveUri: out of memory\n");
1107
			xmlFree(ret);
1108
			return(NULL);
1109
		    }
1110
		    ret = temp;
1111
		}
1112
		ret[len++] = '@';
1113
	    }
1114
	    p = uri->server;
1115
	    while (*p != 0) {
1116
		if (len >= max) {
1117
		    max *= 2;
1118
		    temp = (xmlChar *) xmlRealloc(ret,
1119
			    (max + 1) * sizeof(xmlChar));
1120
		    if (temp == NULL) {
1121
			xmlGenericError(xmlGenericErrorContext,
1122
				"xmlSaveUri: out of memory\n");
1123
			xmlFree(ret);
1124
			return(NULL);
1125
		    }
1126
		    ret = temp;
1127
		}
1128
		ret[len++] = *p++;
1129
	    }
1130
	    if (uri->port > 0) {
1131
		if (len + 10 >= max) {
1132
		    max *= 2;
1133
		    temp = (xmlChar *) xmlRealloc(ret,
1134
			    (max + 1) * sizeof(xmlChar));
1135
		    if (temp == NULL) {
1136
			xmlGenericError(xmlGenericErrorContext,
1137
				"xmlSaveUri: out of memory\n");
1138
                     xmlFree(ret);
1139
			return(NULL);
1140
		    }
1141
		    ret = temp;
1142
		}
1143
		len += snprintf((char *) &ret[len], max - len, ":%d", uri->port);
1144
	    }
1145
	} else if (uri->authority != NULL) {
1146
	    if (len + 3 >= max) {
1147
		max *= 2;
1148
		temp = (xmlChar *) xmlRealloc(ret,
1149
			(max + 1) * sizeof(xmlChar));
1150
		if (temp == NULL) {
1151
			xmlGenericError(xmlGenericErrorContext,
1152
				"xmlSaveUri: out of memory\n");
1153
                     xmlFree(ret);
1154
			return(NULL);
1155
		    }
1156
		    ret = temp;
1157
	    }
1158
	    ret[len++] = '/';
1159
	    ret[len++] = '/';
1160
	    p = uri->authority;
1161
	    while (*p != 0) {
1162
		if (len + 3 >= max) {
1163
		    max *= 2;
1164
		    temp = (xmlChar *) xmlRealloc(ret,
1165
			    (max + 1) * sizeof(xmlChar));
1166
		    if (temp == NULL) {
1167
			xmlGenericError(xmlGenericErrorContext,
1168
				"xmlSaveUri: out of memory\n");
1169
                     xmlFree(ret);
1170
			return(NULL);
1171
		    }
1172
		    ret = temp;
1173
		}
1174
		if ((IS_UNRESERVED(*(p))) ||
1175
                    ((*(p) == '$')) || ((*(p) == ',')) || ((*(p) == ';')) ||
1176
                    ((*(p) == ':')) || ((*(p) == '@')) || ((*(p) == '&')) ||
1177
                    ((*(p) == '=')) || ((*(p) == '+')))
1178
		    ret[len++] = *p++;
1179
		else {
1180
		    int val = *(unsigned char *)p++;
1181
		    int hi = val / 0x10, lo = val % 0x10;
1182
		    ret[len++] = '%';
1183
		    ret[len++] = hi + (hi > 9? 'A'-10 : '0');
1184
		    ret[len++] = lo + (lo > 9? 'A'-10 : '0');
1185
		}
1186
	    }
1187
	} else if (uri->scheme != NULL) {
1188
	    if (len + 3 >= max) {
1189
		max *= 2;
1190
		temp = (xmlChar *) xmlRealloc(ret,
1191
			(max + 1) * sizeof(xmlChar));
1192
		if (temp == NULL) {
1193
			xmlGenericError(xmlGenericErrorContext,
1194
				"xmlSaveUri: out of memory\n");
1195
                     xmlFree(ret);
1196
			return(NULL);
1197
		    }
1198
		    ret = temp;
1199
	    }
1200
	    ret[len++] = '/';
1201
	    ret[len++] = '/';
1202
	}
1203
	if (uri->path != NULL) {
1204
	    p = uri->path;
1205
	    /*
1206
	     * the colon in file:///d: should not be escaped or
1207
	     * Windows accesses fail later.
1208
	     */
1209
	    if ((uri->scheme != NULL) &&
1210
		(p[0] == '/') &&
1211
		(((p[1] >= 'a') && (p[1] <= 'z')) ||
1212
		 ((p[1] >= 'A') && (p[1] <= 'Z'))) &&
1213
		(p[2] == ':') &&
1214
	        (xmlStrEqual(BAD_CAST uri->scheme, BAD_CAST "file"))) {
1215
		if (len + 3 >= max) {
1216
		    max *= 2;
1217
		    ret = (xmlChar *) xmlRealloc(ret,
1218
			    (max + 1) * sizeof(xmlChar));
1219
		    if (ret == NULL) {
1220
			xmlGenericError(xmlGenericErrorContext,
1221
				"xmlSaveUri: out of memory\n");
1222
			return(NULL);
1223
		    }
1224
		}
1225
		ret[len++] = *p++;
1226
		ret[len++] = *p++;
1227
		ret[len++] = *p++;
1228
	    }
1229
	    while (*p != 0) {
1230
		if (len + 3 >= max) {
1231
		    max *= 2;
1232
		    temp = (xmlChar *) xmlRealloc(ret,
1233
			    (max + 1) * sizeof(xmlChar));
1234
		    if (temp == NULL) {
1235
			xmlGenericError(xmlGenericErrorContext,
1236
				"xmlSaveUri: out of memory\n");
1237
                     xmlFree(ret);
1238
			return(NULL);
1239
		    }
1240
		    ret = temp;
1241
		}
1242
		if ((IS_UNRESERVED(*(p))) || ((*(p) == '/')) ||
1243
                    ((*(p) == ';')) || ((*(p) == '@')) || ((*(p) == '&')) ||
1244
	            ((*(p) == '=')) || ((*(p) == '+')) || ((*(p) == '$')) ||
1245
	            ((*(p) == ',')))
1246
		    ret[len++] = *p++;
1247
		else {
1248
		    int val = *(unsigned char *)p++;
1249
		    int hi = val / 0x10, lo = val % 0x10;
1250
		    ret[len++] = '%';
1251
		    ret[len++] = hi + (hi > 9? 'A'-10 : '0');
1252
		    ret[len++] = lo + (lo > 9? 'A'-10 : '0');
1253
		}
1254
	    }
1255
	}
1256
	if (uri->query_raw != NULL) {
1257
	    if (len + 1 >= max) {
1258
		max *= 2;
1259
		temp = (xmlChar *) xmlRealloc(ret,
1260
			(max + 1) * sizeof(xmlChar));
1261
		if (temp == NULL) {
1262
			xmlGenericError(xmlGenericErrorContext,
1263
				"xmlSaveUri: out of memory\n");
1264
                     xmlFree(ret);
1265
			return(NULL);
1266
		    }
1267
		    ret = temp;
1268
	    }
1269
	    ret[len++] = '?';
1270
	    p = uri->query_raw;
1271
	    while (*p != 0) {
1272
		if (len + 1 >= max) {
1273
		    max *= 2;
1274
		    temp = (xmlChar *) xmlRealloc(ret,
1275
			    (max + 1) * sizeof(xmlChar));
1276
		    if (temp == NULL) {
1277
			xmlGenericError(xmlGenericErrorContext,
1278
				"xmlSaveUri: out of memory\n");
1279
                     xmlFree(ret);
1280
			return(NULL);
1281
		    }
1282
		    ret = temp;
1283
		}
1284
		ret[len++] = *p++;
1285
	    }
1286
	} else if (uri->query != NULL) {
1287
	    if (len + 3 >= max) {
1288
		max *= 2;
1289
		temp = (xmlChar *) xmlRealloc(ret,
1290
			(max + 1) * sizeof(xmlChar));
1291
		if (temp == NULL) {
1292
			xmlGenericError(xmlGenericErrorContext,
1293
				"xmlSaveUri: out of memory\n");
1294
                     xmlFree(ret);
1295
			return(NULL);
1296
		    }
1297
		    ret = temp;
1298
	    }
1299
	    ret[len++] = '?';
1300
	    p = uri->query;
1301
	    while (*p != 0) {
1302
		if (len + 3 >= max) {
1303
		    max *= 2;
1304
		    temp = (xmlChar *) xmlRealloc(ret,
1305
			    (max + 1) * sizeof(xmlChar));
1306
		    if (temp == NULL) {
1307
			xmlGenericError(xmlGenericErrorContext,
1308
				"xmlSaveUri: out of memory\n");
1309
                     xmlFree(ret);
1310
			return(NULL);
1311
		    }
1312
		    ret = temp;
1313
		}
1314
		if ((IS_UNRESERVED(*(p))) || (IS_RESERVED(*(p)))) 
1315
		    ret[len++] = *p++;
1316
		else {
1317
		    int val = *(unsigned char *)p++;
1318
		    int hi = val / 0x10, lo = val % 0x10;
1319
		    ret[len++] = '%';
1320
		    ret[len++] = hi + (hi > 9? 'A'-10 : '0');
1321
		    ret[len++] = lo + (lo > 9? 'A'-10 : '0');
1322
		}
1323
	    }
1324
	}
1325
    }
1326
    if (uri->fragment != NULL) {
1327
	if (len + 3 >= max) {
1328
	    max *= 2;
1329
	    temp = (xmlChar *) xmlRealloc(ret,
1330
		    (max + 1) * sizeof(xmlChar));
1331
	    if (temp == NULL) {
1332
			xmlGenericError(xmlGenericErrorContext,
1333
				"xmlSaveUri: out of memory\n");
1334
                     xmlFree(ret);
1335
			return(NULL);
1336
		    }
1337
		    ret = temp;
1338
	}
1339
	ret[len++] = '#';
1340
	p = uri->fragment;
1341
	while (*p != 0) {
1342
	    if (len + 3 >= max) {
1343
		max *= 2;
1344
		temp = (xmlChar *) xmlRealloc(ret,
1345
			(max + 1) * sizeof(xmlChar));
1346
		if (temp == NULL) {
1347
			xmlGenericError(xmlGenericErrorContext,
1348
				"xmlSaveUri: out of memory\n");
1349
                     xmlFree(ret);
1350
			return(NULL);
1351
		    }
1352
		    ret = temp;
1353
	    }
1354
	    if ((IS_UNRESERVED(*(p))) || (IS_RESERVED(*(p)))) 
1355
		ret[len++] = *p++;
1356
	    else {
1357
		int val = *(unsigned char *)p++;
1358
		int hi = val / 0x10, lo = val % 0x10;
1359
		ret[len++] = '%';
1360
		ret[len++] = hi + (hi > 9? 'A'-10 : '0');
1361
		ret[len++] = lo + (lo > 9? 'A'-10 : '0');
1362
	    }
1363
	}
1364
    }
1365
    if (len >= max) {
1366
	max *= 2;
1367
	temp = (xmlChar *) xmlRealloc(ret, (max + 1) * sizeof(xmlChar));
1368
	if (temp == NULL) {
1369
			xmlGenericError(xmlGenericErrorContext,
1370
				"xmlSaveUri: out of memory\n");
1371
                     xmlFree(ret);
1372
			return(NULL);
1373
		    }
1374
		    ret = temp;
1375
    }
1376
    ret[len++] = 0;
1377
    return(ret);
1378
}
1379
1380
/**
1381
 * xmlPrintURI:
1382
 * @stream:  a FILE* for the output
1383
 * @uri:  pointer to an xmlURI
1384
 *
1385
 * Prints the URI in the stream @stream.
1386
 */
1387
void
1388
xmlPrintURI(FILE *stream, xmlURIPtr uri) {
1389
    xmlChar *out;
1390
1391
    out = xmlSaveUri(uri);
1392
    if (out != NULL) {
1393
	fprintf(stream, "%s", (char *) out);
1394
	xmlFree(out);
1395
    }
1396
}
1397
1398
/**
1399
 * xmlCleanURI:
1400
 * @uri:  pointer to an xmlURI
1401
 *
1402
 * Make sure the xmlURI struct is free of content
1403
 */
1404
static void
1405
xmlCleanURI(xmlURIPtr uri) {
1406
    if (uri == NULL) return;
1407
1408
    if (uri->scheme != NULL) xmlFree(uri->scheme);
1409
    uri->scheme = NULL;
1410
    if (uri->server != NULL) xmlFree(uri->server);
1411
    uri->server = NULL;
1412
    if (uri->user != NULL) xmlFree(uri->user);
1413
    uri->user = NULL;
1414
    if (uri->path != NULL) xmlFree(uri->path);
1415
    uri->path = NULL;
1416
    if (uri->fragment != NULL) xmlFree(uri->fragment);
1417
    uri->fragment = NULL;
1418
    if (uri->opaque != NULL) xmlFree(uri->opaque);
1419
    uri->opaque = NULL;
1420
    if (uri->authority != NULL) xmlFree(uri->authority);
1421
    uri->authority = NULL;
1422
    if (uri->query != NULL) xmlFree(uri->query);
1423
    uri->query = NULL;
1424
    if (uri->query_raw != NULL) xmlFree(uri->query_raw);
1425
    uri->query_raw = NULL;
1426
}
1427
1428
/**
1429
 * xmlFreeURI:
1430
 * @uri:  pointer to an xmlURI
1431
 *
1432
 * Free up the xmlURI struct
1433
 */
1434
void
1435
xmlFreeURI(xmlURIPtr uri) {
1436
    if (uri == NULL) return;
1437
1438
    if (uri->scheme != NULL) xmlFree(uri->scheme);
1439
    if (uri->server != NULL) xmlFree(uri->server);
1440
    if (uri->user != NULL) xmlFree(uri->user);
1441
    if (uri->path != NULL) xmlFree(uri->path);
1442
    if (uri->fragment != NULL) xmlFree(uri->fragment);
1443
    if (uri->opaque != NULL) xmlFree(uri->opaque);
1444
    if (uri->authority != NULL) xmlFree(uri->authority);
1445
    if (uri->query != NULL) xmlFree(uri->query);
1446
    if (uri->query_raw != NULL) xmlFree(uri->query_raw);
1447
    xmlFree(uri);
1448
}
1449
1450
/************************************************************************
1451
 *									*
1452
 *			Helper functions				*
1453
 *									*
1454
 ************************************************************************/
1455
1456
/**
 * xmlNormalizeURIPath:
 * @path:  pointer to the path string
 *
 * Applies the 5 normalization steps to a path string--that is, RFC 2396
 * Section 5.2, steps 6.c through 6.g. Runs of consecutive "/" found
 * after the first segment are collapsed to a single "/" as well.
 *
 * Normalization occurs directly on the string, no new allocation is done.
 * The result is never longer than the input.
 *
 * Returns 0 on success, or -1 if @path is NULL
 */
int
xmlNormalizeURIPath(char *path) {
    char *cur, *out;

    if (path == NULL)
        return(-1);

    /* Skip all initial "/" chars.  We want to get to the beginning of the
     * first non-empty segment.
     */
    cur = path;
    while (cur[0] == '/')
        ++cur;
    if (cur[0] == '\0')
        return(0);

    /* Keep everything we've seen so far.  */
    out = cur;

    /*
     * Analyze each segment in sequence for cases (c) and (d).
     */
    while (cur[0] != '\0') {
        /*
         * c) All occurrences of "./", where "." is a complete path segment,
         *    are removed from the buffer string.
         */
        if ((cur[0] == '.') && (cur[1] == '/')) {
            cur += 2;
            /* '//' normalization should be done at this point too */
            while (cur[0] == '/')
                cur++;
            continue;
        }

        /*
         * d) If the buffer string ends with "." as a complete path segment,
         *    that "." is removed.
         */
        if ((cur[0] == '.') && (cur[1] == '\0'))
            break;

        /* Otherwise keep the segment.  */
        while (cur[0] != '/') {
            if (cur[0] == '\0')
                goto done_cd;
            (out++)[0] = (cur++)[0];
        }
        /* normalize // */
        while ((cur[0] == '/') && (cur[1] == '/'))
            cur++;

        /* copy the single separating '/' */
        (out++)[0] = (cur++)[0];
    }
 done_cd:
    out[0] = '\0';

    /* Reset to the beginning of the first segment for the next sequence.  */
    cur = path;
    while (cur[0] == '/')
        ++cur;
    if (cur[0] == '\0')
        return(0);

    /*
     * Analyze each segment in sequence for cases (e) and (f).
     *
     * e) All occurrences of "<segment>/../", where <segment> is a
     *    complete path segment not equal to "..", are removed from the
     *    buffer string.  Removal of these path segments is performed
     *    iteratively, removing the leftmost matching pattern on each
     *    iteration, until no matching pattern remains.
     *
     * f) If the buffer string ends with "<segment>/..", where <segment>
     *    is a complete path segment not equal to "..", that
     *    "<segment>/.." is removed.
     *
     * To satisfy the "iterative" clause in (e), we need to collapse the
     * string every time we find something that needs to be removed.  Thus,
     * we don't need to keep two pointers into the string: we only need a
     * "current position" pointer.
     */
    while (1) {
        char *segp, *tmp;

        /* At the beginning of each iteration of this loop, "cur" points to
         * the first character of the segment we want to examine.
         */

        /* Find the end of the current segment.  */
        segp = cur;
        while ((segp[0] != '/') && (segp[0] != '\0'))
            ++segp;

        /* If this is the last segment, we're done (we need at least two
         * segments to meet the criteria for the (e) and (f) cases).
         */
        if (segp[0] == '\0')
            break;

        /* If the first segment is "..", or if the next segment _isn't_ "..",
         * keep this segment and try the next one.
         */
        ++segp;
        if (((cur[0] == '.') && (cur[1] == '.') && (segp == cur+3))
            || ((segp[0] != '.') || (segp[1] != '.')
                || ((segp[2] != '/') && (segp[2] != '\0')))) {
            cur = segp;
            continue;
        }

        /* If we get here, remove this segment and the next one and back up
         * to the previous segment (if there is one), to implement the
         * "iteratively" clause.  It's pretty much impossible to back up
         * while maintaining two pointers into the buffer, so just compact
         * the whole buffer now.
         */

        /* If this is the end of the buffer, we're done.  */
        if (segp[2] == '\0') {
            cur[0] = '\0';
            break;
        }
        /* The source and destination overlap, so strcpy() must not be
         * used here; copy by hand, terminator included.
         */
        tmp = cur;
        segp += 3;
        while ((*tmp++ = *segp++) != 0);

        /* Skip backwards over the separator(s) in front of "cur".  If only
         * slashes (or nothing at all) precede it there is no previous
         * segment, so keep going from here.
         */
        segp = cur;
        while ((segp > path) && (segp[-1] == '/'))
            --segp;
        if (segp == path)
            continue;

        /* "segp[-1]" is the last character of the previous segment; find
         * its start.  We need to back up to the previous segment and start
         * over with that to handle things like "foo/bar/../..".  If we
         * don't do this, then on the first pass we'll remove the "bar/..",
         * but be pointing at the second ".." so we won't realize we can also
         * remove the "foo/..".  Testing segp[-1] rather than segp[0] keeps
         * a one-character segment located at the very start of the buffer
         * (as in "a/b/../..") from being mistaken for "no previous segment".
         */
        cur = segp - 1;
        while ((cur > path) && (cur[-1] != '/'))
            --cur;
    }

    /*
     * g) If the resulting buffer string still begins with one or more
     *    complete path segments of "..", then the reference is
     *    considered to be in error. Implementations may handle this
     *    error by retaining these components in the resolved path (i.e.,
     *    treating them as part of the final URI), by removing them from
     *    the resolved path (i.e., discarding relative levels above the
     *    root), or by avoiding traversal of the reference.
     *
     * We discard them from the final path (only done for paths that
     * start with "/").
     */
    if (path[0] == '/') {
        cur = path;
        while ((cur[0] == '/') && (cur[1] == '.') && (cur[2] == '.')
               && ((cur[3] == '/') || (cur[3] == '\0')))
            cur += 3;

        if (cur != path) {
            out = path;
            while (cur[0] != '\0')
                (out++)[0] = (cur++)[0];
            out[0] = 0;
        }
    }

    return(0);
}
1643
1644
/*
 * is_hex:
 * Tells whether @c is a hexadecimal digit, i.e. one of '0'-'9', 'a'-'f'
 * or 'A'-'F'.
 *
 * Returns 1 if it is, 0 otherwise.
 */
static int is_hex(char c) {
    switch (c) {
        case '0': case '1': case '2': case '3': case '4':
        case '5': case '6': case '7': case '8': case '9':
        case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
        case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
            return(1);
        default:
            return(0);
    }
}
1651
1652
/**
1653
 * xmlURIUnescapeString:
1654
 * @str:  the string to unescape
1655
 * @len:   the length in bytes to unescape (or <= 0 to indicate full string)
1656
 * @target:  optional destination buffer
1657
 *
1658
 * Unescaping routine, but does not check that the string is an URI. The
1659
 * output is a direct unsigned char translation of %XX values (no encoding)
1660
 * Note that the length of the result can only be smaller or same size as
1661
 * the input string.
1662
 *
1663
 * Returns a copy of the string, but unescaped, will return NULL only in case
1664
 * of error
1665
 */
1666
char *
1667
xmlURIUnescapeString(const char *str, int len, char *target) {
1668
    char *ret, *out;
1669
    const char *in;
1670
1671
    if (str == NULL)
1672
	return(NULL);
1673
    if (len <= 0) len = strlen(str);
1674
    if (len < 0) return(NULL);
1675
1676
    if (target == NULL) {
1677
	ret = (char *) xmlMallocAtomic(len + 1);
1678
	if (ret == NULL) {
1679
	    xmlGenericError(xmlGenericErrorContext,
1680
		    "xmlURIUnescapeString: out of memory\n");
1681
	    return(NULL);
1682
	}
1683
    } else
1684
	ret = target;
1685
    in = str;
1686
    out = ret;
1687
    while(len > 0) {
1688
	if ((len > 2) && (*in == '%') && (is_hex(in[1])) && (is_hex(in[2]))) {
1689
	    in++;
1690
	    if ((*in >= '0') && (*in <= '9')) 
1691
	        *out = (*in - '0');
1692
	    else if ((*in >= 'a') && (*in <= 'f'))
1693
	        *out = (*in - 'a') + 10;
1694
	    else if ((*in >= 'A') && (*in <= 'F'))
1695
	        *out = (*in - 'A') + 10;
1696
	    in++;
1697
	    if ((*in >= '0') && (*in <= '9')) 
1698
	        *out = *out * 16 + (*in - '0');
1699
	    else if ((*in >= 'a') && (*in <= 'f'))
1700
	        *out = *out * 16 + (*in - 'a') + 10;
1701
	    else if ((*in >= 'A') && (*in <= 'F'))
1702
	        *out = *out * 16 + (*in - 'A') + 10;
1703
	    in++;
1704
	    len -= 3;
1705
	    out++;
1706
	} else {
1707
	    *out++ = *in++;
1708
	    len--;
1709
	}
1710
    }
1711
    *out = 0;
1712
    return(ret);
1713
}
1714
1715
/**
1716
 * xmlURIEscapeStr:
1717
 * @str:  string to escape
1718
 * @list: exception list string of chars not to escape
1719
 *
1720
 * This routine escapes a string to hex, ignoring reserved characters (a-z)
1721
 * and the characters in the exception list.
1722
 *
1723
 * Returns a new escaped string or NULL in case of error.
1724
 */
1725
xmlChar *
1726
xmlURIEscapeStr(const xmlChar *str, const xmlChar *list) {
1727
    xmlChar *ret, ch;
1728
    xmlChar *temp;
1729
    const xmlChar *in;
1730
1731
    unsigned int len, out;
1732
1733
    if (str == NULL)
1734
	return(NULL);
1735
    if (str[0] == 0)
1736
	return(xmlStrdup(str));
1737
    len = xmlStrlen(str);
1738
    if (!(len > 0)) return(NULL);
1739
1740
    len += 20;
1741
    ret = (xmlChar *) xmlMallocAtomic(len);
1742
    if (ret == NULL) {
1743
	xmlGenericError(xmlGenericErrorContext,
1744
		"xmlURIEscapeStr: out of memory\n");
1745
	return(NULL);
1746
    }
1747
    in = (const xmlChar *) str;
1748
    out = 0;
1749
    while(*in != 0) {
1750
	if (len - out <= 3) {
1751
	    len += 20;
1752
	    temp = (xmlChar *) xmlRealloc(ret, len);
1753
	    if (temp == NULL) {
1754
		xmlGenericError(xmlGenericErrorContext,
1755
			"xmlURIEscapeStr: out of memory\n");
1756
		xmlFree(ret);
1757
		return(NULL);
1758
	    }
1759
	    ret = temp;
1760
	}
1761
1762
	ch = *in;
1763
1764
	if ((ch != '@') && (!IS_UNRESERVED(ch)) && (!xmlStrchr(list, ch))) {
1765
	    unsigned char val;
1766
	    ret[out++] = '%';
1767
	    val = ch >> 4;
1768
	    if (val <= 9)
1769
		ret[out++] = '0' + val;
1770
	    else
1771
		ret[out++] = 'A' + val - 0xA;
1772
	    val = ch & 0xF;
1773
	    if (val <= 9)
1774
		ret[out++] = '0' + val;
1775
	    else
1776
		ret[out++] = 'A' + val - 0xA;
1777
	    in++;
1778
	} else {
1779
	    ret[out++] = *in++;
1780
	}
1781
1782
    }
1783
    ret[out] = 0;
1784
    return(ret);
1785
}
1786
1787
/**
1788
 * xmlURIEscape:
1789
 * @str:  the string of the URI to escape
1790
 *
1791
 * Escaping routine, does not do validity checks !
1792
 * It will try to escape the chars needing this, but this is heuristic
1793
 * based it's impossible to be sure.
1794
 *
1795
 * Returns an copy of the string, but escaped
1796
 *
1797
 * 25 May 2001
1798
 * Uses xmlParseURI and xmlURIEscapeStr to try to escape correctly
1799
 * according to RFC2396.
1800
 *   - Carl Douglas
1801
 */
1802
xmlChar *
1803
xmlURIEscape(const xmlChar * str)
1804
{
1805
    xmlChar *ret, *segment = NULL;
1806
    xmlURIPtr uri;
1807
    int ret2;
1808
1809
#define NULLCHK(p) if(!p) { \
1810
                   xmlGenericError(xmlGenericErrorContext, \
1811
                        "xmlURIEscape: out of memory\n"); \
1812
                        xmlFreeURI(uri); \
1813
                        return NULL; } \
1814
1815
    if (str == NULL)
1816
        return (NULL);
1817
1818
    uri = xmlCreateURI();
1819
    if (uri != NULL) {
1820
	/*
1821
	 * Allow escaping errors in the unescaped form
1822
	 */
1823
        uri->cleanup = 1;
1824
        ret2 = xmlParseURIReference(uri, (const char *)str);
1825
        if (ret2) {
1826
            xmlFreeURI(uri);
1827
            return (NULL);
1828
        }
1829
    }
1830
1831
    if (!uri)
1832
        return NULL;
1833
1834
    ret = NULL;
1835
1836
    if (uri->scheme) {
1837
        segment = xmlURIEscapeStr(BAD_CAST uri->scheme, BAD_CAST "+-.");
1838
        NULLCHK(segment)
1839
        ret = xmlStrcat(ret, segment);
1840
        ret = xmlStrcat(ret, BAD_CAST ":");
1841
        xmlFree(segment);
1842
    }
1843
1844
    if (uri->authority) {
1845
        segment =
1846
            xmlURIEscapeStr(BAD_CAST uri->authority, BAD_CAST "/?;:@");
1847
        NULLCHK(segment)
1848
        ret = xmlStrcat(ret, BAD_CAST "//");
1849
        ret = xmlStrcat(ret, segment);
1850
        xmlFree(segment);
1851
    }
1852
1853
    if (uri->user) {
1854
        segment = xmlURIEscapeStr(BAD_CAST uri->user, BAD_CAST ";:&=+$,");
1855
        NULLCHK(segment)
1856
		ret = xmlStrcat(ret,BAD_CAST "//");	
1857
        ret = xmlStrcat(ret, segment);
1858
        ret = xmlStrcat(ret, BAD_CAST "@");
1859
        xmlFree(segment);
1860
    }
1861
1862
    if (uri->server) {
1863
        segment = xmlURIEscapeStr(BAD_CAST uri->server, BAD_CAST "/?;:@");
1864
        NULLCHK(segment)
1865
		if (uri->user == NULL)
1866
		ret = xmlStrcat(ret, BAD_CAST "//");
1867
        ret = xmlStrcat(ret, segment);
1868
        xmlFree(segment);
1869
    }
1870
1871
    if (uri->port) {
1872
        xmlChar port[10];
1873
1874
        snprintf((char *) port, 10, "%d", uri->port);
1875
        ret = xmlStrcat(ret, BAD_CAST ":");
1876
        ret = xmlStrcat(ret, port);
1877
    }
1878
1879
    if (uri->path) {
1880
        segment =
1881
            xmlURIEscapeStr(BAD_CAST uri->path, BAD_CAST ":@&=+$,/?;");
1882
        NULLCHK(segment)
1883
        ret = xmlStrcat(ret, segment);
1884
        xmlFree(segment);
1885
    }
1886
1887
    if (uri->query_raw) {
1888
        ret = xmlStrcat(ret, BAD_CAST "?");
1889
        ret = xmlStrcat(ret, BAD_CAST uri->query_raw);
1890
    }
1891
    else if (uri->query) {
1892
        segment =
1893
            xmlURIEscapeStr(BAD_CAST uri->query, BAD_CAST ";/?:@&=+,$");
1894
        NULLCHK(segment)
1895
        ret = xmlStrcat(ret, BAD_CAST "?");
1896
        ret = xmlStrcat(ret, segment);
1897
        xmlFree(segment);
1898
    }
1899
1900
    if (uri->opaque) {
1901
        segment = xmlURIEscapeStr(BAD_CAST uri->opaque, BAD_CAST "");
1902
        NULLCHK(segment)
1903
        ret = xmlStrcat(ret, segment);
1904
        xmlFree(segment);
1905
    }
1906
1907
    if (uri->fragment) {
1908
        segment = xmlURIEscapeStr(BAD_CAST uri->fragment, BAD_CAST "#");
1909
        NULLCHK(segment)
1910
        ret = xmlStrcat(ret, BAD_CAST "#");
1911
        ret = xmlStrcat(ret, segment);
1912
        xmlFree(segment);
1913
    }
1914
1915
    xmlFreeURI(uri);
1916
#undef NULLCHK
1917
1918
    return (ret);
1919
}
1920
1921
/************************************************************************
1922
 *									*
1923
 *			Public functions				*
1924
 *									*
1925
 ************************************************************************/
1926
1927
/**
1928
 * xmlBuildURI:
1929
 * @URI:  the URI instance found in the document
1930
 * @base:  the base value
1931
 *
1932
 * Computes he final URI of the reference done by checking that
1933
 * the given URI is valid, and building the final URI using the
1934
 * base URI. This is processed according to section 5.2 of the 
1935
 * RFC 2396
1936
 *
1937
 * 5.2. Resolving Relative References to Absolute Form
1938
 *
1939
 * Returns a new URI string (to be freed by the caller) or NULL in case
1940
 *         of error.
1941
 */
1942
xmlChar *
1943
xmlBuildURI(const xmlChar *URI, const xmlChar *base) {
1944
    xmlChar *val = NULL;
1945
    int ret, len, indx, cur, out;
1946
    xmlURIPtr ref = NULL;
1947
    xmlURIPtr bas = NULL;
1948
    xmlURIPtr res = NULL;
1949
1950
    /*
1951
     * 1) The URI reference is parsed into the potential four components and
1952
     *    fragment identifier, as described in Section 4.3.
1953
     *
1954
     *    NOTE that a completely empty URI is treated by modern browsers
1955
     *    as a reference to "." rather than as a synonym for the current
1956
     *    URI.  Should we do that here?
1957
     */
1958
    if (URI == NULL) 
1959
	ret = -1;
1960
    else {
1961
	if (*URI) {
1962
	    ref = xmlCreateURI();
1963
	    if (ref == NULL)
1964
		goto done;
1965
	    ret = xmlParseURIReference(ref, (const char *) URI);
1966
	}
1967
	else
1968
	    ret = 0;
1969
    }
1970
    if (ret != 0)
1971
	goto done;
1972
    if ((ref != NULL) && (ref->scheme != NULL)) {
1973
	/*
1974
	 * The URI is absolute don't modify.
1975
	 */
1976
	val = xmlStrdup(URI);
1977
	goto done;
1978
    }
1979
    if (base == NULL)
1980
	ret = -1;
1981
    else {
1982
	bas = xmlCreateURI();
1983
	if (bas == NULL)
1984
	    goto done;
1985
	ret = xmlParseURIReference(bas, (const char *) base);
1986
    }
1987
    if (ret != 0) {
1988
	if (ref)
1989
	    val = xmlSaveUri(ref);
1990
	goto done;
1991
    }
1992
    if (ref == NULL) {
1993
	/*
1994
	 * the base fragment must be ignored
1995
	 */
1996
	if (bas->fragment != NULL) {
1997
	    xmlFree(bas->fragment);
1998
	    bas->fragment = NULL;
1999
	}
2000
	val = xmlSaveUri(bas);
2001
	goto done;
2002
    }
2003
2004
    /*
2005
     * 2) If the path component is empty and the scheme, authority, and
2006
     *    query components are undefined, then it is a reference to the
2007
     *    current document and we are done.  Otherwise, the reference URI's
2008
     *    query and fragment components are defined as found (or not found)
2009
     *    within the URI reference and not inherited from the base URI.
2010
     *
2011
     *    NOTE that in modern browsers, the parsing differs from the above
2012
     *    in the following aspect:  the query component is allowed to be
2013
     *    defined while still treating this as a reference to the current
2014
     *    document.
2015
     */
2016
    res = xmlCreateURI();
2017
    if (res == NULL)
2018
	goto done;
2019
    if ((ref->scheme == NULL) && (ref->path == NULL) &&
2020
	((ref->authority == NULL) && (ref->server == NULL))) {
2021
	if (bas->scheme != NULL)
2022
	    res->scheme = xmlMemStrdup(bas->scheme);
2023
	if (bas->authority != NULL)
2024
	    res->authority = xmlMemStrdup(bas->authority);
2025
	else if (bas->server != NULL) {
2026
	    res->server = xmlMemStrdup(bas->server);
2027
	    if (bas->user != NULL)
2028
		res->user = xmlMemStrdup(bas->user);
2029
	    res->port = bas->port;		
2030
	}
2031
	if (bas->path != NULL)
2032
	    res->path = xmlMemStrdup(bas->path);
2033
	if (ref->query_raw != NULL)
2034
	    res->query_raw = xmlMemStrdup (ref->query_raw);
2035
	else if (ref->query != NULL)
2036
	    res->query = xmlMemStrdup(ref->query);
2037
	else if (bas->query_raw != NULL)
2038
	    res->query_raw = xmlMemStrdup(bas->query_raw);
2039
	else if (bas->query != NULL)
2040
	    res->query = xmlMemStrdup(bas->query);
2041
	if (ref->fragment != NULL)
2042
	    res->fragment = xmlMemStrdup(ref->fragment);
2043
	goto step_7;
2044
    }
2045
2046
    /*
2047
     * 3) If the scheme component is defined, indicating that the reference
2048
     *    starts with a scheme name, then the reference is interpreted as an
2049
     *    absolute URI and we are done.  Otherwise, the reference URI's
2050
     *    scheme is inherited from the base URI's scheme component.
2051
     */
2052
    if (ref->scheme != NULL) {
2053
	val = xmlSaveUri(ref);
2054
	goto done;
2055
    }
2056
    if (bas->scheme != NULL)
2057
	res->scheme = xmlMemStrdup(bas->scheme);
2058
 
2059
    if (ref->query_raw != NULL)
2060
	res->query_raw = xmlMemStrdup(ref->query_raw);
2061
    else if (ref->query != NULL)
2062
	res->query = xmlMemStrdup(ref->query);
2063
    if (ref->fragment != NULL)
2064
	res->fragment = xmlMemStrdup(ref->fragment);
2065
2066
    /*
2067
     * 4) If the authority component is defined, then the reference is a
2068
     *    network-path and we skip to step 7.  Otherwise, the reference
2069
     *    URI's authority is inherited from the base URI's authority
2070
     *    component, which will also be undefined if the URI scheme does not
2071
     *    use an authority component.
2072
     */
2073
    if ((ref->authority != NULL) || (ref->server != NULL)) {
2074
	if (ref->authority != NULL)
2075
	    res->authority = xmlMemStrdup(ref->authority);
2076
	else {
2077
	    res->server = xmlMemStrdup(ref->server);
2078
	    if (ref->user != NULL)
2079
		res->user = xmlMemStrdup(ref->user);
2080
            res->port = ref->port;		
2081
	}
2082
	if (ref->path != NULL)
2083
	    res->path = xmlMemStrdup(ref->path);
2084
	goto step_7;
2085
    }
2086
    if (bas->authority != NULL)
2087
	res->authority = xmlMemStrdup(bas->authority);
2088
    else if (bas->server != NULL) {
2089
	res->server = xmlMemStrdup(bas->server);
2090
	if (bas->user != NULL)
2091
	    res->user = xmlMemStrdup(bas->user);
2092
	res->port = bas->port;		
2093
    }
2094
2095
    /*
2096
     * 5) If the path component begins with a slash character ("/"), then
2097
     *    the reference is an absolute-path and we skip to step 7.
2098
     */
2099
    if ((ref->path != NULL) && (ref->path[0] == '/')) {
2100
	res->path = xmlMemStrdup(ref->path);
2101
	goto step_7;
2102
    }
2103
2104
2105
    /*
2106
     * 6) If this step is reached, then we are resolving a relative-path
2107
     *    reference.  The relative path needs to be merged with the base
2108
     *    URI's path.  Although there are many ways to do this, we will
2109
     *    describe a simple method using a separate string buffer.
2110
     *
2111
     * Allocate a buffer large enough for the result string.
2112
     */
2113
    len = 2; /* extra / and 0 */
2114
    if (ref->path != NULL)
2115
	len += strlen(ref->path);
2116
    if (bas->path != NULL)
2117
	len += strlen(bas->path);
2118
    res->path = (char *) xmlMallocAtomic(len);
2119
    if (res->path == NULL) {
2120
	xmlGenericError(xmlGenericErrorContext,
2121
		"xmlBuildURI: out of memory\n");
2122
	goto done;
2123
    }
2124
    res->path[0] = 0;
2125
2126
    /*
2127
     * a) All but the last segment of the base URI's path component is
2128
     *    copied to the buffer.  In other words, any characters after the
2129
     *    last (right-most) slash character, if any, are excluded.
2130
     */
2131
    cur = 0;
2132
    out = 0;
2133
    if (bas->path != NULL) {
2134
	while (bas->path[cur] != 0) {
2135
	    while ((bas->path[cur] != 0) && (bas->path[cur] != '/'))
2136
		cur++;
2137
	    if (bas->path[cur] == 0)
2138
		break;
2139
2140
	    cur++;
2141
	    while (out < cur) {
2142
		res->path[out] = bas->path[out];
2143
		out++;
2144
	    }
2145
	}
2146
    }
2147
    res->path[out] = 0;
2148
2149
    /*
2150
     * b) The reference's path component is appended to the buffer
2151
     *    string.
2152
     */
2153
    if (ref->path != NULL && ref->path[0] != 0) {
2154
	indx = 0;
2155
	/*
2156
	 * Ensure the path includes a '/'
2157
	 */
2158
	if ((out == 0) && (bas->server != NULL))
2159
	    res->path[out++] = '/';
2160
	while (ref->path[indx] != 0) {
2161
	    res->path[out++] = ref->path[indx++];
2162
	}
2163
    }
2164
    res->path[out] = 0;
2165
2166
    /*
2167
     * Steps c) to h) are really path normalization steps
2168
     */
2169
    xmlNormalizeURIPath(res->path);
2170
2171
step_7:
2172
2173
    /*
2174
     * 7) The resulting URI components, including any inherited from the
2175
     *    base URI, are recombined to give the absolute form of the URI
2176
     *    reference.
2177
     */
2178
    val = xmlSaveUri(res);
2179
2180
done:
2181
    if (ref != NULL)
2182
	xmlFreeURI(ref);
2183
    if (bas != NULL)
2184
	xmlFreeURI(bas);
2185
    if (res != NULL)
2186
	xmlFreeURI(res);
2187
    return(val);
2188
}
2189
2190
/**
2191
 * xmlBuildRelativeURI:
2192
 * @URI:  the URI reference under consideration
2193
 * @base:  the base value
2194
 *
2195
 * Expresses the URI of the reference in terms relative to the
2196
 * base.  Some examples of this operation include:
2197
 *     base = "http://site1.com/docs/book1.html"
2198
 *        URI input                        URI returned
2199
 *     docs/pic1.gif                    pic1.gif
2200
 *     docs/img/pic1.gif                img/pic1.gif
2201
 *     img/pic1.gif                     ../img/pic1.gif
2202
 *     http://site1.com/docs/pic1.gif   pic1.gif
2203
 *     http://site2.com/docs/pic1.gif   http://site2.com/docs/pic1.gif
2204
 *
2205
 *     base = "docs/book1.html"
2206
 *        URI input                        URI returned
2207
 *     docs/pic1.gif                    pic1.gif
2208
 *     docs/img/pic1.gif                img/pic1.gif
2209
 *     img/pic1.gif                     ../img/pic1.gif
2210
 *     http://site1.com/docs/pic1.gif   http://site1.com/docs/pic1.gif
2211
 *
2212
 *
2213
 * Note: if the URI reference is really wierd or complicated, it may be
2214
 *       worthwhile to first convert it into a "nice" one by calling
2215
 *       xmlBuildURI (using 'base') before calling this routine,
2216
 *       since this routine (for reasonable efficiency) assumes URI has
2217
 *       already been through some validation.
2218
 *
2219
 * Returns a new URI string (to be freed by the caller) or NULL in case
2220
 * error.
2221
 */
2222
xmlChar *
2223
xmlBuildRelativeURI (const xmlChar * URI, const xmlChar * base)
2224
{
2225
    xmlChar *val = NULL;
2226
    int ret;
2227
    int ix;
2228
    int pos = 0;
2229
    int nbslash = 0;
2230
    int len;
2231
    xmlURIPtr ref = NULL;
2232
    xmlURIPtr bas = NULL;
2233
    xmlChar *bptr, *uptr, *vptr;
2234
    int remove_path = 0;
2235
2236
    if ((URI == NULL) || (*URI == 0))
2237
	return NULL;
2238
2239
    /*
2240
     * First parse URI into a standard form
2241
     */
2242
    ref = xmlCreateURI ();
2243
    if (ref == NULL)
2244
	return NULL;
2245
    /* If URI not already in "relative" form */
2246
    if (URI[0] != '.') {
2247
	ret = xmlParseURIReference (ref, (const char *) URI);
2248
	if (ret != 0)
2249
	    goto done;		/* Error in URI, return NULL */
2250
    } else
2251
	ref->path = (char *)xmlStrdup(URI);
2252
2253
    /*
2254
     * Next parse base into the same standard form
2255
     */
2256
    if ((base == NULL) || (*base == 0)) {
2257
	val = xmlStrdup (URI);
2258
	goto done;
2259
    }
2260
    bas = xmlCreateURI ();
2261
    if (bas == NULL)
2262
	goto done;
2263
    if (base[0] != '.') {
2264
	ret = xmlParseURIReference (bas, (const char *) base);
2265
	if (ret != 0)
2266
	    goto done;		/* Error in base, return NULL */
2267
    } else
2268
	bas->path = (char *)xmlStrdup(base);
2269
2270
    /*
2271
     * If the scheme / server on the URI differs from the base,
2272
     * just return the URI
2273
     */
2274
    if ((ref->scheme != NULL) &&
2275
	((bas->scheme == NULL) ||
2276
	 (xmlStrcmp ((xmlChar *)bas->scheme, (xmlChar *)ref->scheme)) ||
2277
	 (xmlStrcmp ((xmlChar *)bas->server, (xmlChar *)ref->server)))) {
2278
	val = xmlStrdup (URI);
2279
	goto done;
2280
    }
2281
    if (xmlStrEqual((xmlChar *)bas->path, (xmlChar *)ref->path)) {
2282
	val = xmlStrdup(BAD_CAST "");
2283
	goto done;
2284
    }
2285
    if (bas->path == NULL) {
2286
	val = xmlStrdup((xmlChar *)ref->path);
2287
	goto done;
2288
    }
2289
    if (ref->path == NULL) {
2290
        ref->path = (char *) "/";
2291
	remove_path = 1;
2292
    }
2293
2294
    /*
2295
     * At this point (at last!) we can compare the two paths
2296
     *
2297
     * First we take care of the special case where either of the
2298
     * two path components may be missing (bug 316224)
2299
     */
2300
    if (bas->path == NULL) {
2301
	if (ref->path != NULL) {
2302
	    uptr = (xmlChar *) ref->path;
2303
	    if (*uptr == '/')
2304
		uptr++;
2305
	    /* exception characters from xmlSaveUri */
2306
	    val = xmlURIEscapeStr(uptr, BAD_CAST "/;&=+$,");
2307
	}
2308
	goto done;
2309
    }
2310
    bptr = (xmlChar *)bas->path;
2311
    if (ref->path == NULL) {
2312
	for (ix = 0; bptr[ix] != 0; ix++) {
2313
	    if (bptr[ix] == '/')
2314
		nbslash++;
2315
	}
2316
	uptr = NULL;
2317
	len = 1;	/* this is for a string terminator only */
2318
    } else {
2319
    /*
2320
     * Next we compare the two strings and find where they first differ
2321
     */
2322
	if ((ref->path[pos] == '.') && (ref->path[pos+1] == '/'))
2323
            pos += 2;
2324
	if ((*bptr == '.') && (bptr[1] == '/'))
2325
            bptr += 2;
2326
	else if ((*bptr == '/') && (ref->path[pos] != '/'))
2327
	    bptr++;
2328
	while ((bptr[pos] == ref->path[pos]) && (bptr[pos] != 0))
2329
	    pos++;
2330
2331
	if (bptr[pos] == ref->path[pos]) {
2332
	    val = xmlStrdup(BAD_CAST "");
2333
	    goto done;		/* (I can't imagine why anyone would do this) */
2334
	}
2335
2336
	/*
2337
	 * In URI, "back up" to the last '/' encountered.  This will be the
2338
	 * beginning of the "unique" suffix of URI
2339
	 */
2340
	ix = pos;
2341
	if ((ref->path[ix] == '/') && (ix > 0))
2342
	    ix--;
2343
	else if ((ref->path[ix] == 0) && (ix > 1) && (ref->path[ix - 1] == '/'))
2344
	    ix -= 2;
2345
	for (; ix > 0; ix--) {
2346
	    if (ref->path[ix] == '/')
2347
		break;
2348
	}
2349
	if (ix == 0) {
2350
	    uptr = (xmlChar *)ref->path;
2351
	} else {
2352
	    ix++;
2353
	    uptr = (xmlChar *)&ref->path[ix];
2354
	}
2355
2356
	/*
2357
	 * In base, count the number of '/' from the differing point
2358
	 */
2359
	if (bptr[pos] != ref->path[pos]) {/* check for trivial URI == base */
2360
	    for (; bptr[ix] != 0; ix++) {
2361
		if (bptr[ix] == '/')
2362
		    nbslash++;
2363
	    }
2364
	}
2365
	len = xmlStrlen (uptr) + 1;
2366
    }
2367
    
2368
    if (nbslash == 0) {
2369
	if (uptr != NULL)
2370
	    /* exception characters from xmlSaveUri */
2371
	    val = xmlURIEscapeStr(uptr, BAD_CAST "/;&=+$,");
2372
	goto done;
2373
    }
2374
2375
    /*
2376
     * Allocate just enough space for the returned string -
2377
     * length of the remainder of the URI, plus enough space
2378
     * for the "../" groups, plus one for the terminator
2379
     */
2380
    val = (xmlChar *) xmlMalloc (len + 3 * nbslash);
2381
    if (val == NULL) {
2382
	xmlGenericError(xmlGenericErrorContext,
2383
		"xmlBuildRelativeURI: out of memory\n");
2384
	goto done;
2385
    }
2386
    vptr = val;
2387
    /*
2388
     * Put in as many "../" as needed
2389
     */
2390
    for (; nbslash>0; nbslash--) {
2391
	*vptr++ = '.';
2392
	*vptr++ = '.';
2393
	*vptr++ = '/';
2394
    }
2395
    /*
2396
     * Finish up with the end of the URI
2397
     */
2398
    if (uptr != NULL) {
2399
        if ((vptr > val) && (len > 0) &&
2400
	    (uptr[0] == '/') && (vptr[-1] == '/')) {
2401
	    memcpy (vptr, uptr + 1, len - 1);
2402
	    vptr[len - 2] = 0;
2403
	} else {
2404
	    memcpy (vptr, uptr, len);
2405
	    vptr[len - 1] = 0;
2406
	}
2407
    } else {
2408
	vptr[len - 1] = 0;
2409
    }
2410
2411
    /* escape the freshly-built path */
2412
    vptr = val;
2413
	/* exception characters from xmlSaveUri */
2414
    val = xmlURIEscapeStr(vptr, BAD_CAST "/;&=+$,");
2415
    xmlFree(vptr);
2416
2417
done:
2418
    /*
2419
     * Free the working variables
2420
     */
2421
    if (remove_path != 0)
2422
        ref->path = NULL;
2423
    if (ref != NULL)
2424
	xmlFreeURI (ref);
2425
    if (bas != NULL)
2426
	xmlFreeURI (bas);
2427
2428
    return val;
2429
}
2430
2431
/**
2432
 * xmlCanonicPath:
2433
 * @path:  the resource locator in a filesystem notation
2434
 *
2435
 * Constructs a canonic path from the specified path. 
2436
 *
2437
 * Returns a new canonic path, or a duplicate of the path parameter if the 
2438
 * construction fails. The caller is responsible for freeing the memory occupied
2439
 * by the returned string. If there is insufficient memory available, or the 
2440
 * argument is NULL, the function returns NULL.
2441
 */
2442
#define IS_WINDOWS_PATH(p) 					\
2443
	((p != NULL) &&						\
2444
	 (((p[0] >= 'a') && (p[0] <= 'z')) ||			\
2445
	  ((p[0] >= 'A') && (p[0] <= 'Z'))) &&			\
2446
	 (p[1] == ':') && ((p[2] == '/') || (p[2] == '\\')))
2447
xmlChar *
2448
xmlCanonicPath(const xmlChar *path)
2449
{
2450
/*
2451
 * For Windows implementations, additional work needs to be done to
2452
 * replace backslashes in pathnames with "forward slashes"
2453
 */
2454
#if defined(_WIN32) && !defined(__CYGWIN__)    
2455
    int len = 0;
2456
    int i = 0;
2457
    xmlChar *p = NULL;
2458
#endif
2459
    xmlURIPtr uri;
2460
    xmlChar *ret;
2461
    const xmlChar *absuri;
2462
2463
    if (path == NULL)
2464
	return(NULL);
2465
2466
    /* sanitize filename starting with // so it can be used as URI */
2467
    if ((path[0] == '/') && (path[1] == '/') && (path[2] != '/'))
2468
        path++;
2469
2470
    if ((uri = xmlParseURI((const char *) path)) != NULL) {
2471
	xmlFreeURI(uri);
2472
	return xmlStrdup(path);
2473
    }
2474
2475
    /* Check if this is an "absolute uri" */
2476
    absuri = xmlStrstr(path, BAD_CAST "://");
2477
    if (absuri != NULL) {
2478
        int l, j;
2479
	unsigned char c;
2480
	xmlChar *escURI;
2481
2482
        /*
2483
	 * this looks like an URI where some parts have not been
2484
	 * escaped leading to a parsing problem.  Check that the first
2485
	 * part matches a protocol.
2486
	 */
2487
	l = absuri - path;
2488
	/* Bypass if first part (part before the '://') is > 20 chars */
2489
	if ((l <= 0) || (l > 20))
2490
	    goto path_processing;
2491
	/* Bypass if any non-alpha characters are present in first part */
2492
	for (j = 0;j < l;j++) {
2493
	    c = path[j];
2494
	    if (!(((c >= 'a') && (c <= 'z')) || ((c >= 'A') && (c <= 'Z'))))
2495
	        goto path_processing;
2496
	}
2497
2498
	/* Escape all except the characters specified in the supplied path */
2499
        escURI = xmlURIEscapeStr(path, BAD_CAST ":/?_.#&;=");
2500
	if (escURI != NULL) {
2501
	    /* Try parsing the escaped path */
2502
	    uri = xmlParseURI((const char *) escURI);
2503
	    /* If successful, return the escaped string */
2504
	    if (uri != NULL) {
2505
	        xmlFreeURI(uri);
2506
		return escURI;
2507
	    }
2508
	}
2509
    }
2510
2511
path_processing:
2512
/* For Windows implementations, replace backslashes with 'forward slashes' */
2513
#if defined(_WIN32) && !defined(__CYGWIN__)    
2514
    /*
2515
     * Create a URI structure
2516
     */
2517
    uri = xmlCreateURI();
2518
    if (uri == NULL) {		/* Guard against 'out of memory' */
2519
        return(NULL);
2520
    }
2521
2522
    len = xmlStrlen(path);
2523
    if ((len > 2) && IS_WINDOWS_PATH(path)) {
2524
        /* make the scheme 'file' */
2525
	uri->scheme = xmlStrdup(BAD_CAST "file");
2526
	/* allocate space for leading '/' + path + string terminator */
2527
	uri->path = xmlMallocAtomic(len + 2);
2528
	if (uri->path == NULL) {
2529
	    xmlFreeURI(uri);	/* Guard agains 'out of memory' */
2530
	    return(NULL);
2531
	}
2532
	/* Put in leading '/' plus path */
2533
	uri->path[0] = '/';
2534
	p = uri->path + 1;
2535
	strncpy(p, path, len + 1);
2536
    } else {
2537
	uri->path = xmlStrdup(path);
2538
	if (uri->path == NULL) {
2539
	    xmlFreeURI(uri);
2540
	    return(NULL);
2541
	}
2542
	p = uri->path;
2543
    }
2544
    /* Now change all occurences of '\' to '/' */
2545
    while (*p != '\0') {
2546
	if (*p == '\\')
2547
	    *p = '/';
2548
	p++;
2549
    }
2550
2551
    if (uri->scheme == NULL) {
2552
	ret = xmlStrdup((const xmlChar *) uri->path);
2553
    } else {
2554
	ret = xmlSaveUri(uri);
2555
    }
2556
2557
    xmlFreeURI(uri);
2558
#else
2559
    ret = xmlStrdup((const xmlChar *) path);
2560
#endif
2561
    return(ret);
2562
}
2563
2564
/**
2565
 * xmlPathToURI:
2566
 * @path:  the resource locator in a filesystem notation
2567
 *
2568
 * Constructs an URI expressing the existing path
2569
 *
2570
 * Returns a new URI, or a duplicate of the path parameter if the 
2571
 * construction fails. The caller is responsible for freeing the memory
2572
 * occupied by the returned string. If there is insufficient memory available,
2573
 * or the argument is NULL, the function returns NULL.
2574
 */
2575
xmlChar *
2576
xmlPathToURI(const xmlChar *path)
2577
{
2578
    xmlURIPtr uri;
2579
    xmlURI temp;
2580
    xmlChar *ret, *cal;
2581
2582
    if (path == NULL)
2583
        return(NULL);
2584
2585
    if ((uri = xmlParseURI((const char *) path)) != NULL) {
2586
	xmlFreeURI(uri);
2587
	return xmlStrdup(path);
2588
    }
2589
    cal = xmlCanonicPath(path);
2590
    if (cal == NULL)
2591
        return(NULL);
2592
#if defined(_WIN32) && !defined(__CYGWIN__)
2593
    /* xmlCanonicPath can return an URI on Windows (is that the intended behaviour?) 
2594
       If 'cal' is a valid URI allready then we are done here, as continuing would make
2595
       it invalid. */
2596
    if ((uri = xmlParseURI((const char *) cal)) != NULL) {
2597
	xmlFreeURI(uri);
2598
	return cal;
2599
    }
2600
    /* 'cal' can contain a relative path with backslashes. If that is processed
2601
       by xmlSaveURI, they will be escaped and the external entity loader machinery
2602
       will fail. So convert them to slashes. Misuse 'ret' for walking. */
2603
    ret = cal;
2604
    while (*ret != '\0') {
2605
	if (*ret == '\\')
2606
	    *ret = '/';
2607
	ret++;
2608
    }
2609
#endif
2610
    memset(&temp, 0, sizeof(temp));
2611
    temp.path = (char *) cal;
2612
    ret = xmlSaveUri(&temp);
2613
    xmlFree(cal);
2614
    return(ret);
2615
}
2616
#define bottom_uri
2617
#include "elfgcchack.h"