1
/*
2
 * nanohttp.c: minimalist HTTP GET implementation to fetch external subsets.
3
 *             focuses on size, streamability, reentrancy and portability
4
 *
5
 * This is clearly not a general purpose HTTP implementation
6
 * If you look for one, check:
7
 *         http://www.w3.org/Library/
8
 *
9
 * See Copyright for the status of this software.
10
 *
11
 * daniel@veillard.com
12
 */
13
 
14
#define NEED_SOCKETS
15
#define IN_LIBXML
16
#include "libxml.h"
17
18
#ifdef LIBXML_HTTP_ENABLED
19
#include <string.h>
20
21
#ifdef HAVE_STDLIB_H
22
#include <stdlib.h>
23
#endif
24
#ifdef HAVE_UNISTD_H
25
#include <unistd.h>
26
#endif
27
#ifdef HAVE_SYS_TYPES_H
28
#include <sys/types.h>
29
#endif
30
#ifdef HAVE_SYS_SOCKET_H
31
#include <sys/socket.h>
32
#endif
33
#ifdef HAVE_NETINET_IN_H
34
#include <netinet/in.h>
35
#endif
36
#ifdef HAVE_ARPA_INET_H
37
#include <arpa/inet.h>
38
#endif
39
#ifdef HAVE_NETDB_H
40
#include <netdb.h>
41
#endif
42
#ifdef HAVE_RESOLV_H
43
#ifdef HAVE_ARPA_NAMESER_H
44
#include <arpa/nameser.h>
45
#endif
46
#include <resolv.h>
47
#endif
48
#ifdef HAVE_FCNTL_H
49
#include <fcntl.h> 
50
#endif
51
#ifdef HAVE_ERRNO_H
52
#include <errno.h>
53
#endif
54
#ifdef HAVE_SYS_TIME_H
55
#include <sys/time.h>
56
#endif
57
#ifdef HAVE_SYS_SELECT_H
58
#include <sys/select.h>
59
#endif
60
#ifdef HAVE_STRINGS_H
61
#include <strings.h>
62
#endif
63
#ifdef SUPPORT_IP6
64
#include <resolv.h>
65
#endif
66
#ifdef HAVE_ZLIB_H
67
#include <zlib.h>
68
#endif
69
70
71
#ifdef VMS
72
#include <stropts>
73
#define XML_SOCKLEN_T unsigned int
74
#define SOCKET int
75
#endif
76
77
#if defined(__MINGW32__) || defined(_WIN32_WCE)
78
#define _WINSOCKAPI_
79
#include <wsockcompat.h>
80
#include <winsock2.h>
81
#undef XML_SOCKLEN_T
82
#define XML_SOCKLEN_T unsigned int
83
#endif
84
85
86
#include <libxml/globals.h>
87
#include <libxml/xmlerror.h>
88
#include <libxml/xmlmemory.h>
89
#include <libxml/parser.h> /* for xmlStr(n)casecmp() */
90
#include <libxml/nanohttp.h>
91
#include <libxml/globals.h>
92
#include <libxml/uri.h>
93
94
/**
95
 * A couple portability macros
96
 */
97
#ifndef _WINSOCKAPI_
98
#if !defined(__BEOS__) || defined(__HAIKU__)
99
#define closesocket(s) close(s)
100
#endif
101
#define SOCKET int
102
#endif
103
104
#ifdef __BEOS__
105
#ifndef PF_INET
106
#define PF_INET AF_INET
107
#endif
108
#endif
109
110
#ifndef XML_SOCKLEN_T
111
#define XML_SOCKLEN_T unsigned int
112
#endif
113
#ifndef SOCKET
114
#define SOCKET int
115
#endif
116
117
#ifdef STANDALONE
118
#define DEBUG_HTTP
119
#define xmlStrncasecmp(a, b, n) strncasecmp((char *)a, (char *)b, n)
120
#define xmlStrcasecmpi(a, b) strcasecmp((char *)a, (char *)b)
121
#endif
122
123
/* NOTE(review): presumably the cap on followed HTTP redirections; its use
 * is outside this part of the file -- confirm. */
#define XML_NANO_HTTP_MAX_REDIR	10
124
125
/* Number of bytes requested per recv() call; the receive path also uses it
 * as the compaction threshold and as the free space kept at the buffer end. */
#define XML_NANO_HTTP_CHUNK	4096
126
127
/* Connection state values; WRITE and READ are tested as bit masks
 * against xmlNanoHTTPCtxt.state. */
#define XML_NANO_HTTP_CLOSED	0
128
#define XML_NANO_HTTP_WRITE	1
129
#define XML_NANO_HTTP_READ	2
130
#define XML_NANO_HTTP_NONE	4
131
132
typedef struct xmlNanoHTTPCtxt {
133
    char *protocol;	/* the protocol name */
134
    char *hostname;	/* the host name */
135
    int port;		/* the port */
136
    char *path;		/* the path within the URL */
137
    char *query;	/* the query string */
138
    SOCKET fd;		/* the file descriptor for the socket */
139
    int state;		/* WRITE / READ / CLOSED */
140
    char *out;		/* buffer sent (zero terminated) */
141
    char *outptr;	/* index within the buffer sent */
142
    char *in;		/* the receiving buffer */
143
    char *content;	/* the start of the content */
144
    char *inptr;	/* the next byte to read from network */
145
    char *inrptr;	/* the next byte to give back to the client */
146
    int inlen;		/* len of the input buffer */
147
    int last;		/* return code for last operation */
148
    int returnValue;	/* the protocol return value */
149
    int ContentLength;  /* specified content length from HTTP header */
150
    char *contentType;	/* the MIME type for the input */
151
    char *location;	/* the new URL in case of redirect */
152
    char *authHeader;	/* contents of {WWW,Proxy}-Authenticate header */
153
    char *encoding;	/* encoding extracted from the contentType */
154
    char *mimeType;	/* Mime-Type extracted from the contentType */
155
#ifdef HAVE_ZLIB_H
156
    z_stream *strm;	/* Zlib stream object */
157
    int usesGzip;	/* "Content-Encoding: gzip" was detected */
158
#endif
159
} xmlNanoHTTPCtxt, *xmlNanoHTTPCtxtPtr;
160
161
/* Set to 1 at the end of xmlNanoHTTPInit(), reset to 0 by xmlNanoHTTPCleanup(). */
static int initialized = 0;
162
static char *proxy = NULL;	 /* the proxy name if any */
163
static int proxyPort;	/* the proxy port if any */
164
static unsigned int timeout = 60;/* the select() timeout in seconds */
165
166
/* Forward declaration; the definition is not part of this section of the file. */
static int xmlNanoHTTPFetchContent( void * ctx, char ** ptr, int * len );
167
168
/**
169
 * xmlHTTPErrMemory:
170
 * @extra:  extra informations
171
 *
172
 * Handle an out of memory condition
173
 */
174
static void
175
xmlHTTPErrMemory(const char *extra)
176
{
177
    __xmlSimpleError(XML_FROM_HTTP, XML_ERR_NO_MEMORY, NULL, NULL, extra);
178
}
179
180
/**
 * socket_errno:
 *
 * Portability helper giving the error code of the last socket operation.
 *
 * Returns WSAGetLastError() when built against Winsock, errno otherwise.
 */
static int socket_errno(void) {
    int code;

#ifdef _WINSOCKAPI_
    code = WSAGetLastError();
#else
    code = errno;
#endif
    return(code);
}
190
191
#ifdef SUPPORT_IP6
/**
 * have_ipv6:
 *
 * Probe for IPv6 support by trying to create an AF_INET6 stream socket.
 * The probe socket is released with closesocket(), the portability macro
 * used everywhere else in this file, so that it is also closed correctly
 * on platforms where sockets are not plain file descriptors.
 *
 * Returns 1 if an IPv6 socket could be created, 0 otherwise.
 */
static
int have_ipv6(void) {
    SOCKET s;

    s = socket (AF_INET6, SOCK_STREAM, 0);
    if (s != -1) {
        closesocket (s);
        return (1);
    }
    return (0);
}
#endif
204
205
/**
206
 * xmlNanoHTTPInit:
207
 *
208
 * Initialize the HTTP protocol layer.
209
 * Currently it just checks for proxy informations
210
 */
211
212
void
213
xmlNanoHTTPInit(void) {
214
    const char *env;
215
#ifdef _WINSOCKAPI_
216
    WSADATA wsaData;    
217
#endif
218
219
    if (initialized)
220
	return;
221
222
#ifdef _WINSOCKAPI_
223
    if (WSAStartup(MAKEWORD(1, 1), &wsaData) != 0)
224
	return;
225
#endif
226
227
    if (proxy == NULL) {
228
	proxyPort = 80;
229
	env = getenv("no_proxy");
230
	if (env && ((env[0] == '*') && (env[1] == 0)))
231
	    goto done;
232
	env = getenv("http_proxy");
233
	if (env != NULL) {
234
	    xmlNanoHTTPScanProxy(env);
235
	    goto done;
236
	}
237
	env = getenv("HTTP_PROXY");
238
	if (env != NULL) {
239
	    xmlNanoHTTPScanProxy(env);
240
	    goto done;
241
	}
242
    }
243
done:
244
    initialized = 1;
245
}
246
247
/**
248
 * xmlNanoHTTPCleanup:
249
 *
250
 * Cleanup the HTTP protocol layer.
251
 */
252
253
void
254
xmlNanoHTTPCleanup(void) {
255
    if (proxy != NULL) {
256
	xmlFree(proxy);
257
	proxy = NULL;
258
    }
259
#ifdef _WINSOCKAPI_
260
    if (initialized)
261
	WSACleanup();
262
#endif
263
    initialized = 0;
264
    return;
265
}
266
267
/**
268
 * xmlNanoHTTPScanURL:
269
 * @ctxt:  an HTTP context
270
 * @URL:  The URL used to initialize the context
271
 *
272
 * (Re)Initialize an HTTP context by parsing the URL and finding
273
 * the protocol host port and path it indicates.
274
 */
275
276
static void
277
xmlNanoHTTPScanURL(xmlNanoHTTPCtxtPtr ctxt, const char *URL) {
278
    xmlURIPtr uri;
279
    /*
280
     * Clear any existing data from the context
281
     */
282
    if (ctxt->protocol != NULL) { 
283
        xmlFree(ctxt->protocol);
284
	ctxt->protocol = NULL;
285
    }
286
    if (ctxt->hostname != NULL) { 
287
        xmlFree(ctxt->hostname);
288
	ctxt->hostname = NULL;
289
    }
290
    if (ctxt->path != NULL) { 
291
        xmlFree(ctxt->path);
292
	ctxt->path = NULL;
293
    }
294
    if (ctxt->query != NULL) { 
295
        xmlFree(ctxt->query);
296
	ctxt->query = NULL;
297
    }
298
    if (URL == NULL) return;
299
300
    uri = xmlParseURIRaw(URL, 1);
301
    if (uri == NULL)
302
	return;
303
304
    if ((uri->scheme == NULL) || (uri->server == NULL)) {
305
	xmlFreeURI(uri);
306
	return;
307
    }
308
    
309
    ctxt->protocol = xmlMemStrdup(uri->scheme);
310
    ctxt->hostname = xmlMemStrdup(uri->server);
311
    if (uri->path != NULL)
312
	ctxt->path = xmlMemStrdup(uri->path);
313
    else
314
	ctxt->path = xmlMemStrdup("/");
315
    if (uri->query != NULL)
316
	ctxt->query = xmlMemStrdup(uri->query);
317
    if (uri->port != 0)
318
	ctxt->port = uri->port;
319
320
    xmlFreeURI(uri);
321
}
322
323
/**
324
 * xmlNanoHTTPScanProxy:
325
 * @URL:  The proxy URL used to initialize the proxy context
326
 *
327
 * (Re)Initialize the HTTP Proxy context by parsing the URL and finding
328
 * the protocol host port it indicates.
329
 * Should be like http://myproxy/ or http://myproxy:3128/
330
 * A NULL URL cleans up proxy informations.
331
 */
332
333
void
334
xmlNanoHTTPScanProxy(const char *URL) {
335
    xmlURIPtr uri;
336
337
    if (proxy != NULL) { 
338
        xmlFree(proxy);
339
	proxy = NULL;
340
    }
341
    proxyPort = 0;
342
343
#ifdef DEBUG_HTTP
344
    if (URL == NULL)
345
	xmlGenericError(xmlGenericErrorContext,
346
		"Removing HTTP proxy info\n");
347
    else
348
	xmlGenericError(xmlGenericErrorContext,
349
		"Using HTTP proxy %s\n", URL);
350
#endif
351
    if (URL == NULL) return;
352
353
    uri = xmlParseURIRaw(URL, 1);
354
    if ((uri == NULL) || (uri->scheme == NULL) ||
355
	(strcmp(uri->scheme, "http")) || (uri->server == NULL)) {
356
	__xmlIOErr(XML_FROM_HTTP, XML_HTTP_URL_SYNTAX, "Syntax Error\n");
357
	if (uri != NULL)
358
	    xmlFreeURI(uri);
359
	return;
360
    }
361
    
362
    proxy = xmlMemStrdup(uri->server);
363
    if (uri->port != 0)
364
	proxyPort = uri->port;
365
366
    xmlFreeURI(uri);
367
}
368
369
/**
370
 * xmlNanoHTTPNewCtxt:
371
 * @URL:  The URL used to initialize the context
372
 *
373
 * Allocate and initialize a new HTTP context.
374
 *
375
 * Returns an HTTP context or NULL in case of error.
376
 */
377
378
static xmlNanoHTTPCtxtPtr
379
xmlNanoHTTPNewCtxt(const char *URL) {
380
    xmlNanoHTTPCtxtPtr ret;
381
382
    ret = (xmlNanoHTTPCtxtPtr) xmlMalloc(sizeof(xmlNanoHTTPCtxt));
383
    if (ret == NULL) {
384
        xmlHTTPErrMemory("allocating context");
385
        return(NULL);
386
    }
387
388
    memset(ret, 0, sizeof(xmlNanoHTTPCtxt));
389
    ret->port = 80;
390
    ret->returnValue = 0;
391
    ret->fd = -1;
392
    ret->ContentLength = -1;
393
394
    xmlNanoHTTPScanURL(ret, URL);
395
396
    return(ret);
397
}
398
399
/**
400
 * xmlNanoHTTPFreeCtxt:
401
 * @ctxt:  an HTTP context
402
 *
403
 * Frees the context after closing the connection.
404
 */
405
406
static void
407
xmlNanoHTTPFreeCtxt(xmlNanoHTTPCtxtPtr ctxt) {
408
    if (ctxt == NULL) return;
409
    if (ctxt->hostname != NULL) xmlFree(ctxt->hostname);
410
    if (ctxt->protocol != NULL) xmlFree(ctxt->protocol);
411
    if (ctxt->path != NULL) xmlFree(ctxt->path);
412
    if (ctxt->query != NULL) xmlFree(ctxt->query);
413
    if (ctxt->out != NULL) xmlFree(ctxt->out);
414
    if (ctxt->in != NULL) xmlFree(ctxt->in);
415
    if (ctxt->contentType != NULL) xmlFree(ctxt->contentType);
416
    if (ctxt->encoding != NULL) xmlFree(ctxt->encoding);
417
    if (ctxt->mimeType != NULL) xmlFree(ctxt->mimeType);
418
    if (ctxt->location != NULL) xmlFree(ctxt->location);
419
    if (ctxt->authHeader != NULL) xmlFree(ctxt->authHeader);
420
#ifdef HAVE_ZLIB_H
421
    if (ctxt->strm != NULL) {
422
	inflateEnd(ctxt->strm);
423
	xmlFree(ctxt->strm);
424
    }
425
#endif
426
427
    ctxt->state = XML_NANO_HTTP_NONE;
428
    if (ctxt->fd >= 0) closesocket(ctxt->fd);
429
    ctxt->fd = -1;
430
    xmlFree(ctxt);
431
}
432
433
/**
434
 * xmlNanoHTTPSend:
435
 * @ctxt:  an HTTP context
436
 *
437
 * Send the input needed to initiate the processing on the server side
438
 * Returns number of bytes sent or -1 on error.
439
 */
440
441
static int
442
xmlNanoHTTPSend(xmlNanoHTTPCtxtPtr ctxt, const char * xmt_ptr, int outlen) {
443
444
    int 	total_sent = 0;
445
446
    if ( (ctxt->state & XML_NANO_HTTP_WRITE) && (xmt_ptr != NULL ) ) {
447
        while (total_sent < outlen) {
448
            int nsent = send(ctxt->fd, xmt_ptr + total_sent,
449
                                      outlen - total_sent, 0);
450
            if (nsent>0)
451
                total_sent += nsent;
452
	    else if ( ( nsent == -1 ) && 
453
#if defined(EAGAIN) && EAGAIN != EWOULDBLOCK
454
	    	      ( socket_errno( ) != EAGAIN ) &&
455
#endif
456
		        ( socket_errno( ) != EWOULDBLOCK ) ) {
457
		__xmlIOErr(XML_FROM_HTTP, 0, "send failed\n");
458
		if ( total_sent == 0 )
459
		    total_sent = -1;
460
		break;
461
	    }
462
	    else {
463
	        /*
464
		**  No data sent
465
		**  Since non-blocking sockets are used, wait for 
466
		**  socket to be writable or default timeout prior
467
		**  to retrying.
468
		*/
469
470
		struct timeval	tv;
471
		fd_set		wfd;
472
473
		tv.tv_sec = timeout;
474
		tv.tv_usec = 0;
475
		FD_ZERO( &wfd );
476
#ifdef _MSC_VER
477
#pragma warning(push)
478
#pragma warning(disable: 4018)
479
#endif
480
		FD_SET( ctxt->fd, &wfd );
481
#ifdef _MSC_VER
482
#pragma warning(pop)
483
#endif
484
		(void)select( ctxt->fd + 1, NULL, &wfd, NULL, &tv );
485
	    }
486
	}
487
    }
488
489
    return total_sent;
490
}
491
492
/**
493
 * xmlNanoHTTPRecv:
494
 * @ctxt:  an HTTP context
495
 *
496
 * Read information coming from the HTTP connection.
497
 * This is a blocking call (but it blocks in select(), not read()).
498
 *
499
 * Returns the number of byte read or -1 in case of error.
500
 */
501
502
static int
503
xmlNanoHTTPRecv(xmlNanoHTTPCtxtPtr ctxt) {
504
    fd_set rfd;
505
    struct timeval tv;
506
507
508
    while (ctxt->state & XML_NANO_HTTP_READ) {
509
	if (ctxt->in == NULL) {
510
	    ctxt->in = (char *) xmlMallocAtomic(65000 * sizeof(char));
511
	    if (ctxt->in == NULL) {
512
		xmlHTTPErrMemory("allocating input");
513
	        ctxt->last = -1;
514
		return(-1);
515
	    }
516
	    ctxt->inlen = 65000;
517
	    ctxt->inptr = ctxt->content = ctxt->inrptr = ctxt->in;
518
	}
519
	if (ctxt->inrptr > ctxt->in + XML_NANO_HTTP_CHUNK) {
520
	    int delta = ctxt->inrptr - ctxt->in;
521
	    int len = ctxt->inptr - ctxt->inrptr;
522
	    
523
	    memmove(ctxt->in, ctxt->inrptr, len);
524
	    ctxt->inrptr -= delta;
525
	    ctxt->content -= delta;
526
	    ctxt->inptr -= delta;
527
	}
528
        if ((ctxt->in + ctxt->inlen) < (ctxt->inptr + XML_NANO_HTTP_CHUNK)) {
529
	    int d_inptr = ctxt->inptr - ctxt->in;
530
	    int d_content = ctxt->content - ctxt->in;
531
	    int d_inrptr = ctxt->inrptr - ctxt->in;
532
	    char *	tmp_ptr = ctxt->in;
533
534
	    ctxt->inlen *= 2;
535
            ctxt->in = (char *) xmlRealloc(tmp_ptr, ctxt->inlen);
536
	    if (ctxt->in == NULL) {
537
		xmlHTTPErrMemory("allocating input buffer");
538
		xmlFree( tmp_ptr );
539
	        ctxt->last = -1;
540
		return(-1);
541
	    }
542
            ctxt->inptr = ctxt->in + d_inptr;
543
            ctxt->content = ctxt->in + d_content;
544
            ctxt->inrptr = ctxt->in + d_inrptr;
545
	}
546
	ctxt->last = recv(ctxt->fd, ctxt->inptr, XML_NANO_HTTP_CHUNK, 0);
547
	if (ctxt->last > 0) {
548
	    ctxt->inptr += ctxt->last;
549
	    return(ctxt->last);
550
	}
551
	if (ctxt->last == 0) {
552
	    return(0);
553
	}
554
	if (ctxt->last == -1) {
555
	    switch (socket_errno()) {
556
		case EINPROGRESS:
557
		case EWOULDBLOCK:
558
#if defined(EAGAIN) && EAGAIN != EWOULDBLOCK
559
		case EAGAIN:
560
#endif
561
		    break;
562
563
		case ECONNRESET:
564
		case ESHUTDOWN:
565
		    return ( 0 );
566
567
		default:
568
		    __xmlIOErr(XML_FROM_HTTP, 0, "recv failed\n");
569
		    return(-1);
570
	    }
571
	}
572
573
	tv.tv_sec = timeout;
574
	tv.tv_usec = 0;
575
	FD_ZERO(&rfd);
576
#ifdef _MSC_VER
577
#pragma warning(push)
578
#pragma warning(disable: 4018)
579
#endif
580
	FD_SET(ctxt->fd, &rfd);
581
#ifdef _MSC_VER
582
#pragma warning(pop)
583
#endif
584
	
585
	if ( (select(ctxt->fd+1, &rfd, NULL, NULL, &tv)<1)
586
#if defined(EINTR)
587
		&& (errno != EINTR)
588
#endif
589
	)
590
		return(0);
591
    }
592
    return(0);
593
}
594
595
/**
596
 * xmlNanoHTTPReadLine:
597
 * @ctxt:  an HTTP context
598
 *
599
 * Read one line in the HTTP server output, usually for extracting
600
 * the HTTP protocol informations from the answer header.
601
 *
602
 * Returns a newly allocated string with a copy of the line, or NULL
603
 *         which indicate the end of the input.
604
 */
605
606
static char *
607
xmlNanoHTTPReadLine(xmlNanoHTTPCtxtPtr ctxt) {
608
    char buf[4096];
609
    char *bp = buf;
610
    int	rc;
611
    
612
    while (bp - buf < 4095) {
613
	if (ctxt->inrptr == ctxt->inptr) {
614
	    if ( (rc = xmlNanoHTTPRecv(ctxt)) == 0) {
615
		if (bp == buf)
616
		    return(NULL);
617
		else
618
		    *bp = 0;
619
		return(xmlMemStrdup(buf));
620
	    }
621
	    else if ( rc == -1 ) {
622
	        return ( NULL );
623
	    }
624
	}
625
	*bp = *ctxt->inrptr++;
626
	if (*bp == '\n') {
627
	    *bp = 0;
628
	    return(xmlMemStrdup(buf));
629
	}
630
	if (*bp != '\r')
631
	    bp++;
632
    }
633
    buf[4095] = 0;
634
    return(xmlMemStrdup(buf));
635
}
636
637
638
/**
639
 * xmlNanoHTTPScanAnswer:
640
 * @ctxt:  an HTTP context
641
 * @line:  an HTTP header line
642
 *
643
 * Try to extract useful informations from the server answer.
644
 * We currently parse and process:
645
 *  - The HTTP revision/ return code
646
 *  - The Content-Type, Mime-Type and charset used
647
 *  - The Location for redirect processing.
648
 *
649
 * Returns -1 in case of failure, the file descriptor number otherwise
650
 */
651
652
static void
653
xmlNanoHTTPScanAnswer(xmlNanoHTTPCtxtPtr ctxt, const char *line) {
654
    const char *cur = line;
655
656
    if (line == NULL) return;
657
658
    if (!strncmp(line, "HTTP/", 5)) {
659
        int version = 0;
660
	int ret = 0;
661
662
	cur += 5;
663
	while ((*cur >= '0') && (*cur <= '9')) {
664
	    version *= 10;
665
	    version += *cur - '0';
666
	    cur++;
667
	}
668
	if (*cur == '.') {
669
	    cur++;
670
	    if ((*cur >= '0') && (*cur <= '9')) {
671
		version *= 10;
672
		version += *cur - '0';
673
		cur++;
674
	    }
675
	    while ((*cur >= '0') && (*cur <= '9'))
676
		cur++;
677
	} else
678
	    version *= 10;
679
	if ((*cur != ' ') && (*cur != '\t')) return;
680
	while ((*cur == ' ') || (*cur == '\t')) cur++;
681
	if ((*cur < '0') || (*cur > '9')) return;
682
	while ((*cur >= '0') && (*cur <= '9')) {
683
	    ret *= 10;
684
	    ret += *cur - '0';
685
	    cur++;
686
	}
687
	if ((*cur != 0) && (*cur != ' ') && (*cur != '\t')) return;
688
	ctxt->returnValue = ret;
689
    } else if (!xmlStrncasecmp(BAD_CAST line, BAD_CAST"Content-Type:", 13)) {
690
        const xmlChar *charset, *last, *mime;
691
        cur += 13;
692
	while ((*cur == ' ') || (*cur == '\t')) cur++;
693
	if (ctxt->contentType != NULL)
694
	    xmlFree(ctxt->contentType);
695
	ctxt->contentType = xmlMemStrdup(cur);
696
	mime = (const xmlChar *) cur;
697
	last = mime;
698
	while ((*last != 0) && (*last != ' ') && (*last != '\t') &&
699
	       (*last != ';') && (*last != ','))
700
	    last++;
701
	if (ctxt->mimeType != NULL)
702
	    xmlFree(ctxt->mimeType);
703
	ctxt->mimeType = (char *) xmlStrndup(mime, last - mime);
704
	charset = xmlStrstr(BAD_CAST ctxt->contentType, BAD_CAST "charset=");
705
	if (charset != NULL) {
706
	    charset += 8;
707
	    last = charset;
708
	    while ((*last != 0) && (*last != ' ') && (*last != '\t') &&
709
	           (*last != ';') && (*last != ','))
710
		last++;
711
	    if (ctxt->encoding != NULL)
712
	        xmlFree(ctxt->encoding);
713
	    ctxt->encoding = (char *) xmlStrndup(charset, last - charset);
714
	}
715
    } else if (!xmlStrncasecmp(BAD_CAST line, BAD_CAST"ContentType:", 12)) {
716
        const xmlChar *charset, *last, *mime;
717
        cur += 12;
718
	if (ctxt->contentType != NULL) return;
719
	while ((*cur == ' ') || (*cur == '\t')) cur++;
720
	ctxt->contentType = xmlMemStrdup(cur);
721
	mime = (const xmlChar *) cur;
722
	last = mime;
723
	while ((*last != 0) && (*last != ' ') && (*last != '\t') &&
724
	       (*last != ';') && (*last != ','))
725
	    last++;
726
	if (ctxt->mimeType != NULL)
727
	    xmlFree(ctxt->mimeType);
728
	ctxt->mimeType = (char *) xmlStrndup(mime, last - mime);
729
	charset = xmlStrstr(BAD_CAST ctxt->contentType, BAD_CAST "charset=");
730
	if (charset != NULL) {
731
	    charset += 8;
732
	    last = charset;
733
	    while ((*last != 0) && (*last != ' ') && (*last != '\t') &&
734
	           (*last != ';') && (*last != ','))
735
		last++;
736
	    if (ctxt->encoding != NULL)
737
	        xmlFree(ctxt->encoding);
738
	    ctxt->encoding = (char *) xmlStrndup(charset, last - charset);
739
	}
740
    } else if (!xmlStrncasecmp(BAD_CAST line, BAD_CAST"Location:", 9)) {
741
        cur += 9;
742
	while ((*cur == ' ') || (*cur == '\t')) cur++;
743
	if (ctxt->location != NULL)
744
	    xmlFree(ctxt->location);
745
	if (*cur == '/') {
746
	    xmlChar *tmp_http = xmlStrdup(BAD_CAST "http://");
747
	    xmlChar *tmp_loc = 
748
	        xmlStrcat(tmp_http, (const xmlChar *) ctxt->hostname);
749
	    ctxt->location = 
750
	        (char *) xmlStrcat (tmp_loc, (const xmlChar *) cur);
751
	} else {
752
	    ctxt->location = xmlMemStrdup(cur);
753
	}
754
    } else if (!xmlStrncasecmp(BAD_CAST line, BAD_CAST"WWW-Authenticate:", 17)) {
755
        cur += 17;
756
	while ((*cur == ' ') || (*cur == '\t')) cur++;
757
	if (ctxt->authHeader != NULL)
758
	    xmlFree(ctxt->authHeader);
759
	ctxt->authHeader = xmlMemStrdup(cur);
760
    } else if (!xmlStrncasecmp(BAD_CAST line, BAD_CAST"Proxy-Authenticate:", 19)) {
761
        cur += 19;
762
	while ((*cur == ' ') || (*cur == '\t')) cur++;
763
	if (ctxt->authHeader != NULL)
764
	    xmlFree(ctxt->authHeader);
765
	ctxt->authHeader = xmlMemStrdup(cur);
766
#ifdef HAVE_ZLIB_H
767
    } else if ( !xmlStrncasecmp( BAD_CAST line, BAD_CAST"Content-Encoding:", 17) ) {
768
	cur += 17;
769
	while ((*cur == ' ') || (*cur == '\t')) cur++;
770
	if ( !xmlStrncasecmp( BAD_CAST cur, BAD_CAST"gzip", 4) ) {
771
	    ctxt->usesGzip = 1;
772
773
	    ctxt->strm = xmlMalloc(sizeof(z_stream));
774
775
	    if (ctxt->strm != NULL) {
776
		ctxt->strm->zalloc = Z_NULL;
777
		ctxt->strm->zfree = Z_NULL;
778
		ctxt->strm->opaque = Z_NULL;
779
		ctxt->strm->avail_in = 0;
780
		ctxt->strm->next_in = Z_NULL;
781
782
		inflateInit2( ctxt->strm, 31 );
783
	    }
784
	}
785
#endif
786
    } else if ( !xmlStrncasecmp( BAD_CAST line, BAD_CAST"Content-Length:", 15) ) {
787
	cur += 15;
788
	ctxt->ContentLength = strtol( cur, NULL, 10 );
789
    }
790
}
791
792
/**
793
 * xmlNanoHTTPConnectAttempt:
794
 * @addr:  a socket address structure
795
 *
796
 * Attempt a connection to the given IP:port endpoint. It forces
797
 * non-blocking semantic on the socket, and allow 60 seconds for
798
 * the host to answer.
799
 *
800
 * Returns -1 in case of failure, the file descriptor number otherwise
801
 */
802
803
static int
804
xmlNanoHTTPConnectAttempt(struct sockaddr *addr)
805
{
806
    fd_set wfd;
807
#ifdef _WINSOCKAPI_
808
    fd_set xfd;
809
#endif
810
    struct timeval tv;
811
    int status;
812
    int addrlen;
813
    SOCKET s;
814
    
815
#ifdef SUPPORT_IP6
816
    if (addr->sa_family == AF_INET6) {
817
	s = socket (PF_INET6, SOCK_STREAM, IPPROTO_TCP);
818
	addrlen = sizeof (struct sockaddr_in6);
819
    }
820
    else
821
#endif
822
    {
823
	s = socket (PF_INET, SOCK_STREAM, IPPROTO_TCP);
824
	addrlen = sizeof (struct sockaddr_in);
825
    }
826
    if (s==-1) {
827
#ifdef DEBUG_HTTP
828
	perror("socket");
829
#endif
830
	__xmlIOErr(XML_FROM_HTTP, 0, "socket failed\n");
831
	return(-1);
832
    }
833
    
834
#ifdef _WINSOCKAPI_
835
    {
836
	u_long one = 1;
837
838
	status = ioctlsocket(s, FIONBIO, &one) == SOCKET_ERROR ? -1 : 0;
839
    }
840
#else /* _WINSOCKAPI_ */
841
#if defined(VMS)
842
    {
843
	int enable = 1;
844
	status = ioctl(s, FIONBIO, &enable);
845
    }
846
#else /* VMS */
847
#if defined(__BEOS__) && !defined(__HAIKU__)
848
	{
849
		bool noblock = true;
850
		status = setsockopt(s, SOL_SOCKET, SO_NONBLOCK, &noblock, sizeof(noblock));
851
	}
852
#else /* __BEOS__ */
853
    if ((status = fcntl(s, F_GETFL, 0)) != -1) {
854
#ifdef O_NONBLOCK
855
	status |= O_NONBLOCK;
856
#else /* O_NONBLOCK */
857
#ifdef F_NDELAY
858
	status |= F_NDELAY;
859
#endif /* F_NDELAY */
860
#endif /* !O_NONBLOCK */
861
	status = fcntl(s, F_SETFL, status);
862
    }
863
    if (status < 0) {
864
#ifdef DEBUG_HTTP
865
	perror("nonblocking");
866
#endif
867
	__xmlIOErr(XML_FROM_HTTP, 0, "error setting non-blocking IO\n");
868
	closesocket(s);
869
	return(-1);
870
    }
871
#endif /* !__BEOS__ */
872
#endif /* !VMS */
873
#endif /* !_WINSOCKAPI_ */
874
875
    if (connect (s, addr, addrlen) == -1) {
876
	switch (socket_errno()) {
877
	    case EINPROGRESS:
878
	    case EWOULDBLOCK:
879
		break;
880
	    default:
881
		__xmlIOErr(XML_FROM_HTTP, 0, "error connecting to HTTP server");
882
		closesocket(s);
883
		return(-1);
884
	}
885
    }	
886
    
887
    tv.tv_sec = timeout;
888
    tv.tv_usec = 0;
889
890
#ifdef _MSC_VER
891
#pragma warning(push)
892
#pragma warning(disable: 4018)
893
#endif
894
    FD_ZERO(&wfd);
895
    FD_SET(s, &wfd);
896
897
#ifdef _WINSOCKAPI_    
898
    FD_ZERO(&xfd);
899
    FD_SET(s, &xfd);
900
    
901
    switch(select(s+1, NULL, &wfd, &xfd, &tv))
902
#else
903
    switch(select(s+1, NULL, &wfd, NULL, &tv))
904
#endif
905
#ifdef _MSC_VER
906
#pragma warning(pop)
907
#endif
908
    {
909
	case 0:
910
	    /* Time out */
911
	    __xmlIOErr(XML_FROM_HTTP, 0, "Connect attempt timed out");
912
	    closesocket(s);
913
	    return(-1);
914
	case -1:
915
	    /* Ermm.. ?? */
916
	    __xmlIOErr(XML_FROM_HTTP, 0, "Connect failed");
917
	    closesocket(s);
918
	    return(-1);
919
    }
920
921
    if ( FD_ISSET(s, &wfd)
922
#ifdef _WINSOCKAPI_
923
                           || FD_ISSET(s, &xfd)
924
#endif
925
                                                ) {
926
	XML_SOCKLEN_T len;
927
	len = sizeof(status);
928
#ifdef SO_ERROR
929
	if (getsockopt(s, SOL_SOCKET, SO_ERROR, (char*)&status, &len) < 0 ) {
930
	    /* Solaris error code */
931
	    __xmlIOErr(XML_FROM_HTTP, 0, "getsockopt failed\n");
932
	    return (-1);
933
	}
934
#endif
935
	if ( status ) {
936
	    __xmlIOErr(XML_FROM_HTTP, 0, "Error connecting to remote host");
937
	    closesocket(s);
938
	    errno = status;
939
	    return (-1);
940
	}
941
    } else {
942
	/* pbm */
943
	__xmlIOErr(XML_FROM_HTTP, 0, "select failed\n");
944
	closesocket(s);
945
	return (-1);
946
    }
947
    
948
    return(s);
949
}
950
 
951
/**
952
 * xmlNanoHTTPConnectHost:
953
 * @host:  the host name
954
 * @port:  the port number
955
 *
956
 * Attempt a connection to the given host:port endpoint. It tries
957
 * the multiple IP provided by the DNS if available.
958
 *
959
 * Returns -1 in case of failure, the file descriptor number otherwise
960
 */
961
962
static int
963
xmlNanoHTTPConnectHost(const char *host, int port)
964
{
965
    struct hostent *h;
966
    struct sockaddr *addr = NULL;
967
    struct in_addr ia;
968
    struct sockaddr_in sockin;
969
970
#ifdef SUPPORT_IP6
971
    struct in6_addr ia6;
972
    struct sockaddr_in6 sockin6;
973
#endif
974
    int i;
975
    int s;
976
977
    memset (&sockin, 0, sizeof(sockin));
978
#ifdef SUPPORT_IP6
979
    memset (&sockin6, 0, sizeof(sockin6));
980
#endif
981
982
#if !defined(HAVE_GETADDRINFO) && defined(SUPPORT_IP6) && defined(RES_USE_INET6)
983
    if (have_ipv6 ())
984
    {
985
	if (!(_res.options & RES_INIT))
986
	    res_init();
987
	_res.options |= RES_USE_INET6;
988
    }
989
#endif
990
991
#if defined(HAVE_GETADDRINFO) && defined(SUPPORT_IP6) && !defined(_WIN32)
992
    if (have_ipv6 ())
993
#endif
994
#if defined(HAVE_GETADDRINFO) && (defined(SUPPORT_IP6) || defined(_WIN32))
995
    {
996
	int status;
997
	struct addrinfo hints, *res, *result;
998
999
	result = NULL;
1000
	memset (&hints, 0,sizeof(hints));
1001
	hints.ai_socktype = SOCK_STREAM;
1002
1003
	status = getaddrinfo (host, NULL, &hints, &result);
1004
	if (status) {
1005
	    __xmlIOErr(XML_FROM_HTTP, 0, "getaddrinfo failed\n");
1006
	    return (-1);
1007
	}
1008
1009
	for (res = result; res; res = res->ai_next) {
1010
	    if (res->ai_family == AF_INET) {
1011
		if (res->ai_addrlen > sizeof(sockin)) {
1012
		    __xmlIOErr(XML_FROM_HTTP, 0, "address size mismatch\n");
1013
		    freeaddrinfo (result);
1014
		    return (-1);
1015
		}
1016
		memcpy (&sockin, res->ai_addr, res->ai_addrlen);
1017
		sockin.sin_port = htons (port);
1018
		addr = (struct sockaddr *)&sockin;
1019
#ifdef SUPPORT_IP6
1020
	    } else if (have_ipv6 () && (res->ai_family == AF_INET6)) {
1021
		if (res->ai_addrlen > sizeof(sockin6)) {
1022
		    __xmlIOErr(XML_FROM_HTTP, 0, "address size mismatch\n");
1023
		    freeaddrinfo (result);
1024
		    return (-1);
1025
		}
1026
		memcpy (&sockin6, res->ai_addr, res->ai_addrlen);
1027
		sockin6.sin6_port = htons (port);
1028
		addr = (struct sockaddr *)&sockin6;
1029
#endif
1030
	    } else
1031
		continue;              /* for */
1032
1033
	    s = xmlNanoHTTPConnectAttempt (addr);
1034
	    if (s != -1) {
1035
		freeaddrinfo (result);
1036
		return (s);
1037
	    }
1038
	}
1039
1040
	if (result)
1041
	    freeaddrinfo (result);
1042
    }
1043
#endif
1044
#if defined(HAVE_GETADDRINFO) && defined(SUPPORT_IP6) && !defined(_WIN32)
1045
    else
1046
#endif
1047
#if !defined(HAVE_GETADDRINFO) || !defined(_WIN32)
1048
    {
1049
	h = gethostbyname (host);
1050
	if (h == NULL) {
1051
1052
/*
1053
 * Okay, I got fed up by the non-portability of this error message
1054
 * extraction code. it work on Linux, if it work on your platform
1055
 * and one want to enable it, send me the defined(foobar) needed
1056
 */
1057
#if defined(HAVE_NETDB_H) && defined(HOST_NOT_FOUND) && defined(linux)
1058
	    const char *h_err_txt = "";
1059
1060
	    switch (h_errno) {
1061
		case HOST_NOT_FOUND:
1062
		    h_err_txt = "Authoritive host not found";
1063
		    break;
1064
1065
		case TRY_AGAIN:
1066
		    h_err_txt =
1067
			"Non-authoritive host not found or server failure.";
1068
		    break;
1069
1070
		case NO_RECOVERY:
1071
		    h_err_txt =
1072
			"Non-recoverable errors:  FORMERR, REFUSED, or NOTIMP.";
1073
		    break;
1074
1075
		case NO_ADDRESS:
1076
		    h_err_txt =
1077
			"Valid name, no data record of requested type.";
1078
		    break;
1079
1080
		default:
1081
		    h_err_txt = "No error text defined.";
1082
		    break;
1083
	    }
1084
	    __xmlIOErr(XML_FROM_HTTP, 0, h_err_txt);
1085
#else
1086
	    __xmlIOErr(XML_FROM_HTTP, 0, "Failed to resolve host");
1087
#endif
1088
	    return (-1);
1089
	}
1090
1091
	for (i = 0; h->h_addr_list[i]; i++) {
1092
	    if (h->h_addrtype == AF_INET) {
1093
		/* A records (IPv4) */
1094
		if ((unsigned int) h->h_length > sizeof(ia)) {
1095
		    __xmlIOErr(XML_FROM_HTTP, 0, "address size mismatch\n");
1096
		    return (-1);
1097
		}
1098
		memcpy (&ia, h->h_addr_list[i], h->h_length);
1099
		sockin.sin_family = h->h_addrtype;
1100
		sockin.sin_addr = ia;
1101
		sockin.sin_port = (u_short)htons ((unsigned short)port);
1102
		addr = (struct sockaddr *) &sockin;
1103
#ifdef SUPPORT_IP6
1104
	    } else if (have_ipv6 () && (h->h_addrtype == AF_INET6)) {
1105
		/* AAAA records (IPv6) */
1106
		if ((unsigned int) h->h_length > sizeof(ia6)) {
1107
		    __xmlIOErr(XML_FROM_HTTP, 0, "address size mismatch\n");
1108
		    return (-1);
1109
		}
1110
		memcpy (&ia6, h->h_addr_list[i], h->h_length);
1111
		sockin6.sin6_family = h->h_addrtype;
1112
		sockin6.sin6_addr = ia6;
1113
		sockin6.sin6_port = htons (port);
1114
		addr = (struct sockaddr *) &sockin6;
1115
#endif
1116
	    } else
1117
		break;              /* for */
1118
1119
	    s = xmlNanoHTTPConnectAttempt (addr);
1120
	    if (s != -1)
1121
		return (s);
1122
	}
1123
    }
1124
#endif
1125
1126
#ifdef DEBUG_HTTP
1127
    xmlGenericError(xmlGenericErrorContext,
1128
                    "xmlNanoHTTPConnectHost:  unable to connect to '%s'.\n",
1129
                    host);
1130
#endif
1131
    return (-1);
1132
}
1133
1134
1135
/**
 * xmlNanoHTTPOpen:
 * @URL:  The URL to load
 * @contentType:  if not NULL, location where the Content-Type
 *                information is returned when available
 *
 * Convenience wrapper around xmlNanoHTTPMethod(): the request is issued
 * with no explicit method, no request body and no extra headers.
 * *@contentType is cleared before the request is made.
 *
 * Returns NULL in case of failure, otherwise a request handler.
 *     The contentType, if provided must be freed by the caller
 */

void*
xmlNanoHTTPOpen(const char *URL, char **contentType) {
    void *handle;

    /* Clear the OUT slot first so it is never used as a request header. */
    if (contentType != NULL) {
        *contentType = NULL;
    }

    handle = xmlNanoHTTPMethod(URL, NULL, NULL, contentType, NULL, 0);
    return(handle);
}
1153
1154
/**
 * xmlNanoHTTPOpenRedir:
 * @URL:  The URL to load
 * @contentType:  if not NULL, location where the Content-Type
 *                information is returned when available
 * @redir:  if not NULL, location where the redirected URL is returned
 *          when available
 *
 * Convenience wrapper around xmlNanoHTTPMethodRedir(): the request is
 * issued with no explicit method, no request body and no extra headers.
 * Both *@contentType and *@redir are cleared before the request is made.
 *
 * Returns NULL in case of failure, otherwise a request handler.
 *     The contentType, if provided must be freed by the caller
 */

void*
xmlNanoHTTPOpenRedir(const char *URL, char **contentType, char **redir) {
    void *handle;

    /* Reset both OUT slots before handing them to the worker. */
    if (contentType != NULL) {
        *contentType = NULL;
    }
    if (redir != NULL) {
        *redir = NULL;
    }

    handle = xmlNanoHTTPMethodRedir(URL, NULL, NULL, contentType, redir,
                                    NULL, 0);
    return(handle);
}
1174
1175
/**
1176
 * xmlNanoHTTPRead:
1177
 * @ctx:  the HTTP context
1178
 * @dest:  a buffer
1179
 * @len:  the buffer length
1180
 *
1181
 * This function tries to read @len bytes from the existing HTTP connection
1182
 * and saves them in @dest. This is a blocking call.
1183
 *
1184
 * Returns the number of byte read. 0 is an indication of an end of connection.
1185
 *         -1 indicates a parameter error.
1186
 */
1187
int
1188
xmlNanoHTTPRead(void *ctx, void *dest, int len) {
1189
    xmlNanoHTTPCtxtPtr ctxt = (xmlNanoHTTPCtxtPtr) ctx;
1190
#ifdef HAVE_ZLIB_H
1191
    int bytes_read = 0;
1192
    int orig_avail_in;
1193
    int z_ret;
1194
#endif
1195
1196
    if (ctx == NULL) return(-1);
1197
    if (dest == NULL) return(-1);
1198
    if (len <= 0) return(0);
1199
1200
#ifdef HAVE_ZLIB_H
1201
    if (ctxt->usesGzip == 1) {
1202
        if (ctxt->strm == NULL) return(0);
1203
 
1204
        ctxt->strm->next_out = dest;
1205
        ctxt->strm->avail_out = len;
1206
	ctxt->strm->avail_in = ctxt->inptr - ctxt->inrptr;
1207
1208
        while (ctxt->strm->avail_out > 0 &&
1209
	       (ctxt->strm->avail_in > 0 || xmlNanoHTTPRecv(ctxt) > 0)) {
1210
            orig_avail_in = ctxt->strm->avail_in =
1211
			    ctxt->inptr - ctxt->inrptr - bytes_read;
1212
            ctxt->strm->next_in = BAD_CAST (ctxt->inrptr + bytes_read);
1213
1214
            z_ret = inflate(ctxt->strm, Z_NO_FLUSH);
1215
            bytes_read += orig_avail_in - ctxt->strm->avail_in;
1216
1217
            if (z_ret != Z_OK) break;
1218
	}
1219
1220
        ctxt->inrptr += bytes_read;
1221
        return(len - ctxt->strm->avail_out);
1222
    }
1223
#endif
1224
1225
    while (ctxt->inptr - ctxt->inrptr < len) {
1226
        if (xmlNanoHTTPRecv(ctxt) <= 0) break;
1227
    }
1228
    if (ctxt->inptr - ctxt->inrptr < len)
1229
        len = ctxt->inptr - ctxt->inrptr;
1230
    memcpy(dest, ctxt->inrptr, len);
1231
    ctxt->inrptr += len;
1232
    return(len);
1233
}
1234
1235
/**
1236
 * xmlNanoHTTPClose:
1237
 * @ctx:  the HTTP context
1238
 *
1239
 * This function closes an HTTP context, it ends up the connection and
1240
 * free all data related to it.
1241
 */
1242
void
1243
xmlNanoHTTPClose(void *ctx) {
1244
    xmlNanoHTTPCtxtPtr ctxt = (xmlNanoHTTPCtxtPtr) ctx;
1245
1246
    if (ctx == NULL) return;
1247
1248
    xmlNanoHTTPFreeCtxt(ctxt);
1249
}
1250
1251
/**
1252
 * xmlNanoHTTPMethodRedir:
1253
 * @URL:  The URL to load
1254
 * @method:  the HTTP method to use
1255
 * @input:  the input string if any
1256
 * @contentType:  the Content-Type information IN and OUT
1257
 * @redir:  the redirected URL OUT
1258
 * @headers:  the extra headers
1259
 * @ilen:  input length
1260
 *
1261
 * This function try to open a connection to the indicated resource
1262
 * via HTTP using the given @method, adding the given extra headers
1263
 * and the input buffer for the request content.
1264
 *
1265
 * Returns NULL in case of failure, otherwise a request handler.
1266
 *     The contentType, or redir, if provided must be freed by the caller
1267
 */
1268
1269
void*
1270
xmlNanoHTTPMethodRedir(const char *URL, const char *method, const char *input,
1271
                  char **contentType, char **redir,
1272
		  const char *headers, int ilen ) {
1273
    xmlNanoHTTPCtxtPtr ctxt;
1274
    char *bp, *p;
1275
    int blen, ret;
1276
    int head;
1277
    int nbRedirects = 0;
1278
    char *redirURL = NULL;
1279
#ifdef DEBUG_HTTP
1280
    int xmt_bytes;
1281
#endif
1282
    
1283
    if (URL == NULL) return(NULL);
1284
    if (method == NULL) method = "GET";
1285
    xmlNanoHTTPInit();
1286
1287
retry:
1288
    if (redirURL == NULL)
1289
	ctxt = xmlNanoHTTPNewCtxt(URL);
1290
    else {
1291
	ctxt = xmlNanoHTTPNewCtxt(redirURL);
1292
	ctxt->location = xmlMemStrdup(redirURL);
1293
    }
1294
1295
    if ( ctxt == NULL ) {
1296
	return ( NULL );
1297
    }
1298
1299
    if ((ctxt->protocol == NULL) || (strcmp(ctxt->protocol, "http"))) {
1300
	__xmlIOErr(XML_FROM_HTTP, XML_HTTP_URL_SYNTAX, "Not a valid HTTP URI");
1301
        xmlNanoHTTPFreeCtxt(ctxt);
1302
	if (redirURL != NULL) xmlFree(redirURL);
1303
        return(NULL);
1304
    }
1305
    if (ctxt->hostname == NULL) {
1306
	__xmlIOErr(XML_FROM_HTTP, XML_HTTP_UNKNOWN_HOST,
1307
	           "Failed to identify host in URI");
1308
        xmlNanoHTTPFreeCtxt(ctxt);
1309
	if (redirURL != NULL) xmlFree(redirURL);
1310
        return(NULL);
1311
    }
1312
    if (proxy) {
1313
	blen = strlen(ctxt->hostname) * 2 + 16;
1314
	ret = xmlNanoHTTPConnectHost(proxy, proxyPort);
1315
    }
1316
    else {
1317
	blen = strlen(ctxt->hostname);
1318
	ret = xmlNanoHTTPConnectHost(ctxt->hostname, ctxt->port);
1319
    }
1320
    if (ret < 0) {
1321
        xmlNanoHTTPFreeCtxt(ctxt);
1322
	if (redirURL != NULL) xmlFree(redirURL);
1323
        return(NULL);
1324
    }
1325
    ctxt->fd = ret;
1326
1327
    if (input == NULL)
1328
	ilen = 0;
1329
    else
1330
	blen += 36;
1331
1332
    if (headers != NULL)
1333
	blen += strlen(headers) + 2;
1334
    if (contentType && *contentType)
1335
	/* reserve for string plus 'Content-Type: \r\n" */
1336
	blen += strlen(*contentType) + 16;
1337
    if (ctxt->query != NULL)
1338
	/* 1 for '?' */
1339
	blen += strlen(ctxt->query) + 1;
1340
    blen += strlen(method) + strlen(ctxt->path) + 24;
1341
#ifdef HAVE_ZLIB_H
1342
    /* reserve for possible 'Accept-Encoding: gzip' string */
1343
    blen += 23;
1344
#endif
1345
    if (ctxt->port != 80) {
1346
	/* reserve space for ':xxxxx', incl. potential proxy */
1347
	if (proxy)
1348
	    blen += 12;
1349
	else
1350
	    blen += 6;
1351
    }
1352
    bp = (char*)xmlMallocAtomic(blen);
1353
    if ( bp == NULL ) {
1354
        xmlNanoHTTPFreeCtxt( ctxt );
1355
	xmlHTTPErrMemory("allocating header buffer");
1356
	return ( NULL );
1357
    }
1358
1359
    p = bp;
1360
1361
    if (proxy) {
1362
	if (ctxt->port != 80) {
1363
	    p += snprintf( p, blen - (p - bp), "%s http://%s:%d%s", 
1364
			method, ctxt->hostname,
1365
		 	ctxt->port, ctxt->path );
1366
	}
1367
	else 
1368
	    p += snprintf( p, blen - (p - bp), "%s http://%s%s", method,
1369
	    		ctxt->hostname, ctxt->path);
1370
    }
1371
    else
1372
	p += snprintf( p, blen - (p - bp), "%s %s", method, ctxt->path);
1373
1374
    if (ctxt->query != NULL)
1375
	p += snprintf( p, blen - (p - bp), "?%s", ctxt->query);
1376
1377
    if (ctxt->port == 80) {
1378
        p += snprintf( p, blen - (p - bp), " HTTP/1.0\r\nHost: %s\r\n", 
1379
		    ctxt->hostname);
1380
    } else {
1381
        p += snprintf( p, blen - (p - bp), " HTTP/1.0\r\nHost: %s:%d\r\n",
1382
		    ctxt->hostname, ctxt->port);
1383
    }
1384
1385
#ifdef HAVE_ZLIB_H
1386
    p += snprintf(p, blen - (p - bp), "Accept-Encoding: gzip\r\n");
1387
#endif
1388
1389
    if (contentType != NULL && *contentType) 
1390
	p += snprintf(p, blen - (p - bp), "Content-Type: %s\r\n", *contentType);
1391
1392
    if (headers != NULL)
1393
	p += snprintf( p, blen - (p - bp), "%s", headers );
1394
1395
    if (input != NULL)
1396
	snprintf(p, blen - (p - bp), "Content-Length: %d\r\n\r\n", ilen );
1397
    else
1398
	snprintf(p, blen - (p - bp), "\r\n");
1399
1400
#ifdef DEBUG_HTTP
1401
    xmlGenericError(xmlGenericErrorContext,
1402
	    "-> %s%s", proxy? "(Proxy) " : "", bp);
1403
    if ((blen -= strlen(bp)+1) < 0)
1404
	xmlGenericError(xmlGenericErrorContext,
1405
		"ERROR: overflowed buffer by %d bytes\n", -blen);
1406
#endif
1407
    ctxt->outptr = ctxt->out = bp;
1408
    ctxt->state = XML_NANO_HTTP_WRITE;
1409
    blen = strlen( ctxt->out );
1410
#ifdef DEBUG_HTTP
1411
    xmt_bytes = xmlNanoHTTPSend(ctxt, ctxt->out, blen );
1412
    if ( xmt_bytes != blen )
1413
        xmlGenericError( xmlGenericErrorContext,
1414
			"xmlNanoHTTPMethodRedir:  Only %d of %d %s %s\n",
1415
			xmt_bytes, blen,
1416
			"bytes of HTTP headers sent to host",
1417
			ctxt->hostname );
1418
#else
1419
    xmlNanoHTTPSend(ctxt, ctxt->out, blen );
1420
#endif
1421
1422
    if ( input != NULL ) {
1423
#ifdef DEBUG_HTTP
1424
        xmt_bytes = xmlNanoHTTPSend( ctxt, input, ilen );
1425
1426
	if ( xmt_bytes != ilen )
1427
	    xmlGenericError( xmlGenericErrorContext,
1428
	    		"xmlNanoHTTPMethodRedir:  Only %d of %d %s %s\n",
1429
			xmt_bytes, ilen,
1430
			"bytes of HTTP content sent to host",
1431
			ctxt->hostname );
1432
#else
1433
	xmlNanoHTTPSend( ctxt, input, ilen );
1434
#endif
1435
    }
1436
1437
    ctxt->state = XML_NANO_HTTP_READ;
1438
    head = 1;
1439
1440
    while ((p = xmlNanoHTTPReadLine(ctxt)) != NULL) {
1441
        if (head && (*p == 0)) {
1442
	    head = 0;
1443
	    ctxt->content = ctxt->inrptr;
1444
	    xmlFree(p);
1445
	    break;
1446
	}
1447
	xmlNanoHTTPScanAnswer(ctxt, p);
1448
1449
#ifdef DEBUG_HTTP
1450
	xmlGenericError(xmlGenericErrorContext, "<- %s\n", p);
1451
#endif
1452
        xmlFree(p);
1453
    }
1454
1455
    if ((ctxt->location != NULL) && (ctxt->returnValue >= 300) &&
1456
        (ctxt->returnValue < 400)) {
1457
#ifdef DEBUG_HTTP
1458
	xmlGenericError(xmlGenericErrorContext,
1459
		"\nRedirect to: %s\n", ctxt->location);
1460
#endif
1461
	while ( xmlNanoHTTPRecv(ctxt) > 0 ) ;
1462
        if (nbRedirects < XML_NANO_HTTP_MAX_REDIR) {
1463
	    nbRedirects++;
1464
	    if (redirURL != NULL)
1465
		xmlFree(redirURL);
1466
	    redirURL = xmlMemStrdup(ctxt->location);
1467
	    xmlNanoHTTPFreeCtxt(ctxt);
1468
	    goto retry;
1469
	}
1470
	xmlNanoHTTPFreeCtxt(ctxt);
1471
	if (redirURL != NULL) xmlFree(redirURL);
1472
#ifdef DEBUG_HTTP
1473
	xmlGenericError(xmlGenericErrorContext,
1474
		"xmlNanoHTTPMethodRedir: Too many redirects, aborting ...\n");
1475
#endif
1476
	return(NULL);
1477
    }
1478
1479
    if (contentType != NULL) {
1480
	if (ctxt->contentType != NULL)
1481
	    *contentType = xmlMemStrdup(ctxt->contentType);
1482
	else
1483
	    *contentType = NULL;
1484
    }
1485
1486
    if ((redir != NULL) && (redirURL != NULL)) {
1487
	*redir = redirURL;
1488
    } else {
1489
	if (redirURL != NULL)
1490
	    xmlFree(redirURL);
1491
	if (redir != NULL)
1492
	    *redir = NULL;
1493
    }
1494
1495
#ifdef DEBUG_HTTP
1496
    if (ctxt->contentType != NULL)
1497
	xmlGenericError(xmlGenericErrorContext,
1498
		"\nCode %d, content-type '%s'\n\n",
1499
	       ctxt->returnValue, ctxt->contentType);
1500
    else
1501
	xmlGenericError(xmlGenericErrorContext,
1502
		"\nCode %d, no content-type\n\n",
1503
	       ctxt->returnValue);
1504
#endif
1505
1506
    return((void *) ctxt);
1507
}
1508
1509
/**
 * xmlNanoHTTPMethod:
 * @URL:  The URL to load
 * @method:  the HTTP method to use
 * @input:  the input string if any
 * @contentType:  the Content-Type information IN and OUT
 * @headers:  the extra headers
 * @ilen:  input length
 *
 * Same as xmlNanoHTTPMethodRedir() with no location to report the
 * redirected URL: all arguments are forwarded unchanged and NULL is
 * passed for the redirection output.
 *
 * Returns NULL in case of failure, otherwise a request handler.
 *     The contentType, if provided must be freed by the caller
 */

void*
xmlNanoHTTPMethod(const char *URL, const char *method, const char *input,
                  char **contentType, const char *headers, int ilen) {
    void *handle;

    handle = xmlNanoHTTPMethodRedir(URL, method, input, contentType,
                                    NULL, headers, ilen);
    return(handle);
}
1532
1533
/**
 * xmlNanoHTTPFetch:
 * @URL:  The URL to load
 * @filename:  the filename where the content should be saved, "-" selects
 *             the standard output
 * @contentType:  if available the Content-Type information will be
 *                returned at that location
 *
 * This function try to fetch the indicated resource via HTTP GET
 * and save its content in the file.  A named file is created if needed
 * and truncated, so no data from a previous, longer file survives.  The
 * standard output is left open when it is used as the destination.
 *
 * Returns -1 in case of failure (missing @filename, failed request,
 *     file that cannot be opened, or write error), 0 in case of success.
 *     The contentType, if provided must be freed by the caller; it is
 *     released and reset to NULL here only when the file cannot be opened.
 */
int
xmlNanoHTTPFetch(const char *URL, const char *filename, char **contentType) {
    void *ctxt = NULL;
    char *buf = NULL;
    int fd;
    int len;
    int written;
    int mustClose;      /* set only for a descriptor opened here */
    int ret = 0;

    if (filename == NULL) return(-1);
    ctxt = xmlNanoHTTPOpen(URL, contentType);
    if (ctxt == NULL) return(-1);

    if (!strcmp(filename, "-")) {
        /* descriptor 1 is the standard output; 0 would be the input */
        fd = 1;
        mustClose = 0;
    } else {
        /* O_TRUNC: do not leave the tail of an older, longer file behind */
        fd = open(filename, O_CREAT | O_WRONLY | O_TRUNC, 00644);
        if (fd < 0) {
            xmlNanoHTTPClose(ctxt);
            if ((contentType != NULL) && (*contentType != NULL)) {
                xmlFree(*contentType);
                *contentType = NULL;
            }
            return(-1);
        }
        mustClose = 1;
    }

    /*
     * buf is set from the context's content pointer, so everything has to
     * be written out before the context is closed below.
     */
    xmlNanoHTTPFetchContent( ctxt, &buf, &len );
    while ( len > 0 ) {
        /* write() may accept only part of the data: loop until done */
        written = write(fd, buf, len);
        if (written <= 0) {
            ret = -1;
            break;
        }
        buf += written;
        len -= written;
    }

    xmlNanoHTTPClose(ctxt);
    if (mustClose)
        close(fd);
    return(ret);
}
1580
1581
#ifdef LIBXML_OUTPUT_ENABLED
1582
/**
 * xmlNanoHTTPSave:
 * @ctxt:  the HTTP context
 * @filename:  the filename where the content should be saved, "-" selects
 *             the standard output
 *
 * This function saves the output of the HTTP transaction to a file.
 * A named file is created if needed and truncated, so no data from a
 * previous, longer file survives.  The standard output is left open when
 * it is used as the destination.
 *
 * The context is closed and freed before returning, except when one of
 * the arguments is NULL, in which case nothing is done.
 *
 * Returns -1 in case of failure (NULL argument, file that cannot be
 *     opened, or write error), 0 in case of success.
 */
int
xmlNanoHTTPSave(void *ctxt, const char *filename) {
    char *buf = NULL;
    int fd;
    int len;
    int written;
    int mustClose;      /* set only for a descriptor opened here */
    int ret = 0;

    if ((ctxt == NULL) || (filename == NULL)) return(-1);

    if (!strcmp(filename, "-")) {
        /* descriptor 1 is the standard output; 0 would be the input */
        fd = 1;
        mustClose = 0;
    } else {
        /* O_TRUNC: do not leave the tail of an older, longer file behind */
        fd = open(filename, O_CREAT | O_WRONLY | O_TRUNC, 0666);
        if (fd < 0) {
            xmlNanoHTTPClose(ctxt);
            return(-1);
        }
        mustClose = 1;
    }

    /*
     * buf is set from the context's content pointer, so everything has to
     * be written out before the context is closed below.
     */
    xmlNanoHTTPFetchContent( ctxt, &buf, &len );
    while ( len > 0 ) {
        /* write() may accept only part of the data: loop until done */
        written = write(fd, buf, len);
        if (written <= 0) {
            ret = -1;
            break;
        }
        buf += written;
        len -= written;
    }

    xmlNanoHTTPClose(ctxt);
    if (mustClose)
        close(fd);
    return(ret);
}
1619
#endif /* LIBXML_OUTPUT_ENABLED */
1620
1621
/**
1622
 * xmlNanoHTTPReturnCode:
1623
 * @ctx:  the HTTP context
1624
 *
1625
 * Get the latest HTTP return code received
1626
 *
1627
 * Returns the HTTP return code for the request.
1628
 */
1629
int
1630
xmlNanoHTTPReturnCode(void *ctx) {
1631
    xmlNanoHTTPCtxtPtr ctxt = (xmlNanoHTTPCtxtPtr) ctx;
1632
1633
    if (ctxt == NULL) return(-1);
1634
1635
    return(ctxt->returnValue);
1636
}
1637
1638
/**
1639
 * xmlNanoHTTPAuthHeader:
1640
 * @ctx:  the HTTP context
1641
 *
1642
 * Get the authentication header of an HTTP context
1643
 *
1644
 * Returns the stashed value of the WWW-Authenticate or Proxy-Authenticate
1645
 * header.
1646
 */
1647
const char *
1648
xmlNanoHTTPAuthHeader(void *ctx) {
1649
    xmlNanoHTTPCtxtPtr ctxt = (xmlNanoHTTPCtxtPtr) ctx;
1650
1651
    if (ctxt == NULL) return(NULL);
1652
1653
    return(ctxt->authHeader);
1654
}
1655
1656
/**
1657
 * xmlNanoHTTPContentLength:
1658
 * @ctx:  the HTTP context
1659
 *
1660
 * Provides the specified content length from the HTTP header.
1661
 *
1662
 * Return the specified content length from the HTTP header.  Note that
1663
 * a value of -1 indicates that the content length element was not included in
1664
 * the response header.
1665
 */
1666
int
1667
xmlNanoHTTPContentLength( void * ctx ) {
1668
    xmlNanoHTTPCtxtPtr	ctxt = (xmlNanoHTTPCtxtPtr)ctx;
1669
1670
    return ( ( ctxt == NULL ) ? -1 : ctxt->ContentLength );
1671
}
1672
1673
/**
1674
 * xmlNanoHTTPRedir:
1675
 * @ctx:  the HTTP context
1676
 *
1677
 * Provides the specified redirection URL if available from the HTTP header.
1678
 *
1679
 * Return the specified redirection URL or NULL if not redirected.
1680
 */
1681
const char *
1682
xmlNanoHTTPRedir( void * ctx ) {
1683
    xmlNanoHTTPCtxtPtr	ctxt = (xmlNanoHTTPCtxtPtr)ctx;
1684
1685
    return ( ( ctxt == NULL ) ? NULL : ctxt->location );
1686
}
1687
1688
/**
1689
 * xmlNanoHTTPEncoding:
1690
 * @ctx:  the HTTP context
1691
 *
1692
 * Provides the specified encoding if specified in the HTTP headers.
1693
 *
1694
 * Return the specified encoding or NULL if not available
1695
 */
1696
const char *
1697
xmlNanoHTTPEncoding( void * ctx ) {
1698
    xmlNanoHTTPCtxtPtr	ctxt = (xmlNanoHTTPCtxtPtr)ctx;
1699
1700
    return ( ( ctxt == NULL ) ? NULL : ctxt->encoding );
1701
}
1702
1703
/**
1704
 * xmlNanoHTTPMimeType:
1705
 * @ctx:  the HTTP context
1706
 *
1707
 * Provides the specified Mime-Type if specified in the HTTP headers.
1708
 *
1709
 * Return the specified Mime-Type or NULL if not available
1710
 */
1711
const char *
1712
xmlNanoHTTPMimeType( void * ctx ) {
1713
    xmlNanoHTTPCtxtPtr	ctxt = (xmlNanoHTTPCtxtPtr)ctx;
1714
1715
    return ( ( ctxt == NULL ) ? NULL : ctxt->mimeType );
1716
}
1717
1718
/**
1719
 * xmlNanoHTTPFetchContent:
1720
 * @ctx:  the HTTP context
1721
 * @ptr:  pointer to set to the content buffer.
1722
 * @len:  integer pointer to hold the length of the content
1723
 *
1724
 * Check if all the content was read
1725
 *
1726
 * Returns 0 if all the content was read and available, returns
1727
 * -1 if received content length was less than specified or an error 
1728
 * occurred.
1729
 */
1730
static int
1731
xmlNanoHTTPFetchContent( void * ctx, char ** ptr, int * len ) {
1732
    xmlNanoHTTPCtxtPtr	ctxt = (xmlNanoHTTPCtxtPtr)ctx;
1733
1734
    int			rc = 0;
1735
    int			cur_lgth;
1736
    int			rcvd_lgth;
1737
    int			dummy_int;
1738
    char *		dummy_ptr = NULL;
1739
1740
    /*  Dummy up return input parameters if not provided  */
1741
1742
    if ( len == NULL )
1743
        len = &dummy_int;
1744
1745
    if ( ptr == NULL )
1746
        ptr = &dummy_ptr;
1747
1748
    /*  But can't work without the context pointer  */
1749
1750
    if ( ( ctxt == NULL ) || ( ctxt->content == NULL ) ) {
1751
        *len = 0;
1752
	*ptr = NULL;
1753
	return ( -1 );
1754
    }
1755
1756
    rcvd_lgth = ctxt->inptr - ctxt->content;
1757
1758
    while ( (cur_lgth = xmlNanoHTTPRecv( ctxt )) > 0 ) {
1759
1760
	rcvd_lgth += cur_lgth;
1761
	if ( (ctxt->ContentLength > 0) && (rcvd_lgth >= ctxt->ContentLength) )
1762
	    break;
1763
    }
1764
1765
    *ptr = ctxt->content;
1766
    *len = rcvd_lgth;
1767
1768
    if ( ( ctxt->ContentLength > 0 ) && ( rcvd_lgth < ctxt->ContentLength ) )
1769
        rc = -1;
1770
    else if ( rcvd_lgth == 0 )
1771
	rc = -1;
1772
1773
    return ( rc );
1774
}
1775
1776
#ifdef STANDALONE
1777
int main(int argc, char **argv) {
1778
    char *contentType = NULL;
1779
1780
    if (argv[1] != NULL) {
1781
	if (argv[2] != NULL) 
1782
	    xmlNanoHTTPFetch(argv[1], argv[2], &contentType);
1783
        else
1784
	    xmlNanoHTTPFetch(argv[1], "-", &contentType);
1785
	if (contentType != NULL) xmlFree(contentType);
1786
    } else {
1787
        xmlGenericError(xmlGenericErrorContext,
1788
		"%s: minimal HTTP GET implementation\n", argv[0]);
1789
        xmlGenericError(xmlGenericErrorContext,
1790
		"\tusage %s [ URL [ filename ] ]\n", argv[0]);
1791
    }
1792
    xmlNanoHTTPCleanup();
1793
    xmlMemoryDump();
1794
    return(0);
1795
}
1796
#endif /* STANDALONE */
1797
#else /* !LIBXML_HTTP_ENABLED */
1798
#ifdef STANDALONE
1799
#include <stdio.h>
1800
int main(int argc, char **argv) {
1801
    xmlGenericError(xmlGenericErrorContext,
1802
	    "%s : HTTP support not compiled in\n", argv[0]);
1803
    return(0);
1804
}
1805
#endif /* STANDALONE */
1806
#endif /* LIBXML_HTTP_ENABLED */
1807
#define bottom_nanohttp
1808
#include "elfgcchack.h"