1
/*
2
 * parserInternals.c : Internal routines (and obsolete ones) needed for the
3
 *                     XML and HTML parsers.
4
 *
5
 * See Copyright for the status of this software.
6
 *
7
 * daniel@veillard.com
8
 */
9
10
#define IN_LIBXML
11
#include "libxml.h"
12
13
#if defined(WIN32) && !defined (__CYGWIN__)
14
#define XML_DIR_SEP '\\'
15
#else
16
#define XML_DIR_SEP '/'
17
#endif
18
19
#include <string.h>
20
#ifdef HAVE_CTYPE_H
21
#include <ctype.h>
22
#endif
23
#ifdef HAVE_STDLIB_H
24
#include <stdlib.h>
25
#endif
26
#ifdef HAVE_SYS_STAT_H
27
#include <sys/stat.h>
28
#endif
29
#ifdef HAVE_FCNTL_H
30
#include <fcntl.h>
31
#endif
32
#ifdef HAVE_UNISTD_H
33
#include <unistd.h>
34
#endif
35
#ifdef HAVE_ZLIB_H
36
#include <zlib.h>
37
#endif
38
39
#include <libxml/xmlmemory.h>
40
#include <libxml/tree.h>
41
#include <libxml/parser.h>
42
#include <libxml/parserInternals.h>
43
#include <libxml/valid.h>
44
#include <libxml/entities.h>
45
#include <libxml/xmlerror.h>
46
#include <libxml/encoding.h>
47
#include <libxml/valid.h>
48
#include <libxml/xmlIO.h>
49
#include <libxml/uri.h>
50
#include <libxml/dict.h>
51
#include <libxml/SAX.h>
52
#ifdef LIBXML_CATALOG_ENABLED
53
#include <libxml/catalog.h>
54
#endif
55
#include <libxml/globals.h>
56
#include <libxml/chvalid.h>
57
58
/*
59
 * Various global defaults for parsing
60
 */
61
62
/**
63
 * xmlCheckVersion:
64
 * @version: the include version number
65
 *
66
 * check the compiled lib version against the include one.
67
 * This can warn or immediately kill the application
68
 */
69
void
70
xmlCheckVersion(int version) {
71
    int myversion = (int) LIBXML_VERSION;
72
73
    xmlInitParser();
74
75
    if ((myversion / 10000) != (version / 10000)) {
76
	xmlGenericError(xmlGenericErrorContext, 
77
		"Fatal: program compiled against libxml %d using libxml %d\n",
78
		(version / 10000), (myversion / 10000));
79
	fprintf(stderr, 
80
		"Fatal: program compiled against libxml %d using libxml %d\n",
81
		(version / 10000), (myversion / 10000));
82
    }
83
    if ((myversion / 100) < (version / 100)) {
84
	xmlGenericError(xmlGenericErrorContext, 
85
		"Warning: program compiled against libxml %d using older %d\n",
86
		(version / 100), (myversion / 100));
87
    }
88
}
89
90
91
/************************************************************************
92
 *									*
93
 * 		Some factorized error routines				*
94
 *									*
95
 ************************************************************************/
96
97
98
/**
99
 * xmlErrMemory:
100
 * @ctxt:  an XML parser context
101
 * @extra:  extra informations
102
 *
103
 * Handle a redefinition of attribute error
104
 */
105
void
106
xmlErrMemory(xmlParserCtxtPtr ctxt, const char *extra)
107
{
108
    if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
109
        (ctxt->instate == XML_PARSER_EOF))
110
	return;
111
    if (ctxt != NULL) {
112
        ctxt->errNo = XML_ERR_NO_MEMORY;
113
        ctxt->instate = XML_PARSER_EOF;
114
        ctxt->disableSAX = 1;
115
    }
116
    if (extra)
117
        __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
118
                        XML_ERR_NO_MEMORY, XML_ERR_FATAL, NULL, 0, extra,
119
                        NULL, NULL, 0, 0,
120
                        "Memory allocation failed : %s\n", extra);
121
    else
122
        __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
123
                        XML_ERR_NO_MEMORY, XML_ERR_FATAL, NULL, 0, NULL,
124
                        NULL, NULL, 0, 0, "Memory allocation failed\n");
125
}
126
127
/**
128
 * __xmlErrEncoding:
129
 * @ctxt:  an XML parser context
130
 * @xmlerr:  the error number
131
 * @msg:  the error message
132
 * @str1:  an string info
133
 * @str2:  an string info
134
 *
135
 * Handle an encoding error
136
 */
137
void
138
__xmlErrEncoding(xmlParserCtxtPtr ctxt, xmlParserErrors xmlerr,
139
                 const char *msg, const xmlChar * str1, const xmlChar * str2)
140
{
141
    if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
142
        (ctxt->instate == XML_PARSER_EOF))
143
	return;
144
    if (ctxt != NULL)
145
        ctxt->errNo = xmlerr;
146
    __xmlRaiseError(NULL, NULL, NULL,
147
                    ctxt, NULL, XML_FROM_PARSER, xmlerr, XML_ERR_FATAL,
148
                    NULL, 0, (const char *) str1, (const char *) str2,
149
                    NULL, 0, 0, msg, str1, str2);
150
    if (ctxt != NULL) {
151
        ctxt->wellFormed = 0;
152
        if (ctxt->recovery == 0)
153
            ctxt->disableSAX = 1;
154
    }
155
}
156
157
/**
158
 * xmlErrInternal:
159
 * @ctxt:  an XML parser context
160
 * @msg:  the error message
161
 * @str:  error informations
162
 *
163
 * Handle an internal error
164
 */
165
static void
166
xmlErrInternal(xmlParserCtxtPtr ctxt, const char *msg, const xmlChar * str)
167
{
168
    if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
169
        (ctxt->instate == XML_PARSER_EOF))
170
	return;
171
    if (ctxt != NULL)
172
        ctxt->errNo = XML_ERR_INTERNAL_ERROR;
173
    __xmlRaiseError(NULL, NULL, NULL,
174
                    ctxt, NULL, XML_FROM_PARSER, XML_ERR_INTERNAL_ERROR,
175
                    XML_ERR_FATAL, NULL, 0, (const char *) str, NULL, NULL,
176
                    0, 0, msg, str);
177
    if (ctxt != NULL) {
178
        ctxt->wellFormed = 0;
179
        if (ctxt->recovery == 0)
180
            ctxt->disableSAX = 1;
181
    }
182
}
183
184
/**
185
 * xmlErrEncodingInt:
186
 * @ctxt:  an XML parser context
187
 * @error:  the error number
188
 * @msg:  the error message
189
 * @val:  an integer value
190
 *
191
 * n encoding error
192
 */
193
static void
194
xmlErrEncodingInt(xmlParserCtxtPtr ctxt, xmlParserErrors error,
195
                  const char *msg, int val)
196
{
197
    if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
198
        (ctxt->instate == XML_PARSER_EOF))
199
	return;
200
    if (ctxt != NULL)
201
        ctxt->errNo = error;
202
    __xmlRaiseError(NULL, NULL, NULL,
203
                    ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
204
                    NULL, 0, NULL, NULL, NULL, val, 0, msg, val);
205
    if (ctxt != NULL) {
206
        ctxt->wellFormed = 0;
207
        if (ctxt->recovery == 0)
208
            ctxt->disableSAX = 1;
209
    }
210
}
211
212
/**
 * xmlIsLetter:
 * @c:  a unicode character (int)
 *
 * Check whether the character is allowed by the production
 * [84] Letter ::= BaseChar | Ideographic
 *
 * Returns 0 if not, non-zero otherwise
 */
int
xmlIsLetter(int c) {
    if (IS_BASECHAR(c))
        return(1);
    return(IS_IDEOGRAPHIC(c) ? 1 : 0);
}
225
226
/************************************************************************
227
 *									*
228
 * 		Input handling functions for progressive parsing	*
229
 *									*
230
 ************************************************************************/
231
232
/* #define DEBUG_INPUT */
233
/* #define DEBUG_STACK */
234
/* #define DEBUG_PUSH */
235
236
237
/* we need to keep enough input to show errors in context */
#define LINE_LEN        80

#ifdef DEBUG_INPUT
#define CHECK_BUFFER(in) check_buffer(in)

/*
 * check_buffer:
 * @in:  an XML parser input
 *
 * Debug helper, only compiled when DEBUG_INPUT is defined.
 * Checks that the base and cur pointers of the input are consistent
 * with the underlying buffer and dumps the buffer state through the
 * generic error handler.
 */
static
void check_buffer(xmlParserInputPtr in) {
    if (in->base != in->buf->buffer->content) {
        xmlGenericError(xmlGenericErrorContext,
                "xmlParserInput: base mismatch problem\n");
    }
    if (in->cur < in->base) {
        xmlGenericError(xmlGenericErrorContext,
                "xmlParserInput: cur < base problem\n");
    }
    if (in->cur > in->base + in->buf->buffer->use) {
        xmlGenericError(xmlGenericErrorContext,
                "xmlParserInput: cur > base + use problem\n");
    }
    /*
     * Pointers are printed with %p and the pointer difference as a long:
     * squeezing them through (int) and %x/%d truncates the values and
     * mismatches the format on platforms where pointers are wider
     * than int.
     */
    xmlGenericError(xmlGenericErrorContext,
            "buffer %p : content %p, cur %ld, use %u, size %u\n",
            (void *) in, (void *) in->buf->buffer->content,
            (long) (in->cur - in->base),
            (unsigned int) in->buf->buffer->use,
            (unsigned int) in->buf->buffer->size);
}

#else
#define CHECK_BUFFER(in) 
#endif
265
266
267
/**
268
 * xmlParserInputRead:
269
 * @in:  an XML parser input
270
 * @len:  an indicative size for the lookahead
271
 *
272
 * This function refresh the input for the parser. It doesn't try to
273
 * preserve pointers to the input buffer, and discard already read data
274
 *
275
 * Returns the number of xmlChars read, or -1 in case of error, 0 indicate the
276
 * end of this entity
277
 */
278
int
279
xmlParserInputRead(xmlParserInputPtr in, int len) {
280
    int ret;
281
    int used;
282
    int indx;
283
284
    if (in == NULL) return(-1);
285
#ifdef DEBUG_INPUT
286
    xmlGenericError(xmlGenericErrorContext, "Read\n");
287
#endif
288
    if (in->buf == NULL) return(-1);
289
    if (in->base == NULL) return(-1);
290
    if (in->cur == NULL) return(-1);
291
    if (in->buf->buffer == NULL) return(-1);
292
    if (in->buf->readcallback == NULL) return(-1);
293
294
    CHECK_BUFFER(in);
295
296
    used = in->cur - in->buf->buffer->content;
297
    ret = xmlBufferShrink(in->buf->buffer, used);
298
    if (ret > 0) {
299
	in->cur -= ret;
300
	in->consumed += ret;
301
    }
302
    ret = xmlParserInputBufferRead(in->buf, len);
303
    if (in->base != in->buf->buffer->content) {
304
        /*
305
	 * the buffer has been reallocated
306
	 */
307
	indx = in->cur - in->base;
308
	in->base = in->buf->buffer->content;
309
	in->cur = &in->buf->buffer->content[indx];
310
    }
311
    in->end = &in->buf->buffer->content[in->buf->buffer->use];
312
313
    CHECK_BUFFER(in);
314
315
    return(ret);
316
}
317
318
/**
319
 * xmlParserInputGrow:
320
 * @in:  an XML parser input
321
 * @len:  an indicative size for the lookahead
322
 *
323
 * This function increase the input for the parser. It tries to
324
 * preserve pointers to the input buffer, and keep already read data
325
 *
326
 * Returns the number of xmlChars read, or -1 in case of error, 0 indicate the
327
 * end of this entity
328
 */
329
int
330
xmlParserInputGrow(xmlParserInputPtr in, int len) {
331
    int ret;
332
    int indx;
333
334
    if (in == NULL) return(-1);
335
#ifdef DEBUG_INPUT
336
    xmlGenericError(xmlGenericErrorContext, "Grow\n");
337
#endif
338
    if (in->buf == NULL) return(-1);
339
    if (in->base == NULL) return(-1);
340
    if (in->cur == NULL) return(-1);
341
    if (in->buf->buffer == NULL) return(-1);
342
343
    CHECK_BUFFER(in);
344
345
    indx = in->cur - in->base;
346
    if (in->buf->buffer->use > (unsigned int) indx + INPUT_CHUNK) {
347
348
	CHECK_BUFFER(in);
349
350
        return(0);
351
    }
352
    if (in->buf->readcallback != NULL)
353
	ret = xmlParserInputBufferGrow(in->buf, len);
354
    else	
355
        return(0);
356
357
    /*
358
     * NOTE : in->base may be a "dangling" i.e. freed pointer in this
359
     *        block, but we use it really as an integer to do some
360
     *        pointer arithmetic. Insure will raise it as a bug but in
361
     *        that specific case, that's not !
362
     */
363
    if (in->base != in->buf->buffer->content) {
364
        /*
365
	 * the buffer has been reallocated
366
	 */
367
	indx = in->cur - in->base;
368
	in->base = in->buf->buffer->content;
369
	in->cur = &in->buf->buffer->content[indx];
370
    }
371
    in->end = &in->buf->buffer->content[in->buf->buffer->use];
372
373
    CHECK_BUFFER(in);
374
375
    return(ret);
376
}
377
378
/**
379
 * xmlParserInputShrink:
380
 * @in:  an XML parser input
381
 *
382
 * This function removes used input for the parser.
383
 */
384
void
385
xmlParserInputShrink(xmlParserInputPtr in) {
386
    int used;
387
    int ret;
388
    int indx;
389
390
#ifdef DEBUG_INPUT
391
    xmlGenericError(xmlGenericErrorContext, "Shrink\n");
392
#endif
393
    if (in == NULL) return;
394
    if (in->buf == NULL) return;
395
    if (in->base == NULL) return;
396
    if (in->cur == NULL) return;
397
    if (in->buf->buffer == NULL) return;
398
399
    CHECK_BUFFER(in);
400
401
    used = in->cur - in->buf->buffer->content;
402
    /*
403
     * Do not shrink on large buffers whose only a tiny fraction
404
     * was consumed
405
     */
406
    if (used > INPUT_CHUNK) {
407
	ret = xmlBufferShrink(in->buf->buffer, used - LINE_LEN);
408
	if (ret > 0) {
409
	    in->cur -= ret;
410
	    in->consumed += ret;
411
	}
412
	in->end = &in->buf->buffer->content[in->buf->buffer->use];
413
    }
414
415
    CHECK_BUFFER(in);
416
417
    if (in->buf->buffer->use > INPUT_CHUNK) {
418
        return;
419
    }
420
    xmlParserInputBufferRead(in->buf, 2 * INPUT_CHUNK);
421
    if (in->base != in->buf->buffer->content) {
422
        /*
423
	 * the buffer has been reallocated
424
	 */
425
	indx = in->cur - in->base;
426
	in->base = in->buf->buffer->content;
427
	in->cur = &in->buf->buffer->content[indx];
428
    }
429
    in->end = &in->buf->buffer->content[in->buf->buffer->use];
430
431
    CHECK_BUFFER(in);
432
}
433
434
/************************************************************************
435
 *									*
436
 * 		UTF8 character input and related functions		*
437
 *									*
438
 ************************************************************************/
439
440
/**
441
 * xmlNextChar:
442
 * @ctxt:  the XML parser context
443
 *
444
 * Skip to the next char input char.
445
 */
446
447
void
448
xmlNextChar(xmlParserCtxtPtr ctxt)
449
{
450
    if ((ctxt == NULL) || (ctxt->instate == XML_PARSER_EOF) ||
451
        (ctxt->input == NULL))
452
        return;
453
454
    if (ctxt->charset == XML_CHAR_ENCODING_UTF8) {
455
        if ((*ctxt->input->cur == 0) &&
456
            (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0) &&
457
            (ctxt->instate != XML_PARSER_COMMENT)) {
458
            /*
459
             * If we are at the end of the current entity and
460
             * the context allows it, we pop consumed entities
461
             * automatically.
462
             * the auto closing should be blocked in other cases
463
             */
464
            xmlPopInput(ctxt);
465
        } else {
466
            const unsigned char *cur;
467
            unsigned char c;
468
469
            /*
470
             *   2.11 End-of-Line Handling
471
             *   the literal two-character sequence "#xD#xA" or a standalone
472
             *   literal #xD, an XML processor must pass to the application
473
             *   the single character #xA.
474
             */
475
            if (*(ctxt->input->cur) == '\n') {
476
                ctxt->input->line++; ctxt->input->col = 1;
477
            } else
478
                ctxt->input->col++;
479
480
            /*
481
             * We are supposed to handle UTF8, check it's valid
482
             * From rfc2044: encoding of the Unicode values on UTF-8:
483
             *
484
             * UCS-4 range (hex.)           UTF-8 octet sequence (binary)
485
             * 0000 0000-0000 007F   0xxxxxxx
486
             * 0000 0080-0000 07FF   110xxxxx 10xxxxxx
487
             * 0000 0800-0000 FFFF   1110xxxx 10xxxxxx 10xxxxxx 
488
             *
489
             * Check for the 0x110000 limit too
490
             */
491
            cur = ctxt->input->cur;
492
493
            c = *cur;
494
            if (c & 0x80) {
495
	        if (c == 0xC0)
496
		    goto encoding_error;
497
                if (cur[1] == 0)
498
                    xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
499
                if ((cur[1] & 0xc0) != 0x80)
500
                    goto encoding_error;
501
                if ((c & 0xe0) == 0xe0) {
502
                    unsigned int val;
503
504
                    if (cur[2] == 0)
505
                        xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
506
                    if ((cur[2] & 0xc0) != 0x80)
507
                        goto encoding_error;
508
                    if ((c & 0xf0) == 0xf0) {
509
                        if (cur[3] == 0)
510
                            xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
511
                        if (((c & 0xf8) != 0xf0) ||
512
                            ((cur[3] & 0xc0) != 0x80))
513
                            goto encoding_error;
514
                        /* 4-byte code */
515
                        ctxt->input->cur += 4;
516
                        val = (cur[0] & 0x7) << 18;
517
                        val |= (cur[1] & 0x3f) << 12;
518
                        val |= (cur[2] & 0x3f) << 6;
519
                        val |= cur[3] & 0x3f;
520
                    } else {
521
                        /* 3-byte code */
522
                        ctxt->input->cur += 3;
523
                        val = (cur[0] & 0xf) << 12;
524
                        val |= (cur[1] & 0x3f) << 6;
525
                        val |= cur[2] & 0x3f;
526
                    }
527
                    if (((val > 0xd7ff) && (val < 0xe000)) ||
528
                        ((val > 0xfffd) && (val < 0x10000)) ||
529
                        (val >= 0x110000)) {
530
			xmlErrEncodingInt(ctxt, XML_ERR_INVALID_CHAR,
531
					  "Char 0x%X out of allowed range\n",
532
					  val);
533
                    }
534
                } else
535
                    /* 2-byte code */
536
                    ctxt->input->cur += 2;
537
            } else
538
                /* 1-byte code */
539
                ctxt->input->cur++;
540
541
            ctxt->nbChars++;
542
            if (*ctxt->input->cur == 0)
543
                xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
544
        }
545
    } else {
546
        /*
547
         * Assume it's a fixed length encoding (1) with
548
         * a compatible encoding for the ASCII set, since
549
         * XML constructs only use < 128 chars
550
         */
551
552
        if (*(ctxt->input->cur) == '\n') {
553
            ctxt->input->line++; ctxt->input->col = 1;
554
        } else
555
            ctxt->input->col++;
556
        ctxt->input->cur++;
557
        ctxt->nbChars++;
558
        if (*ctxt->input->cur == 0)
559
            xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
560
    }
561
    if ((*ctxt->input->cur == '%') && (!ctxt->html))
562
        xmlParserHandlePEReference(ctxt);
563
    if ((*ctxt->input->cur == 0) &&
564
        (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
565
        xmlPopInput(ctxt);
566
    return;
567
encoding_error:
568
    /*
569
     * If we detect an UTF8 error that probably mean that the
570
     * input encoding didn't get properly advertised in the
571
     * declaration header. Report the error and switch the encoding
572
     * to ISO-Latin-1 (if you don't like this policy, just declare the
573
     * encoding !)
574
     */
575
    if ((ctxt == NULL) || (ctxt->input == NULL) ||
576
        (ctxt->input->end - ctxt->input->cur < 4)) {
577
	__xmlErrEncoding(ctxt, XML_ERR_INVALID_CHAR,
578
		     "Input is not proper UTF-8, indicate encoding !\n",
579
		     NULL, NULL);
580
    } else {
581
        char buffer[150];
582
583
	snprintf(buffer, 149, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
584
			ctxt->input->cur[0], ctxt->input->cur[1],
585
			ctxt->input->cur[2], ctxt->input->cur[3]);
586
	__xmlErrEncoding(ctxt, XML_ERR_INVALID_CHAR,
587
		     "Input is not proper UTF-8, indicate encoding !\n%s",
588
		     BAD_CAST buffer, NULL);
589
    }
590
    ctxt->charset = XML_CHAR_ENCODING_8859_1;
591
    ctxt->input->cur++;
592
    return;
593
}
594
595
/**
596
 * xmlCurrentChar:
597
 * @ctxt:  the XML parser context
598
 * @len:  pointer to the length of the char read
599
 *
600
 * The current char value, if using UTF-8 this may actually span multiple
601
 * bytes in the input buffer. Implement the end of line normalization:
602
 * 2.11 End-of-Line Handling
603
 * Wherever an external parsed entity or the literal entity value
604
 * of an internal parsed entity contains either the literal two-character
605
 * sequence "#xD#xA" or a standalone literal #xD, an XML processor
606
 * must pass to the application the single character #xA.
607
 * This behavior can conveniently be produced by normalizing all
608
 * line breaks to #xA on input, before parsing.)
609
 *
610
 * Returns the current char value and its length
611
 */
612
613
int
614
xmlCurrentChar(xmlParserCtxtPtr ctxt, int *len) {
615
    if ((ctxt == NULL) || (len == NULL) || (ctxt->input == NULL)) return(0);
616
    if (ctxt->instate == XML_PARSER_EOF)
617
	return(0);
618
619
    if ((*ctxt->input->cur >= 0x20) && (*ctxt->input->cur <= 0x7F)) {
620
	    *len = 1;
621
	    return((int) *ctxt->input->cur);
622
    }
623
    if (ctxt->charset == XML_CHAR_ENCODING_UTF8) {
624
	/*
625
	 * We are supposed to handle UTF8, check it's valid
626
	 * From rfc2044: encoding of the Unicode values on UTF-8:
627
	 *
628
	 * UCS-4 range (hex.)           UTF-8 octet sequence (binary)
629
	 * 0000 0000-0000 007F   0xxxxxxx
630
	 * 0000 0080-0000 07FF   110xxxxx 10xxxxxx
631
	 * 0000 0800-0000 FFFF   1110xxxx 10xxxxxx 10xxxxxx 
632
	 *
633
	 * Check for the 0x110000 limit too
634
	 */
635
	const unsigned char *cur = ctxt->input->cur;
636
	unsigned char c;
637
	unsigned int val;
638
639
	c = *cur;
640
	if (c & 0x80) {
641
	    if (((c & 0x40) == 0) || (c == 0xC0))
642
		goto encoding_error;
643
	    if (cur[1] == 0)
644
		xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
645
	    if ((cur[1] & 0xc0) != 0x80)
646
		goto encoding_error;
647
	    if ((c & 0xe0) == 0xe0) {
648
		if (cur[2] == 0)
649
		    xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
650
		if ((cur[2] & 0xc0) != 0x80)
651
		    goto encoding_error;
652
		if ((c & 0xf0) == 0xf0) {
653
		    if (cur[3] == 0)
654
			xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
655
		    if (((c & 0xf8) != 0xf0) ||
656
			((cur[3] & 0xc0) != 0x80))
657
			goto encoding_error;
658
		    /* 4-byte code */
659
		    *len = 4;
660
		    val = (cur[0] & 0x7) << 18;
661
		    val |= (cur[1] & 0x3f) << 12;
662
		    val |= (cur[2] & 0x3f) << 6;
663
		    val |= cur[3] & 0x3f;
664
		    if (val < 0x10000)
665
			goto encoding_error;
666
		} else {
667
		  /* 3-byte code */
668
		    *len = 3;
669
		    val = (cur[0] & 0xf) << 12;
670
		    val |= (cur[1] & 0x3f) << 6;
671
		    val |= cur[2] & 0x3f;
672
		    if (val < 0x800)
673
			goto encoding_error;
674
		}
675
	    } else {
676
	      /* 2-byte code */
677
		*len = 2;
678
		val = (cur[0] & 0x1f) << 6;
679
		val |= cur[1] & 0x3f;
680
		if (val < 0x80)
681
		    goto encoding_error;
682
	    }
683
	    if (!IS_CHAR(val)) {
684
	        xmlErrEncodingInt(ctxt, XML_ERR_INVALID_CHAR,
685
				  "Char 0x%X out of allowed range\n", val);
686
	    }    
687
	    return(val);
688
	} else {
689
	    /* 1-byte code */
690
	    *len = 1;
691
	    if (*ctxt->input->cur == 0)
692
		xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
693
	    if ((*ctxt->input->cur == 0) &&
694
	        (ctxt->input->end > ctxt->input->cur)) {
695
	        xmlErrEncodingInt(ctxt, XML_ERR_INVALID_CHAR,
696
				  "Char 0x0 out of allowed range\n", 0);
697
	    }
698
	    if (*ctxt->input->cur == 0xD) {
699
		if (ctxt->input->cur[1] == 0xA) {
700
		    ctxt->nbChars++;
701
		    ctxt->input->cur++;
702
		}
703
		return(0xA);
704
	    }
705
	    return((int) *ctxt->input->cur);
706
	}
707
    }
708
    /*
709
     * Assume it's a fixed length encoding (1) with
710
     * a compatible encoding for the ASCII set, since
711
     * XML constructs only use < 128 chars
712
     */
713
    *len = 1;
714
    if (*ctxt->input->cur == 0xD) {
715
	if (ctxt->input->cur[1] == 0xA) {
716
	    ctxt->nbChars++;
717
	    ctxt->input->cur++;
718
	}
719
	return(0xA);
720
    }
721
    return((int) *ctxt->input->cur);
722
encoding_error:
723
    /*
724
     * An encoding problem may arise from a truncated input buffer
725
     * splitting a character in the middle. In that case do not raise
726
     * an error but return 0 to endicate an end of stream problem
727
     */
728
    if (ctxt->input->end - ctxt->input->cur < 4) {
729
	*len = 0;
730
	return(0);
731
    }
732
733
    /*
734
     * If we detect an UTF8 error that probably mean that the
735
     * input encoding didn't get properly advertised in the
736
     * declaration header. Report the error and switch the encoding
737
     * to ISO-Latin-1 (if you don't like this policy, just declare the
738
     * encoding !)
739
     */
740
    {
741
        char buffer[150];
742
743
	snprintf(&buffer[0], 149, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
744
			ctxt->input->cur[0], ctxt->input->cur[1],
745
			ctxt->input->cur[2], ctxt->input->cur[3]);
746
	__xmlErrEncoding(ctxt, XML_ERR_INVALID_CHAR,
747
		     "Input is not proper UTF-8, indicate encoding !\n%s",
748
		     BAD_CAST buffer, NULL);
749
    }
750
    ctxt->charset = XML_CHAR_ENCODING_8859_1; 
751
    *len = 1;
752
    return((int) *ctxt->input->cur);
753
}
754
755
/**
756
 * xmlStringCurrentChar:
757
 * @ctxt:  the XML parser context
758
 * @cur:  pointer to the beginning of the char
759
 * @len:  pointer to the length of the char read
760
 *
761
 * The current char value, if using UTF-8 this may actually span multiple
762
 * bytes in the input buffer.
763
 *
764
 * Returns the current char value and its length
765
 */
766
767
int
768
xmlStringCurrentChar(xmlParserCtxtPtr ctxt, const xmlChar * cur, int *len)
769
{
770
    if ((len == NULL) || (cur == NULL)) return(0);
771
    if ((ctxt == NULL) || (ctxt->charset == XML_CHAR_ENCODING_UTF8)) {
772
        /*
773
         * We are supposed to handle UTF8, check it's valid
774
         * From rfc2044: encoding of the Unicode values on UTF-8:
775
         *
776
         * UCS-4 range (hex.)           UTF-8 octet sequence (binary)
777
         * 0000 0000-0000 007F   0xxxxxxx
778
         * 0000 0080-0000 07FF   110xxxxx 10xxxxxx
779
         * 0000 0800-0000 FFFF   1110xxxx 10xxxxxx 10xxxxxx 
780
         *
781
         * Check for the 0x110000 limit too
782
         */
783
        unsigned char c;
784
        unsigned int val;
785
786
        c = *cur;
787
        if (c & 0x80) {
788
            if ((cur[1] & 0xc0) != 0x80)
789
                goto encoding_error;
790
            if ((c & 0xe0) == 0xe0) {
791
792
                if ((cur[2] & 0xc0) != 0x80)
793
                    goto encoding_error;
794
                if ((c & 0xf0) == 0xf0) {
795
                    if (((c & 0xf8) != 0xf0) || ((cur[3] & 0xc0) != 0x80))
796
                        goto encoding_error;
797
                    /* 4-byte code */
798
                    *len = 4;
799
                    val = (cur[0] & 0x7) << 18;
800
                    val |= (cur[1] & 0x3f) << 12;
801
                    val |= (cur[2] & 0x3f) << 6;
802
                    val |= cur[3] & 0x3f;
803
                } else {
804
                    /* 3-byte code */
805
                    *len = 3;
806
                    val = (cur[0] & 0xf) << 12;
807
                    val |= (cur[1] & 0x3f) << 6;
808
                    val |= cur[2] & 0x3f;
809
                }
810
            } else {
811
                /* 2-byte code */
812
                *len = 2;
813
                val = (cur[0] & 0x1f) << 6;
814
                val |= cur[1] & 0x3f;
815
            }
816
            if (!IS_CHAR(val)) {
817
	        xmlErrEncodingInt(ctxt, XML_ERR_INVALID_CHAR,
818
				  "Char 0x%X out of allowed range\n", val);
819
            }
820
            return (val);
821
        } else {
822
            /* 1-byte code */
823
            *len = 1;
824
            return ((int) *cur);
825
        }
826
    }
827
    /*
828
     * Assume it's a fixed length encoding (1) with
829
     * a compatible encoding for the ASCII set, since
830
     * XML constructs only use < 128 chars
831
     */
832
    *len = 1;
833
    return ((int) *cur);
834
encoding_error:
835
836
    /*
837
     * An encoding problem may arise from a truncated input buffer
838
     * splitting a character in the middle. In that case do not raise
839
     * an error but return 0 to endicate an end of stream problem
840
     */
841
    if ((ctxt == NULL) || (ctxt->input == NULL) ||
842
        (ctxt->input->end - ctxt->input->cur < 4)) {
843
	*len = 0;
844
	return(0);
845
    }
846
    /*
847
     * If we detect an UTF8 error that probably mean that the
848
     * input encoding didn't get properly advertised in the
849
     * declaration header. Report the error and switch the encoding
850
     * to ISO-Latin-1 (if you don't like this policy, just declare the
851
     * encoding !)
852
     */
853
    {
854
        char buffer[150];
855
856
	snprintf(buffer, 149, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
857
			ctxt->input->cur[0], ctxt->input->cur[1],
858
			ctxt->input->cur[2], ctxt->input->cur[3]);
859
	__xmlErrEncoding(ctxt, XML_ERR_INVALID_CHAR,
860
		     "Input is not proper UTF-8, indicate encoding !\n%s",
861
		     BAD_CAST buffer, NULL);
862
    }
863
    *len = 1;
864
    return ((int) *cur);
865
}
866
867
/**
868
 * xmlCopyCharMultiByte:
869
 * @out:  pointer to an array of xmlChar
870
 * @val:  the char value
871
 *
872
 * append the char value in the array 
873
 *
874
 * Returns the number of xmlChar written
875
 */
876
int
877
xmlCopyCharMultiByte(xmlChar *out, int val) {
878
    if (out == NULL) return(0);
879
    /*
880
     * We are supposed to handle UTF8, check it's valid
881
     * From rfc2044: encoding of the Unicode values on UTF-8:
882
     *
883
     * UCS-4 range (hex.)           UTF-8 octet sequence (binary)
884
     * 0000 0000-0000 007F   0xxxxxxx
885
     * 0000 0080-0000 07FF   110xxxxx 10xxxxxx
886
     * 0000 0800-0000 FFFF   1110xxxx 10xxxxxx 10xxxxxx 
887
     */
888
    if  (val >= 0x80) {
889
	xmlChar *savedout = out;
890
	int bits;
891
	if (val <   0x800) { *out++= (val >>  6) | 0xC0;  bits=  0; }
892
	else if (val < 0x10000) { *out++= (val >> 12) | 0xE0;  bits=  6;}
893
	else if (val < 0x110000)  { *out++= (val >> 18) | 0xF0;  bits=  12; }
894
	else {
895
	    xmlErrEncodingInt(NULL, XML_ERR_INVALID_CHAR,
896
		    "Internal error, xmlCopyCharMultiByte 0x%X out of bound\n",
897
			      val);
898
	    return(0);
899
	}
900
	for ( ; bits >= 0; bits-= 6)
901
	    *out++= ((val >> bits) & 0x3F) | 0x80 ;
902
	return (out - savedout);
903
    }
904
    *out = (xmlChar) val;
905
    return 1;
906
}
907
908
/**
909
 * xmlCopyChar:
910
 * @len:  Ignored, compatibility
911
 * @out:  pointer to an array of xmlChar
912
 * @val:  the char value
913
 *
914
 * append the char value in the array 
915
 *
916
 * Returns the number of xmlChar written
917
 */
918
919
int
920
xmlCopyChar(int len ATTRIBUTE_UNUSED, xmlChar *out, int val) {
921
    if (out == NULL) return(0);
922
    /* the len parameter is ignored */
923
    if  (val >= 0x80) {
924
	return(xmlCopyCharMultiByte (out, val));
925
    }
926
    *out = (xmlChar) val;
927
    return 1;
928
}
929
930
/************************************************************************
931
 *									*
932
 *		Commodity functions to switch encodings			*
933
 *									*
934
 ************************************************************************/
935
936
/**
937
 * xmlSwitchEncoding:
938
 * @ctxt:  the parser context
939
 * @enc:  the encoding value (number)
940
 *
941
 * change the input functions when discovering the character encoding
942
 * of a given entity.
943
 *
944
 * Returns 0 in case of success, -1 otherwise
945
 */
946
int
947
xmlSwitchEncoding(xmlParserCtxtPtr ctxt, xmlCharEncoding enc)
948
{
949
    xmlCharEncodingHandlerPtr handler;
950
951
    if (ctxt == NULL) return(-1);
952
    switch (enc) {
953
	case XML_CHAR_ENCODING_ERROR:
954
	    __xmlErrEncoding(ctxt, XML_ERR_UNKNOWN_ENCODING,
955
	                   "encoding unknown\n", NULL, NULL);
956
	    return(-1);
957
	case XML_CHAR_ENCODING_NONE:
958
	    /* let's assume it's UTF-8 without the XML decl */
959
	    ctxt->charset = XML_CHAR_ENCODING_UTF8;
960
	    return(0);
961
	case XML_CHAR_ENCODING_UTF8:
962
	    /* default encoding, no conversion should be needed */
963
	    ctxt->charset = XML_CHAR_ENCODING_UTF8;
964
965
	    /*
966
	     * Errata on XML-1.0 June 20 2001
967
	     * Specific handling of the Byte Order Mark for
968
	     * UTF-8
969
	     */
970
	    if ((ctxt->input != NULL) &&
971
		(ctxt->input->cur[0] == 0xEF) &&
972
		(ctxt->input->cur[1] == 0xBB) &&
973
		(ctxt->input->cur[2] == 0xBF)) {
974
		ctxt->input->cur += 3;
975
	    }
976
	    return(0);
977
    case XML_CHAR_ENCODING_UTF16LE:
978
    case XML_CHAR_ENCODING_UTF16BE:
979
        /*The raw input characters are encoded
980
         *in UTF-16. As we expect this function
981
         *to be called after xmlCharEncInFunc, we expect
982
         *ctxt->input->cur to contain UTF-8 encoded characters.
983
         *So the raw UTF16 Byte Order Mark
984
         *has also been converted into
985
         *an UTF-8 BOM. Let's skip that BOM.
986
         */
987
        if ((ctxt->input != NULL) && (ctxt->input->cur != NULL) &&
988
            (ctxt->input->cur[0] == 0xEF) &&
989
            (ctxt->input->cur[1] == 0xBB) &&
990
            (ctxt->input->cur[2] == 0xBF)) {
991
            ctxt->input->cur += 3;
992
        }
993
	break ;
994
	default:
995
	    break;
996
    }
997
    handler = xmlGetCharEncodingHandler(enc);
998
    if (handler == NULL) {
999
	/*
1000
	 * Default handlers.
1001
	 */
1002
	switch (enc) {
1003
	    case XML_CHAR_ENCODING_ASCII:
1004
		/* default encoding, no conversion should be needed */
1005
		ctxt->charset = XML_CHAR_ENCODING_UTF8;
1006
		return(0);
1007
	    case XML_CHAR_ENCODING_UTF16LE:
1008
		break;
1009
	    case XML_CHAR_ENCODING_UTF16BE:
1010
		break;
1011
	    case XML_CHAR_ENCODING_UCS4LE:
1012
		__xmlErrEncoding(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
1013
			       "encoding not supported %s\n",
1014
			       BAD_CAST "USC4 little endian", NULL);
1015
		break;
1016
	    case XML_CHAR_ENCODING_UCS4BE:
1017
		__xmlErrEncoding(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
1018
			       "encoding not supported %s\n",
1019
			       BAD_CAST "USC4 big endian", NULL);
1020
		break;
1021
	    case XML_CHAR_ENCODING_EBCDIC:
1022
		__xmlErrEncoding(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
1023
			       "encoding not supported %s\n",
1024
			       BAD_CAST "EBCDIC", NULL);
1025
		break;
1026
	    case XML_CHAR_ENCODING_UCS4_2143:
1027
		__xmlErrEncoding(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
1028
			       "encoding not supported %s\n",
1029
			       BAD_CAST "UCS4 2143", NULL);
1030
		break;
1031
	    case XML_CHAR_ENCODING_UCS4_3412:
1032
		__xmlErrEncoding(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
1033
			       "encoding not supported %s\n",
1034
			       BAD_CAST "UCS4 3412", NULL);
1035
		break;
1036
	    case XML_CHAR_ENCODING_UCS2:
1037
		__xmlErrEncoding(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
1038
			       "encoding not supported %s\n",
1039
			       BAD_CAST "UCS2", NULL);
1040
		break;
1041
	    case XML_CHAR_ENCODING_8859_1:
1042
	    case XML_CHAR_ENCODING_8859_2:
1043
	    case XML_CHAR_ENCODING_8859_3:
1044
	    case XML_CHAR_ENCODING_8859_4:
1045
	    case XML_CHAR_ENCODING_8859_5:
1046
	    case XML_CHAR_ENCODING_8859_6:
1047
	    case XML_CHAR_ENCODING_8859_7:
1048
	    case XML_CHAR_ENCODING_8859_8:
1049
	    case XML_CHAR_ENCODING_8859_9:
1050
		/*
1051
		 * We used to keep the internal content in the
1052
		 * document encoding however this turns being unmaintainable
1053
		 * So xmlGetCharEncodingHandler() will return non-null
1054
		 * values for this now.
1055
		 */
1056
		if ((ctxt->inputNr == 1) &&
1057
		    (ctxt->encoding == NULL) &&
1058
		    (ctxt->input != NULL) &&
1059
		    (ctxt->input->encoding != NULL)) {
1060
		    ctxt->encoding = xmlStrdup(ctxt->input->encoding);
1061
		}
1062
		ctxt->charset = enc;
1063
		return(0);
1064
	    case XML_CHAR_ENCODING_2022_JP:
1065
		__xmlErrEncoding(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
1066
			       "encoding not supported %s\n",
1067
			       BAD_CAST "ISO-2022-JP", NULL);
1068
		break;
1069
	    case XML_CHAR_ENCODING_SHIFT_JIS:
1070
		__xmlErrEncoding(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
1071
			       "encoding not supported %s\n",
1072
			       BAD_CAST "Shift_JIS", NULL);
1073
		break;
1074
	    case XML_CHAR_ENCODING_EUC_JP:
1075
		__xmlErrEncoding(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
1076
			       "encoding not supported %s\n",
1077
			       BAD_CAST "EUC-JP", NULL);
1078
		break;
1079
	    default:
1080
	        break;
1081
	}
1082
    }
1083
    if (handler == NULL)
1084
	return(-1);
1085
    ctxt->charset = XML_CHAR_ENCODING_UTF8;
1086
    return(xmlSwitchToEncoding(ctxt, handler));
1087
}
1088
1089
/**
1090
 * xmlSwitchInputEncoding:
1091
 * @ctxt:  the parser context
1092
 * @input:  the input stream
1093
 * @handler:  the encoding handler
1094
 *
1095
 * change the input functions when discovering the character encoding
1096
 * of a given entity.
1097
 *
1098
 * Returns 0 in case of success, -1 otherwise
1099
 */
1100
int
1101
xmlSwitchInputEncoding(xmlParserCtxtPtr ctxt, xmlParserInputPtr input,
1102
                       xmlCharEncodingHandlerPtr handler)
1103
{
1104
    int nbchars;
1105
1106
    if (handler == NULL)
1107
        return (-1);
1108
    if (input == NULL)
1109
        return (-1);
1110
    if (input->buf != NULL) {
1111
        if (input->buf->encoder != NULL) {
1112
            /*
1113
             * Check in case the auto encoding detetection triggered
1114
             * in already.
1115
             */
1116
            if (input->buf->encoder == handler)
1117
                return (0);
1118
1119
            /*
1120
             * "UTF-16" can be used for both LE and BE
1121
             if ((!xmlStrncmp(BAD_CAST input->buf->encoder->name,
1122
             BAD_CAST "UTF-16", 6)) &&
1123
             (!xmlStrncmp(BAD_CAST handler->name,
1124
             BAD_CAST "UTF-16", 6))) {
1125
             return(0);
1126
             }
1127
             */
1128
1129
            /*
1130
             * Note: this is a bit dangerous, but that's what it
1131
             * takes to use nearly compatible signature for different
1132
             * encodings.
1133
             */
1134
            xmlCharEncCloseFunc(input->buf->encoder);
1135
            input->buf->encoder = handler;
1136
            return (0);
1137
        }
1138
        input->buf->encoder = handler;
1139
1140
        /*
1141
         * Is there already some content down the pipe to convert ?
1142
         */
1143
        if ((input->buf->buffer != NULL) && (input->buf->buffer->use > 0)) {
1144
            int processed;
1145
	    unsigned int use;
1146
1147
            /*
1148
             * Specific handling of the Byte Order Mark for 
1149
             * UTF-16
1150
             */
1151
            if ((handler->name != NULL) &&
1152
                (!strcmp(handler->name, "UTF-16LE") ||
1153
                 !strcmp(handler->name, "UTF-16")) &&
1154
                (input->cur[0] == 0xFF) && (input->cur[1] == 0xFE)) {
1155
                input->cur += 2;
1156
            }
1157
            if ((handler->name != NULL) &&
1158
                (!strcmp(handler->name, "UTF-16BE")) &&
1159
                (input->cur[0] == 0xFE) && (input->cur[1] == 0xFF)) {
1160
                input->cur += 2;
1161
            }
1162
            /*
1163
             * Errata on XML-1.0 June 20 2001
1164
             * Specific handling of the Byte Order Mark for
1165
             * UTF-8
1166
             */
1167
            if ((handler->name != NULL) &&
1168
                (!strcmp(handler->name, "UTF-8")) &&
1169
                (input->cur[0] == 0xEF) &&
1170
                (input->cur[1] == 0xBB) && (input->cur[2] == 0xBF)) {
1171
                input->cur += 3;
1172
            }
1173
1174
            /*
1175
             * Shrink the current input buffer.
1176
             * Move it as the raw buffer and create a new input buffer
1177
             */
1178
            processed = input->cur - input->base;
1179
            xmlBufferShrink(input->buf->buffer, processed);
1180
            input->buf->raw = input->buf->buffer;
1181
            input->buf->buffer = xmlBufferCreate();
1182
	    input->buf->rawconsumed = processed;
1183
	    use = input->buf->raw->use;
1184
1185
            if (ctxt->html) {
1186
                /*
1187
                 * convert as much as possible of the buffer
1188
                 */
1189
                nbchars = xmlCharEncInFunc(input->buf->encoder,
1190
                                           input->buf->buffer,
1191
                                           input->buf->raw);
1192
            } else {
1193
                /*
1194
                 * convert just enough to get
1195
                 * '<?xml version="1.0" encoding="xxx"?>'
1196
                 * parsed with the autodetected encoding
1197
                 * into the parser reading buffer.
1198
                 */
1199
                nbchars = xmlCharEncFirstLine(input->buf->encoder,
1200
                                              input->buf->buffer,
1201
                                              input->buf->raw);
1202
            }
1203
            if (nbchars < 0) {
1204
                xmlErrInternal(ctxt,
1205
                               "switching encoding: encoder error\n",
1206
                               NULL);
1207
                return (-1);
1208
            }
1209
	    input->buf->rawconsumed += use - input->buf->raw->use;
1210
            input->base = input->cur = input->buf->buffer->content;
1211
            input->end = &input->base[input->buf->buffer->use];
1212
1213
        }
1214
        return (0);
1215
    } else if (input->length == 0) {
1216
	/*
1217
	 * When parsing a static memory array one must know the
1218
	 * size to be able to convert the buffer.
1219
	 */
1220
	xmlErrInternal(ctxt, "switching encoding : no input\n", NULL);
1221
	return (-1);
1222
    }
1223
    return (0);
1224
}
1225
1226
/**
1227
 * xmlSwitchToEncoding:
1228
 * @ctxt:  the parser context
1229
 * @handler:  the encoding handler
1230
 *
1231
 * change the input functions when discovering the character encoding
1232
 * of a given entity.
1233
 *
1234
 * Returns 0 in case of success, -1 otherwise
1235
 */
1236
int
1237
xmlSwitchToEncoding(xmlParserCtxtPtr ctxt, xmlCharEncodingHandlerPtr handler) 
1238
{
1239
    int ret = 0;
1240
1241
    if (handler != NULL) {
1242
        if (ctxt->input != NULL) {
1243
	    ret = xmlSwitchInputEncoding(ctxt, ctxt->input, handler);
1244
	} else {
1245
	    xmlErrInternal(ctxt, "xmlSwitchToEncoding : no input\n",
1246
	                   NULL);
1247
	    return(-1);
1248
	}
1249
	/*
1250
	 * The parsing is now done in UTF8 natively
1251
	 */
1252
	ctxt->charset = XML_CHAR_ENCODING_UTF8;
1253
    } else 
1254
	return(-1);
1255
    return(ret);
1256
}
1257
1258
/************************************************************************
1259
 *									*
1260
 *	Commodity functions to handle entities processing		*
1261
 *									*
1262
 ************************************************************************/
1263
1264
/**
1265
 * xmlFreeInputStream:
1266
 * @input:  an xmlParserInputPtr
1267
 *
1268
 * Free up an input stream.
1269
 */
1270
void
1271
xmlFreeInputStream(xmlParserInputPtr input) {
1272
    if (input == NULL) return;
1273
1274
    if (input->filename != NULL) xmlFree((char *) input->filename);
1275
    if (input->directory != NULL) xmlFree((char *) input->directory);
1276
    if (input->encoding != NULL) xmlFree((char *) input->encoding);
1277
    if (input->version != NULL) xmlFree((char *) input->version);
1278
    if ((input->free != NULL) && (input->base != NULL))
1279
        input->free((xmlChar *) input->base);
1280
    if (input->buf != NULL) 
1281
        xmlFreeParserInputBuffer(input->buf);
1282
    xmlFree(input);
1283
}
1284
1285
/**
1286
 * xmlNewInputStream:
1287
 * @ctxt:  an XML parser context
1288
 *
1289
 * Create a new input stream structure
1290
 * Returns the new input stream or NULL
1291
 */
1292
xmlParserInputPtr
1293
xmlNewInputStream(xmlParserCtxtPtr ctxt) {
1294
    xmlParserInputPtr input;
1295
    static int id = 0;
1296
1297
    input = (xmlParserInputPtr) xmlMalloc(sizeof(xmlParserInput));
1298
    if (input == NULL) {
1299
        xmlErrMemory(ctxt,  "couldn't allocate a new input stream\n");
1300
	return(NULL);
1301
    }
1302
    memset(input, 0, sizeof(xmlParserInput));
1303
    input->line = 1;
1304
    input->col = 1;
1305
    input->standalone = -1;
1306
    /*
1307
     * we don't care about thread reentrancy unicity for a single
1308
     * parser context (and hence thread) is sufficient.
1309
     */
1310
    input->id = id++;
1311
    return(input);
1312
}
1313
1314
/**
1315
 * xmlNewIOInputStream:
1316
 * @ctxt:  an XML parser context
1317
 * @input:  an I/O Input
1318
 * @enc:  the charset encoding if known
1319
 *
1320
 * Create a new input stream structure encapsulating the @input into
1321
 * a stream suitable for the parser.
1322
 *
1323
 * Returns the new input stream or NULL
1324
 */
1325
xmlParserInputPtr
1326
xmlNewIOInputStream(xmlParserCtxtPtr ctxt, xmlParserInputBufferPtr input,
1327
	            xmlCharEncoding enc) {
1328
    xmlParserInputPtr inputStream;
1329
1330
    if (input == NULL) return(NULL);
1331
    if (xmlParserDebugEntities)
1332
	xmlGenericError(xmlGenericErrorContext, "new input from I/O\n");
1333
    inputStream = xmlNewInputStream(ctxt);
1334
    if (inputStream == NULL) {
1335
	return(NULL);
1336
    }
1337
    inputStream->filename = NULL;
1338
    inputStream->buf = input;
1339
    inputStream->base = inputStream->buf->buffer->content;
1340
    inputStream->cur = inputStream->buf->buffer->content;
1341
    inputStream->end = &inputStream->base[inputStream->buf->buffer->use];
1342
    if (enc != XML_CHAR_ENCODING_NONE) {
1343
        xmlSwitchEncoding(ctxt, enc);
1344
    }
1345
1346
    return(inputStream);
1347
}
1348
1349
/**
1350
 * xmlNewEntityInputStream:
1351
 * @ctxt:  an XML parser context
1352
 * @entity:  an Entity pointer
1353
 *
1354
 * Create a new input stream based on an xmlEntityPtr
1355
 *
1356
 * Returns the new input stream or NULL
1357
 */
1358
xmlParserInputPtr
1359
xmlNewEntityInputStream(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
1360
    xmlParserInputPtr input;
1361
1362
    if (entity == NULL) {
1363
        xmlErrInternal(ctxt, "xmlNewEntityInputStream entity = NULL\n",
1364
	               NULL);
1365
	return(NULL);
1366
    }
1367
    if (xmlParserDebugEntities)
1368
	xmlGenericError(xmlGenericErrorContext,
1369
		"new input from entity: %s\n", entity->name);
1370
    if (entity->content == NULL) {
1371
	switch (entity->etype) {
1372
            case XML_EXTERNAL_GENERAL_UNPARSED_ENTITY:
1373
	        xmlErrInternal(ctxt, "Cannot parse entity %s\n",
1374
		               entity->name);
1375
                break;
1376
            case XML_EXTERNAL_GENERAL_PARSED_ENTITY:
1377
            case XML_EXTERNAL_PARAMETER_ENTITY:
1378
		return(xmlLoadExternalEntity((char *) entity->URI,
1379
		       (char *) entity->ExternalID, ctxt));
1380
            case XML_INTERNAL_GENERAL_ENTITY:
1381
	        xmlErrInternal(ctxt,
1382
		      "Internal entity %s without content !\n",
1383
		               entity->name);
1384
                break;
1385
            case XML_INTERNAL_PARAMETER_ENTITY:
1386
	        xmlErrInternal(ctxt,
1387
		      "Internal parameter entity %s without content !\n",
1388
		               entity->name);
1389
                break;
1390
            case XML_INTERNAL_PREDEFINED_ENTITY:
1391
	        xmlErrInternal(ctxt,
1392
		      "Predefined entity %s without content !\n",
1393
		               entity->name);
1394
                break;
1395
	}
1396
	return(NULL);
1397
    }
1398
    input = xmlNewInputStream(ctxt);
1399
    if (input == NULL) {
1400
	return(NULL);
1401
    }
1402
    if (entity->URI != NULL)
1403
	input->filename = (char *) xmlStrdup((xmlChar *) entity->URI);
1404
    input->base = entity->content;
1405
    input->cur = entity->content;
1406
    input->length = entity->length;
1407
    input->end = &entity->content[input->length];
1408
    return(input);
1409
}
1410
1411
/**
1412
 * xmlNewStringInputStream:
1413
 * @ctxt:  an XML parser context
1414
 * @buffer:  an memory buffer
1415
 *
1416
 * Create a new input stream based on a memory buffer.
1417
 * Returns the new input stream
1418
 */
1419
xmlParserInputPtr
1420
xmlNewStringInputStream(xmlParserCtxtPtr ctxt, const xmlChar *buffer) {
1421
    xmlParserInputPtr input;
1422
1423
    if (buffer == NULL) {
1424
        xmlErrInternal(ctxt, "xmlNewStringInputStream string = NULL\n",
1425
	               NULL);
1426
	return(NULL);
1427
    }
1428
    if (xmlParserDebugEntities)
1429
	xmlGenericError(xmlGenericErrorContext,
1430
		"new fixed input: %.30s\n", buffer);
1431
    input = xmlNewInputStream(ctxt);
1432
    if (input == NULL) {
1433
        xmlErrMemory(ctxt,  "couldn't allocate a new input stream\n");
1434
	return(NULL);
1435
    }
1436
    input->base = buffer;
1437
    input->cur = buffer;
1438
    input->length = xmlStrlen(buffer);
1439
    input->end = &buffer[input->length];
1440
    return(input);
1441
}
1442
1443
/**
1444
 * xmlNewInputFromFile:
1445
 * @ctxt:  an XML parser context
1446
 * @filename:  the filename to use as entity
1447
 *
1448
 * Create a new input stream based on a file or an URL.
1449
 *
1450
 * Returns the new input stream or NULL in case of error
1451
 */
1452
xmlParserInputPtr
1453
xmlNewInputFromFile(xmlParserCtxtPtr ctxt, const char *filename) {
1454
    xmlParserInputBufferPtr buf;
1455
    xmlParserInputPtr inputStream;
1456
    char *directory = NULL;
1457
    xmlChar *URI = NULL;
1458
1459
    if (xmlParserDebugEntities)
1460
	xmlGenericError(xmlGenericErrorContext,
1461
		"new input from file: %s\n", filename);
1462
    if (ctxt == NULL) return(NULL);
1463
    buf = xmlParserInputBufferCreateFilename(filename, XML_CHAR_ENCODING_NONE);
1464
    if (buf == NULL) {
1465
	if (filename == NULL)
1466
	    __xmlLoaderErr(ctxt,
1467
	                   "failed to load external entity: NULL filename \n",
1468
			   NULL);
1469
	else
1470
	    __xmlLoaderErr(ctxt, "failed to load external entity \"%s\"\n",
1471
			   (const char *) filename);
1472
	return(NULL);
1473
    }
1474
1475
    inputStream = xmlNewInputStream(ctxt);
1476
    if (inputStream == NULL)
1477
	return(NULL);
1478
1479
    inputStream->buf = buf;
1480
    inputStream = xmlCheckHTTPInput(ctxt, inputStream);
1481
    if (inputStream == NULL)
1482
        return(NULL);
1483
    
1484
    if (inputStream->filename == NULL)
1485
	URI = xmlStrdup((xmlChar *) filename);
1486
    else
1487
	URI = xmlStrdup((xmlChar *) inputStream->filename);
1488
    directory = xmlParserGetDirectory((const char *) URI);
1489
    if (inputStream->filename != NULL) xmlFree((char *)inputStream->filename);
1490
    inputStream->filename = (char *) xmlCanonicPath((const xmlChar *) URI);
1491
    if (URI != NULL) xmlFree((char *) URI);
1492
    inputStream->directory = directory;
1493
1494
    inputStream->base = inputStream->buf->buffer->content;
1495
    inputStream->cur = inputStream->buf->buffer->content;
1496
    inputStream->end = &inputStream->base[inputStream->buf->buffer->use];
1497
    if ((ctxt->directory == NULL) && (directory != NULL))
1498
        ctxt->directory = (char *) xmlStrdup((const xmlChar *) directory);
1499
    return(inputStream);
1500
}
1501
1502
/************************************************************************
1503
 *									*
1504
 *		Commodity functions to handle parser contexts		*
1505
 *									*
1506
 ************************************************************************/
1507
1508
/**
1509
 * xmlInitParserCtxt:
1510
 * @ctxt:  an XML parser context
1511
 *
1512
 * Initialize a parser context
1513
 *
1514
 * Returns 0 in case of success and -1 in case of error
1515
 */
1516
1517
int
1518
xmlInitParserCtxt(xmlParserCtxtPtr ctxt)
1519
{
1520
    xmlParserInputPtr input;
1521
1522
    if(ctxt==NULL) {
1523
        xmlErrInternal(NULL, "Got NULL parser context\n", NULL);
1524
        return(-1);
1525
    }
1526
1527
    xmlDefaultSAXHandlerInit();
1528
1529
    if (ctxt->dict == NULL)
1530
	ctxt->dict = xmlDictCreate();
1531
    if (ctxt->dict == NULL) {
1532
        xmlErrMemory(NULL, "cannot initialize parser context\n");
1533
	return(-1);
1534
    }
1535
    if (ctxt->sax == NULL)
1536
	ctxt->sax = (xmlSAXHandler *) xmlMalloc(sizeof(xmlSAXHandler));
1537
    if (ctxt->sax == NULL) {
1538
        xmlErrMemory(NULL, "cannot initialize parser context\n");
1539
	return(-1);
1540
    }
1541
    else
1542
        xmlSAXVersion(ctxt->sax, 2);
1543
1544
    ctxt->maxatts = 0;
1545
    ctxt->atts = NULL;
1546
    /* Allocate the Input stack */
1547
    if (ctxt->inputTab == NULL) {
1548
	ctxt->inputTab = (xmlParserInputPtr *)
1549
		    xmlMalloc(5 * sizeof(xmlParserInputPtr));
1550
	ctxt->inputMax = 5;
1551
    }
1552
    if (ctxt->inputTab == NULL) {
1553
        xmlErrMemory(NULL, "cannot initialize parser context\n");
1554
	ctxt->inputNr = 0;
1555
	ctxt->inputMax = 0;
1556
	ctxt->input = NULL;
1557
	return(-1);
1558
    }
1559
    while ((input = inputPop(ctxt)) != NULL) { /* Non consuming */
1560
        xmlFreeInputStream(input);
1561
    }
1562
    ctxt->inputNr = 0;
1563
    ctxt->input = NULL;
1564
1565
    ctxt->version = NULL;
1566
    ctxt->encoding = NULL;
1567
    ctxt->standalone = -1;
1568
    ctxt->hasExternalSubset = 0;
1569
    ctxt->hasPErefs = 0;
1570
    ctxt->html = 0;
1571
    ctxt->external = 0;
1572
    ctxt->instate = XML_PARSER_START;
1573
    ctxt->token = 0;
1574
    ctxt->directory = NULL;
1575
1576
    /* Allocate the Node stack */
1577
    if (ctxt->nodeTab == NULL) {
1578
	ctxt->nodeTab = (xmlNodePtr *) xmlMalloc(10 * sizeof(xmlNodePtr));
1579
	ctxt->nodeMax = 10;
1580
    }
1581
    if (ctxt->nodeTab == NULL) {
1582
        xmlErrMemory(NULL, "cannot initialize parser context\n");
1583
	ctxt->nodeNr = 0;
1584
	ctxt->nodeMax = 0;
1585
	ctxt->node = NULL;
1586
	ctxt->inputNr = 0;
1587
	ctxt->inputMax = 0;
1588
	ctxt->input = NULL;
1589
	return(-1);
1590
    }
1591
    ctxt->nodeNr = 0;
1592
    ctxt->node = NULL;
1593
1594
    /* Allocate the Name stack */
1595
    if (ctxt->nameTab == NULL) {
1596
	ctxt->nameTab = (const xmlChar **) xmlMalloc(10 * sizeof(xmlChar *));
1597
	ctxt->nameMax = 10;
1598
    }
1599
    if (ctxt->nameTab == NULL) {
1600
        xmlErrMemory(NULL, "cannot initialize parser context\n");
1601
	ctxt->nodeNr = 0;
1602
	ctxt->nodeMax = 0;
1603
	ctxt->node = NULL;
1604
	ctxt->inputNr = 0;
1605
	ctxt->inputMax = 0;
1606
	ctxt->input = NULL;
1607
	ctxt->nameNr = 0;
1608
	ctxt->nameMax = 0;
1609
	ctxt->name = NULL;
1610
	return(-1);
1611
    }
1612
    ctxt->nameNr = 0;
1613
    ctxt->name = NULL;
1614
1615
    /* Allocate the space stack */
1616
    if (ctxt->spaceTab == NULL) {
1617
	ctxt->spaceTab = (int *) xmlMalloc(10 * sizeof(int));
1618
	ctxt->spaceMax = 10;
1619
    }
1620
    if (ctxt->spaceTab == NULL) {
1621
        xmlErrMemory(NULL, "cannot initialize parser context\n");
1622
	ctxt->nodeNr = 0;
1623
	ctxt->nodeMax = 0;
1624
	ctxt->node = NULL;
1625
	ctxt->inputNr = 0;
1626
	ctxt->inputMax = 0;
1627
	ctxt->input = NULL;
1628
	ctxt->nameNr = 0;
1629
	ctxt->nameMax = 0;
1630
	ctxt->name = NULL;
1631
	ctxt->spaceNr = 0;
1632
	ctxt->spaceMax = 0;
1633
	ctxt->space = NULL;
1634
	return(-1);
1635
    }
1636
    ctxt->spaceNr = 1;
1637
    ctxt->spaceMax = 10;
1638
    ctxt->spaceTab[0] = -1;
1639
    ctxt->space = &ctxt->spaceTab[0];
1640
    ctxt->userData = ctxt;
1641
    ctxt->myDoc = NULL;
1642
    ctxt->wellFormed = 1;
1643
    ctxt->nsWellFormed = 1;
1644
    ctxt->valid = 1;
1645
    ctxt->loadsubset = xmlLoadExtDtdDefaultValue;
1646
    ctxt->validate = xmlDoValidityCheckingDefaultValue;
1647
    ctxt->pedantic = xmlPedanticParserDefaultValue;
1648
    ctxt->linenumbers = xmlLineNumbersDefaultValue;
1649
    ctxt->keepBlanks = xmlKeepBlanksDefaultValue;
1650
    if (ctxt->keepBlanks == 0)
1651
	ctxt->sax->ignorableWhitespace = xmlSAX2IgnorableWhitespace;
1652
1653
    ctxt->vctxt.finishDtd = XML_CTXT_FINISH_DTD_0;
1654
    ctxt->vctxt.userData = ctxt;
1655
    ctxt->vctxt.error = xmlParserValidityError;
1656
    ctxt->vctxt.warning = xmlParserValidityWarning;
1657
    if (ctxt->validate) {
1658
	if (xmlGetWarningsDefaultValue == 0)
1659
	    ctxt->vctxt.warning = NULL;
1660
	else
1661
	    ctxt->vctxt.warning = xmlParserValidityWarning;
1662
	ctxt->vctxt.nodeMax = 0;
1663
    }
1664
    ctxt->replaceEntities = xmlSubstituteEntitiesDefaultValue;
1665
    ctxt->record_info = 0;
1666
    ctxt->nbChars = 0;
1667
    ctxt->checkIndex = 0;
1668
    ctxt->inSubset = 0;
1669
    ctxt->errNo = XML_ERR_OK;
1670
    ctxt->depth = 0;
1671
    ctxt->charset = XML_CHAR_ENCODING_UTF8;
1672
    ctxt->catalogs = NULL;
1673
    ctxt->nbentities = 0;
1674
    xmlInitNodeInfoSeq(&ctxt->node_seq);
1675
    return(0);
1676
}
1677
1678
/**
1679
 * xmlFreeParserCtxt:
1680
 * @ctxt:  an XML parser context
1681
 *
1682
 * Free all the memory used by a parser context. However the parsed
1683
 * document in ctxt->myDoc is not freed.
1684
 */
1685
1686
void
1687
xmlFreeParserCtxt(xmlParserCtxtPtr ctxt)
1688
{
1689
    xmlParserInputPtr input;
1690
1691
    if (ctxt == NULL) return;
1692
1693
    while ((input = inputPop(ctxt)) != NULL) { /* Non consuming */
1694
        xmlFreeInputStream(input);
1695
    }
1696
    if (ctxt->spaceTab != NULL) xmlFree(ctxt->spaceTab);
1697
    if (ctxt->nameTab != NULL) xmlFree((xmlChar * *)ctxt->nameTab);
1698
    if (ctxt->nodeTab != NULL) xmlFree(ctxt->nodeTab);
1699
    if (ctxt->inputTab != NULL) xmlFree(ctxt->inputTab);
1700
    if (ctxt->version != NULL) xmlFree((char *) ctxt->version);
1701
    if (ctxt->encoding != NULL) xmlFree((char *) ctxt->encoding);
1702
    if (ctxt->extSubURI != NULL) xmlFree((char *) ctxt->extSubURI);
1703
    if (ctxt->extSubSystem != NULL) xmlFree((char *) ctxt->extSubSystem);
1704
#ifdef LIBXML_SAX1_ENABLED
1705
    if ((ctxt->sax != NULL) &&
1706
        (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler))
1707
#else
1708
    if (ctxt->sax != NULL)
1709
#endif /* LIBXML_SAX1_ENABLED */
1710
        xmlFree(ctxt->sax);
1711
    if (ctxt->directory != NULL) xmlFree((char *) ctxt->directory);
1712
    if (ctxt->vctxt.nodeTab != NULL) xmlFree(ctxt->vctxt.nodeTab);
1713
    if (ctxt->atts != NULL) xmlFree((xmlChar * *)ctxt->atts);
1714
    if (ctxt->dict != NULL) xmlDictFree(ctxt->dict);
1715
    if (ctxt->nsTab != NULL) xmlFree((char *) ctxt->nsTab);
1716
    if (ctxt->pushTab != NULL) xmlFree(ctxt->pushTab);
1717
    if (ctxt->attallocs != NULL) xmlFree(ctxt->attallocs);
1718
    if (ctxt->attsDefault != NULL) 
1719
        xmlHashFree(ctxt->attsDefault, (xmlHashDeallocator) xmlFree);
1720
    if (ctxt->attsSpecial != NULL)
1721
        xmlHashFree(ctxt->attsSpecial, NULL);
1722
    if (ctxt->freeElems != NULL) {
1723
        xmlNodePtr cur, next;
1724
1725
	cur = ctxt->freeElems;
1726
	while (cur != NULL) {
1727
	    next = cur->next;
1728
	    xmlFree(cur);
1729
	    cur = next;
1730
	}
1731
    }
1732
    if (ctxt->freeAttrs != NULL) {
1733
        xmlAttrPtr cur, next;
1734
1735
	cur = ctxt->freeAttrs;
1736
	while (cur != NULL) {
1737
	    next = cur->next;
1738
	    xmlFree(cur);
1739
	    cur = next;
1740
	}
1741
    }
1742
    /*
1743
     * cleanup the error strings
1744
     */
1745
    if (ctxt->lastError.message != NULL)
1746
        xmlFree(ctxt->lastError.message);
1747
    if (ctxt->lastError.file != NULL)
1748
        xmlFree(ctxt->lastError.file);
1749
    if (ctxt->lastError.str1 != NULL)
1750
        xmlFree(ctxt->lastError.str1);
1751
    if (ctxt->lastError.str2 != NULL)
1752
        xmlFree(ctxt->lastError.str2);
1753
    if (ctxt->lastError.str3 != NULL)
1754
        xmlFree(ctxt->lastError.str3);
1755
1756
#ifdef LIBXML_CATALOG_ENABLED
1757
    if (ctxt->catalogs != NULL)
1758
	xmlCatalogFreeLocal(ctxt->catalogs);
1759
#endif
1760
    xmlFree(ctxt);
1761
}
1762
1763
/**
1764
 * xmlNewParserCtxt:
1765
 *
1766
 * Allocate and initialize a new parser context.
1767
 *
1768
 * Returns the xmlParserCtxtPtr or NULL
1769
 */
1770
1771
xmlParserCtxtPtr
1772
xmlNewParserCtxt(void)
1773
{
1774
    xmlParserCtxtPtr ctxt;
1775
1776
    ctxt = (xmlParserCtxtPtr) xmlMalloc(sizeof(xmlParserCtxt));
1777
    if (ctxt == NULL) {
1778
	xmlErrMemory(NULL, "cannot allocate parser context\n");
1779
	return(NULL);
1780
    }
1781
    memset(ctxt, 0, sizeof(xmlParserCtxt));
1782
    if (xmlInitParserCtxt(ctxt) < 0) {
1783
        xmlFreeParserCtxt(ctxt);
1784
	return(NULL);
1785
    }
1786
    return(ctxt);
1787
}
1788
1789
/************************************************************************
1790
 *									*
1791
 *		Handling of node informations				*
1792
 *									*
1793
 ************************************************************************/
1794
1795
/**
1796
 * xmlClearParserCtxt:
1797
 * @ctxt:  an XML parser context
1798
 *
1799
 * Clear (release owned resources) and reinitialize a parser context
1800
 */
1801
1802
void
1803
xmlClearParserCtxt(xmlParserCtxtPtr ctxt)
1804
{
1805
  if (ctxt==NULL)
1806
    return;
1807
  xmlClearNodeInfoSeq(&ctxt->node_seq);
1808
  xmlCtxtReset(ctxt);
1809
}
1810
1811
1812
/**
1813
 * xmlParserFindNodeInfo:
1814
 * @ctx:  an XML parser context
1815
 * @node:  an XML node within the tree
1816
 *
1817
 * Find the parser node info struct for a given node
1818
 * 
1819
 * Returns an xmlParserNodeInfo block pointer or NULL
1820
 */
1821
const xmlParserNodeInfo *
1822
xmlParserFindNodeInfo(const xmlParserCtxtPtr ctx, const xmlNodePtr node)
1823
{
1824
    unsigned long pos;
1825
1826
    if ((ctx == NULL) || (node == NULL))
1827
        return (NULL);
1828
    /* Find position where node should be at */
1829
    pos = xmlParserFindNodeInfoIndex(&ctx->node_seq, node);
1830
    if (pos < ctx->node_seq.length
1831
        && ctx->node_seq.buffer[pos].node == node)
1832
        return &ctx->node_seq.buffer[pos];
1833
    else
1834
        return NULL;
1835
}
1836
1837
1838
/**
1839
 * xmlInitNodeInfoSeq:
1840
 * @seq:  a node info sequence pointer
1841
 *
1842
 * -- Initialize (set to initial state) node info sequence
1843
 */
1844
void
1845
xmlInitNodeInfoSeq(xmlParserNodeInfoSeqPtr seq)
1846
{
1847
    if (seq == NULL)
1848
        return;
1849
    seq->length = 0;
1850
    seq->maximum = 0;
1851
    seq->buffer = NULL;
1852
}
1853
1854
/**
1855
 * xmlClearNodeInfoSeq:
1856
 * @seq:  a node info sequence pointer
1857
 *
1858
 * -- Clear (release memory and reinitialize) node
1859
 *   info sequence
1860
 */
1861
void
1862
xmlClearNodeInfoSeq(xmlParserNodeInfoSeqPtr seq)
1863
{
1864
    if (seq == NULL)
1865
        return;
1866
    if (seq->buffer != NULL)
1867
        xmlFree(seq->buffer);
1868
    xmlInitNodeInfoSeq(seq);
1869
}
1870
1871
/**
1872
 * xmlParserFindNodeInfoIndex:
1873
 * @seq:  a node info sequence pointer
1874
 * @node:  an XML node pointer
1875
 *
1876
 * 
1877
 * xmlParserFindNodeInfoIndex : Find the index that the info record for
1878
 *   the given node is or should be at in a sorted sequence
1879
 *
1880
 * Returns a long indicating the position of the record
1881
 */
1882
unsigned long
1883
xmlParserFindNodeInfoIndex(const xmlParserNodeInfoSeqPtr seq,
1884
                           const xmlNodePtr node)
1885
{
1886
    unsigned long upper, lower, middle;
1887
    int found = 0;
1888
1889
    if ((seq == NULL) || (node == NULL))
1890
        return ((unsigned long) -1);
1891
1892
    /* Do a binary search for the key */
1893
    lower = 1;
1894
    upper = seq->length;
1895
    middle = 0;
1896
    while (lower <= upper && !found) {
1897
        middle = lower + (upper - lower) / 2;
1898
        if (node == seq->buffer[middle - 1].node)
1899
            found = 1;
1900
        else if (node < seq->buffer[middle - 1].node)
1901
            upper = middle - 1;
1902
        else
1903
            lower = middle + 1;
1904
    }
1905
1906
    /* Return position */
1907
    if (middle == 0 || seq->buffer[middle - 1].node < node)
1908
        return middle;
1909
    else
1910
        return middle - 1;
1911
}
1912
1913
1914
/**
1915
 * xmlParserAddNodeInfo:
1916
 * @ctxt:  an XML parser context
1917
 * @info:  a node info sequence pointer
1918
 *
1919
 * Insert node info record into the sorted sequence
1920
 */
1921
void
1922
xmlParserAddNodeInfo(xmlParserCtxtPtr ctxt,
1923
                     const xmlParserNodeInfoPtr info)
1924
{
1925
    unsigned long pos;
1926
1927
    if ((ctxt == NULL) || (info == NULL)) return;
1928
1929
    /* Find pos and check to see if node is already in the sequence */
1930
    pos = xmlParserFindNodeInfoIndex(&ctxt->node_seq, (xmlNodePtr)
1931
                                     info->node);
1932
1933
    if ((pos < ctxt->node_seq.length) && 
1934
        (ctxt->node_seq.buffer != NULL) &&
1935
        (ctxt->node_seq.buffer[pos].node == info->node)) {
1936
        ctxt->node_seq.buffer[pos] = *info;
1937
    }
1938
1939
    /* Otherwise, we need to add new node to buffer */
1940
    else {
1941
        if (ctxt->node_seq.length + 1 > ctxt->node_seq.maximum) {
1942
            xmlParserNodeInfo *tmp_buffer;
1943
            unsigned int byte_size;
1944
1945
            if (ctxt->node_seq.maximum == 0)
1946
                ctxt->node_seq.maximum = 2;
1947
            byte_size = (sizeof(*ctxt->node_seq.buffer) *
1948
			(2 * ctxt->node_seq.maximum));
1949
1950
            if (ctxt->node_seq.buffer == NULL)
1951
                tmp_buffer = (xmlParserNodeInfo *) xmlMalloc(byte_size);
1952
            else
1953
                tmp_buffer =
1954
                    (xmlParserNodeInfo *) xmlRealloc(ctxt->node_seq.buffer,
1955
                                                     byte_size);
1956
1957
            if (tmp_buffer == NULL) {
1958
		xmlErrMemory(ctxt, "failed to allocate buffer\n");
1959
                return;
1960
            }
1961
            ctxt->node_seq.buffer = tmp_buffer;
1962
            ctxt->node_seq.maximum *= 2;
1963
        }
1964
1965
        /* If position is not at end, move elements out of the way */
1966
        if (pos != ctxt->node_seq.length) {
1967
            unsigned long i;
1968
1969
            for (i = ctxt->node_seq.length; i > pos; i--)
1970
                ctxt->node_seq.buffer[i] = ctxt->node_seq.buffer[i - 1];
1971
        }
1972
1973
        /* Copy element and increase length */
1974
        ctxt->node_seq.buffer[pos] = *info;
1975
        ctxt->node_seq.length++;
1976
    }
1977
}
1978
1979
/************************************************************************
 *									*
 *		Default settings					*
 *									*
 ************************************************************************/
1984
/**
1985
 * xmlPedanticParserDefault:
1986
 * @val:  int 0 or 1 
1987
 *
1988
 * Set and return the previous value for enabling pedantic warnings.
1989
 *
1990
 * Returns the last value for 0 for no substitution, 1 for substitution.
1991
 */
1992
1993
int
1994
xmlPedanticParserDefault(int val) {
1995
    int old = xmlPedanticParserDefaultValue;
1996
1997
    xmlPedanticParserDefaultValue = val;
1998
    return(old);
1999
}
2000
2001
/**
2002
 * xmlLineNumbersDefault:
2003
 * @val:  int 0 or 1 
2004
 *
2005
 * Set and return the previous value for enabling line numbers in elements
2006
 * contents. This may break on old application and is turned off by default.
2007
 *
2008
 * Returns the last value for 0 for no substitution, 1 for substitution.
2009
 */
2010
2011
int
2012
xmlLineNumbersDefault(int val) {
2013
    int old = xmlLineNumbersDefaultValue;
2014
2015
    xmlLineNumbersDefaultValue = val;
2016
    return(old);
2017
}
2018
2019
/**
2020
 * xmlSubstituteEntitiesDefault:
2021
 * @val:  int 0 or 1 
2022
 *
2023
 * Set and return the previous value for default entity support.
2024
 * Initially the parser always keep entity references instead of substituting
2025
 * entity values in the output. This function has to be used to change the
2026
 * default parser behavior
2027
 * SAX::substituteEntities() has to be used for changing that on a file by
2028
 * file basis.
2029
 *
2030
 * Returns the last value for 0 for no substitution, 1 for substitution.
2031
 */
2032
2033
int
2034
xmlSubstituteEntitiesDefault(int val) {
2035
    int old = xmlSubstituteEntitiesDefaultValue;
2036
2037
    xmlSubstituteEntitiesDefaultValue = val;
2038
    return(old);
2039
}
2040
2041
/**
2042
 * xmlKeepBlanksDefault:
2043
 * @val:  int 0 or 1 
2044
 *
2045
 * Set and return the previous value for default blanks text nodes support.
2046
 * The 1.x version of the parser used an heuristic to try to detect
2047
 * ignorable white spaces. As a result the SAX callback was generating
2048
 * xmlSAX2IgnorableWhitespace() callbacks instead of characters() one, and when
2049
 * using the DOM output text nodes containing those blanks were not generated.
2050
 * The 2.x and later version will switch to the XML standard way and
2051
 * ignorableWhitespace() are only generated when running the parser in
2052
 * validating mode and when the current element doesn't allow CDATA or
2053
 * mixed content.
2054
 * This function is provided as a way to force the standard behavior 
2055
 * on 1.X libs and to switch back to the old mode for compatibility when
2056
 * running 1.X client code on 2.X . Upgrade of 1.X code should be done
2057
 * by using xmlIsBlankNode() commodity function to detect the "empty"
2058
 * nodes generated.
2059
 * This value also affect autogeneration of indentation when saving code
2060
 * if blanks sections are kept, indentation is not generated.
2061
 *
2062
 * Returns the last value for 0 for no substitution, 1 for substitution.
2063
 */
2064
2065
int
2066
xmlKeepBlanksDefault(int val) {
2067
    int old = xmlKeepBlanksDefaultValue;
2068
2069
    xmlKeepBlanksDefaultValue = val;
2070
    xmlIndentTreeOutput = !val;
2071
    return(old);
2072
}
2073
2074
#define bottom_parserInternals
2075
#include "elfgcchack.h"