1
/*
2
www.sourceforge.net/projects/tinyxml
3
Original code (2.0 and earlier )copyright (c) 2000-2002 Lee Thomason (www.grinninglizard.com)
4
5
This software is provided 'as-is', without any express or implied 
6
warranty. In no event will the authors be held liable for any 
7
damages arising from the use of this software.
8
9
Permission is granted to anyone to use this software for any 
10
purpose, including commercial applications, and to alter it and 
11
redistribute it freely, subject to the following restrictions:
12
13
1. The origin of this software must not be misrepresented; you must 
14
not claim that you wrote the original software. If you use this
15
software in a product, an acknowledgment in the product documentation
16
would be appreciated but is not required.
17
18
2. Altered source versions must be plainly marked as such, and 
19
must not be misrepresented as being the original software.
20
21
3. This notice may not be removed or altered from any source 
22
distribution.
23
*/
24
25
#include "tinyxml.h"
26
#include <ctype.h>
27
#include <stddef.h>
28
29
//#define DEBUG_PARSER
30
31
// Note tha "PutString" hardcodes the same list. This
32
// is less flexible than it appears. Changing the entries
33
// or order will break putstring.	
34
TiXmlBase::Entity TiXmlBase::entity[ NUM_ENTITY ] = 
35
{
36
	{ "&amp;",  5, '&' },
37
	{ "&lt;",   4, '<' },
38
	{ "&gt;",   4, '>' },
39
	{ "&quot;", 6, '\"' },
40
	{ "&apos;", 6, '\'' }
41
};
42
43
// Bunch of unicode info at:
//		http://www.unicode.org/faq/utf_bom.html
// Including the basics of the utf8ByteTable, which determines the number
// of bytes in a sequence from its lead byte. 1 is placed for invalid
// sequences -- although the result will be junk, pass it through as much
// as possible.
// Beware of the non-characters in UTF-8:
//				ef bb bf (Microsoft "lead bytes")
//				ef bf be
//				ef bf bf

// The three "lead bytes" (ef bb bf), in the order they appear in the stream.
const unsigned char TIXML_UTF_LEAD_0 = 0xEFu;
const unsigned char TIXML_UTF_LEAD_1 = 0xBBu;
const unsigned char TIXML_UTF_LEAD_2 = 0xBFu;
56
57
// Maps a byte value (used as the index) to the number of bytes in the UTF-8
// sequence that the byte starts. Bytes that cannot start a multi-byte
// sequence (0x80-0xc1 and 0xf5-0xff) map to 1 so that they are passed
// through one byte at a time.
const int TiXmlBase::utf8ByteTable[256] = 
{
	//	0	1	2	3	4	5	6	7	8	9	a	b	c	d	e	f
		1,	1,	1,	1,	1,	1,	1,	1,	1,	1,	1,	1,	1,	1,	1,	1,	// 0x00
		1,	1,	1,	1,	1,	1,	1,	1,	1,	1,	1,	1,	1,	1,	1,	1,	// 0x10
		1,	1,	1,	1,	1,	1,	1,	1,	1,	1,	1,	1,	1,	1,	1,	1,	// 0x20
		1,	1,	1,	1,	1,	1,	1,	1,	1,	1,	1,	1,	1,	1,	1,	1,	// 0x30
		1,	1,	1,	1,	1,	1,	1,	1,	1,	1,	1,	1,	1,	1,	1,	1,	// 0x40
		1,	1,	1,	1,	1,	1,	1,	1,	1,	1,	1,	1,	1,	1,	1,	1,	// 0x50
		1,	1,	1,	1,	1,	1,	1,	1,	1,	1,	1,	1,	1,	1,	1,	1,	// 0x60
		1,	1,	1,	1,	1,	1,	1,	1,	1,	1,	1,	1,	1,	1,	1,	1,	// 0x70	End of ASCII range
		1,	1,	1,	1,	1,	1,	1,	1,	1,	1,	1,	1,	1,	1,	1,	1,	// 0x80 0x80 to 0xc1 invalid
		1,	1,	1,	1,	1,	1,	1,	1,	1,	1,	1,	1,	1,	1,	1,	1,	// 0x90 
		1,	1,	1,	1,	1,	1,	1,	1,	1,	1,	1,	1,	1,	1,	1,	1,	// 0xa0 
		1,	1,	1,	1,	1,	1,	1,	1,	1,	1,	1,	1,	1,	1,	1,	1,	// 0xb0 
		1,	1,	2,	2,	2,	2,	2,	2,	2,	2,	2,	2,	2,	2,	2,	2,	// 0xc0 0xc2 to 0xdf 2 byte
		2,	2,	2,	2,	2,	2,	2,	2,	2,	2,	2,	2,	2,	2,	2,	2,	// 0xd0
		3,	3,	3,	3,	3,	3,	3,	3,	3,	3,	3,	3,	3,	3,	3,	3,	// 0xe0 0xe0 to 0xef 3 byte
		4,	4,	4,	4,	4,	1,	1,	1,	1,	1,	1,	1,	1,	1,	1,	1	// 0xf0 0xf0 to 0xf4 4 byte, 0xf5 and higher invalid
};
77
78
79
// Encodes the code point 'input' as UTF-8 into 'output' and stores the
// number of bytes written (1 to 4) in '*length'. Values of 0x200000 and
// above are not encoded: '*length' is set to 0 and 'output' is untouched.
// 'output' must have room for up to 4 bytes; no terminator is written.
void TiXmlBase::ConvertUTF32ToUTF8( unsigned long input, char* output, int* length )
{
	const unsigned long CONTINUATION_MASK = 0xBF;
	const unsigned long CONTINUATION_MARK = 0x80;
	const unsigned long LEAD_MARK[7] = { 0x00, 0x00, 0xC0, 0xE0, 0xF0, 0xF8, 0xFC };

	// Number of bytes needed for this value; 0 means "cannot encode".
	int count = 0;
	if ( input < 0x80 )
		count = 1;
	else if ( input < 0x800 )
		count = 2;
	else if ( input < 0x10000 )
		count = 3;
	else if ( input < 0x200000 )
		count = 4;

	*length = count;
	if ( count == 0 )
		return;		// This code won't convert this correctly anyway.

	// Fill the trailing bytes from last to first, six payload bits each.
	for ( int i = count - 1; i > 0; --i )
	{
		output[i] = (char)((input | CONTINUATION_MARK) & CONTINUATION_MASK);
		input >>= 6;
	}

	// Whatever bits remain go into the lead byte, tagged with the length marker.
	output[0] = (char)(input | LEAD_MARK[count]);
}
118
119
120
// Returns non-zero if 'anyByte' should be treated as a letter.
//
// Only bytes below 127 are actually classified (with isalpha); every other
// byte is assumed to be a valid letter. It is quite tricky to figure out
// alphabetical vs. not across encodings, so this takes a very conservative
// approach: get the English letters right and accept the rest.
// The encoding argument is currently unused.
/*static*/ int TiXmlBase::IsAlpha( unsigned char anyByte, TiXmlEncoding /*encoding*/ )
{
	return ( anyByte < 127 ) ? isalpha( anyByte ) : 1;
}
139
140
141
// Returns non-zero if 'anyByte' should be treated as a letter or digit.
//
// Only bytes below 127 are actually classified (with isalnum); every other
// byte is assumed to be valid. It is quite tricky to figure out
// alphabetical vs. not across encodings, so this takes a very conservative
// approach: get the English ones right and accept the rest.
// The encoding argument is currently unused.
/*static*/ int TiXmlBase::IsAlphaNum( unsigned char anyByte, TiXmlEncoding /*encoding*/ )
{
	return ( anyByte < 127 ) ? isalnum( anyByte ) : 1;
}
160
161
162
class TiXmlParsingData
163
{
164
	friend class TiXmlDocument;
165
  public:
166
	void Stamp( const char* now, TiXmlEncoding encoding );
167
168
	const TiXmlCursor& Cursor()	{ return cursor; }
169
170
  private:
171
	// Only used by the document!
172
	TiXmlParsingData( const char* start, int _tabsize, int row, int col )
173
	{
174
		assert( start );
175
		stamp = start;
176
		tabsize = _tabsize;
177
		cursor.row = row;
178
		cursor.col = col;
179
	}
180
181
	TiXmlCursor		cursor;
182
	const char*		stamp;
183
	int				tabsize;
184
};
185
186
187
void TiXmlParsingData::Stamp( const char* now, TiXmlEncoding encoding )
188
{
189
	assert( now );
190
191
	// Do nothing if the tabsize is 0.
192
	if ( tabsize < 1 )
193
	{
194
		return;
195
	}
196
197
	// Get the current row, column.
198
	int row = cursor.row;
199
	int col = cursor.col;
200
	const char* p = stamp;
201
	assert( p );
202
203
	while ( p < now )
204
	{
205
		// Treat p as unsigned, so we have a happy compiler.
206
		const unsigned char* pU = (const unsigned char*)p;
207
208
		// Code contributed by Fletcher Dunn: (modified by lee)
209
		switch (*pU) {
210
			case 0:
211
				// We *should* never get here, but in case we do, don't
212
				// advance past the terminating null character, ever
213
				return;
214
215
			case '\r':
216
				// bump down to the next line
217
				++row;
218
				col = 0;				
219
				// Eat the character
220
				++p;
221
222
				// Check for \r\n sequence, and treat this as a single character
223
				if (*p == '\n') {
224
					++p;
225
				}
226
				break;
227
228
			case '\n':
229
				// bump down to the next line
230
				++row;
231
				col = 0;
232
233
				// Eat the character
234
				++p;
235
236
				// Check for \n\r sequence, and treat this as a single
237
				// character.  (Yes, this bizarre thing does occur still
238
				// on some arcane platforms...)
239
				if (*p == '\r') {
240
					++p;
241
				}
242
				break;
243
244
			case '\t':
245
				// Eat the character
246
				++p;
247
248
				// Skip to next tab stop
249
				col = (col / tabsize + 1) * tabsize;
250
				break;
251
252
			case TIXML_UTF_LEAD_0:
253
				if ( encoding == TIXML_ENCODING_UTF8 )
254
				{
255
					if ( *(p+1) && *(p+2) )
256
					{
257
						// In these cases, don't advance the column. These are
258
						// 0-width spaces.
259
						if ( *(pU+1)==TIXML_UTF_LEAD_1 && *(pU+2)==TIXML_UTF_LEAD_2 )
260
							p += 3;	
261
						else if ( *(pU+1)==0xbfU && *(pU+2)==0xbeU )
262
							p += 3;	
263
						else if ( *(pU+1)==0xbfU && *(pU+2)==0xbfU )
264
							p += 3;	
265
						else
266
							{ p +=3; ++col; }	// A normal character.
267
					}
268
				}
269
				else
270
				{
271
					++p;
272
					++col;
273
				}
274
				break;
275
276
			default:
277
				if ( encoding == TIXML_ENCODING_UTF8 )
278
				{
279
					// Eat the 1 to 4 byte utf8 character.
280
					int step = TiXmlBase::utf8ByteTable[*((unsigned char*)p)];
281
					if ( step == 0 )
282
						step = 1;		// Error case from bad encoding, but handle gracefully.
283
					p += step;
284
285
					// Just advance one column, of course.
286
					++col;
287
				}
288
				else
289
				{
290
					++p;
291
					++col;
292
				}
293
				break;
294
		}
295
	}
296
	cursor.row = row;
297
	cursor.col = col;
298
	assert( cursor.row >= -1 );
299
	assert( cursor.col >= -1 );
300
	stamp = p;
301
	assert( stamp );
302
}
303
304
305
const char* TiXmlBase::SkipWhiteSpace( const char* p, TiXmlEncoding encoding )
306
{
307
	if ( !p || !*p )
308
	{
309
		return 0;
310
	}
311
	if ( encoding == TIXML_ENCODING_UTF8 )
312
	{
313
		while ( *p )
314
		{
315
			const unsigned char* pU = (const unsigned char*)p;
316
			
317
			// Skip the stupid Microsoft UTF-8 Byte order marks
318
			if (	*(pU+0)==TIXML_UTF_LEAD_0
319
				 && *(pU+1)==TIXML_UTF_LEAD_1 
320
				 && *(pU+2)==TIXML_UTF_LEAD_2 )
321
			{
322
				p += 3;
323
				continue;
324
			}
325
			else if(*(pU+0)==TIXML_UTF_LEAD_0
326
				 && *(pU+1)==0xbfU
327
				 && *(pU+2)==0xbeU )
328
			{
329
				p += 3;
330
				continue;
331
			}
332
			else if(*(pU+0)==TIXML_UTF_LEAD_0
333
				 && *(pU+1)==0xbfU
334
				 && *(pU+2)==0xbfU )
335
			{
336
				p += 3;
337
				continue;
338
			}
339
340
			if ( IsWhiteSpace( *p ) || *p == '\n' || *p =='\r' )		// Still using old rules for white space.
341
				++p;
342
			else
343
				break;
344
		}
345
	}
346
	else
347
	{
348
		while ( *p && IsWhiteSpace( *p ) || *p == '\n' || *p =='\r' )
349
			++p;
350
	}
351
352
	return p;
353
}
354
355
#ifdef TIXML_USE_STL
356
// Moves leading white space from the stream 'in' onto the end of 'tag'.
// Returns true once the next character in the stream is not white space
// (or peek() yields a value <= 0); returns false if the stream stops being
// good() first.
/*static*/ bool TiXmlBase::StreamWhiteSpace( TIXML_ISTREAM * in, TIXML_STRING * tag )
{
	while ( in->good() )
	{
		const int next = in->peek();
		// At this scope, we can't get to a document. So fail silently.
		if ( !IsWhiteSpace( next ) || next <= 0 )
			return true;

		*tag += (char) in->get();
	}
	return false;
}
370
371
// Copies characters from 'in' onto the end of 'tag' until the next
// character in the stream equals 'character', which is left unread.
// Returns true when that character is found; false if peek() yields a
// value <= 0 or the stream stops being good() first.
/*static*/ bool TiXmlBase::StreamTo( TIXML_ISTREAM * in, int character, TIXML_STRING * tag )
{
	//assert( character > 0 && character < 128 );	// else it won't work in utf-8
	for ( ;; )
	{
		if ( !in->good() )
			return false;

		const int next = in->peek();
		if ( next == character )
			return true;
		if ( next <= 0 )		// Silent failure: can't get document at this scope
			return false;

		in->get();
		*tag += (char) next;
	}
}
387
#endif
388
389
const char* TiXmlBase::ReadName( const char* p, TIXML_STRING * name, TiXmlEncoding encoding )
390
{
391
	*name = "";
392
	assert( p );
393
394
	// Names start with letters or underscores.
395
	// Of course, in unicode, tinyxml has no idea what a letter *is*. The
396
	// algorithm is generous.
397
	//
398
	// After that, they can be letters, underscores, numbers,
399
	// hyphens, or colons. (Colons are valid ony for namespaces,
400
	// but tinyxml can't tell namespaces from names.)
401
	if (    p && *p 
402
		 && ( IsAlpha( (unsigned char) *p, encoding ) || *p == '_' ) )
403
	{
404
		while(		p && *p
405
				&&	(		IsAlphaNum( (unsigned char ) *p, encoding ) 
406
						 || *p == '_'
407
						 || *p == '-'
408
						 || *p == '.'
409
						 || *p == ':' ) )
410
		{
411
			(*name) += *p;
412
			++p;
413
		}
414
		return p;
415
	}
416
	return 0;
417
}
418
419
const char* TiXmlBase::GetEntity( const char* p, char* value, int* length, TiXmlEncoding encoding )
420
{
421
	// Presume an entity, and pull it out.
422
    TIXML_STRING ent;
423
	int i;
424
	*length = 0;
425
426
	if ( *(p+1) && *(p+1) == '#' && *(p+2) )
427
	{
428
		unsigned long ucs = 0;
429
		ptrdiff_t delta = 0;
430
		unsigned mult = 1;
431
432
		if ( *(p+2) == 'x' )
433
		{
434
			// Hexadecimal.
435
			if ( !*(p+3) ) return 0;
436
437
			const char* q = p+3;
438
			q = strchr( q, ';' );
439
440
			if ( !q || !*q ) return 0;
441
442
			delta = q-p;
443
			--q;
444
445
			while ( *q != 'x' )
446
			{
447
				if ( *q >= '0' && *q <= '9' )
448
					ucs += mult * (*q - '0');
449
				else if ( *q >= 'a' && *q <= 'f' )
450
					ucs += mult * (*q - 'a' + 10);
451
				else if ( *q >= 'A' && *q <= 'F' )
452
					ucs += mult * (*q - 'A' + 10 );
453
				else 
454
					return 0;
455
				mult *= 16;
456
				--q;
457
			}
458
		}
459
		else
460
		{
461
			// Decimal.
462
			if ( !*(p+2) ) return 0;
463
464
			const char* q = p+2;
465
			q = strchr( q, ';' );
466
467
			if ( !q || !*q ) return 0;
468
469
			delta = q-p;
470
			--q;
471
472
			while ( *q != '#' )
473
			{
474
				if ( *q >= '0' && *q <= '9' )
475
					ucs += mult * (*q - '0');
476
				else 
477
					return 0;
478
				mult *= 10;
479
				--q;
480
			}
481
		}
482
		if ( encoding == TIXML_ENCODING_UTF8 )
483
		{
484
			// convert the UCS to UTF-8
485
			ConvertUTF32ToUTF8( ucs, value, length );
486
		}
487
		else
488
		{
489
			*value = (char)ucs;
490
			*length = 1;
491
		}
492
		return p + delta + 1;
493
	}
494
495
	// Now try to match it.
496
	for( i=0; i<NUM_ENTITY; ++i )
497
	{
498
		if ( strncmp( entity[i].str, p, entity[i].strLength ) == 0 )
499
		{
500
			assert( strlen( entity[i].str ) == entity[i].strLength );
501
			*value = entity[i].chr;
502
			*length = 1;
503
			return ( p + entity[i].strLength );
504
		}
505
	}
506
507
	// So it wasn't an entity, its unrecognized, or something like that.
508
	*value = *p;	// Don't put back the last one, since we return it!
509
	return p+1;
510
}
511
512
513
bool TiXmlBase::StringEqual( const char* p,
514
							 const char* tag,
515
							 bool ignoreCase,
516
							 TiXmlEncoding encoding )
517
{
518
	assert( p );
519
	assert( tag );
520
	if ( !p || !*p )
521
	{
522
		assert( 0 );
523
		return false;
524
	}
525
526
	const char* q = p;
527
528
	if ( ignoreCase )
529
	{
530
		while ( *q && *tag && ToLower( *q, encoding ) == ToLower( *tag, encoding ) )
531
		{
532
			++q;
533
			++tag;
534
		}
535
536
		if ( *tag == 0 )
537
			return true;
538
	}
539
	else
540
	{
541
		while ( *q && *tag && *q == *tag )
542
		{
543
			++q;
544
			++tag;
545
		}
546
547
		if ( *tag == 0 )		// Have we found the end of the tag, and everything equal?
548
			return true;
549
	}
550
	return false;
551
}
552
553
const char* TiXmlBase::ReadText(	const char* p, 
554
									TIXML_STRING * text, 
555
									bool trimWhiteSpace, 
556
									const char* endTag, 
557
									bool caseInsensitive,
558
									TiXmlEncoding encoding )
559
{
560
    *text = "";
561
	if (    !trimWhiteSpace			// certain tags always keep whitespace
562
		 || !condenseWhiteSpace )	// if true, whitespace is always kept
563
	{
564
		// Keep all the white space.
565
		while (	   p && *p
566
				&& !StringEqual( p, endTag, caseInsensitive, encoding )
567
			  )
568
		{
569
			int len;
570
			char cArr[4] = { 0, 0, 0, 0 };
571
			p = GetChar( p, cArr, &len, encoding );
572
			text->append( cArr, len );
573
		}
574
	}
575
	else
576
	{
577
		bool whitespace = false;
578
579
		// Remove leading white space:
580
		p = SkipWhiteSpace( p, encoding );
581
		while (	   p && *p
582
				&& !StringEqual( p, endTag, caseInsensitive, encoding ) )
583
		{
584
			if ( *p == '\r' || *p == '\n' )
585
			{
586
				whitespace = true;
587
				++p;
588
			}
589
			else if ( IsWhiteSpace( *p ) )
590
			{
591
				whitespace = true;
592
				++p;
593
			}
594
			else
595
			{
596
				// If we've found whitespace, add it before the
597
				// new character. Any whitespace just becomes a space.
598
				if ( whitespace )
599
				{
600
					(*text) += ' ';
601
					whitespace = false;
602
				}
603
				int len;
604
				char cArr[4] = { 0, 0, 0, 0 };
605
				p = GetChar( p, cArr, &len, encoding );
606
				if ( len == 1 )
607
					(*text) += cArr[0];	// more efficient
608
				else
609
					text->append( cArr, len );
610
			}
611
		}
612
	}
613
	return p + strlen( endTag );
614
}
615
616
#ifdef TIXML_USE_STL
617
618
// Reads from 'in', appending the raw characters to 'tag', until the first
// element at document level has been streamed completely. Nodes that are
// not elements are streamed as well and the loop continues after them.
// The nodes created here exist only to drive the streaming and are deleted
// again; nothing is linked into the document by this function.
// On failure an error is recorded on the document via SetError().
void TiXmlDocument::StreamIn( TIXML_ISTREAM * in, TIXML_STRING * tag )
{
	// The basic issue with a document is that we don't know what we're
	// streaming. Read something presumed to be a tag (and hope), then
	// identify it, and call the appropriate stream method on the tag.
	//
	// This "pre-streaming" will never read the closing ">" so the
	// sub-tag can orient itself.

	if ( !StreamTo( in, '<', tag ) ) 
	{
		SetError( TIXML_ERROR_PARSING_EMPTY, 0, 0, TIXML_ENCODING_UNKNOWN );
		return;
	}

	while ( in->good() )
	{
		// Remember where this node's text begins inside 'tag'.
		int tagIndex = (int) tag->length();
		while ( in->good() && in->peek() != '>' )
		{
			int c = in->get();
			if ( c <= 0 )
			{
				SetError( TIXML_ERROR_EMBEDDED_NULL, 0, 0, TIXML_ENCODING_UNKNOWN );
				break;
			}
			(*tag) += (char) c;
		}

		if ( in->good() )
		{
			// We now have something we presume to be a node of 
			// some sort. Identify it, and call the node to
			// continue streaming.
			TiXmlNode* node = Identify( tag->c_str() + tagIndex, TIXML_DEFAULT_ENCODING );

			if ( node )
			{
				node->StreamIn( in, tag );
				// Query the type before the node is deleted.
				bool isElement = node->ToElement() != 0;
				delete node;
				node = 0;

				// If this is the root element, we're done. Parsing will be
				// done by the >> operator.
				if ( isElement )
				{
					return;
				}
			}
			else
			{
				SetError( TIXML_ERROR, 0, 0, TIXML_ENCODING_UNKNOWN );
				return;
			}
		}
	}
	// We should have returned sooner.
	SetError( TIXML_ERROR, 0, 0, TIXML_ENCODING_UNKNOWN );
}
678
679
#endif
680
681
const char* TiXmlDocument::Parse( const char* p, TiXmlParsingData* prevData, TiXmlEncoding encoding )
682
{
683
	ClearError();
684
685
	// Parse away, at the document level. Since a document
686
	// contains nothing but other tags, most of what happens
687
	// here is skipping white space.
688
	if ( !p || !*p )
689
	{
690
		SetError( TIXML_ERROR_DOCUMENT_EMPTY, 0, 0, TIXML_ENCODING_UNKNOWN );
691
		return 0;
692
	}
693
694
	// Note that, for a document, this needs to come
695
	// before the while space skip, so that parsing
696
	// starts from the pointer we are given.
697
	location.Clear();
698
	if ( prevData )
699
	{
700
		location.row = prevData->cursor.row;
701
		location.col = prevData->cursor.col;
702
	}
703
	else
704
	{
705
		location.row = 0;
706
		location.col = 0;
707
	}
708
	TiXmlParsingData data( p, TabSize(), location.row, location.col );
709
	location = data.Cursor();
710
711
	if ( encoding == TIXML_ENCODING_UNKNOWN )
712
	{
713
		// Check for the Microsoft UTF-8 lead bytes.
714
		const unsigned char* pU = (const unsigned char*)p;
715
		if (	*(pU+0) && *(pU+0) == TIXML_UTF_LEAD_0
716
			 && *(pU+1) && *(pU+1) == TIXML_UTF_LEAD_1
717
			 && *(pU+2) && *(pU+2) == TIXML_UTF_LEAD_2 )
718
		{
719
			encoding = TIXML_ENCODING_UTF8;
720
			useMicrosoftBOM = true;
721
		}
722
	}
723
724
    p = SkipWhiteSpace( p, encoding );
725
	if ( !p )
726
	{
727
		SetError( TIXML_ERROR_DOCUMENT_EMPTY, 0, 0, TIXML_ENCODING_UNKNOWN );
728
		return 0;
729
	}
730
731
	while ( p && *p )
732
	{
733
		TiXmlNode* node = Identify( p, encoding );
734
		if ( node )
735
		{
736
			p = node->Parse( p, &data, encoding );
737
			LinkEndChild( node );
738
		}
739
		else
740
		{
741
			break;
742
		}
743
744
		// Did we get encoding info?
745
		if (    encoding == TIXML_ENCODING_UNKNOWN
746
			 && node->ToDeclaration() )
747
		{
748
			TiXmlDeclaration* dec = node->ToDeclaration();
749
			const char* enc = dec->Encoding();
750
			assert( enc );
751
752
			if ( *enc == 0 )
753
				encoding = TIXML_ENCODING_UTF8;
754
			else if ( StringEqual( enc, "UTF-8", true, TIXML_ENCODING_UNKNOWN ) )
755
				encoding = TIXML_ENCODING_UTF8;
756
			else if ( StringEqual( enc, "UTF8", true, TIXML_ENCODING_UNKNOWN ) )
757
				encoding = TIXML_ENCODING_UTF8;	// incorrect, but be nice
758
			else 
759
				encoding = TIXML_ENCODING_LEGACY;
760
		}
761
762
		p = SkipWhiteSpace( p, encoding );
763
	}
764
765
	// Was this empty?
766
	if ( !firstChild ) {
767
		SetError( TIXML_ERROR_DOCUMENT_EMPTY, 0, 0, encoding );
768
		return 0;
769
	}
770
771
	// All is well.
772
	return p;
773
}
774
775
void TiXmlDocument::SetError( int err, const char* pError, TiXmlParsingData* data, TiXmlEncoding encoding )
776
{	
777
	// The first error in a chain is more accurate - don't set again!
778
	if ( error )
779
		return;
780
781
	assert( err > 0 && err < TIXML_ERROR_STRING_COUNT );
782
	error   = true;
783
	errorId = err;
784
	errorDesc = errorString[ errorId ];
785
786
	errorLocation.Clear();
787
	if ( pError && data )
788
	{
789
		data->Stamp( pError, encoding );
790
		errorLocation = data->Cursor();
791
	}
792
}
793
794
795
// Looks at the markup starting at 'p' and creates an empty node of the
// matching type, with its parent set to this node so it can report errors.
// The caller is expected to call Parse() on the result.
//
// Returns 0 if, after skipping white space, 'p' does not point at a '<'.
//
// What is this thing?
// - Declaration: <?xml (compared case-insensitively)
// - Comment: <!--
// - CDATA text: <![CDATA[
// - Anything else starting with <! is unknown to tinyxml.
// - Elements start with a letter or underscore.
// - Everything else is unknown to tinyxml.
TiXmlNode* TiXmlNode::Identify( const char* p, TiXmlEncoding encoding )
{
	p = SkipWhiteSpace( p, encoding );
	if ( !p || !*p || *p != '<' )
		return 0;

	TiXmlDocument* doc = GetDocument();
	p = SkipWhiteSpace( p, encoding );
	if ( !p || !*p )
		return 0;

	const char* const xmlHeader     = "<?xml";
	const char* const commentHeader = "<!--";
	const char* const cdataHeader   = "<![CDATA[";
	const char* const dtdHeader     = "<!";

	TiXmlNode* created = 0;

	// Order matters: the comment and CDATA headers must be tried before the
	// shorter "<!" prefix they both share.
	if ( StringEqual( p, xmlHeader, true, encoding ) )
	{
		#ifdef DEBUG_PARSER
			TIXML_LOG( "XML parsing Declaration\n" );
		#endif
		created = new TiXmlDeclaration();
	}
	else if ( StringEqual( p, commentHeader, false, encoding ) )
	{
		#ifdef DEBUG_PARSER
			TIXML_LOG( "XML parsing Comment\n" );
		#endif
		created = new TiXmlComment();
	}
	else if ( StringEqual( p, cdataHeader, false, encoding ) )
	{
		#ifdef DEBUG_PARSER
			TIXML_LOG( "XML parsing CDATA\n" );
		#endif
		TiXmlText* cdata = new TiXmlText( "" );
		cdata->SetCDATA( true );
		created = cdata;
	}
	else if ( StringEqual( p, dtdHeader, false, encoding ) )
	{
		#ifdef DEBUG_PARSER
			TIXML_LOG( "XML parsing Unknown(1)\n" );
		#endif
		created = new TiXmlUnknown();
	}
	else if (    IsAlpha( *(p+1), encoding )
			  || *(p+1) == '_' )
	{
		#ifdef DEBUG_PARSER
			TIXML_LOG( "XML parsing Element\n" );
		#endif
		created = new TiXmlElement( "" );
	}
	else
	{
		#ifdef DEBUG_PARSER
			TIXML_LOG( "XML parsing Unknown(2)\n" );
		#endif
		created = new TiXmlUnknown();
	}

	if ( !created )
	{
		if ( doc )
			doc->SetError( TIXML_ERROR_OUT_OF_MEMORY, 0, 0, TIXML_ENCODING_UNKNOWN );
		return created;
	}

	// Set the parent, so it can report errors
	created->parent = this;
	return created;
}
883
884
#ifdef TIXML_USE_STL
885
886
// Reads the remainder of this element from 'in', appending the raw
// characters to 'tag': first the rest of the opening tag, then (unless the
// tag is self-closing) any text and child nodes, up to and including this
// element's closing tag. Child nodes are identified and asked to stream
// themselves; they are temporary and deleted afterwards. Nothing is parsed
// here, only the text is collected.
// Returns early if the stream stops being good(); a character value <= 0
// records TIXML_ERROR_EMBEDDED_NULL on the document, if there is one.
void TiXmlElement::StreamIn (TIXML_ISTREAM * in, TIXML_STRING * tag)
{
	// We're called with some amount of pre-parsing. That is, some of "this"
	// element is in "tag". Go ahead and stream to the closing ">"
	while( in->good() )
	{
		int c = in->get();
		if ( c <= 0 )
		{
			TiXmlDocument* document = GetDocument();
			if ( document )
				document->SetError( TIXML_ERROR_EMBEDDED_NULL, 0, 0, TIXML_ENCODING_UNKNOWN );
			return;
		}
		(*tag) += (char) c ;
		
		if ( c == '>' )
			break;
	}

	// Too short to index the last two characters below.
	if ( tag->length() < 3 ) return;

	// Okay...if we are a "/>" tag, then we're done. We've read a complete tag.
	// If not, identify and stream.

	if (    tag->at( tag->length() - 1 ) == '>' 
		 && tag->at( tag->length() - 2 ) == '/' )
	{
		// All good!
		return;
	}
	else if ( tag->at( tag->length() - 1 ) == '>' )
	{
		// There is more. Could be:
		//		text
		//		closing tag
		//		another node.
		for ( ;; )
		{
			StreamWhiteSpace( in, tag );

			// Do we have text?
			if ( in->good() && in->peek() != '<' ) 
			{
				// Yep, text.
				TiXmlText text( "" );
				text.StreamIn( in, tag );

				// What follows text is a closing tag or another node.
				// Go around again and figure it out.
				continue;
			}

			// We now have either a closing tag...or another node.
			// We should be at a "<", regardless.
			if ( !in->good() ) return;
			assert( in->peek() == '<' );
			// Remember where the upcoming tag begins inside 'tag'.
			int tagIndex = (int) tag->length();

			bool closingTag = false;
			bool firstCharFound = false;

			// Read up to, but not including, the next '>'. The first
			// character after '<' that is not white space decides whether
			// this is a closing tag.
			for( ;; )
			{
				if ( !in->good() )
					return;

				int c = in->peek();
				if ( c <= 0 )
				{
					TiXmlDocument* document = GetDocument();
					if ( document )
						document->SetError( TIXML_ERROR_EMBEDDED_NULL, 0, 0, TIXML_ENCODING_UNKNOWN );
					return;
				}
				
				if ( c == '>' )
					break;

				*tag += (char) c;
				in->get();

				if ( !firstCharFound && c != '<' && !IsWhiteSpace( c ) )
				{
					firstCharFound = true;
					if ( c == '/' )
						closingTag = true;
				}
			}
			// If it was a closing tag, then read in the closing '>' to clean up the input stream.
			// If it was not, the streaming will be done by the tag.
			if ( closingTag )
			{
				if ( !in->good() )
					return;

				int c = in->get();
				if ( c <= 0 )
				{
					TiXmlDocument* document = GetDocument();
					if ( document )
						document->SetError( TIXML_ERROR_EMBEDDED_NULL, 0, 0, TIXML_ENCODING_UNKNOWN );
					return;
				}
				assert( c == '>' );
				*tag += (char) c;

				// We are done, once we've found our closing tag.
				return;
			}
			else
			{
				// If not a closing tag, id it, and stream.
				const char* tagloc = tag->c_str() + tagIndex;
				TiXmlNode* node = Identify( tagloc, TIXML_DEFAULT_ENCODING );
				if ( !node )
					return;
				node->StreamIn( in, tag );
				delete node;
				node = 0;

				// No return: go around from the beginning: text, closing tag, or node.
			}
		}
	}
}
1012
#endif
1013
1014
const char* TiXmlElement::Parse( const char* p, TiXmlParsingData* data, TiXmlEncoding encoding )
1015
{
1016
	p = SkipWhiteSpace( p, encoding );
1017
	TiXmlDocument* document = GetDocument();
1018
1019
	if ( !p || !*p )
1020
	{
1021
		if ( document ) document->SetError( TIXML_ERROR_PARSING_ELEMENT, 0, 0, encoding );
1022
		return 0;
1023
	}
1024
1025
	if ( data )
1026
	{
1027
		data->Stamp( p, encoding );
1028
		location = data->Cursor();
1029
	}
1030
1031
	if ( *p != '<' )
1032
	{
1033
		if ( document ) document->SetError( TIXML_ERROR_PARSING_ELEMENT, p, data, encoding );
1034
		return 0;
1035
	}
1036
1037
	p = SkipWhiteSpace( p+1, encoding );
1038
1039
	// Read the name.
1040
	const char* pErr = p;
1041
1042
    p = ReadName( p, &value, encoding );
1043
	if ( !p || !*p )
1044
	{
1045
		if ( document )	document->SetError( TIXML_ERROR_FAILED_TO_READ_ELEMENT_NAME, pErr, data, encoding );
1046
		return 0;
1047
	}
1048
1049
    TIXML_STRING endTag ("</");
1050
	endTag += value;
1051
	endTag += ">";
1052
1053
	// Check for and read attributes. Also look for an empty
1054
	// tag or an end tag.
1055
	while ( p && *p )
1056
	{
1057
		pErr = p;
1058
		p = SkipWhiteSpace( p, encoding );
1059
		if ( !p || !*p )
1060
		{
1061
			if ( document ) document->SetError( TIXML_ERROR_READING_ATTRIBUTES, pErr, data, encoding );
1062
			return 0;
1063
		}
1064
		if ( *p == '/' )
1065
		{
1066
			++p;
1067
			// Empty tag.
1068
			if ( *p  != '>' )
1069
			{
1070
				if ( document ) document->SetError( TIXML_ERROR_PARSING_EMPTY, p, data, encoding );		
1071
				return 0;
1072
			}
1073
			return (p+1);
1074
		}
1075
		else if ( *p == '>' )
1076
		{
1077
			// Done with attributes (if there were any.)
1078
			// Read the value -- which can include other
1079
			// elements -- read the end tag, and return.
1080
			++p;
1081
			p = ReadValue( p, data, encoding );		// Note this is an Element method, and will set the error if one happens.
1082
			if ( !p || !*p )
1083
				return 0;
1084
1085
			// We should find the end tag now
1086
			if ( StringEqual( p, endTag.c_str(), false, encoding ) )
1087
			{
1088
				p += endTag.length();
1089
				return p;
1090
			}
1091
			else
1092
			{
1093
				if ( document ) document->SetError( TIXML_ERROR_READING_END_TAG, p, data, encoding );
1094
				return 0;
1095
			}
1096
		}
1097
		else
1098
		{
1099
			// Try to read an attribute:
1100
			TiXmlAttribute* attrib = new TiXmlAttribute();
1101
			if ( !attrib )
1102
			{
1103
				if ( document ) document->SetError( TIXML_ERROR_OUT_OF_MEMORY, pErr, data, encoding );
1104
				return 0;
1105
			}
1106
1107
			attrib->SetDocument( document );
1108
			const char* pErr = p;
1109
			p = attrib->Parse( p, data, encoding );
1110
1111
			if ( !p || !*p )
1112
			{
1113
				if ( document ) document->SetError( TIXML_ERROR_PARSING_ELEMENT, pErr, data, encoding );
1114
				delete attrib;
1115
				return 0;
1116
			}
1117
1118
			// Handle the strange case of double attributes:
1119
			TiXmlAttribute* node = attributeSet.Find( attrib->Name() );
1120
			if ( node )
1121
			{
1122
				node->SetValue( attrib->Value() );
1123
				delete attrib;
1124
				return 0;
1125
			}
1126
1127
			attributeSet.Add( attrib );
1128
		}
1129
	}
1130
	return p;
1131
}
1132
1133
1134
const char* TiXmlElement::ReadValue( const char* p, TiXmlParsingData* data, TiXmlEncoding encoding )
1135
{
1136
	TiXmlDocument* document = GetDocument();
1137
1138
	// Read in text and elements in any order.
1139
	const char* pWithWhiteSpace = p;
1140
	p = SkipWhiteSpace( p, encoding );
1141
1142
	while ( p && *p )
1143
	{
1144
		if ( *p != '<' )
1145
		{
1146
			// Take what we have, make a text element.
1147
			TiXmlText* textNode = new TiXmlText( "" );
1148
1149
			if ( !textNode )
1150
			{
1151
				if ( document ) document->SetError( TIXML_ERROR_OUT_OF_MEMORY, 0, 0, encoding );
1152
				    return 0;
1153
			}
1154
1155
			if ( TiXmlBase::IsWhiteSpaceCondensed() )
1156
			{
1157
				p = textNode->Parse( p, data, encoding );
1158
			}
1159
			else
1160
			{
1161
				// Special case: we want to keep the white space
1162
				// so that leading spaces aren't removed.
1163
				p = textNode->Parse( pWithWhiteSpace, data, encoding );
1164
			}
1165
1166
			if ( !textNode->Blank() )
1167
				LinkEndChild( textNode );
1168
			else
1169
				delete textNode;
1170
		} 
1171
		else 
1172
		{
1173
			// We hit a '<'
1174
			// Have we hit a new element or an end tag? This could also be
1175
			// a TiXmlText in the "CDATA" style.
1176
			if ( StringEqual( p, "</", false, encoding ) )
1177
			{
1178
				return p;
1179
			}
1180
			else
1181
			{
1182
				TiXmlNode* node = Identify( p, encoding );
1183
				if ( node )
1184
				{
1185
					p = node->Parse( p, data, encoding );
1186
					LinkEndChild( node );
1187
				}				
1188
				else
1189
				{
1190
					return 0;
1191
				}
1192
			}
1193
		}
1194
		pWithWhiteSpace = p;
1195
		p = SkipWhiteSpace( p, encoding );
1196
	}
1197
1198
	if ( !p )
1199
	{
1200
		if ( document ) document->SetError( TIXML_ERROR_READING_ELEMENT_VALUE, 0, 0, encoding );
1201
	}	
1202
	return p;
1203
}
1204
1205
1206
#ifdef TIXML_USE_STL
1207
// Appends characters from 'in' to 'tag' until a '>' has been appended or
// the stream stops being good(). A read result <= 0 records
// TIXML_ERROR_EMBEDDED_NULL on the owning document (if any) and stops.
void TiXmlUnknown::StreamIn( TIXML_ISTREAM * in, TIXML_STRING * tag )
{
	while ( in->good() )
	{
		const int ch = in->get();
		if ( ch > 0 )
		{
			*tag += static_cast<char>( ch );
			if ( ch == '>' )
				return;		// Reached the end of the tag.
			continue;
		}

		// Null byte or failed read.
		TiXmlDocument* doc = GetDocument();
		if ( doc )
			doc->SetError( TIXML_ERROR_EMBEDDED_NULL, 0, 0, TIXML_ENCODING_UNKNOWN );
		return;
	}
}
}
1228
#endif
1229
1230
1231
const char* TiXmlUnknown::Parse( const char* p, TiXmlParsingData* data, TiXmlEncoding encoding )
1232
{
1233
	TiXmlDocument* document = GetDocument();
1234
	p = SkipWhiteSpace( p, encoding );
1235
1236
	if ( data )
1237
	{
1238
		data->Stamp( p, encoding );
1239
		location = data->Cursor();
1240
	}
1241
	if ( !p || !*p || *p != '<' )
1242
	{
1243
		if ( document ) document->SetError( TIXML_ERROR_PARSING_UNKNOWN, p, data, encoding );
1244
		return 0;
1245
	}
1246
	++p;
1247
    value = "";
1248
1249
	while ( p && *p && *p != '>' )
1250
	{
1251
		value += *p;
1252
		++p;
1253
	}
1254
1255
	if ( !p )
1256
	{
1257
		if ( document )	document->SetError( TIXML_ERROR_PARSING_UNKNOWN, 0, 0, encoding );
1258
	}
1259
	if ( *p == '>' )
1260
		return p+1;
1261
	return p;
1262
}
1263
1264
#ifdef TIXML_USE_STL
1265
// Appends characters from 'in' to 'tag' until the appended text ends in
// "-->" or the stream stops being good(). A read result <= 0 records
// TIXML_ERROR_EMBEDDED_NULL on the owning document (if any) and stops.
void TiXmlComment::StreamIn( TIXML_ISTREAM * in, TIXML_STRING * tag )
{
	while ( in->good() )
	{
		int c = in->get();	
		if ( c <= 0 )
		{
			TiXmlDocument* document = GetDocument();
			if ( document )
				document->SetError( TIXML_ERROR_EMBEDDED_NULL, 0, 0, TIXML_ENCODING_UNKNOWN );
			return;
		}

		(*tag) += (char) c;

		// Only look back for "--" when the tag is long enough; otherwise
		// 'length() - 2' / 'length() - 3' would wrap around (unsigned) and
		// at() would be asked for an out-of-range index.
		const size_t len = tag->length();
		if ( c == '>' 
			 && len >= 3
			 && tag->at( len - 2 ) == '-'
			 && tag->at( len - 3 ) == '-' )
		{
			// All is well.
			return;		
		}
	}
}
1289
#endif
1290
1291
1292
const char* TiXmlComment::Parse( const char* p, TiXmlParsingData* data, TiXmlEncoding encoding )
1293
{
1294
	TiXmlDocument* document = GetDocument();
1295
	value = "";
1296
1297
	p = SkipWhiteSpace( p, encoding );
1298
1299
	if ( data )
1300
	{
1301
		data->Stamp( p, encoding );
1302
		location = data->Cursor();
1303
	}
1304
	const char* startTag = "<!--";
1305
	const char* endTag   = "-->";
1306
1307
	if ( !StringEqual( p, startTag, false, encoding ) )
1308
	{
1309
		document->SetError( TIXML_ERROR_PARSING_COMMENT, p, data, encoding );
1310
		return 0;
1311
	}
1312
	p += strlen( startTag );
1313
	p = ReadText( p, &value, false, endTag, false, encoding );
1314
	return p;
1315
}
1316
1317
1318
const char* TiXmlAttribute::Parse( const char* p, TiXmlParsingData* data, TiXmlEncoding encoding )
1319
{
1320
	p = SkipWhiteSpace( p, encoding );
1321
	if ( !p || !*p ) return 0;
1322
1323
	int tabsize = 4;
1324
	if ( document )
1325
		tabsize = document->TabSize();
1326
1327
	if ( data )
1328
	{
1329
		data->Stamp( p, encoding );
1330
		location = data->Cursor();
1331
	}
1332
	// Read the name, the '=' and the value.
1333
	const char* pErr = p;
1334
	p = ReadName( p, &name, encoding );
1335
	if ( !p || !*p )
1336
	{
1337
		if ( document ) document->SetError( TIXML_ERROR_READING_ATTRIBUTES, pErr, data, encoding );
1338
		return 0;
1339
	}
1340
	p = SkipWhiteSpace( p, encoding );
1341
	if ( !p || !*p || *p != '=' )
1342
	{
1343
		if ( document ) document->SetError( TIXML_ERROR_READING_ATTRIBUTES, p, data, encoding );
1344
		return 0;
1345
	}
1346
1347
	++p;	// skip '='
1348
	p = SkipWhiteSpace( p, encoding );
1349
	if ( !p || !*p )
1350
	{
1351
		if ( document ) document->SetError( TIXML_ERROR_READING_ATTRIBUTES, p, data, encoding );
1352
		return 0;
1353
	}
1354
	
1355
	const char* end;
1356
1357
	if ( *p == '\'' )
1358
	{
1359
		++p;
1360
		end = "\'";
1361
		p = ReadText( p, &value, false, end, false, encoding );
1362
	}
1363
	else if ( *p == '"' )
1364
	{
1365
		++p;
1366
		end = "\"";
1367
		p = ReadText( p, &value, false, end, false, encoding );
1368
	}
1369
	else
1370
	{
1371
		// All attribute values should be in single or double quotes.
1372
		// But this is such a common error that the parser will try
1373
		// its best, even without them.
1374
		value = "";
1375
		while (    p && *p										// existence
1376
				&& !IsWhiteSpace( *p ) && *p != '\n' && *p != '\r'	// whitespace
1377
				&& *p != '/' && *p != '>' )						// tag end
1378
		{
1379
			value += *p;
1380
			++p;
1381
		}
1382
	}
1383
	return p;
1384
}
1385
1386
#ifdef TIXML_USE_STL
1387
// Appends the characters of a text node from 'in' to 'tag'.
//
//  - CDATA text: characters are consumed until the appended text ends in
//    "]]>" or the stream stops being good().
//  - Ordinary text: characters are consumed until the next character in the
//    stream is '<' (which is left unread) or the stream stops being good().
//
// In both modes a read result <= 0 records TIXML_ERROR_EMBEDDED_NULL on the
// owning document (if any) and stops.
void TiXmlText::StreamIn( TIXML_ISTREAM * in, TIXML_STRING * tag )
{
	if ( cdata )
	{
		// Keep reading until the terminator: a single read would leave the
		// rest of the CDATA section in the stream.
		while ( in->good() )
		{
			int c = in->get();	
			if ( c <= 0 )
			{
				TiXmlDocument* document = GetDocument();
				if ( document )
					document->SetError( TIXML_ERROR_EMBEDDED_NULL, 0, 0, TIXML_ENCODING_UNKNOWN );
				return;
			}

			(*tag) += (char) c;

			// Only look back for "]]" when the tag is long enough; otherwise
			// the unsigned subtraction would wrap around.
			const size_t len = tag->length();
			if ( c == '>' 
				 && len >= 3
				 && tag->at( len - 2 ) == ']'
				 && tag->at( len - 3 ) == ']' )
			{
				// All is well.
				return;		
			}
		}
	}
	else
	{
		while ( in->good() )
		{
			// Peek first so that a '<' is left in the stream for the caller.
			int c = in->peek();	
			if ( c == '<' )
				return;
			if ( c <= 0 )
			{
				TiXmlDocument* document = GetDocument();
				if ( document )
					document->SetError( TIXML_ERROR_EMBEDDED_NULL, 0, 0, TIXML_ENCODING_UNKNOWN );
				return;
			}

			(*tag) += (char) c;
			in->get();	// consume the character that was peeked
		}
	}
}
1430
#endif
1431
1432
const char* TiXmlText::Parse( const char* p, TiXmlParsingData* data, TiXmlEncoding encoding )
1433
{
1434
	value = "";
1435
	TiXmlDocument* document = GetDocument();
1436
1437
	if ( data )
1438
	{
1439
		data->Stamp( p, encoding );
1440
		location = data->Cursor();
1441
	}
1442
1443
	const char* const startTag = "<![CDATA[";
1444
	const char* const endTag   = "]]>";
1445
1446
	if ( cdata || StringEqual( p, startTag, false, encoding ) )
1447
	{
1448
		cdata = true;
1449
1450
		if ( !StringEqual( p, startTag, false, encoding ) )
1451
		{
1452
			document->SetError( TIXML_ERROR_PARSING_CDATA, p, data, encoding );
1453
			return 0;
1454
		}
1455
		p += strlen( startTag );
1456
1457
		// Keep all the white space, ignore the encoding, etc.
1458
		while (	   p && *p
1459
				&& !StringEqual( p, endTag, false, encoding )
1460
			  )
1461
		{
1462
			value += *p;
1463
			++p;
1464
		}
1465
1466
		TIXML_STRING dummy; 
1467
		p = ReadText( p, &dummy, false, endTag, false, encoding );
1468
		return p;
1469
	}
1470
	else
1471
	{
1472
		bool ignoreWhite = true;
1473
1474
		const char* end = "<";
1475
		p = ReadText( p, &value, ignoreWhite, end, false, encoding );
1476
		if ( p )
1477
			return p-1;	// don't truncate the '<'
1478
		return 0;
1479
	}
1480
}
1481
1482
#ifdef TIXML_USE_STL
1483
// Appends characters from 'in' to 'tag' until a '>' has been appended or
// the stream stops being good(). A read result <= 0 records
// TIXML_ERROR_EMBEDDED_NULL on the owning document (if any) and stops.
void TiXmlDeclaration::StreamIn( TIXML_ISTREAM * in, TIXML_STRING * tag )
{
	for ( ;; )
	{
		if ( !in->good() )
			return;

		const int next = in->get();
		if ( next <= 0 )
		{
			// Null byte or failed read.
			if ( TiXmlDocument* doc = GetDocument() )
				doc->SetError( TIXML_ERROR_EMBEDDED_NULL, 0, 0, TIXML_ENCODING_UNKNOWN );
			return;
		}

		*tag += static_cast<char>( next );

		// The declaration is complete once its closing '>' is stored.
		if ( next == '>' )
			return;
	}
}
1504
#endif
1505
1506
const char* TiXmlDeclaration::Parse( const char* p, TiXmlParsingData* data, TiXmlEncoding _encoding )
1507
{
1508
	p = SkipWhiteSpace( p, _encoding );
1509
	// Find the beginning, find the end, and look for
1510
	// the stuff in-between.
1511
	TiXmlDocument* document = GetDocument();
1512
	if ( !p || !*p || !StringEqual( p, "<?xml", true, _encoding ) )
1513
	{
1514
		if ( document ) document->SetError( TIXML_ERROR_PARSING_DECLARATION, 0, 0, _encoding );
1515
		return 0;
1516
	}
1517
	if ( data )
1518
	{
1519
		data->Stamp( p, _encoding );
1520
		location = data->Cursor();
1521
	}
1522
	p += 5;
1523
1524
	version = "";
1525
	encoding = "";
1526
	standalone = "";
1527
1528
	while ( p && *p )
1529
	{
1530
		if ( *p == '>' )
1531
		{
1532
			++p;
1533
			return p;
1534
		}
1535
1536
		p = SkipWhiteSpace( p, _encoding );
1537
		if ( StringEqual( p, "version", true, _encoding ) )
1538
		{
1539
			TiXmlAttribute attrib;
1540
			p = attrib.Parse( p, data, _encoding );		
1541
			version = attrib.Value();
1542
		}
1543
		else if ( StringEqual( p, "encoding", true, _encoding ) )
1544
		{
1545
			TiXmlAttribute attrib;
1546
			p = attrib.Parse( p, data, _encoding );		
1547
			encoding = attrib.Value();
1548
		}
1549
		else if ( StringEqual( p, "standalone", true, _encoding ) )
1550
		{
1551
			TiXmlAttribute attrib;
1552
			p = attrib.Parse( p, data, _encoding );		
1553
			standalone = attrib.Value();
1554
		}
1555
		else
1556
		{
1557
			// Read over whatever it is.
1558
			while( p && *p && *p != '>' && !IsWhiteSpace( *p ) )
1559
				++p;
1560
		}
1561
	}
1562
	return 0;
1563
}
1564
1565
bool TiXmlText::Blank() const
1566
{
1567
	for ( unsigned i=0; i<value.length(); i++ )
1568
		if ( !IsWhiteSpace( value[i] ) )
1569
			return false;
1570
	return true;
1571
}