1
/*
2
 * regexp.c: generic and extensible Regular Expression engine
3
 *
4
 * Basically designed with the purpose of compiling regexps for 
5
 * the variety of validation/shemas mechanisms now available in
6
 * XML related specifications these include:
7
 *    - XML-1.0 DTD validation
8
 *    - XML Schemas structure part 1
9
 *    - XML Schemas Datatypes part 2 especially Appendix F
10
 *    - RELAX-NG/TREX i.e. the counter proposal
11
 *
12
 * See Copyright for the status of this software.
13
 *
14
 * Daniel Veillard <veillard@redhat.com>
15
 */
16
17
#define IN_LIBXML
18
#include "libxml.h"
19
20
#ifdef LIBXML_REGEXP_ENABLED
21
22
/* #define DEBUG_ERR */
23
24
#include <stdio.h>
25
#include <string.h>
26
#ifdef HAVE_LIMITS_H
27
#include <limits.h>
28
#endif
29
30
#include <libxml/tree.h>
31
#include <libxml/parserInternals.h>
32
#include <libxml/xmlregexp.h>
33
#include <libxml/xmlautomata.h>
34
#include <libxml/xmlunicode.h>
35
36
#ifndef INT_MAX
37
#define INT_MAX 123456789 /* easy to flag and big enough for our needs */
38
#endif
39
40
/* #define DEBUG_REGEXP_GRAPH */
41
/* #define DEBUG_REGEXP_EXEC */
42
/* #define DEBUG_PUSH */
43
/* #define DEBUG_COMPACTION */
44
45
#define MAX_PUSH 10000000
46
47
#define ERROR(str)							\
48
    ctxt->error = XML_REGEXP_COMPILE_ERROR;				\
49
    xmlRegexpErrCompile(ctxt, str);
50
#define NEXT ctxt->cur++
51
#define CUR (*(ctxt->cur))
52
#define NXT(index) (ctxt->cur[index])
53
54
#define CUR_SCHAR(s, l) xmlStringCurrentChar(NULL, s, &l)
55
#define NEXTL(l) ctxt->cur += l;
56
#define XML_REG_STRING_SEPARATOR '|'
57
/*
58
 * Need PREV to check on a '-' within a Character Group. May only be used
59
 * when it's guaranteed that cur is not at the beginning of ctxt->string!
60
 */
61
#define PREV (ctxt->cur[-1])
62
63
/**
64
 * TODO:
65
 *
66
 * macro to flag unimplemented blocks
67
 */
68
#define TODO 								\
69
    xmlGenericError(xmlGenericErrorContext,				\
70
	    "Unimplemented block at %s:%d\n",				\
71
            __FILE__, __LINE__);
72
73
/************************************************************************
74
 * 									*
75
 * 			Datatypes and structures			*
76
 * 									*
77
 ************************************************************************/
78
79
/*
80
 * Note: the order of the enums below is significant, do not shuffle
81
 */
82
typedef enum {
83
    XML_REGEXP_EPSILON = 1,
84
    XML_REGEXP_CHARVAL,
85
    XML_REGEXP_RANGES,
86
    XML_REGEXP_SUBREG,  /* used for () sub regexps */
87
    XML_REGEXP_STRING,
88
    XML_REGEXP_ANYCHAR, /* . */
89
    XML_REGEXP_ANYSPACE, /* \s */
90
    XML_REGEXP_NOTSPACE, /* \S */
91
    XML_REGEXP_INITNAME, /* \l */
92
    XML_REGEXP_NOTINITNAME, /* \L */
93
    XML_REGEXP_NAMECHAR, /* \c */
94
    XML_REGEXP_NOTNAMECHAR, /* \C */
95
    XML_REGEXP_DECIMAL, /* \d */
96
    XML_REGEXP_NOTDECIMAL, /* \D */
97
    XML_REGEXP_REALCHAR, /* \w */
98
    XML_REGEXP_NOTREALCHAR, /* \W */
99
    XML_REGEXP_LETTER = 100,
100
    XML_REGEXP_LETTER_UPPERCASE,
101
    XML_REGEXP_LETTER_LOWERCASE,
102
    XML_REGEXP_LETTER_TITLECASE,
103
    XML_REGEXP_LETTER_MODIFIER,
104
    XML_REGEXP_LETTER_OTHERS,
105
    XML_REGEXP_MARK,
106
    XML_REGEXP_MARK_NONSPACING,
107
    XML_REGEXP_MARK_SPACECOMBINING,
108
    XML_REGEXP_MARK_ENCLOSING,
109
    XML_REGEXP_NUMBER,
110
    XML_REGEXP_NUMBER_DECIMAL,
111
    XML_REGEXP_NUMBER_LETTER,
112
    XML_REGEXP_NUMBER_OTHERS,
113
    XML_REGEXP_PUNCT,
114
    XML_REGEXP_PUNCT_CONNECTOR,
115
    XML_REGEXP_PUNCT_DASH,
116
    XML_REGEXP_PUNCT_OPEN,
117
    XML_REGEXP_PUNCT_CLOSE,
118
    XML_REGEXP_PUNCT_INITQUOTE,
119
    XML_REGEXP_PUNCT_FINQUOTE,
120
    XML_REGEXP_PUNCT_OTHERS,
121
    XML_REGEXP_SEPAR,
122
    XML_REGEXP_SEPAR_SPACE,
123
    XML_REGEXP_SEPAR_LINE,
124
    XML_REGEXP_SEPAR_PARA,
125
    XML_REGEXP_SYMBOL,
126
    XML_REGEXP_SYMBOL_MATH,
127
    XML_REGEXP_SYMBOL_CURRENCY,
128
    XML_REGEXP_SYMBOL_MODIFIER,
129
    XML_REGEXP_SYMBOL_OTHERS,
130
    XML_REGEXP_OTHER,
131
    XML_REGEXP_OTHER_CONTROL,
132
    XML_REGEXP_OTHER_FORMAT,
133
    XML_REGEXP_OTHER_PRIVATE,
134
    XML_REGEXP_OTHER_NA,
135
    XML_REGEXP_BLOCK_NAME
136
} xmlRegAtomType;
137
138
typedef enum {
139
    XML_REGEXP_QUANT_EPSILON = 1,
140
    XML_REGEXP_QUANT_ONCE,
141
    XML_REGEXP_QUANT_OPT,
142
    XML_REGEXP_QUANT_MULT,
143
    XML_REGEXP_QUANT_PLUS,
144
    XML_REGEXP_QUANT_ONCEONLY,
145
    XML_REGEXP_QUANT_ALL,
146
    XML_REGEXP_QUANT_RANGE
147
} xmlRegQuantType;
148
149
typedef enum {
150
    XML_REGEXP_START_STATE = 1,
151
    XML_REGEXP_FINAL_STATE,
152
    XML_REGEXP_TRANS_STATE,
153
    XML_REGEXP_SINK_STATE,
154
    XML_REGEXP_UNREACH_STATE
155
} xmlRegStateType;
156
157
typedef enum {
158
    XML_REGEXP_MARK_NORMAL = 0,
159
    XML_REGEXP_MARK_START,
160
    XML_REGEXP_MARK_VISITED
161
} xmlRegMarkedType;
162
163
typedef struct _xmlRegRange xmlRegRange;
164
typedef xmlRegRange *xmlRegRangePtr;
165
166
struct _xmlRegRange {
167
    int neg;		/* 0 normal, 1 not, 2 exclude */
168
    xmlRegAtomType type;
169
    int start;
170
    int end;
171
    xmlChar *blockName;
172
};
173
174
typedef struct _xmlRegAtom xmlRegAtom;
175
typedef xmlRegAtom *xmlRegAtomPtr;
176
177
typedef struct _xmlAutomataState xmlRegState;
178
typedef xmlRegState *xmlRegStatePtr;
179
180
struct _xmlRegAtom {
181
    int no;
182
    xmlRegAtomType type;
183
    xmlRegQuantType quant;
184
    int min;
185
    int max;
186
187
    void *valuep;
188
    void *valuep2;
189
    int neg;
190
    int codepoint;
191
    xmlRegStatePtr start;
192
    xmlRegStatePtr start0;
193
    xmlRegStatePtr stop;
194
    int maxRanges;
195
    int nbRanges;
196
    xmlRegRangePtr *ranges;
197
    void *data;
198
};
199
200
typedef struct _xmlRegCounter xmlRegCounter;
201
typedef xmlRegCounter *xmlRegCounterPtr;
202
203
struct _xmlRegCounter {
204
    int min;
205
    int max;
206
};
207
208
typedef struct _xmlRegTrans xmlRegTrans;
209
typedef xmlRegTrans *xmlRegTransPtr;
210
211
struct _xmlRegTrans {
212
    xmlRegAtomPtr atom;
213
    int to;
214
    int counter;
215
    int count;
216
    int nd;
217
};
218
219
struct _xmlAutomataState {
220
    xmlRegStateType type;
221
    xmlRegMarkedType mark;
222
    xmlRegMarkedType reached;
223
    int no;
224
    int maxTrans;
225
    int nbTrans;
226
    xmlRegTrans *trans;
227
    /*  knowing states ponting to us can speed things up */
228
    int maxTransTo;
229
    int nbTransTo;
230
    int *transTo;
231
};
232
233
typedef struct _xmlAutomata xmlRegParserCtxt;
234
typedef xmlRegParserCtxt *xmlRegParserCtxtPtr;
235
236
struct _xmlAutomata {
237
    xmlChar *string;
238
    xmlChar *cur;
239
240
    int error;
241
    int neg;
242
243
    xmlRegStatePtr start;
244
    xmlRegStatePtr end;
245
    xmlRegStatePtr state;
246
247
    xmlRegAtomPtr atom;
248
249
    int maxAtoms;
250
    int nbAtoms;
251
    xmlRegAtomPtr *atoms;
252
253
    int maxStates;
254
    int nbStates;
255
    xmlRegStatePtr *states;
256
257
    int maxCounters;
258
    int nbCounters;
259
    xmlRegCounter *counters;
260
261
    int determinist;
262
    int negs;
263
};
264
265
struct _xmlRegexp {
266
    xmlChar *string;
267
    int nbStates;
268
    xmlRegStatePtr *states;
269
    int nbAtoms;
270
    xmlRegAtomPtr *atoms;
271
    int nbCounters;
272
    xmlRegCounter *counters;
273
    int determinist;
274
    /*
275
     * That's the compact form for determinists automatas
276
     */
277
    int nbstates;
278
    int *compact;
279
    void **transdata;
280
    int nbstrings;
281
    xmlChar **stringMap;
282
};
283
284
typedef struct _xmlRegExecRollback xmlRegExecRollback;
285
typedef xmlRegExecRollback *xmlRegExecRollbackPtr;
286
287
struct _xmlRegExecRollback {
288
    xmlRegStatePtr state;/* the current state */
289
    int index;		/* the index in the input stack */
290
    int nextbranch;	/* the next transition to explore in that state */
291
    int *counts;	/* save the automata state if it has some */
292
};
293
294
typedef struct _xmlRegInputToken xmlRegInputToken;
295
typedef xmlRegInputToken *xmlRegInputTokenPtr;
296
297
struct _xmlRegInputToken {
298
    xmlChar *value;
299
    void *data;
300
};
301
302
struct _xmlRegExecCtxt {
303
    int status;		/* execution status != 0 indicate an error */
304
    int determinist;	/* did we find an indeterministic behaviour */
305
    xmlRegexpPtr comp;	/* the compiled regexp */
306
    xmlRegExecCallbacks callback;
307
    void *data;
308
309
    xmlRegStatePtr state;/* the current state */
310
    int transno;	/* the current transition on that state */
311
    int transcount;	/* the number of chars in char counted transitions */
312
313
    /*
314
     * A stack of rollback states
315
     */
316
    int maxRollbacks;
317
    int nbRollbacks;
318
    xmlRegExecRollback *rollbacks;
319
320
    /*
321
     * The state of the automata if any
322
     */
323
    int *counts;
324
325
    /*
326
     * The input stack
327
     */
328
    int inputStackMax;
329
    int inputStackNr;
330
    int index;
331
    int *charStack;
332
    const xmlChar *inputString; /* when operating on characters */
333
    xmlRegInputTokenPtr inputStack;/* when operating on strings */
334
335
    /*
336
     * error handling
337
     */
338
    int errStateNo;		/* the error state number */
339
    xmlRegStatePtr errState;    /* the error state */
340
    xmlChar *errString;		/* the string raising the error */
341
    int *errCounts;		/* counters at the error state */
342
    int nbPush;
343
};
344
345
#define REGEXP_ALL_COUNTER	0x123456
346
#define REGEXP_ALL_LAX_COUNTER	0x123457
347
348
static void xmlFAParseRegExp(xmlRegParserCtxtPtr ctxt, int top);
349
static void xmlRegFreeState(xmlRegStatePtr state);
350
static void xmlRegFreeAtom(xmlRegAtomPtr atom);
351
static int xmlRegStrEqualWildcard(const xmlChar *expStr, const xmlChar *valStr);
352
static int xmlRegCheckCharacter(xmlRegAtomPtr atom, int codepoint);
353
static int xmlRegCheckCharacterRange(xmlRegAtomType type, int codepoint,
354
                  int neg, int start, int end, const xmlChar *blockName);
355
356
/************************************************************************
357
 *									*
358
 * 		Regexp memory error handler				*
359
 *									*
360
 ************************************************************************/
361
/**
362
 * xmlRegexpErrMemory:
363
 * @extra:  extra information
364
 *
365
 * Handle an out of memory condition
366
 */
367
static void
368
xmlRegexpErrMemory(xmlRegParserCtxtPtr ctxt, const char *extra)
369
{
370
    const char *regexp = NULL;
371
    if (ctxt != NULL) {
372
        regexp = (const char *) ctxt->string;
373
	ctxt->error = XML_ERR_NO_MEMORY;
374
    }
375
    __xmlRaiseError(NULL, NULL, NULL, NULL, NULL, XML_FROM_REGEXP,
376
		    XML_ERR_NO_MEMORY, XML_ERR_FATAL, NULL, 0, extra,
377
		    regexp, NULL, 0, 0,
378
		    "Memory allocation failed : %s\n", extra);
379
}
380
381
/**
382
 * xmlRegexpErrCompile:
383
 * @extra:  extra information
384
 *
385
 * Handle a compilation failure
386
 */
387
static void
388
xmlRegexpErrCompile(xmlRegParserCtxtPtr ctxt, const char *extra)
389
{
390
    const char *regexp = NULL;
391
    int idx = 0;
392
393
    if (ctxt != NULL) {
394
        regexp = (const char *) ctxt->string;
395
	idx = ctxt->cur - ctxt->string;
396
	ctxt->error = XML_REGEXP_COMPILE_ERROR;
397
    }
398
    __xmlRaiseError(NULL, NULL, NULL, NULL, NULL, XML_FROM_REGEXP,
399
		    XML_REGEXP_COMPILE_ERROR, XML_ERR_FATAL, NULL, 0, extra,
400
		    regexp, NULL, idx, 0,
401
		    "failed to compile: %s\n", extra);
402
}
403
404
/************************************************************************
405
 * 									*
406
 * 			Allocation/Deallocation				*
407
 * 									*
408
 ************************************************************************/
409
410
static int xmlFAComputesDeterminism(xmlRegParserCtxtPtr ctxt);
411
/**
412
 * xmlRegEpxFromParse:
413
 * @ctxt:  the parser context used to build it
414
 *
415
 * Allocate a new regexp and fill it with the result from the parser
416
 *
417
 * Returns the new regexp or NULL in case of error
418
 */
419
static xmlRegexpPtr
420
xmlRegEpxFromParse(xmlRegParserCtxtPtr ctxt) {
421
    xmlRegexpPtr ret;
422
423
    ret = (xmlRegexpPtr) xmlMalloc(sizeof(xmlRegexp));
424
    if (ret == NULL) {
425
	xmlRegexpErrMemory(ctxt, "compiling regexp");
426
	return(NULL);
427
    }
428
    memset(ret, 0, sizeof(xmlRegexp));
429
    ret->string = ctxt->string;
430
    ret->nbStates = ctxt->nbStates;
431
    ret->states = ctxt->states;
432
    ret->nbAtoms = ctxt->nbAtoms;
433
    ret->atoms = ctxt->atoms;
434
    ret->nbCounters = ctxt->nbCounters;
435
    ret->counters = ctxt->counters;
436
    ret->determinist = ctxt->determinist;
437
    if (ret->determinist == -1) {
438
        xmlRegexpIsDeterminist(ret);
439
    }
440
441
    if ((ret->determinist != 0) &&
442
	(ret->nbCounters == 0) &&
443
	(ctxt->negs == 0) &&
444
	(ret->atoms != NULL) &&
445
	(ret->atoms[0] != NULL) &&
446
	(ret->atoms[0]->type == XML_REGEXP_STRING)) {
447
	int i, j, nbstates = 0, nbatoms = 0;
448
	int *stateRemap;
449
	int *stringRemap;
450
	int *transitions;
451
	void **transdata;
452
	xmlChar **stringMap;
453
        xmlChar *value;
454
455
	/*
456
	 * Switch to a compact representation
457
	 * 1/ counting the effective number of states left
458
	 * 2/ counting the unique number of atoms, and check that
459
	 *    they are all of the string type
460
	 * 3/ build a table state x atom for the transitions
461
	 */
462
463
	stateRemap = xmlMalloc(ret->nbStates * sizeof(int));
464
	if (stateRemap == NULL) {
465
	    xmlRegexpErrMemory(ctxt, "compiling regexp");
466
	    xmlFree(ret);
467
	    return(NULL);
468
	}
469
	for (i = 0;i < ret->nbStates;i++) {
470
	    if (ret->states[i] != NULL) {
471
		stateRemap[i] = nbstates;
472
		nbstates++;
473
	    } else {
474
		stateRemap[i] = -1;
475
	    }
476
	}
477
#ifdef DEBUG_COMPACTION
478
	printf("Final: %d states\n", nbstates);
479
#endif
480
	stringMap = xmlMalloc(ret->nbAtoms * sizeof(char *));
481
	if (stringMap == NULL) {
482
	    xmlRegexpErrMemory(ctxt, "compiling regexp");
483
	    xmlFree(stateRemap);
484
	    xmlFree(ret);
485
	    return(NULL);
486
	}
487
	stringRemap = xmlMalloc(ret->nbAtoms * sizeof(int));
488
	if (stringRemap == NULL) {
489
	    xmlRegexpErrMemory(ctxt, "compiling regexp");
490
	    xmlFree(stringMap);
491
	    xmlFree(stateRemap);
492
	    xmlFree(ret);
493
	    return(NULL);
494
	}
495
	for (i = 0;i < ret->nbAtoms;i++) {
496
	    if ((ret->atoms[i]->type == XML_REGEXP_STRING) &&
497
		(ret->atoms[i]->quant == XML_REGEXP_QUANT_ONCE)) {
498
		value = ret->atoms[i]->valuep;
499
                for (j = 0;j < nbatoms;j++) {
500
		    if (xmlStrEqual(stringMap[j], value)) {
501
			stringRemap[i] = j;
502
			break;
503
		    }
504
		}
505
		if (j >= nbatoms) {
506
		    stringRemap[i] = nbatoms;
507
		    stringMap[nbatoms] = xmlStrdup(value);
508
		    if (stringMap[nbatoms] == NULL) {
509
			for (i = 0;i < nbatoms;i++)
510
			    xmlFree(stringMap[i]);
511
			xmlFree(stringRemap);
512
			xmlFree(stringMap);
513
			xmlFree(stateRemap);
514
			xmlFree(ret);
515
			return(NULL);
516
		    }
517
		    nbatoms++;
518
		}
519
	    } else {
520
		xmlFree(stateRemap);
521
		xmlFree(stringRemap);
522
		for (i = 0;i < nbatoms;i++)
523
		    xmlFree(stringMap[i]);
524
		xmlFree(stringMap);
525
		xmlFree(ret);
526
		return(NULL);
527
	    }
528
	}
529
#ifdef DEBUG_COMPACTION
530
	printf("Final: %d atoms\n", nbatoms);
531
#endif
532
	transitions = (int *) xmlMalloc((nbstates + 1) *
533
	                                (nbatoms + 1) * sizeof(int));
534
	if (transitions == NULL) {
535
	    xmlFree(stateRemap);
536
	    xmlFree(stringRemap);
537
	    xmlFree(stringMap);
538
	    xmlFree(ret);
539
	    return(NULL);
540
	}
541
	memset(transitions, 0, (nbstates + 1) * (nbatoms + 1) * sizeof(int));
542
543
	/*
544
	 * Allocate the transition table. The first entry for each
545
	 * state corresponds to the state type.
546
	 */
547
	transdata = NULL;
548
549
	for (i = 0;i < ret->nbStates;i++) {
550
	    int stateno, atomno, targetno, prev;
551
	    xmlRegStatePtr state;
552
	    xmlRegTransPtr trans;
553
554
	    stateno = stateRemap[i];
555
	    if (stateno == -1)
556
		continue;
557
	    state = ret->states[i];
558
559
	    transitions[stateno * (nbatoms + 1)] = state->type;
560
561
	    for (j = 0;j < state->nbTrans;j++) {
562
		trans = &(state->trans[j]);
563
		if ((trans->to == -1) || (trans->atom == NULL))
564
		    continue;
565
                atomno = stringRemap[trans->atom->no];
566
		if ((trans->atom->data != NULL) && (transdata == NULL)) {
567
		    transdata = (void **) xmlMalloc(nbstates * nbatoms *
568
			                            sizeof(void *));
569
		    if (transdata != NULL)
570
			memset(transdata, 0,
571
			       nbstates * nbatoms * sizeof(void *));
572
		    else {
573
			xmlRegexpErrMemory(ctxt, "compiling regexp");
574
			break;
575
		    }
576
		}
577
		targetno = stateRemap[trans->to];
578
		/*
579
		 * if the same atom can generate transitions to 2 different
580
		 * states then it means the automata is not determinist and
581
		 * the compact form can't be used !
582
		 */
583
		prev = transitions[stateno * (nbatoms + 1) + atomno + 1];
584
		if (prev != 0) {
585
		    if (prev != targetno + 1) {
586
			ret->determinist = 0;
587
#ifdef DEBUG_COMPACTION
588
			printf("Indet: state %d trans %d, atom %d to %d : %d to %d\n",
589
			       i, j, trans->atom->no, trans->to, atomno, targetno);
590
			printf("       previous to is %d\n", prev);
591
#endif
592
			if (transdata != NULL)
593
			    xmlFree(transdata);
594
			xmlFree(transitions);
595
			xmlFree(stateRemap);
596
			xmlFree(stringRemap);
597
			for (i = 0;i < nbatoms;i++)
598
			    xmlFree(stringMap[i]);
599
			xmlFree(stringMap);
600
			goto not_determ;
601
		    }
602
		} else {
603
#if 0
604
		    printf("State %d trans %d: atom %d to %d : %d to %d\n",
605
			   i, j, trans->atom->no, trans->to, atomno, targetno);
606
#endif
607
		    transitions[stateno * (nbatoms + 1) + atomno + 1] =
608
			targetno + 1; /* to avoid 0 */
609
		    if (transdata != NULL)
610
			transdata[stateno * nbatoms + atomno] =
611
			    trans->atom->data;
612
		}
613
	    }
614
	}
615
	ret->determinist = 1;
616
#ifdef DEBUG_COMPACTION
617
	/*
618
	 * Debug
619
	 */
620
	for (i = 0;i < nbstates;i++) {
621
	    for (j = 0;j < nbatoms + 1;j++) {
622
                printf("%02d ", transitions[i * (nbatoms + 1) + j]);
623
	    }
624
	    printf("\n");
625
	}
626
	printf("\n");
627
#endif
628
	/*
629
	 * Cleanup of the old data
630
	 */
631
	if (ret->states != NULL) {
632
	    for (i = 0;i < ret->nbStates;i++)
633
		xmlRegFreeState(ret->states[i]);
634
	    xmlFree(ret->states);
635
	}
636
	ret->states = NULL;
637
	ret->nbStates = 0;
638
	if (ret->atoms != NULL) {
639
	    for (i = 0;i < ret->nbAtoms;i++)
640
		xmlRegFreeAtom(ret->atoms[i]);
641
	    xmlFree(ret->atoms);
642
	}
643
	ret->atoms = NULL;
644
	ret->nbAtoms = 0;
645
646
	ret->compact = transitions;
647
	ret->transdata = transdata;
648
	ret->stringMap = stringMap;
649
	ret->nbstrings = nbatoms;
650
	ret->nbstates = nbstates;
651
	xmlFree(stateRemap);
652
	xmlFree(stringRemap);
653
    }
654
not_determ:
655
    ctxt->string = NULL;
656
    ctxt->nbStates = 0;
657
    ctxt->states = NULL;
658
    ctxt->nbAtoms = 0;
659
    ctxt->atoms = NULL;
660
    ctxt->nbCounters = 0;
661
    ctxt->counters = NULL;
662
    return(ret);
663
}
664
665
/**
666
 * xmlRegNewParserCtxt:
667
 * @string:  the string to parse
668
 *
669
 * Allocate a new regexp parser context
670
 *
671
 * Returns the new context or NULL in case of error
672
 */
673
static xmlRegParserCtxtPtr
674
xmlRegNewParserCtxt(const xmlChar *string) {
675
    xmlRegParserCtxtPtr ret;
676
677
    ret = (xmlRegParserCtxtPtr) xmlMalloc(sizeof(xmlRegParserCtxt));
678
    if (ret == NULL)
679
	return(NULL);
680
    memset(ret, 0, sizeof(xmlRegParserCtxt));
681
    if (string != NULL)
682
	ret->string = xmlStrdup(string);
683
    ret->cur = ret->string;
684
    ret->neg = 0;
685
    ret->negs = 0;
686
    ret->error = 0;
687
    ret->determinist = -1;
688
    return(ret);
689
}
690
691
/**
692
 * xmlRegNewRange:
693
 * @ctxt:  the regexp parser context
694
 * @neg:  is that negative
695
 * @type:  the type of range
696
 * @start:  the start codepoint
697
 * @end:  the end codepoint
698
 *
699
 * Allocate a new regexp range
700
 *
701
 * Returns the new range or NULL in case of error
702
 */
703
static xmlRegRangePtr
704
xmlRegNewRange(xmlRegParserCtxtPtr ctxt,
705
	       int neg, xmlRegAtomType type, int start, int end) {
706
    xmlRegRangePtr ret;
707
708
    ret = (xmlRegRangePtr) xmlMalloc(sizeof(xmlRegRange));
709
    if (ret == NULL) {
710
	xmlRegexpErrMemory(ctxt, "allocating range");
711
	return(NULL);
712
    }
713
    ret->neg = neg;
714
    ret->type = type;
715
    ret->start = start;
716
    ret->end = end;
717
    return(ret);
718
}
719
720
/**
721
 * xmlRegFreeRange:
722
 * @range:  the regexp range
723
 *
724
 * Free a regexp range
725
 */
726
static void
727
xmlRegFreeRange(xmlRegRangePtr range) {
728
    if (range == NULL)
729
	return;
730
731
    if (range->blockName != NULL)
732
	xmlFree(range->blockName);
733
    xmlFree(range);
734
}
735
736
/**
737
 * xmlRegCopyRange:
738
 * @range:  the regexp range
739
 *
740
 * Copy a regexp range
741
 *
742
 * Returns the new copy or NULL in case of error.
743
 */
744
static xmlRegRangePtr
745
xmlRegCopyRange(xmlRegParserCtxtPtr ctxt, xmlRegRangePtr range) {
746
    xmlRegRangePtr ret;
747
748
    if (range == NULL)
749
	return(NULL);
750
751
    ret = xmlRegNewRange(ctxt, range->neg, range->type, range->start,
752
                         range->end);
753
    if (ret == NULL)
754
        return(NULL);
755
    if (range->blockName != NULL) {
756
	ret->blockName = xmlStrdup(range->blockName);
757
	if (ret->blockName == NULL) {
758
	    xmlRegexpErrMemory(ctxt, "allocating range");
759
	    xmlRegFreeRange(ret);
760
	    return(NULL);
761
	}
762
    }
763
    return(ret);
764
}
765
766
/**
767
 * xmlRegNewAtom:
768
 * @ctxt:  the regexp parser context
769
 * @type:  the type of atom
770
 *
771
 * Allocate a new atom
772
 *
773
 * Returns the new atom or NULL in case of error
774
 */
775
static xmlRegAtomPtr
776
xmlRegNewAtom(xmlRegParserCtxtPtr ctxt, xmlRegAtomType type) {
777
    xmlRegAtomPtr ret;
778
779
    ret = (xmlRegAtomPtr) xmlMalloc(sizeof(xmlRegAtom));
780
    if (ret == NULL) {
781
	xmlRegexpErrMemory(ctxt, "allocating atom");
782
	return(NULL);
783
    }
784
    memset(ret, 0, sizeof(xmlRegAtom));
785
    ret->type = type;
786
    ret->quant = XML_REGEXP_QUANT_ONCE;
787
    ret->min = 0;
788
    ret->max = 0;
789
    return(ret);
790
}
791
792
/**
793
 * xmlRegFreeAtom:
794
 * @atom:  the regexp atom
795
 *
796
 * Free a regexp atom
797
 */
798
static void
799
xmlRegFreeAtom(xmlRegAtomPtr atom) {
800
    int i;
801
802
    if (atom == NULL)
803
	return;
804
805
    for (i = 0;i < atom->nbRanges;i++)
806
	xmlRegFreeRange(atom->ranges[i]);
807
    if (atom->ranges != NULL)
808
	xmlFree(atom->ranges);
809
    if ((atom->type == XML_REGEXP_STRING) && (atom->valuep != NULL))
810
	xmlFree(atom->valuep);
811
    if ((atom->type == XML_REGEXP_STRING) && (atom->valuep2 != NULL))
812
	xmlFree(atom->valuep2);
813
    if ((atom->type == XML_REGEXP_BLOCK_NAME) && (atom->valuep != NULL))
814
	xmlFree(atom->valuep);
815
    xmlFree(atom);
816
}
817
818
/**
819
 * xmlRegCopyAtom:
820
 * @ctxt:  the regexp parser context
821
 * @atom:  the oiginal atom
822
 *
823
 * Allocate a new regexp range
824
 *
825
 * Returns the new atom or NULL in case of error
826
 */
827
static xmlRegAtomPtr
828
xmlRegCopyAtom(xmlRegParserCtxtPtr ctxt, xmlRegAtomPtr atom) {
829
    xmlRegAtomPtr ret;
830
831
    ret = (xmlRegAtomPtr) xmlMalloc(sizeof(xmlRegAtom));
832
    if (ret == NULL) {
833
	xmlRegexpErrMemory(ctxt, "copying atom");
834
	return(NULL);
835
    }
836
    memset(ret, 0, sizeof(xmlRegAtom));
837
    ret->type = atom->type;
838
    ret->quant = atom->quant;
839
    ret->min = atom->min;
840
    ret->max = atom->max;
841
    if (atom->nbRanges > 0) {
842
        int i;
843
844
        ret->ranges = (xmlRegRangePtr *) xmlMalloc(sizeof(xmlRegRangePtr) *
845
	                                           atom->nbRanges);
846
	if (ret->ranges == NULL) {
847
	    xmlRegexpErrMemory(ctxt, "copying atom");
848
	    goto error;
849
	}
850
	for (i = 0;i < atom->nbRanges;i++) {
851
	    ret->ranges[i] = xmlRegCopyRange(ctxt, atom->ranges[i]);
852
	    if (ret->ranges[i] == NULL)
853
	        goto error;
854
	    ret->nbRanges = i + 1;
855
	}
856
    }
857
    return(ret);
858
859
error:
860
    xmlRegFreeAtom(ret);
861
    return(NULL);
862
}
863
864
static xmlRegStatePtr
865
xmlRegNewState(xmlRegParserCtxtPtr ctxt) {
866
    xmlRegStatePtr ret;
867
868
    ret = (xmlRegStatePtr) xmlMalloc(sizeof(xmlRegState));
869
    if (ret == NULL) {
870
	xmlRegexpErrMemory(ctxt, "allocating state");
871
	return(NULL);
872
    }
873
    memset(ret, 0, sizeof(xmlRegState));
874
    ret->type = XML_REGEXP_TRANS_STATE;
875
    ret->mark = XML_REGEXP_MARK_NORMAL;
876
    return(ret);
877
}
878
879
/**
880
 * xmlRegFreeState:
881
 * @state:  the regexp state
882
 *
883
 * Free a regexp state
884
 */
885
static void
886
xmlRegFreeState(xmlRegStatePtr state) {
887
    if (state == NULL)
888
	return;
889
890
    if (state->trans != NULL)
891
	xmlFree(state->trans);
892
    if (state->transTo != NULL)
893
	xmlFree(state->transTo);
894
    xmlFree(state);
895
}
896
897
/**
898
 * xmlRegFreeParserCtxt:
899
 * @ctxt:  the regexp parser context
900
 *
901
 * Free a regexp parser context
902
 */
903
static void
904
xmlRegFreeParserCtxt(xmlRegParserCtxtPtr ctxt) {
905
    int i;
906
    if (ctxt == NULL)
907
	return;
908
909
    if (ctxt->string != NULL)
910
	xmlFree(ctxt->string);
911
    if (ctxt->states != NULL) {
912
	for (i = 0;i < ctxt->nbStates;i++)
913
	    xmlRegFreeState(ctxt->states[i]);
914
	xmlFree(ctxt->states);
915
    }
916
    if (ctxt->atoms != NULL) {
917
	for (i = 0;i < ctxt->nbAtoms;i++)
918
	    xmlRegFreeAtom(ctxt->atoms[i]);
919
	xmlFree(ctxt->atoms);
920
    }
921
    if (ctxt->counters != NULL)
922
	xmlFree(ctxt->counters);
923
    xmlFree(ctxt);
924
}
925
926
/************************************************************************
927
 * 									*
928
 * 			Display of Data structures			*
929
 * 									*
930
 ************************************************************************/
931
932
static void
933
xmlRegPrintAtomType(FILE *output, xmlRegAtomType type) {
934
    switch (type) {
935
        case XML_REGEXP_EPSILON:
936
	    fprintf(output, "epsilon "); break;
937
        case XML_REGEXP_CHARVAL:
938
	    fprintf(output, "charval "); break;
939
        case XML_REGEXP_RANGES:
940
	    fprintf(output, "ranges "); break;
941
        case XML_REGEXP_SUBREG:
942
	    fprintf(output, "subexpr "); break;
943
        case XML_REGEXP_STRING:
944
	    fprintf(output, "string "); break;
945
        case XML_REGEXP_ANYCHAR:
946
	    fprintf(output, "anychar "); break;
947
        case XML_REGEXP_ANYSPACE:
948
	    fprintf(output, "anyspace "); break;
949
        case XML_REGEXP_NOTSPACE:
950
	    fprintf(output, "notspace "); break;
951
        case XML_REGEXP_INITNAME:
952
	    fprintf(output, "initname "); break;
953
        case XML_REGEXP_NOTINITNAME:
954
	    fprintf(output, "notinitname "); break;
955
        case XML_REGEXP_NAMECHAR:
956
	    fprintf(output, "namechar "); break;
957
        case XML_REGEXP_NOTNAMECHAR:
958
	    fprintf(output, "notnamechar "); break;
959
        case XML_REGEXP_DECIMAL:
960
	    fprintf(output, "decimal "); break;
961
        case XML_REGEXP_NOTDECIMAL:
962
	    fprintf(output, "notdecimal "); break;
963
        case XML_REGEXP_REALCHAR:
964
	    fprintf(output, "realchar "); break;
965
        case XML_REGEXP_NOTREALCHAR:
966
	    fprintf(output, "notrealchar "); break;
967
        case XML_REGEXP_LETTER:
968
            fprintf(output, "LETTER "); break;
969
        case XML_REGEXP_LETTER_UPPERCASE:
970
            fprintf(output, "LETTER_UPPERCASE "); break;
971
        case XML_REGEXP_LETTER_LOWERCASE:
972
            fprintf(output, "LETTER_LOWERCASE "); break;
973
        case XML_REGEXP_LETTER_TITLECASE:
974
            fprintf(output, "LETTER_TITLECASE "); break;
975
        case XML_REGEXP_LETTER_MODIFIER:
976
            fprintf(output, "LETTER_MODIFIER "); break;
977
        case XML_REGEXP_LETTER_OTHERS:
978
            fprintf(output, "LETTER_OTHERS "); break;
979
        case XML_REGEXP_MARK:
980
            fprintf(output, "MARK "); break;
981
        case XML_REGEXP_MARK_NONSPACING:
982
            fprintf(output, "MARK_NONSPACING "); break;
983
        case XML_REGEXP_MARK_SPACECOMBINING:
984
            fprintf(output, "MARK_SPACECOMBINING "); break;
985
        case XML_REGEXP_MARK_ENCLOSING:
986
            fprintf(output, "MARK_ENCLOSING "); break;
987
        case XML_REGEXP_NUMBER:
988
            fprintf(output, "NUMBER "); break;
989
        case XML_REGEXP_NUMBER_DECIMAL:
990
            fprintf(output, "NUMBER_DECIMAL "); break;
991
        case XML_REGEXP_NUMBER_LETTER:
992
            fprintf(output, "NUMBER_LETTER "); break;
993
        case XML_REGEXP_NUMBER_OTHERS:
994
            fprintf(output, "NUMBER_OTHERS "); break;
995
        case XML_REGEXP_PUNCT:
996
            fprintf(output, "PUNCT "); break;
997
        case XML_REGEXP_PUNCT_CONNECTOR:
998
            fprintf(output, "PUNCT_CONNECTOR "); break;
999
        case XML_REGEXP_PUNCT_DASH:
1000
            fprintf(output, "PUNCT_DASH "); break;
1001
        case XML_REGEXP_PUNCT_OPEN:
1002
            fprintf(output, "PUNCT_OPEN "); break;
1003
        case XML_REGEXP_PUNCT_CLOSE:
1004
            fprintf(output, "PUNCT_CLOSE "); break;
1005
        case XML_REGEXP_PUNCT_INITQUOTE:
1006
            fprintf(output, "PUNCT_INITQUOTE "); break;
1007
        case XML_REGEXP_PUNCT_FINQUOTE:
1008
            fprintf(output, "PUNCT_FINQUOTE "); break;
1009
        case XML_REGEXP_PUNCT_OTHERS:
1010
            fprintf(output, "PUNCT_OTHERS "); break;
1011
        case XML_REGEXP_SEPAR:
1012
            fprintf(output, "SEPAR "); break;
1013
        case XML_REGEXP_SEPAR_SPACE:
1014
            fprintf(output, "SEPAR_SPACE "); break;
1015
        case XML_REGEXP_SEPAR_LINE:
1016
            fprintf(output, "SEPAR_LINE "); break;
1017
        case XML_REGEXP_SEPAR_PARA:
1018
            fprintf(output, "SEPAR_PARA "); break;
1019
        case XML_REGEXP_SYMBOL:
1020
            fprintf(output, "SYMBOL "); break;
1021
        case XML_REGEXP_SYMBOL_MATH:
1022
            fprintf(output, "SYMBOL_MATH "); break;
1023
        case XML_REGEXP_SYMBOL_CURRENCY:
1024
            fprintf(output, "SYMBOL_CURRENCY "); break;
1025
        case XML_REGEXP_SYMBOL_MODIFIER:
1026
            fprintf(output, "SYMBOL_MODIFIER "); break;
1027
        case XML_REGEXP_SYMBOL_OTHERS:
1028
            fprintf(output, "SYMBOL_OTHERS "); break;
1029
        case XML_REGEXP_OTHER:
1030
            fprintf(output, "OTHER "); break;
1031
        case XML_REGEXP_OTHER_CONTROL:
1032
            fprintf(output, "OTHER_CONTROL "); break;
1033
        case XML_REGEXP_OTHER_FORMAT:
1034
            fprintf(output, "OTHER_FORMAT "); break;
1035
        case XML_REGEXP_OTHER_PRIVATE:
1036
            fprintf(output, "OTHER_PRIVATE "); break;
1037
        case XML_REGEXP_OTHER_NA:
1038
            fprintf(output, "OTHER_NA "); break;
1039
        case XML_REGEXP_BLOCK_NAME:
1040
	    fprintf(output, "BLOCK "); break;
1041
    }
1042
}
1043
1044
static void
1045
xmlRegPrintQuantType(FILE *output, xmlRegQuantType type) {
1046
    switch (type) {
1047
        case XML_REGEXP_QUANT_EPSILON:
1048
	    fprintf(output, "epsilon "); break;
1049
        case XML_REGEXP_QUANT_ONCE:
1050
	    fprintf(output, "once "); break;
1051
        case XML_REGEXP_QUANT_OPT:
1052
	    fprintf(output, "? "); break;
1053
        case XML_REGEXP_QUANT_MULT:
1054
	    fprintf(output, "* "); break;
1055
        case XML_REGEXP_QUANT_PLUS:
1056
	    fprintf(output, "+ "); break;
1057
	case XML_REGEXP_QUANT_RANGE:
1058
	    fprintf(output, "range "); break;
1059
	case XML_REGEXP_QUANT_ONCEONLY:
1060
	    fprintf(output, "onceonly "); break;
1061
	case XML_REGEXP_QUANT_ALL:
1062
	    fprintf(output, "all "); break;
1063
    }
1064
}
1065
static void
1066
xmlRegPrintRange(FILE *output, xmlRegRangePtr range) {
1067
    fprintf(output, "  range: ");
1068
    if (range->neg)
1069
	fprintf(output, "negative ");
1070
    xmlRegPrintAtomType(output, range->type);
1071
    fprintf(output, "%c - %c\n", range->start, range->end);
1072
}
1073
1074
static void
1075
xmlRegPrintAtom(FILE *output, xmlRegAtomPtr atom) {
1076
    fprintf(output, " atom: ");
1077
    if (atom == NULL) {
1078
	fprintf(output, "NULL\n");
1079
	return;
1080
    }
1081
    if (atom->neg)
1082
        fprintf(output, "not ");
1083
    xmlRegPrintAtomType(output, atom->type);
1084
    xmlRegPrintQuantType(output, atom->quant);
1085
    if (atom->quant == XML_REGEXP_QUANT_RANGE)
1086
	fprintf(output, "%d-%d ", atom->min, atom->max);
1087
    if (atom->type == XML_REGEXP_STRING)
1088
	fprintf(output, "'%s' ", (char *) atom->valuep);
1089
    if (atom->type == XML_REGEXP_CHARVAL)
1090
	fprintf(output, "char %c\n", atom->codepoint);
1091
    else if (atom->type == XML_REGEXP_RANGES) {
1092
	int i;
1093
	fprintf(output, "%d entries\n", atom->nbRanges);
1094
	for (i = 0; i < atom->nbRanges;i++)
1095
	    xmlRegPrintRange(output, atom->ranges[i]);
1096
    } else if (atom->type == XML_REGEXP_SUBREG) {
1097
	fprintf(output, "start %d end %d\n", atom->start->no, atom->stop->no);
1098
    } else {
1099
	fprintf(output, "\n");
1100
    }
1101
}
1102
1103
static void
1104
xmlRegPrintTrans(FILE *output, xmlRegTransPtr trans) {
1105
    fprintf(output, "  trans: ");
1106
    if (trans == NULL) {
1107
	fprintf(output, "NULL\n");
1108
	return;
1109
    }
1110
    if (trans->to < 0) {
1111
	fprintf(output, "removed\n");
1112
	return;
1113
    }
1114
    if (trans->nd != 0) {
1115
	if (trans->nd == 2)
1116
	    fprintf(output, "last not determinist, ");
1117
	else
1118
	    fprintf(output, "not determinist, ");
1119
    }
1120
    if (trans->counter >= 0) {
1121
	fprintf(output, "counted %d, ", trans->counter);
1122
    }
1123
    if (trans->count == REGEXP_ALL_COUNTER) {
1124
	fprintf(output, "all transition, ");
1125
    } else if (trans->count >= 0) {
1126
	fprintf(output, "count based %d, ", trans->count);
1127
    }
1128
    if (trans->atom == NULL) {
1129
	fprintf(output, "epsilon to %d\n", trans->to);
1130
	return;
1131
    }
1132
    if (trans->atom->type == XML_REGEXP_CHARVAL)
1133
	fprintf(output, "char %c ", trans->atom->codepoint);
1134
    fprintf(output, "atom %d, to %d\n", trans->atom->no, trans->to);
1135
}
1136
    
1137
static void
1138
xmlRegPrintState(FILE *output, xmlRegStatePtr state) {
1139
    int i;
1140
1141
    fprintf(output, " state: ");
1142
    if (state == NULL) {
1143
	fprintf(output, "NULL\n");
1144
	return;
1145
    }
1146
    if (state->type == XML_REGEXP_START_STATE)
1147
	fprintf(output, "START ");
1148
    if (state->type == XML_REGEXP_FINAL_STATE)
1149
	fprintf(output, "FINAL ");
1150
    
1151
    fprintf(output, "%d, %d transitions:\n", state->no, state->nbTrans);
1152
    for (i = 0;i < state->nbTrans; i++) {
1153
	xmlRegPrintTrans(output, &(state->trans[i]));
1154
    }
1155
}
1156
1157
#ifdef DEBUG_REGEXP_GRAPH
1158
static void
1159
xmlRegPrintCtxt(FILE *output, xmlRegParserCtxtPtr ctxt) {
1160
    int i;
1161
1162
    fprintf(output, " ctxt: ");
1163
    if (ctxt == NULL) {
1164
	fprintf(output, "NULL\n");
1165
	return;
1166
    }
1167
    fprintf(output, "'%s' ", ctxt->string);
1168
    if (ctxt->error)
1169
	fprintf(output, "error ");
1170
    if (ctxt->neg)
1171
	fprintf(output, "neg ");
1172
    fprintf(output, "\n");
1173
    fprintf(output, "%d atoms:\n", ctxt->nbAtoms);
1174
    for (i = 0;i < ctxt->nbAtoms; i++) {
1175
	fprintf(output, " %02d ", i);
1176
	xmlRegPrintAtom(output, ctxt->atoms[i]);
1177
    }
1178
    if (ctxt->atom != NULL) {
1179
	fprintf(output, "current atom:\n");
1180
	xmlRegPrintAtom(output, ctxt->atom);
1181
    }
1182
    fprintf(output, "%d states:", ctxt->nbStates);
1183
    if (ctxt->start != NULL)
1184
	fprintf(output, " start: %d", ctxt->start->no);
1185
    if (ctxt->end != NULL)
1186
	fprintf(output, " end: %d", ctxt->end->no);
1187
    fprintf(output, "\n");
1188
    for (i = 0;i < ctxt->nbStates; i++) {
1189
	xmlRegPrintState(output, ctxt->states[i]);
1190
    }
1191
    fprintf(output, "%d counters:\n", ctxt->nbCounters);
1192
    for (i = 0;i < ctxt->nbCounters; i++) {
1193
	fprintf(output, " %d: min %d max %d\n", i, ctxt->counters[i].min,
1194
		                                ctxt->counters[i].max);
1195
    }
1196
}
1197
#endif
1198
1199
/************************************************************************
1200
 * 									*
1201
 *		 Finite Automata structures manipulations		*
1202
 * 									*
1203
 ************************************************************************/
1204
1205
static void 
1206
xmlRegAtomAddRange(xmlRegParserCtxtPtr ctxt, xmlRegAtomPtr atom,
1207
	           int neg, xmlRegAtomType type, int start, int end,
1208
		   xmlChar *blockName) {
1209
    xmlRegRangePtr range;
1210
1211
    if (atom == NULL) {
1212
	ERROR("add range: atom is NULL");
1213
	return;
1214
    }
1215
    if (atom->type != XML_REGEXP_RANGES) {
1216
	ERROR("add range: atom is not ranges");
1217
	return;
1218
    }
1219
    if (atom->maxRanges == 0) {
1220
	atom->maxRanges = 4;
1221
	atom->ranges = (xmlRegRangePtr *) xmlMalloc(atom->maxRanges *
1222
		                             sizeof(xmlRegRangePtr));
1223
	if (atom->ranges == NULL) {
1224
	    xmlRegexpErrMemory(ctxt, "adding ranges");
1225
	    atom->maxRanges = 0;
1226
	    return;
1227
	}
1228
    } else if (atom->nbRanges >= atom->maxRanges) {
1229
	xmlRegRangePtr *tmp;
1230
	atom->maxRanges *= 2;
1231
	tmp = (xmlRegRangePtr *) xmlRealloc(atom->ranges, atom->maxRanges *
1232
		                             sizeof(xmlRegRangePtr));
1233
	if (tmp == NULL) {
1234
	    xmlRegexpErrMemory(ctxt, "adding ranges");
1235
	    atom->maxRanges /= 2;
1236
	    return;
1237
	}
1238
	atom->ranges = tmp;
1239
    }
1240
    range = xmlRegNewRange(ctxt, neg, type, start, end);
1241
    if (range == NULL)
1242
	return;
1243
    range->blockName = blockName;
1244
    atom->ranges[atom->nbRanges++] = range;
1245
    
1246
}
1247
1248
static int
1249
xmlRegGetCounter(xmlRegParserCtxtPtr ctxt) {
1250
    if (ctxt->maxCounters == 0) {
1251
	ctxt->maxCounters = 4;
1252
	ctxt->counters = (xmlRegCounter *) xmlMalloc(ctxt->maxCounters *
1253
		                             sizeof(xmlRegCounter));
1254
	if (ctxt->counters == NULL) {
1255
	    xmlRegexpErrMemory(ctxt, "allocating counter");
1256
	    ctxt->maxCounters = 0;
1257
	    return(-1);
1258
	}
1259
    } else if (ctxt->nbCounters >= ctxt->maxCounters) {
1260
	xmlRegCounter *tmp;
1261
	ctxt->maxCounters *= 2;
1262
	tmp = (xmlRegCounter *) xmlRealloc(ctxt->counters, ctxt->maxCounters *
1263
		                           sizeof(xmlRegCounter));
1264
	if (tmp == NULL) {
1265
	    xmlRegexpErrMemory(ctxt, "allocating counter");
1266
	    ctxt->maxCounters /= 2;
1267
	    return(-1);
1268
	}
1269
	ctxt->counters = tmp;
1270
    }
1271
    ctxt->counters[ctxt->nbCounters].min = -1;
1272
    ctxt->counters[ctxt->nbCounters].max = -1;
1273
    return(ctxt->nbCounters++);
1274
}
1275
1276
static int 
1277
xmlRegAtomPush(xmlRegParserCtxtPtr ctxt, xmlRegAtomPtr atom) {
1278
    if (atom == NULL) {
1279
	ERROR("atom push: atom is NULL");
1280
	return(-1);
1281
    }
1282
    if (ctxt->maxAtoms == 0) {
1283
	ctxt->maxAtoms = 4;
1284
	ctxt->atoms = (xmlRegAtomPtr *) xmlMalloc(ctxt->maxAtoms *
1285
		                             sizeof(xmlRegAtomPtr));
1286
	if (ctxt->atoms == NULL) {
1287
	    xmlRegexpErrMemory(ctxt, "pushing atom");
1288
	    ctxt->maxAtoms = 0;
1289
	    return(-1);
1290
	}
1291
    } else if (ctxt->nbAtoms >= ctxt->maxAtoms) {
1292
	xmlRegAtomPtr *tmp;
1293
	ctxt->maxAtoms *= 2;
1294
	tmp = (xmlRegAtomPtr *) xmlRealloc(ctxt->atoms, ctxt->maxAtoms *
1295
		                             sizeof(xmlRegAtomPtr));
1296
	if (tmp == NULL) {
1297
	    xmlRegexpErrMemory(ctxt, "allocating counter");
1298
	    ctxt->maxAtoms /= 2;
1299
	    return(-1);
1300
	}
1301
	ctxt->atoms = tmp;
1302
    }
1303
    atom->no = ctxt->nbAtoms;
1304
    ctxt->atoms[ctxt->nbAtoms++] = atom;
1305
    return(0);
1306
}
1307
1308
static void 
1309
xmlRegStateAddTransTo(xmlRegParserCtxtPtr ctxt, xmlRegStatePtr target,
1310
                      int from) {
1311
    if (target->maxTransTo == 0) {
1312
	target->maxTransTo = 8;
1313
	target->transTo = (int *) xmlMalloc(target->maxTransTo *
1314
		                             sizeof(int));
1315
	if (target->transTo == NULL) {
1316
	    xmlRegexpErrMemory(ctxt, "adding transition");
1317
	    target->maxTransTo = 0;
1318
	    return;
1319
	}
1320
    } else if (target->nbTransTo >= target->maxTransTo) {
1321
	int *tmp;
1322
	target->maxTransTo *= 2;
1323
	tmp = (int *) xmlRealloc(target->transTo, target->maxTransTo *
1324
		                             sizeof(int));
1325
	if (tmp == NULL) {
1326
	    xmlRegexpErrMemory(ctxt, "adding transition");
1327
	    target->maxTransTo /= 2;
1328
	    return;
1329
	}
1330
	target->transTo = tmp;
1331
    }
1332
    target->transTo[target->nbTransTo] = from;
1333
    target->nbTransTo++;
1334
}
1335
1336
static void 
1337
xmlRegStateAddTrans(xmlRegParserCtxtPtr ctxt, xmlRegStatePtr state,
1338
	            xmlRegAtomPtr atom, xmlRegStatePtr target,
1339
		    int counter, int count) {
1340
1341
    int nrtrans;
1342
1343
    if (state == NULL) {
1344
	ERROR("add state: state is NULL");
1345
	return;
1346
    }
1347
    if (target == NULL) {
1348
	ERROR("add state: target is NULL");
1349
	return;
1350
    }
1351
    /*
1352
     * Other routines follow the philosophy 'When in doubt, add a transition'
1353
     * so we check here whether such a transition is already present and, if
1354
     * so, silently ignore this request.
1355
     */
1356
1357
    for (nrtrans = state->nbTrans - 1; nrtrans >= 0; nrtrans--) {
1358
	xmlRegTransPtr trans = &(state->trans[nrtrans]);
1359
	if ((trans->atom == atom) &&
1360
	    (trans->to == target->no) &&
1361
	    (trans->counter == counter) &&
1362
	    (trans->count == count)) {
1363
#ifdef DEBUG_REGEXP_GRAPH
1364
	    printf("Ignoring duplicate transition from %d to %d\n",
1365
		    state->no, target->no);
1366
#endif
1367
	    return;
1368
	}
1369
    }
1370
1371
    if (state->maxTrans == 0) {
1372
	state->maxTrans = 8;
1373
	state->trans = (xmlRegTrans *) xmlMalloc(state->maxTrans *
1374
		                             sizeof(xmlRegTrans));
1375
	if (state->trans == NULL) {
1376
	    xmlRegexpErrMemory(ctxt, "adding transition");
1377
	    state->maxTrans = 0;
1378
	    return;
1379
	}
1380
    } else if (state->nbTrans >= state->maxTrans) {
1381
	xmlRegTrans *tmp;
1382
	state->maxTrans *= 2;
1383
	tmp = (xmlRegTrans *) xmlRealloc(state->trans, state->maxTrans *
1384
		                             sizeof(xmlRegTrans));
1385
	if (tmp == NULL) {
1386
	    xmlRegexpErrMemory(ctxt, "adding transition");
1387
	    state->maxTrans /= 2;
1388
	    return;
1389
	}
1390
	state->trans = tmp;
1391
    }
1392
#ifdef DEBUG_REGEXP_GRAPH
1393
    printf("Add trans from %d to %d ", state->no, target->no);
1394
    if (count == REGEXP_ALL_COUNTER)
1395
	printf("all transition\n");
1396
    else if (count >= 0)
1397
	printf("count based %d\n", count);
1398
    else if (counter >= 0)
1399
	printf("counted %d\n", counter);
1400
    else if (atom == NULL)
1401
	printf("epsilon transition\n");
1402
    else if (atom != NULL) 
1403
        xmlRegPrintAtom(stdout, atom);
1404
#endif
1405
1406
    state->trans[state->nbTrans].atom = atom;
1407
    state->trans[state->nbTrans].to = target->no;
1408
    state->trans[state->nbTrans].counter = counter;
1409
    state->trans[state->nbTrans].count = count;
1410
    state->trans[state->nbTrans].nd = 0;
1411
    state->nbTrans++;
1412
    xmlRegStateAddTransTo(ctxt, target, state->no);
1413
}
1414
1415
static int
1416
xmlRegStatePush(xmlRegParserCtxtPtr ctxt, xmlRegStatePtr state) {
1417
    if (state == NULL) return(-1);
1418
    if (ctxt->maxStates == 0) {
1419
	ctxt->maxStates = 4;
1420
	ctxt->states = (xmlRegStatePtr *) xmlMalloc(ctxt->maxStates *
1421
		                             sizeof(xmlRegStatePtr));
1422
	if (ctxt->states == NULL) {
1423
	    xmlRegexpErrMemory(ctxt, "adding state");
1424
	    ctxt->maxStates = 0;
1425
	    return(-1);
1426
	}
1427
    } else if (ctxt->nbStates >= ctxt->maxStates) {
1428
	xmlRegStatePtr *tmp;
1429
	ctxt->maxStates *= 2;
1430
	tmp = (xmlRegStatePtr *) xmlRealloc(ctxt->states, ctxt->maxStates *
1431
		                             sizeof(xmlRegStatePtr));
1432
	if (tmp == NULL) {
1433
	    xmlRegexpErrMemory(ctxt, "adding state");
1434
	    ctxt->maxStates /= 2;
1435
	    return(-1);
1436
	}
1437
	ctxt->states = tmp;
1438
    }
1439
    state->no = ctxt->nbStates;
1440
    ctxt->states[ctxt->nbStates++] = state;
1441
    return(0);
1442
}
1443
1444
/**
1445
 * xmlFAGenerateAllTransition:
1446
 * @ctxt:  a regexp parser context
1447
 * @from:  the from state
1448
 * @to:  the target state or NULL for building a new one
1449
 * @lax:
1450
 *
1451
 */
1452
static void
1453
xmlFAGenerateAllTransition(xmlRegParserCtxtPtr ctxt,
1454
			   xmlRegStatePtr from, xmlRegStatePtr to,
1455
			   int lax) {
1456
    if (to == NULL) {
1457
	to = xmlRegNewState(ctxt);
1458
	xmlRegStatePush(ctxt, to);
1459
	ctxt->state = to;
1460
    }
1461
    if (lax)
1462
	xmlRegStateAddTrans(ctxt, from, NULL, to, -1, REGEXP_ALL_LAX_COUNTER);
1463
    else
1464
	xmlRegStateAddTrans(ctxt, from, NULL, to, -1, REGEXP_ALL_COUNTER);
1465
}
1466
1467
/**
1468
 * xmlFAGenerateEpsilonTransition:
1469
 * @ctxt:  a regexp parser context
1470
 * @from:  the from state
1471
 * @to:  the target state or NULL for building a new one
1472
 *
1473
 */
1474
static void
1475
xmlFAGenerateEpsilonTransition(xmlRegParserCtxtPtr ctxt,
1476
			       xmlRegStatePtr from, xmlRegStatePtr to) {
1477
    if (to == NULL) {
1478
	to = xmlRegNewState(ctxt);
1479
	xmlRegStatePush(ctxt, to);
1480
	ctxt->state = to;
1481
    }
1482
    xmlRegStateAddTrans(ctxt, from, NULL, to, -1, -1);
1483
}
1484
1485
/**
1486
 * xmlFAGenerateCountedEpsilonTransition:
1487
 * @ctxt:  a regexp parser context
1488
 * @from:  the from state
1489
 * @to:  the target state or NULL for building a new one
1490
 * counter:  the counter for that transition
1491
 *
1492
 */
1493
static void
1494
xmlFAGenerateCountedEpsilonTransition(xmlRegParserCtxtPtr ctxt,
1495
	    xmlRegStatePtr from, xmlRegStatePtr to, int counter) {
1496
    if (to == NULL) {
1497
	to = xmlRegNewState(ctxt);
1498
	xmlRegStatePush(ctxt, to);
1499
	ctxt->state = to;
1500
    }
1501
    xmlRegStateAddTrans(ctxt, from, NULL, to, counter, -1);
1502
}
1503
1504
/**
1505
 * xmlFAGenerateCountedTransition:
1506
 * @ctxt:  a regexp parser context
1507
 * @from:  the from state
1508
 * @to:  the target state or NULL for building a new one
1509
 * counter:  the counter for that transition
1510
 *
1511
 */
1512
static void
1513
xmlFAGenerateCountedTransition(xmlRegParserCtxtPtr ctxt,
1514
	    xmlRegStatePtr from, xmlRegStatePtr to, int counter) {
1515
    if (to == NULL) {
1516
	to = xmlRegNewState(ctxt);
1517
	xmlRegStatePush(ctxt, to);
1518
	ctxt->state = to;
1519
    }
1520
    xmlRegStateAddTrans(ctxt, from, NULL, to, -1, counter);
1521
}
1522
1523
/**
1524
 * xmlFAGenerateTransitions:
1525
 * @ctxt:  a regexp parser context
1526
 * @from:  the from state
1527
 * @to:  the target state or NULL for building a new one
1528
 * @atom:  the atom generating the transition
1529
 *
1530
 * Returns 0 if success and -1 in case of error.
1531
 */
1532
static int
1533
xmlFAGenerateTransitions(xmlRegParserCtxtPtr ctxt, xmlRegStatePtr from,
1534
	                 xmlRegStatePtr to, xmlRegAtomPtr atom) {
1535
    xmlRegStatePtr end;
1536
1537
    if (atom == NULL) {
1538
	ERROR("genrate transition: atom == NULL");
1539
	return(-1);
1540
    }
1541
    if (atom->type == XML_REGEXP_SUBREG) {
1542
	/*
1543
	 * this is a subexpression handling one should not need to
1544
	 * create a new node except for XML_REGEXP_QUANT_RANGE.
1545
	 */
1546
	if (xmlRegAtomPush(ctxt, atom) < 0) {
1547
	    return(-1);
1548
	}
1549
	if ((to != NULL) && (atom->stop != to) &&
1550
	    (atom->quant != XML_REGEXP_QUANT_RANGE)) {
1551
	    /*
1552
	     * Generate an epsilon transition to link to the target
1553
	     */
1554
	    xmlFAGenerateEpsilonTransition(ctxt, atom->stop, to);
1555
#ifdef DV
1556
	} else if ((to == NULL) && (atom->quant != XML_REGEXP_QUANT_RANGE) && 
1557
		   (atom->quant != XML_REGEXP_QUANT_ONCE)) {
1558
	    to = xmlRegNewState(ctxt);
1559
	    xmlRegStatePush(ctxt, to);
1560
	    ctxt->state = to;
1561
	    xmlFAGenerateEpsilonTransition(ctxt, atom->stop, to);
1562
#endif
1563
	}
1564
	switch (atom->quant) {
1565
	    case XML_REGEXP_QUANT_OPT:
1566
		atom->quant = XML_REGEXP_QUANT_ONCE;
1567
		/*
1568
		 * transition done to the state after end of atom.
1569
		 *      1. set transition from atom start to new state
1570
		 *      2. set transition from atom end to this state. 
1571
		 */
1572
		xmlFAGenerateEpsilonTransition(ctxt, atom->start, 0);
1573
		xmlFAGenerateEpsilonTransition(ctxt, atom->stop, ctxt->state);
1574
		break;
1575
	    case XML_REGEXP_QUANT_MULT:
1576
		atom->quant = XML_REGEXP_QUANT_ONCE;
1577
		xmlFAGenerateEpsilonTransition(ctxt, atom->start, atom->stop);
1578
		xmlFAGenerateEpsilonTransition(ctxt, atom->stop, atom->start);
1579
		break;
1580
	    case XML_REGEXP_QUANT_PLUS:
1581
		atom->quant = XML_REGEXP_QUANT_ONCE;
1582
		xmlFAGenerateEpsilonTransition(ctxt, atom->stop, atom->start);
1583
		break;
1584
	    case XML_REGEXP_QUANT_RANGE: {
1585
		int counter;
1586
		xmlRegStatePtr inter, newstate;
1587
1588
		/*
1589
		 * create the final state now if needed
1590
		 */
1591
		if (to != NULL) {
1592
		    newstate = to;
1593
		} else {
1594
		    newstate = xmlRegNewState(ctxt);
1595
		    xmlRegStatePush(ctxt, newstate);
1596
		}
1597
1598
		/*
1599
		 * The principle here is to use counted transition
1600
		 * to avoid explosion in the number of states in the
1601
		 * graph. This is clearly more complex but should not
1602
		 * be exploitable at runtime.
1603
		 */
1604
		if ((atom->min == 0) && (atom->start0 == NULL)) {
1605
		    xmlRegAtomPtr copy;
1606
		    /*
1607
		     * duplicate a transition based on atom to count next
1608
		     * occurences after 1. We cannot loop to atom->start
1609
		     * directly because we need an epsilon transition to 
1610
		     * newstate.
1611
		     */
1612
		     /* ???? For some reason it seems we never reach that
1613
		        case, I suppose this got optimized out before when
1614
			building the automata */
1615
		    copy = xmlRegCopyAtom(ctxt, atom);
1616
		    if (copy == NULL)
1617
		        return(-1);
1618
		    copy->quant = XML_REGEXP_QUANT_ONCE;
1619
		    copy->min = 0;
1620
		    copy->max = 0;
1621
1622
		    if (xmlFAGenerateTransitions(ctxt, atom->start, NULL, copy)
1623
		        < 0)
1624
			return(-1);
1625
		    inter = ctxt->state;
1626
		    counter = xmlRegGetCounter(ctxt);
1627
		    ctxt->counters[counter].min = atom->min - 1;
1628
		    ctxt->counters[counter].max = atom->max - 1;
1629
		    /* count the number of times we see it again */
1630
		    xmlFAGenerateCountedEpsilonTransition(ctxt, inter,
1631
						   atom->stop, counter);
1632
		    /* allow a way out based on the count */
1633
		    xmlFAGenerateCountedTransition(ctxt, inter,
1634
			                           newstate, counter);
1635
		    /* and also allow a direct exit for 0 */
1636
		    xmlFAGenerateEpsilonTransition(ctxt, atom->start,
1637
		                                   newstate);
1638
		} else {
1639
		    /*
1640
		     * either we need the atom at least once or there
1641
		     * is an atom->start0 allowing to easilly plug the
1642
		     * epsilon transition.
1643
		     */
1644
		    counter = xmlRegGetCounter(ctxt);
1645
		    ctxt->counters[counter].min = atom->min - 1;
1646
		    ctxt->counters[counter].max = atom->max - 1;
1647
		    /* count the number of times we see it again */
1648
		    xmlFAGenerateCountedEpsilonTransition(ctxt, atom->stop,
1649
						   atom->start, counter);
1650
		    /* allow a way out based on the count */
1651
		    xmlFAGenerateCountedTransition(ctxt, atom->stop,
1652
			                           newstate, counter);
1653
		    /* and if needed allow a direct exit for 0 */
1654
		    if (atom->min == 0)
1655
			xmlFAGenerateEpsilonTransition(ctxt, atom->start0,
1656
						       newstate);
1657
1658
		}
1659
		atom->min = 0;
1660
		atom->max = 0;
1661
		atom->quant = XML_REGEXP_QUANT_ONCE;
1662
		ctxt->state = newstate;
1663
	    }
1664
	    default:
1665
		break;
1666
	}
1667
	return(0);
1668
    } 
1669
    if ((atom->min == 0) && (atom->max == 0) &&
1670
               (atom->quant == XML_REGEXP_QUANT_RANGE)) {
1671
        /*
1672
	 * we can discard the atom and generate an epsilon transition instead
1673
	 */
1674
	if (to == NULL) {
1675
	    to = xmlRegNewState(ctxt);
1676
	    if (to != NULL)
1677
		xmlRegStatePush(ctxt, to);
1678
	    else {
1679
		return(-1);
1680
	    }
1681
	}
1682
	xmlFAGenerateEpsilonTransition(ctxt, from, to);
1683
	ctxt->state = to;
1684
	xmlRegFreeAtom(atom);
1685
	return(0);
1686
    }
1687
    if (to == NULL) {
1688
	to = xmlRegNewState(ctxt);
1689
	if (to != NULL)
1690
	    xmlRegStatePush(ctxt, to);
1691
	else {
1692
	    return(-1);
1693
	}
1694
    } 
1695
    end = to;
1696
    if ((atom->quant == XML_REGEXP_QUANT_MULT) || 
1697
        (atom->quant == XML_REGEXP_QUANT_PLUS)) {
1698
	/*
1699
	 * Do not pollute the target state by adding transitions from
1700
	 * it as it is likely to be the shared target of multiple branches.
1701
	 * So isolate with an epsilon transition.
1702
	 */
1703
        xmlRegStatePtr tmp;
1704
	
1705
	tmp = xmlRegNewState(ctxt);
1706
	if (tmp != NULL)
1707
	    xmlRegStatePush(ctxt, tmp);
1708
	else {
1709
	    return(-1);
1710
	}
1711
	xmlFAGenerateEpsilonTransition(ctxt, tmp, to);
1712
	to = tmp;
1713
    }
1714
    if (xmlRegAtomPush(ctxt, atom) < 0) {
1715
	return(-1);
1716
    }
1717
    xmlRegStateAddTrans(ctxt, from, atom, to, -1, -1);
1718
    ctxt->state = end;
1719
    switch (atom->quant) {
1720
	case XML_REGEXP_QUANT_OPT:
1721
	    atom->quant = XML_REGEXP_QUANT_ONCE;
1722
	    xmlFAGenerateEpsilonTransition(ctxt, from, to);
1723
	    break;
1724
	case XML_REGEXP_QUANT_MULT:
1725
	    atom->quant = XML_REGEXP_QUANT_ONCE;
1726
	    xmlFAGenerateEpsilonTransition(ctxt, from, to);
1727
	    xmlRegStateAddTrans(ctxt, to, atom, to, -1, -1);
1728
	    break;
1729
	case XML_REGEXP_QUANT_PLUS:
1730
	    atom->quant = XML_REGEXP_QUANT_ONCE;
1731
	    xmlRegStateAddTrans(ctxt, to, atom, to, -1, -1);
1732
	    break;
1733
	case XML_REGEXP_QUANT_RANGE: 
1734
#if DV_test
1735
	    if (atom->min == 0) {
1736
		xmlFAGenerateEpsilonTransition(ctxt, from, to);
1737
	    }
1738
#endif
1739
	    break;
1740
	default:
1741
	    break;
1742
    }
1743
    return(0);
1744
}
1745
1746
/**
1747
 * xmlFAReduceEpsilonTransitions:
1748
 * @ctxt:  a regexp parser context
1749
 * @fromnr:  the from state
1750
 * @tonr:  the to state 
1751
 * @counter:  should that transition be associated to a counted
1752
 *
1753
 */
1754
static void
1755
xmlFAReduceEpsilonTransitions(xmlRegParserCtxtPtr ctxt, int fromnr,
1756
	                      int tonr, int counter) {
1757
    int transnr;
1758
    xmlRegStatePtr from;
1759
    xmlRegStatePtr to;
1760
1761
#ifdef DEBUG_REGEXP_GRAPH
1762
    printf("xmlFAReduceEpsilonTransitions(%d, %d)\n", fromnr, tonr);
1763
#endif
1764
    from = ctxt->states[fromnr];
1765
    if (from == NULL)
1766
	return;
1767
    to = ctxt->states[tonr];
1768
    if (to == NULL)
1769
	return;
1770
    if ((to->mark == XML_REGEXP_MARK_START) ||
1771
	(to->mark == XML_REGEXP_MARK_VISITED))
1772
	return;
1773
1774
    to->mark = XML_REGEXP_MARK_VISITED;
1775
    if (to->type == XML_REGEXP_FINAL_STATE) {
1776
#ifdef DEBUG_REGEXP_GRAPH
1777
	printf("State %d is final, so %d becomes final\n", tonr, fromnr);
1778
#endif
1779
	from->type = XML_REGEXP_FINAL_STATE;
1780
    }
1781
    for (transnr = 0;transnr < to->nbTrans;transnr++) {
1782
        if (to->trans[transnr].to < 0)
1783
	    continue;
1784
	if (to->trans[transnr].atom == NULL) {
1785
	    /*
1786
	     * Don't remove counted transitions
1787
	     * Don't loop either
1788
	     */
1789
	    if (to->trans[transnr].to != fromnr) {
1790
		if (to->trans[transnr].count >= 0) {
1791
		    int newto = to->trans[transnr].to;
1792
1793
		    xmlRegStateAddTrans(ctxt, from, NULL,
1794
					ctxt->states[newto], 
1795
					-1, to->trans[transnr].count);
1796
		} else {
1797
#ifdef DEBUG_REGEXP_GRAPH
1798
		    printf("Found epsilon trans %d from %d to %d\n",
1799
			   transnr, tonr, to->trans[transnr].to);
1800
#endif
1801
		    if (to->trans[transnr].counter >= 0) {
1802
			xmlFAReduceEpsilonTransitions(ctxt, fromnr,
1803
					      to->trans[transnr].to,
1804
					      to->trans[transnr].counter);
1805
		    } else {
1806
			xmlFAReduceEpsilonTransitions(ctxt, fromnr,
1807
					      to->trans[transnr].to,
1808
					      counter);
1809
		    }
1810
		}
1811
	    }
1812
	} else {
1813
	    int newto = to->trans[transnr].to;
1814
1815
	    if (to->trans[transnr].counter >= 0) {
1816
		xmlRegStateAddTrans(ctxt, from, to->trans[transnr].atom, 
1817
				    ctxt->states[newto], 
1818
				    to->trans[transnr].counter, -1);
1819
	    } else {
1820
		xmlRegStateAddTrans(ctxt, from, to->trans[transnr].atom, 
1821
				    ctxt->states[newto], counter, -1);
1822
	    }
1823
	}
1824
    }
1825
    to->mark = XML_REGEXP_MARK_NORMAL;
1826
}
1827
1828
/**
1829
 * xmlFAEliminateSimpleEpsilonTransitions:
1830
 * @ctxt:  a regexp parser context
1831
 *
1832
 * Eliminating general epsilon transitions can get costly in the general 
1833
 * algorithm due to the large amount of generated new transitions and
1834
 * associated comparisons. However for simple epsilon transition used just
1835
 * to separate building blocks when generating the automata this can be
1836
 * reduced to state elimination:
1837
 *    - if there exists an epsilon from X to Y
1838
 *    - if there is no other transition from X
1839
 * then X and Y are semantically equivalent and X can be eliminated
1840
 * If X is the start state then make Y the start state, else replace the
1841
 * target of all transitions to X by transitions to Y.
1842
 */
1843
static void
1844
xmlFAEliminateSimpleEpsilonTransitions(xmlRegParserCtxtPtr ctxt) {
1845
    int statenr, i, j, newto;
1846
    xmlRegStatePtr state, tmp;
1847
1848
    for (statenr = 0;statenr < ctxt->nbStates;statenr++) {
1849
	state = ctxt->states[statenr];
1850
	if (state == NULL)
1851
	    continue;
1852
	if (state->nbTrans != 1)
1853
	    continue;
1854
	if (state->type == XML_REGEXP_UNREACH_STATE)
1855
	    continue;
1856
	/* is the only transition out a basic transition */
1857
	if ((state->trans[0].atom == NULL) &&
1858
	    (state->trans[0].to >= 0) &&
1859
	    (state->trans[0].to != statenr) &&
1860
	    (state->trans[0].counter < 0) &&
1861
	    (state->trans[0].count < 0)) {
1862
	    newto = state->trans[0].to;
1863
1864
            if (state->type == XML_REGEXP_START_STATE) {
1865
#ifdef DEBUG_REGEXP_GRAPH
1866
		printf("Found simple epsilon trans from start %d to %d\n",
1867
		       statenr, newto);
1868
#endif     
1869
            } else {
1870
#ifdef DEBUG_REGEXP_GRAPH
1871
		printf("Found simple epsilon trans from %d to %d\n",
1872
		       statenr, newto);
1873
#endif     
1874
	        for (i = 0;i < state->nbTransTo;i++) {
1875
		    tmp = ctxt->states[state->transTo[i]];
1876
		    for (j = 0;j < tmp->nbTrans;j++) {
1877
			if (tmp->trans[j].to == statenr) {
1878
#ifdef DEBUG_REGEXP_GRAPH
1879
			    printf("Changed transition %d on %d to go to %d\n",
1880
				   j, tmp->no, newto);
1881
#endif     
1882
			    tmp->trans[j].to = -1;
1883
			    xmlRegStateAddTrans(ctxt, tmp, tmp->trans[j].atom,
1884
			    			ctxt->states[newto],
1885
					        tmp->trans[j].counter,
1886
						tmp->trans[j].count);
1887
			}
1888
		    }
1889
		}
1890
		if (state->type == XML_REGEXP_FINAL_STATE)
1891
		    ctxt->states[newto]->type = XML_REGEXP_FINAL_STATE;
1892
		/* eliminate the transition completely */
1893
		state->nbTrans = 0;
1894
1895
                state->type = XML_REGEXP_UNREACH_STATE;
1896
1897
	    }
1898
            
1899
	}
1900
    }
1901
}
1902
/**
1903
 * xmlFAEliminateEpsilonTransitions:
1904
 * @ctxt:  a regexp parser context
1905
 *
1906
 */
1907
static void
1908
xmlFAEliminateEpsilonTransitions(xmlRegParserCtxtPtr ctxt) {
1909
    int statenr, transnr;
1910
    xmlRegStatePtr state;
1911
    int has_epsilon;
1912
1913
    if (ctxt->states == NULL) return;
1914
1915
    /*
1916
     * Eliminate simple epsilon transition and the associated unreachable
1917
     * states.
1918
     */
1919
    xmlFAEliminateSimpleEpsilonTransitions(ctxt);
1920
    for (statenr = 0;statenr < ctxt->nbStates;statenr++) {
1921
	state = ctxt->states[statenr];
1922
	if ((state != NULL) && (state->type == XML_REGEXP_UNREACH_STATE)) {
1923
#ifdef DEBUG_REGEXP_GRAPH
1924
	    printf("Removed unreachable state %d\n", statenr);
1925
#endif
1926
	    xmlRegFreeState(state);
1927
	    ctxt->states[statenr] = NULL;
1928
	}
1929
    }
1930
1931
    has_epsilon = 0;
1932
1933
    /*
1934
     * Build the completed transitions bypassing the epsilons
1935
     * Use a marking algorithm to avoid loops
1936
     * Mark sink states too.
1937
     * Process from the latests states backward to the start when
1938
     * there is long cascading epsilon chains this minimize the
1939
     * recursions and transition compares when adding the new ones
1940
     */
1941
    for (statenr = ctxt->nbStates - 1;statenr >= 0;statenr--) {
1942
	state = ctxt->states[statenr];
1943
	if (state == NULL)
1944
	    continue;
1945
	if ((state->nbTrans == 0) &&
1946
	    (state->type != XML_REGEXP_FINAL_STATE)) {
1947
	    state->type = XML_REGEXP_SINK_STATE;
1948
	}
1949
	for (transnr = 0;transnr < state->nbTrans;transnr++) {
1950
	    if ((state->trans[transnr].atom == NULL) &&
1951
		(state->trans[transnr].to >= 0)) {
1952
		if (state->trans[transnr].to == statenr) {
1953
		    state->trans[transnr].to = -1;
1954
#ifdef DEBUG_REGEXP_GRAPH
1955
		    printf("Removed loopback epsilon trans %d on %d\n",
1956
			   transnr, statenr);
1957
#endif
1958
		} else if (state->trans[transnr].count < 0) {
1959
		    int newto = state->trans[transnr].to;
1960
1961
#ifdef DEBUG_REGEXP_GRAPH
1962
		    printf("Found epsilon trans %d from %d to %d\n",
1963
			   transnr, statenr, newto);
1964
#endif
1965
		    has_epsilon = 1;
1966
		    state->trans[transnr].to = -2;
1967
		    state->mark = XML_REGEXP_MARK_START;
1968
		    xmlFAReduceEpsilonTransitions(ctxt, statenr,
1969
				      newto, state->trans[transnr].counter);
1970
		    state->mark = XML_REGEXP_MARK_NORMAL;
1971
#ifdef DEBUG_REGEXP_GRAPH
1972
		} else {
1973
		    printf("Found counted transition %d on %d\n",
1974
			   transnr, statenr);
1975
#endif
1976
	        }
1977
	    }
1978
	}
1979
    }
1980
    /*
1981
     * Eliminate the epsilon transitions
1982
     */
1983
    if (has_epsilon) {
1984
	for (statenr = 0;statenr < ctxt->nbStates;statenr++) {
1985
	    state = ctxt->states[statenr];
1986
	    if (state == NULL)
1987
		continue;
1988
	    for (transnr = 0;transnr < state->nbTrans;transnr++) {
1989
		xmlRegTransPtr trans = &(state->trans[transnr]);
1990
		if ((trans->atom == NULL) &&
1991
		    (trans->count < 0) &&
1992
		    (trans->to >= 0)) {
1993
		    trans->to = -1;
1994
		}
1995
	    }
1996
	}
1997
    }
1998
1999
    /*
2000
     * Use this pass to detect unreachable states too
2001
     */
2002
    for (statenr = 0;statenr < ctxt->nbStates;statenr++) {
2003
	state = ctxt->states[statenr];
2004
	if (state != NULL)
2005
	    state->reached = XML_REGEXP_MARK_NORMAL;
2006
    }
2007
    state = ctxt->states[0];
2008
    if (state != NULL)
2009
	state->reached = XML_REGEXP_MARK_START;
2010
    while (state != NULL) {
2011
	xmlRegStatePtr target = NULL;
2012
	state->reached = XML_REGEXP_MARK_VISITED;
2013
	/*
2014
	 * Mark all states reachable from the current reachable state
2015
	 */
2016
	for (transnr = 0;transnr < state->nbTrans;transnr++) {
2017
	    if ((state->trans[transnr].to >= 0) &&
2018
		((state->trans[transnr].atom != NULL) ||
2019
		 (state->trans[transnr].count >= 0))) {
2020
		int newto = state->trans[transnr].to;
2021
2022
		if (ctxt->states[newto] == NULL)
2023
		    continue;
2024
		if (ctxt->states[newto]->reached == XML_REGEXP_MARK_NORMAL) {
2025
		    ctxt->states[newto]->reached = XML_REGEXP_MARK_START;
2026
		    target = ctxt->states[newto];
2027
		}
2028
	    }
2029
	}
2030
2031
	/*
2032
	 * find the next accessible state not explored
2033
	 */
2034
	if (target == NULL) {
2035
	    for (statenr = 1;statenr < ctxt->nbStates;statenr++) {
2036
		state = ctxt->states[statenr];
2037
		if ((state != NULL) && (state->reached ==
2038
			XML_REGEXP_MARK_START)) {
2039
		    target = state;
2040
		    break;
2041
		}
2042
	    }
2043
	}
2044
	state = target;
2045
    }
2046
    for (statenr = 0;statenr < ctxt->nbStates;statenr++) {
2047
	state = ctxt->states[statenr];
2048
	if ((state != NULL) && (state->reached == XML_REGEXP_MARK_NORMAL)) {
2049
#ifdef DEBUG_REGEXP_GRAPH
2050
	    printf("Removed unreachable state %d\n", statenr);
2051
#endif
2052
	    xmlRegFreeState(state);
2053
	    ctxt->states[statenr] = NULL;
2054
	}
2055
    }
2056
2057
}
2058
2059
static int
2060
xmlFACompareRanges(xmlRegRangePtr range1, xmlRegRangePtr range2) {
2061
    int ret = 0;
2062
2063
    if ((range1->type == XML_REGEXP_RANGES) ||
2064
        (range2->type == XML_REGEXP_RANGES) ||
2065
        (range2->type == XML_REGEXP_SUBREG) ||
2066
        (range1->type == XML_REGEXP_SUBREG) ||
2067
        (range1->type == XML_REGEXP_STRING) ||
2068
        (range2->type == XML_REGEXP_STRING))
2069
	return(-1);
2070
2071
    /* put them in order */
2072
    if (range1->type > range2->type) {
2073
        xmlRegRangePtr tmp;
2074
2075
	tmp = range1;
2076
	range1 = range2;
2077
	range2 = tmp;
2078
    }
2079
    if ((range1->type == XML_REGEXP_ANYCHAR) ||
2080
        (range2->type == XML_REGEXP_ANYCHAR)) {
2081
	ret = 1;
2082
    } else if ((range1->type == XML_REGEXP_EPSILON) ||
2083
               (range2->type == XML_REGEXP_EPSILON)) {
2084
	return(0);
2085
    } else if (range1->type == range2->type) {
2086
        if ((range1->type != XML_REGEXP_CHARVAL) ||
2087
	    (range1->end < range2->start) ||
2088
	    (range2->end < range1->start))
2089
	    ret = 1;
2090
	else
2091
	    ret = 0;
2092
    } else if (range1->type == XML_REGEXP_CHARVAL) {
2093
        int codepoint;
2094
	int neg = 0;
2095
2096
	/*
2097
	 * just check all codepoints in the range for acceptance,
2098
	 * this is usually way cheaper since done only once at
2099
	 * compilation than testing over and over at runtime or 
2100
	 * pushing too many states when evaluating.
2101
	 */
2102
	if (((range1->neg == 0) && (range2->neg != 0)) ||
2103
	    ((range1->neg != 0) && (range2->neg == 0)))
2104
	    neg = 1;
2105
2106
	for (codepoint = range1->start;codepoint <= range1->end ;codepoint++) {
2107
	    ret = xmlRegCheckCharacterRange(range2->type, codepoint,
2108
					    0, range2->start, range2->end,
2109
					    range2->blockName);
2110
	    if (ret < 0)
2111
	        return(-1);
2112
	    if (((neg == 1) && (ret == 0)) ||
2113
	        ((neg == 0) && (ret == 1)))
2114
		return(1);
2115
	}
2116
	return(0);
2117
    } else if ((range1->type == XML_REGEXP_BLOCK_NAME) ||
2118
               (range2->type == XML_REGEXP_BLOCK_NAME)) {
2119
	if (range1->type == range2->type) {
2120
	    ret = xmlStrEqual(range1->blockName, range2->blockName);
2121
	} else {
2122
	    /*
2123
	     * comparing a block range with anything else is way
2124
	     * too costly, and maintining the table is like too much
2125
	     * memory too, so let's force the automata to save state
2126
	     * here.
2127
	     */
2128
	    return(1);
2129
	}
2130
    } else if ((range1->type < XML_REGEXP_LETTER) ||
2131
               (range2->type < XML_REGEXP_LETTER)) {
2132
	if ((range1->type == XML_REGEXP_ANYSPACE) &&
2133
	    (range2->type == XML_REGEXP_NOTSPACE))
2134
	    ret = 0;
2135
	else if ((range1->type == XML_REGEXP_INITNAME) &&
2136
	         (range2->type == XML_REGEXP_NOTINITNAME))
2137
	    ret = 0;
2138
	else if ((range1->type == XML_REGEXP_NAMECHAR) &&
2139
	         (range2->type == XML_REGEXP_NOTNAMECHAR))
2140
	    ret = 0;
2141
	else if ((range1->type == XML_REGEXP_DECIMAL) &&
2142
	         (range2->type == XML_REGEXP_NOTDECIMAL))
2143
	    ret = 0;
2144
	else if ((range1->type == XML_REGEXP_REALCHAR) &&
2145
	         (range2->type == XML_REGEXP_NOTREALCHAR))
2146
	    ret = 0;
2147
	else {
2148
	    /* same thing to limit complexity */
2149
	    return(1);
2150
	}
2151
    } else {
2152
        ret = 0;
2153
        /* range1->type < range2->type here */
2154
        switch (range1->type) {
2155
	    case XML_REGEXP_LETTER:
2156
	         /* all disjoint except in the subgroups */
2157
	         if ((range2->type == XML_REGEXP_LETTER_UPPERCASE) ||
2158
		     (range2->type == XML_REGEXP_LETTER_LOWERCASE) ||
2159
		     (range2->type == XML_REGEXP_LETTER_TITLECASE) ||
2160
		     (range2->type == XML_REGEXP_LETTER_MODIFIER) ||
2161
		     (range2->type == XML_REGEXP_LETTER_OTHERS))
2162
		     ret = 1;
2163
		 break;
2164
	    case XML_REGEXP_MARK:
2165
	         if ((range2->type == XML_REGEXP_MARK_NONSPACING) ||
2166
		     (range2->type == XML_REGEXP_MARK_SPACECOMBINING) ||
2167
		     (range2->type == XML_REGEXP_MARK_ENCLOSING))
2168
		     ret = 1;
2169
		 break;
2170
	    case XML_REGEXP_NUMBER:
2171
	         if ((range2->type == XML_REGEXP_NUMBER_DECIMAL) ||
2172
		     (range2->type == XML_REGEXP_NUMBER_LETTER) ||
2173
		     (range2->type == XML_REGEXP_NUMBER_OTHERS))
2174
		     ret = 1;
2175
		 break;
2176
	    case XML_REGEXP_PUNCT:
2177
	         if ((range2->type == XML_REGEXP_PUNCT_CONNECTOR) ||
2178
		     (range2->type == XML_REGEXP_PUNCT_DASH) ||
2179
		     (range2->type == XML_REGEXP_PUNCT_OPEN) ||
2180
		     (range2->type == XML_REGEXP_PUNCT_CLOSE) ||
2181
		     (range2->type == XML_REGEXP_PUNCT_INITQUOTE) ||
2182
		     (range2->type == XML_REGEXP_PUNCT_FINQUOTE) ||
2183
		     (range2->type == XML_REGEXP_PUNCT_OTHERS))
2184
		     ret = 1;
2185
		 break;
2186
	    case XML_REGEXP_SEPAR:
2187
	         if ((range2->type == XML_REGEXP_SEPAR_SPACE) ||
2188
		     (range2->type == XML_REGEXP_SEPAR_LINE) ||
2189
		     (range2->type == XML_REGEXP_SEPAR_PARA))
2190
		     ret = 1;
2191
		 break;
2192
	    case XML_REGEXP_SYMBOL:
2193
	         if ((range2->type == XML_REGEXP_SYMBOL_MATH) ||
2194
		     (range2->type == XML_REGEXP_SYMBOL_CURRENCY) ||
2195
		     (range2->type == XML_REGEXP_SYMBOL_MODIFIER) ||
2196
		     (range2->type == XML_REGEXP_SYMBOL_OTHERS))
2197
		     ret = 1;
2198
		 break;
2199
	    case XML_REGEXP_OTHER:
2200
	         if ((range2->type == XML_REGEXP_OTHER_CONTROL) ||
2201
		     (range2->type == XML_REGEXP_OTHER_FORMAT) ||
2202
		     (range2->type == XML_REGEXP_OTHER_PRIVATE))
2203
		     ret = 1;
2204
		 break;
2205
            default:
2206
	         if ((range2->type >= XML_REGEXP_LETTER) &&
2207
		     (range2->type < XML_REGEXP_BLOCK_NAME))
2208
		     ret = 0;
2209
		 else {
2210
		     /* safety net ! */
2211
		     return(1);
2212
		 }
2213
	}
2214
    }
2215
    if (((range1->neg == 0) && (range2->neg != 0)) ||
2216
        ((range1->neg != 0) && (range2->neg == 0)))
2217
	ret = !ret;
2218
    return(1);
2219
}
2220
2221
/**
2222
 * xmlFACompareAtomTypes:
2223
 * @type1:  an atom type
2224
 * @type2:  an atom type
2225
 *
2226
 * Compares two atoms type to check whether they intersect in some ways,
2227
 * this is used by xmlFACompareAtoms only
2228
 *
2229
 * Returns 1 if they may intersect and 0 otherwise
2230
 */
2231
static int
2232
xmlFACompareAtomTypes(xmlRegAtomType type1, xmlRegAtomType type2) {
2233
    if ((type1 == XML_REGEXP_EPSILON) ||
2234
        (type1 == XML_REGEXP_CHARVAL) ||
2235
	(type1 == XML_REGEXP_RANGES) ||
2236
	(type1 == XML_REGEXP_SUBREG) ||
2237
	(type1 == XML_REGEXP_STRING) ||
2238
	(type1 == XML_REGEXP_ANYCHAR))
2239
	return(1);
2240
    if ((type2 == XML_REGEXP_EPSILON) ||
2241
        (type2 == XML_REGEXP_CHARVAL) ||
2242
	(type2 == XML_REGEXP_RANGES) ||
2243
	(type2 == XML_REGEXP_SUBREG) ||
2244
	(type2 == XML_REGEXP_STRING) ||
2245
	(type2 == XML_REGEXP_ANYCHAR))
2246
	return(1);
2247
2248
    if (type1 == type2) return(1);
2249
2250
    /* simplify subsequent compares by making sure type1 < type2 */
2251
    if (type1 > type2) {
2252
        xmlRegAtomType tmp = type1;
2253
	type1 = type2;
2254
	type2 = tmp;
2255
    }
2256
    switch (type1) {
2257
        case XML_REGEXP_ANYSPACE: /* \s */
2258
	    /* can't be a letter, number, mark, pontuation, symbol */
2259
	    if ((type2 == XML_REGEXP_NOTSPACE) ||
2260
		((type2 >= XML_REGEXP_LETTER) &&
2261
		 (type2 <= XML_REGEXP_LETTER_OTHERS)) ||
2262
	        ((type2 >= XML_REGEXP_NUMBER) &&
2263
		 (type2 <= XML_REGEXP_NUMBER_OTHERS)) ||
2264
	        ((type2 >= XML_REGEXP_MARK) &&
2265
		 (type2 <= XML_REGEXP_MARK_ENCLOSING)) ||
2266
	        ((type2 >= XML_REGEXP_PUNCT) &&
2267
		 (type2 <= XML_REGEXP_PUNCT_OTHERS)) ||
2268
	        ((type2 >= XML_REGEXP_SYMBOL) &&
2269
		 (type2 <= XML_REGEXP_SYMBOL_OTHERS))
2270
	        ) return(0);
2271
	    break;
2272
        case XML_REGEXP_NOTSPACE: /* \S */
2273
	    break;
2274
        case XML_REGEXP_INITNAME: /* \l */
2275
	    /* can't be a number, mark, separator, pontuation, symbol or other */
2276
	    if ((type2 == XML_REGEXP_NOTINITNAME) ||
2277
	        ((type2 >= XML_REGEXP_NUMBER) &&
2278
		 (type2 <= XML_REGEXP_NUMBER_OTHERS)) ||
2279
	        ((type2 >= XML_REGEXP_MARK) &&
2280
		 (type2 <= XML_REGEXP_MARK_ENCLOSING)) ||
2281
	        ((type2 >= XML_REGEXP_SEPAR) &&
2282
		 (type2 <= XML_REGEXP_SEPAR_PARA)) ||
2283
	        ((type2 >= XML_REGEXP_PUNCT) &&
2284
		 (type2 <= XML_REGEXP_PUNCT_OTHERS)) ||
2285
	        ((type2 >= XML_REGEXP_SYMBOL) &&
2286
		 (type2 <= XML_REGEXP_SYMBOL_OTHERS)) ||
2287
	        ((type2 >= XML_REGEXP_OTHER) &&
2288
		 (type2 <= XML_REGEXP_OTHER_NA))
2289
		) return(0);
2290
	    break;
2291
        case XML_REGEXP_NOTINITNAME: /* \L */
2292
	    break;
2293
        case XML_REGEXP_NAMECHAR: /* \c */
2294
	    /* can't be a mark, separator, pontuation, symbol or other */
2295
	    if ((type2 == XML_REGEXP_NOTNAMECHAR) ||
2296
	        ((type2 >= XML_REGEXP_MARK) &&
2297
		 (type2 <= XML_REGEXP_MARK_ENCLOSING)) ||
2298
	        ((type2 >= XML_REGEXP_PUNCT) &&
2299
		 (type2 <= XML_REGEXP_PUNCT_OTHERS)) ||
2300
	        ((type2 >= XML_REGEXP_SEPAR) &&
2301
		 (type2 <= XML_REGEXP_SEPAR_PARA)) ||
2302
	        ((type2 >= XML_REGEXP_SYMBOL) &&
2303
		 (type2 <= XML_REGEXP_SYMBOL_OTHERS)) ||
2304
	        ((type2 >= XML_REGEXP_OTHER) &&
2305
		 (type2 <= XML_REGEXP_OTHER_NA))
2306
		) return(0);
2307
	    break;
2308
        case XML_REGEXP_NOTNAMECHAR: /* \C */
2309
	    break;
2310
        case XML_REGEXP_DECIMAL: /* \d */
2311
	    /* can't be a letter, mark, separator, pontuation, symbol or other */
2312
	    if ((type2 == XML_REGEXP_NOTDECIMAL) ||
2313
	        (type2 == XML_REGEXP_REALCHAR) ||
2314
		((type2 >= XML_REGEXP_LETTER) &&
2315
		 (type2 <= XML_REGEXP_LETTER_OTHERS)) ||
2316
	        ((type2 >= XML_REGEXP_MARK) &&
2317
		 (type2 <= XML_REGEXP_MARK_ENCLOSING)) ||
2318
	        ((type2 >= XML_REGEXP_PUNCT) &&
2319
		 (type2 <= XML_REGEXP_PUNCT_OTHERS)) ||
2320
	        ((type2 >= XML_REGEXP_SEPAR) &&
2321
		 (type2 <= XML_REGEXP_SEPAR_PARA)) ||
2322
	        ((type2 >= XML_REGEXP_SYMBOL) &&
2323
		 (type2 <= XML_REGEXP_SYMBOL_OTHERS)) ||
2324
	        ((type2 >= XML_REGEXP_OTHER) &&
2325
		 (type2 <= XML_REGEXP_OTHER_NA))
2326
		)return(0);
2327
	    break;
2328
        case XML_REGEXP_NOTDECIMAL: /* \D */
2329
	    break;
2330
        case XML_REGEXP_REALCHAR: /* \w */
2331
	    /* can't be a mark, separator, pontuation, symbol or other */
2332
	    if ((type2 == XML_REGEXP_NOTDECIMAL) ||
2333
	        ((type2 >= XML_REGEXP_MARK) &&
2334
		 (type2 <= XML_REGEXP_MARK_ENCLOSING)) ||
2335
	        ((type2 >= XML_REGEXP_PUNCT) &&
2336
		 (type2 <= XML_REGEXP_PUNCT_OTHERS)) ||
2337
	        ((type2 >= XML_REGEXP_SEPAR) &&
2338
		 (type2 <= XML_REGEXP_SEPAR_PARA)) ||
2339
	        ((type2 >= XML_REGEXP_SYMBOL) &&
2340
		 (type2 <= XML_REGEXP_SYMBOL_OTHERS)) ||
2341
	        ((type2 >= XML_REGEXP_OTHER) &&
2342
		 (type2 <= XML_REGEXP_OTHER_NA))
2343
		)return(0);
2344
	    break;
2345
        case XML_REGEXP_NOTREALCHAR: /* \W */
2346
	    break;
2347
	/*
2348
	 * at that point we know both type 1 and type2 are from
2349
	 * character categories are ordered and are different,
2350
	 * it becomes simple because this is a partition
2351
	 */
2352
        case XML_REGEXP_LETTER:
2353
	    if (type2 <= XML_REGEXP_LETTER_OTHERS)
2354
	        return(1);
2355
	    return(0);
2356
        case XML_REGEXP_LETTER_UPPERCASE:
2357
        case XML_REGEXP_LETTER_LOWERCASE:
2358
        case XML_REGEXP_LETTER_TITLECASE:
2359
        case XML_REGEXP_LETTER_MODIFIER:
2360
        case XML_REGEXP_LETTER_OTHERS:
2361
	    return(0);
2362
        case XML_REGEXP_MARK:
2363
	    if (type2 <= XML_REGEXP_MARK_ENCLOSING)
2364
	        return(1);
2365
	    return(0);
2366
        case XML_REGEXP_MARK_NONSPACING:
2367
        case XML_REGEXP_MARK_SPACECOMBINING:
2368
        case XML_REGEXP_MARK_ENCLOSING:
2369
	    return(0);
2370
        case XML_REGEXP_NUMBER:
2371
	    if (type2 <= XML_REGEXP_NUMBER_OTHERS)
2372
	        return(1);
2373
	    return(0);
2374
        case XML_REGEXP_NUMBER_DECIMAL:
2375
        case XML_REGEXP_NUMBER_LETTER:
2376
        case XML_REGEXP_NUMBER_OTHERS:
2377
	    return(0);
2378
        case XML_REGEXP_PUNCT:
2379
	    if (type2 <= XML_REGEXP_PUNCT_OTHERS)
2380
	        return(1);
2381
	    return(0);
2382
        case XML_REGEXP_PUNCT_CONNECTOR:
2383
        case XML_REGEXP_PUNCT_DASH:
2384
        case XML_REGEXP_PUNCT_OPEN:
2385
        case XML_REGEXP_PUNCT_CLOSE:
2386
        case XML_REGEXP_PUNCT_INITQUOTE:
2387
        case XML_REGEXP_PUNCT_FINQUOTE:
2388
        case XML_REGEXP_PUNCT_OTHERS:
2389
	    return(0);
2390
        case XML_REGEXP_SEPAR:
2391
	    if (type2 <= XML_REGEXP_SEPAR_PARA)
2392
	        return(1);
2393
	    return(0);
2394
        case XML_REGEXP_SEPAR_SPACE:
2395
        case XML_REGEXP_SEPAR_LINE:
2396
        case XML_REGEXP_SEPAR_PARA:
2397
	    return(0);
2398
        case XML_REGEXP_SYMBOL:
2399
	    if (type2 <= XML_REGEXP_SYMBOL_OTHERS)
2400
	        return(1);
2401
	    return(0);
2402
        case XML_REGEXP_SYMBOL_MATH:
2403
        case XML_REGEXP_SYMBOL_CURRENCY:
2404
        case XML_REGEXP_SYMBOL_MODIFIER:
2405
        case XML_REGEXP_SYMBOL_OTHERS:
2406
	    return(0);
2407
        case XML_REGEXP_OTHER:
2408
	    if (type2 <= XML_REGEXP_OTHER_NA)
2409
	        return(1);
2410
	    return(0);
2411
        case XML_REGEXP_OTHER_CONTROL:
2412
        case XML_REGEXP_OTHER_FORMAT:
2413
        case XML_REGEXP_OTHER_PRIVATE:
2414
        case XML_REGEXP_OTHER_NA:
2415
	    return(0);
2416
	default:
2417
	    break;
2418
    }
2419
    return(1);
2420
}
2421
2422
/**
2423
 * xmlFAEqualAtoms:
2424
 * @atom1:  an atom
2425
 * @atom2:  an atom
2426
 *
2427
 * Compares two atoms to check whether they are the same exactly
2428
 * this is used to remove equivalent transitions
2429
 *
2430
 * Returns 1 if same and 0 otherwise
2431
 */
2432
static int
2433
xmlFAEqualAtoms(xmlRegAtomPtr atom1, xmlRegAtomPtr atom2) {
2434
    int ret = 0;
2435
2436
    if (atom1 == atom2)
2437
	return(1);
2438
    if ((atom1 == NULL) || (atom2 == NULL))
2439
	return(0);
2440
2441
    if (atom1->type != atom2->type)
2442
        return(0);
2443
    switch (atom1->type) {
2444
        case XML_REGEXP_EPSILON:
2445
	    ret = 0;
2446
	    break;
2447
        case XML_REGEXP_STRING:
2448
	    ret = xmlStrEqual((xmlChar *)atom1->valuep,
2449
	                      (xmlChar *)atom2->valuep);
2450
	    break;
2451
        case XML_REGEXP_CHARVAL:
2452
	    ret = (atom1->codepoint == atom2->codepoint);
2453
	    break;
2454
	case XML_REGEXP_RANGES:
2455
	    /* too hard to do in the general case */
2456
	    ret = 0;
2457
	default:
2458
	    break;
2459
    }
2460
    return(ret);
2461
}
2462
2463
/**
2464
 * xmlFACompareAtoms:
2465
 * @atom1:  an atom
2466
 * @atom2:  an atom
2467
 *
2468
 * Compares two atoms to check whether they intersect in some ways,
2469
 * this is used by xmlFAComputesDeterminism and xmlFARecurseDeterminism only
2470
 *
2471
 * Returns 1 if yes and 0 otherwise
2472
 */
2473
static int
2474
xmlFACompareAtoms(xmlRegAtomPtr atom1, xmlRegAtomPtr atom2) {
2475
    int ret = 1;
2476
2477
    if (atom1 == atom2)
2478
	return(1);
2479
    if ((atom1 == NULL) || (atom2 == NULL))
2480
	return(0);
2481
2482
    if ((atom1->type == XML_REGEXP_ANYCHAR) ||
2483
        (atom2->type == XML_REGEXP_ANYCHAR))
2484
	return(1);
2485
2486
    if (atom1->type > atom2->type) {
2487
	xmlRegAtomPtr tmp;
2488
	tmp = atom1;
2489
	atom1 = atom2;
2490
	atom2 = tmp;
2491
    }
2492
    if (atom1->type != atom2->type) {
2493
        ret = xmlFACompareAtomTypes(atom1->type, atom2->type);
2494
	/* if they can't intersect at the type level break now */
2495
	if (ret == 0)
2496
	    return(0);
2497
    }
2498
    switch (atom1->type) {
2499
        case XML_REGEXP_STRING:
2500
	    ret = xmlRegStrEqualWildcard((xmlChar *)atom1->valuep,
2501
	                                 (xmlChar *)atom2->valuep);
2502
	    break;
2503
        case XML_REGEXP_EPSILON:
2504
	    goto not_determinist;
2505
        case XML_REGEXP_CHARVAL:
2506
	    if (atom2->type == XML_REGEXP_CHARVAL) {
2507
		ret = (atom1->codepoint == atom2->codepoint);
2508
	    } else {
2509
	        ret = xmlRegCheckCharacter(atom2, atom1->codepoint);
2510
		if (ret < 0)
2511
		    ret = 1;
2512
	    }
2513
	    break;
2514
        case XML_REGEXP_RANGES:
2515
	    if (atom2->type == XML_REGEXP_RANGES) {
2516
	        int i, j, res;
2517
		xmlRegRangePtr r1, r2;
2518
2519
		/*
2520
		 * need to check that none of the ranges eventually matches
2521
		 */
2522
		for (i = 0;i < atom1->nbRanges;i++) {
2523
		    for (j = 0;j < atom2->nbRanges;j++) {
2524
			r1 = atom1->ranges[i];
2525
			r2 = atom2->ranges[j];
2526
			res = xmlFACompareRanges(r1, r2);
2527
			if (res == 1) {
2528
			    ret = 1;
2529
			    goto done;
2530
			}
2531
		    }
2532
		}
2533
		ret = 0;
2534
	    }
2535
	    break;
2536
	default:
2537
	    goto not_determinist;
2538
    }
2539
done:
2540
    if (atom1->neg != atom2->neg) {
2541
        ret = !ret;
2542
    }
2543
    if (ret == 0)
2544
        return(0);
2545
not_determinist:
2546
    return(1);
2547
}
2548
2549
/**
2550
 * xmlFARecurseDeterminism:
2551
 * @ctxt:  a regexp parser context
2552
 *
2553
 * Check whether the associated regexp is determinist,
2554
 * should be called after xmlFAEliminateEpsilonTransitions()
2555
 *
2556
 */
2557
static int
2558
xmlFARecurseDeterminism(xmlRegParserCtxtPtr ctxt, xmlRegStatePtr state,
2559
	                 int to, xmlRegAtomPtr atom) {
2560
    int ret = 1;
2561
    int res;
2562
    int transnr, nbTrans;
2563
    xmlRegTransPtr t1;
2564
2565
    if (state == NULL)
2566
	return(ret);
2567
    /*
2568
     * don't recurse on transitions potentially added in the course of
2569
     * the elimination.
2570
     */
2571
    nbTrans = state->nbTrans;
2572
    for (transnr = 0;transnr < nbTrans;transnr++) {
2573
	t1 = &(state->trans[transnr]);
2574
	/*
2575
	 * check transitions conflicting with the one looked at
2576
	 */
2577
	if (t1->atom == NULL) {
2578
	    if (t1->to < 0)
2579
		continue;
2580
	    res = xmlFARecurseDeterminism(ctxt, ctxt->states[t1->to],
2581
		                           to, atom);
2582
	    if (res == 0) {
2583
	        ret = 0;
2584
		/* t1->nd = 1; */
2585
	    }
2586
	    continue;
2587
	}
2588
	if (t1->to != to)
2589
	    continue;
2590
	if (xmlFACompareAtoms(t1->atom, atom)) {
2591
	    ret = 0;
2592
	    /* mark the transition as non-deterministic */
2593
	    t1->nd = 1;
2594
	}
2595
    }
2596
    return(ret);
2597
}
2598
2599
/**
2600
 * xmlFAComputesDeterminism:
2601
 * @ctxt:  a regexp parser context
2602
 *
2603
 * Check whether the associated regexp is determinist,
2604
 * should be called after xmlFAEliminateEpsilonTransitions()
2605
 *
2606
 */
2607
static int
2608
xmlFAComputesDeterminism(xmlRegParserCtxtPtr ctxt) {
2609
    int statenr, transnr;
2610
    xmlRegStatePtr state;
2611
    xmlRegTransPtr t1, t2, last;
2612
    int i;
2613
    int ret = 1;
2614
2615
#ifdef DEBUG_REGEXP_GRAPH
2616
    printf("xmlFAComputesDeterminism\n");
2617
    xmlRegPrintCtxt(stdout, ctxt);
2618
#endif
2619
    if (ctxt->determinist != -1)
2620
	return(ctxt->determinist);
2621
2622
    /*
2623
     * First cleanup the automata removing cancelled transitions
2624
     */
2625
    for (statenr = 0;statenr < ctxt->nbStates;statenr++) {
2626
	state = ctxt->states[statenr];
2627
	if (state == NULL)
2628
	    continue;
2629
	if (state->nbTrans < 2)
2630
	    continue;
2631
	for (transnr = 0;transnr < state->nbTrans;transnr++) {
2632
	    t1 = &(state->trans[transnr]);
2633
	    /*
2634
	     * Determinism checks in case of counted or all transitions
2635
	     * will have to be handled separately
2636
	     */
2637
	    if (t1->atom == NULL) {
2638
		/* t1->nd = 1; */
2639
		continue;
2640
	    }
2641
	    if (t1->to == -1) /* eliminated */
2642
		continue;
2643
	    for (i = 0;i < transnr;i++) {
2644
		t2 = &(state->trans[i]);
2645
		if (t2->to == -1) /* eliminated */
2646
		    continue;
2647
		if (t2->atom != NULL) {
2648
		    if (t1->to == t2->to) {
2649
			if (xmlFAEqualAtoms(t1->atom, t2->atom))
2650
			    t2->to = -1; /* eliminated */
2651
		    }
2652
		}
2653
	    }
2654
	}
2655
    }
2656
2657
    /*
2658
     * Check for all states that there aren't 2 transitions
2659
     * with the same atom and a different target.
2660
     */
2661
    for (statenr = 0;statenr < ctxt->nbStates;statenr++) {
2662
	state = ctxt->states[statenr];
2663
	if (state == NULL)
2664
	    continue;
2665
	if (state->nbTrans < 2)
2666
	    continue;
2667
	last = NULL;
2668
	for (transnr = 0;transnr < state->nbTrans;transnr++) {
2669
	    t1 = &(state->trans[transnr]);
2670
	    /*
2671
	     * Determinism checks in case of counted or all transitions
2672
	     * will have to be handled separately
2673
	     */
2674
	    if (t1->atom == NULL) {
2675
		continue;
2676
	    }
2677
	    if (t1->to == -1) /* eliminated */
2678
		continue;
2679
	    for (i = 0;i < transnr;i++) {
2680
		t2 = &(state->trans[i]);
2681
		if (t2->to == -1) /* eliminated */
2682
		    continue;
2683
		if (t2->atom != NULL) {
2684
		    /* not determinist ! */
2685
		    if (xmlFACompareAtoms(t1->atom, t2->atom)) {
2686
			ret = 0;
2687
			/* mark the transitions as non-deterministic ones */
2688
			t1->nd = 1;
2689
			t2->nd = 1;
2690
			last = t1;
2691
		    }
2692
		} else if (t1->to != -1) {
2693
		    /*
2694
		     * do the closure in case of remaining specific
2695
		     * epsilon transitions like choices or all
2696
		     */
2697
		    ret = xmlFARecurseDeterminism(ctxt, ctxt->states[t1->to],
2698
						   t2->to, t2->atom);
2699
		    /* don't shortcut the computation so all non deterministic
2700
		       transition get marked down
2701
		    if (ret == 0)
2702
			return(0);
2703
		     */
2704
		    if (ret == 0) {
2705
			t1->nd = 1;
2706
			/* t2->nd = 1; */
2707
			last = t1;
2708
		    }
2709
		}
2710
	    }
2711
	    /* don't shortcut the computation so all non deterministic
2712
	       transition get marked down
2713
	    if (ret == 0)
2714
		break; */
2715
	}
2716
2717
	/*
2718
	 * mark specifically the last non-deterministic transition
2719
	 * from a state since there is no need to set-up rollback
2720
	 * from it
2721
	 */
2722
	if (last != NULL) {
2723
	    last->nd = 2;
2724
	}
2725
2726
	/* don't shortcut the computation so all non deterministic
2727
	   transition get marked down
2728
	if (ret == 0)
2729
	    break; */
2730
    }
2731
2732
    ctxt->determinist = ret;
2733
    return(ret);
2734
}
2735
2736
/************************************************************************
2737
 * 									*
2738
 *	Routines to check input against transition atoms		*
2739
 * 									*
2740
 ************************************************************************/
2741
2742
static int
2743
xmlRegCheckCharacterRange(xmlRegAtomType type, int codepoint, int neg,
2744
	                  int start, int end, const xmlChar *blockName) {
2745
    int ret = 0;
2746
2747
    switch (type) {
2748
        case XML_REGEXP_STRING:
2749
        case XML_REGEXP_SUBREG:
2750
        case XML_REGEXP_RANGES:
2751
        case XML_REGEXP_EPSILON:
2752
	    return(-1);
2753
        case XML_REGEXP_ANYCHAR:
2754
	    ret = ((codepoint != '\n') && (codepoint != '\r'));
2755
	    break;
2756
        case XML_REGEXP_CHARVAL:
2757
	    ret = ((codepoint >= start) && (codepoint <= end));
2758
	    break;
2759
        case XML_REGEXP_NOTSPACE:
2760
	    neg = !neg;
2761
        case XML_REGEXP_ANYSPACE:
2762
	    ret = ((codepoint == '\n') || (codepoint == '\r') ||
2763
		   (codepoint == '\t') || (codepoint == ' '));
2764
	    break;
2765
        case XML_REGEXP_NOTINITNAME:
2766
	    neg = !neg;
2767
        case XML_REGEXP_INITNAME:
2768
	    ret = (IS_LETTER(codepoint) || 
2769
		   (codepoint == '_') || (codepoint == ':'));
2770
	    break;
2771
        case XML_REGEXP_NOTNAMECHAR:
2772
	    neg = !neg;
2773
        case XML_REGEXP_NAMECHAR:
2774
	    ret = (IS_LETTER(codepoint) || IS_DIGIT(codepoint) ||
2775
		   (codepoint == '.') || (codepoint == '-') ||
2776
		   (codepoint == '_') || (codepoint == ':') ||
2777
		   IS_COMBINING(codepoint) || IS_EXTENDER(codepoint));
2778
	    break;
2779
        case XML_REGEXP_NOTDECIMAL:
2780
	    neg = !neg;
2781
        case XML_REGEXP_DECIMAL:
2782
	    ret = xmlUCSIsCatNd(codepoint);
2783
	    break;
2784
        case XML_REGEXP_REALCHAR:
2785
	    neg = !neg;
2786
        case XML_REGEXP_NOTREALCHAR:
2787
	    ret = xmlUCSIsCatP(codepoint);
2788
	    if (ret == 0)
2789
		ret = xmlUCSIsCatZ(codepoint);
2790
	    if (ret == 0)
2791
		ret = xmlUCSIsCatC(codepoint);
2792
	    break;
2793
        case XML_REGEXP_LETTER:
2794
	    ret = xmlUCSIsCatL(codepoint);
2795
	    break;
2796
        case XML_REGEXP_LETTER_UPPERCASE:
2797
	    ret = xmlUCSIsCatLu(codepoint);
2798
	    break;
2799
        case XML_REGEXP_LETTER_LOWERCASE:
2800
	    ret = xmlUCSIsCatLl(codepoint);
2801
	    break;
2802
        case XML_REGEXP_LETTER_TITLECASE:
2803
	    ret = xmlUCSIsCatLt(codepoint);
2804
	    break;
2805
        case XML_REGEXP_LETTER_MODIFIER:
2806
	    ret = xmlUCSIsCatLm(codepoint);
2807
	    break;
2808
        case XML_REGEXP_LETTER_OTHERS:
2809
	    ret = xmlUCSIsCatLo(codepoint);
2810
	    break;
2811
        case XML_REGEXP_MARK:
2812
	    ret = xmlUCSIsCatM(codepoint);
2813
	    break;
2814
        case XML_REGEXP_MARK_NONSPACING:
2815
	    ret = xmlUCSIsCatMn(codepoint);
2816
	    break;
2817
        case XML_REGEXP_MARK_SPACECOMBINING:
2818
	    ret = xmlUCSIsCatMc(codepoint);
2819
	    break;
2820
        case XML_REGEXP_MARK_ENCLOSING:
2821
	    ret = xmlUCSIsCatMe(codepoint);
2822
	    break;
2823
        case XML_REGEXP_NUMBER:
2824
	    ret = xmlUCSIsCatN(codepoint);
2825
	    break;
2826
        case XML_REGEXP_NUMBER_DECIMAL:
2827
	    ret = xmlUCSIsCatNd(codepoint);
2828
	    break;
2829
        case XML_REGEXP_NUMBER_LETTER:
2830
	    ret = xmlUCSIsCatNl(codepoint);
2831
	    break;
2832
        case XML_REGEXP_NUMBER_OTHERS:
2833
	    ret = xmlUCSIsCatNo(codepoint);
2834
	    break;
2835
        case XML_REGEXP_PUNCT:
2836
	    ret = xmlUCSIsCatP(codepoint);
2837
	    break;
2838
        case XML_REGEXP_PUNCT_CONNECTOR:
2839
	    ret = xmlUCSIsCatPc(codepoint);
2840
	    break;
2841
        case XML_REGEXP_PUNCT_DASH:
2842
	    ret = xmlUCSIsCatPd(codepoint);
2843
	    break;
2844
        case XML_REGEXP_PUNCT_OPEN:
2845
	    ret = xmlUCSIsCatPs(codepoint);
2846
	    break;
2847
        case XML_REGEXP_PUNCT_CLOSE:
2848
	    ret = xmlUCSIsCatPe(codepoint);
2849
	    break;
2850
        case XML_REGEXP_PUNCT_INITQUOTE:
2851
	    ret = xmlUCSIsCatPi(codepoint);
2852
	    break;
2853
        case XML_REGEXP_PUNCT_FINQUOTE:
2854
	    ret = xmlUCSIsCatPf(codepoint);
2855
	    break;
2856
        case XML_REGEXP_PUNCT_OTHERS:
2857
	    ret = xmlUCSIsCatPo(codepoint);
2858
	    break;
2859
        case XML_REGEXP_SEPAR:
2860
	    ret = xmlUCSIsCatZ(codepoint);
2861
	    break;
2862
        case XML_REGEXP_SEPAR_SPACE:
2863
	    ret = xmlUCSIsCatZs(codepoint);
2864
	    break;
2865
        case XML_REGEXP_SEPAR_LINE:
2866
	    ret = xmlUCSIsCatZl(codepoint);
2867
	    break;
2868
        case XML_REGEXP_SEPAR_PARA:
2869
	    ret = xmlUCSIsCatZp(codepoint);
2870
	    break;
2871
        case XML_REGEXP_SYMBOL:
2872
	    ret = xmlUCSIsCatS(codepoint);
2873
	    break;
2874
        case XML_REGEXP_SYMBOL_MATH:
2875
	    ret = xmlUCSIsCatSm(codepoint);
2876
	    break;
2877
        case XML_REGEXP_SYMBOL_CURRENCY:
2878
	    ret = xmlUCSIsCatSc(codepoint);
2879
	    break;
2880
        case XML_REGEXP_SYMBOL_MODIFIER:
2881
	    ret = xmlUCSIsCatSk(codepoint);
2882
	    break;
2883
        case XML_REGEXP_SYMBOL_OTHERS:
2884
	    ret = xmlUCSIsCatSo(codepoint);
2885
	    break;
2886
        case XML_REGEXP_OTHER:
2887
	    ret = xmlUCSIsCatC(codepoint);
2888
	    break;
2889
        case XML_REGEXP_OTHER_CONTROL:
2890
	    ret = xmlUCSIsCatCc(codepoint);
2891
	    break;
2892
        case XML_REGEXP_OTHER_FORMAT:
2893
	    ret = xmlUCSIsCatCf(codepoint);
2894
	    break;
2895
        case XML_REGEXP_OTHER_PRIVATE:
2896
	    ret = xmlUCSIsCatCo(codepoint);
2897
	    break;
2898
        case XML_REGEXP_OTHER_NA:
2899
	    /* ret = xmlUCSIsCatCn(codepoint); */
2900
	    /* Seems it doesn't exist anymore in recent Unicode releases */
2901
	    ret = 0;
2902
	    break;
2903
        case XML_REGEXP_BLOCK_NAME:
2904
	    ret = xmlUCSIsBlock(codepoint, (const char *) blockName);
2905
	    break;
2906
    }
2907
    if (neg)
2908
	return(!ret);
2909
    return(ret);
2910
}
2911
2912
static int
2913
xmlRegCheckCharacter(xmlRegAtomPtr atom, int codepoint) {
2914
    int i, ret = 0;
2915
    xmlRegRangePtr range;
2916
2917
    if ((atom == NULL) || (!IS_CHAR(codepoint)))
2918
	return(-1);
2919
2920
    switch (atom->type) {
2921
        case XML_REGEXP_SUBREG:
2922
        case XML_REGEXP_EPSILON:
2923
	    return(-1);
2924
        case XML_REGEXP_CHARVAL:
2925
            return(codepoint == atom->codepoint);
2926
        case XML_REGEXP_RANGES: {
2927
	    int accept = 0;
2928
2929
	    for (i = 0;i < atom->nbRanges;i++) {
2930
		range = atom->ranges[i];
2931
		if (range->neg == 2) {
2932
		    ret = xmlRegCheckCharacterRange(range->type, codepoint,
2933
						0, range->start, range->end,
2934
						range->blockName);
2935
		    if (ret != 0)
2936
			return(0); /* excluded char */
2937
		} else if (range->neg) {
2938
		    ret = xmlRegCheckCharacterRange(range->type, codepoint,
2939
						0, range->start, range->end,
2940
						range->blockName);
2941
		    if (ret == 0)
2942
		        accept = 1;
2943
		    else
2944
		        return(0);
2945
		} else {
2946
		    ret = xmlRegCheckCharacterRange(range->type, codepoint,
2947
						0, range->start, range->end,
2948
						range->blockName);
2949
		    if (ret != 0)
2950
			accept = 1; /* might still be excluded */
2951
		}
2952
	    }
2953
	    return(accept);
2954
	}
2955
        case XML_REGEXP_STRING:
2956
	    printf("TODO: XML_REGEXP_STRING\n");
2957
	    return(-1);
2958
        case XML_REGEXP_ANYCHAR:
2959
        case XML_REGEXP_ANYSPACE:
2960
        case XML_REGEXP_NOTSPACE:
2961
        case XML_REGEXP_INITNAME:
2962
        case XML_REGEXP_NOTINITNAME:
2963
        case XML_REGEXP_NAMECHAR:
2964
        case XML_REGEXP_NOTNAMECHAR:
2965
        case XML_REGEXP_DECIMAL:
2966
        case XML_REGEXP_NOTDECIMAL:
2967
        case XML_REGEXP_REALCHAR:
2968
        case XML_REGEXP_NOTREALCHAR:
2969
        case XML_REGEXP_LETTER:
2970
        case XML_REGEXP_LETTER_UPPERCASE:
2971
        case XML_REGEXP_LETTER_LOWERCASE:
2972
        case XML_REGEXP_LETTER_TITLECASE:
2973
        case XML_REGEXP_LETTER_MODIFIER:
2974
        case XML_REGEXP_LETTER_OTHERS:
2975
        case XML_REGEXP_MARK:
2976
        case XML_REGEXP_MARK_NONSPACING:
2977
        case XML_REGEXP_MARK_SPACECOMBINING:
2978
        case XML_REGEXP_MARK_ENCLOSING:
2979
        case XML_REGEXP_NUMBER:
2980
        case XML_REGEXP_NUMBER_DECIMAL:
2981
        case XML_REGEXP_NUMBER_LETTER:
2982
        case XML_REGEXP_NUMBER_OTHERS:
2983
        case XML_REGEXP_PUNCT:
2984
        case XML_REGEXP_PUNCT_CONNECTOR:
2985
        case XML_REGEXP_PUNCT_DASH:
2986
        case XML_REGEXP_PUNCT_OPEN:
2987
        case XML_REGEXP_PUNCT_CLOSE:
2988
        case XML_REGEXP_PUNCT_INITQUOTE:
2989
        case XML_REGEXP_PUNCT_FINQUOTE:
2990
        case XML_REGEXP_PUNCT_OTHERS:
2991
        case XML_REGEXP_SEPAR:
2992
        case XML_REGEXP_SEPAR_SPACE:
2993
        case XML_REGEXP_SEPAR_LINE:
2994
        case XML_REGEXP_SEPAR_PARA:
2995
        case XML_REGEXP_SYMBOL:
2996
        case XML_REGEXP_SYMBOL_MATH:
2997
        case XML_REGEXP_SYMBOL_CURRENCY:
2998
        case XML_REGEXP_SYMBOL_MODIFIER:
2999
        case XML_REGEXP_SYMBOL_OTHERS:
3000
        case XML_REGEXP_OTHER:
3001
        case XML_REGEXP_OTHER_CONTROL:
3002
        case XML_REGEXP_OTHER_FORMAT:
3003
        case XML_REGEXP_OTHER_PRIVATE:
3004
        case XML_REGEXP_OTHER_NA:
3005
	case XML_REGEXP_BLOCK_NAME:
3006
	    ret = xmlRegCheckCharacterRange(atom->type, codepoint, 0, 0, 0,
3007
		                            (const xmlChar *)atom->valuep);
3008
	    if (atom->neg)
3009
		ret = !ret;
3010
	    break;
3011
    }
3012
    return(ret);
3013
}
3014
3015
/************************************************************************
3016
 * 									*
3017
 *	Saving and restoring state of an execution context		*
3018
 * 									*
3019
 ************************************************************************/
3020
3021
#ifdef DEBUG_REGEXP_EXEC
3022
static void
3023
xmlFARegDebugExec(xmlRegExecCtxtPtr exec) {
3024
    printf("state: %d:%d:idx %d", exec->state->no, exec->transno, exec->index);
3025
    if (exec->inputStack != NULL) {
3026
	int i;
3027
	printf(": ");
3028
	for (i = 0;(i < 3) && (i < exec->inputStackNr);i++)
3029
	    printf("%s ", (const char *)
3030
	           exec->inputStack[exec->inputStackNr - (i + 1)].value);
3031
    } else {
3032
	printf(": %s", &(exec->inputString[exec->index]));
3033
    }
3034
    printf("\n");
3035
}
3036
#endif
3037
3038
static void
3039
xmlFARegExecSave(xmlRegExecCtxtPtr exec) {
3040
#ifdef DEBUG_REGEXP_EXEC
3041
    printf("saving ");
3042
    exec->transno++;
3043
    xmlFARegDebugExec(exec);
3044
    exec->transno--;
3045
#endif
3046
#ifdef MAX_PUSH
3047
    if (exec->nbPush > MAX_PUSH) {
3048
        return;
3049
    }
3050
    exec->nbPush++;
3051
#endif
3052
3053
    if (exec->maxRollbacks == 0) {
3054
	exec->maxRollbacks = 4;
3055
	exec->rollbacks = (xmlRegExecRollback *) xmlMalloc(exec->maxRollbacks *
3056
		                             sizeof(xmlRegExecRollback));
3057
	if (exec->rollbacks == NULL) {
3058
	    xmlRegexpErrMemory(NULL, "saving regexp");
3059
	    exec->maxRollbacks = 0;
3060
	    return;
3061
	}
3062
	memset(exec->rollbacks, 0,
3063
	       exec->maxRollbacks * sizeof(xmlRegExecRollback));
3064
    } else if (exec->nbRollbacks >= exec->maxRollbacks) {
3065
	xmlRegExecRollback *tmp;
3066
	int len = exec->maxRollbacks;
3067
3068
	exec->maxRollbacks *= 2;
3069
	tmp = (xmlRegExecRollback *) xmlRealloc(exec->rollbacks,
3070
			exec->maxRollbacks * sizeof(xmlRegExecRollback));
3071
	if (tmp == NULL) {
3072
	    xmlRegexpErrMemory(NULL, "saving regexp");
3073
	    exec->maxRollbacks /= 2;
3074
	    return;
3075
	}
3076
	exec->rollbacks = tmp;
3077
	tmp = &exec->rollbacks[len];
3078
	memset(tmp, 0, (exec->maxRollbacks - len) * sizeof(xmlRegExecRollback));
3079
    }
3080
    exec->rollbacks[exec->nbRollbacks].state = exec->state;
3081
    exec->rollbacks[exec->nbRollbacks].index = exec->index;
3082
    exec->rollbacks[exec->nbRollbacks].nextbranch = exec->transno + 1;
3083
    if (exec->comp->nbCounters > 0) {
3084
	if (exec->rollbacks[exec->nbRollbacks].counts == NULL) {
3085
	    exec->rollbacks[exec->nbRollbacks].counts = (int *)
3086
		xmlMalloc(exec->comp->nbCounters * sizeof(int));
3087
	    if (exec->rollbacks[exec->nbRollbacks].counts == NULL) {
3088
		xmlRegexpErrMemory(NULL, "saving regexp");
3089
		exec->status = -5;
3090
		return;
3091
	    }
3092
	}
3093
	memcpy(exec->rollbacks[exec->nbRollbacks].counts, exec->counts,
3094
	       exec->comp->nbCounters * sizeof(int));
3095
    }
3096
    exec->nbRollbacks++;
3097
}
3098
3099
static void
3100
xmlFARegExecRollBack(xmlRegExecCtxtPtr exec) {
3101
    if (exec->nbRollbacks <= 0) {
3102
	exec->status = -1;
3103
#ifdef DEBUG_REGEXP_EXEC
3104
	printf("rollback failed on empty stack\n");
3105
#endif
3106
	return;
3107
    }
3108
    exec->nbRollbacks--;
3109
    exec->state = exec->rollbacks[exec->nbRollbacks].state;
3110
    exec->index = exec->rollbacks[exec->nbRollbacks].index;
3111
    exec->transno = exec->rollbacks[exec->nbRollbacks].nextbranch;
3112
    if (exec->comp->nbCounters > 0) {
3113
	if (exec->rollbacks[exec->nbRollbacks].counts == NULL) {
3114
	    fprintf(stderr, "exec save: allocation failed");
3115
	    exec->status = -6;
3116
	    return;
3117
	}
3118
	memcpy(exec->counts, exec->rollbacks[exec->nbRollbacks].counts,
3119
	       exec->comp->nbCounters * sizeof(int));
3120
    }
3121
3122
#ifdef DEBUG_REGEXP_EXEC
3123
    printf("restored ");
3124
    xmlFARegDebugExec(exec);
3125
#endif
3126
}
3127
3128
/************************************************************************
3129
 * 									*
3130
 *	Verifier, running an input against a compiled regexp		*
3131
 * 									*
3132
 ************************************************************************/
3133
3134
static int
3135
xmlFARegExec(xmlRegexpPtr comp, const xmlChar *content) {
3136
    xmlRegExecCtxt execval;
3137
    xmlRegExecCtxtPtr exec = &execval;
3138
    int ret, codepoint = 0, len, deter;
3139
3140
    exec->inputString = content;
3141
    exec->index = 0;
3142
    exec->nbPush = 0;
3143
    exec->determinist = 1;
3144
    exec->maxRollbacks = 0;
3145
    exec->nbRollbacks = 0;
3146
    exec->rollbacks = NULL;
3147
    exec->status = 0;
3148
    exec->comp = comp;
3149
    exec->state = comp->states[0];
3150
    exec->transno = 0;
3151
    exec->transcount = 0;
3152
    exec->inputStack = NULL;
3153
    exec->inputStackMax = 0;
3154
    if (comp->nbCounters > 0) {
3155
	exec->counts = (int *) xmlMalloc(comp->nbCounters * sizeof(int));
3156
	if (exec->counts == NULL) {
3157
	    xmlRegexpErrMemory(NULL, "running regexp");
3158
	    return(-1);
3159
	}
3160
        memset(exec->counts, 0, comp->nbCounters * sizeof(int));
3161
    } else
3162
	exec->counts = NULL;
3163
    while ((exec->status == 0) &&
3164
	   ((exec->inputString[exec->index] != 0) ||
3165
	    ((exec->state != NULL) &&
3166
	     (exec->state->type != XML_REGEXP_FINAL_STATE)))) {
3167
	xmlRegTransPtr trans;
3168
	xmlRegAtomPtr atom;
3169
3170
	/*
3171
	 * If end of input on non-terminal state, rollback, however we may
3172
	 * still have epsilon like transition for counted transitions
3173
	 * on counters, in that case don't break too early.  Additionally,
3174
	 * if we are working on a range like "AB{0,2}", where B is not present,
3175
	 * we don't want to break.
3176
	 */
3177
	len = 1;
3178
	if ((exec->inputString[exec->index] == 0) && (exec->counts == NULL)) {
3179
	    /*
3180
	     * if there is a transition, we must check if
3181
	     *  atom allows minOccurs of 0
3182
	     */
3183
	    if (exec->transno < exec->state->nbTrans) {
3184
	        trans = &exec->state->trans[exec->transno];
3185
		if (trans->to >=0) {
3186
		    atom = trans->atom;
3187
		    if (!((atom->min == 0) && (atom->max > 0)))
3188
		        goto rollback;
3189
		}
3190
	    } else
3191
	        goto rollback;
3192
	}
3193
3194
	exec->transcount = 0;
3195
	for (;exec->transno < exec->state->nbTrans;exec->transno++) {
3196
	    trans = &exec->state->trans[exec->transno];
3197
	    if (trans->to < 0)
3198
		continue;
3199
	    atom = trans->atom;
3200
	    ret = 0;
3201
	    deter = 1;
3202
	    if (trans->count >= 0) {
3203
		int count;
3204
		xmlRegCounterPtr counter;
3205
3206
		if (exec->counts == NULL) {
3207
		    exec->status = -1;
3208
		    goto error;
3209
		}
3210
		/*
3211
		 * A counted transition.
3212
		 */
3213
3214
		count = exec->counts[trans->count];
3215
		counter = &exec->comp->counters[trans->count];
3216
#ifdef DEBUG_REGEXP_EXEC
3217
		printf("testing count %d: val %d, min %d, max %d\n",
3218
		       trans->count, count, counter->min,  counter->max);
3219
#endif
3220
		ret = ((count >= counter->min) && (count <= counter->max));
3221
		if ((ret) && (counter->min != counter->max))
3222
		    deter = 0;
3223
	    } else if (atom == NULL) {
3224
		fprintf(stderr, "epsilon transition left at runtime\n");
3225
		exec->status = -2;
3226
		break;
3227
	    } else if (exec->inputString[exec->index] != 0) {
3228
                codepoint = CUR_SCHAR(&(exec->inputString[exec->index]), len);
3229
		ret = xmlRegCheckCharacter(atom, codepoint);
3230
		if ((ret == 1) && (atom->min >= 0) && (atom->max > 0)) {
3231
		    xmlRegStatePtr to = comp->states[trans->to];
3232
3233
		    /*
3234
		     * this is a multiple input sequence
3235
		     * If there is a counter associated increment it now.
3236
		     * before potentially saving and rollback
3237
		     * do not increment if the counter is already over the
3238
		     * maximum limit in which case get to next transition
3239
		     */
3240
		    if (trans->counter >= 0) {
3241
			xmlRegCounterPtr counter;
3242
3243
			if ((exec->counts == NULL) ||
3244
			    (exec->comp == NULL) ||
3245
			    (exec->comp->counters == NULL)) {
3246
			    exec->status = -1;
3247
			    goto error;
3248
			}
3249
			counter = &exec->comp->counters[trans->counter];
3250
			if (exec->counts[trans->counter] >= counter->max)
3251
			    continue; /* for loop on transitions */
3252
3253
#ifdef DEBUG_REGEXP_EXEC
3254
			printf("Increasing count %d\n", trans->counter);
3255
#endif
3256
			exec->counts[trans->counter]++;
3257
		    }
3258
		    if (exec->state->nbTrans > exec->transno + 1) {
3259
			xmlFARegExecSave(exec);
3260
		    }
3261
		    exec->transcount = 1;
3262
		    do {
3263
			/*
3264
			 * Try to progress as much as possible on the input
3265
			 */
3266
			if (exec->transcount == atom->max) {
3267
			    break;
3268
			}
3269
			exec->index += len;
3270
			/*
3271
			 * End of input: stop here
3272
			 */
3273
			if (exec->inputString[exec->index] == 0) {
3274
			    exec->index -= len;
3275
			    break;
3276
			}
3277
			if (exec->transcount >= atom->min) {
3278
			    int transno = exec->transno;
3279
			    xmlRegStatePtr state = exec->state;
3280
3281
			    /*
3282
			     * The transition is acceptable save it
3283
			     */
3284
			    exec->transno = -1; /* trick */
3285
			    exec->state = to;
3286
			    xmlFARegExecSave(exec);
3287
			    exec->transno = transno;
3288
			    exec->state = state;
3289
			}
3290
			codepoint = CUR_SCHAR(&(exec->inputString[exec->index]),
3291
				              len);
3292
			ret = xmlRegCheckCharacter(atom, codepoint);
3293
			exec->transcount++;
3294
		    } while (ret == 1);
3295
		    if (exec->transcount < atom->min)
3296
			ret = 0;
3297
3298
		    /*
3299
		     * If the last check failed but one transition was found
3300
		     * possible, rollback
3301
		     */
3302
		    if (ret < 0)
3303
			ret = 0;
3304
		    if (ret == 0) {
3305
			goto rollback;
3306
		    }
3307
		    if (trans->counter >= 0) {
3308
			if (exec->counts == NULL) {
3309
			    exec->status = -1;
3310
			    goto error;
3311
			}
3312
#ifdef DEBUG_REGEXP_EXEC
3313
			printf("Decreasing count %d\n", trans->counter);
3314
#endif
3315
			exec->counts[trans->counter]--;
3316
		    }
3317
		} else if ((ret == 0) && (atom->min == 0) && (atom->max > 0)) {
3318
		    /*
3319
		     * we don't match on the codepoint, but minOccurs of 0
3320
		     * says that's ok.  Setting len to 0 inhibits stepping
3321
		     * over the codepoint.
3322
		     */
3323
		    exec->transcount = 1;
3324
		    len = 0;
3325
		    ret = 1;
3326
		}
3327
	    } else if ((atom->min == 0) && (atom->max > 0)) {
3328
	        /* another spot to match when minOccurs is 0 */
3329
		exec->transcount = 1;
3330
		len = 0;
3331
		ret = 1;
3332
	    }
3333
	    if (ret == 1) {
3334
		if ((trans->nd == 1) ||
3335
		    ((trans->count >= 0) && (deter == 0) &&
3336
		     (exec->state->nbTrans > exec->transno + 1))) {
3337
#ifdef DEBUG_REGEXP_EXEC
3338
		    if (trans->nd == 1)
3339
		        printf("Saving on nd transition atom %d for %c at %d\n",
3340
			       trans->atom->no, codepoint, exec->index);
3341
		    else
3342
		        printf("Saving on counted transition count %d for %c at %d\n",
3343
			       trans->count, codepoint, exec->index);
3344
#endif
3345
		    xmlFARegExecSave(exec);
3346
		}
3347
		if (trans->counter >= 0) {
3348
		    xmlRegCounterPtr counter;
3349
3350
                    /* make sure we don't go over the counter maximum value */
3351
		    if ((exec->counts == NULL) ||
3352
			(exec->comp == NULL) ||
3353
			(exec->comp->counters == NULL)) {
3354
			exec->status = -1;
3355
			goto error;
3356
		    }
3357
		    counter = &exec->comp->counters[trans->counter];
3358
		    if (exec->counts[trans->counter] >= counter->max)
3359
			continue; /* for loop on transitions */
3360
#ifdef DEBUG_REGEXP_EXEC
3361
		    printf("Increasing count %d\n", trans->counter);
3362
#endif
3363
		    exec->counts[trans->counter]++;
3364
		}
3365
		if ((trans->count >= 0) &&
3366
		    (trans->count < REGEXP_ALL_COUNTER)) {
3367
		    if (exec->counts == NULL) {
3368
		        exec->status = -1;
3369
			goto error;
3370
		    }
3371
#ifdef DEBUG_REGEXP_EXEC
3372
		    printf("resetting count %d on transition\n",
3373
		           trans->count);
3374
#endif
3375
		    exec->counts[trans->count] = 0;
3376
		}
3377
#ifdef DEBUG_REGEXP_EXEC
3378
		printf("entering state %d\n", trans->to);
3379
#endif
3380
		exec->state = comp->states[trans->to];
3381
		exec->transno = 0;
3382
		if (trans->atom != NULL) {
3383
		    exec->index += len;
3384
		}
3385
		goto progress;
3386
	    } else if (ret < 0) {
3387
		exec->status = -4;
3388
		break;
3389
	    }
3390
	}
3391
	if ((exec->transno != 0) || (exec->state->nbTrans == 0)) {
3392
rollback:
3393
	    /*
3394
	     * Failed to find a way out
3395
	     */
3396
	    exec->determinist = 0;
3397
#ifdef DEBUG_REGEXP_EXEC
3398
	    printf("rollback from state %d on %d:%c\n", exec->state->no,
3399
	           codepoint,codepoint);
3400
#endif
3401
	    xmlFARegExecRollBack(exec);
3402
	}
3403
progress:
3404
	continue;
3405
    }
3406
error:
3407
    if (exec->rollbacks != NULL) {
3408
	if (exec->counts != NULL) {
3409
	    int i;
3410
3411
	    for (i = 0;i < exec->maxRollbacks;i++)
3412
		if (exec->rollbacks[i].counts != NULL)
3413
		    xmlFree(exec->rollbacks[i].counts);
3414
	}
3415
	xmlFree(exec->rollbacks);
3416
    }
3417
    if (exec->counts != NULL)
3418
	xmlFree(exec->counts);
3419
    if (exec->status == 0)
3420
	return(1);
3421
    if (exec->status == -1) {
3422
	if (exec->nbPush > MAX_PUSH)
3423
	    return(-1);
3424
	return(0);
3425
    }
3426
    return(exec->status);
3427
}
3428
3429
/************************************************************************
3430
 * 									*
3431
 *	Progressive interface to the verifier one atom at a time	*
3432
 * 									*
3433
 ************************************************************************/
3434
#ifdef DEBUG_ERR
3435
static void testerr(xmlRegExecCtxtPtr exec);
3436
#endif
3437
3438
/**
3439
 * xmlRegNewExecCtxt:
3440
 * @comp: a precompiled regular expression
3441
 * @callback: a callback function used for handling progresses in the
3442
 *            automata matching phase
3443
 * @data: the context data associated to the callback in this context
3444
 *
3445
 * Build a context used for progressive evaluation of a regexp.
3446
 *
3447
 * Returns the new context
3448
 */
3449
xmlRegExecCtxtPtr
3450
xmlRegNewExecCtxt(xmlRegexpPtr comp, xmlRegExecCallbacks callback, void *data) {
3451
    xmlRegExecCtxtPtr exec;
3452
3453
    if (comp == NULL)
3454
	return(NULL);
3455
    if ((comp->compact == NULL) && (comp->states == NULL))
3456
        return(NULL);
3457
    exec = (xmlRegExecCtxtPtr) xmlMalloc(sizeof(xmlRegExecCtxt));
3458
    if (exec == NULL) {
3459
	xmlRegexpErrMemory(NULL, "creating execution context");
3460
	return(NULL);
3461
    }
3462
    memset(exec, 0, sizeof(xmlRegExecCtxt));
3463
    exec->inputString = NULL;
3464
    exec->index = 0;
3465
    exec->determinist = 1;
3466
    exec->maxRollbacks = 0;
3467
    exec->nbRollbacks = 0;
3468
    exec->rollbacks = NULL;
3469
    exec->status = 0;
3470
    exec->comp = comp;
3471
    if (comp->compact == NULL)
3472
	exec->state = comp->states[0];
3473
    exec->transno = 0;
3474
    exec->transcount = 0;
3475
    exec->callback = callback;
3476
    exec->data = data;
3477
    if (comp->nbCounters > 0) {
3478
        /*
3479
	 * For error handling, exec->counts is allocated twice the size
3480
	 * the second half is used to store the data in case of rollback
3481
	 */
3482
	exec->counts = (int *) xmlMalloc(comp->nbCounters * sizeof(int)
3483
	                                 * 2);
3484
	if (exec->counts == NULL) {
3485
	    xmlRegexpErrMemory(NULL, "creating execution context");
3486
	    xmlFree(exec);
3487
	    return(NULL);
3488
	}
3489
        memset(exec->counts, 0, comp->nbCounters * sizeof(int) * 2);
3490
	exec->errCounts = &exec->counts[comp->nbCounters];
3491
    } else {
3492
	exec->counts = NULL;
3493
	exec->errCounts = NULL;
3494
    }
3495
    exec->inputStackMax = 0;
3496
    exec->inputStackNr = 0;
3497
    exec->inputStack = NULL;
3498
    exec->errStateNo = -1;
3499
    exec->errString = NULL;
3500
    exec->nbPush = 0;
3501
    return(exec);
3502
}
3503
3504
/**
3505
 * xmlRegFreeExecCtxt:
3506
 * @exec: a regular expression evaulation context
3507
 *
3508
 * Free the structures associated to a regular expression evaulation context.
3509
 */
3510
void
3511
xmlRegFreeExecCtxt(xmlRegExecCtxtPtr exec) {
3512
    if (exec == NULL)
3513
	return;
3514
3515
    if (exec->rollbacks != NULL) {
3516
	if (exec->counts != NULL) {
3517
	    int i;
3518
3519
	    for (i = 0;i < exec->maxRollbacks;i++)
3520
		if (exec->rollbacks[i].counts != NULL)
3521
		    xmlFree(exec->rollbacks[i].counts);
3522
	}
3523
	xmlFree(exec->rollbacks);
3524
    }
3525
    if (exec->counts != NULL)
3526
	xmlFree(exec->counts);
3527
    if (exec->inputStack != NULL) {
3528
	int i;
3529
3530
	for (i = 0;i < exec->inputStackNr;i++) {
3531
	    if (exec->inputStack[i].value != NULL)
3532
		xmlFree(exec->inputStack[i].value);
3533
	}
3534
	xmlFree(exec->inputStack);
3535
    }
3536
    if (exec->errString != NULL)
3537
        xmlFree(exec->errString);
3538
    xmlFree(exec);
3539
}
3540
3541
static void
3542
xmlFARegExecSaveInputString(xmlRegExecCtxtPtr exec, const xmlChar *value,
3543
	                    void *data) {
3544
#ifdef DEBUG_PUSH
3545
    printf("saving value: %d:%s\n", exec->inputStackNr, value);
3546
#endif
3547
    if (exec->inputStackMax == 0) {
3548
	exec->inputStackMax = 4;
3549
	exec->inputStack = (xmlRegInputTokenPtr) 
3550
	    xmlMalloc(exec->inputStackMax * sizeof(xmlRegInputToken));
3551
	if (exec->inputStack == NULL) {
3552
	    xmlRegexpErrMemory(NULL, "pushing input string");
3553
	    exec->inputStackMax = 0;
3554
	    return;
3555
	}
3556
    } else if (exec->inputStackNr + 1 >= exec->inputStackMax) {
3557
	xmlRegInputTokenPtr tmp;
3558
3559
	exec->inputStackMax *= 2;
3560
	tmp = (xmlRegInputTokenPtr) xmlRealloc(exec->inputStack,
3561
			exec->inputStackMax * sizeof(xmlRegInputToken));
3562
	if (tmp == NULL) {
3563
	    xmlRegexpErrMemory(NULL, "pushing input string");
3564
	    exec->inputStackMax /= 2;
3565
	    return;
3566
	}
3567
	exec->inputStack = tmp;
3568
    }
3569
    exec->inputStack[exec->inputStackNr].value = xmlStrdup(value);
3570
    exec->inputStack[exec->inputStackNr].data = data;
3571
    exec->inputStackNr++;
3572
    exec->inputStack[exec->inputStackNr].value = NULL;
3573
    exec->inputStack[exec->inputStackNr].data = NULL;
3574
}
3575
3576
/**
3577
 * xmlRegStrEqualWildcard:
3578
 * @expStr:  the string to be evaluated 
3579
 * @valStr:  the validation string
3580
 *
3581
 * Checks if both strings are equal or have the same content. "*"
3582
 * can be used as a wildcard in @valStr; "|" is used as a seperator of 
3583
 * substrings in both @expStr and @valStr.
3584
 *
3585
 * Returns 1 if the comparison is satisfied and the number of substrings
3586
 * is equal, 0 otherwise.
3587
 */
3588
3589
static int
3590
xmlRegStrEqualWildcard(const xmlChar *expStr, const xmlChar *valStr) {
3591
    if (expStr == valStr) return(1);
3592
    if (expStr == NULL) return(0);
3593
    if (valStr == NULL) return(0);
3594
    do {
3595
	/*
3596
	* Eval if we have a wildcard for the current item.
3597
	*/
3598
        if (*expStr != *valStr) {
3599
	    /* if one of them starts with a wildcard make valStr be it */
3600
	    if (*valStr == '*') {
3601
	        const xmlChar *tmp;
3602
3603
		tmp = valStr;
3604
		valStr = expStr;
3605
		expStr = tmp;
3606
	    }
3607
	    if ((*valStr != 0) && (*expStr != 0) && (*expStr++ == '*')) {
3608
		do {
3609
		    if (*valStr == XML_REG_STRING_SEPARATOR)
3610
			break;
3611
		    valStr++;
3612
		} while (*valStr != 0);
3613
		continue;
3614
	    } else
3615
		return(0);
3616
	}
3617
	expStr++;
3618
	valStr++;
3619
    } while (*valStr != 0);
3620
    if (*expStr != 0)
3621
	return (0);
3622
    else
3623
	return (1);
3624
}
3625
3626
/**
3627
 * xmlRegCompactPushString:
3628
 * @exec: a regexp execution context
3629
 * @comp:  the precompiled exec with a compact table
3630
 * @value: a string token input
3631
 * @data: data associated to the token to reuse in callbacks
3632
 *
3633
 * Push one input token in the execution context
3634
 *
3635
 * Returns: 1 if the regexp reached a final state, 0 if non-final, and
3636
 *     a negative value in case of error.
3637
 */
3638
static int
3639
xmlRegCompactPushString(xmlRegExecCtxtPtr exec,
3640
	                xmlRegexpPtr comp,
3641
	                const xmlChar *value,
3642
	                void *data) {
3643
    int state = exec->index;
3644
    int i, target;
3645
3646
    if ((comp == NULL) || (comp->compact == NULL) || (comp->stringMap == NULL))
3647
	return(-1);
3648
    
3649
    if (value == NULL) {
3650
	/*
3651
	 * are we at a final state ?
3652
	 */
3653
	if (comp->compact[state * (comp->nbstrings + 1)] ==
3654
            XML_REGEXP_FINAL_STATE)
3655
	    return(1);
3656
	return(0);
3657
    }
3658
3659
#ifdef DEBUG_PUSH
3660
    printf("value pushed: %s\n", value);
3661
#endif
3662
3663
    /*
3664
     * Examine all outside transitions from current state
3665
     */
3666
    for (i = 0;i < comp->nbstrings;i++) {
3667
	target = comp->compact[state * (comp->nbstrings + 1) + i + 1];
3668
	if ((target > 0) && (target <= comp->nbstates)) {
3669
	    target--; /* to avoid 0 */    
3670
	    if (xmlRegStrEqualWildcard(comp->stringMap[i], value)) {
3671
		exec->index = target;		
3672
		if ((exec->callback != NULL) && (comp->transdata != NULL)) {
3673
		    exec->callback(exec->data, value,
3674
			  comp->transdata[state * comp->nbstrings + i], data);
3675
		}
3676
#ifdef DEBUG_PUSH
3677
		printf("entering state %d\n", target);
3678
#endif
3679
		if (comp->compact[target * (comp->nbstrings + 1)] ==
3680
		    XML_REGEXP_SINK_STATE)
3681
		    goto error;
3682
3683
		if (comp->compact[target * (comp->nbstrings + 1)] ==
3684
		    XML_REGEXP_FINAL_STATE)
3685
		    return(1);
3686
		return(0);
3687
	    }
3688
	}
3689
    }
3690
    /*
3691
     * Failed to find an exit transition out from current state for the
3692
     * current token
3693
     */
3694
#ifdef DEBUG_PUSH
3695
    printf("failed to find a transition for %s on state %d\n", value, state);
3696
#endif
3697
error:
3698
    if (exec->errString != NULL)
3699
        xmlFree(exec->errString);
3700
    exec->errString = xmlStrdup(value);
3701
    exec->errStateNo = state;
3702
    exec->status = -1;
3703
#ifdef DEBUG_ERR
3704
    testerr(exec);
3705
#endif
3706
    return(-1);
3707
}
3708
3709
/**
3710
 * xmlRegExecPushStringInternal:
3711
 * @exec: a regexp execution context or NULL to indicate the end
3712
 * @value: a string token input
3713
 * @data: data associated to the token to reuse in callbacks
3714
 * @compound: value was assembled from 2 strings
3715
 *
3716
 * Push one input token in the execution context
3717
 *
3718
 * Returns: 1 if the regexp reached a final state, 0 if non-final, and
3719
 *     a negative value in case of error.
3720
 */
3721
static int
3722
xmlRegExecPushStringInternal(xmlRegExecCtxtPtr exec, const xmlChar *value,
3723
	                     void *data, int compound) {
3724
    xmlRegTransPtr trans;
3725
    xmlRegAtomPtr atom;
3726
    int ret;
3727
    int final = 0;
3728
    int progress = 1;
3729
3730
    if (exec == NULL)
3731
	return(-1);
3732
    if (exec->comp == NULL)
3733
	return(-1);
3734
    if (exec->status != 0)
3735
	return(exec->status);
3736
3737
    if (exec->comp->compact != NULL)
3738
	return(xmlRegCompactPushString(exec, exec->comp, value, data));
3739
3740
    if (value == NULL) {
3741
        if (exec->state->type == XML_REGEXP_FINAL_STATE)
3742
	    return(1);
3743
	final = 1;
3744
    }
3745
3746
#ifdef DEBUG_PUSH
3747
    printf("value pushed: %s\n", value);
3748
#endif
3749
    /*
3750
     * If we have an active rollback stack push the new value there
3751
     * and get back to where we were left
3752
     */
3753
    if ((value != NULL) && (exec->inputStackNr > 0)) {
3754
	xmlFARegExecSaveInputString(exec, value, data);
3755
	value = exec->inputStack[exec->index].value;
3756
	data = exec->inputStack[exec->index].data;
3757
#ifdef DEBUG_PUSH
3758
	printf("value loaded: %s\n", value);
3759
#endif
3760
    }
3761
3762
    while ((exec->status == 0) &&
3763
	   ((value != NULL) ||
3764
	    ((final == 1) &&
3765
	     (exec->state->type != XML_REGEXP_FINAL_STATE)))) {
3766
3767
	/*
3768
	 * End of input on non-terminal state, rollback, however we may
3769
	 * still have epsilon like transition for counted transitions
3770
	 * on counters, in that case don't break too early.
3771
	 */
3772
	if ((value == NULL) && (exec->counts == NULL))
3773
	    goto rollback;
3774
3775
	exec->transcount = 0;
3776
	for (;exec->transno < exec->state->nbTrans;exec->transno++) {
3777
	    trans = &exec->state->trans[exec->transno];
3778
	    if (trans->to < 0)
3779
		continue;
3780
	    atom = trans->atom;
3781
	    ret = 0;
3782
	    if (trans->count == REGEXP_ALL_LAX_COUNTER) {
3783
		int i;
3784
		int count;
3785
		xmlRegTransPtr t;
3786
		xmlRegCounterPtr counter;
3787
3788
		ret = 0;
3789
3790
#ifdef DEBUG_PUSH
3791
		printf("testing all lax %d\n", trans->count);
3792
#endif
3793
		/*
3794
		 * Check all counted transitions from the current state
3795
		 */
3796
		if ((value == NULL) && (final)) {
3797
		    ret = 1;
3798
		} else if (value != NULL) {
3799
		    for (i = 0;i < exec->state->nbTrans;i++) {
3800
			t = &exec->state->trans[i];
3801
			if ((t->counter < 0) || (t == trans))
3802
			    continue;
3803
			counter = &exec->comp->counters[t->counter];
3804
			count = exec->counts[t->counter];
3805
			if ((count < counter->max) && 
3806
		            (t->atom != NULL) &&
3807
			    (xmlStrEqual(value, t->atom->valuep))) {
3808
			    ret = 0;
3809
			    break;
3810
			}
3811
			if ((count >= counter->min) &&
3812
			    (count < counter->max) &&
3813
			    (t->atom != NULL) &&
3814
			    (xmlStrEqual(value, t->atom->valuep))) {
3815
			    ret = 1;
3816
			    break;
3817
			}
3818
		    }
3819
		}
3820
	    } else if (trans->count == REGEXP_ALL_COUNTER) {
3821
		int i;
3822
		int count;
3823
		xmlRegTransPtr t;
3824
		xmlRegCounterPtr counter;
3825
3826
		ret = 1;
3827
3828
#ifdef DEBUG_PUSH
3829
		printf("testing all %d\n", trans->count);
3830
#endif
3831
		/*
3832
		 * Check all counted transitions from the current state
3833
		 */
3834
		for (i = 0;i < exec->state->nbTrans;i++) {
3835
                    t = &exec->state->trans[i];
3836
		    if ((t->counter < 0) || (t == trans))
3837
			continue;
3838
                    counter = &exec->comp->counters[t->counter];
3839
		    count = exec->counts[t->counter];
3840
		    if ((count < counter->min) || (count > counter->max)) {
3841
			ret = 0;
3842
			break;
3843
		    }
3844
		}
3845
	    } else if (trans->count >= 0) {
3846
		int count;
3847
		xmlRegCounterPtr counter;
3848
3849
		/*
3850
		 * A counted transition.
3851
		 */
3852
3853
		count = exec->counts[trans->count];
3854
		counter = &exec->comp->counters[trans->count];
3855
#ifdef DEBUG_PUSH
3856
		printf("testing count %d: val %d, min %d, max %d\n",
3857
		       trans->count, count, counter->min,  counter->max);
3858
#endif
3859
		ret = ((count >= counter->min) && (count <= counter->max));
3860
	    } else if (atom == NULL) {
3861
		fprintf(stderr, "epsilon transition left at runtime\n");
3862
		exec->status = -2;
3863
		break;
3864
	    } else if (value != NULL) {
3865
		ret = xmlRegStrEqualWildcard(atom->valuep, value);
3866
		if (atom->neg) {
3867
		    ret = !ret;
3868
		    if (!compound)
3869
		        ret = 0;
3870
		}
3871
		if ((ret == 1) && (trans->counter >= 0)) {
3872
		    xmlRegCounterPtr counter;
3873
		    int count;
3874
3875
		    count = exec->counts[trans->counter];
3876
		    counter = &exec->comp->counters[trans->counter];
3877
		    if (count >= counter->max)
3878
			ret = 0;
3879
		}
3880
3881
		if ((ret == 1) && (atom->min > 0) && (atom->max > 0)) {
3882
		    xmlRegStatePtr to = exec->comp->states[trans->to];
3883
3884
		    /*
3885
		     * this is a multiple input sequence
3886
		     */
3887
		    if (exec->state->nbTrans > exec->transno + 1) {
3888
			if (exec->inputStackNr <= 0) {
3889
			    xmlFARegExecSaveInputString(exec, value, data);
3890
			}
3891
			xmlFARegExecSave(exec);
3892
		    }
3893
		    exec->transcount = 1;
3894
		    do {
3895
			/*
3896
			 * Try to progress as much as possible on the input
3897
			 */
3898
			if (exec->transcount == atom->max) {
3899
			    break;
3900
			}
3901
			exec->index++;
3902
			value = exec->inputStack[exec->index].value;
3903
			data = exec->inputStack[exec->index].data;
3904
#ifdef DEBUG_PUSH
3905
			printf("value loaded: %s\n", value);
3906
#endif
3907
3908
			/*
3909
			 * End of input: stop here
3910
			 */
3911
			if (value == NULL) {
3912
			    exec->index --;
3913
			    break;
3914
			}
3915
			if (exec->transcount >= atom->min) {
3916
			    int transno = exec->transno;
3917
			    xmlRegStatePtr state = exec->state;
3918
3919
			    /*
3920
			     * The transition is acceptable save it
3921
			     */
3922
			    exec->transno = -1; /* trick */
3923
			    exec->state = to;
3924
			    if (exec->inputStackNr <= 0) {
3925
				xmlFARegExecSaveInputString(exec, value, data);
3926
			    }
3927
			    xmlFARegExecSave(exec);
3928
			    exec->transno = transno;
3929
			    exec->state = state;
3930
			}
3931
			ret = xmlStrEqual(value, atom->valuep);
3932
			exec->transcount++;
3933
		    } while (ret == 1);
3934
		    if (exec->transcount < atom->min)
3935
			ret = 0;
3936
3937
		    /*
3938
		     * If the last check failed but one transition was found
3939
		     * possible, rollback
3940
		     */
3941
		    if (ret < 0)
3942
			ret = 0;
3943
		    if (ret == 0) {
3944
			goto rollback;
3945
		    }
3946
		}
3947
	    }
3948
	    if (ret == 1) {
3949
		if ((exec->callback != NULL) && (atom != NULL) &&
3950
			(data != NULL)) {
3951
		    exec->callback(exec->data, atom->valuep,
3952
			           atom->data, data);
3953
		}
3954
		if (exec->state->nbTrans > exec->transno + 1) {
3955
		    if (exec->inputStackNr <= 0) {
3956
			xmlFARegExecSaveInputString(exec, value, data);
3957
		    }
3958
		    xmlFARegExecSave(exec);
3959
		}
3960
		if (trans->counter >= 0) {
3961
#ifdef DEBUG_PUSH
3962
		    printf("Increasing count %d\n", trans->counter);
3963
#endif
3964
		    exec->counts[trans->counter]++;
3965
		}
3966
		if ((trans->count >= 0) &&
3967
		    (trans->count < REGEXP_ALL_COUNTER)) {
3968
#ifdef DEBUG_REGEXP_EXEC
3969
		    printf("resetting count %d on transition\n",
3970
		           trans->count);
3971
#endif
3972
		    exec->counts[trans->count] = 0;
3973
		}
3974
#ifdef DEBUG_PUSH
3975
		printf("entering state %d\n", trans->to);
3976
#endif
3977
                if ((exec->comp->states[trans->to] != NULL) &&
3978
		    (exec->comp->states[trans->to]->type ==
3979
		     XML_REGEXP_SINK_STATE)) {
3980
		    /*
3981
		     * entering a sink state, save the current state as error
3982
		     * state.
3983
		     */
3984
		    if (exec->errString != NULL)
3985
			xmlFree(exec->errString);
3986
		    exec->errString = xmlStrdup(value);
3987
		    exec->errState = exec->state;
3988
		    memcpy(exec->errCounts, exec->counts,
3989
			   exec->comp->nbCounters * sizeof(int));
3990
		}
3991
		exec->state = exec->comp->states[trans->to];
3992
		exec->transno = 0;
3993
		if (trans->atom != NULL) {
3994
		    if (exec->inputStack != NULL) {
3995
			exec->index++;
3996
			if (exec->index < exec->inputStackNr) {
3997
			    value = exec->inputStack[exec->index].value;
3998
			    data = exec->inputStack[exec->index].data;
3999
#ifdef DEBUG_PUSH
4000
			    printf("value loaded: %s\n", value);
4001
#endif
4002
			} else {
4003
			    value = NULL;
4004
			    data = NULL;
4005
#ifdef DEBUG_PUSH
4006
			    printf("end of input\n");
4007
#endif
4008
			}
4009
		    } else {
4010
			value = NULL;
4011
			data = NULL;
4012
#ifdef DEBUG_PUSH
4013
			printf("end of input\n");
4014
#endif
4015
		    }
4016
		}
4017
		goto progress;
4018
	    } else if (ret < 0) {
4019
		exec->status = -4;
4020
		break;
4021
	    }
4022
	}
4023
	if ((exec->transno != 0) || (exec->state->nbTrans == 0)) {
4024
rollback:
4025
            /*
4026
	     * if we didn't yet rollback on the current input
4027
	     * store the current state as the error state.
4028
	     */
4029
	    if ((progress) && (exec->state != NULL) &&
4030
	        (exec->state->type != XML_REGEXP_SINK_STATE)) {
4031
	        progress = 0;
4032
		if (exec->errString != NULL)
4033
		    xmlFree(exec->errString);
4034
		exec->errString = xmlStrdup(value);
4035
		exec->errState = exec->state;
4036
		memcpy(exec->errCounts, exec->counts,
4037
		       exec->comp->nbCounters * sizeof(int));
4038
	    }
4039
4040
	    /*
4041
	     * Failed to find a way out
4042
	     */
4043
	    exec->determinist = 0;
4044
	    xmlFARegExecRollBack(exec);
4045
	    if (exec->status == 0) {
4046
		value = exec->inputStack[exec->index].value;
4047
		data = exec->inputStack[exec->index].data;
4048
#ifdef DEBUG_PUSH
4049
		printf("value loaded: %s\n", value);
4050
#endif
4051
	    }
4052
	}
4053
	continue;
4054
progress:
4055
        progress = 1;
4056
	continue;
4057
    }
4058
    if (exec->status == 0) {
4059
        return(exec->state->type == XML_REGEXP_FINAL_STATE);
4060
    }
4061
#ifdef DEBUG_ERR
4062
    if (exec->status < 0) {
4063
	testerr(exec);
4064
    }
4065
#endif
4066
    return(exec->status);
4067
}
4068
4069
/**
4070
 * xmlRegExecPushString:
4071
 * @exec: a regexp execution context or NULL to indicate the end
4072
 * @value: a string token input
4073
 * @data: data associated to the token to reuse in callbacks
4074
 *
4075
 * Push one input token in the execution context
4076
 *
4077
 * Returns: 1 if the regexp reached a final state, 0 if non-final, and
4078
 *     a negative value in case of error.
4079
 */
4080
int
4081
xmlRegExecPushString(xmlRegExecCtxtPtr exec, const xmlChar *value,
4082
	             void *data) {
4083
    return(xmlRegExecPushStringInternal(exec, value, data, 0));
4084
}
4085
4086
/**
4087
 * xmlRegExecPushString2:
4088
 * @exec: a regexp execution context or NULL to indicate the end
4089
 * @value: the first string token input
4090
 * @value2: the second string token input
4091
 * @data: data associated to the token to reuse in callbacks
4092
 *
4093
 * Push one input token in the execution context
4094
 *
4095
 * Returns: 1 if the regexp reached a final state, 0 if non-final, and
4096
 *     a negative value in case of error.
4097
 */
4098
int
4099
xmlRegExecPushString2(xmlRegExecCtxtPtr exec, const xmlChar *value,
4100
                      const xmlChar *value2, void *data) {
4101
    xmlChar buf[150];
4102
    int lenn, lenp, ret;
4103
    xmlChar *str;
4104
4105
    if (exec == NULL)
4106
	return(-1);
4107
    if (exec->comp == NULL)
4108
	return(-1);
4109
    if (exec->status != 0)
4110
	return(exec->status);
4111
4112
    if (value2 == NULL)
4113
        return(xmlRegExecPushString(exec, value, data));
4114
4115
    lenn = strlen((char *) value2);
4116
    lenp = strlen((char *) value);
4117
4118
    if (150 < lenn + lenp + 2) {
4119
	str = (xmlChar *) xmlMallocAtomic(lenn + lenp + 2);
4120
	if (str == NULL) {
4121
	    exec->status = -1;
4122
	    return(-1);
4123
	}
4124
    } else {
4125
	str = buf;
4126
    }
4127
    memcpy(&str[0], value, lenp);
4128
    str[lenp] = XML_REG_STRING_SEPARATOR;
4129
    memcpy(&str[lenp + 1], value2, lenn);
4130
    str[lenn + lenp + 1] = 0;
4131
4132
    if (exec->comp->compact != NULL)
4133
	ret = xmlRegCompactPushString(exec, exec->comp, str, data);
4134
    else
4135
        ret = xmlRegExecPushStringInternal(exec, str, data, 1);
4136
4137
    if (str != buf)
4138
        xmlFree(str);
4139
    return(ret);
4140
}
4141
4142
/**
4143
 * xmlRegExecGetValues:
4144
 * @exec: a regexp execution context
4145
 * @err: error extraction or normal one
4146
 * @nbval: pointer to the number of accepted values IN/OUT
4147
 * @nbneg: return number of negative transitions
4148
 * @values: pointer to the array of acceptable values
4149
 * @terminal: return value if this was a terminal state
4150
 *
4151
 * Extract informations from the regexp execution, internal routine to
4152
 * implement xmlRegExecNextValues() and xmlRegExecErrInfo()
4153
 *
4154
 * Returns: 0 in case of success or -1 in case of error.
4155
 */
4156
static int
4157
xmlRegExecGetValues(xmlRegExecCtxtPtr exec, int err,
4158
                    int *nbval, int *nbneg,
4159
		    xmlChar **values, int *terminal) {
4160
    int maxval;
4161
    int nb = 0;
4162
4163
    if ((exec == NULL) || (nbval == NULL) || (nbneg == NULL) || 
4164
        (values == NULL) || (*nbval <= 0))
4165
        return(-1);
4166
4167
    maxval = *nbval;
4168
    *nbval = 0;
4169
    *nbneg = 0;
4170
    if ((exec->comp != NULL) && (exec->comp->compact != NULL)) {
4171
        xmlRegexpPtr comp;
4172
	int target, i, state;
4173
4174
        comp = exec->comp;
4175
4176
	if (err) {
4177
	    if (exec->errStateNo == -1) return(-1);
4178
	    state = exec->errStateNo;
4179
	} else {
4180
	    state = exec->index;
4181
	}
4182
	if (terminal != NULL) {
4183
	    if (comp->compact[state * (comp->nbstrings + 1)] ==
4184
	        XML_REGEXP_FINAL_STATE)
4185
		*terminal = 1;
4186
	    else
4187
		*terminal = 0;
4188
	}
4189
	for (i = 0;(i < comp->nbstrings) && (nb < maxval);i++) {
4190
	    target = comp->compact[state * (comp->nbstrings + 1) + i + 1];
4191
	    if ((target > 0) && (target <= comp->nbstates) &&
4192
	        (comp->compact[(target - 1) * (comp->nbstrings + 1)] !=
4193
		 XML_REGEXP_SINK_STATE)) {
4194
	        values[nb++] = comp->stringMap[i];
4195
		(*nbval)++;
4196
	    }
4197
	}
4198
	for (i = 0;(i < comp->nbstrings) && (nb < maxval);i++) {
4199
	    target = comp->compact[state * (comp->nbstrings + 1) + i + 1];
4200
	    if ((target > 0) && (target <= comp->nbstates) &&
4201
	        (comp->compact[(target - 1) * (comp->nbstrings + 1)] ==
4202
		 XML_REGEXP_SINK_STATE)) {
4203
	        values[nb++] = comp->stringMap[i];
4204
		(*nbneg)++;
4205
	    }
4206
	}
4207
    } else {
4208
        int transno;
4209
	xmlRegTransPtr trans;
4210
	xmlRegAtomPtr atom;
4211
	xmlRegStatePtr state;
4212
4213
	if (terminal != NULL) {
4214
	    if (exec->state->type == XML_REGEXP_FINAL_STATE)
4215
		*terminal = 1;
4216
	    else
4217
		*terminal = 0;
4218
	}
4219
4220
	if (err) {
4221
	    if (exec->errState == NULL) return(-1);
4222
	    state = exec->errState;
4223
	} else {
4224
	    if (exec->state == NULL) return(-1);
4225
	    state = exec->state;
4226
	}
4227
	for (transno = 0;
4228
	     (transno < state->nbTrans) && (nb < maxval);
4229
	     transno++) {
4230
	    trans = &state->trans[transno];
4231
	    if (trans->to < 0)
4232
		continue;
4233
	    atom = trans->atom;
4234
	    if ((atom == NULL) || (atom->valuep == NULL))
4235
		continue;
4236
	    if (trans->count == REGEXP_ALL_LAX_COUNTER) {
4237
	        /* this should not be reached but ... */
4238
	        TODO;
4239
	    } else if (trans->count == REGEXP_ALL_COUNTER) {
4240
	        /* this should not be reached but ... */
4241
	        TODO;
4242
	    } else if (trans->counter >= 0) {
4243
		xmlRegCounterPtr counter = NULL;
4244
		int count;
4245
4246
		if (err)
4247
		    count = exec->errCounts[trans->counter];
4248
		else
4249
		    count = exec->counts[trans->counter];
4250
		if (exec->comp != NULL)
4251
		    counter = &exec->comp->counters[trans->counter];
4252
		if ((counter == NULL) || (count < counter->max)) {
4253
		    if (atom->neg)
4254
			values[nb++] = (xmlChar *) atom->valuep2;
4255
		    else
4256
			values[nb++] = (xmlChar *) atom->valuep;
4257
		    (*nbval)++;
4258
		}
4259
	    } else {
4260
                if ((exec->comp->states[trans->to] != NULL) &&
4261
		    (exec->comp->states[trans->to]->type !=
4262
		     XML_REGEXP_SINK_STATE)) {
4263
		    if (atom->neg)
4264
			values[nb++] = (xmlChar *) atom->valuep2;
4265
		    else
4266
			values[nb++] = (xmlChar *) atom->valuep;
4267
		    (*nbval)++;
4268
		}
4269
	    } 
4270
	}
4271
	for (transno = 0;
4272
	     (transno < state->nbTrans) && (nb < maxval);
4273
	     transno++) {
4274
	    trans = &state->trans[transno];
4275
	    if (trans->to < 0)
4276
		continue;
4277
	    atom = trans->atom;
4278
	    if ((atom == NULL) || (atom->valuep == NULL))
4279
		continue;
4280
	    if (trans->count == REGEXP_ALL_LAX_COUNTER) {
4281
	        continue;
4282
	    } else if (trans->count == REGEXP_ALL_COUNTER) {
4283
	        continue;
4284
	    } else if (trans->counter >= 0) {
4285
	        continue;
4286
	    } else {
4287
                if ((exec->comp->states[trans->to] != NULL) &&
4288
		    (exec->comp->states[trans->to]->type ==
4289
		     XML_REGEXP_SINK_STATE)) {
4290
		    if (atom->neg)
4291
			values[nb++] = (xmlChar *) atom->valuep2;
4292
		    else
4293
			values[nb++] = (xmlChar *) atom->valuep;
4294
		    (*nbneg)++;
4295
		}
4296
	    } 
4297
	}
4298
    }
4299
    return(0);
4300
}
4301
4302
/**
4303
 * xmlRegExecNextValues:
4304
 * @exec: a regexp execution context
4305
 * @nbval: pointer to the number of accepted values IN/OUT
4306
 * @nbneg: return number of negative transitions
4307
 * @values: pointer to the array of acceptable values
4308
 * @terminal: return value if this was a terminal state
4309
 *
4310
 * Extract informations from the regexp execution,
4311
 * the parameter @values must point to an array of @nbval string pointers
4312
 * on return nbval will contain the number of possible strings in that
4313
 * state and the @values array will be updated with them. The string values
4314
 * returned will be freed with the @exec context and don't need to be
4315
 * deallocated.
4316
 *
4317
 * Returns: 0 in case of success or -1 in case of error.
4318
 */
4319
int
4320
xmlRegExecNextValues(xmlRegExecCtxtPtr exec, int *nbval, int *nbneg,
4321
                     xmlChar **values, int *terminal) {
4322
    return(xmlRegExecGetValues(exec, 0, nbval, nbneg, values, terminal));
4323
}
4324
4325
/**
4326
 * xmlRegExecErrInfo:
4327
 * @exec: a regexp execution context generating an error
4328
 * @string: return value for the error string
4329
 * @nbval: pointer to the number of accepted values IN/OUT
4330
 * @nbneg: return number of negative transitions
4331
 * @values: pointer to the array of acceptable values
4332
 * @terminal: return value if this was a terminal state
4333
 *
4334
 * Extract error informations from the regexp execution, the parameter
4335
 * @string will be updated with the value pushed and not accepted,
4336
 * the parameter @values must point to an array of @nbval string pointers
4337
 * on return nbval will contain the number of possible strings in that
4338
 * state and the @values array will be updated with them. The string values
4339
 * returned will be freed with the @exec context and don't need to be
4340
 * deallocated.
4341
 *
4342
 * Returns: 0 in case of success or -1 in case of error.
4343
 */
4344
int
4345
xmlRegExecErrInfo(xmlRegExecCtxtPtr exec, const xmlChar **string,
4346
                  int *nbval, int *nbneg, xmlChar **values, int *terminal) {
4347
    if (exec == NULL)
4348
        return(-1);
4349
    if (string != NULL) {
4350
        if (exec->status != 0)
4351
	    *string = exec->errString;
4352
	else
4353
	    *string = NULL;
4354
    }
4355
    return(xmlRegExecGetValues(exec, 1, nbval, nbneg, values, terminal));
4356
}
4357
4358
#ifdef DEBUG_ERR
4359
static void testerr(xmlRegExecCtxtPtr exec) {
4360
    const xmlChar *string;
4361
    xmlChar *values[5];
4362
    int nb = 5;
4363
    int nbneg;
4364
    int terminal;
4365
    xmlRegExecErrInfo(exec, &string, &nb, &nbneg, &values[0], &terminal);
4366
}
4367
#endif
4368
4369
#if 0
4370
static int
4371
xmlRegExecPushChar(xmlRegExecCtxtPtr exec, int UCS) {
4372
    xmlRegTransPtr trans;
4373
    xmlRegAtomPtr atom;
4374
    int ret;
4375
    int codepoint, len;
4376
4377
    if (exec == NULL)
4378
	return(-1);
4379
    if (exec->status != 0)
4380
	return(exec->status);
4381
4382
    while ((exec->status == 0) &&
4383
	   ((exec->inputString[exec->index] != 0) ||
4384
	    (exec->state->type != XML_REGEXP_FINAL_STATE))) {
4385
4386
	/*
4387
	 * End of input on non-terminal state, rollback, however we may
4388
	 * still have epsilon like transition for counted transitions
4389
	 * on counters, in that case don't break too early.
4390
	 */
4391
	if ((exec->inputString[exec->index] == 0) && (exec->counts == NULL))
4392
	    goto rollback;
4393
4394
	exec->transcount = 0;
4395
	for (;exec->transno < exec->state->nbTrans;exec->transno++) {
4396
	    trans = &exec->state->trans[exec->transno];
4397
	    if (trans->to < 0)
4398
		continue;
4399
	    atom = trans->atom;
4400
	    ret = 0;
4401
	    if (trans->count >= 0) {
4402
		int count;
4403
		xmlRegCounterPtr counter;
4404
4405
		/*
4406
		 * A counted transition.
4407
		 */
4408
4409
		count = exec->counts[trans->count];
4410
		counter = &exec->comp->counters[trans->count];
4411
#ifdef DEBUG_REGEXP_EXEC
4412
		printf("testing count %d: val %d, min %d, max %d\n",
4413
		       trans->count, count, counter->min,  counter->max);
4414
#endif
4415
		ret = ((count >= counter->min) && (count <= counter->max));
4416
	    } else if (atom == NULL) {
4417
		fprintf(stderr, "epsilon transition left at runtime\n");
4418
		exec->status = -2;
4419
		break;
4420
	    } else if (exec->inputString[exec->index] != 0) {
4421
                codepoint = CUR_SCHAR(&(exec->inputString[exec->index]), len);
4422
		ret = xmlRegCheckCharacter(atom, codepoint);
4423
		if ((ret == 1) && (atom->min > 0) && (atom->max > 0)) {
4424
		    xmlRegStatePtr to = exec->comp->states[trans->to];
4425
4426
		    /*
4427
		     * this is a multiple input sequence
4428
		     */
4429
		    if (exec->state->nbTrans > exec->transno + 1) {
4430
			xmlFARegExecSave(exec);
4431
		    }
4432
		    exec->transcount = 1;
4433
		    do {
4434
			/*
4435
			 * Try to progress as much as possible on the input
4436
			 */
4437
			if (exec->transcount == atom->max) {
4438
			    break;
4439
			}
4440
			exec->index += len;
4441
			/*
4442
			 * End of input: stop here
4443
			 */
4444
			if (exec->inputString[exec->index] == 0) {
4445
			    exec->index -= len;
4446
			    break;
4447
			}
4448
			if (exec->transcount >= atom->min) {
4449
			    int transno = exec->transno;
4450
			    xmlRegStatePtr state = exec->state;
4451
4452
			    /*
4453
			     * The transition is acceptable save it
4454
			     */
4455
			    exec->transno = -1; /* trick */
4456
			    exec->state = to;
4457
			    xmlFARegExecSave(exec);
4458
			    exec->transno = transno;
4459
			    exec->state = state;
4460
			}
4461
			codepoint = CUR_SCHAR(&(exec->inputString[exec->index]),
4462
				              len);
4463
			ret = xmlRegCheckCharacter(atom, codepoint);
4464
			exec->transcount++;
4465
		    } while (ret == 1);
4466
		    if (exec->transcount < atom->min)
4467
			ret = 0;
4468
4469
		    /*
4470
		     * If the last check failed but one transition was found
4471
		     * possible, rollback
4472
		     */
4473
		    if (ret < 0)
4474
			ret = 0;
4475
		    if (ret == 0) {
4476
			goto rollback;
4477
		    }
4478
		}
4479
	    }
4480
	    if (ret == 1) {
4481
		if (exec->state->nbTrans > exec->transno + 1) {
4482
		    xmlFARegExecSave(exec);
4483
		}
4484
		/*
4485
		 * restart count for expressions like this ((abc){2})*
4486
		 */
4487
		if (trans->count >= 0) {
4488
#ifdef DEBUG_REGEXP_EXEC
4489
		    printf("Reset count %d\n", trans->count);
4490
#endif
4491
		    exec->counts[trans->count] = 0;
4492
		}
4493
		if (trans->counter >= 0) {
4494
#ifdef DEBUG_REGEXP_EXEC
4495
		    printf("Increasing count %d\n", trans->counter);
4496
#endif
4497
		    exec->counts[trans->counter]++;
4498
		}
4499
#ifdef DEBUG_REGEXP_EXEC
4500
		printf("entering state %d\n", trans->to);
4501
#endif
4502
		exec->state = exec->comp->states[trans->to];
4503
		exec->transno = 0;
4504
		if (trans->atom != NULL) {
4505
		    exec->index += len;
4506
		}
4507
		goto progress;
4508
	    } else if (ret < 0) {
4509
		exec->status = -4;
4510
		break;
4511
	    }
4512
	}
4513
	if ((exec->transno != 0) || (exec->state->nbTrans == 0)) {
4514
rollback:
4515
	    /*
4516
	     * Failed to find a way out
4517
	     */
4518
	    exec->determinist = 0;
4519
	    xmlFARegExecRollBack(exec);
4520
	}
4521
progress:
4522
	continue;
4523
    }
4524
}
4525
#endif
4526
/************************************************************************
4527
 * 									*
4528
 *	Parser for the Schemas Datatype Regular Expressions		*
4529
 *	http://www.w3.org/TR/2001/REC-xmlschema-2-20010502/#regexs	*
4530
 * 									*
4531
 ************************************************************************/
4532
4533
/**
4534
 * xmlFAIsChar:
4535
 * @ctxt:  a regexp parser context
4536
 *
4537
 * [10]   Char   ::=   [^.\?*+()|#x5B#x5D]
4538
 */
4539
static int
4540
xmlFAIsChar(xmlRegParserCtxtPtr ctxt) {
4541
    int cur;
4542
    int len;
4543
4544
    cur = CUR_SCHAR(ctxt->cur, len);
4545
    if ((cur == '.') || (cur == '\\') || (cur == '?') ||
4546
	(cur == '*') || (cur == '+') || (cur == '(') ||
4547
	(cur == ')') || (cur == '|') || (cur == 0x5B) ||
4548
	(cur == 0x5D) || (cur == 0))
4549
	return(-1);
4550
    return(cur);
4551
}
4552
4553
/**
4554
 * xmlFAParseCharProp:
4555
 * @ctxt:  a regexp parser context
4556
 *
4557
 * [27]   charProp   ::=   IsCategory | IsBlock
4558
 * [28]   IsCategory ::= Letters | Marks | Numbers | Punctuation |
4559
 *                       Separators | Symbols | Others 
4560
 * [29]   Letters   ::=   'L' [ultmo]?
4561
 * [30]   Marks   ::=   'M' [nce]?
4562
 * [31]   Numbers   ::=   'N' [dlo]?
4563
 * [32]   Punctuation   ::=   'P' [cdseifo]?
4564
 * [33]   Separators   ::=   'Z' [slp]?
4565
 * [34]   Symbols   ::=   'S' [mcko]?
4566
 * [35]   Others   ::=   'C' [cfon]?
4567
 * [36]   IsBlock   ::=   'Is' [a-zA-Z0-9#x2D]+
4568
 */
4569
static void
4570
xmlFAParseCharProp(xmlRegParserCtxtPtr ctxt) {
4571
    int cur;
4572
    xmlRegAtomType type = (xmlRegAtomType) 0;
4573
    xmlChar *blockName = NULL;
4574
    
4575
    cur = CUR;
4576
    if (cur == 'L') {
4577
	NEXT;
4578
	cur = CUR;
4579
	if (cur == 'u') {
4580
	    NEXT;
4581
	    type = XML_REGEXP_LETTER_UPPERCASE;
4582
	} else if (cur == 'l') {
4583
	    NEXT;
4584
	    type = XML_REGEXP_LETTER_LOWERCASE;
4585
	} else if (cur == 't') {
4586
	    NEXT;
4587
	    type = XML_REGEXP_LETTER_TITLECASE;
4588
	} else if (cur == 'm') {
4589
	    NEXT;
4590
	    type = XML_REGEXP_LETTER_MODIFIER;
4591
	} else if (cur == 'o') {
4592
	    NEXT;
4593
	    type = XML_REGEXP_LETTER_OTHERS;
4594
	} else {
4595
	    type = XML_REGEXP_LETTER;
4596
	}
4597
    } else if (cur == 'M') {
4598
	NEXT;
4599
	cur = CUR;
4600
	if (cur == 'n') {
4601
	    NEXT;
4602
	    /* nonspacing */
4603
	    type = XML_REGEXP_MARK_NONSPACING;
4604
	} else if (cur == 'c') {
4605
	    NEXT;
4606
	    /* spacing combining */
4607
	    type = XML_REGEXP_MARK_SPACECOMBINING;
4608
	} else if (cur == 'e') {
4609
	    NEXT;
4610
	    /* enclosing */
4611
	    type = XML_REGEXP_MARK_ENCLOSING;
4612
	} else {
4613
	    /* all marks */
4614
	    type = XML_REGEXP_MARK;
4615
	}
4616
    } else if (cur == 'N') {
4617
	NEXT;
4618
	cur = CUR;
4619
	if (cur == 'd') {
4620
	    NEXT;
4621
	    /* digital */
4622
	    type = XML_REGEXP_NUMBER_DECIMAL;
4623
	} else if (cur == 'l') {
4624
	    NEXT;
4625
	    /* letter */
4626
	    type = XML_REGEXP_NUMBER_LETTER;
4627
	} else if (cur == 'o') {
4628
	    NEXT;
4629
	    /* other */
4630
	    type = XML_REGEXP_NUMBER_OTHERS;
4631
	} else {
4632
	    /* all numbers */
4633
	    type = XML_REGEXP_NUMBER;
4634
	}
4635
    } else if (cur == 'P') {
4636
	NEXT;
4637
	cur = CUR;
4638
	if (cur == 'c') {
4639
	    NEXT;
4640
	    /* connector */
4641
	    type = XML_REGEXP_PUNCT_CONNECTOR;
4642
	} else if (cur == 'd') {
4643
	    NEXT;
4644
	    /* dash */
4645
	    type = XML_REGEXP_PUNCT_DASH;
4646
	} else if (cur == 's') {
4647
	    NEXT;
4648
	    /* open */
4649
	    type = XML_REGEXP_PUNCT_OPEN;
4650
	} else if (cur == 'e') {
4651
	    NEXT;
4652
	    /* close */
4653
	    type = XML_REGEXP_PUNCT_CLOSE;
4654
	} else if (cur == 'i') {
4655
	    NEXT;
4656
	    /* initial quote */
4657
	    type = XML_REGEXP_PUNCT_INITQUOTE;
4658
	} else if (cur == 'f') {
4659
	    NEXT;
4660
	    /* final quote */
4661
	    type = XML_REGEXP_PUNCT_FINQUOTE;
4662
	} else if (cur == 'o') {
4663
	    NEXT;
4664
	    /* other */
4665
	    type = XML_REGEXP_PUNCT_OTHERS;
4666
	} else {
4667
	    /* all punctuation */
4668
	    type = XML_REGEXP_PUNCT;
4669
	}
4670
    } else if (cur == 'Z') {
4671
	NEXT;
4672
	cur = CUR;
4673
	if (cur == 's') {
4674
	    NEXT;
4675
	    /* space */
4676
	    type = XML_REGEXP_SEPAR_SPACE;
4677
	} else if (cur == 'l') {
4678
	    NEXT;
4679
	    /* line */
4680
	    type = XML_REGEXP_SEPAR_LINE;
4681
	} else if (cur == 'p') {
4682
	    NEXT;
4683
	    /* paragraph */
4684
	    type = XML_REGEXP_SEPAR_PARA;
4685
	} else {
4686
	    /* all separators */
4687
	    type = XML_REGEXP_SEPAR;
4688
	}
4689
    } else if (cur == 'S') {
4690
	NEXT;
4691
	cur = CUR;
4692
	if (cur == 'm') {
4693
	    NEXT;
4694
	    type = XML_REGEXP_SYMBOL_MATH;
4695
	    /* math */
4696
	} else if (cur == 'c') {
4697
	    NEXT;
4698
	    type = XML_REGEXP_SYMBOL_CURRENCY;
4699
	    /* currency */
4700
	} else if (cur == 'k') {
4701
	    NEXT;
4702
	    type = XML_REGEXP_SYMBOL_MODIFIER;
4703
	    /* modifiers */
4704
	} else if (cur == 'o') {
4705
	    NEXT;
4706
	    type = XML_REGEXP_SYMBOL_OTHERS;
4707
	    /* other */
4708
	} else {
4709
	    /* all symbols */
4710
	    type = XML_REGEXP_SYMBOL;
4711
	}
4712
    } else if (cur == 'C') {
4713
	NEXT;
4714
	cur = CUR;
4715
	if (cur == 'c') {
4716
	    NEXT;
4717
	    /* control */
4718
	    type = XML_REGEXP_OTHER_CONTROL;
4719
	} else if (cur == 'f') {
4720
	    NEXT;
4721
	    /* format */
4722
	    type = XML_REGEXP_OTHER_FORMAT;
4723
	} else if (cur == 'o') {
4724
	    NEXT;
4725
	    /* private use */
4726
	    type = XML_REGEXP_OTHER_PRIVATE;
4727
	} else if (cur == 'n') {
4728
	    NEXT;
4729
	    /* not assigned */
4730
	    type = XML_REGEXP_OTHER_NA;
4731
	} else {
4732
	    /* all others */
4733
	    type = XML_REGEXP_OTHER;
4734
	}
4735
    } else if (cur == 'I') {
4736
	const xmlChar *start;
4737
	NEXT;
4738
	cur = CUR;
4739
	if (cur != 's') {
4740
	    ERROR("IsXXXX expected");
4741
	    return;
4742
	}
4743
	NEXT;
4744
	start = ctxt->cur;
4745
	cur = CUR;
4746
	if (((cur >= 'a') && (cur <= 'z')) || 
4747
	    ((cur >= 'A') && (cur <= 'Z')) || 
4748
	    ((cur >= '0') && (cur <= '9')) || 
4749
	    (cur == 0x2D)) {
4750
	    NEXT;
4751
	    cur = CUR;
4752
	    while (((cur >= 'a') && (cur <= 'z')) || 
4753
		((cur >= 'A') && (cur <= 'Z')) || 
4754
		((cur >= '0') && (cur <= '9')) || 
4755
		(cur == 0x2D)) {
4756
		NEXT;
4757
		cur = CUR;
4758
	    }
4759
	}
4760
	type = XML_REGEXP_BLOCK_NAME;
4761
	blockName = xmlStrndup(start, ctxt->cur - start);
4762
    } else {
4763
	ERROR("Unknown char property");
4764
	return;
4765
    }
4766
    if (ctxt->atom == NULL) {
4767
	ctxt->atom = xmlRegNewAtom(ctxt, type);
4768
	if (ctxt->atom != NULL)
4769
	    ctxt->atom->valuep = blockName;
4770
    } else if (ctxt->atom->type == XML_REGEXP_RANGES) {
4771
        xmlRegAtomAddRange(ctxt, ctxt->atom, ctxt->neg,
4772
		           type, 0, 0, blockName);
4773
    }
4774
}
4775
4776
/**
4777
 * xmlFAParseCharClassEsc:
4778
 * @ctxt:  a regexp parser context
4779
 *
4780
 * [23] charClassEsc ::= ( SingleCharEsc | MultiCharEsc | catEsc | complEsc ) 
4781
 * [24] SingleCharEsc ::= '\' [nrt\|.?*+(){}#x2D#x5B#x5D#x5E]
4782
 * [25] catEsc   ::=   '\p{' charProp '}'
4783
 * [26] complEsc ::=   '\P{' charProp '}'
4784
 * [37] MultiCharEsc ::= '.' | ('\' [sSiIcCdDwW])
4785
 */
4786
static void
4787
xmlFAParseCharClassEsc(xmlRegParserCtxtPtr ctxt) {
4788
    int cur;
4789
4790
    if (CUR == '.') {
4791
	if (ctxt->atom == NULL) {
4792
	    ctxt->atom = xmlRegNewAtom(ctxt, XML_REGEXP_ANYCHAR);
4793
	} else if (ctxt->atom->type == XML_REGEXP_RANGES) {
4794
	    xmlRegAtomAddRange(ctxt, ctxt->atom, ctxt->neg,
4795
			       XML_REGEXP_ANYCHAR, 0, 0, NULL);
4796
	}
4797
	NEXT;
4798
	return;
4799
    }
4800
    if (CUR != '\\') {
4801
	ERROR("Escaped sequence: expecting \\");
4802
	return;
4803
    }
4804
    NEXT;
4805
    cur = CUR;
4806
    if (cur == 'p') {
4807
	NEXT;
4808
	if (CUR != '{') {
4809
	    ERROR("Expecting '{'");
4810
	    return;
4811
	}
4812
	NEXT;
4813
	xmlFAParseCharProp(ctxt);
4814
	if (CUR != '}') {
4815
	    ERROR("Expecting '}'");
4816
	    return;
4817
	}
4818
	NEXT;
4819
    } else if (cur == 'P') {
4820
	NEXT;
4821
	if (CUR != '{') {
4822
	    ERROR("Expecting '{'");
4823
	    return;
4824
	}
4825
	NEXT;
4826
	xmlFAParseCharProp(ctxt);
4827
	ctxt->atom->neg = 1;
4828
	if (CUR != '}') {
4829
	    ERROR("Expecting '}'");
4830
	    return;
4831
	}
4832
	NEXT;
4833
    } else if ((cur == 'n') || (cur == 'r') || (cur == 't') || (cur == '\\') ||
4834
	(cur == '|') || (cur == '.') || (cur == '?') || (cur == '*') ||
4835
	(cur == '+') || (cur == '(') || (cur == ')') || (cur == '{') ||
4836
	(cur == '}') || (cur == 0x2D) || (cur == 0x5B) || (cur == 0x5D) ||
4837
	(cur == 0x5E)) {
4838
	if (ctxt->atom == NULL) {
4839
	    ctxt->atom = xmlRegNewAtom(ctxt, XML_REGEXP_CHARVAL);
4840
	    if (ctxt->atom != NULL) {
4841
	        switch (cur) {
4842
		    case 'n':
4843
		        ctxt->atom->codepoint = '\n';
4844
			break;
4845
		    case 'r':
4846
		        ctxt->atom->codepoint = '\r';
4847
			break;
4848
		    case 't':
4849
		        ctxt->atom->codepoint = '\t';
4850
			break;
4851
		    default:
4852
			ctxt->atom->codepoint = cur;
4853
		}
4854
	    }
4855
	} else if (ctxt->atom->type == XML_REGEXP_RANGES) {
4856
	    xmlRegAtomAddRange(ctxt, ctxt->atom, ctxt->neg,
4857
			       XML_REGEXP_CHARVAL, cur, cur, NULL);
4858
	}
4859
	NEXT;
4860
    } else if ((cur == 's') || (cur == 'S') || (cur == 'i') || (cur == 'I') ||
4861
	(cur == 'c') || (cur == 'C') || (cur == 'd') || (cur == 'D') ||
4862
	(cur == 'w') || (cur == 'W')) {
4863
	xmlRegAtomType type = XML_REGEXP_ANYSPACE;
4864
4865
	switch (cur) {
4866
	    case 's': 
4867
		type = XML_REGEXP_ANYSPACE;
4868
		break;
4869
	    case 'S': 
4870
		type = XML_REGEXP_NOTSPACE;
4871
		break;
4872
	    case 'i': 
4873
		type = XML_REGEXP_INITNAME;
4874
		break;
4875
	    case 'I': 
4876
		type = XML_REGEXP_NOTINITNAME;
4877
		break;
4878
	    case 'c': 
4879
		type = XML_REGEXP_NAMECHAR;
4880
		break;
4881
	    case 'C': 
4882
		type = XML_REGEXP_NOTNAMECHAR;
4883
		break;
4884
	    case 'd': 
4885
		type = XML_REGEXP_DECIMAL;
4886
		break;
4887
	    case 'D': 
4888
		type = XML_REGEXP_NOTDECIMAL;
4889
		break;
4890
	    case 'w': 
4891
		type = XML_REGEXP_REALCHAR;
4892
		break;
4893
	    case 'W': 
4894
		type = XML_REGEXP_NOTREALCHAR;
4895
		break;
4896
	}
4897
	NEXT;
4898
	if (ctxt->atom == NULL) {
4899
	    ctxt->atom = xmlRegNewAtom(ctxt, type);
4900
	} else if (ctxt->atom->type == XML_REGEXP_RANGES) {
4901
	    xmlRegAtomAddRange(ctxt, ctxt->atom, ctxt->neg,
4902
			       type, 0, 0, NULL);
4903
	}
4904
    } else {
4905
	ERROR("Wrong escape sequence, misuse of character '\\'");
4906
    }
4907
}
4908
4909
/**
4910
 * xmlFAParseCharRange:
4911
 * @ctxt:  a regexp parser context
4912
 *
4913
 * [17]   charRange   ::=     seRange | XmlCharRef | XmlCharIncDash 
4914
 * [18]   seRange   ::=   charOrEsc '-' charOrEsc
4915
 * [20]   charOrEsc   ::=   XmlChar | SingleCharEsc
4916
 * [21]   XmlChar   ::=   [^\#x2D#x5B#x5D]
4917
 * [22]   XmlCharIncDash   ::=   [^\#x5B#x5D]
4918
 */
4919
static void
4920
xmlFAParseCharRange(xmlRegParserCtxtPtr ctxt) {
4921
    int cur, len;
4922
    int start = -1;
4923
    int end = -1;
4924
4925
    if (CUR == '\0') {
4926
        ERROR("Expecting ']'");
4927
	return;
4928
    }
4929
4930
    cur = CUR;
4931
    if (cur == '\\') {
4932
	NEXT;
4933
	cur = CUR;
4934
	switch (cur) {
4935
	    case 'n': start = 0xA; break;
4936
	    case 'r': start = 0xD; break;
4937
	    case 't': start = 0x9; break;
4938
	    case '\\': case '|': case '.': case '-': case '^': case '?':
4939
	    case '*': case '+': case '{': case '}': case '(': case ')':
4940
	    case '[': case ']':
4941
		start = cur; break;
4942
	    default:
4943
		ERROR("Invalid escape value");
4944
		return;
4945
	}
4946
	end = start;
4947
        len = 1;
4948
    } else if ((cur != 0x5B) && (cur != 0x5D)) {
4949
        end = start = CUR_SCHAR(ctxt->cur, len);
4950
    } else {
4951
	ERROR("Expecting a char range");
4952
	return;
4953
    }
4954
    /*
4955
     * Since we are "inside" a range, we can assume ctxt->cur is past
4956
     * the start of ctxt->string, and PREV should be safe
4957
     */
4958
    if ((start == '-') && (NXT(1) != ']') && (PREV != '[') && (PREV != '^')) {
4959
	NEXTL(len);
4960
	return;
4961
    }
4962
    NEXTL(len);
4963
    cur = CUR;
4964
    if ((cur != '-') || (NXT(1) == ']')) {
4965
        xmlRegAtomAddRange(ctxt, ctxt->atom, ctxt->neg,
4966
		              XML_REGEXP_CHARVAL, start, end, NULL);
4967
	return;
4968
    }
4969
    NEXT;
4970
    cur = CUR;
4971
    if (cur == '\\') {
4972
	NEXT;
4973
	cur = CUR;
4974
	switch (cur) {
4975
	    case 'n': end = 0xA; break;
4976
	    case 'r': end = 0xD; break;
4977
	    case 't': end = 0x9; break;
4978
	    case '\\': case '|': case '.': case '-': case '^': case '?':
4979
	    case '*': case '+': case '{': case '}': case '(': case ')':
4980
	    case '[': case ']':
4981
		end = cur; break;
4982
	    default:
4983
		ERROR("Invalid escape value");
4984
		return;
4985
	}
4986
        len = 1;
4987
    } else if ((cur != 0x5B) && (cur != 0x5D)) {
4988
        end = CUR_SCHAR(ctxt->cur, len);
4989
    } else {
4990
	ERROR("Expecting the end of a char range");
4991
	return;
4992
    }
4993
    NEXTL(len);
4994
    /* TODO check that the values are acceptable character ranges for XML */
4995
    if (end < start) {
4996
	ERROR("End of range is before start of range");
4997
    } else {
4998
        xmlRegAtomAddRange(ctxt, ctxt->atom, ctxt->neg,
4999
		           XML_REGEXP_CHARVAL, start, end, NULL);
5000
    }
5001
    return;
5002
}
5003
5004
/**
5005
 * xmlFAParsePosCharGroup:
5006
 * @ctxt:  a regexp parser context
5007
 *
5008
 * [14]   posCharGroup ::= ( charRange | charClassEsc  )+
5009
 */
5010
static void
5011
xmlFAParsePosCharGroup(xmlRegParserCtxtPtr ctxt) {
5012
    do {
5013
	if (CUR == '\\') {
5014
	    xmlFAParseCharClassEsc(ctxt);
5015
	} else {
5016
	    xmlFAParseCharRange(ctxt);
5017
	}
5018
    } while ((CUR != ']') && (CUR != '^') && (CUR != '-') &&
5019
             (CUR != 0) && (ctxt->error == 0));
5020
}
5021
5022
/**
5023
 * xmlFAParseCharGroup:
5024
 * @ctxt:  a regexp parser context
5025
 *
5026
 * [13]   charGroup    ::= posCharGroup | negCharGroup | charClassSub
5027
 * [15]   negCharGroup ::= '^' posCharGroup
5028
 * [16]   charClassSub ::= ( posCharGroup | negCharGroup ) '-' charClassExpr  
5029
 * [12]   charClassExpr ::= '[' charGroup ']'
5030
 */
5031
static void
5032
xmlFAParseCharGroup(xmlRegParserCtxtPtr ctxt) {
5033
    int n = ctxt->neg;
5034
    while ((CUR != ']') && (ctxt->error == 0)) {
5035
	if (CUR == '^') {
5036
	    int neg = ctxt->neg;
5037
5038
	    NEXT;
5039
	    ctxt->neg = !ctxt->neg;
5040
	    xmlFAParsePosCharGroup(ctxt);
5041
	    ctxt->neg = neg;
5042
	} else if ((CUR == '-') && (NXT(1) == '[')) {
5043
	    int neg = ctxt->neg;
5044
	    ctxt->neg = 2;
5045
	    NEXT;	/* eat the '-' */
5046
	    NEXT;	/* eat the '[' */
5047
	    xmlFAParseCharGroup(ctxt);
5048
	    if (CUR == ']') {
5049
		NEXT;
5050
	    } else {
5051
		ERROR("charClassExpr: ']' expected");
5052
		break;
5053
	    }
5054
	    ctxt->neg = neg;
5055
	    break;
5056
	} else if (CUR != ']') {
5057
	    xmlFAParsePosCharGroup(ctxt);
5058
	}
5059
    }
5060
    ctxt->neg = n;
5061
}
5062
5063
/**
5064
 * xmlFAParseCharClass:
5065
 * @ctxt:  a regexp parser context
5066
 *
5067
 * [11]   charClass   ::=     charClassEsc | charClassExpr
5068
 * [12]   charClassExpr   ::=   '[' charGroup ']'
5069
 */
5070
static void
5071
xmlFAParseCharClass(xmlRegParserCtxtPtr ctxt) {
5072
    if (CUR == '[') {
5073
	NEXT;
5074
	ctxt->atom = xmlRegNewAtom(ctxt, XML_REGEXP_RANGES);
5075
	if (ctxt->atom == NULL)
5076
	    return;
5077
	xmlFAParseCharGroup(ctxt);
5078
	if (CUR == ']') {
5079
	    NEXT;
5080
	} else {
5081
	    ERROR("xmlFAParseCharClass: ']' expected");
5082
	}
5083
    } else {
5084
	xmlFAParseCharClassEsc(ctxt);
5085
    }
5086
}
5087
5088
/**
5089
 * xmlFAParseQuantExact:
5090
 * @ctxt:  a regexp parser context
5091
 *
5092
 * [8]   QuantExact   ::=   [0-9]+
5093
 *
5094
 * Returns 0 if success or -1 in case of error
5095
 */
5096
static int
5097
xmlFAParseQuantExact(xmlRegParserCtxtPtr ctxt) {
5098
    int ret = 0;
5099
    int ok = 0;
5100
5101
    while ((CUR >= '0') && (CUR <= '9')) {
5102
	ret = ret * 10 + (CUR - '0');
5103
	ok = 1;
5104
	NEXT;
5105
    }
5106
    if (ok != 1) {
5107
	return(-1);
5108
    }
5109
    return(ret);
5110
}
5111
5112
/**
5113
 * xmlFAParseQuantifier:
5114
 * @ctxt:  a regexp parser context
5115
 *
5116
 * [4]   quantifier   ::=   [?*+] | ( '{' quantity '}' )
5117
 * [5]   quantity   ::=   quantRange | quantMin | QuantExact
5118
 * [6]   quantRange   ::=   QuantExact ',' QuantExact
5119
 * [7]   quantMin   ::=   QuantExact ','
5120
 * [8]   QuantExact   ::=   [0-9]+
5121
 */
5122
static int
5123
xmlFAParseQuantifier(xmlRegParserCtxtPtr ctxt) {
5124
    int cur;
5125
5126
    cur = CUR;
5127
    if ((cur == '?') || (cur == '*') || (cur == '+')) {
5128
	if (ctxt->atom != NULL) {
5129
	    if (cur == '?')
5130
		ctxt->atom->quant = XML_REGEXP_QUANT_OPT;
5131
	    else if (cur == '*')
5132
		ctxt->atom->quant = XML_REGEXP_QUANT_MULT;
5133
	    else if (cur == '+')
5134
		ctxt->atom->quant = XML_REGEXP_QUANT_PLUS;
5135
	}
5136
	NEXT;
5137
	return(1);
5138
    }
5139
    if (cur == '{') {
5140
	int min = 0, max = 0;
5141
5142
	NEXT;
5143
	cur = xmlFAParseQuantExact(ctxt);
5144
	if (cur >= 0)
5145
	    min = cur;
5146
	if (CUR == ',') {
5147
	    NEXT;
5148
	    if (CUR == '}')
5149
	        max = INT_MAX;
5150
	    else {
5151
	        cur = xmlFAParseQuantExact(ctxt);
5152
	        if (cur >= 0)
5153
		    max = cur;
5154
		else {
5155
		    ERROR("Improper quantifier");
5156
		}
5157
	    }
5158
	}
5159
	if (CUR == '}') {
5160
	    NEXT;
5161
	} else {
5162
	    ERROR("Unterminated quantifier");
5163
	}
5164
	if (max == 0)
5165
	    max = min;
5166
	if (ctxt->atom != NULL) {
5167
	    ctxt->atom->quant = XML_REGEXP_QUANT_RANGE;
5168
	    ctxt->atom->min = min;
5169
	    ctxt->atom->max = max;
5170
	}
5171
	return(1);
5172
    }
5173
    return(0);
5174
}
5175
5176
/**
5177
 * xmlFAParseAtom:
5178
 * @ctxt:  a regexp parser context
5179
 *
5180
 * [9]   atom   ::=   Char | charClass | ( '(' regExp ')' )
5181
 */
5182
static int
5183
xmlFAParseAtom(xmlRegParserCtxtPtr ctxt) {
5184
    int codepoint, len;
5185
5186
    codepoint = xmlFAIsChar(ctxt);
5187
    if (codepoint > 0) {
5188
	ctxt->atom = xmlRegNewAtom(ctxt, XML_REGEXP_CHARVAL);
5189
	if (ctxt->atom == NULL)
5190
	    return(-1);
5191
	codepoint = CUR_SCHAR(ctxt->cur, len);
5192
	ctxt->atom->codepoint = codepoint;
5193
	NEXTL(len);
5194
	return(1);
5195
    } else if (CUR == '|') {
5196
	return(0);
5197
    } else if (CUR == 0) {
5198
	return(0);
5199
    } else if (CUR == ')') {
5200
	return(0);
5201
    } else if (CUR == '(') {
5202
	xmlRegStatePtr start, oldend, start0;
5203
5204
	NEXT;
5205
	/*
5206
	 * this extra Epsilon transition is needed if we count with 0 allowed
5207
	 * unfortunately this can't be known at that point
5208
	 */
5209
	xmlFAGenerateEpsilonTransition(ctxt, ctxt->state, NULL);
5210
	start0 = ctxt->state;
5211
	xmlFAGenerateEpsilonTransition(ctxt, ctxt->state, NULL);
5212
	start = ctxt->state;
5213
	oldend = ctxt->end;
5214
	ctxt->end = NULL;
5215
	ctxt->atom = NULL;
5216
	xmlFAParseRegExp(ctxt, 0);
5217
	if (CUR == ')') {
5218
	    NEXT;
5219
	} else {
5220
	    ERROR("xmlFAParseAtom: expecting ')'");
5221
	}
5222
	ctxt->atom = xmlRegNewAtom(ctxt, XML_REGEXP_SUBREG);
5223
	if (ctxt->atom == NULL)
5224
	    return(-1);
5225
	ctxt->atom->start = start;
5226
	ctxt->atom->start0 = start0;
5227
	ctxt->atom->stop = ctxt->state;
5228
	ctxt->end = oldend;
5229
	return(1);
5230
    } else if ((CUR == '[') || (CUR == '\\') || (CUR == '.')) {
5231
	xmlFAParseCharClass(ctxt);
5232
	return(1);
5233
    }
5234
    return(0);
5235
}
5236
5237
/**
5238
 * xmlFAParsePiece:
5239
 * @ctxt:  a regexp parser context
5240
 *
5241
 * [3]   piece   ::=   atom quantifier?
5242
 */
5243
static int
5244
xmlFAParsePiece(xmlRegParserCtxtPtr ctxt) {
5245
    int ret;
5246
5247
    ctxt->atom = NULL;
5248
    ret = xmlFAParseAtom(ctxt);
5249
    if (ret == 0)
5250
	return(0);
5251
    if (ctxt->atom == NULL) {
5252
	ERROR("internal: no atom generated");
5253
    }
5254
    xmlFAParseQuantifier(ctxt);
5255
    return(1);
5256
}
5257
5258
/**
5259
 * xmlFAParseBranch:
5260
 * @ctxt:  a regexp parser context
5261
 * @to: optional target to the end of the branch
5262
 *
5263
 * @to is used to optimize by removing duplicate path in automata
5264
 * in expressions like (a|b)(c|d)
5265
 *
5266
 * [2]   branch   ::=   piece*
5267
 */
5268
static int
5269
xmlFAParseBranch(xmlRegParserCtxtPtr ctxt, xmlRegStatePtr to) {
5270
    xmlRegStatePtr previous;
5271
    int ret;
5272
5273
    previous = ctxt->state;
5274
    ret = xmlFAParsePiece(ctxt);
5275
    if (ret != 0) {
5276
	if (xmlFAGenerateTransitions(ctxt, previous, 
5277
	        (CUR=='|' || CUR==')') ? to : NULL, ctxt->atom) < 0)
5278
	    return(-1);
5279
	previous = ctxt->state;
5280
	ctxt->atom = NULL;
5281
    }
5282
    while ((ret != 0) && (ctxt->error == 0)) {
5283
	ret = xmlFAParsePiece(ctxt);
5284
	if (ret != 0) {
5285
	    if (xmlFAGenerateTransitions(ctxt, previous, 
5286
	            (CUR=='|' || CUR==')') ? to : NULL, ctxt->atom) < 0)
5287
		    return(-1);
5288
	    previous = ctxt->state;
5289
	    ctxt->atom = NULL;
5290
	}
5291
    }
5292
    return(0);
5293
}
5294
5295
/**
5296
 * xmlFAParseRegExp:
5297
 * @ctxt:  a regexp parser context
5298
 * @top:  is this the top-level expression ?
5299
 *
5300
 * [1]   regExp   ::=     branch  ( '|' branch )*
5301
 */
5302
static void
5303
xmlFAParseRegExp(xmlRegParserCtxtPtr ctxt, int top) {
5304
    xmlRegStatePtr start, end;
5305
5306
    /* if not top start should have been generated by an epsilon trans */
5307
    start = ctxt->state;
5308
    ctxt->end = NULL;
5309
    xmlFAParseBranch(ctxt, NULL);
5310
    if (top) {
5311
#ifdef DEBUG_REGEXP_GRAPH
5312
	printf("State %d is final\n", ctxt->state->no);
5313
#endif
5314
	ctxt->state->type = XML_REGEXP_FINAL_STATE;
5315
    }
5316
    if (CUR != '|') {
5317
	ctxt->end = ctxt->state;
5318
	return;
5319
    }
5320
    end = ctxt->state;
5321
    while ((CUR == '|') && (ctxt->error == 0)) {
5322
	NEXT;
5323
	ctxt->state = start;
5324
	ctxt->end = NULL;
5325
	xmlFAParseBranch(ctxt, end);
5326
    }
5327
    if (!top) {
5328
	ctxt->state = end;
5329
	ctxt->end = end;
5330
    }
5331
}
5332
5333
/************************************************************************
5334
 * 									*
5335
 * 			The basic API					*
5336
 * 									*
5337
 ************************************************************************/
5338
5339
/**
5340
 * xmlRegexpPrint:
5341
 * @output: the file for the output debug
5342
 * @regexp: the compiled regexp
5343
 *
5344
 * Print the content of the compiled regular expression
5345
 */
5346
void
5347
xmlRegexpPrint(FILE *output, xmlRegexpPtr regexp) {
5348
    int i;
5349
5350
    if (output == NULL)
5351
        return;
5352
    fprintf(output, " regexp: ");
5353
    if (regexp == NULL) {
5354
	fprintf(output, "NULL\n");
5355
	return;
5356
    }
5357
    fprintf(output, "'%s' ", regexp->string);
5358
    fprintf(output, "\n");
5359
    fprintf(output, "%d atoms:\n", regexp->nbAtoms);
5360
    for (i = 0;i < regexp->nbAtoms; i++) {
5361
	fprintf(output, " %02d ", i);
5362
	xmlRegPrintAtom(output, regexp->atoms[i]);
5363
    }
5364
    fprintf(output, "%d states:", regexp->nbStates);
5365
    fprintf(output, "\n");
5366
    for (i = 0;i < regexp->nbStates; i++) {
5367
	xmlRegPrintState(output, regexp->states[i]);
5368
    }
5369
    fprintf(output, "%d counters:\n", regexp->nbCounters);
5370
    for (i = 0;i < regexp->nbCounters; i++) {
5371
	fprintf(output, " %d: min %d max %d\n", i, regexp->counters[i].min,
5372
		                                regexp->counters[i].max);
5373
    }
5374
}
5375
5376
/**
5377
 * xmlRegexpCompile:
5378
 * @regexp:  a regular expression string
5379
 *
5380
 * Parses a regular expression conforming to XML Schemas Part 2 Datatype
5381
 * Appendix F and builds an automata suitable for testing strings against
5382
 * that regular expression
5383
 *
5384
 * Returns the compiled expression or NULL in case of error
5385
 */
5386
xmlRegexpPtr
5387
xmlRegexpCompile(const xmlChar *regexp) {
5388
    xmlRegexpPtr ret;
5389
    xmlRegParserCtxtPtr ctxt;
5390
5391
    ctxt = xmlRegNewParserCtxt(regexp);
5392
    if (ctxt == NULL)
5393
	return(NULL);
5394
5395
    /* initialize the parser */
5396
    ctxt->end = NULL;
5397
    ctxt->start = ctxt->state = xmlRegNewState(ctxt);
5398
    xmlRegStatePush(ctxt, ctxt->start);
5399
5400
    /* parse the expression building an automata */
5401
    xmlFAParseRegExp(ctxt, 1);
5402
    if (CUR != 0) {
5403
	ERROR("xmlFAParseRegExp: extra characters");
5404
    }
5405
    if (ctxt->error != 0) {
5406
	xmlRegFreeParserCtxt(ctxt);
5407
	return(NULL);
5408
    }
5409
    ctxt->end = ctxt->state;
5410
    ctxt->start->type = XML_REGEXP_START_STATE;
5411
    ctxt->end->type = XML_REGEXP_FINAL_STATE;
5412
5413
    /* remove the Epsilon except for counted transitions */
5414
    xmlFAEliminateEpsilonTransitions(ctxt);
5415
5416
5417
    if (ctxt->error != 0) {
5418
	xmlRegFreeParserCtxt(ctxt);
5419
	return(NULL);
5420
    }
5421
    ret = xmlRegEpxFromParse(ctxt);
5422
    xmlRegFreeParserCtxt(ctxt);
5423
    return(ret);
5424
}
5425
5426
/**
5427
 * xmlRegexpExec:
5428
 * @comp:  the compiled regular expression
5429
 * @content:  the value to check against the regular expression
5430
 *
5431
 * Check if the regular expression generates the value
5432
 *
5433
 * Returns 1 if it matches, 0 if not and a negative value in case of error
5434
 */
5435
int
5436
xmlRegexpExec(xmlRegexpPtr comp, const xmlChar *content) {
5437
    if ((comp == NULL) || (content == NULL))
5438
	return(-1);
5439
    return(xmlFARegExec(comp, content));
5440
}
5441
5442
/**
5443
 * xmlRegexpIsDeterminist:
5444
 * @comp:  the compiled regular expression
5445
 *
5446
 * Check if the regular expression is determinist
5447
 *
5448
 * Returns 1 if it yes, 0 if not and a negative value in case of error
5449
 */
5450
int
5451
xmlRegexpIsDeterminist(xmlRegexpPtr comp) {
5452
    xmlAutomataPtr am;
5453
    int ret;
5454
5455
    if (comp == NULL)
5456
	return(-1);
5457
    if (comp->determinist != -1)
5458
	return(comp->determinist);
5459
5460
    am = xmlNewAutomata();
5461
    if (am->states != NULL) {
5462
	int i;
5463
5464
	for (i = 0;i < am->nbStates;i++)
5465
	    xmlRegFreeState(am->states[i]);
5466
	xmlFree(am->states);
5467
    }
5468
    am->nbAtoms = comp->nbAtoms;
5469
    am->atoms = comp->atoms;
5470
    am->nbStates = comp->nbStates;
5471
    am->states = comp->states;
5472
    am->determinist = -1;
5473
    ret = xmlFAComputesDeterminism(am);
5474
    am->atoms = NULL;
5475
    am->states = NULL;
5476
    xmlFreeAutomata(am);
5477
    return(ret);
5478
}
5479
5480
/**
5481
 * xmlRegFreeRegexp:
5482
 * @regexp:  the regexp
5483
 *
5484
 * Free a regexp
5485
 */
5486
void
5487
xmlRegFreeRegexp(xmlRegexpPtr regexp) {
5488
    int i;
5489
    if (regexp == NULL)
5490
	return;
5491
5492
    if (regexp->string != NULL)
5493
	xmlFree(regexp->string);
5494
    if (regexp->states != NULL) {
5495
	for (i = 0;i < regexp->nbStates;i++)
5496
	    xmlRegFreeState(regexp->states[i]);
5497
	xmlFree(regexp->states);
5498
    }
5499
    if (regexp->atoms != NULL) {
5500
	for (i = 0;i < regexp->nbAtoms;i++)
5501
	    xmlRegFreeAtom(regexp->atoms[i]);
5502
	xmlFree(regexp->atoms);
5503
    }
5504
    if (regexp->counters != NULL)
5505
	xmlFree(regexp->counters);
5506
    if (regexp->compact != NULL)
5507
	xmlFree(regexp->compact);
5508
    if (regexp->transdata != NULL)
5509
	xmlFree(regexp->transdata);
5510
    if (regexp->stringMap != NULL) {
5511
	for (i = 0; i < regexp->nbstrings;i++)
5512
	    xmlFree(regexp->stringMap[i]);
5513
	xmlFree(regexp->stringMap);
5514
    }
5515
5516
    xmlFree(regexp);
5517
}
5518
5519
#ifdef LIBXML_AUTOMATA_ENABLED
5520
/************************************************************************
5521
 * 									*
5522
 * 			The Automata interface				*
5523
 * 									*
5524
 ************************************************************************/
5525
5526
/**
5527
 * xmlNewAutomata:
5528
 *
5529
 * Create a new automata
5530
 *
5531
 * Returns the new object or NULL in case of failure
5532
 */
5533
xmlAutomataPtr
5534
xmlNewAutomata(void) {
5535
    xmlAutomataPtr ctxt;
5536
5537
    ctxt = xmlRegNewParserCtxt(NULL);
5538
    if (ctxt == NULL)
5539
	return(NULL);
5540
5541
    /* initialize the parser */
5542
    ctxt->end = NULL;
5543
    ctxt->start = ctxt->state = xmlRegNewState(ctxt);
5544
    if (ctxt->start == NULL) {
5545
	xmlFreeAutomata(ctxt);
5546
	return(NULL);
5547
    }
5548
    ctxt->start->type = XML_REGEXP_START_STATE;
5549
    if (xmlRegStatePush(ctxt, ctxt->start) < 0) {
5550
        xmlRegFreeState(ctxt->start);
5551
	xmlFreeAutomata(ctxt);
5552
	return(NULL);
5553
    }
5554
5555
    return(ctxt);
5556
}
5557
5558
/**
5559
 * xmlFreeAutomata:
5560
 * @am: an automata
5561
 *
5562
 * Free an automata
5563
 */
5564
void
5565
xmlFreeAutomata(xmlAutomataPtr am) {
5566
    if (am == NULL)
5567
	return;
5568
    xmlRegFreeParserCtxt(am);
5569
}
5570
5571
/**
5572
 * xmlAutomataGetInitState:
5573
 * @am: an automata
5574
 *
5575
 * Initial state lookup
5576
 *
5577
 * Returns the initial state of the automata
5578
 */
5579
xmlAutomataStatePtr
5580
xmlAutomataGetInitState(xmlAutomataPtr am) {
5581
    if (am == NULL)
5582
	return(NULL);
5583
    return(am->start);
5584
}
5585
5586
/**
5587
 * xmlAutomataSetFinalState:
5588
 * @am: an automata
5589
 * @state: a state in this automata
5590
 *
5591
 * Makes that state a final state
5592
 *
5593
 * Returns 0 or -1 in case of error
5594
 */
5595
int
5596
xmlAutomataSetFinalState(xmlAutomataPtr am, xmlAutomataStatePtr state) {
5597
    if ((am == NULL) || (state == NULL))
5598
	return(-1);
5599
    state->type = XML_REGEXP_FINAL_STATE;
5600
    return(0);
5601
}
5602
5603
/**
5604
 * xmlAutomataNewTransition:
5605
 * @am: an automata
5606
 * @from: the starting point of the transition
5607
 * @to: the target point of the transition or NULL
5608
 * @token: the input string associated to that transition
5609
 * @data: data passed to the callback function if the transition is activated
5610
 *
5611
 * If @to is NULL, this creates first a new target state in the automata
5612
 * and then adds a transition from the @from state to the target state
5613
 * activated by the value of @token
5614
 *
5615
 * Returns the target state or NULL in case of error
5616
 */
5617
xmlAutomataStatePtr
5618
xmlAutomataNewTransition(xmlAutomataPtr am, xmlAutomataStatePtr from,
5619
			 xmlAutomataStatePtr to, const xmlChar *token,
5620
			 void *data) {
5621
    xmlRegAtomPtr atom;
5622
5623
    if ((am == NULL) || (from == NULL) || (token == NULL))
5624
	return(NULL);
5625
    atom = xmlRegNewAtom(am, XML_REGEXP_STRING);
5626
    if (atom == NULL)
5627
        return(NULL);
5628
    atom->data = data;
5629
    if (atom == NULL)
5630
	return(NULL);
5631
    atom->valuep = xmlStrdup(token);
5632
5633
    if (xmlFAGenerateTransitions(am, from, to, atom) < 0) {
5634
        xmlRegFreeAtom(atom);
5635
	return(NULL);
5636
    }
5637
    if (to == NULL)
5638
	return(am->state);
5639
    return(to);
5640
}
5641
5642
/**
5643
 * xmlAutomataNewTransition2:
5644
 * @am: an automata
5645
 * @from: the starting point of the transition
5646
 * @to: the target point of the transition or NULL
5647
 * @token: the first input string associated to that transition
5648
 * @token2: the second input string associated to that transition
5649
 * @data: data passed to the callback function if the transition is activated
5650
 *
5651
 * If @to is NULL, this creates first a new target state in the automata
5652
 * and then adds a transition from the @from state to the target state
5653
 * activated by the value of @token
5654
 *
5655
 * Returns the target state or NULL in case of error
5656
 */
5657
xmlAutomataStatePtr
5658
xmlAutomataNewTransition2(xmlAutomataPtr am, xmlAutomataStatePtr from,
5659
			  xmlAutomataStatePtr to, const xmlChar *token,
5660
			  const xmlChar *token2, void *data) {
5661
    xmlRegAtomPtr atom;
5662
5663
    if ((am == NULL) || (from == NULL) || (token == NULL))
5664
	return(NULL);
5665
    atom = xmlRegNewAtom(am, XML_REGEXP_STRING);
5666
    if (atom == NULL)
5667
	return(NULL);
5668
    atom->data = data;
5669
    if ((token2 == NULL) || (*token2 == 0)) {
5670
	atom->valuep = xmlStrdup(token);
5671
    } else {
5672
	int lenn, lenp;
5673
	xmlChar *str;
5674
5675
	lenn = strlen((char *) token2);
5676
	lenp = strlen((char *) token);
5677
5678
	str = (xmlChar *) xmlMallocAtomic(lenn + lenp + 2);
5679
	if (str == NULL) {
5680
	    xmlRegFreeAtom(atom);
5681
	    return(NULL);
5682
	}
5683
	memcpy(&str[0], token, lenp);
5684
	str[lenp] = '|';
5685
	memcpy(&str[lenp + 1], token2, lenn);
5686
	str[lenn + lenp + 1] = 0;
5687
5688
	atom->valuep = str;
5689
    }
5690
5691
    if (xmlFAGenerateTransitions(am, from, to, atom) < 0) {
5692
        xmlRegFreeAtom(atom);
5693
	return(NULL);
5694
    }
5695
    if (to == NULL)
5696
	return(am->state);
5697
    return(to);
5698
}
5699
5700
/**
5701
 * xmlAutomataNewNegTrans:
5702
 * @am: an automata
5703
 * @from: the starting point of the transition
5704
 * @to: the target point of the transition or NULL
5705
 * @token: the first input string associated to that transition
5706
 * @token2: the second input string associated to that transition
5707
 * @data: data passed to the callback function if the transition is activated
5708
 *
5709
 * If @to is NULL, this creates first a new target state in the automata
5710
 * and then adds a transition from the @from state to the target state
5711
 * activated by any value except (@token,@token2)
5712
 * Note that if @token2 is not NULL, then (X, NULL) won't match to follow
5713
 # the semantic of XSD ##other
5714
 *
5715
 * Returns the target state or NULL in case of error
5716
 */
5717
xmlAutomataStatePtr
5718
xmlAutomataNewNegTrans(xmlAutomataPtr am, xmlAutomataStatePtr from,
5719
		       xmlAutomataStatePtr to, const xmlChar *token,
5720
		       const xmlChar *token2, void *data) {
5721
    xmlRegAtomPtr atom;
5722
    xmlChar err_msg[200];
5723
5724
    if ((am == NULL) || (from == NULL) || (token == NULL))
5725
	return(NULL);
5726
    atom = xmlRegNewAtom(am, XML_REGEXP_STRING);
5727
    if (atom == NULL)
5728
	return(NULL);
5729
    atom->data = data;
5730
    atom->neg = 1;
5731
    if ((token2 == NULL) || (*token2 == 0)) {
5732
	atom->valuep = xmlStrdup(token);
5733
    } else {
5734
	int lenn, lenp;
5735
	xmlChar *str;
5736
5737
	lenn = strlen((char *) token2);
5738
	lenp = strlen((char *) token);
5739
5740
	str = (xmlChar *) xmlMallocAtomic(lenn + lenp + 2);
5741
	if (str == NULL) {
5742
	    xmlRegFreeAtom(atom);
5743
	    return(NULL);
5744
	}
5745
	memcpy(&str[0], token, lenp);
5746
	str[lenp] = '|';
5747
	memcpy(&str[lenp + 1], token2, lenn);
5748
	str[lenn + lenp + 1] = 0;
5749
5750
	atom->valuep = str;
5751
    }
5752
    snprintf((char *) err_msg, 199, "not %s", (const char *) atom->valuep);
5753
    err_msg[199] = 0;
5754
    atom->valuep2 = xmlStrdup(err_msg);
5755
5756
    if (xmlFAGenerateTransitions(am, from, to, atom) < 0) {
5757
        xmlRegFreeAtom(atom);
5758
	return(NULL);
5759
    }
5760
    am->negs++;
5761
    if (to == NULL)
5762
	return(am->state);
5763
    return(to);
5764
}
5765
5766
/**
5767
 * xmlAutomataNewCountTrans2:
5768
 * @am: an automata
5769
 * @from: the starting point of the transition
5770
 * @to: the target point of the transition or NULL
5771
 * @token: the input string associated to that transition
5772
 * @token2: the second input string associated to that transition
5773
 * @min:  the minimum successive occurences of token
5774
 * @max:  the maximum successive occurences of token
5775
 * @data:  data associated to the transition
5776
 *
5777
 * If @to is NULL, this creates first a new target state in the automata
5778
 * and then adds a transition from the @from state to the target state
5779
 * activated by a succession of input of value @token and @token2 and 
5780
 * whose number is between @min and @max
5781
 *
5782
 * Returns the target state or NULL in case of error
5783
 */
5784
xmlAutomataStatePtr
5785
xmlAutomataNewCountTrans2(xmlAutomataPtr am, xmlAutomataStatePtr from,
5786
			 xmlAutomataStatePtr to, const xmlChar *token,
5787
			 const xmlChar *token2,
5788
			 int min, int max, void *data) {
5789
    xmlRegAtomPtr atom;
5790
    int counter;
5791
5792
    if ((am == NULL) || (from == NULL) || (token == NULL))
5793
	return(NULL);
5794
    if (min < 0)
5795
	return(NULL);
5796
    if ((max < min) || (max < 1))
5797
	return(NULL);
5798
    atom = xmlRegNewAtom(am, XML_REGEXP_STRING);
5799
    if (atom == NULL)
5800
	return(NULL);
5801
    if ((token2 == NULL) || (*token2 == 0)) {
5802
	atom->valuep = xmlStrdup(token);
5803
    } else {
5804
	int lenn, lenp;
5805
	xmlChar *str;
5806
5807
	lenn = strlen((char *) token2);
5808
	lenp = strlen((char *) token);
5809
5810
	str = (xmlChar *) xmlMallocAtomic(lenn + lenp + 2);
5811
	if (str == NULL) {
5812
	    xmlRegFreeAtom(atom);
5813
	    return(NULL);
5814
	}
5815
	memcpy(&str[0], token, lenp);
5816
	str[lenp] = '|';
5817
	memcpy(&str[lenp + 1], token2, lenn);
5818
	str[lenn + lenp + 1] = 0;
5819
5820
	atom->valuep = str;
5821
    }
5822
    atom->data = data;
5823
    if (min == 0)
5824
	atom->min = 1;
5825
    else
5826
	atom->min = min;
5827
    atom->max = max;
5828
5829
    /*
5830
     * associate a counter to the transition.
5831
     */
5832
    counter = xmlRegGetCounter(am);
5833
    am->counters[counter].min = min;
5834
    am->counters[counter].max = max;
5835
5836
    /* xmlFAGenerateTransitions(am, from, to, atom); */
5837
    if (to == NULL) {
5838
        to = xmlRegNewState(am);
5839
	xmlRegStatePush(am, to);
5840
    }
5841
    xmlRegStateAddTrans(am, from, atom, to, counter, -1);
5842
    xmlRegAtomPush(am, atom);
5843
    am->state = to;
5844
5845
    if (to == NULL)
5846
	to = am->state;
5847
    if (to == NULL)
5848
	return(NULL);
5849
    if (min == 0)
5850
	xmlFAGenerateEpsilonTransition(am, from, to);
5851
    return(to);
5852
}
5853
5854
/**
5855
 * xmlAutomataNewCountTrans:
5856
 * @am: an automata
5857
 * @from: the starting point of the transition
5858
 * @to: the target point of the transition or NULL
5859
 * @token: the input string associated to that transition
5860
 * @min:  the minimum successive occurences of token
5861
 * @max:  the maximum successive occurences of token
5862
 * @data:  data associated to the transition
5863
 *
5864
 * If @to is NULL, this creates first a new target state in the automata
5865
 * and then adds a transition from the @from state to the target state
5866
 * activated by a succession of input of value @token and whose number
5867
 * is between @min and @max
5868
 *
5869
 * Returns the target state or NULL in case of error
5870
 */
5871
xmlAutomataStatePtr
5872
xmlAutomataNewCountTrans(xmlAutomataPtr am, xmlAutomataStatePtr from,
5873
			 xmlAutomataStatePtr to, const xmlChar *token,
5874
			 int min, int max, void *data) {
5875
    xmlRegAtomPtr atom;
5876
    int counter;
5877
5878
    if ((am == NULL) || (from == NULL) || (token == NULL))
5879
	return(NULL);
5880
    if (min < 0)
5881
	return(NULL);
5882
    if ((max < min) || (max < 1))
5883
	return(NULL);
5884
    atom = xmlRegNewAtom(am, XML_REGEXP_STRING);
5885
    if (atom == NULL)
5886
	return(NULL);
5887
    atom->valuep = xmlStrdup(token);
5888
    atom->data = data;
5889
    if (min == 0)
5890
	atom->min = 1;
5891
    else
5892
	atom->min = min;
5893
    atom->max = max;
5894
5895
    /*
5896
     * associate a counter to the transition.
5897
     */
5898
    counter = xmlRegGetCounter(am);
5899
    am->counters[counter].min = min;
5900
    am->counters[counter].max = max;
5901
5902
    /* xmlFAGenerateTransitions(am, from, to, atom); */
5903
    if (to == NULL) {
5904
        to = xmlRegNewState(am);
5905
	xmlRegStatePush(am, to);
5906
    }
5907
    xmlRegStateAddTrans(am, from, atom, to, counter, -1);
5908
    xmlRegAtomPush(am, atom);
5909
    am->state = to;
5910
5911
    if (to == NULL)
5912
	to = am->state;
5913
    if (to == NULL)
5914
	return(NULL);
5915
    if (min == 0)
5916
	xmlFAGenerateEpsilonTransition(am, from, to);
5917
    return(to);
5918
}
5919
5920
/**
5921
 * xmlAutomataNewOnceTrans2:
5922
 * @am: an automata
5923
 * @from: the starting point of the transition
5924
 * @to: the target point of the transition or NULL
5925
 * @token: the input string associated to that transition
5926
 * @token2: the second input string associated to that transition
5927
 * @min:  the minimum successive occurences of token
5928
 * @max:  the maximum successive occurences of token
5929
 * @data:  data associated to the transition
5930
 *
5931
 * If @to is NULL, this creates first a new target state in the automata
5932
 * and then adds a transition from the @from state to the target state
5933
 * activated by a succession of input of value @token and @token2 and whose 
5934
 * number is between @min and @max, moreover that transition can only be 
5935
 * crossed once.
5936
 *
5937
 * Returns the target state or NULL in case of error
5938
 */
5939
xmlAutomataStatePtr
5940
xmlAutomataNewOnceTrans2(xmlAutomataPtr am, xmlAutomataStatePtr from,
5941
			 xmlAutomataStatePtr to, const xmlChar *token,
5942
			 const xmlChar *token2,
5943
			 int min, int max, void *data) {
5944
    xmlRegAtomPtr atom;
5945
    int counter;
5946
5947
    if ((am == NULL) || (from == NULL) || (token == NULL))
5948
	return(NULL);
5949
    if (min < 1)
5950
	return(NULL);
5951
    if ((max < min) || (max < 1))
5952
	return(NULL);
5953
    atom = xmlRegNewAtom(am, XML_REGEXP_STRING);
5954
    if (atom == NULL)
5955
	return(NULL);
5956
    if ((token2 == NULL) || (*token2 == 0)) {
5957
	atom->valuep = xmlStrdup(token);
5958
    } else {
5959
	int lenn, lenp;
5960
	xmlChar *str;
5961
5962
	lenn = strlen((char *) token2);
5963
	lenp = strlen((char *) token);
5964
5965
	str = (xmlChar *) xmlMallocAtomic(lenn + lenp + 2);
5966
	if (str == NULL) {
5967
	    xmlRegFreeAtom(atom);
5968
	    return(NULL);
5969
	}
5970
	memcpy(&str[0], token, lenp);
5971
	str[lenp] = '|';
5972
	memcpy(&str[lenp + 1], token2, lenn);
5973
	str[lenn + lenp + 1] = 0;
5974
5975
	atom->valuep = str;
5976
    }    
5977
    atom->data = data;
5978
    atom->quant = XML_REGEXP_QUANT_ONCEONLY;
5979
    atom->min = min;
5980
    atom->max = max;
5981
    /*
5982
     * associate a counter to the transition.
5983
     */
5984
    counter = xmlRegGetCounter(am);
5985
    am->counters[counter].min = 1;
5986
    am->counters[counter].max = 1;
5987
5988
    /* xmlFAGenerateTransitions(am, from, to, atom); */
5989
    if (to == NULL) {
5990
	to = xmlRegNewState(am);
5991
	xmlRegStatePush(am, to);
5992
    }
5993
    xmlRegStateAddTrans(am, from, atom, to, counter, -1);
5994
    xmlRegAtomPush(am, atom);
5995
    am->state = to;
5996
    return(to);
5997
}
5998
5999
    
6000
6001
/**
6002
 * xmlAutomataNewOnceTrans:
6003
 * @am: an automata
6004
 * @from: the starting point of the transition
6005
 * @to: the target point of the transition or NULL
6006
 * @token: the input string associated to that transition
6007
 * @min:  the minimum successive occurences of token
6008
 * @max:  the maximum successive occurences of token
6009
 * @data:  data associated to the transition
6010
 *
6011
 * If @to is NULL, this creates first a new target state in the automata
6012
 * and then adds a transition from the @from state to the target state
6013
 * activated by a succession of input of value @token and whose number
6014
 * is between @min and @max, moreover that transition can only be crossed
6015
 * once.
6016
 *
6017
 * Returns the target state or NULL in case of error
6018
 */
6019
xmlAutomataStatePtr
6020
xmlAutomataNewOnceTrans(xmlAutomataPtr am, xmlAutomataStatePtr from,
6021
			 xmlAutomataStatePtr to, const xmlChar *token,
6022
			 int min, int max, void *data) {
6023
    xmlRegAtomPtr atom;
6024
    int counter;
6025
6026
    if ((am == NULL) || (from == NULL) || (token == NULL))
6027
	return(NULL);
6028
    if (min < 1)
6029
	return(NULL);
6030
    if ((max < min) || (max < 1))
6031
	return(NULL);
6032
    atom = xmlRegNewAtom(am, XML_REGEXP_STRING);
6033
    if (atom == NULL)
6034
	return(NULL);
6035
    atom->valuep = xmlStrdup(token);
6036
    atom->data = data;
6037
    atom->quant = XML_REGEXP_QUANT_ONCEONLY;
6038
    atom->min = min;
6039
    atom->max = max;
6040
    /*
6041
     * associate a counter to the transition.
6042
     */
6043
    counter = xmlRegGetCounter(am);
6044
    am->counters[counter].min = 1;
6045
    am->counters[counter].max = 1;
6046
6047
    /* xmlFAGenerateTransitions(am, from, to, atom); */
6048
    if (to == NULL) {
6049
	to = xmlRegNewState(am);
6050
	xmlRegStatePush(am, to);
6051
    }
6052
    xmlRegStateAddTrans(am, from, atom, to, counter, -1);
6053
    xmlRegAtomPush(am, atom);
6054
    am->state = to;
6055
    return(to);
6056
}
6057
6058
/**
6059
 * xmlAutomataNewState:
6060
 * @am: an automata
6061
 *
6062
 * Create a new disconnected state in the automata
6063
 *
6064
 * Returns the new state or NULL in case of error
6065
 */
6066
xmlAutomataStatePtr
6067
xmlAutomataNewState(xmlAutomataPtr am) {
6068
    xmlAutomataStatePtr to; 
6069
6070
    if (am == NULL)
6071
	return(NULL);
6072
    to = xmlRegNewState(am);
6073
    xmlRegStatePush(am, to);
6074
    return(to);
6075
}
6076
6077
/**
6078
 * xmlAutomataNewEpsilon:
6079
 * @am: an automata
6080
 * @from: the starting point of the transition
6081
 * @to: the target point of the transition or NULL
6082
 *
6083
 * If @to is NULL, this creates first a new target state in the automata
6084
 * and then adds an epsilon transition from the @from state to the
6085
 * target state
6086
 *
6087
 * Returns the target state or NULL in case of error
6088
 */
6089
xmlAutomataStatePtr
6090
xmlAutomataNewEpsilon(xmlAutomataPtr am, xmlAutomataStatePtr from,
6091
		      xmlAutomataStatePtr to) {
6092
    if ((am == NULL) || (from == NULL))
6093
	return(NULL);
6094
    xmlFAGenerateEpsilonTransition(am, from, to);
6095
    if (to == NULL)
6096
	return(am->state);
6097
    return(to);
6098
}
6099
6100
/**
6101
 * xmlAutomataNewAllTrans:
6102
 * @am: an automata
6103
 * @from: the starting point of the transition
6104
 * @to: the target point of the transition or NULL
6105
 * @lax: allow to transition if not all all transitions have been activated
6106
 *
6107
 * If @to is NULL, this creates first a new target state in the automata
6108
 * and then adds a an ALL transition from the @from state to the
6109
 * target state. That transition is an epsilon transition allowed only when
6110
 * all transitions from the @from node have been activated.
6111
 *
6112
 * Returns the target state or NULL in case of error
6113
 */
6114
xmlAutomataStatePtr
6115
xmlAutomataNewAllTrans(xmlAutomataPtr am, xmlAutomataStatePtr from,
6116
		       xmlAutomataStatePtr to, int lax) {
6117
    if ((am == NULL) || (from == NULL))
6118
	return(NULL);
6119
    xmlFAGenerateAllTransition(am, from, to, lax);
6120
    if (to == NULL)
6121
	return(am->state);
6122
    return(to);
6123
}
6124
6125
/**
6126
 * xmlAutomataNewCounter:
6127
 * @am: an automata
6128
 * @min:  the minimal value on the counter
6129
 * @max:  the maximal value on the counter
6130
 *
6131
 * Create a new counter
6132
 *
6133
 * Returns the counter number or -1 in case of error
6134
 */
6135
int		
6136
xmlAutomataNewCounter(xmlAutomataPtr am, int min, int max) {
6137
    int ret;
6138
6139
    if (am == NULL)
6140
	return(-1);
6141
6142
    ret = xmlRegGetCounter(am);
6143
    if (ret < 0)
6144
	return(-1);
6145
    am->counters[ret].min = min;
6146
    am->counters[ret].max = max;
6147
    return(ret);
6148
}
6149
6150
/**
6151
 * xmlAutomataNewCountedTrans:
6152
 * @am: an automata
6153
 * @from: the starting point of the transition
6154
 * @to: the target point of the transition or NULL
6155
 * @counter: the counter associated to that transition
6156
 *
6157
 * If @to is NULL, this creates first a new target state in the automata
6158
 * and then adds an epsilon transition from the @from state to the target state
6159
 * which will increment the counter provided
6160
 *
6161
 * Returns the target state or NULL in case of error
6162
 */
6163
xmlAutomataStatePtr
6164
xmlAutomataNewCountedTrans(xmlAutomataPtr am, xmlAutomataStatePtr from,
6165
		xmlAutomataStatePtr to, int counter) {
6166
    if ((am == NULL) || (from == NULL) || (counter < 0))
6167
	return(NULL);
6168
    xmlFAGenerateCountedEpsilonTransition(am, from, to, counter);
6169
    if (to == NULL)
6170
	return(am->state);
6171
    return(to);
6172
}
6173
6174
/**
6175
 * xmlAutomataNewCounterTrans:
6176
 * @am: an automata
6177
 * @from: the starting point of the transition
6178
 * @to: the target point of the transition or NULL
6179
 * @counter: the counter associated to that transition
6180
 *
6181
 * If @to is NULL, this creates first a new target state in the automata
6182
 * and then adds an epsilon transition from the @from state to the target state
6183
 * which will be allowed only if the counter is within the right range.
6184
 *
6185
 * Returns the target state or NULL in case of error
6186
 */
6187
xmlAutomataStatePtr
6188
xmlAutomataNewCounterTrans(xmlAutomataPtr am, xmlAutomataStatePtr from,
6189
		xmlAutomataStatePtr to, int counter) {
6190
    if ((am == NULL) || (from == NULL) || (counter < 0))
6191
	return(NULL);
6192
    xmlFAGenerateCountedTransition(am, from, to, counter);
6193
    if (to == NULL)
6194
	return(am->state);
6195
    return(to);
6196
}
6197
6198
/**
6199
 * xmlAutomataCompile:
6200
 * @am: an automata
6201
 *
6202
 * Compile the automata into a Reg Exp ready for being executed.
6203
 * The automata should be free after this point.
6204
 *
6205
 * Returns the compiled regexp or NULL in case of error
6206
 */
6207
xmlRegexpPtr          
6208
xmlAutomataCompile(xmlAutomataPtr am) {
6209
    xmlRegexpPtr ret;
6210
6211
    if ((am == NULL) || (am->error != 0)) return(NULL);
6212
    xmlFAEliminateEpsilonTransitions(am);
6213
    /* xmlFAComputesDeterminism(am); */
6214
    ret = xmlRegEpxFromParse(am);
6215
6216
    return(ret);
6217
}
6218
6219
/**
6220
 * xmlAutomataIsDeterminist:
6221
 * @am: an automata
6222
 *
6223
 * Checks if an automata is determinist.
6224
 *
6225
 * Returns 1 if true, 0 if not, and -1 in case of error
6226
 */
6227
int          
6228
xmlAutomataIsDeterminist(xmlAutomataPtr am) {
6229
    int ret;
6230
6231
    if (am == NULL)
6232
	return(-1);
6233
6234
    ret = xmlFAComputesDeterminism(am);
6235
    return(ret);
6236
}
6237
#endif /* LIBXML_AUTOMATA_ENABLED */
6238
6239
#ifdef LIBXML_EXPR_ENABLED
6240
/************************************************************************
6241
 *									*
6242
 *		Formal Expression handling code				*
6243
 *									*
6244
 ************************************************************************/
6245
/************************************************************************
6246
 *									*
6247
 *		Expression handling context				*
6248
 *									*
6249
 ************************************************************************/
6250
6251
struct _xmlExpCtxt {
6252
    xmlDictPtr dict;
6253
    xmlExpNodePtr *table;
6254
    int size;
6255
    int nbElems;
6256
    int nb_nodes;
6257
    const char *expr;
6258
    const char *cur;
6259
    int nb_cons;
6260
    int tabSize;
6261
};
6262
6263
/**
6264
 * xmlExpNewCtxt:
6265
 * @maxNodes:  the maximum number of nodes
6266
 * @dict:  optional dictionnary to use internally
6267
 *
6268
 * Creates a new context for manipulating expressions
6269
 *
6270
 * Returns the context or NULL in case of error
6271
 */
6272
xmlExpCtxtPtr
6273
xmlExpNewCtxt(int maxNodes, xmlDictPtr dict) {
6274
    xmlExpCtxtPtr ret;
6275
    int size = 256;
6276
6277
    if (maxNodes <= 4096)
6278
        maxNodes = 4096;
6279
    
6280
    ret = (xmlExpCtxtPtr) xmlMalloc(sizeof(xmlExpCtxt));
6281
    if (ret == NULL)
6282
        return(NULL);
6283
    memset(ret, 0, sizeof(xmlExpCtxt));
6284
    ret->size = size;
6285
    ret->nbElems = 0;
6286
    ret->table = xmlMalloc(size * sizeof(xmlExpNodePtr));
6287
    if (ret->table == NULL) {
6288
        xmlFree(ret);
6289
	return(NULL);
6290
    }
6291
    memset(ret->table, 0, size * sizeof(xmlExpNodePtr));
6292
    if (dict == NULL) {
6293
        ret->dict = xmlDictCreate();
6294
	if (ret->dict == NULL) {
6295
	    xmlFree(ret->table);
6296
	    xmlFree(ret);
6297
	    return(NULL);
6298
	}
6299
    } else {
6300
        ret->dict = dict;
6301
	xmlDictReference(ret->dict);
6302
    }
6303
    return(ret);
6304
}
6305
6306
/**
6307
 * xmlExpFreeCtxt:
6308
 * @ctxt:  an expression context
6309
 *
6310
 * Free an expression context
6311
 */
6312
void
6313
xmlExpFreeCtxt(xmlExpCtxtPtr ctxt) {
6314
    if (ctxt == NULL)
6315
        return;
6316
    xmlDictFree(ctxt->dict);
6317
    if (ctxt->table != NULL)
6318
	xmlFree(ctxt->table);
6319
    xmlFree(ctxt);
6320
}
6321
6322
/************************************************************************
6323
 *									*
6324
 *		Structure associated to an expression node		*
6325
 *									*
6326
 ************************************************************************/
6327
#define MAX_NODES 10000
6328
6329
/* #define DEBUG_DERIV */
6330
6331
/*
6332
 * TODO: 
6333
 * - Wildcards
6334
 * - public API for creation
6335
 *
6336
 * Started
6337
 * - regression testing
6338
 *
6339
 * Done
6340
 * - split into module and test tool
6341
 * - memleaks
6342
 */
6343
6344
typedef enum {
6345
    XML_EXP_NILABLE = (1 << 0)
6346
} xmlExpNodeInfo;
6347
6348
#define IS_NILLABLE(node) ((node)->info & XML_EXP_NILABLE)
6349
6350
struct _xmlExpNode {
6351
    unsigned char type;/* xmlExpNodeType */
6352
    unsigned char info;/* OR of xmlExpNodeInfo */
6353
    unsigned short key;	/* the hash key */
6354
    unsigned int ref;	/* The number of references */
6355
    int c_max;		/* the maximum length it can consume */
6356
    xmlExpNodePtr exp_left;
6357
    xmlExpNodePtr next;/* the next node in the hash table or free list */
6358
    union {
6359
	struct {
6360
	    int f_min;
6361
	    int f_max;
6362
	} count;
6363
	struct {
6364
	    xmlExpNodePtr f_right;
6365
	} children;
6366
        const xmlChar *f_str;
6367
    } field;
6368
};
6369
6370
#define exp_min field.count.f_min
6371
#define exp_max field.count.f_max
6372
/* #define exp_left field.children.f_left */
6373
#define exp_right field.children.f_right
6374
#define exp_str field.f_str
6375
6376
static xmlExpNodePtr xmlExpNewNode(xmlExpCtxtPtr ctxt, xmlExpNodeType type);
6377
static xmlExpNode forbiddenExpNode = {
6378
    XML_EXP_FORBID, 0, 0, 0, 0, NULL, NULL, {{ 0, 0}}
6379
};
6380
xmlExpNodePtr forbiddenExp = &forbiddenExpNode;
6381
static xmlExpNode emptyExpNode = {
6382
    XML_EXP_EMPTY, 1, 0, 0, 0, NULL, NULL, {{ 0, 0}}
6383
};
6384
xmlExpNodePtr emptyExp = &emptyExpNode;
6385
6386
/************************************************************************
6387
 *									*
6388
 *  The custom hash table for unicity and canonicalization		*
6389
 *  of sub-expressions pointers						*
6390
 *									*
6391
 ************************************************************************/
6392
/*
6393
 * xmlExpHashNameComputeKey:
6394
 * Calculate the hash key for a token
6395
 */
6396
static unsigned short
6397
xmlExpHashNameComputeKey(const xmlChar *name) {
6398
    unsigned short value = 0L;
6399
    char ch;
6400
    
6401
    if (name != NULL) {
6402
	value += 30 * (*name);
6403
	while ((ch = *name++) != 0) {
6404
	    value = value ^ ((value << 5) + (value >> 3) + (unsigned long)ch);
6405
	}
6406
    }
6407
    return (value);
6408
}
6409
6410
/*
6411
 * xmlExpHashComputeKey:
6412
 * Calculate the hash key for a compound expression
6413
 */
6414
static unsigned short
6415
xmlExpHashComputeKey(xmlExpNodeType type, xmlExpNodePtr left,
6416
                     xmlExpNodePtr right) {
6417
    unsigned long value;
6418
    unsigned short ret;
6419
    
6420
    switch (type) {
6421
        case XML_EXP_SEQ:
6422
	    value = left->key;
6423
	    value += right->key;
6424
	    value *= 3;
6425
	    ret = (unsigned short) value;
6426
	    break;
6427
        case XML_EXP_OR:
6428
	    value = left->key;
6429
	    value += right->key;
6430
	    value *= 7;
6431
	    ret = (unsigned short) value;
6432
	    break;
6433
        case XML_EXP_COUNT:
6434
	    value = left->key;
6435
	    value += right->key;
6436
	    ret = (unsigned short) value;
6437
	    break;
6438
	default:
6439
	    ret = 0;
6440
    }
6441
    return(ret);
6442
}
6443
6444
6445
static xmlExpNodePtr
6446
xmlExpNewNode(xmlExpCtxtPtr ctxt, xmlExpNodeType type) {
6447
    xmlExpNodePtr ret;
6448
6449
    if (ctxt->nb_nodes >= MAX_NODES)
6450
        return(NULL);
6451
    ret = (xmlExpNodePtr) xmlMalloc(sizeof(xmlExpNode));
6452
    if (ret == NULL)
6453
        return(NULL);
6454
    memset(ret, 0, sizeof(xmlExpNode));
6455
    ret->type = type;
6456
    ret->next = NULL;
6457
    ctxt->nb_nodes++;
6458
    ctxt->nb_cons++;
6459
    return(ret);
6460
}
6461
6462
/**
6463
 * xmlExpHashGetEntry:
6464
 * @table: the hash table
6465
 *
6466
 * Get the unique entry from the hash table. The entry is created if
6467
 * needed. @left and @right are consumed, i.e. their ref count will
6468
 * be decremented by the operation.
6469
 *
6470
 * Returns the pointer or NULL in case of error
6471
 */
6472
static xmlExpNodePtr
6473
xmlExpHashGetEntry(xmlExpCtxtPtr ctxt, xmlExpNodeType type,
6474
                   xmlExpNodePtr left, xmlExpNodePtr right,
6475
		   const xmlChar *name, int min, int max) {
6476
    unsigned short kbase, key;
6477
    xmlExpNodePtr entry;
6478
    xmlExpNodePtr insert;
6479
6480
    if (ctxt == NULL)
6481
	return(NULL);
6482
6483
    /*
6484
     * Check for duplicate and insertion location.
6485
     */
6486
    if (type == XML_EXP_ATOM) {
6487
	kbase = xmlExpHashNameComputeKey(name);
6488
    } else if (type == XML_EXP_COUNT) {
6489
        /* COUNT reduction rule 1 */
6490
	/* a{1} -> a */
6491
	if (min == max) {
6492
	    if (min == 1) {
6493
		return(left);
6494
	    }
6495
	    if (min == 0) {
6496
		xmlExpFree(ctxt, left);
6497
	        return(emptyExp);
6498
	    }
6499
	}
6500
	if (min < 0) {
6501
	    xmlExpFree(ctxt, left);
6502
	    return(forbiddenExp);
6503
	}
6504
        if (max == -1)
6505
	    kbase = min + 79;
6506
	else
6507
	    kbase = max - min;
6508
	kbase += left->key;
6509
    } else if (type == XML_EXP_OR) {
6510
        /* Forbid reduction rules */
6511
        if (left->type == XML_EXP_FORBID) {
6512
	    xmlExpFree(ctxt, left);
6513
	    return(right);
6514
	}
6515
        if (right->type == XML_EXP_FORBID) {
6516
	    xmlExpFree(ctxt, right);
6517
	    return(left);
6518
	}
6519
6520
        /* OR reduction rule 1 */
6521
	/* a | a reduced to a */
6522
        if (left == right) {
6523
	    left->ref--;
6524
	    return(left);
6525
	}
6526
        /* OR canonicalization rule 1 */
6527
	/* linearize (a | b) | c into a | (b | c) */
6528
        if ((left->type == XML_EXP_OR) && (right->type != XML_EXP_OR)) {
6529
	    xmlExpNodePtr tmp = left;
6530
            left = right;
6531
	    right = tmp;
6532
	}
6533
        /* OR reduction rule 2 */
6534
	/* a | (a | b) and b | (a | b) are reduced to a | b */
6535
        if (right->type == XML_EXP_OR) {
6536
	    if ((left == right->exp_left) ||
6537
	        (left == right->exp_right)) {
6538
		xmlExpFree(ctxt, left);
6539
		return(right);
6540
	    }
6541
	}
6542
        /* OR canonicalization rule 2 */
6543
	/* linearize (a | b) | c into a | (b | c) */
6544
        if (left->type == XML_EXP_OR) {
6545
	    xmlExpNodePtr tmp;
6546
6547
	    /* OR canonicalization rule 2 */
6548
	    if ((left->exp_right->type != XML_EXP_OR) &&
6549
	        (left->exp_right->key < left->exp_left->key)) {
6550
	        tmp = left->exp_right;
6551
		left->exp_right = left->exp_left;
6552
		left->exp_left = tmp;
6553
	    }
6554
	    left->exp_right->ref++;
6555
	    tmp = xmlExpHashGetEntry(ctxt, XML_EXP_OR, left->exp_right, right,
6556
	                             NULL, 0, 0);
6557
	    left->exp_left->ref++;
6558
	    tmp = xmlExpHashGetEntry(ctxt, XML_EXP_OR, left->exp_left, tmp,
6559
	                             NULL, 0, 0);
6560
	
6561
	    xmlExpFree(ctxt, left);
6562
	    return(tmp);
6563
	}
6564
	if (right->type == XML_EXP_OR) {
6565
	    /* Ordering in the tree */
6566
	    /* C | (A | B) -> A | (B | C) */
6567
	    if (left->key > right->exp_right->key) {
6568
		xmlExpNodePtr tmp;
6569
		right->exp_right->ref++;
6570
		tmp = xmlExpHashGetEntry(ctxt, XML_EXP_OR, right->exp_right,
6571
		                         left, NULL, 0, 0);
6572
		right->exp_left->ref++;
6573
		tmp = xmlExpHashGetEntry(ctxt, XML_EXP_OR, right->exp_left,
6574
		                         tmp, NULL, 0, 0);
6575
		xmlExpFree(ctxt, right);
6576
		return(tmp);
6577
	    }
6578
	    /* Ordering in the tree */
6579
	    /* B | (A | C) -> A | (B | C) */
6580
	    if (left->key > right->exp_left->key) {
6581
		xmlExpNodePtr tmp;
6582
		right->exp_right->ref++;
6583
		tmp = xmlExpHashGetEntry(ctxt, XML_EXP_OR, left,
6584
		                         right->exp_right, NULL, 0, 0);
6585
		right->exp_left->ref++;
6586
		tmp = xmlExpHashGetEntry(ctxt, XML_EXP_OR, right->exp_left,
6587
		                         tmp, NULL, 0, 0);
6588
		xmlExpFree(ctxt, right);
6589
		return(tmp);
6590
	    }
6591
	}
6592
	/* we know both types are != XML_EXP_OR here */
6593
        else if (left->key > right->key) {
6594
	    xmlExpNodePtr tmp = left;
6595
            left = right;
6596
	    right = tmp;
6597
	}
6598
	kbase = xmlExpHashComputeKey(type, left, right);
6599
    } else if (type == XML_EXP_SEQ) {
6600
        /* Forbid reduction rules */
6601
        if (left->type == XML_EXP_FORBID) {
6602
	    xmlExpFree(ctxt, right);
6603
	    return(left);
6604
	}
6605
        if (right->type == XML_EXP_FORBID) {
6606
	    xmlExpFree(ctxt, left);
6607
	    return(right);
6608
	}
6609
        /* Empty reduction rules */
6610
        if (right->type == XML_EXP_EMPTY) {
6611
	    return(left);
6612
	}
6613
        if (left->type == XML_EXP_EMPTY) {
6614
	    return(right);
6615
	}
6616
	kbase = xmlExpHashComputeKey(type, left, right);
6617
    } else 
6618
        return(NULL);
6619
6620
    key = kbase % ctxt->size;
6621
    if (ctxt->table[key] != NULL) {
6622
	for (insert = ctxt->table[key]; insert != NULL;
6623
	     insert = insert->next) {
6624
	    if ((insert->key == kbase) &&
6625
	        (insert->type == type)) {
6626
		if (type == XML_EXP_ATOM) {
6627
		    if (name == insert->exp_str) {
6628
			insert->ref++;
6629
			return(insert);
6630
		    }
6631
		} else if (type == XML_EXP_COUNT) {
6632
		    if ((insert->exp_min == min) && (insert->exp_max == max) &&
6633
		        (insert->exp_left == left)) {
6634
			insert->ref++;
6635
			left->ref--;
6636
			return(insert);
6637
		    }
6638
		} else if ((insert->exp_left == left) &&
6639
			   (insert->exp_right == right)) {
6640
		    insert->ref++;
6641
		    left->ref--;
6642
		    right->ref--;
6643
		    return(insert);
6644
		}
6645
	    }
6646
	}
6647
    }
6648
6649
    entry = xmlExpNewNode(ctxt, type);
6650
    if (entry == NULL)
6651
        return(NULL);
6652
    entry->key = kbase;
6653
    if (type == XML_EXP_ATOM) {
6654
	entry->exp_str = name;
6655
	entry->c_max = 1;
6656
    } else if (type == XML_EXP_COUNT) {
6657
        entry->exp_min = min;
6658
        entry->exp_max = max;
6659
	entry->exp_left = left;
6660
	if ((min == 0) || (IS_NILLABLE(left)))
6661
	    entry->info |= XML_EXP_NILABLE;
6662
	if (max < 0)
6663
	    entry->c_max = -1;
6664
	else
6665
	    entry->c_max = max * entry->exp_left->c_max;
6666
    } else {
6667
	entry->exp_left = left;
6668
	entry->exp_right = right;
6669
	if (type == XML_EXP_OR) {
6670
	    if ((IS_NILLABLE(left)) || (IS_NILLABLE(right)))
6671
		entry->info |= XML_EXP_NILABLE;
6672
	    if ((entry->exp_left->c_max == -1) ||
6673
	        (entry->exp_right->c_max == -1))
6674
		entry->c_max = -1;
6675
	    else if (entry->exp_left->c_max > entry->exp_right->c_max)
6676
	        entry->c_max = entry->exp_left->c_max;
6677
	    else
6678
	        entry->c_max = entry->exp_right->c_max;
6679
	} else {
6680
	    if ((IS_NILLABLE(left)) && (IS_NILLABLE(right)))
6681
		entry->info |= XML_EXP_NILABLE;
6682
	    if ((entry->exp_left->c_max == -1) ||
6683
	        (entry->exp_right->c_max == -1))
6684
		entry->c_max = -1;
6685
	    else
6686
	        entry->c_max = entry->exp_left->c_max + entry->exp_right->c_max;
6687
	}
6688
    }
6689
    entry->ref = 1;
6690
    if (ctxt->table[key] != NULL)
6691
        entry->next = ctxt->table[key];
6692
6693
    ctxt->table[key] = entry;
6694
    ctxt->nbElems++;
6695
6696
    return(entry);
6697
}
6698
6699
/**
6700
 * xmlExpFree:
6701
 * @ctxt: the expression context
6702
 * @exp: the expression
6703
 *
6704
 * Dereference the expression
6705
 */
6706
void
6707
xmlExpFree(xmlExpCtxtPtr ctxt, xmlExpNodePtr exp) {
6708
    if ((exp == NULL) || (exp == forbiddenExp) || (exp == emptyExp))
6709
        return;
6710
    exp->ref--;
6711
    if (exp->ref == 0) {
6712
        unsigned short key;
6713
6714
        /* Unlink it first from the hash table */
6715
	key = exp->key % ctxt->size;
6716
	if (ctxt->table[key] == exp) {
6717
	    ctxt->table[key] = exp->next;
6718
	} else {
6719
	    xmlExpNodePtr tmp;
6720
6721
	    tmp = ctxt->table[key];
6722
	    while (tmp != NULL) {
6723
	        if (tmp->next == exp) {
6724
		    tmp->next = exp->next;
6725
		    break;
6726
		}
6727
	        tmp = tmp->next;
6728
	    }
6729
	}
6730
6731
        if ((exp->type == XML_EXP_SEQ) || (exp->type == XML_EXP_OR)) {
6732
	    xmlExpFree(ctxt, exp->exp_left);
6733
	    xmlExpFree(ctxt, exp->exp_right);
6734
	} else if (exp->type == XML_EXP_COUNT) {
6735
	    xmlExpFree(ctxt, exp->exp_left);
6736
	}
6737
        xmlFree(exp);
6738
	ctxt->nb_nodes--;
6739
    }
6740
}
6741
6742
/**
6743
 * xmlExpRef:
6744
 * @exp: the expression
6745
 *
6746
 * Increase the reference count of the expression
6747
 */
6748
void
6749
xmlExpRef(xmlExpNodePtr exp) {
6750
    if (exp != NULL)
6751
        exp->ref++;
6752
}
6753
6754
/**
6755
 * xmlExpNewAtom:
6756
 * @ctxt: the expression context
6757
 * @name: the atom name
6758
 * @len: the atom name lenght in byte (or -1);
6759
 *
6760
 * Get the atom associated to this name from that context
6761
 *
6762
 * Returns the node or NULL in case of error
6763
 */
6764
xmlExpNodePtr
6765
xmlExpNewAtom(xmlExpCtxtPtr ctxt, const xmlChar *name, int len) {
6766
    if ((ctxt == NULL) || (name == NULL))
6767
        return(NULL);
6768
    name = xmlDictLookup(ctxt->dict, name, len);
6769
    if (name == NULL)
6770
        return(NULL);
6771
    return(xmlExpHashGetEntry(ctxt, XML_EXP_ATOM, NULL, NULL, name, 0, 0));
6772
}
6773
6774
/**
6775
 * xmlExpNewOr:
6776
 * @ctxt: the expression context
6777
 * @left: left expression
6778
 * @right: right expression
6779
 *
6780
 * Get the atom associated to the choice @left | @right
6781
 * Note that @left and @right are consumed in the operation, to keep
6782
 * an handle on them use xmlExpRef() and use xmlExpFree() to release them,
6783
 * this is true even in case of failure (unless ctxt == NULL).
6784
 *
6785
 * Returns the node or NULL in case of error
6786
 */
6787
xmlExpNodePtr
6788
xmlExpNewOr(xmlExpCtxtPtr ctxt, xmlExpNodePtr left, xmlExpNodePtr right) {
6789
    if (ctxt == NULL)
6790
        return(NULL);
6791
    if ((left == NULL) || (right == NULL)) {
6792
        xmlExpFree(ctxt, left);
6793
        xmlExpFree(ctxt, right);
6794
        return(NULL);
6795
    }
6796
    return(xmlExpHashGetEntry(ctxt, XML_EXP_OR, left, right, NULL, 0, 0));
6797
}
6798
6799
/**
6800
 * xmlExpNewSeq:
6801
 * @ctxt: the expression context
6802
 * @left: left expression
6803
 * @right: right expression
6804
 *
6805
 * Get the atom associated to the sequence @left , @right
6806
 * Note that @left and @right are consumed in the operation, to keep
6807
 * an handle on them use xmlExpRef() and use xmlExpFree() to release them,
6808
 * this is true even in case of failure (unless ctxt == NULL).
6809
 *
6810
 * Returns the node or NULL in case of error
6811
 */
6812
xmlExpNodePtr
6813
xmlExpNewSeq(xmlExpCtxtPtr ctxt, xmlExpNodePtr left, xmlExpNodePtr right) {
6814
    if (ctxt == NULL)
6815
        return(NULL);
6816
    if ((left == NULL) || (right == NULL)) {
6817
        xmlExpFree(ctxt, left);
6818
        xmlExpFree(ctxt, right);
6819
        return(NULL);
6820
    }
6821
    return(xmlExpHashGetEntry(ctxt, XML_EXP_SEQ, left, right, NULL, 0, 0));
6822
}
6823
6824
/**
6825
 * xmlExpNewRange:
6826
 * @ctxt: the expression context
6827
 * @subset: the expression to be repeated
6828
 * @min: the lower bound for the repetition
6829
 * @max: the upper bound for the repetition, -1 means infinite
6830
 *
6831
 * Get the atom associated to the range (@subset){@min, @max}
6832
 * Note that @subset is consumed in the operation, to keep
6833
 * an handle on it use xmlExpRef() and use xmlExpFree() to release it,
6834
 * this is true even in case of failure (unless ctxt == NULL).
6835
 *
6836
 * Returns the node or NULL in case of error
6837
 */
6838
xmlExpNodePtr
6839
xmlExpNewRange(xmlExpCtxtPtr ctxt, xmlExpNodePtr subset, int min, int max) {
6840
    if (ctxt == NULL)
6841
        return(NULL);
6842
    if ((subset == NULL) || (min < 0) || (max < -1) ||
6843
        ((max >= 0) && (min > max))) {
6844
	xmlExpFree(ctxt, subset);
6845
        return(NULL);
6846
    }
6847
    return(xmlExpHashGetEntry(ctxt, XML_EXP_COUNT, subset,
6848
                              NULL, NULL, min, max));
6849
}
6850
6851
/************************************************************************
6852
 *									*
6853
 *		Public API for operations on expressions		*
6854
 *									*
6855
 ************************************************************************/
6856
6857
static int
6858
xmlExpGetLanguageInt(xmlExpCtxtPtr ctxt, xmlExpNodePtr exp, 
6859
                     const xmlChar**list, int len, int nb) {
6860
    int tmp, tmp2;
6861
tail:
6862
    switch (exp->type) {
6863
        case XML_EXP_EMPTY:
6864
	    return(0);
6865
        case XML_EXP_ATOM:
6866
	    for (tmp = 0;tmp < nb;tmp++)
6867
	        if (list[tmp] == exp->exp_str)
6868
		    return(0);
6869
            if (nb >= len)
6870
	        return(-2);
6871
	    list[nb++] = exp->exp_str;
6872
	    return(1);
6873
        case XML_EXP_COUNT:
6874
	    exp = exp->exp_left;
6875
	    goto tail;
6876
        case XML_EXP_SEQ:
6877
        case XML_EXP_OR:
6878
	    tmp = xmlExpGetLanguageInt(ctxt, exp->exp_left, list, len, nb);
6879
	    if (tmp < 0)
6880
	        return(tmp);
6881
	    tmp2 = xmlExpGetLanguageInt(ctxt, exp->exp_right, list, len,
6882
	                                nb + tmp);
6883
	    if (tmp2 < 0)
6884
	        return(tmp2);
6885
            return(tmp + tmp2);
6886
    }
6887
    return(-1);
6888
}
6889
6890
/**
6891
 * xmlExpGetLanguage:
6892
 * @ctxt: the expression context
6893
 * @exp: the expression
6894
 * @langList: where to store the tokens
6895
 * @len: the allocated lenght of @list
6896
 *
6897
 * Find all the strings used in @exp and store them in @list
6898
 *
6899
 * Returns the number of unique strings found, -1 in case of errors and
6900
 *         -2 if there is more than @len strings
6901
 */
6902
int
6903
xmlExpGetLanguage(xmlExpCtxtPtr ctxt, xmlExpNodePtr exp, 
6904
                  const xmlChar**langList, int len) {
6905
    if ((ctxt == NULL) || (exp == NULL) || (langList == NULL) || (len <= 0))
6906
        return(-1);
6907
    return(xmlExpGetLanguageInt(ctxt, exp, langList, len, 0));
6908
}
6909
6910
static int
6911
xmlExpGetStartInt(xmlExpCtxtPtr ctxt, xmlExpNodePtr exp, 
6912
                  const xmlChar**list, int len, int nb) {
6913
    int tmp, tmp2;
6914
tail:
6915
    switch (exp->type) {
6916
        case XML_EXP_FORBID:
6917
	    return(0);
6918
        case XML_EXP_EMPTY:
6919
	    return(0);
6920
        case XML_EXP_ATOM:
6921
	    for (tmp = 0;tmp < nb;tmp++)
6922
	        if (list[tmp] == exp->exp_str)
6923
		    return(0);
6924
            if (nb >= len)
6925
	        return(-2);
6926
	    list[nb++] = exp->exp_str;
6927
	    return(1);
6928
        case XML_EXP_COUNT:
6929
	    exp = exp->exp_left;
6930
	    goto tail;
6931
        case XML_EXP_SEQ:
6932
	    tmp = xmlExpGetStartInt(ctxt, exp->exp_left, list, len, nb);
6933
	    if (tmp < 0)
6934
	        return(tmp);
6935
	    if (IS_NILLABLE(exp->exp_left)) {
6936
		tmp2 = xmlExpGetStartInt(ctxt, exp->exp_right, list, len,
6937
					    nb + tmp);
6938
		if (tmp2 < 0)
6939
		    return(tmp2);
6940
		tmp += tmp2;
6941
	    }
6942
            return(tmp);
6943
        case XML_EXP_OR:
6944
	    tmp = xmlExpGetStartInt(ctxt, exp->exp_left, list, len, nb);
6945
	    if (tmp < 0)
6946
	        return(tmp);
6947
	    tmp2 = xmlExpGetStartInt(ctxt, exp->exp_right, list, len,
6948
	                                nb + tmp);
6949
	    if (tmp2 < 0)
6950
	        return(tmp2);
6951
            return(tmp + tmp2);
6952
    }
6953
    return(-1);
6954
}
6955
6956
/**
6957
 * xmlExpGetStart:
6958
 * @ctxt: the expression context
6959
 * @exp: the expression
6960
 * @tokList: where to store the tokens
6961
 * @len: the allocated lenght of @list
6962
 *
6963
 * Find all the strings that appears at the start of the languages
6964
 * accepted by @exp and store them in @list. E.g. for (a, b) | c
6965
 * it will return the list [a, c]
6966
 *
6967
 * Returns the number of unique strings found, -1 in case of errors and
6968
 *         -2 if there is more than @len strings
6969
 */
6970
int
6971
xmlExpGetStart(xmlExpCtxtPtr ctxt, xmlExpNodePtr exp, 
6972
               const xmlChar**tokList, int len) {
6973
    if ((ctxt == NULL) || (exp == NULL) || (tokList == NULL) || (len <= 0))
6974
        return(-1);
6975
    return(xmlExpGetStartInt(ctxt, exp, tokList, len, 0));
6976
}
6977
6978
/**
6979
 * xmlExpIsNillable:
6980
 * @exp: the expression
6981
 *
6982
 * Finds if the expression is nillable, i.e. if it accepts the empty sequqnce
6983
 *
6984
 * Returns 1 if nillable, 0 if not and -1 in case of error
6985
 */
6986
int
6987
xmlExpIsNillable(xmlExpNodePtr exp) {
6988
    if (exp == NULL)
6989
        return(-1);
6990
    return(IS_NILLABLE(exp) != 0);
6991
}
6992
6993
static xmlExpNodePtr
6994
xmlExpStringDeriveInt(xmlExpCtxtPtr ctxt, xmlExpNodePtr exp, const xmlChar *str)
6995
{
6996
    xmlExpNodePtr ret;
6997
6998
    switch (exp->type) {
6999
	case XML_EXP_EMPTY:
7000
	    return(forbiddenExp);
7001
	case XML_EXP_FORBID:
7002
	    return(forbiddenExp);
7003
	case XML_EXP_ATOM:
7004
	    if (exp->exp_str == str) {
7005
#ifdef DEBUG_DERIV
7006
		printf("deriv atom: equal => Empty\n");
7007
#endif
7008
	        ret = emptyExp;
7009
	    } else {
7010
#ifdef DEBUG_DERIV
7011
		printf("deriv atom: mismatch => forbid\n");
7012
#endif
7013
	        /* TODO wildcards here */
7014
		ret = forbiddenExp;
7015
	    }
7016
	    return(ret);
7017
	case XML_EXP_OR: {
7018
	    xmlExpNodePtr tmp;
7019
7020
#ifdef DEBUG_DERIV
7021
	    printf("deriv or: => or(derivs)\n");
7022
#endif
7023
	    tmp = xmlExpStringDeriveInt(ctxt, exp->exp_left, str);
7024
	    if (tmp == NULL) {
7025
		return(NULL);
7026
	    }
7027
	    ret = xmlExpStringDeriveInt(ctxt, exp->exp_right, str);
7028
	    if (ret == NULL) {
7029
	        xmlExpFree(ctxt, tmp);
7030
		return(NULL);
7031
	    }
7032
            ret = xmlExpHashGetEntry(ctxt, XML_EXP_OR, tmp, ret,
7033
			     NULL, 0, 0);
7034
	    return(ret);
7035
	}
7036
	case XML_EXP_SEQ:
7037
#ifdef DEBUG_DERIV
7038
	    printf("deriv seq: starting with left\n");
7039
#endif
7040
	    ret = xmlExpStringDeriveInt(ctxt, exp->exp_left, str);
7041
	    if (ret == NULL) {
7042
	        return(NULL);
7043
	    } else if (ret == forbiddenExp) {
7044
	        if (IS_NILLABLE(exp->exp_left)) {
7045
#ifdef DEBUG_DERIV
7046
		    printf("deriv seq: left failed but nillable\n");
7047
#endif
7048
		    ret = xmlExpStringDeriveInt(ctxt, exp->exp_right, str);
7049
		}
7050
	    } else {
7051
#ifdef DEBUG_DERIV
7052
		printf("deriv seq: left match => sequence\n");
7053
#endif
7054
	        exp->exp_right->ref++;
7055
	        ret = xmlExpHashGetEntry(ctxt, XML_EXP_SEQ, ret, exp->exp_right,
7056
		                         NULL, 0, 0);
7057
	    }
7058
	    return(ret);
7059
	case XML_EXP_COUNT: {
7060
	    int min, max;
7061
	    xmlExpNodePtr tmp;
7062
7063
	    if (exp->exp_max == 0)
7064
		return(forbiddenExp);
7065
	    ret = xmlExpStringDeriveInt(ctxt, exp->exp_left, str);
7066
	    if (ret == NULL)
7067
	        return(NULL);
7068
	    if (ret == forbiddenExp) {
7069
#ifdef DEBUG_DERIV
7070
		printf("deriv count: pattern mismatch => forbid\n");
7071
#endif
7072
	        return(ret);
7073
	    }
7074
	    if (exp->exp_max == 1)
7075
		return(ret);
7076
	    if (exp->exp_max < 0) /* unbounded */
7077
		max = -1;
7078
	    else
7079
		max = exp->exp_max - 1;
7080
	    if (exp->exp_min > 0)
7081
		min = exp->exp_min - 1;
7082
	    else
7083
		min = 0;
7084
	    exp->exp_left->ref++;
7085
	    tmp = xmlExpHashGetEntry(ctxt, XML_EXP_COUNT, exp->exp_left, NULL,
7086
				     NULL, min, max);
7087
	    if (ret == emptyExp) {
7088
#ifdef DEBUG_DERIV
7089
		printf("deriv count: match to empty => new count\n");
7090
#endif
7091
	        return(tmp);
7092
	    }
7093
#ifdef DEBUG_DERIV
7094
	    printf("deriv count: match => sequence with new count\n");
7095
#endif
7096
	    return(xmlExpHashGetEntry(ctxt, XML_EXP_SEQ, ret, tmp,
7097
	                              NULL, 0, 0));
7098
	}
7099
    }
7100
    return(NULL);
7101
}
7102
7103
/**
7104
 * xmlExpStringDerive:
7105
 * @ctxt: the expression context
7106
 * @exp: the expression
7107
 * @str: the string
7108
 * @len: the string len in bytes if available
7109
 *
7110
 * Do one step of Brzozowski derivation of the expression @exp with
7111
 * respect to the input string
7112
 *
7113
 * Returns the resulting expression or NULL in case of internal error
7114
 */
7115
xmlExpNodePtr
7116
xmlExpStringDerive(xmlExpCtxtPtr ctxt, xmlExpNodePtr exp,
7117
                   const xmlChar *str, int len) {
7118
    const xmlChar *input;
7119
7120
    if ((exp == NULL) || (ctxt == NULL) || (str == NULL)) {
7121
        return(NULL);
7122
    }
7123
    /*
7124
     * check the string is in the dictionnary, if yes use an interned
7125
     * copy, otherwise we know it's not an acceptable input
7126
     */
7127
    input = xmlDictExists(ctxt->dict, str, len);
7128
    if (input == NULL) {
7129
        return(forbiddenExp);
7130
    }
7131
    return(xmlExpStringDeriveInt(ctxt, exp, input));
7132
}
7133
7134
static int
7135
xmlExpCheckCard(xmlExpNodePtr exp, xmlExpNodePtr sub) {
7136
    int ret = 1;
7137
7138
    if (sub->c_max == -1) {
7139
        if (exp->c_max != -1)
7140
	    ret = 0;
7141
    } else if ((exp->c_max >= 0) && (exp->c_max < sub->c_max)) {
7142
        ret = 0;
7143
    }
7144
#if 0
7145
    if ((IS_NILLABLE(sub)) && (!IS_NILLABLE(exp)))
7146
        ret = 0;
7147
#endif
7148
    return(ret);
7149
}
7150
7151
static xmlExpNodePtr xmlExpExpDeriveInt(xmlExpCtxtPtr ctxt, xmlExpNodePtr exp,
7152
                                        xmlExpNodePtr sub);
7153
/**
7154
 * xmlExpDivide:
7155
 * @ctxt: the expressions context
7156
 * @exp: the englobing expression
7157
 * @sub: the subexpression
7158
 * @mult: the multiple expression
7159
 * @remain: the remain from the derivation of the multiple
7160
 *
7161
 * Check if exp is a multiple of sub, i.e. if there is a finite number n
7162
 * so that sub{n} subsume exp
7163
 *
7164
 * Returns the multiple value if successful, 0 if it is not a multiple
7165
 *         and -1 in case of internel error.
7166
 */
7167
7168
static int
7169
xmlExpDivide(xmlExpCtxtPtr ctxt, xmlExpNodePtr exp, xmlExpNodePtr sub,
7170
             xmlExpNodePtr *mult, xmlExpNodePtr *remain) {
7171
    int i;
7172
    xmlExpNodePtr tmp, tmp2;
7173
7174
    if (mult != NULL) *mult = NULL;
7175
    if (remain != NULL) *remain = NULL;
7176
    if (exp->c_max == -1) return(0);
7177
    if (IS_NILLABLE(exp) && (!IS_NILLABLE(sub))) return(0);
7178
7179
    for (i = 1;i <= exp->c_max;i++) {
7180
        sub->ref++;
7181
        tmp = xmlExpHashGetEntry(ctxt, XML_EXP_COUNT,
7182
				 sub, NULL, NULL, i, i);
7183
	if (tmp == NULL) {
7184
	    return(-1);
7185
	}
7186
	if (!xmlExpCheckCard(tmp, exp)) {
7187
	    xmlExpFree(ctxt, tmp);
7188
	    continue;
7189
	}
7190
	tmp2 = xmlExpExpDeriveInt(ctxt, tmp, exp);
7191
	if (tmp2 == NULL) {
7192
	    xmlExpFree(ctxt, tmp);
7193
	    return(-1);
7194
	}
7195
	if ((tmp2 != forbiddenExp) && (IS_NILLABLE(tmp2))) {
7196
	    if (remain != NULL)
7197
	        *remain = tmp2;
7198
	    else
7199
	        xmlExpFree(ctxt, tmp2);
7200
	    if (mult != NULL)
7201
	        *mult = tmp;
7202
	    else
7203
	        xmlExpFree(ctxt, tmp);
7204
#ifdef DEBUG_DERIV
7205
	    printf("Divide succeeded %d\n", i);
7206
#endif
7207
	    return(i);
7208
	}
7209
	xmlExpFree(ctxt, tmp);
7210
	xmlExpFree(ctxt, tmp2);
7211
    }
7212
#ifdef DEBUG_DERIV
7213
    printf("Divide failed\n");
7214
#endif
7215
    return(0);
7216
}
7217
7218
/**
7219
 * xmlExpExpDeriveInt:
7220
 * @ctxt: the expressions context
7221
 * @exp: the englobing expression
7222
 * @sub: the subexpression
7223
 *
7224
 * Try to do a step of Brzozowski derivation but at a higher level
7225
 * the input being a subexpression.
7226
 *
7227
 * Returns the resulting expression or NULL in case of internal error
7228
 */
7229
static xmlExpNodePtr
7230
xmlExpExpDeriveInt(xmlExpCtxtPtr ctxt, xmlExpNodePtr exp, xmlExpNodePtr sub) {
7231
    xmlExpNodePtr ret, tmp, tmp2, tmp3;
7232
    const xmlChar **tab;
7233
    int len, i;
7234
7235
    /*
7236
     * In case of equality and if the expression can only consume a finite
7237
     * amount, then the derivation is empty
7238
     */
7239
    if ((exp == sub) && (exp->c_max >= 0)) {
7240
#ifdef DEBUG_DERIV
7241
        printf("Equal(exp, sub) and finite -> Empty\n");
7242
#endif
7243
        return(emptyExp);
7244
    }
7245
    /*
7246
     * decompose sub sequence first
7247
     */
7248
    if (sub->type == XML_EXP_EMPTY) {
7249
#ifdef DEBUG_DERIV
7250
        printf("Empty(sub) -> Empty\n");
7251
#endif
7252
	exp->ref++;
7253
        return(exp);
7254
    }
7255
    if (sub->type == XML_EXP_SEQ) {
7256
#ifdef DEBUG_DERIV
7257
        printf("Seq(sub) -> decompose\n");
7258
#endif
7259
        tmp = xmlExpExpDeriveInt(ctxt, exp, sub->exp_left);
7260
	if (tmp == NULL)
7261
	    return(NULL);
7262
	if (tmp == forbiddenExp)
7263
	    return(tmp);
7264
	ret = xmlExpExpDeriveInt(ctxt, tmp, sub->exp_right);
7265
	xmlExpFree(ctxt, tmp);
7266
	return(ret);
7267
    }
7268
    if (sub->type == XML_EXP_OR) {
7269
#ifdef DEBUG_DERIV
7270
        printf("Or(sub) -> decompose\n");
7271
#endif
7272
        tmp = xmlExpExpDeriveInt(ctxt, exp, sub->exp_left);
7273
	if (tmp == forbiddenExp)
7274
	    return(tmp);
7275
	if (tmp == NULL)
7276
	    return(NULL);
7277
	ret = xmlExpExpDeriveInt(ctxt, exp, sub->exp_right);
7278
	if ((ret == NULL) || (ret == forbiddenExp)) {
7279
	    xmlExpFree(ctxt, tmp);
7280
	    return(ret);
7281
	}
7282
	return(xmlExpHashGetEntry(ctxt, XML_EXP_OR, tmp, ret, NULL, 0, 0));
7283
    }
7284
    if (!xmlExpCheckCard(exp, sub)) {
7285
#ifdef DEBUG_DERIV
7286
        printf("CheckCard(exp, sub) failed -> Forbid\n");
7287
#endif
7288
        return(forbiddenExp);
7289
    }
7290
    switch (exp->type) {
7291
        case XML_EXP_EMPTY:
7292
	    if (sub == emptyExp)
7293
	        return(emptyExp);
7294
#ifdef DEBUG_DERIV
7295
	    printf("Empty(exp) -> Forbid\n");
7296
#endif
7297
	    return(forbiddenExp);
7298
        case XML_EXP_FORBID:
7299
#ifdef DEBUG_DERIV
7300
	    printf("Forbid(exp) -> Forbid\n");
7301
#endif
7302
	    return(forbiddenExp);
7303
        case XML_EXP_ATOM:
7304
	    if (sub->type == XML_EXP_ATOM) {
7305
	        /* TODO: handle wildcards */
7306
	        if (exp->exp_str == sub->exp_str) {
7307
#ifdef DEBUG_DERIV
7308
		    printf("Atom match -> Empty\n");
7309
#endif
7310
		    return(emptyExp);
7311
                }
7312
#ifdef DEBUG_DERIV
7313
		printf("Atom mismatch -> Forbid\n");
7314
#endif
7315
	        return(forbiddenExp);
7316
	    }
7317
	    if ((sub->type == XML_EXP_COUNT) &&
7318
	        (sub->exp_max == 1) &&
7319
	        (sub->exp_left->type == XML_EXP_ATOM)) {
7320
	        /* TODO: handle wildcards */
7321
	        if (exp->exp_str == sub->exp_left->exp_str) {
7322
#ifdef DEBUG_DERIV
7323
		    printf("Atom match -> Empty\n");
7324
#endif
7325
		    return(emptyExp);
7326
		}
7327
#ifdef DEBUG_DERIV
7328
		printf("Atom mismatch -> Forbid\n");
7329
#endif
7330
	        return(forbiddenExp);
7331
	    }
7332
#ifdef DEBUG_DERIV
7333
	    printf("Compex exp vs Atom -> Forbid\n");
7334
#endif
7335
	    return(forbiddenExp);
7336
        case XML_EXP_SEQ:
7337
	    /* try to get the sequence consumed only if possible */
7338
	    if (xmlExpCheckCard(exp->exp_left, sub)) {
7339
		/* See if the sequence can be consumed directly */
7340
#ifdef DEBUG_DERIV
7341
		printf("Seq trying left only\n");
7342
#endif
7343
		ret = xmlExpExpDeriveInt(ctxt, exp->exp_left, sub);
7344
		if ((ret != forbiddenExp) && (ret != NULL)) {
7345
#ifdef DEBUG_DERIV
7346
		    printf("Seq trying left only worked\n");
7347
#endif
7348
		    /*
7349
		     * TODO: assumption here that we are determinist
7350
		     *       i.e. we won't get to a nillable exp left
7351
		     *       subset which could be matched by the right
7352
		     *       part too.
7353
		     * e.g.: (a | b)+,(a | c) and 'a+,a'
7354
		     */
7355
		    exp->exp_right->ref++;
7356
		    return(xmlExpHashGetEntry(ctxt, XML_EXP_SEQ, ret,
7357
					      exp->exp_right, NULL, 0, 0));
7358
		}
7359
#ifdef DEBUG_DERIV
7360
	    } else {
7361
		printf("Seq: left too short\n");
7362
#endif
7363
	    }
7364
	    /* Try instead to decompose */
7365
	    if (sub->type == XML_EXP_COUNT) {
7366
		int min, max;
7367
7368
#ifdef DEBUG_DERIV
7369
		printf("Seq: sub is a count\n");
7370
#endif
7371
	        ret = xmlExpExpDeriveInt(ctxt, exp->exp_left, sub->exp_left);
7372
		if (ret == NULL)
7373
		    return(NULL);
7374
		if (ret != forbiddenExp) {
7375
#ifdef DEBUG_DERIV
7376
		    printf("Seq , Count match on left\n");
7377
#endif
7378
		    if (sub->exp_max < 0)
7379
		        max = -1;
7380
	            else
7381
		        max = sub->exp_max -1;
7382
		    if (sub->exp_min > 0)
7383
		        min = sub->exp_min -1;
7384
		    else
7385
		        min = 0;
7386
		    exp->exp_right->ref++;
7387
		    tmp = xmlExpHashGetEntry(ctxt, XML_EXP_SEQ, ret,
7388
		                             exp->exp_right, NULL, 0, 0);
7389
		    if (tmp == NULL)
7390
		        return(NULL);
7391
7392
		    sub->exp_left->ref++;
7393
		    tmp2 = xmlExpHashGetEntry(ctxt, XML_EXP_COUNT,
7394
				      sub->exp_left, NULL, NULL, min, max);
7395
		    if (tmp2 == NULL) {
7396
		        xmlExpFree(ctxt, tmp);
7397
			return(NULL);
7398
		    }
7399
		    ret = xmlExpExpDeriveInt(ctxt, tmp, tmp2);
7400
		    xmlExpFree(ctxt, tmp);
7401
		    xmlExpFree(ctxt, tmp2);
7402
		    return(ret);
7403
		}
7404
	    }
7405
	    /* we made no progress on structured operations */
7406
	    break;
7407
        case XML_EXP_OR:
7408
#ifdef DEBUG_DERIV
7409
	    printf("Or , trying both side\n");
7410
#endif
7411
	    ret = xmlExpExpDeriveInt(ctxt, exp->exp_left, sub);
7412
	    if (ret == NULL)
7413
	        return(NULL);
7414
	    tmp = xmlExpExpDeriveInt(ctxt, exp->exp_right, sub);
7415
	    if (tmp == NULL) {
7416
		xmlExpFree(ctxt, ret);
7417
	        return(NULL);
7418
	    }
7419
	    return(xmlExpHashGetEntry(ctxt, XML_EXP_OR, ret, tmp, NULL, 0, 0));
7420
        case XML_EXP_COUNT: {
7421
	    int min, max;
7422
7423
	    if (sub->type == XML_EXP_COUNT) {
7424
	        /*
7425
		 * Try to see if the loop is completely subsumed
7426
		 */
7427
	        tmp = xmlExpExpDeriveInt(ctxt, exp->exp_left, sub->exp_left);
7428
		if (tmp == NULL)
7429
		    return(NULL);
7430
		if (tmp == forbiddenExp) {
7431
		    int mult;
7432
7433
#ifdef DEBUG_DERIV
7434
		    printf("Count, Count inner don't subsume\n");
7435
#endif
7436
		    mult = xmlExpDivide(ctxt, sub->exp_left, exp->exp_left,
7437
		                        NULL, &tmp);
7438
		    if (mult <= 0) {
7439
#ifdef DEBUG_DERIV
7440
			printf("Count, Count not multiple => forbidden\n");
7441
#endif
7442
                        return(forbiddenExp);
7443
		    }
7444
		    if (sub->exp_max == -1) {
7445
		        max = -1;
7446
			if (exp->exp_max == -1) {
7447
			    if (exp->exp_min <= sub->exp_min * mult)
7448
			        min = 0;
7449
			    else
7450
			        min = exp->exp_min - sub->exp_min * mult;
7451
			} else {
7452
#ifdef DEBUG_DERIV
7453
			    printf("Count, Count finite can't subsume infinite\n");
7454
#endif
7455
                            xmlExpFree(ctxt, tmp);
7456
			    return(forbiddenExp);
7457
			}
7458
		    } else {
7459
			if (exp->exp_max == -1) {
7460
#ifdef DEBUG_DERIV
7461
			    printf("Infinite loop consume mult finite loop\n");
7462
#endif
7463
			    if (exp->exp_min > sub->exp_min * mult) {
7464
				max = -1;
7465
				min = exp->exp_min - sub->exp_min * mult;
7466
			    } else {
7467
				max = -1;
7468
				min = 0;
7469
			    }
7470
			} else {
7471
			    if (exp->exp_max < sub->exp_max * mult) {
7472
#ifdef DEBUG_DERIV
7473
				printf("loops max mult mismatch => forbidden\n");
7474
#endif
7475
				xmlExpFree(ctxt, tmp);
7476
				return(forbiddenExp);
7477
			    }
7478
			    if (sub->exp_max * mult > exp->exp_min)
7479
				min = 0;
7480
			    else
7481
				min = exp->exp_min - sub->exp_max * mult;
7482
			    max = exp->exp_max - sub->exp_max * mult;
7483
			}
7484
		    }
7485
		} else if (!IS_NILLABLE(tmp)) {
7486
		    /*
7487
		     * TODO: loop here to try to grow if working on finite
7488
		     *       blocks.
7489
		     */
7490
#ifdef DEBUG_DERIV
7491
		    printf("Count, Count remain not nillable => forbidden\n");
7492
#endif
7493
		    xmlExpFree(ctxt, tmp);
7494
		    return(forbiddenExp);
7495
		} else if (sub->exp_max == -1) {
7496
		    if (exp->exp_max == -1) {
7497
		        if (exp->exp_min <= sub->exp_min) {
7498
#ifdef DEBUG_DERIV
7499
			    printf("Infinite loops Okay => COUNT(0,Inf)\n");
7500
#endif
7501
                            max = -1;
7502
			    min = 0;
7503
			} else {
7504
#ifdef DEBUG_DERIV
7505
			    printf("Infinite loops min => Count(X,Inf)\n");
7506
#endif
7507
                            max = -1;
7508
			    min = exp->exp_min - sub->exp_min;
7509
			}
7510
		    } else if (exp->exp_min > sub->exp_min) {
7511
#ifdef DEBUG_DERIV
7512
			printf("loops min mismatch 1 => forbidden ???\n");
7513
#endif
7514
		        xmlExpFree(ctxt, tmp);
7515
		        return(forbiddenExp);
7516
		    } else {
7517
			max = -1;
7518
			min = 0;
7519
		    }
7520
		} else {
7521
		    if (exp->exp_max == -1) {
7522
#ifdef DEBUG_DERIV
7523
			printf("Infinite loop consume finite loop\n");
7524
#endif
7525
		        if (exp->exp_min > sub->exp_min) {
7526
			    max = -1;
7527
			    min = exp->exp_min - sub->exp_min;
7528
			} else {
7529
			    max = -1;
7530
			    min = 0;
7531
			}
7532
		    } else {
7533
		        if (exp->exp_max < sub->exp_max) {
7534
#ifdef DEBUG_DERIV
7535
			    printf("loops max mismatch => forbidden\n");
7536
#endif
7537
			    xmlExpFree(ctxt, tmp);
7538
			    return(forbiddenExp);
7539
			}
7540
			if (sub->exp_max > exp->exp_min)
7541
			    min = 0;
7542
			else
7543
			    min = exp->exp_min - sub->exp_max;
7544
			max = exp->exp_max - sub->exp_max;
7545
		    }
7546
		}
7547
#ifdef DEBUG_DERIV
7548
		printf("loops match => SEQ(COUNT())\n");
7549
#endif
7550
		exp->exp_left->ref++;
7551
		tmp2 = xmlExpHashGetEntry(ctxt, XML_EXP_COUNT, exp->exp_left,
7552
		                          NULL, NULL, min, max);
7553
		if (tmp2 == NULL) {
7554
		    return(NULL);
7555
		}
7556
                ret = xmlExpHashGetEntry(ctxt, XML_EXP_SEQ, tmp, tmp2,
7557
		                         NULL, 0, 0);
7558
		return(ret);
7559
	    }
7560
	    tmp = xmlExpExpDeriveInt(ctxt, exp->exp_left, sub);
7561
	    if (tmp == NULL)
7562
		return(NULL);
7563
	    if (tmp == forbiddenExp) {
7564
#ifdef DEBUG_DERIV
7565
		printf("loop mismatch => forbidden\n");
7566
#endif
7567
		return(forbiddenExp);
7568
	    }
7569
	    if (exp->exp_min > 0)
7570
		min = exp->exp_min - 1;
7571
	    else
7572
		min = 0;
7573
	    if (exp->exp_max < 0)
7574
		max = -1;
7575
	    else
7576
		max = exp->exp_max - 1;
7577
7578
#ifdef DEBUG_DERIV
7579
	    printf("loop match => SEQ(COUNT())\n");
7580
#endif
7581
	    exp->exp_left->ref++;
7582
	    tmp2 = xmlExpHashGetEntry(ctxt, XML_EXP_COUNT, exp->exp_left,
7583
				      NULL, NULL, min, max);
7584
	    if (tmp2 == NULL)
7585
		return(NULL);
7586
	    ret = xmlExpHashGetEntry(ctxt, XML_EXP_SEQ, tmp, tmp2,
7587
				     NULL, 0, 0);
7588
	    return(ret);
7589
	}
7590
    }
7591
7592
#ifdef DEBUG_DERIV
7593
    printf("Fallback to derivative\n");
7594
#endif
7595
    if (IS_NILLABLE(sub)) {
7596
        if (!(IS_NILLABLE(exp)))
7597
	    return(forbiddenExp);
7598
	else
7599
	    ret = emptyExp;
7600
    } else
7601
	ret = NULL;
7602
    /*
7603
     * here the structured derivation made no progress so
7604
     * we use the default token based derivation to force one more step
7605
     */
7606
    if (ctxt->tabSize == 0)
7607
        ctxt->tabSize = 40;
7608
7609
    tab = (const xmlChar **) xmlMalloc(ctxt->tabSize *
7610
	                               sizeof(const xmlChar *));
7611
    if (tab == NULL) {
7612
	return(NULL);
7613
    }
7614
7615
    /*
7616
     * collect all the strings accepted by the subexpression on input
7617
     */
7618
    len = xmlExpGetStartInt(ctxt, sub, tab, ctxt->tabSize, 0);
7619
    while (len < 0) {
7620
        const xmlChar **temp;
7621
	temp = (const xmlChar **) xmlRealloc((xmlChar **) tab, ctxt->tabSize * 2 *
7622
	                                     sizeof(const xmlChar *));
7623
	if (temp == NULL) {
7624
	    xmlFree((xmlChar **) tab);
7625
	    return(NULL);
7626
	}
7627
	tab = temp;
7628
	ctxt->tabSize *= 2;
7629
	len = xmlExpGetStartInt(ctxt, sub, tab, ctxt->tabSize, 0);
7630
    }
7631
    for (i = 0;i < len;i++) {
7632
        tmp = xmlExpStringDeriveInt(ctxt, exp, tab[i]);
7633
	if ((tmp == NULL) || (tmp == forbiddenExp)) {
7634
	    xmlExpFree(ctxt, ret);
7635
	    xmlFree((xmlChar **) tab);
7636
	    return(tmp);
7637
	}
7638
	tmp2 = xmlExpStringDeriveInt(ctxt, sub, tab[i]);
7639
	if ((tmp2 == NULL) || (tmp2 == forbiddenExp)) {
7640
	    xmlExpFree(ctxt, tmp);
7641
	    xmlExpFree(ctxt, ret);
7642
	    xmlFree((xmlChar **) tab);
7643
	    return(tmp);
7644
	}
7645
	tmp3 = xmlExpExpDeriveInt(ctxt, tmp, tmp2);
7646
	xmlExpFree(ctxt, tmp);
7647
	xmlExpFree(ctxt, tmp2);
7648
7649
	if ((tmp3 == NULL) || (tmp3 == forbiddenExp)) {
7650
	    xmlExpFree(ctxt, ret);
7651
	    xmlFree((xmlChar **) tab);
7652
	    return(tmp3);
7653
	}
7654
7655
	if (ret == NULL)
7656
	    ret = tmp3;
7657
	else {
7658
	    ret = xmlExpHashGetEntry(ctxt, XML_EXP_OR, ret, tmp3, NULL, 0, 0);
7659
	    if (ret == NULL) {
7660
		xmlFree((xmlChar **) tab);
7661
	        return(NULL);
7662
	    }
7663
	}
7664
    }
7665
    xmlFree((xmlChar **) tab);
7666
    return(ret);
7667
}
7668
    
7669
/**
7670
 * xmlExpExpDerive:
7671
 * @ctxt: the expressions context
7672
 * @exp: the englobing expression
7673
 * @sub: the subexpression
7674
 *
7675
 * Evaluates the expression resulting from @exp consuming a sub expression @sub
7676
 * Based on algebraic derivation and sometimes direct Brzozowski derivation
7677
 * it usually tatkes less than linear time and can handle expressions generating
7678
 * infinite languages.
7679
 *
7680
 * Returns the resulting expression or NULL in case of internal error, the
7681
 *         result must be freed
7682
 */
7683
xmlExpNodePtr
7684
xmlExpExpDerive(xmlExpCtxtPtr ctxt, xmlExpNodePtr exp, xmlExpNodePtr sub) {
7685
    if ((exp == NULL) || (ctxt == NULL) || (sub == NULL))
7686
        return(NULL);
7687
7688
    /*
7689
     * O(1) speedups
7690
     */
7691
    if (IS_NILLABLE(sub) && (!IS_NILLABLE(exp))) {
7692
#ifdef DEBUG_DERIV
7693
	printf("Sub nillable and not exp : can't subsume\n");
7694
#endif
7695
        return(forbiddenExp);
7696
    }
7697
    if (xmlExpCheckCard(exp, sub) == 0) {
7698
#ifdef DEBUG_DERIV
7699
	printf("sub generate longuer sequances than exp : can't subsume\n");
7700
#endif
7701
        return(forbiddenExp);
7702
    }
7703
    return(xmlExpExpDeriveInt(ctxt, exp, sub));
7704
}
7705
7706
/**
7707
 * xmlExpSubsume:
7708
 * @ctxt: the expressions context
7709
 * @exp: the englobing expression
7710
 * @sub: the subexpression
7711
 *
7712
 * Check whether @exp accepts all the languages accexpted by @sub
7713
 * the input being a subexpression.
7714
 *
7715
 * Returns 1 if true 0 if false and -1 in case of failure.
7716
 */
7717
int
7718
xmlExpSubsume(xmlExpCtxtPtr ctxt, xmlExpNodePtr exp, xmlExpNodePtr sub) {
7719
    xmlExpNodePtr tmp;
7720
    
7721
    if ((exp == NULL) || (ctxt == NULL) || (sub == NULL))
7722
        return(-1);
7723
7724
    /*
7725
     * TODO: speedup by checking the language of sub is a subset of the
7726
     *       language of exp
7727
     */
7728
    /*
7729
     * O(1) speedups
7730
     */
7731
    if (IS_NILLABLE(sub) && (!IS_NILLABLE(exp))) {
7732
#ifdef DEBUG_DERIV
7733
	printf("Sub nillable and not exp : can't subsume\n");
7734
#endif
7735
        return(0);
7736
    }
7737
    if (xmlExpCheckCard(exp, sub) == 0) {
7738
#ifdef DEBUG_DERIV
7739
	printf("sub generate longuer sequances than exp : can't subsume\n");
7740
#endif
7741
        return(0);
7742
    }
7743
    tmp = xmlExpExpDeriveInt(ctxt, exp, sub);
7744
#ifdef DEBUG_DERIV
7745
    printf("Result derivation :\n");
7746
    PRINT_EXP(tmp);
7747
#endif
7748
    if (tmp == NULL)
7749
        return(-1);
7750
    if (tmp == forbiddenExp)
7751
	return(0);
7752
    if (tmp == emptyExp)
7753
	return(1);
7754
    if ((tmp != NULL) && (IS_NILLABLE(tmp))) {
7755
        xmlExpFree(ctxt, tmp);
7756
        return(1);
7757
    }
7758
    xmlExpFree(ctxt, tmp);
7759
    return(0);
7760
}
7761
7762
/************************************************************************
7763
 *									*
7764
 *			Parsing expression 				*
7765
 *									*
7766
 ************************************************************************/
7767
7768
static xmlExpNodePtr xmlExpParseExpr(xmlExpCtxtPtr ctxt);
7769
7770
#undef CUR
7771
#define CUR (*ctxt->cur)
7772
#undef NEXT
7773
#define NEXT ctxt->cur++;
7774
#undef IS_BLANK
7775
#define IS_BLANK(c) ((c == ' ') || (c == '\n') || (c == '\r') || (c == '\t'))
7776
#define SKIP_BLANKS while (IS_BLANK(*ctxt->cur)) ctxt->cur++;
7777
7778
static int
7779
xmlExpParseNumber(xmlExpCtxtPtr ctxt) {
7780
    int ret = 0;
7781
7782
    SKIP_BLANKS
7783
    if (CUR == '*') {
7784
	NEXT
7785
	return(-1);
7786
    }
7787
    if ((CUR < '0') || (CUR > '9'))
7788
        return(-1);
7789
    while ((CUR >= '0') && (CUR <= '9')) {
7790
        ret = ret * 10 + (CUR - '0');
7791
	NEXT
7792
    }
7793
    return(ret);
7794
}
7795
7796
static xmlExpNodePtr
7797
xmlExpParseOr(xmlExpCtxtPtr ctxt) {
7798
    const char *base;
7799
    xmlExpNodePtr ret;
7800
    const xmlChar *val;
7801
7802
    SKIP_BLANKS
7803
    base = ctxt->cur;
7804
    if (*ctxt->cur == '(') {
7805
        NEXT
7806
	ret = xmlExpParseExpr(ctxt);
7807
	SKIP_BLANKS
7808
	if (*ctxt->cur != ')') {
7809
	    fprintf(stderr, "unbalanced '(' : %s\n", base);
7810
	    xmlExpFree(ctxt, ret);
7811
	    return(NULL);
7812
	}
7813
	NEXT;
7814
	SKIP_BLANKS
7815
	goto parse_quantifier;
7816
    }
7817
    while ((CUR != 0) && (!(IS_BLANK(CUR))) && (CUR != '(') &&
7818
           (CUR != ')') && (CUR != '|') && (CUR != ',') && (CUR != '{') &&
7819
	   (CUR != '*') && (CUR != '+') && (CUR != '?') && (CUR != '}'))
7820
	NEXT;
7821
    val = xmlDictLookup(ctxt->dict, BAD_CAST base, ctxt->cur - base);
7822
    if (val == NULL)
7823
        return(NULL);
7824
    ret = xmlExpHashGetEntry(ctxt, XML_EXP_ATOM, NULL, NULL, val, 0, 0);
7825
    if (ret == NULL)
7826
        return(NULL);
7827
    SKIP_BLANKS
7828
parse_quantifier:
7829
    if (CUR == '{') {
7830
        int min, max;
7831
7832
        NEXT
7833
	min = xmlExpParseNumber(ctxt);
7834
	if (min < 0) {
7835
	    xmlExpFree(ctxt, ret);
7836
	    return(NULL);
7837
	}
7838
	SKIP_BLANKS
7839
	if (CUR == ',') {
7840
	    NEXT
7841
	    max = xmlExpParseNumber(ctxt);
7842
	    SKIP_BLANKS
7843
	} else
7844
	    max = min;
7845
	if (CUR != '}') {
7846
	    xmlExpFree(ctxt, ret);
7847
	    return(NULL);
7848
	}
7849
        NEXT
7850
	ret = xmlExpHashGetEntry(ctxt, XML_EXP_COUNT, ret, NULL, NULL,
7851
	                         min, max);
7852
	SKIP_BLANKS
7853
    } else if (CUR == '?') {
7854
        NEXT
7855
	ret = xmlExpHashGetEntry(ctxt, XML_EXP_COUNT, ret, NULL, NULL,
7856
	                         0, 1);
7857
	SKIP_BLANKS
7858
    } else if (CUR == '+') {
7859
        NEXT
7860
	ret = xmlExpHashGetEntry(ctxt, XML_EXP_COUNT, ret, NULL, NULL,
7861
	                         1, -1);
7862
	SKIP_BLANKS
7863
    } else if (CUR == '*') {
7864
        NEXT
7865
	ret = xmlExpHashGetEntry(ctxt, XML_EXP_COUNT, ret, NULL, NULL,
7866
	                         0, -1);
7867
	SKIP_BLANKS
7868
    } 
7869
    return(ret);
7870
}
7871
7872
7873
static xmlExpNodePtr
7874
xmlExpParseSeq(xmlExpCtxtPtr ctxt) {
7875
    xmlExpNodePtr ret, right;
7876
7877
    ret = xmlExpParseOr(ctxt);
7878
    SKIP_BLANKS
7879
    while (CUR == '|') {
7880
        NEXT
7881
	right = xmlExpParseOr(ctxt);
7882
	if (right == NULL) {
7883
	    xmlExpFree(ctxt, ret);
7884
	    return(NULL);
7885
	}
7886
	ret = xmlExpHashGetEntry(ctxt, XML_EXP_OR, ret, right, NULL, 0, 0);
7887
	if (ret == NULL)
7888
	    return(NULL);
7889
    }
7890
    return(ret);
7891
}
7892
7893
static xmlExpNodePtr
7894
xmlExpParseExpr(xmlExpCtxtPtr ctxt) {
7895
    xmlExpNodePtr ret, right;
7896
7897
    ret = xmlExpParseSeq(ctxt);
7898
    SKIP_BLANKS
7899
    while (CUR == ',') {
7900
        NEXT
7901
	right = xmlExpParseSeq(ctxt);
7902
	if (right == NULL) {
7903
	    xmlExpFree(ctxt, ret);
7904
	    return(NULL);
7905
	}
7906
	ret = xmlExpHashGetEntry(ctxt, XML_EXP_SEQ, ret, right, NULL, 0, 0);
7907
	if (ret == NULL)
7908
	    return(NULL);
7909
    }
7910
    return(ret);
7911
}
7912
7913
/**
7914
 * xmlExpParse:
7915
 * @ctxt: the expressions context
7916
 * @expr: the 0 terminated string
7917
 *
7918
 * Minimal parser for regexps, it understand the following constructs
7919
 *  - string terminals
7920
 *  - choice operator |
7921
 *  - sequence operator ,
7922
 *  - subexpressions (...)
7923
 *  - usual cardinality operators + * and ?
7924
 *  - finite sequences  { min, max }
7925
 *  - infinite sequences { min, * }
7926
 * There is minimal checkings made especially no checking on strings values
7927
 *
7928
 * Returns a new expression or NULL in case of failure
7929
 */
7930
xmlExpNodePtr
7931
xmlExpParse(xmlExpCtxtPtr ctxt, const char *expr) {
7932
    xmlExpNodePtr ret;
7933
7934
    ctxt->expr = expr;
7935
    ctxt->cur = expr;
7936
7937
    ret = xmlExpParseExpr(ctxt);
7938
    SKIP_BLANKS
7939
    if (*ctxt->cur != 0) {
7940
        xmlExpFree(ctxt, ret);
7941
        return(NULL);
7942
    }
7943
    return(ret);
7944
}
7945
7946
static void
7947
xmlExpDumpInt(xmlBufferPtr buf, xmlExpNodePtr expr, int glob) {
7948
    xmlExpNodePtr c;
7949
7950
    if (expr == NULL) return;
7951
    if (glob) xmlBufferWriteChar(buf, "(");
7952
    switch (expr->type) {
7953
        case XML_EXP_EMPTY:
7954
	    xmlBufferWriteChar(buf, "empty");
7955
	    break;
7956
        case XML_EXP_FORBID:
7957
	    xmlBufferWriteChar(buf, "forbidden");
7958
	    break;
7959
        case XML_EXP_ATOM:
7960
	    xmlBufferWriteCHAR(buf, expr->exp_str);
7961
	    break;
7962
        case XML_EXP_SEQ:
7963
	    c = expr->exp_left;
7964
	    if ((c->type == XML_EXP_SEQ) || (c->type == XML_EXP_OR))
7965
	        xmlExpDumpInt(buf, c, 1);
7966
	    else
7967
	        xmlExpDumpInt(buf, c, 0);
7968
	    xmlBufferWriteChar(buf, " , ");
7969
	    c = expr->exp_right;
7970
	    if ((c->type == XML_EXP_SEQ) || (c->type == XML_EXP_OR))
7971
	        xmlExpDumpInt(buf, c, 1);
7972
	    else
7973
	        xmlExpDumpInt(buf, c, 0);
7974
            break;
7975
        case XML_EXP_OR:
7976
	    c = expr->exp_left;
7977
	    if ((c->type == XML_EXP_SEQ) || (c->type == XML_EXP_OR))
7978
	        xmlExpDumpInt(buf, c, 1);
7979
	    else
7980
	        xmlExpDumpInt(buf, c, 0);
7981
	    xmlBufferWriteChar(buf, " | ");
7982
	    c = expr->exp_right;
7983
	    if ((c->type == XML_EXP_SEQ) || (c->type == XML_EXP_OR))
7984
	        xmlExpDumpInt(buf, c, 1);
7985
	    else
7986
	        xmlExpDumpInt(buf, c, 0);
7987
            break;
7988
        case XML_EXP_COUNT: {
7989
	    char rep[40];
7990
	    
7991
	    c = expr->exp_left;
7992
	    if ((c->type == XML_EXP_SEQ) || (c->type == XML_EXP_OR))
7993
	        xmlExpDumpInt(buf, c, 1);
7994
	    else
7995
	        xmlExpDumpInt(buf, c, 0);
7996
	    if ((expr->exp_min == 0) && (expr->exp_max == 1)) {
7997
		rep[0] = '?';
7998
		rep[1] = 0;
7999
	    } else if ((expr->exp_min == 0) && (expr->exp_max == -1)) {
8000
		rep[0] = '*';
8001
		rep[1] = 0;
8002
	    } else if ((expr->exp_min == 1) && (expr->exp_max == -1)) {
8003
		rep[0] = '+';
8004
		rep[1] = 0;
8005
	    } else if (expr->exp_max == expr->exp_min) {
8006
	        snprintf(rep, 39, "{%d}", expr->exp_min);
8007
	    } else if (expr->exp_max < 0) {
8008
	        snprintf(rep, 39, "{%d,inf}", expr->exp_min);
8009
	    } else {
8010
	        snprintf(rep, 39, "{%d,%d}", expr->exp_min, expr->exp_max);
8011
	    }
8012
	    rep[39] = 0;
8013
	    xmlBufferWriteChar(buf, rep);
8014
	    break;
8015
	}
8016
	default:
8017
	    fprintf(stderr, "Error in tree\n");
8018
    }
8019
    if (glob)
8020
        xmlBufferWriteChar(buf, ")");
8021
}
8022
/**
8023
 * xmlExpDump:
8024
 * @buf:  a buffer to receive the output
8025
 * @expr:  the compiled expression
8026
 *
8027
 * Serialize the expression as compiled to the buffer
8028
 */
8029
void
8030
xmlExpDump(xmlBufferPtr buf, xmlExpNodePtr expr) {
8031
    if ((buf == NULL) || (expr == NULL))
8032
        return;
8033
    xmlExpDumpInt(buf, expr, 0);
8034
}
8035
8036
/**
8037
 * xmlExpMaxToken:
8038
 * @expr: a compiled expression
8039
 *
8040
 * Indicate the maximum number of input a expression can accept
8041
 *
8042
 * Returns the maximum length or -1 in case of error
8043
 */
8044
int
8045
xmlExpMaxToken(xmlExpNodePtr expr) {
8046
    if (expr == NULL)
8047
        return(-1);
8048
    return(expr->c_max);
8049
}
8050
8051
/**
8052
 * xmlExpCtxtNbNodes:
8053
 * @ctxt: an expression context
8054
 *
8055
 * Debugging facility provides the number of allocated nodes at a that point
8056
 *
8057
 * Returns the number of nodes in use or -1 in case of error
8058
 */
8059
int
8060
xmlExpCtxtNbNodes(xmlExpCtxtPtr ctxt) {
8061
    if (ctxt == NULL)
8062
        return(-1);
8063
    return(ctxt->nb_nodes);
8064
}
8065
8066
/**
8067
 * xmlExpCtxtNbCons:
8068
 * @ctxt: an expression context
8069
 *
8070
 * Debugging facility provides the number of allocated nodes over lifetime
8071
 *
8072
 * Returns the number of nodes ever allocated or -1 in case of error
8073
 */
8074
int
8075
xmlExpCtxtNbCons(xmlExpCtxtPtr ctxt) {
8076
    if (ctxt == NULL)
8077
        return(-1);
8078
    return(ctxt->nb_cons);
8079
}
8080
8081
#endif /* LIBXML_EXPR_ENABLED */
8082
#define bottom_xmlregexp
8083
#include "elfgcchack.h"
8084
#endif /* LIBXML_REGEXP_ENABLED */