1
/*****************************************************************************
2
3
NAME:
4
   rfc822.c -- code for slicing and dicing RFC822 mail headers
5
6
ENTRY POINTS:
   nxtaddr() -- parse the next address out of an RFC822 header
   reply_hack() -- append hostname to local header addresses
9
10
THEORY:
   How to parse RFC822 headers in C. This is not a fully conformant
implementation of RFC822 or RFC2822, but it has been in production use
in a widely-deployed MTA (fetchmail) since 1996 without complaints.
Really perverse combinations of quoting and commenting could break it.
15
16
AUTHOR:
   Eric S. Raymond <esr@thyrsus.com>, 1997.  This source code example
is part of fetchmail and the Unix Cookbook, and is released under the
MIT license.  Compile with -DMAIN to build the demonstrator.
20
21
******************************************************************************/
22
23
#include "config.h"
24
25
#include  <stdio.h>
26
#include  <ctype.h>
27
#include  <string.h>
28
#include  <strings.h>
29
#include  <stdlib.h>
30
31
#include "fetchmail.h"
32
#include "sdump.h"
33
34
#ifndef MAIN
35
#include "i18n.h"
36
#else
37
#include  <unistd.h>
38
static int verbose;
39
const char *program_name = "rfc822";
40
#endif /* MAIN */
41
42
#ifndef TRUE
43
#define TRUE 1
44
#define FALSE 0
45
#endif
46
47
/*
 * True when p points at a newline that is not followed by a space or a
 * tab, i.e. a line break that does not lead into a folded continuation
 * line.  Note that p is evaluated more than once.
 */
#define HEADER_END(p)	((p)[0] == '\n' && ((p)[1] != ' ' && (p)[1] != '\t'))
48
49
/*
 * Number of characters in s that precede the first CR or LF (the whole
 * length of s when it contains neither).
 */
#define BEFORE_EOL(s)	(strcspn((s), "\r\n"))
50
51
char *reply_hack(
52
	char *buf		/* header to be hacked */,
53
	const char *host	/* server hostname */,
54
	size_t *length)
55
/* hack message headers so replies will work properly */
56
{
57
    char *from, *cp, last_nws = '\0', *parens_from = NULL;
58
    int parendepth, state, has_bare_name_part, has_host_part;
59
#ifndef MAIN
60
    int addresscount = 1;
61
#endif /* MAIN */
62
63
    if (strncasecmp("From:", buf, 5)
64
	&& strncasecmp("To:", buf, 3)
65
	&& strncasecmp("Reply-To:", buf, 9)
66
	&& strncasecmp("Return-Path:", buf, 12)
67
	&& strncasecmp("Cc:", buf, 3)
68
	&& strncasecmp("Bcc:", buf, 4)
69
	&& strncasecmp("Resent-From:", buf, 12)
70
	&& strncasecmp("Resent-To:", buf, 10)
71
	&& strncasecmp("Resent-Cc:", buf, 10)
72
	&& strncasecmp("Resent-Bcc:", buf, 11)
73
	&& strncasecmp("Apparently-From:", buf, 16)
74
	&& strncasecmp("Apparently-To:", buf, 14)
75
	&& strncasecmp("Sender:", buf, 7)
76
	&& strncasecmp("Resent-Sender:", buf, 14)
77
       ) {
78
	return(buf);
79
    }
80
81
#ifndef MAIN
82
    if (outlevel >= O_DEBUG) {
83
	report_build(stdout, GT_("About to rewrite %s...\n"), (cp = sdump(buf, BEFORE_EOL(buf))));
84
	xfree(cp);
85
    }
86
87
    /* make room to hack the address; buf must be malloced */
88
    for (cp = buf; *cp; cp++)
89
	if (*cp == ',' || isspace((unsigned char)*cp))
90
	    addresscount++;
91
    buf = (char *)xrealloc(buf, strlen(buf) + addresscount * (strlen(host) + 1) + 1);
92
#endif /* MAIN */
93
94
    /*
95
     * This is going to foo up on some ill-formed addresses.
96
     * Note that we don't rewrite the fake address <> in order to
97
     * avoid screwing up bounce suppression with a null Return-Path.
98
     */
99
100
    parendepth = state = 0;
101
    has_host_part = has_bare_name_part = FALSE;
102
    for (from = buf; *from; from++)
103
    {
104
#ifdef MAIN
105
	if (verbose)
106
	{
107
	    printf("state %d: %s", state, buf);
108
	    printf("%*s^\n", (int)(from - buf + 10), " ");
109
	}
110
#endif /* MAIN */
111
	if (state != 2)
112
	{
113
	    if (*from == '(')
114
		++parendepth;
115
	    else if (*from == ')')
116
		--parendepth;
117
	}
118
119
	if (!parendepth && !has_host_part)
120
	    switch (state)
121
	    {
122
	    case 0:	/* before header colon */
123
		if (*from == ':')
124
		    state = 1;
125
		break;
126
127
	    case 1:	/* we've seen the colon, we're looking for addresses */
128
		if (!isspace((unsigned char)*from))
129
		    last_nws = *from;
130
		if (*from == '<')
131
		    state = 3;
132
		else if (*from == '@' || *from == '!')
133
		    has_host_part = TRUE;
134
		else if (*from == '"')
135
		    state = 2;
136
		/*
137
		 * Not expanding on last non-WS == ';' deals with groupnames,
138
		 * an obscure misfeature described in sections
139
		 * 6.1, 6.2.6, and A.1.5 of the RFC822 standard.
140
		 */
141
		else if ((*from == ',' || HEADER_END(from))
142
			 && has_bare_name_part
143
			 && !has_host_part
144
			 && last_nws != ';')
145
		{
146
		    int hostlen;
147
		    char *p;
148
149
		    p = from;
150
		    if (parens_from)
151
			from = parens_from;
152
		    while (isspace((unsigned char)*from) || (*from == ','))
153
			--from;
154
		    from++;
155
		    hostlen = strlen(host);
156
		    for (cp = from + strlen(from); cp >= from; --cp)
157
			cp[hostlen+1] = *cp;
158
		    *from++ = '@';
159
		    memcpy(from, host, hostlen);
160
		    from = p + hostlen + 1;
161
		    has_host_part = TRUE;
162
		} 
163
		else if (from[1] == '('
164
			 && has_bare_name_part
165
			 && !has_host_part
166
			 && last_nws != ';' && last_nws != ')')
167
		{
168
		    parens_from = from;
169
		} 
170
		else if (!isspace((unsigned char)*from))
171
		    has_bare_name_part = TRUE;
172
		break;
173
174
	    case 2:	/* we're in a string */
175
		if (*from == '"')
176
		{
177
		    char	*bp;
178
		    int		bscount;
179
180
		    bscount = 0;
181
		    for (bp = from - 1; *bp == '\\'; bp--)
182
			bscount++;
183
		    if (!(bscount % 2))
184
			state = 1;
185
		}
186
		break;
187
188
	    case 3:	/* we're in a <>-enclosed address */
189
		if (*from == '@' || *from == '!')
190
		    has_host_part = TRUE;
191
		else if (*from == '>' && (from > buf && from[-1] != '<'))
192
		{
193
		    state = 1;
194
		    if (!has_host_part)
195
		    {
196
			int hostlen;
197
198
			hostlen = strlen(host);
199
			for (cp = from + strlen(from); cp >= from; --cp)
200
			    cp[hostlen+1] = *cp;
201
			*from++ = '@';
202
			memcpy(from, host, hostlen);
203
			from += hostlen;
204
			has_host_part = TRUE;
205
		    }
206
		}
207
		break;
208
	    }
209
210
	/*
211
	 * If we passed a comma, reset everything.
212
	 */
213
	if ((from > buf && from[-1] == ',') && !parendepth) {
214
	  has_host_part = has_bare_name_part = FALSE;
215
	  parens_from = NULL;
216
	}
217
    }
218
219
#ifndef MAIN
220
    if (outlevel >= O_DEBUG) {
221
	report_complete(stdout, GT_("...rewritten version is %s.\n"),
222
			(cp = sdump(buf, BEFORE_EOL(buf))));
223
	xfree(cp)
224
    }
225
226
#endif /* MAIN */
227
    *length = strlen(buf);
228
    return(buf);
229
}
230
231
char *nxtaddr(const char *hdr /* header to be parsed, NULL to continue previous hdr */)
/*
 * parse addresses in succession out of a specified RFC822 header
 *
 * Pass the header text to start a scan, then pass NULL to obtain each
 * further address from the same header.  The return value points at a
 * static buffer holding the next address, or is NULL when there are no
 * more.  The buffer is overwritten by the next call, and because the scan
 * position and state are kept in static variables, scans of two headers
 * cannot be interleaved.  The header text must stay valid between calls.
 *
 * Addresses that do not fit the static buffer are truncated.
 */
{
    static char address[BUFSIZ];
    static size_t tp;
    static const char *hp;
    static int	state, oldstate;
#ifdef MAIN
    static const char *orighdr;
#endif /* MAIN */
    int parendepth = 0;

#define START_HDR	0	/* before header colon */
#define SKIP_JUNK	1	/* skip whitespace, \n, and junk */
#define BARE_ADDRESS	2	/* collecting address without delimiters */
#define INSIDE_DQUOTE	3	/* inside double quotes */
#define INSIDE_PARENS	4	/* inside parentheses */
#define INSIDE_BRACKETS	5	/* inside bracketed address */
#define ENDIT_ALL	6	/* after last address */

/* index of the next free slot; stops advancing at the last element so
 * that over-long addresses cannot run off the end of the buffer */
#define NEXTTP()	((tp < sizeof(address)-1) ? tp++ : tp)

    if (hdr)
    {
	hp = hdr;
	state = START_HDR;
#ifdef MAIN
	orighdr = hdr;
#endif /* MAIN */
	tp = 0;
    }

    if (!hp) return NULL;

    for (; *hp; hp++)
    {
#ifdef MAIN
	if (verbose)
	{
	    printf("state %d: %s", state, orighdr);
	    printf("%*s^\n", (int)(hp - orighdr + 10), " ");
	}
#endif /* MAIN */

	if (state == ENDIT_ALL)		/* after last address */
	    return(NULL);
	else if (HEADER_END(hp))
	{
	    state = ENDIT_ALL;
	    if (tp)
	    {
		/* hand back what was collected, minus trailing whitespace */
		while (tp > 0 && isspace((unsigned char)address[tp - 1]))
		    tp--;
		address[tp] = '\0';
		tp = 0;
		return (address);
	    }
	    return(NULL);
	}
	else if (*hp == '\\')		/* handle RFC822 escaping */
	{
	    if (state != INSIDE_PARENS)
	    {
		address[NEXTTP()] = *hp;	/* take the escape */
		/*
		 * Take the escaped character too, but never step onto the
		 * terminating NUL: the loop increment would then move hp
		 * past the end of the header.
		 */
		if (hp[1] != '\0')
		    address[NEXTTP()] = *++hp;
	    }
	}
	else switch (state)
	{
	case START_HDR:   /* before header colon */
	    if (*hp == ':')
		state = SKIP_JUNK;
	    break;

	case SKIP_JUNK:		/* looking for address start */
	    if (*hp == '"')	/* quoted string */
	    {
		oldstate = SKIP_JUNK;
		state = INSIDE_DQUOTE;
		address[NEXTTP()] = *hp;
	    }
	    else if (*hp == '(')	/* address comment -- ignore */
	    {
		parendepth = 1;
		oldstate = SKIP_JUNK;
		state = INSIDE_PARENS;
	    }
	    else if (*hp == '<')	/* begin <address> */
	    {
		state = INSIDE_BRACKETS;
		tp = 0;
	    }
	    else if (*hp != ',' && !isspace((unsigned char)*hp))
	    {
		/* back up so BARE_ADDRESS sees this character again */
		--hp;
		state = BARE_ADDRESS;
	    }
	    break;

	case BARE_ADDRESS:   	/* collecting address without delimiters */
	    if (*hp == ',')  	/* end of address */
	    {
		if (tp)
		{
		    address[NEXTTP()] = '\0';
		    state = SKIP_JUNK;
		    tp = 0;
		    return(address);
		}
	    }
	    else if (*hp == '(')  	/* beginning of comment */
	    {
		parendepth = 1;
		oldstate = BARE_ADDRESS;
		state = INSIDE_PARENS;
	    }
	    else if (*hp == '<')  	/* beginning of real address */
	    {
		/* what was collected so far was a display name; drop it */
		state = INSIDE_BRACKETS;
		tp = 0;
	    }
	    else if (*hp == '"')        /* quoted word, copy verbatim */
	    {
		oldstate = state;
		state = INSIDE_DQUOTE;
		address[NEXTTP()] = *hp;
	    }
	    else if (!isspace((unsigned char)*hp)) 	/* just take it, ignoring whitespace */
		address[NEXTTP()] = *hp;
	    break;

	case INSIDE_DQUOTE:	/* we're in a quoted string, copy verbatim */
	    address[NEXTTP()] = *hp;
	    if (*hp == '"')
		state = oldstate;
	    break;

	case INSIDE_PARENS:	/* we're in a parenthesized comment, ignore */
	    if (*hp == '(')
		++parendepth;
	    else if (*hp == ')')
		--parendepth;
	    if (parendepth == 0)
		state = oldstate;
	    break;

	case INSIDE_BRACKETS:	/* possible <>-enclosed address */
	    if (*hp == '>')	/* end of address */
	    {
		address[NEXTTP()] = '\0';
		state = SKIP_JUNK;
		++hp;		/* the next call resumes after the '>' */
		tp = 0;
		return(address);
	    }
	    else if (*hp == '<')	/* nested <> */
		tp = 0;
	    else if (*hp == '"')	/* quoted address */
	    {
		address[NEXTTP()] = *hp;
		oldstate = INSIDE_BRACKETS;
		state = INSIDE_DQUOTE;
	    }
	    else			/* just copy address */
		address[NEXTTP()] = *hp;
	    break;
	}
    }

    return(NULL);
}
402
403
#ifdef MAIN
404
static void parsebuf(char *longbuf, int reply)
/*
 * demonstrator back end for one complete header held in longbuf:
 * when reply is nonzero, print the header as rewritten by reply_hack();
 * otherwise print every address nxtaddr() extracts from it
 */
{
    if (reply)
    {
	static const char host[] = "HOSTNAME.NET";
	const char	*scan;
	char	*work;
	size_t	slots = 1, dummy;

	/*
	 * In the MAIN build reply_hack() does not grow its buffer, so give
	 * it a private copy with room for one "@host" per possible address
	 * rather than letting it write past the end of the caller's array.
	 * The sizing mirrors what reply_hack() does in the non-MAIN build.
	 */
	for (scan = longbuf; *scan; scan++)
	    if (*scan == ',' || isspace((unsigned char)*scan))
		slots++;
	/* sizeof(host) already counts the extra byte needed for '@' */
	work = (char *)malloc(strlen(longbuf) + slots * sizeof(host) + 1);
	if (work == NULL)
	{
	    perror("malloc");
	    exit(1);
	}
	strcpy(work, longbuf);
	work = reply_hack(work, host, &dummy);
	printf("Rewritten buffer: %s", work);
	free(work);
    }
    else
    {
	char	*cp;

	for (cp = nxtaddr(longbuf); cp != (char *)NULL; cp = nxtaddr((char *)NULL))
	    printf("\t-> \"%s\"\n", cp);
    }
}
421
422
423
424
int main(int argc, char *argv[])
425
{
426
    char	buf[BUFSIZ], longbuf[BUFSIZ];
427
    int		ch, reply;
428
    
429
    verbose = reply = FALSE;
430
    while ((ch = getopt(argc, argv, "rv")) != EOF)
431
	switch(ch)
432
	{
433
	case 'r':
434
	    reply = TRUE;
435
	    break;
436
437
	case 'v':
438
	    verbose = TRUE;
439
	    break;
440
	}
441
442
    longbuf[0] = '\0';
443
444
    while (fgets(buf, sizeof(buf)-1, stdin))
445
    {
446
	if (buf[0] == ' ' || buf[0] == '\t')
447
	    strlcat(longbuf, buf, sizeof(longbuf));
448
	else if (!strncasecmp("From: ", buf, 6)
449
		    || !strncasecmp("To: ", buf, 4)
450
		    || !strncasecmp("Reply-", buf, 6)
451
		    || !strncasecmp("Cc: ", buf, 4)
452
		    || !strncasecmp("Bcc: ", buf, 5))
453
	    strlcpy(longbuf, buf, sizeof(longbuf));
454
	else if (longbuf[0])
455
	{
456
	    if (verbose)
457
		fputs(longbuf, stdout);
458
	    parsebuf(longbuf, reply);
459
	    longbuf[0] = '\0';
460
	}
461
    }
462
    if (longbuf[0])
463
    {
464
	if (verbose)
465
	    fputs(longbuf, stdout);
466
	parsebuf(longbuf, reply);
467
    }
468
    exit(0);
469
}
470
#endif /* MAIN */
471
472
/* rfc822.c end */