| 1 |
/* |
| 2 |
* ttman - text to man converter |
| 3 |
* |
| 4 |
* Copyright 2006 Timo Hirvonen <tihirvon@gmail.com> |
| 5 |
* |
| 6 |
* This file is licensed under the GPLv2. |
| 7 |
*/ |
| 8 |
#include <stdlib.h> |
| 9 |
#include <stdarg.h> |
| 10 |
#include <stdio.h> |
| 11 |
#include <string.h> |
| 12 |
#include <ctype.h> |
| 13 |
#include <sys/types.h> |
| 14 |
#include <sys/stat.h> |
| 15 |
#include <sys/mman.h> |
| 16 |
#include <unistd.h> |
| 17 |
#include <fcntl.h> |
| 18 |
#include <errno.h> |
| 19 |
|
| 20 |
/*
 * One lexical element of the input.  Tokens live on a circular,
 * doubly-linked list whose sentinel is the file-level `head`.
 * Field order matters: `head` is initialised positionally.
 */
struct token {
	struct token *next;
	struct token *prev;
	enum {
		TOK_TEXT = 0,	/* max one line w/o \n */
		TOK_NL,		/* \n */
		TOK_ITALIC,	/* ` */
		TOK_BOLD,	/* * */
		TOK_INDENT,	/* \t */

		/* keywords (@...) */
		TOK_H1,
		TOK_H2,
		TOK_LI,
		TOK_BR,
		TOK_PRE,
		TOK_ENDPRE,	/* must be after TOK_PRE */
		TOK_RAW,
		TOK_ENDRAW,	/* must be after TOK_RAW */
		TOK_TITLE,	/* WRITE 2 2001-12-13 "Linux 2.0.32" "Linux Programmer's Manual" */
	} type;

	/* value of cur_line at the time the token was created */
	int line;

	/* token payload; not NUL-terminated */
	const char *text;
	/* number of bytes at text */
	int len;
};
| 48 |
|
| 49 |
static const char *program; |
| 50 |
static const char *filename; |
| 51 |
static char tmp_file[1024]; |
| 52 |
static FILE *outfile; |
| 53 |
static int cur_line = 1; |
| 54 |
static struct token head = { &head, &head, TOK_TEXT, 0, NULL, 0 }; |
| 55 |
|
| 56 |
/* Builds a { string, length-without-NUL } pair from a string literal. */
#define CONST_STR(str) { str, sizeof(str) - 1 }

/*
 * Printable name of every token type, indexed by the token type value,
 * so the order here must follow the enum in struct token.
 */
static const struct {
	const char *str;
	int len;
} token_names[] = {
	CONST_STR("text"),
	CONST_STR("nl"),
	CONST_STR("italic"),
	CONST_STR("bold"),
	CONST_STR("indent"),

	// keywords
	CONST_STR("h1"),
	CONST_STR("h2"),
	CONST_STR("li"),
	CONST_STR("br"),
	CONST_STR("pre"),
	CONST_STR("endpre"),
	CONST_STR("raw"),
	CONST_STR("endraw"),
	CONST_STR("title")
};
#define NR_TOKEN_NAMES (sizeof(token_names) / sizeof(token_names[0]))

/*
 * Aborts via die() naming the function in which an impossible state was
 * reached.  Uses the standard C99 __func__ rather than the GNU-specific
 * __FUNCTION__ so the file also builds on the non-GCC path it already
 * caters for.
 */
#define BUG() die("BUG in %s\n", __func__)
| 80 |
|
| 81 |
/*
 * Function attribute telling GNU-compatible compilers that a function
 * never returns; expands to nothing elsewhere.
 */
#if defined(__GNUC__)
#define __NORETURN __attribute__((__noreturn__))
#else
#define __NORETURN
#endif
| 86 |
|
| 87 |
static __NORETURN void quit(void) |
| 88 |
{ |
| 89 |
if (tmp_file[0]) |
| 90 |
unlink(tmp_file); |
| 91 |
exit(1); |
| 92 |
} |
| 93 |
|
| 94 |
static __NORETURN void die(const char *format, ...) |
| 95 |
{ |
| 96 |
va_list ap; |
| 97 |
|
| 98 |
fprintf(stderr, "%s: ", program); |
| 99 |
va_start(ap, format); |
| 100 |
vfprintf(stderr, format, ap); |
| 101 |
va_end(ap); |
| 102 |
quit(); |
| 103 |
} |
| 104 |
|
| 105 |
static __NORETURN void syntax(int line, const char *format, ...) |
| 106 |
{ |
| 107 |
va_list ap; |
| 108 |
|
| 109 |
fprintf(stderr, "%s:%d: error: ", filename, line); |
| 110 |
va_start(ap, format); |
| 111 |
vfprintf(stderr, format, ap); |
| 112 |
va_end(ap); |
| 113 |
quit(); |
| 114 |
} |
| 115 |
|
| 116 |
static inline const char *keyword_name(int type) |
| 117 |
{ |
| 118 |
if (type < TOK_H1 || type > TOK_TITLE) |
| 119 |
die("BUG: no keyword name for type %d\n", type); |
| 120 |
return token_names[type].str; |
| 121 |
} |
| 122 |
|
| 123 |
/*
 * malloc() wrapper that never returns NULL: on allocation failure the
 * program is terminated through die().
 */
static void *xmalloc(size_t size)
{
	void *ret = malloc(size);

	if (!ret) {
		// %zu is the size_t conversion; the former "%ul" was "%u"
		// followed by a literal 'l' and mismatched the argument type
		die("OOM when allocating %zu bytes\n", size);
	}
	return ret;
}
| 131 |
|
| 132 |
/*
 * Returns a freshly allocated, NUL-terminated copy of the first len
 * bytes of str.  Allocation goes through xmalloc().
 */
static char *memdup(const char *str, int len)
{
	char *copy = xmalloc(len + 1);

	memcpy(copy, str, len);
	copy[len] = '\0';
	return copy;
}
| 139 |
|
| 140 |
static struct token *new_token(int type) |
| 141 |
{ |
| 142 |
struct token *tok = xmalloc(sizeof(struct token)); |
| 143 |
|
| 144 |
tok->prev = NULL; |
| 145 |
tok->next = NULL; |
| 146 |
tok->type = type; |
| 147 |
tok->line = cur_line; |
| 148 |
return tok; |
| 149 |
} |
| 150 |
|
| 151 |
static void free_token(struct token *tok) |
| 152 |
{ |
| 153 |
struct token *prev = tok->prev; |
| 154 |
struct token *next = tok->next; |
| 155 |
|
| 156 |
if (tok == &head) |
| 157 |
BUG(); |
| 158 |
|
| 159 |
prev->next = next; |
| 160 |
next->prev = prev; |
| 161 |
free(tok); |
| 162 |
} |
| 163 |
|
| 164 |
static void emit_token(struct token *tok) |
| 165 |
{ |
| 166 |
tok->prev = head.prev; |
| 167 |
tok->next = &head; |
| 168 |
head.prev->next = tok; |
| 169 |
head.prev = tok; |
| 170 |
} |
| 171 |
|
| 172 |
static void emit(int type) |
| 173 |
{ |
| 174 |
struct token *tok = new_token(type); |
| 175 |
tok->len = 0; |
| 176 |
tok->text = NULL; |
| 177 |
emit_token(tok); |
| 178 |
} |
| 179 |
|
| 180 |
static int emit_keyword(const char *buf, int size) |
| 181 |
{ |
| 182 |
int i, len; |
| 183 |
|
| 184 |
for (len = 0; len < size; len++) { |
| 185 |
if (!isalnum((unsigned char)buf[len])) |
| 186 |
break; |
| 187 |
} |
| 188 |
|
| 189 |
if (!len) |
| 190 |
syntax(cur_line, "keyword expected\n"); |
| 191 |
|
| 192 |
for (i = TOK_H1; i < NR_TOKEN_NAMES; i++) { |
| 193 |
if (len != token_names[i].len) |
| 194 |
continue; |
| 195 |
if (!strncmp(buf, token_names[i].str, len)) { |
| 196 |
emit(i); |
| 197 |
return len; |
| 198 |
} |
| 199 |
} |
| 200 |
syntax(cur_line, "invalid keyword '@%s'\n", memdup(buf, len)); |
| 201 |
} |
| 202 |
|
| 203 |
static int emit_text(const char *buf, int size) |
| 204 |
{ |
| 205 |
struct token *tok; |
| 206 |
int i; |
| 207 |
|
| 208 |
for (i = 0; i < size; i++) { |
| 209 |
int c = buf[i]; |
| 210 |
if (c == '@' || c == '`' || c == '*' || c == '\n' || c == '\\' || c == '\t') |
| 211 |
break; |
| 212 |
} |
| 213 |
tok = new_token(TOK_TEXT); |
| 214 |
tok->text = buf; |
| 215 |
tok->len = i; |
| 216 |
emit_token(tok); |
| 217 |
return i; |
| 218 |
} |
| 219 |
|
| 220 |
static void tokenize(const char *buf, int size) |
| 221 |
{ |
| 222 |
int pos = 0; |
| 223 |
|
| 224 |
while (pos < size) { |
| 225 |
struct token *tok; |
| 226 |
int ch; |
| 227 |
|
| 228 |
ch = buf[pos++]; |
| 229 |
switch (ch) { |
| 230 |
case '@': |
| 231 |
pos += emit_keyword(buf + pos, size - pos); |
| 232 |
break; |
| 233 |
case '`': |
| 234 |
emit(TOK_ITALIC); |
| 235 |
break; |
| 236 |
case '*': |
| 237 |
emit(TOK_BOLD); |
| 238 |
break; |
| 239 |
case '\n': |
| 240 |
emit(TOK_NL); |
| 241 |
cur_line++; |
| 242 |
break; |
| 243 |
case '\t': |
| 244 |
emit(TOK_INDENT); |
| 245 |
break; |
| 246 |
case '\\': |
| 247 |
tok = new_token(TOK_TEXT); |
| 248 |
tok->text = buf + pos; |
| 249 |
tok->len = 1; |
| 250 |
pos++; |
| 251 |
if (pos == size || buf[pos] == '\n') { |
| 252 |
// just one '\\' |
| 253 |
tok->text--; |
| 254 |
} |
| 255 |
|
| 256 |
if (tok->text[0] == '\\') { |
| 257 |
tok->text = "\\\\"; |
| 258 |
tok->len = 2; |
| 259 |
} |
| 260 |
|
| 261 |
emit_token(tok); |
| 262 |
break; |
| 263 |
default: |
| 264 |
pos--; |
| 265 |
pos += emit_text(buf + pos, size - pos); |
| 266 |
break; |
| 267 |
} |
| 268 |
} |
| 269 |
} |
| 270 |
|
| 271 |
static int is_empty_line(const struct token *tok) |
| 272 |
{ |
| 273 |
while (tok != &head) { |
| 274 |
int i; |
| 275 |
|
| 276 |
switch (tok->type) { |
| 277 |
case TOK_TEXT: |
| 278 |
for (i = 0; i < tok->len; i++) { |
| 279 |
if (tok->text[i] != ' ') |
| 280 |
return 0; |
| 281 |
} |
| 282 |
break; |
| 283 |
case TOK_INDENT: |
| 284 |
break; |
| 285 |
case TOK_NL: |
| 286 |
return 1; |
| 287 |
default: |
| 288 |
return 0; |
| 289 |
} |
| 290 |
tok = tok->next; |
| 291 |
} |
| 292 |
return 1; |
| 293 |
} |
| 294 |
|
| 295 |
static struct token *remove_line(struct token *tok) |
| 296 |
{ |
| 297 |
while (tok != &head) { |
| 298 |
struct token *next = tok->next; |
| 299 |
int type = tok->type; |
| 300 |
|
| 301 |
free_token(tok); |
| 302 |
tok = next; |
| 303 |
if (type == TOK_NL) |
| 304 |
break; |
| 305 |
} |
| 306 |
return tok; |
| 307 |
} |
| 308 |
|
| 309 |
static struct token *skip_after(struct token *tok, int type) |
| 310 |
{ |
| 311 |
struct token *save = tok; |
| 312 |
|
| 313 |
while (tok != &head) { |
| 314 |
if (tok->type == type) { |
| 315 |
tok = tok->next; |
| 316 |
if (tok->type != TOK_NL) |
| 317 |
syntax(tok->line, "newline expected after @%s\n", |
| 318 |
keyword_name(type)); |
| 319 |
return tok->next; |
| 320 |
} |
| 321 |
if (tok->type >= TOK_H1) |
| 322 |
syntax(tok->line, "keywords not allowed betweed @%s and @%s\n", |
| 323 |
keyword_name(type-1), keyword_name(type)); |
| 324 |
tok = tok->next; |
| 325 |
} |
| 326 |
syntax(save->prev->line, "missing @%s\n", keyword_name(type)); |
| 327 |
} |
| 328 |
|
| 329 |
static struct token *get_next_line(struct token *tok) |
| 330 |
{ |
| 331 |
while (tok != &head) { |
| 332 |
int type = tok->type; |
| 333 |
|
| 334 |
tok = tok->next; |
| 335 |
if (type == TOK_NL) |
| 336 |
break; |
| 337 |
} |
| 338 |
return tok; |
| 339 |
} |
| 340 |
|
| 341 |
static struct token *get_indent(struct token *tok, int *ip) |
| 342 |
{ |
| 343 |
int i = 0; |
| 344 |
|
| 345 |
while (tok != &head && tok->type == TOK_INDENT) { |
| 346 |
tok = tok->next; |
| 347 |
i++; |
| 348 |
} |
| 349 |
*ip = i; |
| 350 |
return tok; |
| 351 |
} |
| 352 |
|
| 353 |
// line must be non-empty |
| 354 |
static struct token *check_line(struct token *tok, int *ip) |
| 355 |
{ |
| 356 |
struct token *start; |
| 357 |
int tok_type; |
| 358 |
|
| 359 |
start = tok = get_indent(tok, ip); |
| 360 |
|
| 361 |
tok_type = tok->type; |
| 362 |
switch (tok_type) { |
| 363 |
case TOK_TEXT: |
| 364 |
case TOK_BOLD: |
| 365 |
case TOK_ITALIC: |
| 366 |
case TOK_BR: |
| 367 |
tok = tok->next; |
| 368 |
while (tok != &head) { |
| 369 |
switch (tok->type) { |
| 370 |
case TOK_TEXT: |
| 371 |
case TOK_BOLD: |
| 372 |
case TOK_ITALIC: |
| 373 |
case TOK_BR: |
| 374 |
case TOK_INDENT: |
| 375 |
break; |
| 376 |
case TOK_NL: |
| 377 |
return start; |
| 378 |
default: |
| 379 |
syntax(tok->line, "@%s not allowed inside paragraph\n", |
| 380 |
keyword_name(tok->type)); |
| 381 |
} |
| 382 |
tok = tok->next; |
| 383 |
} |
| 384 |
break; |
| 385 |
case TOK_H1: |
| 386 |
case TOK_H2: |
| 387 |
case TOK_TITLE: |
| 388 |
if (*ip) |
| 389 |
goto indentation; |
| 390 |
|
| 391 |
// check arguments |
| 392 |
tok = tok->next; |
| 393 |
while (tok != &head) { |
| 394 |
switch (tok->type) { |
| 395 |
case TOK_TEXT: |
| 396 |
case TOK_INDENT: |
| 397 |
break; |
| 398 |
case TOK_NL: |
| 399 |
return start; |
| 400 |
default: |
| 401 |
syntax(tok->line, "@%s can contain only text\n", |
| 402 |
keyword_name(tok_type)); |
| 403 |
} |
| 404 |
tok = tok->next; |
| 405 |
} |
| 406 |
break; |
| 407 |
case TOK_LI: |
| 408 |
// check arguments |
| 409 |
tok = tok->next; |
| 410 |
while (tok != &head) { |
| 411 |
switch (tok->type) { |
| 412 |
case TOK_TEXT: |
| 413 |
case TOK_BOLD: |
| 414 |
case TOK_ITALIC: |
| 415 |
case TOK_INDENT: |
| 416 |
break; |
| 417 |
case TOK_NL: |
| 418 |
return start; |
| 419 |
default: |
| 420 |
syntax(tok->line, "@%s not allowed inside @li\n", |
| 421 |
keyword_name(tok->type)); |
| 422 |
} |
| 423 |
tok = tok->next; |
| 424 |
} |
| 425 |
break; |
| 426 |
case TOK_PRE: |
| 427 |
// checked later |
| 428 |
break; |
| 429 |
case TOK_RAW: |
| 430 |
if (*ip) |
| 431 |
goto indentation; |
| 432 |
// checked later |
| 433 |
break; |
| 434 |
case TOK_ENDPRE: |
| 435 |
case TOK_ENDRAW: |
| 436 |
syntax(tok->line, "@%s not expected\n", keyword_name(tok->type)); |
| 437 |
break; |
| 438 |
case TOK_NL: |
| 439 |
case TOK_INDENT: |
| 440 |
BUG(); |
| 441 |
break; |
| 442 |
} |
| 443 |
return start; |
| 444 |
indentation: |
| 445 |
syntax(tok->line, "indentation before @%s\n", keyword_name(tok->type)); |
| 446 |
} |
| 447 |
|
| 448 |
static void insert_nl_before(struct token *next) |
| 449 |
{ |
| 450 |
struct token *prev = next->prev; |
| 451 |
struct token *new = new_token(TOK_NL); |
| 452 |
|
| 453 |
new->prev = prev; |
| 454 |
new->next = next; |
| 455 |
prev->next = new; |
| 456 |
next->prev = new; |
| 457 |
} |
| 458 |
|
| 459 |
static void normalize(void) |
| 460 |
{ |
| 461 |
struct token *tok = head.next; |
| 462 |
/* |
| 463 |
* >= 0 if previous line was text (== amount of indent) |
| 464 |
* -1 if previous block was @pre (amount of indent doesn't matter) |
| 465 |
* -2 otherwise (@h1 etc., indent was 0) |
| 466 |
*/ |
| 467 |
int prev_indent = -2; |
| 468 |
|
| 469 |
while (tok != &head) { |
| 470 |
struct token *start; |
| 471 |
int i, new_para = 0; |
| 472 |
|
| 473 |
// remove empty lines |
| 474 |
while (is_empty_line(tok)) { |
| 475 |
tok = remove_line(tok); |
| 476 |
new_para = 1; |
| 477 |
if (tok == &head) |
| 478 |
return; |
| 479 |
} |
| 480 |
|
| 481 |
// skips indent |
| 482 |
start = tok; |
| 483 |
tok = check_line(tok, &i); |
| 484 |
|
| 485 |
switch (tok->type) { |
| 486 |
case TOK_TEXT: |
| 487 |
case TOK_ITALIC: |
| 488 |
case TOK_BOLD: |
| 489 |
case TOK_BR: |
| 490 |
// normal text |
| 491 |
if (new_para && prev_indent >= -1) { |
| 492 |
// previous line/block was text or @pre |
| 493 |
// and there was a empty line after it |
| 494 |
insert_nl_before(start); |
| 495 |
} |
| 496 |
|
| 497 |
if (!new_para && prev_indent == i) { |
| 498 |
// join with previous line |
| 499 |
struct token *nl = start->prev; |
| 500 |
|
| 501 |
if (nl->type != TOK_NL) |
| 502 |
BUG(); |
| 503 |
|
| 504 |
if ((nl->prev != &head && nl->prev->type == TOK_BR) || |
| 505 |
tok->type == TOK_BR) { |
| 506 |
// don't convert \n after/before @br to ' ' |
| 507 |
free_token(nl); |
| 508 |
} else { |
| 509 |
// convert "\n" to " " |
| 510 |
nl->type = TOK_TEXT; |
| 511 |
nl->text = " "; |
| 512 |
nl->len = 1; |
| 513 |
} |
| 514 |
|
| 515 |
// remove indent |
| 516 |
while (start->type == TOK_INDENT) { |
| 517 |
struct token *next = start->next; |
| 518 |
free_token(start); |
| 519 |
start = next; |
| 520 |
} |
| 521 |
} |
| 522 |
|
| 523 |
prev_indent = i; |
| 524 |
tok = get_next_line(tok); |
| 525 |
break; |
| 526 |
case TOK_PRE: |
| 527 |
case TOK_RAW: |
| 528 |
// these can be directly after normal text |
| 529 |
// but not joined with the previous line |
| 530 |
if (new_para && prev_indent >= -1) { |
| 531 |
// previous line/block was text or @pre |
| 532 |
// and there was a empty line after it |
| 533 |
insert_nl_before(start); |
| 534 |
} |
| 535 |
tok = skip_after(tok->next, tok->type + 1); |
| 536 |
prev_indent = -1; |
| 537 |
break; |
| 538 |
case TOK_H1: |
| 539 |
case TOK_H2: |
| 540 |
case TOK_LI: |
| 541 |
case TOK_TITLE: |
| 542 |
// remove white space after H1, H2, L1 and TITLE |
| 543 |
tok = tok->next; |
| 544 |
while (tok != &head) { |
| 545 |
int type = tok->type; |
| 546 |
struct token *next; |
| 547 |
|
| 548 |
if (type == TOK_TEXT) { |
| 549 |
while (tok->len && *tok->text == ' ') { |
| 550 |
tok->text++; |
| 551 |
tok->len--; |
| 552 |
} |
| 553 |
if (tok->len) |
| 554 |
break; |
| 555 |
} |
| 556 |
if (type != TOK_INDENT) |
| 557 |
break; |
| 558 |
|
| 559 |
// empty TOK_TEXT or TOK_INDENT |
| 560 |
next = tok->next; |
| 561 |
free_token(tok); |
| 562 |
tok = next; |
| 563 |
} |
| 564 |
// not normal text. can't be joined |
| 565 |
prev_indent = -2; |
| 566 |
tok = get_next_line(tok); |
| 567 |
break; |
| 568 |
case TOK_NL: |
| 569 |
case TOK_INDENT: |
| 570 |
case TOK_ENDPRE: |
| 571 |
case TOK_ENDRAW: |
| 572 |
BUG(); |
| 573 |
break; |
| 574 |
} |
| 575 |
} |
| 576 |
} |
| 577 |
|
| 578 |
#define output(...) fprintf(outfile, __VA_ARGS__) |
| 579 |
|
| 580 |
static void output_buf(const char *buf, int len) |
| 581 |
{ |
| 582 |
fwrite(buf, 1, len, outfile); |
| 583 |
} |
| 584 |
|
| 585 |
static void output_text(struct token *tok) |
| 586 |
{ |
| 587 |
char buf[1024]; |
| 588 |
const char *str = tok->text; |
| 589 |
int len = tok->len; |
| 590 |
int pos = 0; |
| 591 |
|
| 592 |
while (len) { |
| 593 |
int c = *str++; |
| 594 |
|
| 595 |
if (pos >= sizeof(buf) - 1) { |
| 596 |
output_buf(buf, pos); |
| 597 |
pos = 0; |
| 598 |
} |
| 599 |
if (c == '-') |
| 600 |
buf[pos++] = '\\'; |
| 601 |
buf[pos++] = c; |
| 602 |
len--; |
| 603 |
} |
| 604 |
|
| 605 |
if (pos) |
| 606 |
output_buf(buf, pos); |
| 607 |
} |
| 608 |
|
| 609 |
/* font state toggled by output_para(); both start switched off */
static int bold;
static int italic;
/* number of .RS levels currently open; maintained by dump_one() */
static int indent;
| 612 |
|
| 613 |
static struct token *output_pre(struct token *tok) |
| 614 |
{ |
| 615 |
int bol = 1; |
| 616 |
|
| 617 |
if (tok->type != TOK_NL) |
| 618 |
syntax(tok->line, "newline expected after @pre\n"); |
| 619 |
|
| 620 |
output(".nf\n"); |
| 621 |
tok = tok->next; |
| 622 |
while (tok != &head) { |
| 623 |
if (bol) { |
| 624 |
int i; |
| 625 |
|
| 626 |
tok = get_indent(tok, &i); |
| 627 |
if (i != indent && tok->type != TOK_NL) |
| 628 |
syntax(tok->line, "indent changed in @pre\n"); |
| 629 |
} |
| 630 |
|
| 631 |
switch (tok->type) { |
| 632 |
case TOK_TEXT: |
| 633 |
if (bol && tok->len && tok->text[0] == '.') |
| 634 |
output("\\&"); |
| 635 |
output_text(tok); |
| 636 |
break; |
| 637 |
case TOK_NL: |
| 638 |
output("\n"); |
| 639 |
bol = 1; |
| 640 |
tok = tok->next; |
| 641 |
continue; |
| 642 |
case TOK_ITALIC: |
| 643 |
output("`"); |
| 644 |
break; |
| 645 |
case TOK_BOLD: |
| 646 |
output("*"); |
| 647 |
break; |
| 648 |
case TOK_INDENT: |
| 649 |
// FIXME: warn |
| 650 |
output(" "); |
| 651 |
break; |
| 652 |
case TOK_ENDPRE: |
| 653 |
output(".fi\n"); |
| 654 |
tok = tok->next; |
| 655 |
if (tok != &head && tok->type == TOK_NL) |
| 656 |
tok = tok->next; |
| 657 |
return tok; |
| 658 |
default: |
| 659 |
BUG(); |
| 660 |
break; |
| 661 |
} |
| 662 |
bol = 0; |
| 663 |
tok = tok->next; |
| 664 |
} |
| 665 |
return tok; |
| 666 |
} |
| 667 |
|
| 668 |
static struct token *output_raw(struct token *tok) |
| 669 |
{ |
| 670 |
if (tok->type != TOK_NL) |
| 671 |
syntax(tok->line, "newline expected after @raw\n"); |
| 672 |
|
| 673 |
tok = tok->next; |
| 674 |
while (tok != &head) { |
| 675 |
switch (tok->type) { |
| 676 |
case TOK_TEXT: |
| 677 |
if (tok->len == 2 && !strncmp(tok->text, "\\\\", 2)) { |
| 678 |
/* ugly special case |
| 679 |
* "\\" (\) was converted to "\\\\" (\\) because |
| 680 |
* nroff does escaping too. |
| 681 |
*/ |
| 682 |
output("\\"); |
| 683 |
} else { |
| 684 |
output_buf(tok->text, tok->len); |
| 685 |
} |
| 686 |
break; |
| 687 |
case TOK_NL: |
| 688 |
output("\n"); |
| 689 |
break; |
| 690 |
case TOK_ITALIC: |
| 691 |
output("`"); |
| 692 |
break; |
| 693 |
case TOK_BOLD: |
| 694 |
output("*"); |
| 695 |
break; |
| 696 |
case TOK_INDENT: |
| 697 |
output("\t"); |
| 698 |
break; |
| 699 |
case TOK_ENDRAW: |
| 700 |
tok = tok->next; |
| 701 |
if (tok != &head && tok->type == TOK_NL) |
| 702 |
tok = tok->next; |
| 703 |
return tok; |
| 704 |
default: |
| 705 |
BUG(); |
| 706 |
break; |
| 707 |
} |
| 708 |
tok = tok->next; |
| 709 |
} |
| 710 |
return tok; |
| 711 |
} |
| 712 |
|
| 713 |
static struct token *output_para(struct token *tok) |
| 714 |
{ |
| 715 |
int bol = 1; |
| 716 |
|
| 717 |
while (tok != &head) { |
| 718 |
switch (tok->type) { |
| 719 |
case TOK_TEXT: |
| 720 |
output_text(tok); |
| 721 |
break; |
| 722 |
case TOK_ITALIC: |
| 723 |
italic ^= 1; |
| 724 |
if (italic) { |
| 725 |
output("\\fI"); |
| 726 |
} else { |
| 727 |
output("\\fR"); |
| 728 |
} |
| 729 |
break; |
| 730 |
case TOK_BOLD: |
| 731 |
bold ^= 1; |
| 732 |
if (bold) { |
| 733 |
output("\\fB"); |
| 734 |
} else { |
| 735 |
output("\\fR"); |
| 736 |
} |
| 737 |
break; |
| 738 |
case TOK_BR: |
| 739 |
if (bol) { |
| 740 |
output(".br\n"); |
| 741 |
} else { |
| 742 |
output("\n.br\n"); |
| 743 |
} |
| 744 |
bol = 1; |
| 745 |
tok = tok->next; |
| 746 |
continue; |
| 747 |
case TOK_NL: |
| 748 |
output("\n"); |
| 749 |
return tok->next; |
| 750 |
case TOK_INDENT: |
| 751 |
output(" "); |
| 752 |
break; |
| 753 |
default: |
| 754 |
BUG(); |
| 755 |
break; |
| 756 |
} |
| 757 |
bol = 0; |
| 758 |
tok = tok->next; |
| 759 |
} |
| 760 |
return tok; |
| 761 |
} |
| 762 |
|
| 763 |
static struct token *title(struct token *tok, const char *cmd) |
| 764 |
{ |
| 765 |
output("%s", cmd); |
| 766 |
return output_para(tok->next); |
| 767 |
} |
| 768 |
|
| 769 |
static struct token *dump_one(struct token *tok) |
| 770 |
{ |
| 771 |
int i; |
| 772 |
|
| 773 |
tok = get_indent(tok, &i); |
| 774 |
if (tok->type != TOK_RAW) { |
| 775 |
while (indent < i) { |
| 776 |
output(".RS\n"); |
| 777 |
indent++; |
| 778 |
} |
| 779 |
while (indent > i) { |
| 780 |
output(".RE\n"); |
| 781 |
indent--; |
| 782 |
} |
| 783 |
} |
| 784 |
|
| 785 |
switch (tok->type) { |
| 786 |
case TOK_TEXT: |
| 787 |
case TOK_ITALIC: |
| 788 |
case TOK_BOLD: |
| 789 |
case TOK_BR: |
| 790 |
if (tok->type == TOK_TEXT && tok->len && tok->text[0] == '.') |
| 791 |
output("\\&"); |
| 792 |
tok = output_para(tok); |
| 793 |
break; |
| 794 |
case TOK_H1: |
| 795 |
tok = title(tok, ".SH "); |
| 796 |
break; |
| 797 |
case TOK_H2: |
| 798 |
tok = title(tok, ".SS "); |
| 799 |
break; |
| 800 |
case TOK_LI: |
| 801 |
tok = title(tok, ".TP\n"); |
| 802 |
break; |
| 803 |
case TOK_PRE: |
| 804 |
tok = output_pre(tok->next); |
| 805 |
break; |
| 806 |
case TOK_RAW: |
| 807 |
tok = output_raw(tok->next); |
| 808 |
break; |
| 809 |
case TOK_TITLE: |
| 810 |
tok = title(tok, ".TH "); |
| 811 |
// must be after .TH |
| 812 |
// no hyphenation, adjust left |
| 813 |
output(".nh\n.ad l\n"); |
| 814 |
break; |
| 815 |
case TOK_NL: |
| 816 |
output("\n"); |
| 817 |
tok = tok->next; |
| 818 |
break; |
| 819 |
case TOK_ENDPRE: |
| 820 |
case TOK_ENDRAW: |
| 821 |
case TOK_INDENT: |
| 822 |
BUG(); |
| 823 |
break; |
| 824 |
} |
| 825 |
return tok; |
| 826 |
} |
| 827 |
|
| 828 |
static void dump(void) |
| 829 |
{ |
| 830 |
struct token *tok = head.next; |
| 831 |
|
| 832 |
while (tok != &head) |
| 833 |
tok = dump_one(tok); |
| 834 |
} |
| 835 |
|
| 836 |
static void process(void) |
| 837 |
{ |
| 838 |
struct stat s; |
| 839 |
const char *buf; |
| 840 |
int fd; |
| 841 |
|
| 842 |
fd = open(filename, O_RDONLY); |
| 843 |
if (fd == -1) |
| 844 |
die("opening `%s' for reading: %s\n", filename, strerror(errno)); |
| 845 |
fstat(fd, &s); |
| 846 |
if (s.st_size) { |
| 847 |
buf = mmap(NULL, s.st_size, PROT_READ, MAP_PRIVATE, fd, 0); |
| 848 |
if (buf == MAP_FAILED) |
| 849 |
die("mmap: %s\n", strerror(errno)); |
| 850 |
|
| 851 |
tokenize(buf, s.st_size); |
| 852 |
normalize(); |
| 853 |
} |
| 854 |
dump(); |
| 855 |
} |
| 856 |
|
| 857 |
int main(int argc, char *argv[]) |
| 858 |
{ |
| 859 |
const char *dest; |
| 860 |
int fd; |
| 861 |
|
| 862 |
program = argv[0]; |
| 863 |
if (argc != 3) { |
| 864 |
fprintf(stderr, "Usage: %s <in> <out>\n", program); |
| 865 |
return 1; |
| 866 |
} |
| 867 |
filename = argv[1]; |
| 868 |
dest = argv[2]; |
| 869 |
|
| 870 |
snprintf(tmp_file, sizeof(tmp_file), "%s.XXXXXX", dest); |
| 871 |
fd = mkstemp(tmp_file); |
| 872 |
if (fd < 0) |
| 873 |
die("creating %s: %s\n", tmp_file, strerror(errno)); |
| 874 |
outfile = fdopen(fd, "w"); |
| 875 |
if (!outfile) |
| 876 |
die("opening %s: %s\n", tmp_file, strerror(errno)); |
| 877 |
|
| 878 |
process(); |
| 879 |
if (rename(tmp_file, dest)) |
| 880 |
die("renaming %s to %s: %s\n", tmp_file, dest, strerror(errno)); |
| 881 |
return 0; |
| 882 |
} |