Added workaround for broken ifstream::putback() on MSVC
[hypercube:hypercube.git] / IO / providers / dot.cpp
#include <cstring>
#include <cerrno>
#include <cctype>
#include <cwctype>
#include "dot.h"
5
6 using namespace std;
7
8
9 #define NUM_KEYWORDS (sizeof(keywords) / sizeof(Keyword))
10 const DotGraphInput::Keyword DotGraphInput::keywords[] = {
11         {NODE, L"NODE"},
12         {EDGE, L"EDGE"},
13         {GRAPH, L"GRAPH"},
14         {DIGRAPH, L"DIGRAPH"},
15         {SUBGRAPH, L"SUBGRAPH"},
16         {STRICT, L"STRICT"}
17 };
18
19
// Case-insensitive equality test for wide strings.
//
// Returns true when both strings have the same length and every pair of
// characters is equal after lower-casing, false otherwise. Note that, unlike
// the C function of similar name, "true" means "equal".
//
// The characters are wchar_t, so the wide-character std::towlower() is used:
// the <cctype> tolower() is only defined for values representable as
// unsigned char (or EOF).
static bool strCaseCmp(const std::wstring &str1, const std::wstring &str2)
{
	if (str1.length() != str2.length())
		return false;

	for (std::wstring::size_type i = 0; i < str1.length(); i++) {
		const std::wint_t a = std::towlower(static_cast<std::wint_t>(str1[i]));
		const std::wint_t b = std::towlower(static_cast<std::wint_t>(str2[i]));

		if (a != b)
			return false;
	}

	return true;
}
31
32
33 DotGraphInput::Token DotGraphInput::keyword()
34 {
35         for (size_t i = 0; i < NUM_KEYWORDS ; i++)
36                 if (strCaseCmp(_id, keywords[i].name))
37                         return keywords[i].token;
38
39         return ID;
40 }
41
42 void DotGraphInput::error()
43 {
44         if (_token == ERROR)
45                 return;
46
47         ioerr << "DOT: parse error on line: " << _line << endl;
48         _token = ERROR;
49 }
50
51 void DotGraphInput::nextToken()
52 {
53         int c, state = 0, parenthesis = 0;
54
55         _id.clear();
56
57         while (1) {
58                 c = _fs.get();
59
60                 switch (state) {
61                         case 0:
62                                 if (isspace(c)) {
63                                         if (c == '\n')
64                                                 _line++;
65                                         break;
66                                 }
67                                 if (c == '/') {
68                                         state = 3;
69                                         break;
70                                 }
71                                 if (c == '#') {
72                                         state = 4;
73                                         break;
74                                 }
75
76                                 if (c == ':') {
77                                         _token = COLON;
78                                         return;
79                                 }
80                                 if (c == ';') {
81                                         _token = SEMICOLON;
82                                         return;
83                                 }
84                                 if (c == ',') {
85                                         _token = COMMA;
86                                         return;
87                                 }
88                                 if (c == '=') {
89                                         _token = EQ;
90                                         return;
91                                 }
92                                 if (c == '{') {
93                                         _token = LBRC;
94                                         return;
95                                 }
96                                 if (c == '}') {
97                                         _token = RBRC;
98                                         return;
99                                 }
100                                 if (c == '[') {
101                                         _token = LBRK;
102                                         return;
103                                 }
104                                 if (c == ']') {
105                                         _token = RBRK;
106                                         return;
107                                 }
108
109                                 if (c == '-') {
110                                         _id += c;
111                                         state = 1;
112                                         break;
113                                 }
114                                 if (c == '.') {
115                                         _id += c;
116                                         state = 9;
117                                         break;
118                                 }
119                                 if (isdigit(c)) {
120                                         _id += c;
121                                         state = 11;
122                                         break;
123                                 }
124                                 if (isalpha(c) || c == '_') {
125                                         _id += c;
126                                         state = 2;
127                                         break;
128                                 }
129                                 if (c == '"') {
130                                         state = 7;
131                                         break;
132                                 }
133                                 if (c == '<') {
134                                         state = 12;
135                                         break;
136                                 }
137                                 if (c == -1) {
138                                         _token = EOI;
139                                         return;
140                                 }
141
142                                 error();
143                                 return;
144
145                         case 1:
146                                 if (c == '-' || c == '>') {
147                                         _token = EDGEOP;
148                                         return;
149                                 }
150                                 if (c == '.') {
151                                         _id += c;
152                                         state = 9;
153                                         break;
154                                 }
155                                 if (isdigit(c)) {
156                                         _id += c;
157                                         state = 11;
158                                         break;
159                                 }
160
161                                 error();
162                                 return;
163
164                         case 2:
165                                 if (isalnum(c) || c == '_') {
166                                         _id += c;
167                                         break;
168                                 }
169                                 _fs.unget();
170                                 _token = keyword();
171                                 return;
172
173                         case 3:
174                                 if (c == '/') {
175                                         state = 4;
176                                         break;
177                                 }
178                                 if (c == '*') {
179                                         state = 5;
180                                         break;
181                                 }
182
183                                 error();
184                                 return;
185
186                         case 4:
187                                 if (c == -1) {
188                                         _token = EOI;
189                                         return;
190                                 }
191                                 if (c == '\n') {
192                                         _line++;
193                                         state = 0;
194                                 }
195                                 break;
196
197                         case 5:
198                                 if (c == -1) {
199                                         error();
200                                         return;
201                                 }
202                                 if (c == '*')
203                                         state = 6;
204                                 if (c == '\n')
205                                         _line++;
206                                 break;
207
208                         case 6:
209                                 if (c == -1) {
210                                         error();
211                                         return;
212                                 }
213                                 if (c == '/')
214                                         state = 0;
215                                 else
216                                         state = 5;
217                                 if (c == '\n')
218                                         _line++;
219                                 break;
220
221                         case 7:
222                                 if (c == -1) {
223                                         error();
224                                         return;
225                                 }
226                                 if (c == '"') {
227                                         _token = ID;
228                                         return;
229                                 }
230                                 if (c == '\\') {
231                                         state = 8;
232                                         break;
233                                 }
234                                 if (c == '\n')
235                                         _line++;
236                                 _id += c;
237                                 break;
238
239                         case 8:
240                                 if (c == -1) {
241                                         error();
242                                         return;
243                                 }
244                                 if (c == '"')
245                                         _id += '"';
246                                 else {
247                                         _id += '\\';
248                                         _id += c;
249                                 }
250                                 state = 7;
251                                 break;
252
253                         case 9:
254                                 if (isdigit(c)) {
255                                         state = 10;
256                                         _id += c;
257                                         break;
258                                 }
259                                 error();
260                                 return;
261
262                         case 10:
263                                 if (isdigit(c)) {
264                                         _id += c;
265                                         break;
266                                 }
267                                 _fs.unget();
268                                 _token = ID;
269                                 return;
270
271                         case 11:
272                                 if (c == '.') {
273                                         _id += c;
274                                         state = 10;
275                                         break;
276                                 }
277                                 if (isdigit(c)) {
278                                         _id += c;
279                                         break;
280                                 }
281                                 _fs.unget();
282                                 _token = ID;
283                                 return;
284
285                         case 12:
286                                 if (c == -1) {
287                                         error();
288                                         return;
289                                 }
290                                 if (c == '>') {
291                                         if (!parenthesis) {
292                                                 _token = ID;
293                                                 return;
294                                         } else
295                                                 parenthesis--;
296                                 } else if (c == '<')
297                                         parenthesis++;
298                                 _id += c;
299                                 break;
300                 }
301         }
302 }
303
304 void DotGraphInput::compare(Token token)
305 {
306         if (_token == token)
307                 nextToken();
308         else
309                 error();
310 }
311
312
313 void DotGraphInput::list(Attributes &attr)
314 {
315         wstring key(_id);
316
317         compare(ID);
318
319         if (_token == EQ) {
320                 nextToken();
321                 setAttribute(attr, key, _id);
322                 compare(ID);
323         }
324
325         if (_token == COMMA)
326                 nextToken();
327
328         if (_token == ID)
329                 list(attr);
330 }
331
332 void DotGraphInput::attributeList(Attributes &attr)
333 {
334         compare(LBRK);
335         if (_token == ID)
336                 list(attr);
337         compare(RBRK);
338         if (_token == LBRK)
339                 attributeList(attr);
340 }
341
342 void DotGraphInput::edge(idSet &src, edgeSet &edges)
343 {
344         idSet dst;
345         Attributes attr;
346         Vertex *v;
347         Edge *e;
348
349         nextToken();
350
351         switch (_token) {
352                 case ID:
353                         v = addVertex(_id);
354                         setVertexAttributes(v, _nodeAttributes);
355                         dst.insert(_id);
356                         nodeId();
357                         break;
358                 case SUBGRAPH:
359                 case LBRC:
360                         subgraph(dst);
361                         break;
362                 default:
363                         error();
364         }
365
366         for (idSet::iterator i = src.begin(); i != src.end(); i++) {
367                 for (idSet::iterator j = dst.begin(); j != dst.end(); j++) {
368                         e = addEdge(*i, *j);
369                         setEdgeAttributes(e, _edgeAttributes);
370                         edges.insert(e);
371                 }
372         }
373
374         if (_token == EDGEOP)
375                 edge(dst, edges);
376         if (_token == LBRK)
377                 attributeList(attr);
378
379         for (edgeSet::iterator i = edges.begin(); i != edges.end(); i++)
380                 setEdgeAttributes(*i, attr);
381 }
382
383 void DotGraphInput::subgraphStatement(idSet &vertexes)
384 {
385         edgeSet edges;
386
387         subgraph(vertexes);
388
389         if (_token == EDGEOP)
390                 edge(vertexes, edges);
391 }
392
393 void DotGraphInput::compassPt()
394 {
395         if (_token == COLON) {
396                 nextToken();
397                 compare(ID);
398         }
399 }
400
401 void DotGraphInput::nodeId()
402 {
403         nextToken();
404
405         if (_token == COLON) {
406                 nextToken();
407                 compare(ID);
408                 compassPt();
409         }
410 }
411
412 void DotGraphInput::idStatement(idSet &subgraph)
413 {
414         wstring vertex(_id);
415         Attributes attr;
416         idSet vertexes;
417         edgeSet edges;
418         Vertex *v;
419
420         nodeId();
421
422         switch (_token) {
423                 case EDGEOP:
424                         v = addVertex(vertex);
425                         setVertexAttributes(v, _nodeAttributes);
426                         subgraph.insert(vertex);
427                         vertexes.insert(vertex);
428                         edge(vertexes, edges);
429                         break;
430                 case EQ:
431                         nextToken();
432                         compare(ID);
433                         break;
434                 case LBRK:
435                         v = addVertex(vertex);
436                         setVertexAttributes(v, _nodeAttributes);
437                         subgraph.insert(vertex);
438                         attributeList(attr);
439                         setVertexAttributes(v, attr);
440                         break;
441                 case ID:
442                 case RBRC:
443                 case SEMICOLON:
444                 case SUBGRAPH:
445                 case LBRC:
446                         v = addVertex(vertex);
447                         setVertexAttributes(v, _nodeAttributes);
448                         subgraph.insert(vertex);
449                         break;
450                 default:
451                         error();
452         }
453 }
454
455 void DotGraphInput::attributeStatement()
456 {
457         Attributes attr;
458         Token attributesType = _token;
459
460         nextToken();
461         attributeList(attr);
462
463         if (attributesType == NODE)
464                 mergeAttributes(_nodeAttributes, attr);
465         else if (attributesType == EDGE)
466                 mergeAttributes(_edgeAttributes, attr);
467 }
468
469 void DotGraphInput::subgraph(idSet &parent)
470 {
471         idSet child;
472
473         if (_token == SUBGRAPH) {
474                 nextToken();
475                 if (_token == ID)
476                         nextToken();
477         }
478
479         compare(LBRC);
480         statementList(child);
481         compare(RBRC);
482
483         parent.insert(child.begin(), child.end());
484 }
485
486 void DotGraphInput::statementList(idSet &vertexes)
487 {
488         while (1) {
489                 switch (_token) {
490                         case RBRC:
491                         case ERROR:
492                         case EOI:
493                                 return;
494                         case LBRC:
495                         case SUBGRAPH:
496                                 subgraphStatement(vertexes);
497                                 break;
498                         case ID:
499                                 idStatement(vertexes);
500                                 break;
501                         case GRAPH:
502                         case NODE:
503                         case EDGE:
504                                 attributeStatement();
505                                 break;
506                         default:
507                                 error();
508                 }
509
510                 if (_token == SEMICOLON)
511                         nextToken();
512         }
513 }
514
515 void DotGraphInput::graphId()
516 {
517         switch (_token) {
518                 case LBRC:
519                         break;
520                 case ID:
521                         nextToken();
522                         break;
523                 default:
524                         error();
525         }
526 }
527
528 void DotGraphInput::graphType()
529 {
530         switch (_token) {
531                 case GRAPH:
532                 case DIGRAPH:
533                         nextToken();
534                         break;
535                 default:
536                         error();
537         }
538 }
539
540 void DotGraphInput::graph()
541 {
542         idSet vertexes;
543
544         if (_token == STRICT)
545                 nextToken();
546
547         graphType();
548         graphId();
549         compare(LBRC);
550         statementList(vertexes);
551         compare(RBRC);
552 }
553
554 bool DotGraphInput::parse()
555 {
556         _line = 1;
557         _token = START;
558
559         nextToken();
560         graph();
561
562         _vertexes.clear();
563         attributesClear();
564
565         if (_token == EOI)
566                 return true;
567         else {
568                 error();
569                 return false;
570         }
571 }
572
573
574 void DotGraphInput::attributesClear()
575 {
576         _nodeAttributes.label.clear();
577         _edgeAttributes.label.clear();
578 }
579
580 void DotGraphInput::setAttribute(Attributes &attr, const wstring &key,
581   const wstring &value)
582 {
583         if (strCaseCmp(key, L"LABEL"))
584                 attr.label = value;
585 }
586
587 void DotGraphInput::mergeAttributes(Attributes &dst, const Attributes &src)
588 {
589         if (!src.label.empty())
590                 dst.label = src.label;
591 }
592
593 void DotGraphInput::setVertexAttributes(Vertex *vertex, const Attributes &attr)
594 {
595         if (!attr.label.empty())
596                 vertex->setText(attr.label);
597 }
598
599 void DotGraphInput::setEdgeAttributes(Edge *edge, const Attributes &attr)
600 {
601         if (!attr.label.empty())
602                 edge->setText(attr.label);
603 }
604
605 Vertex* DotGraphInput::addVertex(const wstring &vertex)
606 {
607         Vertex *v;
608         map<std::wstring, Vertex*>::const_iterator it;
609
610         it = _vertexes.find(vertex);
611         if (it != _vertexes.end())
612                 return it->second;
613
614         v = _graph->addVertex();
615         v->setText(vertex);
616
617         _vertexes.insert(pair<wstring, Vertex*>(vertex, v));
618
619         return v;
620 }
621
622 Edge* DotGraphInput::addEdge(const wstring &src, const wstring &dst)
623 {
624         return _graph->addEdge(_vertexes[src], _vertexes[dst]);
625 }
626
627
628 IO::Error DotGraphInput::readGraph(Graph *graph, const char *fileName,
629   Encoding *encoding)
630 {
631         IO::Error err = Ok;
632
633         _graph = graph;
634
635         if (encoding) {
636                 locale lc(std::locale(), encoding->cvt());
637                 _fs.imbue(lc);
638         }
639
640         _fs.open(fileName);
641         if (!_fs) {
642                 ioerr << fileName << ": " << strerror(errno) << endl;
643                 err = OpenError;
644         } else {
645                 if (!parse())
646                         err = (_fs.fail()) ? ReadError : FormatError;
647         }
648
649         _fs.close();
650         _fs.clear();
651
652         if (err)
653                 _graph->clear();
654
655         return err;
656 }