Added support for GraphML input format
[hypercube:hypercube.git] / IO / providers / graphml.cpp
#include <cstring>
#include <cerrno>
#include <cwctype>
#include "IO/encodings/utf8cvt.h"
#include "graphml.h"
5
6 using namespace std;
7
8
/*
 * Character classes used by the tokenizer for XML names.
 *
 * The tokenizer hands over the value returned by the stream's get() (or -1
 * at end of input). The <cctype> classifiers are only defined for values
 * representable as unsigned char (and EOF), so the wide-character classifier
 * is used instead; it is well defined for the whole wint_t range, and the -1
 * sentinel converts to a value that is never alphanumeric.
 */

/* Returns true if c may start a name: alphanumeric, '_' or ':'. */
static inline bool isStartName(int c)
{
        return c == '_' || c == ':'
          || std::iswalnum(static_cast<std::wint_t>(c)) != 0;
}

/* Returns true if c may continue a name: a start character, '-' or '.'. */
static inline bool isName(int c)
{
        return isStartName(c) || c == '-' || c == '.';
}
13
14
15 #define NUM_RELATIONS (sizeof(relations) / sizeof(Relation))
16 const GraphmlGraphInput::Relation GraphmlGraphInput::relations[] = {
17         {L"graphml", L""},
18         {L"graph", L"graphml"},
19         {L"node", L"graph"},
20         {L"edge", L"graph"}
21 };
22
23
24 Vertex* GraphmlGraphInput::addVertex(const wstring &id)
25 {
26         Vertex *v;
27         map<wstring, Vertex*>::const_iterator it;
28
29         it = _vertexes.find(id);
30         if (it != _vertexes.end())
31                 return it->second;
32
33         v = _graph->addVertex();
34
35         _vertexes.insert(pair<wstring, Vertex*>(id, v));
36
37         return v;
38 }
39
40 Edge* GraphmlGraphInput::addEdge(const wstring &source, const wstring &target)
41 {
42         Vertex *src, *dst;
43
44         src = addVertex(source);
45         dst = addVertex(target);
46
47         return _graph->addEdge(src, dst);
48 }
49
50 void GraphmlGraphInput::checkRelation(const wstring &node, const wstring &parent)
51 {
52         for (size_t i = 0; i < NUM_RELATIONS; i++) {
53                 if (node == relations[i].node) {
54                         if (parent != relations[i].parent)
55                                 error();
56                         return;
57                 }
58         }
59 }
60
61
62 void GraphmlGraphInput::error()
63 {
64         if (_token == ERROR)
65                 return;
66
67         ioerr << "GraphML: parse error on line: " << _line << endl;
68         _token = ERROR;
69 }
70
71 void GraphmlGraphInput::nextToken()
72 {
73         int c, state = 0;
74
75
76         while (1) {
77                 c = _fs.get();
78
79                 if (!_fs.good())
80                         c = -1;
81
82                 switch (state) {
83                         case 0:
84                                 if (isspace(c)) {
85                                         if (c == '\n')
86                                                 _line++;
87                                         break;
88                                 }
89                                 if (c == '<') {
90                                         state = 3;
91                                         break;
92                                 }
93                                 if (c == '>') {
94                                         _token = GT;
95                                         return;
96                                 }
97                                 if (c == '=') {
98                                         _token = EQ;
99                                         return;
100                                 }
101                                 if (c == '&') {
102                                         _token = AMP;
103                                         return;
104                                 }
105                                 if (c == '/') {
106                                         _token = SLASH;
107                                         return;
108                                 }
109                                 if (c == '?') {
110                                         _token = QM;
111                                         return;
112                                 }
113                                 if (isStartName(c)) {
114                                         _string = c;
115                                         state = 1;
116                                         break;
117                                 }
118                                 if (c == '"') {
119                                         _string.clear();
120                                         state = 2;
121                                         break;
122                                 }
123                                 if (c == -1) {
124                                         _token = EOI;
125                                         return;
126                                 }
127                                 error();
128                                 return;
129
130                         case 1:
131                                 if (isName(c)) {
132                                         _string += c;
133                                         break;
134                                 }
135                                 _fs.putback(c);
136                                 _token = IDENT;
137                                 return;
138
139                         case 2:
140                                 if (c == '"') {
141                                         _token = STRING;
142                                         return;
143                                 }
144                                 if (c == '\n')
145                                         _line++;
146                                 _string += c;
147                                 break;
148
149                         case 3:
150                                 if (c == '!') {
151                                         state = 4;
152                                         break;
153                                 }
154                                 _fs.putback(c);
155                                 _token = LT;
156                                 return;
157
158                         case 4:
159                                 if (c == '-') {
160                                         state = 5;
161                                         break;
162                                 }
163                                 error();
164
165                         case 5:
166                                 if (c == '-') {
167                                         state = 6;
168                                         break;
169                                 }
170                                 error();
171
172                         case 6:
173                                 if (c == '-')
174                                         state = 7;
175                                 if (c == '\n')
176                                         _line++;
177                                 break;
178
179                         case 7:
180                                 if (c == '-')
181                                         state = 8;
182                                 else
183                                         state = 6;
184                                 break;
185
186                         case 8:
187                                 if (c == '>')
188                                         state = 0;
189                                 else
190                                         state = 6;
191                                 break;
192                 }
193         }
194 }
195
196 void GraphmlGraphInput::compare(Token token)
197 {
198         if (_token == token)
199                 nextToken();
200         else
201                 error();
202 }
203
204 void GraphmlGraphInput::data()
205 {
206         while (1) {
207                 switch (_token) {
208                         case LT:
209                         case ERROR:
210                         case EOI:
211                                 return;
212                         default:
213                                 nextToken();
214                 }
215         }
216 }
217
218 void GraphmlGraphInput::attribute()
219 {
220         wstring attr, value;
221
222         attr = _string;
223         compare(IDENT);
224         compare(EQ);
225         value = _string;
226         compare(STRING);
227
228         if (_token == ERROR)
229                 return;
230
231         if (attr == L"id")
232                 _attributes.id = value;
233         if (attr == L"source")
234                 _attributes.source = value;
235         if (attr == L"target")
236                 _attributes.target = value;
237 }
238
239 bool GraphmlGraphInput::attributes()
240 {
241         while (1) {
242                 switch (_token) {
243                         case SLASH:
244                                 nextToken();
245                                 return true;
246                         case GT:
247                                 return false;
248                         case IDENT:
249                                 attribute();
250                                 break;
251                         default:
252                                 error();
253                                 return false;
254                 }
255         }
256 }
257
258 void GraphmlGraphInput::elementType(const wstring &parent)
259 {
260         switch (_token) {
261                 case SLASH:
262                         break;
263                 case IDENT:
264                         element(parent);
265                         break;
266                 default:
267                         error();
268         }
269 }
270
271 void GraphmlGraphInput::nextItem(const wstring &parent)
272 {
273         switch (_token) {
274                 case LT:
275                         nextToken();
276                         elementType(parent);
277                         break;
278                 case IDENT:
279                         data();
280                         compare(LT);
281                         break;
282                 case EOI:
283                         break;
284                 default:
285                         error();
286         }
287 }
288
289 void GraphmlGraphInput::element(const wstring &parent)
290 {
291         bool closed;
292         wstring start, end;
293         Vertex *vertex;
294         Edge *edge;
295
296
297         start = _string;
298         compare(IDENT);
299         checkRelation(start, parent);
300         closed = attributes();
301         compare(GT);
302
303         if (_token == ERROR)
304                 return;
305
306         if (start == L"node") {
307                 vertex = addVertex(_attributes.id);
308                 vertex->setText(_attributes.id);
309         }
310         if (start == L"edge") {
311                 edge = addEdge(_attributes.source, _attributes.target);
312                 edge->setText(_attributes.id);
313         }
314
315         if (closed)
316                 return;
317
318         while (_token == LT || _token == IDENT)
319                 nextItem(start);
320
321         compare(SLASH);
322         end = _string;
323         compare(IDENT);
324         compare(GT);
325
326         if (start != end)
327                 error();
328 }
329
330 void GraphmlGraphInput::xmlAttributes()
331 {
332         while (1) {
333                 switch (_token) {
334                         case QM:
335                         case ERROR:
336                         case EOI:
337                                 return;
338                         default:
339                                 nextToken();
340                 }
341         }
342 }
343
344 void GraphmlGraphInput::xmlProlog()
345 {
346         wstring elm;
347
348         nextToken();
349         elm = _string;
350         compare(IDENT);
351         xmlAttributes();
352         compare(QM);
353         compare(GT);
354
355         if (elm != L"xml")
356                 error();
357 }
358
359 void GraphmlGraphInput::xml()
360 {
361         compare(LT);
362
363         switch (_token) {
364                 case QM:
365                         xmlProlog();
366                         compare(LT);
367                         element(L"");
368                         break;
369                 case IDENT:
370                         element(L"");
371                         break;
372                 default:
373                         error();
374         }
375 }
376
377 bool GraphmlGraphInput::parse()
378 {
379         _line = 1;
380         _token = START;
381
382         nextToken();
383         xml();
384
385         _vertexes.clear();
386
387         if (_token == EOI)
388                 return true;
389         else {
390                 error();
391                 return false;
392         }
393 }
394
395 IO::Error GraphmlGraphInput::readGraph(Graph *graph, const char *fileName,
396   Encoding *encoding)
397 {
398         IO::Error err = Ok;
399
400         _graph = graph;
401
402         locale lc(std::locale(), new utf8cvt);
403         _fs.imbue(lc);
404
405         _fs.open(fileName);
406         if (!_fs) {
407                 ioerr << fileName << ": " << strerror(errno) << endl;
408                 err = OpenError;
409         } else {
410                 if (!parse())
411                         err = (_fs.fail()) ? ReadError : FormatError;
412         }
413
414         _fs.close();
415         _fs.clear();
416
417         if (err)
418                 _graph->clear();
419
420         return err;
421 }