gravatar
deba@inf.elte.hu
deba@inf.elte.hu
Section reader for DigraphReader
0 2 0
default
2 files changed with 183 insertions and 12 deletions:
↑ Collapse diff ↑
Ignore white space 48 line context
... ...
@@ -69,28 +69,31 @@
69 69
describe the arcs. The first two tokens of each line are
70 70
the source and the target node of the arc, respectively, then come the map
71 71
values. The source and target tokens must be node labels.
72 72

	
73 73
\code
74 74
 @arcs
75 75
 	      capacity
76 76
 1   2   16
77 77
 1   3   12
78 78
 2   3   18
79 79
\endcode
80 80

	
81 81
The \c \@edges is just a synonym of \c \@arcs.
82 82

	
83 83
The \c \@attributes section contains key-value pairs, each line
84 84
consists of two tokens, an attribute name, and then an attribute value.
85 85

	
86 86
\code
87 87
 @attributes
88 88
 source 1
89 89
 target 3
90 90
 caption "LEMON test digraph"
91 91
\endcode
92 92

	
93
The \e LGF can contain extra sections, but there is no restriction on
94
the format of such sections.
95

	
93 96
*/
94 97
}
95 98

	
96 99
//  LocalWords:  whitespace whitespaces
Ignore white space 6 line context
... ...
@@ -247,85 +247,153 @@
247 247
      if (c == '\"') {
248 248
	while (is.get(c) && c != '\"') {
249 249
	  if (c == '\\') 
250 250
	    c = readEscape(is);
251 251
	  os << c;
252 252
	}
253 253
	if (!is) 
254 254
	  throw DataFormatError("Quoted format error");
255 255
      } else {
256 256
	is.putback(c);
257 257
	while (is.get(c) && !isWhiteSpace(c)) {
258 258
	  if (c == '\\') 
259 259
	    c = readEscape(is);
260 260
	  os << c;
261 261
	}
262 262
	if (!is) {
263 263
	  is.clear();
264 264
	} else {
265 265
	  is.putback(c);
266 266
	}
267 267
      }
268 268
      str = os.str();
269 269
      return is;
270 270
    }
271

	
272
    // Abstract interface of the processors used for the extra sections.
    //
    // A concrete processor overrides process(), which is handed the
    // input stream and the line counter (by reference, so that it can
    // be updated while the section is consumed).
    class Section {
    public:
      virtual void process(std::istream& is, int& line_num) = 0;
      virtual ~Section() {}
    };
277

	
278
    template <typename Functor>
279
    class LineSection : public Section {
280
    private:
281

	
282
      Functor _functor;
283

	
284
    public:
285
      
286
      LineSection(const Functor& functor) : _functor(functor) {}
287
      virtual ~LineSection() {}
288

	
289
      virtual void process(std::istream& is, int& line_num) {
290
	char c;
291
	std::string line;
292
	while (is.get(c) && c != '@') {
293
	  if (c == '\n') {
294
	    ++line_num;
295
	  } else if (c == '#') {
296
	    getline(is, line);
297
	    ++line_num;
298
	  } else if (!isWhiteSpace(c)) {
299
	    is.putback(c);
300
	    getline(is, line);
301
	    _functor(line);
302
	    ++line_num;
303
	  }
304
	}
305
	if (is) is.putback(c);
306
	else if (is.eof()) is.clear();
307
      }
308
    };
309

	
310
    template <typename Functor>
311
    class StreamSection : public Section {
312
    private:
313

	
314
      Functor _functor;
315

	
316
    public:
317
      
318
      StreamSection(const Functor& functor) : _functor(functor) {}
319
      virtual ~StreamSection() {} 
320

	
321
      virtual void process(std::istream& is, int& line_num) {
322
	_functor(is, line_num);
323
	char c;
324
	std::string line;
325
	while (is.get(c) && c != '@') {
326
	  if (c == '\n') {
327
	    ++line_num;
328
	  } else if (!isWhiteSpace(c)) {
329
	    getline(is, line);
330
	    ++line_num;
331
	  }
332
	}
333
	if (is) is.putback(c);
334
	else if (is.eof()) is.clear();	
335
      }
336
    };
271 337
    
272 338
  }
273 339

	
274 340
  /// \ingroup lemon_io
275 341
  ///  
276 342
  /// \brief LGF reader for directed graphs
277 343
  ///
278 344
  /// This utility reads an \ref lgf-format "LGF" file.
279 345
  ///
280 346
  /// The reading method does a batch processing. The user creates a
281 347
  /// reader object, then various reading rules can be added to the
282 348
  /// reader, and eventually the reading is executed with the \c run()
283 349
  /// member function. A map reading rule can be added to the reader
284 350
  /// with the \c nodeMap() or \c arcMap() members. An optional
285
  /// converter parameter can also be added as a standard functor converting from
286
  /// std::string to the value type of the map. If it is set, it will
287
  /// determine how the tokens in the file should be is converted to the map's
288
  /// value type. If the functor is not set, then a default conversion
289
  /// will be used. One map can be read into multiple map objects at the
290
  /// same time. The \c attribute(), \c node() and \c arc() functions
291
  /// are used to add attribute reading rules.
351
  /// converter parameter can also be added as a standard functor
352
  /// converting from std::string to the value type of the map. If it
353
  /// is set, it will determine how the tokens in the file should be
354
  /// converted to the map's value type. If the functor is not set,
355
  /// then a default conversion will be used. One map can be read into
356
  /// multiple map objects at the same time. The \c attribute(), \c
357
  /// node() and \c arc() functions are used to add attribute reading
358
  /// rules.
292 359
  ///
293 360
  ///\code
294 361
  ///     DigraphReader<Digraph>(std::cin, digraph).
295 362
  ///       nodeMap("coordinates", coord_map).
296 363
  ///       arcMap("capacity", cap_map).
297 364
  ///       node("source", src).
298 365
  ///       node("target", trg).
299 366
  ///       attribute("caption", caption).
300 367
  ///       run();
301 368
  ///\endcode
302 369
  ///
303 370
  /// By default the reader uses the first section in the file of the
304 371
  /// proper type. If a section has an optional name, then it can be
305
  /// selected for reading by giving an optional name parameter to
306
  /// the \c nodes(), \c arcs() or \c attributes()
307
  /// functions.
372
  /// selected for reading by giving an optional name parameter to the
373
  /// \c nodes(), \c arcs() or \c attributes() functions. The reader
374
  /// can also load extra sections with the \c sectionLines() and
375
  /// \c sectionStream() functions.
308 376
  ///
309 377
  /// The \c useNodes() and \c useArcs() functions are used to tell the reader
310 378
  /// that the nodes or arcs should not be constructed (added to the
311 379
  /// graph) during the reading, but instead the label map of the items
312 380
  /// is given as a parameter of these functions. An
313 381
  /// application of these functions is multipass reading, which is
314 382
  /// important if two \e \@arcs sections must be read from the
315 383
  /// file. In this example the first phase would read the node set and one
316 384
  /// of the arc sets, while the second phase would read the second arc
317 385
  /// set into an \e ArcSet class (\c SmartArcSet or \c ListArcSet).
318 386
  /// The previously read label node map should be passed to the \c
319 387
  /// useNodes() functions. Another application of multipass reading is when
320 388
  /// paths are given as a node map or an arc map. It is impossible to read this in
321 389
  /// a single pass, because the arcs are not constructed when the node
322 390
  /// maps are read.
323 391
  template <typename _Digraph>
324 392
  class DigraphReader {
325 393
  public:
326 394

	
327 395
    typedef _Digraph Digraph;
328 396
    TEMPLATE_DIGRAPH_TYPEDEFS(Digraph);
329 397
    
330 398
  private:
331 399

	
... ...
@@ -335,48 +403,51 @@
335 403

	
336 404
    Digraph& _digraph;
337 405

	
338 406
    std::string _nodes_caption;
339 407
    std::string _arcs_caption;
340 408
    std::string _attributes_caption;
341 409

	
342 410
    typedef std::map<std::string, Node> NodeIndex;
343 411
    NodeIndex _node_index;
344 412
    typedef std::map<std::string, Arc> ArcIndex;
345 413
    ArcIndex _arc_index;
346 414
    
347 415
    typedef std::vector<std::pair<std::string, 
348 416
      _reader_bits::MapStorageBase<Node>*> > NodeMaps;    
349 417
    NodeMaps _node_maps; 
350 418

	
351 419
    typedef std::vector<std::pair<std::string,
352 420
      _reader_bits::MapStorageBase<Arc>*> >ArcMaps;
353 421
    ArcMaps _arc_maps;
354 422

	
355 423
    typedef std::multimap<std::string, _reader_bits::ValueStorageBase*> 
356 424
      Attributes;
357 425
    Attributes _attributes;
358 426

	
427
    typedef std::map<std::string, _reader_bits::Section*> Sections;
428
    Sections _sections;
429

	
359 430
    bool _use_nodes;
360 431
    bool _use_arcs;
361 432

	
362 433
    int line_num;
363 434
    std::istringstream line;
364 435

	
365 436
  public:
366 437

	
367 438
    /// \brief Constructor
368 439
    ///
369 440
    /// Construct a directed graph reader, which reads from the given
370 441
    /// input stream.
371 442
    DigraphReader(std::istream& is, Digraph& digraph) 
372 443
      : _is(&is), local_is(false), _digraph(digraph),
373 444
	_use_nodes(false), _use_arcs(false) {}
374 445

	
375 446
    /// \brief Constructor
376 447
    ///
377 448
    /// Construct a directed graph reader, which reads from the given
378 449
    /// file.
379 450
    DigraphReader(const std::string& fn, Digraph& digraph) 
380 451
      : _is(new std::ifstream(fn.c_str())), local_is(true), _digraph(digraph),
381 452
    	_use_nodes(false), _use_arcs(false) {}
382 453
    
... ...
@@ -388,67 +459,74 @@
388 459
      : _is(new std::ifstream(fn)), local_is(true), _digraph(digraph),
389 460
    	_use_nodes(false), _use_arcs(false) {}
390 461

	
391 462
    /// \brief Copy constructor
392 463
    ///
393 464
    /// The copy constructor transfers all data from the other reader,
394 465
    /// therefore the copied reader will not be usable more. 
395 466
    DigraphReader(DigraphReader& other) 
396 467
      : _is(other._is), local_is(other.local_is), _digraph(other._digraph),
397 468
	_use_nodes(other._use_nodes), _use_arcs(other._use_arcs) {
398 469

	
399 470
      other.is = 0;
400 471
      other.local_is = false;
401 472
      
402 473
      _node_index.swap(other._node_index);
403 474
      _arc_index.swap(other._arc_index);
404 475

	
405 476
      _node_maps.swap(other._node_maps);
406 477
      _arc_maps.swap(other._arc_maps);
407 478
      _attributes.swap(other._attributes);
408 479

	
409 480
      _nodes_caption = other._nodes_caption;
410 481
      _arcs_caption = other._arcs_caption;
411 482
      _attributes_caption = other._attributes_caption;
483

	
484
      _sections.swap(other._sections);
412 485
    }
413 486

	
414 487
    /// \brief Destructor
415 488
    ~DigraphReader() {
416 489
      for (typename NodeMaps::iterator it = _node_maps.begin(); 
417 490
	   it != _node_maps.end(); ++it) {
418 491
	delete it->second;
419 492
      }
420 493

	
421 494
      for (typename ArcMaps::iterator it = _arc_maps.begin(); 
422 495
	   it != _arc_maps.end(); ++it) {
423 496
	delete it->second;
424 497
      }
425 498

	
426 499
      for (typename Attributes::iterator it = _attributes.begin(); 
427 500
	   it != _attributes.end(); ++it) {
428 501
	delete it->second;
429 502
      }
430 503

	
504
      for (typename Sections::iterator it = _sections.begin(); 
505
	   it != _sections.end(); ++it) {
506
	delete it->second;
507
      }
508

	
431 509
      if (local_is) {
432 510
	delete _is;
433 511
      }
434 512

	
435 513
    }
436 514

	
437 515
  private:
438 516
    
439 517
    DigraphReader& operator=(const DigraphReader&);
440 518

	
441 519
  public:
442 520

	
443 521
    /// \name Reading rules
444 522
    /// @{
445 523
    
446 524
    /// \brief Node map reading rule
447 525
    ///
448 526
    /// Add a node map reading rule to the reader.
449 527
    template <typename Map>
450 528
    DigraphReader& nodeMap(const std::string& caption, Map& map) {
451 529
      checkConcept<concepts::WriteMap<Node, typename Map::Value>, Map>();
452 530
      _reader_bits::MapStorageBase<Node>* storage = 
453 531
	new _reader_bits::MapStorage<Node, Map>(map);
454 532
      _node_maps.push_back(std::make_pair(caption, storage));
... ...
@@ -553,48 +631,125 @@
553 631
    /// Set \c \@nodes section to be read
554 632
    DigraphReader& nodes(const std::string& caption) {
      // Store the caption; run() compares it with the caption of the
      // @nodes sections found in the input.
      _nodes_caption.assign(caption);
      return *this;
    }
558 636

	
559 637
    /// \brief Set \c \@arcs section to be read
560 638
    ///
561 639
    /// Set \c \@arcs section to be read
562 640
    DigraphReader& arcs(const std::string& caption) {
563 641
      _arcs_caption = caption;
564 642
      return *this;
565 643
    }
566 644

	
567 645
    /// \brief Set \c \@attributes section to be read
568 646
    ///
569 647
    /// Set \c \@attributes section to be read
570 648
    DigraphReader& attributes(const std::string& caption) {
571 649
      _attributes_caption = caption;
572 650
      return *this;
573 651
    }
574 652

	
575 653
    /// @}
576 654

	
655
    /// \name Section readers
656
    /// @{
657

	
658
    /// \brief Add a section processor with line oriented reading
659
    ///
660
    /// In the \e LGF file extra sections can be placed, which contain
661
    /// any data in arbitrary format. These sections can be read with
662
    /// this function line by line. The first parameter is the type
663
    /// descriptor of the section, the second is a functor, which
664
    /// takes just one \c std::string parameter. At the reading
665
    /// process, each line of the section will be given to the functor
666
    /// object. However, the empty lines and the comment lines are
667
    /// filtered out, and the leading whitespaces are stipped from
668
    /// each processed string.
669
    ///
670
    /// For example let's see a section, which contain several
671
    /// integers, which should be inserted into a vector.
672
    ///\code
673
    ///  @numbers
674
    ///  12 45 23
675
    ///  4
676
    ///  23 6
677
    ///\endcode
678
    ///
679
    /// The functor is implemented as an struct:
680
    ///\code
681
    ///  struct NumberSection {
682
    ///    std::vector<int>& _data;
683
    ///    NumberSection(std::vector<int>& data) : _data(data) {}
684
    ///    void operator()(const std::string& line) {
685
    ///      std::istringstream ls(line);
686
    ///      int value;
687
    ///      while (ls >> value) _data.push_back(value);
688
    ///    }
689
    ///  };
690
    ///
691
    ///  // ...
692
    ///
693
    ///  reader.sectionLines("numbers", NumberSection(vec));  
694
    ///\endcode
695
    template <typename Functor>
696
    DigraphReader& sectionLines(const std::string& type, Functor functor) {
697
      LEMON_ASSERT(!type.empty(), "Type is not empty.");
698
      LEMON_ASSERT(_sections.find(type) == _sections.end(), 
699
		   "Multiple reading of section.");
700
      LEMON_ASSERT(type != "nodes" && type != "arcs" && type != "edges" &&
701
		   type != "attributes", "Multiple reading of section.");
702
      _sections.insert(std::make_pair(type, 
703
        new _reader_bits::LineSection<Functor>(functor)));
704
      return *this;
705
    }
706

	
707

	
708
    /// \brief Add a section processor with stream oriented reading
709
    ///
710
    /// In the \e LGF file extra sections can be placed, which contain
711
    /// any data in arbitrary format. These sections can be read
712
    /// directly with this function. The first parameter is the type
713
    /// of the section, the second is a functor, which takes an \c
714
    /// std::istream& and an int& parameter, the latter regard to the
715
    /// line number of stream. The functor can read the input while
716
    /// the section go on, and the line number should be modified
717
    /// accordingly.
718
    template <typename Functor>
719
    DigraphReader& sectionStream(const std::string& type, Functor functor) {
720
      LEMON_ASSERT(!type.empty(), "Type is not empty.");
721
      LEMON_ASSERT(_sections.find(type) == _sections.end(), 
722
		   "Multiple reading of section.");
723
      LEMON_ASSERT(type != "nodes" && type != "arcs" && type != "edges" &&
724
		   type != "attributes", "Multiple reading of section.");
725
      _sections.insert(std::make_pair(type, 
726
	 new _reader_bits::StreamSection<Functor>(functor)));
727
      return *this;
728
    }    
729
    
730
    /// @}
731

	
577 732
    /// \name Using previously constructed node or arc set
578 733
    /// @{
579 734

	
580 735
    /// \brief Use previously constructed node set
581 736
    ///
582 737
    /// Use previously constructed node set, and specify the node
583 738
    /// label map.
584 739
    template <typename Map>
585 740
    DigraphReader& useNodes(const Map& map) {
586 741
      checkConcept<concepts::ReadMap<Node, typename Map::Value>, Map>();
587 742
      LEMON_ASSERT(!_use_nodes, "Multiple usage of useNodes() member"); 
588 743
      _use_nodes = true;
589 744
      _writer_bits::DefaultConverter<typename Map::Value> converter;
590 745
      for (NodeIt n(_digraph); n != INVALID; ++n) {
591 746
	_node_index.insert(std::make_pair(converter(map[n]), n));
592 747
      }
593 748
      return *this;
594 749
    }
595 750

	
596 751
    /// \brief Use previously constructed node set
597 752
    ///
598 753
    /// Use previously constructed node set, and specify the node
599 754
    /// label map and a functor which converts the label map values to
600 755
    /// std::string.
... ...
@@ -908,83 +1063,96 @@
908 1063
	   it != _attributes.end(); ++it) {
909 1064
	if (read_attr.find(it->first) == read_attr.end()) {
910 1065
	  std::ostringstream msg;
911 1066
	  msg << "Attribute not found in file: " << it->first;
912 1067
	  throw DataFormatError(msg.str().c_str());
913 1068
	}	
914 1069
      }
915 1070
    }
916 1071

	
917 1072
  public:
918 1073

	
919 1074
    /// \name Execution of the reader    
920 1075
    /// @{
921 1076

	
922 1077
    /// \brief Start the batch processing
923 1078
    ///
924 1079
    /// This function starts the batch processing
925 1080
    void run() {
926 1081
      
927 1082
      LEMON_ASSERT(_is != 0, "This reader assigned to an other reader");
928 1083
      
929 1084
      bool nodes_done = false;
930 1085
      bool arcs_done = false;
931 1086
      bool attributes_done = false;
1087
      std::set<std::string> extra_sections;
932 1088

	
933 1089
      line_num = 0;      
934 1090
      readLine();
935 1091

	
936 1092
      while (readSuccess()) {
937 1093
	skipSection();
938 1094
	try {
939 1095
	  char c;
940 1096
	  std::string section, caption;
941 1097
	  line >> c;
942 1098
	  _reader_bits::readToken(line, section);
943 1099
	  _reader_bits::readToken(line, caption);
944 1100

	
945 1101
	  if (line >> c) 
946 1102
	    throw DataFormatError("Extra character on the end of line");
947 1103

	
948 1104
	  if (section == "nodes" && !nodes_done) {
949 1105
	    if (_nodes_caption.empty() || _nodes_caption == caption) {
950 1106
	      readNodes();
951 1107
	      nodes_done = true;
952 1108
	    }
953 1109
	  } else if ((section == "arcs" || section == "edges") && 
954 1110
		     !arcs_done) {
955 1111
	    if (_arcs_caption.empty() || _arcs_caption == caption) {
956 1112
	      readArcs();
957 1113
	      arcs_done = true;
958 1114
	    }
959 1115
	  } else if (section == "attributes" && !attributes_done) {
960 1116
	    if (_attributes_caption.empty() || _attributes_caption == caption) {
961 1117
	      readAttributes();
962 1118
	      attributes_done = true;
963 1119
	    }
964 1120
	  } else {
965
	    readLine();
966
	    skipSection();
1121
	    if (extra_sections.find(section) != extra_sections.end()) {
1122
	      std::ostringstream msg;
1123
	      msg << "Multiple occurence of section " << section;
1124
	      throw DataFormatError(msg.str().c_str());
1125
	    }
1126
	    Sections::iterator it = _sections.find(section);
1127
	    if (it != _sections.end()) {
1128
	      extra_sections.insert(section);
1129
	      it->second->process(*_is, line_num);
1130
	      readLine();
1131
	    } else {
1132
	      readLine();
1133
	      skipSection();
1134
	    }
967 1135
	  }
968 1136
	} catch (DataFormatError& error) {
969 1137
	  error.line(line_num);
970 1138
	  throw;
971 1139
	}	
972 1140
      }
973 1141

	
974 1142
      if (!nodes_done) {
975 1143
	throw DataFormatError("Section @nodes not found");
976 1144
      }
977 1145

	
978 1146
      if (!arcs_done) {
979 1147
	throw DataFormatError("Section @arcs not found");
980 1148
      }
981 1149

	
982 1150
      if (!attributes_done && !_attributes.empty()) {
983 1151
	throw DataFormatError("Section @attributes not found");
984 1152
      }
985 1153

	
986 1154
    }
987 1155

	
988 1156
    /// @}
989 1157
    
990 1158
  };
0 comments (0 inline)