# HG changeset patch
# User Alpar Juttner <alpar@cs.elte.hu>
# Date 1267189220 -3600
# Node ID 2914b6f0fde0a1378a2d87c564bd2ac5c9153dda
# Parent  2c35bef44dd12ecff705b0b9cd6cde34a9bb371b# Parent  f3bc4e9b5f3a10814b5ae5b45cd804987c9231e4
Merge #340

diff -r 2c35bef44dd1 -r 2914b6f0fde0 lemon/capacity_scaling.h
--- a/lemon/capacity_scaling.h	Sun Feb 21 18:55:30 2010 +0100
+++ b/lemon/capacity_scaling.h	Fri Feb 26 14:00:20 2010 +0100
@@ -139,9 +139,10 @@
     TEMPLATE_DIGRAPH_TYPEDEFS(GR);
 
     typedef std::vector<int> IntVector;
-    typedef std::vector<char> BoolVector;
     typedef std::vector<Value> ValueVector;
     typedef std::vector<Cost> CostVector;
+    typedef std::vector<char> BoolVector;
+    // Note: vector<char> is used instead of vector<bool> for efficiency reasons
 
   private:
 
@@ -798,15 +799,15 @@
       // Initialize delta value
       if (_factor > 1) {
         // With scaling
-        Value max_sup = 0, max_dem = 0;
-        for (int i = 0; i != _node_num; ++i) {
+        Value max_sup = 0, max_dem = 0, max_cap = 0;
+        for (int i = 0; i != _root; ++i) {
           Value ex = _excess[i];
           if ( ex > max_sup) max_sup =  ex;
           if (-ex > max_dem) max_dem = -ex;
-        }
-        Value max_cap = 0;
-        for (int j = 0; j != _res_arc_num; ++j) {
-          if (_res_cap[j] > max_cap) max_cap = _res_cap[j];
+          int last_out = _first_out[i+1] - 1;
+          for (int j = _first_out[i]; j != last_out; ++j) {
+            if (_res_cap[j] > max_cap) max_cap = _res_cap[j];
+          }
         }
         max_sup = std::min(std::min(max_sup, max_dem), max_cap);
         for (_delta = 1; 2 * _delta <= max_sup; _delta *= 2) ;
diff -r 2c35bef44dd1 -r 2914b6f0fde0 lemon/cost_scaling.h
--- a/lemon/cost_scaling.h	Sun Feb 21 18:55:30 2010 +0100
+++ b/lemon/cost_scaling.h	Fri Feb 26 14:00:20 2010 +0100
@@ -201,10 +201,11 @@
     TEMPLATE_DIGRAPH_TYPEDEFS(GR);
 
     typedef std::vector<int> IntVector;
-    typedef std::vector<char> BoolVector;
     typedef std::vector<Value> ValueVector;
     typedef std::vector<Cost> CostVector;
     typedef std::vector<LargeCost> LargeCostVector;
+    typedef std::vector<char> BoolVector;
+    // Note: vector<char> is used instead of vector<bool> for efficiency reasons
 
   private:
   
@@ -248,6 +249,7 @@
     // Parameters of the problem
     bool _have_lower;
     Value _sum_supply;
+    int _sup_node_num;
 
     // Data structures for storing the digraph
     IntNodeMap _node_id;
@@ -276,6 +278,12 @@
     LargeCost _epsilon;
     int _alpha;
 
+    IntVector _buckets;
+    IntVector _bucket_next;
+    IntVector _bucket_prev;
+    IntVector _rank;
+    int _max_rank;
+  
     // Data for a StaticDigraph structure
     typedef std::pair<int, int> IntPair;
     StaticDigraph _sgr;
@@ -828,6 +836,11 @@
         }
       }
 
+      _sup_node_num = 0;
+      for (NodeIt n(_graph); n != INVALID; ++n) {
+        if (sup[n] > 0) ++_sup_node_num;
+      }
+
       // Find a feasible flow using Circulation
       Circulation<Digraph, ConstMap<Arc, Value>, ValueArcMap, ValueNodeMap>
         circ(_graph, low, cap, sup);
@@ -862,7 +875,7 @@
         }
         for (int a = _first_out[_root]; a != _res_arc_num; ++a) {
           int ra = _reverse[a];
-          _res_cap[a] = 1;
+          _res_cap[a] = 0;
           _res_cap[ra] = 0;
           _cost[a] = 0;
           _cost[ra] = 0;
@@ -876,7 +889,14 @@
     void start(Method method) {
       // Maximum path length for partial augment
       const int MAX_PATH_LENGTH = 4;
-      
+
+      // Initialize data structures for buckets      
+      _max_rank = _alpha * _res_node_num;
+      _buckets.resize(_max_rank);
+      _bucket_next.resize(_res_node_num + 1);
+      _bucket_prev.resize(_res_node_num + 1);
+      _rank.resize(_res_node_num + 1);
+  
       // Execute the algorithm
       switch (method) {
         case PUSH:
@@ -915,63 +935,175 @@
         }
       }
     }
+    
+    // Initialize a cost scaling phase
+    void initPhase() {
+      // Saturate arcs not satisfying the optimality condition
+      for (int u = 0; u != _res_node_num; ++u) {
+        int last_out = _first_out[u+1];
+        LargeCost pi_u = _pi[u];
+        for (int a = _first_out[u]; a != last_out; ++a) {
+          int v = _target[a];
+          if (_res_cap[a] > 0 && _cost[a] + pi_u - _pi[v] < 0) {
+            Value delta = _res_cap[a];
+            _excess[u] -= delta;
+            _excess[v] += delta;
+            _res_cap[a] = 0;
+            _res_cap[_reverse[a]] += delta;
+          }
+        }
+      }
+      
+      // Find active nodes (i.e. nodes with positive excess)
+      for (int u = 0; u != _res_node_num; ++u) {
+        if (_excess[u] > 0) _active_nodes.push_back(u);
+      }
+
+      // Initialize the next arcs
+      for (int u = 0; u != _res_node_num; ++u) {
+        _next_out[u] = _first_out[u];
+      }
+    }
+    
+    // Early termination heuristic
+    bool earlyTermination() {
+      const double EARLY_TERM_FACTOR = 3.0;
+
+      // Build a static residual graph
+      _arc_vec.clear();
+      _cost_vec.clear();
+      for (int j = 0; j != _res_arc_num; ++j) {
+        if (_res_cap[j] > 0) {
+          _arc_vec.push_back(IntPair(_source[j], _target[j]));
+          _cost_vec.push_back(_cost[j] + 1);
+        }
+      }
+      _sgr.build(_res_node_num, _arc_vec.begin(), _arc_vec.end());
+
+      // Run Bellman-Ford algorithm to check if the current flow is optimal
+      BellmanFord<StaticDigraph, LargeCostArcMap> bf(_sgr, _cost_map);
+      bf.init(0);
+      bool done = false;
+      int K = int(EARLY_TERM_FACTOR * std::sqrt(double(_res_node_num)));
+      for (int i = 0; i < K && !done; ++i) {
+        done = bf.processNextWeakRound();
+      }
+      return done;
+    }
+
+    // Global potential update heuristic
+    void globalUpdate() {
+      int bucket_end = _root + 1;
+    
+      // Initialize buckets
+      for (int r = 0; r != _max_rank; ++r) {
+        _buckets[r] = bucket_end;
+      }
+      Value total_excess = 0;
+      for (int i = 0; i != _res_node_num; ++i) {
+        if (_excess[i] < 0) {
+          _rank[i] = 0;
+          _bucket_next[i] = _buckets[0];
+          _bucket_prev[_buckets[0]] = i;
+          _buckets[0] = i;
+        } else {
+          total_excess += _excess[i];
+          _rank[i] = _max_rank;
+        }
+      }
+      if (total_excess == 0) return;
+
+      // Search the buckets
+      int r = 0;
+      for ( ; r != _max_rank; ++r) {
+        while (_buckets[r] != bucket_end) {
+          // Remove the first node from the current bucket
+          int u = _buckets[r];
+          _buckets[r] = _bucket_next[u];
+          
+          // Search the incomming arcs of u
+          LargeCost pi_u = _pi[u];
+          int last_out = _first_out[u+1];
+          for (int a = _first_out[u]; a != last_out; ++a) {
+            int ra = _reverse[a];
+            if (_res_cap[ra] > 0) {
+              int v = _source[ra];
+              int old_rank_v = _rank[v];
+              if (r < old_rank_v) {
+                // Compute the new rank of v
+                LargeCost nrc = (_cost[ra] + _pi[v] - pi_u) / _epsilon;
+                int new_rank_v = old_rank_v;
+                if (nrc < LargeCost(_max_rank))
+                  new_rank_v = r + 1 + int(nrc);
+                  
+                // Change the rank of v
+                if (new_rank_v < old_rank_v) {
+                  _rank[v] = new_rank_v;
+                  _next_out[v] = _first_out[v];
+                  
+                  // Remove v from its old bucket
+                  if (old_rank_v < _max_rank) {
+                    if (_buckets[old_rank_v] == v) {
+                      _buckets[old_rank_v] = _bucket_next[v];
+                    } else {
+                      _bucket_next[_bucket_prev[v]] = _bucket_next[v];
+                      _bucket_prev[_bucket_next[v]] = _bucket_prev[v];
+                    }
+                  }
+                  
+                  // Insert v to its new bucket
+                  _bucket_next[v] = _buckets[new_rank_v];
+                  _bucket_prev[_buckets[new_rank_v]] = v;
+                  _buckets[new_rank_v] = v;
+                }
+              }
+            }
+          }
+
+          // Finish search if there are no more active nodes
+          if (_excess[u] > 0) {
+            total_excess -= _excess[u];
+            if (total_excess <= 0) break;
+          }
+        }
+        if (total_excess <= 0) break;
+      }
+      
+      // Relabel nodes
+      for (int u = 0; u != _res_node_num; ++u) {
+        int k = std::min(_rank[u], r);
+        if (k > 0) {
+          _pi[u] -= _epsilon * k;
+          _next_out[u] = _first_out[u];
+        }
+      }
+    }
 
     /// Execute the algorithm performing augment and relabel operations
     void startAugment(int max_length = std::numeric_limits<int>::max()) {
       // Paramters for heuristics
-      const int BF_HEURISTIC_EPSILON_BOUND = 1000;
-      const int BF_HEURISTIC_BOUND_FACTOR  = 3;
+      const int EARLY_TERM_EPSILON_LIMIT = 1000;
+      const double GLOBAL_UPDATE_FACTOR = 3.0;
 
+      const int global_update_freq = int(GLOBAL_UPDATE_FACTOR *
+        (_res_node_num + _sup_node_num * _sup_node_num));
+      int next_update_limit = global_update_freq;
+      
+      int relabel_cnt = 0;
+      
       // Perform cost scaling phases
-      IntVector pred_arc(_res_node_num);
-      std::vector<int> path_nodes;
+      std::vector<int> path;
       for ( ; _epsilon >= 1; _epsilon = _epsilon < _alpha && _epsilon > 1 ?
                                         1 : _epsilon / _alpha )
       {
-        // "Early Termination" heuristic: use Bellman-Ford algorithm
-        // to check if the current flow is optimal
-        if (_epsilon <= BF_HEURISTIC_EPSILON_BOUND) {
-          _arc_vec.clear();
-          _cost_vec.clear();
-          for (int j = 0; j != _res_arc_num; ++j) {
-            if (_res_cap[j] > 0) {
-              _arc_vec.push_back(IntPair(_source[j], _target[j]));
-              _cost_vec.push_back(_cost[j] + 1);
-            }
-          }
-          _sgr.build(_res_node_num, _arc_vec.begin(), _arc_vec.end());
-
-          BellmanFord<StaticDigraph, LargeCostArcMap> bf(_sgr, _cost_map);
-          bf.init(0);
-          bool done = false;
-          int K = int(BF_HEURISTIC_BOUND_FACTOR * sqrt(_res_node_num));
-          for (int i = 0; i < K && !done; ++i)
-            done = bf.processNextWeakRound();
-          if (done) break;
-        }
-
-        // Saturate arcs not satisfying the optimality condition
-        for (int a = 0; a != _res_arc_num; ++a) {
-          if (_res_cap[a] > 0 &&
-              _cost[a] + _pi[_source[a]] - _pi[_target[a]] < 0) {
-            Value delta = _res_cap[a];
-            _excess[_source[a]] -= delta;
-            _excess[_target[a]] += delta;
-            _res_cap[a] = 0;
-            _res_cap[_reverse[a]] += delta;
-          }
+        // Early termination heuristic
+        if (_epsilon <= EARLY_TERM_EPSILON_LIMIT) {
+          if (earlyTermination()) break;
         }
         
-        // Find active nodes (i.e. nodes with positive excess)
-        for (int u = 0; u != _res_node_num; ++u) {
-          if (_excess[u] > 0) _active_nodes.push_back(u);
-        }
-
-        // Initialize the next arcs
-        for (int u = 0; u != _res_node_num; ++u) {
-          _next_out[u] = _first_out[u];
-        }
-
+        // Initialize current phase
+        initPhase();
+        
         // Perform partial augment and relabel operations
         while (true) {
           // Select an active node (FIFO selection)
@@ -981,46 +1113,44 @@
           }
           if (_active_nodes.size() == 0) break;
           int start = _active_nodes.front();
-          path_nodes.clear();
-          path_nodes.push_back(start);
 
           // Find an augmenting path from the start node
+          path.clear();
           int tip = start;
-          while (_excess[tip] >= 0 &&
-                 int(path_nodes.size()) <= max_length) {
+          while (_excess[tip] >= 0 && int(path.size()) < max_length) {
             int u;
-            LargeCost min_red_cost, rc;
-            int last_out = _sum_supply < 0 ?
-              _first_out[tip+1] : _first_out[tip+1] - 1;
+            LargeCost min_red_cost, rc, pi_tip = _pi[tip];
+            int last_out = _first_out[tip+1];
             for (int a = _next_out[tip]; a != last_out; ++a) {
-              if (_res_cap[a] > 0 &&
-                  _cost[a] + _pi[_source[a]] - _pi[_target[a]] < 0) {
-                u = _target[a];
-                pred_arc[u] = a;
+              u = _target[a];
+              if (_res_cap[a] > 0 && _cost[a] + pi_tip - _pi[u] < 0) {
+                path.push_back(a);
                 _next_out[tip] = a;
                 tip = u;
-                path_nodes.push_back(tip);
                 goto next_step;
               }
             }
 
             // Relabel tip node
-            min_red_cost = std::numeric_limits<LargeCost>::max() / 2;
+            min_red_cost = std::numeric_limits<LargeCost>::max();
+            if (tip != start) {
+              int ra = _reverse[path.back()];
+              min_red_cost = _cost[ra] + pi_tip - _pi[_target[ra]];
+            }
             for (int a = _first_out[tip]; a != last_out; ++a) {
-              rc = _cost[a] + _pi[_source[a]] - _pi[_target[a]];
+              rc = _cost[a] + pi_tip - _pi[_target[a]];
               if (_res_cap[a] > 0 && rc < min_red_cost) {
                 min_red_cost = rc;
               }
             }
             _pi[tip] -= min_red_cost + _epsilon;
-
-            // Reset the next arc of tip
             _next_out[tip] = _first_out[tip];
+            ++relabel_cnt;
 
             // Step back
             if (tip != start) {
-              path_nodes.pop_back();
-              tip = path_nodes.back();
+              tip = _source[path.back()];
+              path.pop_back();
             }
 
           next_step: ;
@@ -1028,11 +1158,11 @@
 
           // Augment along the found path (as much flow as possible)
           Value delta;
-          int u, v = path_nodes.front(), pa;
-          for (int i = 1; i < int(path_nodes.size()); ++i) {
+          int pa, u, v = start;
+          for (int i = 0; i != int(path.size()); ++i) {
+            pa = path[i];
             u = v;
-            v = path_nodes[i];
-            pa = pred_arc[v];
+            v = _target[pa];
             delta = std::min(_res_cap[pa], _excess[u]);
             _res_cap[pa] -= delta;
             _res_cap[_reverse[pa]] += delta;
@@ -1041,6 +1171,12 @@
             if (_excess[v] > 0 && _excess[v] <= delta)
               _active_nodes.push_back(v);
           }
+
+          // Global update heuristic
+          if (relabel_cnt >= next_update_limit) {
+            globalUpdate();
+            next_update_limit += global_update_freq;
+          }
         }
       }
     }
@@ -1048,98 +1184,70 @@
     /// Execute the algorithm performing push and relabel operations
     void startPush() {
       // Paramters for heuristics
-      const int BF_HEURISTIC_EPSILON_BOUND = 1000;
-      const int BF_HEURISTIC_BOUND_FACTOR  = 3;
+      const int EARLY_TERM_EPSILON_LIMIT = 1000;
+      const double GLOBAL_UPDATE_FACTOR = 2.0;
 
+      const int global_update_freq = int(GLOBAL_UPDATE_FACTOR *
+        (_res_node_num + _sup_node_num * _sup_node_num));
+      int next_update_limit = global_update_freq;
+
+      int relabel_cnt = 0;
+      
       // Perform cost scaling phases
       BoolVector hyper(_res_node_num, false);
+      LargeCostVector hyper_cost(_res_node_num);
       for ( ; _epsilon >= 1; _epsilon = _epsilon < _alpha && _epsilon > 1 ?
                                         1 : _epsilon / _alpha )
       {
-        // "Early Termination" heuristic: use Bellman-Ford algorithm
-        // to check if the current flow is optimal
-        if (_epsilon <= BF_HEURISTIC_EPSILON_BOUND) {
-          _arc_vec.clear();
-          _cost_vec.clear();
-          for (int j = 0; j != _res_arc_num; ++j) {
-            if (_res_cap[j] > 0) {
-              _arc_vec.push_back(IntPair(_source[j], _target[j]));
-              _cost_vec.push_back(_cost[j] + 1);
-            }
-          }
-          _sgr.build(_res_node_num, _arc_vec.begin(), _arc_vec.end());
-
-          BellmanFord<StaticDigraph, LargeCostArcMap> bf(_sgr, _cost_map);
-          bf.init(0);
-          bool done = false;
-          int K = int(BF_HEURISTIC_BOUND_FACTOR * sqrt(_res_node_num));
-          for (int i = 0; i < K && !done; ++i)
-            done = bf.processNextWeakRound();
-          if (done) break;
+        // Early termination heuristic
+        if (_epsilon <= EARLY_TERM_EPSILON_LIMIT) {
+          if (earlyTermination()) break;
         }
-
-        // Saturate arcs not satisfying the optimality condition
-        for (int a = 0; a != _res_arc_num; ++a) {
-          if (_res_cap[a] > 0 &&
-              _cost[a] + _pi[_source[a]] - _pi[_target[a]] < 0) {
-            Value delta = _res_cap[a];
-            _excess[_source[a]] -= delta;
-            _excess[_target[a]] += delta;
-            _res_cap[a] = 0;
-            _res_cap[_reverse[a]] += delta;
-          }
-        }
-
-        // Find active nodes (i.e. nodes with positive excess)
-        for (int u = 0; u != _res_node_num; ++u) {
-          if (_excess[u] > 0) _active_nodes.push_back(u);
-        }
-
-        // Initialize the next arcs
-        for (int u = 0; u != _res_node_num; ++u) {
-          _next_out[u] = _first_out[u];
-        }
+        
+        // Initialize current phase
+        initPhase();
 
         // Perform push and relabel operations
         while (_active_nodes.size() > 0) {
-          LargeCost min_red_cost, rc;
+          LargeCost min_red_cost, rc, pi_n;
           Value delta;
           int n, t, a, last_out = _res_arc_num;
 
+        next_node:
           // Select an active node (FIFO selection)
-        next_node:
           n = _active_nodes.front();
-          last_out = _sum_supply < 0 ?
-            _first_out[n+1] : _first_out[n+1] - 1;
-
+          last_out = _first_out[n+1];
+          pi_n = _pi[n];
+          
           // Perform push operations if there are admissible arcs
           if (_excess[n] > 0) {
             for (a = _next_out[n]; a != last_out; ++a) {
               if (_res_cap[a] > 0 &&
-                  _cost[a] + _pi[_source[a]] - _pi[_target[a]] < 0) {
+                  _cost[a] + pi_n - _pi[_target[a]] < 0) {
                 delta = std::min(_res_cap[a], _excess[n]);
                 t = _target[a];
 
                 // Push-look-ahead heuristic
                 Value ahead = -_excess[t];
-                int last_out_t = _sum_supply < 0 ?
-                  _first_out[t+1] : _first_out[t+1] - 1;
+                int last_out_t = _first_out[t+1];
+                LargeCost pi_t = _pi[t];
                 for (int ta = _next_out[t]; ta != last_out_t; ++ta) {
                   if (_res_cap[ta] > 0 && 
-                      _cost[ta] + _pi[_source[ta]] - _pi[_target[ta]] < 0)
+                      _cost[ta] + pi_t - _pi[_target[ta]] < 0)
                     ahead += _res_cap[ta];
                   if (ahead >= delta) break;
                 }
                 if (ahead < 0) ahead = 0;
 
                 // Push flow along the arc
-                if (ahead < delta) {
+                if (ahead < delta && !hyper[t]) {
                   _res_cap[a] -= ahead;
                   _res_cap[_reverse[a]] += ahead;
                   _excess[n] -= ahead;
                   _excess[t] += ahead;
                   _active_nodes.push_front(t);
                   hyper[t] = true;
+                  hyper_cost[t] = _cost[a] + pi_n - pi_t;
                   _next_out[n] = a;
                   goto next_node;
                 } else {
@@ -1162,18 +1270,18 @@
 
           // Relabel the node if it is still active (or hyper)
           if (_excess[n] > 0 || hyper[n]) {
-            min_red_cost = std::numeric_limits<LargeCost>::max() / 2;
+             min_red_cost = hyper[n] ? -hyper_cost[n] :
+               std::numeric_limits<LargeCost>::max();
             for (int a = _first_out[n]; a != last_out; ++a) {
-              rc = _cost[a] + _pi[_source[a]] - _pi[_target[a]];
+              rc = _cost[a] + pi_n - _pi[_target[a]];
               if (_res_cap[a] > 0 && rc < min_red_cost) {
                 min_red_cost = rc;
               }
             }
             _pi[n] -= min_red_cost + _epsilon;
+            _next_out[n] = _first_out[n];
             hyper[n] = false;
-
-            // Reset the next arc
-            _next_out[n] = _first_out[n];
+            ++relabel_cnt;
           }
         
           // Remove nodes that are not active nor hyper
@@ -1183,6 +1291,14 @@
                   !hyper[_active_nodes.front()] ) {
             _active_nodes.pop_front();
           }
+          
+          // Global update heuristic
+          if (relabel_cnt >= next_update_limit) {
+            globalUpdate();
+            for (int u = 0; u != _res_node_num; ++u)
+              hyper[u] = false;
+            next_update_limit += global_update_freq;
+          }
         }
       }
     }
diff -r 2c35bef44dd1 -r 2914b6f0fde0 lemon/cycle_canceling.h
--- a/lemon/cycle_canceling.h	Sun Feb 21 18:55:30 2010 +0100
+++ b/lemon/cycle_canceling.h	Fri Feb 26 14:00:20 2010 +0100
@@ -144,10 +144,11 @@
     TEMPLATE_DIGRAPH_TYPEDEFS(GR);
     
     typedef std::vector<int> IntVector;
-    typedef std::vector<char> CharVector;
     typedef std::vector<double> DoubleVector;
     typedef std::vector<Value> ValueVector;
     typedef std::vector<Cost> CostVector;
+    typedef std::vector<char> BoolVector;
+    // Note: vector<char> is used instead of vector<bool> for efficiency reasons
 
   private:
   
@@ -198,7 +199,7 @@
     IntArcMap _arc_idf;
     IntArcMap _arc_idb;
     IntVector _first_out;
-    CharVector _forward;
+    BoolVector _forward;
     IntVector _source;
     IntVector _target;
     IntVector _reverse;
@@ -962,8 +963,8 @@
       // Contruct auxiliary data vectors
       DoubleVector pi(_res_node_num, 0.0);
       IntVector level(_res_node_num);
-      CharVector reached(_res_node_num);
-      CharVector processed(_res_node_num);
+      BoolVector reached(_res_node_num);
+      BoolVector processed(_res_node_num);
       IntVector pred_node(_res_node_num);
       IntVector pred_arc(_res_node_num);
       std::vector<int> stack(_res_node_num);
diff -r 2c35bef44dd1 -r 2914b6f0fde0 lemon/network_simplex.h
--- a/lemon/network_simplex.h	Sun Feb 21 18:55:30 2010 +0100
+++ b/lemon/network_simplex.h	Fri Feb 26 14:00:20 2010 +0100
@@ -164,9 +164,10 @@
     TEMPLATE_DIGRAPH_TYPEDEFS(GR);
 
     typedef std::vector<int> IntVector;
-    typedef std::vector<char> CharVector;
     typedef std::vector<Value> ValueVector;
     typedef std::vector<Cost> CostVector;
+    typedef std::vector<char> BoolVector;
+    // Note: vector<char> is used instead of vector<bool> for efficiency reasons
 
     // State constants for arcs
     enum ArcStateEnum {
@@ -213,8 +214,8 @@
     IntVector _succ_num;
     IntVector _last_succ;
     IntVector _dirty_revs;
-    CharVector _forward;
-    CharVector _state;
+    BoolVector _forward;
+    BoolVector _state;
     int _root;
 
     // Temporary data used in the current pivot iteration
@@ -245,7 +246,7 @@
       const IntVector  &_source;
       const IntVector  &_target;
       const CostVector &_cost;
-      const CharVector &_state;
+      const BoolVector &_state;
       const CostVector &_pi;
       int &_in_arc;
       int _search_arc_num;
@@ -266,7 +267,7 @@
       // Find next entering arc
       bool findEnteringArc() {
         Cost c;
-        for (int e = _next_arc; e < _search_arc_num; ++e) {
+        for (int e = _next_arc; e != _search_arc_num; ++e) {
           c = _state[e] * (_cost[e] + _pi[_source[e]] - _pi[_target[e]]);
           if (c < 0) {
             _in_arc = e;
@@ -274,7 +275,7 @@
             return true;
           }
         }
-        for (int e = 0; e < _next_arc; ++e) {
+        for (int e = 0; e != _next_arc; ++e) {
           c = _state[e] * (_cost[e] + _pi[_source[e]] - _pi[_target[e]]);
           if (c < 0) {
             _in_arc = e;
@@ -297,7 +298,7 @@
       const IntVector  &_source;
       const IntVector  &_target;
       const CostVector &_cost;
-      const CharVector &_state;
+      const BoolVector &_state;
       const CostVector &_pi;
       int &_in_arc;
       int _search_arc_num;
@@ -314,7 +315,7 @@
       // Find next entering arc
       bool findEnteringArc() {
         Cost c, min = 0;
-        for (int e = 0; e < _search_arc_num; ++e) {
+        for (int e = 0; e != _search_arc_num; ++e) {
           c = _state[e] * (_cost[e] + _pi[_source[e]] - _pi[_target[e]]);
           if (c < min) {
             min = c;
@@ -336,7 +337,7 @@
       const IntVector  &_source;
       const IntVector  &_target;
       const CostVector &_cost;
-      const CharVector &_state;
+      const BoolVector &_state;
       const CostVector &_pi;
       int &_in_arc;
       int _search_arc_num;
@@ -355,7 +356,7 @@
         _next_arc(0)
       {
         // The main parameters of the pivot rule
-        const double BLOCK_SIZE_FACTOR = 0.5;
+        const double BLOCK_SIZE_FACTOR = 1.0;
         const int MIN_BLOCK_SIZE = 10;
 
         _block_size = std::max( int(BLOCK_SIZE_FACTOR *
@@ -368,7 +369,7 @@
         Cost c, min = 0;
         int cnt = _block_size;
         int e;
-        for (e = _next_arc; e < _search_arc_num; ++e) {
+        for (e = _next_arc; e != _search_arc_num; ++e) {
           c = _state[e] * (_cost[e] + _pi[_source[e]] - _pi[_target[e]]);
           if (c < min) {
             min = c;
@@ -379,7 +380,7 @@
             cnt = _block_size;
           }
         }
-        for (e = 0; e < _next_arc; ++e) {
+        for (e = 0; e != _next_arc; ++e) {
           c = _state[e] * (_cost[e] + _pi[_source[e]] - _pi[_target[e]]);
           if (c < min) {
             min = c;
@@ -409,7 +410,7 @@
       const IntVector  &_source;
       const IntVector  &_target;
       const CostVector &_cost;
-      const CharVector &_state;
+      const BoolVector &_state;
       const CostVector &_pi;
       int &_in_arc;
       int _search_arc_num;
@@ -470,7 +471,7 @@
         // Major iteration: build a new candidate list
         min = 0;
         _curr_length = 0;
-        for (e = _next_arc; e < _search_arc_num; ++e) {
+        for (e = _next_arc; e != _search_arc_num; ++e) {
           c = _state[e] * (_cost[e] + _pi[_source[e]] - _pi[_target[e]]);
           if (c < 0) {
             _candidates[_curr_length++] = e;
@@ -481,7 +482,7 @@
             if (_curr_length == _list_length) goto search_end;
           }
         }
-        for (e = 0; e < _next_arc; ++e) {
+        for (e = 0; e != _next_arc; ++e) {
           c = _state[e] * (_cost[e] + _pi[_source[e]] - _pi[_target[e]]);
           if (c < 0) {
             _candidates[_curr_length++] = e;
@@ -512,7 +513,7 @@
       const IntVector  &_source;
       const IntVector  &_target;
       const CostVector &_cost;
-      const CharVector &_state;
+      const BoolVector &_state;
       const CostVector &_pi;
       int &_in_arc;
       int _search_arc_num;
@@ -565,7 +566,7 @@
       bool findEnteringArc() {
         // Check the current candidate list
         int e;
-        for (int i = 0; i < _curr_length; ++i) {
+        for (int i = 0; i != _curr_length; ++i) {
           e = _candidates[i];
           _cand_cost[e] = _state[e] *
             (_cost[e] + _pi[_source[e]] - _pi[_target[e]]);
@@ -578,7 +579,7 @@
         int cnt = _block_size;
         int limit = _head_length;
 
-        for (e = _next_arc; e < _search_arc_num; ++e) {
+        for (e = _next_arc; e != _search_arc_num; ++e) {
           _cand_cost[e] = _state[e] *
             (_cost[e] + _pi[_source[e]] - _pi[_target[e]]);
           if (_cand_cost[e] < 0) {
@@ -590,7 +591,7 @@
             cnt = _block_size;
           }
         }
-        for (e = 0; e < _next_arc; ++e) {
+        for (e = 0; e != _next_arc; ++e) {
           _cand_cost[e] = _state[e] *
             (_cost[e] + _pi[_source[e]] - _pi[_target[e]]);
           if (_cand_cost[e] < 0) {
@@ -1360,7 +1361,7 @@
       }
 
       // Update _rev_thread using the new _thread values
-      for (int i = 0; i < int(_dirty_revs.size()); ++i) {
+      for (int i = 0; i != int(_dirty_revs.size()); ++i) {
         u = _dirty_revs[i];
         _rev_thread[_thread[u]] = u;
       }
@@ -1432,6 +1433,100 @@
       }
     }
 
+    // Heuristic initial pivots
+    bool initialPivots() {
+      Value curr, total = 0;
+      std::vector<Node> supply_nodes, demand_nodes;
+      for (NodeIt u(_graph); u != INVALID; ++u) {
+        curr = _supply[_node_id[u]];
+        if (curr > 0) {
+          total += curr;
+          supply_nodes.push_back(u);
+        }
+        else if (curr < 0) {
+          demand_nodes.push_back(u);
+        }
+      }
+      if (_sum_supply > 0) total -= _sum_supply;
+      if (total <= 0) return true;
+
+      IntVector arc_vector;
+      if (_sum_supply >= 0) {
+        if (supply_nodes.size() == 1 && demand_nodes.size() == 1) {
+          // Perform a reverse graph search from the sink to the source
+          typename GR::template NodeMap<bool> reached(_graph, false);
+          Node s = supply_nodes[0], t = demand_nodes[0];
+          std::vector<Node> stack;
+          reached[t] = true;
+          stack.push_back(t);
+          while (!stack.empty()) {
+            Node u, v = stack.back();
+            stack.pop_back();
+            if (v == s) break;
+            for (InArcIt a(_graph, v); a != INVALID; ++a) {
+              if (reached[u = _graph.source(a)]) continue;
+              int j = _arc_id[a];
+              if (_cap[j] >= total) {
+                arc_vector.push_back(j);
+                reached[u] = true;
+                stack.push_back(u);
+              }
+            }
+          }
+        } else {
+          // Find the min. cost incomming arc for each demand node
+          for (int i = 0; i != int(demand_nodes.size()); ++i) {
+            Node v = demand_nodes[i];
+            Cost c, min_cost = std::numeric_limits<Cost>::max();
+            Arc min_arc = INVALID;
+            for (InArcIt a(_graph, v); a != INVALID; ++a) {
+              c = _cost[_arc_id[a]];
+              if (c < min_cost) {
+                min_cost = c;
+                min_arc = a;
+              }
+            }
+            if (min_arc != INVALID) {
+              arc_vector.push_back(_arc_id[min_arc]);
+            }
+          }
+        }
+      } else {
+        // Find the min. cost outgoing arc for each supply node
+        for (int i = 0; i != int(supply_nodes.size()); ++i) {
+          Node u = supply_nodes[i];
+          Cost c, min_cost = std::numeric_limits<Cost>::max();
+          Arc min_arc = INVALID;
+          for (OutArcIt a(_graph, u); a != INVALID; ++a) {
+            c = _cost[_arc_id[a]];
+            if (c < min_cost) {
+              min_cost = c;
+              min_arc = a;
+            }
+          }
+          if (min_arc != INVALID) {
+            arc_vector.push_back(_arc_id[min_arc]);
+          }
+        }
+      }
+
+      // Perform heuristic initial pivots
+      for (int i = 0; i != int(arc_vector.size()); ++i) {
+        in_arc = arc_vector[i];
+        if (_state[in_arc] * (_cost[in_arc] + _pi[_source[in_arc]] -
+            _pi[_target[in_arc]]) >= 0) continue;
+        findJoinNode();
+        bool change = findLeavingArc();
+        if (delta >= MAX) return false;
+        changeFlow(change);
+        if (change) {
+          updateTreeStructure();
+          updatePotential();
+        }
+      }
+      return true;
+    }
+
     // Execute the algorithm
     ProblemType start(PivotRule pivot_rule) {
       // Select the pivot rule implementation
@@ -1454,6 +1549,9 @@
     ProblemType start() {
       PivotRuleImpl pivot(*this);
 
+      // Perform heuristic initial pivots
+      if (!initialPivots()) return UNBOUNDED;
+
       // Execute the Network Simplex algorithm
       while (pivot.findEnteringArc()) {
         findJoinNode();