src/amd/amd_2.c
changeset 1 c445c931472f
     1.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     1.2 +++ b/src/amd/amd_2.c	Mon Dec 06 13:09:21 2010 +0100
     1.3 @@ -0,0 +1,1842 @@
     1.4 +/* ========================================================================= */
     1.5 +/* === AMD_2 =============================================================== */
     1.6 +/* ========================================================================= */
     1.7 +
     1.8 +/* ------------------------------------------------------------------------- */
     1.9 +/* AMD, Copyright (c) Timothy A. Davis,                                      */
    1.10 +/* Patrick R. Amestoy, and Iain S. Duff.  See ../README.txt for License.     */
    1.11 +/* email: davis at cise.ufl.edu    CISE Department, Univ. of Florida.        */
    1.12 +/* web: http://www.cise.ufl.edu/research/sparse/amd                          */
    1.13 +/* ------------------------------------------------------------------------- */
    1.14 +
    1.15 +/* AMD_2:  performs the AMD ordering on a symmetric sparse matrix A, followed
    1.16 + * by a postordering (via depth-first search) of the assembly tree using the
    1.17 + * AMD_postorder routine.
    1.18 + */
    1.19 +
    1.20 +#include "amd_internal.h"
    1.21 +
    1.22 +/* ========================================================================= */
    1.23 +/* === clear_flag ========================================================== */
    1.24 +/* ========================================================================= */
    1.25 +
    1.26 +static Int clear_flag (Int wflg, Int wbig, Int W [ ], Int n)
    1.27 +{
    1.28 +    Int x ;
    1.29 +    if (wflg < 2 || wflg >= wbig)
    1.30 +    {
    1.31 +        for (x = 0 ; x < n ; x++)
    1.32 +        {
    1.33 +            if (W [x] != 0) W [x] = 1 ;
    1.34 +        }
    1.35 +        wflg = 2 ;
    1.36 +    }
    1.37 +    /*  at this point, W [0..n-1] < wflg holds */
    1.38 +    return (wflg) ;
    1.39 +}
    1.40 +
    1.41 +
    1.42 +/* ========================================================================= */
    1.43 +/* === AMD_2 =============================================================== */
    1.44 +/* ========================================================================= */
    1.45 +
    1.46 +GLOBAL void AMD_2
    1.47 +(
    1.48 +    Int n,              /* A is n-by-n, where n > 0 */
    1.49 +    Int Pe [ ],         /* Pe [0..n-1]: index in Iw of row i on input */
    1.50 +    Int Iw [ ],         /* workspace of size iwlen. Iw [0..pfree-1]
    1.51 +                         * holds the matrix on input */
    1.52 +    Int Len [ ],        /* Len [0..n-1]: length for row/column i on input */
    1.53 +    Int iwlen,          /* length of Iw. iwlen >= pfree + n */
    1.54 +    Int pfree,          /* Iw [pfree ... iwlen-1] is empty on input */
    1.55 +
    1.56 +    /* 7 size-n workspaces, not defined on input: */
    1.57 +    Int Nv [ ],         /* the size of each supernode on output */
    1.58 +    Int Next [ ],       /* the output inverse permutation */
    1.59 +    Int Last [ ],       /* the output permutation */
    1.60 +    Int Head [ ],
    1.61 +    Int Elen [ ],       /* the size columns of L for each supernode */
    1.62 +    Int Degree [ ],
    1.63 +    Int W [ ],
    1.64 +
    1.65 +    /* control parameters and output statistics */
    1.66 +    double Control [ ], /* array of size AMD_CONTROL */
    1.67 +    double Info [ ]     /* array of size AMD_INFO */
    1.68 +)
    1.69 +{
    1.70 +
    1.71 +/*
    1.72 + * Given a representation of the nonzero pattern of a symmetric matrix, A,
    1.73 + * (excluding the diagonal) perform an approximate minimum (UMFPACK/MA38-style)
    1.74 + * degree ordering to compute a pivot order such that the introduction of
    1.75 + * nonzeros (fill-in) in the Cholesky factors A = LL' is kept low.  At each
    1.76 + * step, the pivot selected is the one with the minimum UMFAPACK/MA38-style
    1.77 + * upper-bound on the external degree.  This routine can optionally perform
    1.78 + * aggresive absorption (as done by MC47B in the Harwell Subroutine
    1.79 + * Library).
    1.80 + *
    1.81 + * The approximate degree algorithm implemented here is the symmetric analog of
    1.82 + * the degree update algorithm in MA38 and UMFPACK (the Unsymmetric-pattern
    1.83 + * MultiFrontal PACKage, both by Davis and Duff).  The routine is based on the
    1.84 + * MA27 minimum degree ordering algorithm by Iain Duff and John Reid.
    1.85 + *
    1.86 + * This routine is a translation of the original AMDBAR and MC47B routines,
    1.87 + * in Fortran, with the following modifications:
    1.88 + *
    1.89 + * (1) dense rows/columns are removed prior to ordering the matrix, and placed
    1.90 + *      last in the output order.  The presence of a dense row/column can
    1.91 + *      increase the ordering time by up to O(n^2), unless they are removed
    1.92 + *      prior to ordering.
    1.93 + *
    1.94 + * (2) the minimum degree ordering is followed by a postordering (depth-first
    1.95 + *      search) of the assembly tree.  Note that mass elimination (discussed
    1.96 + *      below) combined with the approximate degree update can lead to the mass
    1.97 + *      elimination of nodes with lower exact degree than the current pivot
    1.98 + *      element.  No additional fill-in is caused in the representation of the
    1.99 + *      Schur complement.  The mass-eliminated nodes merge with the current
   1.100 + *      pivot element.  They are ordered prior to the current pivot element.
   1.101 + *      Because they can have lower exact degree than the current element, the
   1.102 + *      merger of two or more of these nodes in the current pivot element can
   1.103 + *      lead to a single element that is not a "fundamental supernode".  The
   1.104 + *      diagonal block can have zeros in it.  Thus, the assembly tree used here
   1.105 + *      is not guaranteed to be the precise supernodal elemination tree (with
   1.106 + *      "funadmental" supernodes), and the postordering performed by this
   1.107 + *      routine is not guaranteed to be a precise postordering of the
   1.108 + *      elimination tree.
   1.109 + *
   1.110 + * (3) input parameters are added, to control aggressive absorption and the
   1.111 + *      detection of "dense" rows/columns of A.
   1.112 + *
   1.113 + * (4) additional statistical information is returned, such as the number of
   1.114 + *      nonzeros in L, and the flop counts for subsequent LDL' and LU
   1.115 + *      factorizations.  These are slight upper bounds, because of the mass
   1.116 + *      elimination issue discussed above.
   1.117 + *
   1.118 + * (5) additional routines are added to interface this routine to MATLAB
   1.119 + *      to provide a simple C-callable user-interface, to check inputs for
   1.120 + *      errors, compute the symmetry of the pattern of A and the number of
   1.121 + *      nonzeros in each row/column of A+A', to compute the pattern of A+A',
   1.122 + *      to perform the assembly tree postordering, and to provide debugging
   1.123 + *      ouput.  Many of these functions are also provided by the Fortran
   1.124 + *      Harwell Subroutine Library routine MC47A.
   1.125 + *
   1.126 + * (6) both int and UF_long versions are provided.  In the descriptions below
   1.127 + *      and integer is and int or UF_long depending on which version is
   1.128 + *      being used.
   1.129 +
   1.130 + **********************************************************************
   1.131 + ***** CAUTION:  ARGUMENTS ARE NOT CHECKED FOR ERRORS ON INPUT.  ******
   1.132 + **********************************************************************
   1.133 + ** If you want error checking, a more versatile input format, and a **
   1.134 + ** simpler user interface, use amd_order or amd_l_order instead.    **
   1.135 + ** This routine is not meant to be user-callable.                   **
   1.136 + **********************************************************************
   1.137 +
   1.138 + * ----------------------------------------------------------------------------
   1.139 + * References:
   1.140 + * ----------------------------------------------------------------------------
   1.141 + *
   1.142 + *  [1] Timothy A. Davis and Iain Duff, "An unsymmetric-pattern multifrontal
   1.143 + *      method for sparse LU factorization", SIAM J. Matrix Analysis and
   1.144 + *      Applications, vol. 18, no. 1, pp. 140-158.  Discusses UMFPACK / MA38,
   1.145 + *      which first introduced the approximate minimum degree used by this
   1.146 + *      routine.
   1.147 + *
   1.148 + *  [2] Patrick Amestoy, Timothy A. Davis, and Iain S. Duff, "An approximate
   1.149 + *      minimum degree ordering algorithm," SIAM J. Matrix Analysis and
   1.150 + *      Applications, vol. 17, no. 4, pp. 886-905, 1996.  Discusses AMDBAR and
   1.151 + *      MC47B, which are the Fortran versions of this routine.
   1.152 + *
   1.153 + *  [3] Alan George and Joseph Liu, "The evolution of the minimum degree
   1.154 + *      ordering algorithm," SIAM Review, vol. 31, no. 1, pp. 1-19, 1989.
   1.155 + *      We list below the features mentioned in that paper that this code
   1.156 + *      includes:
   1.157 + *
   1.158 + *      mass elimination:
   1.159 + *          Yes.  MA27 relied on supervariable detection for mass elimination.
   1.160 + *
   1.161 + *      indistinguishable nodes:
   1.162 + *          Yes (we call these "supervariables").  This was also in the MA27
   1.163 + *          code - although we modified the method of detecting them (the
   1.164 + *          previous hash was the true degree, which we no longer keep track
   1.165 + *          of).  A supervariable is a set of rows with identical nonzero
   1.166 + *          pattern.  All variables in a supervariable are eliminated together.
   1.167 + *          Each supervariable has as its numerical name that of one of its
   1.168 + *          variables (its principal variable).
   1.169 + *
   1.170 + *      quotient graph representation:
   1.171 + *          Yes.  We use the term "element" for the cliques formed during
   1.172 + *          elimination.  This was also in the MA27 code.  The algorithm can
   1.173 + *          operate in place, but it will work more efficiently if given some
   1.174 + *          "elbow room."
   1.175 + *
   1.176 + *      element absorption:
   1.177 + *          Yes.  This was also in the MA27 code.
   1.178 + *
   1.179 + *      external degree:
   1.180 + *          Yes.  The MA27 code was based on the true degree.
   1.181 + *
   1.182 + *      incomplete degree update and multiple elimination:
   1.183 + *          No.  This was not in MA27, either.  Our method of degree update
   1.184 + *          within MC47B is element-based, not variable-based.  It is thus
   1.185 + *          not well-suited for use with incomplete degree update or multiple
   1.186 + *          elimination.
   1.187 + *
   1.188 + * Authors, and Copyright (C) 2004 by:
   1.189 + * Timothy A. Davis, Patrick Amestoy, Iain S. Duff, John K. Reid.
   1.190 + *
   1.191 + * Acknowledgements: This work (and the UMFPACK package) was supported by the
   1.192 + * National Science Foundation (ASC-9111263, DMS-9223088, and CCR-0203270).
   1.193 + * The UMFPACK/MA38 approximate degree update algorithm, the unsymmetric analog
   1.194 + * which forms the basis of AMD, was developed while Tim Davis was supported by
   1.195 + * CERFACS (Toulouse, France) in a post-doctoral position.  This C version, and
   1.196 + * the etree postorder, were written while Tim Davis was on sabbatical at
   1.197 + * Stanford University and Lawrence Berkeley National Laboratory.
   1.198 +
   1.199 + * ----------------------------------------------------------------------------
   1.200 + * INPUT ARGUMENTS (unaltered):
   1.201 + * ----------------------------------------------------------------------------
   1.202 +
   1.203 + * n:  The matrix order.  Restriction:  n >= 1.
   1.204 + *
   1.205 + * iwlen:  The size of the Iw array.  On input, the matrix is stored in
   1.206 + *      Iw [0..pfree-1].  However, Iw [0..iwlen-1] should be slightly larger
   1.207 + *      than what is required to hold the matrix, at least iwlen >= pfree + n.
   1.208 + *      Otherwise, excessive compressions will take place.  The recommended
   1.209 + *      value of iwlen is 1.2 * pfree + n, which is the value used in the
   1.210 + *      user-callable interface to this routine (amd_order.c).  The algorithm
   1.211 + *      will not run at all if iwlen < pfree.  Restriction: iwlen >= pfree + n.
   1.212 + *      Note that this is slightly more restrictive than the actual minimum
   1.213 + *      (iwlen >= pfree), but AMD_2 will be very slow with no elbow room.
   1.214 + *      Thus, this routine enforces a bare minimum elbow room of size n.
   1.215 + *
   1.216 + * pfree: On input the tail end of the array, Iw [pfree..iwlen-1], is empty,
   1.217 + *      and the matrix is stored in Iw [0..pfree-1].  During execution,
   1.218 + *      additional data is placed in Iw, and pfree is modified so that
   1.219 + *      Iw [pfree..iwlen-1] is always the unused part of Iw.
   1.220 + *
   1.221 + * Control:  A double array of size AMD_CONTROL containing input parameters
   1.222 + *      that affect how the ordering is computed.  If NULL, then default
   1.223 + *      settings are used.
   1.224 + *
   1.225 + *      Control [AMD_DENSE] is used to determine whether or not a given input
   1.226 + *      row is "dense".  A row is "dense" if the number of entries in the row
   1.227 + *      exceeds Control [AMD_DENSE] times sqrt (n), except that rows with 16 or
   1.228 + *      fewer entries are never considered "dense".  To turn off the detection
   1.229 + *      of dense rows, set Control [AMD_DENSE] to a negative number, or to a
   1.230 + *      number larger than sqrt (n).  The default value of Control [AMD_DENSE]
   1.231 + *      is AMD_DEFAULT_DENSE, which is defined in amd.h as 10.
   1.232 + *
   1.233 + *      Control [AMD_AGGRESSIVE] is used to determine whether or not aggressive
   1.234 + *      absorption is to be performed.  If nonzero, then aggressive absorption
   1.235 + *      is performed (this is the default).
   1.236 +
   1.237 + * ----------------------------------------------------------------------------
   1.238 + * INPUT/OUPUT ARGUMENTS:
   1.239 + * ----------------------------------------------------------------------------
   1.240 + *
   1.241 + * Pe:  An integer array of size n.  On input, Pe [i] is the index in Iw of
   1.242 + *      the start of row i.  Pe [i] is ignored if row i has no off-diagonal
   1.243 + *      entries.  Thus Pe [i] must be in the range 0 to pfree-1 for non-empty
   1.244 + *      rows.
   1.245 + *
   1.246 + *      During execution, it is used for both supervariables and elements:
   1.247 + *
   1.248 + *      Principal supervariable i:  index into Iw of the description of
   1.249 + *          supervariable i.  A supervariable represents one or more rows of
   1.250 + *          the matrix with identical nonzero pattern.  In this case,
   1.251 + *          Pe [i] >= 0.
   1.252 + *
   1.253 + *      Non-principal supervariable i:  if i has been absorbed into another
   1.254 + *          supervariable j, then Pe [i] = FLIP (j), where FLIP (j) is defined
   1.255 + *          as (-(j)-2).  Row j has the same pattern as row i.  Note that j
   1.256 + *          might later be absorbed into another supervariable j2, in which
   1.257 + *          case Pe [i] is still FLIP (j), and Pe [j] = FLIP (j2) which is
   1.258 + *          < EMPTY, where EMPTY is defined as (-1) in amd_internal.h.
   1.259 + *
   1.260 + *      Unabsorbed element e:  the index into Iw of the description of element
   1.261 + *          e, if e has not yet been absorbed by a subsequent element.  Element
   1.262 + *          e is created when the supervariable of the same name is selected as
   1.263 + *          the pivot.  In this case, Pe [i] >= 0.
   1.264 + *
   1.265 + *      Absorbed element e:  if element e is absorbed into element e2, then
   1.266 + *          Pe [e] = FLIP (e2).  This occurs when the pattern of e (which we
   1.267 + *          refer to as Le) is found to be a subset of the pattern of e2 (that
   1.268 + *          is, Le2).  In this case, Pe [i] < EMPTY.  If element e is "null"
   1.269 + *          (it has no nonzeros outside its pivot block), then Pe [e] = EMPTY,
   1.270 + *          and e is the root of an assembly subtree (or the whole tree if
   1.271 + *          there is just one such root).
   1.272 + *
   1.273 + *      Dense variable i:  if i is "dense", then Pe [i] = EMPTY.
   1.274 + *
   1.275 + *      On output, Pe holds the assembly tree/forest, which implicitly
   1.276 + *      represents a pivot order with identical fill-in as the actual order
   1.277 + *      (via a depth-first search of the tree), as follows.  If Nv [i] > 0,
   1.278 + *      then i represents a node in the assembly tree, and the parent of i is
   1.279 + *      Pe [i], or EMPTY if i is a root.  If Nv [i] = 0, then (i, Pe [i])
   1.280 + *      represents an edge in a subtree, the root of which is a node in the
   1.281 + *      assembly tree.  Note that i refers to a row/column in the original
   1.282 + *      matrix, not the permuted matrix.
   1.283 + *
   1.284 + * Info:  A double array of size AMD_INFO.  If present, (that is, not NULL),
   1.285 + *      then statistics about the ordering are returned in the Info array.
   1.286 + *      See amd.h for a description.
   1.287 +
   1.288 + * ----------------------------------------------------------------------------
   1.289 + * INPUT/MODIFIED (undefined on output):
   1.290 + * ----------------------------------------------------------------------------
   1.291 + *
   1.292 + * Len:  An integer array of size n.  On input, Len [i] holds the number of
   1.293 + *      entries in row i of the matrix, excluding the diagonal.  The contents
   1.294 + *      of Len are undefined on output.
   1.295 + *
   1.296 + * Iw:  An integer array of size iwlen.  On input, Iw [0..pfree-1] holds the
   1.297 + *      description of each row i in the matrix.  The matrix must be symmetric,
   1.298 + *      and both upper and lower triangular parts must be present.  The
   1.299 + *      diagonal must not be present.  Row i is held as follows:
   1.300 + *
   1.301 + *          Len [i]:  the length of the row i data structure in the Iw array.
   1.302 + *          Iw [Pe [i] ... Pe [i] + Len [i] - 1]:
   1.303 + *              the list of column indices for nonzeros in row i (simple
   1.304 + *              supervariables), excluding the diagonal.  All supervariables
   1.305 + *              start with one row/column each (supervariable i is just row i).
   1.306 + *              If Len [i] is zero on input, then Pe [i] is ignored on input.
   1.307 + *
   1.308 + *          Note that the rows need not be in any particular order, and there
   1.309 + *          may be empty space between the rows.
   1.310 + *
   1.311 + *      During execution, the supervariable i experiences fill-in.  This is
   1.312 + *      represented by placing in i a list of the elements that cause fill-in
   1.313 + *      in supervariable i:
   1.314 + *
   1.315 + *          Len [i]:  the length of supervariable i in the Iw array.
   1.316 + *          Iw [Pe [i] ... Pe [i] + Elen [i] - 1]:
   1.317 + *              the list of elements that contain i.  This list is kept short
   1.318 + *              by removing absorbed elements.
   1.319 + *          Iw [Pe [i] + Elen [i] ... Pe [i] + Len [i] - 1]:
   1.320 + *              the list of supervariables in i.  This list is kept short by
   1.321 + *              removing nonprincipal variables, and any entry j that is also
   1.322 + *              contained in at least one of the elements (j in Le) in the list
   1.323 + *              for i (e in row i).
   1.324 + *
   1.325 + *      When supervariable i is selected as pivot, we create an element e of
   1.326 + *      the same name (e=i):
   1.327 + *
   1.328 + *          Len [e]:  the length of element e in the Iw array.
   1.329 + *          Iw [Pe [e] ... Pe [e] + Len [e] - 1]:
   1.330 + *              the list of supervariables in element e.
   1.331 + *
   1.332 + *      An element represents the fill-in that occurs when supervariable i is
   1.333 + *      selected as pivot (which represents the selection of row i and all
   1.334 + *      non-principal variables whose principal variable is i).  We use the
   1.335 + *      term Le to denote the set of all supervariables in element e.  Absorbed
   1.336 + *      supervariables and elements are pruned from these lists when
   1.337 + *      computationally convenient.
   1.338 + *
   1.339 + *  CAUTION:  THE INPUT MATRIX IS OVERWRITTEN DURING COMPUTATION.
   1.340 + *  The contents of Iw are undefined on output.
   1.341 +
   1.342 + * ----------------------------------------------------------------------------
   1.343 + * OUTPUT (need not be set on input):
   1.344 + * ----------------------------------------------------------------------------
   1.345 + *
   1.346 + * Nv:  An integer array of size n.  During execution, ABS (Nv [i]) is equal to
   1.347 + *      the number of rows that are represented by the principal supervariable
   1.348 + *      i.  If i is a nonprincipal or dense variable, then Nv [i] = 0.
   1.349 + *      Initially, Nv [i] = 1 for all i.  Nv [i] < 0 signifies that i is a
   1.350 + *      principal variable in the pattern Lme of the current pivot element me.
   1.351 + *      After element me is constructed, Nv [i] is set back to a positive
   1.352 + *      value.
   1.353 + *
   1.354 + *      On output, Nv [i] holds the number of pivots represented by super
   1.355 + *      row/column i of the original matrix, or Nv [i] = 0 for non-principal
   1.356 + *      rows/columns.  Note that i refers to a row/column in the original
   1.357 + *      matrix, not the permuted matrix.
   1.358 + *
   1.359 + * Elen:  An integer array of size n.  See the description of Iw above.  At the
   1.360 + *      start of execution, Elen [i] is set to zero for all rows i.  During
   1.361 + *      execution, Elen [i] is the number of elements in the list for
   1.362 + *      supervariable i.  When e becomes an element, Elen [e] = FLIP (esize) is
   1.363 + *      set, where esize is the size of the element (the number of pivots, plus
   1.364 + *      the number of nonpivotal entries).  Thus Elen [e] < EMPTY.
   1.365 + *      Elen (i) = EMPTY set when variable i becomes nonprincipal.
   1.366 + *
   1.367 + *      For variables, Elen (i) >= EMPTY holds until just before the
   1.368 + *      postordering and permutation vectors are computed.  For elements,
   1.369 + *      Elen [e] < EMPTY holds.
   1.370 + *
   1.371 + *      On output, Elen [i] is the degree of the row/column in the Cholesky
   1.372 + *      factorization of the permuted matrix, corresponding to the original row
   1.373 + *      i, if i is a super row/column.  It is equal to EMPTY if i is
   1.374 + *      non-principal.  Note that i refers to a row/column in the original
   1.375 + *      matrix, not the permuted matrix.
   1.376 + *
   1.377 + *      Note that the contents of Elen on output differ from the Fortran
   1.378 + *      version (Elen holds the inverse permutation in the Fortran version,
   1.379 + *      which is instead returned in the Next array in this C version,
   1.380 + *      described below).
   1.381 + *
   1.382 + * Last: In a degree list, Last [i] is the supervariable preceding i, or EMPTY
   1.383 + *      if i is the head of the list.  In a hash bucket, Last [i] is the hash
   1.384 + *      key for i.
   1.385 + *
   1.386 + *      Last [Head [hash]] is also used as the head of a hash bucket if
   1.387 + *      Head [hash] contains a degree list (see the description of Head,
   1.388 + *      below).
   1.389 + *
   1.390 + *      On output, Last [0..n-1] holds the permutation.  That is, if
   1.391 + *      i = Last [k], then row i is the kth pivot row (where k ranges from 0 to
   1.392 + *      n-1).  Row Last [k] of A is the kth row in the permuted matrix, PAP'.
   1.393 + *
   1.394 + * Next: Next [i] is the supervariable following i in a link list, or EMPTY if
   1.395 + *      i is the last in the list.  Used for two kinds of lists:  degree lists
   1.396 + *      and hash buckets (a supervariable can be in only one kind of list at a
   1.397 + *      time).
   1.398 + *
   1.399 + *      On output Next [0..n-1] holds the inverse permutation.  That is, if
   1.400 + *      k = Next [i], then row i is the kth pivot row. Row i of A appears as
   1.401 + *      the (Next[i])-th row in the permuted matrix, PAP'.
   1.402 + *
   1.403 + *      Note that the contents of Next on output differ from the Fortran
   1.404 + *      version (Next is undefined on output in the Fortran version).
   1.405 +
   1.406 + * ----------------------------------------------------------------------------
   1.407 + * LOCAL WORKSPACE (not input or output - used only during execution):
   1.408 + * ----------------------------------------------------------------------------
   1.409 + *
   1.410 + * Degree:  An integer array of size n.  If i is a supervariable, then
   1.411 + *      Degree [i] holds the current approximation of the external degree of
   1.412 + *      row i (an upper bound).  The external degree is the number of nonzeros
   1.413 + *      in row i, minus ABS (Nv [i]), the diagonal part.  The bound is equal to
   1.414 + *      the exact external degree if Elen [i] is less than or equal to two.
   1.415 + *
   1.416 + *      We also use the term "external degree" for elements e to refer to
   1.417 + *      |Le \ Lme|.  If e is an element, then Degree [e] is |Le|, which is the
   1.418 + *      degree of the off-diagonal part of the element e (not including the
   1.419 + *      diagonal part).
   1.420 + *
   1.421 + * Head:   An integer array of size n.  Head is used for degree lists.
   1.422 + *      Head [deg] is the first supervariable in a degree list.  All
   1.423 + *      supervariables i in a degree list Head [deg] have the same approximate
   1.424 + *      degree, namely, deg = Degree [i].  If the list Head [deg] is empty then
   1.425 + *      Head [deg] = EMPTY.
   1.426 + *
   1.427 + *      During supervariable detection Head [hash] also serves as a pointer to
   1.428 + *      a hash bucket.  If Head [hash] >= 0, there is a degree list of degree
   1.429 + *      hash.  The hash bucket head pointer is Last [Head [hash]].  If
   1.430 + *      Head [hash] = EMPTY, then the degree list and hash bucket are both
   1.431 + *      empty.  If Head [hash] < EMPTY, then the degree list is empty, and
   1.432 + *      FLIP (Head [hash]) is the head of the hash bucket.  After supervariable
   1.433 + *      detection is complete, all hash buckets are empty, and the
   1.434 + *      (Last [Head [hash]] = EMPTY) condition is restored for the non-empty
   1.435 + *      degree lists.
   1.436 + *
   1.437 + * W:  An integer array of size n.  The flag array W determines the status of
   1.438 + *      elements and variables, and the external degree of elements.
   1.439 + *
   1.440 + *      for elements:
   1.441 + *          if W [e] = 0, then the element e is absorbed.
   1.442 + *          if W [e] >= wflg, then W [e] - wflg is the size of the set
   1.443 + *              |Le \ Lme|, in terms of nonzeros (the sum of ABS (Nv [i]) for
   1.444 + *              each principal variable i that is both in the pattern of
   1.445 + *              element e and NOT in the pattern of the current pivot element,
   1.446 + *              me).
   1.447 + *          if wflg > W [e] > 0, then e is not absorbed and has not yet been
   1.448 + *              seen in the scan of the element lists in the computation of
   1.449 + *              |Le\Lme| in Scan 1 below.
   1.450 + *
   1.451 + *      for variables:
   1.452 + *          during supervariable detection, if W [j] != wflg then j is
   1.453 + *          not in the pattern of variable i.
   1.454 + *
   1.455 + *      The W array is initialized by setting W [i] = 1 for all i, and by
   1.456 + *      setting wflg = 2.  It is reinitialized if wflg becomes too large (to
   1.457 + *      ensure that wflg+n does not cause integer overflow).
   1.458 +
   1.459 + * ----------------------------------------------------------------------------
   1.460 + * LOCAL INTEGERS:
   1.461 + * ----------------------------------------------------------------------------
   1.462 + */
   1.463 +
   1.464 +    Int deg, degme, dext, lemax, e, elenme, eln, i, ilast, inext, j,
   1.465 +        jlast, jnext, k, knt1, knt2, knt3, lenj, ln, me, mindeg, nel, nleft,
   1.466 +        nvi, nvj, nvpiv, slenme, wbig, we, wflg, wnvi, ok, ndense, ncmpa,
   1.467 +        dense, aggressive ;
   1.468 +
   1.469 +    unsigned Int hash ;     /* unsigned, so that hash % n is well defined.*/
   1.470 +
   1.471 +/*
   1.472 + * deg:         the degree of a variable or element
   1.473 + * degme:       size, |Lme|, of the current element, me (= Degree [me])
   1.474 + * dext:        external degree, |Le \ Lme|, of some element e
   1.475 + * lemax:       largest |Le| seen so far (called dmax in Fortran version)
   1.476 + * e:           an element
   1.477 + * elenme:      the length, Elen [me], of element list of pivotal variable
   1.478 + * eln:         the length, Elen [...], of an element list
   1.479 + * hash:        the computed value of the hash function
   1.480 + * i:           a supervariable
   1.481 + * ilast:       the entry in a link list preceding i
   1.482 + * inext:       the entry in a link list following i
   1.483 + * j:           a supervariable
   1.484 + * jlast:       the entry in a link list preceding j
   1.485 + * jnext:       the entry in a link list, or path, following j
   1.486 + * k:           the pivot order of an element or variable
   1.487 + * knt1:        loop counter used during element construction
   1.488 + * knt2:        loop counter used during element construction
   1.489 + * knt3:        loop counter used during compression
   1.490 + * lenj:        Len [j]
   1.491 + * ln:          length of a supervariable list
   1.492 + * me:          current supervariable being eliminated, and the current
   1.493 + *                  element created by eliminating that supervariable
   1.494 + * mindeg:      current minimum degree
   1.495 + * nel:         number of pivots selected so far
   1.496 + * nleft:       n - nel, the number of nonpivotal rows/columns remaining
   1.497 + * nvi:         the number of variables in a supervariable i (= Nv [i])
   1.498 + * nvj:         the number of variables in a supervariable j (= Nv [j])
   1.499 + * nvpiv:       number of pivots in current element
   1.500 + * slenme:      number of variables in variable list of pivotal variable
   1.501 + * wbig:        = INT_MAX - n for the int version, UF_long_max - n for the
   1.502 + *                  UF_long version.  wflg is not allowed to be >= wbig.
   1.503 + * we:          W [e]
   1.504 + * wflg:        used for flagging the W array.  See description of Iw.
   1.505 + * wnvi:        wflg - Nv [i]
   1.506 + * x:           either a supervariable or an element
   1.507 + *
   1.508 + * ok:          true if supervariable j can be absorbed into i
   1.509 + * ndense:      number of "dense" rows/columns
   1.510 + * dense:       rows/columns with initial degree > dense are considered "dense"
   1.511 + * aggressive:  true if aggressive absorption is being performed
   1.512 + * ncmpa:       number of garbage collections
   1.513 +
   1.514 + * ----------------------------------------------------------------------------
   1.515 + * LOCAL DOUBLES, used for statistical output only (except for alpha):
   1.516 + * ----------------------------------------------------------------------------
   1.517 + */
   1.518 +
   1.519 +    double f, r, ndiv, s, nms_lu, nms_ldl, dmax, alpha, lnz, lnzme ;
   1.520 +
   1.521 +/*
   1.522 + * f:           nvpiv
   1.523 + * r:           degme + nvpiv
   1.524 + * ndiv:        number of divisions for LU or LDL' factorizations
   1.525 + * s:           number of multiply-subtract pairs for LU factorization, for the
   1.526 + *                  current element me
   1.527 + * nms_lu       number of multiply-subtract pairs for LU factorization
   1.528 + * nms_ldl      number of multiply-subtract pairs for LDL' factorization
   1.529 + * dmax:        the largest number of entries in any column of L, including the
   1.530 + *                  diagonal
   1.531 + * alpha:       "dense" degree ratio
   1.532 + * lnz:         the number of nonzeros in L (excluding the diagonal)
   1.533 + * lnzme:       the number of nonzeros in L (excl. the diagonal) for the
   1.534 + *                  current element me
   1.535 +
   1.536 + * ----------------------------------------------------------------------------
   1.537 + * LOCAL "POINTERS" (indices into the Iw array)
   1.538 + * ----------------------------------------------------------------------------
   1.539 +*/
   1.540 +
   1.541 +    Int p, p1, p2, p3, p4, pdst, pend, pj, pme, pme1, pme2, pn, psrc ;
   1.542 +
   1.543 +/*
   1.544 + * Any parameter (Pe [...] or pfree) or local variable starting with "p" (for
   1.545 + * Pointer) is an index into Iw, and all indices into Iw use variables starting
   1.546 + * with "p."  The only exception to this rule is the iwlen input argument.
   1.547 + *
   1.548 + * p:           pointer into lots of things
   1.549 + * p1:          Pe [i] for some variable i (start of element list)
   1.550 + * p2:          Pe [i] + Elen [i] -  1 for some variable i
   1.551 + * p3:          index of first supervariable in clean list
   1.552 + * p4:          
   1.553 + * pdst:        destination pointer, for compression
   1.554 + * pend:        end of memory to compress
   1.555 + * pj:          pointer into an element or variable
   1.556 + * pme:         pointer into the current element (pme1...pme2)
   1.557 + * pme1:        the current element, me, is stored in Iw [pme1...pme2]
   1.558 + * pme2:        the end of the current element
   1.559 + * pn:          pointer into a "clean" variable, also used to compress
   1.560 + * psrc:        source pointer, for compression
   1.561 +*/
   1.562 +
   1.563 +/* ========================================================================= */
   1.564 +/*  INITIALIZATIONS */
   1.565 +/* ========================================================================= */
   1.566 +
   1.567 +    /* Note that this restriction on iwlen is slightly more restrictive than
   1.568 +     * what is actually required in AMD_2.  AMD_2 can operate with no elbow
   1.569 +     * room at all, but it will be slow.  For better performance, at least
   1.570 +     * size-n elbow room is enforced. */
   1.571 +    ASSERT (iwlen >= pfree + n) ;
   1.572 +    ASSERT (n > 0) ;
   1.573 +
   1.574 +    /* initialize output statistics */
   1.575 +    lnz = 0 ;
   1.576 +    ndiv = 0 ;
   1.577 +    nms_lu = 0 ;
   1.578 +    nms_ldl = 0 ;
   1.579 +    dmax = 1 ;
   1.580 +    me = EMPTY ;
   1.581 +
   1.582 +    mindeg = 0 ;
   1.583 +    ncmpa = 0 ;
   1.584 +    nel = 0 ;
   1.585 +    lemax = 0 ;
   1.586 +
   1.587 +    /* get control parameters */
   1.588 +    if (Control != (double *) NULL)
   1.589 +    {
   1.590 +        alpha = Control [AMD_DENSE] ;
   1.591 +        aggressive = (Control [AMD_AGGRESSIVE] != 0) ;
   1.592 +    }
   1.593 +    else
   1.594 +    {
   1.595 +        alpha = AMD_DEFAULT_DENSE ;
   1.596 +        aggressive = AMD_DEFAULT_AGGRESSIVE ;
   1.597 +    }
   1.598 +    /* Note: if alpha is NaN, this is undefined: */
   1.599 +    if (alpha < 0)
   1.600 +    {
   1.601 +        /* only remove completely dense rows/columns */
   1.602 +        dense = n-2 ;
   1.603 +    }
   1.604 +    else
   1.605 +    {
   1.606 +        dense = alpha * sqrt ((double) n) ;
   1.607 +    }
   1.608 +    dense = MAX (16, dense) ;
   1.609 +    dense = MIN (n,  dense) ;
   1.610 +    AMD_DEBUG1 (("\n\nAMD (debug), alpha %g, aggr. "ID"\n",
   1.611 +        alpha, aggressive)) ;
   1.612 +
   1.613 +    for (i = 0 ; i < n ; i++)
   1.614 +    {
   1.615 +        Last [i] = EMPTY ;
   1.616 +        Head [i] = EMPTY ;
   1.617 +        Next [i] = EMPTY ;
   1.618 +        /* if separate Hhead array is used for hash buckets: *
   1.619 +        Hhead [i] = EMPTY ;
   1.620 +        */
   1.621 +        Nv [i] = 1 ;
   1.622 +        W [i] = 1 ;
   1.623 +        Elen [i] = 0 ;
   1.624 +        Degree [i] = Len [i] ;
   1.625 +    }
   1.626 +
   1.627 +#ifndef NDEBUG
   1.628 +    AMD_DEBUG1 (("\n======Nel "ID" initial\n", nel)) ;
   1.629 +    AMD_dump (n, Pe, Iw, Len, iwlen, pfree, Nv, Next, Last,
   1.630 +                Head, Elen, Degree, W, -1) ;
   1.631 +#endif
   1.632 +
   1.633 +    /* initialize wflg */
   1.634 +    wbig = Int_MAX - n ;
   1.635 +    wflg = clear_flag (0, wbig, W, n) ;
   1.636 +
   1.637 +    /* --------------------------------------------------------------------- */
   1.638 +    /* initialize degree lists and eliminate dense and empty rows */
   1.639 +    /* --------------------------------------------------------------------- */
   1.640 +
   1.641 +    ndense = 0 ;
   1.642 +
   1.643 +    for (i = 0 ; i < n ; i++)
   1.644 +    {
   1.645 +        deg = Degree [i] ;
   1.646 +        ASSERT (deg >= 0 && deg < n) ;
   1.647 +        if (deg == 0)
   1.648 +        {
   1.649 +
   1.650 +            /* -------------------------------------------------------------
   1.651 +             * we have a variable that can be eliminated at once because
   1.652 +             * there is no off-diagonal non-zero in its row.  Note that
   1.653 +             * Nv [i] = 1 for an empty variable i.  It is treated just
   1.654 +             * the same as an eliminated element i.
   1.655 +             * ------------------------------------------------------------- */
   1.656 +
   1.657 +            Elen [i] = FLIP (1) ;
   1.658 +            nel++ ;
   1.659 +            Pe [i] = EMPTY ;
   1.660 +            W [i] = 0 ;
   1.661 +
   1.662 +        }
   1.663 +        else if (deg > dense)
   1.664 +        {
   1.665 +
   1.666 +            /* -------------------------------------------------------------
   1.667 +             * Dense variables are not treated as elements, but as unordered,
   1.668 +             * non-principal variables that have no parent.  They do not take
   1.669 +             * part in the postorder, since Nv [i] = 0.  Note that the Fortran
   1.670 +             * version does not have this option.
   1.671 +             * ------------------------------------------------------------- */
   1.672 +
   1.673 +            AMD_DEBUG1 (("Dense node "ID" degree "ID"\n", i, deg)) ;
   1.674 +            ndense++ ;
   1.675 +            Nv [i] = 0 ;                /* do not postorder this node */
   1.676 +            Elen [i] = EMPTY ;
   1.677 +            nel++ ;
   1.678 +            Pe [i] = EMPTY ;
   1.679 +
   1.680 +        }
   1.681 +        else
   1.682 +        {
   1.683 +
   1.684 +            /* -------------------------------------------------------------
   1.685 +             * place i in the degree list corresponding to its degree
   1.686 +             * ------------------------------------------------------------- */
   1.687 +
   1.688 +            inext = Head [deg] ;
   1.689 +            ASSERT (inext >= EMPTY && inext < n) ;
   1.690 +            if (inext != EMPTY) Last [inext] = i ;
   1.691 +            Next [i] = inext ;
   1.692 +            Head [deg] = i ;
   1.693 +
   1.694 +        }
   1.695 +    }
   1.696 +
   1.697 +/* ========================================================================= */
   1.698 +/* WHILE (selecting pivots) DO */
   1.699 +/* ========================================================================= */
   1.700 +
   1.701 +    while (nel < n)
   1.702 +    {
   1.703 +
   1.704 +#ifndef NDEBUG
   1.705 +        AMD_DEBUG1 (("\n======Nel "ID"\n", nel)) ;
   1.706 +        if (AMD_debug >= 2)
   1.707 +        {
   1.708 +            AMD_dump (n, Pe, Iw, Len, iwlen, pfree, Nv, Next,
   1.709 +                    Last, Head, Elen, Degree, W, nel) ;
   1.710 +        }
   1.711 +#endif
   1.712 +
   1.713 +/* ========================================================================= */
   1.714 +/* GET PIVOT OF MINIMUM DEGREE */
   1.715 +/* ========================================================================= */
   1.716 +
   1.717 +        /* ----------------------------------------------------------------- */
   1.718 +        /* find next supervariable for elimination */
   1.719 +        /* ----------------------------------------------------------------- */
   1.720 +
   1.721 +        ASSERT (mindeg >= 0 && mindeg < n) ;
   1.722 +        for (deg = mindeg ; deg < n ; deg++)
   1.723 +        {
   1.724 +            me = Head [deg] ;
   1.725 +            if (me != EMPTY) break ;
   1.726 +        }
   1.727 +        mindeg = deg ;
   1.728 +        ASSERT (me >= 0 && me < n) ;
   1.729 +        AMD_DEBUG1 (("=================me: "ID"\n", me)) ;
   1.730 +
   1.731 +        /* ----------------------------------------------------------------- */
   1.732 +        /* remove chosen variable from link list */
   1.733 +        /* ----------------------------------------------------------------- */
   1.734 +
   1.735 +        inext = Next [me] ;
   1.736 +        ASSERT (inext >= EMPTY && inext < n) ;
   1.737 +        if (inext != EMPTY) Last [inext] = EMPTY ;
   1.738 +        Head [deg] = inext ;
   1.739 +
   1.740 +        /* ----------------------------------------------------------------- */
   1.741 +        /* me represents the elimination of pivots nel to nel+Nv[me]-1. */
   1.742 +        /* place me itself as the first in this set. */
   1.743 +        /* ----------------------------------------------------------------- */
   1.744 +
   1.745 +        elenme = Elen [me] ;
   1.746 +        nvpiv = Nv [me] ;
   1.747 +        ASSERT (nvpiv > 0) ;
   1.748 +        nel += nvpiv ;
   1.749 +
   1.750 +/* ========================================================================= */
   1.751 +/* CONSTRUCT NEW ELEMENT */
   1.752 +/* ========================================================================= */
   1.753 +
   1.754 +        /* -----------------------------------------------------------------
   1.755 +         * At this point, me is the pivotal supervariable.  It will be
   1.756 +         * converted into the current element.  Scan list of the pivotal
   1.757 +         * supervariable, me, setting tree pointers and constructing new list
   1.758 +         * of supervariables for the new element, me.  p is a pointer to the
   1.759 +         * current position in the old list.
   1.760 +         * ----------------------------------------------------------------- */
   1.761 +
   1.762 +        /* flag the variable "me" as being in Lme by negating Nv [me] */
   1.763 +        Nv [me] = -nvpiv ;
   1.764 +        degme = 0 ;
   1.765 +        ASSERT (Pe [me] >= 0 && Pe [me] < iwlen) ;
   1.766 +
   1.767 +        if (elenme == 0)
   1.768 +        {
   1.769 +
   1.770 +            /* ------------------------------------------------------------- */
   1.771 +            /* construct the new element in place */
   1.772 +            /* ------------------------------------------------------------- */
   1.773 +
   1.774 +            pme1 = Pe [me] ;
   1.775 +            pme2 = pme1 - 1 ;
   1.776 +
   1.777 +            for (p = pme1 ; p <= pme1 + Len [me] - 1 ; p++)
   1.778 +            {
   1.779 +                i = Iw [p] ;
   1.780 +                ASSERT (i >= 0 && i < n && Nv [i] >= 0) ;
   1.781 +                nvi = Nv [i] ;
   1.782 +                if (nvi > 0)
   1.783 +                {
   1.784 +
   1.785 +                    /* ----------------------------------------------------- */
   1.786 +                    /* i is a principal variable not yet placed in Lme. */
   1.787 +                    /* store i in new list */
   1.788 +                    /* ----------------------------------------------------- */
   1.789 +
   1.790 +                    /* flag i as being in Lme by negating Nv [i] */
   1.791 +                    degme += nvi ;
   1.792 +                    Nv [i] = -nvi ;
   1.793 +                    Iw [++pme2] = i ;
   1.794 +
   1.795 +                    /* ----------------------------------------------------- */
   1.796 +                    /* remove variable i from degree list. */
   1.797 +                    /* ----------------------------------------------------- */
   1.798 +
   1.799 +                    ilast = Last [i] ;
   1.800 +                    inext = Next [i] ;
   1.801 +                    ASSERT (ilast >= EMPTY && ilast < n) ;
   1.802 +                    ASSERT (inext >= EMPTY && inext < n) ;
   1.803 +                    if (inext != EMPTY) Last [inext] = ilast ;
   1.804 +                    if (ilast != EMPTY)
   1.805 +                    {
   1.806 +                        Next [ilast] = inext ;
   1.807 +                    }
   1.808 +                    else
   1.809 +                    {
   1.810 +                        /* i is at the head of the degree list */
   1.811 +                        ASSERT (Degree [i] >= 0 && Degree [i] < n) ;
   1.812 +                        Head [Degree [i]] = inext ;
   1.813 +                    }
   1.814 +                }
   1.815 +            }
   1.816 +        }
   1.817 +        else
   1.818 +        {
   1.819 +
   1.820 +            /* ------------------------------------------------------------- */
   1.821 +            /* construct the new element in empty space, Iw [pfree ...] */
   1.822 +            /* ------------------------------------------------------------- */
   1.823 +
   1.824 +            p = Pe [me] ;
   1.825 +            pme1 = pfree ;
   1.826 +            slenme = Len [me] - elenme ;
   1.827 +
   1.828 +            for (knt1 = 1 ; knt1 <= elenme + 1 ; knt1++)
   1.829 +            {
   1.830 +
   1.831 +                if (knt1 > elenme)
   1.832 +                {
   1.833 +                    /* search the supervariables in me. */
   1.834 +                    e = me ;
   1.835 +                    pj = p ;
   1.836 +                    ln = slenme ;
   1.837 +                    AMD_DEBUG2 (("Search sv: "ID" "ID" "ID"\n", me,pj,ln)) ;
   1.838 +                }
   1.839 +                else
   1.840 +                {
   1.841 +                    /* search the elements in me. */
   1.842 +                    e = Iw [p++] ;
   1.843 +                    ASSERT (e >= 0 && e < n) ;
   1.844 +                    pj = Pe [e] ;
   1.845 +                    ln = Len [e] ;
   1.846 +                    AMD_DEBUG2 (("Search element e "ID" in me "ID"\n", e,me)) ;
   1.847 +                    ASSERT (Elen [e] < EMPTY && W [e] > 0 && pj >= 0) ;
   1.848 +                }
   1.849 +                ASSERT (ln >= 0 && (ln == 0 || (pj >= 0 && pj < iwlen))) ;
   1.850 +
   1.851 +                /* ---------------------------------------------------------
   1.852 +                 * search for different supervariables and add them to the
   1.853 +                 * new list, compressing when necessary. this loop is
   1.854 +                 * executed once for each element in the list and once for
   1.855 +                 * all the supervariables in the list.
   1.856 +                 * --------------------------------------------------------- */
   1.857 +
   1.858 +                for (knt2 = 1 ; knt2 <= ln ; knt2++)
   1.859 +                {
   1.860 +                    i = Iw [pj++] ;
   1.861 +                    ASSERT (i >= 0 && i < n && (i == me || Elen [i] >= EMPTY));
   1.862 +                    nvi = Nv [i] ;
   1.863 +                    AMD_DEBUG2 ((": "ID" "ID" "ID" "ID"\n",
   1.864 +                                i, Elen [i], Nv [i], wflg)) ;
   1.865 +
   1.866 +                    if (nvi > 0)
   1.867 +                    {
   1.868 +
   1.869 +                        /* ------------------------------------------------- */
   1.870 +                        /* compress Iw, if necessary */
   1.871 +                        /* ------------------------------------------------- */
   1.872 +
   1.873 +                        if (pfree >= iwlen)
   1.874 +                        {
   1.875 +
   1.876 +                            AMD_DEBUG1 (("GARBAGE COLLECTION\n")) ;
   1.877 +
   1.878 +                            /* prepare for compressing Iw by adjusting pointers
   1.879 +                             * and lengths so that the lists being searched in
   1.880 +                             * the inner and outer loops contain only the
   1.881 +                             * remaining entries. */
   1.882 +
   1.883 +                            Pe [me] = p ;
   1.884 +                            Len [me] -= knt1 ;
   1.885 +                            /* check if nothing left of supervariable me */
   1.886 +                            if (Len [me] == 0) Pe [me] = EMPTY ;
   1.887 +                            Pe [e] = pj ;
   1.888 +                            Len [e] = ln - knt2 ;
   1.889 +                            /* nothing left of element e */
   1.890 +                            if (Len [e] == 0) Pe [e] = EMPTY ;
   1.891 +
   1.892 +                            ncmpa++ ;   /* one more garbage collection */
   1.893 +
   1.894 +                            /* store first entry of each object in Pe */
   1.895 +                            /* FLIP the first entry in each object */
   1.896 +                            for (j = 0 ; j < n ; j++)
   1.897 +                            {
   1.898 +                                pn = Pe [j] ;
   1.899 +                                if (pn >= 0)
   1.900 +                                {
   1.901 +                                    ASSERT (pn >= 0 && pn < iwlen) ;
   1.902 +                                    Pe [j] = Iw [pn] ;
   1.903 +                                    Iw [pn] = FLIP (j) ;
   1.904 +                                }
   1.905 +                            }
   1.906 +
   1.907 +                            /* psrc/pdst point to source/destination */
   1.908 +                            psrc = 0 ;
   1.909 +                            pdst = 0 ;
   1.910 +                            pend = pme1 - 1 ;
   1.911 +
   1.912 +                            while (psrc <= pend)
   1.913 +                            {
   1.914 +                                /* search for next FLIP'd entry */
   1.915 +                                j = FLIP (Iw [psrc++]) ;
   1.916 +                                if (j >= 0)
   1.917 +                                {
   1.918 +                                    AMD_DEBUG2 (("Got object j: "ID"\n", j)) ;
   1.919 +                                    Iw [pdst] = Pe [j] ;
   1.920 +                                    Pe [j] = pdst++ ;
   1.921 +                                    lenj = Len [j] ;
   1.922 +                                    /* copy from source to destination */
   1.923 +                                    for (knt3 = 0 ; knt3 <= lenj - 2 ; knt3++)
   1.924 +                                    {
   1.925 +                                        Iw [pdst++] = Iw [psrc++] ;
   1.926 +                                    }
   1.927 +                                }
   1.928 +                            }
   1.929 +
   1.930 +                            /* move the new partially-constructed element */
   1.931 +                            p1 = pdst ;
   1.932 +                            for (psrc = pme1 ; psrc <= pfree-1 ; psrc++)
   1.933 +                            {
   1.934 +                                Iw [pdst++] = Iw [psrc] ;
   1.935 +                            }
   1.936 +                            pme1 = p1 ;
   1.937 +                            pfree = pdst ;
   1.938 +                            pj = Pe [e] ;
   1.939 +                            p = Pe [me] ;
   1.940 +
   1.941 +                        }
   1.942 +
   1.943 +                        /* ------------------------------------------------- */
   1.944 +                        /* i is a principal variable not yet placed in Lme */
   1.945 +                        /* store i in new list */
   1.946 +                        /* ------------------------------------------------- */
   1.947 +
   1.948 +                        /* flag i as being in Lme by negating Nv [i] */
   1.949 +                        degme += nvi ;
   1.950 +                        Nv [i] = -nvi ;
   1.951 +                        Iw [pfree++] = i ;
   1.952 +                        AMD_DEBUG2 (("     s: "ID"     nv "ID"\n", i, Nv [i]));
   1.953 +
   1.954 +                        /* ------------------------------------------------- */
   1.955 +                        /* remove variable i from degree link list */
   1.956 +                        /* ------------------------------------------------- */
   1.957 +
   1.958 +                        ilast = Last [i] ;
   1.959 +                        inext = Next [i] ;
   1.960 +                        ASSERT (ilast >= EMPTY && ilast < n) ;
   1.961 +                        ASSERT (inext >= EMPTY && inext < n) ;
   1.962 +                        if (inext != EMPTY) Last [inext] = ilast ;
   1.963 +                        if (ilast != EMPTY)
   1.964 +                        {
   1.965 +                            Next [ilast] = inext ;
   1.966 +                        }
   1.967 +                        else
   1.968 +                        {
   1.969 +                            /* i is at the head of the degree list */
   1.970 +                            ASSERT (Degree [i] >= 0 && Degree [i] < n) ;
   1.971 +                            Head [Degree [i]] = inext ;
   1.972 +                        }
   1.973 +                    }
   1.974 +                }
   1.975 +
   1.976 +                if (e != me)
   1.977 +                {
   1.978 +                    /* set tree pointer and flag to indicate element e is
   1.979 +                     * absorbed into new element me (the parent of e is me) */
   1.980 +                    AMD_DEBUG1 ((" Element "ID" => "ID"\n", e, me)) ;
   1.981 +                    Pe [e] = FLIP (me) ;
   1.982 +                    W [e] = 0 ;
   1.983 +                }
   1.984 +            }
   1.985 +
   1.986 +            pme2 = pfree - 1 ;
   1.987 +        }
   1.988 +
   1.989 +        /* ----------------------------------------------------------------- */
   1.990 +        /* me has now been converted into an element in Iw [pme1..pme2] */
   1.991 +        /* ----------------------------------------------------------------- */
   1.992 +
   1.993 +        /* degme holds the external degree of new element */
   1.994 +        Degree [me] = degme ;
   1.995 +        Pe [me] = pme1 ;
   1.996 +        Len [me] = pme2 - pme1 + 1 ;
   1.997 +        ASSERT (Pe [me] >= 0 && Pe [me] < iwlen) ;
   1.998 +
   1.999 +        Elen [me] = FLIP (nvpiv + degme) ;
  1.1000 +        /* FLIP (Elen (me)) is now the degree of pivot (including
  1.1001 +         * diagonal part). */
  1.1002 +
  1.1003 +#ifndef NDEBUG
  1.1004 +        AMD_DEBUG2 (("New element structure: length= "ID"\n", pme2-pme1+1)) ;
  1.1005 +        for (pme = pme1 ; pme <= pme2 ; pme++) AMD_DEBUG3 ((" "ID"", Iw[pme]));
  1.1006 +        AMD_DEBUG3 (("\n")) ;
  1.1007 +#endif
  1.1008 +
  1.1009 +        /* ----------------------------------------------------------------- */
  1.1010 +        /* make sure that wflg is not too large. */
  1.1011 +        /* ----------------------------------------------------------------- */
  1.1012 +
  1.1013 +        /* With the current value of wflg, wflg+n must not cause integer
  1.1014 +         * overflow */
  1.1015 +
  1.1016 +        wflg = clear_flag (wflg, wbig, W, n) ;
  1.1017 +
  1.1018 +/* ========================================================================= */
  1.1019 +/* COMPUTE (W [e] - wflg) = |Le\Lme| FOR ALL ELEMENTS */
  1.1020 +/* ========================================================================= */
  1.1021 +
  1.1022 +        /* -----------------------------------------------------------------
  1.1023 +         * Scan 1:  compute the external degrees of previous elements with
  1.1024 +         * respect to the current element.  That is:
  1.1025 +         *       (W [e] - wflg) = |Le \ Lme|
  1.1026 +         * for each element e that appears in any supervariable in Lme.  The
  1.1027 +         * notation Le refers to the pattern (list of supervariables) of a
  1.1028 +         * previous element e, where e is not yet absorbed, stored in
  1.1029 +         * Iw [Pe [e] + 1 ... Pe [e] + Len [e]].  The notation Lme
  1.1030 +         * refers to the pattern of the current element (stored in
  1.1031 +         * Iw [pme1..pme2]).   If aggressive absorption is enabled, and
  1.1032 +         * (W [e] - wflg) becomes zero, then the element e will be absorbed
  1.1033 +         * in Scan 2.
  1.1034 +         * ----------------------------------------------------------------- */
  1.1035 +
  1.1036 +        AMD_DEBUG2 (("me: ")) ;
  1.1037 +        for (pme = pme1 ; pme <= pme2 ; pme++)
  1.1038 +        {
  1.1039 +            i = Iw [pme] ;
  1.1040 +            ASSERT (i >= 0 && i < n) ;
  1.1041 +            eln = Elen [i] ;
  1.1042 +            AMD_DEBUG3 ((""ID" Elen "ID": \n", i, eln)) ;
  1.1043 +            if (eln > 0)
  1.1044 +            {
  1.1045 +                /* note that Nv [i] has been negated to denote i in Lme: */
  1.1046 +                nvi = -Nv [i] ;
  1.1047 +                ASSERT (nvi > 0 && Pe [i] >= 0 && Pe [i] < iwlen) ;
  1.1048 +                wnvi = wflg - nvi ;
  1.1049 +                for (p = Pe [i] ; p <= Pe [i] + eln - 1 ; p++)
  1.1050 +                {
  1.1051 +                    e = Iw [p] ;
  1.1052 +                    ASSERT (e >= 0 && e < n) ;
  1.1053 +                    we = W [e] ;
  1.1054 +                    AMD_DEBUG4 (("    e "ID" we "ID" ", e, we)) ;
  1.1055 +                    if (we >= wflg)
  1.1056 +                    {
  1.1057 +                        /* unabsorbed element e has been seen in this loop */
  1.1058 +                        AMD_DEBUG4 (("    unabsorbed, first time seen")) ;
  1.1059 +                        we -= nvi ;
  1.1060 +                    }
  1.1061 +                    else if (we != 0)
  1.1062 +                    {
  1.1063 +                        /* e is an unabsorbed element */
  1.1064 +                        /* this is the first we have seen e in all of Scan 1 */
  1.1065 +                        AMD_DEBUG4 (("    unabsorbed")) ;
  1.1066 +                        we = Degree [e] + wnvi ;
  1.1067 +                    }
  1.1068 +                    AMD_DEBUG4 (("\n")) ;
  1.1069 +                    W [e] = we ;
  1.1070 +                }
  1.1071 +            }
  1.1072 +        }
  1.1073 +        AMD_DEBUG2 (("\n")) ;
  1.1074 +
  1.1075 +/* ========================================================================= */
  1.1076 +/* DEGREE UPDATE AND ELEMENT ABSORPTION */
  1.1077 +/* ========================================================================= */
  1.1078 +
  1.1079 +        /* -----------------------------------------------------------------
  1.1080 +         * Scan 2:  for each i in Lme, sum up the degree of Lme (which is
  1.1081 +         * degme), plus the sum of the external degrees of each Le for the
  1.1082 +         * elements e appearing within i, plus the supervariables in i.
  1.1083 +         * Place i in hash list.
  1.1084 +         * ----------------------------------------------------------------- */
  1.1085 +
  1.1086 +        for (pme = pme1 ; pme <= pme2 ; pme++)
  1.1087 +        {
  1.1088 +            i = Iw [pme] ;
  1.1089 +            ASSERT (i >= 0 && i < n && Nv [i] < 0 && Elen [i] >= 0) ;
  1.1090 +            AMD_DEBUG2 (("Updating: i "ID" "ID" "ID"\n", i, Elen[i], Len [i]));
  1.1091 +            p1 = Pe [i] ;
  1.1092 +            p2 = p1 + Elen [i] - 1 ;
  1.1093 +            pn = p1 ;
  1.1094 +            hash = 0 ;
  1.1095 +            deg = 0 ;
  1.1096 +            ASSERT (p1 >= 0 && p1 < iwlen && p2 >= -1 && p2 < iwlen) ;
  1.1097 +
  1.1098 +            /* ------------------------------------------------------------- */
  1.1099 +            /* scan the element list associated with supervariable i */
  1.1100 +            /* ------------------------------------------------------------- */
  1.1101 +
  1.1102 +            /* UMFPACK/MA38-style approximate degree: */
  1.1103 +            if (aggressive)
  1.1104 +            {
  1.1105 +                for (p = p1 ; p <= p2 ; p++)
  1.1106 +                {
  1.1107 +                    e = Iw [p] ;
  1.1108 +                    ASSERT (e >= 0 && e < n) ;
  1.1109 +                    we = W [e] ;
  1.1110 +                    if (we != 0)
  1.1111 +                    {
  1.1112 +                        /* e is an unabsorbed element */
  1.1113 +                        /* dext = | Le \ Lme | */
  1.1114 +                        dext = we - wflg ;
  1.1115 +                        if (dext > 0)
  1.1116 +                        {
  1.1117 +                            deg += dext ;
  1.1118 +                            Iw [pn++] = e ;
  1.1119 +                            hash += e ;
  1.1120 +                            AMD_DEBUG4 ((" e: "ID" hash = "ID"\n",e,hash)) ;
  1.1121 +                        }
  1.1122 +                        else
  1.1123 +                        {
  1.1124 +                            /* external degree of e is zero, absorb e into me*/
  1.1125 +                            AMD_DEBUG1 ((" Element "ID" =>"ID" (aggressive)\n",
  1.1126 +                                e, me)) ;
  1.1127 +                            ASSERT (dext == 0) ;
  1.1128 +                            Pe [e] = FLIP (me) ;
  1.1129 +                            W [e] = 0 ;
  1.1130 +                        }
  1.1131 +                    }
  1.1132 +                }
  1.1133 +            }
  1.1134 +            else
  1.1135 +            {
  1.1136 +                for (p = p1 ; p <= p2 ; p++)
  1.1137 +                {
  1.1138 +                    e = Iw [p] ;
  1.1139 +                    ASSERT (e >= 0 && e < n) ;
  1.1140 +                    we = W [e] ;
  1.1141 +                    if (we != 0)
  1.1142 +                    {
  1.1143 +                        /* e is an unabsorbed element */
  1.1144 +                        dext = we - wflg ;
  1.1145 +                        ASSERT (dext >= 0) ;
  1.1146 +                        deg += dext ;
  1.1147 +                        Iw [pn++] = e ;
  1.1148 +                        hash += e ;
  1.1149 +                        AMD_DEBUG4 (("  e: "ID" hash = "ID"\n",e,hash)) ;
  1.1150 +                    }
  1.1151 +                }
  1.1152 +            }
  1.1153 +
  1.1154 +            /* count the number of elements in i (including me): */
  1.1155 +            Elen [i] = pn - p1 + 1 ;
  1.1156 +
  1.1157 +            /* ------------------------------------------------------------- */
  1.1158 +            /* scan the supervariables in the list associated with i */
  1.1159 +            /* ------------------------------------------------------------- */
  1.1160 +
  1.1161 +            /* The bulk of the AMD run time is typically spent in this loop,
  1.1162 +             * particularly if the matrix has many dense rows that are not
  1.1163 +             * removed prior to ordering. */
  1.1164 +            p3 = pn ;
  1.1165 +            p4 = p1 + Len [i] ;
  1.1166 +            for (p = p2 + 1 ; p < p4 ; p++)
  1.1167 +            {
  1.1168 +                j = Iw [p] ;
  1.1169 +                ASSERT (j >= 0 && j < n) ;
  1.1170 +                nvj = Nv [j] ;
  1.1171 +                if (nvj > 0)
  1.1172 +                {
  1.1173 +                    /* j is unabsorbed, and not in Lme. */
  1.1174 +                    /* add to degree and add to new list */
  1.1175 +                    deg += nvj ;
  1.1176 +                    Iw [pn++] = j ;
  1.1177 +                    hash += j ;
  1.1178 +                    AMD_DEBUG4 (("  s: "ID" hash "ID" Nv[j]= "ID"\n",
  1.1179 +                                j, hash, nvj)) ;
  1.1180 +                }
  1.1181 +            }
  1.1182 +
  1.1183 +            /* ------------------------------------------------------------- */
  1.1184 +            /* update the degree and check for mass elimination */
  1.1185 +            /* ------------------------------------------------------------- */
  1.1186 +
  1.1187 +            /* with aggressive absorption, deg==0 is identical to the
  1.1188 +             * Elen [i] == 1 && p3 == pn test, below. */
  1.1189 +            ASSERT (IMPLIES (aggressive, (deg==0) == (Elen[i]==1 && p3==pn))) ;
  1.1190 +
  1.1191 +            if (Elen [i] == 1 && p3 == pn)
  1.1192 +            {
  1.1193 +
  1.1194 +                /* --------------------------------------------------------- */
  1.1195 +                /* mass elimination */
  1.1196 +                /* --------------------------------------------------------- */
  1.1197 +
  1.1198 +                /* There is nothing left of this node except for an edge to
  1.1199 +                 * the current pivot element.  Elen [i] is 1, and there are
  1.1200 +                 * no variables adjacent to node i.  Absorb i into the
  1.1201 +                 * current pivot element, me.  Note that if there are two or
  1.1202 +                 * more mass eliminations, fillin due to mass elimination is
  1.1203 +                 * possible within the nvpiv-by-nvpiv pivot block.  It is this
  1.1204 +                 * step that causes AMD's analysis to be an upper bound.
  1.1205 +                 *
  1.1206 +                 * The reason is that the selected pivot has a lower
  1.1207 +                 * approximate degree than the true degree of the two mass
  1.1208 +                 * eliminated nodes.  There is no edge between the two mass
  1.1209 +                 * eliminated nodes.  They are merged with the current pivot
  1.1210 +                 * anyway.
  1.1211 +                 *
  1.1212 +                 * No fillin occurs in the Schur complement, in any case,
  1.1213 +                 * and this effect does not decrease the quality of the
  1.1214 +                 * ordering itself, just the quality of the nonzero and
  1.1215 +                 * flop count analysis.  It also means that the post-ordering
  1.1216 +                 * is not an exact elimination tree post-ordering. */
  1.1217 +
  1.1218 +                AMD_DEBUG1 (("  MASS i "ID" => parent e "ID"\n", i, me)) ;
  1.1219 +                Pe [i] = FLIP (me) ;
  1.1220 +                nvi = -Nv [i] ;
  1.1221 +                degme -= nvi ;
  1.1222 +                nvpiv += nvi ;
  1.1223 +                nel += nvi ;
  1.1224 +                Nv [i] = 0 ;
  1.1225 +                Elen [i] = EMPTY ;
  1.1226 +
  1.1227 +            }
  1.1228 +            else
  1.1229 +            {
  1.1230 +
  1.1231 +                /* --------------------------------------------------------- */
  1.1232 +                /* update the upper-bound degree of i */
  1.1233 +                /* --------------------------------------------------------- */
  1.1234 +
  1.1235 +                /* the following degree does not yet include the size
  1.1236 +                 * of the current element, which is added later: */
  1.1237 +
  1.1238 +                Degree [i] = MIN (Degree [i], deg) ;
  1.1239 +
  1.1240 +                /* --------------------------------------------------------- */
  1.1241 +                /* add me to the list for i */
  1.1242 +                /* --------------------------------------------------------- */
  1.1243 +
  1.1244 +                /* move first supervariable to end of list */
  1.1245 +                Iw [pn] = Iw [p3] ;
  1.1246 +                /* move first element to end of element part of list */
  1.1247 +                Iw [p3] = Iw [p1] ;
  1.1248 +                /* add new element, me, to front of list. */
  1.1249 +                Iw [p1] = me ;
  1.1250 +                /* store the new length of the list in Len [i] */
  1.1251 +                Len [i] = pn - p1 + 1 ;
  1.1252 +
  1.1253 +                /* --------------------------------------------------------- */
  1.1254 +                /* place in hash bucket.  Save hash key of i in Last [i]. */
  1.1255 +                /* --------------------------------------------------------- */
  1.1256 +
  1.1257 +                /* NOTE: this can fail if hash is negative, because the ANSI C
  1.1258 +                 * standard does not define a % b when a and/or b are negative.
  1.1259 +                 * That's why hash is defined as an unsigned Int, to avoid this
  1.1260 +                 * problem. */
  1.1261 +                hash = hash % n ;
  1.1262 +                ASSERT (((Int) hash) >= 0 && ((Int) hash) < n) ;
  1.1263 +
  1.1264 +                /* if the Hhead array is not used: */
  1.1265 +                j = Head [hash] ;
  1.1266 +                if (j <= EMPTY)
  1.1267 +                {
  1.1268 +                    /* degree list is empty, hash head is FLIP (j) */
  1.1269 +                    Next [i] = FLIP (j) ;
  1.1270 +                    Head [hash] = FLIP (i) ;
  1.1271 +                }
  1.1272 +                else
  1.1273 +                {
  1.1274 +                    /* degree list is not empty, use Last [Head [hash]] as
  1.1275 +                     * hash head. */
  1.1276 +                    Next [i] = Last [j] ;
  1.1277 +                    Last [j] = i ;
  1.1278 +                }
  1.1279 +
  1.1280 +                /* if a separate Hhead array is used: *
  1.1281 +                Next [i] = Hhead [hash] ;
  1.1282 +                Hhead [hash] = i ;
  1.1283 +                */
  1.1284 +
  1.1285 +                Last [i] = hash ;
  1.1286 +            }
  1.1287 +        }
  1.1288 +
  1.1289 +        Degree [me] = degme ;
  1.1290 +
  1.1291 +        /* ----------------------------------------------------------------- */
  1.1292 +        /* Clear the counter array, W [...], by incrementing wflg. */
  1.1293 +        /* ----------------------------------------------------------------- */
  1.1294 +
  1.1295 +        /* make sure that wflg+n does not cause integer overflow */
  1.1296 +        lemax =  MAX (lemax, degme) ;
  1.1297 +        wflg += lemax ;
  1.1298 +        wflg = clear_flag (wflg, wbig, W, n) ;
  1.1299 +        /*  at this point, W [0..n-1] < wflg holds */
  1.1300 +
  1.1301 +/* ========================================================================= */
  1.1302 +/* SUPERVARIABLE DETECTION */
  1.1303 +/* ========================================================================= */
  1.1304 +
  1.1305 +        AMD_DEBUG1 (("Detecting supervariables:\n")) ;
  1.1306 +        for (pme = pme1 ; pme <= pme2 ; pme++)
  1.1307 +        {
  1.1308 +            i = Iw [pme] ;
  1.1309 +            ASSERT (i >= 0 && i < n) ;
  1.1310 +            AMD_DEBUG2 (("Consider i "ID" nv "ID"\n", i, Nv [i])) ;
  1.1311 +            if (Nv [i] < 0)
  1.1312 +            {
  1.1313 +                /* i is a principal variable in Lme */
  1.1314 +
  1.1315 +                /* ---------------------------------------------------------
  1.1316 +                 * examine all hash buckets with 2 or more variables.  We do
  1.1317 +                 * this by examing all unique hash keys for supervariables in
  1.1318 +                 * the pattern Lme of the current element, me
  1.1319 +                 * --------------------------------------------------------- */
  1.1320 +
  1.1321 +                /* let i = head of hash bucket, and empty the hash bucket */
  1.1322 +                ASSERT (Last [i] >= 0 && Last [i] < n) ;
  1.1323 +                hash = Last [i] ;
  1.1324 +
  1.1325 +                /* if Hhead array is not used: */
  1.1326 +                j = Head [hash] ;
  1.1327 +                if (j == EMPTY)
  1.1328 +                {
  1.1329 +                    /* hash bucket and degree list are both empty */
  1.1330 +                    i = EMPTY ;
  1.1331 +                }
  1.1332 +                else if (j < EMPTY)
  1.1333 +                {
  1.1334 +                    /* degree list is empty */
  1.1335 +                    i = FLIP (j) ;
  1.1336 +                    Head [hash] = EMPTY ;
  1.1337 +                }
  1.1338 +                else
  1.1339 +                {
  1.1340 +                    /* degree list is not empty, restore Last [j] of head j */
  1.1341 +                    i = Last [j] ;
  1.1342 +                    Last [j] = EMPTY ;
  1.1343 +                }
  1.1344 +
  1.1345 +                /* if separate Hhead array is used: *
  1.1346 +                i = Hhead [hash] ;
  1.1347 +                Hhead [hash] = EMPTY ;
  1.1348 +                */
  1.1349 +
  1.1350 +                ASSERT (i >= EMPTY && i < n) ;
  1.1351 +                AMD_DEBUG2 (("----i "ID" hash "ID"\n", i, hash)) ;
  1.1352 +
  1.1353 +                while (i != EMPTY && Next [i] != EMPTY)
  1.1354 +                {
  1.1355 +
  1.1356 +                    /* -----------------------------------------------------
  1.1357 +                     * this bucket has one or more variables following i.
  1.1358 +                     * scan all of them to see if i can absorb any entries
  1.1359 +                     * that follow i in hash bucket.  Scatter i into w.
  1.1360 +                     * ----------------------------------------------------- */
  1.1361 +
  1.1362 +                    ln = Len [i] ;
  1.1363 +                    eln = Elen [i] ;
  1.1364 +                    ASSERT (ln >= 0 && eln >= 0) ;
  1.1365 +                    ASSERT (Pe [i] >= 0 && Pe [i] < iwlen) ;
  1.1366 +                    /* do not flag the first element in the list (me) */
  1.1367 +                    for (p = Pe [i] + 1 ; p <= Pe [i] + ln - 1 ; p++)
  1.1368 +                    {
  1.1369 +                        ASSERT (Iw [p] >= 0 && Iw [p] < n) ;
  1.1370 +                        W [Iw [p]] = wflg ;
  1.1371 +                    }
  1.1372 +
  1.1373 +                    /* ----------------------------------------------------- */
  1.1374 +                    /* scan every other entry j following i in bucket */
  1.1375 +                    /* ----------------------------------------------------- */
  1.1376 +
  1.1377 +                    jlast = i ;
  1.1378 +                    j = Next [i] ;
  1.1379 +                    ASSERT (j >= EMPTY && j < n) ;
  1.1380 +
  1.1381 +                    while (j != EMPTY)
  1.1382 +                    {
  1.1383 +                        /* ------------------------------------------------- */
  1.1384 +                        /* check if j and i have identical nonzero pattern */
  1.1385 +                        /* ------------------------------------------------- */
  1.1386 +
  1.1387 +                        AMD_DEBUG3 (("compare i "ID" and j "ID"\n", i,j)) ;
  1.1388 +
  1.1389 +                        /* check if i and j have the same Len and Elen */
  1.1390 +                        ASSERT (Len [j] >= 0 && Elen [j] >= 0) ;
  1.1391 +                        ASSERT (Pe [j] >= 0 && Pe [j] < iwlen) ;
  1.1392 +                        ok = (Len [j] == ln) && (Elen [j] == eln) ;
  1.1393 +                        /* skip the first element in the list (me) */
  1.1394 +                        for (p = Pe [j] + 1 ; ok && p <= Pe [j] + ln - 1 ; p++)
  1.1395 +                        {
  1.1396 +                            ASSERT (Iw [p] >= 0 && Iw [p] < n) ;
  1.1397 +                            if (W [Iw [p]] != wflg) ok = 0 ;
  1.1398 +                        }
  1.1399 +                        if (ok)
  1.1400 +                        {
  1.1401 +                            /* --------------------------------------------- */
  1.1402 +                            /* found it!  j can be absorbed into i */
  1.1403 +                            /* --------------------------------------------- */
  1.1404 +
  1.1405 +                            AMD_DEBUG1 (("found it! j "ID" => i "ID"\n", j,i));
  1.1406 +                            Pe [j] = FLIP (i) ;
  1.1407 +                            /* both Nv [i] and Nv [j] are negated since they */
  1.1408 +                            /* are in Lme, and the absolute values of each */
  1.1409 +                            /* are the number of variables in i and j: */
  1.1410 +                            Nv [i] += Nv [j] ;
  1.1411 +                            Nv [j] = 0 ;
  1.1412 +                            Elen [j] = EMPTY ;
  1.1413 +                            /* delete j from hash bucket */
  1.1414 +                            ASSERT (j != Next [j]) ;
  1.1415 +                            j = Next [j] ;
  1.1416 +                            Next [jlast] = j ;
  1.1417 +
  1.1418 +                        }
  1.1419 +                        else
  1.1420 +                        {
  1.1421 +                            /* j cannot be absorbed into i */
  1.1422 +                            jlast = j ;
  1.1423 +                            ASSERT (j != Next [j]) ;
  1.1424 +                            j = Next [j] ;
  1.1425 +                        }
  1.1426 +                        ASSERT (j >= EMPTY && j < n) ;
  1.1427 +                    }
  1.1428 +
  1.1429 +                    /* -----------------------------------------------------
  1.1430 +                     * no more variables can be absorbed into i
  1.1431 +                     * go to next i in bucket and clear flag array
  1.1432 +                     * ----------------------------------------------------- */
  1.1433 +
  1.1434 +                    wflg++ ;
  1.1435 +                    i = Next [i] ;
  1.1436 +                    ASSERT (i >= EMPTY && i < n) ;
  1.1437 +
  1.1438 +                }
  1.1439 +            }
  1.1440 +        }
  1.1441 +        AMD_DEBUG2 (("detect done\n")) ;
  1.1442 +
  1.1443 +/* ========================================================================= */
  1.1444 +/* RESTORE DEGREE LISTS AND REMOVE NONPRINCIPAL SUPERVARIABLES FROM ELEMENT */
  1.1445 +/* ========================================================================= */
  1.1446 +
  1.1447 +        p = pme1 ;
  1.1448 +        nleft = n - nel ;
  1.1449 +        for (pme = pme1 ; pme <= pme2 ; pme++)
  1.1450 +        {
  1.1451 +            i = Iw [pme] ;
  1.1452 +            ASSERT (i >= 0 && i < n) ;
  1.1453 +            nvi = -Nv [i] ;
  1.1454 +            AMD_DEBUG3 (("Restore i "ID" "ID"\n", i, nvi)) ;
  1.1455 +            if (nvi > 0)
  1.1456 +            {
  1.1457 +                /* i is a principal variable in Lme */
  1.1458 +                /* restore Nv [i] to signify that i is principal */
  1.1459 +                Nv [i] = nvi ;
  1.1460 +
  1.1461 +                /* --------------------------------------------------------- */
  1.1462 +                /* compute the external degree (add size of current element) */
  1.1463 +                /* --------------------------------------------------------- */
  1.1464 +
  1.1465 +                deg = Degree [i] + degme - nvi ;
  1.1466 +                deg = MIN (deg, nleft - nvi) ;
  1.1467 +                ASSERT (IMPLIES (aggressive, deg > 0) && deg >= 0 && deg < n) ;
  1.1468 +
  1.1469 +                /* --------------------------------------------------------- */
  1.1470 +                /* place the supervariable at the head of the degree list */
  1.1471 +                /* --------------------------------------------------------- */
  1.1472 +
  1.1473 +                inext = Head [deg] ;
  1.1474 +                ASSERT (inext >= EMPTY && inext < n) ;
  1.1475 +                if (inext != EMPTY) Last [inext] = i ;
  1.1476 +                Next [i] = inext ;
  1.1477 +                Last [i] = EMPTY ;
  1.1478 +                Head [deg] = i ;
  1.1479 +
  1.1480 +                /* --------------------------------------------------------- */
  1.1481 +                /* save the new degree, and find the minimum degree */
  1.1482 +                /* --------------------------------------------------------- */
  1.1483 +
  1.1484 +                mindeg = MIN (mindeg, deg) ;
  1.1485 +                Degree [i] = deg ;
  1.1486 +
  1.1487 +                /* --------------------------------------------------------- */
  1.1488 +                /* place the supervariable in the element pattern */
  1.1489 +                /* --------------------------------------------------------- */
  1.1490 +
  1.1491 +                Iw [p++] = i ;
  1.1492 +
  1.1493 +            }
  1.1494 +        }
  1.1495 +        AMD_DEBUG2 (("restore done\n")) ;
  1.1496 +
  1.1497 +/* ========================================================================= */
  1.1498 +/* FINALIZE THE NEW ELEMENT */
  1.1499 +/* ========================================================================= */
  1.1500 +
  1.1501 +        AMD_DEBUG2 (("ME = "ID" DONE\n", me)) ;
  1.1502 +        Nv [me] = nvpiv ;
  1.1503 +        /* save the length of the list for the new element me */
  1.1504 +        Len [me] = p - pme1 ;
  1.1505 +        if (Len [me] == 0)
  1.1506 +        {
  1.1507 +            /* there is nothing left of the current pivot element */
  1.1508 +            /* it is a root of the assembly tree */
  1.1509 +            Pe [me] = EMPTY ;
  1.1510 +            W [me] = 0 ;
  1.1511 +        }
  1.1512 +        if (elenme != 0)
  1.1513 +        {
  1.1514 +            /* element was not constructed in place: deallocate part of */
  1.1515 +            /* it since newly nonprincipal variables may have been removed */
  1.1516 +            pfree = p ;
  1.1517 +        }
  1.1518 +
  1.1519 +        /* The new element has nvpiv pivots and the size of the contribution
  1.1520 +         * block for a multifrontal method is degme-by-degme, not including
  1.1521 +         * the "dense" rows/columns.  If the "dense" rows/columns are included,
  1.1522 +         * the frontal matrix is no larger than
  1.1523 +         * (degme+ndense)-by-(degme+ndense).
  1.1524 +         */
  1.1525 +
  1.1526 +        if (Info != (double *) NULL)
  1.1527 +        {
  1.1528 +            f = nvpiv ;
  1.1529 +            r = degme + ndense ;
  1.1530 +            dmax = MAX (dmax, f + r) ;
  1.1531 +
  1.1532 +            /* number of nonzeros in L (excluding the diagonal) */
  1.1533 +            lnzme = f*r + (f-1)*f/2 ;
  1.1534 +            lnz += lnzme ;
  1.1535 +
  1.1536 +            /* number of divide operations for LDL' and for LU */
  1.1537 +            ndiv += lnzme ;
  1.1538 +
  1.1539 +            /* number of multiply-subtract pairs for LU */
  1.1540 +            s = f*r*r + r*(f-1)*f + (f-1)*f*(2*f-1)/6 ;
  1.1541 +            nms_lu += s ;
  1.1542 +
  1.1543 +            /* number of multiply-subtract pairs for LDL' */
  1.1544 +            nms_ldl += (s + lnzme)/2 ;
  1.1545 +        }
  1.1546 +
  1.1547 +#ifndef NDEBUG
  1.1548 +        AMD_DEBUG2 (("finalize done nel "ID" n "ID"\n   ::::\n", nel, n)) ;
  1.1549 +        for (pme = Pe [me] ; pme <= Pe [me] + Len [me] - 1 ; pme++)
  1.1550 +        {
  1.1551 +              AMD_DEBUG3 ((" "ID"", Iw [pme])) ;
  1.1552 +        }
  1.1553 +        AMD_DEBUG3 (("\n")) ;
  1.1554 +#endif
  1.1555 +
  1.1556 +    }
  1.1557 +
  1.1558 +/* ========================================================================= */
  1.1559 +/* DONE SELECTING PIVOTS */
  1.1560 +/* ========================================================================= */
  1.1561 +
  1.1562 +    if (Info != (double *) NULL)
  1.1563 +    {
  1.1564 +
  1.1565 +        /* count the work to factorize the ndense-by-ndense submatrix */
  1.1566 +        f = ndense ;
  1.1567 +        dmax = MAX (dmax, (double) ndense) ;
  1.1568 +
  1.1569 +        /* number of nonzeros in L (excluding the diagonal) */
  1.1570 +        lnzme = (f-1)*f/2 ;
  1.1571 +        lnz += lnzme ;
  1.1572 +
  1.1573 +        /* number of divide operations for LDL' and for LU */
  1.1574 +        ndiv += lnzme ;
  1.1575 +
  1.1576 +        /* number of multiply-subtract pairs for LU */
  1.1577 +        s = (f-1)*f*(2*f-1)/6 ;
  1.1578 +        nms_lu += s ;
  1.1579 +
  1.1580 +        /* number of multiply-subtract pairs for LDL' */
  1.1581 +        nms_ldl += (s + lnzme)/2 ;
  1.1582 +
  1.1583 +        /* number of nz's in L (excl. diagonal) */
  1.1584 +        Info [AMD_LNZ] = lnz ;
  1.1585 +
  1.1586 +        /* number of divide ops for LU and LDL' */
  1.1587 +        Info [AMD_NDIV] = ndiv ;
  1.1588 +
  1.1589 +        /* number of multiply-subtract pairs for LDL' */
  1.1590 +        Info [AMD_NMULTSUBS_LDL] = nms_ldl ;
  1.1591 +
  1.1592 +        /* number of multiply-subtract pairs for LU */
  1.1593 +        Info [AMD_NMULTSUBS_LU] = nms_lu ;
  1.1594 +
  1.1595 +        /* number of "dense" rows/columns */
  1.1596 +        Info [AMD_NDENSE] = ndense ;
  1.1597 +
  1.1598 +        /* largest front is dmax-by-dmax */
  1.1599 +        Info [AMD_DMAX] = dmax ;
  1.1600 +
  1.1601 +        /* number of garbage collections in AMD */
  1.1602 +        Info [AMD_NCMPA] = ncmpa ;
  1.1603 +
  1.1604 +        /* successful ordering */
  1.1605 +        Info [AMD_STATUS] = AMD_OK ;
  1.1606 +    }
  1.1607 +
  1.1608 +/* ========================================================================= */
  1.1609 +/* POST-ORDERING */
  1.1610 +/* ========================================================================= */
  1.1611 +
  1.1612 +/* -------------------------------------------------------------------------
  1.1613 + * Variables at this point:
  1.1614 + *
  1.1615 + * Pe: holds the elimination tree.  The parent of j is FLIP (Pe [j]),
  1.1616 + *      or EMPTY if j is a root.  The tree holds both elements and
  1.1617 + *      non-principal (unordered) variables absorbed into them.
  1.1618 + *      Dense variables are non-principal and unordered.
  1.1619 + *
  1.1620 + * Elen: holds the size of each element, including the diagonal part.
  1.1621 + *      FLIP (Elen [e]) > 0 if e is an element.  For unordered
  1.1622 + *      variables i, Elen [i] is EMPTY.
  1.1623 + *
  1.1624 + * Nv: Nv [e] > 0 is the number of pivots represented by the element e.
  1.1625 + *      For unordered variables i, Nv [i] is zero.
  1.1626 + *
  1.1627 + * Contents no longer needed:
  1.1628 + *      W, Iw, Len, Degree, Head, Next, Last.
  1.1629 + *
  1.1630 + * The matrix itself has been destroyed.
  1.1631 + *
  1.1632 + * n: the size of the matrix.
  1.1633 + * No other scalars needed (pfree, iwlen, etc.)
  1.1634 + * ------------------------------------------------------------------------- */
  1.1635 +
  1.1636 +    /* restore Pe */
  1.1637 +    for (i = 0 ; i < n ; i++)
  1.1638 +    {
  1.1639 +        Pe [i] = FLIP (Pe [i]) ;
  1.1640 +    }
  1.1641 +
  1.1642 +    /* restore Elen, for output information, and for postordering */
  1.1643 +    for (i = 0 ; i < n ; i++)
  1.1644 +    {
  1.1645 +        Elen [i] = FLIP (Elen [i]) ;
  1.1646 +    }
  1.1647 +
  1.1648 +/* Now the parent of j is Pe [j], or EMPTY if j is a root.  Elen [e] > 0
  1.1649 + * is the size of element e.  Elen [i] is EMPTY for unordered variable i. */
  1.1650 +
  1.1651 +#ifndef NDEBUG
  1.1652 +    AMD_DEBUG2 (("\nTree:\n")) ;
  1.1653 +    for (i = 0 ; i < n ; i++)
  1.1654 +    {
  1.1655 +        AMD_DEBUG2 ((" "ID" parent: "ID"   ", i, Pe [i])) ;
  1.1656 +        ASSERT (Pe [i] >= EMPTY && Pe [i] < n) ;
  1.1657 +        if (Nv [i] > 0)
  1.1658 +        {
  1.1659 +            /* this is an element */
  1.1660 +            e = i ;
  1.1661 +            AMD_DEBUG2 ((" element, size is "ID"\n", Elen [i])) ;
  1.1662 +            ASSERT (Elen [e] > 0) ;
  1.1663 +        }
  1.1664 +        AMD_DEBUG2 (("\n")) ;
  1.1665 +    }
  1.1666 +    AMD_DEBUG2 (("\nelements:\n")) ;
  1.1667 +    for (e = 0 ; e < n ; e++)
  1.1668 +    {
  1.1669 +        if (Nv [e] > 0)
  1.1670 +        {
  1.1671 +            AMD_DEBUG3 (("Element e= "ID" size "ID" nv "ID" \n", e,
  1.1672 +                Elen [e], Nv [e])) ;
  1.1673 +        }
  1.1674 +    }
  1.1675 +    AMD_DEBUG2 (("\nvariables:\n")) ;
  1.1676 +    for (i = 0 ; i < n ; i++)
  1.1677 +    {
  1.1678 +        Int cnt ;
  1.1679 +        if (Nv [i] == 0)
  1.1680 +        {
  1.1681 +            AMD_DEBUG3 (("i unordered: "ID"\n", i)) ;
  1.1682 +            j = Pe [i] ;
  1.1683 +            cnt = 0 ;
  1.1684 +            AMD_DEBUG3 (("  j: "ID"\n", j)) ;
  1.1685 +            if (j == EMPTY)
  1.1686 +            {
  1.1687 +                AMD_DEBUG3 (("  i is a dense variable\n")) ;
  1.1688 +            }
  1.1689 +            else
  1.1690 +            {
  1.1691 +                ASSERT (j >= 0 && j < n) ;
  1.1692 +                while (Nv [j] == 0)
  1.1693 +                {
  1.1694 +                    AMD_DEBUG3 (("      j : "ID"\n", j)) ;
  1.1695 +                    j = Pe [j] ;
  1.1696 +                    AMD_DEBUG3 (("      j:: "ID"\n", j)) ;
  1.1697 +                    cnt++ ;
  1.1698 +                    if (cnt > n) break ;
  1.1699 +                }
  1.1700 +                e = j ;
  1.1701 +                AMD_DEBUG3 (("  got to e: "ID"\n", e)) ;
  1.1702 +            }
  1.1703 +        }
  1.1704 +    }
  1.1705 +#endif
  1.1706 +
  1.1707 +/* ========================================================================= */
  1.1708 +/* compress the paths of the variables */
  1.1709 +/* ========================================================================= */
  1.1710 +
  1.1711 +    for (i = 0 ; i < n ; i++)
  1.1712 +    {
  1.1713 +        if (Nv [i] == 0)
  1.1714 +        {
  1.1715 +
  1.1716 +            /* -------------------------------------------------------------
  1.1717 +             * i is an un-ordered row.  Traverse the tree from i until
  1.1718 +             * reaching an element, e.  The element, e, was the principal
  1.1719 +             * supervariable of i and all nodes in the path from i to when e
  1.1720 +             * was selected as pivot.
  1.1721 +             * ------------------------------------------------------------- */
  1.1722 +
  1.1723 +            AMD_DEBUG1 (("Path compression, i unordered: "ID"\n", i)) ;
  1.1724 +            j = Pe [i] ;
  1.1725 +            ASSERT (j >= EMPTY && j < n) ;
  1.1726 +            AMD_DEBUG3 (("      j: "ID"\n", j)) ;
  1.1727 +            if (j == EMPTY)
  1.1728 +            {
  1.1729 +                /* Skip a dense variable.  It has no parent. */
  1.1730 +                AMD_DEBUG3 (("      i is a dense variable\n")) ;
  1.1731 +                continue ;
  1.1732 +            }
  1.1733 +
  1.1734 +            /* while (j is a variable) */
  1.1735 +            while (Nv [j] == 0)
  1.1736 +            {
  1.1737 +                AMD_DEBUG3 (("          j : "ID"\n", j)) ;
  1.1738 +                j = Pe [j] ;
  1.1739 +                AMD_DEBUG3 (("          j:: "ID"\n", j)) ;
  1.1740 +                ASSERT (j >= 0 && j < n) ;
  1.1741 +            }
  1.1742 +            /* got to an element e */
  1.1743 +            e = j ;
  1.1744 +            AMD_DEBUG3 (("got to e: "ID"\n", e)) ;
  1.1745 +
  1.1746 +            /* -------------------------------------------------------------
  1.1747 +             * traverse the path again from i to e, and compress the path
  1.1748 +             * (all nodes point to e).  Path compression allows this code to
  1.1749 +             * compute in O(n) time.
  1.1750 +             * ------------------------------------------------------------- */
  1.1751 +
  1.1752 +            j = i ;
  1.1753 +            /* while (j is a variable) */
  1.1754 +            while (Nv [j] == 0)
  1.1755 +            {
  1.1756 +                jnext = Pe [j] ;
  1.1757 +                AMD_DEBUG3 (("j "ID" jnext "ID"\n", j, jnext)) ;
  1.1758 +                Pe [j] = e ;
  1.1759 +                j = jnext ;
  1.1760 +                ASSERT (j >= 0 && j < n) ;
  1.1761 +            }
  1.1762 +        }
  1.1763 +    }
  1.1764 +
  1.1765 +/* ========================================================================= */
  1.1766 +/* postorder the assembly tree */
  1.1767 +/* ========================================================================= */
  1.1768 +
  1.1769 +    AMD_postorder (n, Pe, Nv, Elen,
  1.1770 +        W,                      /* output order */
  1.1771 +        Head, Next, Last) ;     /* workspace */
  1.1772 +
  1.1773 +/* ========================================================================= */
  1.1774 +/* compute output permutation and inverse permutation */
  1.1775 +/* ========================================================================= */
  1.1776 +
  1.1777 +    /* W [e] = k means that element e is the kth element in the new
  1.1778 +     * order.  e is in the range 0 to n-1, and k is in the range 0 to
  1.1779 +     * the number of elements.  Use Head for inverse order. */
  1.1780 +
  1.1781 +    for (k = 0 ; k < n ; k++)
  1.1782 +    {
  1.1783 +        Head [k] = EMPTY ;
  1.1784 +        Next [k] = EMPTY ;
  1.1785 +    }
  1.1786 +    for (e = 0 ; e < n ; e++)
  1.1787 +    {
  1.1788 +        k = W [e] ;
  1.1789 +        ASSERT ((k == EMPTY) == (Nv [e] == 0)) ;
  1.1790 +        if (k != EMPTY)
  1.1791 +        {
  1.1792 +            ASSERT (k >= 0 && k < n) ;
  1.1793 +            Head [k] = e ;
  1.1794 +        }
  1.1795 +    }
  1.1796 +
  1.1797 +    /* construct output inverse permutation in Next,
  1.1798 +     * and permutation in Last */
  1.1799 +    nel = 0 ;
  1.1800 +    for (k = 0 ; k < n ; k++)
  1.1801 +    {
  1.1802 +        e = Head [k] ;
  1.1803 +        if (e == EMPTY) break ;
  1.1804 +        ASSERT (e >= 0 && e < n && Nv [e] > 0) ;
  1.1805 +        Next [e] = nel ;
  1.1806 +        nel += Nv [e] ;
  1.1807 +    }
  1.1808 +    ASSERT (nel == n - ndense) ;
  1.1809 +
  1.1810 +    /* order non-principal variables (dense, & those merged into supervar's) */
  1.1811 +    for (i = 0 ; i < n ; i++)
  1.1812 +    {
  1.1813 +        if (Nv [i] == 0)
  1.1814 +        {
  1.1815 +            e = Pe [i] ;
  1.1816 +            ASSERT (e >= EMPTY && e < n) ;
  1.1817 +            if (e != EMPTY)
  1.1818 +            {
  1.1819 +                /* This is an unordered variable that was merged
  1.1820 +                 * into element e via supernode detection or mass
  1.1821 +                 * elimination of i when e became the pivot element.
  1.1822 +                 * Place i in order just before e. */
  1.1823 +                ASSERT (Next [i] == EMPTY && Nv [e] > 0) ;
  1.1824 +                Next [i] = Next [e] ;
  1.1825 +                Next [e]++ ;
  1.1826 +            }
  1.1827 +            else
  1.1828 +            {
  1.1829 +                /* This is a dense unordered variable, with no parent.
  1.1830 +                 * Place it last in the output order. */
  1.1831 +                Next [i] = nel++ ;
  1.1832 +            }
  1.1833 +        }
  1.1834 +    }
  1.1835 +    ASSERT (nel == n) ;
  1.1836 +
  1.1837 +    AMD_DEBUG2 (("\n\nPerm:\n")) ;
  1.1838 +    for (i = 0 ; i < n ; i++)
  1.1839 +    {
  1.1840 +        k = Next [i] ;
  1.1841 +        ASSERT (k >= 0 && k < n) ;
  1.1842 +        Last [k] = i ;
  1.1843 +        AMD_DEBUG2 (("   perm ["ID"] = "ID"\n", k, i)) ;
  1.1844 +    }
  1.1845 +}