src/amd/amd_1.c
changeset 1 c445c931472f
     1.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     1.2 +++ b/src/amd/amd_1.c	Mon Dec 06 13:09:21 2010 +0100
     1.3 @@ -0,0 +1,181 @@
     1.4 +/* ========================================================================= */
     1.5 +/* === AMD_1 =============================================================== */
     1.6 +/* ========================================================================= */
     1.7 +
     1.8 +/* ------------------------------------------------------------------------- */
     1.9 +/* AMD, Copyright (c) Timothy A. Davis,                                      */
    1.10 +/* Patrick R. Amestoy, and Iain S. Duff.  See ../README.txt for License.     */
    1.11 +/* email: davis at cise.ufl.edu    CISE Department, Univ. of Florida.        */
    1.12 +/* web: http://www.cise.ufl.edu/research/sparse/amd                          */
    1.13 +/* ------------------------------------------------------------------------- */
    1.14 +
    1.15 +/* AMD_1: Construct A+A' for a sparse matrix A and perform the AMD ordering.
    1.16 + *
    1.17 + * The n-by-n sparse matrix A can be unsymmetric.  It is stored in MATLAB-style
    1.18 + * compressed-column form, with sorted row indices in each column, and no
    1.19 + * duplicate entries.  Diagonal entries may be present, but they are ignored.
    1.20 + * Row indices of column j of A are stored in Ai [Ap [j] ... Ap [j+1]-1].
    1.21 + * Ap [0] must be zero, and nz = Ap [n] is the number of entries in A.  The
    1.22 + * size of the matrix, n, must be greater than or equal to zero.
    1.23 + *
    1.24 + * This routine must be preceded by a call to AMD_aat, which computes the
    1.25 + * number of entries in each row/column in A+A', excluding the diagonal.
    1.26 + * Len [j], on input, is the number of entries in row/column j of A+A'.  This
    1.27 + * routine constructs the matrix A+A' and then calls AMD_2.  No error checking
    1.28 + * is performed (this was done in AMD_valid).
    1.29 + */
    1.30 +
    1.31 +#include "amd_internal.h"
    1.32 +
    1.33 +GLOBAL void AMD_1
    1.34 +(
    1.35 +    Int n,              /* n > 0 */
    1.36 +    const Int Ap [ ],   /* input of size n+1, not modified */
    1.37 +    const Int Ai [ ],   /* input of size nz = Ap [n], not modified */
    1.38 +    Int P [ ],          /* size n output permutation */
    1.39 +    Int Pinv [ ],       /* size n output inverse permutation */
    1.40 +    Int Len [ ],        /* size n input, undefined on output */
    1.41 +    Int slen,           /* slen >= sum (Len [0..n-1]) + 7n,
    1.42 +                         * ideally slen = 1.2 * sum (Len) + 8n */
    1.43 +    Int S [ ],          /* size slen workspace */
    1.44 +    double Control [ ], /* input array of size AMD_CONTROL */
    1.45 +    double Info [ ]     /* output array of size AMD_INFO */
    1.46 +)
    1.47 +{
    1.48 +    Int i, j, k, p, pfree, iwlen, pj, p1, p2, pj2, *Iw, *Pe, *Nv, *Head,
    1.49 +        *Elen, *Degree, *s, *W, *Sp, *Tp ;
    1.50 +
    1.51 +    /* --------------------------------------------------------------------- */
    1.52 +    /* construct the matrix for AMD_2 */
    1.53 +    /* --------------------------------------------------------------------- */
    1.54 +
    1.55 +    ASSERT (n > 0) ;
    1.56 +
    1.57 +    iwlen = slen - 6*n ;
    1.58 +    s = S ;
    1.59 +    Pe = s ;        s += n ;
    1.60 +    Nv = s ;        s += n ;
    1.61 +    Head = s ;      s += n ;
    1.62 +    Elen = s ;      s += n ;
    1.63 +    Degree = s ;    s += n ;
    1.64 +    W = s ;         s += n ;
    1.65 +    Iw = s ;        s += iwlen ;
    1.66 +
    1.67 +    ASSERT (AMD_valid (n, n, Ap, Ai) == AMD_OK) ;
    1.68 +
    1.69 +    /* construct the pointers for A+A' */
    1.70 +    Sp = Nv ;                   /* use Nv and W as workspace for Sp and Tp [ */
    1.71 +    Tp = W ;
    1.72 +    pfree = 0 ;
    1.73 +    for (j = 0 ; j < n ; j++)
    1.74 +    {
    1.75 +        Pe [j] = pfree ;
    1.76 +        Sp [j] = pfree ;
    1.77 +        pfree += Len [j] ;
    1.78 +    }
    1.79 +
    1.80 +    /* Note that this restriction on iwlen is slightly more restrictive than
    1.81 +     * what is strictly required in AMD_2.  AMD_2 can operate with no elbow
    1.82 +     * room at all, but it will be very slow.  For better performance, at
    1.83 +     * least size-n elbow room is enforced. */
    1.84 +    ASSERT (iwlen >= pfree + n) ;
    1.85 +
    1.86 +#ifndef NDEBUG
    1.87 +    for (p = 0 ; p < iwlen ; p++) Iw [p] = EMPTY ;
    1.88 +#endif
    1.89 +
    1.90 +    for (k = 0 ; k < n ; k++)
    1.91 +    {
    1.92 +        AMD_DEBUG1 (("Construct row/column k= "ID" of A+A'\n", k))  ;
    1.93 +        p1 = Ap [k] ;
    1.94 +        p2 = Ap [k+1] ;
    1.95 +
    1.96 +        /* construct A+A' */
    1.97 +        for (p = p1 ; p < p2 ; )
    1.98 +        {
    1.99 +            /* scan the upper triangular part of A */
   1.100 +            j = Ai [p] ;
   1.101 +            ASSERT (j >= 0 && j < n) ;
   1.102 +            if (j < k)
   1.103 +            {
   1.104 +                /* entry A (j,k) in the strictly upper triangular part */
   1.105 +                ASSERT (Sp [j] < (j == n-1 ? pfree : Pe [j+1])) ;
   1.106 +                ASSERT (Sp [k] < (k == n-1 ? pfree : Pe [k+1])) ;
   1.107 +                Iw [Sp [j]++] = k ;
   1.108 +                Iw [Sp [k]++] = j ;
   1.109 +                p++ ;
   1.110 +            }
   1.111 +            else if (j == k)
   1.112 +            {
   1.113 +                /* skip the diagonal */
   1.114 +                p++ ;
   1.115 +                break ;
   1.116 +            }
   1.117 +            else /* j > k */
   1.118 +            {
   1.119 +                /* first entry below the diagonal */
   1.120 +                break ;
   1.121 +            }
   1.122 +            /* scan lower triangular part of A, in column j until reaching
   1.123 +             * row k.  Start where last scan left off. */
   1.124 +            ASSERT (Ap [j] <= Tp [j] && Tp [j] <= Ap [j+1]) ;
   1.125 +            pj2 = Ap [j+1] ;
   1.126 +            for (pj = Tp [j] ; pj < pj2 ; )
   1.127 +            {
   1.128 +                i = Ai [pj] ;
   1.129 +                ASSERT (i >= 0 && i < n) ;
   1.130 +                if (i < k)
   1.131 +                {
   1.132 +                    /* A (i,j) is only in the lower part, not in upper */
   1.133 +                    ASSERT (Sp [i] < (i == n-1 ? pfree : Pe [i+1])) ;
   1.134 +                    ASSERT (Sp [j] < (j == n-1 ? pfree : Pe [j+1])) ;
   1.135 +                    Iw [Sp [i]++] = j ;
   1.136 +                    Iw [Sp [j]++] = i ;
   1.137 +                    pj++ ;
   1.138 +                }
   1.139 +                else if (i == k)
   1.140 +                {
   1.141 +                    /* entry A (k,j) in lower part and A (j,k) in upper */
   1.142 +                    pj++ ;
   1.143 +                    break ;
   1.144 +                }
   1.145 +                else /* i > k */
   1.146 +                {
   1.147 +                    /* consider this entry later, when k advances to i */
   1.148 +                    break ;
   1.149 +                }
   1.150 +            }
   1.151 +            Tp [j] = pj ;
   1.152 +        }
   1.153 +        Tp [k] = p ;
   1.154 +    }
   1.155 +
   1.156 +    /* clean up, for remaining mismatched entries */
   1.157 +    for (j = 0 ; j < n ; j++)
   1.158 +    {
   1.159 +        for (pj = Tp [j] ; pj < Ap [j+1] ; pj++)
   1.160 +        {
   1.161 +            i = Ai [pj] ;
   1.162 +            ASSERT (i >= 0 && i < n) ;
   1.163 +            /* A (i,j) is only in the lower part, not in upper */
   1.164 +            ASSERT (Sp [i] < (i == n-1 ? pfree : Pe [i+1])) ;
   1.165 +            ASSERT (Sp [j] < (j == n-1 ? pfree : Pe [j+1])) ;
   1.166 +            Iw [Sp [i]++] = j ;
   1.167 +            Iw [Sp [j]++] = i ;
   1.168 +        }
   1.169 +    }
   1.170 +
   1.171 +#ifndef NDEBUG
   1.172 +    for (j = 0 ; j < n-1 ; j++) ASSERT (Sp [j] == Pe [j+1]) ;
   1.173 +    ASSERT (Sp [n-1] == pfree) ;
   1.174 +#endif
   1.175 +
   1.176 +    /* Tp and Sp no longer needed ] */
   1.177 +
   1.178 +    /* --------------------------------------------------------------------- */
   1.179 +    /* order the matrix */
   1.180 +    /* --------------------------------------------------------------------- */
   1.181 +
   1.182 +    AMD_2 (n, Pe, Iw, Len, iwlen, pfree,
   1.183 +        Nv, Pinv, P, Head, Elen, Degree, W, Control, Info) ;
   1.184 +}