00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00030
00031
00032
00033
00034
00035
00036
00037
00038
00039
00040
00041
00042
00043
00044
00045
00046
00047
00048
00049
00050
00051
00052
00053 #ifdef HAVE_CONFIG_H
00054 #include <config.h>
00055 #endif
00056
00057 #include "ov.h"
00058 #include "defun-dld.h"
00059 #include "error.h"
00060 #include "gripes.h"
00061 #include "utils.h"
00062 #include "oct-locbuf.h"
00063
00064 #include "ov-re-mat.h"
00065 #include "ov-re-sparse.h"
00066 #include "ov-cx-sparse.h"
00067 #include "oct-sparse.h"
00068
00069
00070 struct CMK_Node
00071 {
00072
00073 octave_idx_type id;
00074
00075 octave_idx_type deg;
00076
00077 octave_idx_type dist;
00078 };
00079
00080
00081
00082
00083
00084
00085
00086 inline static void
00087 Q_enq (CMK_Node *Q, octave_idx_type N, octave_idx_type& qt, const CMK_Node& o)
00088 {
00089 Q[qt] = o;
00090 qt = (qt + 1) % (N + 1);
00091 }
00092
00093
00094
00095 inline static CMK_Node
00096 Q_deq (CMK_Node * Q, octave_idx_type N, octave_idx_type& qh)
00097 {
00098 CMK_Node r = Q[qh];
00099 qh = (qh + 1) % (N + 1);
00100 return r;
00101 }
00102
00103
00104 #define Q_empty(Q, N, qh, qt) ((qh) == (qt))
00105
00106
00107
00108
00109 #define LEFT(i) (((i) << 1) + 1) // = (2*(i)+1)
00110
00111 #define RIGHT(i) (((i) << 1) + 2) // = (2*(i)+2)
00112
00113 #define PARENT(i) (((i) - 1) >> 1) // = floor(((i)-1)/2)
00114
00115
00116
00117
00118 static void
00119 H_heapify_min (CMK_Node *A, octave_idx_type i, octave_idx_type size)
00120 {
00121 octave_idx_type j = i;
00122 for (;;)
00123 {
00124 octave_idx_type l = LEFT(j);
00125 octave_idx_type r = RIGHT(j);
00126
00127 octave_idx_type smallest;
00128 if (l < size && A[l].deg < A[j].deg)
00129 smallest = l;
00130 else
00131 smallest = j;
00132
00133 if (r < size && A[r].deg < A[smallest].deg)
00134 smallest = r;
00135
00136 if (smallest != j)
00137 {
00138 CMK_Node tmp = A[j];
00139 A[j] = A[smallest];
00140 A[smallest] = tmp;
00141 j = smallest;
00142 }
00143 else
00144 break;
00145 }
00146 }
00147
00148
00149
00150 static void
00151 H_insert (CMK_Node *H, octave_idx_type& h, const CMK_Node& o)
00152 {
00153 octave_idx_type i = h++;
00154
00155 H[i] = o;
00156
00157 if (i == 0)
00158 return;
00159 do
00160 {
00161 octave_idx_type p = PARENT(i);
00162 if (H[i].deg < H[p].deg)
00163 {
00164 CMK_Node tmp = H[i];
00165 H[i] = H[p];
00166 H[p] = tmp;
00167
00168 i = p;
00169 }
00170 else
00171 break;
00172 }
00173 while (i > 0);
00174 }
00175
00176
00177
00178
00179 inline static CMK_Node
00180 H_remove_min (CMK_Node *H, octave_idx_type& h, int reorg)
00181 {
00182 CMK_Node r = H[0];
00183 H[0] = H[--h];
00184 if (reorg)
00185 H_heapify_min(H, 0, h);
00186 return r;
00187 }
00188
00189
00190 #define H_empty(H, h) ((h) == 0)
00191
00192
00193
00194
00195 static octave_idx_type
00196 find_starting_node (octave_idx_type N, const octave_idx_type *ridx,
00197 const octave_idx_type *cidx, const octave_idx_type *ridx2,
00198 const octave_idx_type *cidx2, octave_idx_type *D,
00199 octave_idx_type start)
00200 {
00201 CMK_Node w;
00202
00203 OCTAVE_LOCAL_BUFFER (CMK_Node, Q, N+1);
00204 boolNDArray btmp (dim_vector (1, N), false);
00205 bool *visit = btmp.fortran_vec ();
00206
00207 octave_idx_type qh = 0;
00208 octave_idx_type qt = 0;
00209 CMK_Node x;
00210 x.id = start;
00211 x.deg = D[start];
00212 x.dist = 0;
00213 Q_enq (Q, N, qt, x);
00214 visit[start] = true;
00215
00216
00217 octave_idx_type level = 0;
00218
00219 octave_idx_type max_dist = 0;
00220
00221 for (;;)
00222 {
00223 while (! Q_empty (Q, N, qh, qt))
00224 {
00225 CMK_Node v = Q_deq (Q, N, qh);
00226
00227 if (v.dist > x.dist || (v.id != x.id && v.deg > x.deg))
00228 x = v;
00229
00230 octave_idx_type i = v.id;
00231
00232
00233 octave_idx_type j1 = cidx[i];
00234 octave_idx_type j2 = cidx2[i];
00235 while (j1 < cidx[i+1] || j2 < cidx2[i+1])
00236 {
00237 OCTAVE_QUIT;
00238
00239 if (j1 == cidx[i+1])
00240 {
00241 octave_idx_type r2 = ridx2[j2++];
00242 if (! visit[r2])
00243 {
00244
00245 w.id = r2;
00246 w.deg = D[r2];
00247 w.dist = v.dist+1;
00248 Q_enq (Q, N, qt, w);
00249 visit[r2] = true;
00250
00251 if (w.dist > level)
00252 level = w.dist;
00253 }
00254 }
00255 else if (j2 == cidx2[i+1])
00256 {
00257 octave_idx_type r1 = ridx[j1++];
00258 if (! visit[r1])
00259 {
00260
00261 w.id = r1;
00262 w.deg = D[r1];
00263 w.dist = v.dist+1;
00264 Q_enq (Q, N, qt, w);
00265 visit[r1] = true;
00266
00267 if (w.dist > level)
00268 level = w.dist;
00269 }
00270 }
00271 else
00272 {
00273 octave_idx_type r1 = ridx[j1];
00274 octave_idx_type r2 = ridx2[j2];
00275 if (r1 <= r2)
00276 {
00277 if (! visit[r1])
00278 {
00279 w.id = r1;
00280 w.deg = D[r1];
00281 w.dist = v.dist+1;
00282 Q_enq (Q, N, qt, w);
00283 visit[r1] = true;
00284
00285 if (w.dist > level)
00286 level = w.dist;
00287 }
00288 j1++;
00289 if (r1 == r2)
00290 j2++;
00291 }
00292 else
00293 {
00294 if (! visit[r2])
00295 {
00296 w.id = r2;
00297 w.deg = D[r2];
00298 w.dist = v.dist+1;
00299 Q_enq (Q, N, qt, w);
00300 visit[r2] = true;
00301
00302 if (w.dist > level)
00303 level = w.dist;
00304 }
00305 j2++;
00306 }
00307 }
00308 }
00309 }
00310
00311 if (max_dist < x.dist)
00312 {
00313 max_dist = x.dist;
00314
00315 for (octave_idx_type i = 0; i < N; i++)
00316 visit[i] = false;
00317
00318 visit[x.id] = true;
00319 x.dist = 0;
00320 qt = qh = 0;
00321 Q_enq (Q, N, qt, x);
00322 }
00323 else
00324 break;
00325 }
00326 return x.id;
00327 }
00328
00329
00330
00331
00332
00333 static octave_idx_type
00334 calc_degrees (octave_idx_type N, const octave_idx_type *ridx,
00335 const octave_idx_type *cidx, octave_idx_type *D)
00336 {
00337 octave_idx_type max_deg = 0;
00338
00339 for (octave_idx_type i = 0; i < N; i++)
00340 D[i] = 0;
00341
00342 for (octave_idx_type j = 0; j < N; j++)
00343 {
00344 for (octave_idx_type i = cidx[j]; i < cidx[j+1]; i++)
00345 {
00346 OCTAVE_QUIT;
00347 octave_idx_type k = ridx[i];
00348
00349 D[k]++;
00350 if (D[k] > max_deg)
00351 max_deg = D[k];
00352
00353
00354 if (k != j)
00355 {
00356 bool found = false;
00357 for (octave_idx_type l = cidx[k]; l < cidx[k + 1]; l++)
00358 {
00359 OCTAVE_QUIT;
00360
00361 if (ridx[l] == j)
00362 {
00363 found = true;
00364 break;
00365 }
00366 else if (ridx[l] > j)
00367 break;
00368 }
00369
00370 if (! found)
00371 {
00372
00373 D[j]++;
00374 if (D[j] > max_deg)
00375 max_deg = D[j];
00376 }
00377 }
00378 }
00379 }
00380 return max_deg;
00381 }
00382
00383
00384
00385 static void
00386 transpose (octave_idx_type N, const octave_idx_type *ridx,
00387 const octave_idx_type *cidx, octave_idx_type *ridx2,
00388 octave_idx_type *cidx2)
00389 {
00390 octave_idx_type nz = cidx[N];
00391
00392 OCTAVE_LOCAL_BUFFER (octave_idx_type, w, N + 1);
00393 for (octave_idx_type i = 0; i < N; i++)
00394 w[i] = 0;
00395 for (octave_idx_type i = 0; i < nz; i++)
00396 w[ridx[i]]++;
00397 nz = 0;
00398 for (octave_idx_type i = 0; i < N; i++)
00399 {
00400 OCTAVE_QUIT;
00401 cidx2[i] = nz;
00402 nz += w[i];
00403 w[i] = cidx2[i];
00404 }
00405 cidx2[N] = nz;
00406 w[N] = nz;
00407
00408 for (octave_idx_type j = 0; j < N; j++)
00409 for (octave_idx_type k = cidx[j]; k < cidx[j + 1]; k++)
00410 {
00411 OCTAVE_QUIT;
00412 octave_idx_type q = w [ridx[k]]++;
00413 ridx2[q] = j;
00414 }
00415 }
00416
00417
00418 DEFUN_DLD (symrcm, args, ,
00419 "-*- texinfo -*-\n\
00420 @deftypefn {Loadable Function} {@var{p} =} symrcm (@var{S})\n\
00421 Return the symmetric reverse Cuthill-McKee permutation of @var{S}.\n\
00422 @var{p} is a permutation vector such that\n\
00423 @code{@var{S}(@var{p}, @var{p})} tends to have its diagonal elements\n\
00424 closer to the diagonal than @var{S}. This is a good preordering for LU\n\
00425 or Cholesky@tie{}factorization of matrices that come from 'long, skinny'\n\
00426 problems. It works for both symmetric and asymmetric @var{S}.\n\
00427 \n\
00428 The algorithm represents a heuristic approach to the NP-complete\n\
00429 bandwidth minimization problem. The implementation is based in the\n\
00430 descriptions found in\n\
00431 \n\
00432 E. Cuthill, J. McKee. @cite{Reducing the Bandwidth of Sparse Symmetric\n\
00433 Matrices}. Proceedings of the 24th ACM National Conference, 157--172\n\
00434 1969, Brandon Press, New Jersey.\n\
00435 \n\
00436 A. George, J.W.H. Liu. @cite{Computer Solution of Large Sparse\n\
00437 Positive Definite Systems}, Prentice Hall Series in Computational\n\
00438 Mathematics, ISBN 0-13-165274-5, 1981.\n\
00439 \n\
00440 @seealso{colperm, colamd, symamd}\n\
00441 @end deftypefn")
00442 {
00443 octave_value retval;
00444 int nargin = args.length ();
00445
00446 if (nargin != 1)
00447 {
00448 print_usage ();
00449 return retval;
00450 }
00451
00452 octave_value arg = args(0);
00453
00454
00455
00456 octave_idx_type *cidx;
00457 octave_idx_type *ridx;
00458 SparseMatrix Ar;
00459 SparseComplexMatrix Ac;
00460
00461 if (arg.is_real_type ())
00462 {
00463 Ar = arg.sparse_matrix_value ();
00464
00465 cidx = Ar.xcidx ();
00466 ridx = Ar.xridx ();
00467 }
00468 else
00469 {
00470 Ac = arg.sparse_complex_matrix_value ();
00471 cidx = Ac.xcidx ();
00472 ridx = Ac.xridx ();
00473 }
00474
00475 if (error_state)
00476 return retval;
00477
00478 octave_idx_type nr = arg.rows ();
00479 octave_idx_type nc = arg.columns ();
00480
00481 if (nr != nc)
00482 {
00483 gripe_square_matrix_required ("symrcm");
00484 return retval;
00485 }
00486
00487 if (nr == 0 && nc == 0)
00488 return octave_value (NDArray (dim_vector (1, 0)));
00489
00490
00491 octave_idx_type s = 0;
00492
00493
00494 octave_idx_type qt = 0, qh = 0;
00495 CMK_Node v, w;
00496
00497 octave_idx_type N = nr;
00498
00499 OCTAVE_LOCAL_BUFFER (octave_idx_type, cidx2, N + 1);
00500 OCTAVE_LOCAL_BUFFER (octave_idx_type, ridx2, cidx[N]);
00501 transpose (N, ridx, cidx, ridx2, cidx2);
00502
00503
00504 NDArray P (dim_vector (1, N));
00505
00506
00507 OCTAVE_LOCAL_BUFFER (octave_idx_type, D, N);
00508 octave_idx_type max_deg = calc_degrees (N, ridx, cidx, D);
00509
00510
00511
00512 if (max_deg == 0)
00513 {
00514 for (octave_idx_type i = 0; i < N; i++)
00515 P(i) = i;
00516 return octave_value (P);
00517 }
00518
00519
00520
00521 OCTAVE_LOCAL_BUFFER (CMK_Node, S, max_deg);
00522
00523
00524
00525 OCTAVE_LOCAL_BUFFER (CMK_Node, Q, N+1);
00526
00527
00528 octave_idx_type c = -1;
00529
00530
00531
00532
00533
00534 octave_idx_type B = 0;
00535
00536
00537
00538
00539 boolNDArray btmp (dim_vector (1, N), false);
00540 bool *visit = btmp.fortran_vec ();
00541
00542 do
00543 {
00544
00545 octave_idx_type i;
00546 for (i = 0; i < N; i++)
00547 if (! visit[i])
00548 break;
00549
00550
00551 v.id = find_starting_node (N, ridx, cidx, ridx2, cidx2, D, i);
00552
00553
00554
00555
00556 v.deg = D[v.id];
00557 v.dist = 0;
00558 visit[v.id] = true;
00559 Q_enq (Q, N, qt, v);
00560
00561
00562
00563
00564
00565 octave_idx_type Bsub = 0;
00566
00567 octave_idx_type level = 0;
00568
00569 octave_idx_type level_N = 1;
00570
00571 while (! Q_empty (Q, N, qh, qt))
00572 {
00573 v = Q_deq (Q, N, qh);
00574 i = v.id;
00575
00576 c++;
00577
00578
00579
00580
00581
00582
00583
00584 P(c) = i;
00585
00586
00587 s = 0;
00588 octave_idx_type j1 = cidx[i];
00589 octave_idx_type j2 = cidx2[i];
00590
00591 OCTAVE_QUIT;
00592 while (j1 < cidx[i+1] || j2 < cidx2[i+1])
00593 {
00594 OCTAVE_QUIT;
00595 if (j1 == cidx[i+1])
00596 {
00597 octave_idx_type r2 = ridx2[j2++];
00598 if (! visit[r2])
00599 {
00600
00601 w.id = r2;
00602 w.deg = D[r2];
00603 w.dist = v.dist+1;
00604 H_insert(S, s, w);
00605 visit[r2] = true;
00606 }
00607 }
00608 else if (j2 == cidx2[i+1])
00609 {
00610 octave_idx_type r1 = ridx[j1++];
00611 if (! visit[r1])
00612 {
00613 w.id = r1;
00614 w.deg = D[r1];
00615 w.dist = v.dist+1;
00616 H_insert(S, s, w);
00617 visit[r1] = true;
00618 }
00619 }
00620 else
00621 {
00622 octave_idx_type r1 = ridx[j1];
00623 octave_idx_type r2 = ridx2[j2];
00624 if (r1 <= r2)
00625 {
00626 if (! visit[r1])
00627 {
00628 w.id = r1;
00629 w.deg = D[r1];
00630 w.dist = v.dist+1;
00631 H_insert(S, s, w);
00632 visit[r1] = true;
00633 }
00634 j1++;
00635 if (r1 == r2)
00636 j2++;
00637 }
00638 else
00639 {
00640 if (! visit[r2])
00641 {
00642 w.id = r2;
00643 w.deg = D[r2];
00644 w.dist = v.dist+1;
00645 H_insert(S, s, w);
00646 visit[r2] = true;
00647 }
00648 j2++;
00649 }
00650 }
00651 }
00652
00653
00654 while (! H_empty (S, s))
00655 {
00656 OCTAVE_QUIT;
00657
00658
00659 v = H_remove_min(S, s, 1);
00660
00661
00662 if (v.dist > level)
00663 {
00664
00665
00666
00667
00668 if (Bsub < level_N)
00669 Bsub = level_N;
00670
00671 level = v.dist;
00672
00673 level_N = 1;
00674 }
00675 else
00676 {
00677
00678
00679 level_N++;
00680 }
00681
00682
00683 Q_enq (Q, N, qt, v);
00684 }
00685
00686
00687 if (Bsub < level_N)
00688 Bsub = level_N;
00689 }
00690
00691
00692
00693 if (Bsub > B)
00694 B = Bsub;
00695 }
00696
00697 while (c+1 < N);
00698
00699
00700 s = N / 2 - 1;
00701 for (octave_idx_type i = 0, j = N - 1; i <= s; i++, j--)
00702 {
00703 double tmp = P.elem(i);
00704 P.elem(i) = P.elem(j);
00705 P.elem(j) = tmp;
00706 }
00707
00708
00709 return octave_value (P+1);
00710 }