GNU Octave 7.1.0
A high-level interpreted language, primarily intended for numerical computations, mostly compatible with Matlab
symrcm.cc
Go to the documentation of this file.
1////////////////////////////////////////////////////////////////////////
2//
3// Copyright (C) 2007-2022 The Octave Project Developers
4//
5// See the file COPYRIGHT.md in the top-level directory of this
6// distribution or <https://octave.org/copyright/>.
7//
8// This file is part of Octave.
9//
10// Octave is free software: you can redistribute it and/or modify it
11// under the terms of the GNU General Public License as published by
12// the Free Software Foundation, either version 3 of the License, or
13// (at your option) any later version.
14//
15// Octave is distributed in the hope that it will be useful, but
16// WITHOUT ANY WARRANTY; without even the implied warranty of
17// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18// GNU General Public License for more details.
19//
20// You should have received a copy of the GNU General Public License
21// along with Octave; see the file COPYING. If not, see
22// <https://www.gnu.org/licenses/>.
23//
24////////////////////////////////////////////////////////////////////////
25
/*
An implementation of the Reverse Cuthill-McKee algorithm (symrcm)

The implementation of this algorithm is based on the descriptions found in

@INPROCEEDINGS{,
 author = {E. Cuthill and J. McKee},
 title = {Reducing the Bandwidth of Sparse Symmetric Matrices},
 booktitle = {Proceedings of the 24th ACM National Conference},
 publisher = {Brandon Press},
 pages = {157 -- 172},
 location = {New Jersey},
 year = {1969}
}

@BOOK{,
 author = {Alan George and Joseph W. H. Liu},
 title = {Computer Solution of Large Sparse Positive Definite Systems},
 publisher = {Prentice Hall Series in Computational Mathematics},
 ISBN = {0-13-165274-5},
 year = {1981}
}

The algorithm represents a heuristic approach to the NP-complete minimum
bandwidth problem.

Written by Michael Weitzel <michael.weitzel@@uni-siegen.de>
 <weitzel@@ldknet.org>
*/
55
56#if defined (HAVE_CONFIG_H)
57# include "config.h"
58#endif
59
60#include <algorithm>
61
62#include "CSparse.h"
63#include "boolNDArray.h"
64#include "dNDArray.h"
65#include "dSparse.h"
66#include "oct-locbuf.h"
67#include "oct-sparse.h"
68#include "quit.h"
69
70#include "defun.h"
71#include "errwarn.h"
72#include "ov.h"
73#include "ovl.h"
74
75OCTAVE_NAMESPACE_BEGIN
76
77// A node struct for the Cuthill-McKee algorithm
79{
80 // the node's id (matrix row index)
82 // the node's degree
84 // minimal distance to the root of the spanning tree
86};
87
88// A simple queue.
89// Queues Q have a fixed maximum size N (rows,cols of the matrix) and are
90// stored in an array. qh and qt point to queue head and tail.
91
92// Enqueue operation (adds a node "o" at the tail)
93
94inline static void
96{
97 Q[qt] = o;
98 qt = (qt + 1) % (N + 1);
99}
100
101// Dequeue operation (removes a node from the head)
102
103inline static CMK_Node
105{
106 CMK_Node r = Q[qh];
107 qh = (qh + 1) % (N + 1);
108 return r;
109}
110
// Predicate (queue empty): true when head and tail indices coincide.
// Q and N are accepted for symmetry with Q_enq/Q_deq but are not evaluated.
#define Q_empty(Q, N, qh, qt) ((qh) == (qt))
113
// A simple, array-based binary heap (used as a priority queue for nodes).
// Entry 0 is the root; the children of entry i live at 2*i+1 and 2*i+2.

// the left descendant of entry i
#define LEFT(i) (((i) << 1) + 1)    // = (2*(i)+1)
// the right descendant of entry i
#define RIGHT(i) (((i) << 1) + 2)   // = (2*(i)+2)
// the parent of entry i (only meaningful for i > 0)
#define PARENT(i) (((i) - 1) >> 1)  // = floor(((i)-1)/2)
122
123// Builds a min-heap (the root contains the smallest element). A is an array
124// with the graph's nodes, i is a starting position, size is the length of A.
125
126static void
128{
129 octave_idx_type j = i;
130 for (;;)
131 {
132 octave_idx_type l = LEFT(j);
133 octave_idx_type r = RIGHT(j);
134
135 octave_idx_type smallest;
136 if (l < size && A[l].deg < A[j].deg)
137 smallest = l;
138 else
139 smallest = j;
140
141 if (r < size && A[r].deg < A[smallest].deg)
142 smallest = r;
143
144 if (smallest != j)
145 {
146 std::swap (A[j], A[smallest]);
147 j = smallest;
148 }
149 else
150 break;
151 }
152}
153
154// Heap operation insert. Running time is O(log(n))
155
156static void
158{
159 octave_idx_type i = h++;
160
161 H[i] = o;
162
163 if (i == 0)
164 return;
165 do
166 {
167 octave_idx_type p = PARENT(i);
168 if (H[i].deg < H[p].deg)
169 {
170 std::swap (H[i], H[p]);
171
172 i = p;
173 }
174 else
175 break;
176 }
177 while (i > 0);
178}
179
180// Heap operation remove-min. Removes the smallest element in O(1) and
181// reorganizes the heap optionally in O(log(n))
182
183inline static CMK_Node
184H_remove_min (CMK_Node *H, octave_idx_type& h, int reorg/*=1*/)
185{
186 CMK_Node r = H[0];
187 H[0] = H[--h];
188 if (reorg)
189 H_heapify_min (H, 0, h);
190 return r;
191}
192
// Predicate (heap empty): true when the entry count h is zero.
// H is accepted for symmetry with the other heap operations but is not
// evaluated.
#define H_empty(H, h) ((h) == 0)
195
196// Helper function for the Cuthill-McKee algorithm. Tries to determine a
197// pseudo-peripheral node of the graph as starting node.
198
199static octave_idx_type
201 const octave_idx_type *cidx, const octave_idx_type *ridx2,
202 const octave_idx_type *cidx2, octave_idx_type *D,
203 octave_idx_type start)
204{
205 CMK_Node w;
206
208 boolNDArray btmp (dim_vector (1, N), false);
209 bool *visit = btmp.fortran_vec ();
210
211 octave_idx_type qh = 0;
212 octave_idx_type qt = 0;
213 CMK_Node x;
214 x.id = start;
215 x.deg = D[start];
216 x.dist = 0;
217 Q_enq (Q, N, qt, x);
218 visit[start] = true;
219
220 // distance level
221 octave_idx_type level = 0;
222 // current largest "eccentricity"
223 octave_idx_type max_dist = 0;
224
225 for (;;)
226 {
227 while (! Q_empty (Q, N, qh, qt))
228 {
229 CMK_Node v = Q_deq (Q, N, qh);
230
231 if (v.dist > x.dist || (v.id != x.id && v.deg > x.deg))
232 x = v;
233
234 octave_idx_type i = v.id;
235
236 // add all unvisited neighbors to the queue
237 octave_idx_type j1 = cidx[i];
238 octave_idx_type j2 = cidx2[i];
239 while (j1 < cidx[i+1] || j2 < cidx2[i+1])
240 {
241 octave_quit ();
242
243 if (j1 == cidx[i+1])
244 {
245 octave_idx_type r2 = ridx2[j2++];
246 if (! visit[r2])
247 {
248 // the distance of node j is dist(i)+1
249 w.id = r2;
250 w.deg = D[r2];
251 w.dist = v.dist+1;
252 Q_enq (Q, N, qt, w);
253 visit[r2] = true;
254
255 if (w.dist > level)
256 level = w.dist;
257 }
258 }
259 else if (j2 == cidx2[i+1])
260 {
261 octave_idx_type r1 = ridx[j1++];
262 if (! visit[r1])
263 {
264 // the distance of node j is dist(i)+1
265 w.id = r1;
266 w.deg = D[r1];
267 w.dist = v.dist+1;
268 Q_enq (Q, N, qt, w);
269 visit[r1] = true;
270
271 if (w.dist > level)
272 level = w.dist;
273 }
274 }
275 else
276 {
277 octave_idx_type r1 = ridx[j1];
278 octave_idx_type r2 = ridx2[j2];
279 if (r1 <= r2)
280 {
281 if (! visit[r1])
282 {
283 w.id = r1;
284 w.deg = D[r1];
285 w.dist = v.dist+1;
286 Q_enq (Q, N, qt, w);
287 visit[r1] = true;
288
289 if (w.dist > level)
290 level = w.dist;
291 }
292 j1++;
293 if (r1 == r2)
294 j2++;
295 }
296 else
297 {
298 if (! visit[r2])
299 {
300 w.id = r2;
301 w.deg = D[r2];
302 w.dist = v.dist+1;
303 Q_enq (Q, N, qt, w);
304 visit[r2] = true;
305
306 if (w.dist > level)
307 level = w.dist;
308 }
309 j2++;
310 }
311 }
312 }
313 } // finish of BFS
314
315 if (max_dist < x.dist)
316 {
317 max_dist = x.dist;
318
319 for (octave_idx_type i = 0; i < N; i++)
320 visit[i] = false;
321
322 visit[x.id] = true;
323 x.dist = 0;
324 qt = qh = 0;
325 Q_enq (Q, N, qt, x);
326 }
327 else
328 break;
329 }
330 return x.id;
331}
332
333// Calculates the node's degrees. This means counting the nonzero elements
334// in the symmetric matrix' rows. This works for non-symmetric matrices
335// as well.
336
337static octave_idx_type
339 const octave_idx_type *cidx, octave_idx_type *D)
340{
341 octave_idx_type max_deg = 0;
342
343 for (octave_idx_type i = 0; i < N; i++)
344 D[i] = 0;
345
346 for (octave_idx_type j = 0; j < N; j++)
347 {
348 for (octave_idx_type i = cidx[j]; i < cidx[j+1]; i++)
349 {
350 octave_quit ();
351
352 octave_idx_type k = ridx[i];
353 // there is a nonzero element (k,j)
354 D[k]++;
355 if (D[k] > max_deg)
356 max_deg = D[k];
357 // if there is no element (j,k) there is one in
358 // the symmetric matrix:
359 if (k != j)
360 {
361 bool found = false;
362 for (octave_idx_type l = cidx[k]; l < cidx[k + 1]; l++)
363 {
364 octave_quit ();
365
366 if (ridx[l] == j)
367 {
368 found = true;
369 break;
370 }
371 else if (ridx[l] > j)
372 break;
373 }
374
375 if (! found)
376 {
377 // A(j,k) == 0
378 D[j]++;
379 if (D[j] > max_deg)
380 max_deg = D[j];
381 }
382 }
383 }
384 }
385 return max_deg;
386}
387
388// Transpose of the structure of a square sparse matrix
389
390static void
392 const octave_idx_type *cidx, octave_idx_type *ridx2,
393 octave_idx_type *cidx2)
394{
395 octave_idx_type nz = cidx[N];
396
398 for (octave_idx_type i = 0; i < N; i++)
399 w[i] = 0;
400 for (octave_idx_type i = 0; i < nz; i++)
401 w[ridx[i]]++;
402 nz = 0;
403 for (octave_idx_type i = 0; i < N; i++)
404 {
405 octave_quit ();
406
407 cidx2[i] = nz;
408 nz += w[i];
409 w[i] = cidx2[i];
410 }
411 cidx2[N] = nz;
412 w[N] = nz;
413
414 for (octave_idx_type j = 0; j < N; j++)
415 for (octave_idx_type k = cidx[j]; k < cidx[j + 1]; k++)
416 {
417 octave_quit ();
418
419 octave_idx_type q = w[ridx[k]]++;
420 ridx2[q] = j;
421 }
422}
423
424// An implementation of the Cuthill-McKee algorithm.
425DEFUN (symrcm, args, ,
426 doc: /* -*- texinfo -*-
427@deftypefn {} {@var{p} =} symrcm (@var{S})
428Return the symmetric reverse @nospell{Cuthill-McKee} permutation of @var{S}.
429
430@var{p} is a permutation vector such that
431@code{@var{S}(@var{p}, @var{p})} tends to have its diagonal elements closer
432to the diagonal than @var{S}. This is a good preordering for LU or
433Cholesky@tie{}factorization of matrices that come from ``long, skinny''
434problems. It works for both symmetric and asymmetric @var{S}.
435
436The algorithm represents a heuristic approach to the NP-complete bandwidth
437minimization problem. The implementation is based in the descriptions found
438in
439
440@nospell{E. Cuthill, J. McKee}.
441@cite{Reducing the Bandwidth of Sparse Symmetric Matrices}.
442Proceedings of the 24th @nospell{ACM} National Conference,
443157--172 1969, Brandon Press, New Jersey.
444
445@nospell{A. George, J.W.H. Liu}. @cite{Computer Solution of Large Sparse
446Positive Definite Systems}, Prentice Hall Series in Computational
447Mathematics, ISBN 0-13-165274-5, 1981.
448
449@seealso{colperm, colamd, symamd}
450@end deftypefn */)
451{
452 if (args.length () != 1)
453 print_usage ();
454
455 octave_value arg = args(0);
456
457 // the parameter of the matrix is converted into a sparse matrix
458 //(if necessary)
459 octave_idx_type *cidx;
460 octave_idx_type *ridx;
461 SparseMatrix Ar;
463
464 if (arg.isreal ())
465 {
466 Ar = arg.sparse_matrix_value ();
467 // Note cidx/ridx are const, so use xridx and xcidx...
468 cidx = Ar.xcidx ();
469 ridx = Ar.xridx ();
470 }
471 else
472 {
473 Ac = arg.sparse_complex_matrix_value ();
474 cidx = Ac.xcidx ();
475 ridx = Ac.xridx ();
476 }
477
478 octave_idx_type nr = arg.rows ();
479 octave_idx_type nc = arg.columns ();
480
481 if (nr != nc)
482 err_square_matrix_required ("symrcm", "S");
483
484 if (nr == 0 && nc == 0)
485 return ovl (NDArray (dim_vector (1, 0)));
486
487 // sizes of the heaps
488 octave_idx_type s = 0;
489
490 // head- and tail-indices for the queue
491 octave_idx_type qt = 0;
492 octave_idx_type qh = 0;
493 CMK_Node v, w;
494 // dimension of the matrix
495 octave_idx_type N = nr;
496
498 OCTAVE_LOCAL_BUFFER (octave_idx_type, ridx2, cidx[N]);
499 transpose (N, ridx, cidx, ridx2, cidx2);
500
501 // the permutation vector
502 NDArray P (dim_vector (1, N));
503
504 // compute the node degrees
506 octave_idx_type max_deg = calc_degrees (N, ridx, cidx, D);
507
508 // if none of the nodes has a degree > 0 (a matrix of zeros)
509 // the return value corresponds to the identity permutation
510 if (max_deg == 0)
511 {
512 for (octave_idx_type i = 0; i < N; i++)
513 P(i) = i;
514
515 return ovl (P);
516 }
517
518 // a heap for the a node's neighbors. The number of neighbors is
519 // limited by the maximum degree max_deg:
520 OCTAVE_LOCAL_BUFFER (CMK_Node, S, max_deg);
521
522 // a queue for the BFS. The array is always one element larger than
523 // the number of entries that are stored.
525
526 // a counter (for building the permutation)
527 octave_idx_type c = -1;
528
529 // upper bound for the bandwidth (=quality of solution)
530 // initialize the bandwidth of the graph with 0. B contains the
531 // the maximum of the theoretical lower limits of the subgraphs
532 // bandwidths.
533 octave_idx_type B = 0;
534
535 // mark all nodes as unvisited; with the exception of the nodes
536 // that have degree==0 and build a CC of the graph.
537
538 boolNDArray btmp (dim_vector (1, N), false);
539 bool *visit = btmp.fortran_vec ();
540
541 do
542 {
543 // locate an unvisited starting node of the graph
545 for (i = 0; i < N; i++)
546 if (! visit[i])
547 break;
548
549 // locate a probably better starting node
550 v.id = find_starting_node (N, ridx, cidx, ridx2, cidx2, D, i);
551
552 // mark the node as visited and enqueue it (a starting node
553 // for the BFS). Since the node will be a root of a spanning
554 // tree, its dist is 0.
555 v.deg = D[v.id];
556 v.dist = 0;
557 visit[v.id] = true;
558 Q_enq (Q, N, qt, v);
559
560 // lower bound for the bandwidth of a subgraph
561 // keep a "level" in the spanning tree (= min. distance to the
562 // root) for determining the bandwidth of the computed
563 // permutation P
564 octave_idx_type Bsub = 0;
565 // min. dist. to the root is 0
566 octave_idx_type level = 0;
567 // the root is the first/only node on level 0
568 octave_idx_type level_N = 1;
569
570 while (! Q_empty (Q, N, qh, qt))
571 {
572 v = Q_deq (Q, N, qh);
573 i = v.id;
574
575 c++;
576
577 // for computing the inverse permutation P where
578 // A(inv(P),inv(P)) or P'*A*P is banded
579 // P(i) = c;
580
581 // for computing permutation P where
582 // A(P(i),P(j)) or P*A*P' is banded
583 P(c) = i;
584
585 // put all unvisited neighbors j of node i on the heap
586 s = 0;
587 octave_idx_type j1 = cidx[i];
588 octave_idx_type j2 = cidx2[i];
589
590 octave_quit ();
591
592 while (j1 < cidx[i+1] || j2 < cidx2[i+1])
593 {
594 octave_quit ();
595
596 if (j1 == cidx[i+1])
597 {
598 octave_idx_type r2 = ridx2[j2++];
599 if (! visit[r2])
600 {
601 // the distance of node j is dist(i)+1
602 w.id = r2;
603 w.deg = D[r2];
604 w.dist = v.dist+1;
605 H_insert (S, s, w);
606 visit[r2] = true;
607 }
608 }
609 else if (j2 == cidx2[i+1])
610 {
611 octave_idx_type r1 = ridx[j1++];
612 if (! visit[r1])
613 {
614 w.id = r1;
615 w.deg = D[r1];
616 w.dist = v.dist+1;
617 H_insert (S, s, w);
618 visit[r1] = true;
619 }
620 }
621 else
622 {
623 octave_idx_type r1 = ridx[j1];
624 octave_idx_type r2 = ridx2[j2];
625 if (r1 <= r2)
626 {
627 if (! visit[r1])
628 {
629 w.id = r1;
630 w.deg = D[r1];
631 w.dist = v.dist+1;
632 H_insert (S, s, w);
633 visit[r1] = true;
634 }
635 j1++;
636 if (r1 == r2)
637 j2++;
638 }
639 else
640 {
641 if (! visit[r2])
642 {
643 w.id = r2;
644 w.deg = D[r2];
645 w.dist = v.dist+1;
646 H_insert (S, s, w);
647 visit[r2] = true;
648 }
649 j2++;
650 }
651 }
652 }
653
654 // add the neighbors to the queue (sorted by node degree)
655 while (! H_empty (S, s))
656 {
657 octave_quit ();
658
659 // locate a neighbor of i with minimal degree in O(log(N))
660 v = H_remove_min (S, s, 1);
661
662 // entered the BFS a new level?
663 if (v.dist > level)
664 {
665 // adjustment of bandwidth:
666 // "[...] the minimum bandwidth that
667 // can be obtained [...] is the
668 // maximum number of nodes per level"
669 if (Bsub < level_N)
670 Bsub = level_N;
671
672 level = v.dist;
673 // v is the first node on the new level
674 level_N = 1;
675 }
676 else
677 {
678 // there is no new level but another node on
679 // this level:
680 level_N++;
681 }
682
683 // enqueue v in O(1)
684 Q_enq (Q, N, qt, v);
685 }
686
687 // synchronize the bandwidth with level_N once again:
688 if (Bsub < level_N)
689 Bsub = level_N;
690 }
691 // finish of BFS. If there are still unvisited nodes in the graph
692 // then it is split into CCs. The computed bandwidth is the maximum
693 // of all subgraphs. Update:
694 if (Bsub > B)
695 B = Bsub;
696 }
697 // are there any nodes left?
698 while (c+1 < N);
699
700 // compute the reverse-ordering
701 s = N / 2 - 1;
702 for (octave_idx_type i = 0, j = N - 1; i <= s; i++, j--)
703 std::swap (P.elem (i), P.elem (j));
704
705 // increment all indices, since Octave is not C
706 return ovl (P+1);
707}
708
709OCTAVE_NAMESPACE_END
T & elem(octave_idx_type n)
Size of the specified dimension.
Definition: Array.h:534
OCTARRAY_API T * fortran_vec(void)
Size of the specified dimension.
Definition: Array.cc:1744
octave_idx_type * xridx(void)
Definition: Sparse.h:589
octave_idx_type * xcidx(void)
Definition: Sparse.h:602
Vector representing the dimensions (size) of an Array.
Definition: dim-vector.h:94
SparseMatrix sparse_matrix_value(bool frc_str_conv=false) const
Definition: ov.h:945
bool isreal(void) const
Definition: ov.h:783
octave_idx_type rows(void) const
Definition: ov.h:590
octave_idx_type columns(void) const
Definition: ov.h:592
SparseComplexMatrix sparse_complex_matrix_value(bool frc_str_conv=false) const
Definition: ov.h:949
OCTINTERP_API void print_usage(void)
Definition: defun-int.h:72
#define DEFUN(name, args_name, nargout_name, doc)
Macro to define a builtin function.
Definition: defun.h:56
void err_square_matrix_required(const char *fcn, const char *name)
Definition: errwarn.cc:122
F77_RET_T const F77_INT F77_CMPLX const F77_INT F77_CMPLX * B
F77_RET_T const F77_INT const F77_INT const F77_INT F77_DBLE const F77_INT F77_DBLE const F77_INT F77_DBLE * Q
F77_RET_T const F77_INT F77_CMPLX * A
F77_RET_T const F77_INT & N
F77_RET_T const F77_DBLE * x
std::complex< double > w(std::complex< double > z, double relerr=0)
#define OCTAVE_LOCAL_BUFFER(T, buf, size)
Definition: oct-locbuf.h:44
octave_value_list ovl(const OV_Args &... args)
Construct an octave_value_list with less typing.
Definition: ovl.h:211
octave_idx_type deg
Definition: symrcm.cc:83
octave_idx_type dist
Definition: symrcm.cc:85
octave_idx_type id
Definition: symrcm.cc:81
#define RIGHT(i)
Definition: symrcm.cc:119
static void Q_enq(CMK_Node *Q, octave_idx_type N, octave_idx_type &qt, const CMK_Node &o)
Definition: symrcm.cc:95
#define LEFT(i)
Definition: symrcm.cc:117
static CMK_Node Q_deq(CMK_Node *Q, octave_idx_type N, octave_idx_type &qh)
Definition: symrcm.cc:104
static void H_heapify_min(CMK_Node *A, octave_idx_type i, octave_idx_type size)
Definition: symrcm.cc:127
static void H_insert(CMK_Node *H, octave_idx_type &h, const CMK_Node &o)
Definition: symrcm.cc:157
#define H_empty(H, h)
Definition: symrcm.cc:194
#define PARENT(i)
Definition: symrcm.cc:121
#define Q_empty(Q, N, qh, qt)
Definition: symrcm.cc:112
static CMK_Node H_remove_min(CMK_Node *H, octave_idx_type &h, int reorg)
Definition: symrcm.cc:184
static octave_idx_type calc_degrees(octave_idx_type N, const octave_idx_type *ridx, const octave_idx_type *cidx, octave_idx_type *D)
Definition: symrcm.cc:338
static void transpose(octave_idx_type N, const octave_idx_type *ridx, const octave_idx_type *cidx, octave_idx_type *ridx2, octave_idx_type *cidx2)
Definition: symrcm.cc:391
static octave_idx_type find_starting_node(octave_idx_type N, const octave_idx_type *ridx, const octave_idx_type *cidx, const octave_idx_type *ridx2, const octave_idx_type *cidx2, octave_idx_type *D, octave_idx_type start)
Definition: symrcm.cc:200