GNU Octave  6.2.0
A high-level interpreted language, primarily intended for numerical computations, mostly compatible with Matlab
symrcm.cc
Go to the documentation of this file.
1 ////////////////////////////////////////////////////////////////////////
2 //
3 // Copyright (C) 2007-2021 The Octave Project Developers
4 //
5 // See the file COPYRIGHT.md in the top-level directory of this
6 // distribution or <https://octave.org/copyright/>.
7 //
8 // This file is part of Octave.
9 //
10 // Octave is free software: you can redistribute it and/or modify it
11 // under the terms of the GNU General Public License as published by
12 // the Free Software Foundation, either version 3 of the License, or
13 // (at your option) any later version.
14 //
15 // Octave is distributed in the hope that it will be useful, but
16 // WITHOUT ANY WARRANTY; without even the implied warranty of
17 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 // GNU General Public License for more details.
19 //
20 // You should have received a copy of the GNU General Public License
21 // along with Octave; see the file COPYING. If not, see
22 // <https://www.gnu.org/licenses/>.
23 //
24 ////////////////////////////////////////////////////////////////////////
25 
26 /*
27 An implementation of the Reverse Cuthill-McKee algorithm (symrcm)
28 
29 The implementation of this algorithm is based on the descriptions found in
30 
31 @INPROCEEDINGS{,
32  author = {E. Cuthill and J. McKee},
33  title = {Reducing the Bandwidth of Sparse Symmetric Matrices},
34  booktitle = {Proceedings of the 24th ACM National Conference},
35  publisher = {Brandon Press},
36  pages = {157 -- 172},
37  location = {New Jersey},
38  year = {1969}
39 }
40 
41 @BOOK{,
42  author = {Alan George and Joseph W. H. Liu},
43  title = {Computer Solution of Large Sparse Positive Definite Systems},
44  publisher = {Prentice Hall Series in Computational Mathematics},
45  ISBN = {0-13-165274-5},
46  year = {1981}
47 }
48 
49 The algorithm represents a heuristic approach to the NP-complete minimum
50 bandwidth problem.
51 
52 Written by Michael Weitzel <michael.weitzel@@uni-siegen.de>
53  <weitzel@@ldknet.org>
54 */
55 
56 #if defined (HAVE_CONFIG_H)
57 # include "config.h"
58 #endif
59 
60 #include <algorithm>
61 
62 #include "CSparse.h"
63 #include "boolNDArray.h"
64 #include "dNDArray.h"
65 #include "dSparse.h"
66 #include "oct-locbuf.h"
67 #include "oct-sparse.h"
68 #include "quit.h"
69 
70 #include "defun.h"
71 #include "errwarn.h"
72 #include "ov.h"
73 #include "ovl.h"
74 
75 // A node struct for the Cuthill-McKee algorithm
// NOTE(review): the member declarations themselves (listing lines 79, 81
// and 83) are not present in this rendering; only their comments remain.
// The rest of the file accesses the fields as .id, .deg and .dist.
76 struct CMK_Node
77 {
78  // id: the node's id (matrix row index)
80  // deg: the node's degree
82  // dist: minimal distance to the root of the spanning tree
84 };
85 
86 // A simple queue.
87 // Queues Q have a fixed maximum size N (rows,cols of the matrix) and are
88 // stored in an array. qh and qt point to queue head and tail.
89 
90 // Enqueue operation (adds a node "o" at the tail)
// Copies "o" into slot qt of Q and then advances qt by one, wrapping
// around modulo N + 1. qt is updated in place for the caller. No check
// for a full queue is made here.
91 
92 inline static void
94 {
95  Q[qt] = o; // store at the current tail slot
96  qt = (qt + 1) % (N + 1); // circular advance of the tail index
97 }
98 
99 // Dequeue operation (removes a node from the head)
// Returns a copy of the node stored in slot qh of Q and then advances qh
// by one, wrapping around modulo N + 1. qh is updated in place for the
// caller. No check for an empty queue is made here; see Q_empty.
100 
101 inline static CMK_Node
103 {
104  CMK_Node r = Q[qh]; // copy out before moving the head index
105  qh = (qh + 1) % (N + 1); // circular advance of the head index
106  return r;
107 }
108 
109 // Predicate (queue empty): true when head index qh equals tail index qt.
// The Q and N arguments are accepted for symmetry but are not used.
110 #define Q_empty(Q, N, qh, qt) ((qh) == (qt))
111 
112 // A simple, array-based binary heap (used as a priority queue for nodes)
// The index macros below use a 0-based layout: the root is entry 0, so
// LEFT(0) == 1 and RIGHT(0) == 2.
113 
114 // index of the left descendant of entry i
115 #define LEFT(i) (((i) << 1) + 1) // = (2*(i)+1)
116 // index of the right descendant of entry i
117 #define RIGHT(i) (((i) << 1) + 2) // = (2*(i)+2)
118 // index of the parent of entry i (only meaningful for i > 0)
119 #define PARENT(i) (((i) - 1) >> 1) // = floor(((i)-1)/2)
120 
121 // Sifts the entry at position i of the heap array A down until neither
122 // child has a smaller deg (min-heap: the root holds the smallest deg).
// A is the array with the graph's nodes, i is the starting position and
// size is the number of entries of A that are considered. Entries are
// compared by their deg field only.
// NOTE(review): this re-establishes the heap order only if both subtrees
// below i are already valid heaps - confirm against callers.
123 
124 static void
126 {
127  octave_idx_type j = i;
128  for (;;)
129  {
130  octave_idx_type l = LEFT(j);
131  octave_idx_type r = RIGHT(j);
132 
133  octave_idx_type smallest; // index of the smallest deg among j, l, r
134  if (l < size && A[l].deg < A[j].deg)
135  smallest = l;
136  else
137  smallest = j;
138 
139  if (r < size && A[r].deg < A[smallest].deg)
140  smallest = r;
141 
142  if (smallest != j)
143  {
144  std::swap (A[j], A[smallest]); // move the smaller child up
145  j = smallest; // and continue one level further down
146  }
147  else
148  break; // entry j is not larger than its children: done
149  }
150 }
151 
152 // Heap operation insert. Running time is O(log(n))
// Appends node "o" at position h of the heap array H, increments the heap
// size h (updated in place for the caller) and then moves the new entry
// towards the root while its deg is smaller than its parent's deg.
// No capacity check is made here.
153 
154 static void
156 {
157  octave_idx_type i = h++; // i = slot of the new entry, h = new size
158 
159  H[i] = o;
160 
161  if (i == 0)
162  return; // first entry is the root; nothing to reorder
163  do
164  {
165  octave_idx_type p = PARENT(i);
166  if (H[i].deg < H[p].deg)
167  {
168  std::swap (H[i], H[p]);
169 
170  i = p;
171  }
172  else
173  break; // parent is not larger: heap order holds
174  }
175  while (i > 0);
176 }
177 
178 // Heap operation remove-min. Removes the smallest element in O(1) and
179 // reorganizes the heap optionally in O(log(n))
// Returns a copy of the root H[0], decrements the heap size h (updated in
// place for the caller) and moves the former last entry to the root.
// If reorg is non-zero, H_heapify_min is run from the root afterwards.
// No check for an empty heap is made here; see H_empty.
180 
181 inline static CMK_Node
182 H_remove_min (CMK_Node *H, octave_idx_type& h, int reorg/*=1*/)
183 {
184  CMK_Node r = H[0]; // the element with the smallest deg
185  H[0] = H[--h]; // last entry takes the root slot; size shrinks by one
186  if (reorg)
187  H_heapify_min (H, 0, h);
188  return r;
189 }
190 
191 // Predicate (heap empty): true when the heap size h is 0.
// The H argument is accepted for symmetry but is not used.
192 #define H_empty(H, h) ((h) == 0)
193 
194 // Helper function for the Cuthill-McKee algorithm. Tries to determine a
195 // pseudo-peripheral node of the graph as starting node.
//
// Runs a breadth-first search from "start" over the union of the
// neighbor lists given by (ridx, cidx) and (ridx2, cidx2), remembering a
// candidate node x. While the distance of the candidate found by a search
// exceeds the largest distance seen so far, the search is restarted from
// that candidate. Returns the id of the final candidate.
// D[k] is read as the degree of node k.
// NOTE(review): the first line of the signature (listing line 198) and the
// declaration of the queue buffer Q (listing line 205) are not present in
// this rendering.
196 
197 static octave_idx_type
199  const octave_idx_type *cidx, const octave_idx_type *ridx2,
200  const octave_idx_type *cidx2, octave_idx_type *D,
201  octave_idx_type start)
202 {
203  CMK_Node w;
204 
206  boolNDArray btmp (dim_vector (1, N), false);
207  bool *visit = btmp.fortran_vec (); // visit[k] is true once k was enqueued
208 
209  octave_idx_type qh = 0;
210  octave_idx_type qt = 0;
211  CMK_Node x; // current candidate; initially the given start node
212  x.id = start;
213  x.deg = D[start];
214  x.dist = 0;
215  Q_enq (Q, N, qt, x);
216  visit[start] = true;
217 
218  // distance level (only updated below; not read for any decision in the
 // visible code)
219  octave_idx_type level = 0;
220  // current largest "eccentricity"
221  octave_idx_type max_dist = 0;
222 
223  for (;;)
224  {
225  while (! Q_empty (Q, N, qh, qt))
226  {
227  CMK_Node v = Q_deq (Q, N, qh);
228 
 // v replaces the candidate if it is farther away, or if it is a
 // different node with a larger degree
229  if (v.dist > x.dist || (v.id != x.id && v.deg > x.deg))
230  x = v;
231 
232  octave_idx_type i = v.id;
233 
234  // add all unvisited neighbors to the queue
 // j1 walks the entries of i in (ridx, cidx), j2 those in
 // (ridx2, cidx2); both lists are merged in one pass
235  octave_idx_type j1 = cidx[i];
236  octave_idx_type j2 = cidx2[i];
237  while (j1 < cidx[i+1] || j2 < cidx2[i+1])
238  {
239  octave_quit ();
240 
241  if (j1 == cidx[i+1]) // first list exhausted: take from the second
242  {
243  octave_idx_type r2 = ridx2[j2++];
244  if (! visit[r2])
245  {
246  // the distance of node j is dist(i)+1
247  w.id = r2;
248  w.deg = D[r2];
249  w.dist = v.dist+1;
250  Q_enq (Q, N, qt, w);
251  visit[r2] = true;
252 
253  if (w.dist > level)
254  level = w.dist;
255  }
256  }
257  else if (j2 == cidx2[i+1]) // second list exhausted: take from the first
258  {
259  octave_idx_type r1 = ridx[j1++];
260  if (! visit[r1])
261  {
262  // the distance of node j is dist(i)+1
263  w.id = r1;
264  w.deg = D[r1];
265  w.dist = v.dist+1;
266  Q_enq (Q, N, qt, w);
267  visit[r1] = true;
268 
269  if (w.dist > level)
270  level = w.dist;
271  }
272  }
273  else // both lists have entries left: take the smaller index
274  {
275  octave_idx_type r1 = ridx[j1];
276  octave_idx_type r2 = ridx2[j2];
277  if (r1 <= r2)
278  {
279  if (! visit[r1])
280  {
281  w.id = r1;
282  w.deg = D[r1];
283  w.dist = v.dist+1;
284  Q_enq (Q, N, qt, w);
285  visit[r1] = true;
286 
287  if (w.dist > level)
288  level = w.dist;
289  }
290  j1++;
291  if (r1 == r2) // same neighbor in both lists: skip the duplicate
292  j2++;
293  }
294  else
295  {
296  if (! visit[r2])
297  {
298  w.id = r2;
299  w.deg = D[r2];
300  w.dist = v.dist+1;
301  Q_enq (Q, N, qt, w);
302  visit[r2] = true;
303 
304  if (w.dist > level)
305  level = w.dist;
306  }
307  j2++;
308  }
309  }
310  }
311  } // finish of BFS
312 
 // if this search reached farther than any previous one, restart the
 // search from the candidate x; otherwise x is the result
313  if (max_dist < x.dist)
314  {
315  max_dist = x.dist;
316 
317  for (octave_idx_type i = 0; i < N; i++)
318  visit[i] = false;
319 
320  visit[x.id] = true;
321  x.dist = 0;
322  qt = qh = 0;
323  Q_enq (Q, N, qt, x);
324  }
325  else
326  break;
327  }
328  return x.id;
329 }
330 
331 // Calculates the node's degrees. This means counting the nonzero elements
332 // in the symmetric matrix' rows. This works for non-symmetric matrices
333 // as well.
//
// For every stored entry (k,j) the count D[k] is incremented; if k != j and
// no entry (j,k) is stored, D[j] is incremented as well, so the counts are
// those of the symmetrized structure. Stored diagonal entries are counted
// too. D is zeroed first and must hold at least N entries.
// Returns the largest value written to D.
// NOTE(review): the first line of the signature (listing line 336) is not
// present in this rendering.
334 
335 static octave_idx_type
337  const octave_idx_type *cidx, octave_idx_type *D)
338 {
339  octave_idx_type max_deg = 0;
340 
341  for (octave_idx_type i = 0; i < N; i++)
342  D[i] = 0;
343 
344  for (octave_idx_type j = 0; j < N; j++)
345  {
346  for (octave_idx_type i = cidx[j]; i < cidx[j+1]; i++)
347  {
348  octave_quit ();
349 
350  octave_idx_type k = ridx[i];
351  // there is a nonzero element (k,j)
352  D[k]++;
353  if (D[k] > max_deg)
354  max_deg = D[k];
355  // if there is no element (j,k) there is one in
356  // the symmetric matrix:
357  if (k != j)
358  {
359  bool found = false;
 // linear scan of column k for row index j
360  for (octave_idx_type l = cidx[k]; l < cidx[k + 1]; l++)
361  {
362  octave_quit ();
363 
364  if (ridx[l] == j)
365  {
366  found = true;
367  break;
368  }
 // early exit; assumes row indices within a column are
 // stored in ascending order - TODO confirm
369  else if (ridx[l] > j)
370  break;
371  }
372 
373  if (! found)
374  {
375  // A(j,k) == 0
376  D[j]++;
377  if (D[j] > max_deg)
378  max_deg = D[j];
379  }
380  }
381  }
382  }
383  return max_deg;
384 }
385 
386 // Transpose of the structure of a square sparse matrix
//
// Reads the index arrays (ridx, cidx) of an N-by-N matrix and writes the
// index arrays of its transpose to (ridx2, cidx2). Only the structure is
// handled; no values are involved. cidx2 receives N + 1 entries and ridx2
// receives cidx[N] entries.
// NOTE(review): the first line of the signature (listing line 389) and the
// declaration of the work array w (listing line 395) are not present in
// this rendering; w is written at indices 0..N below.
387 
388 static void
390  const octave_idx_type *cidx, octave_idx_type *ridx2,
391  octave_idx_type *cidx2)
392 {
393  octave_idx_type nz = cidx[N]; // total number of stored entries
394 
 // pass 1: count the stored entries of every row
396  for (octave_idx_type i = 0; i < N; i++)
397  w[i] = 0;
398  for (octave_idx_type i = 0; i < nz; i++)
399  w[ridx[i]]++;
 // pass 2: running sum of the counts gives the start offsets cidx2;
 // w[i] is reused as the next free position for row i
400  nz = 0;
401  for (octave_idx_type i = 0; i < N; i++)
402  {
403  octave_quit ();
404 
405  cidx2[i] = nz;
406  nz += w[i];
407  w[i] = cidx2[i];
408  }
409  cidx2[N] = nz;
410  w[N] = nz;
411 
 // pass 3: scatter each entry (ridx[k], j) to its slot in the transpose
412  for (octave_idx_type j = 0; j < N; j++)
413  for (octave_idx_type k = cidx[j]; k < cidx[j + 1]; k++)
414  {
415  octave_quit ();
416 
417  octave_idx_type q = w[ridx[k]]++;
418  ridx2[q] = j;
419  }
420 }
421 
422 // An implementation of the Cuthill-McKee algorithm.
// Takes exactly one argument, converts it to a sparse matrix, and returns
// the reversed breadth-first ordering (1-based) as a 1-by-N row vector.
// NOTE(review): the declarations of Ac (listing line 460), D (listing line
// 503) and Q (listing line 522) are not present in this rendering.
423 DEFUN (symrcm, args, ,
424  doc: /* -*- texinfo -*-
425 @deftypefn {} {@var{p} =} symrcm (@var{S})
426 Return the symmetric reverse @nospell{Cuthill-McKee} permutation of @var{S}.
427 
428 @var{p} is a permutation vector such that
429 @code{@var{S}(@var{p}, @var{p})} tends to have its diagonal elements closer
430 to the diagonal than @var{S}. This is a good preordering for LU or
431 Cholesky@tie{}factorization of matrices that come from ``long, skinny''
432 problems. It works for both symmetric and asymmetric @var{S}.
433 
434 The algorithm represents a heuristic approach to the NP-complete bandwidth
435 minimization problem. The implementation is based in the descriptions found
436 in
437 
438 @nospell{E. Cuthill, J. McKee}.
439 @cite{Reducing the Bandwidth of Sparse Symmetric Matrices}.
440 Proceedings of the 24th @nospell{ACM} National Conference,
441 157--172 1969, Brandon Press, New Jersey.
442 
443 @nospell{A. George, J.W.H. Liu}. @cite{Computer Solution of Large Sparse
444 Positive Definite Systems}, Prentice Hall Series in Computational
445 Mathematics, ISBN 0-13-165274-5, 1981.
446 
447 @seealso{colperm, colamd, symamd}
448 @end deftypefn */)
449 {
450  if (args.length () != 1)
451  print_usage ();
452 
453  octave_value arg = args(0);
454 
455  // the parameter of the matrix is converted into a sparse matrix
456  //(if necessary)
457  octave_idx_type *cidx;
458  octave_idx_type *ridx;
459  SparseMatrix Ar;
461 
 // only the index arrays are used below, never the stored values
462  if (arg.isreal ())
463  {
464  Ar = arg.sparse_matrix_value ();
465  // Note cidx/ridx are const, so use xridx and xcidx...
466  cidx = Ar.xcidx ();
467  ridx = Ar.xridx ();
468  }
469  else
470  {
471  Ac = arg.sparse_complex_matrix_value ();
472  cidx = Ac.xcidx ();
473  ridx = Ac.xridx ();
474  }
475 
476  octave_idx_type nr = arg.rows ();
477  octave_idx_type nc = arg.columns ();
478 
479  if (nr != nc)
480  err_square_matrix_required ("symrcm", "S");
481 
 // empty input: return an empty 1-by-0 permutation
482  if (nr == 0 && nc == 0)
483  return ovl (NDArray (dim_vector (1, 0)));
484 
485  // number of entries currently on the heap S
486  octave_idx_type s = 0;
487 
488  // head- and tail-indices for the queue
489  octave_idx_type qt = 0;
490  octave_idx_type qh = 0;
491  CMK_Node v, w;
492  // dimension of the matrix
493  octave_idx_type N = nr;
494 
 // (ridx2, cidx2) hold the structure of the transpose so that the
 // neighbors of a node can be read from both the matrix and its transpose
495  OCTAVE_LOCAL_BUFFER (octave_idx_type, cidx2, N + 1);
496  OCTAVE_LOCAL_BUFFER (octave_idx_type, ridx2, cidx[N]);
497  transpose (N, ridx, cidx, ridx2, cidx2);
498 
499  // the permutation vector
500  NDArray P (dim_vector (1, N));
501 
502  // compute the node degrees
504  octave_idx_type max_deg = calc_degrees (N, ridx, cidx, D);
505 
506  // if none of the nodes has a degree > 0 (a matrix of zeros)
507  // the return value corresponds to the identity permutation
 // NOTE(review): this path returns the 0-based values 0..N-1, whereas the
 // regular path at the end returns P+1 - confirm this is intended
508  if (max_deg == 0)
509  {
510  for (octave_idx_type i = 0; i < N; i++)
511  P(i) = i;
512 
513  return ovl (P);
514  }
515 
516  // a heap for a node's neighbors. The number of neighbors is
517  // limited by the maximum degree max_deg:
518  OCTAVE_LOCAL_BUFFER (CMK_Node, S, max_deg);
519 
520  // a queue for the BFS. The array is always one element larger than
521  // the number of entries that are stored.
523 
524  // a counter (for building the permutation)
 // starts at -1 because it is incremented before each use as an index
525  octave_idx_type c = -1;
526 
527  // upper bound for the bandwidth (=quality of solution)
528  // initialize the bandwidth of the graph with 0. B contains
529  // the maximum of the theoretical lower limits of the subgraphs
530  // bandwidths.
 // NOTE(review): B is only updated below; it is not part of the return
 // value in the visible code.
531  octave_idx_type B = 0;
532 
533  // mark all nodes as unvisited.
534  // NOTE(review): nodes with degree==0 are not pre-marked here; the loop
 // below appears to pick each of them up as its own connected
 // component (CC) of the graph.
535 
536  boolNDArray btmp (dim_vector (1, N), false);
537  bool *visit = btmp.fortran_vec ();
538 
 // one iteration per connected component
539  do
540  {
541  // locate an unvisited starting node of the graph
542  octave_idx_type i;
543  for (i = 0; i < N; i++)
544  if (! visit[i])
545  break;
546 
547  // locate a probably better starting node
548  v.id = find_starting_node (N, ridx, cidx, ridx2, cidx2, D, i);
549 
550  // mark the node as visited and enqueue it (a starting node
551  // for the BFS). Since the node will be a root of a spanning
552  // tree, its dist is 0.
553  v.deg = D[v.id];
554  v.dist = 0;
555  visit[v.id] = true;
556  Q_enq (Q, N, qt, v);
557 
558  // lower bound for the bandwidth of a subgraph
559  // keep a "level" in the spanning tree (= min. distance to the
560  // root) for determining the bandwidth of the computed
561  // permutation P
562  octave_idx_type Bsub = 0;
563  // min. dist. to the root is 0
564  octave_idx_type level = 0;
565  // the root is the first/only node on level 0
566  octave_idx_type level_N = 1;
567 
568  while (! Q_empty (Q, N, qh, qt))
569  {
570  v = Q_deq (Q, N, qh);
571  i = v.id;
572 
573  c++;
574 
575  // for computing the inverse permutation P where
576  // A(inv(P),inv(P)) or P'*A*P is banded
577  // P(i) = c;
578 
579  // for computing permutation P where
580  // A(P(i),P(j)) or P*A*P' is banded
581  P(c) = i;
582 
583  // put all unvisited neighbors j of node i on the heap
584  s = 0;
 // j1 walks the entries of i in (ridx, cidx), j2 those in
 // (ridx2, cidx2); both lists are merged in one pass
585  octave_idx_type j1 = cidx[i];
586  octave_idx_type j2 = cidx2[i];
587 
588  octave_quit ();
589 
590  while (j1 < cidx[i+1] || j2 < cidx2[i+1])
591  {
592  octave_quit ();
593 
594  if (j1 == cidx[i+1]) // first list exhausted: take from the second
595  {
596  octave_idx_type r2 = ridx2[j2++];
597  if (! visit[r2])
598  {
599  // the distance of node j is dist(i)+1
600  w.id = r2;
601  w.deg = D[r2];
602  w.dist = v.dist+1;
603  H_insert (S, s, w);
604  visit[r2] = true;
605  }
606  }
607  else if (j2 == cidx2[i+1]) // second list exhausted: take from the first
608  {
609  octave_idx_type r1 = ridx[j1++];
610  if (! visit[r1])
611  {
612  w.id = r1;
613  w.deg = D[r1];
614  w.dist = v.dist+1;
615  H_insert (S, s, w);
616  visit[r1] = true;
617  }
618  }
619  else // both lists have entries left: take the smaller index
620  {
621  octave_idx_type r1 = ridx[j1];
622  octave_idx_type r2 = ridx2[j2];
623  if (r1 <= r2)
624  {
625  if (! visit[r1])
626  {
627  w.id = r1;
628  w.deg = D[r1];
629  w.dist = v.dist+1;
630  H_insert (S, s, w);
631  visit[r1] = true;
632  }
633  j1++;
634  if (r1 == r2) // same neighbor in both lists: skip the duplicate
635  j2++;
636  }
637  else
638  {
639  if (! visit[r2])
640  {
641  w.id = r2;
642  w.deg = D[r2];
643  w.dist = v.dist+1;
644  H_insert (S, s, w);
645  visit[r2] = true;
646  }
647  j2++;
648  }
649  }
650  }
651 
652  // add the neighbors to the queue (sorted by node degree)
653  while (! H_empty (S, s))
654  {
655  octave_quit ();
656 
657  // locate a neighbor of i with minimal degree in O(log(N))
658  v = H_remove_min (S, s, 1);
659 
660  // entered the BFS a new level?
661  if (v.dist > level)
662  {
663  // adjustment of bandwidth:
664  // "[...] the minimum bandwidth that
665  // can be obtained [...] is the
666  // maximum number of nodes per level"
667  if (Bsub < level_N)
668  Bsub = level_N;
669 
670  level = v.dist;
671  // v is the first node on the new level
672  level_N = 1;
673  }
674  else
675  {
676  // there is no new level but another node on
677  // this level:
678  level_N++;
679  }
680 
681  // enqueue v in O(1)
682  Q_enq (Q, N, qt, v);
683  }
684 
685  // synchronize the bandwidth with level_N once again:
686  if (Bsub < level_N)
687  Bsub = level_N;
688  }
689  // finish of BFS. If there are still unvisited nodes in the graph
690  // then it is split into CCs. The computed bandwidth is the maximum
691  // of all subgraphs. Update:
692  if (Bsub > B)
693  B = Bsub;
694  }
695  // are there any nodes left?
 // (c + 1 is the number of nodes placed in P so far)
696  while (c+1 < N);
697 
698  // compute the reverse-ordering
 // swaps P(i) with P(N-1-i) for the first half of the vector
699  s = N / 2 - 1;
700  for (octave_idx_type i = 0, j = N - 1; i <= s; i++, j--)
701  std::swap (P.elem (i), P.elem (j));
702 
703  // increment all indices, since Octave is not C
704  return ovl (P+1);
705 }
T & elem(octave_idx_type n)
Size of the specified dimension.
Definition: Array.h:499
const T * fortran_vec(void) const
Size of the specified dimension.
Definition: Array.h:583
octave_idx_type * xridx(void)
Definition: Sparse.h:485
octave_idx_type * xcidx(void)
Definition: Sparse.h:498
Vector representing the dimensions (size) of an Array.
Definition: dim-vector.h:95
SparseMatrix sparse_matrix_value(bool frc_str_conv=false) const
Definition: ov.h:853
bool isreal(void) const
Definition: ov.h:691
octave_idx_type rows(void) const
Definition: ov.h:504
octave_idx_type columns(void) const
Definition: ov.h:506
SparseComplexMatrix sparse_complex_matrix_value(bool frc_str_conv=false) const
Definition: ov.h:857
OCTINTERP_API void print_usage(void)
Definition: defun.cc:53
#define DEFUN(name, args_name, nargout_name, doc)
Macro to define a builtin function.
Definition: defun.h:56
void err_square_matrix_required(const char *fcn, const char *name)
Definition: errwarn.cc:122
F77_RET_T const F77_INT F77_CMPLX const F77_INT F77_CMPLX * B
F77_RET_T const F77_INT const F77_INT const F77_INT F77_DBLE const F77_INT F77_DBLE const F77_INT F77_DBLE * Q
F77_RET_T const F77_INT & N
F77_RET_T const F77_INT F77_CMPLX * A
F77_RET_T const F77_DBLE * x
T * r
Definition: mx-inlines.cc:773
std::complex< double > w(std::complex< double > z, double relerr=0)
#define OCTAVE_LOCAL_BUFFER(T, buf, size)
Definition: oct-locbuf.h:44
octave_value_list ovl(const OV_Args &... args)
Construct an octave_value_list with less typing.
Definition: ovl.h:211
octave_idx_type deg
Definition: symrcm.cc:81
octave_idx_type dist
Definition: symrcm.cc:83
octave_idx_type id
Definition: symrcm.cc:79
#define RIGHT(i)
Definition: symrcm.cc:117
static void Q_enq(CMK_Node *Q, octave_idx_type N, octave_idx_type &qt, const CMK_Node &o)
Definition: symrcm.cc:93
#define LEFT(i)
Definition: symrcm.cc:115
static CMK_Node Q_deq(CMK_Node *Q, octave_idx_type N, octave_idx_type &qh)
Definition: symrcm.cc:102
static void H_heapify_min(CMK_Node *A, octave_idx_type i, octave_idx_type size)
Definition: symrcm.cc:125
static void H_insert(CMK_Node *H, octave_idx_type &h, const CMK_Node &o)
Definition: symrcm.cc:155
#define H_empty(H, h)
Definition: symrcm.cc:192
#define PARENT(i)
Definition: symrcm.cc:119
#define Q_empty(Q, N, qh, qt)
Definition: symrcm.cc:110
static CMK_Node H_remove_min(CMK_Node *H, octave_idx_type &h, int reorg)
Definition: symrcm.cc:182
static octave_idx_type calc_degrees(octave_idx_type N, const octave_idx_type *ridx, const octave_idx_type *cidx, octave_idx_type *D)
Definition: symrcm.cc:336
static void transpose(octave_idx_type N, const octave_idx_type *ridx, const octave_idx_type *cidx, octave_idx_type *ridx2, octave_idx_type *cidx2)
Definition: symrcm.cc:389
static octave_idx_type find_starting_node(octave_idx_type N, const octave_idx_type *ridx, const octave_idx_type *cidx, const octave_idx_type *ridx2, const octave_idx_type *cidx2, octave_idx_type *D, octave_idx_type start)
Definition: symrcm.cc:198