GNU Octave  8.1.0
A high-level interpreted language, primarily intended for numerical computations, mostly compatible with Matlab
symrcm.cc
Go to the documentation of this file.
1 ////////////////////////////////////////////////////////////////////////
2 //
3 // Copyright (C) 2007-2023 The Octave Project Developers
4 //
5 // See the file COPYRIGHT.md in the top-level directory of this
6 // distribution or <https://octave.org/copyright/>.
7 //
8 // This file is part of Octave.
9 //
10 // Octave is free software: you can redistribute it and/or modify it
11 // under the terms of the GNU General Public License as published by
12 // the Free Software Foundation, either version 3 of the License, or
13 // (at your option) any later version.
14 //
15 // Octave is distributed in the hope that it will be useful, but
16 // WITHOUT ANY WARRANTY; without even the implied warranty of
17 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 // GNU General Public License for more details.
19 //
20 // You should have received a copy of the GNU General Public License
21 // along with Octave; see the file COPYING. If not, see
22 // <https://www.gnu.org/licenses/>.
23 //
24 ////////////////////////////////////////////////////////////////////////
25 
26 /*
27 An implementation of the Reverse Cuthill-McKee algorithm (symrcm)
28 
29 The implementation of this algorithm is based on the descriptions found in
30 
31 @INPROCEEDINGS{,
32  author = {E. Cuthill and J. McKee},
33  title = {Reducing the Bandwidth of Sparse Symmetric Matrices},
34  booktitle = {Proceedings of the 24th ACM National Conference},
35  publisher = {Brandon Press},
36  pages = {157 -- 172},
37  location = {New Jersey},
38  year = {1969}
39 }
40 
41 @BOOK{,
42  author = {Alan George and Joseph W. H. Liu},
43  title = {Computer Solution of Large Sparse Positive Definite Systems},
44  publisher = {Prentice Hall Series in Computational Mathematics},
45  ISBN = {0-13-165274-5},
46  year = {1981}
47 }
48 
49 The algorithm represents a heuristic approach to the NP-complete minimum
50 bandwidth problem.
51 
52 Written by Michael Weitzel <michael.weitzel@@uni-siegen.de>
53  <weitzel@@ldknet.org>
54 */
55 
56 #if defined (HAVE_CONFIG_H)
57 # include "config.h"
58 #endif
59 
60 #include <algorithm>
61 
62 #include "CSparse.h"
63 #include "boolNDArray.h"
64 #include "dNDArray.h"
65 #include "dSparse.h"
66 #include "oct-locbuf.h"
67 #include "oct-sparse.h"
68 #include "quit.h"
69 
70 #include "defun.h"
71 #include "errwarn.h"
72 #include "ov.h"
73 #include "ovl.h"
74 
76 
77 // A node struct for the Cuthill-McKee algorithm
78 struct CMK_Node
79 {
80  // the node's id (matrix row index)
82  // the node's degree
84  // minimal distance to the root of the spanning tree
86 };
87 
88 // A simple queue.
89 // Queues Q have a fixed maximum size N (rows,cols of the matrix) and are
90 // stored in an array. qh and qt point to queue head and tail.
91 
92 // Enqueue operation (adds a node "o" at the tail)
93 
94 inline static void
96 {
97  Q[qt] = o;
98  qt = (qt + 1) % (N + 1);
99 }
100 
101 // Dequeue operation (removes a node from the head)
102 
103 inline static CMK_Node
105 {
106  CMK_Node r = Q[qh];
107  qh = (qh + 1) % (N + 1);
108  return r;
109 }
110 
// Predicate (queue empty): the queue is empty exactly when head and tail
// index coincide.  Q and N are accepted only for symmetry with Q_enq and
// Q_deq; they are not evaluated.
#define Q_empty(Q, N, qh, qt) ((qh) == (qt))
113 
// A simple, array-based binary heap (used as a priority queue for nodes).
// The tree is stored implicitly: the root lives at index 0 and the
// children of entry i live at indices 2*i+1 and 2*i+2.

// the left descendant of entry i
#define LEFT(i) (((i) << 1) + 1)        // = (2*(i)+1)
// the right descendant of entry i
#define RIGHT(i) (((i) << 1) + 2)       // = (2*(i)+2)
// the parent of entry i (only meaningful for i > 0: the root has no parent)
#define PARENT(i) (((i) - 1) >> 1)      // = floor(((i)-1)/2)
122 
123 // Builds a min-heap (the root contains the smallest element). A is an array
124 // with the graph's nodes, i is a starting position, size is the length of A.
125 
126 static void
128 {
129  octave_idx_type j = i;
130  for (;;)
131  {
132  octave_idx_type l = LEFT(j);
133  octave_idx_type r = RIGHT(j);
134 
135  octave_idx_type smallest;
136  if (l < size && A[l].deg < A[j].deg)
137  smallest = l;
138  else
139  smallest = j;
140 
141  if (r < size && A[r].deg < A[smallest].deg)
142  smallest = r;
143 
144  if (smallest != j)
145  {
146  std::swap (A[j], A[smallest]);
147  j = smallest;
148  }
149  else
150  break;
151  }
152 }
153 
154 // Heap operation insert. Running time is O(log(n))
155 
156 static void
158 {
159  octave_idx_type i = h++;
160 
161  H[i] = o;
162 
163  if (i == 0)
164  return;
165  do
166  {
167  octave_idx_type p = PARENT(i);
168  if (H[i].deg < H[p].deg)
169  {
170  std::swap (H[i], H[p]);
171 
172  i = p;
173  }
174  else
175  break;
176  }
177  while (i > 0);
178 }
179 
180 // Heap operation remove-min. Removes the smallest element in O(1) and
181 // reorganizes the heap optionally in O(log(n))
182 
183 inline static CMK_Node
184 H_remove_min (CMK_Node *H, octave_idx_type& h, int reorg/*=1*/)
185 {
186  CMK_Node r = H[0];
187  H[0] = H[--h];
188  if (reorg)
189  H_heapify_min (H, 0, h);
190  return r;
191 }
192 
// Predicate (heap empty): true when the entry count h is zero.  H is
// accepted only for symmetry with the other heap operations; it is not
// evaluated.
#define H_empty(H, h) ((h) == 0)
195 
196 // Helper function for the Cuthill-McKee algorithm. Tries to determine a
197 // pseudo-peripheral node of the graph as starting node.
198 
199 static octave_idx_type
201  const octave_idx_type *cidx, const octave_idx_type *ridx2,
202  const octave_idx_type *cidx2, octave_idx_type *D,
203  octave_idx_type start)
204 {
205  CMK_Node w;
206 
208  boolNDArray btmp (dim_vector (1, N), false);
209  bool *visit = btmp.fortran_vec ();
210 
211  octave_idx_type qh = 0;
212  octave_idx_type qt = 0;
213  CMK_Node x;
214  x.id = start;
215  x.deg = D[start];
216  x.dist = 0;
217  Q_enq (Q, N, qt, x);
218  visit[start] = true;
219 
220  // distance level
221  octave_idx_type level = 0;
222  // current largest "eccentricity"
223  octave_idx_type max_dist = 0;
224 
225  for (;;)
226  {
227  while (! Q_empty (Q, N, qh, qt))
228  {
229  CMK_Node v = Q_deq (Q, N, qh);
230 
231  if (v.dist > x.dist || (v.id != x.id && v.deg > x.deg))
232  x = v;
233 
234  octave_idx_type i = v.id;
235 
236  // add all unvisited neighbors to the queue
237  octave_idx_type j1 = cidx[i];
238  octave_idx_type j2 = cidx2[i];
239  while (j1 < cidx[i+1] || j2 < cidx2[i+1])
240  {
241  octave_quit ();
242 
243  if (j1 == cidx[i+1])
244  {
245  octave_idx_type r2 = ridx2[j2++];
246  if (! visit[r2])
247  {
248  // the distance of node j is dist(i)+1
249  w.id = r2;
250  w.deg = D[r2];
251  w.dist = v.dist+1;
252  Q_enq (Q, N, qt, w);
253  visit[r2] = true;
254 
255  if (w.dist > level)
256  level = w.dist;
257  }
258  }
259  else if (j2 == cidx2[i+1])
260  {
261  octave_idx_type r1 = ridx[j1++];
262  if (! visit[r1])
263  {
264  // the distance of node j is dist(i)+1
265  w.id = r1;
266  w.deg = D[r1];
267  w.dist = v.dist+1;
268  Q_enq (Q, N, qt, w);
269  visit[r1] = true;
270 
271  if (w.dist > level)
272  level = w.dist;
273  }
274  }
275  else
276  {
277  octave_idx_type r1 = ridx[j1];
278  octave_idx_type r2 = ridx2[j2];
279  if (r1 <= r2)
280  {
281  if (! visit[r1])
282  {
283  w.id = r1;
284  w.deg = D[r1];
285  w.dist = v.dist+1;
286  Q_enq (Q, N, qt, w);
287  visit[r1] = true;
288 
289  if (w.dist > level)
290  level = w.dist;
291  }
292  j1++;
293  if (r1 == r2)
294  j2++;
295  }
296  else
297  {
298  if (! visit[r2])
299  {
300  w.id = r2;
301  w.deg = D[r2];
302  w.dist = v.dist+1;
303  Q_enq (Q, N, qt, w);
304  visit[r2] = true;
305 
306  if (w.dist > level)
307  level = w.dist;
308  }
309  j2++;
310  }
311  }
312  }
313  } // finish of BFS
314 
315  if (max_dist < x.dist)
316  {
317  max_dist = x.dist;
318 
319  for (octave_idx_type i = 0; i < N; i++)
320  visit[i] = false;
321 
322  visit[x.id] = true;
323  x.dist = 0;
324  qt = qh = 0;
325  Q_enq (Q, N, qt, x);
326  }
327  else
328  break;
329  }
330  return x.id;
331 }
332 
333 // Calculates the node's degrees. This means counting the nonzero elements
334 // in the symmetric matrix' rows. This works for non-symmetric matrices
335 // as well.
336 
337 static octave_idx_type
339  const octave_idx_type *cidx, octave_idx_type *D)
340 {
341  octave_idx_type max_deg = 0;
342 
343  for (octave_idx_type i = 0; i < N; i++)
344  D[i] = 0;
345 
346  for (octave_idx_type j = 0; j < N; j++)
347  {
348  for (octave_idx_type i = cidx[j]; i < cidx[j+1]; i++)
349  {
350  octave_quit ();
351 
352  octave_idx_type k = ridx[i];
353  // there is a nonzero element (k,j)
354  D[k]++;
355  if (D[k] > max_deg)
356  max_deg = D[k];
357  // if there is no element (j,k) there is one in
358  // the symmetric matrix:
359  if (k != j)
360  {
361  bool found = false;
362  for (octave_idx_type l = cidx[k]; l < cidx[k + 1]; l++)
363  {
364  octave_quit ();
365 
366  if (ridx[l] == j)
367  {
368  found = true;
369  break;
370  }
371  else if (ridx[l] > j)
372  break;
373  }
374 
375  if (! found)
376  {
377  // A(j,k) == 0
378  D[j]++;
379  if (D[j] > max_deg)
380  max_deg = D[j];
381  }
382  }
383  }
384  }
385  return max_deg;
386 }
387 
388 // Transpose of the structure of a square sparse matrix
389 
390 static void
392  const octave_idx_type *cidx, octave_idx_type *ridx2,
393  octave_idx_type *cidx2)
394 {
395  octave_idx_type nz = cidx[N];
396 
398  for (octave_idx_type i = 0; i < N; i++)
399  w[i] = 0;
400  for (octave_idx_type i = 0; i < nz; i++)
401  w[ridx[i]]++;
402  nz = 0;
403  for (octave_idx_type i = 0; i < N; i++)
404  {
405  octave_quit ();
406 
407  cidx2[i] = nz;
408  nz += w[i];
409  w[i] = cidx2[i];
410  }
411  cidx2[N] = nz;
412  w[N] = nz;
413 
414  for (octave_idx_type j = 0; j < N; j++)
415  for (octave_idx_type k = cidx[j]; k < cidx[j + 1]; k++)
416  {
417  octave_quit ();
418 
419  octave_idx_type q = w[ridx[k]]++;
420  ridx2[q] = j;
421  }
422 }
423 
424 // An implementation of the Cuthill-McKee algorithm.
425 DEFUN (symrcm, args, ,
426  doc: /* -*- texinfo -*-
427 @deftypefn {} {@var{p} =} symrcm (@var{S})
428 Return the symmetric reverse @nospell{Cuthill-McKee} permutation of @var{S}.
429 
430 @var{p} is a permutation vector such that
431 @code{@var{S}(@var{p}, @var{p})} tends to have its diagonal elements closer
432 to the diagonal than @var{S}. This is a good preordering for LU or
433 Cholesky@tie{}factorization of matrices that come from ``long, skinny''
434 problems. It works for both symmetric and asymmetric @var{S}.
435 
436 The algorithm represents a heuristic approach to the NP-complete bandwidth
437 minimization problem. The implementation is based in the descriptions found
438 in
439 
440 @nospell{E. Cuthill, J. McKee}.
441 @cite{Reducing the Bandwidth of Sparse Symmetric Matrices}.
442 Proceedings of the 24th @nospell{ACM} National Conference,
443 157--172 1969, Brandon Press, New Jersey.
444 
445 @nospell{A. George, J.W.H. Liu}. @cite{Computer Solution of Large Sparse
446 Positive Definite Systems}, Prentice Hall Series in Computational
447 Mathematics, ISBN 0-13-165274-5, 1981.
448 
449 @seealso{colperm, colamd, symamd}
450 @end deftypefn */)
451 {
452  if (args.length () != 1)
453  print_usage ();
454 
455  octave_value arg = args(0);
456 
457  // the parameter of the matrix is converted into a sparse matrix
458  //(if necessary)
459  octave_idx_type *cidx;
460  octave_idx_type *ridx;
461  SparseMatrix Ar;
463 
464  if (arg.isreal ())
465  {
466  Ar = arg.sparse_matrix_value ();
467  // Note cidx/ridx are const, so use xridx and xcidx...
468  cidx = Ar.xcidx ();
469  ridx = Ar.xridx ();
470  }
471  else
472  {
473  Ac = arg.sparse_complex_matrix_value ();
474  cidx = Ac.xcidx ();
475  ridx = Ac.xridx ();
476  }
477 
478  octave_idx_type nr = arg.rows ();
479  octave_idx_type nc = arg.columns ();
480 
481  if (nr != nc)
482  err_square_matrix_required ("symrcm", "S");
483 
484  if (nr == 0 && nc == 0)
485  return ovl (NDArray (dim_vector (1, 0)));
486 
487  // sizes of the heaps
488  octave_idx_type s = 0;
489 
490  // head- and tail-indices for the queue
491  octave_idx_type qt = 0;
492  octave_idx_type qh = 0;
493  CMK_Node v, w;
494  // dimension of the matrix
495  octave_idx_type N = nr;
496 
497  OCTAVE_LOCAL_BUFFER (octave_idx_type, cidx2, N + 1);
498  OCTAVE_LOCAL_BUFFER (octave_idx_type, ridx2, cidx[N]);
499  transpose (N, ridx, cidx, ridx2, cidx2);
500 
501  // the permutation vector
502  NDArray P (dim_vector (1, N));
503 
504  // compute the node degrees
506  octave_idx_type max_deg = calc_degrees (N, ridx, cidx, D);
507 
508  // if none of the nodes has a degree > 0 (a matrix of zeros)
509  // the return value corresponds to the identity permutation
510  if (max_deg == 0)
511  {
512  for (octave_idx_type i = 0; i < N; i++)
513  P(i) = i;
514 
515  return ovl (P);
516  }
517 
518  // a heap for the a node's neighbors. The number of neighbors is
519  // limited by the maximum degree max_deg:
520  OCTAVE_LOCAL_BUFFER (CMK_Node, S, max_deg);
521 
522  // a queue for the BFS. The array is always one element larger than
523  // the number of entries that are stored.
525 
526  // a counter (for building the permutation)
527  octave_idx_type c = -1;
528 
529  // upper bound for the bandwidth (=quality of solution)
530  // initialize the bandwidth of the graph with 0. B contains the
531  // the maximum of the theoretical lower limits of the subgraphs
532  // bandwidths.
533  octave_idx_type B = 0;
534 
535  // mark all nodes as unvisited; with the exception of the nodes
536  // that have degree==0 and build a CC of the graph.
537 
538  boolNDArray btmp (dim_vector (1, N), false);
539  bool *visit = btmp.fortran_vec ();
540 
541  do
542  {
543  // locate an unvisited starting node of the graph
544  octave_idx_type i;
545  for (i = 0; i < N; i++)
546  if (! visit[i])
547  break;
548 
549  // locate a probably better starting node
550  v.id = find_starting_node (N, ridx, cidx, ridx2, cidx2, D, i);
551 
552  // mark the node as visited and enqueue it (a starting node
553  // for the BFS). Since the node will be a root of a spanning
554  // tree, its dist is 0.
555  v.deg = D[v.id];
556  v.dist = 0;
557  visit[v.id] = true;
558  Q_enq (Q, N, qt, v);
559 
560  // lower bound for the bandwidth of a subgraph
561  // keep a "level" in the spanning tree (= min. distance to the
562  // root) for determining the bandwidth of the computed
563  // permutation P
564  octave_idx_type Bsub = 0;
565  // min. dist. to the root is 0
566  octave_idx_type level = 0;
567  // the root is the first/only node on level 0
568  octave_idx_type level_N = 1;
569 
570  while (! Q_empty (Q, N, qh, qt))
571  {
572  v = Q_deq (Q, N, qh);
573  i = v.id;
574 
575  c++;
576 
577  // for computing the inverse permutation P where
578  // A(inv(P),inv(P)) or P'*A*P is banded
579  // P(i) = c;
580 
581  // for computing permutation P where
582  // A(P(i),P(j)) or P*A*P' is banded
583  P(c) = i;
584 
585  // put all unvisited neighbors j of node i on the heap
586  s = 0;
587  octave_idx_type j1 = cidx[i];
588  octave_idx_type j2 = cidx2[i];
589 
590  octave_quit ();
591 
592  while (j1 < cidx[i+1] || j2 < cidx2[i+1])
593  {
594  octave_quit ();
595 
596  if (j1 == cidx[i+1])
597  {
598  octave_idx_type r2 = ridx2[j2++];
599  if (! visit[r2])
600  {
601  // the distance of node j is dist(i)+1
602  w.id = r2;
603  w.deg = D[r2];
604  w.dist = v.dist+1;
605  H_insert (S, s, w);
606  visit[r2] = true;
607  }
608  }
609  else if (j2 == cidx2[i+1])
610  {
611  octave_idx_type r1 = ridx[j1++];
612  if (! visit[r1])
613  {
614  w.id = r1;
615  w.deg = D[r1];
616  w.dist = v.dist+1;
617  H_insert (S, s, w);
618  visit[r1] = true;
619  }
620  }
621  else
622  {
623  octave_idx_type r1 = ridx[j1];
624  octave_idx_type r2 = ridx2[j2];
625  if (r1 <= r2)
626  {
627  if (! visit[r1])
628  {
629  w.id = r1;
630  w.deg = D[r1];
631  w.dist = v.dist+1;
632  H_insert (S, s, w);
633  visit[r1] = true;
634  }
635  j1++;
636  if (r1 == r2)
637  j2++;
638  }
639  else
640  {
641  if (! visit[r2])
642  {
643  w.id = r2;
644  w.deg = D[r2];
645  w.dist = v.dist+1;
646  H_insert (S, s, w);
647  visit[r2] = true;
648  }
649  j2++;
650  }
651  }
652  }
653 
654  // add the neighbors to the queue (sorted by node degree)
655  while (! H_empty (S, s))
656  {
657  octave_quit ();
658 
659  // locate a neighbor of i with minimal degree in O(log(N))
660  v = H_remove_min (S, s, 1);
661 
662  // entered the BFS a new level?
663  if (v.dist > level)
664  {
665  // adjustment of bandwidth:
666  // "[...] the minimum bandwidth that
667  // can be obtained [...] is the
668  // maximum number of nodes per level"
669  if (Bsub < level_N)
670  Bsub = level_N;
671 
672  level = v.dist;
673  // v is the first node on the new level
674  level_N = 1;
675  }
676  else
677  {
678  // there is no new level but another node on
679  // this level:
680  level_N++;
681  }
682 
683  // enqueue v in O(1)
684  Q_enq (Q, N, qt, v);
685  }
686 
687  // synchronize the bandwidth with level_N once again:
688  if (Bsub < level_N)
689  Bsub = level_N;
690  }
691  // finish of BFS. If there are still unvisited nodes in the graph
692  // then it is split into CCs. The computed bandwidth is the maximum
693  // of all subgraphs. Update:
694  if (Bsub > B)
695  B = Bsub;
696  }
697  // are there any nodes left?
698  while (c+1 < N);
699 
700  // compute the reverse-ordering
701  s = N / 2 - 1;
702  for (octave_idx_type i = 0, j = N - 1; i <= s; i++, j--)
703  std::swap (P.elem (i), P.elem (j));
704 
705  // increment all indices, since Octave is not C
706  return ovl (P+1);
707 }
708 
OCTAVE_END_NAMESPACE(octave)
OCTARRAY_API T * fortran_vec(void)
Size of the specified dimension.
Definition: Array-base.cc:1766
OCTARRAY_OVERRIDABLE_FUNC_API T & elem(octave_idx_type n)
Size of the specified dimension.
Definition: Array.h:562
octave_idx_type * xcidx(void)
Definition: Sparse.h:602
octave_idx_type * xridx(void)
Definition: Sparse.h:589
Vector representing the dimensions (size) of an Array.
Definition: dim-vector.h:94
SparseMatrix sparse_matrix_value(bool frc_str_conv=false) const
Definition: ov.h:945
bool isreal(void) const
Definition: ov.h:783
octave_idx_type rows(void) const
Definition: ov.h:590
octave_idx_type columns(void) const
Definition: ov.h:592
SparseComplexMatrix sparse_complex_matrix_value(bool frc_str_conv=false) const
Definition: ov.h:949
OCTAVE_BEGIN_NAMESPACE(octave) static octave_value daspk_fcn
OCTINTERP_API void print_usage(void)
Definition: defun-int.h:72
#define DEFUN(name, args_name, nargout_name, doc)
Macro to define a builtin function.
Definition: defun.h:56
void err_square_matrix_required(const char *fcn, const char *name)
Definition: errwarn.cc:122
F77_RET_T const F77_INT F77_CMPLX const F77_INT F77_CMPLX * B
F77_RET_T const F77_INT const F77_INT const F77_INT F77_DBLE const F77_INT F77_DBLE const F77_INT F77_DBLE * Q
F77_RET_T const F77_INT F77_CMPLX * A
F77_RET_T const F77_INT & N
F77_RET_T const F77_DBLE * x
T * r
Definition: mx-inlines.cc:773
std::complex< double > w(std::complex< double > z, double relerr=0)
#define OCTAVE_LOCAL_BUFFER(T, buf, size)
Definition: oct-locbuf.h:44
octave_value_list ovl(const OV_Args &... args)
Construct an octave_value_list with less typing.
Definition: ovl.h:211
octave_idx_type deg
Definition: symrcm.cc:83
octave_idx_type dist
Definition: symrcm.cc:85
octave_idx_type id
Definition: symrcm.cc:81
#define RIGHT(i)
Definition: symrcm.cc:119
static void Q_enq(CMK_Node *Q, octave_idx_type N, octave_idx_type &qt, const CMK_Node &o)
Definition: symrcm.cc:95
#define LEFT(i)
Definition: symrcm.cc:117
static CMK_Node Q_deq(CMK_Node *Q, octave_idx_type N, octave_idx_type &qh)
Definition: symrcm.cc:104
static void H_heapify_min(CMK_Node *A, octave_idx_type i, octave_idx_type size)
Definition: symrcm.cc:127
static void H_insert(CMK_Node *H, octave_idx_type &h, const CMK_Node &o)
Definition: symrcm.cc:157
#define H_empty(H, h)
Definition: symrcm.cc:194
#define PARENT(i)
Definition: symrcm.cc:121
#define Q_empty(Q, N, qh, qt)
Definition: symrcm.cc:112
static CMK_Node H_remove_min(CMK_Node *H, octave_idx_type &h, int reorg)
Definition: symrcm.cc:184
static octave_idx_type calc_degrees(octave_idx_type N, const octave_idx_type *ridx, const octave_idx_type *cidx, octave_idx_type *D)
Definition: symrcm.cc:338
static void transpose(octave_idx_type N, const octave_idx_type *ridx, const octave_idx_type *cidx, octave_idx_type *ridx2, octave_idx_type *cidx2)
Definition: symrcm.cc:391
static octave_idx_type find_starting_node(octave_idx_type N, const octave_idx_type *ridx, const octave_idx_type *cidx, const octave_idx_type *ridx2, const octave_idx_type *cidx2, octave_idx_type *D, octave_idx_type start)
Definition: symrcm.cc:200