GNU Octave  8.1.0
A high-level interpreted language, primarily intended for numerical computations, mostly compatible with Matlab
__ilu__.cc
Go to the documentation of this file.
1 ////////////////////////////////////////////////////////////////////////
2 //
3 // Copyright (C) 2013-2023 The Octave Project Developers
4 //
5 // See the file COPYRIGHT.md in the top-level directory of this
6 // distribution or <https://octave.org/copyright/>.
7 //
8 // This file is part of Octave.
9 //
10 // Octave is free software: you can redistribute it and/or modify it
11 // under the terms of the GNU General Public License as published by
12 // the Free Software Foundation, either version 3 of the License, or
13 // (at your option) any later version.
14 //
15 // Octave is distributed in the hope that it will be useful, but
16 // WITHOUT ANY WARRANTY; without even the implied warranty of
17 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 // GNU General Public License for more details.
19 //
20 // You should have received a copy of the GNU General Public License
21 // along with Octave; see the file COPYING. If not, see
22 // <https://www.gnu.org/licenses/>.
23 //
24 ////////////////////////////////////////////////////////////////////////
25 
26 #if defined (HAVE_CONFIG_H)
27 # include "config.h"
28 #endif
29 
30 #include "oct-locbuf.h"
31 #include "oct-norm.h"
32 
33 #include "defun.h"
34 #include "error.h"
35 
36 #include "builtin-defun-decls.h"
37 
39 
40 // This function implements the IKJ and JKI variants of Gaussian elimination to
41 // perform the ILU0 decomposition. The behavior is controlled by milu
42 // parameter. If milu = ['off'|'col'] the JKI version is performed taking
43 // advantage of CCS format of the input matrix. If milu = 'row' the input
44 // matrix has to be transposed to obtain the equivalent CRS structure so we can
45 // work efficiently with rows. In this case IKJ version is used.
46 template <typename octave_matrix_t, typename T>
47 void ilu_0 (octave_matrix_t& sm, const std::string milu = "off")
48 {
49  const octave_idx_type n = sm.cols ();
50  octave_idx_type j1, j2, jrow, jw, i, j, k, jj;
51  T r;
52  T tl = 0;
53 
54  enum {OFF, ROW, COL};
55  char opt;
56  if (milu == "row")
57  {
58  opt = ROW;
59  sm = sm.transpose ();
60  }
61  else if (milu == "col")
62  opt = COL;
63  else
64  opt = OFF;
65 
66  // Input matrix pointers
67  octave_idx_type *cidx = sm.cidx ();
68  octave_idx_type *ridx = sm.ridx ();
69  T *data = sm.data ();
70 
71  // Working arrays
74 
75  // Initialize working arrays
76  for (i = 0; i < n; i++)
77  iw[i] = -1;
78 
79  // Loop over all columns
80  for (k = 0; k < n; k++)
81  {
82  j1 = cidx[k];
83  j2 = cidx[k+1];
84 
85  if (j1 == j2)
86  error ("ilu: A has a zero on the diagonal");
87 
88  for (j = j1; j < j2; j++)
89  iw[ridx[j]] = j;
90 
91  r = 0;
92  j = j1;
93  jrow = ridx[j1];
94  while ((jrow < k) && (j < j2))
95  {
96  if (opt == ROW)
97  {
98  tl = data[j] / data[uptr[jrow]];
99  data[j] = tl;
100  }
101  for (jj = uptr[jrow] + 1; jj < cidx[jrow+1]; jj++)
102  {
103  jw = iw[ridx[jj]];
104  if (jw != -1)
105  if (opt == ROW)
106  data[jw] -= tl * data[jj];
107  else
108  data[jw] -= data[j] * data[jj];
109 
110  else
111  // That is for the milu='row'
112  if (opt == ROW)
113  r += tl * data[jj];
114  else if (opt == COL)
115  r += data[j] * data[jj];
116  }
117  j++;
118  jrow = ridx[j];
119  }
120  uptr[k] = j;
121  if (opt != OFF)
122  data[uptr[k]] -= r;
123 
124  if (opt != ROW)
125  for (jj = uptr[k] + 1; jj < cidx[k+1]; jj++)
126  data[jj] /= data[uptr[k]];
127 
128  if (k != jrow)
129  error ("ilu: A has a zero on the diagonal");
130 
131  if (data[j] == T(0))
132  error ("ilu: encountered a pivot equal to 0");
133 
134  for (i = j1; i < j2; i++)
135  iw[ridx[i]] = -1;
136  }
137 
138  if (opt == ROW)
139  sm = sm.transpose ();
140 }
141 
142 DEFUN (__ilu0__, args, ,
143  doc: /* -*- texinfo -*-
144 @deftypefn {} {[@var{L}, @var{U}] =} __ilu0__ (@var{A}, @var{milu})
145 @deftypefnx {} {[@var{L}, @var{U}, @var{P}] =} __ilu0__ (@var{A}, @var{milu})
146 Undocumented internal function.
147 @end deftypefn */)
148 {
149  if (args.length () != 2)
150  print_usage ();
151 
152  octave_value_list retval (2);
153 
154  std::string milu = args(1).string_value ();
155 
156  // In ILU0 algorithm the zero-pattern of the input matrix is preserved so
157  // its structure does not change during the algorithm. The same input
158  // matrix is used to build the output matrix due to that fact.
159  octave_value_list arg_list;
160  if (! args(0).iscomplex ())
161  {
162  SparseMatrix sm = args(0).sparse_matrix_value ();
163  SparseMatrix speye (DiagMatrix (sm.cols (), sm.cols (), 1.0));
164 
165  ilu_0 <SparseMatrix, double> (sm, milu);
166 
167  retval(0) = speye + Ftril (ovl (sm, -1))(0).sparse_matrix_value ();
168  retval(1) = Ftriu (ovl (sm))(0).sparse_matrix_value ();
169  }
170  else
171  {
172  SparseComplexMatrix sm = args(0).sparse_complex_matrix_value ();
173  SparseMatrix speye (DiagMatrix (sm.cols (), sm.cols (), 1.0));
174 
175  ilu_0 <SparseComplexMatrix, Complex> (sm, milu);
176 
177  retval(0) = speye +
178  Ftril (ovl (sm, -1))(0).sparse_complex_matrix_value ();
179  retval(1) = Ftriu (ovl (sm))(0).sparse_complex_matrix_value ();
180  }
181 
182  return retval;
183 }
184 
185 template <typename octave_matrix_t, typename T>
186 void ilu_crout (octave_matrix_t& sm_l, octave_matrix_t& sm_u,
187  octave_matrix_t& L, octave_matrix_t& U, T *cols_norm,
188  T *rows_norm, const T droptol = 0,
189  const std::string milu = "off")
190 {
191  // Map the strings into chars for faster comparing inside loops
192  char opt;
193  enum {OFF, ROW, COL};
194  if (milu == "row")
195  opt = ROW;
196  else if (milu == "col")
197  opt = COL;
198  else
199  opt = OFF;
200 
201  octave_idx_type jrow, i, j, k, jj, total_len_l, total_len_u, max_len_u,
202  max_len_l, w_len_u, w_len_l, cols_list_len, rows_list_len;
203 
204  const octave_idx_type n = sm_u.cols ();
205  sm_u = sm_u.transpose ();
206 
207  max_len_u = sm_u.nnz ();
208  max_len_u += (0.1 * max_len_u) > n ? 0.1 * max_len_u : n;
209  max_len_l = sm_l.nnz ();
210  max_len_l += (0.1 * max_len_l) > n ? 0.1 * max_len_l : n;
211 
212  // Extract pointers to the arrays for faster access inside loops
213  octave_idx_type *cidx_in_u = sm_u.cidx ();
214  octave_idx_type *ridx_in_u = sm_u.ridx ();
215  T *data_in_u = sm_u.data ();
216  octave_idx_type *cidx_in_l = sm_l.cidx ();
217  octave_idx_type *ridx_in_l = sm_l.ridx ();
218  T *data_in_l = sm_l.data ();
219 
220  // L output arrays
221  Array <octave_idx_type> ridx_out_l (dim_vector (max_len_l, 1));
222  octave_idx_type *ridx_l = ridx_out_l.fortran_vec ();
223  Array <T> data_out_l (dim_vector (max_len_l, 1));
224  T *data_l = data_out_l.fortran_vec ();
225 
226  // U output arrays
227  Array <octave_idx_type> ridx_out_u (dim_vector (max_len_u, 1));
228  octave_idx_type *ridx_u = ridx_out_u.fortran_vec ();
229  Array <T> data_out_u (dim_vector (max_len_u, 1));
230  T *data_u = data_out_u.fortran_vec ();
231 
232  // Working arrays
233  OCTAVE_LOCAL_BUFFER (octave_idx_type, cidx_l, n + 1);
234  OCTAVE_LOCAL_BUFFER (octave_idx_type, cidx_u, n + 1);
235  OCTAVE_LOCAL_BUFFER (octave_idx_type, cols_list, n);
236  OCTAVE_LOCAL_BUFFER (octave_idx_type, rows_list, n);
237  OCTAVE_LOCAL_BUFFER (T, w_data_l, n);
238  OCTAVE_LOCAL_BUFFER (T, w_data_u, n);
241  OCTAVE_LOCAL_BUFFER (T, cr_sum, n);
242 
243  T zero = T (0);
244 
245  // Initialize working arrays
246  cidx_u[0] = cidx_in_u[0];
247  cidx_l[0] = cidx_in_l[0];
248  for (i = 0; i < n; i++)
249  {
250  w_data_u[i] = zero;
251  w_data_l[i] = zero;
252  cr_sum[i] = zero;
253  }
254 
255  total_len_u = 0;
256  total_len_l = 0;
257  cols_list_len = 0;
258  rows_list_len = 0;
259 
260  // Loop over all columns
261  for (k = 0; k < n; k++)
262  {
263  // Load the working column and working row
264  for (i = cidx_in_l[k]; i < cidx_in_l[k+1]; i++)
265  w_data_l[ridx_in_l[i]] = data_in_l[i];
266 
267  for (i = cidx_in_u[k]; i < cidx_in_u[k+1]; i++)
268  w_data_u[ridx_in_u[i]] = data_in_u[i];
269 
270  // Update U working row
271  for (j = 0; j < rows_list_len; j++)
272  {
273  if ((Ufirst[rows_list[j]] != -1))
274  for (jj = Ufirst[rows_list[j]]; jj < cidx_u[rows_list[j]+1]; jj++)
275  {
276  jrow = ridx_u[jj];
277  w_data_u[jrow] -= data_u[jj] * data_l[Lfirst[rows_list[j]]];
278  }
279  }
280  // Update L working column
281  for (j = 0; j < cols_list_len; j++)
282  {
283  if (Lfirst[cols_list[j]] != -1)
284  for (jj = Lfirst[cols_list[j]]; jj < cidx_l[cols_list[j]+1]; jj++)
285  {
286  jrow = ridx_l[jj];
287  w_data_l[jrow] -= data_l[jj] * data_u[Ufirst[cols_list[j]]];
288  }
289  }
290 
291  if ((max_len_u - total_len_u) < n)
292  {
293  max_len_u += (0.1 * max_len_u) > n ? 0.1 * max_len_u : n;
294  data_out_u.resize (dim_vector (max_len_u, 1));
295  data_u = data_out_u.fortran_vec ();
296  ridx_out_u.resize (dim_vector (max_len_u, 1));
297  ridx_u = ridx_out_u.fortran_vec ();
298  }
299 
300  if ((max_len_l - total_len_l) < n)
301  {
302  max_len_l += (0.1 * max_len_l) > n ? 0.1 * max_len_l : n;
303  data_out_l.resize (dim_vector (max_len_l, 1));
304  data_l = data_out_l.fortran_vec ();
305  ridx_out_l.resize (dim_vector (max_len_l, 1));
306  ridx_l = ridx_out_l.fortran_vec ();
307  }
308 
309  // Expand the working row into the U output data structures
310  w_len_l = 0;
311  data_u[total_len_u] = w_data_u[k];
312  ridx_u[total_len_u] = k;
313  w_len_u = 1;
314  for (i = k + 1; i < n; i++)
315  {
316  if (w_data_u[i] != zero)
317  {
318  if (std::abs (w_data_u[i]) < (droptol * rows_norm[k]))
319  {
320  if (opt == ROW)
321  cr_sum[k] += w_data_u[i];
322  else if (opt == COL)
323  cr_sum[i] += w_data_u[i];
324  }
325  else
326  {
327  data_u[total_len_u + w_len_u] = w_data_u[i];
328  ridx_u[total_len_u + w_len_u] = i;
329  w_len_u++;
330  }
331  }
332 
333  // Expand the working column into the L output data structures
334  if (w_data_l[i] != zero)
335  {
336  if (std::abs (w_data_l[i]) < (droptol * cols_norm[k]))
337  {
338  if (opt == COL)
339  cr_sum[k] += w_data_l[i];
340  else if (opt == ROW)
341  cr_sum[i] += w_data_l[i];
342  }
343  else
344  {
345  data_l[total_len_l + w_len_l] = w_data_l[i];
346  ridx_l[total_len_l + w_len_l] = i;
347  w_len_l++;
348  }
349  }
350  w_data_u[i] = zero;
351  w_data_l[i] = zero;
352  }
353 
354  // Compensate row and column sums --> milu option
355  if (opt == COL || opt == ROW)
356  data_u[total_len_u] += cr_sum[k];
357 
358  // Check if the pivot is zero
359  if (data_u[total_len_u] == zero)
360  error ("ilu: encountered a pivot equal to 0");
361 
362  // Scale the elements in L by the pivot
363  for (i = total_len_l ; i < (total_len_l + w_len_l); i++)
364  data_l[i] /= data_u[total_len_u];
365 
366  total_len_u += w_len_u;
367  total_len_l += w_len_l;
368  // Check if there are too many elements to be indexed with
369  // octave_idx_type type due to fill-in during the process.
370  if (total_len_l < 0 || total_len_u < 0)
371  error ("ilu: integer overflow. Too many fill-in elements in L or U");
372 
373  cidx_u[k+1] = cidx_u[k] - cidx_u[0] + w_len_u;
374  cidx_l[k+1] = cidx_l[k] - cidx_l[0] + w_len_l;
375 
376  // The tricky part of the algorithm. The arrays pointing to the first
377  // working element of each column in the next iteration (Lfirst) or
378  // the first working element of each row (Ufirst) are updated. Also the
379  // arrays working as lists cols_list and rows_list are filled with
380  // indices pointing to Ufirst and Lfirst respectively.
381  // FIXME: Maybe the -1 indicating in Ufirst and Lfirst, that no elements
382  // have to be considered in a certain column or row in next iteration,
383  // can be removed. It feels safer to me using such an indicator.
384  if (k < (n - 1))
385  {
386  if (w_len_u > 0)
387  Ufirst[k] = cidx_u[k];
388  else
389  Ufirst[k] = -1;
390  if (w_len_l > 0)
391  Lfirst[k] = cidx_l[k];
392  else
393  Lfirst[k] = -1;
394  cols_list_len = 0;
395  rows_list_len = 0;
396  for (i = 0; i <= k; i++)
397  {
398  if (Ufirst[i] != -1)
399  {
400  jj = ridx_u[Ufirst[i]];
401  if (jj < (k + 1))
402  {
403  if (Ufirst[i] < (cidx_u[i+1]))
404  {
405  Ufirst[i]++;
406  if (Ufirst[i] == cidx_u[i+1])
407  Ufirst[i] = -1;
408  else
409  jj = ridx_u[Ufirst[i]];
410  }
411  }
412  if (jj == (k + 1))
413  {
414  cols_list[cols_list_len] = i;
415  cols_list_len++;
416  }
417  }
418 
419  if (Lfirst[i] != -1)
420  {
421  jj = ridx_l[Lfirst[i]];
422  if (jj < (k + 1))
423  if (Lfirst[i] < (cidx_l[i+1]))
424  {
425  Lfirst[i]++;
426  if (Lfirst[i] == cidx_l[i+1])
427  Lfirst[i] = -1;
428  else
429  jj = ridx_l[Lfirst[i]];
430  }
431  if (jj == (k + 1))
432  {
433  rows_list[rows_list_len] = i;
434  rows_list_len++;
435  }
436  }
437  }
438  }
439  }
440 
441  // Build the output matrices
442  L = octave_matrix_t (n, n, total_len_l);
443  U = octave_matrix_t (n, n, total_len_u);
444 
445  // FIXME: Can these loops be replaced by std::copy?
446  for (i = 0; i <= n; i++)
447  L.cidx (i) = cidx_l[i];
448 
449  for (i = 0; i < total_len_l; i++)
450  {
451  L.ridx (i) = ridx_l[i];
452  L.data (i) = data_l[i];
453  }
454 
455  for (i = 0; i <= n; i++)
456  U.cidx (i) = cidx_u[i];
457 
458  for (i = 0; i < total_len_u; i++)
459  {
460  U.ridx (i) = ridx_u[i];
461  U.data (i) = data_u[i];
462  }
463 
464  U = U.transpose ();
465 }
466 
467 DEFUN (__iluc__, args, ,
468  doc: /* -*- texinfo -*-
469 @deftypefn {} {[@var{L}, @var{U}] =} __iluc__ (@var{A}, @var{droptol}, @var{milu})
470 Undocumented internal function.
471 @end deftypefn */)
472 {
473  if (args.length () != 3)
474  print_usage ();
475 
476  double droptol = args(1).double_value ();
477  std::string milu = args(2).string_value ();
478 
479  if (! args(0).iscomplex ())
480  {
481  SparseMatrix sm = args(0).sparse_matrix_value ();
482  SparseMatrix sm_u = Ftriu (ovl (sm))(0).sparse_matrix_value ();
483  SparseMatrix sm_l = Ftril (ovl (sm, -1))(0).sparse_matrix_value ();
484  SparseMatrix U, L;
485 
486  RowVector sm_col_norms = xcolnorms (sm);
487  ColumnVector sm_row_norms = xrownorms (sm);
488  ilu_crout <SparseMatrix, double> (sm_l, sm_u, L, U,
489  sm_col_norms.fortran_vec (),
490  sm_row_norms.fortran_vec (),
491  droptol, milu);
492 
493  SparseMatrix speye (DiagMatrix (L.cols (), L.cols (), 1.0));
494 
495  return ovl (L + speye, U);
496  }
497  else
498  {
499  SparseComplexMatrix sm = args(0).sparse_complex_matrix_value ();
500  SparseComplexMatrix sm_u = Ftriu (ovl (sm))(0).sparse_complex_matrix_value ();
501  SparseComplexMatrix sm_l = Ftril (ovl (sm, -1))(0).sparse_complex_matrix_value ();
502  SparseComplexMatrix U, L;
503  Array<Complex> cols_norm = xcolnorms (sm);
504  Array<Complex> rows_norm = xrownorms (sm);
505 
506  ilu_crout <SparseComplexMatrix, Complex> (sm_l, sm_u, L, U,
507  cols_norm.fortran_vec (),
508  rows_norm.fortran_vec (),
509  Complex (droptol), milu);
510 
511  SparseMatrix speye (DiagMatrix (L.cols (), L.cols (), 1.0));
512 
513  return ovl (L + speye, U);
514  }
515 }
516 
517 // This function implements the IKJ and JKI variants of gaussian elimination
518 // to perform the ILUTP decomposition. The behavior is controlled by milu
519 // parameter. If milu = ['off'|'col'] the JKI version is performed taking
520 // advantage of CCS format of the input matrix. Row pivoting is performed.
521 // If milu = 'row' the input matrix has to be transposed to obtain the
522 // equivalent CRS structure so we can work efficiently with rows. In that
523 // case IKJ version is used and column pivoting is performed.
524 
525 template <typename octave_matrix_t, typename T>
526 void ilu_tp (octave_matrix_t& sm, octave_matrix_t& L, octave_matrix_t& U,
527  octave_idx_type nnz_u, octave_idx_type nnz_l, T *cols_norm,
528  Array <octave_idx_type>& perm_vec, const T droptol = T(0),
529  const T thresh = T(0), const std::string milu = "off",
530  const double udiag = 0)
531 {
532  char opt;
533  enum {OFF, ROW, COL};
534  if (milu == "row")
535  opt = ROW;
536  else if (milu == "col")
537  opt = COL;
538  else
539  opt = OFF;
540 
541  const octave_idx_type n = sm.cols ();
542 
543  // This is necessary for the JKI (milu = "row") variant.
544  if (opt == ROW)
545  sm = sm.transpose ();
546 
547  // Extract pointers to the arrays for faster access inside loops
548  octave_idx_type *cidx_in = sm.cidx ();
549  octave_idx_type *ridx_in = sm.ridx ();
550  T *data_in = sm.data ();
551  octave_idx_type jrow, i, j, k, jj, c, total_len_l, total_len_u, p_perm,
552  max_ind, max_len_l, max_len_u;
553  T zero = T(0);
554 
555  T tl = zero, aux, maximum;
556 
557  max_len_u = nnz_u;
558  max_len_u += (0.1 * max_len_u) > n ? 0.1 * max_len_u : n;
559  max_len_l = nnz_l;
560  max_len_l += (0.1 * max_len_l) > n ? 0.1 * max_len_l : n;
561 
562  // Extract pointers to the arrays for faster access inside loops
563  Array <octave_idx_type> cidx_out_l (dim_vector (n + 1, 1));
564  octave_idx_type *cidx_l = cidx_out_l.fortran_vec ();
565  Array <octave_idx_type> ridx_out_l (dim_vector (max_len_l, 1));
566  octave_idx_type *ridx_l = ridx_out_l.fortran_vec ();
567  Array <T> data_out_l (dim_vector (max_len_l, 1));
568  T *data_l = data_out_l.fortran_vec ();
569 
570  // Data for U
571  Array <octave_idx_type> cidx_out_u (dim_vector (n + 1, 1));
572  octave_idx_type *cidx_u = cidx_out_u.fortran_vec ();
573  Array <octave_idx_type> ridx_out_u (dim_vector (max_len_u, 1));
574  octave_idx_type *ridx_u = ridx_out_u.fortran_vec ();
575  Array <T> data_out_u (dim_vector (max_len_u, 1));
576  T *data_u = data_out_u.fortran_vec ();
577 
578  // Working arrays and permutation arrays
579  octave_idx_type w_len_u, w_len_l;
580  T total_sum, partial_col_sum = zero, partial_row_sum = zero;
581  std::set <octave_idx_type> iw_l;
582  std::set <octave_idx_type> iw_u;
583  std::set <octave_idx_type>::iterator it, it2;
584  OCTAVE_LOCAL_BUFFER (T, w_data, n);
586  octave_idx_type *perm = perm_vec.fortran_vec ();
588 
589  // Initialize working and permutation arrays
590  cidx_l[0] = cidx_in[0];
591  cidx_u[0] = cidx_in[0];
592  for (i = 0; i < n; i++)
593  {
594  w_data[i] = 0;
595  perm[i] = i;
596  iperm[i] = i;
597  }
598  total_len_u = 0;
599  total_len_l = 0;
600 
601  // Loop over all columns
602  for (k = 0; k < n; k++)
603  {
604  for (j = cidx_in[k]; j < cidx_in[k+1]; j++)
605  {
606  p_perm = iperm[ridx_in[j]];
607  w_data[iperm[ridx_in[j]]] = data_in[j];
608  if (p_perm > k)
609  iw_l.insert (ridx_in[j]);
610  else
611  iw_u.insert (p_perm);
612  }
613 
614  it = iw_u.begin ();
615  jrow = *it;
616  total_sum = zero;
617  while ((jrow < k) && (it != iw_u.end ()))
618  {
619  if (opt == COL)
620  partial_col_sum = w_data[jrow];
621  if (w_data[jrow] != zero)
622  {
623  if (opt == ROW)
624  {
625  partial_row_sum = w_data[jrow];
626  tl = w_data[jrow] / data_u[uptr[jrow]];
627  }
628  for (jj = cidx_l[jrow]; jj < cidx_l[jrow+1]; jj++)
629  {
630  p_perm = iperm[ridx_l[jj]];
631  aux = w_data[p_perm];
632  if (opt == ROW)
633  {
634  w_data[p_perm] -= tl * data_l[jj];
635  partial_row_sum += tl * data_l[jj];
636  }
637  else
638  {
639  tl = data_l[jj] * w_data[jrow];
640  w_data[p_perm] -= tl;
641  if (opt == COL)
642  partial_col_sum += tl;
643  }
644 
645  if ((aux == zero) && (w_data[p_perm] != zero))
646  {
647  if (p_perm > k)
648  iw_l.insert (ridx_l[jj]);
649  else
650  iw_u.insert (p_perm);
651  }
652  }
653 
654  // Drop element from the U part in IKJ
655  // and L part in JKI variant (milu = [col|off])
656  if ((std::abs (w_data[jrow]) < (droptol * cols_norm[k]))
657  && (w_data[jrow] != zero))
658  {
659  if (opt == COL)
660  total_sum += partial_col_sum;
661  else if (opt == ROW)
662  total_sum += partial_row_sum;
663  w_data[jrow] = zero;
664  it2 = it;
665  it++;
666  iw_u.erase (it2);
667  jrow = *it;
668  continue;
669  }
670  else
671  // This is the element scaled by the pivot
672  // in the actual iteration
673  if (opt == ROW)
674  w_data[jrow] = tl;
675  }
676  jrow = *(++it);
677  }
678 
679  // Search for the pivot and update iw_l and iw_u if the pivot is not the
680  // diagonal element
681  if ((thresh > zero) && (k < (n - 1)))
682  {
683  maximum = std::abs (w_data[k]) / thresh;
684  max_ind = perm[k];
685  for (it = iw_l.begin (); it != iw_l.end (); ++it)
686  {
687  p_perm = iperm[*it];
688  if (std::abs (w_data[p_perm]) > maximum)
689  {
690  maximum = std::abs (w_data[p_perm]);
691  max_ind = *it;
692  it2 = it;
693  }
694  }
695  // If the pivot is not the diagonal element update all
696  p_perm = iperm[max_ind];
697  if (max_ind != perm[k])
698  {
699  iw_l.erase (it2);
700  if (w_data[k] != zero)
701  iw_l.insert (perm[k]);
702  else
703  iw_u.insert (k);
704  // Swap data and update permutation vectors
705  aux = w_data[k];
706  iperm[perm[p_perm]] = k;
707  iperm[perm[k]] = p_perm;
708  c = perm[k];
709  perm[k] = perm[p_perm];
710  perm[p_perm] = c;
711  w_data[k] = w_data[p_perm];
712  w_data[p_perm] = aux;
713  }
714 
715  }
716 
717  // Drop elements in the L part in the IKJ version,
718  // and from the U part in the JKI version.
719  it = iw_l.begin ();
720  while (it != iw_l.end ())
721  {
722  p_perm = iperm[*it];
723  if (droptol > zero)
724  if (std::abs (w_data[p_perm]) < (droptol * cols_norm[k]))
725  {
726  if (opt != OFF)
727  total_sum += w_data[p_perm];
728  w_data[p_perm] = zero;
729  it2 = it;
730  it++;
731  iw_l.erase (it2);
732  continue;
733  }
734 
735  it++;
736  }
737 
738  // If milu == [row|col] summation is preserved.
739  // Compensate diagonal element.
740  if (opt != OFF)
741  {
742  if ((total_sum > zero) && (w_data[k] == zero))
743  iw_u.insert (k);
744  w_data[k] += total_sum;
745  }
746 
747  // Check if the pivot is zero and if udiag is active.
748  // NOTE: If the pivot == 0 and udiag is active, then if milu = [col|row]
749  // will not preserve the row sum for that column/row.
750  if (w_data[k] == zero)
751  {
752  if (udiag != 1)
753  error ("ilu: encountered a pivot equal to 0");
754 
755  w_data[k] = droptol;
756  iw_u.insert (k);
757  }
758 
759  // Scale the elements on the L part for IKJ version (milu = [col|off])
760  if (opt != ROW)
761  for (it = iw_l.begin (); it != iw_l.end (); ++it)
762  {
763  p_perm = iperm[*it];
764  w_data[p_perm] = w_data[p_perm] / w_data[k];
765  }
766 
767  if ((max_len_u - total_len_u) < n)
768  {
769  max_len_u += (0.1 * max_len_u) > n ? 0.1 * max_len_u : n;
770  data_out_u.resize (dim_vector (max_len_u, 1));
771  data_u = data_out_u.fortran_vec ();
772  ridx_out_u.resize (dim_vector (max_len_u, 1));
773  ridx_u = ridx_out_u.fortran_vec ();
774  }
775 
776  if ((max_len_l - total_len_l) < n)
777  {
778  max_len_l += (0.1 * max_len_l) > n ? 0.1 * max_len_l : n;
779  data_out_l.resize (dim_vector (max_len_l, 1));
780  data_l = data_out_l.fortran_vec ();
781  ridx_out_l.resize (dim_vector (max_len_l, 1));
782  ridx_l = ridx_out_l.fortran_vec ();
783  }
784 
785  // Expand working vector into U.
786  w_len_u = 0;
787  for (it = iw_u.begin (); it != iw_u.end (); ++it)
788  {
789  if (w_data[*it] != zero)
790  {
791  data_u[total_len_u + w_len_u] = w_data[*it];
792  ridx_u[total_len_u + w_len_u] = *it;
793  w_len_u++;
794  }
795  w_data[*it] = 0;
796  }
797 
798  // Expand working vector into L.
799  w_len_l = 0;
800  for (it = iw_l.begin (); it != iw_l.end (); ++it)
801  {
802  p_perm = iperm[*it];
803  if (w_data[p_perm] != zero)
804  {
805  data_l[total_len_l + w_len_l] = w_data[p_perm];
806  ridx_l[total_len_l + w_len_l] = *it;
807  w_len_l++;
808  }
809  w_data[p_perm] = 0;
810  }
811  total_len_u += w_len_u;
812  total_len_l += w_len_l;
813 
814  // Check if there are too many elements to be indexed with
815  // octave_idx_type type due to fill-in during the process.
816  if (total_len_l < 0 || total_len_u < 0)
817  error ("ilu: Integer overflow. Too many fill-in elements in L or U");
818 
819  if (opt == ROW)
820  uptr[k] = total_len_u - 1;
821 
822  cidx_u[k+1] = cidx_u[k] - cidx_u[0] + w_len_u;
823  cidx_l[k+1] = cidx_l[k] - cidx_l[0] + w_len_l;
824 
825  iw_l.clear ();
826  iw_u.clear ();
827  }
828 
829  octave_matrix_t *L_ptr;
830  octave_matrix_t *U_ptr;
831  octave_matrix_t diag (n, n, n);
832 
833  // L and U are interchanged if milu = 'row'. It is a matter
834  // of nomenclature to re-use code with both IKJ and JKI
835  // versions of the algorithm.
836  if (opt == ROW)
837  {
838  L_ptr = &U;
839  U_ptr = &L;
840  L = octave_matrix_t (n, n, total_len_u - n);
841  U = octave_matrix_t (n, n, total_len_l);
842  }
843  else
844  {
845  L_ptr = &L;
846  U_ptr = &U;
847  L = octave_matrix_t (n, n, total_len_l);
848  U = octave_matrix_t (n, n, total_len_u);
849  }
850 
851  for (i = 0; i <= n; i++)
852  {
853  L_ptr->cidx (i) = cidx_l[i];
854  U_ptr->cidx (i) = cidx_u[i];
855  if (opt == ROW)
856  U_ptr->cidx (i) -= i;
857  }
858 
859  for (i = 0; i < n; i++)
860  {
861  if (opt == ROW)
862  diag.elem (i, i) = data_u[uptr[i]];
863  j = cidx_l[i];
864 
865  while (j < cidx_l[i+1])
866  {
867  L_ptr->ridx (j) = ridx_l[j];
868  L_ptr->data (j) = data_l[j];
869  j++;
870  }
871  j = cidx_u[i];
872 
873  while (j < cidx_u[i+1])
874  {
875  c = j;
876  if (opt == ROW)
877  {
878  // The diagonal is removed from L if milu = 'row'.
879  // That is because is convenient to have it inside
880  // the L part to carry out the process.
881  if (ridx_u[j] == i)
882  {
883  j++;
884  continue;
885  }
886  else
887  c -= i;
888  }
889  U_ptr->data (c) = data_u[j];
890  U_ptr->ridx (c) = ridx_u[j];
891  j++;
892  }
893  }
894 
895  if (opt == ROW)
896  {
897  U = U.transpose ();
898  // The diagonal, conveniently permuted is added to U
899  U += diag.index (idx_vector::colon, perm_vec);
900  L = L.transpose ();
901  }
902 }
903 
904 DEFUN (__ilutp__, args, nargout,
905  doc: /* -*- texinfo -*-
906 @deftypefn {} {[@var{L}, @var{U}] =} __ilutp__ (@var{A}, @var{droptol}, @var{thresh}, @var{milu}, @var{udiag})
907 @deftypefnx {} {[@var{L}, @var{U}, @var{P}] =} __ilutp__ (@dots{})
908 Undocumented internal function.
909 @end deftypefn */)
910 {
911  if (args.length () != 5)
912  print_usage ();
913 
914  octave_value_list retval;
915  double droptol = args(1).double_value ();
916  double thresh = args(2).double_value ();
917  std::string milu = args(3).string_value ();
918  double udiag = args(4).double_value ();
919 
920  octave_value_list arg_list;
921  octave_idx_type nnz_u, nnz_l;
922  if (! args(0).iscomplex ())
923  {
924  SparseMatrix sm = args(0).sparse_matrix_value ();
925  SparseMatrix U, L;
926  nnz_u = (Ftriu (ovl (sm))(0).sparse_matrix_value ()).nnz ();
927  nnz_l = (Ftril (ovl (sm, -1))(0).sparse_matrix_value ()).nnz ();
928  Array <double> rc_norm;
929  if (milu == "row")
930  rc_norm = xrownorms (sm);
931  else
932  rc_norm = xcolnorms (sm);
933  Array <octave_idx_type> perm (dim_vector (sm.cols (), 1));
934 
935  ilu_tp <SparseMatrix, double> (sm, L, U, nnz_u, nnz_l,
936  rc_norm.fortran_vec (),
937  perm, droptol, thresh, milu, udiag);
938 
939  SparseMatrix speye (DiagMatrix (L.cols (), L.cols (), 1.0));
940  if (milu == "row")
941  {
942  retval(0) = L + speye;
943  if (nargout == 3)
944  {
945  retval(1) = U.index (idx_vector::colon, perm);
946  retval(2) = speye.index (idx_vector::colon, perm);
947  }
948  else
949  retval(1) = U;
950  }
951  else
952  {
953  retval(1) = U;
954  if (nargout == 3)
955  {
956  retval(0) = L.index (perm, idx_vector::colon) + speye;
957  retval(2) = speye.index (perm, idx_vector::colon);
958  }
959  else
960  retval(0) = L + speye.index (idx_vector::colon, perm);
961  }
962  }
963  else
964  {
965  SparseComplexMatrix sm = args(0).sparse_complex_matrix_value ();
966  SparseComplexMatrix U, L;
967  nnz_u = (Ftriu (ovl (sm))(0).sparse_complex_matrix_value ()).nnz ();
968  nnz_l = (Ftril (ovl (sm, -1))(0).sparse_complex_matrix_value ()).nnz ();
969  Array <Complex> rc_norm;
970  if (milu == "row")
971  rc_norm = xrownorms (sm);
972  else
973  rc_norm = xcolnorms (sm);
974  Array <octave_idx_type> perm (dim_vector (sm.cols (), 1));
975 
976  ilu_tp <SparseComplexMatrix, Complex>
977  (sm, L, U, nnz_u, nnz_l, rc_norm.fortran_vec (), perm,
978  Complex (droptol), Complex (thresh), milu, udiag);
979 
980  SparseMatrix speye (DiagMatrix (L.cols (), L.cols (), 1.0));
981  if (milu == "row")
982  {
983  retval(0) = L + speye;
984  if (nargout == 3)
985  {
986  retval(1) = U.index (idx_vector::colon, perm);
987  retval(2) = speye.index (idx_vector::colon, perm);
988  }
989  else if (nargout == 2)
990  retval(1) = U;
991  }
992  else
993  {
994  retval(1) = U;
995  if (nargout == 3)
996  {
997  retval(0) = L.index (perm, idx_vector::colon) + speye;
998  retval(2) = speye.index (perm, idx_vector::colon);
999  }
1000  else
1001  retval(0) = L + speye.index (idx_vector::colon, perm);
1002  }
1003  }
1004 
1005  return retval;
1006 }
1007 
1008 /*
1009 ## No test needed for internal helper function.
1010 %!assert (1)
1011 */
1012 
OCTAVE_END_NAMESPACE(octave)
void ilu_0(octave_matrix_t &sm, const std::string milu="off")
Definition: __ilu__.cc:47
void ilu_tp(octave_matrix_t &sm, octave_matrix_t &L, octave_matrix_t &U, octave_idx_type nnz_u, octave_idx_type nnz_l, T *cols_norm, Array< octave_idx_type > &perm_vec, const T droptol=T(0), const T thresh=T(0), const std::string milu="off", const double udiag=0)
Definition: __ilu__.cc:526
void ilu_crout(octave_matrix_t &sm_l, octave_matrix_t &sm_u, octave_matrix_t &L, octave_matrix_t &U, T *cols_norm, T *rows_norm, const T droptol=0, const std::string milu="off")
Definition: __ilu__.cc:186
OCTARRAY_API void resize(const dim_vector &dv, const T &rfv)
Size of the specified dimension.
Definition: Array-base.cc:1032
OCTARRAY_API T * fortran_vec(void)
Size of the specified dimension.
Definition: Array-base.cc:1766
OCTAVE_API Sparse< T, Alloc > index(const octave::idx_vector &i, bool resize_ok=false) const
Definition: Sparse.cc:1432
octave_idx_type cols(void) const
Definition: Sparse.h:352
Vector representing the dimensions (size) of an Array.
Definition: dim-vector.h:94
static const idx_vector colon
Definition: idx-vector.h:484
OCTAVE_BEGIN_NAMESPACE(octave) static octave_value daspk_fcn
OCTINTERP_API void print_usage(void)
Definition: defun-int.h:72
#define DEFUN(name, args_name, nargout_name, doc)
Macro to define a builtin function.
Definition: defun.h:56
void error(const char *fmt,...)
Definition: error.cc:979
octave_idx_type n
Definition: mx-inlines.cc:753
T * r
Definition: mx-inlines.cc:773
std::complex< double > Complex
Definition: oct-cmplx.h:33
#define OCTAVE_LOCAL_BUFFER(T, buf, size)
Definition: oct-locbuf.h:44
RowVector xcolnorms(const Matrix &m, double p)
Definition: oct-norm.cc:628
ColumnVector xrownorms(const Matrix &m, double p)
Definition: oct-norm.cc:628
octave_value_list ovl(const OV_Args &... args)
Construct an octave_value_list with less typing.
Definition: ovl.h:211
static T abs(T x)
Definition: pr-output.cc:1678
OCTAVE_EXPORT octave_value_list Ftriu(const octave_value_list &args, int)
Definition: tril.cc:431
OCTAVE_EXPORT octave_value_list Ftril(const octave_value_list &args, int)
Definition: tril.cc:382