GNU Octave  9.1.0
A high-level interpreted language, primarily intended for numerical computations, mostly compatible with Matlab
__ilu__.cc
Go to the documentation of this file.
1 ////////////////////////////////////////////////////////////////////////
2 //
3 // Copyright (C) 2013-2024 The Octave Project Developers
4 //
5 // See the file COPYRIGHT.md in the top-level directory of this
6 // distribution or <https://octave.org/copyright/>.
7 //
8 // This file is part of Octave.
9 //
10 // Octave is free software: you can redistribute it and/or modify it
11 // under the terms of the GNU General Public License as published by
12 // the Free Software Foundation, either version 3 of the License, or
13 // (at your option) any later version.
14 //
15 // Octave is distributed in the hope that it will be useful, but
16 // WITHOUT ANY WARRANTY; without even the implied warranty of
17 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 // GNU General Public License for more details.
19 //
20 // You should have received a copy of the GNU General Public License
21 // along with Octave; see the file COPYING. If not, see
22 // <https://www.gnu.org/licenses/>.
23 //
24 ////////////////////////////////////////////////////////////////////////
25 
26 #if defined (HAVE_CONFIG_H)
27 # include "config.h"
28 #endif
29 
30 #include "oct-locbuf.h"
31 #include "oct-norm.h"
32 
33 #include "defun.h"
34 #include "error.h"
35 
36 #include "builtin-defun-decls.h"
37 
39 
40 // This function implements the IKJ and JKI variants of Gaussian elimination to
41 // perform the ILU0 decomposition. The behavior is controlled by milu
42 // parameter. If milu = ['off'|'col'] the JKI version is performed taking
43 // advantage of CCS format of the input matrix. If milu = 'row' the input
44 // matrix has to be transposed to obtain the equivalent CRS structure so we can
45 // work efficiently with rows. In this case IKJ version is used.
46 template <typename octave_matrix_t, typename T>
47 void ilu_0 (octave_matrix_t& sm, const std::string milu = "off")
48 {
49  const octave_idx_type n = sm.cols ();
50  octave_idx_type j1, j2, jrow, jw, i, j, k, jj;
51  T r;
52  T tl = 0;
53 
54  enum {OFF, ROW, COL};
55  char opt;
56  if (milu == "row")
57  {
58  opt = ROW;
59  sm = sm.transpose ();
60  }
61  else if (milu == "col")
62  opt = COL;
63  else
64  opt = OFF;
65 
66  // Input matrix pointers
67  octave_idx_type *cidx = sm.cidx ();
68  octave_idx_type *ridx = sm.ridx ();
69  T *data = sm.data ();
70 
71  // Working arrays
74 
75  // Initialize working arrays
76  for (i = 0; i < n; i++)
77  iw[i] = -1;
78 
79  // Loop over all columns
80  for (k = 0; k < n; k++)
81  {
82  j1 = cidx[k];
83  j2 = cidx[k+1];
84 
85  if (j1 == j2)
86  error ("ilu: A has a zero on the diagonal");
87 
88  for (j = j1; j < j2; j++)
89  iw[ridx[j]] = j;
90 
91  r = 0;
92  j = j1;
93  jrow = ridx[j1];
94  while ((jrow < k) && (j < j2))
95  {
96  if (opt == ROW)
97  {
98  tl = data[j] / data[uptr[jrow]];
99  data[j] = tl;
100  }
101  for (jj = uptr[jrow] + 1; jj < cidx[jrow+1]; jj++)
102  {
103  jw = iw[ridx[jj]];
104  if (jw != -1)
105  if (opt == ROW)
106  data[jw] -= tl * data[jj];
107  else
108  data[jw] -= data[j] * data[jj];
109 
110  else
111  // That is for the milu='row'
112  if (opt == ROW)
113  r += tl * data[jj];
114  else if (opt == COL)
115  r += data[j] * data[jj];
116  }
117  j++;
118  jrow = ridx[j];
119  }
120  uptr[k] = j;
121  if (opt != OFF)
122  data[uptr[k]] -= r;
123 
124  if (opt != ROW)
125  for (jj = uptr[k] + 1; jj < cidx[k+1]; jj++)
126  data[jj] /= data[uptr[k]];
127 
128  if (k != jrow)
129  error ("ilu: A has a zero on the diagonal");
130 
131  if (data[j] == T(0))
132  error ("ilu: encountered a pivot equal to 0");
133 
134  for (i = j1; i < j2; i++)
135  iw[ridx[i]] = -1;
136  }
137 
138  if (opt == ROW)
139  sm = sm.transpose ();
140 }
141 
142 DEFUN (__ilu0__, args, ,
143  doc: /* -*- texinfo -*-
144 @deftypefn {} {[@var{L}, @var{U}] =} __ilu0__ (@var{A}, @var{milu})
145 @deftypefnx {} {[@var{L}, @var{U}, @var{P}] =} __ilu0__ (@var{A}, @var{milu})
146 Undocumented internal function.
147 @end deftypefn */)
148 {
149  if (args.length () != 2)
150  print_usage ();
151 
152  octave_value_list retval (2);
153 
154  std::string milu = args(1).string_value ();
155 
156  // In ILU0 algorithm the zero-pattern of the input matrix is preserved so
157  // its structure does not change during the algorithm. The same input
158  // matrix is used to build the output matrix due to that fact.
159  octave_value_list arg_list;
160  if (! args(0).iscomplex ())
161  {
162  SparseMatrix sm = args(0).sparse_matrix_value ();
163  SparseMatrix speye (DiagMatrix (sm.cols (), sm.cols (), 1.0));
164 
165  ilu_0 <SparseMatrix, double> (sm, milu);
166 
167  retval(0) = speye + Ftril (ovl (sm, -1))(0).sparse_matrix_value ();
168  retval(1) = Ftriu (ovl (sm))(0).sparse_matrix_value ();
169  }
170  else
171  {
172  SparseComplexMatrix sm = args(0).sparse_complex_matrix_value ();
173  SparseMatrix speye (DiagMatrix (sm.cols (), sm.cols (), 1.0));
174 
175  ilu_0 <SparseComplexMatrix, Complex> (sm, milu);
176 
177  retval(0) = speye +
178  Ftril (ovl (sm, -1))(0).sparse_complex_matrix_value ();
179  retval(1) = Ftriu (ovl (sm))(0).sparse_complex_matrix_value ();
180  }
181 
182  return retval;
183 }
184 
185 template <typename octave_matrix_t, typename T>
186 void
187 ilu_crout (octave_matrix_t& sm_l, octave_matrix_t& sm_u,
188  octave_matrix_t& L, octave_matrix_t& U, T *cols_norm,
189  T *rows_norm, const T droptol = 0,
190  const std::string milu = "off")
191 {
192  // Map the strings into chars for faster comparing inside loops
193  char opt;
194  enum {OFF, ROW, COL};
195  if (milu == "row")
196  opt = ROW;
197  else if (milu == "col")
198  opt = COL;
199  else
200  opt = OFF;
201 
202  octave_idx_type jrow, i, j, k, jj, total_len_l, total_len_u, max_len_u,
203  max_len_l, w_len_u, w_len_l, cols_list_len, rows_list_len;
204 
205  const octave_idx_type n = sm_u.cols ();
206  sm_u = sm_u.transpose ();
207 
208  max_len_u = sm_u.nnz ();
209  max_len_u += (0.1 * max_len_u) > n ? 0.1 * max_len_u : n;
210  max_len_l = sm_l.nnz ();
211  max_len_l += (0.1 * max_len_l) > n ? 0.1 * max_len_l : n;
212 
213  // Extract pointers to the arrays for faster access inside loops
214  octave_idx_type *cidx_in_u = sm_u.cidx ();
215  octave_idx_type *ridx_in_u = sm_u.ridx ();
216  T *data_in_u = sm_u.data ();
217  octave_idx_type *cidx_in_l = sm_l.cidx ();
218  octave_idx_type *ridx_in_l = sm_l.ridx ();
219  T *data_in_l = sm_l.data ();
220 
221  // L output arrays
222  Array <octave_idx_type> ridx_out_l (dim_vector (max_len_l, 1));
223  octave_idx_type *ridx_l = ridx_out_l.fortran_vec ();
224  Array <T> data_out_l (dim_vector (max_len_l, 1));
225  T *data_l = data_out_l.fortran_vec ();
226 
227  // U output arrays
228  Array <octave_idx_type> ridx_out_u (dim_vector (max_len_u, 1));
229  octave_idx_type *ridx_u = ridx_out_u.fortran_vec ();
230  Array <T> data_out_u (dim_vector (max_len_u, 1));
231  T *data_u = data_out_u.fortran_vec ();
232 
233  // Working arrays
234  OCTAVE_LOCAL_BUFFER (octave_idx_type, cidx_l, n + 1);
235  OCTAVE_LOCAL_BUFFER (octave_idx_type, cidx_u, n + 1);
236  OCTAVE_LOCAL_BUFFER (octave_idx_type, cols_list, n);
237  OCTAVE_LOCAL_BUFFER (octave_idx_type, rows_list, n);
238  OCTAVE_LOCAL_BUFFER (T, w_data_l, n);
239  OCTAVE_LOCAL_BUFFER (T, w_data_u, n);
242  OCTAVE_LOCAL_BUFFER (T, cr_sum, n);
243 
244  T zero = T (0);
245 
246  // Initialize working arrays
247  cidx_u[0] = cidx_in_u[0];
248  cidx_l[0] = cidx_in_l[0];
249  for (i = 0; i < n; i++)
250  {
251  w_data_u[i] = zero;
252  w_data_l[i] = zero;
253  cr_sum[i] = zero;
254  }
255 
256  total_len_u = 0;
257  total_len_l = 0;
258  cols_list_len = 0;
259  rows_list_len = 0;
260 
261  // Loop over all columns
262  for (k = 0; k < n; k++)
263  {
264  // Load the working column and working row
265  for (i = cidx_in_l[k]; i < cidx_in_l[k+1]; i++)
266  w_data_l[ridx_in_l[i]] = data_in_l[i];
267 
268  for (i = cidx_in_u[k]; i < cidx_in_u[k+1]; i++)
269  w_data_u[ridx_in_u[i]] = data_in_u[i];
270 
271  // Update U working row
272  for (j = 0; j < rows_list_len; j++)
273  {
274  if ((Ufirst[rows_list[j]] != -1))
275  for (jj = Ufirst[rows_list[j]]; jj < cidx_u[rows_list[j]+1]; jj++)
276  {
277  jrow = ridx_u[jj];
278  w_data_u[jrow] -= data_u[jj] * data_l[Lfirst[rows_list[j]]];
279  }
280  }
281  // Update L working column
282  for (j = 0; j < cols_list_len; j++)
283  {
284  if (Lfirst[cols_list[j]] != -1)
285  for (jj = Lfirst[cols_list[j]]; jj < cidx_l[cols_list[j]+1]; jj++)
286  {
287  jrow = ridx_l[jj];
288  w_data_l[jrow] -= data_l[jj] * data_u[Ufirst[cols_list[j]]];
289  }
290  }
291 
292  if ((max_len_u - total_len_u) < n)
293  {
294  max_len_u += (0.1 * max_len_u) > n ? 0.1 * max_len_u : n;
295  data_out_u.resize (dim_vector (max_len_u, 1));
296  data_u = data_out_u.fortran_vec ();
297  ridx_out_u.resize (dim_vector (max_len_u, 1));
298  ridx_u = ridx_out_u.fortran_vec ();
299  }
300 
301  if ((max_len_l - total_len_l) < n)
302  {
303  max_len_l += (0.1 * max_len_l) > n ? 0.1 * max_len_l : n;
304  data_out_l.resize (dim_vector (max_len_l, 1));
305  data_l = data_out_l.fortran_vec ();
306  ridx_out_l.resize (dim_vector (max_len_l, 1));
307  ridx_l = ridx_out_l.fortran_vec ();
308  }
309 
310  // Expand the working row into the U output data structures
311  w_len_l = 0;
312  data_u[total_len_u] = w_data_u[k];
313  ridx_u[total_len_u] = k;
314  w_len_u = 1;
315  for (i = k + 1; i < n; i++)
316  {
317  if (w_data_u[i] != zero)
318  {
319  if (std::abs (w_data_u[i]) < (droptol * rows_norm[k]))
320  {
321  if (opt == ROW)
322  cr_sum[k] += w_data_u[i];
323  else if (opt == COL)
324  cr_sum[i] += w_data_u[i];
325  }
326  else
327  {
328  data_u[total_len_u + w_len_u] = w_data_u[i];
329  ridx_u[total_len_u + w_len_u] = i;
330  w_len_u++;
331  }
332  }
333 
334  // Expand the working column into the L output data structures
335  if (w_data_l[i] != zero)
336  {
337  if (std::abs (w_data_l[i]) < (droptol * cols_norm[k]))
338  {
339  if (opt == COL)
340  cr_sum[k] += w_data_l[i];
341  else if (opt == ROW)
342  cr_sum[i] += w_data_l[i];
343  }
344  else
345  {
346  data_l[total_len_l + w_len_l] = w_data_l[i];
347  ridx_l[total_len_l + w_len_l] = i;
348  w_len_l++;
349  }
350  }
351  w_data_u[i] = zero;
352  w_data_l[i] = zero;
353  }
354 
355  // Compensate row and column sums --> milu option
356  if (opt == COL || opt == ROW)
357  data_u[total_len_u] += cr_sum[k];
358 
359  // Check if the pivot is zero
360  if (data_u[total_len_u] == zero)
361  error ("ilu: encountered a pivot equal to 0");
362 
363  // Scale the elements in L by the pivot
364  for (i = total_len_l ; i < (total_len_l + w_len_l); i++)
365  data_l[i] /= data_u[total_len_u];
366 
367  total_len_u += w_len_u;
368  total_len_l += w_len_l;
369  // Check if there are too many elements to be indexed with
370  // octave_idx_type type due to fill-in during the process.
371  if (total_len_l < 0 || total_len_u < 0)
372  error ("ilu: integer overflow. Too many fill-in elements in L or U");
373 
374  cidx_u[k+1] = cidx_u[k] - cidx_u[0] + w_len_u;
375  cidx_l[k+1] = cidx_l[k] - cidx_l[0] + w_len_l;
376 
377  // The tricky part of the algorithm. The arrays pointing to the first
378  // working element of each column in the next iteration (Lfirst) or
379  // the first working element of each row (Ufirst) are updated. Also the
380  // arrays working as lists cols_list and rows_list are filled with
381  // indices pointing to Ufirst and Lfirst respectively.
382  // FIXME: Maybe the -1 indicating in Ufirst and Lfirst, that no elements
383  // have to be considered in a certain column or row in next iteration,
384  // can be removed. It feels safer to me using such an indicator.
385  if (k < (n - 1))
386  {
387  if (w_len_u > 0)
388  Ufirst[k] = cidx_u[k];
389  else
390  Ufirst[k] = -1;
391  if (w_len_l > 0)
392  Lfirst[k] = cidx_l[k];
393  else
394  Lfirst[k] = -1;
395  cols_list_len = 0;
396  rows_list_len = 0;
397  for (i = 0; i <= k; i++)
398  {
399  if (Ufirst[i] != -1)
400  {
401  jj = ridx_u[Ufirst[i]];
402  if (jj < (k + 1))
403  {
404  if (Ufirst[i] < (cidx_u[i+1]))
405  {
406  Ufirst[i]++;
407  if (Ufirst[i] == cidx_u[i+1])
408  Ufirst[i] = -1;
409  else
410  jj = ridx_u[Ufirst[i]];
411  }
412  }
413  if (jj == (k + 1))
414  {
415  cols_list[cols_list_len] = i;
416  cols_list_len++;
417  }
418  }
419 
420  if (Lfirst[i] != -1)
421  {
422  jj = ridx_l[Lfirst[i]];
423  if (jj < (k + 1))
424  if (Lfirst[i] < (cidx_l[i+1]))
425  {
426  Lfirst[i]++;
427  if (Lfirst[i] == cidx_l[i+1])
428  Lfirst[i] = -1;
429  else
430  jj = ridx_l[Lfirst[i]];
431  }
432  if (jj == (k + 1))
433  {
434  rows_list[rows_list_len] = i;
435  rows_list_len++;
436  }
437  }
438  }
439  }
440  }
441 
442  // Build the output matrices
443  L = octave_matrix_t (n, n, total_len_l);
444  U = octave_matrix_t (n, n, total_len_u);
445 
446  // FIXME: Can these loops be replaced by std::copy?
447  for (i = 0; i <= n; i++)
448  L.cidx (i) = cidx_l[i];
449 
450  for (i = 0; i < total_len_l; i++)
451  {
452  L.ridx (i) = ridx_l[i];
453  L.data (i) = data_l[i];
454  }
455 
456  for (i = 0; i <= n; i++)
457  U.cidx (i) = cidx_u[i];
458 
459  for (i = 0; i < total_len_u; i++)
460  {
461  U.ridx (i) = ridx_u[i];
462  U.data (i) = data_u[i];
463  }
464 
465  U = U.transpose ();
466 }
467 
468 DEFUN (__iluc__, args, ,
469  doc: /* -*- texinfo -*-
470 @deftypefn {} {[@var{L}, @var{U}] =} __iluc__ (@var{A}, @var{droptol}, @var{milu})
471 Undocumented internal function.
472 @end deftypefn */)
473 {
474  if (args.length () != 3)
475  print_usage ();
476 
477  double droptol = args(1).double_value ();
478  std::string milu = args(2).string_value ();
479 
480  if (! args(0).iscomplex ())
481  {
482  SparseMatrix sm = args(0).sparse_matrix_value ();
483  SparseMatrix sm_u = Ftriu (ovl (sm))(0).sparse_matrix_value ();
484  SparseMatrix sm_l = Ftril (ovl (sm, -1))(0).sparse_matrix_value ();
485  SparseMatrix U, L;
486 
487  RowVector sm_col_norms = xcolnorms (sm);
488  ColumnVector sm_row_norms = xrownorms (sm);
489  ilu_crout <SparseMatrix, double> (sm_l, sm_u, L, U,
490  sm_col_norms.fortran_vec (),
491  sm_row_norms.fortran_vec (),
492  droptol, milu);
493 
494  SparseMatrix speye (DiagMatrix (L.cols (), L.cols (), 1.0));
495 
496  return ovl (L + speye, U);
497  }
498  else
499  {
500  SparseComplexMatrix sm = args(0).sparse_complex_matrix_value ();
501  SparseComplexMatrix sm_u = Ftriu (ovl (sm))(0).sparse_complex_matrix_value ();
502  SparseComplexMatrix sm_l = Ftril (ovl (sm, -1))(0).sparse_complex_matrix_value ();
503  SparseComplexMatrix U, L;
504  Array<Complex> cols_norm = xcolnorms (sm);
505  Array<Complex> rows_norm = xrownorms (sm);
506 
507  ilu_crout <SparseComplexMatrix, Complex> (sm_l, sm_u, L, U,
508  cols_norm.fortran_vec (),
509  rows_norm.fortran_vec (),
510  Complex (droptol), milu);
511 
512  SparseMatrix speye (DiagMatrix (L.cols (), L.cols (), 1.0));
513 
514  return ovl (L + speye, U);
515  }
516 }
517 
518 // This function implements the IKJ and JKI variants of gaussian elimination
519 // to perform the ILUTP decomposition. The behavior is controlled by milu
520 // parameter. If milu = ['off'|'col'] the JKI version is performed taking
521 // advantage of CCS format of the input matrix. Row pivoting is performed.
522 // If milu = 'row' the input matrix has to be transposed to obtain the
523 // equivalent CRS structure so we can work efficiently with rows. In that
524 // case IKJ version is used and column pivoting is performed.
525 
526 template <typename octave_matrix_t, typename T>
527 void
528 ilu_tp (octave_matrix_t& sm, octave_matrix_t& L, octave_matrix_t& U,
529  octave_idx_type nnz_u, octave_idx_type nnz_l, T *cols_norm,
530  Array <octave_idx_type>& perm_vec, const T droptol = T(0),
531  const T thresh = T(0), const std::string milu = "off",
532  const double udiag = 0)
533 {
534  char opt;
535  enum {OFF, ROW, COL};
536  if (milu == "row")
537  opt = ROW;
538  else if (milu == "col")
539  opt = COL;
540  else
541  opt = OFF;
542 
543  const octave_idx_type n = sm.cols ();
544 
545  // This is necessary for the JKI (milu = "row") variant.
546  if (opt == ROW)
547  sm = sm.transpose ();
548 
549  // Extract pointers to the arrays for faster access inside loops
550  octave_idx_type *cidx_in = sm.cidx ();
551  octave_idx_type *ridx_in = sm.ridx ();
552  T *data_in = sm.data ();
553  octave_idx_type jrow, i, j, k, jj, c, total_len_l, total_len_u, p_perm,
554  max_ind, max_len_l, max_len_u;
555  T zero = T(0);
556 
557  T tl = zero, aux, maximum;
558 
559  max_len_u = nnz_u;
560  max_len_u += (0.1 * max_len_u) > n ? 0.1 * max_len_u : n;
561  max_len_l = nnz_l;
562  max_len_l += (0.1 * max_len_l) > n ? 0.1 * max_len_l : n;
563 
564  // Extract pointers to the arrays for faster access inside loops
565  Array <octave_idx_type> cidx_out_l (dim_vector (n + 1, 1));
566  octave_idx_type *cidx_l = cidx_out_l.fortran_vec ();
567  Array <octave_idx_type> ridx_out_l (dim_vector (max_len_l, 1));
568  octave_idx_type *ridx_l = ridx_out_l.fortran_vec ();
569  Array <T> data_out_l (dim_vector (max_len_l, 1));
570  T *data_l = data_out_l.fortran_vec ();
571 
572  // Data for U
573  Array <octave_idx_type> cidx_out_u (dim_vector (n + 1, 1));
574  octave_idx_type *cidx_u = cidx_out_u.fortran_vec ();
575  Array <octave_idx_type> ridx_out_u (dim_vector (max_len_u, 1));
576  octave_idx_type *ridx_u = ridx_out_u.fortran_vec ();
577  Array <T> data_out_u (dim_vector (max_len_u, 1));
578  T *data_u = data_out_u.fortran_vec ();
579 
580  // Working arrays and permutation arrays
581  octave_idx_type w_len_u, w_len_l;
582  T total_sum, partial_col_sum = zero, partial_row_sum = zero;
583  std::set <octave_idx_type> iw_l;
584  std::set <octave_idx_type> iw_u;
585  std::set <octave_idx_type>::iterator it, it2;
586  OCTAVE_LOCAL_BUFFER (T, w_data, n);
588  octave_idx_type *perm = perm_vec.fortran_vec ();
590 
591  // Initialize working and permutation arrays
592  cidx_l[0] = cidx_in[0];
593  cidx_u[0] = cidx_in[0];
594  for (i = 0; i < n; i++)
595  {
596  w_data[i] = 0;
597  perm[i] = i;
598  iperm[i] = i;
599  }
600  total_len_u = 0;
601  total_len_l = 0;
602 
603  // Loop over all columns
604  for (k = 0; k < n; k++)
605  {
606  for (j = cidx_in[k]; j < cidx_in[k+1]; j++)
607  {
608  p_perm = iperm[ridx_in[j]];
609  w_data[iperm[ridx_in[j]]] = data_in[j];
610  if (p_perm > k)
611  iw_l.insert (ridx_in[j]);
612  else
613  iw_u.insert (p_perm);
614  }
615 
616  it = iw_u.begin ();
617  jrow = *it;
618  total_sum = zero;
619  while ((jrow < k) && (it != iw_u.end ()))
620  {
621  if (opt == COL)
622  partial_col_sum = w_data[jrow];
623  if (w_data[jrow] != zero)
624  {
625  if (opt == ROW)
626  {
627  partial_row_sum = w_data[jrow];
628  tl = w_data[jrow] / data_u[uptr[jrow]];
629  }
630  for (jj = cidx_l[jrow]; jj < cidx_l[jrow+1]; jj++)
631  {
632  p_perm = iperm[ridx_l[jj]];
633  aux = w_data[p_perm];
634  if (opt == ROW)
635  {
636  w_data[p_perm] -= tl * data_l[jj];
637  partial_row_sum += tl * data_l[jj];
638  }
639  else
640  {
641  tl = data_l[jj] * w_data[jrow];
642  w_data[p_perm] -= tl;
643  if (opt == COL)
644  partial_col_sum += tl;
645  }
646 
647  if ((aux == zero) && (w_data[p_perm] != zero))
648  {
649  if (p_perm > k)
650  iw_l.insert (ridx_l[jj]);
651  else
652  iw_u.insert (p_perm);
653  }
654  }
655 
656  // Drop element from the U part in IKJ
657  // and L part in JKI variant (milu = [col|off])
658  if ((std::abs (w_data[jrow]) < (droptol * cols_norm[k]))
659  && (w_data[jrow] != zero))
660  {
661  if (opt == COL)
662  total_sum += partial_col_sum;
663  else if (opt == ROW)
664  total_sum += partial_row_sum;
665  w_data[jrow] = zero;
666  it2 = it;
667  it++;
668  iw_u.erase (it2);
669  jrow = *it;
670  continue;
671  }
672  else
673  // This is the element scaled by the pivot
674  // in the actual iteration
675  if (opt == ROW)
676  w_data[jrow] = tl;
677  }
678  jrow = *(++it);
679  }
680 
681  // Search for the pivot and update iw_l and iw_u if the pivot is not the
682  // diagonal element
683  if ((thresh > zero) && (k < (n - 1)))
684  {
685  maximum = std::abs (w_data[k]) / thresh;
686  max_ind = perm[k];
687  for (it = iw_l.begin (); it != iw_l.end (); ++it)
688  {
689  p_perm = iperm[*it];
690  if (std::abs (w_data[p_perm]) > maximum)
691  {
692  maximum = std::abs (w_data[p_perm]);
693  max_ind = *it;
694  it2 = it;
695  }
696  }
697  // If the pivot is not the diagonal element update all
698  p_perm = iperm[max_ind];
699  if (max_ind != perm[k])
700  {
701  iw_l.erase (it2);
702  if (w_data[k] != zero)
703  iw_l.insert (perm[k]);
704  else
705  iw_u.insert (k);
706  // Swap data and update permutation vectors
707  aux = w_data[k];
708  iperm[perm[p_perm]] = k;
709  iperm[perm[k]] = p_perm;
710  c = perm[k];
711  perm[k] = perm[p_perm];
712  perm[p_perm] = c;
713  w_data[k] = w_data[p_perm];
714  w_data[p_perm] = aux;
715  }
716 
717  }
718 
719  // Drop elements in the L part in the IKJ version,
720  // and from the U part in the JKI version.
721  it = iw_l.begin ();
722  while (it != iw_l.end ())
723  {
724  p_perm = iperm[*it];
725  if (droptol > zero)
726  if (std::abs (w_data[p_perm]) < (droptol * cols_norm[k]))
727  {
728  if (opt != OFF)
729  total_sum += w_data[p_perm];
730  w_data[p_perm] = zero;
731  it2 = it;
732  it++;
733  iw_l.erase (it2);
734  continue;
735  }
736 
737  it++;
738  }
739 
740  // If milu == [row|col] summation is preserved.
741  // Compensate diagonal element.
742  if (opt != OFF)
743  {
744  if ((total_sum > zero) && (w_data[k] == zero))
745  iw_u.insert (k);
746  w_data[k] += total_sum;
747  }
748 
749  // Check if the pivot is zero and if udiag is active.
750  // NOTE: If the pivot == 0 and udiag is active, then if milu = [col|row]
751  // will not preserve the row sum for that column/row.
752  if (w_data[k] == zero)
753  {
754  if (udiag != 1)
755  error ("ilu: encountered a pivot equal to 0");
756 
757  w_data[k] = droptol;
758  iw_u.insert (k);
759  }
760 
761  // Scale the elements on the L part for IKJ version (milu = [col|off])
762  if (opt != ROW)
763  for (it = iw_l.begin (); it != iw_l.end (); ++it)
764  {
765  p_perm = iperm[*it];
766  w_data[p_perm] = w_data[p_perm] / w_data[k];
767  }
768 
769  if ((max_len_u - total_len_u) < n)
770  {
771  max_len_u += (0.1 * max_len_u) > n ? 0.1 * max_len_u : n;
772  data_out_u.resize (dim_vector (max_len_u, 1));
773  data_u = data_out_u.fortran_vec ();
774  ridx_out_u.resize (dim_vector (max_len_u, 1));
775  ridx_u = ridx_out_u.fortran_vec ();
776  }
777 
778  if ((max_len_l - total_len_l) < n)
779  {
780  max_len_l += (0.1 * max_len_l) > n ? 0.1 * max_len_l : n;
781  data_out_l.resize (dim_vector (max_len_l, 1));
782  data_l = data_out_l.fortran_vec ();
783  ridx_out_l.resize (dim_vector (max_len_l, 1));
784  ridx_l = ridx_out_l.fortran_vec ();
785  }
786 
787  // Expand working vector into U.
788  w_len_u = 0;
789  for (it = iw_u.begin (); it != iw_u.end (); ++it)
790  {
791  if (w_data[*it] != zero)
792  {
793  data_u[total_len_u + w_len_u] = w_data[*it];
794  ridx_u[total_len_u + w_len_u] = *it;
795  w_len_u++;
796  }
797  w_data[*it] = 0;
798  }
799 
800  // Expand working vector into L.
801  w_len_l = 0;
802  for (it = iw_l.begin (); it != iw_l.end (); ++it)
803  {
804  p_perm = iperm[*it];
805  if (w_data[p_perm] != zero)
806  {
807  data_l[total_len_l + w_len_l] = w_data[p_perm];
808  ridx_l[total_len_l + w_len_l] = *it;
809  w_len_l++;
810  }
811  w_data[p_perm] = 0;
812  }
813  total_len_u += w_len_u;
814  total_len_l += w_len_l;
815 
816  // Check if there are too many elements to be indexed with
817  // octave_idx_type type due to fill-in during the process.
818  if (total_len_l < 0 || total_len_u < 0)
819  error ("ilu: Integer overflow. Too many fill-in elements in L or U");
820 
821  if (opt == ROW)
822  uptr[k] = total_len_u - 1;
823 
824  cidx_u[k+1] = cidx_u[k] - cidx_u[0] + w_len_u;
825  cidx_l[k+1] = cidx_l[k] - cidx_l[0] + w_len_l;
826 
827  iw_l.clear ();
828  iw_u.clear ();
829  }
830 
831  octave_matrix_t *L_ptr;
832  octave_matrix_t *U_ptr;
833  octave_matrix_t diag (n, n, n);
834 
835  // L and U are interchanged if milu = 'row'. It is a matter
836  // of nomenclature to re-use code with both IKJ and JKI
837  // versions of the algorithm.
838  if (opt == ROW)
839  {
840  L_ptr = &U;
841  U_ptr = &L;
842  L = octave_matrix_t (n, n, total_len_u - n);
843  U = octave_matrix_t (n, n, total_len_l);
844  }
845  else
846  {
847  L_ptr = &L;
848  U_ptr = &U;
849  L = octave_matrix_t (n, n, total_len_l);
850  U = octave_matrix_t (n, n, total_len_u);
851  }
852 
853  for (i = 0; i <= n; i++)
854  {
855  L_ptr->cidx (i) = cidx_l[i];
856  U_ptr->cidx (i) = cidx_u[i];
857  if (opt == ROW)
858  U_ptr->cidx (i) -= i;
859  }
860 
861  for (i = 0; i < n; i++)
862  {
863  if (opt == ROW)
864  diag.elem (i, i) = data_u[uptr[i]];
865  j = cidx_l[i];
866 
867  while (j < cidx_l[i+1])
868  {
869  L_ptr->ridx (j) = ridx_l[j];
870  L_ptr->data (j) = data_l[j];
871  j++;
872  }
873  j = cidx_u[i];
874 
875  while (j < cidx_u[i+1])
876  {
877  c = j;
878  if (opt == ROW)
879  {
880  // The diagonal is removed from L if milu = 'row'.
881  // That is because is convenient to have it inside
882  // the L part to carry out the process.
883  if (ridx_u[j] == i)
884  {
885  j++;
886  continue;
887  }
888  else
889  c -= i;
890  }
891  U_ptr->data (c) = data_u[j];
892  U_ptr->ridx (c) = ridx_u[j];
893  j++;
894  }
895  }
896 
897  if (opt == ROW)
898  {
899  U = U.transpose ();
900  // The diagonal, conveniently permuted is added to U
901  U += diag.index (idx_vector::colon, perm_vec);
902  L = L.transpose ();
903  }
904 }
905 
906 DEFUN (__ilutp__, args, nargout,
907  doc: /* -*- texinfo -*-
908 @deftypefn {} {[@var{L}, @var{U}] =} __ilutp__ (@var{A}, @var{droptol}, @var{thresh}, @var{milu}, @var{udiag})
909 @deftypefnx {} {[@var{L}, @var{U}, @var{P}] =} __ilutp__ (@dots{})
910 Undocumented internal function.
911 @end deftypefn */)
912 {
913  if (args.length () != 5)
914  print_usage ();
915 
916  octave_value_list retval;
917  double droptol = args(1).double_value ();
918  double thresh = args(2).double_value ();
919  std::string milu = args(3).string_value ();
920  double udiag = args(4).double_value ();
921 
922  octave_value_list arg_list;
923  octave_idx_type nnz_u, nnz_l;
924  if (! args(0).iscomplex ())
925  {
926  SparseMatrix sm = args(0).sparse_matrix_value ();
927  SparseMatrix U, L;
928  nnz_u = (Ftriu (ovl (sm))(0).sparse_matrix_value ()).nnz ();
929  nnz_l = (Ftril (ovl (sm, -1))(0).sparse_matrix_value ()).nnz ();
930  Array <double> rc_norm;
931  if (milu == "row")
932  rc_norm = xrownorms (sm);
933  else
934  rc_norm = xcolnorms (sm);
935  Array <octave_idx_type> perm (dim_vector (sm.cols (), 1));
936 
937  ilu_tp <SparseMatrix, double> (sm, L, U, nnz_u, nnz_l,
938  rc_norm.fortran_vec (),
939  perm, droptol, thresh, milu, udiag);
940 
941  SparseMatrix speye (DiagMatrix (L.cols (), L.cols (), 1.0));
942  if (milu == "row")
943  {
944  retval(0) = L + speye;
945  if (nargout == 3)
946  {
947  retval(1) = U.index (idx_vector::colon, perm);
948  retval(2) = speye.index (idx_vector::colon, perm);
949  }
950  else
951  retval(1) = U;
952  }
953  else
954  {
955  retval(1) = U;
956  if (nargout == 3)
957  {
958  retval(0) = L.index (perm, idx_vector::colon) + speye;
959  retval(2) = speye.index (perm, idx_vector::colon);
960  }
961  else
962  retval(0) = L + speye.index (idx_vector::colon, perm);
963  }
964  }
965  else
966  {
967  SparseComplexMatrix sm = args(0).sparse_complex_matrix_value ();
968  SparseComplexMatrix U, L;
969  nnz_u = (Ftriu (ovl (sm))(0).sparse_complex_matrix_value ()).nnz ();
970  nnz_l = (Ftril (ovl (sm, -1))(0).sparse_complex_matrix_value ()).nnz ();
971  Array <Complex> rc_norm;
972  if (milu == "row")
973  rc_norm = xrownorms (sm);
974  else
975  rc_norm = xcolnorms (sm);
976  Array <octave_idx_type> perm (dim_vector (sm.cols (), 1));
977 
978  ilu_tp <SparseComplexMatrix, Complex>
979  (sm, L, U, nnz_u, nnz_l, rc_norm.fortran_vec (), perm,
980  Complex (droptol), Complex (thresh), milu, udiag);
981 
982  SparseMatrix speye (DiagMatrix (L.cols (), L.cols (), 1.0));
983  if (milu == "row")
984  {
985  retval(0) = L + speye;
986  if (nargout == 3)
987  {
988  retval(1) = U.index (idx_vector::colon, perm);
989  retval(2) = speye.index (idx_vector::colon, perm);
990  }
991  else if (nargout == 2)
992  retval(1) = U;
993  }
994  else
995  {
996  retval(1) = U;
997  if (nargout == 3)
998  {
999  retval(0) = L.index (perm, idx_vector::colon) + speye;
1000  retval(2) = speye.index (perm, idx_vector::colon);
1001  }
1002  else
1003  retval(0) = L + speye.index (idx_vector::colon, perm);
1004  }
1005  }
1006 
1007  return retval;
1008 }
1009 
1010 /*
1011 ## No test needed for internal helper function.
1012 %!assert (1)
1013 */
1014 
1015 OCTAVE_END_NAMESPACE(octave)
void ilu_0(octave_matrix_t &sm, const std::string milu="off")
Definition: __ilu__.cc:47
void ilu_tp(octave_matrix_t &sm, octave_matrix_t &L, octave_matrix_t &U, octave_idx_type nnz_u, octave_idx_type nnz_l, T *cols_norm, Array< octave_idx_type > &perm_vec, const T droptol=T(0), const T thresh=T(0), const std::string milu="off", const double udiag=0)
Definition: __ilu__.cc:528
void ilu_crout(octave_matrix_t &sm_l, octave_matrix_t &sm_u, octave_matrix_t &L, octave_matrix_t &U, T *cols_norm, T *rows_norm, const T droptol=0, const std::string milu="off")
Definition: __ilu__.cc:187
octave_value_list Ftril(const octave_value_list &=octave_value_list(), int=0)
Definition: tril.cc:382
octave_value_list Ftriu(const octave_value_list &=octave_value_list(), int=0)
Definition: tril.cc:431
T * fortran_vec()
Size of the specified dimension.
Definition: Array-base.cc:1764
void resize(const dim_vector &dv, const T &rfv)
Size of the specified dimension.
Definition: Array-base.cc:1023
octave_idx_type cols() const
Definition: Sparse.h:352
Sparse< T, Alloc > index(const octave::idx_vector &i, bool resize_ok=false) const
Definition: Sparse.cc:1434
Vector representing the dimensions (size) of an Array.
Definition: dim-vector.h:94
static const idx_vector colon
Definition: idx-vector.h:466
OCTAVE_BEGIN_NAMESPACE(octave) static octave_value daspk_fcn
void print_usage(void)
Definition: defun-int.h:72
#define DEFUN(name, args_name, nargout_name, doc)
Macro to define a builtin function.
Definition: defun.h:56
void() error(const char *fmt,...)
Definition: error.cc:988
octave_idx_type n
Definition: mx-inlines.cc:761
T * r
Definition: mx-inlines.cc:781
std::complex< double > Complex
Definition: oct-cmplx.h:33
#define OCTAVE_LOCAL_BUFFER(T, buf, size)
Definition: oct-locbuf.h:44
RowVector xcolnorms(const Matrix &m, double p)
Definition: oct-norm.cc:653
ColumnVector xrownorms(const Matrix &m, double p)
Definition: oct-norm.cc:653
octave_value_list ovl(const OV_Args &... args)
Construct an octave_value_list with less typing.
Definition: ovl.h:219