GNU Octave  6.2.0
A high-level interpreted language, primarily intended for numerical computations, mostly compatible with Matlab
__ilu__.cc
Go to the documentation of this file.
1 ////////////////////////////////////////////////////////////////////////
2 //
3 // Copyright (C) 2013-2021 The Octave Project Developers
4 //
5 // See the file COPYRIGHT.md in the top-level directory of this
6 // distribution or <https://octave.org/copyright/>.
7 //
8 // This file is part of Octave.
9 //
10 // Octave is free software: you can redistribute it and/or modify it
11 // under the terms of the GNU General Public License as published by
12 // the Free Software Foundation, either version 3 of the License, or
13 // (at your option) any later version.
14 //
15 // Octave is distributed in the hope that it will be useful, but
16 // WITHOUT ANY WARRANTY; without even the implied warranty of
17 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 // GNU General Public License for more details.
19 //
20 // You should have received a copy of the GNU General Public License
21 // along with Octave; see the file COPYING. If not, see
22 // <https://www.gnu.org/licenses/>.
23 //
24 ////////////////////////////////////////////////////////////////////////
25 
26 #if defined (HAVE_CONFIG_H)
27 # include "config.h"
28 #endif
29 
30 #include "oct-locbuf.h"
31 #include "oct-norm.h"
32 
33 #include "defun.h"
34 #include "error.h"
35 
36 #include "builtin-defun-decls.h"
37 
38 // This function implements the IKJ and JKI variants of Gaussian elimination to
39 // perform the ILU0 decomposition. The behavior is controlled by milu
40 // parameter. If milu = ['off'|'col'] the JKI version is performed taking
41 // advantage of CCS format of the input matrix. If milu = 'row' the input
42 // matrix has to be transposed to obtain the equivalent CRS structure so we can
43 // work efficiently with rows. In this case IKJ version is used.
44 template <typename octave_matrix_t, typename T>
45 void ilu_0 (octave_matrix_t& sm, const std::string milu = "off")
46 {
47  const octave_idx_type n = sm.cols ();
48  octave_idx_type j1, j2, jrow, jw, i, j, k, jj;
49  T r;
50  T tl = 0;
51 
52  enum {OFF, ROW, COL};
53  char opt;
54  if (milu == "row")
55  {
56  opt = ROW;
57  sm = sm.transpose ();
58  }
59  else if (milu == "col")
60  opt = COL;
61  else
62  opt = OFF;
63 
64  // Input matrix pointers
65  octave_idx_type *cidx = sm.cidx ();
66  octave_idx_type *ridx = sm.ridx ();
67  T *data = sm.data ();
68 
69  // Working arrays
72 
73  // Initialize working arrays
74  for (i = 0; i < n; i++)
75  iw[i] = -1;
76 
77  // Loop over all columns
78  for (k = 0; k < n; k++)
79  {
80  j1 = cidx[k];
81  j2 = cidx[k+1];
82 
83  if (j1 == j2)
84  error ("ilu: A has a zero on the diagonal");
85 
86  for (j = j1; j < j2; j++)
87  iw[ridx[j]] = j;
88 
89  r = 0;
90  j = j1;
91  jrow = ridx[j1];
92  while ((jrow < k) && (j < j2))
93  {
94  if (opt == ROW)
95  {
96  tl = data[j] / data[uptr[jrow]];
97  data[j] = tl;
98  }
99  for (jj = uptr[jrow] + 1; jj < cidx[jrow+1]; jj++)
100  {
101  jw = iw[ridx[jj]];
102  if (jw != -1)
103  if (opt == ROW)
104  data[jw] -= tl * data[jj];
105  else
106  data[jw] -= data[j] * data[jj];
107 
108  else
109  // That is for the milu='row'
110  if (opt == ROW)
111  r += tl * data[jj];
112  else if (opt == COL)
113  r += data[j] * data[jj];
114  }
115  j++;
116  jrow = ridx[j];
117  }
118  uptr[k] = j;
119  if (opt != OFF)
120  data[uptr[k]] -= r;
121 
122  if (opt != ROW)
123  for (jj = uptr[k] + 1; jj < cidx[k+1]; jj++)
124  data[jj] /= data[uptr[k]];
125 
126  if (k != jrow)
127  error ("ilu: A has a zero on the diagonal");
128 
129  if (data[j] == T(0))
130  error ("ilu: encountered a pivot equal to 0");
131 
132  for (i = j1; i < j2; i++)
133  iw[ridx[i]] = -1;
134  }
135 
136  if (opt == ROW)
137  sm = sm.transpose ();
138 }
139 
140 DEFUN (__ilu0__, args, ,
141  doc: /* -*- texinfo -*-
142 @deftypefn {} {[@var{L}, @var{U}] =} __ilu0__ (@var{A}, @var{milu})
143 @deftypefnx {} {[@var{L}, @var{U}, @var{P}] =} __ilu0__ (@var{A}, @var{milu})
144 Undocumented internal function.
145 @end deftypefn */)
146 {
147  if (args.length () != 2)
148  print_usage ();
149 
151 
152  std::string milu = args(1).string_value ();
153 
154  // In ILU0 algorithm the zero-pattern of the input matrix is preserved so
155  // its structure does not change during the algorithm. The same input
156  // matrix is used to build the output matrix due to that fact.
157  octave_value_list arg_list;
158  if (! args(0).iscomplex ())
159  {
160  SparseMatrix sm = args(0).sparse_matrix_value ();
161  SparseMatrix speye (DiagMatrix (sm.cols (), sm.cols (), 1.0));
162 
163  ilu_0 <SparseMatrix, double> (sm, milu);
164 
165  retval(0) = speye + Ftril (ovl (sm, -1))(0).sparse_matrix_value ();
166  retval(1) = Ftriu (ovl (sm))(0).sparse_matrix_value ();
167  }
168  else
169  {
170  SparseComplexMatrix sm = args(0).sparse_complex_matrix_value ();
171  SparseMatrix speye (DiagMatrix (sm.cols (), sm.cols (), 1.0));
172 
173  ilu_0 <SparseComplexMatrix, Complex> (sm, milu);
174 
175  retval(0) = speye +
176  Ftril (ovl (sm, -1))(0).sparse_complex_matrix_value ();
177  retval(1) = Ftriu (ovl (sm))(0).sparse_complex_matrix_value ();
178  }
179 
180  return retval;
181 }
182 
183 template <typename octave_matrix_t, typename T>
184 void ilu_crout (octave_matrix_t& sm_l, octave_matrix_t& sm_u,
185  octave_matrix_t& L, octave_matrix_t& U, T *cols_norm,
186  T *rows_norm, const T droptol = 0,
187  const std::string milu = "off")
188 {
189  // Map the strings into chars for faster comparing inside loops
190  char opt;
191  enum {OFF, ROW, COL};
192  if (milu == "row")
193  opt = ROW;
194  else if (milu == "col")
195  opt = COL;
196  else
197  opt = OFF;
198 
199  octave_idx_type jrow, i, j, k, jj, total_len_l, total_len_u, max_len_u,
200  max_len_l, w_len_u, w_len_l, cols_list_len, rows_list_len;
201 
202  const octave_idx_type n = sm_u.cols ();
203  sm_u = sm_u.transpose ();
204 
205  max_len_u = sm_u.nnz ();
206  max_len_u += (0.1 * max_len_u) > n ? 0.1 * max_len_u : n;
207  max_len_l = sm_l.nnz ();
208  max_len_l += (0.1 * max_len_l) > n ? 0.1 * max_len_l : n;
209 
210  // Extract pointers to the arrays for faster access inside loops
211  octave_idx_type *cidx_in_u = sm_u.cidx ();
212  octave_idx_type *ridx_in_u = sm_u.ridx ();
213  T *data_in_u = sm_u.data ();
214  octave_idx_type *cidx_in_l = sm_l.cidx ();
215  octave_idx_type *ridx_in_l = sm_l.ridx ();
216  T *data_in_l = sm_l.data ();
217 
218  // L output arrays
219  Array <octave_idx_type> ridx_out_l (dim_vector (max_len_l, 1));
220  octave_idx_type *ridx_l = ridx_out_l.fortran_vec ();
221  Array <T> data_out_l (dim_vector (max_len_l, 1));
222  T *data_l = data_out_l.fortran_vec ();
223 
224  // U output arrays
225  Array <octave_idx_type> ridx_out_u (dim_vector (max_len_u, 1));
226  octave_idx_type *ridx_u = ridx_out_u.fortran_vec ();
227  Array <T> data_out_u (dim_vector (max_len_u, 1));
228  T *data_u = data_out_u.fortran_vec ();
229 
230  // Working arrays
231  OCTAVE_LOCAL_BUFFER (octave_idx_type, cidx_l, n + 1);
232  OCTAVE_LOCAL_BUFFER (octave_idx_type, cidx_u, n + 1);
233  OCTAVE_LOCAL_BUFFER (octave_idx_type, cols_list, n);
234  OCTAVE_LOCAL_BUFFER (octave_idx_type, rows_list, n);
235  OCTAVE_LOCAL_BUFFER (T, w_data_l, n);
236  OCTAVE_LOCAL_BUFFER (T, w_data_u, n);
239  OCTAVE_LOCAL_BUFFER (T, cr_sum, n);
240 
241  T zero = T (0);
242 
243  // Initialize working arrays
244  cidx_u[0] = cidx_in_u[0];
245  cidx_l[0] = cidx_in_l[0];
246  for (i = 0; i < n; i++)
247  {
248  w_data_u[i] = zero;
249  w_data_l[i] = zero;
250  cr_sum[i] = zero;
251  }
252 
253  total_len_u = 0;
254  total_len_l = 0;
255  cols_list_len = 0;
256  rows_list_len = 0;
257 
258  // Loop over all columns
259  for (k = 0; k < n; k++)
260  {
261  // Load the working column and working row
262  for (i = cidx_in_l[k]; i < cidx_in_l[k+1]; i++)
263  w_data_l[ridx_in_l[i]] = data_in_l[i];
264 
265  for (i = cidx_in_u[k]; i < cidx_in_u[k+1]; i++)
266  w_data_u[ridx_in_u[i]] = data_in_u[i];
267 
268  // Update U working row
269  for (j = 0; j < rows_list_len; j++)
270  {
271  if ((Ufirst[rows_list[j]] != -1))
272  for (jj = Ufirst[rows_list[j]]; jj < cidx_u[rows_list[j]+1]; jj++)
273  {
274  jrow = ridx_u[jj];
275  w_data_u[jrow] -= data_u[jj] * data_l[Lfirst[rows_list[j]]];
276  }
277  }
278  // Update L working column
279  for (j = 0; j < cols_list_len; j++)
280  {
281  if (Lfirst[cols_list[j]] != -1)
282  for (jj = Lfirst[cols_list[j]]; jj < cidx_l[cols_list[j]+1]; jj++)
283  {
284  jrow = ridx_l[jj];
285  w_data_l[jrow] -= data_l[jj] * data_u[Ufirst[cols_list[j]]];
286  }
287  }
288 
289  if ((max_len_u - total_len_u) < n)
290  {
291  max_len_u += (0.1 * max_len_u) > n ? 0.1 * max_len_u : n;
292  data_out_u.resize (dim_vector (max_len_u, 1));
293  data_u = data_out_u.fortran_vec ();
294  ridx_out_u.resize (dim_vector (max_len_u, 1));
295  ridx_u = ridx_out_u.fortran_vec ();
296  }
297 
298  if ((max_len_l - total_len_l) < n)
299  {
300  max_len_l += (0.1 * max_len_l) > n ? 0.1 * max_len_l : n;
301  data_out_l.resize (dim_vector (max_len_l, 1));
302  data_l = data_out_l.fortran_vec ();
303  ridx_out_l.resize (dim_vector (max_len_l, 1));
304  ridx_l = ridx_out_l.fortran_vec ();
305  }
306 
307  // Expand the working row into the U output data structures
308  w_len_l = 0;
309  data_u[total_len_u] = w_data_u[k];
310  ridx_u[total_len_u] = k;
311  w_len_u = 1;
312  for (i = k + 1; i < n; i++)
313  {
314  if (w_data_u[i] != zero)
315  {
316  if (std::abs (w_data_u[i]) < (droptol * rows_norm[k]))
317  {
318  if (opt == ROW)
319  cr_sum[k] += w_data_u[i];
320  else if (opt == COL)
321  cr_sum[i] += w_data_u[i];
322  }
323  else
324  {
325  data_u[total_len_u + w_len_u] = w_data_u[i];
326  ridx_u[total_len_u + w_len_u] = i;
327  w_len_u++;
328  }
329  }
330 
331  // Expand the working column into the L output data structures
332  if (w_data_l[i] != zero)
333  {
334  if (std::abs (w_data_l[i]) < (droptol * cols_norm[k]))
335  {
336  if (opt == COL)
337  cr_sum[k] += w_data_l[i];
338  else if (opt == ROW)
339  cr_sum[i] += w_data_l[i];
340  }
341  else
342  {
343  data_l[total_len_l + w_len_l] = w_data_l[i];
344  ridx_l[total_len_l + w_len_l] = i;
345  w_len_l++;
346  }
347  }
348  w_data_u[i] = zero;
349  w_data_l[i] = zero;
350  }
351 
352  // Compensate row and column sums --> milu option
353  if (opt == COL || opt == ROW)
354  data_u[total_len_u] += cr_sum[k];
355 
356  // Check if the pivot is zero
357  if (data_u[total_len_u] == zero)
358  error ("ilu: encountered a pivot equal to 0");
359 
360  // Scale the elements in L by the pivot
361  for (i = total_len_l ; i < (total_len_l + w_len_l); i++)
362  data_l[i] /= data_u[total_len_u];
363 
364  total_len_u += w_len_u;
365  total_len_l += w_len_l;
366  // Check if there are too many elements to be indexed with
367  // octave_idx_type type due to fill-in during the process.
368  if (total_len_l < 0 || total_len_u < 0)
369  error ("ilu: integer overflow. Too many fill-in elements in L or U");
370 
371  cidx_u[k+1] = cidx_u[k] - cidx_u[0] + w_len_u;
372  cidx_l[k+1] = cidx_l[k] - cidx_l[0] + w_len_l;
373 
374  // The tricky part of the algorithm. The arrays pointing to the first
375  // working element of each column in the next iteration (Lfirst) or
376  // the first working element of each row (Ufirst) are updated. Also the
377  // arrays working as lists cols_list and rows_list are filled with
378  // indices pointing to Ufirst and Lfirst respectively.
379  // FIXME: Maybe the -1 indicating in Ufirst and Lfirst, that no elements
380  // have to be considered in a certain column or row in next iteration,
381  // can be removed. It feels safer to me using such an indicator.
382  if (k < (n - 1))
383  {
384  if (w_len_u > 0)
385  Ufirst[k] = cidx_u[k];
386  else
387  Ufirst[k] = -1;
388  if (w_len_l > 0)
389  Lfirst[k] = cidx_l[k];
390  else
391  Lfirst[k] = -1;
392  cols_list_len = 0;
393  rows_list_len = 0;
394  for (i = 0; i <= k; i++)
395  {
396  if (Ufirst[i] != -1)
397  {
398  jj = ridx_u[Ufirst[i]];
399  if (jj < (k + 1))
400  {
401  if (Ufirst[i] < (cidx_u[i+1]))
402  {
403  Ufirst[i]++;
404  if (Ufirst[i] == cidx_u[i+1])
405  Ufirst[i] = -1;
406  else
407  jj = ridx_u[Ufirst[i]];
408  }
409  }
410  if (jj == (k + 1))
411  {
412  cols_list[cols_list_len] = i;
413  cols_list_len++;
414  }
415  }
416 
417  if (Lfirst[i] != -1)
418  {
419  jj = ridx_l[Lfirst[i]];
420  if (jj < (k + 1))
421  if (Lfirst[i] < (cidx_l[i+1]))
422  {
423  Lfirst[i]++;
424  if (Lfirst[i] == cidx_l[i+1])
425  Lfirst[i] = -1;
426  else
427  jj = ridx_l[Lfirst[i]];
428  }
429  if (jj == (k + 1))
430  {
431  rows_list[rows_list_len] = i;
432  rows_list_len++;
433  }
434  }
435  }
436  }
437  }
438 
439  // Build the output matrices
440  L = octave_matrix_t (n, n, total_len_l);
441  U = octave_matrix_t (n, n, total_len_u);
442 
443  // FIXME: Can these loops be replaced by std::copy?
444  for (i = 0; i <= n; i++)
445  L.cidx (i) = cidx_l[i];
446 
447  for (i = 0; i < total_len_l; i++)
448  {
449  L.ridx (i) = ridx_l[i];
450  L.data (i) = data_l[i];
451  }
452 
453  for (i = 0; i <= n; i++)
454  U.cidx (i) = cidx_u[i];
455 
456  for (i = 0; i < total_len_u; i++)
457  {
458  U.ridx (i) = ridx_u[i];
459  U.data (i) = data_u[i];
460  }
461 
462  U = U.transpose ();
463 }
464 
465 DEFUN (__iluc__, args, ,
466  doc: /* -*- texinfo -*-
467 @deftypefn {} {[@var{L}, @var{U}] =} __iluc__ (@var{A}, @var{droptol}, @var{milu})
468 Undocumented internal function.
469 @end deftypefn */)
470 {
471  if (args.length () != 3)
472  print_usage ();
473 
474  double droptol = args(1).double_value ();
475  std::string milu = args(2).string_value ();
476 
477  if (! args(0).iscomplex ())
478  {
479  SparseMatrix sm = args(0).sparse_matrix_value ();
480  SparseMatrix sm_u = Ftriu (ovl (sm))(0).sparse_matrix_value ();
481  SparseMatrix sm_l = Ftril (ovl (sm, -1))(0).sparse_matrix_value ();
482  SparseMatrix U, L;
483 
484  ilu_crout <SparseMatrix, double> (sm_l, sm_u, L, U,
485  xcolnorms (sm).fortran_vec (),
486  xrownorms (sm).fortran_vec (),
487  droptol, milu);
488 
489  SparseMatrix speye (DiagMatrix (L.cols (), L.cols (), 1.0));
490 
491  return ovl (L + speye, U);
492  }
493  else
494  {
495  SparseComplexMatrix sm = args(0).sparse_complex_matrix_value ();
496  SparseComplexMatrix sm_u = Ftriu (ovl (sm))(0).sparse_complex_matrix_value ();
497  SparseComplexMatrix sm_l = Ftril (ovl (sm, -1))(0).sparse_complex_matrix_value ();
498  SparseComplexMatrix U, L;
499  Array<Complex> cols_norm = xcolnorms (sm);
500  Array<Complex> rows_norm = xrownorms (sm);
501 
502  ilu_crout <SparseComplexMatrix, Complex> (sm_l, sm_u, L, U,
503  cols_norm.fortran_vec (),
504  rows_norm.fortran_vec (),
505  Complex (droptol), milu);
506 
507  SparseMatrix speye (DiagMatrix (L.cols (), L.cols (), 1.0));
508 
509  return ovl (L + speye, U);
510  }
511 }
512 
513 // This function implements the IKJ and JKI variants of gaussian elimination
514 // to perform the ILUTP decomposition. The behavior is controlled by milu
515 // parameter. If milu = ['off'|'col'] the JKI version is performed taking
516 // advantage of CCS format of the input matrix. Row pivoting is performed.
517 // If milu = 'row' the input matrix has to be transposed to obtain the
518 // equivalent CRS structure so we can work efficiently with rows. In that
519 // case IKJ version is used and column pivoting is performed.
520 
521 template <typename octave_matrix_t, typename T>
522 void ilu_tp (octave_matrix_t& sm, octave_matrix_t& L, octave_matrix_t& U,
523  octave_idx_type nnz_u, octave_idx_type nnz_l, T *cols_norm,
524  Array <octave_idx_type>& perm_vec, const T droptol = T(0),
525  const T thresh = T(0), const std::string milu = "off",
526  const double udiag = 0)
527 {
528  char opt;
529  enum {OFF, ROW, COL};
530  if (milu == "row")
531  opt = ROW;
532  else if (milu == "col")
533  opt = COL;
534  else
535  opt = OFF;
536 
537  const octave_idx_type n = sm.cols ();
538 
539  // This is necessary for the JKI (milu = "row") variant.
540  if (opt == ROW)
541  sm = sm.transpose ();
542 
543  // Extract pointers to the arrays for faster access inside loops
544  octave_idx_type *cidx_in = sm.cidx ();
545  octave_idx_type *ridx_in = sm.ridx ();
546  T *data_in = sm.data ();
547  octave_idx_type jrow, i, j, k, jj, c, total_len_l, total_len_u, p_perm,
548  max_ind, max_len_l, max_len_u;
549  T zero = T(0);
550 
551  T tl = zero, aux, maximum;
552 
553  max_len_u = nnz_u;
554  max_len_u += (0.1 * max_len_u) > n ? 0.1 * max_len_u : n;
555  max_len_l = nnz_l;
556  max_len_l += (0.1 * max_len_l) > n ? 0.1 * max_len_l : n;
557 
558  // Extract pointers to the arrays for faster access inside loops
559  Array <octave_idx_type> cidx_out_l (dim_vector (n + 1, 1));
560  octave_idx_type *cidx_l = cidx_out_l.fortran_vec ();
561  Array <octave_idx_type> ridx_out_l (dim_vector (max_len_l, 1));
562  octave_idx_type *ridx_l = ridx_out_l.fortran_vec ();
563  Array <T> data_out_l (dim_vector (max_len_l, 1));
564  T *data_l = data_out_l.fortran_vec ();
565 
566  // Data for U
567  Array <octave_idx_type> cidx_out_u (dim_vector (n + 1, 1));
568  octave_idx_type *cidx_u = cidx_out_u.fortran_vec ();
569  Array <octave_idx_type> ridx_out_u (dim_vector (max_len_u, 1));
570  octave_idx_type *ridx_u = ridx_out_u.fortran_vec ();
571  Array <T> data_out_u (dim_vector (max_len_u, 1));
572  T *data_u = data_out_u.fortran_vec ();
573 
574  // Working arrays and permutation arrays
575  octave_idx_type w_len_u, w_len_l;
576  T total_sum, partial_col_sum = zero, partial_row_sum = zero;
577  std::set <octave_idx_type> iw_l;
578  std::set <octave_idx_type> iw_u;
579  std::set <octave_idx_type>::iterator it, it2;
580  OCTAVE_LOCAL_BUFFER (T, w_data, n);
582  octave_idx_type *perm = perm_vec.fortran_vec ();
584 
585  // Initialize working and permutation arrays
586  cidx_l[0] = cidx_in[0];
587  cidx_u[0] = cidx_in[0];
588  for (i = 0; i < n; i++)
589  {
590  w_data[i] = 0;
591  perm[i] = i;
592  iperm[i] = i;
593  }
594  total_len_u = 0;
595  total_len_l = 0;
596 
597  // Loop over all columns
598  for (k = 0; k < n; k++)
599  {
600  for (j = cidx_in[k]; j < cidx_in[k+1]; j++)
601  {
602  p_perm = iperm[ridx_in[j]];
603  w_data[iperm[ridx_in[j]]] = data_in[j];
604  if (p_perm > k)
605  iw_l.insert (ridx_in[j]);
606  else
607  iw_u.insert (p_perm);
608  }
609 
610  it = iw_u.begin ();
611  jrow = *it;
612  total_sum = zero;
613  while ((jrow < k) && (it != iw_u.end ()))
614  {
615  if (opt == COL)
616  partial_col_sum = w_data[jrow];
617  if (w_data[jrow] != zero)
618  {
619  if (opt == ROW)
620  {
621  partial_row_sum = w_data[jrow];
622  tl = w_data[jrow] / data_u[uptr[jrow]];
623  }
624  for (jj = cidx_l[jrow]; jj < cidx_l[jrow+1]; jj++)
625  {
626  p_perm = iperm[ridx_l[jj]];
627  aux = w_data[p_perm];
628  if (opt == ROW)
629  {
630  w_data[p_perm] -= tl * data_l[jj];
631  partial_row_sum += tl * data_l[jj];
632  }
633  else
634  {
635  tl = data_l[jj] * w_data[jrow];
636  w_data[p_perm] -= tl;
637  if (opt == COL)
638  partial_col_sum += tl;
639  }
640 
641  if ((aux == zero) && (w_data[p_perm] != zero))
642  {
643  if (p_perm > k)
644  iw_l.insert (ridx_l[jj]);
645  else
646  iw_u.insert (p_perm);
647  }
648  }
649 
650  // Drop element from the U part in IKJ
651  // and L part in JKI variant (milu = [col|off])
652  if ((std::abs (w_data[jrow]) < (droptol * cols_norm[k]))
653  && (w_data[jrow] != zero))
654  {
655  if (opt == COL)
656  total_sum += partial_col_sum;
657  else if (opt == ROW)
658  total_sum += partial_row_sum;
659  w_data[jrow] = zero;
660  it2 = it;
661  it++;
662  iw_u.erase (it2);
663  jrow = *it;
664  continue;
665  }
666  else
667  // This is the element scaled by the pivot
668  // in the actual iteration
669  if (opt == ROW)
670  w_data[jrow] = tl;
671  }
672  jrow = *(++it);
673  }
674 
675  // Search for the pivot and update iw_l and iw_u if the pivot is not the
676  // diagonal element
677  if ((thresh > zero) && (k < (n - 1)))
678  {
679  maximum = std::abs (w_data[k]) / thresh;
680  max_ind = perm[k];
681  for (it = iw_l.begin (); it != iw_l.end (); ++it)
682  {
683  p_perm = iperm[*it];
684  if (std::abs (w_data[p_perm]) > maximum)
685  {
686  maximum = std::abs (w_data[p_perm]);
687  max_ind = *it;
688  it2 = it;
689  }
690  }
691  // If the pivot is not the diagonal element update all
692  p_perm = iperm[max_ind];
693  if (max_ind != perm[k])
694  {
695  iw_l.erase (it2);
696  if (w_data[k] != zero)
697  iw_l.insert (perm[k]);
698  else
699  iw_u.insert (k);
700  // Swap data and update permutation vectors
701  aux = w_data[k];
702  iperm[perm[p_perm]] = k;
703  iperm[perm[k]] = p_perm;
704  c = perm[k];
705  perm[k] = perm[p_perm];
706  perm[p_perm] = c;
707  w_data[k] = w_data[p_perm];
708  w_data[p_perm] = aux;
709  }
710 
711  }
712 
713  // Drop elements in the L part in the IKJ version,
714  // and from the U part in the JKI version.
715  it = iw_l.begin ();
716  while (it != iw_l.end ())
717  {
718  p_perm = iperm[*it];
719  if (droptol > zero)
720  if (std::abs (w_data[p_perm]) < (droptol * cols_norm[k]))
721  {
722  if (opt != OFF)
723  total_sum += w_data[p_perm];
724  w_data[p_perm] = zero;
725  it2 = it;
726  it++;
727  iw_l.erase (it2);
728  continue;
729  }
730 
731  it++;
732  }
733 
734  // If milu == [row|col] summation is preserved.
735  // Compensate diagonal element.
736  if (opt != OFF)
737  {
738  if ((total_sum > zero) && (w_data[k] == zero))
739  iw_u.insert (k);
740  w_data[k] += total_sum;
741  }
742 
743  // Check if the pivot is zero and if udiag is active.
744  // NOTE: If the pivot == 0 and udiag is active, then if milu = [col|row]
745  // will not preserve the row sum for that column/row.
746  if (w_data[k] == zero)
747  {
748  if (udiag != 1)
749  error ("ilu: encountered a pivot equal to 0");
750 
751  w_data[k] = droptol;
752  iw_u.insert (k);
753  }
754 
755  // Scale the elements on the L part for IKJ version (milu = [col|off])
756  if (opt != ROW)
757  for (it = iw_l.begin (); it != iw_l.end (); ++it)
758  {
759  p_perm = iperm[*it];
760  w_data[p_perm] = w_data[p_perm] / w_data[k];
761  }
762 
763  if ((max_len_u - total_len_u) < n)
764  {
765  max_len_u += (0.1 * max_len_u) > n ? 0.1 * max_len_u : n;
766  data_out_u.resize (dim_vector (max_len_u, 1));
767  data_u = data_out_u.fortran_vec ();
768  ridx_out_u.resize (dim_vector (max_len_u, 1));
769  ridx_u = ridx_out_u.fortran_vec ();
770  }
771 
772  if ((max_len_l - total_len_l) < n)
773  {
774  max_len_l += (0.1 * max_len_l) > n ? 0.1 * max_len_l : n;
775  data_out_l.resize (dim_vector (max_len_l, 1));
776  data_l = data_out_l.fortran_vec ();
777  ridx_out_l.resize (dim_vector (max_len_l, 1));
778  ridx_l = ridx_out_l.fortran_vec ();
779  }
780 
781  // Expand working vector into U.
782  w_len_u = 0;
783  for (it = iw_u.begin (); it != iw_u.end (); ++it)
784  {
785  if (w_data[*it] != zero)
786  {
787  data_u[total_len_u + w_len_u] = w_data[*it];
788  ridx_u[total_len_u + w_len_u] = *it;
789  w_len_u++;
790  }
791  w_data[*it] = 0;
792  }
793 
794  // Expand working vector into L.
795  w_len_l = 0;
796  for (it = iw_l.begin (); it != iw_l.end (); ++it)
797  {
798  p_perm = iperm[*it];
799  if (w_data[p_perm] != zero)
800  {
801  data_l[total_len_l + w_len_l] = w_data[p_perm];
802  ridx_l[total_len_l + w_len_l] = *it;
803  w_len_l++;
804  }
805  w_data[p_perm] = 0;
806  }
807  total_len_u += w_len_u;
808  total_len_l += w_len_l;
809 
810  // Check if there are too many elements to be indexed with
811  // octave_idx_type type due to fill-in during the process.
812  if (total_len_l < 0 || total_len_u < 0)
813  error ("ilu: Integer overflow. Too many fill-in elements in L or U");
814 
815  if (opt == ROW)
816  uptr[k] = total_len_u - 1;
817 
818  cidx_u[k+1] = cidx_u[k] - cidx_u[0] + w_len_u;
819  cidx_l[k+1] = cidx_l[k] - cidx_l[0] + w_len_l;
820 
821  iw_l.clear ();
822  iw_u.clear ();
823  }
824 
825  octave_matrix_t *L_ptr;
826  octave_matrix_t *U_ptr;
827  octave_matrix_t diag (n, n, n);
828 
829  // L and U are interchanged if milu = 'row'. It is a matter
830  // of nomenclature to re-use code with both IKJ and JKI
831  // versions of the algorithm.
832  if (opt == ROW)
833  {
834  L_ptr = &U;
835  U_ptr = &L;
836  L = octave_matrix_t (n, n, total_len_u - n);
837  U = octave_matrix_t (n, n, total_len_l);
838  }
839  else
840  {
841  L_ptr = &L;
842  U_ptr = &U;
843  L = octave_matrix_t (n, n, total_len_l);
844  U = octave_matrix_t (n, n, total_len_u);
845  }
846 
847  for (i = 0; i <= n; i++)
848  {
849  L_ptr->cidx (i) = cidx_l[i];
850  U_ptr->cidx (i) = cidx_u[i];
851  if (opt == ROW)
852  U_ptr->cidx (i) -= i;
853  }
854 
855  for (i = 0; i < n; i++)
856  {
857  if (opt == ROW)
858  diag.elem (i,i) = data_u[uptr[i]];
859  j = cidx_l[i];
860 
861  while (j < cidx_l[i+1])
862  {
863  L_ptr->ridx (j) = ridx_l[j];
864  L_ptr->data (j) = data_l[j];
865  j++;
866  }
867  j = cidx_u[i];
868 
869  while (j < cidx_u[i+1])
870  {
871  c = j;
872  if (opt == ROW)
873  {
874  // The diagonal is removed from L if milu = 'row'.
875  // That is because is convenient to have it inside
876  // the L part to carry out the process.
877  if (ridx_u[j] == i)
878  {
879  j++;
880  continue;
881  }
882  else
883  c -= i;
884  }
885  U_ptr->data (c) = data_u[j];
886  U_ptr->ridx (c) = ridx_u[j];
887  j++;
888  }
889  }
890 
891  if (opt == ROW)
892  {
893  U = U.transpose ();
894  // The diagonal, conveniently permuted is added to U
895  U += diag.index (idx_vector::colon, perm_vec);
896  L = L.transpose ();
897  }
898 }
899 
900 DEFUN (__ilutp__, args, nargout,
901  doc: /* -*- texinfo -*-
902 @deftypefn {} {[@var{L}, @var{U}] =} __ilutp__ (@var{A}, @var{droptol}, @var{thresh}, @var{milu}, @var{udiag})
903 @deftypefnx {} {[@var{L}, @var{U}, @var{P}] =} __ilutp__ (@dots{})
904 Undocumented internal function.
905 @end deftypefn */)
906 {
907  if (args.length () != 5)
908  print_usage ();
909 
911  double droptol = args(1).double_value ();
912  double thresh = args(2).double_value ();
913  std::string milu = args(3).string_value ();
914  double udiag = args(4).double_value ();
915 
916  octave_value_list arg_list;
917  octave_idx_type nnz_u, nnz_l;
918  if (! args(0).iscomplex ())
919  {
920  SparseMatrix sm = args(0).sparse_matrix_value ();
921  SparseMatrix U, L;
922  nnz_u = (Ftriu (ovl (sm))(0).sparse_matrix_value ()).nnz ();
923  nnz_l = (Ftril (ovl (sm, -1))(0).sparse_matrix_value ()).nnz ();
924  Array <double> rc_norm;
925  if (milu == "row")
926  rc_norm = xrownorms (sm);
927  else
928  rc_norm = xcolnorms (sm);
929  Array <octave_idx_type> perm (dim_vector (sm.cols (), 1));
930 
931  ilu_tp <SparseMatrix, double> (sm, L, U, nnz_u, nnz_l,
932  rc_norm.fortran_vec (),
933  perm, droptol, thresh, milu, udiag);
934 
935  SparseMatrix speye (DiagMatrix (L.cols (), L.cols (), 1.0));
936  if (milu == "row")
937  {
938  retval(0) = L + speye;
939  if (nargout == 3)
940  {
941  retval(1) = U.index (idx_vector::colon, perm);
942  retval(2) = speye.index (idx_vector::colon, perm);
943  }
944  else
945  retval(1) = U;
946  }
947  else
948  {
949  retval(1) = U;
950  if (nargout == 3)
951  {
952  retval(0) = L.index (perm, idx_vector::colon) + speye;
953  retval(2) = speye.index (perm, idx_vector::colon);
954  }
955  else
956  retval(0) = L + speye.index (idx_vector::colon, perm);
957  }
958  }
959  else
960  {
961  SparseComplexMatrix sm = args(0).sparse_complex_matrix_value ();
962  SparseComplexMatrix U, L;
963  nnz_u = (Ftriu (ovl (sm))(0).sparse_complex_matrix_value ()).nnz ();
964  nnz_l = (Ftril (ovl (sm, -1))(0).sparse_complex_matrix_value ()).nnz ();
965  Array <Complex> rc_norm;
966  if (milu == "row")
967  rc_norm = xrownorms (sm);
968  else
969  rc_norm = xcolnorms (sm);
970  Array <octave_idx_type> perm (dim_vector (sm.cols (), 1));
971 
972  ilu_tp <SparseComplexMatrix, Complex>
973  (sm, L, U, nnz_u, nnz_l, rc_norm.fortran_vec (), perm,
974  Complex (droptol), Complex (thresh), milu, udiag);
975 
976  SparseMatrix speye (DiagMatrix (L.cols (), L.cols (), 1.0));
977  if (milu == "row")
978  {
979  retval(0) = L + speye;
980  if (nargout == 3)
981  {
982  retval(1) = U.index (idx_vector::colon, perm);
983  retval(2) = speye.index (idx_vector::colon, perm);
984  }
985  else if (nargout == 2)
986  retval(1) = U;
987  }
988  else
989  {
990  retval(1) = U;
991  if (nargout == 3)
992  {
993  retval(0) = L.index (perm, idx_vector::colon) + speye;
994  retval(2) = speye.index (perm, idx_vector::colon);
995  }
996  else
997  retval(0) = L + speye.index (idx_vector::colon, perm);
998  }
999  }
1000 
1001  return retval;
1002 }
1003 
1004 /*
1005 ## No test needed for internal helper function.
1006 %!assert (1)
1007 */
void ilu_0(octave_matrix_t &sm, const std::string milu="off")
Definition: __ilu__.cc:45
void ilu_tp(octave_matrix_t &sm, octave_matrix_t &L, octave_matrix_t &U, octave_idx_type nnz_u, octave_idx_type nnz_l, T *cols_norm, Array< octave_idx_type > &perm_vec, const T droptol=T(0), const T thresh=T(0), const std::string milu="off", const double udiag=0)
Definition: __ilu__.cc:522
void ilu_crout(octave_matrix_t &sm_l, octave_matrix_t &sm_u, octave_matrix_t &L, octave_matrix_t &U, T *cols_norm, T *rows_norm, const T droptol=0, const std::string milu="off")
Definition: __ilu__.cc:184
void resize(const dim_vector &dv, const T &rfv)
Size of the specified dimension.
Definition: Array.cc:1011
Array< T > index(const idx_vector &i) const
Indexing without resizing.
Definition: Array.cc:698
const T * fortran_vec(void) const
Size of the specified dimension.
Definition: Array.h:583
octave_idx_type cols(void) const
Definition: Sparse.h:251
Sparse< T > index(const idx_vector &i, bool resize_ok=false) const
Definition: Sparse.cc:1393
Vector representing the dimensions (size) of an Array.
Definition: dim-vector.h:95
static const idx_vector colon
Definition: idx-vector.h:499
OCTINTERP_API void print_usage(void)
Definition: defun.cc:53
#define DEFUN(name, args_name, nargout_name, doc)
Macro to define a builtin function.
Definition: defun.h:56
void error(const char *fmt,...)
Definition: error.cc:968
octave_idx_type n
Definition: mx-inlines.cc:753
T * r
Definition: mx-inlines.cc:773
std::complex< double > Complex
Definition: oct-cmplx.h:33
#define OCTAVE_LOCAL_BUFFER(T, buf, size)
Definition: oct-locbuf.h:44
OCTAVE_API RowVector xcolnorms(const Matrix &m, double p)
Definition: oct-norm.cc:594
OCTAVE_API ColumnVector xrownorms(const Matrix &m, double p)
Definition: oct-norm.cc:594
octave_value::octave_value(const Array< char > &chm, char type) return retval
Definition: ov.cc:811
octave_value_list ovl(const OV_Args &... args)
Construct an octave_value_list with less typing.
Definition: ovl.h:211
static T abs(T x)
Definition: pr-output.cc:1678
OCTAVE_EXPORT octave_value_list Ftriu(const octave_value_list &args, int)
Definition: tril.cc:425
OCTAVE_EXPORT octave_value_list Ftril(const octave_value_list &args, int)
Definition: tril.cc:376