GNU Octave 7.1.0
A high-level interpreted language, primarily intended for numerical computations, mostly compatible with Matlab
__ilu__.cc
Go to the documentation of this file.
1////////////////////////////////////////////////////////////////////////
2//
3// Copyright (C) 2013-2022 The Octave Project Developers
4//
5// See the file COPYRIGHT.md in the top-level directory of this
6// distribution or <https://octave.org/copyright/>.
7//
8// This file is part of Octave.
9//
10// Octave is free software: you can redistribute it and/or modify it
11// under the terms of the GNU General Public License as published by
12// the Free Software Foundation, either version 3 of the License, or
13// (at your option) any later version.
14//
15// Octave is distributed in the hope that it will be useful, but
16// WITHOUT ANY WARRANTY; without even the implied warranty of
17// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18// GNU General Public License for more details.
19//
20// You should have received a copy of the GNU General Public License
21// along with Octave; see the file COPYING. If not, see
22// <https://www.gnu.org/licenses/>.
23//
24////////////////////////////////////////////////////////////////////////
25
26#if defined (HAVE_CONFIG_H)
27# include "config.h"
28#endif
29
30#include "oct-locbuf.h"
31#include "oct-norm.h"
32
33#include "defun.h"
34#include "error.h"
35
36#include "builtin-defun-decls.h"
37
38OCTAVE_NAMESPACE_BEGIN
39
40// This function implements the IKJ and JKI variants of Gaussian elimination to
41// perform the ILU0 decomposition. The behavior is controlled by milu
42// parameter. If milu = ['off'|'col'] the JKI version is performed taking
43// advantage of CCS format of the input matrix. If milu = 'row' the input
44// matrix has to be transposed to obtain the equivalent CRS structure so we can
45// work efficiently with rows. In this case IKJ version is used.
46template <typename octave_matrix_t, typename T>
47void ilu_0 (octave_matrix_t& sm, const std::string milu = "off")
48{
49 const octave_idx_type n = sm.cols ();
50 octave_idx_type j1, j2, jrow, jw, i, j, k, jj;
51 T r;
52 T tl = 0;
53
54 enum {OFF, ROW, COL};
55 char opt;
56 if (milu == "row")
57 {
58 opt = ROW;
59 sm = sm.transpose ();
60 }
61 else if (milu == "col")
62 opt = COL;
63 else
64 opt = OFF;
65
66 // Input matrix pointers
67 octave_idx_type *cidx = sm.cidx ();
68 octave_idx_type *ridx = sm.ridx ();
69 T *data = sm.data ();
70
71 // Working arrays
74
75 // Initialize working arrays
76 for (i = 0; i < n; i++)
77 iw[i] = -1;
78
79 // Loop over all columns
80 for (k = 0; k < n; k++)
81 {
82 j1 = cidx[k];
83 j2 = cidx[k+1];
84
85 if (j1 == j2)
86 error ("ilu: A has a zero on the diagonal");
87
88 for (j = j1; j < j2; j++)
89 iw[ridx[j]] = j;
90
91 r = 0;
92 j = j1;
93 jrow = ridx[j1];
94 while ((jrow < k) && (j < j2))
95 {
96 if (opt == ROW)
97 {
98 tl = data[j] / data[uptr[jrow]];
99 data[j] = tl;
100 }
101 for (jj = uptr[jrow] + 1; jj < cidx[jrow+1]; jj++)
102 {
103 jw = iw[ridx[jj]];
104 if (jw != -1)
105 if (opt == ROW)
106 data[jw] -= tl * data[jj];
107 else
108 data[jw] -= data[j] * data[jj];
109
110 else
111 // That is for the milu='row'
112 if (opt == ROW)
113 r += tl * data[jj];
114 else if (opt == COL)
115 r += data[j] * data[jj];
116 }
117 j++;
118 jrow = ridx[j];
119 }
120 uptr[k] = j;
121 if (opt != OFF)
122 data[uptr[k]] -= r;
123
124 if (opt != ROW)
125 for (jj = uptr[k] + 1; jj < cidx[k+1]; jj++)
126 data[jj] /= data[uptr[k]];
127
128 if (k != jrow)
129 error ("ilu: A has a zero on the diagonal");
130
131 if (data[j] == T(0))
132 error ("ilu: encountered a pivot equal to 0");
133
134 for (i = j1; i < j2; i++)
135 iw[ridx[i]] = -1;
136 }
137
138 if (opt == ROW)
139 sm = sm.transpose ();
140}
141
142DEFUN (__ilu0__, args, ,
143 doc: /* -*- texinfo -*-
144@deftypefn {} {[@var{L}, @var{U}] =} __ilu0__ (@var{A}, @var{milu})
145@deftypefnx {} {[@var{L}, @var{U}, @var{P}] =} __ilu0__ (@var{A}, @var{milu})
146Undocumented internal function.
147@end deftypefn */)
148{
149 if (args.length () != 2)
150 print_usage ();
151
152 octave_value_list retval (2);
153
154 std::string milu = args(1).string_value ();
155
156 // In ILU0 algorithm the zero-pattern of the input matrix is preserved so
157 // its structure does not change during the algorithm. The same input
158 // matrix is used to build the output matrix due to that fact.
159 octave_value_list arg_list;
160 if (! args(0).iscomplex ())
161 {
162 SparseMatrix sm = args(0).sparse_matrix_value ();
163 SparseMatrix speye (DiagMatrix (sm.cols (), sm.cols (), 1.0));
164
165 ilu_0 <SparseMatrix, double> (sm, milu);
166
167 retval(0) = speye + Ftril (ovl (sm, -1))(0).sparse_matrix_value ();
168 retval(1) = Ftriu (ovl (sm))(0).sparse_matrix_value ();
169 }
170 else
171 {
172 SparseComplexMatrix sm = args(0).sparse_complex_matrix_value ();
173 SparseMatrix speye (DiagMatrix (sm.cols (), sm.cols (), 1.0));
174
175 ilu_0 <SparseComplexMatrix, Complex> (sm, milu);
176
177 retval(0) = speye +
178 Ftril (ovl (sm, -1))(0).sparse_complex_matrix_value ();
179 retval(1) = Ftriu (ovl (sm))(0).sparse_complex_matrix_value ();
180 }
181
182 return retval;
183}
184
185template <typename octave_matrix_t, typename T>
186void ilu_crout (octave_matrix_t& sm_l, octave_matrix_t& sm_u,
187 octave_matrix_t& L, octave_matrix_t& U, T *cols_norm,
188 T *rows_norm, const T droptol = 0,
189 const std::string milu = "off")
190{
191 // Map the strings into chars for faster comparing inside loops
192 char opt;
193 enum {OFF, ROW, COL};
194 if (milu == "row")
195 opt = ROW;
196 else if (milu == "col")
197 opt = COL;
198 else
199 opt = OFF;
200
201 octave_idx_type jrow, i, j, k, jj, total_len_l, total_len_u, max_len_u,
202 max_len_l, w_len_u, w_len_l, cols_list_len, rows_list_len;
203
204 const octave_idx_type n = sm_u.cols ();
205 sm_u = sm_u.transpose ();
206
207 max_len_u = sm_u.nnz ();
208 max_len_u += (0.1 * max_len_u) > n ? 0.1 * max_len_u : n;
209 max_len_l = sm_l.nnz ();
210 max_len_l += (0.1 * max_len_l) > n ? 0.1 * max_len_l : n;
211
212 // Extract pointers to the arrays for faster access inside loops
213 octave_idx_type *cidx_in_u = sm_u.cidx ();
214 octave_idx_type *ridx_in_u = sm_u.ridx ();
215 T *data_in_u = sm_u.data ();
216 octave_idx_type *cidx_in_l = sm_l.cidx ();
217 octave_idx_type *ridx_in_l = sm_l.ridx ();
218 T *data_in_l = sm_l.data ();
219
220 // L output arrays
221 Array <octave_idx_type> ridx_out_l (dim_vector (max_len_l, 1));
222 octave_idx_type *ridx_l = ridx_out_l.fortran_vec ();
223 Array <T> data_out_l (dim_vector (max_len_l, 1));
224 T *data_l = data_out_l.fortran_vec ();
225
226 // U output arrays
227 Array <octave_idx_type> ridx_out_u (dim_vector (max_len_u, 1));
228 octave_idx_type *ridx_u = ridx_out_u.fortran_vec ();
229 Array <T> data_out_u (dim_vector (max_len_u, 1));
230 T *data_u = data_out_u.fortran_vec ();
231
232 // Working arrays
233 OCTAVE_LOCAL_BUFFER (octave_idx_type, cidx_l, n + 1);
234 OCTAVE_LOCAL_BUFFER (octave_idx_type, cidx_u, n + 1);
235 OCTAVE_LOCAL_BUFFER (octave_idx_type, cols_list, n);
236 OCTAVE_LOCAL_BUFFER (octave_idx_type, rows_list, n);
237 OCTAVE_LOCAL_BUFFER (T, w_data_l, n);
238 OCTAVE_LOCAL_BUFFER (T, w_data_u, n);
241 OCTAVE_LOCAL_BUFFER (T, cr_sum, n);
242
243 T zero = T (0);
244
245 // Initialize working arrays
246 cidx_u[0] = cidx_in_u[0];
247 cidx_l[0] = cidx_in_l[0];
248 for (i = 0; i < n; i++)
249 {
250 w_data_u[i] = zero;
251 w_data_l[i] = zero;
252 cr_sum[i] = zero;
253 }
254
255 total_len_u = 0;
256 total_len_l = 0;
257 cols_list_len = 0;
258 rows_list_len = 0;
259
260 // Loop over all columns
261 for (k = 0; k < n; k++)
262 {
263 // Load the working column and working row
264 for (i = cidx_in_l[k]; i < cidx_in_l[k+1]; i++)
265 w_data_l[ridx_in_l[i]] = data_in_l[i];
266
267 for (i = cidx_in_u[k]; i < cidx_in_u[k+1]; i++)
268 w_data_u[ridx_in_u[i]] = data_in_u[i];
269
270 // Update U working row
271 for (j = 0; j < rows_list_len; j++)
272 {
273 if ((Ufirst[rows_list[j]] != -1))
274 for (jj = Ufirst[rows_list[j]]; jj < cidx_u[rows_list[j]+1]; jj++)
275 {
276 jrow = ridx_u[jj];
277 w_data_u[jrow] -= data_u[jj] * data_l[Lfirst[rows_list[j]]];
278 }
279 }
280 // Update L working column
281 for (j = 0; j < cols_list_len; j++)
282 {
283 if (Lfirst[cols_list[j]] != -1)
284 for (jj = Lfirst[cols_list[j]]; jj < cidx_l[cols_list[j]+1]; jj++)
285 {
286 jrow = ridx_l[jj];
287 w_data_l[jrow] -= data_l[jj] * data_u[Ufirst[cols_list[j]]];
288 }
289 }
290
291 if ((max_len_u - total_len_u) < n)
292 {
293 max_len_u += (0.1 * max_len_u) > n ? 0.1 * max_len_u : n;
294 data_out_u.resize (dim_vector (max_len_u, 1));
295 data_u = data_out_u.fortran_vec ();
296 ridx_out_u.resize (dim_vector (max_len_u, 1));
297 ridx_u = ridx_out_u.fortran_vec ();
298 }
299
300 if ((max_len_l - total_len_l) < n)
301 {
302 max_len_l += (0.1 * max_len_l) > n ? 0.1 * max_len_l : n;
303 data_out_l.resize (dim_vector (max_len_l, 1));
304 data_l = data_out_l.fortran_vec ();
305 ridx_out_l.resize (dim_vector (max_len_l, 1));
306 ridx_l = ridx_out_l.fortran_vec ();
307 }
308
309 // Expand the working row into the U output data structures
310 w_len_l = 0;
311 data_u[total_len_u] = w_data_u[k];
312 ridx_u[total_len_u] = k;
313 w_len_u = 1;
314 for (i = k + 1; i < n; i++)
315 {
316 if (w_data_u[i] != zero)
317 {
318 if (std::abs (w_data_u[i]) < (droptol * rows_norm[k]))
319 {
320 if (opt == ROW)
321 cr_sum[k] += w_data_u[i];
322 else if (opt == COL)
323 cr_sum[i] += w_data_u[i];
324 }
325 else
326 {
327 data_u[total_len_u + w_len_u] = w_data_u[i];
328 ridx_u[total_len_u + w_len_u] = i;
329 w_len_u++;
330 }
331 }
332
333 // Expand the working column into the L output data structures
334 if (w_data_l[i] != zero)
335 {
336 if (std::abs (w_data_l[i]) < (droptol * cols_norm[k]))
337 {
338 if (opt == COL)
339 cr_sum[k] += w_data_l[i];
340 else if (opt == ROW)
341 cr_sum[i] += w_data_l[i];
342 }
343 else
344 {
345 data_l[total_len_l + w_len_l] = w_data_l[i];
346 ridx_l[total_len_l + w_len_l] = i;
347 w_len_l++;
348 }
349 }
350 w_data_u[i] = zero;
351 w_data_l[i] = zero;
352 }
353
354 // Compensate row and column sums --> milu option
355 if (opt == COL || opt == ROW)
356 data_u[total_len_u] += cr_sum[k];
357
358 // Check if the pivot is zero
359 if (data_u[total_len_u] == zero)
360 error ("ilu: encountered a pivot equal to 0");
361
362 // Scale the elements in L by the pivot
363 for (i = total_len_l ; i < (total_len_l + w_len_l); i++)
364 data_l[i] /= data_u[total_len_u];
365
366 total_len_u += w_len_u;
367 total_len_l += w_len_l;
368 // Check if there are too many elements to be indexed with
369 // octave_idx_type type due to fill-in during the process.
370 if (total_len_l < 0 || total_len_u < 0)
371 error ("ilu: integer overflow. Too many fill-in elements in L or U");
372
373 cidx_u[k+1] = cidx_u[k] - cidx_u[0] + w_len_u;
374 cidx_l[k+1] = cidx_l[k] - cidx_l[0] + w_len_l;
375
376 // The tricky part of the algorithm. The arrays pointing to the first
377 // working element of each column in the next iteration (Lfirst) or
378 // the first working element of each row (Ufirst) are updated. Also the
379 // arrays working as lists cols_list and rows_list are filled with
380 // indices pointing to Ufirst and Lfirst respectively.
381 // FIXME: Maybe the -1 indicating in Ufirst and Lfirst, that no elements
382 // have to be considered in a certain column or row in next iteration,
383 // can be removed. It feels safer to me using such an indicator.
384 if (k < (n - 1))
385 {
386 if (w_len_u > 0)
387 Ufirst[k] = cidx_u[k];
388 else
389 Ufirst[k] = -1;
390 if (w_len_l > 0)
391 Lfirst[k] = cidx_l[k];
392 else
393 Lfirst[k] = -1;
394 cols_list_len = 0;
395 rows_list_len = 0;
396 for (i = 0; i <= k; i++)
397 {
398 if (Ufirst[i] != -1)
399 {
400 jj = ridx_u[Ufirst[i]];
401 if (jj < (k + 1))
402 {
403 if (Ufirst[i] < (cidx_u[i+1]))
404 {
405 Ufirst[i]++;
406 if (Ufirst[i] == cidx_u[i+1])
407 Ufirst[i] = -1;
408 else
409 jj = ridx_u[Ufirst[i]];
410 }
411 }
412 if (jj == (k + 1))
413 {
414 cols_list[cols_list_len] = i;
415 cols_list_len++;
416 }
417 }
418
419 if (Lfirst[i] != -1)
420 {
421 jj = ridx_l[Lfirst[i]];
422 if (jj < (k + 1))
423 if (Lfirst[i] < (cidx_l[i+1]))
424 {
425 Lfirst[i]++;
426 if (Lfirst[i] == cidx_l[i+1])
427 Lfirst[i] = -1;
428 else
429 jj = ridx_l[Lfirst[i]];
430 }
431 if (jj == (k + 1))
432 {
433 rows_list[rows_list_len] = i;
434 rows_list_len++;
435 }
436 }
437 }
438 }
439 }
440
441 // Build the output matrices
442 L = octave_matrix_t (n, n, total_len_l);
443 U = octave_matrix_t (n, n, total_len_u);
444
445 // FIXME: Can these loops be replaced by std::copy?
446 for (i = 0; i <= n; i++)
447 L.cidx (i) = cidx_l[i];
448
449 for (i = 0; i < total_len_l; i++)
450 {
451 L.ridx (i) = ridx_l[i];
452 L.data (i) = data_l[i];
453 }
454
455 for (i = 0; i <= n; i++)
456 U.cidx (i) = cidx_u[i];
457
458 for (i = 0; i < total_len_u; i++)
459 {
460 U.ridx (i) = ridx_u[i];
461 U.data (i) = data_u[i];
462 }
463
464 U = U.transpose ();
465}
466
467DEFUN (__iluc__, args, ,
468 doc: /* -*- texinfo -*-
469@deftypefn {} {[@var{L}, @var{U}] =} __iluc__ (@var{A}, @var{droptol}, @var{milu})
470Undocumented internal function.
471@end deftypefn */)
472{
473 if (args.length () != 3)
474 print_usage ();
475
476 double droptol = args(1).double_value ();
477 std::string milu = args(2).string_value ();
478
479 if (! args(0).iscomplex ())
480 {
481 SparseMatrix sm = args(0).sparse_matrix_value ();
482 SparseMatrix sm_u = Ftriu (ovl (sm))(0).sparse_matrix_value ();
483 SparseMatrix sm_l = Ftril (ovl (sm, -1))(0).sparse_matrix_value ();
484 SparseMatrix U, L;
485
486 ilu_crout <SparseMatrix, double> (sm_l, sm_u, L, U,
487 xcolnorms (sm).fortran_vec (),
488 xrownorms (sm).fortran_vec (),
489 droptol, milu);
490
491 SparseMatrix speye (DiagMatrix (L.cols (), L.cols (), 1.0));
492
493 return ovl (L + speye, U);
494 }
495 else
496 {
497 SparseComplexMatrix sm = args(0).sparse_complex_matrix_value ();
498 SparseComplexMatrix sm_u = Ftriu (ovl (sm))(0).sparse_complex_matrix_value ();
499 SparseComplexMatrix sm_l = Ftril (ovl (sm, -1))(0).sparse_complex_matrix_value ();
501 Array<Complex> cols_norm = xcolnorms (sm);
502 Array<Complex> rows_norm = xrownorms (sm);
503
504 ilu_crout <SparseComplexMatrix, Complex> (sm_l, sm_u, L, U,
505 cols_norm.fortran_vec (),
506 rows_norm.fortran_vec (),
507 Complex (droptol), milu);
508
509 SparseMatrix speye (DiagMatrix (L.cols (), L.cols (), 1.0));
510
511 return ovl (L + speye, U);
512 }
513}
514
515// This function implements the IKJ and JKI variants of gaussian elimination
516// to perform the ILUTP decomposition. The behavior is controlled by milu
517// parameter. If milu = ['off'|'col'] the JKI version is performed taking
518// advantage of CCS format of the input matrix. Row pivoting is performed.
519// If milu = 'row' the input matrix has to be transposed to obtain the
520// equivalent CRS structure so we can work efficiently with rows. In that
521// case IKJ version is used and column pivoting is performed.
522
523template <typename octave_matrix_t, typename T>
524void ilu_tp (octave_matrix_t& sm, octave_matrix_t& L, octave_matrix_t& U,
525 octave_idx_type nnz_u, octave_idx_type nnz_l, T *cols_norm,
526 Array <octave_idx_type>& perm_vec, const T droptol = T(0),
527 const T thresh = T(0), const std::string milu = "off",
528 const double udiag = 0)
529{
530 char opt;
531 enum {OFF, ROW, COL};
532 if (milu == "row")
533 opt = ROW;
534 else if (milu == "col")
535 opt = COL;
536 else
537 opt = OFF;
538
539 const octave_idx_type n = sm.cols ();
540
541 // This is necessary for the JKI (milu = "row") variant.
542 if (opt == ROW)
543 sm = sm.transpose ();
544
545 // Extract pointers to the arrays for faster access inside loops
546 octave_idx_type *cidx_in = sm.cidx ();
547 octave_idx_type *ridx_in = sm.ridx ();
548 T *data_in = sm.data ();
549 octave_idx_type jrow, i, j, k, jj, c, total_len_l, total_len_u, p_perm,
550 max_ind, max_len_l, max_len_u;
551 T zero = T(0);
552
553 T tl = zero, aux, maximum;
554
555 max_len_u = nnz_u;
556 max_len_u += (0.1 * max_len_u) > n ? 0.1 * max_len_u : n;
557 max_len_l = nnz_l;
558 max_len_l += (0.1 * max_len_l) > n ? 0.1 * max_len_l : n;
559
560 // Extract pointers to the arrays for faster access inside loops
561 Array <octave_idx_type> cidx_out_l (dim_vector (n + 1, 1));
562 octave_idx_type *cidx_l = cidx_out_l.fortran_vec ();
563 Array <octave_idx_type> ridx_out_l (dim_vector (max_len_l, 1));
564 octave_idx_type *ridx_l = ridx_out_l.fortran_vec ();
565 Array <T> data_out_l (dim_vector (max_len_l, 1));
566 T *data_l = data_out_l.fortran_vec ();
567
568 // Data for U
569 Array <octave_idx_type> cidx_out_u (dim_vector (n + 1, 1));
570 octave_idx_type *cidx_u = cidx_out_u.fortran_vec ();
571 Array <octave_idx_type> ridx_out_u (dim_vector (max_len_u, 1));
572 octave_idx_type *ridx_u = ridx_out_u.fortran_vec ();
573 Array <T> data_out_u (dim_vector (max_len_u, 1));
574 T *data_u = data_out_u.fortran_vec ();
575
576 // Working arrays and permutation arrays
577 octave_idx_type w_len_u, w_len_l;
578 T total_sum, partial_col_sum = zero, partial_row_sum = zero;
579 std::set <octave_idx_type> iw_l;
580 std::set <octave_idx_type> iw_u;
581 std::set <octave_idx_type>::iterator it, it2;
582 OCTAVE_LOCAL_BUFFER (T, w_data, n);
584 octave_idx_type *perm = perm_vec.fortran_vec ();
586
587 // Initialize working and permutation arrays
588 cidx_l[0] = cidx_in[0];
589 cidx_u[0] = cidx_in[0];
590 for (i = 0; i < n; i++)
591 {
592 w_data[i] = 0;
593 perm[i] = i;
594 iperm[i] = i;
595 }
596 total_len_u = 0;
597 total_len_l = 0;
598
599 // Loop over all columns
600 for (k = 0; k < n; k++)
601 {
602 for (j = cidx_in[k]; j < cidx_in[k+1]; j++)
603 {
604 p_perm = iperm[ridx_in[j]];
605 w_data[iperm[ridx_in[j]]] = data_in[j];
606 if (p_perm > k)
607 iw_l.insert (ridx_in[j]);
608 else
609 iw_u.insert (p_perm);
610 }
611
612 it = iw_u.begin ();
613 jrow = *it;
614 total_sum = zero;
615 while ((jrow < k) && (it != iw_u.end ()))
616 {
617 if (opt == COL)
618 partial_col_sum = w_data[jrow];
619 if (w_data[jrow] != zero)
620 {
621 if (opt == ROW)
622 {
623 partial_row_sum = w_data[jrow];
624 tl = w_data[jrow] / data_u[uptr[jrow]];
625 }
626 for (jj = cidx_l[jrow]; jj < cidx_l[jrow+1]; jj++)
627 {
628 p_perm = iperm[ridx_l[jj]];
629 aux = w_data[p_perm];
630 if (opt == ROW)
631 {
632 w_data[p_perm] -= tl * data_l[jj];
633 partial_row_sum += tl * data_l[jj];
634 }
635 else
636 {
637 tl = data_l[jj] * w_data[jrow];
638 w_data[p_perm] -= tl;
639 if (opt == COL)
640 partial_col_sum += tl;
641 }
642
643 if ((aux == zero) && (w_data[p_perm] != zero))
644 {
645 if (p_perm > k)
646 iw_l.insert (ridx_l[jj]);
647 else
648 iw_u.insert (p_perm);
649 }
650 }
651
652 // Drop element from the U part in IKJ
653 // and L part in JKI variant (milu = [col|off])
654 if ((std::abs (w_data[jrow]) < (droptol * cols_norm[k]))
655 && (w_data[jrow] != zero))
656 {
657 if (opt == COL)
658 total_sum += partial_col_sum;
659 else if (opt == ROW)
660 total_sum += partial_row_sum;
661 w_data[jrow] = zero;
662 it2 = it;
663 it++;
664 iw_u.erase (it2);
665 jrow = *it;
666 continue;
667 }
668 else
669 // This is the element scaled by the pivot
670 // in the actual iteration
671 if (opt == ROW)
672 w_data[jrow] = tl;
673 }
674 jrow = *(++it);
675 }
676
677 // Search for the pivot and update iw_l and iw_u if the pivot is not the
678 // diagonal element
679 if ((thresh > zero) && (k < (n - 1)))
680 {
681 maximum = std::abs (w_data[k]) / thresh;
682 max_ind = perm[k];
683 for (it = iw_l.begin (); it != iw_l.end (); ++it)
684 {
685 p_perm = iperm[*it];
686 if (std::abs (w_data[p_perm]) > maximum)
687 {
688 maximum = std::abs (w_data[p_perm]);
689 max_ind = *it;
690 it2 = it;
691 }
692 }
693 // If the pivot is not the diagonal element update all
694 p_perm = iperm[max_ind];
695 if (max_ind != perm[k])
696 {
697 iw_l.erase (it2);
698 if (w_data[k] != zero)
699 iw_l.insert (perm[k]);
700 else
701 iw_u.insert (k);
702 // Swap data and update permutation vectors
703 aux = w_data[k];
704 iperm[perm[p_perm]] = k;
705 iperm[perm[k]] = p_perm;
706 c = perm[k];
707 perm[k] = perm[p_perm];
708 perm[p_perm] = c;
709 w_data[k] = w_data[p_perm];
710 w_data[p_perm] = aux;
711 }
712
713 }
714
715 // Drop elements in the L part in the IKJ version,
716 // and from the U part in the JKI version.
717 it = iw_l.begin ();
718 while (it != iw_l.end ())
719 {
720 p_perm = iperm[*it];
721 if (droptol > zero)
722 if (std::abs (w_data[p_perm]) < (droptol * cols_norm[k]))
723 {
724 if (opt != OFF)
725 total_sum += w_data[p_perm];
726 w_data[p_perm] = zero;
727 it2 = it;
728 it++;
729 iw_l.erase (it2);
730 continue;
731 }
732
733 it++;
734 }
735
736 // If milu == [row|col] summation is preserved.
737 // Compensate diagonal element.
738 if (opt != OFF)
739 {
740 if ((total_sum > zero) && (w_data[k] == zero))
741 iw_u.insert (k);
742 w_data[k] += total_sum;
743 }
744
745 // Check if the pivot is zero and if udiag is active.
746 // NOTE: If the pivot == 0 and udiag is active, then if milu = [col|row]
747 // will not preserve the row sum for that column/row.
748 if (w_data[k] == zero)
749 {
750 if (udiag != 1)
751 error ("ilu: encountered a pivot equal to 0");
752
753 w_data[k] = droptol;
754 iw_u.insert (k);
755 }
756
757 // Scale the elements on the L part for IKJ version (milu = [col|off])
758 if (opt != ROW)
759 for (it = iw_l.begin (); it != iw_l.end (); ++it)
760 {
761 p_perm = iperm[*it];
762 w_data[p_perm] = w_data[p_perm] / w_data[k];
763 }
764
765 if ((max_len_u - total_len_u) < n)
766 {
767 max_len_u += (0.1 * max_len_u) > n ? 0.1 * max_len_u : n;
768 data_out_u.resize (dim_vector (max_len_u, 1));
769 data_u = data_out_u.fortran_vec ();
770 ridx_out_u.resize (dim_vector (max_len_u, 1));
771 ridx_u = ridx_out_u.fortran_vec ();
772 }
773
774 if ((max_len_l - total_len_l) < n)
775 {
776 max_len_l += (0.1 * max_len_l) > n ? 0.1 * max_len_l : n;
777 data_out_l.resize (dim_vector (max_len_l, 1));
778 data_l = data_out_l.fortran_vec ();
779 ridx_out_l.resize (dim_vector (max_len_l, 1));
780 ridx_l = ridx_out_l.fortran_vec ();
781 }
782
783 // Expand working vector into U.
784 w_len_u = 0;
785 for (it = iw_u.begin (); it != iw_u.end (); ++it)
786 {
787 if (w_data[*it] != zero)
788 {
789 data_u[total_len_u + w_len_u] = w_data[*it];
790 ridx_u[total_len_u + w_len_u] = *it;
791 w_len_u++;
792 }
793 w_data[*it] = 0;
794 }
795
796 // Expand working vector into L.
797 w_len_l = 0;
798 for (it = iw_l.begin (); it != iw_l.end (); ++it)
799 {
800 p_perm = iperm[*it];
801 if (w_data[p_perm] != zero)
802 {
803 data_l[total_len_l + w_len_l] = w_data[p_perm];
804 ridx_l[total_len_l + w_len_l] = *it;
805 w_len_l++;
806 }
807 w_data[p_perm] = 0;
808 }
809 total_len_u += w_len_u;
810 total_len_l += w_len_l;
811
812 // Check if there are too many elements to be indexed with
813 // octave_idx_type type due to fill-in during the process.
814 if (total_len_l < 0 || total_len_u < 0)
815 error ("ilu: Integer overflow. Too many fill-in elements in L or U");
816
817 if (opt == ROW)
818 uptr[k] = total_len_u - 1;
819
820 cidx_u[k+1] = cidx_u[k] - cidx_u[0] + w_len_u;
821 cidx_l[k+1] = cidx_l[k] - cidx_l[0] + w_len_l;
822
823 iw_l.clear ();
824 iw_u.clear ();
825 }
826
827 octave_matrix_t *L_ptr;
828 octave_matrix_t *U_ptr;
829 octave_matrix_t diag (n, n, n);
830
831 // L and U are interchanged if milu = 'row'. It is a matter
832 // of nomenclature to re-use code with both IKJ and JKI
833 // versions of the algorithm.
834 if (opt == ROW)
835 {
836 L_ptr = &U;
837 U_ptr = &L;
838 L = octave_matrix_t (n, n, total_len_u - n);
839 U = octave_matrix_t (n, n, total_len_l);
840 }
841 else
842 {
843 L_ptr = &L;
844 U_ptr = &U;
845 L = octave_matrix_t (n, n, total_len_l);
846 U = octave_matrix_t (n, n, total_len_u);
847 }
848
849 for (i = 0; i <= n; i++)
850 {
851 L_ptr->cidx (i) = cidx_l[i];
852 U_ptr->cidx (i) = cidx_u[i];
853 if (opt == ROW)
854 U_ptr->cidx (i) -= i;
855 }
856
857 for (i = 0; i < n; i++)
858 {
859 if (opt == ROW)
860 diag.elem (i, i) = data_u[uptr[i]];
861 j = cidx_l[i];
862
863 while (j < cidx_l[i+1])
864 {
865 L_ptr->ridx (j) = ridx_l[j];
866 L_ptr->data (j) = data_l[j];
867 j++;
868 }
869 j = cidx_u[i];
870
871 while (j < cidx_u[i+1])
872 {
873 c = j;
874 if (opt == ROW)
875 {
876 // The diagonal is removed from L if milu = 'row'.
877 // That is because is convenient to have it inside
878 // the L part to carry out the process.
879 if (ridx_u[j] == i)
880 {
881 j++;
882 continue;
883 }
884 else
885 c -= i;
886 }
887 U_ptr->data (c) = data_u[j];
888 U_ptr->ridx (c) = ridx_u[j];
889 j++;
890 }
891 }
892
893 if (opt == ROW)
894 {
895 U = U.transpose ();
896 // The diagonal, conveniently permuted is added to U
897 U += diag.index (idx_vector::colon, perm_vec);
898 L = L.transpose ();
899 }
900}
901
902DEFUN (__ilutp__, args, nargout,
903 doc: /* -*- texinfo -*-
904@deftypefn {} {[@var{L}, @var{U}] =} __ilutp__ (@var{A}, @var{droptol}, @var{thresh}, @var{milu}, @var{udiag})
905@deftypefnx {} {[@var{L}, @var{U}, @var{P}] =} __ilutp__ (@dots{})
906Undocumented internal function.
907@end deftypefn */)
908{
909 if (args.length () != 5)
910 print_usage ();
911
912 octave_value_list retval;
913 double droptol = args(1).double_value ();
914 double thresh = args(2).double_value ();
915 std::string milu = args(3).string_value ();
916 double udiag = args(4).double_value ();
917
918 octave_value_list arg_list;
919 octave_idx_type nnz_u, nnz_l;
920 if (! args(0).iscomplex ())
921 {
922 SparseMatrix sm = args(0).sparse_matrix_value ();
923 SparseMatrix U, L;
924 nnz_u = (Ftriu (ovl (sm))(0).sparse_matrix_value ()).nnz ();
925 nnz_l = (Ftril (ovl (sm, -1))(0).sparse_matrix_value ()).nnz ();
926 Array <double> rc_norm;
927 if (milu == "row")
928 rc_norm = xrownorms (sm);
929 else
930 rc_norm = xcolnorms (sm);
931 Array <octave_idx_type> perm (dim_vector (sm.cols (), 1));
932
933 ilu_tp <SparseMatrix, double> (sm, L, U, nnz_u, nnz_l,
934 rc_norm.fortran_vec (),
935 perm, droptol, thresh, milu, udiag);
936
937 SparseMatrix speye (DiagMatrix (L.cols (), L.cols (), 1.0));
938 if (milu == "row")
939 {
940 retval(0) = L + speye;
941 if (nargout == 3)
942 {
943 retval(1) = U.index (idx_vector::colon, perm);
944 retval(2) = speye.index (idx_vector::colon, perm);
945 }
946 else
947 retval(1) = U;
948 }
949 else
950 {
951 retval(1) = U;
952 if (nargout == 3)
953 {
954 retval(0) = L.index (perm, idx_vector::colon) + speye;
955 retval(2) = speye.index (perm, idx_vector::colon);
956 }
957 else
958 retval(0) = L + speye.index (idx_vector::colon, perm);
959 }
960 }
961 else
962 {
963 SparseComplexMatrix sm = args(0).sparse_complex_matrix_value ();
965 nnz_u = (Ftriu (ovl (sm))(0).sparse_complex_matrix_value ()).nnz ();
966 nnz_l = (Ftril (ovl (sm, -1))(0).sparse_complex_matrix_value ()).nnz ();
967 Array <Complex> rc_norm;
968 if (milu == "row")
969 rc_norm = xrownorms (sm);
970 else
971 rc_norm = xcolnorms (sm);
972 Array <octave_idx_type> perm (dim_vector (sm.cols (), 1));
973
974 ilu_tp <SparseComplexMatrix, Complex>
975 (sm, L, U, nnz_u, nnz_l, rc_norm.fortran_vec (), perm,
976 Complex (droptol), Complex (thresh), milu, udiag);
977
978 SparseMatrix speye (DiagMatrix (L.cols (), L.cols (), 1.0));
979 if (milu == "row")
980 {
981 retval(0) = L + speye;
982 if (nargout == 3)
983 {
984 retval(1) = U.index (idx_vector::colon, perm);
985 retval(2) = speye.index (idx_vector::colon, perm);
986 }
987 else if (nargout == 2)
988 retval(1) = U;
989 }
990 else
991 {
992 retval(1) = U;
993 if (nargout == 3)
994 {
995 retval(0) = L.index (perm, idx_vector::colon) + speye;
996 retval(2) = speye.index (perm, idx_vector::colon);
997 }
998 else
999 retval(0) = L + speye.index (idx_vector::colon, perm);
1000 }
1001 }
1002
1003 return retval;
1004}
1005
1006/*
1007## No test needed for internal helper function.
1008%!assert (1)
1009*/
1010
1011OCTAVE_NAMESPACE_END
void ilu_tp(octave_matrix_t &sm, octave_matrix_t &L, octave_matrix_t &U, octave_idx_type nnz_u, octave_idx_type nnz_l, T *cols_norm, Array< octave_idx_type > &perm_vec, const T droptol=T(0), const T thresh=T(0), const std::string milu="off", const double udiag=0)
Definition: __ilu__.cc:524
void ilu_crout(octave_matrix_t &sm_l, octave_matrix_t &sm_u, octave_matrix_t &L, octave_matrix_t &U, T *cols_norm, T *rows_norm, const T droptol=0, const std::string milu="off")
Definition: __ilu__.cc:186
OCTAVE_NAMESPACE_BEGIN void ilu_0(octave_matrix_t &sm, const std::string milu="off")
Definition: __ilu__.cc:47
OCTARRAY_API void resize(const dim_vector &dv, const T &rfv)
Size of the specified dimension.
Definition: Array.cc:1010
OCTARRAY_API T * fortran_vec(void)
Size of the specified dimension.
Definition: Array.cc:1744
OCTAVE_API Sparse< T, Alloc > index(const octave::idx_vector &i, bool resize_ok=false) const
Definition: Sparse.cc:1432
octave_idx_type cols(void) const
Definition: Sparse.h:352
Vector representing the dimensions (size) of an Array.
Definition: dim-vector.h:94
static const idx_vector colon
Definition: idx-vector.h:483
OCTINTERP_API void print_usage(void)
Definition: defun-int.h:72
#define DEFUN(name, args_name, nargout_name, doc)
Macro to define a builtin function.
Definition: defun.h:56
void error(const char *fmt,...)
Definition: error.cc:980
ColumnVector xrownorms(const Matrix &m, double p)
Definition: oct-norm.cc:628
RowVector xcolnorms(const Matrix &m, double p)
Definition: oct-norm.cc:628
std::complex< double > Complex
Definition: oct-cmplx.h:33
#define OCTAVE_LOCAL_BUFFER(T, buf, size)
Definition: oct-locbuf.h:44
octave_value_list ovl(const OV_Args &... args)
Construct an octave_value_list with less typing.
Definition: ovl.h:211
static T abs(T x)
Definition: pr-output.cc:1678
OCTAVE_EXPORT octave_value_list Ftriu(const octave_value_list &args, int)
Definition: tril.cc:431
OCTAVE_EXPORT octave_value_list Ftril(const octave_value_list &args, int)
Definition: tril.cc:382