GNU Octave 11.1.0
A high-level interpreted language, primarily intended for numerical computations, mostly compatible with Matlab
 
Loading...
Searching...
No Matches
__ilu__.cc
Go to the documentation of this file.
1////////////////////////////////////////////////////////////////////////
2//
3// Copyright (C) 2013-2026 The Octave Project Developers
4//
5// See the file COPYRIGHT.md in the top-level directory of this
6// distribution or <https://octave.org/copyright/>.
7//
8// This file is part of Octave.
9//
10// Octave is free software: you can redistribute it and/or modify it
11// under the terms of the GNU General Public License as published by
12// the Free Software Foundation, either version 3 of the License, or
13// (at your option) any later version.
14//
15// Octave is distributed in the hope that it will be useful, but
16// WITHOUT ANY WARRANTY; without even the implied warranty of
17// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18// GNU General Public License for more details.
19//
20// You should have received a copy of the GNU General Public License
21// along with Octave; see the file COPYING. If not, see
22// <https://www.gnu.org/licenses/>.
23//
24////////////////////////////////////////////////////////////////////////
25
26#if defined (HAVE_CONFIG_H)
27# include "config.h"
28#endif
29
30#include "oct-locbuf.h"
31#include "oct-norm.h"
32
33#include "defun.h"
34#include "error.h"
35
36#include "builtin-defun-decls.h"
37
39
40// This function implements the IKJ and JKI variants of Gaussian elimination to
41// perform the ILU0 decomposition. The behavior is controlled by milu
42// parameter. If milu = ['off'|'col'] the JKI version is performed taking
43// advantage of CCS format of the input matrix. If milu = 'row' the input
44// matrix has to be transposed to obtain the equivalent CRS structure so we can
45// work efficiently with rows. In this case IKJ version is used.
46template <typename octave_matrix_t, typename T>
47void ilu_0 (octave_matrix_t& sm, const std::string milu = "off")
48{
49 const octave_idx_type n = sm.cols ();
50 octave_idx_type j1, j2, jrow, jw, i, j, k, jj;
51 T r;
52 T tl = 0;
53
54 enum {OFF, ROW, COL};
55 char opt;
56 if (milu == "row")
57 {
58 opt = ROW;
59 sm = sm.transpose ();
60 }
61 else if (milu == "col")
62 opt = COL;
63 else
64 opt = OFF;
65
66 // Input matrix pointers
67 octave_idx_type *cidx = sm.cidx ();
68 octave_idx_type *ridx = sm.ridx ();
69 T *data = sm.data ();
70
71 // Working arrays
74
75 // Initialize working arrays
76 for (i = 0; i < n; i++)
77 iw[i] = -1;
78
79 // Loop over all columns
80 for (k = 0; k < n; k++)
81 {
82 j1 = cidx[k];
83 j2 = cidx[k+1];
84
85 if (j1 == j2)
86 error ("ilu: A has a zero on the diagonal");
87
88 for (j = j1; j < j2; j++)
89 iw[ridx[j]] = j;
90
91 r = 0;
92 j = j1;
93 jrow = ridx[j1];
94 while ((jrow < k) && (j < j2))
95 {
96 if (opt == ROW)
97 {
98 tl = data[j] / data[uptr[jrow]];
99 data[j] = tl;
100 }
101 for (jj = uptr[jrow] + 1; jj < cidx[jrow+1]; jj++)
102 {
103 jw = iw[ridx[jj]];
104 if (jw != -1)
105 if (opt == ROW)
106 data[jw] -= tl * data[jj];
107 else
108 data[jw] -= data[j] * data[jj];
109
110 else
111 // That is for the milu='row'
112 if (opt == ROW)
113 r += tl * data[jj];
114 else if (opt == COL)
115 r += data[j] * data[jj];
116 }
117 j++;
118 jrow = ridx[j];
119 }
120 uptr[k] = j;
121 if (opt != OFF)
122 data[uptr[k]] -= r;
123
124 if (opt != ROW)
125 for (jj = uptr[k] + 1; jj < cidx[k+1]; jj++)
126 data[jj] /= data[uptr[k]];
127
128 if (k != jrow)
129 error ("ilu: A has a zero on the diagonal");
130
131 if (data[j] == T(0))
132 error ("ilu: encountered a pivot equal to 0");
133
134 for (i = j1; i < j2; i++)
135 iw[ridx[i]] = -1;
136 }
137
138 if (opt == ROW)
139 sm = sm.transpose ();
140}
141
142DEFUN (__ilu0__, args, ,
143 doc: /* -*- texinfo -*-
144@deftypefn {} {[@var{L}, @var{U}] =} __ilu0__ (@var{A}, @var{milu})
145@deftypefnx {} {[@var{L}, @var{U}, @var{P}] =} __ilu0__ (@var{A}, @var{milu})
146Undocumented internal function.
147@end deftypefn */)
148{
149 if (args.length () != 2)
150 print_usage ();
151
152 octave_value_list retval (2);
153
154 std::string milu = args(1).string_value ();
155
156 // In ILU0 algorithm the zero-pattern of the input matrix is preserved so
157 // its structure does not change during the algorithm. The same input
158 // matrix is used to build the output matrix due to that fact.
159 octave_value_list arg_list;
160 if (! args(0).iscomplex ())
161 {
162 SparseMatrix sm = args(0).sparse_matrix_value ();
163 SparseMatrix speye (DiagMatrix (sm.cols (), sm.cols (), 1.0));
164
165 ilu_0<SparseMatrix, double> (sm, milu);
166
167 retval(0) = speye + Ftril (ovl (sm, -1))(0).sparse_matrix_value ();
168 retval(1) = Ftriu (ovl (sm))(0).sparse_matrix_value ();
169 }
170 else
171 {
172 SparseComplexMatrix sm = args(0).sparse_complex_matrix_value ();
173 SparseMatrix speye (DiagMatrix (sm.cols (), sm.cols (), 1.0));
174
175 ilu_0<SparseComplexMatrix, Complex> (sm, milu);
176
177 retval(0) = speye +
178 Ftril (ovl (sm, -1))(0).sparse_complex_matrix_value ();
179 retval(1) = Ftriu (ovl (sm))(0).sparse_complex_matrix_value ();
180 }
181
182 return retval;
183}
184
185template <typename octave_matrix_t, typename T>
186void
187ilu_crout (octave_matrix_t& sm_l, octave_matrix_t& sm_u,
188 octave_matrix_t& L, octave_matrix_t& U, T *cols_norm,
189 T *rows_norm, const T droptol = 0,
190 const std::string milu = "off")
191{
192 // Map the strings into chars for faster comparing inside loops
193 char opt;
194 enum {OFF, ROW, COL};
195 if (milu == "row")
196 opt = ROW;
197 else if (milu == "col")
198 opt = COL;
199 else
200 opt = OFF;
201
202 octave_idx_type jrow, i, j, k, jj, total_len_l, total_len_u, max_len_u,
203 max_len_l, w_len_u, w_len_l, cols_list_len, rows_list_len;
204
205 const octave_idx_type n = sm_u.cols ();
206 sm_u = sm_u.transpose ();
207
208 max_len_u = sm_u.nnz ();
209 max_len_u += (0.1 * max_len_u) > n ? 0.1 * max_len_u : n;
210 max_len_l = sm_l.nnz ();
211 max_len_l += (0.1 * max_len_l) > n ? 0.1 * max_len_l : n;
212
213 // Extract pointers to the arrays for faster access inside loops
214 octave_idx_type *cidx_in_u = sm_u.cidx ();
215 octave_idx_type *ridx_in_u = sm_u.ridx ();
216 T *data_in_u = sm_u.data ();
217 octave_idx_type *cidx_in_l = sm_l.cidx ();
218 octave_idx_type *ridx_in_l = sm_l.ridx ();
219 T *data_in_l = sm_l.data ();
220
221 // L output arrays
222 Array<octave_idx_type> ridx_out_l (dim_vector (max_len_l, 1));
223 octave_idx_type *ridx_l = ridx_out_l.rwdata ();
224 Array<T> data_out_l (dim_vector (max_len_l, 1));
225 T *data_l = data_out_l.rwdata ();
226
227 // U output arrays
228 Array<octave_idx_type> ridx_out_u (dim_vector (max_len_u, 1));
229 octave_idx_type *ridx_u = ridx_out_u.rwdata ();
230 Array<T> data_out_u (dim_vector (max_len_u, 1));
231 T *data_u = data_out_u.rwdata ();
232
233 // Working arrays
234 OCTAVE_LOCAL_BUFFER (octave_idx_type, cidx_l, n + 1);
235 OCTAVE_LOCAL_BUFFER (octave_idx_type, cidx_u, n + 1);
236 OCTAVE_LOCAL_BUFFER (octave_idx_type, cols_list, n);
237 OCTAVE_LOCAL_BUFFER (octave_idx_type, rows_list, n);
238 OCTAVE_LOCAL_BUFFER (T, w_data_l, n);
239 OCTAVE_LOCAL_BUFFER (T, w_data_u, n);
242 OCTAVE_LOCAL_BUFFER (T, cr_sum, n);
243
244 T zero = T (0);
245
246 // Initialize working arrays
247 cidx_u[0] = cidx_in_u[0];
248 cidx_l[0] = cidx_in_l[0];
249 for (i = 0; i < n; i++)
250 {
251 w_data_u[i] = zero;
252 w_data_l[i] = zero;
253 cr_sum[i] = zero;
254 }
255
256 total_len_u = 0;
257 total_len_l = 0;
258 cols_list_len = 0;
259 rows_list_len = 0;
260
261 // Loop over all columns
262 for (k = 0; k < n; k++)
263 {
264 // Load the working column and working row
265 for (i = cidx_in_l[k]; i < cidx_in_l[k+1]; i++)
266 w_data_l[ridx_in_l[i]] = data_in_l[i];
267
268 for (i = cidx_in_u[k]; i < cidx_in_u[k+1]; i++)
269 w_data_u[ridx_in_u[i]] = data_in_u[i];
270
271 // Update U working row
272 for (j = 0; j < rows_list_len; j++)
273 {
274 if ((Ufirst[rows_list[j]] != -1))
275 for (jj = Ufirst[rows_list[j]]; jj < cidx_u[rows_list[j]+1]; jj++)
276 {
277 jrow = ridx_u[jj];
278 w_data_u[jrow] -= data_u[jj] * data_l[Lfirst[rows_list[j]]];
279 }
280 }
281 // Update L working column
282 for (j = 0; j < cols_list_len; j++)
283 {
284 if (Lfirst[cols_list[j]] != -1)
285 for (jj = Lfirst[cols_list[j]]; jj < cidx_l[cols_list[j]+1]; jj++)
286 {
287 jrow = ridx_l[jj];
288 w_data_l[jrow] -= data_l[jj] * data_u[Ufirst[cols_list[j]]];
289 }
290 }
291
292 if ((max_len_u - total_len_u) < n)
293 {
294 max_len_u += (0.1 * max_len_u) > n ? 0.1 * max_len_u : n;
295 data_out_u.resize (dim_vector (max_len_u, 1));
296 data_u = data_out_u.rwdata ();
297 ridx_out_u.resize (dim_vector (max_len_u, 1));
298 ridx_u = ridx_out_u.rwdata ();
299 }
300
301 if ((max_len_l - total_len_l) < n)
302 {
303 max_len_l += (0.1 * max_len_l) > n ? 0.1 * max_len_l : n;
304 data_out_l.resize (dim_vector (max_len_l, 1));
305 data_l = data_out_l.rwdata ();
306 ridx_out_l.resize (dim_vector (max_len_l, 1));
307 ridx_l = ridx_out_l.rwdata ();
308 }
309
310 // Expand the working row into the U output data structures
311 w_len_l = 0;
312 data_u[total_len_u] = w_data_u[k];
313 ridx_u[total_len_u] = k;
314 w_len_u = 1;
315 for (i = k + 1; i < n; i++)
316 {
317 if (w_data_u[i] != zero)
318 {
319 if (std::abs (w_data_u[i]) < (droptol * rows_norm[k]))
320 {
321 if (opt == ROW)
322 cr_sum[k] += w_data_u[i];
323 else if (opt == COL)
324 cr_sum[i] += w_data_u[i];
325 }
326 else
327 {
328 data_u[total_len_u + w_len_u] = w_data_u[i];
329 ridx_u[total_len_u + w_len_u] = i;
330 w_len_u++;
331 }
332 }
333
334 // Expand the working column into the L output data structures
335 if (w_data_l[i] != zero)
336 {
337 if (std::abs (w_data_l[i]) < (droptol * cols_norm[k]))
338 {
339 if (opt == COL)
340 cr_sum[k] += w_data_l[i];
341 else if (opt == ROW)
342 cr_sum[i] += w_data_l[i];
343 }
344 else
345 {
346 data_l[total_len_l + w_len_l] = w_data_l[i];
347 ridx_l[total_len_l + w_len_l] = i;
348 w_len_l++;
349 }
350 }
351 w_data_u[i] = zero;
352 w_data_l[i] = zero;
353 }
354
355 // Compensate row and column sums --> milu option
356 if (opt == COL || opt == ROW)
357 data_u[total_len_u] += cr_sum[k];
358
359 // Check if the pivot is zero
360 if (data_u[total_len_u] == zero)
361 error ("ilu: encountered a pivot equal to 0");
362
363 // Scale the elements in L by the pivot
364 for (i = total_len_l ; i < (total_len_l + w_len_l); i++)
365 data_l[i] /= data_u[total_len_u];
366
367 total_len_u += w_len_u;
368 total_len_l += w_len_l;
369 // Check if there are too many elements to be indexed with
370 // octave_idx_type type due to fill-in during the process.
371 if (total_len_l < 0 || total_len_u < 0)
372 error ("ilu: integer overflow. Too many fill-in elements in L or U");
373
374 cidx_u[k+1] = cidx_u[k] - cidx_u[0] + w_len_u;
375 cidx_l[k+1] = cidx_l[k] - cidx_l[0] + w_len_l;
376
377 // The tricky part of the algorithm. The arrays pointing to the first
378 // working element of each column in the next iteration (Lfirst) or
379 // the first working element of each row (Ufirst) are updated. Also the
380 // arrays working as lists cols_list and rows_list are filled with
381 // indices pointing to Ufirst and Lfirst respectively.
382 // FIXME: Maybe the -1 indicating in Ufirst and Lfirst, that no elements
383 // have to be considered in a certain column or row in next iteration,
384 // can be removed. It feels safer to me using such an indicator.
385 if (k < (n - 1))
386 {
387 if (w_len_u > 0)
388 Ufirst[k] = cidx_u[k];
389 else
390 Ufirst[k] = -1;
391 if (w_len_l > 0)
392 Lfirst[k] = cidx_l[k];
393 else
394 Lfirst[k] = -1;
395 cols_list_len = 0;
396 rows_list_len = 0;
397 for (i = 0; i <= k; i++)
398 {
399 if (Ufirst[i] != -1)
400 {
401 jj = ridx_u[Ufirst[i]];
402 if (jj < (k + 1))
403 {
404 if (Ufirst[i] < (cidx_u[i+1]))
405 {
406 Ufirst[i]++;
407 if (Ufirst[i] == cidx_u[i+1])
408 Ufirst[i] = -1;
409 else
410 jj = ridx_u[Ufirst[i]];
411 }
412 }
413 if (jj == (k + 1))
414 {
415 cols_list[cols_list_len] = i;
416 cols_list_len++;
417 }
418 }
419
420 if (Lfirst[i] != -1)
421 {
422 jj = ridx_l[Lfirst[i]];
423 if (jj < (k + 1))
424 if (Lfirst[i] < (cidx_l[i+1]))
425 {
426 Lfirst[i]++;
427 if (Lfirst[i] == cidx_l[i+1])
428 Lfirst[i] = -1;
429 else
430 jj = ridx_l[Lfirst[i]];
431 }
432 if (jj == (k + 1))
433 {
434 rows_list[rows_list_len] = i;
435 rows_list_len++;
436 }
437 }
438 }
439 }
440 }
441
442 // Build the output matrices
443 L = octave_matrix_t (n, n, total_len_l);
444 U = octave_matrix_t (n, n, total_len_u);
445
446 // FIXME: Can these loops be replaced by std::copy?
447 for (i = 0; i <= n; i++)
448 L.cidx (i) = cidx_l[i];
449
450 for (i = 0; i < total_len_l; i++)
451 {
452 L.ridx (i) = ridx_l[i];
453 L.data (i) = data_l[i];
454 }
455
456 for (i = 0; i <= n; i++)
457 U.cidx (i) = cidx_u[i];
458
459 for (i = 0; i < total_len_u; i++)
460 {
461 U.ridx (i) = ridx_u[i];
462 U.data (i) = data_u[i];
463 }
464
465 U = U.transpose ();
466}
467
468DEFUN (__iluc__, args, ,
469 doc: /* -*- texinfo -*-
470@deftypefn {} {[@var{L}, @var{U}] =} __iluc__ (@var{A}, @var{droptol}, @var{milu})
471Undocumented internal function.
472@end deftypefn */)
473{
474 if (args.length () != 3)
475 print_usage ();
476
477 double droptol = args(1).double_value ();
478 std::string milu = args(2).string_value ();
479
480 if (! args(0).iscomplex ())
481 {
482 SparseMatrix sm = args(0).sparse_matrix_value ();
483 SparseMatrix sm_u = Ftriu (ovl (sm))(0).sparse_matrix_value ();
484 SparseMatrix sm_l = Ftril (ovl (sm, -1))(0).sparse_matrix_value ();
485 SparseMatrix U, L;
486
487 RowVector sm_col_norms = xcolnorms (sm);
488 ColumnVector sm_row_norms = xrownorms (sm);
489 ilu_crout<SparseMatrix, double> (sm_l, sm_u, L, U,
490 sm_col_norms.rwdata (),
491 sm_row_norms.rwdata (),
492 droptol, milu);
493
494 SparseMatrix speye (DiagMatrix (L.cols (), L.cols (), 1.0));
495
496 return ovl (L + speye, U);
497 }
498 else
499 {
500 SparseComplexMatrix sm = args(0).sparse_complex_matrix_value ();
501 SparseComplexMatrix sm_u = Ftriu (ovl (sm))(0).sparse_complex_matrix_value ();
502 SparseComplexMatrix sm_l = Ftril (ovl (sm, -1))(0).sparse_complex_matrix_value ();
504 Array<Complex> cols_norm = xcolnorms (sm);
505 Array<Complex> rows_norm = xrownorms (sm);
506
507 ilu_crout<SparseComplexMatrix, Complex> (sm_l, sm_u, L, U,
508 cols_norm.rwdata (),
509 rows_norm.rwdata (),
510 Complex (droptol), milu);
511
512 SparseMatrix speye (DiagMatrix (L.cols (), L.cols (), 1.0));
513
514 return ovl (L + speye, U);
515 }
516}
517
518// This function implements the IKJ and JKI variants of gaussian elimination
519// to perform the ILUTP decomposition. The behavior is controlled by milu
520// parameter. If milu = ['off'|'col'] the JKI version is performed taking
521// advantage of CCS format of the input matrix. Row pivoting is performed.
522// If milu = 'row' the input matrix has to be transposed to obtain the
523// equivalent CRS structure so we can work efficiently with rows. In that
524// case IKJ version is used and column pivoting is performed.
525
526template <typename octave_matrix_t, typename T>
527void
528ilu_tp (octave_matrix_t& sm, octave_matrix_t& L, octave_matrix_t& U,
529 octave_idx_type nnz_u, octave_idx_type nnz_l, T *cols_norm,
530 Array<octave_idx_type>& perm_vec, const T droptol = T(0),
531 const T thresh = T(0), const std::string milu = "off",
532 const double udiag = 0)
533{
534 char opt;
535 enum {OFF, ROW, COL};
536 if (milu == "row")
537 opt = ROW;
538 else if (milu == "col")
539 opt = COL;
540 else
541 opt = OFF;
542
543 const octave_idx_type n = sm.cols ();
544
545 // This is necessary for the JKI (milu = "row") variant.
546 if (opt == ROW)
547 sm = sm.transpose ();
548
549 // Extract pointers to the arrays for faster access inside loops
550 octave_idx_type *cidx_in = sm.cidx ();
551 octave_idx_type *ridx_in = sm.ridx ();
552 T *data_in = sm.data ();
553 octave_idx_type jrow, i, j, k, jj, c, total_len_l, total_len_u, p_perm,
554 max_ind, max_len_l, max_len_u;
555 T zero = T(0);
556
557 T tl = zero, aux, maximum;
558
559 max_len_u = nnz_u;
560 max_len_u += (0.1 * max_len_u) > n ? 0.1 * max_len_u : n;
561 max_len_l = nnz_l;
562 max_len_l += (0.1 * max_len_l) > n ? 0.1 * max_len_l : n;
563
564 // Extract pointers to the arrays for faster access inside loops
565 Array<octave_idx_type> cidx_out_l (dim_vector (n + 1, 1));
566 octave_idx_type *cidx_l = cidx_out_l.rwdata ();
567 Array<octave_idx_type> ridx_out_l (dim_vector (max_len_l, 1));
568 octave_idx_type *ridx_l = ridx_out_l.rwdata ();
569 Array<T> data_out_l (dim_vector (max_len_l, 1));
570 T *data_l = data_out_l.rwdata ();
571
572 // Data for U
573 Array<octave_idx_type> cidx_out_u (dim_vector (n + 1, 1));
574 octave_idx_type *cidx_u = cidx_out_u.rwdata ();
575 Array<octave_idx_type> ridx_out_u (dim_vector (max_len_u, 1));
576 octave_idx_type *ridx_u = ridx_out_u.rwdata ();
577 Array<T> data_out_u (dim_vector (max_len_u, 1));
578 T *data_u = data_out_u.rwdata ();
579
580 // Working arrays and permutation arrays
581 octave_idx_type w_len_u, w_len_l;
582 T total_sum, partial_col_sum = zero, partial_row_sum = zero;
583 std::set<octave_idx_type> iw_l;
584 std::set<octave_idx_type> iw_u;
585 std::set<octave_idx_type>::iterator it, it2;
586 OCTAVE_LOCAL_BUFFER (T, w_data, n);
588 octave_idx_type *perm = perm_vec.rwdata ();
590
591 // Initialize working and permutation arrays
592 cidx_l[0] = cidx_in[0];
593 cidx_u[0] = cidx_in[0];
594 for (i = 0; i < n; i++)
595 {
596 w_data[i] = 0;
597 perm[i] = i;
598 iperm[i] = i;
599 }
600 total_len_u = 0;
601 total_len_l = 0;
602
603 // Loop over all columns
604 for (k = 0; k < n; k++)
605 {
606 for (j = cidx_in[k]; j < cidx_in[k+1]; j++)
607 {
608 p_perm = iperm[ridx_in[j]];
609 w_data[iperm[ridx_in[j]]] = data_in[j];
610 if (p_perm > k)
611 iw_l.insert (ridx_in[j]);
612 else
613 iw_u.insert (p_perm);
614 }
615
616 it = iw_u.begin ();
617 total_sum = zero;
618 while (it != iw_u.end ())
619 {
620 jrow = *it;
621 if (jrow >= k)
622 break;
623
624 if (opt == COL)
625 partial_col_sum = w_data[jrow];
626 if (w_data[jrow] != zero)
627 {
628 if (opt == ROW)
629 {
630 partial_row_sum = w_data[jrow];
631 tl = w_data[jrow] / data_u[uptr[jrow]];
632 }
633 for (jj = cidx_l[jrow]; jj < cidx_l[jrow+1]; jj++)
634 {
635 p_perm = iperm[ridx_l[jj]];
636 aux = w_data[p_perm];
637 if (opt == ROW)
638 {
639 w_data[p_perm] -= tl * data_l[jj];
640 partial_row_sum += tl * data_l[jj];
641 }
642 else
643 {
644 tl = data_l[jj] * w_data[jrow];
645 w_data[p_perm] -= tl;
646 if (opt == COL)
647 partial_col_sum += tl;
648 }
649
650 if ((aux == zero) && (w_data[p_perm] != zero))
651 {
652 if (p_perm > k)
653 iw_l.insert (ridx_l[jj]);
654 else
655 iw_u.insert (p_perm);
656 }
657 }
658
659 // Drop element from the U part in IKJ
660 // and L part in JKI variant (milu = [col|off])
661 if ((std::abs (w_data[jrow]) < (droptol * cols_norm[k]))
662 && (w_data[jrow] != zero))
663 {
664 if (opt == COL)
665 total_sum += partial_col_sum;
666 else if (opt == ROW)
667 total_sum += partial_row_sum;
668 w_data[jrow] = zero;
669 it = iw_u.erase (it); // Returns next valid iterator
670 continue;
671 }
672 else
673 // This is the element scaled by the pivot
674 // in the actual iteration
675 if (opt == ROW)
676 w_data[jrow] = tl;
677 }
678 ++it; // Increment only when we did not erase
679 }
680
681 // Search for the pivot and update iw_l and iw_u if the pivot is not the
682 // diagonal element
683 if ((thresh > zero) && (k < (n - 1)))
684 {
685 maximum = std::abs (w_data[k]) / thresh;
686 max_ind = perm[k];
687 for (it = iw_l.begin (); it != iw_l.end (); ++it)
688 {
689 p_perm = iperm[*it];
690 if (std::abs (w_data[p_perm]) > maximum)
691 {
692 maximum = std::abs (w_data[p_perm]);
693 max_ind = *it;
694 it2 = it;
695 }
696 }
697 // If the pivot is not the diagonal element update all
698 p_perm = iperm[max_ind];
699 if (max_ind != perm[k])
700 {
701 iw_l.erase (it2);
702 if (w_data[k] != zero)
703 iw_l.insert (perm[k]);
704 else
705 iw_u.insert (k);
706 // Update permutation vectors and swap data
707 iperm[perm[p_perm]] = k;
708 iperm[perm[k]] = p_perm;
709 std::swap (perm[k], perm[p_perm]);
710 std::swap (w_data[k], w_data[p_perm]);
711 }
712
713 }
714
715 // Drop elements in the L part in the IKJ version,
716 // and from the U part in the JKI version.
717 it = iw_l.begin ();
718 while (it != iw_l.end ())
719 {
720 p_perm = iperm[*it];
721 if (droptol > zero)
722 if (std::abs (w_data[p_perm]) < (droptol * cols_norm[k]))
723 {
724 if (opt != OFF)
725 total_sum += w_data[p_perm];
726 w_data[p_perm] = zero;
727 it = iw_l.erase (it);
728 continue;
729 }
730
731 it++;
732 }
733
734 // If milu == [row|col] summation is preserved.
735 // Compensate diagonal element.
736 if (opt != OFF)
737 {
738 if ((total_sum > zero) && (w_data[k] == zero))
739 iw_u.insert (k);
740 w_data[k] += total_sum;
741 }
742
743 // Check if the pivot is zero and if udiag is active.
744 // NOTE: If the pivot == 0 and udiag is active, then if milu = [col|row]
745 // will not preserve the row sum for that column/row.
746 if (w_data[k] == zero)
747 {
748 if (udiag != 1)
749 error ("ilu: encountered a pivot equal to 0");
750
751 w_data[k] = droptol;
752 iw_u.insert (k);
753 }
754
755 // Scale the elements on the L part for IKJ version (milu = [col|off])
756 if (opt != ROW)
757 for (it = iw_l.begin (); it != iw_l.end (); ++it)
758 {
759 p_perm = iperm[*it];
760 w_data[p_perm] = w_data[p_perm] / w_data[k];
761 }
762
763 if ((max_len_u - total_len_u) < n)
764 {
765 max_len_u += (0.1 * max_len_u) > n ? 0.1 * max_len_u : n;
766 data_out_u.resize (dim_vector (max_len_u, 1));
767 data_u = data_out_u.rwdata ();
768 ridx_out_u.resize (dim_vector (max_len_u, 1));
769 ridx_u = ridx_out_u.rwdata ();
770 }
771
772 if ((max_len_l - total_len_l) < n)
773 {
774 max_len_l += (0.1 * max_len_l) > n ? 0.1 * max_len_l : n;
775 data_out_l.resize (dim_vector (max_len_l, 1));
776 data_l = data_out_l.rwdata ();
777 ridx_out_l.resize (dim_vector (max_len_l, 1));
778 ridx_l = ridx_out_l.rwdata ();
779 }
780
781 // Expand working vector into U.
782 w_len_u = 0;
783 for (it = iw_u.begin (); it != iw_u.end (); ++it)
784 {
785 if (w_data[*it] != zero)
786 {
787 data_u[total_len_u + w_len_u] = w_data[*it];
788 ridx_u[total_len_u + w_len_u] = *it;
789 w_len_u++;
790 }
791 w_data[*it] = 0;
792 }
793
794 // Expand working vector into L.
795 w_len_l = 0;
796 for (it = iw_l.begin (); it != iw_l.end (); ++it)
797 {
798 p_perm = iperm[*it];
799 if (w_data[p_perm] != zero)
800 {
801 data_l[total_len_l + w_len_l] = w_data[p_perm];
802 ridx_l[total_len_l + w_len_l] = *it;
803 w_len_l++;
804 }
805 w_data[p_perm] = 0;
806 }
807 total_len_u += w_len_u;
808 total_len_l += w_len_l;
809
810 // Check if there are too many elements to be indexed with
811 // octave_idx_type type due to fill-in during the process.
812 if (total_len_l < 0 || total_len_u < 0)
813 error ("ilu: Integer overflow. Too many fill-in elements in L or U");
814
815 if (opt == ROW)
816 uptr[k] = total_len_u - 1;
817
818 cidx_u[k+1] = cidx_u[k] - cidx_u[0] + w_len_u;
819 cidx_l[k+1] = cidx_l[k] - cidx_l[0] + w_len_l;
820
821 iw_l.clear ();
822 iw_u.clear ();
823 }
824
825 octave_matrix_t *L_ptr;
826 octave_matrix_t *U_ptr;
827 octave_matrix_t diag (n, n, n);
828
829 // L and U are interchanged if milu = 'row'. It is a matter
830 // of nomenclature to re-use code with both IKJ and JKI
831 // versions of the algorithm.
832 if (opt == ROW)
833 {
834 L_ptr = &U;
835 U_ptr = &L;
836 L = octave_matrix_t (n, n, total_len_u - n);
837 U = octave_matrix_t (n, n, total_len_l);
838 }
839 else
840 {
841 L_ptr = &L;
842 U_ptr = &U;
843 L = octave_matrix_t (n, n, total_len_l);
844 U = octave_matrix_t (n, n, total_len_u);
845 }
846
847 for (i = 0; i <= n; i++)
848 {
849 L_ptr->cidx (i) = cidx_l[i];
850 U_ptr->cidx (i) = cidx_u[i];
851 if (opt == ROW)
852 U_ptr->cidx (i) -= i;
853 }
854
855 for (i = 0; i < n; i++)
856 {
857 if (opt == ROW)
858 diag.elem (i, i) = data_u[uptr[i]];
859 j = cidx_l[i];
860
861 while (j < cidx_l[i+1])
862 {
863 L_ptr->ridx (j) = ridx_l[j];
864 L_ptr->data (j) = data_l[j];
865 j++;
866 }
867 j = cidx_u[i];
868
869 while (j < cidx_u[i+1])
870 {
871 c = j;
872 if (opt == ROW)
873 {
874 // The diagonal is removed from L if milu = 'row'.
875 // That is because is convenient to have it inside
876 // the L part to carry out the process.
877 if (ridx_u[j] == i)
878 {
879 j++;
880 continue;
881 }
882 else
883 c -= i;
884 }
885 U_ptr->data (c) = data_u[j];
886 U_ptr->ridx (c) = ridx_u[j];
887 j++;
888 }
889 }
890
891 if (opt == ROW)
892 {
893 U = U.transpose ();
894 // The diagonal, conveniently permuted is added to U
895 U += diag.index (idx_vector::colon, perm_vec);
896 L = L.transpose ();
897 }
898}
899
900DEFUN (__ilutp__, args, nargout,
901 doc: /* -*- texinfo -*-
902@deftypefn {} {[@var{L}, @var{U}] =} __ilutp__ (@var{A}, @var{droptol}, @var{thresh}, @var{milu}, @var{udiag})
903@deftypefnx {} {[@var{L}, @var{U}, @var{P}] =} __ilutp__ (@dots{})
904Undocumented internal function.
905@end deftypefn */)
906{
907 if (args.length () != 5)
908 print_usage ();
909
910 octave_value_list retval;
911 double droptol = args(1).double_value ();
912 double thresh = args(2).double_value ();
913 std::string milu = args(3).string_value ();
914 double udiag = args(4).double_value ();
915
916 octave_value_list arg_list;
917 octave_idx_type nnz_u, nnz_l;
918 if (! args(0).iscomplex ())
919 {
920 SparseMatrix sm = args(0).sparse_matrix_value ();
921 SparseMatrix U, L;
922 nnz_u = (Ftriu (ovl (sm))(0).sparse_matrix_value ()).nnz ();
923 nnz_l = (Ftril (ovl (sm, -1))(0).sparse_matrix_value ()).nnz ();
924 Array<double> rc_norm;
925 if (milu == "row")
926 rc_norm = xrownorms (sm);
927 else
928 rc_norm = xcolnorms (sm);
929 Array<octave_idx_type> perm (dim_vector (sm.cols (), 1));
930
931 ilu_tp<SparseMatrix, double> (sm, L, U, nnz_u, nnz_l,
932 rc_norm.rwdata (),
933 perm, droptol, thresh, milu, udiag);
934
935 SparseMatrix speye (DiagMatrix (L.cols (), L.cols (), 1.0));
936 if (milu == "row")
937 {
938 retval(0) = L + speye;
939 if (nargout == 3)
940 {
941 retval(1) = U.index (idx_vector::colon, perm);
942 retval(2) = speye.index (idx_vector::colon, perm);
943 }
944 else
945 retval(1) = U;
946 }
947 else
948 {
949 retval(1) = U;
950 if (nargout == 3)
951 {
952 retval(0) = L.index (perm, idx_vector::colon) + speye;
953 retval(2) = speye.index (perm, idx_vector::colon);
954 }
955 else
956 retval(0) = L + speye.index (idx_vector::colon, perm);
957 }
958 }
959 else
960 {
961 SparseComplexMatrix sm = args(0).sparse_complex_matrix_value ();
963 nnz_u = (Ftriu (ovl (sm))(0).sparse_complex_matrix_value ()).nnz ();
964 nnz_l = (Ftril (ovl (sm, -1))(0).sparse_complex_matrix_value ()).nnz ();
965 Array<Complex> rc_norm;
966 if (milu == "row")
967 rc_norm = xrownorms (sm);
968 else
969 rc_norm = xcolnorms (sm);
970 Array<octave_idx_type> perm (dim_vector (sm.cols (), 1));
971
972 ilu_tp<SparseComplexMatrix, Complex>
973 (sm, L, U, nnz_u, nnz_l, rc_norm.rwdata (), perm,
974 Complex (droptol), Complex (thresh), milu, udiag);
975
976 SparseMatrix speye (DiagMatrix (L.cols (), L.cols (), 1.0));
977 if (milu == "row")
978 {
979 retval(0) = L + speye;
980 if (nargout == 3)
981 {
982 retval(1) = U.index (idx_vector::colon, perm);
983 retval(2) = speye.index (idx_vector::colon, perm);
984 }
985 else if (nargout == 2)
986 retval(1) = U;
987 }
988 else
989 {
990 retval(1) = U;
991 if (nargout == 3)
992 {
993 retval(0) = L.index (perm, idx_vector::colon) + speye;
994 retval(2) = speye.index (perm, idx_vector::colon);
995 }
996 else
997 retval(0) = L + speye.index (idx_vector::colon, perm);
998 }
999 }
1000
1001 return retval;
1002}
1003
1004/*
1005## No test needed for internal helper function.
1006%!assert (1)
1007*/
1008
1009OCTAVE_END_NAMESPACE(octave)
void ilu_0(octave_matrix_t &sm, const std::string milu="off")
Definition __ilu__.cc:47
void ilu_tp(octave_matrix_t &sm, octave_matrix_t &L, octave_matrix_t &U, octave_idx_type nnz_u, octave_idx_type nnz_l, T *cols_norm, Array< octave_idx_type > &perm_vec, const T droptol=T(0), const T thresh=T(0), const std::string milu="off", const double udiag=0)
Definition __ilu__.cc:528
void ilu_crout(octave_matrix_t &sm_l, octave_matrix_t &sm_u, octave_matrix_t &L, octave_matrix_t &U, T *cols_norm, T *rows_norm, const T droptol=0, const std::string milu="off")
Definition __ilu__.cc:187
N Dimensional Array with copy-on-write semantics.
Definition Array-base.h:130
void resize(const dim_vector &dv, const T &rfv)
Size of the specified dimension.
T * rwdata()
Size of the specified dimension.
octave_idx_type cols() const
Definition Sparse.h:351
Sparse< T, Alloc > index(const octave::idx_vector &i, bool resize_ok=false) const
Definition Sparse.cc:1447
Vector representing the dimensions (size) of an Array.
Definition dim-vector.h:92
OCTAVE_BEGIN_NAMESPACE(octave) static octave_value daspk_fcn
void print_usage()
Definition defun-int.h:72
#define DEFUN(name, args_name, nargout_name, doc)
Macro to define a builtin function.
Definition defun.h:56
void error(const char *fmt,...)
Definition error.cc:1008
std::complex< double > Complex
Definition oct-cmplx.h:33
#define OCTAVE_LOCAL_BUFFER(T, buf, size)
Definition oct-locbuf.h:44
RowVector xcolnorms(const Matrix &m, double p)
Definition oct-norm.cc:1108
ColumnVector xrownorms(const Matrix &m, double p)
Definition oct-norm.cc:1108
octave_value_list ovl(const OV_Args &... args)
Construct an octave_value_list with less typing.
Definition ovl.h:217
octave_value_list Ftril(const octave_value_list &args, int)
Definition tril.cc:382
octave_value_list Ftriu(const octave_value_list &args, int)
Definition tril.cc:431