GNU Octave 10.1.0
A high-level interpreted language, primarily intended for numerical computations, mostly compatible with Matlab
 
Loading...
Searching...
No Matches
__ilu__.cc
Go to the documentation of this file.
1////////////////////////////////////////////////////////////////////////
2//
3// Copyright (C) 2013-2025 The Octave Project Developers
4//
5// See the file COPYRIGHT.md in the top-level directory of this
6// distribution or <https://octave.org/copyright/>.
7//
8// This file is part of Octave.
9//
10// Octave is free software: you can redistribute it and/or modify it
11// under the terms of the GNU General Public License as published by
12// the Free Software Foundation, either version 3 of the License, or
13// (at your option) any later version.
14//
15// Octave is distributed in the hope that it will be useful, but
16// WITHOUT ANY WARRANTY; without even the implied warranty of
17// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18// GNU General Public License for more details.
19//
20// You should have received a copy of the GNU General Public License
21// along with Octave; see the file COPYING. If not, see
22// <https://www.gnu.org/licenses/>.
23//
24////////////////////////////////////////////////////////////////////////
25
26#if defined (HAVE_CONFIG_H)
27# include "config.h"
28#endif
29
30#include "oct-locbuf.h"
31#include "oct-norm.h"
32
33#include "defun.h"
34#include "error.h"
35
36#include "builtin-defun-decls.h"
37
39
40// This function implements the IKJ and JKI variants of Gaussian elimination to
41// perform the ILU0 decomposition. The behavior is controlled by milu
42// parameter. If milu = ['off'|'col'] the JKI version is performed taking
43// advantage of CCS format of the input matrix. If milu = 'row' the input
44// matrix has to be transposed to obtain the equivalent CRS structure so we can
45// work efficiently with rows. In this case IKJ version is used.
46template <typename octave_matrix_t, typename T>
47void ilu_0 (octave_matrix_t& sm, const std::string milu = "off")
48{
49 const octave_idx_type n = sm.cols ();
50 octave_idx_type j1, j2, jrow, jw, i, j, k, jj;
51 T r;
52 T tl = 0;
53
54 enum {OFF, ROW, COL};
55 char opt;
56 if (milu == "row")
57 {
58 opt = ROW;
59 sm = sm.transpose ();
60 }
61 else if (milu == "col")
62 opt = COL;
63 else
64 opt = OFF;
65
66 // Input matrix pointers
67 octave_idx_type *cidx = sm.cidx ();
68 octave_idx_type *ridx = sm.ridx ();
69 T *data = sm.data ();
70
71 // Working arrays
74
75 // Initialize working arrays
76 for (i = 0; i < n; i++)
77 iw[i] = -1;
78
79 // Loop over all columns
80 for (k = 0; k < n; k++)
81 {
82 j1 = cidx[k];
83 j2 = cidx[k+1];
84
85 if (j1 == j2)
86 error ("ilu: A has a zero on the diagonal");
87
88 for (j = j1; j < j2; j++)
89 iw[ridx[j]] = j;
90
91 r = 0;
92 j = j1;
93 jrow = ridx[j1];
94 while ((jrow < k) && (j < j2))
95 {
96 if (opt == ROW)
97 {
98 tl = data[j] / data[uptr[jrow]];
99 data[j] = tl;
100 }
101 for (jj = uptr[jrow] + 1; jj < cidx[jrow+1]; jj++)
102 {
103 jw = iw[ridx[jj]];
104 if (jw != -1)
105 if (opt == ROW)
106 data[jw] -= tl * data[jj];
107 else
108 data[jw] -= data[j] * data[jj];
109
110 else
111 // That is for the milu='row'
112 if (opt == ROW)
113 r += tl * data[jj];
114 else if (opt == COL)
115 r += data[j] * data[jj];
116 }
117 j++;
118 jrow = ridx[j];
119 }
120 uptr[k] = j;
121 if (opt != OFF)
122 data[uptr[k]] -= r;
123
124 if (opt != ROW)
125 for (jj = uptr[k] + 1; jj < cidx[k+1]; jj++)
126 data[jj] /= data[uptr[k]];
127
128 if (k != jrow)
129 error ("ilu: A has a zero on the diagonal");
130
131 if (data[j] == T(0))
132 error ("ilu: encountered a pivot equal to 0");
133
134 for (i = j1; i < j2; i++)
135 iw[ridx[i]] = -1;
136 }
137
138 if (opt == ROW)
139 sm = sm.transpose ();
140}
141
142DEFUN (__ilu0__, args, ,
143 doc: /* -*- texinfo -*-
144@deftypefn {} {[@var{L}, @var{U}] =} __ilu0__ (@var{A}, @var{milu})
145@deftypefnx {} {[@var{L}, @var{U}, @var{P}] =} __ilu0__ (@var{A}, @var{milu})
146Undocumented internal function.
147@end deftypefn */)
148{
149 if (args.length () != 2)
150 print_usage ();
151
152 octave_value_list retval (2);
153
154 std::string milu = args(1).string_value ();
155
156 // In ILU0 algorithm the zero-pattern of the input matrix is preserved so
157 // its structure does not change during the algorithm. The same input
158 // matrix is used to build the output matrix due to that fact.
159 octave_value_list arg_list;
160 if (! args(0).iscomplex ())
161 {
162 SparseMatrix sm = args(0).sparse_matrix_value ();
163 SparseMatrix speye (DiagMatrix (sm.cols (), sm.cols (), 1.0));
164
165 ilu_0 <SparseMatrix, double> (sm, milu);
166
167 retval(0) = speye + Ftril (ovl (sm, -1))(0).sparse_matrix_value ();
168 retval(1) = Ftriu (ovl (sm))(0).sparse_matrix_value ();
169 }
170 else
171 {
172 SparseComplexMatrix sm = args(0).sparse_complex_matrix_value ();
173 SparseMatrix speye (DiagMatrix (sm.cols (), sm.cols (), 1.0));
174
175 ilu_0 <SparseComplexMatrix, Complex> (sm, milu);
176
177 retval(0) = speye +
178 Ftril (ovl (sm, -1))(0).sparse_complex_matrix_value ();
179 retval(1) = Ftriu (ovl (sm))(0).sparse_complex_matrix_value ();
180 }
181
182 return retval;
183}
184
185template <typename octave_matrix_t, typename T>
186void
187ilu_crout (octave_matrix_t& sm_l, octave_matrix_t& sm_u,
188 octave_matrix_t& L, octave_matrix_t& U, T *cols_norm,
189 T *rows_norm, const T droptol = 0,
190 const std::string milu = "off")
191{
192 // Map the strings into chars for faster comparing inside loops
193 char opt;
194 enum {OFF, ROW, COL};
195 if (milu == "row")
196 opt = ROW;
197 else if (milu == "col")
198 opt = COL;
199 else
200 opt = OFF;
201
202 octave_idx_type jrow, i, j, k, jj, total_len_l, total_len_u, max_len_u,
203 max_len_l, w_len_u, w_len_l, cols_list_len, rows_list_len;
204
205 const octave_idx_type n = sm_u.cols ();
206 sm_u = sm_u.transpose ();
207
208 max_len_u = sm_u.nnz ();
209 max_len_u += (0.1 * max_len_u) > n ? 0.1 * max_len_u : n;
210 max_len_l = sm_l.nnz ();
211 max_len_l += (0.1 * max_len_l) > n ? 0.1 * max_len_l : n;
212
213 // Extract pointers to the arrays for faster access inside loops
214 octave_idx_type *cidx_in_u = sm_u.cidx ();
215 octave_idx_type *ridx_in_u = sm_u.ridx ();
216 T *data_in_u = sm_u.data ();
217 octave_idx_type *cidx_in_l = sm_l.cidx ();
218 octave_idx_type *ridx_in_l = sm_l.ridx ();
219 T *data_in_l = sm_l.data ();
220
221 // L output arrays
222 Array <octave_idx_type> ridx_out_l (dim_vector (max_len_l, 1));
223 octave_idx_type *ridx_l = ridx_out_l.rwdata ();
224 Array <T> data_out_l (dim_vector (max_len_l, 1));
225 T *data_l = data_out_l.rwdata ();
226
227 // U output arrays
228 Array <octave_idx_type> ridx_out_u (dim_vector (max_len_u, 1));
229 octave_idx_type *ridx_u = ridx_out_u.rwdata ();
230 Array <T> data_out_u (dim_vector (max_len_u, 1));
231 T *data_u = data_out_u.rwdata ();
232
233 // Working arrays
234 OCTAVE_LOCAL_BUFFER (octave_idx_type, cidx_l, n + 1);
235 OCTAVE_LOCAL_BUFFER (octave_idx_type, cidx_u, n + 1);
236 OCTAVE_LOCAL_BUFFER (octave_idx_type, cols_list, n);
237 OCTAVE_LOCAL_BUFFER (octave_idx_type, rows_list, n);
238 OCTAVE_LOCAL_BUFFER (T, w_data_l, n);
239 OCTAVE_LOCAL_BUFFER (T, w_data_u, n);
242 OCTAVE_LOCAL_BUFFER (T, cr_sum, n);
243
244 T zero = T (0);
245
246 // Initialize working arrays
247 cidx_u[0] = cidx_in_u[0];
248 cidx_l[0] = cidx_in_l[0];
249 for (i = 0; i < n; i++)
250 {
251 w_data_u[i] = zero;
252 w_data_l[i] = zero;
253 cr_sum[i] = zero;
254 }
255
256 total_len_u = 0;
257 total_len_l = 0;
258 cols_list_len = 0;
259 rows_list_len = 0;
260
261 // Loop over all columns
262 for (k = 0; k < n; k++)
263 {
264 // Load the working column and working row
265 for (i = cidx_in_l[k]; i < cidx_in_l[k+1]; i++)
266 w_data_l[ridx_in_l[i]] = data_in_l[i];
267
268 for (i = cidx_in_u[k]; i < cidx_in_u[k+1]; i++)
269 w_data_u[ridx_in_u[i]] = data_in_u[i];
270
271 // Update U working row
272 for (j = 0; j < rows_list_len; j++)
273 {
274 if ((Ufirst[rows_list[j]] != -1))
275 for (jj = Ufirst[rows_list[j]]; jj < cidx_u[rows_list[j]+1]; jj++)
276 {
277 jrow = ridx_u[jj];
278 w_data_u[jrow] -= data_u[jj] * data_l[Lfirst[rows_list[j]]];
279 }
280 }
281 // Update L working column
282 for (j = 0; j < cols_list_len; j++)
283 {
284 if (Lfirst[cols_list[j]] != -1)
285 for (jj = Lfirst[cols_list[j]]; jj < cidx_l[cols_list[j]+1]; jj++)
286 {
287 jrow = ridx_l[jj];
288 w_data_l[jrow] -= data_l[jj] * data_u[Ufirst[cols_list[j]]];
289 }
290 }
291
292 if ((max_len_u - total_len_u) < n)
293 {
294 max_len_u += (0.1 * max_len_u) > n ? 0.1 * max_len_u : n;
295 data_out_u.resize (dim_vector (max_len_u, 1));
296 data_u = data_out_u.rwdata ();
297 ridx_out_u.resize (dim_vector (max_len_u, 1));
298 ridx_u = ridx_out_u.rwdata ();
299 }
300
301 if ((max_len_l - total_len_l) < n)
302 {
303 max_len_l += (0.1 * max_len_l) > n ? 0.1 * max_len_l : n;
304 data_out_l.resize (dim_vector (max_len_l, 1));
305 data_l = data_out_l.rwdata ();
306 ridx_out_l.resize (dim_vector (max_len_l, 1));
307 ridx_l = ridx_out_l.rwdata ();
308 }
309
310 // Expand the working row into the U output data structures
311 w_len_l = 0;
312 data_u[total_len_u] = w_data_u[k];
313 ridx_u[total_len_u] = k;
314 w_len_u = 1;
315 for (i = k + 1; i < n; i++)
316 {
317 if (w_data_u[i] != zero)
318 {
319 if (std::abs (w_data_u[i]) < (droptol * rows_norm[k]))
320 {
321 if (opt == ROW)
322 cr_sum[k] += w_data_u[i];
323 else if (opt == COL)
324 cr_sum[i] += w_data_u[i];
325 }
326 else
327 {
328 data_u[total_len_u + w_len_u] = w_data_u[i];
329 ridx_u[total_len_u + w_len_u] = i;
330 w_len_u++;
331 }
332 }
333
334 // Expand the working column into the L output data structures
335 if (w_data_l[i] != zero)
336 {
337 if (std::abs (w_data_l[i]) < (droptol * cols_norm[k]))
338 {
339 if (opt == COL)
340 cr_sum[k] += w_data_l[i];
341 else if (opt == ROW)
342 cr_sum[i] += w_data_l[i];
343 }
344 else
345 {
346 data_l[total_len_l + w_len_l] = w_data_l[i];
347 ridx_l[total_len_l + w_len_l] = i;
348 w_len_l++;
349 }
350 }
351 w_data_u[i] = zero;
352 w_data_l[i] = zero;
353 }
354
355 // Compensate row and column sums --> milu option
356 if (opt == COL || opt == ROW)
357 data_u[total_len_u] += cr_sum[k];
358
359 // Check if the pivot is zero
360 if (data_u[total_len_u] == zero)
361 error ("ilu: encountered a pivot equal to 0");
362
363 // Scale the elements in L by the pivot
364 for (i = total_len_l ; i < (total_len_l + w_len_l); i++)
365 data_l[i] /= data_u[total_len_u];
366
367 total_len_u += w_len_u;
368 total_len_l += w_len_l;
369 // Check if there are too many elements to be indexed with
370 // octave_idx_type type due to fill-in during the process.
371 if (total_len_l < 0 || total_len_u < 0)
372 error ("ilu: integer overflow. Too many fill-in elements in L or U");
373
374 cidx_u[k+1] = cidx_u[k] - cidx_u[0] + w_len_u;
375 cidx_l[k+1] = cidx_l[k] - cidx_l[0] + w_len_l;
376
377 // The tricky part of the algorithm. The arrays pointing to the first
378 // working element of each column in the next iteration (Lfirst) or
379 // the first working element of each row (Ufirst) are updated. Also the
380 // arrays working as lists cols_list and rows_list are filled with
381 // indices pointing to Ufirst and Lfirst respectively.
382 // FIXME: Maybe the -1 indicating in Ufirst and Lfirst, that no elements
383 // have to be considered in a certain column or row in next iteration,
384 // can be removed. It feels safer to me using such an indicator.
385 if (k < (n - 1))
386 {
387 if (w_len_u > 0)
388 Ufirst[k] = cidx_u[k];
389 else
390 Ufirst[k] = -1;
391 if (w_len_l > 0)
392 Lfirst[k] = cidx_l[k];
393 else
394 Lfirst[k] = -1;
395 cols_list_len = 0;
396 rows_list_len = 0;
397 for (i = 0; i <= k; i++)
398 {
399 if (Ufirst[i] != -1)
400 {
401 jj = ridx_u[Ufirst[i]];
402 if (jj < (k + 1))
403 {
404 if (Ufirst[i] < (cidx_u[i+1]))
405 {
406 Ufirst[i]++;
407 if (Ufirst[i] == cidx_u[i+1])
408 Ufirst[i] = -1;
409 else
410 jj = ridx_u[Ufirst[i]];
411 }
412 }
413 if (jj == (k + 1))
414 {
415 cols_list[cols_list_len] = i;
416 cols_list_len++;
417 }
418 }
419
420 if (Lfirst[i] != -1)
421 {
422 jj = ridx_l[Lfirst[i]];
423 if (jj < (k + 1))
424 if (Lfirst[i] < (cidx_l[i+1]))
425 {
426 Lfirst[i]++;
427 if (Lfirst[i] == cidx_l[i+1])
428 Lfirst[i] = -1;
429 else
430 jj = ridx_l[Lfirst[i]];
431 }
432 if (jj == (k + 1))
433 {
434 rows_list[rows_list_len] = i;
435 rows_list_len++;
436 }
437 }
438 }
439 }
440 }
441
442 // Build the output matrices
443 L = octave_matrix_t (n, n, total_len_l);
444 U = octave_matrix_t (n, n, total_len_u);
445
446 // FIXME: Can these loops be replaced by std::copy?
447 for (i = 0; i <= n; i++)
448 L.cidx (i) = cidx_l[i];
449
450 for (i = 0; i < total_len_l; i++)
451 {
452 L.ridx (i) = ridx_l[i];
453 L.data (i) = data_l[i];
454 }
455
456 for (i = 0; i <= n; i++)
457 U.cidx (i) = cidx_u[i];
458
459 for (i = 0; i < total_len_u; i++)
460 {
461 U.ridx (i) = ridx_u[i];
462 U.data (i) = data_u[i];
463 }
464
465 U = U.transpose ();
466}
467
468DEFUN (__iluc__, args, ,
469 doc: /* -*- texinfo -*-
470@deftypefn {} {[@var{L}, @var{U}] =} __iluc__ (@var{A}, @var{droptol}, @var{milu})
471Undocumented internal function.
472@end deftypefn */)
473{
474 if (args.length () != 3)
475 print_usage ();
476
477 double droptol = args(1).double_value ();
478 std::string milu = args(2).string_value ();
479
480 if (! args(0).iscomplex ())
481 {
482 SparseMatrix sm = args(0).sparse_matrix_value ();
483 SparseMatrix sm_u = Ftriu (ovl (sm))(0).sparse_matrix_value ();
484 SparseMatrix sm_l = Ftril (ovl (sm, -1))(0).sparse_matrix_value ();
485 SparseMatrix U, L;
486
487 RowVector sm_col_norms = xcolnorms (sm);
488 ColumnVector sm_row_norms = xrownorms (sm);
489 ilu_crout <SparseMatrix, double> (sm_l, sm_u, L, U,
490 sm_col_norms.rwdata (),
491 sm_row_norms.rwdata (),
492 droptol, milu);
493
494 SparseMatrix speye (DiagMatrix (L.cols (), L.cols (), 1.0));
495
496 return ovl (L + speye, U);
497 }
498 else
499 {
500 SparseComplexMatrix sm = args(0).sparse_complex_matrix_value ();
501 SparseComplexMatrix sm_u = Ftriu (ovl (sm))(0).sparse_complex_matrix_value ();
502 SparseComplexMatrix sm_l = Ftril (ovl (sm, -1))(0).sparse_complex_matrix_value ();
504 Array<Complex> cols_norm = xcolnorms (sm);
505 Array<Complex> rows_norm = xrownorms (sm);
506
507 ilu_crout <SparseComplexMatrix, Complex> (sm_l, sm_u, L, U,
508 cols_norm.rwdata (),
509 rows_norm.rwdata (),
510 Complex (droptol), milu);
511
512 SparseMatrix speye (DiagMatrix (L.cols (), L.cols (), 1.0));
513
514 return ovl (L + speye, U);
515 }
516}
517
518// This function implements the IKJ and JKI variants of gaussian elimination
519// to perform the ILUTP decomposition. The behavior is controlled by milu
520// parameter. If milu = ['off'|'col'] the JKI version is performed taking
521// advantage of CCS format of the input matrix. Row pivoting is performed.
522// If milu = 'row' the input matrix has to be transposed to obtain the
523// equivalent CRS structure so we can work efficiently with rows. In that
524// case IKJ version is used and column pivoting is performed.
525
526template <typename octave_matrix_t, typename T>
527void
528ilu_tp (octave_matrix_t& sm, octave_matrix_t& L, octave_matrix_t& U,
529 octave_idx_type nnz_u, octave_idx_type nnz_l, T *cols_norm,
530 Array <octave_idx_type>& perm_vec, const T droptol = T(0),
531 const T thresh = T(0), const std::string milu = "off",
532 const double udiag = 0)
533{
534 char opt;
535 enum {OFF, ROW, COL};
536 if (milu == "row")
537 opt = ROW;
538 else if (milu == "col")
539 opt = COL;
540 else
541 opt = OFF;
542
543 const octave_idx_type n = sm.cols ();
544
545 // This is necessary for the JKI (milu = "row") variant.
546 if (opt == ROW)
547 sm = sm.transpose ();
548
549 // Extract pointers to the arrays for faster access inside loops
550 octave_idx_type *cidx_in = sm.cidx ();
551 octave_idx_type *ridx_in = sm.ridx ();
552 T *data_in = sm.data ();
553 octave_idx_type jrow, i, j, k, jj, c, total_len_l, total_len_u, p_perm,
554 max_ind, max_len_l, max_len_u;
555 T zero = T(0);
556
557 T tl = zero, aux, maximum;
558
559 max_len_u = nnz_u;
560 max_len_u += (0.1 * max_len_u) > n ? 0.1 * max_len_u : n;
561 max_len_l = nnz_l;
562 max_len_l += (0.1 * max_len_l) > n ? 0.1 * max_len_l : n;
563
564 // Extract pointers to the arrays for faster access inside loops
565 Array <octave_idx_type> cidx_out_l (dim_vector (n + 1, 1));
566 octave_idx_type *cidx_l = cidx_out_l.rwdata ();
567 Array <octave_idx_type> ridx_out_l (dim_vector (max_len_l, 1));
568 octave_idx_type *ridx_l = ridx_out_l.rwdata ();
569 Array <T> data_out_l (dim_vector (max_len_l, 1));
570 T *data_l = data_out_l.rwdata ();
571
572 // Data for U
573 Array <octave_idx_type> cidx_out_u (dim_vector (n + 1, 1));
574 octave_idx_type *cidx_u = cidx_out_u.rwdata ();
575 Array <octave_idx_type> ridx_out_u (dim_vector (max_len_u, 1));
576 octave_idx_type *ridx_u = ridx_out_u.rwdata ();
577 Array <T> data_out_u (dim_vector (max_len_u, 1));
578 T *data_u = data_out_u.rwdata ();
579
580 // Working arrays and permutation arrays
581 octave_idx_type w_len_u, w_len_l;
582 T total_sum, partial_col_sum = zero, partial_row_sum = zero;
583 std::set <octave_idx_type> iw_l;
584 std::set <octave_idx_type> iw_u;
585 std::set <octave_idx_type>::iterator it, it2;
586 OCTAVE_LOCAL_BUFFER (T, w_data, n);
588 octave_idx_type *perm = perm_vec.rwdata ();
590
591 // Initialize working and permutation arrays
592 cidx_l[0] = cidx_in[0];
593 cidx_u[0] = cidx_in[0];
594 for (i = 0; i < n; i++)
595 {
596 w_data[i] = 0;
597 perm[i] = i;
598 iperm[i] = i;
599 }
600 total_len_u = 0;
601 total_len_l = 0;
602
603 // Loop over all columns
604 for (k = 0; k < n; k++)
605 {
606 for (j = cidx_in[k]; j < cidx_in[k+1]; j++)
607 {
608 p_perm = iperm[ridx_in[j]];
609 w_data[iperm[ridx_in[j]]] = data_in[j];
610 if (p_perm > k)
611 iw_l.insert (ridx_in[j]);
612 else
613 iw_u.insert (p_perm);
614 }
615
616 it = iw_u.begin ();
617 jrow = *it;
618 total_sum = zero;
619 while ((jrow < k) && (it != iw_u.end ()))
620 {
621 if (opt == COL)
622 partial_col_sum = w_data[jrow];
623 if (w_data[jrow] != zero)
624 {
625 if (opt == ROW)
626 {
627 partial_row_sum = w_data[jrow];
628 tl = w_data[jrow] / data_u[uptr[jrow]];
629 }
630 for (jj = cidx_l[jrow]; jj < cidx_l[jrow+1]; jj++)
631 {
632 p_perm = iperm[ridx_l[jj]];
633 aux = w_data[p_perm];
634 if (opt == ROW)
635 {
636 w_data[p_perm] -= tl * data_l[jj];
637 partial_row_sum += tl * data_l[jj];
638 }
639 else
640 {
641 tl = data_l[jj] * w_data[jrow];
642 w_data[p_perm] -= tl;
643 if (opt == COL)
644 partial_col_sum += tl;
645 }
646
647 if ((aux == zero) && (w_data[p_perm] != zero))
648 {
649 if (p_perm > k)
650 iw_l.insert (ridx_l[jj]);
651 else
652 iw_u.insert (p_perm);
653 }
654 }
655
656 // Drop element from the U part in IKJ
657 // and L part in JKI variant (milu = [col|off])
658 if ((std::abs (w_data[jrow]) < (droptol * cols_norm[k]))
659 && (w_data[jrow] != zero))
660 {
661 if (opt == COL)
662 total_sum += partial_col_sum;
663 else if (opt == ROW)
664 total_sum += partial_row_sum;
665 w_data[jrow] = zero;
666 it2 = it;
667 it++;
668 iw_u.erase (it2);
669 jrow = *it;
670 continue;
671 }
672 else
673 // This is the element scaled by the pivot
674 // in the actual iteration
675 if (opt == ROW)
676 w_data[jrow] = tl;
677 }
678 jrow = *(++it);
679 }
680
681 // Search for the pivot and update iw_l and iw_u if the pivot is not the
682 // diagonal element
683 if ((thresh > zero) && (k < (n - 1)))
684 {
685 maximum = std::abs (w_data[k]) / thresh;
686 max_ind = perm[k];
687 for (it = iw_l.begin (); it != iw_l.end (); ++it)
688 {
689 p_perm = iperm[*it];
690 if (std::abs (w_data[p_perm]) > maximum)
691 {
692 maximum = std::abs (w_data[p_perm]);
693 max_ind = *it;
694 it2 = it;
695 }
696 }
697 // If the pivot is not the diagonal element update all
698 p_perm = iperm[max_ind];
699 if (max_ind != perm[k])
700 {
701 iw_l.erase (it2);
702 if (w_data[k] != zero)
703 iw_l.insert (perm[k]);
704 else
705 iw_u.insert (k);
706 // Swap data and update permutation vectors
707 aux = w_data[k];
708 iperm[perm[p_perm]] = k;
709 iperm[perm[k]] = p_perm;
710 c = perm[k];
711 perm[k] = perm[p_perm];
712 perm[p_perm] = c;
713 w_data[k] = w_data[p_perm];
714 w_data[p_perm] = aux;
715 }
716
717 }
718
719 // Drop elements in the L part in the IKJ version,
720 // and from the U part in the JKI version.
721 it = iw_l.begin ();
722 while (it != iw_l.end ())
723 {
724 p_perm = iperm[*it];
725 if (droptol > zero)
726 if (std::abs (w_data[p_perm]) < (droptol * cols_norm[k]))
727 {
728 if (opt != OFF)
729 total_sum += w_data[p_perm];
730 w_data[p_perm] = zero;
731 it2 = it;
732 it++;
733 iw_l.erase (it2);
734 continue;
735 }
736
737 it++;
738 }
739
740 // If milu == [row|col] summation is preserved.
741 // Compensate diagonal element.
742 if (opt != OFF)
743 {
744 if ((total_sum > zero) && (w_data[k] == zero))
745 iw_u.insert (k);
746 w_data[k] += total_sum;
747 }
748
749 // Check if the pivot is zero and if udiag is active.
750 // NOTE: If the pivot == 0 and udiag is active, then if milu = [col|row]
751 // will not preserve the row sum for that column/row.
752 if (w_data[k] == zero)
753 {
754 if (udiag != 1)
755 error ("ilu: encountered a pivot equal to 0");
756
757 w_data[k] = droptol;
758 iw_u.insert (k);
759 }
760
761 // Scale the elements on the L part for IKJ version (milu = [col|off])
762 if (opt != ROW)
763 for (it = iw_l.begin (); it != iw_l.end (); ++it)
764 {
765 p_perm = iperm[*it];
766 w_data[p_perm] = w_data[p_perm] / w_data[k];
767 }
768
769 if ((max_len_u - total_len_u) < n)
770 {
771 max_len_u += (0.1 * max_len_u) > n ? 0.1 * max_len_u : n;
772 data_out_u.resize (dim_vector (max_len_u, 1));
773 data_u = data_out_u.rwdata ();
774 ridx_out_u.resize (dim_vector (max_len_u, 1));
775 ridx_u = ridx_out_u.rwdata ();
776 }
777
778 if ((max_len_l - total_len_l) < n)
779 {
780 max_len_l += (0.1 * max_len_l) > n ? 0.1 * max_len_l : n;
781 data_out_l.resize (dim_vector (max_len_l, 1));
782 data_l = data_out_l.rwdata ();
783 ridx_out_l.resize (dim_vector (max_len_l, 1));
784 ridx_l = ridx_out_l.rwdata ();
785 }
786
787 // Expand working vector into U.
788 w_len_u = 0;
789 for (it = iw_u.begin (); it != iw_u.end (); ++it)
790 {
791 if (w_data[*it] != zero)
792 {
793 data_u[total_len_u + w_len_u] = w_data[*it];
794 ridx_u[total_len_u + w_len_u] = *it;
795 w_len_u++;
796 }
797 w_data[*it] = 0;
798 }
799
800 // Expand working vector into L.
801 w_len_l = 0;
802 for (it = iw_l.begin (); it != iw_l.end (); ++it)
803 {
804 p_perm = iperm[*it];
805 if (w_data[p_perm] != zero)
806 {
807 data_l[total_len_l + w_len_l] = w_data[p_perm];
808 ridx_l[total_len_l + w_len_l] = *it;
809 w_len_l++;
810 }
811 w_data[p_perm] = 0;
812 }
813 total_len_u += w_len_u;
814 total_len_l += w_len_l;
815
816 // Check if there are too many elements to be indexed with
817 // octave_idx_type type due to fill-in during the process.
818 if (total_len_l < 0 || total_len_u < 0)
819 error ("ilu: Integer overflow. Too many fill-in elements in L or U");
820
821 if (opt == ROW)
822 uptr[k] = total_len_u - 1;
823
824 cidx_u[k+1] = cidx_u[k] - cidx_u[0] + w_len_u;
825 cidx_l[k+1] = cidx_l[k] - cidx_l[0] + w_len_l;
826
827 iw_l.clear ();
828 iw_u.clear ();
829 }
830
831 octave_matrix_t *L_ptr;
832 octave_matrix_t *U_ptr;
833 octave_matrix_t diag (n, n, n);
834
835 // L and U are interchanged if milu = 'row'. It is a matter
836 // of nomenclature to re-use code with both IKJ and JKI
837 // versions of the algorithm.
838 if (opt == ROW)
839 {
840 L_ptr = &U;
841 U_ptr = &L;
842 L = octave_matrix_t (n, n, total_len_u - n);
843 U = octave_matrix_t (n, n, total_len_l);
844 }
845 else
846 {
847 L_ptr = &L;
848 U_ptr = &U;
849 L = octave_matrix_t (n, n, total_len_l);
850 U = octave_matrix_t (n, n, total_len_u);
851 }
852
853 for (i = 0; i <= n; i++)
854 {
855 L_ptr->cidx (i) = cidx_l[i];
856 U_ptr->cidx (i) = cidx_u[i];
857 if (opt == ROW)
858 U_ptr->cidx (i) -= i;
859 }
860
861 for (i = 0; i < n; i++)
862 {
863 if (opt == ROW)
864 diag.elem (i, i) = data_u[uptr[i]];
865 j = cidx_l[i];
866
867 while (j < cidx_l[i+1])
868 {
869 L_ptr->ridx (j) = ridx_l[j];
870 L_ptr->data (j) = data_l[j];
871 j++;
872 }
873 j = cidx_u[i];
874
875 while (j < cidx_u[i+1])
876 {
877 c = j;
878 if (opt == ROW)
879 {
880 // The diagonal is removed from L if milu = 'row'.
881 // That is because is convenient to have it inside
882 // the L part to carry out the process.
883 if (ridx_u[j] == i)
884 {
885 j++;
886 continue;
887 }
888 else
889 c -= i;
890 }
891 U_ptr->data (c) = data_u[j];
892 U_ptr->ridx (c) = ridx_u[j];
893 j++;
894 }
895 }
896
897 if (opt == ROW)
898 {
899 U = U.transpose ();
900 // The diagonal, conveniently permuted is added to U
901 U += diag.index (idx_vector::colon, perm_vec);
902 L = L.transpose ();
903 }
904}
905
906DEFUN (__ilutp__, args, nargout,
907 doc: /* -*- texinfo -*-
908@deftypefn {} {[@var{L}, @var{U}] =} __ilutp__ (@var{A}, @var{droptol}, @var{thresh}, @var{milu}, @var{udiag})
909@deftypefnx {} {[@var{L}, @var{U}, @var{P}] =} __ilutp__ (@dots{})
910Undocumented internal function.
911@end deftypefn */)
912{
913 if (args.length () != 5)
914 print_usage ();
915
916 octave_value_list retval;
917 double droptol = args(1).double_value ();
918 double thresh = args(2).double_value ();
919 std::string milu = args(3).string_value ();
920 double udiag = args(4).double_value ();
921
922 octave_value_list arg_list;
923 octave_idx_type nnz_u, nnz_l;
924 if (! args(0).iscomplex ())
925 {
926 SparseMatrix sm = args(0).sparse_matrix_value ();
927 SparseMatrix U, L;
928 nnz_u = (Ftriu (ovl (sm))(0).sparse_matrix_value ()).nnz ();
929 nnz_l = (Ftril (ovl (sm, -1))(0).sparse_matrix_value ()).nnz ();
930 Array <double> rc_norm;
931 if (milu == "row")
932 rc_norm = xrownorms (sm);
933 else
934 rc_norm = xcolnorms (sm);
935 Array <octave_idx_type> perm (dim_vector (sm.cols (), 1));
936
937 ilu_tp <SparseMatrix, double> (sm, L, U, nnz_u, nnz_l,
938 rc_norm.rwdata (),
939 perm, droptol, thresh, milu, udiag);
940
941 SparseMatrix speye (DiagMatrix (L.cols (), L.cols (), 1.0));
942 if (milu == "row")
943 {
944 retval(0) = L + speye;
945 if (nargout == 3)
946 {
947 retval(1) = U.index (idx_vector::colon, perm);
948 retval(2) = speye.index (idx_vector::colon, perm);
949 }
950 else
951 retval(1) = U;
952 }
953 else
954 {
955 retval(1) = U;
956 if (nargout == 3)
957 {
958 retval(0) = L.index (perm, idx_vector::colon) + speye;
959 retval(2) = speye.index (perm, idx_vector::colon);
960 }
961 else
962 retval(0) = L + speye.index (idx_vector::colon, perm);
963 }
964 }
965 else
966 {
967 SparseComplexMatrix sm = args(0).sparse_complex_matrix_value ();
969 nnz_u = (Ftriu (ovl (sm))(0).sparse_complex_matrix_value ()).nnz ();
970 nnz_l = (Ftril (ovl (sm, -1))(0).sparse_complex_matrix_value ()).nnz ();
971 Array <Complex> rc_norm;
972 if (milu == "row")
973 rc_norm = xrownorms (sm);
974 else
975 rc_norm = xcolnorms (sm);
976 Array <octave_idx_type> perm (dim_vector (sm.cols (), 1));
977
978 ilu_tp <SparseComplexMatrix, Complex>
979 (sm, L, U, nnz_u, nnz_l, rc_norm.rwdata (), perm,
980 Complex (droptol), Complex (thresh), milu, udiag);
981
982 SparseMatrix speye (DiagMatrix (L.cols (), L.cols (), 1.0));
983 if (milu == "row")
984 {
985 retval(0) = L + speye;
986 if (nargout == 3)
987 {
988 retval(1) = U.index (idx_vector::colon, perm);
989 retval(2) = speye.index (idx_vector::colon, perm);
990 }
991 else if (nargout == 2)
992 retval(1) = U;
993 }
994 else
995 {
996 retval(1) = U;
997 if (nargout == 3)
998 {
999 retval(0) = L.index (perm, idx_vector::colon) + speye;
1000 retval(2) = speye.index (perm, idx_vector::colon);
1001 }
1002 else
1003 retval(0) = L + speye.index (idx_vector::colon, perm);
1004 }
1005 }
1006
1007 return retval;
1008}
1009
1010/*
1011## No test needed for internal helper function.
1012%!assert (1)
1013*/
1014
1015OCTAVE_END_NAMESPACE(octave)
void ilu_0(octave_matrix_t &sm, const std::string milu="off")
Definition __ilu__.cc:47
void ilu_tp(octave_matrix_t &sm, octave_matrix_t &L, octave_matrix_t &U, octave_idx_type nnz_u, octave_idx_type nnz_l, T *cols_norm, Array< octave_idx_type > &perm_vec, const T droptol=T(0), const T thresh=T(0), const std::string milu="off", const double udiag=0)
Definition __ilu__.cc:528
void ilu_crout(octave_matrix_t &sm_l, octave_matrix_t &sm_u, octave_matrix_t &L, octave_matrix_t &U, T *cols_norm, T *rows_norm, const T droptol=0, const std::string milu="off")
Definition __ilu__.cc:187
N Dimensional Array with copy-on-write semantics.
Definition Array.h:130
void resize(const dim_vector &dv, const T &rfv)
Size of the specified dimension.
T * rwdata()
Size of the specified dimension.
octave_idx_type cols() const
Definition Sparse.h:349
Sparse< T, Alloc > index(const octave::idx_vector &i, bool resize_ok=false) const
Definition Sparse.cc:1433
Vector representing the dimensions (size) of an Array.
Definition dim-vector.h:90
OCTAVE_BEGIN_NAMESPACE(octave) static octave_value daspk_fcn
void print_usage()
Definition defun-int.h:72
#define DEFUN(name, args_name, nargout_name, doc)
Macro to define a builtin function.
Definition defun.h:56
void error(const char *fmt,...)
Definition error.cc:1003
std::complex< double > Complex
Definition oct-cmplx.h:33
#define OCTAVE_LOCAL_BUFFER(T, buf, size)
Definition oct-locbuf.h:44
RowVector xcolnorms(const Matrix &m, double p)
Definition oct-norm.cc:653
ColumnVector xrownorms(const Matrix &m, double p)
Definition oct-norm.cc:653
octave_value_list ovl(const OV_Args &... args)
Construct an octave_value_list with less typing.
Definition ovl.h:217
octave_value_list Ftril(const octave_value_list &args, int)
Definition tril.cc:382
octave_value_list Ftriu(const octave_value_list &args, int)
Definition tril.cc:431