188 octave_matrix_t& L, octave_matrix_t& U, T *cols_norm,
189 T *rows_norm,
const T droptol = 0,
190 const std::string milu =
"off")
194 enum {OFF, ROW, COL};
197 else if (milu ==
"col")
202 octave_idx_type jrow, i, j, k, jj, total_len_l, total_len_u, max_len_u,
203 max_len_l, w_len_u, w_len_l, cols_list_len, rows_list_len;
206 sm_u = sm_u.transpose ();
208 max_len_u = sm_u.nnz ();
209 max_len_u += (0.1 * max_len_u) > n ? 0.1 * max_len_u : n;
210 max_len_l = sm_l.nnz ();
211 max_len_l += (0.1 * max_len_l) > n ? 0.1 * max_len_l : n;
216 T *data_in_u = sm_u.data ();
219 T *data_in_l = sm_l.data ();
225 T *data_l = data_out_l.
rwdata ();
231 T *data_u = data_out_u.
rwdata ();
247 cidx_u[0] = cidx_in_u[0];
248 cidx_l[0] = cidx_in_l[0];
249 for (i = 0; i < n; i++)
262 for (k = 0; k < n; k++)
265 for (i = cidx_in_l[k]; i < cidx_in_l[k+1]; i++)
266 w_data_l[ridx_in_l[i]] = data_in_l[i];
268 for (i = cidx_in_u[k]; i < cidx_in_u[k+1]; i++)
269 w_data_u[ridx_in_u[i]] = data_in_u[i];
272 for (j = 0; j < rows_list_len; j++)
274 if ((Ufirst[rows_list[j]] != -1))
275 for (jj = Ufirst[rows_list[j]]; jj < cidx_u[rows_list[j]+1]; jj++)
278 w_data_u[jrow] -= data_u[jj] * data_l[Lfirst[rows_list[j]]];
282 for (j = 0; j < cols_list_len; j++)
284 if (Lfirst[cols_list[j]] != -1)
285 for (jj = Lfirst[cols_list[j]]; jj < cidx_l[cols_list[j]+1]; jj++)
288 w_data_l[jrow] -= data_l[jj] * data_u[Ufirst[cols_list[j]]];
292 if ((max_len_u - total_len_u) < n)
294 max_len_u += (0.1 * max_len_u) > n ? 0.1 * max_len_u : n;
296 data_u = data_out_u.
rwdata ();
298 ridx_u = ridx_out_u.
rwdata ();
301 if ((max_len_l - total_len_l) < n)
303 max_len_l += (0.1 * max_len_l) > n ? 0.1 * max_len_l : n;
305 data_l = data_out_l.
rwdata ();
307 ridx_l = ridx_out_l.
rwdata ();
312 data_u[total_len_u] = w_data_u[k];
313 ridx_u[total_len_u] = k;
315 for (i = k + 1; i < n; i++)
317 if (w_data_u[i] != zero)
319 if (std::abs (w_data_u[i]) < (droptol * rows_norm[k]))
322 cr_sum[k] += w_data_u[i];
324 cr_sum[i] += w_data_u[i];
328 data_u[total_len_u + w_len_u] = w_data_u[i];
329 ridx_u[total_len_u + w_len_u] = i;
335 if (w_data_l[i] != zero)
337 if (std::abs (w_data_l[i]) < (droptol * cols_norm[k]))
340 cr_sum[k] += w_data_l[i];
342 cr_sum[i] += w_data_l[i];
346 data_l[total_len_l + w_len_l] = w_data_l[i];
347 ridx_l[total_len_l + w_len_l] = i;
356 if (opt == COL || opt == ROW)
357 data_u[total_len_u] += cr_sum[k];
360 if (data_u[total_len_u] == zero)
361 error (
"ilu: encountered a pivot equal to 0");
364 for (i = total_len_l ; i < (total_len_l + w_len_l); i++)
365 data_l[i] /= data_u[total_len_u];
367 total_len_u += w_len_u;
368 total_len_l += w_len_l;
371 if (total_len_l < 0 || total_len_u < 0)
372 error (
"ilu: integer overflow. Too many fill-in elements in L or U");
374 cidx_u[k+1] = cidx_u[k] - cidx_u[0] + w_len_u;
375 cidx_l[k+1] = cidx_l[k] - cidx_l[0] + w_len_l;
388 Ufirst[k] = cidx_u[k];
392 Lfirst[k] = cidx_l[k];
397 for (i = 0; i <= k; i++)
401 jj = ridx_u[Ufirst[i]];
404 if (Ufirst[i] < (cidx_u[i+1]))
407 if (Ufirst[i] == cidx_u[i+1])
410 jj = ridx_u[Ufirst[i]];
415 cols_list[cols_list_len] = i;
422 jj = ridx_l[Lfirst[i]];
424 if (Lfirst[i] < (cidx_l[i+1]))
427 if (Lfirst[i] == cidx_l[i+1])
430 jj = ridx_l[Lfirst[i]];
434 rows_list[rows_list_len] = i;
443 L = octave_matrix_t (n, n, total_len_l);
444 U = octave_matrix_t (n, n, total_len_u);
447 for (i = 0; i <= n; i++)
448 L.cidx (i) = cidx_l[i];
450 for (i = 0; i < total_len_l; i++)
452 L.ridx (i) = ridx_l[i];
453 L.data (i) = data_l[i];
456 for (i = 0; i <= n; i++)
457 U.cidx (i) = cidx_u[i];
459 for (i = 0; i < total_len_u; i++)
461 U.ridx (i) = ridx_u[i];
462 U.data (i) = data_u[i];
528ilu_tp (octave_matrix_t& sm, octave_matrix_t& L, octave_matrix_t& U,
531 const T thresh = T(0),
const std::string milu =
"off",
532 const double udiag = 0)
535 enum {OFF, ROW, COL};
538 else if (milu ==
"col")
547 sm = sm.transpose ();
552 T *data_in = sm.data ();
553 octave_idx_type jrow, i, j, k, jj, c, total_len_l, total_len_u, p_perm,
554 max_ind, max_len_l, max_len_u;
557 T tl = zero, aux, maximum;
560 max_len_u += (0.1 * max_len_u) > n ? 0.1 * max_len_u : n;
562 max_len_l += (0.1 * max_len_l) > n ? 0.1 * max_len_l : n;
570 T *data_l = data_out_l.
rwdata ();
578 T *data_u = data_out_u.
rwdata ();
582 T total_sum, partial_col_sum = zero, partial_row_sum = zero;
583 std::set<octave_idx_type> iw_l;
584 std::set<octave_idx_type> iw_u;
585 std::set<octave_idx_type>::iterator it, it2;
592 cidx_l[0] = cidx_in[0];
593 cidx_u[0] = cidx_in[0];
594 for (i = 0; i < n; i++)
604 for (k = 0; k < n; k++)
606 for (j = cidx_in[k]; j < cidx_in[k+1]; j++)
608 p_perm = iperm[ridx_in[j]];
609 w_data[iperm[ridx_in[j]]] = data_in[j];
611 iw_l.insert (ridx_in[j]);
613 iw_u.insert (p_perm);
618 while (it != iw_u.end ())
625 partial_col_sum = w_data[jrow];
626 if (w_data[jrow] != zero)
630 partial_row_sum = w_data[jrow];
631 tl = w_data[jrow] / data_u[uptr[jrow]];
633 for (jj = cidx_l[jrow]; jj < cidx_l[jrow+1]; jj++)
635 p_perm = iperm[ridx_l[jj]];
636 aux = w_data[p_perm];
639 w_data[p_perm] -= tl * data_l[jj];
640 partial_row_sum += tl * data_l[jj];
644 tl = data_l[jj] * w_data[jrow];
645 w_data[p_perm] -= tl;
647 partial_col_sum += tl;
650 if ((aux == zero) && (w_data[p_perm] != zero))
653 iw_l.insert (ridx_l[jj]);
655 iw_u.insert (p_perm);
661 if ((std::abs (w_data[jrow]) < (droptol * cols_norm[k]))
662 && (w_data[jrow] != zero))
665 total_sum += partial_col_sum;
667 total_sum += partial_row_sum;
669 it = iw_u.erase (it);
683 if ((thresh > zero) && (k < (n - 1)))
685 maximum = std::abs (w_data[k]) / thresh;
687 for (it = iw_l.begin (); it != iw_l.end (); ++it)
690 if (std::abs (w_data[p_perm]) > maximum)
692 maximum = std::abs (w_data[p_perm]);
698 p_perm = iperm[max_ind];
699 if (max_ind != perm[k])
702 if (w_data[k] != zero)
703 iw_l.insert (perm[k]);
707 iperm[perm[p_perm]] = k;
708 iperm[perm[k]] = p_perm;
709 std::swap (perm[k], perm[p_perm]);
710 std::swap (w_data[k], w_data[p_perm]);
718 while (it != iw_l.end ())
722 if (std::abs (w_data[p_perm]) < (droptol * cols_norm[k]))
725 total_sum += w_data[p_perm];
726 w_data[p_perm] = zero;
727 it = iw_l.erase (it);
738 if ((total_sum > zero) && (w_data[k] == zero))
740 w_data[k] += total_sum;
746 if (w_data[k] == zero)
749 error (
"ilu: encountered a pivot equal to 0");
757 for (it = iw_l.begin (); it != iw_l.end (); ++it)
760 w_data[p_perm] = w_data[p_perm] / w_data[k];
763 if ((max_len_u - total_len_u) < n)
765 max_len_u += (0.1 * max_len_u) > n ? 0.1 * max_len_u : n;
767 data_u = data_out_u.
rwdata ();
769 ridx_u = ridx_out_u.
rwdata ();
772 if ((max_len_l - total_len_l) < n)
774 max_len_l += (0.1 * max_len_l) > n ? 0.1 * max_len_l : n;
776 data_l = data_out_l.
rwdata ();
778 ridx_l = ridx_out_l.
rwdata ();
783 for (it = iw_u.begin (); it != iw_u.end (); ++it)
785 if (w_data[*it] != zero)
787 data_u[total_len_u + w_len_u] = w_data[*it];
788 ridx_u[total_len_u + w_len_u] = *it;
796 for (it = iw_l.begin (); it != iw_l.end (); ++it)
799 if (w_data[p_perm] != zero)
801 data_l[total_len_l + w_len_l] = w_data[p_perm];
802 ridx_l[total_len_l + w_len_l] = *it;
807 total_len_u += w_len_u;
808 total_len_l += w_len_l;
812 if (total_len_l < 0 || total_len_u < 0)
813 error (
"ilu: Integer overflow. Too many fill-in elements in L or U");
816 uptr[k] = total_len_u - 1;
818 cidx_u[k+1] = cidx_u[k] - cidx_u[0] + w_len_u;
819 cidx_l[k+1] = cidx_l[k] - cidx_l[0] + w_len_l;
825 octave_matrix_t *L_ptr;
826 octave_matrix_t *U_ptr;
827 octave_matrix_t diag (n, n, n);
836 L = octave_matrix_t (n, n, total_len_u - n);
837 U = octave_matrix_t (n, n, total_len_l);
843 L = octave_matrix_t (n, n, total_len_l);
844 U = octave_matrix_t (n, n, total_len_u);
847 for (i = 0; i <= n; i++)
849 L_ptr->cidx (i) = cidx_l[i];
850 U_ptr->cidx (i) = cidx_u[i];
852 U_ptr->cidx (i) -= i;
855 for (i = 0; i < n; i++)
858 diag.elem (i, i) = data_u[uptr[i]];
861 while (j < cidx_l[i+1])
863 L_ptr->ridx (j) = ridx_l[j];
864 L_ptr->data (j) = data_l[j];
869 while (j < cidx_u[i+1])
885 U_ptr->data (c) = data_u[j];
886 U_ptr->ridx (c) = ridx_u[j];
895 U += diag.index (idx_vector::colon, perm_vec);
907 if (args.length () != 5)
911 double droptol = args(1).double_value ();
912 double thresh = args(2).double_value ();
913 std::string milu = args(3).string_value ();
914 double udiag = args(4).double_value ();
918 if (! args(0).iscomplex ())
922 nnz_u = (
Ftriu (
ovl (sm))(0).sparse_matrix_value ()).nnz ();
923 nnz_l = (
Ftril (
ovl (sm, -1))(0).sparse_matrix_value ()).nnz ();
931 ilu_tp<SparseMatrix, double> (sm, L, U, nnz_u, nnz_l,
933 perm, droptol, thresh, milu, udiag);
938 retval(0) = L + speye;
941 retval(1) = U.
index (idx_vector::colon, perm);
942 retval(2) = speye.index (idx_vector::colon, perm);
952 retval(0) = L.
index (perm, idx_vector::colon) + speye;
953 retval(2) = speye.
index (perm, idx_vector::colon);
956 retval(0) = L + speye.
index (idx_vector::colon, perm);
963 nnz_u = (
Ftriu (
ovl (sm))(0).sparse_complex_matrix_value ()).nnz ();
964 nnz_l = (
Ftril (
ovl (sm, -1))(0).sparse_complex_matrix_value ()).nnz ();
972 ilu_tp<SparseComplexMatrix, Complex>
973 (sm, L, U, nnz_u, nnz_l, rc_norm.
rwdata (), perm,
979 retval(0) = L + speye;
982 retval(1) = U.
index (idx_vector::colon, perm);
983 retval(2) = speye.index (idx_vector::colon, perm);
985 else if (nargout == 2)
993 retval(0) = L.
index (perm, idx_vector::colon) + speye;
994 retval(2) = speye.
index (perm, idx_vector::colon);
997 retval(0) = L + speye.
index (idx_vector::colon, perm);