1 ## Copyright (C) 2006, Regents of the University of California -*- mode: octave; -*-
3 ## This program is free software distributed under the "modified" or
4 ## 3-clause BSD license appended to this file.
6 function varargout = toledolu(LU)
8 ## @deftypefn{Function File} {[@var{L}, @var{U}, @var{P}]} = toledolu(@var{A})
9 ## @deftypefnx{Function File} {[@var{L}, @var{U}]} = toledolu(@var{A})
10 ## @deftypefnx{Function File} {@var{LUP}} = toledolu(@var{A})
12 ## Note: This returns a vector of indices for @var{P} and not a permutation
15 ## Factors @var{P}*@var{A}=@var{L}*@var{U} by Sivan Toledo's recursive factorization algorithm
16 ## with partial pivoting. While not as fast as the built-in LU, this
17 ## is significantly faster than the standard, unblocked algorithm
18 ## while remaining relatively easy to modify.
20 ## See the help for lu for details about the other calling forms.
22 ## For the algorithm, see
25 ## Toledo, Sivan. "Locality of reference in LU decomposition with
26 ## partial pivoting," SIAM J. of Matrix Analysis and Applications,
27 ## v18, n4, 1997. DOI: 10.1137/S0895479896297744
34 ## Author: Jason Riedy <ejr@cs.berkeley.edu>
35 ## Keywords: linear-algebra, LU, factorization
38 ## This version isn't *quite* the same as Toledo's algorithm. I use a
39 ## doubling approach rather than using recursion. So non-power-of-2
40 ## columns likely will be slightly different, but that shouldn't
41 ## affect the 'optimality' by more than a small constant factor.
43 ## Also, I don't handle ncol > nrow optimally. The code factors the
44 ## first nrow columns and then updates the remaining ncol-nrow columns
47 ## Might be worth eating the memory cost and tracking L separately.
48 ## The eye(n)+tril(LU,-1) could be expensive.
55 usage ("[L,U,P] = lu(A), [P\\L, U] = lu(A), or (P\\L-I+U) = lu(A)");
58 [nrow, ncol] = size(LU);
59 nstep = min(nrow, ncol);
61 Pswap = zeros(nstep, 1);
64 [pval, pind] = max(abs(LU(j:nrow, j)));
68 kahead = bitand(j, 1+bitcmp(j)); # last 1 bit in j
70 kcols = min(kahead, nstep-j);
76 ## permute just this column
79 LU(pind, j) = LU(j,j);
82 ## apply pending permutations to L
85 jpivstart = j - n_to_piv;
86 while (n_to_piv < kahead)
87 pivcols = jpivstart : jpivstart+n_to_piv-1;
88 for ipiv = ipivstart:j,
91 tmp = LU(pind, pivcols);
92 LU(pind, pivcols) = LU(ipiv, pivcols);
93 LU(ipiv, pivcols) = tmp;
96 ipivstart -= n_to_piv;
98 jpivstart -= n_to_piv;
101 if (LU(j,j) != 0.0 && !isnan(LU(j,j))),
102 LU(j+1:nrow,j) /= LU(j,j);
105 if 0 == kcols, break; endif
107 ## permute U to match perm already applied to L
109 tmp = LU(Pswap(k), Ucol);
110 LU(Pswap(k), Ucol) = LU(k, Ucol);
114 LU(inds, Ucol) = (eye(kahead) + tril(LU(inds, inds),-1)) \ LU(inds, Ucol);
115 LU(Lrow, Ucol) -= LU(Lrow, inds) * LU(inds, Ucol);
118 ## handle pivot permutations in L out from the last step
119 npived = bitand(nstep, 1+bitcmp(nstep));
122 n_to_piv = bitand(j, 1+bitcmp(j));
124 pivcols = j-n_to_piv+1 : j;
125 for ipiv = j+1:nstep,
128 tmp = LU(pind, pivcols);
129 LU(pind, pivcols) = LU(ipiv, pivcols);
130 LU(ipiv, pivcols) = tmp;
138 Ucol = nrow+1 : ncol;
141 tmp = LU(Pswap(k), Ucol);
142 LU(Pswap(k), Ucol) = LU(k, Ucol);
145 LU(inds, Ucol) = (eye(nrow) + tril(LU(inds, inds),-1)) \ LU(inds, Ucol);
154 L = eye(nrow) + tril(LU, -1);
155 varargout{2} = triu(LU);
157 L = eye(nrow) + tril(LU, -1)(:,1:nrow);
158 varargout{2} = triu(LU);
164 varargout{2} = triu(LU)(1:ncol,:);
191 %! [L,U,P] = toledolu(A);
192 %! assert(norm(L*U-A(P,:),inf), 0, M**2*N*eps)
198 %! [L,U,P] = toledolu(A);
199 %! assert(norm(L*U-A(P,:),inf), 0, M**2*N*eps)
205 %! [L,U,P] = toledolu(A);
206 %! assert(norm(L*U-A(P,:),inf), 0, M**2*N*eps)
212 %! [L,U,P] = toledolu(A);
213 %! assert(norm(L*U-A(P,:),inf), 0, M**2*N*eps)
219 %! [L,U,P] = toledolu(A);
220 %! assert(norm(L*U-A(P,:),inf), 0, M**2*N*eps)
226 %! [L,U,P] = toledolu(A);
227 %! assert(norm(L*U-A(P,:),inf), 0, M**2*N*eps)
233 %! [L,U,P] = toledolu(A);
234 %! assert(norm(L*U-A(P,:),inf), 0, M**2*N*eps)
240 %! [L,U,P] = toledolu(A);
241 %! assert(norm(L*U-A(P,:),inf), 0, M**2*N*eps)
247 %! [L,U,P] = toledolu(A);
248 %! assert(norm(L*U-A(P,:),inf), 0, M**2*N*eps)
254 %! [L,U,P] = toledolu(A);
255 %! assert(norm(L*U-A(P,:),inf), 0, M**2*N*eps)
261 %! [L,U,P] = toledolu(A);
262 %! assert(norm(L*U-A(P,:),inf), 0, M**2*N*eps)
268 %! [L,U,P] = toledolu(A);
269 %! assert(norm(L*U-A(P,:),inf), 0, M**2*N*eps)
275 %! [L,U,P] = toledolu(A);
276 %! assert(norm(L*U-A(P,:),inf), 0, M**2*N*eps)
278 ## Copyright (c) 2006, Regents of the University of California
279 ## All rights reserved.
280 ## Redistribution and use in source and binary forms, with or without
281 ## modification, are permitted provided that the following conditions are met:
283 ## * Redistributions of source code must retain the above copyright
284 ## notice, this list of conditions and the following disclaimer.
285 ## * Redistributions in binary form must reproduce the above copyright
286 ## notice, this list of conditions and the following disclaimer in the
287 ## documentation and/or other materials provided with the distribution.
288 ## * Neither the name of the University of California, Berkeley nor the
289 ## names of its contributors may be used to endorse or promote products
290 ## derived from this software without specific prior written permission.
292 ## THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND ANY
293 ## EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
294 ## WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
295 ## DISCLAIMED. IN NO EVENT SHALL THE REGENTS AND CONTRIBUTORS BE LIABLE FOR ANY
296 ## DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
297 ## (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
298 ## LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
299 ## ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
300 ## (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
301 ## SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.