From 0401d6b6dfb8e376b55765cfa7d752e1e9bb218c Mon Sep 17 00:00:00 2001 From: Sebastian Ament Date: Thu, 12 May 2022 19:15:22 -0400 Subject: [PATCH] added conversion from Cholesky to UpdatableCholesky and updated benchmarks --- Manifest.toml | 6 ++-- Project.toml | 4 +-- README.md | 72 +++++++++++++++++++++--------------------- examples/benchmarks.jl | 8 ++--- src/cholesky.jl | 15 +++++++-- test/cholesky.jl | 5 +++ 6 files changed, 63 insertions(+), 47 deletions(-) diff --git a/Manifest.toml b/Manifest.toml index 30d05bf..edf609b 100644 --- a/Manifest.toml +++ b/Manifest.toml @@ -1,6 +1,6 @@ # This file is machine-generated - editing it directly is not advised -julia_version = "1.7.0" +julia_version = "1.7.2" manifest_format = "2.0" [[deps.Artifacts]] @@ -19,9 +19,9 @@ uuid = "b77e0a4c-d291-57a0-90e8-8db25a27a240" [[deps.LazyInverses]] deps = ["LinearAlgebra", "Test"] -git-tree-sha1 = "dbd234f5a9e6f26ef8d38bcfd0b135099eaabab7" +git-tree-sha1 = "1e4635d2e2bb50065feb4f3bc56ab38037916601" uuid = "9f18896c-49a9-43cc-8eef-c455a8a119c6" -version = "1.1.0" +version = "1.1.4" [[deps.Libdl]] uuid = "8f399da3-3557-5675-b5ff-fb832c97cbdb" diff --git a/Project.toml b/Project.toml index e30f176..b81ee34 100644 --- a/Project.toml +++ b/Project.toml @@ -1,7 +1,7 @@ name = "UpdatableCholeskyFactorizations" uuid = "1a1c1746-0e7b-45d0-811e-72e8bb59686c" authors = ["Sebastian Ament "] -version = "1.0.0" +version = "1.1.0" [deps] LazyInverses = "9f18896c-49a9-43cc-8eef-c455a8a119c6" @@ -9,5 +9,5 @@ LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e" Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40" [compat] -LazyInverses = "1.1.0" +LazyInverses = "1.1" julia = "1.7" diff --git a/README.md b/README.md index 0bcb97e..9d115ae 100644 --- a/README.md +++ b/README.md @@ -9,7 +9,7 @@ Compared to constructing Cholesky factorizations from scratch, this package can To install the package, type `]` and subsequently, `add UpdatableCholeskyFactorizations` in the Julia REPL. ## Basic Usage -The package exports the `UpdatableCholesky` type, which can also be referenced as `UCholesky`. +The package exports the `UpdatableCholesky` type, which can also be referenced as `UCholesky`. A structure of this type can be computed using the function ```julia updatable_cholesky(A::AbstractMatrix, m::Int = 2size(A, 1); check::Bool = true), @@ -35,14 +35,14 @@ See the following example. using LinearAlgebra using UpdatableCholeskyFactorizations -# setting up +# setting up n = 128 m = 16 A_full = randn(2n, 2n) A_full = A_full'A_full -A = @view A_full[1:n, 1:n] -a = @view A_full[1:n+1, n+1] -Bm = @view A_full[1:n+m, n+1:n+m] # m columns +A = A_full[1:n, 1:n] +a = A_full[1:n+1, n+1] +Bm = A_full[1:n+m, n+1:n+m] # m columns C = updatable_cholesky(A) # computing a factorization of A @@ -57,53 +57,53 @@ add_column!(C, Bm) # appends the m columns in Bm to the factorization, C is now ## Efficiency The following benchmarks highlight the performance benefits that updating a Cholesky factorization can have over constructing a new factorization from scratch. -First, we report the performance of `LinearAlgebra`'s `cholesky` on two matrix sizes: +First, we report the performance of `LinearAlgebra`'s `cholesky` on two matrix sizes. +The code for the following benchmarks can be found in examples/benchmarks.jl. ```julia 128 by 128 cholesky -BenchmarkTools.TrialEstimate: - time: 85.072 μs +BenchmarkTools.TrialEstimate: + time: 49.125 μs gctime: 0.000 ns (0.00%) - memory: 128.12 KiB - allocs: 4 - + memory: 128.05 KiB + allocs: 2 + 256 by 256 cholesky -BenchmarkTools.TrialEstimate: - time: 315.872 μs +BenchmarkTools.TrialEstimate: + time: 188.875 μs gctime: 0.000 ns (0.00%) - memory: 512.12 KiB - allocs: 4 + memory: 512.05 KiB + allocs: 2 ``` Now, compare this to this package's `updatable_cholesky` and `add_column!` functions: ```julia 128 by 128 updatable_cholesky -BenchmarkTools.TrialEstimate: - time: 169.834 μs +BenchmarkTools.TrialEstimate: + time: 66.500 μs gctime: 0.000 ns (0.00%) - memory: 512.44 KiB - allocs: 8 + memory: 512.08 KiB + allocs: 3 -adding vector to 128 by 128 updatable_cholesky -BenchmarkTools.TrialEstimate: - time: 4.419 μs +adding vector to 128 by 128 UpdatableCholesky +BenchmarkTools.TrialEstimate: + time: 3.000 μs gctime: 0.000 ns (0.00%) - memory: 208 bytes - allocs: 3 + memory: 96 bytes + allocs: 2 -adding 16 vectors to 128 by 128 updatable_cholesky -BenchmarkTools.TrialEstimate: - time: 23.295 μs +adding 16 vectors to 128 by 128 UpdatableCholesky +BenchmarkTools.TrialEstimate: + time: 25.375 μs gctime: 0.000 ns (0.00%) - memory: 208 bytes - allocs: 3 + memory: 0 bytes + allocs: 0 ``` -The construction of the `UpdatableCholesky` structure tends to be approximately two times slower than `cholesky` for a given size, +The construction of the `UpdatableCholesky` structure tends to be slightly slower - 30% in the example above - than `cholesky` for a given size, and by default, consumes as much memory as a cholesky factorization of twice the size, in order to accomodate future column additions without requiring additional memory allocations. However, subsequently updating a factorization is then much faster than computing another factorization from scratch and does not require additional allocations. -In the benchmarks above, adding a vector to the 128 by 128 factorization is ~**20 times faster**, -and adding 16 vectors is ~**4 times faster** than calling `cholesky`. -The benchmarks were computed on a 2017 MacBook Pro with an i7 dual core and 16 GB of RAM. +In the benchmarks above, adding a vector to the 128 by 128 factorization is ~**16 times faster**, +and adding 16 vectors is ~**2 times faster** than calling `cholesky`. +The time advantage grows with increasing `n`. +The benchmarks were computed on a 2021 MacBook Pro with an M1 Pro and 32 GB of RAM. ## Future Work -Currenlty, the package only supports appending columns. Future work could add support for adding columns at arbitrary indices. - - +Currently, the package only supports appending columns. Future work could add support for adding columns at arbitrary indices. diff --git a/examples/benchmarks.jl b/examples/benchmarks.jl index cebe71a..8f0362e 100644 --- a/examples/benchmarks.jl +++ b/examples/benchmarks.jl @@ -7,10 +7,10 @@ n = 128 m = 16 A_full = randn(elty, 2n, 2n) A_full = A_full'A_full -A = @view A_full[1:n, 1:n] -a = @view A_full[1:n+1, n+1] -B = @view A_full[1:2n, n+1:2n] -Bm = @view A_full[1:n+m, n+1:n+m] # adding m columns +A = A_full[1:n, 1:n] +a = A_full[1:n+1, n+1] +B = A_full[1:2n, n+1:2n] +Bm = A_full[1:n+m, n+1:n+m] # adding m columns C = updatable_cholesky(A) set = @benchmarkset "updatable cholesky" begin # for n in 1:16 IDEA: could have loop over different sizes diff --git a/src/cholesky.jl b/src/cholesky.jl index b8ca8c8..1fea5a7 100644 --- a/src/cholesky.jl +++ b/src/cholesky.jl @@ -32,16 +32,27 @@ function updatable_cholesky!(U_full::AbstractMatrix, n::Int; check::Bool = true) m = checksquare(U_full) A = @view U_full[1:n, 1:n] C = cholesky!(A, check = check) - @. U_full[1:n, 1:n] = C.U UpdatableCholesky(U_full, n, C.info) end const ucholesky! = updatable_cholesky! -# constructing +# constructing cholesky from UpdatableCholesky function LinearAlgebra.Cholesky(F::UpdatableCholesky) Cholesky(F.U, :U, F.info) end +# converting Cholesky factorization to UpdatableCholesky +function UpdatableCholeskyFactorizations.updatable_cholesky(C::Union{Cholesky, CholeskyPivoted}, m::Int = size(C, 1); check::Bool = true) + U_full = zeros(eltype(C), m, m) # in principle, could do this in place + n = size(C, 1) + U = C.U + if C isa CholeskyPivoted + U = @view C.U[:, invperm(C.p)] + end + @. U_full[1:n, 1:n] = U + UpdatableCholesky(U_full, n, C.info) +end + function Base.getproperty(F::UpdatableCholesky, s::Symbol) if s == :U U = @view F.U_full[1:F.n, 1:F.n] diff --git a/test/cholesky.jl b/test/cholesky.jl index 7e16a41..ac00020 100644 --- a/test/cholesky.jl +++ b/test/cholesky.jl @@ -48,6 +48,11 @@ using Test @test C2.info == C.info @test size(C2) == size(C) + # conversion from cholesky type + UC2 = updatable_cholesky(C2) + @test UC2 isa UpdatableCholesky + @test UC2.U ≈ C2.U + # adding rows and columns for i in 1:n ai = @view A_full[1:n+i, n+i]