From e830a02d30f04b3297790a1e825b1e8fa8d0f0e8 Mon Sep 17 00:00:00 2001 From: jalvesz Date: Tue, 21 Apr 2026 20:21:43 +0200 Subject: [PATCH 01/23] start gmres draft --- src/linalg_iterative/CMakeLists.txt | 1 + .../stdlib_linalg_iterative_solvers.fypp | 54 +++- ...stdlib_linalg_iterative_solvers_gmres.fypp | 278 ++++++++++++++++++ test/linalg/test_linalg_solve_iterative.fypp | 43 +++ 4 files changed, 375 insertions(+), 1 deletion(-) create mode 100644 src/linalg_iterative/stdlib_linalg_iterative_solvers_gmres.fypp diff --git a/src/linalg_iterative/CMakeLists.txt b/src/linalg_iterative/CMakeLists.txt index c91188eba..989b1c951 100644 --- a/src/linalg_iterative/CMakeLists.txt +++ b/src/linalg_iterative/CMakeLists.txt @@ -1,6 +1,7 @@ set(linalg_iterative_fppFiles stdlib_linalg_iterative_solvers_bicgstab.fypp stdlib_linalg_iterative_solvers_cg.fypp + stdlib_linalg_iterative_solvers_gmres.fypp stdlib_linalg_iterative_solvers.fypp stdlib_linalg_iterative_solvers_pcg.fypp ) diff --git a/src/linalg_iterative/stdlib_linalg_iterative_solvers.fypp b/src/linalg_iterative/stdlib_linalg_iterative_solvers.fypp index 6c7725f3f..f3d2a43fe 100644 --- a/src/linalg_iterative/stdlib_linalg_iterative_solvers.fypp +++ b/src/linalg_iterative/stdlib_linalg_iterative_solvers.fypp @@ -16,8 +16,9 @@ module stdlib_linalg_iterative_solvers enumerator :: stdlib_size_wksp_cg = 3 enumerator :: stdlib_size_wksp_pcg = 4 enumerator :: stdlib_size_wksp_bicgstab = 8 + enumerator :: stdlib_size_wksp_gmres = 3 end enum - public :: stdlib_size_wksp_cg, stdlib_size_wksp_pcg, stdlib_size_wksp_bicgstab + public :: stdlib_size_wksp_cg, stdlib_size_wksp_pcg, stdlib_size_wksp_bicgstab, stdlib_size_wksp_gmres enum, bind(c) enumerator :: pc_none = 0 @@ -161,6 +162,27 @@ module stdlib_linalg_iterative_solvers end interface public :: stdlib_solve_bicgstab_kernel + !! version: experimental + !! + !! stdlib_solve_gmres_kernel interface for the restarted generalized minimal residual method. + !! [Specifications](../page/specs/stdlib_linalg_iterative_solvers.html#stdlib_solve_gmres_kernel) + interface stdlib_solve_gmres_kernel + #:for k, t, s in R_KINDS_TYPES + module subroutine stdlib_solve_gmres_kernel_${s}$(A,M,b,x,rtol,atol,maxiter,kdim,workspace) + class(stdlib_linop_${s}$_type), intent(in) :: A !! linear operator + class(stdlib_linop_${s}$_type), intent(in) :: M !! preconditioner linear operator + ${t}$, intent(in) :: b(:) !! right-hand side vector + ${t}$, intent(inout) :: x(:) !! solution vector and initial guess + ${t}$, intent(in) :: rtol !! relative tolerance for convergence + ${t}$, intent(in) :: atol !! absolute tolerance for convergence + integer, intent(in) :: maxiter !! maximum number of iterations + integer, intent(in) :: kdim !! Krylov subspace size before restart + type(stdlib_solver_workspace_${s}$_type), intent(inout) :: workspace !! workspace for the solver + end subroutine + #:endfor + end interface + public :: stdlib_solve_gmres_kernel + !! version: experimental !! !! [Specifications](../page/specs/stdlib_linalg_iterative_solvers.html#stdlib_solve_pcg) @@ -219,6 +241,36 @@ module stdlib_linalg_iterative_solvers end interface public :: stdlib_solve_bicgstab + !! version: experimental + !! + !! [Specifications](../page/specs/stdlib_linalg_iterative_solvers.html#stdlib_solve_gmres) + interface stdlib_solve_gmres + #:for matrix in MATRIX_TYPES + #:for k, t, s in R_KINDS_TYPES + module subroutine stdlib_solve_gmres_${matrix}$_${s}$(A,b,x,di,rtol,atol,maxiter,restart,kdim,precond,M,workspace) + !! linear operator matrix + #:if matrix == "dense" + ${t}$, intent(in) :: A(:,:) + #:else + type(${matrix}$_${s}$_type), intent(in) :: A + #:endif + ${t}$, intent(in) :: b(:) !! right-hand side vector + ${t}$, intent(inout) :: x(:) !! solution vector and initial guess + ${t}$, intent(in), optional :: rtol !! relative tolerance for convergence + ${t}$, intent(in), optional :: atol !! absolute tolerance for convergence + logical(int8), intent(in), optional, target :: di(:) !! dirichlet conditions mask + integer, intent(in), optional :: maxiter !! maximum number of iterations + logical, intent(in), optional :: restart !! restart flag + integer, intent(in), optional :: kdim !! Krylov subspace size before restart + integer, intent(in), optional :: precond !! preconditioner method enumerator + class(stdlib_linop_${s}$_type), optional, intent(in), target :: M !! preconditioner linear operator + type(stdlib_solver_workspace_${s}$_type), optional, intent(inout), target :: workspace !! workspace for the solver + end subroutine + #:endfor + #:endfor + end interface + public :: stdlib_solve_gmres + contains !------------------------------------------------------------------ diff --git a/src/linalg_iterative/stdlib_linalg_iterative_solvers_gmres.fypp b/src/linalg_iterative/stdlib_linalg_iterative_solvers_gmres.fypp new file mode 100644 index 000000000..4df8958e3 --- /dev/null +++ b/src/linalg_iterative/stdlib_linalg_iterative_solvers_gmres.fypp @@ -0,0 +1,278 @@ +#:include "common.fypp" +#:set R_KINDS_TYPES = list(zip(REAL_KINDS, REAL_TYPES, REAL_SUFFIX)) +#:set C_KINDS_TYPES = list(zip(CMPLX_KINDS, CMPLX_TYPES, CMPLX_SUFFIX)) +#:set MATRIX_TYPES = ["dense", "CSR"] +#:set RANKS = range(1, 2+1) + +submodule(stdlib_linalg_iterative_solvers) stdlib_linalg_iterative_gmres + use stdlib_kinds + use stdlib_sparse + use stdlib_constants + use stdlib_optval, only: optval + implicit none + +contains + + #:for k, t, s in R_KINDS_TYPES + module subroutine stdlib_solve_gmres_kernel_${s}$(A,M,b,x,rtol,atol,maxiter,kdim,workspace) + class(stdlib_linop_${s}$_type), intent(in) :: A + class(stdlib_linop_${s}$_type), intent(in) :: M + ${t}$, intent(in) :: b(:), rtol, atol + ${t}$, intent(inout) :: x(:) + integer, intent(in) :: maxiter, kdim + type(stdlib_solver_workspace_${s}$_type), intent(inout) :: workspace + integer :: i, iter, inner_iter, j + ${t}$ :: beta, denom, hnext, norm_sq, norm_sq0, temp, tolsq + ${t}$, allocatable :: cs(:), g(:), h(:,:), sn(:), x_base(:), y(:) + + allocate(h(kdim+1, kdim), cs(kdim), sn(kdim), g(kdim+1), y(kdim), x_base(size(x))) + + associate( r => workspace%tmp(:,1), & + w => workspace%tmp(:,2), & + v => workspace%tmp(:,3:kdim+3), & + z => workspace%tmp(:,kdim+4:2*kdim+3) ) + + norm_sq0 = A%inner_product(b, b) + tolsq = max(rtol*rtol*norm_sq0, atol*atol) + + r = b + call A%matvec(x, r, alpha=-one_${s}$, beta=one_${s}$, op='N') + norm_sq = A%inner_product(r, r) + if (associated(workspace%callback)) call workspace%callback(x, norm_sq, 0) + + if (norm_sq <= tolsq .or. maxiter <= 0) then + deallocate(h, cs, sn, g, y, x_base) + return + end if + + iter = 0 + do while (iter < maxiter .and. norm_sq >= tolsq) + beta = sqrt(max(norm_sq, zero_${s}$)) + if (beta <= epsilon(one_${s}$)) exit + + inner_iter = min(kdim, maxiter - iter) + x_base = x + h = zero_${s}$ + cs = zero_${s}$ + sn = zero_${s}$ + g = zero_${s}$ + y = zero_${s}$ + + v(:,1) = r / beta + g(1) = beta + + do j = 1, inner_iter + call M%matvec(v(:,j), z(:,j), alpha=one_${s}$, beta=zero_${s}$, op='N') + call A%matvec(z(:,j), w, alpha=one_${s}$, beta=zero_${s}$, op='N') + + do i = 1, j + h(i,j) = A%inner_product(v(:,i), w) + w = w - h(i,j) * v(:,i) + end do + + hnext = sqrt(max(A%inner_product(w, w), zero_${s}$)) + h(j+1,j) = hnext + if (hnext > epsilon(one_${s}$)) then + v(:,j+1) = w / hnext + else + v(:,j+1) = zero_${s}$ + end if + + do i = 1, j - 1 + temp = cs(i) * h(i,j) + sn(i) * h(i+1,j) + h(i+1,j) = -sn(i) * h(i,j) + cs(i) * h(i+1,j) + h(i,j) = temp + end do + + denom = sqrt(h(j,j) * h(j,j) + h(j+1,j) * h(j+1,j)) + if (denom > epsilon(one_${s}$)) then + cs(j) = h(j,j) / denom + sn(j) = h(j+1,j) / denom + else + cs(j) = one_${s}$ + sn(j) = zero_${s}$ + end if + + temp = cs(j) * h(j,j) + sn(j) * h(j+1,j) + h(j+1,j) = -sn(j) * h(j,j) + cs(j) * h(j+1,j) + h(j,j) = temp + + temp = cs(j) * g(j) + sn(j) * g(j+1) + g(j+1) = -sn(j) * g(j) + cs(j) * g(j+1) + g(j) = temp + + call upper_triangular_solve(h, g, y, j) + x = x_base + do i = 1, j + x = x + y(i) * z(:,i) + end do + + norm_sq = g(j+1) * g(j+1) + iter = iter + 1 + if (associated(workspace%callback)) call workspace%callback(x, norm_sq, iter) + + if (norm_sq < tolsq .or. hnext <= epsilon(one_${s}$) .or. iter >= maxiter) exit + end do + + if (norm_sq < tolsq .or. iter >= maxiter) exit + + r = b + call A%matvec(x, r, alpha=-one_${s}$, beta=one_${s}$, op='N') + norm_sq = A%inner_product(r, r) + end do + end associate + + deallocate(h, cs, sn, g, y, x_base) + + contains + + subroutine upper_triangular_solve(h, g, y, n) + ${t}$, intent(in) :: h(:,:), g(:) + ${t}$, intent(inout) :: y(:) + integer, intent(in) :: n + integer :: row + + y(1:n) = g(1:n) + do row = n, 1, -1 + if (row < n) y(row) = y(row) - sum(h(row,row+1:n) * y(row+1:n)) + if (abs(h(row,row)) > epsilon(one_${s}$)) then + y(row) = y(row) / h(row,row) + else + y(row) = zero_${s}$ + end if + end do + end subroutine + end subroutine + #:endfor + + #:for matrix in MATRIX_TYPES + #:for k, t, s in R_KINDS_TYPES + module subroutine stdlib_solve_gmres_${matrix}$_${s}$(A,b,x,di,rtol,atol,maxiter,restart,kdim,precond,M,workspace) + #:if matrix == "dense" + use stdlib_linalg, only: diag + ${t}$, intent(in) :: A(:,:) + #:else + type(${matrix}$_${s}$_type), intent(in) :: A + #:endif + ${t}$, intent(in) :: b(:) + ${t}$, intent(inout) :: x(:) + ${t}$, intent(in), optional :: rtol, atol + logical(int8), intent(in), optional, target :: di(:) + integer, intent(in), optional :: maxiter, kdim + logical, intent(in), optional :: restart + integer, intent(in), optional :: precond + class(stdlib_linop_${s}$_type), optional, intent(in), target :: M + type(stdlib_solver_workspace_${s}$_type), optional, intent(inout), target :: workspace + type(stdlib_linop_${s}$_type) :: op + type(stdlib_linop_${s}$_type), pointer :: M_ => null() + type(stdlib_solver_workspace_${s}$_type), pointer :: workspace_ + integer :: kdim_, maxiter_, n, ncols, precond_ + ${t}$ :: rtol_, atol_ + logical :: restart_ + logical(int8), pointer :: di_(:) + ${t}$, allocatable :: diagonal(:) + + n = size(b) + maxiter_ = optval(x=maxiter, default=n) + kdim_ = max(1, min(optval(x=kdim, default=min(30, n)), n)) + restart_ = optval(x=restart, default=.true.) + rtol_ = optval(x=rtol, default=1.e-5_${s}$) + atol_ = optval(x=atol, default=epsilon(one_${s}$)) + precond_ = optval(x=precond, default=pc_none) + ncols = 2 * kdim_ + stdlib_size_wksp_gmres + + if (present(M)) then + M_ => M + else + allocate(M_) + allocate(diagonal(n), source=zero_${s}$) + + select case(precond_) + case(pc_jacobi) + #:if matrix == "dense" + diagonal = diag(A) + #:else + call diag(A, diagonal) + #:endif + M_%matvec => precond_jacobi + case default + M_%matvec => precond_none + end select + where(abs(diagonal) > epsilon(zero_${s}$)) diagonal = one_${s}$ / diagonal + end if + + op%matvec => matvec + + if (present(di)) then + di_ => di + else + allocate(di_(n), source=.false._int8) + end if + + if (present(workspace)) then + workspace_ => workspace + else + allocate(workspace_) + end if + if (.not.allocated(workspace_%tmp)) then + allocate(workspace_%tmp(n, ncols), source=zero_${s}$) + else if (size(workspace_%tmp,1) /= n .or. size(workspace_%tmp,2) < ncols) then + deallocate(workspace_%tmp) + allocate(workspace_%tmp(n, ncols), source=zero_${s}$) + end if + + if (restart_) x = zero_${s}$ + x = merge(b, x, di_) + call stdlib_solve_gmres_kernel(op, M_, b, x, rtol_, atol_, maxiter_, kdim_, workspace_) + + if (.not.present(di)) deallocate(di_) + di_ => null() + + if (.not.present(workspace)) then + deallocate(workspace_%tmp) + deallocate(workspace_) + end if + M_ => null() + workspace_ => null() + + contains + + subroutine matvec(x,y,alpha,beta,op) + #:if matrix == "dense" + use stdlib_linalg_blas, only: gemv + #:endif + ${t}$, intent(in) :: x(:) + ${t}$, intent(inout) :: y(:) + ${t}$, intent(in) :: alpha + ${t}$, intent(in) :: beta + character(1), intent(in) :: op + #:if matrix == "dense" + call gemv(op, m=size(A,1), n=size(A,2), alpha=alpha, a=A, lda=size(A,1), x=x, incx=1, beta=beta, y=y, incy=1) + #:else + call spmv(A, x, y, alpha, beta, op) + #:endif + y = merge(zero_${s}$, y, di_) + end subroutine + + subroutine precond_none(x,y,alpha,beta,op) + ${t}$, intent(in) :: x(:) + ${t}$, intent(inout) :: y(:) + ${t}$, intent(in) :: alpha + ${t}$, intent(in) :: beta + character(1), intent(in) :: op + y = merge(zero_${s}$, x, di_) + end subroutine + + subroutine precond_jacobi(x,y,alpha,beta,op) + ${t}$, intent(in) :: x(:) + ${t}$, intent(inout) :: y(:) + ${t}$, intent(in) :: alpha + ${t}$, intent(in) :: beta + character(1), intent(in) :: op + y = merge(zero_${s}$, diagonal * x, di_) + end subroutine + end subroutine + #:endfor + #:endfor + +end submodule stdlib_linalg_iterative_gmres \ No newline at end of file diff --git a/test/linalg/test_linalg_solve_iterative.fypp b/test/linalg/test_linalg_solve_iterative.fypp index 8da08fcb8..13d1e1913 100644 --- a/test/linalg/test_linalg_solve_iterative.fypp +++ b/test/linalg/test_linalg_solve_iterative.fypp @@ -23,6 +23,7 @@ module test_linalg_solve_iterative tests = [ new_unittest("stdlib_solve_cg",test_stdlib_solve_cg), & new_unittest("stdlib_solve_pcg",test_stdlib_solve_pcg), & + new_unittest("stdlib_solve_gmres",test_stdlib_solve_gmres), & new_unittest("stdlib_solve_bicgstab",test_stdlib_solve_bicgstab), & new_unittest("stdlib_solve_bicgstab_nonsymmetric",test_stdlib_solve_bicgstab_nonsymmetric) ] @@ -80,6 +81,48 @@ module test_linalg_solve_iterative #:endfor end subroutine test_stdlib_solve_pcg + subroutine test_stdlib_solve_gmres(error) + type(error_type), allocatable, intent(out) :: error + + #:for k, t, s in R_KINDS_TYPES + block + ${t}$, parameter :: tol = 1000*epsilon(0.0_${k}$) + ${t}$ :: A(4,4) = reshape([${t}$ :: 5, 1, 2, 0, & + 0, 4, -1, 1, & + 1, 0, 3, 2, & + 2, -1, 0, 6], [4,4]) + ${t}$ :: x(4), load(4), xref(4) + + xref = [1.0_${k}$, 2.0_${k}$, -1.0_${k}$, 0.5_${k}$] + load = matmul(A, xref) + x = 0.0_${k}$ + + call stdlib_solve_gmres(A, load, x, rtol=1.e-10_${k}$, atol=1.e-12_${k}$, kdim=2, maxiter=12) + + call check(error, norm2(x-xref) Date: Thu, 23 Apr 2026 20:55:08 +0200 Subject: [PATCH 02/23] add comments --- .../stdlib_linalg_iterative_solvers_gmres.fypp | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/src/linalg_iterative/stdlib_linalg_iterative_solvers_gmres.fypp b/src/linalg_iterative/stdlib_linalg_iterative_solvers_gmres.fypp index 4df8958e3..97a52642f 100644 --- a/src/linalg_iterative/stdlib_linalg_iterative_solvers_gmres.fypp +++ b/src/linalg_iterative/stdlib_linalg_iterative_solvers_gmres.fypp @@ -32,9 +32,11 @@ contains v => workspace%tmp(:,3:kdim+3), & z => workspace%tmp(:,kdim+4:2*kdim+3) ) + ! Initialize convergence targets from the right-hand side norm. norm_sq0 = A%inner_product(b, b) tolsq = max(rtol*rtol*norm_sq0, atol*atol) + ! Form the initial residual and report the starting iterate. r = b call A%matvec(x, r, alpha=-one_${s}$, beta=one_${s}$, op='N') norm_sq = A%inner_product(r, r) @@ -47,6 +49,7 @@ contains iter = 0 do while (iter < maxiter .and. norm_sq >= tolsq) + ! Start a new GMRES cycle from the current residual. beta = sqrt(max(norm_sq, zero_${s}$)) if (beta <= epsilon(one_${s}$)) exit @@ -58,10 +61,12 @@ contains g = zero_${s}$ y = zero_${s}$ + ! Initialize the Krylov basis and least-squares right-hand side. v(:,1) = r / beta g(1) = beta do j = 1, inner_iter + ! Run Arnoldi with the preconditioned basis vector. call M%matvec(v(:,j), z(:,j), alpha=one_${s}$, beta=zero_${s}$, op='N') call A%matvec(z(:,j), w, alpha=one_${s}$, beta=zero_${s}$, op='N') @@ -78,12 +83,14 @@ contains v(:,j+1) = zero_${s}$ end if + ! Apply the previously accumulated Givens rotations. do i = 1, j - 1 temp = cs(i) * h(i,j) + sn(i) * h(i+1,j) h(i+1,j) = -sn(i) * h(i,j) + cs(i) * h(i+1,j) h(i,j) = temp end do + ! Build and apply the next Givens rotation. denom = sqrt(h(j,j) * h(j,j) + h(j+1,j) * h(j+1,j)) if (denom > epsilon(one_${s}$)) then cs(j) = h(j,j) / denom @@ -101,12 +108,14 @@ contains g(j+1) = -sn(j) * g(j) + cs(j) * g(j+1) g(j) = temp + ! Solve the reduced system and update the current iterate. call upper_triangular_solve(h, g, y, j) x = x_base do i = 1, j x = x + y(i) * z(:,i) end do + ! Track the residual norm estimate for convergence. norm_sq = g(j+1) * g(j+1) iter = iter + 1 if (associated(workspace%callback)) call workspace%callback(x, norm_sq, iter) @@ -116,6 +125,7 @@ contains if (norm_sq < tolsq .or. iter >= maxiter) exit + ! Refresh the residual before the next restarted cycle. r = b call A%matvec(x, r, alpha=-one_${s}$, beta=one_${s}$, op='N') norm_sq = A%inner_product(r, r) From 877f4354c7f05e292ac733f82acbd118f0889d31 Mon Sep 17 00:00:00 2001 From: jalvesz Date: Thu, 23 Apr 2026 21:10:30 +0200 Subject: [PATCH 03/23] remove kdim from test --- test/linalg/test_linalg_solve_iterative.fypp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/test/linalg/test_linalg_solve_iterative.fypp b/test/linalg/test_linalg_solve_iterative.fypp index 13d1e1913..08f0cf526 100644 --- a/test/linalg/test_linalg_solve_iterative.fypp +++ b/test/linalg/test_linalg_solve_iterative.fypp @@ -97,8 +97,8 @@ module test_linalg_solve_iterative load = matmul(A, xref) x = 0.0_${k}$ - call stdlib_solve_gmres(A, load, x, rtol=1.e-10_${k}$, atol=1.e-12_${k}$, kdim=2, maxiter=12) - + call stdlib_solve_gmres(A, load, x, rtol=tol, maxiter=12) + call check(error, norm2(x-xref) Date: Thu, 23 Apr 2026 21:23:38 +0200 Subject: [PATCH 04/23] use dot_product --- src/linalg_iterative/stdlib_linalg_iterative_solvers_gmres.fypp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/linalg_iterative/stdlib_linalg_iterative_solvers_gmres.fypp b/src/linalg_iterative/stdlib_linalg_iterative_solvers_gmres.fypp index 97a52642f..d5119e227 100644 --- a/src/linalg_iterative/stdlib_linalg_iterative_solvers_gmres.fypp +++ b/src/linalg_iterative/stdlib_linalg_iterative_solvers_gmres.fypp @@ -144,7 +144,7 @@ contains y(1:n) = g(1:n) do row = n, 1, -1 - if (row < n) y(row) = y(row) - sum(h(row,row+1:n) * y(row+1:n)) + if (row < n) y(row) = y(row) - dot_product(h(row,row+1:n) , y(row+1:n)) if (abs(h(row,row)) > epsilon(one_${s}$)) then y(row) = y(row) / h(row,row) else From bf59c029f3b370aff412f5f211f16fe42d798307 Mon Sep 17 00:00:00 2001 From: AlexanderGSC Date: Fri, 8 May 2026 13:31:48 +0200 Subject: [PATCH 05/23] gmres: add Hilbert matrix test --- test/linalg/test_linalg_solve_iterative.fypp | 30 +++++++++++++++++++- 1 file changed, 29 insertions(+), 1 deletion(-) diff --git a/test/linalg/test_linalg_solve_iterative.fypp b/test/linalg/test_linalg_solve_iterative.fypp index 08f0cf526..615be1862 100644 --- a/test/linalg/test_linalg_solve_iterative.fypp +++ b/test/linalg/test_linalg_solve_iterative.fypp @@ -24,6 +24,7 @@ module test_linalg_solve_iterative tests = [ new_unittest("stdlib_solve_cg",test_stdlib_solve_cg), & new_unittest("stdlib_solve_pcg",test_stdlib_solve_pcg), & new_unittest("stdlib_solve_gmres",test_stdlib_solve_gmres), & + new_unittest("stdlib_solve_gmres_hilbert",test_stdlib_solve_gmres_hilbert), & new_unittest("stdlib_solve_bicgstab",test_stdlib_solve_bicgstab), & new_unittest("stdlib_solve_bicgstab_nonsymmetric",test_stdlib_solve_bicgstab_nonsymmetric) ] @@ -123,6 +124,33 @@ module test_linalg_solve_iterative #:endfor end subroutine test_stdlib_solve_gmres + subroutine test_stdlib_solve_gmres_hilbert(error) + type(error_type), allocatable, intent(out) :: error + integer, parameter :: n = 10 + integer :: r, c + #:for k, t, s in R_KINDS_TYPES + block + ${t}$, parameter :: tol = 1000*epsilon(0.0_${k}$) + ${t}$ :: A(n, n), b(n), x(n), x_ref(n) + do r = 1, n + do c = 1, n + A(r, c) = 1.0_${k}$ / real(r + c - 1, ${k}$) + end do + end do + + x_ref = 1.0_${k}$ + b = matmul(A, x_ref) + x = 0.0_${k}$ + + call stdlib_solve_gmres(A, b, x, rtol=tol, maxiter=n) + + call check(error, norm2(x-x_ref) < tol*norm2(x_ref), & + 'error in GMRES: Hilbert matrix') + if (allocated(error)) return + end block + #:endfor + end subroutine test_stdlib_solve_gmres_hilbert + subroutine test_stdlib_solve_bicgstab(error) type(error_type), allocatable, intent(out) :: error @@ -265,4 +293,4 @@ program test_solve_iterative write(error_unit, '(i0, 1x, a)') stat, "test(s) failed!" error stop end if -end program test_solve_iterative +end program test_solve_iterative \ No newline at end of file From da90ab2627ba9b5a3c725130f448d7ec2a7bc0eb Mon Sep 17 00:00:00 2001 From: AlexanderGSC Date: Sun, 10 May 2026 18:16:47 +0200 Subject: [PATCH 06/23] linalg: add MGS reorthogonalization to GMRES --- ...stdlib_linalg_iterative_solvers_gmres.fypp | 14 +++++++++----- test/linalg/test_linalg_solve_iterative.fypp | 19 +++++++++++++++---- 2 files changed, 24 insertions(+), 9 deletions(-) diff --git a/src/linalg_iterative/stdlib_linalg_iterative_solvers_gmres.fypp b/src/linalg_iterative/stdlib_linalg_iterative_solvers_gmres.fypp index d5119e227..f9e83c699 100644 --- a/src/linalg_iterative/stdlib_linalg_iterative_solvers_gmres.fypp +++ b/src/linalg_iterative/stdlib_linalg_iterative_solvers_gmres.fypp @@ -21,8 +21,8 @@ contains ${t}$, intent(inout) :: x(:) integer, intent(in) :: maxiter, kdim type(stdlib_solver_workspace_${s}$_type), intent(inout) :: workspace - integer :: i, iter, inner_iter, j - ${t}$ :: beta, denom, hnext, norm_sq, norm_sq0, temp, tolsq + integer :: i, iter, inner_iter, j, k + ${t}$ :: beta, denom, hnext, htmp, norm_sq, norm_sq0, temp, tolsq ${t}$, allocatable :: cs(:), g(:), h(:,:), sn(:), x_base(:), y(:) allocate(h(kdim+1, kdim), cs(kdim), sn(kdim), g(kdim+1), y(kdim), x_base(size(x))) @@ -70,9 +70,13 @@ contains call M%matvec(v(:,j), z(:,j), alpha=one_${s}$, beta=zero_${s}$, op='N') call A%matvec(z(:,j), w, alpha=one_${s}$, beta=zero_${s}$, op='N') - do i = 1, j - h(i,j) = A%inner_product(v(:,i), w) - w = w - h(i,j) * v(:,i) + ! Modified Gram Schmidt (MGSR) + do k = 1, 2 ! reorthogonalization + do i = 1, j + htmp = A%inner_product(v(:,i), w) + h(i,j) = h(i,j) + htmp + w = w - htmp*v(:,i) + end do end do hnext = sqrt(max(A%inner_product(w, w), zero_${s}$)) diff --git a/test/linalg/test_linalg_solve_iterative.fypp b/test/linalg/test_linalg_solve_iterative.fypp index 615be1862..b0ccccc54 100644 --- a/test/linalg/test_linalg_solve_iterative.fypp +++ b/test/linalg/test_linalg_solve_iterative.fypp @@ -125,12 +125,24 @@ module test_linalg_solve_iterative end subroutine test_stdlib_solve_gmres subroutine test_stdlib_solve_gmres_hilbert(error) + ! This test uses a Hilbert matrix (n=10) to validate the numerical stability + ! of the GMRES solver. The Hilbert matrix is notoriously ill-conditioned + ! (cond(A) ~ 10^13 for n=10), making it an excellent benchmark for + ! orthogonality maintenance. By setting rtol=0 and maxiter=n, we force + ! the construction of the full Krylov basis, which only converges to + ! the reference solution if the MGS with reorthogonalization + ! preserves the basis' orthogonality. type(error_type), allocatable, intent(out) :: error integer, parameter :: n = 10 integer :: r, c #:for k, t, s in R_KINDS_TYPES block - ${t}$, parameter :: tol = 1000*epsilon(0.0_${k}$) + #:if k == 'sp' + ${t}$, parameter :: tol = 1.e-2_${k}$ + #:else + ${t}$, parameter :: tol = 1.e-5_${k}$ + #:endif + ${t}$ :: A(n, n), b(n), x(n), x_ref(n) do r = 1, n do c = 1, n @@ -142,10 +154,9 @@ module test_linalg_solve_iterative b = matmul(A, x_ref) x = 0.0_${k}$ - call stdlib_solve_gmres(A, b, x, rtol=tol, maxiter=n) - + call stdlib_solve_gmres(A, b, x, rtol=0.0_${k}$, maxiter=n) call check(error, norm2(x-x_ref) < tol*norm2(x_ref), & - 'error in GMRES: Hilbert matrix') + 'error in GMRES ${k}$: Hilbert matrix') if (allocated(error)) return end block #:endfor From 38048ce947ef0076e045320754f7132992ee12f8 Mon Sep 17 00:00:00 2001 From: AlexanderGSC Date: Tue, 12 May 2026 18:59:33 +0200 Subject: [PATCH 07/23] fix: rename loop variable to avoid fypp collision --- .../stdlib_linalg_iterative_solvers_gmres.fypp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/linalg_iterative/stdlib_linalg_iterative_solvers_gmres.fypp b/src/linalg_iterative/stdlib_linalg_iterative_solvers_gmres.fypp index f9e83c699..bfd26d12d 100644 --- a/src/linalg_iterative/stdlib_linalg_iterative_solvers_gmres.fypp +++ b/src/linalg_iterative/stdlib_linalg_iterative_solvers_gmres.fypp @@ -21,7 +21,7 @@ contains ${t}$, intent(inout) :: x(:) integer, intent(in) :: maxiter, kdim type(stdlib_solver_workspace_${s}$_type), intent(inout) :: workspace - integer :: i, iter, inner_iter, j, k + integer :: i, iter, inner_iter, j, iorth ${t}$ :: beta, denom, hnext, htmp, norm_sq, norm_sq0, temp, tolsq ${t}$, allocatable :: cs(:), g(:), h(:,:), sn(:), x_base(:), y(:) @@ -71,7 +71,7 @@ contains call A%matvec(z(:,j), w, alpha=one_${s}$, beta=zero_${s}$, op='N') ! Modified Gram Schmidt (MGSR) - do k = 1, 2 ! reorthogonalization + do iorth = 1, 2 ! reorthogonalization do i = 1, j htmp = A%inner_product(v(:,i), w) h(i,j) = h(i,j) + htmp From 36f81b58be12ca6eb99b1e26f7684b8d6301993a Mon Sep 17 00:00:00 2001 From: AlexanderGSC Date: Tue, 12 May 2026 21:17:48 +0200 Subject: [PATCH 08/23] fix: hilbert sp tolerance test --- test/linalg/test_linalg_solve_iterative.fypp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/linalg/test_linalg_solve_iterative.fypp b/test/linalg/test_linalg_solve_iterative.fypp index b0ccccc54..eaca901d9 100644 --- a/test/linalg/test_linalg_solve_iterative.fypp +++ b/test/linalg/test_linalg_solve_iterative.fypp @@ -138,7 +138,7 @@ module test_linalg_solve_iterative #:for k, t, s in R_KINDS_TYPES block #:if k == 'sp' - ${t}$, parameter :: tol = 1.e-2_${k}$ + ${t}$, parameter :: tol = 1.e-1_${k}$ #:else ${t}$, parameter :: tol = 1.e-5_${k}$ #:endif From 87e9f3290dc3fda4737de5b2f61f1ff07485b2ab Mon Sep 17 00:00:00 2001 From: AlexanderGSC Date: Tue, 12 May 2026 21:49:04 +0200 Subject: [PATCH 09/23] fix: hilbert sp tolerance test --- test/linalg/test_linalg_solve_iterative.fypp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/linalg/test_linalg_solve_iterative.fypp b/test/linalg/test_linalg_solve_iterative.fypp index eaca901d9..42fc6aaeb 100644 --- a/test/linalg/test_linalg_solve_iterative.fypp +++ b/test/linalg/test_linalg_solve_iterative.fypp @@ -138,7 +138,7 @@ module test_linalg_solve_iterative #:for k, t, s in R_KINDS_TYPES block #:if k == 'sp' - ${t}$, parameter :: tol = 1.e-1_${k}$ + ${t}$, parameter :: tol = 1.e-0_${k}$ #:else ${t}$, parameter :: tol = 1.e-5_${k}$ #:endif From b9a99e1d90e909a41d55327cba8ba0b86d1141b2 Mon Sep 17 00:00:00 2001 From: AlexanderGSC Date: Wed, 13 May 2026 10:30:37 +0200 Subject: [PATCH 10/23] fix: hilbert dp tolerance test --- test/linalg/test_linalg_solve_iterative.fypp | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/test/linalg/test_linalg_solve_iterative.fypp b/test/linalg/test_linalg_solve_iterative.fypp index 42fc6aaeb..8eb889047 100644 --- a/test/linalg/test_linalg_solve_iterative.fypp +++ b/test/linalg/test_linalg_solve_iterative.fypp @@ -137,10 +137,11 @@ module test_linalg_solve_iterative integer :: r, c #:for k, t, s in R_KINDS_TYPES block - #:if k == 'sp' + !Hilbert sp is almost useless. + #:if k == 'sp' ${t}$, parameter :: tol = 1.e-0_${k}$ #:else - ${t}$, parameter :: tol = 1.e-5_${k}$ + ${t}$, parameter :: tol = 1.e-4_${k}$ #:endif ${t}$ :: A(n, n), b(n), x(n), x_ref(n) From 5fa5583ef84effa34b4cd81c8b956f59cd842e0a Mon Sep 17 00:00:00 2001 From: AlexanderGSC Date: Wed, 13 May 2026 11:04:25 +0200 Subject: [PATCH 11/23] fix: hilbert dp tolerance test for Intel 2024 1 cmake --- test/linalg/test_linalg_solve_iterative.fypp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/linalg/test_linalg_solve_iterative.fypp b/test/linalg/test_linalg_solve_iterative.fypp index 8eb889047..1c9e8c9f9 100644 --- a/test/linalg/test_linalg_solve_iterative.fypp +++ b/test/linalg/test_linalg_solve_iterative.fypp @@ -141,7 +141,7 @@ module test_linalg_solve_iterative #:if k == 'sp' ${t}$, parameter :: tol = 1.e-0_${k}$ #:else - ${t}$, parameter :: tol = 1.e-4_${k}$ + ${t}$, parameter :: tol = 1.e-3_${k}$ #:endif ${t}$ :: A(n, n), b(n), x(n), x_ref(n) From 54769fef2d65ebc0a56b5f7eb6db8234614c320a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jos=C3=A9=20Alves?= <102541118+jalvesz@users.noreply.github.com> Date: Mon, 18 May 2026 08:47:57 +0200 Subject: [PATCH 12/23] Update test/linalg/test_linalg_solve_iterative.fypp Co-authored-by: Jeremie Vandenplas --- test/linalg/test_linalg_solve_iterative.fypp | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/test/linalg/test_linalg_solve_iterative.fypp b/test/linalg/test_linalg_solve_iterative.fypp index 1c9e8c9f9..839e98dcf 100644 --- a/test/linalg/test_linalg_solve_iterative.fypp +++ b/test/linalg/test_linalg_solve_iterative.fypp @@ -92,9 +92,8 @@ module test_linalg_solve_iterative 0, 4, -1, 1, & 1, 0, 3, 2, & 2, -1, 0, 6], [4,4]) - ${t}$ :: x(4), load(4), xref(4) - - xref = [1.0_${k}$, 2.0_${k}$, -1.0_${k}$, 0.5_${k}$] + ${t}$ :: x(4), load(4) + ${t}$, parameter :: xref(*) = [1.0_${k}$, 2.0_${k}$, -1.0_${k}$, 0.5_${k}$] load = matmul(A, xref) x = 0.0_${k}$ From cce736827e85cbf3454659261a081991bda0c54c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jos=C3=A9=20Alves?= <102541118+jalvesz@users.noreply.github.com> Date: Mon, 18 May 2026 08:48:09 +0200 Subject: [PATCH 13/23] Update test/linalg/test_linalg_solve_iterative.fypp Co-authored-by: Jeremie Vandenplas --- test/linalg/test_linalg_solve_iterative.fypp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/test/linalg/test_linalg_solve_iterative.fypp b/test/linalg/test_linalg_solve_iterative.fypp index 839e98dcf..f43f121e2 100644 --- a/test/linalg/test_linalg_solve_iterative.fypp +++ b/test/linalg/test_linalg_solve_iterative.fypp @@ -108,9 +108,9 @@ module test_linalg_solve_iterative ${t}$ :: A(3,3) = reshape([${t}$ :: 10, 1, 2, & 1, 10, 3, & 2, 3, 10], [3,3]) - ${t}$ :: x(3), load(3), xref(3) - - xref = [(137._${k}$/218._${k}$), -(9._${k}$/109._${k}$), (87._${k}$/218._${k}$)] + ${t}$ :: x(3) + ${t}$, parameter :: xref(:) = [(137._${k}$/218._${k}$), -(9._${k}$/109._${k}$), (87._${k}$/218._${k}$)] + ${t}$, parameter :: load(*) = [7.0_${k}$, 1.0_${k}$, 5.0_${k}$] load = [7.0_${k}$, 1.0_${k}$, 5.0_${k}$] x = [0.2_${k}$, -0.1_${k}$, 0.3_${k}$] From 450e7c04db9ddeafd9471e09406d5ad2d24cdf5f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jos=C3=A9=20Alves?= <102541118+jalvesz@users.noreply.github.com> Date: Mon, 18 May 2026 08:48:30 +0200 Subject: [PATCH 14/23] Update test/linalg/test_linalg_solve_iterative.fypp Co-authored-by: Jeremie Vandenplas --- test/linalg/test_linalg_solve_iterative.fypp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/linalg/test_linalg_solve_iterative.fypp b/test/linalg/test_linalg_solve_iterative.fypp index f43f121e2..1fa83a5e1 100644 --- a/test/linalg/test_linalg_solve_iterative.fypp +++ b/test/linalg/test_linalg_solve_iterative.fypp @@ -105,7 +105,7 @@ module test_linalg_solve_iterative block ${t}$, parameter :: tol = 1000*epsilon(0.0_${k}$) - ${t}$ :: A(3,3) = reshape([${t}$ :: 10, 1, 2, & + ${t}$, parameter :: A(3,3) = reshape([${t}$ :: 10, 1, 2, & 1, 10, 3, & 2, 3, 10], [3,3]) ${t}$ :: x(3) From b35926e057fc6e2d2b9883a9c18e9c3a81e866f4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jos=C3=A9=20Alves?= <102541118+jalvesz@users.noreply.github.com> Date: Mon, 18 May 2026 08:48:41 +0200 Subject: [PATCH 15/23] Update test/linalg/test_linalg_solve_iterative.fypp Co-authored-by: Jeremie Vandenplas --- test/linalg/test_linalg_solve_iterative.fypp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/linalg/test_linalg_solve_iterative.fypp b/test/linalg/test_linalg_solve_iterative.fypp index 1fa83a5e1..374551d71 100644 --- a/test/linalg/test_linalg_solve_iterative.fypp +++ b/test/linalg/test_linalg_solve_iterative.fypp @@ -88,7 +88,7 @@ module test_linalg_solve_iterative #:for k, t, s in R_KINDS_TYPES block ${t}$, parameter :: tol = 1000*epsilon(0.0_${k}$) - ${t}$ :: A(4,4) = reshape([${t}$ :: 5, 1, 2, 0, & + ${t}$, parameter :: A(4,4) = reshape([${t}$ :: 5, 1, 2, 0, & 0, 4, -1, 1, & 1, 0, 3, 2, & 2, -1, 0, 6], [4,4]) From 902054c4285291407532a63e90142e885939045d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jos=C3=A9=20Alves?= <102541118+jalvesz@users.noreply.github.com> Date: Mon, 18 May 2026 08:48:53 +0200 Subject: [PATCH 16/23] Update test/linalg/test_linalg_solve_iterative.fypp Co-authored-by: Jeremie Vandenplas --- test/linalg/test_linalg_solve_iterative.fypp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/test/linalg/test_linalg_solve_iterative.fypp b/test/linalg/test_linalg_solve_iterative.fypp index 374551d71..6fb93ea49 100644 --- a/test/linalg/test_linalg_solve_iterative.fypp +++ b/test/linalg/test_linalg_solve_iterative.fypp @@ -143,7 +143,8 @@ module test_linalg_solve_iterative ${t}$, parameter :: tol = 1.e-3_${k}$ #:endif - ${t}$ :: A(n, n), b(n), x(n), x_ref(n) + ${t}$ :: A(n, n), b(n), x(n) + ${t}$, parameter :: x_ref(n) = 1.0_${k}$ do r = 1, n do c = 1, n A(r, c) = 1.0_${k}$ / real(r + c - 1, ${k}$) From 825fd51e31d85f0ad03e822d04614e87a63fd4a8 Mon Sep 17 00:00:00 2001 From: Jose Alves Date: Mon, 18 May 2026 09:53:15 +0200 Subject: [PATCH 17/23] fix declarations --- test/linalg/test_linalg_solve_iterative.fypp | 20 +++++++++----------- 1 file changed, 9 insertions(+), 11 deletions(-) diff --git a/test/linalg/test_linalg_solve_iterative.fypp b/test/linalg/test_linalg_solve_iterative.fypp index 6fb93ea49..6b2b07016 100644 --- a/test/linalg/test_linalg_solve_iterative.fypp +++ b/test/linalg/test_linalg_solve_iterative.fypp @@ -109,9 +109,8 @@ module test_linalg_solve_iterative 1, 10, 3, & 2, 3, 10], [3,3]) ${t}$ :: x(3) - ${t}$, parameter :: xref(:) = [(137._${k}$/218._${k}$), -(9._${k}$/109._${k}$), (87._${k}$/218._${k}$)] - ${t}$, parameter :: load(*) = [7.0_${k}$, 1.0_${k}$, 5.0_${k}$] - load = [7.0_${k}$, 1.0_${k}$, 5.0_${k}$] + ${t}$, parameter :: xref(3) = [(137._${k}$/218._${k}$), -(9._${k}$/109._${k}$), (87._${k}$/218._${k}$)] + ${t}$, parameter :: load(3) = [7.0_${k}$, 1.0_${k}$, 5.0_${k}$] x = [0.2_${k}$, -0.1_${k}$, 0.3_${k}$] call stdlib_solve_gmres(A, load, x, rtol=1.e-10_${k}$, precond=pc_jacobi) @@ -124,13 +123,13 @@ module test_linalg_solve_iterative end subroutine test_stdlib_solve_gmres subroutine test_stdlib_solve_gmres_hilbert(error) - ! This test uses a Hilbert matrix (n=10) to validate the numerical stability - ! of the GMRES solver. The Hilbert matrix is notoriously ill-conditioned - ! (cond(A) ~ 10^13 for n=10), making it an excellent benchmark for - ! orthogonality maintenance. By setting rtol=0 and maxiter=n, we force - ! the construction of the full Krylov basis, which only converges to - ! the reference solution if the MGS with reorthogonalization - ! preserves the basis' orthogonality. + ! This test uses a Hilbert matrix (n=10) to validate the numerical stability + ! of the GMRES solver. The Hilbert matrix is notoriously ill-conditioned + ! (cond(A) ~ 10^13 for n=10), making it an excellent benchmark for + ! orthogonality maintenance. By setting rtol=0 and maxiter=n, we force + ! the construction of the full Krylov basis, which only converges to + ! the reference solution if the MGS with reorthogonalization + ! preserves the basis' orthogonality. type(error_type), allocatable, intent(out) :: error integer, parameter :: n = 10 integer :: r, c @@ -151,7 +150,6 @@ module test_linalg_solve_iterative end do end do - x_ref = 1.0_${k}$ b = matmul(A, x_ref) x = 0.0_${k}$ From 6ebfe6bc9e6cbd9af67424a9d1b7e053799611ec Mon Sep 17 00:00:00 2001 From: jalvesz Date: Mon, 15 Jun 2026 21:59:28 +0200 Subject: [PATCH 18/23] make gmres implementation flexible by allowing switching between memory efficiency or runtime speed --- doc/specs/stdlib_linalg_iterative_solvers.md | 94 ++++++++++++++++++- src/linalg_iterative/CMakeLists.txt | 6 +- .../stdlib_linalg_iterative_solvers.fypp | 6 +- ...stdlib_linalg_iterative_solvers_gmres.fypp | 78 ++++++++------- test/linalg/test_linalg_solve_iterative.fypp | 16 ++++ 5 files changed, 164 insertions(+), 36 deletions(-) diff --git a/doc/specs/stdlib_linalg_iterative_solvers.md b/doc/specs/stdlib_linalg_iterative_solvers.md index 6cebb61aa..9a856029d 100644 --- a/doc/specs/stdlib_linalg_iterative_solvers.md +++ b/doc/specs/stdlib_linalg_iterative_solvers.md @@ -348,4 +348,96 @@ The method uses 8 auxiliary vectors internally, requiring more memory than simpl #### Example 2 ```fortran {!example/linalg/example_solve_bicgstab_wilkinson.f90!} -``` \ No newline at end of file +``` + + +### `stdlib_solve_gmres_kernel` subroutine + +#### Description + +Implements the restarted Generalized Minimal Residual (GMRES) method for solving the linear system \( Ax = b \), where \( A \) is a general linear operator defined via the `stdlib_linop` type. This is the core implementation, allowing custom matrix types, custom preconditioners, and user-managed workspace. + +#### Syntax + +`call ` [[stdlib_linalg_iterative_solvers(module):stdlib_solve_gmres_kernel(interface)]] ` (A, M, b, x, rtol, atol, maxiter, kdim, workspace, compact)` + +#### Status + +Experimental + +#### Class + +Subroutine + +#### Argument(s) + +`A`: `class(stdlib_linop__type)` defining the linear operator. This argument is `intent(in)`. + +`M`: `class(stdlib_linop__type)` defining the preconditioner linear operator. This argument is `intent(in)`. + +`b`: 1-D array of `real()` defining the loading conditions of the linear system. This argument is `intent(in)`. + +`x`: 1-D array of `real()` which serves as the input initial guess and the output solution. This argument is `intent(inout)`. + +`rtol` and `atol`: scalars of type `real()` specifying the convergence test. For convergence, the following criterion is used \( || b - Ax ||^2 <= max(rtol^2 * || b ||^2 , atol^2 ) \). These arguments are `intent(in)`. + +`maxiter`: scalar of type `integer` defining the maximum allowed number of iterations. This argument is `intent(in)`. + +`kdim`: scalar of type `integer` defining the Krylov subspace size before restart. This argument is `intent(in)`. + +`workspace`: scalar derived type of `type(stdlib_solver_workspace__type)` holding the work array for the solver. This argument is `intent(inout)`. + +`compact`: scalar of type `logical`. If true, GMRES stores only one preconditioned basis vector at a time and rebuilds the cycle-end correction through the preconditioner, reducing memory usage. If false, GMRES caches the full preconditioned basis for a faster cycle-end update at the cost of additional memory. Default is `.true.`. This argument is `intent(in)`. + +#### Note + +GMRES trades increasing memory and orthogonalization cost for robust convergence on general non-symmetric systems. The `compact` option allows choosing between a lower-memory layout and a slightly faster restart update. + + +### `stdlib_solve_gmres` subroutine + +#### Description + +Provides a user-friendly interface to the restarted GMRES method for solving \( Ax = b \), supporting `dense` and `CSR__type` matrices. GMRES is suitable for general non-symmetric systems and supports optional preconditioning, workspace reuse, and a storage-mode choice for balancing memory use against cycle-end update speed. + +#### Syntax + +`call ` [[stdlib_linalg_iterative_solvers(module):stdlib_solve_gmres(interface)]] ` (A, b, x [, di, rtol, atol, maxiter, restart, kdim, precond, M, workspace, compact])` + +#### Status + +Experimental + +#### Class + +Subroutine + +#### Argument(s) + +`A`: `dense` or `CSR__type` matrix defining the linear system. This argument is `intent(in)`. + +`b`: 1-D array of `real()` defining the loading conditions of the linear system. This argument is `intent(in)`. + +`x`: 1-D array of `real()` which serves as the input initial guess and the output solution. This argument is `intent(inout)`. + +`di` (optional): 1-D mask array of type `logical(int8)` defining the degrees of freedom subject to dirichlet boundary conditions. The actual boundary condition values should be stored in the `b` load array. This argument is `intent(in)`. + +`rtol` and `atol` (optional): scalars of type `real()` specifying the convergence test. For convergence, the following criterion is used \( || b - Ax ||^2 <= max(rtol^2 * || b ||^2 , atol^2 ) \). Default values are `rtol=1.e-5` and `atol=epsilon(1._)`. These arguments are `intent(in)`. + +`maxiter` (optional): scalar of type `integer` defining the maximum allowed number of iterations. If no value is given, a default of `N` is set, where `N = size(b)`. This argument is `intent(in)`. + +`restart` (optional): scalar of type `logical` indicating whether to restart the iteration with zero initial guess. Default is `.true.`. This argument is `intent(in)`. + +`kdim` (optional): scalar of type `integer` defining the Krylov subspace size before restart. If no value is given, a default of `min(30, N)` is used. This argument is `intent(in)`. + +`precond` (optional): scalar of type `integer` enabling to switch among the default preconditioners available with the following enum (`pc_none`, `pc_jacobi`). If no value is given, no preconditioning will be applied. This argument is `intent(in)`. + +`M` (optional): scalar derived type of `class(stdlib_linop__type)` defining a custom preconditioner linear operator. If given, `precond` will have no effect, and a pointer is set to this custom preconditioner. This argument is `intent(in)`. + +`workspace` (optional): scalar derived type of `type(stdlib_solver_workspace__type)` holding the work temporal array for the solver. If the user passes its own `workspace`, then internally a pointer is set to it, otherwise, memory will be internally allocated and deallocated before exiting the procedure. This argument is `intent(inout)`. + +`compact` (optional): scalar of type `logical`. If true, GMRES stores only one preconditioned basis vector per iteration cycle and minimizes memory use. If false, GMRES caches the full preconditioned basis to speed up the cycle-end update. Default is `.true.`. This argument is `intent(in)`. + +#### Note + +GMRES is especially useful for general non-symmetric systems where CG and PCG do not apply. Its main cost is the Krylov basis storage and orthogonalization work within each restart cycle. With `compact=.true.`, the solver reduces workspace requirements significantly. With `compact=.false.`, it retains the previous cached-basis update strategy. \ No newline at end of file diff --git a/src/linalg_iterative/CMakeLists.txt b/src/linalg_iterative/CMakeLists.txt index 989b1c951..adea11ca4 100644 --- a/src/linalg_iterative/CMakeLists.txt +++ b/src/linalg_iterative/CMakeLists.txt @@ -14,4 +14,8 @@ set(linalg_iterative_f90Files configure_stdlib_target(${PROJECT_NAME}_linalg_iterative linalg_iterative_f90Files linalg_iterative_fppFiles linalg_iterative_cppFiles) -target_link_libraries(${PROJECT_NAME}_linalg_iterative PUBLIC ${PROJECT_NAME}_linalg ${PROJECT_NAME}_sparse) +target_link_libraries(${PROJECT_NAME}_linalg_iterative + PUBLIC + ${PROJECT_NAME}_blas + ${PROJECT_NAME}_linalg + ${PROJECT_NAME}_sparse) diff --git a/src/linalg_iterative/stdlib_linalg_iterative_solvers.fypp b/src/linalg_iterative/stdlib_linalg_iterative_solvers.fypp index f3d2a43fe..b91c19246 100644 --- a/src/linalg_iterative/stdlib_linalg_iterative_solvers.fypp +++ b/src/linalg_iterative/stdlib_linalg_iterative_solvers.fypp @@ -168,7 +168,7 @@ module stdlib_linalg_iterative_solvers !! [Specifications](../page/specs/stdlib_linalg_iterative_solvers.html#stdlib_solve_gmres_kernel) interface stdlib_solve_gmres_kernel #:for k, t, s in R_KINDS_TYPES - module subroutine stdlib_solve_gmres_kernel_${s}$(A,M,b,x,rtol,atol,maxiter,kdim,workspace) + module subroutine stdlib_solve_gmres_kernel_${s}$(A,M,b,x,rtol,atol,maxiter,kdim,workspace,compact) class(stdlib_linop_${s}$_type), intent(in) :: A !! linear operator class(stdlib_linop_${s}$_type), intent(in) :: M !! preconditioner linear operator ${t}$, intent(in) :: b(:) !! right-hand side vector @@ -178,6 +178,7 @@ module stdlib_linalg_iterative_solvers integer, intent(in) :: maxiter !! maximum number of iterations integer, intent(in) :: kdim !! Krylov subspace size before restart type(stdlib_solver_workspace_${s}$_type), intent(inout) :: workspace !! workspace for the solver + logical, intent(in) :: compact !! keep only one preconditioned basis vector and rebuild the cycle update at restart end subroutine #:endfor end interface @@ -247,7 +248,7 @@ module stdlib_linalg_iterative_solvers interface stdlib_solve_gmres #:for matrix in MATRIX_TYPES #:for k, t, s in R_KINDS_TYPES - module subroutine stdlib_solve_gmres_${matrix}$_${s}$(A,b,x,di,rtol,atol,maxiter,restart,kdim,precond,M,workspace) + module subroutine stdlib_solve_gmres_${matrix}$_${s}$(A,b,x,di,rtol,atol,maxiter,restart,kdim,precond,M,workspace,compact) !! linear operator matrix #:if matrix == "dense" ${t}$, intent(in) :: A(:,:) @@ -265,6 +266,7 @@ module stdlib_linalg_iterative_solvers integer, intent(in), optional :: precond !! preconditioner method enumerator class(stdlib_linop_${s}$_type), optional, intent(in), target :: M !! preconditioner linear operator type(stdlib_solver_workspace_${s}$_type), optional, intent(inout), target :: workspace !! workspace for the solver + logical, intent(in), optional :: compact !! if true, favor lower memory use over cycle-end update speed end subroutine #:endfor #:endfor diff --git a/src/linalg_iterative/stdlib_linalg_iterative_solvers_gmres.fypp b/src/linalg_iterative/stdlib_linalg_iterative_solvers_gmres.fypp index bfd26d12d..013269eb2 100644 --- a/src/linalg_iterative/stdlib_linalg_iterative_solvers_gmres.fypp +++ b/src/linalg_iterative/stdlib_linalg_iterative_solvers_gmres.fypp @@ -9,29 +9,33 @@ submodule(stdlib_linalg_iterative_solvers) stdlib_linalg_iterative_gmres use stdlib_sparse use stdlib_constants use stdlib_optval, only: optval + use stdlib_linalg_blas, only: gemv implicit none contains #:for k, t, s in R_KINDS_TYPES - module subroutine stdlib_solve_gmres_kernel_${s}$(A,M,b,x,rtol,atol,maxiter,kdim,workspace) + module subroutine stdlib_solve_gmres_kernel_${s}$(A,M,b,x,rtol,atol,maxiter,kdim,workspace,compact) class(stdlib_linop_${s}$_type), intent(in) :: A class(stdlib_linop_${s}$_type), intent(in) :: M ${t}$, intent(in) :: b(:), rtol, atol ${t}$, intent(inout) :: x(:) integer, intent(in) :: maxiter, kdim type(stdlib_solver_workspace_${s}$_type), intent(inout) :: workspace - integer :: i, iter, inner_iter, j, iorth + logical, intent(in) :: compact + integer :: i, iter, inner_iter, j, j_final, iorth, jz, zbase ${t}$ :: beta, denom, hnext, htmp, norm_sq, norm_sq0, temp, tolsq - ${t}$, allocatable :: cs(:), g(:), h(:,:), sn(:), x_base(:), y(:) + ${t}$, allocatable :: cs(:), g(:), h(:,:), sn(:), y(:) - allocate(h(kdim+1, kdim), cs(kdim), sn(kdim), g(kdim+1), y(kdim), x_base(size(x))) + allocate(h(kdim+1, kdim), cs(kdim), sn(kdim), g(kdim+1), y(kdim) ) + zbase = kdim + 4 associate( r => workspace%tmp(:,1), & - w => workspace%tmp(:,2), & - v => workspace%tmp(:,3:kdim+3), & - z => workspace%tmp(:,kdim+4:2*kdim+3) ) - + w => workspace%tmp(:,2), & + v => workspace%tmp(:,3:kdim+3), & + z => workspace%tmp(:,zbase:zbase+merge(0,kdim-1,compact)) ) + + iter = 0 ! Initialize convergence targets from the right-hand side norm. norm_sq0 = A%inner_product(b, b) tolsq = max(rtol*rtol*norm_sq0, atol*atol) @@ -40,21 +44,16 @@ contains r = b call A%matvec(x, r, alpha=-one_${s}$, beta=one_${s}$, op='N') norm_sq = A%inner_product(r, r) - if (associated(workspace%callback)) call workspace%callback(x, norm_sq, 0) + if (associated(workspace%callback)) call workspace%callback(x, norm_sq, iter) - if (norm_sq <= tolsq .or. maxiter <= 0) then - deallocate(h, cs, sn, g, y, x_base) - return - end if + if (norm_sq <= tolsq) return - iter = 0 do while (iter < maxiter .and. norm_sq >= tolsq) ! Start a new GMRES cycle from the current residual. beta = sqrt(max(norm_sq, zero_${s}$)) if (beta <= epsilon(one_${s}$)) exit inner_iter = min(kdim, maxiter - iter) - x_base = x h = zero_${s}$ cs = zero_${s}$ sn = zero_${s}$ @@ -65,12 +64,14 @@ contains v(:,1) = r / beta g(1) = beta + j_final = 0 do j = 1, inner_iter ! Run Arnoldi with the preconditioned basis vector. - call M%matvec(v(:,j), z(:,j), alpha=one_${s}$, beta=zero_${s}$, op='N') - call A%matvec(z(:,j), w, alpha=one_${s}$, beta=zero_${s}$, op='N') + jz = merge(1, j, compact) + call M%matvec(v(:,j), z(:,jz), alpha=one_${s}$, beta=zero_${s}$, op='N') + call A%matvec(z(:,jz), w, alpha=one_${s}$, beta=zero_${s}$, op='N') - ! Modified Gram Schmidt (MGSR) + ! Modified Gram Schmidt (MGSR) do iorth = 1, 2 ! reorthogonalization do i = 1, j htmp = A%inner_product(v(:,i), w) @@ -112,21 +113,34 @@ contains g(j+1) = -sn(j) * g(j) + cs(j) * g(j+1) g(j) = temp - ! Solve the reduced system and update the current iterate. - call upper_triangular_solve(h, g, y, j) - x = x_base - do i = 1, j - x = x + y(i) * z(:,i) - end do - - ! Track the residual norm estimate for convergence. + ! Cheap residual-norm estimate; no solution rebuild needed. norm_sq = g(j+1) * g(j+1) iter = iter + 1 + j_final = j + ! x is kept constant for the logger through out the inner iteration if (associated(workspace%callback)) call workspace%callback(x, norm_sq, iter) if (norm_sq < tolsq .or. hnext <= epsilon(one_${s}$) .or. iter >= maxiter) exit end do + ! Cycle-end update from the least-squares correction. + if (j_final > 0) then + call upper_triangular_solve(h, g, y, j_final) + if (compact) then + call gemv('N', m=size(x), n=j_final, alpha=one_${s}$, & + a=v, lda=size(v,1), & + x=y, incx=1, & + beta=zero_${s}$, y=w, incy=1) + call M%matvec(w, z(:,1), alpha=one_${s}$, beta=zero_${s}$, op='N') + x = x + z(:,1) + else + call gemv('N', m=size(x), n=j_final, alpha=one_${s}$, & + a=z, lda=size(z,1), & + x=y, incx=1, & + beta=one_${s}$, y=x, incy=1) + end if + end if + if (norm_sq < tolsq .or. iter >= maxiter) exit ! Refresh the residual before the next restarted cycle. @@ -136,8 +150,6 @@ contains end do end associate - deallocate(h, cs, sn, g, y, x_base) - contains subroutine upper_triangular_solve(h, g, y, n) @@ -161,7 +173,7 @@ contains #:for matrix in MATRIX_TYPES #:for k, t, s in R_KINDS_TYPES - module subroutine stdlib_solve_gmres_${matrix}$_${s}$(A,b,x,di,rtol,atol,maxiter,restart,kdim,precond,M,workspace) + module subroutine stdlib_solve_gmres_${matrix}$_${s}$(A,b,x,di,rtol,atol,maxiter,restart,kdim,precond,M,workspace,compact) #:if matrix == "dense" use stdlib_linalg, only: diag ${t}$, intent(in) :: A(:,:) @@ -177,12 +189,13 @@ contains integer, intent(in), optional :: precond class(stdlib_linop_${s}$_type), optional, intent(in), target :: M type(stdlib_solver_workspace_${s}$_type), optional, intent(inout), target :: workspace + logical, intent(in), optional :: compact type(stdlib_linop_${s}$_type) :: op type(stdlib_linop_${s}$_type), pointer :: M_ => null() type(stdlib_solver_workspace_${s}$_type), pointer :: workspace_ integer :: kdim_, maxiter_, n, ncols, precond_ ${t}$ :: rtol_, atol_ - logical :: restart_ + logical :: compact_, restart_ logical(int8), pointer :: di_(:) ${t}$, allocatable :: diagonal(:) @@ -190,10 +203,11 @@ contains maxiter_ = optval(x=maxiter, default=n) kdim_ = max(1, min(optval(x=kdim, default=min(30, n)), n)) restart_ = optval(x=restart, default=.true.) + compact_ = optval(x=compact, default=.true.) rtol_ = optval(x=rtol, default=1.e-5_${s}$) atol_ = optval(x=atol, default=epsilon(one_${s}$)) precond_ = optval(x=precond, default=pc_none) - ncols = 2 * kdim_ + stdlib_size_wksp_gmres + ncols = kdim_ + stdlib_size_wksp_gmres + merge(1, kdim_, compact_) if (present(M)) then M_ => M @@ -237,7 +251,7 @@ contains if (restart_) x = zero_${s}$ x = merge(b, x, di_) - call stdlib_solve_gmres_kernel(op, M_, b, x, rtol_, atol_, maxiter_, kdim_, workspace_) + call stdlib_solve_gmres_kernel(op, M_, b, x, rtol_, atol_, maxiter_, kdim_, workspace_, compact=compact_) if (.not.present(di)) deallocate(di_) di_ => null() diff --git a/test/linalg/test_linalg_solve_iterative.fypp b/test/linalg/test_linalg_solve_iterative.fypp index 6b2b07016..e3c47b32c 100644 --- a/test/linalg/test_linalg_solve_iterative.fypp +++ b/test/linalg/test_linalg_solve_iterative.fypp @@ -119,6 +119,22 @@ module test_linalg_solve_iterative if (allocated(error)) return end block + block + ${t}$, parameter :: tol = 1000*epsilon(0.0_${k}$) + ${t}$, parameter :: A(3,3) = reshape([${t}$ :: 10, 1, 2, & + 1, 10, 3, & + 2, 3, 10], [3,3]) + ${t}$ :: x(3) + ${t}$, parameter :: xref(3) = [(137._${k}$/218._${k}$), -(9._${k}$/109._${k}$), (87._${k}$/218._${k}$)] + ${t}$, parameter :: load(3) = [7.0_${k}$, 1.0_${k}$, 5.0_${k}$] + x = [0.2_${k}$, -0.1_${k}$, 0.3_${k}$] + + call stdlib_solve_gmres(A, load, x, rtol=1.e-10_${k}$, precond=pc_jacobi, compact=.false.) + + call check(error, norm2(x-xref) Date: Mon, 15 Jun 2026 22:05:46 +0200 Subject: [PATCH 19/23] add example --- doc/specs/stdlib_linalg_iterative_solvers.md | 8 ++++++- example/linalg/CMakeLists.txt | 1 + example/linalg/example_solve_gmres.f90 | 24 ++++++++++++++++++++ 3 files changed, 32 insertions(+), 1 deletion(-) create mode 100644 example/linalg/example_solve_gmres.f90 diff --git a/doc/specs/stdlib_linalg_iterative_solvers.md b/doc/specs/stdlib_linalg_iterative_solvers.md index 9a856029d..f2c9173b9 100644 --- a/doc/specs/stdlib_linalg_iterative_solvers.md +++ b/doc/specs/stdlib_linalg_iterative_solvers.md @@ -440,4 +440,10 @@ Subroutine #### Note -GMRES is especially useful for general non-symmetric systems where CG and PCG do not apply. Its main cost is the Krylov basis storage and orthogonalization work within each restart cycle. With `compact=.true.`, the solver reduces workspace requirements significantly. With `compact=.false.`, it retains the previous cached-basis update strategy. \ No newline at end of file +GMRES is especially useful for general non-symmetric systems where CG and PCG do not apply. Its main cost is the Krylov basis storage and orthogonalization work within each restart cycle. With `compact=.true.`, the solver reduces workspace requirements significantly. With `compact=.false.`, it retains the previous cached-basis update strategy. + +#### Example + +```fortran +{!example/linalg/example_solve_gmres.f90!} +``` \ No newline at end of file diff --git a/example/linalg/CMakeLists.txt b/example/linalg/CMakeLists.txt index dd65b48e4..ae2e0f6f3 100644 --- a/example/linalg/CMakeLists.txt +++ b/example/linalg/CMakeLists.txt @@ -48,6 +48,7 @@ if (STDLIB_LINALG_ITERATIVE) ADD_EXAMPLE(solve_bicgstab) ADD_EXAMPLE(solve_bicgstab_wilkinson) ADD_EXAMPLE(solve_cg) + ADD_EXAMPLE(solve_gmres) ADD_EXAMPLE(solve_pcg) ADD_EXAMPLE(solve_custom) endif() diff --git a/example/linalg/example_solve_gmres.f90 b/example/linalg/example_solve_gmres.f90 new file mode 100644 index 000000000..02ea8ad12 --- /dev/null +++ b/example/linalg/example_solve_gmres.f90 @@ -0,0 +1,24 @@ +program example_solve_gmres + use stdlib_kinds, only: dp + use stdlib_linalg_iterative_solvers, only: stdlib_solve_gmres, pc_jacobi + implicit none + + real(dp), parameter :: A(4,4) = reshape([5.0_dp, 1.0_dp, 2.0_dp, 0.0_dp, & + 0.0_dp, 4.0_dp, -1.0_dp, 1.0_dp, & + 1.0_dp, 0.0_dp, 3.0_dp, 2.0_dp, & + 2.0_dp, -1.0_dp, 0.0_dp, 6.0_dp], [4,4]) + real(dp), parameter :: x0(4) = [0.2_dp, -0.1_dp, 0.3_dp, 0.0_dp] + real(dp), parameter :: xref(4) = [1.0_dp, 2.0_dp, -1.0_dp, 0.5_dp] + real(dp) :: rhs(4), x(4) + + rhs = matmul(A, xref) + + ! GMRES lets the user tune the restart size and the storage mode. + x = x0 + call stdlib_solve_gmres(A, rhs, x, restart=.false., kdim=2, maxiter=12, precond=pc_jacobi) + print *, 'compact:', x + + x = x0 + call stdlib_solve_gmres(A, rhs, x, restart=.false., kdim=2, maxiter=12, precond=pc_jacobi, compact=.false.) + print *, 'cached :', x +end program example_solve_gmres \ No newline at end of file From 3ba5a4f7255081f8c776a6cbda9b69bb6ee91c41 Mon Sep 17 00:00:00 2001 From: Jose Alves Date: Tue, 16 Jun 2026 08:08:56 +0200 Subject: [PATCH 20/23] convert static enum to integer helper function to determine the number of colums of the gmres workspace --- .../stdlib_linalg_iterative_solvers.fypp | 10 +++++++++- .../stdlib_linalg_iterative_solvers_gmres.fypp | 2 +- 2 files changed, 10 insertions(+), 2 deletions(-) diff --git a/src/linalg_iterative/stdlib_linalg_iterative_solvers.fypp b/src/linalg_iterative/stdlib_linalg_iterative_solvers.fypp index b91c19246..9bc1b6f8f 100644 --- a/src/linalg_iterative/stdlib_linalg_iterative_solvers.fypp +++ b/src/linalg_iterative/stdlib_linalg_iterative_solvers.fypp @@ -16,7 +16,6 @@ module stdlib_linalg_iterative_solvers enumerator :: stdlib_size_wksp_cg = 3 enumerator :: stdlib_size_wksp_pcg = 4 enumerator :: stdlib_size_wksp_bicgstab = 8 - enumerator :: stdlib_size_wksp_gmres = 3 end enum public :: stdlib_size_wksp_cg, stdlib_size_wksp_pcg, stdlib_size_wksp_bicgstab, stdlib_size_wksp_gmres @@ -274,6 +273,15 @@ module stdlib_linalg_iterative_solvers public :: stdlib_solve_gmres contains + + !------------------------------------------------------------------ + ! helpers + !------------------------------------------------------------------ + integer(int32) function stdlib_size_wksp_gmres(kdim,compact) + integer(int32), intent(in) :: kdim + logical, intent(in) :: compact + stdlib_size_wksp_gmres = 3 + kdim + merge(1, kdim, compact) + end function !------------------------------------------------------------------ ! defaults diff --git a/src/linalg_iterative/stdlib_linalg_iterative_solvers_gmres.fypp b/src/linalg_iterative/stdlib_linalg_iterative_solvers_gmres.fypp index 013269eb2..24ce77647 100644 --- a/src/linalg_iterative/stdlib_linalg_iterative_solvers_gmres.fypp +++ b/src/linalg_iterative/stdlib_linalg_iterative_solvers_gmres.fypp @@ -207,7 +207,7 @@ contains rtol_ = optval(x=rtol, default=1.e-5_${s}$) atol_ = optval(x=atol, default=epsilon(one_${s}$)) precond_ = optval(x=precond, default=pc_none) - ncols = kdim_ + stdlib_size_wksp_gmres + merge(1, kdim_, compact_) + ncols = stdlib_size_wksp_gmres(kdim_,compact_) if (present(M)) then M_ => M From 0069cd33269c7b791eec1eb4c1f41cceaab8b334 Mon Sep 17 00:00:00 2001 From: Jose Alves Date: Tue, 16 Jun 2026 17:19:41 +0200 Subject: [PATCH 21/23] simplify iterations logic --- .../stdlib_linalg_iterative_solvers_gmres.fypp | 18 ++++++++---------- 1 file changed, 8 insertions(+), 10 deletions(-) diff --git a/src/linalg_iterative/stdlib_linalg_iterative_solvers_gmres.fypp b/src/linalg_iterative/stdlib_linalg_iterative_solvers_gmres.fypp index 24ce77647..31c4ce79e 100644 --- a/src/linalg_iterative/stdlib_linalg_iterative_solvers_gmres.fypp +++ b/src/linalg_iterative/stdlib_linalg_iterative_solvers_gmres.fypp @@ -23,7 +23,7 @@ contains integer, intent(in) :: maxiter, kdim type(stdlib_solver_workspace_${s}$_type), intent(inout) :: workspace logical, intent(in) :: compact - integer :: i, iter, inner_iter, j, j_final, iorth, jz, zbase + integer :: i, iter, j, j_final, iorth, jz, zbase ${t}$ :: beta, denom, hnext, htmp, norm_sq, norm_sq0, temp, tolsq ${t}$, allocatable :: cs(:), g(:), h(:,:), sn(:), y(:) @@ -49,11 +49,11 @@ contains if (norm_sq <= tolsq) return do while (iter < maxiter .and. norm_sq >= tolsq) + iter = iter + 1 ! Start a new GMRES cycle from the current residual. beta = sqrt(max(norm_sq, zero_${s}$)) if (beta <= epsilon(one_${s}$)) exit - inner_iter = min(kdim, maxiter - iter) h = zero_${s}$ cs = zero_${s}$ sn = zero_${s}$ @@ -65,7 +65,7 @@ contains g(1) = beta j_final = 0 - do j = 1, inner_iter + do j = 1, kdim ! Run Arnoldi with the preconditioned basis vector. jz = merge(1, j, compact) call M%matvec(v(:,j), z(:,jz), alpha=one_${s}$, beta=zero_${s}$, op='N') @@ -115,12 +115,9 @@ contains ! Cheap residual-norm estimate; no solution rebuild needed. norm_sq = g(j+1) * g(j+1) - iter = iter + 1 j_final = j - ! x is kept constant for the logger through out the inner iteration - if (associated(workspace%callback)) call workspace%callback(x, norm_sq, iter) - if (norm_sq < tolsq .or. hnext <= epsilon(one_${s}$) .or. iter >= maxiter) exit + if (norm_sq < tolsq .or. hnext <= epsilon(one_${s}$)) exit end do ! Cycle-end update from the least-squares correction. @@ -141,12 +138,13 @@ contains end if end if - if (norm_sq < tolsq .or. iter >= maxiter) exit - - ! Refresh the residual before the next restarted cycle. + ! Refresh the true residual so the convergence test per restart cycle and the logged + ! value use the true ||b - A*x||, not the Hessenberg estimate. r = b call A%matvec(x, r, alpha=-one_${s}$, beta=one_${s}$, op='N') norm_sq = A%inner_product(r, r) + + if (associated(workspace%callback)) call workspace%callback(x, norm_sq, iter) end do end associate From 5099335b8118ee4352ace76221d083243efdcb60 Mon Sep 17 00:00:00 2001 From: jalvesz Date: Sat, 20 Jun 2026 13:18:04 +0200 Subject: [PATCH 22/23] replace manual given rotations and the triangular solve by lapack kernels --- ...stdlib_linalg_iterative_solvers_gmres.fypp | 77 ++++++++++--------- 1 file changed, 41 insertions(+), 36 deletions(-) diff --git a/src/linalg_iterative/stdlib_linalg_iterative_solvers_gmres.fypp b/src/linalg_iterative/stdlib_linalg_iterative_solvers_gmres.fypp index 31c4ce79e..0b52f6651 100644 --- a/src/linalg_iterative/stdlib_linalg_iterative_solvers_gmres.fypp +++ b/src/linalg_iterative/stdlib_linalg_iterative_solvers_gmres.fypp @@ -10,6 +10,8 @@ submodule(stdlib_linalg_iterative_solvers) stdlib_linalg_iterative_gmres use stdlib_constants use stdlib_optval, only: optval use stdlib_linalg_blas, only: gemv + use stdlib_linalg_lapack, only: lartg, lasr, trtrs + use stdlib_linalg_constants, only: ilp implicit none contains @@ -23,17 +25,16 @@ contains integer, intent(in) :: maxiter, kdim type(stdlib_solver_workspace_${s}$_type), intent(inout) :: workspace logical, intent(in) :: compact - integer :: i, iter, j, j_final, iorth, jz, zbase - ${t}$ :: beta, denom, hnext, htmp, norm_sq, norm_sq0, temp, tolsq + integer :: i, iter, j, j_final, iorth, jz, info + ${t}$ :: beta, hnext, htmp, norm_sq, norm_sq0, temp, tolsq ${t}$, allocatable :: cs(:), g(:), h(:,:), sn(:), y(:) allocate(h(kdim+1, kdim), cs(kdim), sn(kdim), g(kdim+1), y(kdim) ) - zbase = kdim + 4 associate( r => workspace%tmp(:,1), & w => workspace%tmp(:,2), & v => workspace%tmp(:,3:kdim+3), & - z => workspace%tmp(:,zbase:zbase+merge(0,kdim-1,compact)) ) + z => workspace%tmp(:,kdim + 4:) ) iter = 0 ! Initialize convergence targets from the right-hand side norm. @@ -88,26 +89,8 @@ contains v(:,j+1) = zero_${s}$ end if - ! Apply the previously accumulated Givens rotations. - do i = 1, j - 1 - temp = cs(i) * h(i,j) + sn(i) * h(i+1,j) - h(i+1,j) = -sn(i) * h(i,j) + cs(i) * h(i+1,j) - h(i,j) = temp - end do - - ! Build and apply the next Givens rotation. - denom = sqrt(h(j,j) * h(j,j) + h(j+1,j) * h(j+1,j)) - if (denom > epsilon(one_${s}$)) then - cs(j) = h(j,j) / denom - sn(j) = h(j+1,j) / denom - else - cs(j) = one_${s}$ - sn(j) = zero_${s}$ - end if - - temp = cs(j) * h(j,j) + sn(j) * h(j+1,j) - h(j+1,j) = -sn(j) * h(j,j) + cs(j) * h(j+1,j) - h(j,j) = temp + ! Apply previous rotations to the new column, then generate the next one. + call apply_givens_rotation(h(1:j+1,j), cs, sn) temp = cs(j) * g(j) + sn(j) * g(j+1) g(j+1) = -sn(j) * g(j) + cs(j) * g(j+1) @@ -122,7 +105,9 @@ contains ! Cycle-end update from the least-squares correction. if (j_final > 0) then - call upper_triangular_solve(h, g, y, j_final) + call upper_triangular_solve(h, g, y, j_final, info) + if(info /= 0) exit + if (compact) then call gemv('N', m=size(x), n=j_final, alpha=one_${s}$, & a=v, lda=size(v,1), & @@ -150,21 +135,41 @@ contains contains - subroutine upper_triangular_solve(h, g, y, n) + subroutine apply_givens_rotation(hcol, c, s) + ! implementation inspired by https://github.com/nekStab/LightKrylov + ${t}$, target, contiguous, intent(inout) :: hcol(:) + ${t}$, intent(inout) :: c(:), s(:) + integer :: k + ${t}$ :: r + ${t}$, pointer :: hmat(:, :) + ! Size of the column. + k = int(size(hcol) - 1, kind=ilp) + ! Apply previous Givens rotations to this new column. + hmat(1:k, 1:1) => hcol(:k) + call lasr('L', 'V', 'F', k, 1_ilp, c(:k-1), s(:k-1), hmat, k) + ! Compute the sine and cosine components for the next rotation. + call lartg(hcol(k), hcol(k+1), c(k), s(k), r) + ! Eliminate H(k+1, k). + hcol(k) = r + hcol(k+1) = zero_${s}$ + end subroutine + + subroutine upper_triangular_solve(h, g, y, n, info) ${t}$, intent(in) :: h(:,:), g(:) - ${t}$, intent(inout) :: y(:) + ${t}$, target, contiguous, intent(inout) :: y(:) integer, intent(in) :: n - integer :: row + integer, intent(out) :: info + integer(ilp) :: n_, lda_ + ${t}$, pointer :: rhs(:, :) y(1:n) = g(1:n) - do row = n, 1, -1 - if (row < n) y(row) = y(row) - dot_product(h(row,row+1:n) , y(row+1:n)) - if (abs(h(row,row)) > epsilon(one_${s}$)) then - y(row) = y(row) / h(row,row) - else - y(row) = zero_${s}$ - end if - end do + + n_ = int(n, kind=ilp) + lda_ = int(size(h,1), kind=ilp) + + rhs(1:n,1:1) => y(:n) + + call trtrs('U','N','N', n_, 1_ilp, h, lda_, rhs, n_, info) end subroutine end subroutine #:endfor From 84b920a9f80d0a0d606a5f677e1abf37956868b5 Mon Sep 17 00:00:00 2001 From: jalvesz Date: Sat, 20 Jun 2026 13:35:25 +0200 Subject: [PATCH 23/23] try removing the hilbert test for sp as it is meaningless for the selected size --- test/linalg/test_linalg_solve_iterative.fypp | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/test/linalg/test_linalg_solve_iterative.fypp b/test/linalg/test_linalg_solve_iterative.fypp index e3c47b32c..7e4187f86 100644 --- a/test/linalg/test_linalg_solve_iterative.fypp +++ b/test/linalg/test_linalg_solve_iterative.fypp @@ -150,13 +150,9 @@ module test_linalg_solve_iterative integer, parameter :: n = 10 integer :: r, c #:for k, t, s in R_KINDS_TYPES + #:if k not in ['sp'] # Hilbert sp is almost useless. for n=10, cond(A) ~ 10^13, which is beyond the precision of single precision. block - !Hilbert sp is almost useless. - #:if k == 'sp' - ${t}$, parameter :: tol = 1.e-0_${k}$ - #:else ${t}$, parameter :: tol = 1.e-3_${k}$ - #:endif ${t}$ :: A(n, n), b(n), x(n) ${t}$, parameter :: x_ref(n) = 1.0_${k}$ @@ -174,6 +170,7 @@ module test_linalg_solve_iterative 'error in GMRES ${k}$: Hilbert matrix') if (allocated(error)) return end block + #:endif #:endfor end subroutine test_stdlib_solve_gmres_hilbert