From dc4aaac5f4df787ee63b593a331677ba1f24ebba Mon Sep 17 00:00:00 2001 From: jaya sathvik Date: Tue, 6 Jan 2026 08:01:49 +0530 Subject: [PATCH 001/104] add PCA to public api --- src/stats/stdlib_stats.fypp | 54 ++++++++++++++++++++++++++++++++++++- 1 file changed, 53 insertions(+), 1 deletion(-) diff --git a/src/stats/stdlib_stats.fypp b/src/stats/stdlib_stats.fypp index adf373f0a..673a929d9 100644 --- a/src/stats/stdlib_stats.fypp +++ b/src/stats/stdlib_stats.fypp @@ -9,10 +9,12 @@ module stdlib_stats !! ([Specification](../page/specs/stdlib_stats.html)) use stdlib_kinds, only: sp, dp, xdp, qp, & int8, int16, int32, int64 + use stdlib_linalg_state, only: linalg_state_type implicit none private ! Public API public :: corr, cov, mean, median, moment, var + public :: pca, pca_transform, pca_inverse_transform interface corr @@ -637,6 +639,56 @@ module stdlib_stats end function ${RName}$ #:endfor #:endfor - end interface moment + interface pca + !! version: experimental + !! + !! Principal Component Analysis (PCA) + !! ([Specification](../page/specs/stdlib_stats.html#pca)) + #:for k1, t1 in REAL_KINDS_TYPES + module subroutine pca_${k1}$(x, components, singular_values, mean, & + method, overwrite_x, err) + ${t1}$, intent(inout), target :: x(:,:) + ${t1}$, intent(out) :: components(:,:) + real(${k1}$), intent(out) :: singular_values(:) + ${t1}$, intent(out), optional :: mean(:) + character(*), intent(in), optional :: method + logical, intent(in), optional :: overwrite_x + type(linalg_state_type), intent(out), optional :: err + end subroutine pca_${k1}$ + #:endfor + end interface pca + + + interface pca_transform + !! version: experimental + !! + !! Projects data into the reduced dimensional space + !! ([Specification](../page/specs/stdlib_stats.html#pca_transform)) + #:for k1, t1 in REAL_KINDS_TYPES + module function pca_transform_${k1}$(x, components, mean) result(res) + ${t1}$, intent(in) :: x(:,:) + ${t1}$, intent(in) :: components(:,:) + ${t1}$, intent(in), optional :: mean(:) + ${t1}$ :: res(size(x,1), size(components,2)) + end function pca_transform_${k1}$ + #:endfor + end interface pca_transform + + + interface pca_inverse_transform + !! version: experimental + !! + !! Reconstructs original data from the reduced space + !! ([Specification](../page/specs/stdlib_stats.html#pca_inverse_transform)) + #:for k1, t1 in REAL_KINDS_TYPES + module function pca_inverse_transform_${k1}$(x_reduced, components, mean) result(res) + ${t1}$, intent(in) :: x_reduced(:,:) + ${t1}$, intent(in) :: components(:,:) + ${t1}$, intent(in), optional :: mean(:) + ${t1}$ :: res(size(x_reduced,1), size(components,1)) + end function pca_inverse_transform_${k1}$ + #:endfor + end interface pca_inverse_transform + end module stdlib_stats From 27599e1770d9fd3ab2ab8d7cc0097ce82c65c447 Mon Sep 17 00:00:00 2001 From: jaya sathvik Date: Tue, 6 Jan 2026 08:30:19 +0530 Subject: [PATCH 002/104] include pca submodule --- src/stats/CMakeLists.txt | 1 + src/stats/stdlib_stats_pca.fypp | 53 +++++++++++++++++++++++++++++++++ 2 files changed, 54 insertions(+) create mode 100644 src/stats/stdlib_stats_pca.fypp diff --git a/src/stats/CMakeLists.txt b/src/stats/CMakeLists.txt index 3e5727565..41042315b 100644 --- a/src/stats/CMakeLists.txt +++ b/src/stats/CMakeLists.txt @@ -16,6 +16,7 @@ set(stats_fppFiles ../stdlib_string_type.fypp stdlib_stats_corr.fypp stdlib_stats_cov.fypp + stdlib_stats_pca.fypp stdlib_stats_distribution_exponential.fypp stdlib_stats_distribution_normal.fypp stdlib_stats_distribution_uniform.fypp diff --git a/src/stats/stdlib_stats_pca.fypp b/src/stats/stdlib_stats_pca.fypp new file mode 100644 index 000000000..1fcd810f1 --- /dev/null +++ b/src/stats/stdlib_stats_pca.fypp @@ -0,0 +1,53 @@ +#:include "common.fypp" +#:set REAL_KINDS_TYPES = list(zip(REAL_KINDS, REAL_TYPES)) +submodule (stdlib_stats) stdlib_stats_pca + use stdlib_error, only: error_stop + use stdlib_optval, only: optval + use stdlib_linalg_state, only: LINALG_SUCCESS, LINALG_ERROR, linalg_state_type + implicit none + +contains + + #:for k1, t1 in REAL_KINDS_TYPES + module subroutine pca_${k1}$(x, components, singular_values, mean, & + method, overwrite_x, err) + ${t1}$, intent(inout), target :: x(:,:) + ${t1}$, intent(out) :: components(:,:) + real(${k1}$), intent(out) :: singular_values(:) + ${t1}$, intent(out), optional :: mean(:) + character(*), intent(in), optional :: method + logical, intent(in), optional :: overwrite_x + type(linalg_state_type), intent(out), optional :: err + + ! Placeholder implementation + if (present(err)) err = linalg_state_type("pca", LINALG_ERROR, "Not implemented yet") + call error_stop("PCA not implemented yet") + + end subroutine pca_${k1}$ + #:endfor + + + #:for k1, t1 in REAL_KINDS_TYPES + module function pca_transform_${k1}$(x, components, mean) result(res) + ${t1}$, intent(in) :: x(:,:) + ${t1}$, intent(in) :: components(:,:) + ${t1}$, intent(in), optional :: mean(:) + ${t1}$ :: res(size(x,1), size(components,2)) + + res = 0.0_${k1}$ ! Placeholder implementation + end function pca_transform_${k1}$ + #:endfor + + + #:for k1, t1 in REAL_KINDS_TYPES + module function pca_inverse_transform_${k1}$(x_reduced, components, mean) result(res) + ${t1}$, intent(in) :: x_reduced(:,:) + ${t1}$, intent(in) :: components(:,:) + ${t1}$, intent(in), optional :: mean(:) + ${t1}$ :: res(size(x_reduced,1), size(components,1)) + + res = 0.0_${k1}$ ! Placeholder implementation + end function pca_inverse_transform_${k1}$ + #:endfor + +end submodule stdlib_stats_pca From d77fb0e20f0653e911cb58ba0cb0d4261e78bcba Mon Sep 17 00:00:00 2001 From: jaya sathvik Date: Tue, 6 Jan 2026 09:01:18 +0530 Subject: [PATCH 003/104] Add PCA module with `pca`, `pca_transform`, and `pca_inverse_transform` routines. --- src/stats/stdlib_stats_pca.fypp | 129 ++++++++++++++++++++++++++++++-- 1 file changed, 122 insertions(+), 7 deletions(-) diff --git a/src/stats/stdlib_stats_pca.fypp b/src/stats/stdlib_stats_pca.fypp index 1fcd810f1..706f65d2e 100644 --- a/src/stats/stdlib_stats_pca.fypp +++ b/src/stats/stdlib_stats_pca.fypp @@ -3,6 +3,8 @@ submodule (stdlib_stats) stdlib_stats_pca use stdlib_error, only: error_stop use stdlib_optval, only: optval + use stdlib_linalg, only: svd, eigh + use stdlib_linalg_constants, only: ilp use stdlib_linalg_state, only: LINALG_SUCCESS, LINALG_ERROR, linalg_state_type implicit none @@ -19,9 +21,102 @@ contains logical, intent(in), optional :: overwrite_x type(linalg_state_type), intent(out), optional :: err - ! Placeholder implementation - if (present(err)) err = linalg_state_type("pca", LINALG_ERROR, "Not implemented yet") - call error_stop("PCA not implemented yet") + type(linalg_state_type) :: err0 + integer(ilp) :: n, p, i, k + ${t1}$, allocatable :: mu(:) + character(16) :: method_ + + n = size(x, 1, kind=ilp) + p = size(x, 2, kind=ilp) + k = size(components, 1, kind=ilp) + + method_ = optval(method, "svd") + + ! 1. Calculate and optionally return mean + allocate(mu(p)) + mu = mean(x, dim=1) + if (present(mean)) mean = mu + + err0 = linalg_state_type("pca", LINALG_SUCCESS) + + if (method_ == "svd") then + ! 2. Center data and call SVD with temporaries for robustness + block + ${t1}$, allocatable :: s_tmp(:), vt_tmp(:,:) + integer(ilp) :: n_s + n_s = min(n, p) + allocate(s_tmp(n_s), vt_tmp(n_s, p)) + + if (optval(overwrite_x, .false.)) then + do i = 1, n + x(i, :) = x(i, :) - mu + end do + call svd(x, s_tmp, vt=vt_tmp, overwrite_a=.true., full_matrices=.false., err=err0) + else + block + ${t1}$, allocatable :: x_centered(:,:) + allocate(x_centered(n, p)) + do i = 1, n + x_centered(i, :) = x(i, :) - mu + end do + call svd(x_centered, s_tmp, vt=vt_tmp, overwrite_a=.true., full_matrices=.false., err=err0) + end block + end if + + if (err0%ok()) then + i = min(size(components, 1, kind=ilp), n_s) + components(:i, :) = vt_tmp(:i, :) + i = min(size(singular_values, 1, kind=ilp), n_s) + singular_values(:i) = s_tmp(:i) + end if + end block + else if (method_ == "eig" .or. method_ == "cov") then + ! 3. Eigendecomposition of covariance matrix + block + ${t1}$, allocatable :: c(:,:) + ${t1}$, allocatable :: vectors(:,:) + real(${k1}$), allocatable :: lambda(:) + integer(ilp), allocatable :: idx(:) + integer(ilp) :: j, m + real(${k1}$) :: tmp_val + + allocate(lambda(p), idx(p), vectors(p, p)) + c = cov(x, dim=1) + call eigh(c, lambda, vectors=vectors, err=err0) + + if (err0%ok()) then + ! Sort eigenvalues and vectors in descending order + do j = 1, p + idx(j) = j + end do + ! Simple bubble sort + do i = 1, p-1 + do j = i+1, p + if (lambda(idx(i)) < lambda(idx(j))) then + m = idx(i) + idx(i) = idx(j) + idx(j) = m + end if + end do + end do + + ! Assign sorted results + m = min(size(components, 1, kind=ilp), p) + do i = 1, m + components(i, :) = vectors(:, idx(i)) + if (lambda(idx(i)) > 0.0_${k1}$) then + singular_values(i) = sqrt(lambda(idx(i)) * real(n-1, ${k1}$)) + else + singular_values(i) = 0.0_${k1}$ + end if + end do + end if + end block + else + err0 = linalg_state_type("pca", LINALG_ERROR, "Unknown method: "//method_) + end if + + if (present(err)) err = err0 end subroutine pca_${k1}$ #:endfor @@ -32,9 +127,22 @@ contains ${t1}$, intent(in) :: x(:,:) ${t1}$, intent(in) :: components(:,:) ${t1}$, intent(in), optional :: mean(:) - ${t1}$ :: res(size(x,1), size(components,2)) + ${t1}$ :: res(size(x,1), size(components,1)) - res = 0.0_${k1}$ ! Placeholder implementation + integer(ilp) :: i, n + ${t1}$, allocatable :: x_centered(:,:) + + n = size(x, 1, kind=ilp) + allocate(x_centered(n, size(x, 2, kind=ilp))) + if (present(mean)) then + do i = 1, n + x_centered(i, :) = x(i, :) - mean + end do + else + x_centered = x + end if + + res = matmul(x_centered, transpose(components)) end function pca_transform_${k1}$ #:endfor @@ -44,9 +152,16 @@ contains ${t1}$, intent(in) :: x_reduced(:,:) ${t1}$, intent(in) :: components(:,:) ${t1}$, intent(in), optional :: mean(:) - ${t1}$ :: res(size(x_reduced,1), size(components,1)) + ${t1}$ :: res(size(x_reduced,1), size(components,2)) - res = 0.0_${k1}$ ! Placeholder implementation + integer(ilp) :: i, n + n = size(x_reduced, 1, kind=ilp) + res = matmul(x_reduced, components) + if (present(mean)) then + do i = 1, n + res(i, :) = res(i, :) + mean + end do + end if end function pca_inverse_transform_${k1}$ #:endfor From 24358d17b34d24789b995f05f1db2c63862374ba Mon Sep 17 00:00:00 2001 From: jaya sathvik Date: Tue, 6 Jan 2026 09:16:56 +0530 Subject: [PATCH 004/104] add PCA unit test --- test/stats/test_pca.f90 | 76 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 76 insertions(+) create mode 100644 test/stats/test_pca.f90 diff --git a/test/stats/test_pca.f90 b/test/stats/test_pca.f90 new file mode 100644 index 000000000..f39c9253d --- /dev/null +++ b/test/stats/test_pca.f90 @@ -0,0 +1,76 @@ +program test_pca + use stdlib_error, only: check + use stdlib_kinds, only: sp, dp + use stdlib_stats, only: pca, pca_transform, pca_inverse_transform + use stdlib_linalg_state, only: linalg_state_type + implicit none + + real(sp), parameter :: sptol = 1000 * epsilon(1._sp) + real(dp), parameter :: dptol = 1000 * epsilon(1._dp) + + call test_pca_sp() + call test_pca_dp() + +contains + + subroutine test_pca_sp() + real(sp) :: x(3, 2), components(2, 2), s(2), mu(2) + real(sp) :: x_red(3, 1), comp_red(1, 2), s_red(1) + real(sp) :: x_trans(3, 2), x_inv(3, 2) + type(linalg_state_type) :: err + + ! Data: [1, 2], [3, 4], [5, 6] + x = reshape([1.0_sp, 3.0_sp, 5.0_sp, 2.0_sp, 4.0_sp, 6.0_sp], [3, 2]) + + ! Test SVD method + call pca(x, components, s, mean=mu, method="svd", err=err) + call check(err%ok(), "pca_sp svd err") + call check(all(abs(mu - [3.0_sp, 4.0_sp]) < sptol), "pca_sp svd mean") + ! First component should be approx [0.707, 0.707] (or negative) + call check(abs(abs(components(1,1)) - 1.0_sp/sqrt(2.0_sp)) < sptol, "pca_sp svd comp1") + call check(abs(s(1) - 4.0_sp) < sptol, "pca_sp svd s1") + call check(abs(s(2)) < sptol, "pca_sp svd s2") + + ! Test Transform + x_trans = pca_transform(x, components, mu) + ! Second dimension should be zero + call check(all(abs(x_trans(:, 2)) < sptol), "pca_sp transform") + + ! Test Inverse Transform + x_inv = pca_inverse_transform(x_trans, components, mu) + call check(all(abs(x_inv - x) < sptol), "pca_sp inverse") + + ! Test EIG method + call pca(x, components, s, method="eig", err=err) + call check(err%ok(), "pca_sp eig err") + call check(abs(s(1) - 4.0_sp) < sptol, "pca_sp eig s1") + + end subroutine test_pca_sp + + subroutine test_pca_dp() + real(dp) :: x(3, 2), components(2, 2), s(2), mu(2) + real(dp) :: x_trans(3, 2), x_inv(3, 2) + type(linalg_state_type) :: err + + x = reshape([1.0_dp, 3.0_dp, 5.0_dp, 2.0_dp, 4.0_dp, 6.0_dp], [3, 2]) + + ! Test SVD method + call pca(x, components, s, mean=mu, method="svd", err=err) + call check(err%ok(), "pca_dp svd err") + call check(all(abs(mu - [3.0_dp, 4.0_dp]) < dptol), "pca_dp svd mean") + call check(abs(abs(components(1,1)) - 1.0_dp/sqrt(2.0_dp)) < dptol, "pca_dp svd comp1") + call check(abs(s(1) - 4.0_dp) < dptol, "pca_dp svd s1") + + ! Test Transform/Inverse + x_trans = pca_transform(x, components, mu) + x_inv = pca_inverse_transform(x_trans, components, mu) + call check(all(abs(x_inv - x) < dptol), "pca_dp inverse") + + ! Test EIG method + call pca(x, components, s, method="eig", err=err) + call check(err%ok(), "pca_dp eig err") + call check(abs(s(1) - 4.0_dp) < dptol, "pca_dp eig s1") + + end subroutine test_pca_dp + +end program test_pca From 1dd44ad9e29382f889417186abaf59d09aa76667 Mon Sep 17 00:00:00 2001 From: jaya sathvik Date: Tue, 6 Jan 2026 09:20:19 +0530 Subject: [PATCH 005/104] update end interface statement --- src/stats/stdlib_stats.fypp | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/stats/stdlib_stats.fypp b/src/stats/stdlib_stats.fypp index 673a929d9..26d6a41af 100644 --- a/src/stats/stdlib_stats.fypp +++ b/src/stats/stdlib_stats.fypp @@ -639,6 +639,9 @@ module stdlib_stats end function ${RName}$ #:endfor #:endfor + end interface moment + + interface pca !! version: experimental !! From 7f79ef6218039bf409c95551e882dfa19c3d228b Mon Sep 17 00:00:00 2001 From: jaya sathvik Date: Tue, 6 Jan 2026 09:22:25 +0530 Subject: [PATCH 006/104] update CmakeLists --- test/stats/CMakeLists.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/test/stats/CMakeLists.txt b/test/stats/CMakeLists.txt index ff9d45063..5eb3d61b0 100644 --- a/test/stats/CMakeLists.txt +++ b/test/stats/CMakeLists.txt @@ -14,6 +14,7 @@ fypp_f90("${fyppFlags}" "${fppFiles}" outFiles) ADDTEST(corr) ADDTEST(cov) +ADDTEST(pca) ADDTEST(mean) ADDTEST(median) ADDTEST(moment) From 0d2738cd3ac869cbcd0bf2d9c05fcbf4b3980096 Mon Sep 17 00:00:00 2001 From: jaya sathvik Date: Tue, 6 Jan 2026 09:46:06 +0530 Subject: [PATCH 007/104] fixed_conflicts --- src/stats/stdlib_stats.fypp | 14 +++++++------- src/stats/stdlib_stats_pca.fypp | 22 +++++++++++----------- test/stats/test_pca.f90 | 4 ++-- 3 files changed, 20 insertions(+), 20 deletions(-) diff --git a/src/stats/stdlib_stats.fypp b/src/stats/stdlib_stats.fypp index 26d6a41af..174958937 100644 --- a/src/stats/stdlib_stats.fypp +++ b/src/stats/stdlib_stats.fypp @@ -648,12 +648,12 @@ module stdlib_stats !! Principal Component Analysis (PCA) !! ([Specification](../page/specs/stdlib_stats.html#pca)) #:for k1, t1 in REAL_KINDS_TYPES - module subroutine pca_${k1}$(x, components, singular_values, mean, & + module subroutine pca_${k1}$(x, components, singular_values, x_mean, & method, overwrite_x, err) ${t1}$, intent(inout), target :: x(:,:) ${t1}$, intent(out) :: components(:,:) real(${k1}$), intent(out) :: singular_values(:) - ${t1}$, intent(out), optional :: mean(:) + ${t1}$, intent(out), optional :: x_mean(:) character(*), intent(in), optional :: method logical, intent(in), optional :: overwrite_x type(linalg_state_type), intent(out), optional :: err @@ -668,10 +668,10 @@ module stdlib_stats !! Projects data into the reduced dimensional space !! ([Specification](../page/specs/stdlib_stats.html#pca_transform)) #:for k1, t1 in REAL_KINDS_TYPES - module function pca_transform_${k1}$(x, components, mean) result(res) + module function pca_transform_${k1}$(x, components, x_mean) result(res) ${t1}$, intent(in) :: x(:,:) ${t1}$, intent(in) :: components(:,:) - ${t1}$, intent(in), optional :: mean(:) + ${t1}$, intent(in), optional :: x_mean(:) ${t1}$ :: res(size(x,1), size(components,2)) end function pca_transform_${k1}$ #:endfor @@ -684,11 +684,11 @@ module stdlib_stats !! Reconstructs original data from the reduced space !! ([Specification](../page/specs/stdlib_stats.html#pca_inverse_transform)) #:for k1, t1 in REAL_KINDS_TYPES - module function pca_inverse_transform_${k1}$(x_reduced, components, mean) result(res) + module function pca_inverse_transform_${k1}$(x_reduced, components, x_mean) result(res) ${t1}$, intent(in) :: x_reduced(:,:) ${t1}$, intent(in) :: components(:,:) - ${t1}$, intent(in), optional :: mean(:) - ${t1}$ :: res(size(x_reduced,1), size(components,1)) + ${t1}$, intent(in), optional :: x_mean(:) + ${t1}$ :: res(size(x_reduced,1), size(components,2)) end function pca_inverse_transform_${k1}$ #:endfor end interface pca_inverse_transform diff --git a/src/stats/stdlib_stats_pca.fypp b/src/stats/stdlib_stats_pca.fypp index 706f65d2e..3a2b14775 100644 --- a/src/stats/stdlib_stats_pca.fypp +++ b/src/stats/stdlib_stats_pca.fypp @@ -11,12 +11,12 @@ submodule (stdlib_stats) stdlib_stats_pca contains #:for k1, t1 in REAL_KINDS_TYPES - module subroutine pca_${k1}$(x, components, singular_values, mean, & + module subroutine pca_${k1}$(x, components, singular_values, x_mean, & method, overwrite_x, err) ${t1}$, intent(inout), target :: x(:,:) ${t1}$, intent(out) :: components(:,:) real(${k1}$), intent(out) :: singular_values(:) - ${t1}$, intent(out), optional :: mean(:) + ${t1}$, intent(out), optional :: x_mean(:) character(*), intent(in), optional :: method logical, intent(in), optional :: overwrite_x type(linalg_state_type), intent(out), optional :: err @@ -35,7 +35,7 @@ contains ! 1. Calculate and optionally return mean allocate(mu(p)) mu = mean(x, dim=1) - if (present(mean)) mean = mu + if (present(x_mean)) x_mean = mu err0 = linalg_state_type("pca", LINALG_SUCCESS) @@ -123,10 +123,10 @@ contains #:for k1, t1 in REAL_KINDS_TYPES - module function pca_transform_${k1}$(x, components, mean) result(res) + module function pca_transform_${k1}$(x, components, x_mean) result(res) ${t1}$, intent(in) :: x(:,:) ${t1}$, intent(in) :: components(:,:) - ${t1}$, intent(in), optional :: mean(:) + ${t1}$, intent(in), optional :: x_mean(:) ${t1}$ :: res(size(x,1), size(components,1)) integer(ilp) :: i, n @@ -134,9 +134,9 @@ contains n = size(x, 1, kind=ilp) allocate(x_centered(n, size(x, 2, kind=ilp))) - if (present(mean)) then + if (present(x_mean)) then do i = 1, n - x_centered(i, :) = x(i, :) - mean + x_centered(i, :) = x(i, :) - x_mean end do else x_centered = x @@ -148,18 +148,18 @@ contains #:for k1, t1 in REAL_KINDS_TYPES - module function pca_inverse_transform_${k1}$(x_reduced, components, mean) result(res) + module function pca_inverse_transform_${k1}$(x_reduced, components, x_mean) result(res) ${t1}$, intent(in) :: x_reduced(:,:) ${t1}$, intent(in) :: components(:,:) - ${t1}$, intent(in), optional :: mean(:) + ${t1}$, intent(in), optional :: x_mean(:) ${t1}$ :: res(size(x_reduced,1), size(components,2)) integer(ilp) :: i, n n = size(x_reduced, 1, kind=ilp) res = matmul(x_reduced, components) - if (present(mean)) then + if (present(x_mean)) then do i = 1, n - res(i, :) = res(i, :) + mean + res(i, :) = res(i, :) + x_mean end do end if end function pca_inverse_transform_${k1}$ diff --git a/test/stats/test_pca.f90 b/test/stats/test_pca.f90 index f39c9253d..6535a25d7 100644 --- a/test/stats/test_pca.f90 +++ b/test/stats/test_pca.f90 @@ -23,7 +23,7 @@ subroutine test_pca_sp() x = reshape([1.0_sp, 3.0_sp, 5.0_sp, 2.0_sp, 4.0_sp, 6.0_sp], [3, 2]) ! Test SVD method - call pca(x, components, s, mean=mu, method="svd", err=err) + call pca(x, components, s, x_mean=mu, method="svd", err=err) call check(err%ok(), "pca_sp svd err") call check(all(abs(mu - [3.0_sp, 4.0_sp]) < sptol), "pca_sp svd mean") ! First component should be approx [0.707, 0.707] (or negative) @@ -55,7 +55,7 @@ subroutine test_pca_dp() x = reshape([1.0_dp, 3.0_dp, 5.0_dp, 2.0_dp, 4.0_dp, 6.0_dp], [3, 2]) ! Test SVD method - call pca(x, components, s, mean=mu, method="svd", err=err) + call pca(x, components, s, x_mean=mu, method="svd", err=err) call check(err%ok(), "pca_dp svd err") call check(all(abs(mu - [3.0_dp, 4.0_dp]) < dptol), "pca_dp svd mean") call check(abs(abs(components(1,1)) - 1.0_dp/sqrt(2.0_dp)) < dptol, "pca_dp svd comp1") From 20b0e98a0ee24870f88c2cb6f9f4d094323442df Mon Sep 17 00:00:00 2001 From: jaya sathvik Date: Tue, 6 Jan 2026 10:35:41 +0530 Subject: [PATCH 008/104] update interface --- src/stats/stdlib_stats.fypp | 14 ++++++++++---- src/stats/stdlib_stats_pca.fypp | 6 +++++- 2 files changed, 15 insertions(+), 5 deletions(-) diff --git a/src/stats/stdlib_stats.fypp b/src/stats/stdlib_stats.fypp index 174958937..aec6a6307 100644 --- a/src/stats/stdlib_stats.fypp +++ b/src/stats/stdlib_stats.fypp @@ -642,12 +642,18 @@ module stdlib_stats end interface moment + #! Note: PCA uses SVD and EIGH which rely on LAPACK. LAPACK backends do not support extended (xdp) or + #! quad precision (qp). Therefore, PCA is limited to single (sp) and double (dp) precision only. + #:set PCA_REAL_KINDS = ["sp", "dp"] + #:set PCA_REAL_TYPES = ["real(sp)", "real(dp)"] + #:set PCA_KINDS_TYPES = list(zip(PCA_REAL_KINDS, PCA_REAL_TYPES)) + interface pca !! version: experimental !! !! Principal Component Analysis (PCA) !! ([Specification](../page/specs/stdlib_stats.html#pca)) - #:for k1, t1 in REAL_KINDS_TYPES + #:for k1, t1 in PCA_KINDS_TYPES module subroutine pca_${k1}$(x, components, singular_values, x_mean, & method, overwrite_x, err) ${t1}$, intent(inout), target :: x(:,:) @@ -667,12 +673,12 @@ module stdlib_stats !! !! Projects data into the reduced dimensional space !! ([Specification](../page/specs/stdlib_stats.html#pca_transform)) - #:for k1, t1 in REAL_KINDS_TYPES + #:for k1, t1 in PCA_KINDS_TYPES module function pca_transform_${k1}$(x, components, x_mean) result(res) ${t1}$, intent(in) :: x(:,:) ${t1}$, intent(in) :: components(:,:) ${t1}$, intent(in), optional :: x_mean(:) - ${t1}$ :: res(size(x,1), size(components,2)) + ${t1}$ :: res(size(x,1), size(components,1)) end function pca_transform_${k1}$ #:endfor end interface pca_transform @@ -683,7 +689,7 @@ module stdlib_stats !! !! Reconstructs original data from the reduced space !! ([Specification](../page/specs/stdlib_stats.html#pca_inverse_transform)) - #:for k1, t1 in REAL_KINDS_TYPES + #:for k1, t1 in PCA_KINDS_TYPES module function pca_inverse_transform_${k1}$(x_reduced, components, x_mean) result(res) ${t1}$, intent(in) :: x_reduced(:,:) ${t1}$, intent(in) :: components(:,:) diff --git a/src/stats/stdlib_stats_pca.fypp b/src/stats/stdlib_stats_pca.fypp index 3a2b14775..54e0be03b 100644 --- a/src/stats/stdlib_stats_pca.fypp +++ b/src/stats/stdlib_stats_pca.fypp @@ -1,5 +1,9 @@ #:include "common.fypp" -#:set REAL_KINDS_TYPES = list(zip(REAL_KINDS, REAL_TYPES)) +#! Note: PCA uses SVD and EIGH which rely on LAPACK. LAPACK backends do not support extended (xdp) or +#! quad precision (qp). Therefore, PCA is limited to single (sp) and double (dp) precision only. +#:set PCA_REAL_KINDS = ["sp", "dp"] +#:set PCA_REAL_TYPES = ["real(sp)", "real(dp)"] +#:set REAL_KINDS_TYPES = list(zip(PCA_REAL_KINDS, PCA_REAL_TYPES)) submodule (stdlib_stats) stdlib_stats_pca use stdlib_error, only: error_stop use stdlib_optval, only: optval From 654edbaba1767eeaa408f1461ce92745fc1d368e Mon Sep 17 00:00:00 2001 From: jaya sathvik Date: Wed, 7 Jan 2026 13:50:14 +0530 Subject: [PATCH 009/104] allined with the other linalg function --- src/stats/stdlib_stats.fypp | 6 ++++-- src/stats/stdlib_stats_pca.fypp | 6 ++++-- 2 files changed, 8 insertions(+), 4 deletions(-) diff --git a/src/stats/stdlib_stats.fypp b/src/stats/stdlib_stats.fypp index aec6a6307..3f69ffac3 100644 --- a/src/stats/stdlib_stats.fypp +++ b/src/stats/stdlib_stats.fypp @@ -642,8 +642,10 @@ module stdlib_stats end interface moment - #! Note: PCA uses SVD and EIGH which rely on LAPACK. LAPACK backends do not support extended (xdp) or - #! quad precision (qp). Therefore, PCA is limited to single (sp) and double (dp) precision only. + #! Note: PCA uses SVD and EIGH which rely on LAPACK. External optimized BLAS/LAPACK backends + #! (e.g., OpenBLAS, MKL) do not support extended precision (xdp). While stdlib's internal LAPACK + #! implementations do provide quadruple precision (qp) routines, PCA is limited to single (sp) + #! and double (dp) precision for consistency with external optimized backends. #:set PCA_REAL_KINDS = ["sp", "dp"] #:set PCA_REAL_TYPES = ["real(sp)", "real(dp)"] #:set PCA_KINDS_TYPES = list(zip(PCA_REAL_KINDS, PCA_REAL_TYPES)) diff --git a/src/stats/stdlib_stats_pca.fypp b/src/stats/stdlib_stats_pca.fypp index 54e0be03b..00583fe1a 100644 --- a/src/stats/stdlib_stats_pca.fypp +++ b/src/stats/stdlib_stats_pca.fypp @@ -1,6 +1,8 @@ #:include "common.fypp" -#! Note: PCA uses SVD and EIGH which rely on LAPACK. LAPACK backends do not support extended (xdp) or -#! quad precision (qp). Therefore, PCA is limited to single (sp) and double (dp) precision only. +#! Note: PCA uses SVD and EIGH which rely on LAPACK. External optimized BLAS/LAPACK backends +#! (e.g., OpenBLAS, MKL) do not support extended precision (xdp). While stdlib's internal LAPACK +#! implementations do provide quadruple precision (qp) routines, PCA is limited to single (sp) +#! and double (dp) precision for consistency with external optimized backends. #:set PCA_REAL_KINDS = ["sp", "dp"] #:set PCA_REAL_TYPES = ["real(sp)", "real(dp)"] #:set REAL_KINDS_TYPES = list(zip(PCA_REAL_KINDS, PCA_REAL_TYPES)) From b7c2be15ce28f4263212da5ca499d53b8738d41e Mon Sep 17 00:00:00 2001 From: jaya sathvik Date: Wed, 7 Jan 2026 13:57:05 +0530 Subject: [PATCH 010/104] convert to subroutines,updated test --- src/stats/stdlib_stats.fypp | 12 ++++----- src/stats/stdlib_stats_pca.fypp | 46 +++++++++++++++++++++++---------- test/stats/test_pca.f90 | 8 +++--- 3 files changed, 42 insertions(+), 24 deletions(-) diff --git a/src/stats/stdlib_stats.fypp b/src/stats/stdlib_stats.fypp index 3f69ffac3..4853d3dea 100644 --- a/src/stats/stdlib_stats.fypp +++ b/src/stats/stdlib_stats.fypp @@ -676,12 +676,12 @@ module stdlib_stats !! Projects data into the reduced dimensional space !! ([Specification](../page/specs/stdlib_stats.html#pca_transform)) #:for k1, t1 in PCA_KINDS_TYPES - module function pca_transform_${k1}$(x, components, x_mean) result(res) + module subroutine pca_transform_${k1}$(x, components, x_mean, x_transformed) ${t1}$, intent(in) :: x(:,:) ${t1}$, intent(in) :: components(:,:) ${t1}$, intent(in), optional :: x_mean(:) - ${t1}$ :: res(size(x,1), size(components,1)) - end function pca_transform_${k1}$ + ${t1}$, intent(out) :: x_transformed(:,:) + end subroutine pca_transform_${k1}$ #:endfor end interface pca_transform @@ -692,12 +692,12 @@ module stdlib_stats !! Reconstructs original data from the reduced space !! ([Specification](../page/specs/stdlib_stats.html#pca_inverse_transform)) #:for k1, t1 in PCA_KINDS_TYPES - module function pca_inverse_transform_${k1}$(x_reduced, components, x_mean) result(res) + module subroutine pca_inverse_transform_${k1}$(x_reduced, components, x_mean, x_reconstructed) ${t1}$, intent(in) :: x_reduced(:,:) ${t1}$, intent(in) :: components(:,:) ${t1}$, intent(in), optional :: x_mean(:) - ${t1}$ :: res(size(x_reduced,1), size(components,2)) - end function pca_inverse_transform_${k1}$ + ${t1}$, intent(out) :: x_reconstructed(:,:) + end subroutine pca_inverse_transform_${k1}$ #:endfor end interface pca_inverse_transform diff --git a/src/stats/stdlib_stats_pca.fypp b/src/stats/stdlib_stats_pca.fypp index 00583fe1a..dd7106b8a 100644 --- a/src/stats/stdlib_stats_pca.fypp +++ b/src/stats/stdlib_stats_pca.fypp @@ -129,17 +129,22 @@ contains #:for k1, t1 in REAL_KINDS_TYPES - module function pca_transform_${k1}$(x, components, x_mean) result(res) + module subroutine pca_transform_${k1}$(x, components, x_mean, x_transformed) + use stdlib_linalg_blas, only: gemm ${t1}$, intent(in) :: x(:,:) ${t1}$, intent(in) :: components(:,:) ${t1}$, intent(in), optional :: x_mean(:) - ${t1}$ :: res(size(x,1), size(components,1)) + ${t1}$, intent(out) :: x_transformed(:,:) - integer(ilp) :: i, n + integer(ilp) :: i, n, p, k ${t1}$, allocatable :: x_centered(:,:) + ${t1}$, parameter :: alpha = 1.0_${k1}$, beta = 0.0_${k1}$ - n = size(x, 1, kind=ilp) - allocate(x_centered(n, size(x, 2, kind=ilp))) + n = size(x, 1, kind=ilp) ! number of samples + p = size(x, 2, kind=ilp) ! number of features + k = size(components, 1, kind=ilp) ! number of components + + allocate(x_centered(n, p)) if (present(x_mean)) then do i = 1, n x_centered(i, :) = x(i, :) - x_mean @@ -148,27 +153,40 @@ contains x_centered = x end if - res = matmul(x_centered, transpose(components)) - end function pca_transform_${k1}$ + ! x_transformed = x_centered * components^T + ! GEMM: C = alpha * A * B^T + beta * C + ! x_transformed(n,k) = x_centered(n,p) * components(k,p)^T + call gemm('N', 'T', n, k, p, alpha, x_centered, n, components, k, beta, x_transformed, n) + end subroutine pca_transform_${k1}$ #:endfor #:for k1, t1 in REAL_KINDS_TYPES - module function pca_inverse_transform_${k1}$(x_reduced, components, x_mean) result(res) + module subroutine pca_inverse_transform_${k1}$(x_reduced, components, x_mean, x_reconstructed) + use stdlib_linalg_blas, only: gemm ${t1}$, intent(in) :: x_reduced(:,:) ${t1}$, intent(in) :: components(:,:) ${t1}$, intent(in), optional :: x_mean(:) - ${t1}$ :: res(size(x_reduced,1), size(components,2)) + ${t1}$, intent(out) :: x_reconstructed(:,:) - integer(ilp) :: i, n - n = size(x_reduced, 1, kind=ilp) - res = matmul(x_reduced, components) + integer(ilp) :: i, n, k, p + ${t1}$, parameter :: alpha = 1.0_${k1}$, beta = 0.0_${k1}$ + + n = size(x_reduced, 1, kind=ilp) ! number of samples + k = size(x_reduced, 2, kind=ilp) ! number of components + p = size(components, 2, kind=ilp) ! number of features + + ! x_reconstructed = x_reduced * components + ! GEMM: C = alpha * A * B + beta * C + ! x_reconstructed(n,p) = x_reduced(n,k) * components(k,p) + call gemm('N', 'N', n, p, k, alpha, x_reduced, n, components, k, beta, x_reconstructed, n) + if (present(x_mean)) then do i = 1, n - res(i, :) = res(i, :) + x_mean + x_reconstructed(i, :) = x_reconstructed(i, :) + x_mean end do end if - end function pca_inverse_transform_${k1}$ + end subroutine pca_inverse_transform_${k1}$ #:endfor end submodule stdlib_stats_pca diff --git a/test/stats/test_pca.f90 b/test/stats/test_pca.f90 index 6535a25d7..560365270 100644 --- a/test/stats/test_pca.f90 +++ b/test/stats/test_pca.f90 @@ -32,12 +32,12 @@ subroutine test_pca_sp() call check(abs(s(2)) < sptol, "pca_sp svd s2") ! Test Transform - x_trans = pca_transform(x, components, mu) + call pca_transform(x, components, mu, x_trans) ! Second dimension should be zero call check(all(abs(x_trans(:, 2)) < sptol), "pca_sp transform") ! Test Inverse Transform - x_inv = pca_inverse_transform(x_trans, components, mu) + call pca_inverse_transform(x_trans, components, mu, x_inv) call check(all(abs(x_inv - x) < sptol), "pca_sp inverse") ! Test EIG method @@ -62,8 +62,8 @@ subroutine test_pca_dp() call check(abs(s(1) - 4.0_dp) < dptol, "pca_dp svd s1") ! Test Transform/Inverse - x_trans = pca_transform(x, components, mu) - x_inv = pca_inverse_transform(x_trans, components, mu) + call pca_transform(x, components, mu, x_trans) + call pca_inverse_transform(x_trans, components, mu, x_inv) call check(all(abs(x_inv - x) < dptol), "pca_dp inverse") ! Test EIG method From 63a0a1f86f6cfcea1e4ac928c7dcb18de20823fc Mon Sep 17 00:00:00 2001 From: jaya sathvik Date: Wed, 7 Jan 2026 14:09:33 +0530 Subject: [PATCH 011/104] fix errors --- src/stats/stdlib_stats_pca.fypp | 26 ++++++++------------------ 1 file changed, 8 insertions(+), 18 deletions(-) diff --git a/src/stats/stdlib_stats_pca.fypp b/src/stats/stdlib_stats_pca.fypp index dd7106b8a..556997234 100644 --- a/src/stats/stdlib_stats_pca.fypp +++ b/src/stats/stdlib_stats_pca.fypp @@ -7,6 +7,7 @@ #:set PCA_REAL_TYPES = ["real(sp)", "real(dp)"] #:set REAL_KINDS_TYPES = list(zip(PCA_REAL_KINDS, PCA_REAL_TYPES)) submodule (stdlib_stats) stdlib_stats_pca + use stdlib_kinds, only: sp, dp use stdlib_error, only: error_stop use stdlib_optval, only: optval use stdlib_linalg, only: svd, eigh @@ -130,19 +131,16 @@ contains #:for k1, t1 in REAL_KINDS_TYPES module subroutine pca_transform_${k1}$(x, components, x_mean, x_transformed) - use stdlib_linalg_blas, only: gemm ${t1}$, intent(in) :: x(:,:) ${t1}$, intent(in) :: components(:,:) ${t1}$, intent(in), optional :: x_mean(:) ${t1}$, intent(out) :: x_transformed(:,:) - integer(ilp) :: i, n, p, k + integer(ilp) :: i, n, p ${t1}$, allocatable :: x_centered(:,:) - ${t1}$, parameter :: alpha = 1.0_${k1}$, beta = 0.0_${k1}$ - n = size(x, 1, kind=ilp) ! number of samples - p = size(x, 2, kind=ilp) ! number of features - k = size(components, 1, kind=ilp) ! number of components + n = size(x, 1, kind=ilp) + p = size(x, 2, kind=ilp) allocate(x_centered(n, p)) if (present(x_mean)) then @@ -154,32 +152,24 @@ contains end if ! x_transformed = x_centered * components^T - ! GEMM: C = alpha * A * B^T + beta * C - ! x_transformed(n,k) = x_centered(n,p) * components(k,p)^T - call gemm('N', 'T', n, k, p, alpha, x_centered, n, components, k, beta, x_transformed, n) + x_transformed = matmul(x_centered, transpose(components)) end subroutine pca_transform_${k1}$ #:endfor #:for k1, t1 in REAL_KINDS_TYPES module subroutine pca_inverse_transform_${k1}$(x_reduced, components, x_mean, x_reconstructed) - use stdlib_linalg_blas, only: gemm ${t1}$, intent(in) :: x_reduced(:,:) ${t1}$, intent(in) :: components(:,:) ${t1}$, intent(in), optional :: x_mean(:) ${t1}$, intent(out) :: x_reconstructed(:,:) - integer(ilp) :: i, n, k, p - ${t1}$, parameter :: alpha = 1.0_${k1}$, beta = 0.0_${k1}$ + integer(ilp) :: i, n - n = size(x_reduced, 1, kind=ilp) ! number of samples - k = size(x_reduced, 2, kind=ilp) ! number of components - p = size(components, 2, kind=ilp) ! number of features + n = size(x_reduced, 1, kind=ilp) ! x_reconstructed = x_reduced * components - ! GEMM: C = alpha * A * B + beta * C - ! x_reconstructed(n,p) = x_reduced(n,k) * components(k,p) - call gemm('N', 'N', n, p, k, alpha, x_reduced, n, components, k, beta, x_reconstructed, n) + x_reconstructed = matmul(x_reduced, components) if (present(x_mean)) then do i = 1, n From cfbcdee5a7165df1de65066b8c88ce90b3539655 Mon Sep 17 00:00:00 2001 From: jaya sathvik Date: Wed, 7 Jan 2026 14:19:22 +0530 Subject: [PATCH 012/104] fixed errors --- src/stats/stdlib_stats_pca.fypp | 68 ++++++++++++++++----------------- 1 file changed, 32 insertions(+), 36 deletions(-) diff --git a/src/stats/stdlib_stats_pca.fypp b/src/stats/stdlib_stats_pca.fypp index 556997234..03470b63f 100644 --- a/src/stats/stdlib_stats_pca.fypp +++ b/src/stats/stdlib_stats_pca.fypp @@ -12,7 +12,9 @@ submodule (stdlib_stats) stdlib_stats_pca use stdlib_optval, only: optval use stdlib_linalg, only: svd, eigh use stdlib_linalg_constants, only: ilp - use stdlib_linalg_state, only: LINALG_SUCCESS, LINALG_ERROR, linalg_state_type + use stdlib_linalg_blas, only: gemm + use stdlib_linalg_state, only: linalg_state_type, LINALG_ERROR + use stdlib_sorting, only: sort_index implicit none contains @@ -29,7 +31,7 @@ contains type(linalg_state_type), intent(out), optional :: err type(linalg_state_type) :: err0 - integer(ilp) :: n, p, i, k + integer(ilp) :: n, p, i, k, m, n_s ${t1}$, allocatable :: mu(:) character(16) :: method_ @@ -43,14 +45,11 @@ contains allocate(mu(p)) mu = mean(x, dim=1) if (present(x_mean)) x_mean = mu - - err0 = linalg_state_type("pca", LINALG_SUCCESS) if (method_ == "svd") then ! 2. Center data and call SVD with temporaries for robustness block ${t1}$, allocatable :: s_tmp(:), vt_tmp(:,:) - integer(ilp) :: n_s n_s = min(n, p) allocate(s_tmp(n_s), vt_tmp(n_s, p)) @@ -71,41 +70,28 @@ contains end if if (err0%ok()) then - i = min(size(components, 1, kind=ilp), n_s) - components(:i, :) = vt_tmp(:i, :) - i = min(size(singular_values, 1, kind=ilp), n_s) - singular_values(:i) = s_tmp(:i) + m = min(size(components, 1, kind=ilp), n_s) + components(:m, :) = vt_tmp(:m, :) + m = min(size(singular_values, 1, kind=ilp), n_s) + singular_values(:m) = s_tmp(:m) end if end block else if (method_ == "eig" .or. method_ == "cov") then ! 3. Eigendecomposition of covariance matrix block - ${t1}$, allocatable :: c(:,:) - ${t1}$, allocatable :: vectors(:,:) - real(${k1}$), allocatable :: lambda(:) + ${t1}$, allocatable :: c(:,:), vectors(:,:) + real(${k1}$), allocatable :: lambda(:), lambda_copy(:) integer(ilp), allocatable :: idx(:) - integer(ilp) :: j, m - real(${k1}$) :: tmp_val - allocate(lambda(p), idx(p), vectors(p, p)) + allocate(lambda(p), lambda_copy(p), idx(p), vectors(p, p)) c = cov(x, dim=1) call eigh(c, lambda, vectors=vectors, err=err0) if (err0%ok()) then - ! Sort eigenvalues and vectors in descending order - do j = 1, p - idx(j) = j - end do - ! Simple bubble sort - do i = 1, p-1 - do j = i+1, p - if (lambda(idx(i)) < lambda(idx(j))) then - m = idx(i) - idx(i) = idx(j) - idx(j) = m - end if - end do - end do + ! Sort eigenvalues in descending order using stdlib_sorting + ! sort_index sorts in ascending order, so we negate values + lambda_copy = -lambda + call sort_index(lambda_copy, idx) ! Assign sorted results m = min(size(components, 1, kind=ilp), p) @@ -123,7 +109,8 @@ contains err0 = linalg_state_type("pca", LINALG_ERROR, "Unknown method: "//method_) end if - if (present(err)) err = err0 + ! Handle error state: return error or stop if err not present + call err0%handle(err) end subroutine pca_${k1}$ #:endfor @@ -136,11 +123,13 @@ contains ${t1}$, intent(in), optional :: x_mean(:) ${t1}$, intent(out) :: x_transformed(:,:) - integer(ilp) :: i, n, p + integer(ilp) :: i, n, p, nc ${t1}$, allocatable :: x_centered(:,:) + ${t1}$, parameter :: alpha = 1.0_${k1}$, beta = 0.0_${k1}$ n = size(x, 1, kind=ilp) p = size(x, 2, kind=ilp) + nc = size(components, 1, kind=ilp) allocate(x_centered(n, p)) if (present(x_mean)) then @@ -151,8 +140,10 @@ contains x_centered = x end if - ! x_transformed = x_centered * components^T - x_transformed = matmul(x_centered, transpose(components)) + ! x_transformed = x_centered * components^T using GEMM + ! GEMM: C = alpha * op(A) * op(B) + beta * C + ! x_transformed(n, nc) = x_centered(n, p) * components(nc, p)^T + call gemm('N', 'T', n, nc, p, alpha, x_centered, n, components, nc, beta, x_transformed, n) end subroutine pca_transform_${k1}$ #:endfor @@ -164,12 +155,17 @@ contains ${t1}$, intent(in), optional :: x_mean(:) ${t1}$, intent(out) :: x_reconstructed(:,:) - integer(ilp) :: i, n + integer(ilp) :: i, n, nc, p + ${t1}$, parameter :: alpha = 1.0_${k1}$, beta = 0.0_${k1}$ n = size(x_reduced, 1, kind=ilp) + nc = size(x_reduced, 2, kind=ilp) + p = size(components, 2, kind=ilp) - ! x_reconstructed = x_reduced * components - x_reconstructed = matmul(x_reduced, components) + ! x_reconstructed = x_reduced * components using GEMM + ! GEMM: C = alpha * op(A) * op(B) + beta * C + ! x_reconstructed(n, p) = x_reduced(n, nc) * components(nc, p) + call gemm('N', 'N', n, p, nc, alpha, x_reduced, n, components, nc, beta, x_reconstructed, n) if (present(x_mean)) then do i = 1, n From db197314ad8ab25324273768d04850d60d8e2f90 Mon Sep 17 00:00:00 2001 From: jaya sathvik Date: Wed, 7 Jan 2026 18:53:17 +0530 Subject: [PATCH 013/104] fix PCA BLAS/LAPACK linking --- src/stats/CMakeLists.txt | 1 + src/stats/stdlib_stats.fypp | 11 ++--- src/stats/stdlib_stats_pca.fypp | 13 +++--- test/stats/CMakeLists.txt | 1 + test/stats/test_pca.f90 | 76 --------------------------------- test/stats/test_pca.fypp | 54 +++++++++++++++++++++++ 6 files changed, 65 insertions(+), 91 deletions(-) delete mode 100644 test/stats/test_pca.f90 create mode 100644 test/stats/test_pca.fypp diff --git a/src/stats/CMakeLists.txt b/src/stats/CMakeLists.txt index 41042315b..b9b94ab4b 100644 --- a/src/stats/CMakeLists.txt +++ b/src/stats/CMakeLists.txt @@ -31,3 +31,4 @@ set(stats_fppFiles ) configure_stdlib_target(stats "" stats_fppFiles stats_cppFiles) +target_link_libraries(stats PUBLIC blas lapack) diff --git a/src/stats/stdlib_stats.fypp b/src/stats/stdlib_stats.fypp index 4853d3dea..1fa272f6c 100644 --- a/src/stats/stdlib_stats.fypp +++ b/src/stats/stdlib_stats.fypp @@ -642,13 +642,10 @@ module stdlib_stats end interface moment - #! Note: PCA uses SVD and EIGH which rely on LAPACK. External optimized BLAS/LAPACK backends - #! (e.g., OpenBLAS, MKL) do not support extended precision (xdp). While stdlib's internal LAPACK - #! implementations do provide quadruple precision (qp) routines, PCA is limited to single (sp) - #! and double (dp) precision for consistency with external optimized backends. - #:set PCA_REAL_KINDS = ["sp", "dp"] - #:set PCA_REAL_TYPES = ["real(sp)", "real(dp)"] - #:set PCA_KINDS_TYPES = list(zip(PCA_REAL_KINDS, PCA_REAL_TYPES)) + #! Note: PCA uses SVD and EIGH which rely on LAPACK. While optimized backends + #! (e.g., OpenBLAS, MKL) might only support single and double precision, + #! stdlib's internal BLAS/LAPACK provide support for all kinds (including xdp and qp). + #:set PCA_KINDS_TYPES = list(zip(REAL_KINDS, REAL_TYPES)) interface pca !! version: experimental diff --git a/src/stats/stdlib_stats_pca.fypp b/src/stats/stdlib_stats_pca.fypp index 03470b63f..e92c00980 100644 --- a/src/stats/stdlib_stats_pca.fypp +++ b/src/stats/stdlib_stats_pca.fypp @@ -1,13 +1,10 @@ #:include "common.fypp" -#! Note: PCA uses SVD and EIGH which rely on LAPACK. External optimized BLAS/LAPACK backends -#! (e.g., OpenBLAS, MKL) do not support extended precision (xdp). While stdlib's internal LAPACK -#! implementations do provide quadruple precision (qp) routines, PCA is limited to single (sp) -#! and double (dp) precision for consistency with external optimized backends. -#:set PCA_REAL_KINDS = ["sp", "dp"] -#:set PCA_REAL_TYPES = ["real(sp)", "real(dp)"] -#:set REAL_KINDS_TYPES = list(zip(PCA_REAL_KINDS, PCA_REAL_TYPES)) +#! Note: PCA uses SVD and EIGH which rely on LAPACK. While optimized backends +#! (e.g., OpenBLAS, MKL) might only support single and double precision, +#! stdlib's internal BLAS/LAPACK provide support for all kinds (including xdp and qp). +#:set REAL_KINDS_TYPES = list(zip(REAL_KINDS, REAL_TYPES)) submodule (stdlib_stats) stdlib_stats_pca - use stdlib_kinds, only: sp, dp + use stdlib_kinds, only: sp, dp, xdp, qp use stdlib_error, only: error_stop use stdlib_optval, only: optval use stdlib_linalg, only: svd, eigh diff --git a/test/stats/CMakeLists.txt b/test/stats/CMakeLists.txt index 5eb3d61b0..3627508a0 100644 --- a/test/stats/CMakeLists.txt +++ b/test/stats/CMakeLists.txt @@ -8,6 +8,7 @@ set(fppFiles test_distribution_uniform.fypp test_distribution_normal.fypp test_distribution_exponential.fypp + test_pca.fypp ) fypp_f90("${fyppFlags}" "${fppFiles}" outFiles) diff --git a/test/stats/test_pca.f90 b/test/stats/test_pca.f90 deleted file mode 100644 index 560365270..000000000 --- a/test/stats/test_pca.f90 +++ /dev/null @@ -1,76 +0,0 @@ -program test_pca - use stdlib_error, only: check - use stdlib_kinds, only: sp, dp - use stdlib_stats, only: pca, pca_transform, pca_inverse_transform - use stdlib_linalg_state, only: linalg_state_type - implicit none - - real(sp), parameter :: sptol = 1000 * epsilon(1._sp) - real(dp), parameter :: dptol = 1000 * epsilon(1._dp) - - call test_pca_sp() - call test_pca_dp() - -contains - - subroutine test_pca_sp() - real(sp) :: x(3, 2), components(2, 2), s(2), mu(2) - real(sp) :: x_red(3, 1), comp_red(1, 2), s_red(1) - real(sp) :: x_trans(3, 2), x_inv(3, 2) - type(linalg_state_type) :: err - - ! Data: [1, 2], [3, 4], [5, 6] - x = reshape([1.0_sp, 3.0_sp, 5.0_sp, 2.0_sp, 4.0_sp, 6.0_sp], [3, 2]) - - ! Test SVD method - call pca(x, components, s, x_mean=mu, method="svd", err=err) - call check(err%ok(), "pca_sp svd err") - call check(all(abs(mu - [3.0_sp, 4.0_sp]) < sptol), "pca_sp svd mean") - ! First component should be approx [0.707, 0.707] (or negative) - call check(abs(abs(components(1,1)) - 1.0_sp/sqrt(2.0_sp)) < sptol, "pca_sp svd comp1") - call check(abs(s(1) - 4.0_sp) < sptol, "pca_sp svd s1") - call check(abs(s(2)) < sptol, "pca_sp svd s2") - - ! Test Transform - call pca_transform(x, components, mu, x_trans) - ! Second dimension should be zero - call check(all(abs(x_trans(:, 2)) < sptol), "pca_sp transform") - - ! Test Inverse Transform - call pca_inverse_transform(x_trans, components, mu, x_inv) - call check(all(abs(x_inv - x) < sptol), "pca_sp inverse") - - ! Test EIG method - call pca(x, components, s, method="eig", err=err) - call check(err%ok(), "pca_sp eig err") - call check(abs(s(1) - 4.0_sp) < sptol, "pca_sp eig s1") - - end subroutine test_pca_sp - - subroutine test_pca_dp() - real(dp) :: x(3, 2), components(2, 2), s(2), mu(2) - real(dp) :: x_trans(3, 2), x_inv(3, 2) - type(linalg_state_type) :: err - - x = reshape([1.0_dp, 3.0_dp, 5.0_dp, 2.0_dp, 4.0_dp, 6.0_dp], [3, 2]) - - ! Test SVD method - call pca(x, components, s, x_mean=mu, method="svd", err=err) - call check(err%ok(), "pca_dp svd err") - call check(all(abs(mu - [3.0_dp, 4.0_dp]) < dptol), "pca_dp svd mean") - call check(abs(abs(components(1,1)) - 1.0_dp/sqrt(2.0_dp)) < dptol, "pca_dp svd comp1") - call check(abs(s(1) - 4.0_dp) < dptol, "pca_dp svd s1") - - ! Test Transform/Inverse - call pca_transform(x, components, mu, x_trans) - call pca_inverse_transform(x_trans, components, mu, x_inv) - call check(all(abs(x_inv - x) < dptol), "pca_dp inverse") - - ! Test EIG method - call pca(x, components, s, method="eig", err=err) - call check(err%ok(), "pca_dp eig err") - call check(abs(s(1) - 4.0_dp) < dptol, "pca_dp eig s1") - - end subroutine test_pca_dp - -end program test_pca diff --git a/test/stats/test_pca.fypp b/test/stats/test_pca.fypp new file mode 100644 index 000000000..327950753 --- /dev/null +++ b/test/stats/test_pca.fypp @@ -0,0 +1,54 @@ +#:include "common.fypp" +program test_pca + use stdlib_error, only: check + use stdlib_kinds, only: sp, dp, xdp, qp + use stdlib_stats, only: pca, pca_transform, pca_inverse_transform + use stdlib_linalg_state, only: linalg_state_type + implicit none + + #:for k1 in REAL_KINDS + real(${k1}$), parameter :: ${k1}$tol = 1000 * epsilon(1._${k1}$) + #:endfor + + #:for k1 in REAL_KINDS + call test_pca_${k1}$() + #:endfor + +contains + + #:for k1 in REAL_KINDS + subroutine test_pca_${k1}$() + real(${k1}$) :: x(3, 2), components(2, 2), s(2), mu(2) + real(${k1}$) :: x_trans(3, 2), x_inv(3, 2) + type(linalg_state_type) :: err + + ! Data: [1, 2], [3, 4], [5, 6] + x = reshape([1.0_${k1}$, 3.0_${k1}$, 5.0_${k1}$, 2.0_${k1}$, 4.0_${k1}$, 6.0_${k1}$], [3, 2]) + + ! Test SVD method + call pca(x, components, s, x_mean=mu, method="svd", err=err) + call check(err%ok(), "pca_${k1}$ svd err") + call check(all(abs(mu - [3.0_${k1}$, 4.0_${k1}$]) < ${k1}$tol), "pca_${k1}$ svd mean") + ! First component should be approx [0.707, 0.707] (or negative) + call check(abs(abs(components(1,1)) - 1.0_${k1}$/sqrt(2.0_${k1}$)) < ${k1}$tol, "pca_${k1}$ svd comp1") + call check(abs(s(1) - 4.0_${k1}$) < ${k1}$tol, "pca_${k1}$ svd s1") + call check(abs(s(2)) < ${k1}$tol, "pca_${k1}$ svd s2") + + ! Test Transform + call pca_transform(x, components, mu, x_trans) + ! Second dimension should be zero + call check(all(abs(x_trans(:, 2)) < ${k1}$tol), "pca_${k1}$ transform") + + ! Test Inverse Transform + call pca_inverse_transform(x_trans, components, mu, x_inv) + call check(all(abs(x_inv - x) < ${k1}$tol), "pca_${k1}$ inverse") + + ! Test EIG method + call pca(x, components, s, method="eig", err=err) + call check(err%ok(), "pca_${k1}$ eig err") + call check(abs(s(1) - 4.0_${k1}$) < ${k1}$tol, "pca_${k1}$ eig s1") + + end subroutine test_pca_${k1}$ + #:endfor + +end program test_pca From d9ba5489ad376850f27225092dd079b46d69ad88 Mon Sep 17 00:00:00 2001 From: jaya sathvik Date: Wed, 7 Jan 2026 19:10:09 +0530 Subject: [PATCH 014/104] fix PCA BLAS/LAPACK --- src/stats/stdlib_stats.fypp | 7 ++++--- src/stats/stdlib_stats_pca.fypp | 7 ++++--- 2 files changed, 8 insertions(+), 6 deletions(-) diff --git a/src/stats/stdlib_stats.fypp b/src/stats/stdlib_stats.fypp index 1fa272f6c..7714ff2f3 100644 --- a/src/stats/stdlib_stats.fypp +++ b/src/stats/stdlib_stats.fypp @@ -642,9 +642,10 @@ module stdlib_stats end interface moment - #! Note: PCA uses SVD and EIGH which rely on LAPACK. While optimized backends - #! (e.g., OpenBLAS, MKL) might only support single and double precision, - #! stdlib's internal BLAS/LAPACK provide support for all kinds (including xdp and qp). + #! Note: PCA uses SVD and EIGH which rely on LAPACK/BLAS. stdlib provides internal + #! BLAS/LAPACK backends that support all real kinds (sp, dp, xdp, qp). Linking against + #! external optimized libraries (e.g., OpenBLAS, MKL) is optional and can improve + #! performance for single (sp) and double (dp) precision. #:set PCA_KINDS_TYPES = list(zip(REAL_KINDS, REAL_TYPES)) interface pca diff --git a/src/stats/stdlib_stats_pca.fypp b/src/stats/stdlib_stats_pca.fypp index e92c00980..6f2f932c2 100644 --- a/src/stats/stdlib_stats_pca.fypp +++ b/src/stats/stdlib_stats_pca.fypp @@ -1,7 +1,8 @@ #:include "common.fypp" -#! Note: PCA uses SVD and EIGH which rely on LAPACK. While optimized backends -#! (e.g., OpenBLAS, MKL) might only support single and double precision, -#! stdlib's internal BLAS/LAPACK provide support for all kinds (including xdp and qp). +#! Note: PCA uses SVD and EIGH which rely on LAPACK/BLAS. stdlib provides internal +#! BLAS/LAPACK backends that support all real kinds (sp, dp, xdp, qp). Linking against +#! external optimized libraries (e.g., OpenBLAS, MKL) is optional and can improve +#! performance for single (sp) and double (dp) precision. #:set REAL_KINDS_TYPES = list(zip(REAL_KINDS, REAL_TYPES)) submodule (stdlib_stats) stdlib_stats_pca use stdlib_kinds, only: sp, dp, xdp, qp From 11902b619ebdd310a61ae82cbcd5b2ab2b938d60 Mon Sep 17 00:00:00 2001 From: jaya sathvik Date: Wed, 7 Jan 2026 19:33:36 +0530 Subject: [PATCH 015/104] fix: remove xdp/qp from PCA use statements to fix CI builds --- src/stats/stdlib_stats.fypp | 2 +- src/stats/stdlib_stats_pca.fypp | 2 +- test/stats/test_pca.fypp | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/stats/stdlib_stats.fypp b/src/stats/stdlib_stats.fypp index 7714ff2f3..a44bbffb7 100644 --- a/src/stats/stdlib_stats.fypp +++ b/src/stats/stdlib_stats.fypp @@ -7,7 +7,7 @@ module stdlib_stats !! Provides support for various statistical methods. This includes currently !! descriptive statistics !! ([Specification](../page/specs/stdlib_stats.html)) - use stdlib_kinds, only: sp, dp, xdp, qp, & + use stdlib_kinds, only: sp, dp, & int8, int16, int32, int64 use stdlib_linalg_state, only: linalg_state_type implicit none diff --git a/src/stats/stdlib_stats_pca.fypp b/src/stats/stdlib_stats_pca.fypp index 6f2f932c2..bab9a3171 100644 --- a/src/stats/stdlib_stats_pca.fypp +++ b/src/stats/stdlib_stats_pca.fypp @@ -5,7 +5,7 @@ #! performance for single (sp) and double (dp) precision. #:set REAL_KINDS_TYPES = list(zip(REAL_KINDS, REAL_TYPES)) submodule (stdlib_stats) stdlib_stats_pca - use stdlib_kinds, only: sp, dp, xdp, qp + use stdlib_kinds, only: sp, dp use stdlib_error, only: error_stop use stdlib_optval, only: optval use stdlib_linalg, only: svd, eigh diff --git a/test/stats/test_pca.fypp b/test/stats/test_pca.fypp index 327950753..667526949 100644 --- a/test/stats/test_pca.fypp +++ b/test/stats/test_pca.fypp @@ -1,7 +1,7 @@ #:include "common.fypp" program test_pca use stdlib_error, only: check - use stdlib_kinds, only: sp, dp, xdp, qp + use stdlib_kinds, only: sp, dp use stdlib_stats, only: pca, pca_transform, pca_inverse_transform use stdlib_linalg_state, only: linalg_state_type implicit none From d7f87906be48580e46be95c67fe89034d0ccd34a Mon Sep 17 00:00:00 2001 From: jaya sathvik Date: Wed, 7 Jan 2026 19:46:57 +0530 Subject: [PATCH 016/104] both updated --- src/stats/stdlib_stats.fypp | 2 +- src/stats/stdlib_stats_pca.fypp | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/stats/stdlib_stats.fypp b/src/stats/stdlib_stats.fypp index a44bbffb7..7714ff2f3 100644 --- a/src/stats/stdlib_stats.fypp +++ b/src/stats/stdlib_stats.fypp @@ -7,7 +7,7 @@ module stdlib_stats !! Provides support for various statistical methods. This includes currently !! descriptive statistics !! ([Specification](../page/specs/stdlib_stats.html)) - use stdlib_kinds, only: sp, dp, & + use stdlib_kinds, only: sp, dp, xdp, qp, & int8, int16, int32, int64 use stdlib_linalg_state, only: linalg_state_type implicit none diff --git a/src/stats/stdlib_stats_pca.fypp b/src/stats/stdlib_stats_pca.fypp index bab9a3171..6f2f932c2 100644 --- a/src/stats/stdlib_stats_pca.fypp +++ b/src/stats/stdlib_stats_pca.fypp @@ -5,7 +5,7 @@ #! performance for single (sp) and double (dp) precision. #:set REAL_KINDS_TYPES = list(zip(REAL_KINDS, REAL_TYPES)) submodule (stdlib_stats) stdlib_stats_pca - use stdlib_kinds, only: sp, dp + use stdlib_kinds, only: sp, dp, xdp, qp use stdlib_error, only: error_stop use stdlib_optval, only: optval use stdlib_linalg, only: svd, eigh From f8bbd273ef9196d4c1d9e8d7e4d1bbf7d1fff1a2 Mon Sep 17 00:00:00 2001 From: jaya sathvik Date: Wed, 7 Jan 2026 20:17:17 +0530 Subject: [PATCH 017/104] test --- src/stats/stdlib_stats.fypp | 9 ++++----- src/stats/stdlib_stats_pca.fypp | 16 +++++++--------- test/stats/test_pca.fypp | 7 ++++--- 3 files changed, 15 insertions(+), 17 deletions(-) diff --git a/src/stats/stdlib_stats.fypp b/src/stats/stdlib_stats.fypp index 7714ff2f3..79774c163 100644 --- a/src/stats/stdlib_stats.fypp +++ b/src/stats/stdlib_stats.fypp @@ -642,11 +642,10 @@ module stdlib_stats end interface moment - #! Note: PCA uses SVD and EIGH which rely on LAPACK/BLAS. stdlib provides internal - #! BLAS/LAPACK backends that support all real kinds (sp, dp, xdp, qp). Linking against - #! external optimized libraries (e.g., OpenBLAS, MKL) is optional and can improve - #! performance for single (sp) and double (dp) precision. - #:set PCA_KINDS_TYPES = list(zip(REAL_KINDS, REAL_TYPES)) + #! Note: PCA is limited to single (sp) and double (dp) precision because external + #! optimized BLAS/LAPACK libraries (OpenBLAS, MKL) only support these precisions. + #! Extended (xdp) and quadruple (qp) precision are not supported for PCA. + #:set PCA_KINDS_TYPES = [("sp", "real(sp)"), ("dp", "real(dp)")] interface pca !! version: experimental diff --git a/src/stats/stdlib_stats_pca.fypp b/src/stats/stdlib_stats_pca.fypp index 6f2f932c2..7890c922e 100644 --- a/src/stats/stdlib_stats_pca.fypp +++ b/src/stats/stdlib_stats_pca.fypp @@ -1,11 +1,9 @@ #:include "common.fypp" -#! Note: PCA uses SVD and EIGH which rely on LAPACK/BLAS. stdlib provides internal -#! BLAS/LAPACK backends that support all real kinds (sp, dp, xdp, qp). Linking against -#! external optimized libraries (e.g., OpenBLAS, MKL) is optional and can improve -#! performance for single (sp) and double (dp) precision. -#:set REAL_KINDS_TYPES = list(zip(REAL_KINDS, REAL_TYPES)) +#! Note: PCA is limited to single (sp) and double (dp) precision because external +#! optimized BLAS/LAPACK libraries (OpenBLAS, MKL) only support these precisions. +#:set PCA_KINDS_TYPES = [("sp", "real(sp)"), ("dp", "real(dp)")] submodule (stdlib_stats) stdlib_stats_pca - use stdlib_kinds, only: sp, dp, xdp, qp + use stdlib_kinds, only: sp, dp use stdlib_error, only: error_stop use stdlib_optval, only: optval use stdlib_linalg, only: svd, eigh @@ -17,7 +15,7 @@ submodule (stdlib_stats) stdlib_stats_pca contains - #:for k1, t1 in REAL_KINDS_TYPES + #:for k1, t1 in PCA_KINDS_TYPES module subroutine pca_${k1}$(x, components, singular_values, x_mean, & method, overwrite_x, err) ${t1}$, intent(inout), target :: x(:,:) @@ -114,7 +112,7 @@ contains #:endfor - #:for k1, t1 in REAL_KINDS_TYPES + #:for k1, t1 in PCA_KINDS_TYPES module subroutine pca_transform_${k1}$(x, components, x_mean, x_transformed) ${t1}$, intent(in) :: x(:,:) ${t1}$, intent(in) :: components(:,:) @@ -146,7 +144,7 @@ contains #:endfor - #:for k1, t1 in REAL_KINDS_TYPES + #:for k1, t1 in PCA_KINDS_TYPES module subroutine pca_inverse_transform_${k1}$(x_reduced, components, x_mean, x_reconstructed) ${t1}$, intent(in) :: x_reduced(:,:) ${t1}$, intent(in) :: components(:,:) diff --git a/test/stats/test_pca.fypp b/test/stats/test_pca.fypp index 667526949..2f0c8d05e 100644 --- a/test/stats/test_pca.fypp +++ b/test/stats/test_pca.fypp @@ -1,4 +1,5 @@ #:include "common.fypp" +#:set PCA_KINDS = ["sp", "dp"] program test_pca use stdlib_error, only: check use stdlib_kinds, only: sp, dp @@ -6,17 +7,17 @@ program test_pca use stdlib_linalg_state, only: linalg_state_type implicit none - #:for k1 in REAL_KINDS + #:for k1 in PCA_KINDS real(${k1}$), parameter :: ${k1}$tol = 1000 * epsilon(1._${k1}$) #:endfor - #:for k1 in REAL_KINDS + #:for k1 in PCA_KINDS call test_pca_${k1}$() #:endfor contains - #:for k1 in REAL_KINDS + #:for k1 in PCA_KINDS subroutine test_pca_${k1}$() real(${k1}$) :: x(3, 2), components(2, 2), s(2), mu(2) real(${k1}$) :: x_trans(3, 2), x_inv(3, 2) From 75db88743e922194acff7c1e935330368fb19687 Mon Sep 17 00:00:00 2001 From: jaya sathvik Date: Wed, 7 Jan 2026 20:42:10 +0530 Subject: [PATCH 018/104] modify interfaces for core. --- src/stats/stdlib_stats_pca.fypp | 31 ++++++++++++++++++++++++------- 1 file changed, 24 insertions(+), 7 deletions(-) diff --git a/src/stats/stdlib_stats_pca.fypp b/src/stats/stdlib_stats_pca.fypp index 7890c922e..057e2a34d 100644 --- a/src/stats/stdlib_stats_pca.fypp +++ b/src/stats/stdlib_stats_pca.fypp @@ -27,7 +27,7 @@ contains type(linalg_state_type), intent(out), optional :: err type(linalg_state_type) :: err0 - integer(ilp) :: n, p, i, k, m, n_s + integer(ilp) :: n, p, i, j, k, m, n_s ${t1}$, allocatable :: mu(:) character(16) :: method_ @@ -37,9 +37,11 @@ contains method_ = optval(method, "svd") - ! 1. Calculate and optionally return mean + ! 1. Calculate mean using intrinsic sum (avoids submodule dependency issues) allocate(mu(p)) - mu = mean(x, dim=1) + do j = 1, p + mu(j) = sum(x(:, j)) / real(n, ${k1}$) + end do if (present(x_mean)) x_mean = mu if (method_ == "svd") then @@ -73,14 +75,29 @@ contains end if end block else if (method_ == "eig" .or. method_ == "cov") then - ! 3. Eigendecomposition of covariance matrix + ! 3. Eigendecomposition of covariance matrix (computed inline) block - ${t1}$, allocatable :: c(:,:), vectors(:,:) + ${t1}$, allocatable :: c(:,:), vectors(:,:), x_centered(:,:) real(${k1}$), allocatable :: lambda(:), lambda_copy(:) integer(ilp), allocatable :: idx(:) + real(${k1}$) :: scale_factor - allocate(lambda(p), lambda_copy(p), idx(p), vectors(p, p)) - c = cov(x, dim=1) + allocate(c(p, p), lambda(p), lambda_copy(p), idx(p), vectors(p, p)) + allocate(x_centered(n, p)) + + ! Center data + do i = 1, n + x_centered(i, :) = x(i, :) - mu + end do + + ! Compute covariance matrix: C = X^T * X / (n-1) + scale_factor = 1.0_${k1}$ / real(max(n-1, 1), ${k1}$) + do i = 1, p + do j = 1, p + c(i, j) = dot_product(x_centered(:, i), x_centered(:, j)) * scale_factor + end do + end do + call eigh(c, lambda, vectors=vectors, err=err0) if (err0%ok()) then From d72f72c3f9e3835303b87989a46f1142939cdd98 Mon Sep 17 00:00:00 2001 From: jaya sathvik Date: Thu, 8 Jan 2026 20:29:20 +0530 Subject: [PATCH 019/104] add stdlib_sorting.fypp in cmakelists.txt --- src/stats/CMakeLists.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/src/stats/CMakeLists.txt b/src/stats/CMakeLists.txt index b9b94ab4b..0b3f14516 100644 --- a/src/stats/CMakeLists.txt +++ b/src/stats/CMakeLists.txt @@ -13,6 +13,7 @@ set(stats_fppFiles ../stdlib_linalg_state.fypp ../stdlib_random.fypp ../stdlib_selection.fypp + ../stdlib_sorting.fypp ../stdlib_string_type.fypp stdlib_stats_corr.fypp stdlib_stats_cov.fypp From 44ee2e72c8004a5d177df0be7591c0fd11691a7b Mon Sep 17 00:00:00 2001 From: Jeremie Vandenplas Date: Thu, 8 Jan 2026 17:09:29 +0100 Subject: [PATCH 020/104] Fix CMakeLists.txt for the addition of stdlib_storting_pca --- src/stats/CMakeLists.txt | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/src/stats/CMakeLists.txt b/src/stats/CMakeLists.txt index 0b3f14516..3a9553c70 100644 --- a/src/stats/CMakeLists.txt +++ b/src/stats/CMakeLists.txt @@ -1,5 +1,9 @@ set(stats_cppFiles ../stdlib_linalg_constants.fypp + ../stdlib_sorting.fypp + ../stdlib_sorting_ord_sort.fypp + ../stdlib_sorting_sort_adjoint.fypp + ../stdlib_sorting_sort.fypp ) set(stats_fppFiles @@ -10,10 +14,11 @@ set(stats_fppFiles ../stdlib_error.fypp ../stdlib_linalg.fypp ../stdlib_linalg_diag.fypp + ../stdlib_linalg_eigenvalues.fypp ../stdlib_linalg_state.fypp + ../stdlib_linalg_svd.fypp ../stdlib_random.fypp ../stdlib_selection.fypp - ../stdlib_sorting.fypp ../stdlib_string_type.fypp stdlib_stats_corr.fypp stdlib_stats_cov.fypp @@ -31,5 +36,9 @@ set(stats_fppFiles stdlib_stats_var.fypp ) +set(f90Files + ../stdlib_sorting_radix_sort.f90 + ) + configure_stdlib_target(stats "" stats_fppFiles stats_cppFiles) -target_link_libraries(stats PUBLIC blas lapack) +target_link_libraries(stats PUBLIC blas lapack $<$>:bitsets>) From b3ea627bbdfd66b973433564e917cf536b9197bc Mon Sep 17 00:00:00 2001 From: jaya sathvik Date: Tue, 13 Jan 2026 16:44:35 +0530 Subject: [PATCH 021/104] Add center_data Helper Subroutine --- src/stats/stdlib_stats_pca.fypp | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/src/stats/stdlib_stats_pca.fypp b/src/stats/stdlib_stats_pca.fypp index 057e2a34d..c6737f396 100644 --- a/src/stats/stdlib_stats_pca.fypp +++ b/src/stats/stdlib_stats_pca.fypp @@ -15,6 +15,19 @@ submodule (stdlib_stats) stdlib_stats_pca contains + ! Helper subroutine: Centers data in-place by subtracting the mean from each row + #:for k1, t1 in PCA_KINDS_TYPES + pure subroutine center_data_${k1}$(x, mu) + ${t1}$, intent(inout) :: x(:,:) + ${t1}$, intent(in) :: mu(:) + integer(ilp) :: i, n + n = size(x, 1, kind=ilp) + do i = 1, n + x(i, :) = x(i, :) - mu + end do + end subroutine center_data_${k1}$ + #:endfor + #:for k1, t1 in PCA_KINDS_TYPES module subroutine pca_${k1}$(x, components, singular_values, x_mean, & method, overwrite_x, err) From 0e94be3eff654b37ea4a61e1fb8ba63c1082a798 Mon Sep 17 00:00:00 2001 From: jaya sathvik Date: Tue, 13 Jan 2026 17:23:32 +0530 Subject: [PATCH 022/104] Replace Manual Mean with stdlib mean --- src/stats/stdlib_stats_pca.fypp | 14 +++++--------- 1 file changed, 5 insertions(+), 9 deletions(-) diff --git a/src/stats/stdlib_stats_pca.fypp b/src/stats/stdlib_stats_pca.fypp index c6737f396..b90807cb5 100644 --- a/src/stats/stdlib_stats_pca.fypp +++ b/src/stats/stdlib_stats_pca.fypp @@ -65,17 +65,14 @@ contains allocate(s_tmp(n_s), vt_tmp(n_s, p)) if (optval(overwrite_x, .false.)) then - do i = 1, n - x(i, :) = x(i, :) - mu - end do + call center_data_${k1}$(x, mu) call svd(x, s_tmp, vt=vt_tmp, overwrite_a=.true., full_matrices=.false., err=err0) else block ${t1}$, allocatable :: x_centered(:,:) allocate(x_centered(n, p)) - do i = 1, n - x_centered(i, :) = x(i, :) - mu - end do + x_centered = x + call center_data_${k1}$(x_centered, mu) call svd(x_centered, s_tmp, vt=vt_tmp, overwrite_a=.true., full_matrices=.false., err=err0) end block end if @@ -99,9 +96,8 @@ contains allocate(x_centered(n, p)) ! Center data - do i = 1, n - x_centered(i, :) = x(i, :) - mu - end do + x_centered = x + call center_data_${k1}$(x_centered, mu) ! Compute covariance matrix: C = X^T * X / (n-1) scale_factor = 1.0_${k1}$ / real(max(n-1, 1), ${k1}$) From 05d496870eede5f4878b7056247cd048e9f9021b Mon Sep 17 00:00:00 2001 From: jaya sathvik Date: Tue, 13 Jan 2026 17:49:29 +0530 Subject: [PATCH 023/104] Replace Covariance Loops with BLAS syrk --- src/stats/stdlib_stats_pca.fypp | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/src/stats/stdlib_stats_pca.fypp b/src/stats/stdlib_stats_pca.fypp index b90807cb5..8ff329293 100644 --- a/src/stats/stdlib_stats_pca.fypp +++ b/src/stats/stdlib_stats_pca.fypp @@ -8,7 +8,7 @@ submodule (stdlib_stats) stdlib_stats_pca use stdlib_optval, only: optval use stdlib_linalg, only: svd, eigh use stdlib_linalg_constants, only: ilp - use stdlib_linalg_blas, only: gemm + use stdlib_linalg_blas, only: gemm, syrk use stdlib_linalg_state, only: linalg_state_type, LINALG_ERROR use stdlib_sorting, only: sort_index implicit none @@ -99,11 +99,15 @@ contains x_centered = x call center_data_${k1}$(x_centered, mu) - ! Compute covariance matrix: C = X^T * X / (n-1) + ! Compute covariance matrix using BLAS syrk: C = (1/(n-1)) * X^T * X + ! syrk computes C := alpha*A**T*A + beta*C (upper triangle only) scale_factor = 1.0_${k1}$ / real(max(n-1, 1), ${k1}$) - do i = 1, p - do j = 1, p - c(i, j) = dot_product(x_centered(:, i), x_centered(:, j)) * scale_factor + c = 0.0_${k1}$ + call syrk('U', 'T', p, n, scale_factor, x_centered, n, 0.0_${k1}$, c, p) + ! Fill lower triangle from upper triangle (syrk only fills upper) + do j = 1, p-1 + do i = j+1, p + c(i, j) = c(j, i) end do end do From d3d1c7198d724eedd643a2d16939a5a3b7a1a052 Mon Sep 17 00:00:00 2001 From: jaya sathvik Date: Tue, 13 Jan 2026 18:17:40 +0530 Subject: [PATCH 024/104] Extract pca_svd_driver and pca_eigh_driver & Updated Main pca Subroutine --- src/stats/stdlib_stats_pca.fypp | 190 +++++++++++++++++--------------- 1 file changed, 103 insertions(+), 87 deletions(-) diff --git a/src/stats/stdlib_stats_pca.fypp b/src/stats/stdlib_stats_pca.fypp index 8ff329293..0e885cab5 100644 --- a/src/stats/stdlib_stats_pca.fypp +++ b/src/stats/stdlib_stats_pca.fypp @@ -28,6 +28,81 @@ contains end subroutine center_data_${k1}$ #:endfor + ! SVD-based PCA driver: computes principal components via SVD of centered data + #:for k1, t1 in PCA_KINDS_TYPES + subroutine pca_svd_driver_${k1}$(x_centered, n, p, components, singular_values, err) + ${t1}$, intent(inout) :: x_centered(:,:) + integer(ilp), intent(in) :: n, p + ${t1}$, intent(out) :: components(:,:) + real(${k1}$), intent(out) :: singular_values(:) + type(linalg_state_type), intent(out) :: err + + integer(ilp) :: n_s, m + ${t1}$, allocatable :: s_tmp(:), vt_tmp(:,:) + + n_s = min(n, p) + allocate(s_tmp(n_s), vt_tmp(n_s, p)) + + call svd(x_centered, s_tmp, vt=vt_tmp, overwrite_a=.true., full_matrices=.false., err=err) + + if (err%ok()) then + m = min(size(components, 1, kind=ilp), n_s) + components(:m, :) = vt_tmp(:m, :) + m = min(size(singular_values, 1, kind=ilp), n_s) + singular_values(:m) = s_tmp(:m) + end if + end subroutine pca_svd_driver_${k1}$ + #:endfor + + ! Eigendecomposition-based PCA driver: computes principal components via covariance matrix + #:for k1, t1 in PCA_KINDS_TYPES + subroutine pca_eigh_driver_${k1}$(x_centered, n, p, components, singular_values, err) + ${t1}$, intent(in) :: x_centered(:,:) + integer(ilp), intent(in) :: n, p + ${t1}$, intent(out) :: components(:,:) + real(${k1}$), intent(out) :: singular_values(:) + type(linalg_state_type), intent(out) :: err + + integer(ilp) :: i, j, m + integer(ilp), allocatable :: idx(:) + real(${k1}$) :: scale_factor + real(${k1}$), allocatable :: lambda(:), lambda_copy(:) + ${t1}$, allocatable :: c(:,:), vectors(:,:) + + allocate(c(p, p), lambda(p), lambda_copy(p), idx(p), vectors(p, p)) + + ! Compute covariance matrix using BLAS syrk: C = (1/(n-1)) * X^T * X + scale_factor = 1.0_${k1}$ / real(max(n-1, 1), ${k1}$) + c = 0.0_${k1}$ + call syrk('U', 'T', p, n, scale_factor, x_centered, n, 0.0_${k1}$, c, p) + ! Fill lower triangle from upper triangle + do j = 1, p-1 + do i = j+1, p + c(i, j) = c(j, i) + end do + end do + + call eigh(c, lambda, vectors=vectors, err=err) + + if (err%ok()) then + ! Sort eigenvalues in descending order + lambda_copy = -lambda + call sort_index(lambda_copy, idx) + + ! Assign sorted results + m = min(size(components, 1, kind=ilp), p) + do i = 1, m + components(i, :) = vectors(:, idx(i)) + if (lambda(idx(i)) > 0.0_${k1}$) then + singular_values(i) = sqrt(lambda(idx(i)) * real(n-1, ${k1}$)) + else + singular_values(i) = 0.0_${k1}$ + end if + end do + end if + end subroutine pca_eigh_driver_${k1}$ + #:endfor + #:for k1, t1 in PCA_KINDS_TYPES module subroutine pca_${k1}$(x, components, singular_values, x_mean, & method, overwrite_x, err) @@ -40,104 +115,45 @@ contains type(linalg_state_type), intent(out), optional :: err type(linalg_state_type) :: err0 - integer(ilp) :: n, p, i, j, k, m, n_s - ${t1}$, allocatable :: mu(:) + integer(ilp) :: n, p + ${t1}$, allocatable :: mu(:), x_centered(:,:) character(16) :: method_ - + n = size(x, 1, kind=ilp) p = size(x, 2, kind=ilp) - k = size(components, 1, kind=ilp) - method_ = optval(method, "svd") - - ! 1. Calculate mean using intrinsic sum (avoids submodule dependency issues) + + ! Calculate mean along dimension 1 (column means) allocate(mu(p)) - do j = 1, p - mu(j) = sum(x(:, j)) / real(n, ${k1}$) - end do + mu = sum(x, dim=1) / real(n, ${k1}$) if (present(x_mean)) x_mean = mu - if (method_ == "svd") then - ! 2. Center data and call SVD with temporaries for robustness - block - ${t1}$, allocatable :: s_tmp(:), vt_tmp(:,:) - n_s = min(n, p) - allocate(s_tmp(n_s), vt_tmp(n_s, p)) - - if (optval(overwrite_x, .false.)) then - call center_data_${k1}$(x, mu) - call svd(x, s_tmp, vt=vt_tmp, overwrite_a=.true., full_matrices=.false., err=err0) - else - block - ${t1}$, allocatable :: x_centered(:,:) - allocate(x_centered(n, p)) - x_centered = x - call center_data_${k1}$(x_centered, mu) - call svd(x_centered, s_tmp, vt=vt_tmp, overwrite_a=.true., full_matrices=.false., err=err0) - end block - end if - - if (err0%ok()) then - m = min(size(components, 1, kind=ilp), n_s) - components(:m, :) = vt_tmp(:m, :) - m = min(size(singular_values, 1, kind=ilp), n_s) - singular_values(:m) = s_tmp(:m) - end if - end block - else if (method_ == "eig" .or. method_ == "cov") then - ! 3. Eigendecomposition of covariance matrix (computed inline) - block - ${t1}$, allocatable :: c(:,:), vectors(:,:), x_centered(:,:) - real(${k1}$), allocatable :: lambda(:), lambda_copy(:) - integer(ilp), allocatable :: idx(:) - real(${k1}$) :: scale_factor - - allocate(c(p, p), lambda(p), lambda_copy(p), idx(p), vectors(p, p)) + ! Method dispatch + select case (trim(method_)) + case ("svd") + if (optval(overwrite_x, .false.)) then + call center_data_${k1}$(x, mu) + call pca_svd_driver_${k1}$(x, n, p, components, singular_values, err0) + else allocate(x_centered(n, p)) - - ! Center data x_centered = x call center_data_${k1}$(x_centered, mu) - - ! Compute covariance matrix using BLAS syrk: C = (1/(n-1)) * X^T * X - ! syrk computes C := alpha*A**T*A + beta*C (upper triangle only) - scale_factor = 1.0_${k1}$ / real(max(n-1, 1), ${k1}$) - c = 0.0_${k1}$ - call syrk('U', 'T', p, n, scale_factor, x_centered, n, 0.0_${k1}$, c, p) - ! Fill lower triangle from upper triangle (syrk only fills upper) - do j = 1, p-1 - do i = j+1, p - c(i, j) = c(j, i) - end do - end do - - call eigh(c, lambda, vectors=vectors, err=err0) - - if (err0%ok()) then - ! Sort eigenvalues in descending order using stdlib_sorting - ! sort_index sorts in ascending order, so we negate values - lambda_copy = -lambda - call sort_index(lambda_copy, idx) - - ! Assign sorted results - m = min(size(components, 1, kind=ilp), p) - do i = 1, m - components(i, :) = vectors(:, idx(i)) - if (lambda(idx(i)) > 0.0_${k1}$) then - singular_values(i) = sqrt(lambda(idx(i)) * real(n-1, ${k1}$)) - else - singular_values(i) = 0.0_${k1}$ - end if - end do - end if - end block - else - err0 = linalg_state_type("pca", LINALG_ERROR, "Unknown method: "//method_) - end if - - ! Handle error state: return error or stop if err not present + call pca_svd_driver_${k1}$(x_centered, n, p, components, singular_values, err0) + end if + + case ("eig", "cov") + allocate(x_centered(n, p)) + x_centered = x + call center_data_${k1}$(x_centered, mu) + call pca_eigh_driver_${k1}$(x_centered, n, p, components, singular_values, err0) + + case default + err0 = linalg_state_type("pca", LINALG_ERROR, "Unknown method: "//trim(method_)) + end select + + ! Handle error state call err0%handle(err) - + end subroutine pca_${k1}$ #:endfor From 0659b39fc30e44534b3ba7ca3ad440b0cb7ec3bc Mon Sep 17 00:00:00 2001 From: jaya sathvik Date: Tue, 13 Jan 2026 19:57:43 +0530 Subject: [PATCH 025/104] optimized for performance and stability --- src/stats/stdlib_stats_pca.fypp | 37 ++++++++++++++++----------------- 1 file changed, 18 insertions(+), 19 deletions(-) diff --git a/src/stats/stdlib_stats_pca.fypp b/src/stats/stdlib_stats_pca.fypp index 0e885cab5..2181a0d57 100644 --- a/src/stats/stdlib_stats_pca.fypp +++ b/src/stats/stdlib_stats_pca.fypp @@ -65,6 +65,7 @@ contains integer(ilp) :: i, j, m integer(ilp), allocatable :: idx(:) + ${t1}$ :: alpha, beta real(${k1}$) :: scale_factor real(${k1}$), allocatable :: lambda(:), lambda_copy(:) ${t1}$, allocatable :: c(:,:), vectors(:,:) @@ -73,9 +74,11 @@ contains ! Compute covariance matrix using BLAS syrk: C = (1/(n-1)) * X^T * X scale_factor = 1.0_${k1}$ / real(max(n-1, 1), ${k1}$) + alpha = real(scale_factor, ${k1}$) + beta = 0.0_${k1}$ c = 0.0_${k1}$ - call syrk('U', 'T', p, n, scale_factor, x_centered, n, 0.0_${k1}$, c, p) - ! Fill lower triangle from upper triangle + call syrk('U', 'T', p, n, alpha, x_centered, n, beta, c, p) + ! Fill lower triangle from upper triangle (syrk only fills upper) do j = 1, p-1 do i = j+1, p c(i, j) = c(j, i) @@ -89,8 +92,9 @@ contains lambda_copy = -lambda call sort_index(lambda_copy, idx) - ! Assign sorted results + ! Assign sorted results with safety bounds checks m = min(size(components, 1, kind=ilp), p) + m = min(m, size(singular_values, 1, kind=ilp)) do i = 1, m components(i, :) = vectors(:, idx(i)) if (lambda(idx(i)) > 0.0_${k1}$) then @@ -121,14 +125,14 @@ contains n = size(x, 1, kind=ilp) p = size(x, 2, kind=ilp) - method_ = optval(method, "svd") + method_ = adjustl(optval(method, "svd")) ! Calculate mean along dimension 1 (column means) allocate(mu(p)) mu = sum(x, dim=1) / real(n, ${k1}$) if (present(x_mean)) x_mean = mu - ! Method dispatch + ! Method dispatch using trimmed string for robustness select case (trim(method_)) case ("svd") if (optval(overwrite_x, .false.)) then @@ -165,26 +169,21 @@ contains ${t1}$, intent(in), optional :: x_mean(:) ${t1}$, intent(out) :: x_transformed(:,:) - integer(ilp) :: i, n, p, nc + integer(ilp) :: n, p, nc + ${t1}$ :: alpha, beta ${t1}$, allocatable :: x_centered(:,:) - ${t1}$, parameter :: alpha = 1.0_${k1}$, beta = 0.0_${k1}$ n = size(x, 1, kind=ilp) p = size(x, 2, kind=ilp) nc = size(components, 1, kind=ilp) allocate(x_centered(n, p)) - if (present(x_mean)) then - do i = 1, n - x_centered(i, :) = x(i, :) - x_mean - end do - else - x_centered = x - end if + x_centered = x + if (present(x_mean)) call center_data_${k1}$(x_centered, x_mean) ! x_transformed = x_centered * components^T using GEMM - ! GEMM: C = alpha * op(A) * op(B) + beta * C - ! x_transformed(n, nc) = x_centered(n, p) * components(nc, p)^T + alpha = 1.0_${k1}$ + beta = 0.0_${k1}$ call gemm('N', 'T', n, nc, p, alpha, x_centered, n, components, nc, beta, x_transformed, n) end subroutine pca_transform_${k1}$ #:endfor @@ -198,15 +197,15 @@ contains ${t1}$, intent(out) :: x_reconstructed(:,:) integer(ilp) :: i, n, nc, p - ${t1}$, parameter :: alpha = 1.0_${k1}$, beta = 0.0_${k1}$ + ${t1}$ :: alpha, beta n = size(x_reduced, 1, kind=ilp) nc = size(x_reduced, 2, kind=ilp) p = size(components, 2, kind=ilp) ! x_reconstructed = x_reduced * components using GEMM - ! GEMM: C = alpha * op(A) * op(B) + beta * C - ! x_reconstructed(n, p) = x_reduced(n, nc) * components(nc, p) + alpha = 1.0_${k1}$ + beta = 0.0_${k1}$ call gemm('N', 'N', n, p, nc, alpha, x_reduced, n, components, nc, beta, x_reconstructed, n) if (present(x_mean)) then From 4ac725c4dff229c1556e5cbac1880444e4a74d11 Mon Sep 17 00:00:00 2001 From: jaya sathvik Date: Tue, 13 Jan 2026 22:18:25 +0530 Subject: [PATCH 026/104] Cache efficency --- src/stats/stdlib_stats_pca.fypp | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/src/stats/stdlib_stats_pca.fypp b/src/stats/stdlib_stats_pca.fypp index 2181a0d57..b1dc0a052 100644 --- a/src/stats/stdlib_stats_pca.fypp +++ b/src/stats/stdlib_stats_pca.fypp @@ -20,10 +20,11 @@ contains pure subroutine center_data_${k1}$(x, mu) ${t1}$, intent(inout) :: x(:,:) ${t1}$, intent(in) :: mu(:) - integer(ilp) :: i, n + integer(ilp) :: i, j, n, m n = size(x, 1, kind=ilp) - do i = 1, n - x(i, :) = x(i, :) - mu + m = size(x, 2, kind=ilp) + do concurrent( j=1:m, i=1:n ) + x(i, j) = x(i, j) - mu(j) end do end subroutine center_data_${k1}$ #:endfor @@ -196,7 +197,7 @@ contains ${t1}$, intent(in), optional :: x_mean(:) ${t1}$, intent(out) :: x_reconstructed(:,:) - integer(ilp) :: i, n, nc, p + integer(ilp) :: i, j, n, nc, p ${t1}$ :: alpha, beta n = size(x_reduced, 1, kind=ilp) @@ -209,8 +210,8 @@ contains call gemm('N', 'N', n, p, nc, alpha, x_reduced, n, components, nc, beta, x_reconstructed, n) if (present(x_mean)) then - do i = 1, n - x_reconstructed(i, :) = x_reconstructed(i, :) + x_mean + do concurrent( j=1:p, i=1:n ) + x_reconstructed(i, j) = x_reconstructed(i, j) + x_mean(j) end do end if end subroutine pca_inverse_transform_${k1}$ From 7348fafb6f0e2db5d208a1434b13fbfb65d5c879 Mon Sep 17 00:00:00 2001 From: jaya sathvik Date: Wed, 14 Jan 2026 01:20:55 +0530 Subject: [PATCH 027/104] fix issues build issues. --- example/CMakeLists.txt | 8 ++++---- src/CMakeLists.txt | 5 +++++ src/stats/CMakeLists.txt | 10 ++++++++++ 3 files changed, 19 insertions(+), 4 deletions(-) diff --git a/example/CMakeLists.txt b/example/CMakeLists.txt index 01a88512a..68ff84476 100644 --- a/example/CMakeLists.txt +++ b/example/CMakeLists.txt @@ -2,16 +2,16 @@ macro(ADD_EXAMPLE name) add_executable(example_${name} example_${name}.f90) target_link_libraries(example_${name} "${PROJECT_NAME}") add_test(NAME ${name} - COMMAND $ ${CMAKE_CURRENT_BINARY_DIR} - WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}) + COMMAND $ ${CMAKE_CURRENT_SOURCE_DIR} + WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}) endmacro(ADD_EXAMPLE) macro(ADD_EXAMPLEPP name) add_executable(example_${name} example_${name}.F90) target_link_libraries(example_${name} "${PROJECT_NAME}") add_test(NAME ${name} - COMMAND $ ${CMAKE_CURRENT_BINARY_DIR} - WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}) + COMMAND $ ${CMAKE_CURRENT_SOURCE_DIR} + WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}) endmacro(ADD_EXAMPLEPP) diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 74ce507d4..f63696eb2 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -1,5 +1,10 @@ if (NOT STDLIB_NO_BITSET) add_subdirectory(bitsets) +else() + # Provide a dummy INTERFACE target so other targets can link to "bitsets" + # without causing the linker to be passed unresolved -lbitsets flags. + add_library(bitsets INTERFACE) + target_compile_definitions(bitsets INTERFACE STDLIB_NO_BITSET=1) endif() add_subdirectory(blas) add_subdirectory(lapack) diff --git a/src/stats/CMakeLists.txt b/src/stats/CMakeLists.txt index 3a9553c70..d6e7c7c1f 100644 --- a/src/stats/CMakeLists.txt +++ b/src/stats/CMakeLists.txt @@ -42,3 +42,13 @@ set(f90Files configure_stdlib_target(stats "" stats_fppFiles stats_cppFiles) target_link_libraries(stats PUBLIC blas lapack $<$>:bitsets>) + +# Workaround for Intel Fortran compiler ICE (Internal Compiler Error) in stdlib_stats_pca.f90 +# The Intel ifx compiler (2024.1) triggers a segmentation violation during optimization. +# Compiling with -O0 avoids the optimizer codepath that causes the ICE. +if(CMAKE_Fortran_COMPILER_ID MATCHES "Intel") + set_source_files_properties( + ${CMAKE_CURRENT_BINARY_DIR}/stdlib_stats_pca.f90 + PROPERTIES COMPILE_FLAGS "-O0 -g" + ) +endif() From c58f515444feff8aebc9fb4da0b5176bbfdaa955 Mon Sep 17 00:00:00 2001 From: jaya sathvik Date: Wed, 14 Jan 2026 01:25:58 +0530 Subject: [PATCH 028/104] Revert "fix issues build issues." This reverts commit 7348fafb6f0e2db5d208a1434b13fbfb65d5c879. --- example/CMakeLists.txt | 8 ++++---- src/CMakeLists.txt | 5 ----- src/stats/CMakeLists.txt | 10 ---------- 3 files changed, 4 insertions(+), 19 deletions(-) diff --git a/example/CMakeLists.txt b/example/CMakeLists.txt index 68ff84476..01a88512a 100644 --- a/example/CMakeLists.txt +++ b/example/CMakeLists.txt @@ -2,16 +2,16 @@ macro(ADD_EXAMPLE name) add_executable(example_${name} example_${name}.f90) target_link_libraries(example_${name} "${PROJECT_NAME}") add_test(NAME ${name} - COMMAND $ ${CMAKE_CURRENT_SOURCE_DIR} - WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}) + COMMAND $ ${CMAKE_CURRENT_BINARY_DIR} + WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}) endmacro(ADD_EXAMPLE) macro(ADD_EXAMPLEPP name) add_executable(example_${name} example_${name}.F90) target_link_libraries(example_${name} "${PROJECT_NAME}") add_test(NAME ${name} - COMMAND $ ${CMAKE_CURRENT_SOURCE_DIR} - WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}) + COMMAND $ ${CMAKE_CURRENT_BINARY_DIR} + WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}) endmacro(ADD_EXAMPLEPP) diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index f63696eb2..74ce507d4 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -1,10 +1,5 @@ if (NOT STDLIB_NO_BITSET) add_subdirectory(bitsets) -else() - # Provide a dummy INTERFACE target so other targets can link to "bitsets" - # without causing the linker to be passed unresolved -lbitsets flags. - add_library(bitsets INTERFACE) - target_compile_definitions(bitsets INTERFACE STDLIB_NO_BITSET=1) endif() add_subdirectory(blas) add_subdirectory(lapack) diff --git a/src/stats/CMakeLists.txt b/src/stats/CMakeLists.txt index d6e7c7c1f..3a9553c70 100644 --- a/src/stats/CMakeLists.txt +++ b/src/stats/CMakeLists.txt @@ -42,13 +42,3 @@ set(f90Files configure_stdlib_target(stats "" stats_fppFiles stats_cppFiles) target_link_libraries(stats PUBLIC blas lapack $<$>:bitsets>) - -# Workaround for Intel Fortran compiler ICE (Internal Compiler Error) in stdlib_stats_pca.f90 -# The Intel ifx compiler (2024.1) triggers a segmentation violation during optimization. -# Compiling with -O0 avoids the optimizer codepath that causes the ICE. -if(CMAKE_Fortran_COMPILER_ID MATCHES "Intel") - set_source_files_properties( - ${CMAKE_CURRENT_BINARY_DIR}/stdlib_stats_pca.f90 - PROPERTIES COMPILE_FLAGS "-O0 -g" - ) -endif() From c776e8d59838749bbd783e1f53d74484a04057fd Mon Sep 17 00:00:00 2001 From: jaya sathvik Date: Sat, 17 Jan 2026 00:38:09 +0530 Subject: [PATCH 029/104] use nested do loops --- src/stats/stdlib_stats_pca.fypp | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/src/stats/stdlib_stats_pca.fypp b/src/stats/stdlib_stats_pca.fypp index b1dc0a052..175c283f5 100644 --- a/src/stats/stdlib_stats_pca.fypp +++ b/src/stats/stdlib_stats_pca.fypp @@ -23,8 +23,10 @@ contains integer(ilp) :: i, j, n, m n = size(x, 1, kind=ilp) m = size(x, 2, kind=ilp) - do concurrent( j=1:m, i=1:n ) - x(i, j) = x(i, j) - mu(j) + do j = 1, m + do i = 1, n + x(i, j) = x(i, j) - mu(j) + end do end do end subroutine center_data_${k1}$ #:endfor @@ -210,8 +212,10 @@ contains call gemm('N', 'N', n, p, nc, alpha, x_reduced, n, components, nc, beta, x_reconstructed, n) if (present(x_mean)) then - do concurrent( j=1:p, i=1:n ) - x_reconstructed(i, j) = x_reconstructed(i, j) + x_mean(j) + do j = 1, p + do i = 1, n + x_reconstructed(i, j) = x_reconstructed(i, j) + x_mean(j) + end do end do end if end subroutine pca_inverse_transform_${k1}$ From c47e2b6337c499c636bc1526a0b8449b1c8a5edd Mon Sep 17 00:00:00 2001 From: jaya sathvik Date: Sat, 17 Jan 2026 01:08:30 +0530 Subject: [PATCH 030/104] resolve compiler errors --- src/stats/stdlib_stats.fypp | 2 +- src/stats/stdlib_stats_pca.fypp | 47 +++++++++++++++++++++------------ 2 files changed, 31 insertions(+), 18 deletions(-) diff --git a/src/stats/stdlib_stats.fypp b/src/stats/stdlib_stats.fypp index 79774c163..9ca59cbc8 100644 --- a/src/stats/stdlib_stats.fypp +++ b/src/stats/stdlib_stats.fypp @@ -655,7 +655,7 @@ module stdlib_stats #:for k1, t1 in PCA_KINDS_TYPES module subroutine pca_${k1}$(x, components, singular_values, x_mean, & method, overwrite_x, err) - ${t1}$, intent(inout), target :: x(:,:) + ${t1}$, intent(inout) :: x(:,:) ${t1}$, intent(out) :: components(:,:) real(${k1}$), intent(out) :: singular_values(:) ${t1}$, intent(out), optional :: x_mean(:) diff --git a/src/stats/stdlib_stats_pca.fypp b/src/stats/stdlib_stats_pca.fypp index 175c283f5..793e60550 100644 --- a/src/stats/stdlib_stats_pca.fypp +++ b/src/stats/stdlib_stats_pca.fypp @@ -17,7 +17,7 @@ contains ! Helper subroutine: Centers data in-place by subtracting the mean from each row #:for k1, t1 in PCA_KINDS_TYPES - pure subroutine center_data_${k1}$(x, mu) + subroutine center_data_${k1}$(x, mu) ${t1}$, intent(inout) :: x(:,:) ${t1}$, intent(in) :: mu(:) integer(ilp) :: i, j, n, m @@ -40,19 +40,27 @@ contains real(${k1}$), intent(out) :: singular_values(:) type(linalg_state_type), intent(out) :: err - integer(ilp) :: n_s, m + integer(ilp) :: n_s, m, i ${t1}$, allocatable :: s_tmp(:), vt_tmp(:,:) n_s = min(n, p) - allocate(s_tmp(n_s), vt_tmp(n_s, p)) + allocate(s_tmp(n_s)) + allocate(vt_tmp(n_s, p)) call svd(x_centered, s_tmp, vt=vt_tmp, overwrite_a=.true., full_matrices=.false., err=err) if (err%ok()) then - m = min(size(components, 1, kind=ilp), n_s) - components(:m, :) = vt_tmp(:m, :) - m = min(size(singular_values, 1, kind=ilp), n_s) - singular_values(:m) = s_tmp(:m) + m = size(components, 1, kind=ilp) + if (n_s < m) m = n_s + do i = 1, m + components(i, :) = vt_tmp(i, :) + end do + + m = size(singular_values, 1, kind=ilp) + if (n_s < m) m = n_s + do i = 1, m + singular_values(i) = s_tmp(i) + end do end if end subroutine pca_svd_driver_${k1}$ #:endfor @@ -73,7 +81,11 @@ contains real(${k1}$), allocatable :: lambda(:), lambda_copy(:) ${t1}$, allocatable :: c(:,:), vectors(:,:) - allocate(c(p, p), lambda(p), lambda_copy(p), idx(p), vectors(p, p)) + allocate(c(p, p)) + allocate(lambda(p)) + allocate(lambda_copy(p)) + allocate(idx(p)) + allocate(vectors(p, p)) ! Compute covariance matrix using BLAS syrk: C = (1/(n-1)) * X^T * X scale_factor = 1.0_${k1}$ / real(max(n-1, 1), ${k1}$) @@ -113,7 +125,7 @@ contains #:for k1, t1 in PCA_KINDS_TYPES module subroutine pca_${k1}$(x, components, singular_values, x_mean, & method, overwrite_x, err) - ${t1}$, intent(inout), target :: x(:,:) + ${t1}$, intent(inout) :: x(:,:) ${t1}$, intent(out) :: components(:,:) real(${k1}$), intent(out) :: singular_values(:) ${t1}$, intent(out), optional :: x_mean(:) @@ -135,9 +147,8 @@ contains mu = sum(x, dim=1) / real(n, ${k1}$) if (present(x_mean)) x_mean = mu - ! Method dispatch using trimmed string for robustness - select case (trim(method_)) - case ("svd") + ! Method dispatch using if-else for maximum compiler stability + if (trim(method_) == "svd") then if (optval(overwrite_x, .false.)) then call center_data_${k1}$(x, mu) call pca_svd_driver_${k1}$(x, n, p, components, singular_values, err0) @@ -148,18 +159,20 @@ contains call pca_svd_driver_${k1}$(x_centered, n, p, components, singular_values, err0) end if - case ("eig", "cov") + else if (trim(method_) == "eig" .or. trim(method_) == "cov") then allocate(x_centered(n, p)) x_centered = x call center_data_${k1}$(x_centered, mu) call pca_eigh_driver_${k1}$(x_centered, n, p, components, singular_values, err0) - case default + else err0 = linalg_state_type("pca", LINALG_ERROR, "Unknown method: "//trim(method_)) - end select + end if - ! Handle error state - call err0%handle(err) + ! Handle error state explicitly + if (present(err)) then + err = err0 + end if end subroutine pca_${k1}$ #:endfor From 436a52654ab8fb7819bfaf2656886fcc0f47c407 Mon Sep 17 00:00:00 2001 From: jaya sathvik Date: Sat, 17 Jan 2026 13:22:55 +0530 Subject: [PATCH 031/104] fix issue --- src/stats/CMakeLists.txt | 2 +- src/stats/stdlib_stats.fypp | 11 ++- src/stats/stdlib_stats_pca.fypp | 124 ++++++++++++++++---------------- test/stats/test_pca.fypp | 8 ++- 4 files changed, 78 insertions(+), 67 deletions(-) diff --git a/src/stats/CMakeLists.txt b/src/stats/CMakeLists.txt index 3a9553c70..ab0e548ee 100644 --- a/src/stats/CMakeLists.txt +++ b/src/stats/CMakeLists.txt @@ -41,4 +41,4 @@ set(f90Files ) configure_stdlib_target(stats "" stats_fppFiles stats_cppFiles) -target_link_libraries(stats PUBLIC blas lapack $<$>:bitsets>) +target_link_libraries(stats PUBLIC blas lapack $<$:bitsets>) diff --git a/src/stats/stdlib_stats.fypp b/src/stats/stdlib_stats.fypp index 9ca59cbc8..d2bfc4eb6 100644 --- a/src/stats/stdlib_stats.fypp +++ b/src/stats/stdlib_stats.fypp @@ -642,10 +642,15 @@ module stdlib_stats end interface moment - #! Note: PCA is limited to single (sp) and double (dp) precision because external - #! optimized BLAS/LAPACK libraries (OpenBLAS, MKL) only support these precisions. - #! Extended (xdp) and quadruple (qp) precision are not supported for PCA. + #! PCA supports all real kinds available in stdlib's internal BLAS/LAPACK. + #! When WITH_XDP or WITH_QP are enabled, extended precision is also supported. #:set PCA_KINDS_TYPES = [("sp", "real(sp)"), ("dp", "real(dp)")] + #:if WITH_XDP + #:set PCA_KINDS_TYPES = PCA_KINDS_TYPES + [("xdp", "real(xdp)")] + #:endif + #:if WITH_QP + #:set PCA_KINDS_TYPES = PCA_KINDS_TYPES + [("qp", "real(qp)")] + #:endif interface pca !! version: experimental diff --git a/src/stats/stdlib_stats_pca.fypp b/src/stats/stdlib_stats_pca.fypp index 793e60550..4cecde8da 100644 --- a/src/stats/stdlib_stats_pca.fypp +++ b/src/stats/stdlib_stats_pca.fypp @@ -1,30 +1,36 @@ #:include "common.fypp" -#! Note: PCA is limited to single (sp) and double (dp) precision because external -#! optimized BLAS/LAPACK libraries (OpenBLAS, MKL) only support these precisions. +#! PCA supports all real kinds available in stdlib's internal BLAS/LAPACK. +#! When WITH_XDP or WITH_QP are enabled, extended precision is also supported. #:set PCA_KINDS_TYPES = [("sp", "real(sp)"), ("dp", "real(dp)")] +#:if WITH_XDP +#:set PCA_KINDS_TYPES = PCA_KINDS_TYPES + [("xdp", "real(xdp)")] +#:endif +#:if WITH_QP +#:set PCA_KINDS_TYPES = PCA_KINDS_TYPES + [("qp", "real(qp)")] +#:endif submodule (stdlib_stats) stdlib_stats_pca - use stdlib_kinds, only: sp, dp + use stdlib_kinds, only: sp, dp, xdp, qp use stdlib_error, only: error_stop use stdlib_optval, only: optval use stdlib_linalg, only: svd, eigh use stdlib_linalg_constants, only: ilp use stdlib_linalg_blas, only: gemm, syrk - use stdlib_linalg_state, only: linalg_state_type, LINALG_ERROR + use stdlib_linalg_state, only: linalg_state_type, LINALG_ERROR, LINALG_VALUE_ERROR use stdlib_sorting, only: sort_index implicit none contains - ! Helper subroutine: Centers data in-place by subtracting the mean from each row + ! Helper subroutine: Centers data in-place by subtracting the mean from each column #:for k1, t1 in PCA_KINDS_TYPES subroutine center_data_${k1}$(x, mu) ${t1}$, intent(inout) :: x(:,:) ${t1}$, intent(in) :: mu(:) - integer(ilp) :: i, j, n, m - n = size(x, 1, kind=ilp) - m = size(x, 2, kind=ilp) - do j = 1, m - do i = 1, n + integer(ilp) :: i, j, m, n + m = size(x, 1, kind=ilp) + n = size(x, 2, kind=ilp) + do j = 1, n + do i = 1, m x(i, j) = x(i, j) - mu(j) end do end do @@ -34,13 +40,14 @@ contains ! SVD-based PCA driver: computes principal components via SVD of centered data #:for k1, t1 in PCA_KINDS_TYPES subroutine pca_svd_driver_${k1}$(x_centered, n, p, components, singular_values, err) + use stdlib_blas_constants_${k1}$, only: one, zero ${t1}$, intent(inout) :: x_centered(:,:) integer(ilp), intent(in) :: n, p ${t1}$, intent(out) :: components(:,:) real(${k1}$), intent(out) :: singular_values(:) type(linalg_state_type), intent(out) :: err - integer(ilp) :: n_s, m, i + integer(ilp) :: n_s, m ${t1}$, allocatable :: s_tmp(:), vt_tmp(:,:) n_s = min(n, p) @@ -50,17 +57,10 @@ contains call svd(x_centered, s_tmp, vt=vt_tmp, overwrite_a=.true., full_matrices=.false., err=err) if (err%ok()) then - m = size(components, 1, kind=ilp) - if (n_s < m) m = n_s - do i = 1, m - components(i, :) = vt_tmp(i, :) - end do - - m = size(singular_values, 1, kind=ilp) - if (n_s < m) m = n_s - do i = 1, m - singular_values(i) = s_tmp(i) - end do + m = min(size(components, 1, kind=ilp), n_s) + components(1:m, :) = vt_tmp(1:m, :) + m = min(size(singular_values, 1, kind=ilp), n_s) + singular_values(1:m) = s_tmp(1:m) end if end subroutine pca_svd_driver_${k1}$ #:endfor @@ -68,6 +68,7 @@ contains ! Eigendecomposition-based PCA driver: computes principal components via covariance matrix #:for k1, t1 in PCA_KINDS_TYPES subroutine pca_eigh_driver_${k1}$(x_centered, n, p, components, singular_values, err) + use stdlib_blas_constants_${k1}$, only: one, zero ${t1}$, intent(in) :: x_centered(:,:) integer(ilp), intent(in) :: n, p ${t1}$, intent(out) :: components(:,:) @@ -81,18 +82,13 @@ contains real(${k1}$), allocatable :: lambda(:), lambda_copy(:) ${t1}$, allocatable :: c(:,:), vectors(:,:) - allocate(c(p, p)) - allocate(lambda(p)) - allocate(lambda_copy(p)) - allocate(idx(p)) - allocate(vectors(p, p)) - ! Compute covariance matrix using BLAS syrk: C = (1/(n-1)) * X^T * X scale_factor = 1.0_${k1}$ / real(max(n-1, 1), ${k1}$) - alpha = real(scale_factor, ${k1}$) - beta = 0.0_${k1}$ - c = 0.0_${k1}$ + alpha = scale_factor + beta = zero + allocate(c(p, p), source=zero) call syrk('U', 'T', p, n, alpha, x_centered, n, beta, c, p) + ! Fill lower triangle from upper triangle (syrk only fills upper) do j = 1, p-1 do i = j+1, p @@ -100,11 +96,14 @@ contains end do end do + allocate(lambda(p)) + allocate(vectors(p, p)) call eigh(c, lambda, vectors=vectors, err=err) if (err%ok()) then ! Sort eigenvalues in descending order - lambda_copy = -lambda + allocate(lambda_copy, source=-lambda) + allocate(idx(p)) call sort_index(lambda_copy, idx) ! Assign sorted results with safety bounds checks @@ -136,43 +135,49 @@ contains type(linalg_state_type) :: err0 integer(ilp) :: n, p ${t1}$, allocatable :: mu(:), x_centered(:,:) - character(16) :: method_ + character(len=:), allocatable :: method_ n = size(x, 1, kind=ilp) p = size(x, 2, kind=ilp) - method_ = adjustl(optval(method, "svd")) + method_ = trim(adjustl(optval(method, "svd"))) + + ! Calculate mean along dimension 1 (column means) using stdlib mean + allocate(mu, source=mean(x, 1)) - ! Calculate mean along dimension 1 (column means) - allocate(mu(p)) - mu = sum(x, dim=1) / real(n, ${k1}$) - if (present(x_mean)) x_mean = mu + ! Validate and assign x_mean if present + if (present(x_mean)) then + if (size(x_mean) < p) then + err0 = linalg_state_type("pca", LINALG_VALUE_ERROR, & + "x_mean array has insufficient size:", size(x_mean), ", expected:", p) + call err0%handle(err) + return + end if + x_mean(1:p) = mu + end if - ! Method dispatch using if-else for maximum compiler stability - if (trim(method_) == "svd") then + ! Method dispatch + select case (method_) + case ("svd") if (optval(overwrite_x, .false.)) then call center_data_${k1}$(x, mu) call pca_svd_driver_${k1}$(x, n, p, components, singular_values, err0) else - allocate(x_centered(n, p)) - x_centered = x + allocate(x_centered, source=x) call center_data_${k1}$(x_centered, mu) call pca_svd_driver_${k1}$(x_centered, n, p, components, singular_values, err0) end if - else if (trim(method_) == "eig" .or. trim(method_) == "cov") then - allocate(x_centered(n, p)) - x_centered = x + case ("eig", "cov") + allocate(x_centered, source=x) call center_data_${k1}$(x_centered, mu) call pca_eigh_driver_${k1}$(x_centered, n, p, components, singular_values, err0) - else - err0 = linalg_state_type("pca", LINALG_ERROR, "Unknown method: "//trim(method_)) - end if + case default + err0 = linalg_state_type("pca", LINALG_ERROR, "Unknown method: "//method_) + end select - ! Handle error state explicitly - if (present(err)) then - err = err0 - end if + ! Handle error state + call err0%handle(err) end subroutine pca_${k1}$ #:endfor @@ -180,49 +185,44 @@ contains #:for k1, t1 in PCA_KINDS_TYPES module subroutine pca_transform_${k1}$(x, components, x_mean, x_transformed) + use stdlib_blas_constants_${k1}$, only: one, zero ${t1}$, intent(in) :: x(:,:) ${t1}$, intent(in) :: components(:,:) ${t1}$, intent(in), optional :: x_mean(:) ${t1}$, intent(out) :: x_transformed(:,:) integer(ilp) :: n, p, nc - ${t1}$ :: alpha, beta ${t1}$, allocatable :: x_centered(:,:) n = size(x, 1, kind=ilp) p = size(x, 2, kind=ilp) nc = size(components, 1, kind=ilp) - allocate(x_centered(n, p)) - x_centered = x + allocate(x_centered, source=x) if (present(x_mean)) call center_data_${k1}$(x_centered, x_mean) ! x_transformed = x_centered * components^T using GEMM - alpha = 1.0_${k1}$ - beta = 0.0_${k1}$ - call gemm('N', 'T', n, nc, p, alpha, x_centered, n, components, nc, beta, x_transformed, n) + call gemm('N', 'T', n, nc, p, one, x_centered, n, components, nc, zero, x_transformed, n) end subroutine pca_transform_${k1}$ #:endfor #:for k1, t1 in PCA_KINDS_TYPES module subroutine pca_inverse_transform_${k1}$(x_reduced, components, x_mean, x_reconstructed) + use stdlib_blas_constants_${k1}$, only: one, zero ${t1}$, intent(in) :: x_reduced(:,:) ${t1}$, intent(in) :: components(:,:) ${t1}$, intent(in), optional :: x_mean(:) ${t1}$, intent(out) :: x_reconstructed(:,:) integer(ilp) :: i, j, n, nc, p - ${t1}$ :: alpha, beta n = size(x_reduced, 1, kind=ilp) nc = size(x_reduced, 2, kind=ilp) p = size(components, 2, kind=ilp) ! x_reconstructed = x_reduced * components using GEMM - alpha = 1.0_${k1}$ - beta = 0.0_${k1}$ - call gemm('N', 'N', n, p, nc, alpha, x_reduced, n, components, nc, beta, x_reconstructed, n) + call gemm('N', 'N', n, p, nc, one, x_reduced, n, components, nc, zero, x_reconstructed, n) if (present(x_mean)) then do j = 1, p diff --git a/test/stats/test_pca.fypp b/test/stats/test_pca.fypp index 2f0c8d05e..e318990ee 100644 --- a/test/stats/test_pca.fypp +++ b/test/stats/test_pca.fypp @@ -1,8 +1,14 @@ #:include "common.fypp" #:set PCA_KINDS = ["sp", "dp"] +#:if WITH_XDP +#:set PCA_KINDS = PCA_KINDS + ["xdp"] +#:endif +#:if WITH_QP +#:set PCA_KINDS = PCA_KINDS + ["qp"] +#:endif program test_pca use stdlib_error, only: check - use stdlib_kinds, only: sp, dp + use stdlib_kinds, only: sp, dp, xdp, qp use stdlib_stats, only: pca, pca_transform, pca_inverse_transform use stdlib_linalg_state, only: linalg_state_type implicit none From 143c21107f3f92f14aecf44aec08e62ec40e0e85 Mon Sep 17 00:00:00 2001 From: jaya sathvik Date: Tue, 6 Jan 2026 08:01:49 +0530 Subject: [PATCH 032/104] add PCA to public api --- src/stats/stdlib_stats.fypp | 54 ++++++++++++++++++++++++++++++++++++- 1 file changed, 53 insertions(+), 1 deletion(-) diff --git a/src/stats/stdlib_stats.fypp b/src/stats/stdlib_stats.fypp index adf373f0a..673a929d9 100644 --- a/src/stats/stdlib_stats.fypp +++ b/src/stats/stdlib_stats.fypp @@ -9,10 +9,12 @@ module stdlib_stats !! ([Specification](../page/specs/stdlib_stats.html)) use stdlib_kinds, only: sp, dp, xdp, qp, & int8, int16, int32, int64 + use stdlib_linalg_state, only: linalg_state_type implicit none private ! Public API public :: corr, cov, mean, median, moment, var + public :: pca, pca_transform, pca_inverse_transform interface corr @@ -637,6 +639,56 @@ module stdlib_stats end function ${RName}$ #:endfor #:endfor - end interface moment + interface pca + !! version: experimental + !! + !! Principal Component Analysis (PCA) + !! ([Specification](../page/specs/stdlib_stats.html#pca)) + #:for k1, t1 in REAL_KINDS_TYPES + module subroutine pca_${k1}$(x, components, singular_values, mean, & + method, overwrite_x, err) + ${t1}$, intent(inout), target :: x(:,:) + ${t1}$, intent(out) :: components(:,:) + real(${k1}$), intent(out) :: singular_values(:) + ${t1}$, intent(out), optional :: mean(:) + character(*), intent(in), optional :: method + logical, intent(in), optional :: overwrite_x + type(linalg_state_type), intent(out), optional :: err + end subroutine pca_${k1}$ + #:endfor + end interface pca + + + interface pca_transform + !! version: experimental + !! + !! Projects data into the reduced dimensional space + !! ([Specification](../page/specs/stdlib_stats.html#pca_transform)) + #:for k1, t1 in REAL_KINDS_TYPES + module function pca_transform_${k1}$(x, components, mean) result(res) + ${t1}$, intent(in) :: x(:,:) + ${t1}$, intent(in) :: components(:,:) + ${t1}$, intent(in), optional :: mean(:) + ${t1}$ :: res(size(x,1), size(components,2)) + end function pca_transform_${k1}$ + #:endfor + end interface pca_transform + + + interface pca_inverse_transform + !! version: experimental + !! + !! Reconstructs original data from the reduced space + !! ([Specification](../page/specs/stdlib_stats.html#pca_inverse_transform)) + #:for k1, t1 in REAL_KINDS_TYPES + module function pca_inverse_transform_${k1}$(x_reduced, components, mean) result(res) + ${t1}$, intent(in) :: x_reduced(:,:) + ${t1}$, intent(in) :: components(:,:) + ${t1}$, intent(in), optional :: mean(:) + ${t1}$ :: res(size(x_reduced,1), size(components,1)) + end function pca_inverse_transform_${k1}$ + #:endfor + end interface pca_inverse_transform + end module stdlib_stats From 17cf47375d20061bb39b9d325560c896f56f320c Mon Sep 17 00:00:00 2001 From: jaya sathvik Date: Tue, 6 Jan 2026 08:30:19 +0530 Subject: [PATCH 033/104] include pca submodule --- src/stats/CMakeLists.txt | 1 + src/stats/stdlib_stats_pca.fypp | 53 +++++++++++++++++++++++++++++++++ 2 files changed, 54 insertions(+) create mode 100644 src/stats/stdlib_stats_pca.fypp diff --git a/src/stats/CMakeLists.txt b/src/stats/CMakeLists.txt index 3e5727565..41042315b 100644 --- a/src/stats/CMakeLists.txt +++ b/src/stats/CMakeLists.txt @@ -16,6 +16,7 @@ set(stats_fppFiles ../stdlib_string_type.fypp stdlib_stats_corr.fypp stdlib_stats_cov.fypp + stdlib_stats_pca.fypp stdlib_stats_distribution_exponential.fypp stdlib_stats_distribution_normal.fypp stdlib_stats_distribution_uniform.fypp diff --git a/src/stats/stdlib_stats_pca.fypp b/src/stats/stdlib_stats_pca.fypp new file mode 100644 index 000000000..1fcd810f1 --- /dev/null +++ b/src/stats/stdlib_stats_pca.fypp @@ -0,0 +1,53 @@ +#:include "common.fypp" +#:set REAL_KINDS_TYPES = list(zip(REAL_KINDS, REAL_TYPES)) +submodule (stdlib_stats) stdlib_stats_pca + use stdlib_error, only: error_stop + use stdlib_optval, only: optval + use stdlib_linalg_state, only: LINALG_SUCCESS, LINALG_ERROR, linalg_state_type + implicit none + +contains + + #:for k1, t1 in REAL_KINDS_TYPES + module subroutine pca_${k1}$(x, components, singular_values, mean, & + method, overwrite_x, err) + ${t1}$, intent(inout), target :: x(:,:) + ${t1}$, intent(out) :: components(:,:) + real(${k1}$), intent(out) :: singular_values(:) + ${t1}$, intent(out), optional :: mean(:) + character(*), intent(in), optional :: method + logical, intent(in), optional :: overwrite_x + type(linalg_state_type), intent(out), optional :: err + + ! Placeholder implementation + if (present(err)) err = linalg_state_type("pca", LINALG_ERROR, "Not implemented yet") + call error_stop("PCA not implemented yet") + + end subroutine pca_${k1}$ + #:endfor + + + #:for k1, t1 in REAL_KINDS_TYPES + module function pca_transform_${k1}$(x, components, mean) result(res) + ${t1}$, intent(in) :: x(:,:) + ${t1}$, intent(in) :: components(:,:) + ${t1}$, intent(in), optional :: mean(:) + ${t1}$ :: res(size(x,1), size(components,2)) + + res = 0.0_${k1}$ ! Placeholder implementation + end function pca_transform_${k1}$ + #:endfor + + + #:for k1, t1 in REAL_KINDS_TYPES + module function pca_inverse_transform_${k1}$(x_reduced, components, mean) result(res) + ${t1}$, intent(in) :: x_reduced(:,:) + ${t1}$, intent(in) :: components(:,:) + ${t1}$, intent(in), optional :: mean(:) + ${t1}$ :: res(size(x_reduced,1), size(components,1)) + + res = 0.0_${k1}$ ! Placeholder implementation + end function pca_inverse_transform_${k1}$ + #:endfor + +end submodule stdlib_stats_pca From 6d0506d64b3ba087dcd8556acedea8d9d4fa887e Mon Sep 17 00:00:00 2001 From: jaya sathvik Date: Tue, 6 Jan 2026 09:01:18 +0530 Subject: [PATCH 034/104] Add PCA module with `pca`, `pca_transform`, and `pca_inverse_transform` routines. --- src/stats/stdlib_stats_pca.fypp | 129 ++++++++++++++++++++++++++++++-- 1 file changed, 122 insertions(+), 7 deletions(-) diff --git a/src/stats/stdlib_stats_pca.fypp b/src/stats/stdlib_stats_pca.fypp index 1fcd810f1..706f65d2e 100644 --- a/src/stats/stdlib_stats_pca.fypp +++ b/src/stats/stdlib_stats_pca.fypp @@ -3,6 +3,8 @@ submodule (stdlib_stats) stdlib_stats_pca use stdlib_error, only: error_stop use stdlib_optval, only: optval + use stdlib_linalg, only: svd, eigh + use stdlib_linalg_constants, only: ilp use stdlib_linalg_state, only: LINALG_SUCCESS, LINALG_ERROR, linalg_state_type implicit none @@ -19,9 +21,102 @@ contains logical, intent(in), optional :: overwrite_x type(linalg_state_type), intent(out), optional :: err - ! Placeholder implementation - if (present(err)) err = linalg_state_type("pca", LINALG_ERROR, "Not implemented yet") - call error_stop("PCA not implemented yet") + type(linalg_state_type) :: err0 + integer(ilp) :: n, p, i, k + ${t1}$, allocatable :: mu(:) + character(16) :: method_ + + n = size(x, 1, kind=ilp) + p = size(x, 2, kind=ilp) + k = size(components, 1, kind=ilp) + + method_ = optval(method, "svd") + + ! 1. Calculate and optionally return mean + allocate(mu(p)) + mu = mean(x, dim=1) + if (present(mean)) mean = mu + + err0 = linalg_state_type("pca", LINALG_SUCCESS) + + if (method_ == "svd") then + ! 2. Center data and call SVD with temporaries for robustness + block + ${t1}$, allocatable :: s_tmp(:), vt_tmp(:,:) + integer(ilp) :: n_s + n_s = min(n, p) + allocate(s_tmp(n_s), vt_tmp(n_s, p)) + + if (optval(overwrite_x, .false.)) then + do i = 1, n + x(i, :) = x(i, :) - mu + end do + call svd(x, s_tmp, vt=vt_tmp, overwrite_a=.true., full_matrices=.false., err=err0) + else + block + ${t1}$, allocatable :: x_centered(:,:) + allocate(x_centered(n, p)) + do i = 1, n + x_centered(i, :) = x(i, :) - mu + end do + call svd(x_centered, s_tmp, vt=vt_tmp, overwrite_a=.true., full_matrices=.false., err=err0) + end block + end if + + if (err0%ok()) then + i = min(size(components, 1, kind=ilp), n_s) + components(:i, :) = vt_tmp(:i, :) + i = min(size(singular_values, 1, kind=ilp), n_s) + singular_values(:i) = s_tmp(:i) + end if + end block + else if (method_ == "eig" .or. method_ == "cov") then + ! 3. Eigendecomposition of covariance matrix + block + ${t1}$, allocatable :: c(:,:) + ${t1}$, allocatable :: vectors(:,:) + real(${k1}$), allocatable :: lambda(:) + integer(ilp), allocatable :: idx(:) + integer(ilp) :: j, m + real(${k1}$) :: tmp_val + + allocate(lambda(p), idx(p), vectors(p, p)) + c = cov(x, dim=1) + call eigh(c, lambda, vectors=vectors, err=err0) + + if (err0%ok()) then + ! Sort eigenvalues and vectors in descending order + do j = 1, p + idx(j) = j + end do + ! Simple bubble sort + do i = 1, p-1 + do j = i+1, p + if (lambda(idx(i)) < lambda(idx(j))) then + m = idx(i) + idx(i) = idx(j) + idx(j) = m + end if + end do + end do + + ! Assign sorted results + m = min(size(components, 1, kind=ilp), p) + do i = 1, m + components(i, :) = vectors(:, idx(i)) + if (lambda(idx(i)) > 0.0_${k1}$) then + singular_values(i) = sqrt(lambda(idx(i)) * real(n-1, ${k1}$)) + else + singular_values(i) = 0.0_${k1}$ + end if + end do + end if + end block + else + err0 = linalg_state_type("pca", LINALG_ERROR, "Unknown method: "//method_) + end if + + if (present(err)) err = err0 end subroutine pca_${k1}$ #:endfor @@ -32,9 +127,22 @@ contains ${t1}$, intent(in) :: x(:,:) ${t1}$, intent(in) :: components(:,:) ${t1}$, intent(in), optional :: mean(:) - ${t1}$ :: res(size(x,1), size(components,2)) + ${t1}$ :: res(size(x,1), size(components,1)) - res = 0.0_${k1}$ ! Placeholder implementation + integer(ilp) :: i, n + ${t1}$, allocatable :: x_centered(:,:) + + n = size(x, 1, kind=ilp) + allocate(x_centered(n, size(x, 2, kind=ilp))) + if (present(mean)) then + do i = 1, n + x_centered(i, :) = x(i, :) - mean + end do + else + x_centered = x + end if + + res = matmul(x_centered, transpose(components)) end function pca_transform_${k1}$ #:endfor @@ -44,9 +152,16 @@ contains ${t1}$, intent(in) :: x_reduced(:,:) ${t1}$, intent(in) :: components(:,:) ${t1}$, intent(in), optional :: mean(:) - ${t1}$ :: res(size(x_reduced,1), size(components,1)) + ${t1}$ :: res(size(x_reduced,1), size(components,2)) - res = 0.0_${k1}$ ! Placeholder implementation + integer(ilp) :: i, n + n = size(x_reduced, 1, kind=ilp) + res = matmul(x_reduced, components) + if (present(mean)) then + do i = 1, n + res(i, :) = res(i, :) + mean + end do + end if end function pca_inverse_transform_${k1}$ #:endfor From 67c7ddf5c2c9a6eb5772fb8f31063cd820497292 Mon Sep 17 00:00:00 2001 From: jaya sathvik Date: Tue, 6 Jan 2026 09:16:56 +0530 Subject: [PATCH 035/104] add PCA unit test --- test/stats/test_pca.f90 | 76 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 76 insertions(+) create mode 100644 test/stats/test_pca.f90 diff --git a/test/stats/test_pca.f90 b/test/stats/test_pca.f90 new file mode 100644 index 000000000..f39c9253d --- /dev/null +++ b/test/stats/test_pca.f90 @@ -0,0 +1,76 @@ +program test_pca + use stdlib_error, only: check + use stdlib_kinds, only: sp, dp + use stdlib_stats, only: pca, pca_transform, pca_inverse_transform + use stdlib_linalg_state, only: linalg_state_type + implicit none + + real(sp), parameter :: sptol = 1000 * epsilon(1._sp) + real(dp), parameter :: dptol = 1000 * epsilon(1._dp) + + call test_pca_sp() + call test_pca_dp() + +contains + + subroutine test_pca_sp() + real(sp) :: x(3, 2), components(2, 2), s(2), mu(2) + real(sp) :: x_red(3, 1), comp_red(1, 2), s_red(1) + real(sp) :: x_trans(3, 2), x_inv(3, 2) + type(linalg_state_type) :: err + + ! Data: [1, 2], [3, 4], [5, 6] + x = reshape([1.0_sp, 3.0_sp, 5.0_sp, 2.0_sp, 4.0_sp, 6.0_sp], [3, 2]) + + ! Test SVD method + call pca(x, components, s, mean=mu, method="svd", err=err) + call check(err%ok(), "pca_sp svd err") + call check(all(abs(mu - [3.0_sp, 4.0_sp]) < sptol), "pca_sp svd mean") + ! First component should be approx [0.707, 0.707] (or negative) + call check(abs(abs(components(1,1)) - 1.0_sp/sqrt(2.0_sp)) < sptol, "pca_sp svd comp1") + call check(abs(s(1) - 4.0_sp) < sptol, "pca_sp svd s1") + call check(abs(s(2)) < sptol, "pca_sp svd s2") + + ! Test Transform + x_trans = pca_transform(x, components, mu) + ! Second dimension should be zero + call check(all(abs(x_trans(:, 2)) < sptol), "pca_sp transform") + + ! Test Inverse Transform + x_inv = pca_inverse_transform(x_trans, components, mu) + call check(all(abs(x_inv - x) < sptol), "pca_sp inverse") + + ! Test EIG method + call pca(x, components, s, method="eig", err=err) + call check(err%ok(), "pca_sp eig err") + call check(abs(s(1) - 4.0_sp) < sptol, "pca_sp eig s1") + + end subroutine test_pca_sp + + subroutine test_pca_dp() + real(dp) :: x(3, 2), components(2, 2), s(2), mu(2) + real(dp) :: x_trans(3, 2), x_inv(3, 2) + type(linalg_state_type) :: err + + x = reshape([1.0_dp, 3.0_dp, 5.0_dp, 2.0_dp, 4.0_dp, 6.0_dp], [3, 2]) + + ! Test SVD method + call pca(x, components, s, mean=mu, method="svd", err=err) + call check(err%ok(), "pca_dp svd err") + call check(all(abs(mu - [3.0_dp, 4.0_dp]) < dptol), "pca_dp svd mean") + call check(abs(abs(components(1,1)) - 1.0_dp/sqrt(2.0_dp)) < dptol, "pca_dp svd comp1") + call check(abs(s(1) - 4.0_dp) < dptol, "pca_dp svd s1") + + ! Test Transform/Inverse + x_trans = pca_transform(x, components, mu) + x_inv = pca_inverse_transform(x_trans, components, mu) + call check(all(abs(x_inv - x) < dptol), "pca_dp inverse") + + ! Test EIG method + call pca(x, components, s, method="eig", err=err) + call check(err%ok(), "pca_dp eig err") + call check(abs(s(1) - 4.0_dp) < dptol, "pca_dp eig s1") + + end subroutine test_pca_dp + +end program test_pca From 720298c5adc429e20a0919a341eae8e96a9e1e8d Mon Sep 17 00:00:00 2001 From: jaya sathvik Date: Tue, 6 Jan 2026 09:20:19 +0530 Subject: [PATCH 036/104] update end interface statement --- src/stats/stdlib_stats.fypp | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/stats/stdlib_stats.fypp b/src/stats/stdlib_stats.fypp index 673a929d9..26d6a41af 100644 --- a/src/stats/stdlib_stats.fypp +++ b/src/stats/stdlib_stats.fypp @@ -639,6 +639,9 @@ module stdlib_stats end function ${RName}$ #:endfor #:endfor + end interface moment + + interface pca !! version: experimental !! From 1c2fc757aa7a620837ea16372ca115168260779d Mon Sep 17 00:00:00 2001 From: jaya sathvik Date: Tue, 6 Jan 2026 09:22:25 +0530 Subject: [PATCH 037/104] update CmakeLists --- test/stats/CMakeLists.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/test/stats/CMakeLists.txt b/test/stats/CMakeLists.txt index ff9d45063..5eb3d61b0 100644 --- a/test/stats/CMakeLists.txt +++ b/test/stats/CMakeLists.txt @@ -14,6 +14,7 @@ fypp_f90("${fyppFlags}" "${fppFiles}" outFiles) ADDTEST(corr) ADDTEST(cov) +ADDTEST(pca) ADDTEST(mean) ADDTEST(median) ADDTEST(moment) From c43704c95fe5b089c20564577e3c22e37472f689 Mon Sep 17 00:00:00 2001 From: jaya sathvik Date: Tue, 6 Jan 2026 09:46:06 +0530 Subject: [PATCH 038/104] fixed_conflicts --- src/stats/stdlib_stats.fypp | 14 +++++++------- src/stats/stdlib_stats_pca.fypp | 22 +++++++++++----------- test/stats/test_pca.f90 | 4 ++-- 3 files changed, 20 insertions(+), 20 deletions(-) diff --git a/src/stats/stdlib_stats.fypp b/src/stats/stdlib_stats.fypp index 26d6a41af..174958937 100644 --- a/src/stats/stdlib_stats.fypp +++ b/src/stats/stdlib_stats.fypp @@ -648,12 +648,12 @@ module stdlib_stats !! Principal Component Analysis (PCA) !! ([Specification](../page/specs/stdlib_stats.html#pca)) #:for k1, t1 in REAL_KINDS_TYPES - module subroutine pca_${k1}$(x, components, singular_values, mean, & + module subroutine pca_${k1}$(x, components, singular_values, x_mean, & method, overwrite_x, err) ${t1}$, intent(inout), target :: x(:,:) ${t1}$, intent(out) :: components(:,:) real(${k1}$), intent(out) :: singular_values(:) - ${t1}$, intent(out), optional :: mean(:) + ${t1}$, intent(out), optional :: x_mean(:) character(*), intent(in), optional :: method logical, intent(in), optional :: overwrite_x type(linalg_state_type), intent(out), optional :: err @@ -668,10 +668,10 @@ module stdlib_stats !! Projects data into the reduced dimensional space !! ([Specification](../page/specs/stdlib_stats.html#pca_transform)) #:for k1, t1 in REAL_KINDS_TYPES - module function pca_transform_${k1}$(x, components, mean) result(res) + module function pca_transform_${k1}$(x, components, x_mean) result(res) ${t1}$, intent(in) :: x(:,:) ${t1}$, intent(in) :: components(:,:) - ${t1}$, intent(in), optional :: mean(:) + ${t1}$, intent(in), optional :: x_mean(:) ${t1}$ :: res(size(x,1), size(components,2)) end function pca_transform_${k1}$ #:endfor @@ -684,11 +684,11 @@ module stdlib_stats !! Reconstructs original data from the reduced space !! ([Specification](../page/specs/stdlib_stats.html#pca_inverse_transform)) #:for k1, t1 in REAL_KINDS_TYPES - module function pca_inverse_transform_${k1}$(x_reduced, components, mean) result(res) + module function pca_inverse_transform_${k1}$(x_reduced, components, x_mean) result(res) ${t1}$, intent(in) :: x_reduced(:,:) ${t1}$, intent(in) :: components(:,:) - ${t1}$, intent(in), optional :: mean(:) - ${t1}$ :: res(size(x_reduced,1), size(components,1)) + ${t1}$, intent(in), optional :: x_mean(:) + ${t1}$ :: res(size(x_reduced,1), size(components,2)) end function pca_inverse_transform_${k1}$ #:endfor end interface pca_inverse_transform diff --git a/src/stats/stdlib_stats_pca.fypp b/src/stats/stdlib_stats_pca.fypp index 706f65d2e..3a2b14775 100644 --- a/src/stats/stdlib_stats_pca.fypp +++ b/src/stats/stdlib_stats_pca.fypp @@ -11,12 +11,12 @@ submodule (stdlib_stats) stdlib_stats_pca contains #:for k1, t1 in REAL_KINDS_TYPES - module subroutine pca_${k1}$(x, components, singular_values, mean, & + module subroutine pca_${k1}$(x, components, singular_values, x_mean, & method, overwrite_x, err) ${t1}$, intent(inout), target :: x(:,:) ${t1}$, intent(out) :: components(:,:) real(${k1}$), intent(out) :: singular_values(:) - ${t1}$, intent(out), optional :: mean(:) + ${t1}$, intent(out), optional :: x_mean(:) character(*), intent(in), optional :: method logical, intent(in), optional :: overwrite_x type(linalg_state_type), intent(out), optional :: err @@ -35,7 +35,7 @@ contains ! 1. Calculate and optionally return mean allocate(mu(p)) mu = mean(x, dim=1) - if (present(mean)) mean = mu + if (present(x_mean)) x_mean = mu err0 = linalg_state_type("pca", LINALG_SUCCESS) @@ -123,10 +123,10 @@ contains #:for k1, t1 in REAL_KINDS_TYPES - module function pca_transform_${k1}$(x, components, mean) result(res) + module function pca_transform_${k1}$(x, components, x_mean) result(res) ${t1}$, intent(in) :: x(:,:) ${t1}$, intent(in) :: components(:,:) - ${t1}$, intent(in), optional :: mean(:) + ${t1}$, intent(in), optional :: x_mean(:) ${t1}$ :: res(size(x,1), size(components,1)) integer(ilp) :: i, n @@ -134,9 +134,9 @@ contains n = size(x, 1, kind=ilp) allocate(x_centered(n, size(x, 2, kind=ilp))) - if (present(mean)) then + if (present(x_mean)) then do i = 1, n - x_centered(i, :) = x(i, :) - mean + x_centered(i, :) = x(i, :) - x_mean end do else x_centered = x @@ -148,18 +148,18 @@ contains #:for k1, t1 in REAL_KINDS_TYPES - module function pca_inverse_transform_${k1}$(x_reduced, components, mean) result(res) + module function pca_inverse_transform_${k1}$(x_reduced, components, x_mean) result(res) ${t1}$, intent(in) :: x_reduced(:,:) ${t1}$, intent(in) :: components(:,:) - ${t1}$, intent(in), optional :: mean(:) + ${t1}$, intent(in), optional :: x_mean(:) ${t1}$ :: res(size(x_reduced,1), size(components,2)) integer(ilp) :: i, n n = size(x_reduced, 1, kind=ilp) res = matmul(x_reduced, components) - if (present(mean)) then + if (present(x_mean)) then do i = 1, n - res(i, :) = res(i, :) + mean + res(i, :) = res(i, :) + x_mean end do end if end function pca_inverse_transform_${k1}$ diff --git a/test/stats/test_pca.f90 b/test/stats/test_pca.f90 index f39c9253d..6535a25d7 100644 --- a/test/stats/test_pca.f90 +++ b/test/stats/test_pca.f90 @@ -23,7 +23,7 @@ subroutine test_pca_sp() x = reshape([1.0_sp, 3.0_sp, 5.0_sp, 2.0_sp, 4.0_sp, 6.0_sp], [3, 2]) ! Test SVD method - call pca(x, components, s, mean=mu, method="svd", err=err) + call pca(x, components, s, x_mean=mu, method="svd", err=err) call check(err%ok(), "pca_sp svd err") call check(all(abs(mu - [3.0_sp, 4.0_sp]) < sptol), "pca_sp svd mean") ! First component should be approx [0.707, 0.707] (or negative) @@ -55,7 +55,7 @@ subroutine test_pca_dp() x = reshape([1.0_dp, 3.0_dp, 5.0_dp, 2.0_dp, 4.0_dp, 6.0_dp], [3, 2]) ! Test SVD method - call pca(x, components, s, mean=mu, method="svd", err=err) + call pca(x, components, s, x_mean=mu, method="svd", err=err) call check(err%ok(), "pca_dp svd err") call check(all(abs(mu - [3.0_dp, 4.0_dp]) < dptol), "pca_dp svd mean") call check(abs(abs(components(1,1)) - 1.0_dp/sqrt(2.0_dp)) < dptol, "pca_dp svd comp1") From 9509dcad25b87995ce85e946e64566f503db7226 Mon Sep 17 00:00:00 2001 From: jaya sathvik Date: Tue, 6 Jan 2026 10:35:41 +0530 Subject: [PATCH 039/104] update interface --- src/stats/stdlib_stats.fypp | 14 ++++++++++---- src/stats/stdlib_stats_pca.fypp | 6 +++++- 2 files changed, 15 insertions(+), 5 deletions(-) diff --git a/src/stats/stdlib_stats.fypp b/src/stats/stdlib_stats.fypp index 174958937..aec6a6307 100644 --- a/src/stats/stdlib_stats.fypp +++ b/src/stats/stdlib_stats.fypp @@ -642,12 +642,18 @@ module stdlib_stats end interface moment + #! Note: PCA uses SVD and EIGH which rely on LAPACK. LAPACK backends do not support extended (xdp) or + #! quad precision (qp). Therefore, PCA is limited to single (sp) and double (dp) precision only. + #:set PCA_REAL_KINDS = ["sp", "dp"] + #:set PCA_REAL_TYPES = ["real(sp)", "real(dp)"] + #:set PCA_KINDS_TYPES = list(zip(PCA_REAL_KINDS, PCA_REAL_TYPES)) + interface pca !! version: experimental !! !! Principal Component Analysis (PCA) !! ([Specification](../page/specs/stdlib_stats.html#pca)) - #:for k1, t1 in REAL_KINDS_TYPES + #:for k1, t1 in PCA_KINDS_TYPES module subroutine pca_${k1}$(x, components, singular_values, x_mean, & method, overwrite_x, err) ${t1}$, intent(inout), target :: x(:,:) @@ -667,12 +673,12 @@ module stdlib_stats !! !! Projects data into the reduced dimensional space !! ([Specification](../page/specs/stdlib_stats.html#pca_transform)) - #:for k1, t1 in REAL_KINDS_TYPES + #:for k1, t1 in PCA_KINDS_TYPES module function pca_transform_${k1}$(x, components, x_mean) result(res) ${t1}$, intent(in) :: x(:,:) ${t1}$, intent(in) :: components(:,:) ${t1}$, intent(in), optional :: x_mean(:) - ${t1}$ :: res(size(x,1), size(components,2)) + ${t1}$ :: res(size(x,1), size(components,1)) end function pca_transform_${k1}$ #:endfor end interface pca_transform @@ -683,7 +689,7 @@ module stdlib_stats !! !! Reconstructs original data from the reduced space !! ([Specification](../page/specs/stdlib_stats.html#pca_inverse_transform)) - #:for k1, t1 in REAL_KINDS_TYPES + #:for k1, t1 in PCA_KINDS_TYPES module function pca_inverse_transform_${k1}$(x_reduced, components, x_mean) result(res) ${t1}$, intent(in) :: x_reduced(:,:) ${t1}$, intent(in) :: components(:,:) diff --git a/src/stats/stdlib_stats_pca.fypp b/src/stats/stdlib_stats_pca.fypp index 3a2b14775..54e0be03b 100644 --- a/src/stats/stdlib_stats_pca.fypp +++ b/src/stats/stdlib_stats_pca.fypp @@ -1,5 +1,9 @@ #:include "common.fypp" -#:set REAL_KINDS_TYPES = list(zip(REAL_KINDS, REAL_TYPES)) +#! Note: PCA uses SVD and EIGH which rely on LAPACK. LAPACK backends do not support extended (xdp) or +#! quad precision (qp). Therefore, PCA is limited to single (sp) and double (dp) precision only. +#:set PCA_REAL_KINDS = ["sp", "dp"] +#:set PCA_REAL_TYPES = ["real(sp)", "real(dp)"] +#:set REAL_KINDS_TYPES = list(zip(PCA_REAL_KINDS, PCA_REAL_TYPES)) submodule (stdlib_stats) stdlib_stats_pca use stdlib_error, only: error_stop use stdlib_optval, only: optval From 36fc211e9cb0f3e0c9a090593952914ff5d6ab27 Mon Sep 17 00:00:00 2001 From: jaya sathvik Date: Wed, 7 Jan 2026 13:50:14 +0530 Subject: [PATCH 040/104] allined with the other linalg function --- src/stats/stdlib_stats.fypp | 6 ++++-- src/stats/stdlib_stats_pca.fypp | 6 ++++-- 2 files changed, 8 insertions(+), 4 deletions(-) diff --git a/src/stats/stdlib_stats.fypp b/src/stats/stdlib_stats.fypp index aec6a6307..3f69ffac3 100644 --- a/src/stats/stdlib_stats.fypp +++ b/src/stats/stdlib_stats.fypp @@ -642,8 +642,10 @@ module stdlib_stats end interface moment - #! Note: PCA uses SVD and EIGH which rely on LAPACK. LAPACK backends do not support extended (xdp) or - #! quad precision (qp). Therefore, PCA is limited to single (sp) and double (dp) precision only. + #! Note: PCA uses SVD and EIGH which rely on LAPACK. External optimized BLAS/LAPACK backends + #! (e.g., OpenBLAS, MKL) do not support extended precision (xdp). While stdlib's internal LAPACK + #! implementations do provide quadruple precision (qp) routines, PCA is limited to single (sp) + #! and double (dp) precision for consistency with external optimized backends. #:set PCA_REAL_KINDS = ["sp", "dp"] #:set PCA_REAL_TYPES = ["real(sp)", "real(dp)"] #:set PCA_KINDS_TYPES = list(zip(PCA_REAL_KINDS, PCA_REAL_TYPES)) diff --git a/src/stats/stdlib_stats_pca.fypp b/src/stats/stdlib_stats_pca.fypp index 54e0be03b..00583fe1a 100644 --- a/src/stats/stdlib_stats_pca.fypp +++ b/src/stats/stdlib_stats_pca.fypp @@ -1,6 +1,8 @@ #:include "common.fypp" -#! Note: PCA uses SVD and EIGH which rely on LAPACK. LAPACK backends do not support extended (xdp) or -#! quad precision (qp). Therefore, PCA is limited to single (sp) and double (dp) precision only. +#! Note: PCA uses SVD and EIGH which rely on LAPACK. External optimized BLAS/LAPACK backends +#! (e.g., OpenBLAS, MKL) do not support extended precision (xdp). While stdlib's internal LAPACK +#! implementations do provide quadruple precision (qp) routines, PCA is limited to single (sp) +#! and double (dp) precision for consistency with external optimized backends. #:set PCA_REAL_KINDS = ["sp", "dp"] #:set PCA_REAL_TYPES = ["real(sp)", "real(dp)"] #:set REAL_KINDS_TYPES = list(zip(PCA_REAL_KINDS, PCA_REAL_TYPES)) From 19f55b6029805e1eec99663d8cfd472717f3b3bc Mon Sep 17 00:00:00 2001 From: jaya sathvik Date: Wed, 7 Jan 2026 13:57:05 +0530 Subject: [PATCH 041/104] convert to subroutines,updated test --- src/stats/stdlib_stats.fypp | 12 ++++----- src/stats/stdlib_stats_pca.fypp | 46 +++++++++++++++++++++++---------- test/stats/test_pca.f90 | 8 +++--- 3 files changed, 42 insertions(+), 24 deletions(-) diff --git a/src/stats/stdlib_stats.fypp b/src/stats/stdlib_stats.fypp index 3f69ffac3..4853d3dea 100644 --- a/src/stats/stdlib_stats.fypp +++ b/src/stats/stdlib_stats.fypp @@ -676,12 +676,12 @@ module stdlib_stats !! Projects data into the reduced dimensional space !! ([Specification](../page/specs/stdlib_stats.html#pca_transform)) #:for k1, t1 in PCA_KINDS_TYPES - module function pca_transform_${k1}$(x, components, x_mean) result(res) + module subroutine pca_transform_${k1}$(x, components, x_mean, x_transformed) ${t1}$, intent(in) :: x(:,:) ${t1}$, intent(in) :: components(:,:) ${t1}$, intent(in), optional :: x_mean(:) - ${t1}$ :: res(size(x,1), size(components,1)) - end function pca_transform_${k1}$ + ${t1}$, intent(out) :: x_transformed(:,:) + end subroutine pca_transform_${k1}$ #:endfor end interface pca_transform @@ -692,12 +692,12 @@ module stdlib_stats !! Reconstructs original data from the reduced space !! ([Specification](../page/specs/stdlib_stats.html#pca_inverse_transform)) #:for k1, t1 in PCA_KINDS_TYPES - module function pca_inverse_transform_${k1}$(x_reduced, components, x_mean) result(res) + module subroutine pca_inverse_transform_${k1}$(x_reduced, components, x_mean, x_reconstructed) ${t1}$, intent(in) :: x_reduced(:,:) ${t1}$, intent(in) :: components(:,:) ${t1}$, intent(in), optional :: x_mean(:) - ${t1}$ :: res(size(x_reduced,1), size(components,2)) - end function pca_inverse_transform_${k1}$ + ${t1}$, intent(out) :: x_reconstructed(:,:) + end subroutine pca_inverse_transform_${k1}$ #:endfor end interface pca_inverse_transform diff --git a/src/stats/stdlib_stats_pca.fypp b/src/stats/stdlib_stats_pca.fypp index 00583fe1a..dd7106b8a 100644 --- a/src/stats/stdlib_stats_pca.fypp +++ b/src/stats/stdlib_stats_pca.fypp @@ -129,17 +129,22 @@ contains #:for k1, t1 in REAL_KINDS_TYPES - module function pca_transform_${k1}$(x, components, x_mean) result(res) + module subroutine pca_transform_${k1}$(x, components, x_mean, x_transformed) + use stdlib_linalg_blas, only: gemm ${t1}$, intent(in) :: x(:,:) ${t1}$, intent(in) :: components(:,:) ${t1}$, intent(in), optional :: x_mean(:) - ${t1}$ :: res(size(x,1), size(components,1)) + ${t1}$, intent(out) :: x_transformed(:,:) - integer(ilp) :: i, n + integer(ilp) :: i, n, p, k ${t1}$, allocatable :: x_centered(:,:) + ${t1}$, parameter :: alpha = 1.0_${k1}$, beta = 0.0_${k1}$ - n = size(x, 1, kind=ilp) - allocate(x_centered(n, size(x, 2, kind=ilp))) + n = size(x, 1, kind=ilp) ! number of samples + p = size(x, 2, kind=ilp) ! number of features + k = size(components, 1, kind=ilp) ! number of components + + allocate(x_centered(n, p)) if (present(x_mean)) then do i = 1, n x_centered(i, :) = x(i, :) - x_mean @@ -148,27 +153,40 @@ contains x_centered = x end if - res = matmul(x_centered, transpose(components)) - end function pca_transform_${k1}$ + ! x_transformed = x_centered * components^T + ! GEMM: C = alpha * A * B^T + beta * C + ! x_transformed(n,k) = x_centered(n,p) * components(k,p)^T + call gemm('N', 'T', n, k, p, alpha, x_centered, n, components, k, beta, x_transformed, n) + end subroutine pca_transform_${k1}$ #:endfor #:for k1, t1 in REAL_KINDS_TYPES - module function pca_inverse_transform_${k1}$(x_reduced, components, x_mean) result(res) + module subroutine pca_inverse_transform_${k1}$(x_reduced, components, x_mean, x_reconstructed) + use stdlib_linalg_blas, only: gemm ${t1}$, intent(in) :: x_reduced(:,:) ${t1}$, intent(in) :: components(:,:) ${t1}$, intent(in), optional :: x_mean(:) - ${t1}$ :: res(size(x_reduced,1), size(components,2)) + ${t1}$, intent(out) :: x_reconstructed(:,:) - integer(ilp) :: i, n - n = size(x_reduced, 1, kind=ilp) - res = matmul(x_reduced, components) + integer(ilp) :: i, n, k, p + ${t1}$, parameter :: alpha = 1.0_${k1}$, beta = 0.0_${k1}$ + + n = size(x_reduced, 1, kind=ilp) ! number of samples + k = size(x_reduced, 2, kind=ilp) ! number of components + p = size(components, 2, kind=ilp) ! number of features + + ! x_reconstructed = x_reduced * components + ! GEMM: C = alpha * A * B + beta * C + ! x_reconstructed(n,p) = x_reduced(n,k) * components(k,p) + call gemm('N', 'N', n, p, k, alpha, x_reduced, n, components, k, beta, x_reconstructed, n) + if (present(x_mean)) then do i = 1, n - res(i, :) = res(i, :) + x_mean + x_reconstructed(i, :) = x_reconstructed(i, :) + x_mean end do end if - end function pca_inverse_transform_${k1}$ + end subroutine pca_inverse_transform_${k1}$ #:endfor end submodule stdlib_stats_pca diff --git a/test/stats/test_pca.f90 b/test/stats/test_pca.f90 index 6535a25d7..560365270 100644 --- a/test/stats/test_pca.f90 +++ b/test/stats/test_pca.f90 @@ -32,12 +32,12 @@ subroutine test_pca_sp() call check(abs(s(2)) < sptol, "pca_sp svd s2") ! Test Transform - x_trans = pca_transform(x, components, mu) + call pca_transform(x, components, mu, x_trans) ! Second dimension should be zero call check(all(abs(x_trans(:, 2)) < sptol), "pca_sp transform") ! Test Inverse Transform - x_inv = pca_inverse_transform(x_trans, components, mu) + call pca_inverse_transform(x_trans, components, mu, x_inv) call check(all(abs(x_inv - x) < sptol), "pca_sp inverse") ! Test EIG method @@ -62,8 +62,8 @@ subroutine test_pca_dp() call check(abs(s(1) - 4.0_dp) < dptol, "pca_dp svd s1") ! Test Transform/Inverse - x_trans = pca_transform(x, components, mu) - x_inv = pca_inverse_transform(x_trans, components, mu) + call pca_transform(x, components, mu, x_trans) + call pca_inverse_transform(x_trans, components, mu, x_inv) call check(all(abs(x_inv - x) < dptol), "pca_dp inverse") ! Test EIG method From 8c4dcd825bb499c98bc23559ad1acbca5a6b2b0e Mon Sep 17 00:00:00 2001 From: jaya sathvik Date: Wed, 7 Jan 2026 14:09:33 +0530 Subject: [PATCH 042/104] fix errors --- src/stats/stdlib_stats_pca.fypp | 26 ++++++++------------------ 1 file changed, 8 insertions(+), 18 deletions(-) diff --git a/src/stats/stdlib_stats_pca.fypp b/src/stats/stdlib_stats_pca.fypp index dd7106b8a..556997234 100644 --- a/src/stats/stdlib_stats_pca.fypp +++ b/src/stats/stdlib_stats_pca.fypp @@ -7,6 +7,7 @@ #:set PCA_REAL_TYPES = ["real(sp)", "real(dp)"] #:set REAL_KINDS_TYPES = list(zip(PCA_REAL_KINDS, PCA_REAL_TYPES)) submodule (stdlib_stats) stdlib_stats_pca + use stdlib_kinds, only: sp, dp use stdlib_error, only: error_stop use stdlib_optval, only: optval use stdlib_linalg, only: svd, eigh @@ -130,19 +131,16 @@ contains #:for k1, t1 in REAL_KINDS_TYPES module subroutine pca_transform_${k1}$(x, components, x_mean, x_transformed) - use stdlib_linalg_blas, only: gemm ${t1}$, intent(in) :: x(:,:) ${t1}$, intent(in) :: components(:,:) ${t1}$, intent(in), optional :: x_mean(:) ${t1}$, intent(out) :: x_transformed(:,:) - integer(ilp) :: i, n, p, k + integer(ilp) :: i, n, p ${t1}$, allocatable :: x_centered(:,:) - ${t1}$, parameter :: alpha = 1.0_${k1}$, beta = 0.0_${k1}$ - n = size(x, 1, kind=ilp) ! number of samples - p = size(x, 2, kind=ilp) ! number of features - k = size(components, 1, kind=ilp) ! number of components + n = size(x, 1, kind=ilp) + p = size(x, 2, kind=ilp) allocate(x_centered(n, p)) if (present(x_mean)) then @@ -154,32 +152,24 @@ contains end if ! x_transformed = x_centered * components^T - ! GEMM: C = alpha * A * B^T + beta * C - ! x_transformed(n,k) = x_centered(n,p) * components(k,p)^T - call gemm('N', 'T', n, k, p, alpha, x_centered, n, components, k, beta, x_transformed, n) + x_transformed = matmul(x_centered, transpose(components)) end subroutine pca_transform_${k1}$ #:endfor #:for k1, t1 in REAL_KINDS_TYPES module subroutine pca_inverse_transform_${k1}$(x_reduced, components, x_mean, x_reconstructed) - use stdlib_linalg_blas, only: gemm ${t1}$, intent(in) :: x_reduced(:,:) ${t1}$, intent(in) :: components(:,:) ${t1}$, intent(in), optional :: x_mean(:) ${t1}$, intent(out) :: x_reconstructed(:,:) - integer(ilp) :: i, n, k, p - ${t1}$, parameter :: alpha = 1.0_${k1}$, beta = 0.0_${k1}$ + integer(ilp) :: i, n - n = size(x_reduced, 1, kind=ilp) ! number of samples - k = size(x_reduced, 2, kind=ilp) ! number of components - p = size(components, 2, kind=ilp) ! number of features + n = size(x_reduced, 1, kind=ilp) ! x_reconstructed = x_reduced * components - ! GEMM: C = alpha * A * B + beta * C - ! x_reconstructed(n,p) = x_reduced(n,k) * components(k,p) - call gemm('N', 'N', n, p, k, alpha, x_reduced, n, components, k, beta, x_reconstructed, n) + x_reconstructed = matmul(x_reduced, components) if (present(x_mean)) then do i = 1, n From 1c97f51d33af0fad19948755afdf111cecb56d9a Mon Sep 17 00:00:00 2001 From: jaya sathvik Date: Wed, 7 Jan 2026 14:19:22 +0530 Subject: [PATCH 043/104] fixed errors --- src/stats/stdlib_stats_pca.fypp | 68 ++++++++++++++++----------------- 1 file changed, 32 insertions(+), 36 deletions(-) diff --git a/src/stats/stdlib_stats_pca.fypp b/src/stats/stdlib_stats_pca.fypp index 556997234..03470b63f 100644 --- a/src/stats/stdlib_stats_pca.fypp +++ b/src/stats/stdlib_stats_pca.fypp @@ -12,7 +12,9 @@ submodule (stdlib_stats) stdlib_stats_pca use stdlib_optval, only: optval use stdlib_linalg, only: svd, eigh use stdlib_linalg_constants, only: ilp - use stdlib_linalg_state, only: LINALG_SUCCESS, LINALG_ERROR, linalg_state_type + use stdlib_linalg_blas, only: gemm + use stdlib_linalg_state, only: linalg_state_type, LINALG_ERROR + use stdlib_sorting, only: sort_index implicit none contains @@ -29,7 +31,7 @@ contains type(linalg_state_type), intent(out), optional :: err type(linalg_state_type) :: err0 - integer(ilp) :: n, p, i, k + integer(ilp) :: n, p, i, k, m, n_s ${t1}$, allocatable :: mu(:) character(16) :: method_ @@ -43,14 +45,11 @@ contains allocate(mu(p)) mu = mean(x, dim=1) if (present(x_mean)) x_mean = mu - - err0 = linalg_state_type("pca", LINALG_SUCCESS) if (method_ == "svd") then ! 2. Center data and call SVD with temporaries for robustness block ${t1}$, allocatable :: s_tmp(:), vt_tmp(:,:) - integer(ilp) :: n_s n_s = min(n, p) allocate(s_tmp(n_s), vt_tmp(n_s, p)) @@ -71,41 +70,28 @@ contains end if if (err0%ok()) then - i = min(size(components, 1, kind=ilp), n_s) - components(:i, :) = vt_tmp(:i, :) - i = min(size(singular_values, 1, kind=ilp), n_s) - singular_values(:i) = s_tmp(:i) + m = min(size(components, 1, kind=ilp), n_s) + components(:m, :) = vt_tmp(:m, :) + m = min(size(singular_values, 1, kind=ilp), n_s) + singular_values(:m) = s_tmp(:m) end if end block else if (method_ == "eig" .or. method_ == "cov") then ! 3. Eigendecomposition of covariance matrix block - ${t1}$, allocatable :: c(:,:) - ${t1}$, allocatable :: vectors(:,:) - real(${k1}$), allocatable :: lambda(:) + ${t1}$, allocatable :: c(:,:), vectors(:,:) + real(${k1}$), allocatable :: lambda(:), lambda_copy(:) integer(ilp), allocatable :: idx(:) - integer(ilp) :: j, m - real(${k1}$) :: tmp_val - allocate(lambda(p), idx(p), vectors(p, p)) + allocate(lambda(p), lambda_copy(p), idx(p), vectors(p, p)) c = cov(x, dim=1) call eigh(c, lambda, vectors=vectors, err=err0) if (err0%ok()) then - ! Sort eigenvalues and vectors in descending order - do j = 1, p - idx(j) = j - end do - ! Simple bubble sort - do i = 1, p-1 - do j = i+1, p - if (lambda(idx(i)) < lambda(idx(j))) then - m = idx(i) - idx(i) = idx(j) - idx(j) = m - end if - end do - end do + ! Sort eigenvalues in descending order using stdlib_sorting + ! sort_index sorts in ascending order, so we negate values + lambda_copy = -lambda + call sort_index(lambda_copy, idx) ! Assign sorted results m = min(size(components, 1, kind=ilp), p) @@ -123,7 +109,8 @@ contains err0 = linalg_state_type("pca", LINALG_ERROR, "Unknown method: "//method_) end if - if (present(err)) err = err0 + ! Handle error state: return error or stop if err not present + call err0%handle(err) end subroutine pca_${k1}$ #:endfor @@ -136,11 +123,13 @@ contains ${t1}$, intent(in), optional :: x_mean(:) ${t1}$, intent(out) :: x_transformed(:,:) - integer(ilp) :: i, n, p + integer(ilp) :: i, n, p, nc ${t1}$, allocatable :: x_centered(:,:) + ${t1}$, parameter :: alpha = 1.0_${k1}$, beta = 0.0_${k1}$ n = size(x, 1, kind=ilp) p = size(x, 2, kind=ilp) + nc = size(components, 1, kind=ilp) allocate(x_centered(n, p)) if (present(x_mean)) then @@ -151,8 +140,10 @@ contains x_centered = x end if - ! x_transformed = x_centered * components^T - x_transformed = matmul(x_centered, transpose(components)) + ! x_transformed = x_centered * components^T using GEMM + ! GEMM: C = alpha * op(A) * op(B) + beta * C + ! x_transformed(n, nc) = x_centered(n, p) * components(nc, p)^T + call gemm('N', 'T', n, nc, p, alpha, x_centered, n, components, nc, beta, x_transformed, n) end subroutine pca_transform_${k1}$ #:endfor @@ -164,12 +155,17 @@ contains ${t1}$, intent(in), optional :: x_mean(:) ${t1}$, intent(out) :: x_reconstructed(:,:) - integer(ilp) :: i, n + integer(ilp) :: i, n, nc, p + ${t1}$, parameter :: alpha = 1.0_${k1}$, beta = 0.0_${k1}$ n = size(x_reduced, 1, kind=ilp) + nc = size(x_reduced, 2, kind=ilp) + p = size(components, 2, kind=ilp) - ! x_reconstructed = x_reduced * components - x_reconstructed = matmul(x_reduced, components) + ! x_reconstructed = x_reduced * components using GEMM + ! GEMM: C = alpha * op(A) * op(B) + beta * C + ! x_reconstructed(n, p) = x_reduced(n, nc) * components(nc, p) + call gemm('N', 'N', n, p, nc, alpha, x_reduced, n, components, nc, beta, x_reconstructed, n) if (present(x_mean)) then do i = 1, n From 2e87b766b406aaee4beb01b66a51f4ce2326aa88 Mon Sep 17 00:00:00 2001 From: jaya sathvik Date: Wed, 7 Jan 2026 18:53:17 +0530 Subject: [PATCH 044/104] fix PCA BLAS/LAPACK linking --- src/stats/CMakeLists.txt | 1 + src/stats/stdlib_stats.fypp | 11 ++--- src/stats/stdlib_stats_pca.fypp | 13 +++--- test/stats/CMakeLists.txt | 1 + test/stats/test_pca.f90 | 76 --------------------------------- test/stats/test_pca.fypp | 54 +++++++++++++++++++++++ 6 files changed, 65 insertions(+), 91 deletions(-) delete mode 100644 test/stats/test_pca.f90 create mode 100644 test/stats/test_pca.fypp diff --git a/src/stats/CMakeLists.txt b/src/stats/CMakeLists.txt index 41042315b..b9b94ab4b 100644 --- a/src/stats/CMakeLists.txt +++ b/src/stats/CMakeLists.txt @@ -31,3 +31,4 @@ set(stats_fppFiles ) configure_stdlib_target(stats "" stats_fppFiles stats_cppFiles) +target_link_libraries(stats PUBLIC blas lapack) diff --git a/src/stats/stdlib_stats.fypp b/src/stats/stdlib_stats.fypp index 4853d3dea..1fa272f6c 100644 --- a/src/stats/stdlib_stats.fypp +++ b/src/stats/stdlib_stats.fypp @@ -642,13 +642,10 @@ module stdlib_stats end interface moment - #! Note: PCA uses SVD and EIGH which rely on LAPACK. External optimized BLAS/LAPACK backends - #! (e.g., OpenBLAS, MKL) do not support extended precision (xdp). While stdlib's internal LAPACK - #! implementations do provide quadruple precision (qp) routines, PCA is limited to single (sp) - #! and double (dp) precision for consistency with external optimized backends. - #:set PCA_REAL_KINDS = ["sp", "dp"] - #:set PCA_REAL_TYPES = ["real(sp)", "real(dp)"] - #:set PCA_KINDS_TYPES = list(zip(PCA_REAL_KINDS, PCA_REAL_TYPES)) + #! Note: PCA uses SVD and EIGH which rely on LAPACK. While optimized backends + #! (e.g., OpenBLAS, MKL) might only support single and double precision, + #! stdlib's internal BLAS/LAPACK provide support for all kinds (including xdp and qp). + #:set PCA_KINDS_TYPES = list(zip(REAL_KINDS, REAL_TYPES)) interface pca !! version: experimental diff --git a/src/stats/stdlib_stats_pca.fypp b/src/stats/stdlib_stats_pca.fypp index 03470b63f..e92c00980 100644 --- a/src/stats/stdlib_stats_pca.fypp +++ b/src/stats/stdlib_stats_pca.fypp @@ -1,13 +1,10 @@ #:include "common.fypp" -#! Note: PCA uses SVD and EIGH which rely on LAPACK. External optimized BLAS/LAPACK backends -#! (e.g., OpenBLAS, MKL) do not support extended precision (xdp). While stdlib's internal LAPACK -#! implementations do provide quadruple precision (qp) routines, PCA is limited to single (sp) -#! and double (dp) precision for consistency with external optimized backends. -#:set PCA_REAL_KINDS = ["sp", "dp"] -#:set PCA_REAL_TYPES = ["real(sp)", "real(dp)"] -#:set REAL_KINDS_TYPES = list(zip(PCA_REAL_KINDS, PCA_REAL_TYPES)) +#! Note: PCA uses SVD and EIGH which rely on LAPACK. While optimized backends +#! (e.g., OpenBLAS, MKL) might only support single and double precision, +#! stdlib's internal BLAS/LAPACK provide support for all kinds (including xdp and qp). +#:set REAL_KINDS_TYPES = list(zip(REAL_KINDS, REAL_TYPES)) submodule (stdlib_stats) stdlib_stats_pca - use stdlib_kinds, only: sp, dp + use stdlib_kinds, only: sp, dp, xdp, qp use stdlib_error, only: error_stop use stdlib_optval, only: optval use stdlib_linalg, only: svd, eigh diff --git a/test/stats/CMakeLists.txt b/test/stats/CMakeLists.txt index 5eb3d61b0..3627508a0 100644 --- a/test/stats/CMakeLists.txt +++ b/test/stats/CMakeLists.txt @@ -8,6 +8,7 @@ set(fppFiles test_distribution_uniform.fypp test_distribution_normal.fypp test_distribution_exponential.fypp + test_pca.fypp ) fypp_f90("${fyppFlags}" "${fppFiles}" outFiles) diff --git a/test/stats/test_pca.f90 b/test/stats/test_pca.f90 deleted file mode 100644 index 560365270..000000000 --- a/test/stats/test_pca.f90 +++ /dev/null @@ -1,76 +0,0 @@ -program test_pca - use stdlib_error, only: check - use stdlib_kinds, only: sp, dp - use stdlib_stats, only: pca, pca_transform, pca_inverse_transform - use stdlib_linalg_state, only: linalg_state_type - implicit none - - real(sp), parameter :: sptol = 1000 * epsilon(1._sp) - real(dp), parameter :: dptol = 1000 * epsilon(1._dp) - - call test_pca_sp() - call test_pca_dp() - -contains - - subroutine test_pca_sp() - real(sp) :: x(3, 2), components(2, 2), s(2), mu(2) - real(sp) :: x_red(3, 1), comp_red(1, 2), s_red(1) - real(sp) :: x_trans(3, 2), x_inv(3, 2) - type(linalg_state_type) :: err - - ! Data: [1, 2], [3, 4], [5, 6] - x = reshape([1.0_sp, 3.0_sp, 5.0_sp, 2.0_sp, 4.0_sp, 6.0_sp], [3, 2]) - - ! Test SVD method - call pca(x, components, s, x_mean=mu, method="svd", err=err) - call check(err%ok(), "pca_sp svd err") - call check(all(abs(mu - [3.0_sp, 4.0_sp]) < sptol), "pca_sp svd mean") - ! First component should be approx [0.707, 0.707] (or negative) - call check(abs(abs(components(1,1)) - 1.0_sp/sqrt(2.0_sp)) < sptol, "pca_sp svd comp1") - call check(abs(s(1) - 4.0_sp) < sptol, "pca_sp svd s1") - call check(abs(s(2)) < sptol, "pca_sp svd s2") - - ! Test Transform - call pca_transform(x, components, mu, x_trans) - ! Second dimension should be zero - call check(all(abs(x_trans(:, 2)) < sptol), "pca_sp transform") - - ! Test Inverse Transform - call pca_inverse_transform(x_trans, components, mu, x_inv) - call check(all(abs(x_inv - x) < sptol), "pca_sp inverse") - - ! Test EIG method - call pca(x, components, s, method="eig", err=err) - call check(err%ok(), "pca_sp eig err") - call check(abs(s(1) - 4.0_sp) < sptol, "pca_sp eig s1") - - end subroutine test_pca_sp - - subroutine test_pca_dp() - real(dp) :: x(3, 2), components(2, 2), s(2), mu(2) - real(dp) :: x_trans(3, 2), x_inv(3, 2) - type(linalg_state_type) :: err - - x = reshape([1.0_dp, 3.0_dp, 5.0_dp, 2.0_dp, 4.0_dp, 6.0_dp], [3, 2]) - - ! Test SVD method - call pca(x, components, s, x_mean=mu, method="svd", err=err) - call check(err%ok(), "pca_dp svd err") - call check(all(abs(mu - [3.0_dp, 4.0_dp]) < dptol), "pca_dp svd mean") - call check(abs(abs(components(1,1)) - 1.0_dp/sqrt(2.0_dp)) < dptol, "pca_dp svd comp1") - call check(abs(s(1) - 4.0_dp) < dptol, "pca_dp svd s1") - - ! Test Transform/Inverse - call pca_transform(x, components, mu, x_trans) - call pca_inverse_transform(x_trans, components, mu, x_inv) - call check(all(abs(x_inv - x) < dptol), "pca_dp inverse") - - ! Test EIG method - call pca(x, components, s, method="eig", err=err) - call check(err%ok(), "pca_dp eig err") - call check(abs(s(1) - 4.0_dp) < dptol, "pca_dp eig s1") - - end subroutine test_pca_dp - -end program test_pca diff --git a/test/stats/test_pca.fypp b/test/stats/test_pca.fypp new file mode 100644 index 000000000..327950753 --- /dev/null +++ b/test/stats/test_pca.fypp @@ -0,0 +1,54 @@ +#:include "common.fypp" +program test_pca + use stdlib_error, only: check + use stdlib_kinds, only: sp, dp, xdp, qp + use stdlib_stats, only: pca, pca_transform, pca_inverse_transform + use stdlib_linalg_state, only: linalg_state_type + implicit none + + #:for k1 in REAL_KINDS + real(${k1}$), parameter :: ${k1}$tol = 1000 * epsilon(1._${k1}$) + #:endfor + + #:for k1 in REAL_KINDS + call test_pca_${k1}$() + #:endfor + +contains + + #:for k1 in REAL_KINDS + subroutine test_pca_${k1}$() + real(${k1}$) :: x(3, 2), components(2, 2), s(2), mu(2) + real(${k1}$) :: x_trans(3, 2), x_inv(3, 2) + type(linalg_state_type) :: err + + ! Data: [1, 2], [3, 4], [5, 6] + x = reshape([1.0_${k1}$, 3.0_${k1}$, 5.0_${k1}$, 2.0_${k1}$, 4.0_${k1}$, 6.0_${k1}$], [3, 2]) + + ! Test SVD method + call pca(x, components, s, x_mean=mu, method="svd", err=err) + call check(err%ok(), "pca_${k1}$ svd err") + call check(all(abs(mu - [3.0_${k1}$, 4.0_${k1}$]) < ${k1}$tol), "pca_${k1}$ svd mean") + ! First component should be approx [0.707, 0.707] (or negative) + call check(abs(abs(components(1,1)) - 1.0_${k1}$/sqrt(2.0_${k1}$)) < ${k1}$tol, "pca_${k1}$ svd comp1") + call check(abs(s(1) - 4.0_${k1}$) < ${k1}$tol, "pca_${k1}$ svd s1") + call check(abs(s(2)) < ${k1}$tol, "pca_${k1}$ svd s2") + + ! Test Transform + call pca_transform(x, components, mu, x_trans) + ! Second dimension should be zero + call check(all(abs(x_trans(:, 2)) < ${k1}$tol), "pca_${k1}$ transform") + + ! Test Inverse Transform + call pca_inverse_transform(x_trans, components, mu, x_inv) + call check(all(abs(x_inv - x) < ${k1}$tol), "pca_${k1}$ inverse") + + ! Test EIG method + call pca(x, components, s, method="eig", err=err) + call check(err%ok(), "pca_${k1}$ eig err") + call check(abs(s(1) - 4.0_${k1}$) < ${k1}$tol, "pca_${k1}$ eig s1") + + end subroutine test_pca_${k1}$ + #:endfor + +end program test_pca From e665dced6172572172abff288cfcc613a02ed131 Mon Sep 17 00:00:00 2001 From: jaya sathvik Date: Wed, 7 Jan 2026 19:10:09 +0530 Subject: [PATCH 045/104] fix PCA BLAS/LAPACK --- src/stats/stdlib_stats.fypp | 7 ++++--- src/stats/stdlib_stats_pca.fypp | 7 ++++--- 2 files changed, 8 insertions(+), 6 deletions(-) diff --git a/src/stats/stdlib_stats.fypp b/src/stats/stdlib_stats.fypp index 1fa272f6c..7714ff2f3 100644 --- a/src/stats/stdlib_stats.fypp +++ b/src/stats/stdlib_stats.fypp @@ -642,9 +642,10 @@ module stdlib_stats end interface moment - #! Note: PCA uses SVD and EIGH which rely on LAPACK. While optimized backends - #! (e.g., OpenBLAS, MKL) might only support single and double precision, - #! stdlib's internal BLAS/LAPACK provide support for all kinds (including xdp and qp). + #! Note: PCA uses SVD and EIGH which rely on LAPACK/BLAS. stdlib provides internal + #! BLAS/LAPACK backends that support all real kinds (sp, dp, xdp, qp). Linking against + #! external optimized libraries (e.g., OpenBLAS, MKL) is optional and can improve + #! performance for single (sp) and double (dp) precision. #:set PCA_KINDS_TYPES = list(zip(REAL_KINDS, REAL_TYPES)) interface pca diff --git a/src/stats/stdlib_stats_pca.fypp b/src/stats/stdlib_stats_pca.fypp index e92c00980..6f2f932c2 100644 --- a/src/stats/stdlib_stats_pca.fypp +++ b/src/stats/stdlib_stats_pca.fypp @@ -1,7 +1,8 @@ #:include "common.fypp" -#! Note: PCA uses SVD and EIGH which rely on LAPACK. While optimized backends -#! (e.g., OpenBLAS, MKL) might only support single and double precision, -#! stdlib's internal BLAS/LAPACK provide support for all kinds (including xdp and qp). +#! Note: PCA uses SVD and EIGH which rely on LAPACK/BLAS. stdlib provides internal +#! BLAS/LAPACK backends that support all real kinds (sp, dp, xdp, qp). Linking against +#! external optimized libraries (e.g., OpenBLAS, MKL) is optional and can improve +#! performance for single (sp) and double (dp) precision. #:set REAL_KINDS_TYPES = list(zip(REAL_KINDS, REAL_TYPES)) submodule (stdlib_stats) stdlib_stats_pca use stdlib_kinds, only: sp, dp, xdp, qp From 1e6cef7c5aab898577e45abeeef889bdac3669ac Mon Sep 17 00:00:00 2001 From: jaya sathvik Date: Wed, 7 Jan 2026 19:33:36 +0530 Subject: [PATCH 046/104] fix: remove xdp/qp from PCA use statements to fix CI builds --- src/stats/stdlib_stats.fypp | 2 +- src/stats/stdlib_stats_pca.fypp | 2 +- test/stats/test_pca.fypp | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/stats/stdlib_stats.fypp b/src/stats/stdlib_stats.fypp index 7714ff2f3..a44bbffb7 100644 --- a/src/stats/stdlib_stats.fypp +++ b/src/stats/stdlib_stats.fypp @@ -7,7 +7,7 @@ module stdlib_stats !! Provides support for various statistical methods. This includes currently !! descriptive statistics !! ([Specification](../page/specs/stdlib_stats.html)) - use stdlib_kinds, only: sp, dp, xdp, qp, & + use stdlib_kinds, only: sp, dp, & int8, int16, int32, int64 use stdlib_linalg_state, only: linalg_state_type implicit none diff --git a/src/stats/stdlib_stats_pca.fypp b/src/stats/stdlib_stats_pca.fypp index 6f2f932c2..bab9a3171 100644 --- a/src/stats/stdlib_stats_pca.fypp +++ b/src/stats/stdlib_stats_pca.fypp @@ -5,7 +5,7 @@ #! performance for single (sp) and double (dp) precision. #:set REAL_KINDS_TYPES = list(zip(REAL_KINDS, REAL_TYPES)) submodule (stdlib_stats) stdlib_stats_pca - use stdlib_kinds, only: sp, dp, xdp, qp + use stdlib_kinds, only: sp, dp use stdlib_error, only: error_stop use stdlib_optval, only: optval use stdlib_linalg, only: svd, eigh diff --git a/test/stats/test_pca.fypp b/test/stats/test_pca.fypp index 327950753..667526949 100644 --- a/test/stats/test_pca.fypp +++ b/test/stats/test_pca.fypp @@ -1,7 +1,7 @@ #:include "common.fypp" program test_pca use stdlib_error, only: check - use stdlib_kinds, only: sp, dp, xdp, qp + use stdlib_kinds, only: sp, dp use stdlib_stats, only: pca, pca_transform, pca_inverse_transform use stdlib_linalg_state, only: linalg_state_type implicit none From f5f0c60a7dec712bbf457b4a1d9bcaeca70a6438 Mon Sep 17 00:00:00 2001 From: jaya sathvik Date: Wed, 7 Jan 2026 19:46:57 +0530 Subject: [PATCH 047/104] both updated --- src/stats/stdlib_stats.fypp | 2 +- src/stats/stdlib_stats_pca.fypp | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/stats/stdlib_stats.fypp b/src/stats/stdlib_stats.fypp index a44bbffb7..7714ff2f3 100644 --- a/src/stats/stdlib_stats.fypp +++ b/src/stats/stdlib_stats.fypp @@ -7,7 +7,7 @@ module stdlib_stats !! Provides support for various statistical methods. This includes currently !! descriptive statistics !! ([Specification](../page/specs/stdlib_stats.html)) - use stdlib_kinds, only: sp, dp, & + use stdlib_kinds, only: sp, dp, xdp, qp, & int8, int16, int32, int64 use stdlib_linalg_state, only: linalg_state_type implicit none diff --git a/src/stats/stdlib_stats_pca.fypp b/src/stats/stdlib_stats_pca.fypp index bab9a3171..6f2f932c2 100644 --- a/src/stats/stdlib_stats_pca.fypp +++ b/src/stats/stdlib_stats_pca.fypp @@ -5,7 +5,7 @@ #! performance for single (sp) and double (dp) precision. #:set REAL_KINDS_TYPES = list(zip(REAL_KINDS, REAL_TYPES)) submodule (stdlib_stats) stdlib_stats_pca - use stdlib_kinds, only: sp, dp + use stdlib_kinds, only: sp, dp, xdp, qp use stdlib_error, only: error_stop use stdlib_optval, only: optval use stdlib_linalg, only: svd, eigh From 57b3cc5ecba905b3b0d6205f84599baf4efa7250 Mon Sep 17 00:00:00 2001 From: jaya sathvik Date: Wed, 7 Jan 2026 20:17:17 +0530 Subject: [PATCH 048/104] test --- src/stats/stdlib_stats.fypp | 9 ++++----- src/stats/stdlib_stats_pca.fypp | 16 +++++++--------- test/stats/test_pca.fypp | 7 ++++--- 3 files changed, 15 insertions(+), 17 deletions(-) diff --git a/src/stats/stdlib_stats.fypp b/src/stats/stdlib_stats.fypp index 7714ff2f3..79774c163 100644 --- a/src/stats/stdlib_stats.fypp +++ b/src/stats/stdlib_stats.fypp @@ -642,11 +642,10 @@ module stdlib_stats end interface moment - #! Note: PCA uses SVD and EIGH which rely on LAPACK/BLAS. stdlib provides internal - #! BLAS/LAPACK backends that support all real kinds (sp, dp, xdp, qp). Linking against - #! external optimized libraries (e.g., OpenBLAS, MKL) is optional and can improve - #! performance for single (sp) and double (dp) precision. - #:set PCA_KINDS_TYPES = list(zip(REAL_KINDS, REAL_TYPES)) + #! Note: PCA is limited to single (sp) and double (dp) precision because external + #! optimized BLAS/LAPACK libraries (OpenBLAS, MKL) only support these precisions. + #! Extended (xdp) and quadruple (qp) precision are not supported for PCA. + #:set PCA_KINDS_TYPES = [("sp", "real(sp)"), ("dp", "real(dp)")] interface pca !! version: experimental diff --git a/src/stats/stdlib_stats_pca.fypp b/src/stats/stdlib_stats_pca.fypp index 6f2f932c2..7890c922e 100644 --- a/src/stats/stdlib_stats_pca.fypp +++ b/src/stats/stdlib_stats_pca.fypp @@ -1,11 +1,9 @@ #:include "common.fypp" -#! Note: PCA uses SVD and EIGH which rely on LAPACK/BLAS. stdlib provides internal -#! BLAS/LAPACK backends that support all real kinds (sp, dp, xdp, qp). Linking against -#! external optimized libraries (e.g., OpenBLAS, MKL) is optional and can improve -#! performance for single (sp) and double (dp) precision. -#:set REAL_KINDS_TYPES = list(zip(REAL_KINDS, REAL_TYPES)) +#! Note: PCA is limited to single (sp) and double (dp) precision because external +#! optimized BLAS/LAPACK libraries (OpenBLAS, MKL) only support these precisions. +#:set PCA_KINDS_TYPES = [("sp", "real(sp)"), ("dp", "real(dp)")] submodule (stdlib_stats) stdlib_stats_pca - use stdlib_kinds, only: sp, dp, xdp, qp + use stdlib_kinds, only: sp, dp use stdlib_error, only: error_stop use stdlib_optval, only: optval use stdlib_linalg, only: svd, eigh @@ -17,7 +15,7 @@ submodule (stdlib_stats) stdlib_stats_pca contains - #:for k1, t1 in REAL_KINDS_TYPES + #:for k1, t1 in PCA_KINDS_TYPES module subroutine pca_${k1}$(x, components, singular_values, x_mean, & method, overwrite_x, err) ${t1}$, intent(inout), target :: x(:,:) @@ -114,7 +112,7 @@ contains #:endfor - #:for k1, t1 in REAL_KINDS_TYPES + #:for k1, t1 in PCA_KINDS_TYPES module subroutine pca_transform_${k1}$(x, components, x_mean, x_transformed) ${t1}$, intent(in) :: x(:,:) ${t1}$, intent(in) :: components(:,:) @@ -146,7 +144,7 @@ contains #:endfor - #:for k1, t1 in REAL_KINDS_TYPES + #:for k1, t1 in PCA_KINDS_TYPES module subroutine pca_inverse_transform_${k1}$(x_reduced, components, x_mean, x_reconstructed) ${t1}$, intent(in) :: x_reduced(:,:) ${t1}$, intent(in) :: components(:,:) diff --git a/test/stats/test_pca.fypp b/test/stats/test_pca.fypp index 667526949..2f0c8d05e 100644 --- a/test/stats/test_pca.fypp +++ b/test/stats/test_pca.fypp @@ -1,4 +1,5 @@ #:include "common.fypp" +#:set PCA_KINDS = ["sp", "dp"] program test_pca use stdlib_error, only: check use stdlib_kinds, only: sp, dp @@ -6,17 +7,17 @@ program test_pca use stdlib_linalg_state, only: linalg_state_type implicit none - #:for k1 in REAL_KINDS + #:for k1 in PCA_KINDS real(${k1}$), parameter :: ${k1}$tol = 1000 * epsilon(1._${k1}$) #:endfor - #:for k1 in REAL_KINDS + #:for k1 in PCA_KINDS call test_pca_${k1}$() #:endfor contains - #:for k1 in REAL_KINDS + #:for k1 in PCA_KINDS subroutine test_pca_${k1}$() real(${k1}$) :: x(3, 2), components(2, 2), s(2), mu(2) real(${k1}$) :: x_trans(3, 2), x_inv(3, 2) From f014baf263c881b96a08b6ac9a595b25c8de3371 Mon Sep 17 00:00:00 2001 From: jaya sathvik Date: Wed, 7 Jan 2026 20:42:10 +0530 Subject: [PATCH 049/104] modify interfaces for core. --- src/stats/stdlib_stats_pca.fypp | 31 ++++++++++++++++++++++++------- 1 file changed, 24 insertions(+), 7 deletions(-) diff --git a/src/stats/stdlib_stats_pca.fypp b/src/stats/stdlib_stats_pca.fypp index 7890c922e..057e2a34d 100644 --- a/src/stats/stdlib_stats_pca.fypp +++ b/src/stats/stdlib_stats_pca.fypp @@ -27,7 +27,7 @@ contains type(linalg_state_type), intent(out), optional :: err type(linalg_state_type) :: err0 - integer(ilp) :: n, p, i, k, m, n_s + integer(ilp) :: n, p, i, j, k, m, n_s ${t1}$, allocatable :: mu(:) character(16) :: method_ @@ -37,9 +37,11 @@ contains method_ = optval(method, "svd") - ! 1. Calculate and optionally return mean + ! 1. Calculate mean using intrinsic sum (avoids submodule dependency issues) allocate(mu(p)) - mu = mean(x, dim=1) + do j = 1, p + mu(j) = sum(x(:, j)) / real(n, ${k1}$) + end do if (present(x_mean)) x_mean = mu if (method_ == "svd") then @@ -73,14 +75,29 @@ contains end if end block else if (method_ == "eig" .or. method_ == "cov") then - ! 3. Eigendecomposition of covariance matrix + ! 3. Eigendecomposition of covariance matrix (computed inline) block - ${t1}$, allocatable :: c(:,:), vectors(:,:) + ${t1}$, allocatable :: c(:,:), vectors(:,:), x_centered(:,:) real(${k1}$), allocatable :: lambda(:), lambda_copy(:) integer(ilp), allocatable :: idx(:) + real(${k1}$) :: scale_factor - allocate(lambda(p), lambda_copy(p), idx(p), vectors(p, p)) - c = cov(x, dim=1) + allocate(c(p, p), lambda(p), lambda_copy(p), idx(p), vectors(p, p)) + allocate(x_centered(n, p)) + + ! Center data + do i = 1, n + x_centered(i, :) = x(i, :) - mu + end do + + ! Compute covariance matrix: C = X^T * X / (n-1) + scale_factor = 1.0_${k1}$ / real(max(n-1, 1), ${k1}$) + do i = 1, p + do j = 1, p + c(i, j) = dot_product(x_centered(:, i), x_centered(:, j)) * scale_factor + end do + end do + call eigh(c, lambda, vectors=vectors, err=err0) if (err0%ok()) then From 9dd3212982d1d1d23295b093a081edbddf562873 Mon Sep 17 00:00:00 2001 From: jaya sathvik Date: Thu, 8 Jan 2026 20:29:20 +0530 Subject: [PATCH 050/104] add stdlib_sorting.fypp in cmakelists.txt --- src/stats/CMakeLists.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/src/stats/CMakeLists.txt b/src/stats/CMakeLists.txt index b9b94ab4b..0b3f14516 100644 --- a/src/stats/CMakeLists.txt +++ b/src/stats/CMakeLists.txt @@ -13,6 +13,7 @@ set(stats_fppFiles ../stdlib_linalg_state.fypp ../stdlib_random.fypp ../stdlib_selection.fypp + ../stdlib_sorting.fypp ../stdlib_string_type.fypp stdlib_stats_corr.fypp stdlib_stats_cov.fypp From 202e6568147d32ace82adbcc05a97199e211b378 Mon Sep 17 00:00:00 2001 From: Jeremie Vandenplas Date: Thu, 8 Jan 2026 17:09:29 +0100 Subject: [PATCH 051/104] Fix CMakeLists.txt for the addition of stdlib_storting_pca --- src/stats/CMakeLists.txt | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/src/stats/CMakeLists.txt b/src/stats/CMakeLists.txt index 0b3f14516..3a9553c70 100644 --- a/src/stats/CMakeLists.txt +++ b/src/stats/CMakeLists.txt @@ -1,5 +1,9 @@ set(stats_cppFiles ../stdlib_linalg_constants.fypp + ../stdlib_sorting.fypp + ../stdlib_sorting_ord_sort.fypp + ../stdlib_sorting_sort_adjoint.fypp + ../stdlib_sorting_sort.fypp ) set(stats_fppFiles @@ -10,10 +14,11 @@ set(stats_fppFiles ../stdlib_error.fypp ../stdlib_linalg.fypp ../stdlib_linalg_diag.fypp + ../stdlib_linalg_eigenvalues.fypp ../stdlib_linalg_state.fypp + ../stdlib_linalg_svd.fypp ../stdlib_random.fypp ../stdlib_selection.fypp - ../stdlib_sorting.fypp ../stdlib_string_type.fypp stdlib_stats_corr.fypp stdlib_stats_cov.fypp @@ -31,5 +36,9 @@ set(stats_fppFiles stdlib_stats_var.fypp ) +set(f90Files + ../stdlib_sorting_radix_sort.f90 + ) + configure_stdlib_target(stats "" stats_fppFiles stats_cppFiles) -target_link_libraries(stats PUBLIC blas lapack) +target_link_libraries(stats PUBLIC blas lapack $<$>:bitsets>) From c61eb79c771d2a746bbf5dd87f73df2ca9590019 Mon Sep 17 00:00:00 2001 From: jaya sathvik Date: Tue, 13 Jan 2026 16:44:35 +0530 Subject: [PATCH 052/104] Add center_data Helper Subroutine --- src/stats/stdlib_stats_pca.fypp | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/src/stats/stdlib_stats_pca.fypp b/src/stats/stdlib_stats_pca.fypp index 057e2a34d..c6737f396 100644 --- a/src/stats/stdlib_stats_pca.fypp +++ b/src/stats/stdlib_stats_pca.fypp @@ -15,6 +15,19 @@ submodule (stdlib_stats) stdlib_stats_pca contains + ! Helper subroutine: Centers data in-place by subtracting the mean from each row + #:for k1, t1 in PCA_KINDS_TYPES + pure subroutine center_data_${k1}$(x, mu) + ${t1}$, intent(inout) :: x(:,:) + ${t1}$, intent(in) :: mu(:) + integer(ilp) :: i, n + n = size(x, 1, kind=ilp) + do i = 1, n + x(i, :) = x(i, :) - mu + end do + end subroutine center_data_${k1}$ + #:endfor + #:for k1, t1 in PCA_KINDS_TYPES module subroutine pca_${k1}$(x, components, singular_values, x_mean, & method, overwrite_x, err) From 6daccc2ab4c1e460e852d929adf8c8ce5bcf1693 Mon Sep 17 00:00:00 2001 From: jaya sathvik Date: Tue, 13 Jan 2026 17:23:32 +0530 Subject: [PATCH 053/104] Replace Manual Mean with stdlib mean --- src/stats/stdlib_stats_pca.fypp | 14 +++++--------- 1 file changed, 5 insertions(+), 9 deletions(-) diff --git a/src/stats/stdlib_stats_pca.fypp b/src/stats/stdlib_stats_pca.fypp index c6737f396..b90807cb5 100644 --- a/src/stats/stdlib_stats_pca.fypp +++ b/src/stats/stdlib_stats_pca.fypp @@ -65,17 +65,14 @@ contains allocate(s_tmp(n_s), vt_tmp(n_s, p)) if (optval(overwrite_x, .false.)) then - do i = 1, n - x(i, :) = x(i, :) - mu - end do + call center_data_${k1}$(x, mu) call svd(x, s_tmp, vt=vt_tmp, overwrite_a=.true., full_matrices=.false., err=err0) else block ${t1}$, allocatable :: x_centered(:,:) allocate(x_centered(n, p)) - do i = 1, n - x_centered(i, :) = x(i, :) - mu - end do + x_centered = x + call center_data_${k1}$(x_centered, mu) call svd(x_centered, s_tmp, vt=vt_tmp, overwrite_a=.true., full_matrices=.false., err=err0) end block end if @@ -99,9 +96,8 @@ contains allocate(x_centered(n, p)) ! Center data - do i = 1, n - x_centered(i, :) = x(i, :) - mu - end do + x_centered = x + call center_data_${k1}$(x_centered, mu) ! Compute covariance matrix: C = X^T * X / (n-1) scale_factor = 1.0_${k1}$ / real(max(n-1, 1), ${k1}$) From 41a369093a3a6599f1f91e33dc710958317ffbef Mon Sep 17 00:00:00 2001 From: jaya sathvik Date: Tue, 13 Jan 2026 17:49:29 +0530 Subject: [PATCH 054/104] Replace Covariance Loops with BLAS syrk --- src/stats/stdlib_stats_pca.fypp | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/src/stats/stdlib_stats_pca.fypp b/src/stats/stdlib_stats_pca.fypp index b90807cb5..8ff329293 100644 --- a/src/stats/stdlib_stats_pca.fypp +++ b/src/stats/stdlib_stats_pca.fypp @@ -8,7 +8,7 @@ submodule (stdlib_stats) stdlib_stats_pca use stdlib_optval, only: optval use stdlib_linalg, only: svd, eigh use stdlib_linalg_constants, only: ilp - use stdlib_linalg_blas, only: gemm + use stdlib_linalg_blas, only: gemm, syrk use stdlib_linalg_state, only: linalg_state_type, LINALG_ERROR use stdlib_sorting, only: sort_index implicit none @@ -99,11 +99,15 @@ contains x_centered = x call center_data_${k1}$(x_centered, mu) - ! Compute covariance matrix: C = X^T * X / (n-1) + ! Compute covariance matrix using BLAS syrk: C = (1/(n-1)) * X^T * X + ! syrk computes C := alpha*A**T*A + beta*C (upper triangle only) scale_factor = 1.0_${k1}$ / real(max(n-1, 1), ${k1}$) - do i = 1, p - do j = 1, p - c(i, j) = dot_product(x_centered(:, i), x_centered(:, j)) * scale_factor + c = 0.0_${k1}$ + call syrk('U', 'T', p, n, scale_factor, x_centered, n, 0.0_${k1}$, c, p) + ! Fill lower triangle from upper triangle (syrk only fills upper) + do j = 1, p-1 + do i = j+1, p + c(i, j) = c(j, i) end do end do From 074d34e58f79eecaca60308e5eaf7ea533d751fb Mon Sep 17 00:00:00 2001 From: jaya sathvik Date: Tue, 13 Jan 2026 18:17:40 +0530 Subject: [PATCH 055/104] Extract pca_svd_driver and pca_eigh_driver & Updated Main pca Subroutine --- src/stats/stdlib_stats_pca.fypp | 190 +++++++++++++++++--------------- 1 file changed, 103 insertions(+), 87 deletions(-) diff --git a/src/stats/stdlib_stats_pca.fypp b/src/stats/stdlib_stats_pca.fypp index 8ff329293..0e885cab5 100644 --- a/src/stats/stdlib_stats_pca.fypp +++ b/src/stats/stdlib_stats_pca.fypp @@ -28,6 +28,81 @@ contains end subroutine center_data_${k1}$ #:endfor + ! SVD-based PCA driver: computes principal components via SVD of centered data + #:for k1, t1 in PCA_KINDS_TYPES + subroutine pca_svd_driver_${k1}$(x_centered, n, p, components, singular_values, err) + ${t1}$, intent(inout) :: x_centered(:,:) + integer(ilp), intent(in) :: n, p + ${t1}$, intent(out) :: components(:,:) + real(${k1}$), intent(out) :: singular_values(:) + type(linalg_state_type), intent(out) :: err + + integer(ilp) :: n_s, m + ${t1}$, allocatable :: s_tmp(:), vt_tmp(:,:) + + n_s = min(n, p) + allocate(s_tmp(n_s), vt_tmp(n_s, p)) + + call svd(x_centered, s_tmp, vt=vt_tmp, overwrite_a=.true., full_matrices=.false., err=err) + + if (err%ok()) then + m = min(size(components, 1, kind=ilp), n_s) + components(:m, :) = vt_tmp(:m, :) + m = min(size(singular_values, 1, kind=ilp), n_s) + singular_values(:m) = s_tmp(:m) + end if + end subroutine pca_svd_driver_${k1}$ + #:endfor + + ! Eigendecomposition-based PCA driver: computes principal components via covariance matrix + #:for k1, t1 in PCA_KINDS_TYPES + subroutine pca_eigh_driver_${k1}$(x_centered, n, p, components, singular_values, err) + ${t1}$, intent(in) :: x_centered(:,:) + integer(ilp), intent(in) :: n, p + ${t1}$, intent(out) :: components(:,:) + real(${k1}$), intent(out) :: singular_values(:) + type(linalg_state_type), intent(out) :: err + + integer(ilp) :: i, j, m + integer(ilp), allocatable :: idx(:) + real(${k1}$) :: scale_factor + real(${k1}$), allocatable :: lambda(:), lambda_copy(:) + ${t1}$, allocatable :: c(:,:), vectors(:,:) + + allocate(c(p, p), lambda(p), lambda_copy(p), idx(p), vectors(p, p)) + + ! Compute covariance matrix using BLAS syrk: C = (1/(n-1)) * X^T * X + scale_factor = 1.0_${k1}$ / real(max(n-1, 1), ${k1}$) + c = 0.0_${k1}$ + call syrk('U', 'T', p, n, scale_factor, x_centered, n, 0.0_${k1}$, c, p) + ! Fill lower triangle from upper triangle + do j = 1, p-1 + do i = j+1, p + c(i, j) = c(j, i) + end do + end do + + call eigh(c, lambda, vectors=vectors, err=err) + + if (err%ok()) then + ! Sort eigenvalues in descending order + lambda_copy = -lambda + call sort_index(lambda_copy, idx) + + ! Assign sorted results + m = min(size(components, 1, kind=ilp), p) + do i = 1, m + components(i, :) = vectors(:, idx(i)) + if (lambda(idx(i)) > 0.0_${k1}$) then + singular_values(i) = sqrt(lambda(idx(i)) * real(n-1, ${k1}$)) + else + singular_values(i) = 0.0_${k1}$ + end if + end do + end if + end subroutine pca_eigh_driver_${k1}$ + #:endfor + #:for k1, t1 in PCA_KINDS_TYPES module subroutine pca_${k1}$(x, components, singular_values, x_mean, & method, overwrite_x, err) @@ -40,104 +115,45 @@ contains type(linalg_state_type), intent(out), optional :: err type(linalg_state_type) :: err0 - integer(ilp) :: n, p, i, j, k, m, n_s - ${t1}$, allocatable :: mu(:) + integer(ilp) :: n, p + ${t1}$, allocatable :: mu(:), x_centered(:,:) character(16) :: method_ - + n = size(x, 1, kind=ilp) p = size(x, 2, kind=ilp) - k = size(components, 1, kind=ilp) - method_ = optval(method, "svd") - - ! 1. Calculate mean using intrinsic sum (avoids submodule dependency issues) + + ! Calculate mean along dimension 1 (column means) allocate(mu(p)) - do j = 1, p - mu(j) = sum(x(:, j)) / real(n, ${k1}$) - end do + mu = sum(x, dim=1) / real(n, ${k1}$) if (present(x_mean)) x_mean = mu - if (method_ == "svd") then - ! 2. Center data and call SVD with temporaries for robustness - block - ${t1}$, allocatable :: s_tmp(:), vt_tmp(:,:) - n_s = min(n, p) - allocate(s_tmp(n_s), vt_tmp(n_s, p)) - - if (optval(overwrite_x, .false.)) then - call center_data_${k1}$(x, mu) - call svd(x, s_tmp, vt=vt_tmp, overwrite_a=.true., full_matrices=.false., err=err0) - else - block - ${t1}$, allocatable :: x_centered(:,:) - allocate(x_centered(n, p)) - x_centered = x - call center_data_${k1}$(x_centered, mu) - call svd(x_centered, s_tmp, vt=vt_tmp, overwrite_a=.true., full_matrices=.false., err=err0) - end block - end if - - if (err0%ok()) then - m = min(size(components, 1, kind=ilp), n_s) - components(:m, :) = vt_tmp(:m, :) - m = min(size(singular_values, 1, kind=ilp), n_s) - singular_values(:m) = s_tmp(:m) - end if - end block - else if (method_ == "eig" .or. method_ == "cov") then - ! 3. Eigendecomposition of covariance matrix (computed inline) - block - ${t1}$, allocatable :: c(:,:), vectors(:,:), x_centered(:,:) - real(${k1}$), allocatable :: lambda(:), lambda_copy(:) - integer(ilp), allocatable :: idx(:) - real(${k1}$) :: scale_factor - - allocate(c(p, p), lambda(p), lambda_copy(p), idx(p), vectors(p, p)) + ! Method dispatch + select case (trim(method_)) + case ("svd") + if (optval(overwrite_x, .false.)) then + call center_data_${k1}$(x, mu) + call pca_svd_driver_${k1}$(x, n, p, components, singular_values, err0) + else allocate(x_centered(n, p)) - - ! Center data x_centered = x call center_data_${k1}$(x_centered, mu) - - ! Compute covariance matrix using BLAS syrk: C = (1/(n-1)) * X^T * X - ! syrk computes C := alpha*A**T*A + beta*C (upper triangle only) - scale_factor = 1.0_${k1}$ / real(max(n-1, 1), ${k1}$) - c = 0.0_${k1}$ - call syrk('U', 'T', p, n, scale_factor, x_centered, n, 0.0_${k1}$, c, p) - ! Fill lower triangle from upper triangle (syrk only fills upper) - do j = 1, p-1 - do i = j+1, p - c(i, j) = c(j, i) - end do - end do - - call eigh(c, lambda, vectors=vectors, err=err0) - - if (err0%ok()) then - ! Sort eigenvalues in descending order using stdlib_sorting - ! sort_index sorts in ascending order, so we negate values - lambda_copy = -lambda - call sort_index(lambda_copy, idx) - - ! Assign sorted results - m = min(size(components, 1, kind=ilp), p) - do i = 1, m - components(i, :) = vectors(:, idx(i)) - if (lambda(idx(i)) > 0.0_${k1}$) then - singular_values(i) = sqrt(lambda(idx(i)) * real(n-1, ${k1}$)) - else - singular_values(i) = 0.0_${k1}$ - end if - end do - end if - end block - else - err0 = linalg_state_type("pca", LINALG_ERROR, "Unknown method: "//method_) - end if - - ! Handle error state: return error or stop if err not present + call pca_svd_driver_${k1}$(x_centered, n, p, components, singular_values, err0) + end if + + case ("eig", "cov") + allocate(x_centered(n, p)) + x_centered = x + call center_data_${k1}$(x_centered, mu) + call pca_eigh_driver_${k1}$(x_centered, n, p, components, singular_values, err0) + + case default + err0 = linalg_state_type("pca", LINALG_ERROR, "Unknown method: "//trim(method_)) + end select + + ! Handle error state call err0%handle(err) - + end subroutine pca_${k1}$ #:endfor From bcabe8fea1dded921733b93196131f01e756c167 Mon Sep 17 00:00:00 2001 From: jaya sathvik Date: Tue, 13 Jan 2026 19:57:43 +0530 Subject: [PATCH 056/104] optimized for performance and stability --- src/stats/stdlib_stats_pca.fypp | 37 ++++++++++++++++----------------- 1 file changed, 18 insertions(+), 19 deletions(-) diff --git a/src/stats/stdlib_stats_pca.fypp b/src/stats/stdlib_stats_pca.fypp index 0e885cab5..2181a0d57 100644 --- a/src/stats/stdlib_stats_pca.fypp +++ b/src/stats/stdlib_stats_pca.fypp @@ -65,6 +65,7 @@ contains integer(ilp) :: i, j, m integer(ilp), allocatable :: idx(:) + ${t1}$ :: alpha, beta real(${k1}$) :: scale_factor real(${k1}$), allocatable :: lambda(:), lambda_copy(:) ${t1}$, allocatable :: c(:,:), vectors(:,:) @@ -73,9 +74,11 @@ contains ! Compute covariance matrix using BLAS syrk: C = (1/(n-1)) * X^T * X scale_factor = 1.0_${k1}$ / real(max(n-1, 1), ${k1}$) + alpha = real(scale_factor, ${k1}$) + beta = 0.0_${k1}$ c = 0.0_${k1}$ - call syrk('U', 'T', p, n, scale_factor, x_centered, n, 0.0_${k1}$, c, p) - ! Fill lower triangle from upper triangle + call syrk('U', 'T', p, n, alpha, x_centered, n, beta, c, p) + ! Fill lower triangle from upper triangle (syrk only fills upper) do j = 1, p-1 do i = j+1, p c(i, j) = c(j, i) @@ -89,8 +92,9 @@ contains lambda_copy = -lambda call sort_index(lambda_copy, idx) - ! Assign sorted results + ! Assign sorted results with safety bounds checks m = min(size(components, 1, kind=ilp), p) + m = min(m, size(singular_values, 1, kind=ilp)) do i = 1, m components(i, :) = vectors(:, idx(i)) if (lambda(idx(i)) > 0.0_${k1}$) then @@ -121,14 +125,14 @@ contains n = size(x, 1, kind=ilp) p = size(x, 2, kind=ilp) - method_ = optval(method, "svd") + method_ = adjustl(optval(method, "svd")) ! Calculate mean along dimension 1 (column means) allocate(mu(p)) mu = sum(x, dim=1) / real(n, ${k1}$) if (present(x_mean)) x_mean = mu - ! Method dispatch + ! Method dispatch using trimmed string for robustness select case (trim(method_)) case ("svd") if (optval(overwrite_x, .false.)) then @@ -165,26 +169,21 @@ contains ${t1}$, intent(in), optional :: x_mean(:) ${t1}$, intent(out) :: x_transformed(:,:) - integer(ilp) :: i, n, p, nc + integer(ilp) :: n, p, nc + ${t1}$ :: alpha, beta ${t1}$, allocatable :: x_centered(:,:) - ${t1}$, parameter :: alpha = 1.0_${k1}$, beta = 0.0_${k1}$ n = size(x, 1, kind=ilp) p = size(x, 2, kind=ilp) nc = size(components, 1, kind=ilp) allocate(x_centered(n, p)) - if (present(x_mean)) then - do i = 1, n - x_centered(i, :) = x(i, :) - x_mean - end do - else - x_centered = x - end if + x_centered = x + if (present(x_mean)) call center_data_${k1}$(x_centered, x_mean) ! x_transformed = x_centered * components^T using GEMM - ! GEMM: C = alpha * op(A) * op(B) + beta * C - ! x_transformed(n, nc) = x_centered(n, p) * components(nc, p)^T + alpha = 1.0_${k1}$ + beta = 0.0_${k1}$ call gemm('N', 'T', n, nc, p, alpha, x_centered, n, components, nc, beta, x_transformed, n) end subroutine pca_transform_${k1}$ #:endfor @@ -198,15 +197,15 @@ contains ${t1}$, intent(out) :: x_reconstructed(:,:) integer(ilp) :: i, n, nc, p - ${t1}$, parameter :: alpha = 1.0_${k1}$, beta = 0.0_${k1}$ + ${t1}$ :: alpha, beta n = size(x_reduced, 1, kind=ilp) nc = size(x_reduced, 2, kind=ilp) p = size(components, 2, kind=ilp) ! x_reconstructed = x_reduced * components using GEMM - ! GEMM: C = alpha * op(A) * op(B) + beta * C - ! x_reconstructed(n, p) = x_reduced(n, nc) * components(nc, p) + alpha = 1.0_${k1}$ + beta = 0.0_${k1}$ call gemm('N', 'N', n, p, nc, alpha, x_reduced, n, components, nc, beta, x_reconstructed, n) if (present(x_mean)) then From a769f2596dfb0050ce401dd2dea7c5d376e0aa22 Mon Sep 17 00:00:00 2001 From: jaya sathvik Date: Tue, 13 Jan 2026 22:18:25 +0530 Subject: [PATCH 057/104] Cache efficency --- src/stats/stdlib_stats_pca.fypp | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/src/stats/stdlib_stats_pca.fypp b/src/stats/stdlib_stats_pca.fypp index 2181a0d57..b1dc0a052 100644 --- a/src/stats/stdlib_stats_pca.fypp +++ b/src/stats/stdlib_stats_pca.fypp @@ -20,10 +20,11 @@ contains pure subroutine center_data_${k1}$(x, mu) ${t1}$, intent(inout) :: x(:,:) ${t1}$, intent(in) :: mu(:) - integer(ilp) :: i, n + integer(ilp) :: i, j, n, m n = size(x, 1, kind=ilp) - do i = 1, n - x(i, :) = x(i, :) - mu + m = size(x, 2, kind=ilp) + do concurrent( j=1:m, i=1:n ) + x(i, j) = x(i, j) - mu(j) end do end subroutine center_data_${k1}$ #:endfor @@ -196,7 +197,7 @@ contains ${t1}$, intent(in), optional :: x_mean(:) ${t1}$, intent(out) :: x_reconstructed(:,:) - integer(ilp) :: i, n, nc, p + integer(ilp) :: i, j, n, nc, p ${t1}$ :: alpha, beta n = size(x_reduced, 1, kind=ilp) @@ -209,8 +210,8 @@ contains call gemm('N', 'N', n, p, nc, alpha, x_reduced, n, components, nc, beta, x_reconstructed, n) if (present(x_mean)) then - do i = 1, n - x_reconstructed(i, :) = x_reconstructed(i, :) + x_mean + do concurrent( j=1:p, i=1:n ) + x_reconstructed(i, j) = x_reconstructed(i, j) + x_mean(j) end do end if end subroutine pca_inverse_transform_${k1}$ From 587abf7117b275f788ce9f19c4bdb68dfc07fdf9 Mon Sep 17 00:00:00 2001 From: jaya sathvik Date: Wed, 14 Jan 2026 01:20:55 +0530 Subject: [PATCH 058/104] fix issues build issues. --- example/CMakeLists.txt | 8 ++++---- src/CMakeLists.txt | 5 +++++ src/stats/CMakeLists.txt | 10 ++++++++++ 3 files changed, 19 insertions(+), 4 deletions(-) diff --git a/example/CMakeLists.txt b/example/CMakeLists.txt index 7819170c0..9fb02f0fa 100644 --- a/example/CMakeLists.txt +++ b/example/CMakeLists.txt @@ -2,16 +2,16 @@ macro(ADD_EXAMPLE name) add_executable(example_${name} example_${name}.f90) target_link_libraries(example_${name} "${PROJECT_NAME}") add_test(NAME ${name} - COMMAND $ ${CMAKE_CURRENT_BINARY_DIR} - WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}) + COMMAND $ ${CMAKE_CURRENT_SOURCE_DIR} + WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}) endmacro(ADD_EXAMPLE) macro(ADD_EXAMPLEPP name) add_executable(example_${name} example_${name}.F90) target_link_libraries(example_${name} "${PROJECT_NAME}") add_test(NAME ${name} - COMMAND $ ${CMAKE_CURRENT_BINARY_DIR} - WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}) + COMMAND $ ${CMAKE_CURRENT_SOURCE_DIR} + WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}) endmacro(ADD_EXAMPLEPP) diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 5178e0551..6aeecac2b 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -1,5 +1,10 @@ if (STDLIB_BITSET) add_subdirectory(bitsets) +else() + # Provide a dummy INTERFACE target so other targets can link to "bitsets" + # without causing the linker to be passed unresolved -lbitsets flags. + add_library(bitsets INTERFACE) + target_compile_definitions(bitsets INTERFACE STDLIB_NO_BITSET=1) endif() add_subdirectory(blas) add_subdirectory(lapack) diff --git a/src/stats/CMakeLists.txt b/src/stats/CMakeLists.txt index 3a9553c70..d6e7c7c1f 100644 --- a/src/stats/CMakeLists.txt +++ b/src/stats/CMakeLists.txt @@ -42,3 +42,13 @@ set(f90Files configure_stdlib_target(stats "" stats_fppFiles stats_cppFiles) target_link_libraries(stats PUBLIC blas lapack $<$>:bitsets>) + +# Workaround for Intel Fortran compiler ICE (Internal Compiler Error) in stdlib_stats_pca.f90 +# The Intel ifx compiler (2024.1) triggers a segmentation violation during optimization. +# Compiling with -O0 avoids the optimizer codepath that causes the ICE. +if(CMAKE_Fortran_COMPILER_ID MATCHES "Intel") + set_source_files_properties( + ${CMAKE_CURRENT_BINARY_DIR}/stdlib_stats_pca.f90 + PROPERTIES COMPILE_FLAGS "-O0 -g" + ) +endif() From 83fe1d0248ff8ed1eaa9407227d0800ad6b7a322 Mon Sep 17 00:00:00 2001 From: jaya sathvik Date: Wed, 14 Jan 2026 01:25:58 +0530 Subject: [PATCH 059/104] Revert "fix issues build issues." This reverts commit 7348fafb6f0e2db5d208a1434b13fbfb65d5c879. --- example/CMakeLists.txt | 8 ++++---- src/CMakeLists.txt | 5 ----- src/stats/CMakeLists.txt | 10 ---------- 3 files changed, 4 insertions(+), 19 deletions(-) diff --git a/example/CMakeLists.txt b/example/CMakeLists.txt index 9fb02f0fa..7819170c0 100644 --- a/example/CMakeLists.txt +++ b/example/CMakeLists.txt @@ -2,16 +2,16 @@ macro(ADD_EXAMPLE name) add_executable(example_${name} example_${name}.f90) target_link_libraries(example_${name} "${PROJECT_NAME}") add_test(NAME ${name} - COMMAND $ ${CMAKE_CURRENT_SOURCE_DIR} - WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}) + COMMAND $ ${CMAKE_CURRENT_BINARY_DIR} + WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}) endmacro(ADD_EXAMPLE) macro(ADD_EXAMPLEPP name) add_executable(example_${name} example_${name}.F90) target_link_libraries(example_${name} "${PROJECT_NAME}") add_test(NAME ${name} - COMMAND $ ${CMAKE_CURRENT_SOURCE_DIR} - WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}) + COMMAND $ ${CMAKE_CURRENT_BINARY_DIR} + WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}) endmacro(ADD_EXAMPLEPP) diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 6aeecac2b..5178e0551 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -1,10 +1,5 @@ if (STDLIB_BITSET) add_subdirectory(bitsets) -else() - # Provide a dummy INTERFACE target so other targets can link to "bitsets" - # without causing the linker to be passed unresolved -lbitsets flags. - add_library(bitsets INTERFACE) - target_compile_definitions(bitsets INTERFACE STDLIB_NO_BITSET=1) endif() add_subdirectory(blas) add_subdirectory(lapack) diff --git a/src/stats/CMakeLists.txt b/src/stats/CMakeLists.txt index d6e7c7c1f..3a9553c70 100644 --- a/src/stats/CMakeLists.txt +++ b/src/stats/CMakeLists.txt @@ -42,13 +42,3 @@ set(f90Files configure_stdlib_target(stats "" stats_fppFiles stats_cppFiles) target_link_libraries(stats PUBLIC blas lapack $<$>:bitsets>) - -# Workaround for Intel Fortran compiler ICE (Internal Compiler Error) in stdlib_stats_pca.f90 -# The Intel ifx compiler (2024.1) triggers a segmentation violation during optimization. -# Compiling with -O0 avoids the optimizer codepath that causes the ICE. -if(CMAKE_Fortran_COMPILER_ID MATCHES "Intel") - set_source_files_properties( - ${CMAKE_CURRENT_BINARY_DIR}/stdlib_stats_pca.f90 - PROPERTIES COMPILE_FLAGS "-O0 -g" - ) -endif() From 5d0c88e0e1188ed590b8d21018490af082ee6c9c Mon Sep 17 00:00:00 2001 From: jaya sathvik Date: Sat, 17 Jan 2026 00:38:09 +0530 Subject: [PATCH 060/104] use nested do loops --- src/stats/stdlib_stats_pca.fypp | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/src/stats/stdlib_stats_pca.fypp b/src/stats/stdlib_stats_pca.fypp index b1dc0a052..175c283f5 100644 --- a/src/stats/stdlib_stats_pca.fypp +++ b/src/stats/stdlib_stats_pca.fypp @@ -23,8 +23,10 @@ contains integer(ilp) :: i, j, n, m n = size(x, 1, kind=ilp) m = size(x, 2, kind=ilp) - do concurrent( j=1:m, i=1:n ) - x(i, j) = x(i, j) - mu(j) + do j = 1, m + do i = 1, n + x(i, j) = x(i, j) - mu(j) + end do end do end subroutine center_data_${k1}$ #:endfor @@ -210,8 +212,10 @@ contains call gemm('N', 'N', n, p, nc, alpha, x_reduced, n, components, nc, beta, x_reconstructed, n) if (present(x_mean)) then - do concurrent( j=1:p, i=1:n ) - x_reconstructed(i, j) = x_reconstructed(i, j) + x_mean(j) + do j = 1, p + do i = 1, n + x_reconstructed(i, j) = x_reconstructed(i, j) + x_mean(j) + end do end do end if end subroutine pca_inverse_transform_${k1}$ From 997944908238a17a28b98b98e4e786c1bbdb611c Mon Sep 17 00:00:00 2001 From: jaya sathvik Date: Sat, 17 Jan 2026 01:08:30 +0530 Subject: [PATCH 061/104] resolve compiler errors --- src/stats/stdlib_stats.fypp | 2 +- src/stats/stdlib_stats_pca.fypp | 47 +++++++++++++++++++++------------ 2 files changed, 31 insertions(+), 18 deletions(-) diff --git a/src/stats/stdlib_stats.fypp b/src/stats/stdlib_stats.fypp index 79774c163..9ca59cbc8 100644 --- a/src/stats/stdlib_stats.fypp +++ b/src/stats/stdlib_stats.fypp @@ -655,7 +655,7 @@ module stdlib_stats #:for k1, t1 in PCA_KINDS_TYPES module subroutine pca_${k1}$(x, components, singular_values, x_mean, & method, overwrite_x, err) - ${t1}$, intent(inout), target :: x(:,:) + ${t1}$, intent(inout) :: x(:,:) ${t1}$, intent(out) :: components(:,:) real(${k1}$), intent(out) :: singular_values(:) ${t1}$, intent(out), optional :: x_mean(:) diff --git a/src/stats/stdlib_stats_pca.fypp b/src/stats/stdlib_stats_pca.fypp index 175c283f5..793e60550 100644 --- a/src/stats/stdlib_stats_pca.fypp +++ b/src/stats/stdlib_stats_pca.fypp @@ -17,7 +17,7 @@ contains ! Helper subroutine: Centers data in-place by subtracting the mean from each row #:for k1, t1 in PCA_KINDS_TYPES - pure subroutine center_data_${k1}$(x, mu) + subroutine center_data_${k1}$(x, mu) ${t1}$, intent(inout) :: x(:,:) ${t1}$, intent(in) :: mu(:) integer(ilp) :: i, j, n, m @@ -40,19 +40,27 @@ contains real(${k1}$), intent(out) :: singular_values(:) type(linalg_state_type), intent(out) :: err - integer(ilp) :: n_s, m + integer(ilp) :: n_s, m, i ${t1}$, allocatable :: s_tmp(:), vt_tmp(:,:) n_s = min(n, p) - allocate(s_tmp(n_s), vt_tmp(n_s, p)) + allocate(s_tmp(n_s)) + allocate(vt_tmp(n_s, p)) call svd(x_centered, s_tmp, vt=vt_tmp, overwrite_a=.true., full_matrices=.false., err=err) if (err%ok()) then - m = min(size(components, 1, kind=ilp), n_s) - components(:m, :) = vt_tmp(:m, :) - m = min(size(singular_values, 1, kind=ilp), n_s) - singular_values(:m) = s_tmp(:m) + m = size(components, 1, kind=ilp) + if (n_s < m) m = n_s + do i = 1, m + components(i, :) = vt_tmp(i, :) + end do + + m = size(singular_values, 1, kind=ilp) + if (n_s < m) m = n_s + do i = 1, m + singular_values(i) = s_tmp(i) + end do end if end subroutine pca_svd_driver_${k1}$ #:endfor @@ -73,7 +81,11 @@ contains real(${k1}$), allocatable :: lambda(:), lambda_copy(:) ${t1}$, allocatable :: c(:,:), vectors(:,:) - allocate(c(p, p), lambda(p), lambda_copy(p), idx(p), vectors(p, p)) + allocate(c(p, p)) + allocate(lambda(p)) + allocate(lambda_copy(p)) + allocate(idx(p)) + allocate(vectors(p, p)) ! Compute covariance matrix using BLAS syrk: C = (1/(n-1)) * X^T * X scale_factor = 1.0_${k1}$ / real(max(n-1, 1), ${k1}$) @@ -113,7 +125,7 @@ contains #:for k1, t1 in PCA_KINDS_TYPES module subroutine pca_${k1}$(x, components, singular_values, x_mean, & method, overwrite_x, err) - ${t1}$, intent(inout), target :: x(:,:) + ${t1}$, intent(inout) :: x(:,:) ${t1}$, intent(out) :: components(:,:) real(${k1}$), intent(out) :: singular_values(:) ${t1}$, intent(out), optional :: x_mean(:) @@ -135,9 +147,8 @@ contains mu = sum(x, dim=1) / real(n, ${k1}$) if (present(x_mean)) x_mean = mu - ! Method dispatch using trimmed string for robustness - select case (trim(method_)) - case ("svd") + ! Method dispatch using if-else for maximum compiler stability + if (trim(method_) == "svd") then if (optval(overwrite_x, .false.)) then call center_data_${k1}$(x, mu) call pca_svd_driver_${k1}$(x, n, p, components, singular_values, err0) @@ -148,18 +159,20 @@ contains call pca_svd_driver_${k1}$(x_centered, n, p, components, singular_values, err0) end if - case ("eig", "cov") + else if (trim(method_) == "eig" .or. trim(method_) == "cov") then allocate(x_centered(n, p)) x_centered = x call center_data_${k1}$(x_centered, mu) call pca_eigh_driver_${k1}$(x_centered, n, p, components, singular_values, err0) - case default + else err0 = linalg_state_type("pca", LINALG_ERROR, "Unknown method: "//trim(method_)) - end select + end if - ! Handle error state - call err0%handle(err) + ! Handle error state explicitly + if (present(err)) then + err = err0 + end if end subroutine pca_${k1}$ #:endfor From b23a670ee1ea0c48e6abf42d95cda4ab997a5fb1 Mon Sep 17 00:00:00 2001 From: jaya sathvik Date: Sat, 17 Jan 2026 13:22:55 +0530 Subject: [PATCH 062/104] fix issue --- src/stats/CMakeLists.txt | 2 +- src/stats/stdlib_stats.fypp | 11 ++- src/stats/stdlib_stats_pca.fypp | 124 ++++++++++++++++---------------- test/stats/test_pca.fypp | 8 ++- 4 files changed, 78 insertions(+), 67 deletions(-) diff --git a/src/stats/CMakeLists.txt b/src/stats/CMakeLists.txt index 3a9553c70..ab0e548ee 100644 --- a/src/stats/CMakeLists.txt +++ b/src/stats/CMakeLists.txt @@ -41,4 +41,4 @@ set(f90Files ) configure_stdlib_target(stats "" stats_fppFiles stats_cppFiles) -target_link_libraries(stats PUBLIC blas lapack $<$>:bitsets>) +target_link_libraries(stats PUBLIC blas lapack $<$:bitsets>) diff --git a/src/stats/stdlib_stats.fypp b/src/stats/stdlib_stats.fypp index 9ca59cbc8..d2bfc4eb6 100644 --- a/src/stats/stdlib_stats.fypp +++ b/src/stats/stdlib_stats.fypp @@ -642,10 +642,15 @@ module stdlib_stats end interface moment - #! Note: PCA is limited to single (sp) and double (dp) precision because external - #! optimized BLAS/LAPACK libraries (OpenBLAS, MKL) only support these precisions. - #! Extended (xdp) and quadruple (qp) precision are not supported for PCA. + #! PCA supports all real kinds available in stdlib's internal BLAS/LAPACK. + #! When WITH_XDP or WITH_QP are enabled, extended precision is also supported. #:set PCA_KINDS_TYPES = [("sp", "real(sp)"), ("dp", "real(dp)")] + #:if WITH_XDP + #:set PCA_KINDS_TYPES = PCA_KINDS_TYPES + [("xdp", "real(xdp)")] + #:endif + #:if WITH_QP + #:set PCA_KINDS_TYPES = PCA_KINDS_TYPES + [("qp", "real(qp)")] + #:endif interface pca !! version: experimental diff --git a/src/stats/stdlib_stats_pca.fypp b/src/stats/stdlib_stats_pca.fypp index 793e60550..4cecde8da 100644 --- a/src/stats/stdlib_stats_pca.fypp +++ b/src/stats/stdlib_stats_pca.fypp @@ -1,30 +1,36 @@ #:include "common.fypp" -#! Note: PCA is limited to single (sp) and double (dp) precision because external -#! optimized BLAS/LAPACK libraries (OpenBLAS, MKL) only support these precisions. +#! PCA supports all real kinds available in stdlib's internal BLAS/LAPACK. +#! When WITH_XDP or WITH_QP are enabled, extended precision is also supported. #:set PCA_KINDS_TYPES = [("sp", "real(sp)"), ("dp", "real(dp)")] +#:if WITH_XDP +#:set PCA_KINDS_TYPES = PCA_KINDS_TYPES + [("xdp", "real(xdp)")] +#:endif +#:if WITH_QP +#:set PCA_KINDS_TYPES = PCA_KINDS_TYPES + [("qp", "real(qp)")] +#:endif submodule (stdlib_stats) stdlib_stats_pca - use stdlib_kinds, only: sp, dp + use stdlib_kinds, only: sp, dp, xdp, qp use stdlib_error, only: error_stop use stdlib_optval, only: optval use stdlib_linalg, only: svd, eigh use stdlib_linalg_constants, only: ilp use stdlib_linalg_blas, only: gemm, syrk - use stdlib_linalg_state, only: linalg_state_type, LINALG_ERROR + use stdlib_linalg_state, only: linalg_state_type, LINALG_ERROR, LINALG_VALUE_ERROR use stdlib_sorting, only: sort_index implicit none contains - ! Helper subroutine: Centers data in-place by subtracting the mean from each row + ! Helper subroutine: Centers data in-place by subtracting the mean from each column #:for k1, t1 in PCA_KINDS_TYPES subroutine center_data_${k1}$(x, mu) ${t1}$, intent(inout) :: x(:,:) ${t1}$, intent(in) :: mu(:) - integer(ilp) :: i, j, n, m - n = size(x, 1, kind=ilp) - m = size(x, 2, kind=ilp) - do j = 1, m - do i = 1, n + integer(ilp) :: i, j, m, n + m = size(x, 1, kind=ilp) + n = size(x, 2, kind=ilp) + do j = 1, n + do i = 1, m x(i, j) = x(i, j) - mu(j) end do end do @@ -34,13 +40,14 @@ contains ! SVD-based PCA driver: computes principal components via SVD of centered data #:for k1, t1 in PCA_KINDS_TYPES subroutine pca_svd_driver_${k1}$(x_centered, n, p, components, singular_values, err) + use stdlib_blas_constants_${k1}$, only: one, zero ${t1}$, intent(inout) :: x_centered(:,:) integer(ilp), intent(in) :: n, p ${t1}$, intent(out) :: components(:,:) real(${k1}$), intent(out) :: singular_values(:) type(linalg_state_type), intent(out) :: err - integer(ilp) :: n_s, m, i + integer(ilp) :: n_s, m ${t1}$, allocatable :: s_tmp(:), vt_tmp(:,:) n_s = min(n, p) @@ -50,17 +57,10 @@ contains call svd(x_centered, s_tmp, vt=vt_tmp, overwrite_a=.true., full_matrices=.false., err=err) if (err%ok()) then - m = size(components, 1, kind=ilp) - if (n_s < m) m = n_s - do i = 1, m - components(i, :) = vt_tmp(i, :) - end do - - m = size(singular_values, 1, kind=ilp) - if (n_s < m) m = n_s - do i = 1, m - singular_values(i) = s_tmp(i) - end do + m = min(size(components, 1, kind=ilp), n_s) + components(1:m, :) = vt_tmp(1:m, :) + m = min(size(singular_values, 1, kind=ilp), n_s) + singular_values(1:m) = s_tmp(1:m) end if end subroutine pca_svd_driver_${k1}$ #:endfor @@ -68,6 +68,7 @@ contains ! Eigendecomposition-based PCA driver: computes principal components via covariance matrix #:for k1, t1 in PCA_KINDS_TYPES subroutine pca_eigh_driver_${k1}$(x_centered, n, p, components, singular_values, err) + use stdlib_blas_constants_${k1}$, only: one, zero ${t1}$, intent(in) :: x_centered(:,:) integer(ilp), intent(in) :: n, p ${t1}$, intent(out) :: components(:,:) @@ -81,18 +82,13 @@ contains real(${k1}$), allocatable :: lambda(:), lambda_copy(:) ${t1}$, allocatable :: c(:,:), vectors(:,:) - allocate(c(p, p)) - allocate(lambda(p)) - allocate(lambda_copy(p)) - allocate(idx(p)) - allocate(vectors(p, p)) - ! Compute covariance matrix using BLAS syrk: C = (1/(n-1)) * X^T * X scale_factor = 1.0_${k1}$ / real(max(n-1, 1), ${k1}$) - alpha = real(scale_factor, ${k1}$) - beta = 0.0_${k1}$ - c = 0.0_${k1}$ + alpha = scale_factor + beta = zero + allocate(c(p, p), source=zero) call syrk('U', 'T', p, n, alpha, x_centered, n, beta, c, p) + ! Fill lower triangle from upper triangle (syrk only fills upper) do j = 1, p-1 do i = j+1, p @@ -100,11 +96,14 @@ contains end do end do + allocate(lambda(p)) + allocate(vectors(p, p)) call eigh(c, lambda, vectors=vectors, err=err) if (err%ok()) then ! Sort eigenvalues in descending order - lambda_copy = -lambda + allocate(lambda_copy, source=-lambda) + allocate(idx(p)) call sort_index(lambda_copy, idx) ! Assign sorted results with safety bounds checks @@ -136,43 +135,49 @@ contains type(linalg_state_type) :: err0 integer(ilp) :: n, p ${t1}$, allocatable :: mu(:), x_centered(:,:) - character(16) :: method_ + character(len=:), allocatable :: method_ n = size(x, 1, kind=ilp) p = size(x, 2, kind=ilp) - method_ = adjustl(optval(method, "svd")) + method_ = trim(adjustl(optval(method, "svd"))) + + ! Calculate mean along dimension 1 (column means) using stdlib mean + allocate(mu, source=mean(x, 1)) - ! Calculate mean along dimension 1 (column means) - allocate(mu(p)) - mu = sum(x, dim=1) / real(n, ${k1}$) - if (present(x_mean)) x_mean = mu + ! Validate and assign x_mean if present + if (present(x_mean)) then + if (size(x_mean) < p) then + err0 = linalg_state_type("pca", LINALG_VALUE_ERROR, & + "x_mean array has insufficient size:", size(x_mean), ", expected:", p) + call err0%handle(err) + return + end if + x_mean(1:p) = mu + end if - ! Method dispatch using if-else for maximum compiler stability - if (trim(method_) == "svd") then + ! Method dispatch + select case (method_) + case ("svd") if (optval(overwrite_x, .false.)) then call center_data_${k1}$(x, mu) call pca_svd_driver_${k1}$(x, n, p, components, singular_values, err0) else - allocate(x_centered(n, p)) - x_centered = x + allocate(x_centered, source=x) call center_data_${k1}$(x_centered, mu) call pca_svd_driver_${k1}$(x_centered, n, p, components, singular_values, err0) end if - else if (trim(method_) == "eig" .or. trim(method_) == "cov") then - allocate(x_centered(n, p)) - x_centered = x + case ("eig", "cov") + allocate(x_centered, source=x) call center_data_${k1}$(x_centered, mu) call pca_eigh_driver_${k1}$(x_centered, n, p, components, singular_values, err0) - else - err0 = linalg_state_type("pca", LINALG_ERROR, "Unknown method: "//trim(method_)) - end if + case default + err0 = linalg_state_type("pca", LINALG_ERROR, "Unknown method: "//method_) + end select - ! Handle error state explicitly - if (present(err)) then - err = err0 - end if + ! Handle error state + call err0%handle(err) end subroutine pca_${k1}$ #:endfor @@ -180,49 +185,44 @@ contains #:for k1, t1 in PCA_KINDS_TYPES module subroutine pca_transform_${k1}$(x, components, x_mean, x_transformed) + use stdlib_blas_constants_${k1}$, only: one, zero ${t1}$, intent(in) :: x(:,:) ${t1}$, intent(in) :: components(:,:) ${t1}$, intent(in), optional :: x_mean(:) ${t1}$, intent(out) :: x_transformed(:,:) integer(ilp) :: n, p, nc - ${t1}$ :: alpha, beta ${t1}$, allocatable :: x_centered(:,:) n = size(x, 1, kind=ilp) p = size(x, 2, kind=ilp) nc = size(components, 1, kind=ilp) - allocate(x_centered(n, p)) - x_centered = x + allocate(x_centered, source=x) if (present(x_mean)) call center_data_${k1}$(x_centered, x_mean) ! x_transformed = x_centered * components^T using GEMM - alpha = 1.0_${k1}$ - beta = 0.0_${k1}$ - call gemm('N', 'T', n, nc, p, alpha, x_centered, n, components, nc, beta, x_transformed, n) + call gemm('N', 'T', n, nc, p, one, x_centered, n, components, nc, zero, x_transformed, n) end subroutine pca_transform_${k1}$ #:endfor #:for k1, t1 in PCA_KINDS_TYPES module subroutine pca_inverse_transform_${k1}$(x_reduced, components, x_mean, x_reconstructed) + use stdlib_blas_constants_${k1}$, only: one, zero ${t1}$, intent(in) :: x_reduced(:,:) ${t1}$, intent(in) :: components(:,:) ${t1}$, intent(in), optional :: x_mean(:) ${t1}$, intent(out) :: x_reconstructed(:,:) integer(ilp) :: i, j, n, nc, p - ${t1}$ :: alpha, beta n = size(x_reduced, 1, kind=ilp) nc = size(x_reduced, 2, kind=ilp) p = size(components, 2, kind=ilp) ! x_reconstructed = x_reduced * components using GEMM - alpha = 1.0_${k1}$ - beta = 0.0_${k1}$ - call gemm('N', 'N', n, p, nc, alpha, x_reduced, n, components, nc, beta, x_reconstructed, n) + call gemm('N', 'N', n, p, nc, one, x_reduced, n, components, nc, zero, x_reconstructed, n) if (present(x_mean)) then do j = 1, p diff --git a/test/stats/test_pca.fypp b/test/stats/test_pca.fypp index 2f0c8d05e..e318990ee 100644 --- a/test/stats/test_pca.fypp +++ b/test/stats/test_pca.fypp @@ -1,8 +1,14 @@ #:include "common.fypp" #:set PCA_KINDS = ["sp", "dp"] +#:if WITH_XDP +#:set PCA_KINDS = PCA_KINDS + ["xdp"] +#:endif +#:if WITH_QP +#:set PCA_KINDS = PCA_KINDS + ["qp"] +#:endif program test_pca use stdlib_error, only: check - use stdlib_kinds, only: sp, dp + use stdlib_kinds, only: sp, dp, xdp, qp use stdlib_stats, only: pca, pca_transform, pca_inverse_transform use stdlib_linalg_state, only: linalg_state_type implicit none From ecbccd18a3f365417cb2882fe87221b034b719be Mon Sep 17 00:00:00 2001 From: jaya sathvik Date: Sat, 17 Jan 2026 14:07:32 +0530 Subject: [PATCH 063/104] remove unused BLAS constants to prevent compiler warnings --- src/stats/stdlib_stats_pca.fypp | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/stats/stdlib_stats_pca.fypp b/src/stats/stdlib_stats_pca.fypp index 4cecde8da..a594d797d 100644 --- a/src/stats/stdlib_stats_pca.fypp +++ b/src/stats/stdlib_stats_pca.fypp @@ -40,7 +40,6 @@ contains ! SVD-based PCA driver: computes principal components via SVD of centered data #:for k1, t1 in PCA_KINDS_TYPES subroutine pca_svd_driver_${k1}$(x_centered, n, p, components, singular_values, err) - use stdlib_blas_constants_${k1}$, only: one, zero ${t1}$, intent(inout) :: x_centered(:,:) integer(ilp), intent(in) :: n, p ${t1}$, intent(out) :: components(:,:) @@ -68,7 +67,7 @@ contains ! Eigendecomposition-based PCA driver: computes principal components via covariance matrix #:for k1, t1 in PCA_KINDS_TYPES subroutine pca_eigh_driver_${k1}$(x_centered, n, p, components, singular_values, err) - use stdlib_blas_constants_${k1}$, only: one, zero + use stdlib_blas_constants_${k1}$, only: zero ${t1}$, intent(in) :: x_centered(:,:) integer(ilp), intent(in) :: n, p ${t1}$, intent(out) :: components(:,:) From cc10e9578cd9ac3fb262ee24f39a8483e36b43cc Mon Sep 17 00:00:00 2001 From: jaya sathvik Date: Sat, 17 Jan 2026 14:13:25 +0530 Subject: [PATCH 064/104] remove unused import --- src/stats/stdlib_stats_pca.fypp | 1 - 1 file changed, 1 deletion(-) diff --git a/src/stats/stdlib_stats_pca.fypp b/src/stats/stdlib_stats_pca.fypp index a594d797d..11b1901c8 100644 --- a/src/stats/stdlib_stats_pca.fypp +++ b/src/stats/stdlib_stats_pca.fypp @@ -10,7 +10,6 @@ #:endif submodule (stdlib_stats) stdlib_stats_pca use stdlib_kinds, only: sp, dp, xdp, qp - use stdlib_error, only: error_stop use stdlib_optval, only: optval use stdlib_linalg, only: svd, eigh use stdlib_linalg_constants, only: ilp From 496d744218979a1dc09c2c9f6ee90d2fdf0b57a0 Mon Sep 17 00:00:00 2001 From: jaya sathvik Date: Sat, 17 Jan 2026 14:21:26 +0530 Subject: [PATCH 065/104] remove unused output arrays --- src/stats/stdlib_stats_pca.fypp | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/src/stats/stdlib_stats_pca.fypp b/src/stats/stdlib_stats_pca.fypp index 11b1901c8..cd6fcaedc 100644 --- a/src/stats/stdlib_stats_pca.fypp +++ b/src/stats/stdlib_stats_pca.fypp @@ -39,6 +39,7 @@ contains ! SVD-based PCA driver: computes principal components via SVD of centered data #:for k1, t1 in PCA_KINDS_TYPES subroutine pca_svd_driver_${k1}$(x_centered, n, p, components, singular_values, err) + use stdlib_blas_constants_${k1}$, only: zero ${t1}$, intent(inout) :: x_centered(:,:) integer(ilp), intent(in) :: n, p ${t1}$, intent(out) :: components(:,:) @@ -48,6 +49,10 @@ contains integer(ilp) :: n_s, m ${t1}$, allocatable :: s_tmp(:), vt_tmp(:,:) + ! Initialize outputs to zero to prevent uninitialized memory access + components = zero + singular_values = zero + n_s = min(n, p) allocate(s_tmp(n_s)) allocate(vt_tmp(n_s, p)) @@ -80,6 +85,10 @@ contains real(${k1}$), allocatable :: lambda(:), lambda_copy(:) ${t1}$, allocatable :: c(:,:), vectors(:,:) + ! Initialize outputs to zero to prevent uninitialized memory access + components = zero + singular_values = zero + ! Compute covariance matrix using BLAS syrk: C = (1/(n-1)) * X^T * X scale_factor = 1.0_${k1}$ / real(max(n-1, 1), ${k1}$) alpha = scale_factor From 6c48366142c1674ad5d849ca4d2de27c14027a69 Mon Sep 17 00:00:00 2001 From: jaya sathvik Date: Sat, 17 Jan 2026 14:21:26 +0530 Subject: [PATCH 066/104] remove unused output arrays --- src/stats/stdlib_stats_pca.fypp | 18 +++++++++++++++--- 1 file changed, 15 insertions(+), 3 deletions(-) diff --git a/src/stats/stdlib_stats_pca.fypp b/src/stats/stdlib_stats_pca.fypp index 11b1901c8..c4f43a987 100644 --- a/src/stats/stdlib_stats_pca.fypp +++ b/src/stats/stdlib_stats_pca.fypp @@ -39,6 +39,7 @@ contains ! SVD-based PCA driver: computes principal components via SVD of centered data #:for k1, t1 in PCA_KINDS_TYPES subroutine pca_svd_driver_${k1}$(x_centered, n, p, components, singular_values, err) + use stdlib_blas_constants_${k1}$, only: zero ${t1}$, intent(inout) :: x_centered(:,:) integer(ilp), intent(in) :: n, p ${t1}$, intent(out) :: components(:,:) @@ -48,6 +49,10 @@ contains integer(ilp) :: n_s, m ${t1}$, allocatable :: s_tmp(:), vt_tmp(:,:) + ! Initialize outputs to zero to prevent uninitialized memory access + components = zero + singular_values = zero + n_s = min(n, p) allocate(s_tmp(n_s)) allocate(vt_tmp(n_s, p)) @@ -80,11 +85,16 @@ contains real(${k1}$), allocatable :: lambda(:), lambda_copy(:) ${t1}$, allocatable :: c(:,:), vectors(:,:) + ! Initialize outputs to zero to prevent uninitialized memory access + components = zero + singular_values = zero + ! Compute covariance matrix using BLAS syrk: C = (1/(n-1)) * X^T * X scale_factor = 1.0_${k1}$ / real(max(n-1, 1), ${k1}$) alpha = scale_factor beta = zero - allocate(c(p, p), source=zero) + allocate(c(p, p)) + c = zero call syrk('U', 'T', p, n, alpha, x_centered, n, beta, c, p) ! Fill lower triangle from upper triangle (syrk only fills upper) @@ -100,7 +110,8 @@ contains if (err%ok()) then ! Sort eigenvalues in descending order - allocate(lambda_copy, source=-lambda) + allocate(lambda_copy(p)) + lambda_copy = -lambda allocate(idx(p)) call sort_index(lambda_copy, idx) @@ -140,7 +151,8 @@ contains method_ = trim(adjustl(optval(method, "svd"))) ! Calculate mean along dimension 1 (column means) using stdlib mean - allocate(mu, source=mean(x, 1)) + allocate(mu(p)) + mu = mean(x, 1) ! Validate and assign x_mean if present if (present(x_mean)) then From a837b6b8443070ed909e09626c4b88bfdd988ff8 Mon Sep 17 00:00:00 2001 From: jaya sathvik Date: Mon, 19 Jan 2026 22:34:18 +0530 Subject: [PATCH 067/104] fix: replace string concatenation with comma args to fix ifx crash --- src/stats/stdlib_stats_pca.fypp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/stats/stdlib_stats_pca.fypp b/src/stats/stdlib_stats_pca.fypp index c4f43a987..4af5275aa 100644 --- a/src/stats/stdlib_stats_pca.fypp +++ b/src/stats/stdlib_stats_pca.fypp @@ -183,7 +183,7 @@ contains call pca_eigh_driver_${k1}$(x_centered, n, p, components, singular_values, err0) case default - err0 = linalg_state_type("pca", LINALG_ERROR, "Unknown method: "//method_) + err0 = linalg_state_type("pca", LINALG_ERROR, "Unknown method: ", method_) end select ! Handle error state From baf8ff55a3d0e0868283317b06622cb111a8b88e Mon Sep 17 00:00:00 2001 From: jaya sathvik Date: Tue, 20 Jan 2026 22:06:25 +0530 Subject: [PATCH 068/104] Use REAL_KINDS_TYPES --- src/stats/stdlib_stats.fypp | 16 +++------------- src/stats/stdlib_stats_pca.fypp | 21 ++++++--------------- 2 files changed, 9 insertions(+), 28 deletions(-) diff --git a/src/stats/stdlib_stats.fypp b/src/stats/stdlib_stats.fypp index d2bfc4eb6..0bc99e8f2 100644 --- a/src/stats/stdlib_stats.fypp +++ b/src/stats/stdlib_stats.fypp @@ -642,22 +642,12 @@ module stdlib_stats end interface moment - #! PCA supports all real kinds available in stdlib's internal BLAS/LAPACK. - #! When WITH_XDP or WITH_QP are enabled, extended precision is also supported. - #:set PCA_KINDS_TYPES = [("sp", "real(sp)"), ("dp", "real(dp)")] - #:if WITH_XDP - #:set PCA_KINDS_TYPES = PCA_KINDS_TYPES + [("xdp", "real(xdp)")] - #:endif - #:if WITH_QP - #:set PCA_KINDS_TYPES = PCA_KINDS_TYPES + [("qp", "real(qp)")] - #:endif - interface pca !! version: experimental !! !! Principal Component Analysis (PCA) !! ([Specification](../page/specs/stdlib_stats.html#pca)) - #:for k1, t1 in PCA_KINDS_TYPES + #:for k1, t1, ri, cpp in REAL_KINDS_TYPES module subroutine pca_${k1}$(x, components, singular_values, x_mean, & method, overwrite_x, err) ${t1}$, intent(inout) :: x(:,:) @@ -677,7 +667,7 @@ module stdlib_stats !! !! Projects data into the reduced dimensional space !! ([Specification](../page/specs/stdlib_stats.html#pca_transform)) - #:for k1, t1 in PCA_KINDS_TYPES + #:for k1, t1, ri, cpp in REAL_KINDS_TYPES module subroutine pca_transform_${k1}$(x, components, x_mean, x_transformed) ${t1}$, intent(in) :: x(:,:) ${t1}$, intent(in) :: components(:,:) @@ -693,7 +683,7 @@ module stdlib_stats !! !! Reconstructs original data from the reduced space !! ([Specification](../page/specs/stdlib_stats.html#pca_inverse_transform)) - #:for k1, t1 in PCA_KINDS_TYPES + #:for k1, t1, ri, cpp in REAL_KINDS_TYPES module subroutine pca_inverse_transform_${k1}$(x_reduced, components, x_mean, x_reconstructed) ${t1}$, intent(in) :: x_reduced(:,:) ${t1}$, intent(in) :: components(:,:) diff --git a/src/stats/stdlib_stats_pca.fypp b/src/stats/stdlib_stats_pca.fypp index 4af5275aa..bf9fbc9b1 100644 --- a/src/stats/stdlib_stats_pca.fypp +++ b/src/stats/stdlib_stats_pca.fypp @@ -1,13 +1,4 @@ #:include "common.fypp" -#! PCA supports all real kinds available in stdlib's internal BLAS/LAPACK. -#! When WITH_XDP or WITH_QP are enabled, extended precision is also supported. -#:set PCA_KINDS_TYPES = [("sp", "real(sp)"), ("dp", "real(dp)")] -#:if WITH_XDP -#:set PCA_KINDS_TYPES = PCA_KINDS_TYPES + [("xdp", "real(xdp)")] -#:endif -#:if WITH_QP -#:set PCA_KINDS_TYPES = PCA_KINDS_TYPES + [("qp", "real(qp)")] -#:endif submodule (stdlib_stats) stdlib_stats_pca use stdlib_kinds, only: sp, dp, xdp, qp use stdlib_optval, only: optval @@ -21,7 +12,7 @@ submodule (stdlib_stats) stdlib_stats_pca contains ! Helper subroutine: Centers data in-place by subtracting the mean from each column - #:for k1, t1 in PCA_KINDS_TYPES + #:for k1, t1, ri, cpp in REAL_KINDS_TYPES subroutine center_data_${k1}$(x, mu) ${t1}$, intent(inout) :: x(:,:) ${t1}$, intent(in) :: mu(:) @@ -37,7 +28,7 @@ contains #:endfor ! SVD-based PCA driver: computes principal components via SVD of centered data - #:for k1, t1 in PCA_KINDS_TYPES + #:for k1, t1, ri, cpp in REAL_KINDS_TYPES subroutine pca_svd_driver_${k1}$(x_centered, n, p, components, singular_values, err) use stdlib_blas_constants_${k1}$, only: zero ${t1}$, intent(inout) :: x_centered(:,:) @@ -69,7 +60,7 @@ contains #:endfor ! Eigendecomposition-based PCA driver: computes principal components via covariance matrix - #:for k1, t1 in PCA_KINDS_TYPES + #:for k1, t1, ri, cpp in REAL_KINDS_TYPES subroutine pca_eigh_driver_${k1}$(x_centered, n, p, components, singular_values, err) use stdlib_blas_constants_${k1}$, only: zero ${t1}$, intent(in) :: x_centered(:,:) @@ -130,7 +121,7 @@ contains end subroutine pca_eigh_driver_${k1}$ #:endfor - #:for k1, t1 in PCA_KINDS_TYPES + #:for k1, t1, ri, cpp in REAL_KINDS_TYPES module subroutine pca_${k1}$(x, components, singular_values, x_mean, & method, overwrite_x, err) ${t1}$, intent(inout) :: x(:,:) @@ -193,7 +184,7 @@ contains #:endfor - #:for k1, t1 in PCA_KINDS_TYPES + #:for k1, t1, ri, cpp in REAL_KINDS_TYPES module subroutine pca_transform_${k1}$(x, components, x_mean, x_transformed) use stdlib_blas_constants_${k1}$, only: one, zero ${t1}$, intent(in) :: x(:,:) @@ -217,7 +208,7 @@ contains #:endfor - #:for k1, t1 in PCA_KINDS_TYPES + #:for k1, t1, ri, cpp in REAL_KINDS_TYPES module subroutine pca_inverse_transform_${k1}$(x_reduced, components, x_mean, x_reconstructed) use stdlib_blas_constants_${k1}$, only: one, zero ${t1}$, intent(in) :: x_reduced(:,:) From f9319089e3d5641ef458c629f2e2426d8aaa9568 Mon Sep 17 00:00:00 2001 From: jaya sathvik Date: Tue, 20 Jan 2026 23:40:08 +0530 Subject: [PATCH 069/104] Change singular_values --- src/stats/stdlib_stats.fypp | 2 +- src/stats/stdlib_stats_pca.fypp | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/src/stats/stdlib_stats.fypp b/src/stats/stdlib_stats.fypp index 0bc99e8f2..d22cd8cf0 100644 --- a/src/stats/stdlib_stats.fypp +++ b/src/stats/stdlib_stats.fypp @@ -652,7 +652,7 @@ module stdlib_stats method, overwrite_x, err) ${t1}$, intent(inout) :: x(:,:) ${t1}$, intent(out) :: components(:,:) - real(${k1}$), intent(out) :: singular_values(:) + ${t1}$, intent(out) :: singular_values(:) ${t1}$, intent(out), optional :: x_mean(:) character(*), intent(in), optional :: method logical, intent(in), optional :: overwrite_x diff --git a/src/stats/stdlib_stats_pca.fypp b/src/stats/stdlib_stats_pca.fypp index bf9fbc9b1..55ceaffbe 100644 --- a/src/stats/stdlib_stats_pca.fypp +++ b/src/stats/stdlib_stats_pca.fypp @@ -34,7 +34,7 @@ contains ${t1}$, intent(inout) :: x_centered(:,:) integer(ilp), intent(in) :: n, p ${t1}$, intent(out) :: components(:,:) - real(${k1}$), intent(out) :: singular_values(:) + ${t1}$, intent(out) :: singular_values(:) type(linalg_state_type), intent(out) :: err integer(ilp) :: n_s, m @@ -66,7 +66,7 @@ contains ${t1}$, intent(in) :: x_centered(:,:) integer(ilp), intent(in) :: n, p ${t1}$, intent(out) :: components(:,:) - real(${k1}$), intent(out) :: singular_values(:) + ${t1}$, intent(out) :: singular_values(:) type(linalg_state_type), intent(out) :: err integer(ilp) :: i, j, m @@ -126,7 +126,7 @@ contains method, overwrite_x, err) ${t1}$, intent(inout) :: x(:,:) ${t1}$, intent(out) :: components(:,:) - real(${k1}$), intent(out) :: singular_values(:) + ${t1}$, intent(out) :: singular_values(:) ${t1}$, intent(out), optional :: x_mean(:) character(*), intent(in), optional :: method logical, intent(in), optional :: overwrite_x From 53bb939d883e1e824778b3faf673b18179fa7690 Mon Sep 17 00:00:00 2001 From: jaya sathvik Date: Tue, 20 Jan 2026 23:42:45 +0530 Subject: [PATCH 070/104] Remove scale_factor variable --- src/stats/stdlib_stats_pca.fypp | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/src/stats/stdlib_stats_pca.fypp b/src/stats/stdlib_stats_pca.fypp index 55ceaffbe..7b1ae7015 100644 --- a/src/stats/stdlib_stats_pca.fypp +++ b/src/stats/stdlib_stats_pca.fypp @@ -72,7 +72,6 @@ contains integer(ilp) :: i, j, m integer(ilp), allocatable :: idx(:) ${t1}$ :: alpha, beta - real(${k1}$) :: scale_factor real(${k1}$), allocatable :: lambda(:), lambda_copy(:) ${t1}$, allocatable :: c(:,:), vectors(:,:) @@ -81,8 +80,7 @@ contains singular_values = zero ! Compute covariance matrix using BLAS syrk: C = (1/(n-1)) * X^T * X - scale_factor = 1.0_${k1}$ / real(max(n-1, 1), ${k1}$) - alpha = scale_factor + alpha = 1.0_${k1}$ / real(max(n-1, 1), ${k1}$) beta = zero allocate(c(p, p)) c = zero From 3c98deec7550375fbec3d35a766f2fda93db5d3b Mon Sep 17 00:00:00 2001 From: jaya sathvik Date: Wed, 21 Jan 2026 00:16:32 +0530 Subject: [PATCH 071/104] fix issues --- src/stats/stdlib_stats_pca.fypp | 13 +++++-------- test/stats/test_pca.fypp | 13 +++---------- 2 files changed, 8 insertions(+), 18 deletions(-) diff --git a/src/stats/stdlib_stats_pca.fypp b/src/stats/stdlib_stats_pca.fypp index 7b1ae7015..96a13dd11 100644 --- a/src/stats/stdlib_stats_pca.fypp +++ b/src/stats/stdlib_stats_pca.fypp @@ -72,7 +72,7 @@ contains integer(ilp) :: i, j, m integer(ilp), allocatable :: idx(:) ${t1}$ :: alpha, beta - real(${k1}$), allocatable :: lambda(:), lambda_copy(:) + real(${k1}$), allocatable :: lambda(:) ${t1}$, allocatable :: c(:,:), vectors(:,:) ! Initialize outputs to zero to prevent uninitialized memory access @@ -82,8 +82,7 @@ contains ! Compute covariance matrix using BLAS syrk: C = (1/(n-1)) * X^T * X alpha = 1.0_${k1}$ / real(max(n-1, 1), ${k1}$) beta = zero - allocate(c(p, p)) - c = zero + allocate(c(p, p), source=zero) call syrk('U', 'T', p, n, alpha, x_centered, n, beta, c, p) ! Fill lower triangle from upper triangle (syrk only fills upper) @@ -99,18 +98,16 @@ contains if (err%ok()) then ! Sort eigenvalues in descending order - allocate(lambda_copy(p)) - lambda_copy = -lambda allocate(idx(p)) - call sort_index(lambda_copy, idx) + call sort_index(lambda, idx, reverse=.true.) ! Assign sorted results with safety bounds checks m = min(size(components, 1, kind=ilp), p) m = min(m, size(singular_values, 1, kind=ilp)) do i = 1, m components(i, :) = vectors(:, idx(i)) - if (lambda(idx(i)) > 0.0_${k1}$) then - singular_values(i) = sqrt(lambda(idx(i)) * real(n-1, ${k1}$)) + if (lambda(i) > 0.0_${k1}$) then + singular_values(i) = sqrt(lambda(i) * real(n-1, ${k1}$)) else singular_values(i) = 0.0_${k1}$ end if diff --git a/test/stats/test_pca.fypp b/test/stats/test_pca.fypp index e318990ee..327950753 100644 --- a/test/stats/test_pca.fypp +++ b/test/stats/test_pca.fypp @@ -1,11 +1,4 @@ #:include "common.fypp" -#:set PCA_KINDS = ["sp", "dp"] -#:if WITH_XDP -#:set PCA_KINDS = PCA_KINDS + ["xdp"] -#:endif -#:if WITH_QP -#:set PCA_KINDS = PCA_KINDS + ["qp"] -#:endif program test_pca use stdlib_error, only: check use stdlib_kinds, only: sp, dp, xdp, qp @@ -13,17 +6,17 @@ program test_pca use stdlib_linalg_state, only: linalg_state_type implicit none - #:for k1 in PCA_KINDS + #:for k1 in REAL_KINDS real(${k1}$), parameter :: ${k1}$tol = 1000 * epsilon(1._${k1}$) #:endfor - #:for k1 in PCA_KINDS + #:for k1 in REAL_KINDS call test_pca_${k1}$() #:endfor contains - #:for k1 in PCA_KINDS + #:for k1 in REAL_KINDS subroutine test_pca_${k1}$() real(${k1}$) :: x(3, 2), components(2, 2), s(2), mu(2) real(${k1}$) :: x_trans(3, 2), x_inv(3, 2) From cb9a5ea6ac37b40232d93146b87a361783a755ce Mon Sep 17 00:00:00 2001 From: jaya sathvik Date: Fri, 23 Jan 2026 16:46:19 +0530 Subject: [PATCH 072/104] refactor --- src/stats/stdlib_stats_pca.fypp | 33 ++++++++++----------------------- 1 file changed, 10 insertions(+), 23 deletions(-) diff --git a/src/stats/stdlib_stats_pca.fypp b/src/stats/stdlib_stats_pca.fypp index 96a13dd11..08aa44b6f 100644 --- a/src/stats/stdlib_stats_pca.fypp +++ b/src/stats/stdlib_stats_pca.fypp @@ -6,7 +6,6 @@ submodule (stdlib_stats) stdlib_stats_pca use stdlib_linalg_constants, only: ilp use stdlib_linalg_blas, only: gemm, syrk use stdlib_linalg_state, only: linalg_state_type, LINALG_ERROR, LINALG_VALUE_ERROR - use stdlib_sorting, only: sort_index implicit none contains @@ -69,8 +68,7 @@ contains ${t1}$, intent(out) :: singular_values(:) type(linalg_state_type), intent(out) :: err - integer(ilp) :: i, j, m - integer(ilp), allocatable :: idx(:) + integer(ilp) :: m ${t1}$ :: alpha, beta real(${k1}$), allocatable :: lambda(:) ${t1}$, allocatable :: c(:,:), vectors(:,:) @@ -85,33 +83,22 @@ contains allocate(c(p, p), source=zero) call syrk('U', 'T', p, n, alpha, x_centered, n, beta, c, p) - ! Fill lower triangle from upper triangle (syrk only fills upper) - do j = 1, p-1 - do i = j+1, p - c(i, j) = c(j, i) - end do - end do - allocate(lambda(p)) allocate(vectors(p, p)) - call eigh(c, lambda, vectors=vectors, err=err) + call eigh(c, lambda, vectors=vectors, lower=.false., err=err) if (err%ok()) then - ! Sort eigenvalues in descending order - allocate(idx(p)) - call sort_index(lambda, idx, reverse=.true.) + ! LAPACK returns eigenvalues in ascending order. + ! Flip them to get descending order for PCA. + lambda = lambda(p:1:-1) + vectors = vectors(:, p:1:-1) - ! Assign sorted results with safety bounds checks + ! Assign results with safety bounds checks m = min(size(components, 1, kind=ilp), p) m = min(m, size(singular_values, 1, kind=ilp)) - do i = 1, m - components(i, :) = vectors(:, idx(i)) - if (lambda(i) > 0.0_${k1}$) then - singular_values(i) = sqrt(lambda(i) * real(n-1, ${k1}$)) - else - singular_values(i) = 0.0_${k1}$ - end if - end do + + components(1:m, :) = transpose(vectors(:, 1:m)) + singular_values(1:m) = sqrt(max(lambda(1:m) * real(n-1, ${k1}$), 0.0_${k1}$)) end if end subroutine pca_eigh_driver_${k1}$ #:endfor From 3b9b085e63f68d3c1af0593d7164d644fa92d93c Mon Sep 17 00:00:00 2001 From: jaya sathvik Date: Fri, 23 Jan 2026 17:14:37 +0530 Subject: [PATCH 073/104] remove sort index,lower triangle fill. --- src/stats/CMakeLists.txt | 33 --------------------------------- src/stats/stdlib_stats_pca.fypp | 14 ++++++++++---- 2 files changed, 10 insertions(+), 37 deletions(-) diff --git a/src/stats/CMakeLists.txt b/src/stats/CMakeLists.txt index daa8d6a28..3d7d3db53 100644 --- a/src/stats/CMakeLists.txt +++ b/src/stats/CMakeLists.txt @@ -1,32 +1,8 @@ set(stats_cppFiles -<<<<<<< HEAD - ../stdlib_linalg_constants.fypp - ../stdlib_sorting.fypp - ../stdlib_sorting_ord_sort.fypp - ../stdlib_sorting_sort_adjoint.fypp - ../stdlib_sorting_sort.fypp - ) - -set(stats_fppFiles - ../stdlib_ascii.fypp - ../stdlib_io.fypp - ../stdlib_kinds.fypp - ../stdlib_optval.fypp - ../stdlib_error.fypp - ../stdlib_linalg.fypp - ../stdlib_linalg_diag.fypp - ../stdlib_linalg_eigenvalues.fypp - ../stdlib_linalg_state.fypp - ../stdlib_linalg_svd.fypp - ../stdlib_random.fypp - ../stdlib_selection.fypp - ../stdlib_string_type.fypp -======= ) set(stats_fppFiles stdlib_random.fypp ->>>>>>> b32b42a5f862026461df895140646de38928c4d7 stdlib_stats_corr.fypp stdlib_stats_cov.fypp stdlib_stats_pca.fypp @@ -43,18 +19,9 @@ set(stats_fppFiles stdlib_stats_var.fypp ) -<<<<<<< HEAD -set(f90Files - ../stdlib_sorting_radix_sort.f90 - ) - -configure_stdlib_target(stats "" stats_fppFiles stats_cppFiles) -target_link_libraries(stats PUBLIC blas lapack $<$:bitsets>) -======= set(stats_f90Files ) configure_stdlib_target(stats stats_f90Files stats_fppFiles stats_cppFiles) target_link_libraries(stats PUBLIC core linalg_core linalg selection strings) ->>>>>>> b32b42a5f862026461df895140646de38928c4d7 diff --git a/src/stats/stdlib_stats_pca.fypp b/src/stats/stdlib_stats_pca.fypp index 08aa44b6f..577949f05 100644 --- a/src/stats/stdlib_stats_pca.fypp +++ b/src/stats/stdlib_stats_pca.fypp @@ -68,7 +68,7 @@ contains ${t1}$, intent(out) :: singular_values(:) type(linalg_state_type), intent(out) :: err - integer(ilp) :: m + integer(ilp) :: i, j, m ${t1}$ :: alpha, beta real(${k1}$), allocatable :: lambda(:) ${t1}$, allocatable :: c(:,:), vectors(:,:) @@ -83,6 +83,7 @@ contains allocate(c(p, p), source=zero) call syrk('U', 'T', p, n, alpha, x_centered, n, beta, c, p) + ! Fill lower triangle from upper triangle (syrk only fills upper) allocate(lambda(p)) allocate(vectors(p, p)) call eigh(c, lambda, vectors=vectors, lower=.false., err=err) @@ -96,9 +97,14 @@ contains ! Assign results with safety bounds checks m = min(size(components, 1, kind=ilp), p) m = min(m, size(singular_values, 1, kind=ilp)) - - components(1:m, :) = transpose(vectors(:, 1:m)) - singular_values(1:m) = sqrt(max(lambda(1:m) * real(n-1, ${k1}$), 0.0_${k1}$)) + do i = 1, m + components(i, :) = vectors(:, i) + if (lambda(i) > 0.0_${k1}$) then + singular_values(i) = sqrt(lambda(i) * real(n-1, ${k1}$)) + else + singular_values(i) = 0.0_${k1}$ + end if + end do end if end subroutine pca_eigh_driver_${k1}$ #:endfor From 25e4eab527a8d25e1267d015fbe15bbe7e33f71a Mon Sep 17 00:00:00 2001 From: jaya sathvik Date: Sat, 24 Jan 2026 17:06:10 +0530 Subject: [PATCH 074/104] Fix eigh: use upper_a instead of lower --- src/stats/stdlib_stats_pca.fypp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/stats/stdlib_stats_pca.fypp b/src/stats/stdlib_stats_pca.fypp index 577949f05..ccf1aadf6 100644 --- a/src/stats/stdlib_stats_pca.fypp +++ b/src/stats/stdlib_stats_pca.fypp @@ -86,7 +86,7 @@ contains ! Fill lower triangle from upper triangle (syrk only fills upper) allocate(lambda(p)) allocate(vectors(p, p)) - call eigh(c, lambda, vectors=vectors, lower=.false., err=err) + call eigh(c, lambda, vectors=vectors, upper_a=.true., err=err) if (err%ok()) then ! LAPACK returns eigenvalues in ascending order. From 9604ccb2a089485b8439c4c1d4873ee69e454450 Mon Sep 17 00:00:00 2001 From: jaya sathvik Date: Sat, 24 Jan 2026 18:02:55 +0530 Subject: [PATCH 075/104] update center data subroutine --- src/stats/stdlib_stats_pca.fypp | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) diff --git a/src/stats/stdlib_stats_pca.fypp b/src/stats/stdlib_stats_pca.fypp index ccf1aadf6..f400cf631 100644 --- a/src/stats/stdlib_stats_pca.fypp +++ b/src/stats/stdlib_stats_pca.fypp @@ -18,10 +18,8 @@ contains integer(ilp) :: i, j, m, n m = size(x, 1, kind=ilp) n = size(x, 2, kind=ilp) - do j = 1, n - do i = 1, m - x(i, j) = x(i, j) - mu(j) - end do + do concurrent(i=1:m, j=1:n) + x(i, j) = x(i, j) - mu(j) end do end subroutine center_data_${k1}$ #:endfor @@ -214,10 +212,8 @@ contains call gemm('N', 'N', n, p, nc, one, x_reduced, n, components, nc, zero, x_reconstructed, n) if (present(x_mean)) then - do j = 1, p - do i = 1, n - x_reconstructed(i, j) = x_reconstructed(i, j) + x_mean(j) - end do + do concurrent(i=1:n, j=1:p) + x_reconstructed(i, j) = x_reconstructed(i, j) + x_mean(j) end do end if end subroutine pca_inverse_transform_${k1}$ From a66aee6ff3d920a4c9ac6030da0023dbfa50fe16 Mon Sep 17 00:00:00 2001 From: jaya sathvik Date: Sat, 24 Jan 2026 18:19:08 +0530 Subject: [PATCH 076/104] remove elsewhere clause --- src/stats/stdlib_stats_pca.fypp | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/src/stats/stdlib_stats_pca.fypp b/src/stats/stdlib_stats_pca.fypp index f400cf631..b1dd7610f 100644 --- a/src/stats/stdlib_stats_pca.fypp +++ b/src/stats/stdlib_stats_pca.fypp @@ -95,14 +95,16 @@ contains ! Assign results with safety bounds checks m = min(size(components, 1, kind=ilp), p) m = min(m, size(singular_values, 1, kind=ilp)) - do i = 1, m - components(i, :) = vectors(:, i) - if (lambda(i) > 0.0_${k1}$) then - singular_values(i) = sqrt(lambda(i) * real(n-1, ${k1}$)) - else - singular_values(i) = 0.0_${k1}$ - end if - end do + + ! Components are eigenvectors as rows (transpose of vectors columns) + components(1:m, :) = transpose(vectors(:, 1:m)) + + ! Convert eigenvalues to singular values: s = sqrt(lambda * (n-1)) + where (lambda(1:m) > 0.0_${k1}$) + singular_values(1:m) = sqrt(lambda(1:m) * real(n-1, ${k1}$)) + elsewhere + singular_values(1:m) = 0.0_${k1}$ + end where end if end subroutine pca_eigh_driver_${k1}$ #:endfor From 641243ff2081d487b4c565cd095266752d94c794 Mon Sep 17 00:00:00 2001 From: jaya sathvik Date: Mon, 26 Jan 2026 14:42:42 +0530 Subject: [PATCH 077/104] update specs --- doc/specs/stdlib_stats.md | 104 ++++++++++++++++++++++++++++++++++ example/stats/example_pca.f90 | 50 ++++++++++++++++ 2 files changed, 154 insertions(+) create mode 100644 example/stats/example_pca.f90 diff --git a/doc/specs/stdlib_stats.md b/doc/specs/stdlib_stats.md index dbad5567e..bafed2478 100644 --- a/doc/specs/stdlib_stats.md +++ b/doc/specs/stdlib_stats.md @@ -283,6 +283,110 @@ If `mask` is specified, the result is the _k_-th (central) moment of all elemen {!example/stats/example_moment.f90!} ``` +## `pca` - Principal Component Analysis + +### Status + +Experimental + +### Description + +Performs Principal Component Analysis (PCA) on a 2D array of observations and features. +The input matrix `x` has shape `(m, n)`, where `m` is the number of observations and `n` is the number of features. +The subroutine computes the principal components (loadings), the singular values, and optionally the feature means. + +Two methods are supported: +- `"svd"`: (Default) Computes PCA via Singular Value Decomposition of the centered data. This is generally more numerically stable. +- `"eig"` or `"cov"`: Computes PCA via Eigendecomposition of the covariance matrix. + +### Syntax + +`call ` [[stdlib_stats(module):pca(interface)]] `(x, components, singular_values [, x_mean [, method [, overwrite_x [, err]]]])` + +### Class + +Generic subroutine + +### Arguments + +`x`: Shall be a rank-2 real array with shape `(m, n)`. It is an `intent(inout)` argument. If `overwrite_x` is `.true.`, `x` may be modified during computation. + +`components`: Shall be a rank-2 real array with shape `(n_components, n)`. It stores the principal components as rows. It is an `intent(out)` argument. + +`singular_values`: Shall be a rank-1 real array with shape `(n_components)`. It stores the singular values in descending order. It is an `intent(out)` argument. + +`x_mean` (optional): Shall be a rank-1 real array with shape `(n)`. It stores the mean of each feature (column). It is an `intent(out)` argument. + +`method` (optional): Shall be a character string. Either `"svd"` or `"eig"`/`"cov"`. It is an `intent(in)` argument. + +`overwrite_x` (optional): Shall be a scalar of type `logical`. If `.true.`, the input matrix `x` can be used as a workspace and modified. It is an `intent(in)` argument. + +`err` (optional): Shall be of type `linalg_state_type`. It is an `intent(out)` argument. + +### Example + +```fortran +{!example/stats/example_pca.f90!} +``` + +## `pca_transform` - Projects data into principal component space + +### Status + +Experimental + +### Description + +Projects the input data `x` into the reduced dimensional space defined by the provided principal components. +The transformation is defined as `x_transformed = (x - x_mean) * components^T`. + +### Syntax + +`call ` [[stdlib_stats(module):pca_transform(interface)]] `(x, components [, x_mean], x_transformed)` + +### Class + +Generic subroutine + +### Arguments + +`x`: Shall be a rank-2 real array with shape `(m, n)`. It is an `intent(in)` argument. + +`components`: Shall be a rank-2 real array with shape `(nc, n)`. It stores the principal components as rows. It is an `intent(in)` argument. + +`x_mean` (optional): Shall be a rank-1 real array with shape `(n)`. It stores the feature means to subtract. It is an `intent(in)` argument. + +`x_transformed`: Shall be a rank-2 real array with shape `(m, nc)`. It stores the projected data. It is an `intent(out)` argument. + +## `pca_inverse_transform` - Reconstructs original data from principal component space + +### Status + +Experimental + +### Description + +Reconstructs the original data representation from the principal component space. +The reconstruction is defined as `x_reconstructed = x_reduced * components + x_mean`. + +### Syntax + +`call ` [[stdlib_stats(module):pca_inverse_transform(interface)]] `(x_reduced, components [, x_mean], x_reconstructed)` + +### Class + +Generic subroutine + +### Arguments + +`x_reduced`: Shall be a rank-2 real array with shape `(m, nc)`. It is an `intent(in)` argument. + +`components`: Shall be a rank-2 real array with shape `(nc, n)`. It stores the principal components as rows. It is an `intent(in)` argument. + +`x_mean` (optional): Shall be a rank-1 real array with shape `(n)`. It stores the feature means to add back. It is an `intent(in)` argument. + +`x_reconstructed`: Shall be a rank-2 real array with shape `(m, n)`. It stores the reconstructed data. It is an `intent(out)` argument. + ## `var` - variance of array elements ### Status diff --git a/example/stats/example_pca.f90 b/example/stats/example_pca.f90 new file mode 100644 index 000000000..78bef5359 --- /dev/null +++ b/example/stats/example_pca.f90 @@ -0,0 +1,50 @@ +program example_pca + use stdlib_kinds, only: dp + use stdlib_stats, only: pca, pca_transform, pca_inverse_transform + use stdlib_linalg_state, only: linalg_state_type + implicit none + + real(dp) :: x(3, 2), components(2, 2), s(2), mu(2) + real(dp) :: x_trans(3, 2), x_inv(3, 2) + type(linalg_state_type) :: err + integer :: i + + ! Input data: 3 observations, 2 features + x = reshape([1.0_dp, 3.0_dp, 5.0_dp, 2.0_dp, 4.0_dp, 6.0_dp], [3, 2]) + + print *, "Original data:" + do i = 1, 3 + print "(2f6.2)", x(i, :) + end do + + ! Perform PCA + call pca(x, components, s, x_mean=mu, err=err) + + if (err%ok()) then + print *, "" + print *, "Feature means:", mu + print *, "Singular values:", s + print *, "Principal components (as rows):" + print "(2f6.3)", components(1, :) + print "(2f6.3)", components(2, :) + + ! Transform data to principal components space + call pca_transform(x, components, mu, x_trans) + print *, "" + print *, "Transformed data (projected):" + do i = 1, 3 + print "(2f8.3)", x_trans(i, :) + end do + + ! Inverse transform to reconstruct original data + call pca_inverse_transform(x_trans, components, mu, x_inv) + print *, "" + print *, "Reconstructed data:" + do i = 1, 3 + print "(2f6.2)", x_inv(i, :) + end do + else + print *, "PCA failed: ", err%message + end if + +end program example_pca From ef2f624715eb21126ddc40a7c422e7f5155b27ca Mon Sep 17 00:00:00 2001 From: jaya sathvik Date: Tue, 27 Jan 2026 21:05:31 +0530 Subject: [PATCH 078/104] fix --- src/stats/stdlib_stats_pca.fypp | 35 +++++++++++++++++++++------------ 1 file changed, 22 insertions(+), 13 deletions(-) diff --git a/src/stats/stdlib_stats_pca.fypp b/src/stats/stdlib_stats_pca.fypp index b1dd7610f..e2eee4b27 100644 --- a/src/stats/stdlib_stats_pca.fypp +++ b/src/stats/stdlib_stats_pca.fypp @@ -42,8 +42,7 @@ contains singular_values = zero n_s = min(n, p) - allocate(s_tmp(n_s)) - allocate(vt_tmp(n_s, p)) + allocate(s_tmp(n_s), vt_tmp(n_s, p)) call svd(x_centered, s_tmp, vt=vt_tmp, overwrite_a=.true., full_matrices=.false., err=err) @@ -66,7 +65,7 @@ contains ${t1}$, intent(out) :: singular_values(:) type(linalg_state_type), intent(out) :: err - integer(ilp) :: i, j, m + integer(ilp) :: m ${t1}$ :: alpha, beta real(${k1}$), allocatable :: lambda(:) ${t1}$, allocatable :: c(:,:), vectors(:,:) @@ -81,9 +80,7 @@ contains allocate(c(p, p), source=zero) call syrk('U', 'T', p, n, alpha, x_centered, n, beta, c, p) - ! Fill lower triangle from upper triangle (syrk only fills upper) - allocate(lambda(p)) - allocate(vectors(p, p)) + allocate(lambda(p), vectors(p, p)) call eigh(c, lambda, vectors=vectors, upper_a=.true., err=err) if (err%ok()) then @@ -102,8 +99,6 @@ contains ! Convert eigenvalues to singular values: s = sqrt(lambda * (n-1)) where (lambda(1:m) > 0.0_${k1}$) singular_values(1:m) = sqrt(lambda(1:m) * real(n-1, ${k1}$)) - elsewhere - singular_values(1:m) = 0.0_${k1}$ end where end if end subroutine pca_eigh_driver_${k1}$ @@ -121,16 +116,30 @@ contains type(linalg_state_type), intent(out), optional :: err type(linalg_state_type) :: err0 - integer(ilp) :: n, p + integer(ilp) :: n, p, i ${t1}$, allocatable :: mu(:), x_centered(:,:) character(len=:), allocatable :: method_ n = size(x, 1, kind=ilp) p = size(x, 2, kind=ilp) + + ! Input validation: check for empty arrays + if (n < 1 .or. p < 1) then + err0 = linalg_state_type("pca", LINALG_VALUE_ERROR, & + "Input array must have at least 1 observation and 1 feature") + call err0%handle(err) + return + end if + + ! Convert method to lowercase for case-insensitive comparison method_ = trim(adjustl(optval(method, "svd"))) + do i = 1, len(method_) + if (method_(i:i) >= 'A' .and. method_(i:i) <= 'Z') then + method_(i:i) = achar(iachar(method_(i:i)) + 32) + end if + end do ! Calculate mean along dimension 1 (column means) using stdlib mean - allocate(mu(p)) mu = mean(x, 1) ! Validate and assign x_mean if present @@ -151,13 +160,13 @@ contains call center_data_${k1}$(x, mu) call pca_svd_driver_${k1}$(x, n, p, components, singular_values, err0) else - allocate(x_centered, source=x) + x_centered = x call center_data_${k1}$(x_centered, mu) call pca_svd_driver_${k1}$(x_centered, n, p, components, singular_values, err0) end if case ("eig", "cov") - allocate(x_centered, source=x) + x_centered = x call center_data_${k1}$(x_centered, mu) call pca_eigh_driver_${k1}$(x_centered, n, p, components, singular_values, err0) @@ -187,7 +196,7 @@ contains p = size(x, 2, kind=ilp) nc = size(components, 1, kind=ilp) - allocate(x_centered, source=x) + x_centered = x if (present(x_mean)) call center_data_${k1}$(x_centered, x_mean) ! x_transformed = x_centered * components^T using GEMM From 33c02704268081bd70daa1506c31d10cacfb420c Mon Sep 17 00:00:00 2001 From: jaya sathvik Date: Fri, 30 Jan 2026 21:35:22 +0530 Subject: [PATCH 079/104] , --- src/stats/stdlib_stats_pca.fypp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/stats/stdlib_stats_pca.fypp b/src/stats/stdlib_stats_pca.fypp index e2eee4b27..8dbab2654 100644 --- a/src/stats/stdlib_stats_pca.fypp +++ b/src/stats/stdlib_stats_pca.fypp @@ -230,4 +230,4 @@ contains end subroutine pca_inverse_transform_${k1}$ #:endfor -end submodule stdlib_stats_pca +end submodule stdlib_stats_pca \ No newline at end of file From 23257e9048e84eea406910029fe569698f54612a Mon Sep 17 00:00:00 2001 From: jaya sathvik Date: Fri, 6 Feb 2026 09:11:13 +0530 Subject: [PATCH 080/104] update test file --- test/stats/test_pca.fypp | 85 ++++++++++++++++++++++++++++++++-------- 1 file changed, 68 insertions(+), 17 deletions(-) diff --git a/test/stats/test_pca.fypp b/test/stats/test_pca.fypp index 327950753..93f41ead3 100644 --- a/test/stats/test_pca.fypp +++ b/test/stats/test_pca.fypp @@ -1,23 +1,34 @@ #:include "common.fypp" -program test_pca - use stdlib_error, only: check +module test_stats_pca + use testdrive, only: new_unittest, unittest_type, error_type, check use stdlib_kinds, only: sp, dp, xdp, qp use stdlib_stats, only: pca, pca_transform, pca_inverse_transform use stdlib_linalg_state, only: linalg_state_type implicit none - #:for k1 in REAL_KINDS - real(${k1}$), parameter :: ${k1}$tol = 1000 * epsilon(1._${k1}$) - #:endfor + public :: collect_stats_pca #:for k1 in REAL_KINDS - call test_pca_${k1}$() + real(${k1}$), parameter :: ${k1}$tol = 1000 * epsilon(1._${k1}$) #:endfor contains + !> Collect all exported unit tests + subroutine collect_stats_pca(testsuite) + !> Collection of tests + type(unittest_type), allocatable, intent(out) :: testsuite(:) + + testsuite = [ & + #:for k1 in REAL_KINDS + new_unittest("test_pca_${k1}$", test_pca_${k1}$) ${", &" if k1 != REAL_KINDS[-1] else "&"}$ + #:endfor + ] + end subroutine collect_stats_pca + #:for k1 in REAL_KINDS - subroutine test_pca_${k1}$() + subroutine test_pca_${k1}$(error) + type(error_type), allocatable, intent(out) :: error real(${k1}$) :: x(3, 2), components(2, 2), s(2), mu(2) real(${k1}$) :: x_trans(3, 2), x_inv(3, 2) type(linalg_state_type) :: err @@ -27,28 +38,68 @@ contains ! Test SVD method call pca(x, components, s, x_mean=mu, method="svd", err=err) - call check(err%ok(), "pca_${k1}$ svd err") - call check(all(abs(mu - [3.0_${k1}$, 4.0_${k1}$]) < ${k1}$tol), "pca_${k1}$ svd mean") + call check(error, err%ok(), "pca_${k1}$ svd err") + if (allocated(error)) return + + call check(error, all(abs(mu - [3.0_${k1}$, 4.0_${k1}$]) < ${k1}$tol), "pca_${k1}$ svd mean") + if (allocated(error)) return + ! First component should be approx [0.707, 0.707] (or negative) - call check(abs(abs(components(1,1)) - 1.0_${k1}$/sqrt(2.0_${k1}$)) < ${k1}$tol, "pca_${k1}$ svd comp1") - call check(abs(s(1) - 4.0_${k1}$) < ${k1}$tol, "pca_${k1}$ svd s1") - call check(abs(s(2)) < ${k1}$tol, "pca_${k1}$ svd s2") + call check(error, abs(abs(components(1,1)) - 1.0_${k1}$/sqrt(2.0_${k1}$)) < ${k1}$tol, "pca_${k1}$ svd comp1") + if (allocated(error)) return + + call check(error, abs(s(1) - 4.0_${k1}$) < ${k1}$tol, "pca_${k1}$ svd s1") + if (allocated(error)) return + + call check(error, abs(s(2)) < ${k1}$tol, "pca_${k1}$ svd s2") + if (allocated(error)) return ! Test Transform call pca_transform(x, components, mu, x_trans) ! Second dimension should be zero - call check(all(abs(x_trans(:, 2)) < ${k1}$tol), "pca_${k1}$ transform") + call check(error, all(abs(x_trans(:, 2)) < ${k1}$tol), "pca_${k1}$ transform") + if (allocated(error)) return ! Test Inverse Transform call pca_inverse_transform(x_trans, components, mu, x_inv) - call check(all(abs(x_inv - x) < ${k1}$tol), "pca_${k1}$ inverse") + call check(error, all(abs(x_inv - x) < ${k1}$tol), "pca_${k1}$ inverse") + if (allocated(error)) return ! Test EIG method call pca(x, components, s, method="eig", err=err) - call check(err%ok(), "pca_${k1}$ eig err") - call check(abs(s(1) - 4.0_${k1}$) < ${k1}$tol, "pca_${k1}$ eig s1") + call check(error, err%ok(), "pca_${k1}$ eig err") + if (allocated(error)) return + + call check(error, abs(s(1) - 4.0_${k1}$) < ${k1}$tol, "pca_${k1}$ eig s1") + if (allocated(error)) return end subroutine test_pca_${k1}$ #:endfor -end program test_pca +end module test_stats_pca + +program tester + use, intrinsic :: iso_fortran_env, only : error_unit + use testdrive, only : run_testsuite, new_testsuite, testsuite_type + use test_stats_pca, only : collect_stats_pca + implicit none + integer :: stat, is + type(testsuite_type), allocatable :: testsuites(:) + character(len=*), parameter :: fmt = '("#", *(1x, a))' + + stat = 0 + + testsuites = [ & + new_testsuite("stats_pca", collect_stats_pca) & + ] + + do is = 1, size(testsuites) + write(error_unit, fmt) "Testing:", testsuites(is)%name + call run_testsuite(testsuites(is)%collect, error_unit, stat) + end do + + if (stat > 0) then + write(error_unit, '(i0, 1x, a)') stat, "test(s) failed!" + error stop + end if +end program tester From cb908bf95e799875c931ddd665d4d0ec6c2d4b63 Mon Sep 17 00:00:00 2001 From: jaya sathvik Date: Fri, 6 Feb 2026 13:00:17 +0530 Subject: [PATCH 081/104] loop fix --- src/stats/stdlib_stats_pca.fypp | 15 ++++----------- 1 file changed, 4 insertions(+), 11 deletions(-) diff --git a/src/stats/stdlib_stats_pca.fypp b/src/stats/stdlib_stats_pca.fypp index 8dbab2654..6e81c34b7 100644 --- a/src/stats/stdlib_stats_pca.fypp +++ b/src/stats/stdlib_stats_pca.fypp @@ -6,6 +6,7 @@ submodule (stdlib_stats) stdlib_stats_pca use stdlib_linalg_constants, only: ilp use stdlib_linalg_blas, only: gemm, syrk use stdlib_linalg_state, only: linalg_state_type, LINALG_ERROR, LINALG_VALUE_ERROR + use stdlib_ascii, only: to_lower implicit none contains @@ -90,16 +91,13 @@ contains vectors = vectors(:, p:1:-1) ! Assign results with safety bounds checks - m = min(size(components, 1, kind=ilp), p) - m = min(m, size(singular_values, 1, kind=ilp)) + m = min(size(components, 1, kind=ilp), size(singular_values, 1, kind=ilp), p) ! Components are eigenvectors as rows (transpose of vectors columns) components(1:m, :) = transpose(vectors(:, 1:m)) ! Convert eigenvalues to singular values: s = sqrt(lambda * (n-1)) - where (lambda(1:m) > 0.0_${k1}$) - singular_values(1:m) = sqrt(lambda(1:m) * real(n-1, ${k1}$)) - end where + where (lambda(1:m) > 0.0_${k1}$) singular_values(1:m) = sqrt(lambda(1:m) * real(n-1, ${k1}$)) end if end subroutine pca_eigh_driver_${k1}$ #:endfor @@ -132,12 +130,7 @@ contains end if ! Convert method to lowercase for case-insensitive comparison - method_ = trim(adjustl(optval(method, "svd"))) - do i = 1, len(method_) - if (method_(i:i) >= 'A' .and. method_(i:i) <= 'Z') then - method_(i:i) = achar(iachar(method_(i:i)) + 32) - end if - end do + method_ = to_lower(trim(adjustl(optval(method, "svd")))) ! Calculate mean along dimension 1 (column means) using stdlib mean mu = mean(x, 1) From e2885ae5d089c9f60e3157fe900bf331ccfea1d9 Mon Sep 17 00:00:00 2001 From: jaya sathvik Date: Sat, 21 Feb 2026 22:01:24 +0530 Subject: [PATCH 082/104] update documentation --- doc/specs/stdlib_stats.md | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/doc/specs/stdlib_stats.md b/doc/specs/stdlib_stats.md index bafed2478..1d4f1ce67 100644 --- a/doc/specs/stdlib_stats.md +++ b/doc/specs/stdlib_stats.md @@ -342,7 +342,12 @@ The transformation is defined as `x_transformed = (x - x_mean) * components^T`. ### Syntax -`call ` [[stdlib_stats(module):pca_transform(interface)]] `(x, components [, x_mean], x_transformed)` +`call ` [[stdlib_stats(module):pca_transform(interface)]] `(x, components, x_mean, x_transformed)` + +If `x_mean` is omitted, the non-trailing optional argument rule in Fortran requires +`x_transformed` to be passed by keyword, for example: + +`call ` [[stdlib_stats(module):pca_transform(interface)]] `(x, components, x_transformed=x_transformed)` ### Class @@ -371,7 +376,12 @@ The reconstruction is defined as `x_reconstructed = x_reduced * components + x_m ### Syntax -`call ` [[stdlib_stats(module):pca_inverse_transform(interface)]] `(x_reduced, components [, x_mean], x_reconstructed)` +`call ` [[stdlib_stats(module):pca_inverse_transform(interface)]] `(x_reduced, components, x_mean, x_reconstructed)` + +If `x_mean` is omitted, the non-trailing optional argument rule in Fortran requires +`x_reconstructed` to be passed by keyword, for example: + +`call ` [[stdlib_stats(module):pca_inverse_transform(interface)]] `(x_reduced, components, x_reconstructed=x_reconstructed)` ### Class From 775ccc3a90f1cc101436acf8443bd7aa3185a617 Mon Sep 17 00:00:00 2001 From: jaya sathvik Date: Sat, 21 Feb 2026 22:04:22 +0530 Subject: [PATCH 083/104] add checks --- src/stats/stdlib_stats_pca.fypp | 73 +++++++++++++++++++++++++++++---- 1 file changed, 66 insertions(+), 7 deletions(-) diff --git a/src/stats/stdlib_stats_pca.fypp b/src/stats/stdlib_stats_pca.fypp index 6e81c34b7..1d7c78b77 100644 --- a/src/stats/stdlib_stats_pca.fypp +++ b/src/stats/stdlib_stats_pca.fypp @@ -7,6 +7,7 @@ submodule (stdlib_stats) stdlib_stats_pca use stdlib_linalg_blas, only: gemm, syrk use stdlib_linalg_state, only: linalg_state_type, LINALG_ERROR, LINALG_VALUE_ERROR use stdlib_ascii, only: to_lower + use stdlib_error, only: error_stop implicit none contains @@ -105,6 +106,7 @@ contains #:for k1, t1, ri, cpp in REAL_KINDS_TYPES module subroutine pca_${k1}$(x, components, singular_values, x_mean, & method, overwrite_x, err) + use stdlib_blas_constants_${k1}$, only: zero ${t1}$, intent(inout) :: x(:,:) ${t1}$, intent(out) :: components(:,:) ${t1}$, intent(out) :: singular_values(:) @@ -114,12 +116,18 @@ contains type(linalg_state_type), intent(out), optional :: err type(linalg_state_type) :: err0 - integer(ilp) :: n, p, i + integer(ilp) :: n, p, nc, ns ${t1}$, allocatable :: mu(:), x_centered(:,:) character(len=:), allocatable :: method_ n = size(x, 1, kind=ilp) p = size(x, 2, kind=ilp) + nc = size(components, 1, kind=ilp) + ns = size(singular_values, 1, kind=ilp) + + ! Initialize outputs to zero to prevent uninitialized values on error paths + components = zero + singular_values = zero ! Input validation: check for empty arrays if (n < 1 .or. p < 1) then @@ -129,6 +137,28 @@ contains return end if + ! Validate output shapes + if (size(components, 2, kind=ilp) /= p) then + err0 = linalg_state_type("pca", LINALG_VALUE_ERROR, & + "components must have ", p, " columns, got: ", size(components, 2, kind=ilp)) + call err0%handle(err) + return + end if + + if (nc > min(n, p)) then + err0 = linalg_state_type("pca", LINALG_VALUE_ERROR, & + "Number of components (", nc, ") exceeds min(n,p) = ", min(n, p)) + call err0%handle(err) + return + end if + + if (ns < nc) then + err0 = linalg_state_type("pca", LINALG_VALUE_ERROR, & + "singular_values size (", ns, ") must be >= n_components (", nc, ")") + call err0%handle(err) + return + end if + ! Convert method to lowercase for case-insensitive comparison method_ = to_lower(trim(adjustl(optval(method, "svd")))) @@ -165,6 +195,7 @@ contains case default err0 = linalg_state_type("pca", LINALG_ERROR, "Unknown method: ", method_) + ! Outputs already initialized to zero above end select ! Handle error state @@ -184,14 +215,28 @@ contains integer(ilp) :: n, p, nc ${t1}$, allocatable :: x_centered(:,:) - + n = size(x, 1, kind=ilp) p = size(x, 2, kind=ilp) nc = size(components, 1, kind=ilp) - + + ! Validate dimensions + if (size(components, 2, kind=ilp) /= p) then + call error_stop("ERROR (pca_transform): components columns must match x columns") + end if + if (size(x_transformed, 1, kind=ilp) /= n .or. & + size(x_transformed, 2, kind=ilp) /= nc) then + call error_stop("ERROR (pca_transform): x_transformed shape must be [n, n_components]") + end if + if (present(x_mean)) then + if (size(x_mean, kind=ilp) /= p) then + call error_stop("ERROR (pca_transform): x_mean length must match x columns") + end if + end if + x_centered = x if (present(x_mean)) call center_data_${k1}$(x_centered, x_mean) - + ! x_transformed = x_centered * components^T using GEMM call gemm('N', 'T', n, nc, p, one, x_centered, n, components, nc, zero, x_transformed, n) end subroutine pca_transform_${k1}$ @@ -207,14 +252,28 @@ contains ${t1}$, intent(out) :: x_reconstructed(:,:) integer(ilp) :: i, j, n, nc, p - + n = size(x_reduced, 1, kind=ilp) nc = size(x_reduced, 2, kind=ilp) p = size(components, 2, kind=ilp) - + + ! Validate dimensions + if (size(components, 1, kind=ilp) /= nc) then + call error_stop("ERROR (pca_inverse_transform): components rows must match x_reduced columns") + end if + if (size(x_reconstructed, 1, kind=ilp) /= n .or. & + size(x_reconstructed, 2, kind=ilp) /= p) then + call error_stop("ERROR (pca_inverse_transform): x_reconstructed shape must be [n, p]") + end if + if (present(x_mean)) then + if (size(x_mean, kind=ilp) /= p) then + call error_stop("ERROR (pca_inverse_transform): x_mean length must match components columns") + end if + end if + ! x_reconstructed = x_reduced * components using GEMM call gemm('N', 'N', n, p, nc, one, x_reduced, n, components, nc, zero, x_reconstructed, n) - + if (present(x_mean)) then do concurrent(i=1:n, j=1:p) x_reconstructed(i, j) = x_reconstructed(i, j) + x_mean(j) From 1ac24a0317454abe8594909b36fbd76464609181 Mon Sep 17 00:00:00 2001 From: jaya sathvik Date: Sat, 21 Feb 2026 22:19:30 +0530 Subject: [PATCH 084/104] add test --- test/stats/test_pca.fypp | 34 +++++++++++++++++++++++++++++++--- 1 file changed, 31 insertions(+), 3 deletions(-) diff --git a/test/stats/test_pca.fypp b/test/stats/test_pca.fypp index 93f41ead3..8a6c65e6e 100644 --- a/test/stats/test_pca.fypp +++ b/test/stats/test_pca.fypp @@ -29,12 +29,13 @@ contains #:for k1 in REAL_KINDS subroutine test_pca_${k1}$(error) type(error_type), allocatable, intent(out) :: error - real(${k1}$) :: x(3, 2), components(2, 2), s(2), mu(2) + real(${k1}$) :: x(3, 2), x_copy(3, 2), components(2, 2), s(2), mu(2) real(${k1}$) :: x_trans(3, 2), x_inv(3, 2) type(linalg_state_type) :: err ! Data: [1, 2], [3, 4], [5, 6] x = reshape([1.0_${k1}$, 3.0_${k1}$, 5.0_${k1}$, 2.0_${k1}$, 4.0_${k1}$, 6.0_${k1}$], [3, 2]) + x_copy = x ! Test SVD method call pca(x, components, s, x_mean=mu, method="svd", err=err) @@ -47,10 +48,10 @@ contains ! First component should be approx [0.707, 0.707] (or negative) call check(error, abs(abs(components(1,1)) - 1.0_${k1}$/sqrt(2.0_${k1}$)) < ${k1}$tol, "pca_${k1}$ svd comp1") if (allocated(error)) return - + call check(error, abs(s(1) - 4.0_${k1}$) < ${k1}$tol, "pca_${k1}$ svd s1") if (allocated(error)) return - + call check(error, abs(s(2)) < ${k1}$tol, "pca_${k1}$ svd s2") if (allocated(error)) return @@ -60,6 +61,11 @@ contains call check(error, all(abs(x_trans(:, 2)) < ${k1}$tol), "pca_${k1}$ transform") if (allocated(error)) return + ! Test Transform without x_mean (keyword-based call) + call pca_transform(x, components, x_transformed=x_trans) + call check(error, size(x_trans, 1) == 3 .and. size(x_trans, 2) == 2, "pca_${k1}$ transform_no_mean shape") + if (allocated(error)) return + ! Test Inverse Transform call pca_inverse_transform(x_trans, components, mu, x_inv) call check(error, all(abs(x_inv - x) < ${k1}$tol), "pca_${k1}$ inverse") @@ -73,6 +79,28 @@ contains call check(error, abs(s(1) - 4.0_${k1}$) < ${k1}$tol, "pca_${k1}$ eig s1") if (allocated(error)) return + ! Test COV method (alias for EIG) + call pca(x, components, s, method="cov", err=err) + call check(error, err%ok(), "pca_${k1}$ cov err") + if (allocated(error)) return + + call check(error, abs(s(1) - 4.0_${k1}$) < ${k1}$tol, "pca_${k1}$ cov s1") + if (allocated(error)) return + + ! Test overwrite_x=.true. + x_copy = x + call pca(x_copy, components, s, method="svd", overwrite_x=.true., err=err) + call check(error, err%ok(), "pca_${k1}$ overwrite err") + if (allocated(error)) return + + call check(error, abs(s(1) - 4.0_${k1}$) < ${k1}$tol, "pca_${k1}$ overwrite s1") + if (allocated(error)) return + + ! Test case-insensitivity + call pca(x, components, s, method="SVD", err=err) + call check(error, err%ok(), "pca_${k1}$ case_insensitive err") + if (allocated(error)) return + end subroutine test_pca_${k1}$ #:endfor From 3b179e8c5f5a2fcae278d4c95b78a841906eea51 Mon Sep 17 00:00:00 2001 From: jaya sathvik Date: Sat, 21 Feb 2026 22:25:59 +0530 Subject: [PATCH 085/104] add keyword-based transform --- test/stats/test_pca.fypp | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/test/stats/test_pca.fypp b/test/stats/test_pca.fypp index 8a6c65e6e..84a283870 100644 --- a/test/stats/test_pca.fypp +++ b/test/stats/test_pca.fypp @@ -61,16 +61,16 @@ contains call check(error, all(abs(x_trans(:, 2)) < ${k1}$tol), "pca_${k1}$ transform") if (allocated(error)) return + ! Test Inverse Transform (must run before the no-mean transform overwrites x_trans) + call pca_inverse_transform(x_trans, components, mu, x_inv) + call check(error, all(abs(x_inv - x) < ${k1}$tol), "pca_${k1}$ inverse") + if (allocated(error)) return + ! Test Transform without x_mean (keyword-based call) call pca_transform(x, components, x_transformed=x_trans) call check(error, size(x_trans, 1) == 3 .and. size(x_trans, 2) == 2, "pca_${k1}$ transform_no_mean shape") if (allocated(error)) return - ! Test Inverse Transform - call pca_inverse_transform(x_trans, components, mu, x_inv) - call check(error, all(abs(x_inv - x) < ${k1}$tol), "pca_${k1}$ inverse") - if (allocated(error)) return - ! Test EIG method call pca(x, components, s, method="eig", err=err) call check(error, err%ok(), "pca_${k1}$ eig err") From 4d2d7a1ba0bc1be8d2c6d9dee377e7b5e259b633 Mon Sep 17 00:00:00 2001 From: jaya sathvik Date: Thu, 12 Mar 2026 23:58:53 +0530 Subject: [PATCH 086/104] fix interface declarations --- src/stats/stdlib_stats.fypp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/stats/stdlib_stats.fypp b/src/stats/stdlib_stats.fypp index d22cd8cf0..a7eaf010a 100644 --- a/src/stats/stdlib_stats.fypp +++ b/src/stats/stdlib_stats.fypp @@ -668,11 +668,11 @@ module stdlib_stats !! Projects data into the reduced dimensional space !! ([Specification](../page/specs/stdlib_stats.html#pca_transform)) #:for k1, t1, ri, cpp in REAL_KINDS_TYPES - module subroutine pca_transform_${k1}$(x, components, x_mean, x_transformed) + module subroutine pca_transform_${k1}$(x, components, x_transformed, x_mean) ${t1}$, intent(in) :: x(:,:) ${t1}$, intent(in) :: components(:,:) - ${t1}$, intent(in), optional :: x_mean(:) ${t1}$, intent(out) :: x_transformed(:,:) + ${t1}$, intent(in), optional :: x_mean(:) end subroutine pca_transform_${k1}$ #:endfor end interface pca_transform @@ -684,11 +684,11 @@ module stdlib_stats !! Reconstructs original data from the reduced space !! ([Specification](../page/specs/stdlib_stats.html#pca_inverse_transform)) #:for k1, t1, ri, cpp in REAL_KINDS_TYPES - module subroutine pca_inverse_transform_${k1}$(x_reduced, components, x_mean, x_reconstructed) + module subroutine pca_inverse_transform_${k1}$(x_reduced, components, x_reconstructed, x_mean) ${t1}$, intent(in) :: x_reduced(:,:) ${t1}$, intent(in) :: components(:,:) - ${t1}$, intent(in), optional :: x_mean(:) ${t1}$, intent(out) :: x_reconstructed(:,:) + ${t1}$, intent(in), optional :: x_mean(:) end subroutine pca_inverse_transform_${k1}$ #:endfor end interface pca_inverse_transform From f4c82458573f605f23a5f51c44557e82ce980844 Mon Sep 17 00:00:00 2001 From: jaya sathvik Date: Fri, 13 Mar 2026 00:00:28 +0530 Subject: [PATCH 087/104] reorder arguments --- src/stats/stdlib_stats_pca.fypp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/stats/stdlib_stats_pca.fypp b/src/stats/stdlib_stats_pca.fypp index 1d7c78b77..d08276d0f 100644 --- a/src/stats/stdlib_stats_pca.fypp +++ b/src/stats/stdlib_stats_pca.fypp @@ -206,12 +206,12 @@ contains #:for k1, t1, ri, cpp in REAL_KINDS_TYPES - module subroutine pca_transform_${k1}$(x, components, x_mean, x_transformed) + module subroutine pca_transform_${k1}$(x, components, x_transformed, x_mean) use stdlib_blas_constants_${k1}$, only: one, zero ${t1}$, intent(in) :: x(:,:) ${t1}$, intent(in) :: components(:,:) - ${t1}$, intent(in), optional :: x_mean(:) ${t1}$, intent(out) :: x_transformed(:,:) + ${t1}$, intent(in), optional :: x_mean(:) integer(ilp) :: n, p, nc ${t1}$, allocatable :: x_centered(:,:) @@ -244,12 +244,12 @@ contains #:for k1, t1, ri, cpp in REAL_KINDS_TYPES - module subroutine pca_inverse_transform_${k1}$(x_reduced, components, x_mean, x_reconstructed) + module subroutine pca_inverse_transform_${k1}$(x_reduced, components, x_reconstructed, x_mean) use stdlib_blas_constants_${k1}$, only: one, zero ${t1}$, intent(in) :: x_reduced(:,:) ${t1}$, intent(in) :: components(:,:) - ${t1}$, intent(in), optional :: x_mean(:) ${t1}$, intent(out) :: x_reconstructed(:,:) + ${t1}$, intent(in), optional :: x_mean(:) integer(ilp) :: i, j, n, nc, p From b267c3312e3e947a0d0ca7c2c999b5c0cc47c9de Mon Sep 17 00:00:00 2001 From: jaya sathvik Date: Fri, 13 Mar 2026 00:05:16 +0530 Subject: [PATCH 088/104] update docs and tests --- doc/specs/stdlib_stats.md | 24 +++++++----------------- example/stats/example_pca.f90 | 4 ++-- test/stats/test_pca.fypp | 6 +++--- 3 files changed, 12 insertions(+), 22 deletions(-) diff --git a/doc/specs/stdlib_stats.md b/doc/specs/stdlib_stats.md index 1d4f1ce67..fc2ffc780 100644 --- a/doc/specs/stdlib_stats.md +++ b/doc/specs/stdlib_stats.md @@ -301,7 +301,7 @@ Two methods are supported: ### Syntax -`call ` [[stdlib_stats(module):pca(interface)]] `(x, components, singular_values [, x_mean [, method [, overwrite_x [, err]]]])` +`call ` [[stdlib_stats(module):pca(interface)]] `(x, components, singular_values [, x_mean, method, overwrite_x, err])` ### Class @@ -342,12 +342,7 @@ The transformation is defined as `x_transformed = (x - x_mean) * components^T`. ### Syntax -`call ` [[stdlib_stats(module):pca_transform(interface)]] `(x, components, x_mean, x_transformed)` - -If `x_mean` is omitted, the non-trailing optional argument rule in Fortran requires -`x_transformed` to be passed by keyword, for example: - -`call ` [[stdlib_stats(module):pca_transform(interface)]] `(x, components, x_transformed=x_transformed)` +`call ` [[stdlib_stats(module):pca_transform(interface)]] `(x, components, x_transformed [, x_mean])` ### Class @@ -359,10 +354,10 @@ Generic subroutine `components`: Shall be a rank-2 real array with shape `(nc, n)`. It stores the principal components as rows. It is an `intent(in)` argument. -`x_mean` (optional): Shall be a rank-1 real array with shape `(n)`. It stores the feature means to subtract. It is an `intent(in)` argument. - `x_transformed`: Shall be a rank-2 real array with shape `(m, nc)`. It stores the projected data. It is an `intent(out)` argument. +`x_mean` (optional): Shall be a rank-1 real array with shape `(n)`. It stores the feature means to subtract. It is an `intent(in)` argument. + ## `pca_inverse_transform` - Reconstructs original data from principal component space ### Status @@ -376,12 +371,7 @@ The reconstruction is defined as `x_reconstructed = x_reduced * components + x_m ### Syntax -`call ` [[stdlib_stats(module):pca_inverse_transform(interface)]] `(x_reduced, components, x_mean, x_reconstructed)` - -If `x_mean` is omitted, the non-trailing optional argument rule in Fortran requires -`x_reconstructed` to be passed by keyword, for example: - -`call ` [[stdlib_stats(module):pca_inverse_transform(interface)]] `(x_reduced, components, x_reconstructed=x_reconstructed)` +`call ` [[stdlib_stats(module):pca_inverse_transform(interface)]] `(x_reduced, components, x_reconstructed [, x_mean])` ### Class @@ -393,10 +383,10 @@ Generic subroutine `components`: Shall be a rank-2 real array with shape `(nc, n)`. It stores the principal components as rows. It is an `intent(in)` argument. -`x_mean` (optional): Shall be a rank-1 real array with shape `(n)`. It stores the feature means to add back. It is an `intent(in)` argument. - `x_reconstructed`: Shall be a rank-2 real array with shape `(m, n)`. It stores the reconstructed data. It is an `intent(out)` argument. +`x_mean` (optional): Shall be a rank-1 real array with shape `(n)`. It stores the feature means to add back. It is an `intent(in)` argument. + ## `var` - variance of array elements ### Status diff --git a/example/stats/example_pca.f90 b/example/stats/example_pca.f90 index 78bef5359..d0e082ece 100644 --- a/example/stats/example_pca.f90 +++ b/example/stats/example_pca.f90 @@ -29,7 +29,7 @@ program example_pca print "(2f6.3)", components(2, :) ! Transform data to principal components space - call pca_transform(x, components, mu, x_trans) + call pca_transform(x, components, x_trans, mu) print *, "" print *, "Transformed data (projected):" do i = 1, 3 @@ -37,7 +37,7 @@ program example_pca end do ! Inverse transform to reconstruct original data - call pca_inverse_transform(x_trans, components, mu, x_inv) + call pca_inverse_transform(x_trans, components, x_inv, mu) print *, "" print *, "Reconstructed data:" do i = 1, 3 diff --git a/test/stats/test_pca.fypp b/test/stats/test_pca.fypp index 84a283870..1926c3ec3 100644 --- a/test/stats/test_pca.fypp +++ b/test/stats/test_pca.fypp @@ -56,18 +56,18 @@ contains if (allocated(error)) return ! Test Transform - call pca_transform(x, components, mu, x_trans) + call pca_transform(x, components, x_trans, mu) ! Second dimension should be zero call check(error, all(abs(x_trans(:, 2)) < ${k1}$tol), "pca_${k1}$ transform") if (allocated(error)) return ! Test Inverse Transform (must run before the no-mean transform overwrites x_trans) - call pca_inverse_transform(x_trans, components, mu, x_inv) + call pca_inverse_transform(x_trans, components, x_inv, mu) call check(error, all(abs(x_inv - x) < ${k1}$tol), "pca_${k1}$ inverse") if (allocated(error)) return ! Test Transform without x_mean (keyword-based call) - call pca_transform(x, components, x_transformed=x_trans) + call pca_transform(x, components, x_trans) call check(error, size(x_trans, 1) == 3 .and. size(x_trans, 2) == 2, "pca_${k1}$ transform_no_mean shape") if (allocated(error)) return From fd611a7be2092892edf96970e7043bd5eddf6aac Mon Sep 17 00:00:00 2001 From: jaya sathvik Date: Sun, 15 Mar 2026 09:50:10 +0530 Subject: [PATCH 089/104] Add explicit shape validation and nc < 1 checks to PCA subroutines --- src/stats/stdlib_stats_pca.fypp | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/src/stats/stdlib_stats_pca.fypp b/src/stats/stdlib_stats_pca.fypp index d08276d0f..038a41ccc 100644 --- a/src/stats/stdlib_stats_pca.fypp +++ b/src/stats/stdlib_stats_pca.fypp @@ -129,10 +129,10 @@ contains components = zero singular_values = zero - ! Input validation: check for empty arrays - if (n < 1 .or. p < 1) then + ! Input validation: check for empty arrays or non-positive dimensions + if (n < 1 .or. p < 1 .or. nc < 1) then err0 = linalg_state_type("pca", LINALG_VALUE_ERROR, & - "Input array must have at least 1 observation and 1 feature") + "Input array and output components must have at least 1 observation, feature, and component") call err0%handle(err) return end if @@ -221,6 +221,9 @@ contains nc = size(components, 1, kind=ilp) ! Validate dimensions + if (nc < 1) then + call error_stop("ERROR (pca_transform): number of components must be at least 1") + end if if (size(components, 2, kind=ilp) /= p) then call error_stop("ERROR (pca_transform): components columns must match x columns") end if @@ -258,6 +261,9 @@ contains p = size(components, 2, kind=ilp) ! Validate dimensions + if (nc < 1) then + call error_stop("ERROR (pca_inverse_transform): number of components must be at least 1") + end if if (size(components, 1, kind=ilp) /= nc) then call error_stop("ERROR (pca_inverse_transform): components rows must match x_reduced columns") end if From 87a1783b0dff2d3ef9fed1b918c4b4bd01b36bda Mon Sep 17 00:00:00 2001 From: jaya sathvik Date: Wed, 1 Apr 2026 00:25:13 +0530 Subject: [PATCH 090/104] Initialize regex module with minimal boilerplate --- CMakeLists.txt | 1 + doc/specs/stdlib_regex.md | 50 ++++++++++++++++++++++++++++++++++++++ src/CMakeLists.txt | 2 ++ src/regex/CMakeLists.txt | 8 ++++++ src/regex/stdlib_regex.f90 | 26 ++++++++++++++++++++ 5 files changed, 87 insertions(+) create mode 100644 doc/specs/stdlib_regex.md create mode 100644 src/regex/CMakeLists.txt create mode 100644 src/regex/stdlib_regex.f90 diff --git a/CMakeLists.txt b/CMakeLists.txt index a9e36de8c..215013d01 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -61,6 +61,7 @@ check_modular("SPECIALMATRICES") check_modular("STRINGLIST") check_modular("STATS") check_modular("SYSTEM") +check_modular("REGEX") option(FIND_BLAS "Find external BLAS and LAPACK" ON) diff --git a/doc/specs/stdlib_regex.md b/doc/specs/stdlib_regex.md new file mode 100644 index 000000000..b8dbcf7f6 --- /dev/null +++ b/doc/specs/stdlib_regex.md @@ -0,0 +1,50 @@ +--- +title: regex +--- + +# Regular Expressions + +[TOC] + +## `regex_type` - Regular expression type + +### Status + +Experimental + +### Description + +A type for representing compiled regular expressions. + +### Syntax + +```fortran +type(regex_type) :: re +``` + +## `regcomp` - Compile a regular expression + +### Status + +Experimental + +### Description + +Compiles a regular expression string into a `regex_type` object. + +### Syntax + +```fortran +call regcomp(re, pattern [, flags, status]) +``` + +### Class + +Subroutine + +### Arguments + +`re`: An `intent(out)` argument of type `regex_type`. +`pattern`: An `intent(in)` argument of type `character(len=*)`. +`flags` (optional): An `intent(in)` argument of type `integer`. +`status` (optional): An `intent(out)` argument of type `integer`. diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index f955ed352..8868930bd 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -42,6 +42,7 @@ add_subdirectory(strings) ADD_SUBDIR(system) ADD_SUBDIR(stats) +ADD_SUBDIR(regex) add_subdirectory(sparse) @@ -70,5 +71,6 @@ target_link_libraries(${PROJECT_NAME} PUBLIC ${PROJECT_NAME}_strings ${PROJECT_NAME}_blas ${PROJECT_NAME}_lapack ${PROJECT_NAME}_lapack_extended ${PROJECT_NAME}_sparse + ${PROJECT_NAME}_regex ${OPTIONAL_LIB} ) diff --git a/src/regex/CMakeLists.txt b/src/regex/CMakeLists.txt new file mode 100644 index 000000000..73d5e6624 --- /dev/null +++ b/src/regex/CMakeLists.txt @@ -0,0 +1,8 @@ +# CMakeLists.txt for stdlib_regex + +set(srcs + stdlib_regex.f90 +) + +add_library(stdlib_regex ${srcs}) +target_link_libraries(stdlib_regex PRIVATE stdlib_kinds) diff --git a/src/regex/stdlib_regex.f90 b/src/regex/stdlib_regex.f90 new file mode 100644 index 000000000..6001647ae --- /dev/null +++ b/src/regex/stdlib_regex.f90 @@ -0,0 +1,26 @@ +module stdlib_regex + use stdlib_kinds, only: dp, sp + implicit none + private + + public :: regex_type + public :: regcomp + + type :: regex_type + private + character(len=:), allocatable :: pattern + end type regex_type + +contains + + subroutine regcomp(re, pattern, flags, status) + type(regex_type), intent(out) :: re + character(len=*), intent(in) :: pattern + integer, intent(in), optional :: flags + integer, intent(out), optional :: status + + re%pattern = pattern + if (present(status)) status = 0 + end subroutine regcomp + +end module stdlib_regex From b11779ba25d656c567e5978dab29b92d8965cea3 Mon Sep 17 00:00:00 2001 From: jaya sathvik Date: Wed, 1 Apr 2026 09:23:45 +0530 Subject: [PATCH 091/104] resolve parsing and bounding checking --- src/regex/stdlib_regex.f90 | 627 +++++++++++++++++++++++++++++++++++- test/regex/stdlib_kinds.f90 | 7 + test/regex/test_regex.f90 | 40 +++ 3 files changed, 666 insertions(+), 8 deletions(-) create mode 100644 test/regex/stdlib_kinds.f90 create mode 100644 test/regex/test_regex.f90 diff --git a/src/regex/stdlib_regex.f90 b/src/regex/stdlib_regex.f90 index 6001647ae..33efada85 100644 --- a/src/regex/stdlib_regex.f90 +++ b/src/regex/stdlib_regex.f90 @@ -1,26 +1,637 @@ module stdlib_regex - use stdlib_kinds, only: dp, sp + use stdlib_kinds, only: dp, sp, int32 implicit none private public :: regex_type public :: regcomp + public :: regmatch + + ! Opcodes for NFA states + integer, parameter :: OP_CHAR = 1 + integer, parameter :: OP_ANY = 2 + integer, parameter :: OP_CLASS = 3 + integer, parameter :: OP_START = 4 + integer, parameter :: OP_END = 5 + integer, parameter :: OP_SPLIT = 6 + integer, parameter :: OP_MATCH = 7 + integer, parameter :: OP_JMP = 8 + + ! Tags for tokens + integer, parameter :: TOK_CHAR = 1 + integer, parameter :: TOK_ANY = 2 + integer, parameter :: TOK_CLASS = 3 + integer, parameter :: TOK_START = 4 + integer, parameter :: TOK_END = 5 + integer, parameter :: TOK_STAR = 6 + integer, parameter :: TOK_PLUS = 7 + integer, parameter :: TOK_QUEST = 8 + integer, parameter :: TOK_LPAREN = 9 + integer, parameter :: TOK_RPAREN = 10 + integer, parameter :: TOK_ALT = 11 + integer, parameter :: TOK_CONCAT = 12 + + type :: state_type + integer :: op + character(len=1) :: c + logical :: bmap(0:127) + logical :: invert + integer :: out1 + integer :: out2 + end type state_type + + type :: token_type + integer :: tag + character(len=1) :: c + logical :: bmap(0:127) + logical :: invert + end type token_type type :: regex_type - private - character(len=:), allocatable :: pattern + type(state_type), allocatable :: states(:) + integer :: start_state + integer :: n_states end type regex_type + type :: out_node + integer :: s + integer :: o + integer :: next + end type out_node + + type :: out_list_type + integer :: head + integer :: tail + end type out_list_type + + type :: frag_type + integer :: start + type(out_list_type) :: out_list + end type frag_type + + type :: thread + integer :: state + integer :: start_pos + end type thread + + type(out_node), allocatable :: out_pool(:) + contains - subroutine regcomp(re, pattern, flags, status) + logical function is_term_ender(tag) + integer, intent(in) :: tag + is_term_ender = (tag == TOK_CHAR .or. tag == TOK_ANY .or. & + tag == TOK_CLASS .or. tag == TOK_STAR .or. & + tag == TOK_PLUS .or. tag == TOK_QUEST .or. & + tag == TOK_RPAREN .or. tag == TOK_END .or. & + tag == TOK_START) + end function is_term_ender + + logical function is_term_starter(tag) + integer, intent(in) :: tag + is_term_starter = (tag == TOK_CHAR .or. tag == TOK_ANY .or. & + tag == TOK_CLASS .or. tag == TOK_LPAREN .or. & + tag == TOK_START .or. tag == TOK_END) + end function is_term_starter + + integer function prec(tag) + integer, intent(in) :: tag + if (tag == TOK_STAR .or. tag == TOK_PLUS .or. tag == TOK_QUEST) then + prec = 3 + else if (tag == TOK_CONCAT) then + prec = 2 + else if (tag == TOK_ALT) then + prec = 1 + else + prec = 0 + end if + end function prec + + subroutine tokenize(pattern, tokens, num_tokens, stat) + character(len=*), intent(in) :: pattern + type(token_type), allocatable, intent(out) :: tokens(:) + integer, intent(out) :: num_tokens + integer, intent(out) :: stat + + type(token_type), allocatable :: tmp_tokens(:) + type(token_type) :: t + integer :: i, k, len_p + character(len=1) :: c, c1, c2 + + len_p = len(pattern) + allocate(tmp_tokens(len_p * 4 + 1)) + num_tokens = 0 + stat = 0 + i = 1 + + do while (i <= len_p) + c = pattern(i:i) + t%tag = TOK_CHAR + t%c = ' ' + t%bmap = .false. + t%invert = .false. + + if (c == '\') then + if (i < len_p) then + i = i + 1 + c = pattern(i:i) + end if + t%tag = TOK_CHAR + t%c = c + if (c == 'd') then + t%tag = TOK_CLASS + do k = iachar('0'), iachar('9'); t%bmap(k) = .true.; end do + else if (c == 's') then + t%tag = TOK_CLASS + t%bmap(iachar(' ')) = .true. + t%bmap(iachar(char(9))) = .true. + t%bmap(iachar(char(10))) = .true. + t%bmap(iachar(char(13))) = .true. + else if (c == 'w') then + t%tag = TOK_CLASS + do k = iachar('a'), iachar('z'); t%bmap(k) = .true.; end do + do k = iachar('A'), iachar('Z'); t%bmap(k) = .true.; end do + do k = iachar('0'), iachar('9'); t%bmap(k) = .true.; end do + t%bmap(iachar('_')) = .true. + end if + else if (c == '.') then + t%tag = TOK_ANY + else if (c == '*') then + t%tag = TOK_STAR + else if (c == '+') then + t%tag = TOK_PLUS + else if (c == '?') then + t%tag = TOK_QUEST + else if (c == '|') then + t%tag = TOK_ALT + else if (c == '(') then + t%tag = TOK_LPAREN + else if (c == ')') then + t%tag = TOK_RPAREN + else if (c == '^') then + t%tag = TOK_START + else if (c == '$') then + t%tag = TOK_END + else if (c == '[') then + t%tag = TOK_CLASS + i = i + 1 + if (i <= len_p .and. pattern(i:i) == '^') then + t%invert = .true. + i = i + 1 + end if + do while (i <= len_p .and. pattern(i:i) /= ']') + if (pattern(i:i) == '\') then + i = i + 1 + if (i > len_p) exit + end if + c1 = pattern(i:i) + if (i + 2 <= len_p .and. pattern(i+1:i+1) == '-') then + if (pattern(i+2:i+2) /= ']') then + c2 = pattern(i+2:i+2) + do k = iachar(c1), iachar(c2) + if (k >= 0 .and. k <= 127) t%bmap(k) = .true. + end do + i = i + 3 + cycle + end if + end if + k = iachar(c1) + if (k >= 0 .and. k <= 127) t%bmap(k) = .true. + i = i + 1 + end do + if (i > len_p) stat = 1 ! missing ] + else + t%tag = TOK_CHAR + t%c = c + end if + + num_tokens = num_tokens + 1 + tmp_tokens(num_tokens) = t + i = i + 1 + end do + + allocate(tokens(num_tokens * 2 + 1)) + ! Inject concats + k = 0 + do i = 1, num_tokens + if (i > 1) then + if (is_term_ender(tmp_tokens(i-1)%tag) .and. is_term_starter(tmp_tokens(i)%tag)) then + k = k + 1 + tokens(k)%tag = TOK_CONCAT + tokens(k)%c = ' ' + tokens(k)%invert = .false. + tokens(k)%bmap = .false. + end if + end if + k = k + 1 + tokens(k) = tmp_tokens(i) + end do + num_tokens = k + + end subroutine tokenize + + subroutine parse_to_postfix(tokens, num_tokens, postfix, num_postfix, stat) + type(token_type), intent(in) :: tokens(:) + integer, intent(in) :: num_tokens + type(token_type), allocatable, intent(out) :: postfix(:) + integer, intent(out) :: num_postfix + integer, intent(out) :: stat + + type(token_type), allocatable :: stack(:) + integer :: top, i, tag + + allocate(postfix(num_tokens + 1)) + allocate(stack(num_tokens + 1)) + num_postfix = 0 + top = 0 + stat = 0 + + do i = 1, num_tokens + tag = tokens(i)%tag + if (tag == TOK_CHAR .or. tag == TOK_ANY .or. tag == TOK_CLASS .or. & + tag == TOK_START .or. tag == TOK_END) then + num_postfix = num_postfix + 1 + postfix(num_postfix) = tokens(i) + else if (tag == TOK_STAR .or. tag == TOK_PLUS .or. tag == TOK_QUEST) then + num_postfix = num_postfix + 1 + postfix(num_postfix) = tokens(i) + else if (tag == TOK_LPAREN) then + top = top + 1 + stack(top) = tokens(i) + else if (tag == TOK_RPAREN) then + do while (top > 0) + if (stack(top)%tag == TOK_LPAREN) exit + num_postfix = num_postfix + 1 + postfix(num_postfix) = stack(top) + top = top - 1 + end do + if (top == 0) then + stat = 1 ! mismatched parens + return + end if + top = top - 1 + else if (tag == TOK_CONCAT .or. tag == TOK_ALT) then + do while (top > 0) + if (stack(top)%tag == TOK_LPAREN) exit + if (prec(stack(top)%tag) < prec(tag)) exit + num_postfix = num_postfix + 1 + postfix(num_postfix) = stack(top) + top = top - 1 + end do + top = top + 1 + stack(top) = tokens(i) + end if + end do + + do while (top > 0) + if (stack(top)%tag == TOK_LPAREN) then + stat = 1 + return + end if + num_postfix = num_postfix + 1 + postfix(num_postfix) = stack(top) + top = top - 1 + end do + end subroutine parse_to_postfix + + integer function new_out(s, o, pool, p_size) + integer, intent(in) :: s, o + type(out_node), intent(inout) :: pool(:) + integer, intent(inout) :: p_size + p_size = p_size + 1 + pool(p_size)%s = s + pool(p_size)%o = o + pool(p_size)%next = 0 + new_out = p_size + end function new_out + + subroutine merge_lists(l1, l2, res, pool) + type(out_list_type), intent(in) :: l1, l2 + type(out_list_type), intent(out) :: res + type(out_node), intent(inout) :: pool(:) + if (l1%head == 0) then + res = l2 + else if (l2%head == 0) then + res = l1 + else + pool(l1%tail)%next = l2%head + res%head = l1%head + res%tail = l2%tail + end if + end subroutine merge_lists + + subroutine do_patch(states, list, target, pool) + type(state_type), intent(inout) :: states(:) + type(out_list_type), intent(in) :: list + integer, intent(in) :: target + type(out_node), intent(in) :: pool(:) + integer :: curr + curr = list%head + do while (curr /= 0) + if (pool(curr)%o == 1) then + states(pool(curr)%s)%out1 = target + else + states(pool(curr)%s)%out2 = target + end if + curr = pool(curr)%next + end do + end subroutine do_patch + + subroutine build_nfa(postfix, num_postfix, states, n_states, start_state, stat) + type(token_type), intent(in) :: postfix(:) + integer, intent(in) :: num_postfix + type(state_type), allocatable, intent(out) :: states(:) + integer, intent(out) :: n_states + integer, intent(out) :: start_state + integer, intent(out) :: stat + + type(frag_type), allocatable :: stack(:) + integer :: top, i, tag, out_idx + type(frag_type) :: f1, f2 + type(out_list_type) :: t_list, empty_list + type(out_node), allocatable :: local_pool(:) + integer :: p_size + + empty_list%head = 0 + empty_list%tail = 0 + + allocate(states((num_postfix+1) * 2)) + allocate(stack((num_postfix+1) * 2)) + allocate(local_pool((num_postfix+1) * 4)) + p_size = 0 + n_states = 0 + top = 0 + stat = 0 + + ! Empty pattern matches immediately + if (num_postfix == 0) then + n_states = n_states + 1 + states(n_states)%op = OP_MATCH + states(n_states)%out1 = 0 + states(n_states)%out2 = 0 + start_state = 1 + return + end if + + do i = 1, num_postfix + tag = postfix(i)%tag + + select case(tag) + case (TOK_CHAR, TOK_ANY, TOK_CLASS, TOK_START, TOK_END) + n_states = n_states + 1 + if (tag == TOK_CHAR) states(n_states)%op = OP_CHAR + if (tag == TOK_ANY) states(n_states)%op = OP_ANY + if (tag == TOK_CLASS) states(n_states)%op = OP_CLASS + if (tag == TOK_START) states(n_states)%op = OP_START + if (tag == TOK_END) states(n_states)%op = OP_END + + states(n_states)%c = postfix(i)%c + states(n_states)%bmap = postfix(i)%bmap + states(n_states)%invert = postfix(i)%invert + states(n_states)%out1 = 0 + states(n_states)%out2 = 0 + + top = top + 1 + stack(top)%start = n_states + out_idx = new_out(n_states, 1, local_pool, p_size) + stack(top)%out_list%head = out_idx + stack(top)%out_list%tail = out_idx + + case (TOK_CONCAT) + if (top < 2) then; stat = 1; return; end if + f2 = stack(top); top = top - 1 + f1 = stack(top) + + call do_patch(states, f1%out_list, f2%start, local_pool) + stack(top)%start = f1%start + stack(top)%out_list = f2%out_list + + case (TOK_ALT) + if (top < 2) then; stat = 1; return; end if + f2 = stack(top); top = top - 1 + f1 = stack(top) + + n_states = n_states + 1 + states(n_states)%op = OP_SPLIT + states(n_states)%out1 = f1%start + states(n_states)%out2 = f2%start + + stack(top)%start = n_states + call merge_lists(f1%out_list, f2%out_list, stack(top)%out_list, local_pool) + + case (TOK_QUEST) + if (top < 1) then; stat = 1; return; end if + f1 = stack(top) + + n_states = n_states + 1 + states(n_states)%op = OP_SPLIT + states(n_states)%out1 = f1%start + states(n_states)%out2 = 0 + + out_idx = new_out(n_states, 2, local_pool, p_size) + t_list%head = out_idx + t_list%tail = out_idx + call merge_lists(t_list, f1%out_list, stack(top)%out_list, local_pool) + stack(top)%start = n_states + + case (TOK_STAR) + if (top < 1) then; stat = 1; return; end if + f1 = stack(top) + + n_states = n_states + 1 + states(n_states)%op = OP_SPLIT + states(n_states)%out1 = f1%start + states(n_states)%out2 = 0 + + call do_patch(states, f1%out_list, n_states, local_pool) + + out_idx = new_out(n_states, 2, local_pool, p_size) + stack(top)%out_list%head = out_idx + stack(top)%out_list%tail = out_idx + stack(top)%start = n_states + + case (TOK_PLUS) + if (top < 1) then; stat = 1; return; end if + f1 = stack(top) + + n_states = n_states + 1 + states(n_states)%op = OP_SPLIT + states(n_states)%out1 = f1%start + states(n_states)%out2 = 0 + + call do_patch(states, f1%out_list, n_states, local_pool) + + out_idx = new_out(n_states, 2, local_pool, p_size) + stack(top)%out_list%head = out_idx + stack(top)%out_list%tail = out_idx + stack(top)%start = f1%start + + end select + end do + + if (top /= 1) then; stat = 1; return; end if + f1 = stack(1) + + n_states = n_states + 1 + states(n_states)%op = OP_MATCH + states(n_states)%out1 = 0 + states(n_states)%out2 = 0 + + call do_patch(states, f1%out_list, n_states, local_pool) + start_state = f1%start + ! print *, "DEBUG build_nfa success. n_states=", n_states + + + end subroutine build_nfa + + subroutine regcomp(re, pattern, status) type(regex_type), intent(out) :: re character(len=*), intent(in) :: pattern - integer, intent(in), optional :: flags integer, intent(out), optional :: status - - re%pattern = pattern - if (present(status)) status = 0 + + type(token_type), allocatable :: tokens(:) + type(token_type), allocatable :: postfix(:) + integer :: n_tok, n_post, stat + + call tokenize(pattern, tokens, n_tok, stat) + if (stat /= 0) then + if (present(status)) status = stat + return + end if + + call parse_to_postfix(tokens, n_tok, postfix, n_post, stat) + if (stat /= 0) then + if (present(status)) status = stat + return + end if + + call build_nfa(postfix, n_post, re%states, re%n_states, re%start_state, stat) + if (present(status)) status = stat end subroutine regcomp + recursive subroutine add_thread(list, count, state_idx, start_pos, step_index, states, str_len, visited) + type(thread), intent(inout) :: list(:) + integer, intent(inout) :: count + integer, intent(in) :: state_idx, start_pos, step_index + type(state_type), intent(in) :: states(:) + integer, intent(in) :: str_len + integer, intent(inout) :: visited(:) + integer :: op + + if (state_idx == 0) return + if (visited(state_idx) == step_index) return + visited(state_idx) = step_index + + op = states(state_idx)%op + if (op == OP_SPLIT) then + call add_thread(list, count, states(state_idx)%out1, start_pos, step_index, states, str_len, visited) + call add_thread(list, count, states(state_idx)%out2, start_pos, step_index, states, str_len, visited) + else if (op == OP_JMP) then + call add_thread(list, count, states(state_idx)%out1, start_pos, step_index, states, str_len, visited) + else if (op == OP_START) then + if (step_index == 0) then + call add_thread(list, count, states(state_idx)%out1, start_pos, step_index, states, str_len, visited) + end if + else if (op == OP_END) then + if (step_index == str_len) then + call add_thread(list, count, states(state_idx)%out1, start_pos, step_index, states, str_len, visited) + end if + else + count = count + 1 + list(count)%state = state_idx + list(count)%start_pos = start_pos + end if + end subroutine add_thread + + subroutine regmatch(re, string, is_match, match_start, match_end) + type(regex_type), intent(in) :: re + character(len=*), intent(in) :: string + logical, intent(out) :: is_match + integer, intent(out), optional :: match_start + integer, intent(out), optional :: match_end + + type(thread), allocatable :: clist(:), nlist(:) + integer :: c_cnt, n_cnt, i, j, step_index, str_len + integer :: c_code, op + integer :: b_start, b_end + logical :: match_char + type(thread) :: t + integer, allocatable :: visited(:) + + str_len = len(string) + allocate(clist(re%n_states * 2)) + allocate(nlist(re%n_states * 2)) + allocate(visited(re%n_states)) + + b_start = -1 + b_end = -1 + is_match = .false. + + ! Empty matches at the very beginning + visited = -1 + c_cnt = 0 + call add_thread(clist, c_cnt, re%start_state, 1, 0, re%states, str_len, visited) + + do j = 1, c_cnt + if (re%states(clist(j)%state)%op == OP_MATCH) then + b_start = 1 + b_end = 0 + end if + end do + + do i = 1, str_len + step_index = i + n_cnt = 0 + visited = -1 + + ! Always see if a new match can start here if we don't have one yet + if (b_start == -1) then + call add_thread(nlist, n_cnt, re%start_state, i, step_index, re%states, str_len, visited) + end if + + do j = 1, c_cnt + t = clist(j) + if (t%state == 0) cycle + op = re%states(t%state)%op + match_char = .false. + + if (op == OP_CHAR) then + if (re%states(t%state)%c == string(i:i)) match_char = .true. + else if (op == OP_ANY) then + match_char = .true. + else if (op == OP_CLASS) then + c_code = iachar(string(i:i)) + if (c_code >= 0 .and. c_code <= 127) then + if (re%states(t%state)%bmap(c_code)) match_char = .true. + end if + if (re%states(t%state)%invert) match_char = .not. match_char + end if + + if (match_char) then + call add_thread(nlist, n_cnt, re%states(t%state)%out1, t%start_pos, step_index, re%states, str_len, visited) + end if + end do + + do j = 1, n_cnt + if (re%states(nlist(j)%state)%op == OP_MATCH) then + if (b_start == -1 .or. nlist(j)%start_pos < b_start) then + b_start = nlist(j)%start_pos + b_end = i + else if (nlist(j)%start_pos == b_start .and. i > b_end) then + b_end = i + end if + end if + end do + + clist = nlist + c_cnt = n_cnt + end do + + if (b_start /= -1) then + is_match = .true. + if (present(match_start)) match_start = b_start + if (present(match_end)) match_end = b_end + end if + end subroutine regmatch + end module stdlib_regex diff --git a/test/regex/stdlib_kinds.f90 b/test/regex/stdlib_kinds.f90 new file mode 100644 index 000000000..7530275d7 --- /dev/null +++ b/test/regex/stdlib_kinds.f90 @@ -0,0 +1,7 @@ +module stdlib_kinds + implicit none + public + integer, parameter :: sp = kind(1.0) + integer, parameter :: dp = kind(1.0d0) + integer, parameter :: int32 = selected_int_kind(9) +end module stdlib_kinds diff --git a/test/regex/test_regex.f90 b/test/regex/test_regex.f90 new file mode 100644 index 000000000..c833d04b8 --- /dev/null +++ b/test/regex/test_regex.f90 @@ -0,0 +1,40 @@ +program test_regex + use stdlib_regex + implicit none + + type(regex_type) :: re + integer :: stat, match_start, match_end + logical :: is_match + + print *, "=== Testing Fortran Regex (Thompson NFA) ===" + + ! Test 1: Basic characters + call regcomp(re, "abc", stat) + print *, "regcomp 'abc': status = ", stat + call regmatch(re, "xyz_abc_def", is_match, match_start, match_end) + print *, "Match 'xyz_abc_def' -> ", is_match, match_start, match_end + + ! Test 2: Star operator + call regcomp(re, "a*b", stat) + call regmatch(re, "aaaab", is_match, match_start, match_end) + print *, "Match 'aaaab' with 'a*b' -> ", is_match, match_start, match_end + + ! Test 3: Plus and character classes + call regcomp(re, "[0-9]+", stat) + call regmatch(re, "foo123bar", is_match, match_start, match_end) + print *, "Match 'foo123bar' with '[0-9]+' -> ", is_match, match_start, match_end + + ! Test 4: Alternation and grouping + call regcomp(re, "(dog|cat)s?", stat) + call regmatch(re, "I have cats and dogs.", is_match, match_start, match_end) + print *, "Match 'cats' with '(dog|cat)s?' -> ", is_match, match_start, match_end + + ! Test 5: Anchors + call regcomp(re, "^foo", stat) + call regmatch(re, "bar foo", is_match) + print *, "Match 'bar foo' with '^foo' -> ", is_match + call regmatch(re, "foo bar", is_match) + print *, "Match 'foo bar' with '^foo' -> ", is_match + + print *, "=== End Tests ===" +end program test_regex From 48e252641d5d1177928bc80511b95588fb46ab38 Mon Sep 17 00:00:00 2001 From: jaya sathvik Date: Wed, 1 Apr 2026 15:37:49 +0530 Subject: [PATCH 092/104] remove use statement, add_subdir --- src/CMakeLists.txt | 1 - src/regex/CMakeLists.txt | 15 +++++++++------ src/regex/stdlib_regex.f90 | 5 ----- test/CMakeLists.txt | 3 +++ test/regex/CMakeLists.txt | 1 + test/regex/stdlib_kinds.f90 | 7 ------- 6 files changed, 13 insertions(+), 19 deletions(-) create mode 100644 test/regex/CMakeLists.txt delete mode 100644 test/regex/stdlib_kinds.f90 diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 8868930bd..64751d8cf 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -71,6 +71,5 @@ target_link_libraries(${PROJECT_NAME} PUBLIC ${PROJECT_NAME}_strings ${PROJECT_NAME}_blas ${PROJECT_NAME}_lapack ${PROJECT_NAME}_lapack_extended ${PROJECT_NAME}_sparse - ${PROJECT_NAME}_regex ${OPTIONAL_LIB} ) diff --git a/src/regex/CMakeLists.txt b/src/regex/CMakeLists.txt index 73d5e6624..2408bbe8a 100644 --- a/src/regex/CMakeLists.txt +++ b/src/regex/CMakeLists.txt @@ -1,8 +1,11 @@ -# CMakeLists.txt for stdlib_regex +set(regex_fppFiles + ) -set(srcs - stdlib_regex.f90 -) +set(regex_cppFiles + ) -add_library(stdlib_regex ${srcs}) -target_link_libraries(stdlib_regex PRIVATE stdlib_kinds) +set(regex_f90Files + stdlib_regex.f90 + ) + +configure_stdlib_target(${PROJECT_NAME}_regex regex_f90Files regex_fppFiles regex_cppFiles) diff --git a/src/regex/stdlib_regex.f90 b/src/regex/stdlib_regex.f90 index 33efada85..a41e230b8 100644 --- a/src/regex/stdlib_regex.f90 +++ b/src/regex/stdlib_regex.f90 @@ -1,5 +1,4 @@ module stdlib_regex - use stdlib_kinds, only: dp, sp, int32 implicit none private @@ -74,8 +73,6 @@ module stdlib_regex integer :: start_pos end type thread - type(out_node), allocatable :: out_pool(:) - contains logical function is_term_ender(tag) @@ -479,8 +476,6 @@ subroutine build_nfa(postfix, num_postfix, states, n_states, start_state, stat) call do_patch(states, f1%out_list, n_states, local_pool) start_state = f1%start - ! print *, "DEBUG build_nfa success. n_states=", n_states - end subroutine build_nfa diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index ea43bc8eb..a9d130efa 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -64,6 +64,9 @@ add_subdirectory(math) if (STDLIB_STRINGLIST) add_subdirectory(stringlist) endif() +if (STDLIB_REGEX) + add_subdirectory(regex) +endif() if (STDLIB_ANSI) add_subdirectory(terminal) endif() diff --git a/test/regex/CMakeLists.txt b/test/regex/CMakeLists.txt new file mode 100644 index 000000000..42bbe6b04 --- /dev/null +++ b/test/regex/CMakeLists.txt @@ -0,0 +1 @@ +ADDTEST(regex) diff --git a/test/regex/stdlib_kinds.f90 b/test/regex/stdlib_kinds.f90 deleted file mode 100644 index 7530275d7..000000000 --- a/test/regex/stdlib_kinds.f90 +++ /dev/null @@ -1,7 +0,0 @@ -module stdlib_kinds - implicit none - public - integer, parameter :: sp = kind(1.0) - integer, parameter :: dp = kind(1.0d0) - integer, parameter :: int32 = selected_int_kind(9) -end module stdlib_kinds From 629e5bb9106e1ed2a68e231f75055cee8a757a68 Mon Sep 17 00:00:00 2001 From: jaya sathvik Date: Fri, 3 Apr 2026 20:48:44 +0530 Subject: [PATCH 093/104] rewrote testdrive --- test/regex/test_regex.f90 | 287 ++++++++++++++++++++++++++++++++------ 1 file changed, 247 insertions(+), 40 deletions(-) diff --git a/test/regex/test_regex.f90 b/test/regex/test_regex.f90 index c833d04b8..8ea53bbb3 100644 --- a/test/regex/test_regex.f90 +++ b/test/regex/test_regex.f90 @@ -1,40 +1,247 @@ -program test_regex - use stdlib_regex - implicit none - - type(regex_type) :: re - integer :: stat, match_start, match_end - logical :: is_match - - print *, "=== Testing Fortran Regex (Thompson NFA) ===" - - ! Test 1: Basic characters - call regcomp(re, "abc", stat) - print *, "regcomp 'abc': status = ", stat - call regmatch(re, "xyz_abc_def", is_match, match_start, match_end) - print *, "Match 'xyz_abc_def' -> ", is_match, match_start, match_end - - ! Test 2: Star operator - call regcomp(re, "a*b", stat) - call regmatch(re, "aaaab", is_match, match_start, match_end) - print *, "Match 'aaaab' with 'a*b' -> ", is_match, match_start, match_end - - ! Test 3: Plus and character classes - call regcomp(re, "[0-9]+", stat) - call regmatch(re, "foo123bar", is_match, match_start, match_end) - print *, "Match 'foo123bar' with '[0-9]+' -> ", is_match, match_start, match_end - - ! Test 4: Alternation and grouping - call regcomp(re, "(dog|cat)s?", stat) - call regmatch(re, "I have cats and dogs.", is_match, match_start, match_end) - print *, "Match 'cats' with '(dog|cat)s?' -> ", is_match, match_start, match_end - - ! Test 5: Anchors - call regcomp(re, "^foo", stat) - call regmatch(re, "bar foo", is_match) - print *, "Match 'bar foo' with '^foo' -> ", is_match - call regmatch(re, "foo bar", is_match) - print *, "Match 'foo bar' with '^foo' -> ", is_match - - print *, "=== End Tests ===" -end program test_regex +module test_regex_mod + use testdrive, only : new_unittest, unittest_type, error_type, check + use stdlib_regex, only : regex_type, regcomp, regmatch + implicit none + private + + public :: collect_regex + +contains + + !> Collect all exported unit tests + subroutine collect_regex(testsuite) + !> Collection of tests + type(unittest_type), allocatable, intent(out) :: testsuite(:) + + testsuite = [ & + new_unittest("literal_match", test_literal_match), & + new_unittest("literal_no_match", test_literal_no_match), & + new_unittest("star_operator", test_star_operator), & + new_unittest("plus_char_class", test_plus_char_class), & + new_unittest("alternation_grouping", test_alternation_grouping), & + new_unittest("anchor_start_fail", test_anchor_start_fail), & + new_unittest("anchor_start_pass", test_anchor_start_pass), & + new_unittest("dot_any", test_dot_any), & + new_unittest("question_mark", test_question_mark), & + new_unittest("empty_pattern", test_empty_pattern) & + ] + end subroutine collect_regex + + subroutine test_literal_match(error) + !> Error handling + type(error_type), allocatable, intent(out) :: error + type(regex_type) :: re + integer :: stat, ms, me + logical :: found + + call regcomp(re, "abc", stat) + call check(error, stat == 0, "regcomp failed for 'abc'") + if (allocated(error)) return + + call regmatch(re, "xyz_abc_def", found, ms, me) + call check(error, found, "Should find 'abc' in 'xyz_abc_def'") + if (allocated(error)) return + + call check(error, ms == 5, "match_start should be 5") + if (allocated(error)) return + + call check(error, me == 7, "match_end should be 7") + if (allocated(error)) return + end subroutine test_literal_match + + subroutine test_literal_no_match(error) + !> Error handling + type(error_type), allocatable, intent(out) :: error + type(regex_type) :: re + integer :: stat + logical :: found + + call regcomp(re, "xyz", stat) + call check(error, stat == 0, "regcomp failed for 'xyz'") + if (allocated(error)) return + + call regmatch(re, "abcdef", found) + call check(error, .not. found, "Should not find 'xyz' in 'abcdef'") + if (allocated(error)) return + end subroutine test_literal_no_match + + subroutine test_star_operator(error) + !> Error handling + type(error_type), allocatable, intent(out) :: error + type(regex_type) :: re + integer :: stat + logical :: found + + call regcomp(re, "a*b", stat) + call check(error, stat == 0, "regcomp failed for 'a*b'") + if (allocated(error)) return + + call regmatch(re, "aaaab", found) + call check(error, found, "Should match 'aaaab' with 'a*b'") + if (allocated(error)) return + + call regmatch(re, "b", found) + call check(error, found, "Should match 'b' with 'a*b' (zero a's)") + if (allocated(error)) return + end subroutine test_star_operator + + subroutine test_plus_char_class(error) + !> Error handling + type(error_type), allocatable, intent(out) :: error + type(regex_type) :: re + integer :: stat, ms, me + logical :: found + + call regcomp(re, "[0-9]+", stat) + call check(error, stat == 0, "regcomp failed for '[0-9]+'") + if (allocated(error)) return + + call regmatch(re, "foo123bar", found, ms, me) + call check(error, found, "Should find digits in 'foo123bar'") + if (allocated(error)) return + + call regmatch(re, "no_digits_here", found) + call check(error, .not. found, "Should not find digits in 'no_digits_here'") + if (allocated(error)) return + end subroutine test_plus_char_class + + subroutine test_alternation_grouping(error) + !> Error handling + type(error_type), allocatable, intent(out) :: error + type(regex_type) :: re + integer :: stat + logical :: found + + call regcomp(re, "(dog|cat)s?", stat) + call check(error, stat == 0, "regcomp failed for '(dog|cat)s?'") + if (allocated(error)) return + + call regmatch(re, "I have cats and dogs.", found) + call check(error, found, "Should find 'cats' or 'dogs' in sentence") + if (allocated(error)) return + + call regmatch(re, "I have birds.", found) + call check(error, .not. found, "Should not find 'cat' or 'dog' in 'I have birds.'") + if (allocated(error)) return + end subroutine test_alternation_grouping + + subroutine test_anchor_start_fail(error) + !> Error handling + type(error_type), allocatable, intent(out) :: error + type(regex_type) :: re + integer :: stat + logical :: found + + call regcomp(re, "^foo", stat) + call check(error, stat == 0, "regcomp failed for '^foo'") + if (allocated(error)) return + + call regmatch(re, "bar foo", found) + call check(error, .not. found, "'^foo' should not match 'bar foo'") + if (allocated(error)) return + end subroutine test_anchor_start_fail + + subroutine test_anchor_start_pass(error) + !> Error handling + type(error_type), allocatable, intent(out) :: error + type(regex_type) :: re + integer :: stat + logical :: found + + call regcomp(re, "^foo", stat) + call check(error, stat == 0, "regcomp failed for '^foo'") + if (allocated(error)) return + + call regmatch(re, "foo bar", found) + call check(error, found, "'^foo' should match 'foo bar'") + if (allocated(error)) return + end subroutine test_anchor_start_pass + + subroutine test_dot_any(error) + !> Error handling + type(error_type), allocatable, intent(out) :: error + type(regex_type) :: re + integer :: stat + logical :: found + + call regcomp(re, "a.c", stat) + call check(error, stat == 0, "regcomp failed for 'a.c'") + if (allocated(error)) return + + call regmatch(re, "abc", found) + call check(error, found, "'a.c' should match 'abc'") + if (allocated(error)) return + + call regmatch(re, "aXc", found) + call check(error, found, "'a.c' should match 'aXc'") + if (allocated(error)) return + + call regmatch(re, "ac", found) + call check(error, .not. found, "'a.c' should not match 'ac'") + if (allocated(error)) return + end subroutine test_dot_any + + subroutine test_question_mark(error) + !> Error handling + type(error_type), allocatable, intent(out) :: error + type(regex_type) :: re + integer :: stat + logical :: found + + call regcomp(re, "colou?r", stat) + call check(error, stat == 0, "regcomp failed for 'colou?r'") + if (allocated(error)) return + + call regmatch(re, "color", found) + call check(error, found, "'colou?r' should match 'color'") + if (allocated(error)) return + + call regmatch(re, "colour", found) + call check(error, found, "'colou?r' should match 'colour'") + if (allocated(error)) return + end subroutine test_question_mark + + subroutine test_empty_pattern(error) + !> Error handling + type(error_type), allocatable, intent(out) :: error + type(regex_type) :: re + integer :: stat + logical :: found + + call regcomp(re, "", stat) + call check(error, stat == 0, "regcomp should succeed for empty pattern") + if (allocated(error)) return + + call regmatch(re, "anything", found) + call check(error, found, "Empty pattern should match any string") + if (allocated(error)) return + end subroutine test_empty_pattern + +end module test_regex_mod + + +program tester + use, intrinsic :: iso_fortran_env, only : error_unit + use testdrive, only : run_testsuite, new_testsuite, testsuite_type + use test_regex_mod, only : collect_regex + implicit none + integer :: stat, is + type(testsuite_type), allocatable :: testsuites(:) + character(len=*), parameter :: fmt = '("#", *(1x, a))' + + stat = 0 + + testsuites = [ & + new_testsuite("regex", collect_regex) & + ] + + do is = 1, size(testsuites) + write(error_unit, fmt) "Testing:", testsuites(is)%name + call run_testsuite(testsuites(is)%collect, error_unit, stat) + end do + + if (stat > 0) then + write(error_unit, '(i0, 1x, a)') stat, "test(s) failed!" + error stop + end if +end program From 4d991c0e99fcd3f5218fe5b9b5f52d9f2a2e9572 Mon Sep 17 00:00:00 2001 From: jaya sathvik Date: Sat, 4 Apr 2026 02:29:00 +0530 Subject: [PATCH 094/104] fix off-by-one match_start position --- src/regex/stdlib_regex.f90 | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/regex/stdlib_regex.f90 b/src/regex/stdlib_regex.f90 index a41e230b8..df20cde7a 100644 --- a/src/regex/stdlib_regex.f90 +++ b/src/regex/stdlib_regex.f90 @@ -581,7 +581,7 @@ subroutine regmatch(re, string, is_match, match_start, match_end) ! Always see if a new match can start here if we don't have one yet if (b_start == -1) then - call add_thread(nlist, n_cnt, re%start_state, i, step_index, re%states, str_len, visited) + call add_thread(nlist, n_cnt, re%start_state, i + 1, step_index, re%states, str_len, visited) end if do j = 1, c_cnt From 8e2390fe84e407db0357e03dbb51a2ac6ec6ce67 Mon Sep 17 00:00:00 2001 From: jaya sathvik Date: Sat, 4 Apr 2026 14:15:26 +0530 Subject: [PATCH 095/104] update doc --- doc/specs/index.md | 1 + doc/specs/stdlib_regex.md | 128 +++++++++++++++++++++++++++++++++++--- 2 files changed, 122 insertions(+), 7 deletions(-) diff --git a/doc/specs/index.md b/doc/specs/index.md index f709fb2ca..fc3ddeb11 100644 --- a/doc/specs/index.md +++ b/doc/specs/index.md @@ -30,6 +30,7 @@ This is an index/directory of the specifications (specs) for each new module/fea - [optval](./stdlib_optval.html) - Fallback value for optional arguments - [quadrature](./stdlib_quadrature.html) - Numerical integration - [random](./stdlib_random.html) - Probability Distributions random number generator + - [regex](./stdlib_regex.html) - Regular expression pattern matching - [sorting](./stdlib_sorting.html) - Sorting of rank one arrays - [stats](./stdlib_stats.html) - Descriptive Statistics - [stats_distributions_uniform](./stdlib_stats_distribution_uniform.html) - Uniform Probability Distribution diff --git a/doc/specs/stdlib_regex.md b/doc/specs/stdlib_regex.md index b8dbcf7f6..dc4602a7f 100644 --- a/doc/specs/stdlib_regex.md +++ b/doc/specs/stdlib_regex.md @@ -6,6 +6,33 @@ title: regex [TOC] +## Overview + +The `stdlib_regex` module provides a pure Fortran regular expression engine +based on Thompson's NFA (Nondeterministic Finite Automaton) construction. +It guarantees linear-time matching `O(n × m)` with no backtracking, +making it safe for use with arbitrary input without risk of catastrophic +performance degradation. + +### Supported Syntax + +| Pattern | Description | Example | +|-------------|--------------------------------------|------------------| +| `.` | Match any single character | `a.c` → `abc` | +| `*` | Zero or more of preceding element | `ab*c` → `ac` | +| `+` | One or more of preceding element | `ab+c` → `abbc` | +| `?` | Zero or one of preceding element | `colou?r` | +| `\|` | Alternation | `cat\|dog` | +| `(` `)` | Grouping | `(ab)+` | +| `[...]` | Character class | `[a-z]` | +| `[^...]` | Negated character class | `[^0-9]` | +| `^` | Start of string anchor | `^foo` | +| `$` | End of string anchor | `bar$` | +| `\d` | Digit `[0-9]` | `\d+` | +| `\w` | Word character `[a-zA-Z0-9_]` | `\w+` | +| `\s` | Whitespace (space, tab, newline, CR) | `\s+` | +| `\` | Escape next character | `\.` | + ## `regex_type` - Regular expression type ### Status @@ -14,7 +41,9 @@ Experimental ### Description -A type for representing compiled regular expressions. +A derived type representing a compiled regular expression. It stores the +internal NFA state graph produced by `regcomp` and is passed to `regmatch` +for pattern matching. ### Syntax @@ -30,12 +59,14 @@ Experimental ### Description -Compiles a regular expression string into a `regex_type` object. +Compiles a regular expression pattern string into a `regex_type` object. +The compiled object can then be reused for multiple calls to `regmatch` +without recompilation. ### Syntax ```fortran -call regcomp(re, pattern [, flags, status]) +call [[stdlib_regex(module):regcomp(subroutine)]](re, pattern [, status]) ``` ### Class @@ -44,7 +75,90 @@ Subroutine ### Arguments -`re`: An `intent(out)` argument of type `regex_type`. -`pattern`: An `intent(in)` argument of type `character(len=*)`. -`flags` (optional): An `intent(in)` argument of type `integer`. -`status` (optional): An `intent(out)` argument of type `integer`. +`re`: Shall be of type `regex_type`. It is an `intent(out)` argument. +The compiled regular expression. + +`pattern`: Shall be of type `character(len=*)`. It is an `intent(in)` argument. +The regular expression pattern string to compile. + +`status` (optional): Shall be of type `integer`. It is an `intent(out)` argument. +Returns 0 on success, or a non-zero value if the pattern is invalid +(e.g., mismatched parentheses or brackets). + +### Example + +```fortran +use stdlib_regex, only: regex_type, regcomp +type(regex_type) :: re +integer :: stat + +call regcomp(re, "(cat|dog)s?", stat) +if (stat /= 0) error stop "Invalid regex pattern" +``` + +## `regmatch` - Match a compiled regular expression + +### Status + +Experimental + +### Description + +Searches for the first occurrence of the compiled regular expression `re` +within the input `string`. If a match is found, `is_match` is set to `.true.` +and the optional `match_start` and `match_end` arguments are set to +the 1-based start and end positions of the matched substring. + +### Syntax + +```fortran +call [[stdlib_regex(module):regmatch(subroutine)]](re, string, is_match [, match_start, match_end]) +``` + +### Class + +Subroutine + +### Arguments + +`re`: Shall be of type `regex_type`. It is an `intent(in)` argument. +A compiled regular expression obtained from `regcomp`. + +`string`: Shall be of type `character(len=*)`. It is an `intent(in)` argument. +The input string to search for a match. + +`is_match`: Shall be of type `logical`. It is an `intent(out)` argument. +Set to `.true.` if a match is found, `.false.` otherwise. + +`match_start` (optional): Shall be of type `integer`. It is an `intent(out)` argument. +The 1-based index of the first character of the match. + +`match_end` (optional): Shall be of type `integer`. It is an `intent(out)` argument. +The 1-based index of the last character of the match. + +### Example + +```fortran +use stdlib_regex, only: regex_type, regcomp, regmatch + +type(regex_type) :: re +logical :: found +integer :: stat, ms, me + +! Find a sequence of digits +call regcomp(re, "[0-9]+", stat) +call regmatch(re, "foo123bar", found, ms, me) +! found = .true., ms = 4, me = 6 + +! Anchored match +call regcomp(re, "^hello", stat) +call regmatch(re, "hello world", found) +! found = .true. +call regmatch(re, "say hello", found) +! found = .false. + +! Alternation with optional suffix +call regcomp(re, "(cat|dog)s?", stat) +call regmatch(re, "I like cats", found, ms, me) +! found = .true., ms = 8, me = 11 +``` From bab5e5e92422c34837274d8208bbfd3ea0947b5c Mon Sep 17 00:00:00 2001 From: jaya sathvik Date: Sun, 5 Apr 2026 19:08:43 +0530 Subject: [PATCH 096/104] core engine logic, purity fix --- src/regex/stdlib_regex.f90 | 145 ++++++++++++++++++++++--------------- 1 file changed, 85 insertions(+), 60 deletions(-) diff --git a/src/regex/stdlib_regex.f90 b/src/regex/stdlib_regex.f90 index df20cde7a..72eec6ddf 100644 --- a/src/regex/stdlib_regex.f90 +++ b/src/regex/stdlib_regex.f90 @@ -30,6 +30,19 @@ module stdlib_regex integer, parameter :: TOK_ALT = 11 integer, parameter :: TOK_CONCAT = 12 + ! Ascii character constants + integer, parameter :: CHAR_ZERO = iachar('0') + integer, parameter :: CHAR_NINE = iachar('9') + integer, parameter :: CHAR_LOWER_A = iachar('a') + integer, parameter :: CHAR_LOWER_Z = iachar('z') + integer, parameter :: CHAR_UPPER_A = iachar('A') + integer, parameter :: CHAR_UPPER_Z = iachar('Z') + integer, parameter :: CHAR_SPACE = iachar(' ') + integer, parameter :: CHAR_TAB = 9 + integer, parameter :: CHAR_LF = 10 + integer, parameter :: CHAR_CR = 13 + integer, parameter :: CHAR_UNDERSCORE = iachar('_') + type :: state_type integer :: op character(len=1) :: c @@ -75,7 +88,7 @@ module stdlib_regex contains - logical function is_term_ender(tag) + pure logical function is_term_ender(tag) integer, intent(in) :: tag is_term_ender = (tag == TOK_CHAR .or. tag == TOK_ANY .or. & tag == TOK_CLASS .or. tag == TOK_STAR .or. & @@ -84,14 +97,14 @@ logical function is_term_ender(tag) tag == TOK_START) end function is_term_ender - logical function is_term_starter(tag) + pure logical function is_term_starter(tag) integer, intent(in) :: tag is_term_starter = (tag == TOK_CHAR .or. tag == TOK_ANY .or. & tag == TOK_CLASS .or. tag == TOK_LPAREN .or. & tag == TOK_START .or. tag == TOK_END) end function is_term_starter - integer function prec(tag) + pure integer function prec(tag) integer, intent(in) :: tag if (tag == TOK_STAR .or. tag == TOK_PLUS .or. tag == TOK_QUEST) then prec = 3 @@ -137,19 +150,19 @@ subroutine tokenize(pattern, tokens, num_tokens, stat) t%c = c if (c == 'd') then t%tag = TOK_CLASS - do k = iachar('0'), iachar('9'); t%bmap(k) = .true.; end do + t%bmap(CHAR_ZERO:CHAR_NINE) = .true. else if (c == 's') then t%tag = TOK_CLASS - t%bmap(iachar(' ')) = .true. - t%bmap(iachar(char(9))) = .true. - t%bmap(iachar(char(10))) = .true. - t%bmap(iachar(char(13))) = .true. + t%bmap(CHAR_SPACE) = .true. + t%bmap(CHAR_TAB) = .true. + t%bmap(CHAR_LF) = .true. + t%bmap(CHAR_CR) = .true. else if (c == 'w') then t%tag = TOK_CLASS - do k = iachar('a'), iachar('z'); t%bmap(k) = .true.; end do - do k = iachar('A'), iachar('Z'); t%bmap(k) = .true.; end do - do k = iachar('0'), iachar('9'); t%bmap(k) = .true.; end do - t%bmap(iachar('_')) = .true. + t%bmap(CHAR_LOWER_A:CHAR_LOWER_Z) = .true. + t%bmap(CHAR_UPPER_A:CHAR_UPPER_Z) = .true. + t%bmap(CHAR_ZERO:CHAR_NINE) = .true. + t%bmap(CHAR_UNDERSCORE) = .true. end if else if (c == '.') then t%tag = TOK_ANY @@ -185,9 +198,7 @@ subroutine tokenize(pattern, tokens, num_tokens, stat) if (i + 2 <= len_p .and. pattern(i+1:i+1) == '-') then if (pattern(i+2:i+2) /= ']') then c2 = pattern(i+2:i+2) - do k = iachar(c1), iachar(c2) - if (k >= 0 .and. k <= 127) t%bmap(k) = .true. - end do + t%bmap(max(0, iachar(c1)):min(127, iachar(c2))) = .true. i = i + 3 cycle end if @@ -291,16 +302,17 @@ subroutine parse_to_postfix(tokens, num_tokens, postfix, num_postfix, stat) end do end subroutine parse_to_postfix - integer function new_out(s, o, pool, p_size) + subroutine new_out(s, o, pool, p_size, return_idx) integer, intent(in) :: s, o type(out_node), intent(inout) :: pool(:) integer, intent(inout) :: p_size + integer, intent(out) :: return_idx p_size = p_size + 1 pool(p_size)%s = s pool(p_size)%o = o pool(p_size)%next = 0 - new_out = p_size - end function new_out + return_idx = p_size + end subroutine new_out subroutine merge_lists(l1, l2, res, pool) type(out_list_type), intent(in) :: l1, l2 @@ -317,11 +329,11 @@ subroutine merge_lists(l1, l2, res, pool) end if end subroutine merge_lists - subroutine do_patch(states, list, target, pool) - type(state_type), intent(inout) :: states(:) + subroutine do_patch(list, target, pool, states) type(out_list_type), intent(in) :: list integer, intent(in) :: target type(out_node), intent(in) :: pool(:) + type(state_type), intent(inout) :: states(:) integer :: curr curr = list%head do while (curr /= 0) @@ -390,7 +402,7 @@ subroutine build_nfa(postfix, num_postfix, states, n_states, start_state, stat) top = top + 1 stack(top)%start = n_states - out_idx = new_out(n_states, 1, local_pool, p_size) + call new_out(n_states, 1, local_pool, p_size, out_idx) stack(top)%out_list%head = out_idx stack(top)%out_list%tail = out_idx @@ -399,7 +411,7 @@ subroutine build_nfa(postfix, num_postfix, states, n_states, start_state, stat) f2 = stack(top); top = top - 1 f1 = stack(top) - call do_patch(states, f1%out_list, f2%start, local_pool) + call do_patch(f1%out_list, f2%start, local_pool, states) stack(top)%start = f1%start stack(top)%out_list = f2%out_list @@ -425,7 +437,7 @@ subroutine build_nfa(postfix, num_postfix, states, n_states, start_state, stat) states(n_states)%out1 = f1%start states(n_states)%out2 = 0 - out_idx = new_out(n_states, 2, local_pool, p_size) + call new_out(n_states, 2, local_pool, p_size, out_idx) t_list%head = out_idx t_list%tail = out_idx call merge_lists(t_list, f1%out_list, stack(top)%out_list, local_pool) @@ -440,9 +452,9 @@ subroutine build_nfa(postfix, num_postfix, states, n_states, start_state, stat) states(n_states)%out1 = f1%start states(n_states)%out2 = 0 - call do_patch(states, f1%out_list, n_states, local_pool) + call do_patch(f1%out_list, n_states, local_pool, states) - out_idx = new_out(n_states, 2, local_pool, p_size) + call new_out(n_states, 2, local_pool, p_size, out_idx) stack(top)%out_list%head = out_idx stack(top)%out_list%tail = out_idx stack(top)%start = n_states @@ -456,9 +468,9 @@ subroutine build_nfa(postfix, num_postfix, states, n_states, start_state, stat) states(n_states)%out1 = f1%start states(n_states)%out2 = 0 - call do_patch(states, f1%out_list, n_states, local_pool) + call do_patch(f1%out_list, n_states, local_pool, states) - out_idx = new_out(n_states, 2, local_pool, p_size) + call new_out(n_states, 2, local_pool, p_size, out_idx) stack(top)%out_list%head = out_idx stack(top)%out_list%tail = out_idx stack(top)%start = f1%start @@ -474,7 +486,7 @@ subroutine build_nfa(postfix, num_postfix, states, n_states, start_state, stat) states(n_states)%out1 = 0 states(n_states)%out2 = 0 - call do_patch(states, f1%out_list, n_states, local_pool) + call do_patch(f1%out_list, n_states, local_pool, states) start_state = f1%start end subroutine build_nfa @@ -504,38 +516,51 @@ subroutine regcomp(re, pattern, status) if (present(status)) status = stat end subroutine regcomp - recursive subroutine add_thread(list, count, state_idx, start_pos, step_index, states, str_len, visited) - type(thread), intent(inout) :: list(:) - integer, intent(inout) :: count + subroutine add_thread(state_idx, start_pos, step_index, states, str_len, list, count, visited) integer, intent(in) :: state_idx, start_pos, step_index type(state_type), intent(in) :: states(:) integer, intent(in) :: str_len + type(thread), intent(inout) :: list(:) + integer, intent(inout) :: count integer, intent(inout) :: visited(:) - integer :: op - + + integer :: op, curr_state, top + integer, allocatable :: stack(:) + if (state_idx == 0) return - if (visited(state_idx) == step_index) return - visited(state_idx) = step_index - op = states(state_idx)%op - if (op == OP_SPLIT) then - call add_thread(list, count, states(state_idx)%out1, start_pos, step_index, states, str_len, visited) - call add_thread(list, count, states(state_idx)%out2, start_pos, step_index, states, str_len, visited) - else if (op == OP_JMP) then - call add_thread(list, count, states(state_idx)%out1, start_pos, step_index, states, str_len, visited) - else if (op == OP_START) then - if (step_index == 0) then - call add_thread(list, count, states(state_idx)%out1, start_pos, step_index, states, str_len, visited) - end if - else if (op == OP_END) then - if (step_index == str_len) then - call add_thread(list, count, states(state_idx)%out1, start_pos, step_index, states, str_len, visited) + allocate(stack(max(1, size(states) * 2))) + top = 1 + stack(top) = state_idx + + do while (top > 0) + curr_state = stack(top) + top = top - 1 + + if (curr_state == 0) cycle + if (visited(curr_state) == step_index) cycle + visited(curr_state) = step_index + + op = states(curr_state)%op + if (op == OP_SPLIT) then + top = top + 1; stack(top) = states(curr_state)%out1 + top = top + 1; stack(top) = states(curr_state)%out2 + else if (op == OP_JMP) then + top = top + 1; stack(top) = states(curr_state)%out1 + else if (op == OP_START) then + if (step_index == 0) then + top = top + 1; stack(top) = states(curr_state)%out1 + end if + else if (op == OP_END) then + if (step_index == str_len) then + top = top + 1; stack(top) = states(curr_state)%out1 + end if + else + count = count + 1 + list(count)%state = curr_state + list(count)%start_pos = start_pos end if - else - count = count + 1 - list(count)%state = state_idx - list(count)%start_pos = start_pos - end if + end do end subroutine add_thread subroutine regmatch(re, string, is_match, match_start, match_end) @@ -565,7 +590,7 @@ subroutine regmatch(re, string, is_match, match_start, match_end) ! Empty matches at the very beginning visited = -1 c_cnt = 0 - call add_thread(clist, c_cnt, re%start_state, 1, 0, re%states, str_len, visited) + call add_thread(re%start_state, 1, 0, re%states, str_len, clist, c_cnt, visited) do j = 1, c_cnt if (re%states(clist(j)%state)%op == OP_MATCH) then @@ -579,11 +604,6 @@ subroutine regmatch(re, string, is_match, match_start, match_end) n_cnt = 0 visited = -1 - ! Always see if a new match can start here if we don't have one yet - if (b_start == -1) then - call add_thread(nlist, n_cnt, re%start_state, i + 1, step_index, re%states, str_len, visited) - end if - do j = 1, c_cnt t = clist(j) if (t%state == 0) cycle @@ -603,10 +623,15 @@ subroutine regmatch(re, string, is_match, match_start, match_end) end if if (match_char) then - call add_thread(nlist, n_cnt, re%states(t%state)%out1, t%start_pos, step_index, re%states, str_len, visited) + call add_thread(re%states(t%state)%out1, t%start_pos, step_index, re%states, str_len, nlist, n_cnt, visited) end if end do + ! Always see if a new match can start here if we don't have one yet + if (b_start == -1) then + call add_thread(re%start_state, i + 1, step_index, re%states, str_len, nlist, n_cnt, visited) + end if + do j = 1, n_cnt if (re%states(nlist(j)%state)%op == OP_MATCH) then if (b_start == -1 .or. nlist(j)%start_pos < b_start) then From c58b15d3885be9280c3041641a435f02dc38d9a8 Mon Sep 17 00:00:00 2001 From: jaya sathvik Date: Sun, 5 Apr 2026 20:05:44 +0530 Subject: [PATCH 097/104] standalone example (pattern matching) --- example/regex/example_regex_regcomp.f90 | 10 ++++++++++ example/regex/example_regex_regmatch.f90 | 25 ++++++++++++++++++++++++ 2 files changed, 35 insertions(+) create mode 100644 example/regex/example_regex_regcomp.f90 create mode 100644 example/regex/example_regex_regmatch.f90 diff --git a/example/regex/example_regex_regcomp.f90 b/example/regex/example_regex_regcomp.f90 new file mode 100644 index 000000000..79b93f273 --- /dev/null +++ b/example/regex/example_regex_regcomp.f90 @@ -0,0 +1,10 @@ +program example_regex_regcomp + use stdlib_regex, only: regex_type, regcomp + implicit none + type(regex_type) :: re + integer :: stat + + call regcomp(re, "(cat|dog)s?", stat) + if (stat /= 0) error stop "Invalid regex pattern" + print *, "Pattern compiled successfully." +end program example_regex_regcomp diff --git a/example/regex/example_regex_regmatch.f90 b/example/regex/example_regex_regmatch.f90 new file mode 100644 index 000000000..4b0e03357 --- /dev/null +++ b/example/regex/example_regex_regmatch.f90 @@ -0,0 +1,25 @@ +program example_regex_regmatch + use stdlib_regex, only: regex_type, regcomp, regmatch + implicit none + type(regex_type) :: re + logical :: found + integer :: stat, ms, me + + ! Find a sequence of digits + call regcomp(re, "[0-9]+", stat) + call regmatch(re, "foo123bar", found, ms, me) + print "(A,L1,A,I0,A,I0)", "found = ", found, ", ms = ", ms, ", me = ", me + + ! Anchored match + call regcomp(re, "^hello", stat) + call regmatch(re, "hello world", found) + print "(A,L1)", "found = ", found + call regmatch(re, "say hello", found) + print "(A,L1)", "found = ", found + + ! Alternation with optional suffix + call regcomp(re, "(cat|dog)s?", stat) + call regmatch(re, "I like cats", found, ms, me) + print "(A,L1,A,I0,A,I0)", "found = ", found, ", ms = ", ms, ", me = ", me + +end program example_regex_regmatch From 8d27abc253fb1a69b375ecc8941947097695ed4f Mon Sep 17 00:00:00 2001 From: jaya sathvik Date: Sun, 5 Apr 2026 20:06:27 +0530 Subject: [PATCH 098/104] new build config --- example/CMakeLists.txt | 3 +++ 1 file changed, 3 insertions(+) diff --git a/example/CMakeLists.txt b/example/CMakeLists.txt index c2ce46fcf..103514a2a 100644 --- a/example/CMakeLists.txt +++ b/example/CMakeLists.txt @@ -44,6 +44,9 @@ add_subdirectory(optval) if (STDLIB_QUADRATURE) add_subdirectory(quadrature) endif() +if (STDLIB_REGEX) + add_subdirectory(regex) +endif() add_subdirectory(selection) add_subdirectory(sorting) add_subdirectory(specialfunctions_gamma) From 8ed79422550854901a4193d19b185ce32c6b06c4 Mon Sep 17 00:00:00 2001 From: jaya sathvik Date: Sun, 5 Apr 2026 20:07:46 +0530 Subject: [PATCH 099/104] add regex examples --- example/regex/CMakeLists.txt | 2 ++ 1 file changed, 2 insertions(+) create mode 100644 example/regex/CMakeLists.txt diff --git a/example/regex/CMakeLists.txt b/example/regex/CMakeLists.txt new file mode 100644 index 000000000..1e33d32bc --- /dev/null +++ b/example/regex/CMakeLists.txt @@ -0,0 +1,2 @@ +ADD_EXAMPLE(regex_regcomp) +ADD_EXAMPLE(regex_regmatch) From 29f598be5a88aef57a5e55ea2496299ecc650755 Mon Sep 17 00:00:00 2001 From: jaya sathvik Date: Mon, 6 Apr 2026 01:09:38 +0530 Subject: [PATCH 100/104] update docs --- doc/specs/stdlib_regex.md | 30 ++---------------------------- 1 file changed, 2 insertions(+), 28 deletions(-) diff --git a/doc/specs/stdlib_regex.md b/doc/specs/stdlib_regex.md index dc4602a7f..bb1fa75e8 100644 --- a/doc/specs/stdlib_regex.md +++ b/doc/specs/stdlib_regex.md @@ -88,12 +88,7 @@ Returns 0 on success, or a non-zero value if the pattern is invalid ### Example ```fortran -use stdlib_regex, only: regex_type, regcomp -type(regex_type) :: re -integer :: stat - -call regcomp(re, "(cat|dog)s?", stat) -if (stat /= 0) error stop "Invalid regex pattern" +{!example/regex/example_regex_regcomp.f90!} ``` ## `regmatch` - Match a compiled regular expression @@ -139,26 +134,5 @@ The 1-based index of the last character of the match. ### Example ```fortran -use stdlib_regex, only: regex_type, regcomp, regmatch - -type(regex_type) :: re -logical :: found -integer :: stat, ms, me - -! Find a sequence of digits -call regcomp(re, "[0-9]+", stat) -call regmatch(re, "foo123bar", found, ms, me) -! found = .true., ms = 4, me = 6 - -! Anchored match -call regcomp(re, "^hello", stat) -call regmatch(re, "hello world", found) -! found = .true. -call regmatch(re, "say hello", found) -! found = .false. - -! Alternation with optional suffix -call regcomp(re, "(cat|dog)s?", stat) -call regmatch(re, "I like cats", found, ms, me) -! found = .true., ms = 8, me = 11 +{!example/regex/example_regex_regmatch.f90!} ``` From dbeedced00a1485039274040e5eb7e4fe209fcc9 Mon Sep 17 00:00:00 2001 From: jaya sathvik Date: Fri, 10 Apr 2026 13:52:32 +0530 Subject: [PATCH 101/104] add strict rules --- src/regex/stdlib_regex.f90 | 24 +++-- test/regex/catalogue_regex.f90 | 178 +++++++++++++++++++++++++++++++++ 2 files changed, 196 insertions(+), 6 deletions(-) create mode 100644 test/regex/catalogue_regex.f90 diff --git a/src/regex/stdlib_regex.f90 b/src/regex/stdlib_regex.f90 index 72eec6ddf..15bef89b7 100644 --- a/src/regex/stdlib_regex.f90 +++ b/src/regex/stdlib_regex.f90 @@ -166,17 +166,29 @@ subroutine tokenize(pattern, tokens, num_tokens, stat) end if else if (c == '.') then t%tag = TOK_ANY - else if (c == '*') then - t%tag = TOK_STAR - else if (c == '+') then - t%tag = TOK_PLUS - else if (c == '?') then - t%tag = TOK_QUEST + else if (c == '*' .or. c == '+' .or. c == '?') then + if (num_tokens == 0) then + stat = 1 + else + ! Valid repeatable tags: CHAR, ANY, CLASS, RPAREN + if (tmp_tokens(num_tokens)%tag /= TOK_CHAR .and. & + tmp_tokens(num_tokens)%tag /= TOK_ANY .and. & + tmp_tokens(num_tokens)%tag /= TOK_CLASS .and. & + tmp_tokens(num_tokens)%tag /= TOK_RPAREN) then + stat = 1 + end if + end if + if (c == '*') t%tag = TOK_STAR + if (c == '+') t%tag = TOK_PLUS + if (c == '?') t%tag = TOK_QUEST else if (c == '|') then t%tag = TOK_ALT else if (c == '(') then t%tag = TOK_LPAREN else if (c == ')') then + if (num_tokens > 0) then + if (tmp_tokens(num_tokens)%tag == TOK_LPAREN) stat = 1 + end if t%tag = TOK_RPAREN else if (c == '^') then t%tag = TOK_START diff --git a/test/regex/catalogue_regex.f90 b/test/regex/catalogue_regex.f90 new file mode 100644 index 000000000..3ca4e5a6e --- /dev/null +++ b/test/regex/catalogue_regex.f90 @@ -0,0 +1,178 @@ +program catalogue_regex + use stdlib_regex + + implicit none + + type(regex_type) :: re + character(len=100) :: line + character(len=20) :: keyword + character(len=:), allocatable :: value + character(len=:), allocatable :: expression + character(len=:), allocatable :: string + character(len=:), allocatable :: expected + + integer :: match_start, match_end, status, ierr + integer :: mismatches + logical :: matched + + open( 10, file = 'catalogue_regex.inp', status = 'old', iostat = ierr ) + if ( ierr /= 0 ) then + write( *, '(a)' ) 'Could not open the file "catalogue_regex.inp"' + write( *, '(a)' ) 'It should exist - please check' + error stop + endif + + open( 20, file = 'catalogue_regex.report' ) + + mismatches = 0 + + do + read( 10, '(a)', iostat = ierr ) line + + if ( ierr /= 0 ) then + exit + endif + + call extract_information( line, keyword, value ) + + select case( keyword ) + case( 'expression' ) + expression = value + + case( 'input' ) + string = value + + case( 'expected' ) + write( 20, '(a)' ) '' + + expected = value + + call regcomp( re, expression, status ) + + if ( status /= 0 ) then + mismatches = mismatches + 1 + write( 20, '(a,i0)' ) 'Error compiling the expression: status = ', status + write( 20, '(a,2a)' ) ' Expression: "', expression, '"' + else + call regmatch( re, string, matched, match_start, match_end ) + + if ( matched ) then + write( 20, '(a,2a)' ) 'Match found:' + write( 20, '(a,2a)' ) ' Expression: "', expression, '"' + write( 20, '(a,2a)' ) ' Input string: "', string, '"' + write( 20, '(a,2a)' ) ' Substring: "', string(match_start:match_end), '"' + write( 20, '(a,2a)' ) ' Expected: "', expected, '"' + if ( expected == string(match_start:match_end) ) then + write( 20, '(a,2a)' ) ' Success!' + else + mismatches = mismatches + 1 + write( 20, '(a,2a)' ) ' MISMATCH!' + endif + else + mismatches = mismatches + 1 + write( 20, '(a,2a)' ) 'NO match found:' + write( 20, '(a,2a)' ) ' Expression: "', expression, '"' + write( 20, '(a,2a)' ) ' Input string: "', string, '"' + write( 20, '(a,2a)' ) ' Substring: (none)' + write( 20, '(a,2a)' ) ' Expected: "', expected, '"' + endif + endif + + case( 'error-exp' ) + write( 20, '(a)' ) '' + call regcomp( re, expression, status ) + + if ( status /= 0 ) then + write( 20, '(a)' ) 'Error detected as expected:' + write( 20, '(a,2a)' ) ' Expression: "', expression, '"' + else + mismatches = mismatches + 1 + write( 20, '(a)' ) 'An error was expected but not detected:' + write( 20, '(a,2a)' ) ' Expression: "', expression, '"' + endif + + case( 'no-match' ) + write( 20, '(a)' ) '' + call regcomp( re, expression, status ) + + if ( status /= 0 ) then + mismatches = mismatches + 1 + write( 20, '(a,i0)' ) 'Error compiling the expression: status = ', status + write( 20, '(a,2a)' ) ' Expression: "', expression, '"' + else + call regmatch( re, string, matched, match_start, match_end ) + + if ( matched ) then + mismatches = mismatches + 1 + write( 20, '(a,2a)' ) 'Match found where none expected:' + write( 20, '(a,2a)' ) ' Expression: "', expression, '"' + write( 20, '(a,2a)' ) ' Input string: "', string, '"' + write( 20, '(a,2a)' ) ' Substring: "', string(match_start:match_end), '"' + write( 20, '(a,2a)' ) ' Expected: (none)' + else + write( 20, '(a,2a)' ) 'No match found, as expected:' + write( 20, '(a,2a)' ) ' Expression: "', expression, '"' + write( 20, '(a,2a)' ) ' Input string: "', string, '"' + write( 20, '(a,2a)' ) ' Expected: (none)' + endif + endif + + case default + ! Treat any other keyword as comment + + end select + enddo + + write( 20, '(/,a,i0)' ) 'Number of mismatches or other errors: ', mismatches + write( *, '(a)' ) 'Program completed' + +contains + +subroutine extract_information( line, keyword, value ) + character(len=*), intent(in) :: line + character(len=*), intent(out) :: keyword + character(len=:), intent(out), allocatable :: value + + character(len=20), dimension(5) :: known_keywords = & + [ 'expression ', & + 'input ', & + 'expected ', & + 'error-exp ', & + 'no-match ' ] + integer :: k1, k2 + + if ( line == " " ) then + keyword = "" + value = "" + return + endif + + read( line, *, iostat = ierr ) keyword + + if ( keyword == 'error-exp' .or. keyword == 'no-match' ) then + value = "" + return + endif + + if ( any( keyword == known_keywords ) ) then + allocate( value, mold = line ) + + k1 = index( line, '"' ) + if ( k1 > 0 ) then + k2 = k1 + index( line(k1+1:), '"' ) + if ( k2 > 0 ) then + value = line(k1+1:k2-1) + else + write( 20, '(a)' ) 'Error interpreting the input line:' + write( 20, '(2a)' ) ' "', trim(line), '"' + write( 20, '(2a)' ) 'Program stopped' + write( *, '(2a)' ) 'Program stopped - error reading input. Please check' + error stop + endif + endif + else + value = "" + endif +end subroutine extract_information + +end program catalogue_regex From 0cc9abd6fc5e1a494db4f6d63addc01d88e4ddc8 Mon Sep 17 00:00:00 2001 From: jaya sathvik Date: Fri, 17 Apr 2026 19:55:59 +0530 Subject: [PATCH 102/104] refactor utility function --- src/regex/stdlib_regex.f90 | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/regex/stdlib_regex.f90 b/src/regex/stdlib_regex.f90 index 15bef89b7..1f2ff73e2 100644 --- a/src/regex/stdlib_regex.f90 +++ b/src/regex/stdlib_regex.f90 @@ -88,7 +88,7 @@ module stdlib_regex contains - pure logical function is_term_ender(tag) + elemental logical function is_term_ender(tag) integer, intent(in) :: tag is_term_ender = (tag == TOK_CHAR .or. tag == TOK_ANY .or. & tag == TOK_CLASS .or. tag == TOK_STAR .or. & @@ -97,14 +97,14 @@ pure logical function is_term_ender(tag) tag == TOK_START) end function is_term_ender - pure logical function is_term_starter(tag) + elemental logical function is_term_starter(tag) integer, intent(in) :: tag is_term_starter = (tag == TOK_CHAR .or. tag == TOK_ANY .or. & tag == TOK_CLASS .or. tag == TOK_LPAREN .or. & tag == TOK_START .or. tag == TOK_END) end function is_term_starter - pure integer function prec(tag) + elemental integer function prec(tag) integer, intent(in) :: tag if (tag == TOK_STAR .or. tag == TOK_PLUS .or. tag == TOK_QUEST) then prec = 3 From 544f34a91591fa816ad6ec70d4e9cae5d6126187 Mon Sep 17 00:00:00 2001 From: jaya sathvik Date: Sun, 19 Apr 2026 16:12:01 +0530 Subject: [PATCH 103/104] address review feedback on unit numbers and constants --- example/regex/example_regex_regmatch.f90 | 8 +-- src/regex/stdlib_regex.f90 | 32 +++------- test/regex/catalogue_regex.f90 | 80 ++++++++++++------------ 3 files changed, 54 insertions(+), 66 deletions(-) diff --git a/example/regex/example_regex_regmatch.f90 b/example/regex/example_regex_regmatch.f90 index 4b0e03357..972b6c302 100644 --- a/example/regex/example_regex_regmatch.f90 +++ b/example/regex/example_regex_regmatch.f90 @@ -8,18 +8,18 @@ program example_regex_regmatch ! Find a sequence of digits call regcomp(re, "[0-9]+", stat) call regmatch(re, "foo123bar", found, ms, me) - print "(A,L1,A,I0,A,I0)", "found = ", found, ", ms = ", ms, ", me = ", me + print "(a,l1,a,i0,a,i0)", "found = ", found, ", ms = ", ms, ", me = ", me ! Anchored match call regcomp(re, "^hello", stat) call regmatch(re, "hello world", found) - print "(A,L1)", "found = ", found + print "(a,l1)", "found = ", found call regmatch(re, "say hello", found) - print "(A,L1)", "found = ", found + print "(a,l1)", "found = ", found ! Alternation with optional suffix call regcomp(re, "(cat|dog)s?", stat) call regmatch(re, "I like cats", found, ms, me) - print "(A,L1,A,I0,A,I0)", "found = ", found, ", ms = ", ms, ", me = ", me + print "(a,l1,a,i0,a,i0)", "found = ", found, ", ms = ", ms, ", me = ", me end program example_regex_regmatch diff --git a/src/regex/stdlib_regex.f90 b/src/regex/stdlib_regex.f90 index 1f2ff73e2..2fe3bfd43 100644 --- a/src/regex/stdlib_regex.f90 +++ b/src/regex/stdlib_regex.f90 @@ -1,4 +1,5 @@ module stdlib_regex + use stdlib_ascii, only: TAB, LF, CR implicit none private @@ -30,19 +31,6 @@ module stdlib_regex integer, parameter :: TOK_ALT = 11 integer, parameter :: TOK_CONCAT = 12 - ! Ascii character constants - integer, parameter :: CHAR_ZERO = iachar('0') - integer, parameter :: CHAR_NINE = iachar('9') - integer, parameter :: CHAR_LOWER_A = iachar('a') - integer, parameter :: CHAR_LOWER_Z = iachar('z') - integer, parameter :: CHAR_UPPER_A = iachar('A') - integer, parameter :: CHAR_UPPER_Z = iachar('Z') - integer, parameter :: CHAR_SPACE = iachar(' ') - integer, parameter :: CHAR_TAB = 9 - integer, parameter :: CHAR_LF = 10 - integer, parameter :: CHAR_CR = 13 - integer, parameter :: CHAR_UNDERSCORE = iachar('_') - type :: state_type integer :: op character(len=1) :: c @@ -150,19 +138,19 @@ subroutine tokenize(pattern, tokens, num_tokens, stat) t%c = c if (c == 'd') then t%tag = TOK_CLASS - t%bmap(CHAR_ZERO:CHAR_NINE) = .true. + t%bmap(iachar('0'):iachar('9')) = .true. else if (c == 's') then t%tag = TOK_CLASS - t%bmap(CHAR_SPACE) = .true. - t%bmap(CHAR_TAB) = .true. - t%bmap(CHAR_LF) = .true. - t%bmap(CHAR_CR) = .true. + t%bmap(iachar(' ')) = .true. + t%bmap(iachar(TAB)) = .true. + t%bmap(iachar(LF)) = .true. + t%bmap(iachar(CR)) = .true. else if (c == 'w') then t%tag = TOK_CLASS - t%bmap(CHAR_LOWER_A:CHAR_LOWER_Z) = .true. - t%bmap(CHAR_UPPER_A:CHAR_UPPER_Z) = .true. - t%bmap(CHAR_ZERO:CHAR_NINE) = .true. - t%bmap(CHAR_UNDERSCORE) = .true. + t%bmap(iachar('a'):iachar('z')) = .true. + t%bmap(iachar('A'):iachar('Z')) = .true. + t%bmap(iachar('0'):iachar('9')) = .true. + t%bmap(iachar('_')) = .true. end if else if (c == '.') then t%tag = TOK_ANY diff --git a/test/regex/catalogue_regex.f90 b/test/regex/catalogue_regex.f90 index 3ca4e5a6e..f2373e908 100644 --- a/test/regex/catalogue_regex.f90 +++ b/test/regex/catalogue_regex.f90 @@ -11,23 +11,23 @@ program catalogue_regex character(len=:), allocatable :: string character(len=:), allocatable :: expected - integer :: match_start, match_end, status, ierr + integer :: match_start, match_end, status, ierr, un, un20 integer :: mismatches logical :: matched - open( 10, file = 'catalogue_regex.inp', status = 'old', iostat = ierr ) + open( newunit=un, file = 'catalogue_regex.inp', status = 'old', iostat = ierr ) if ( ierr /= 0 ) then write( *, '(a)' ) 'Could not open the file "catalogue_regex.inp"' write( *, '(a)' ) 'It should exist - please check' error stop endif - open( 20, file = 'catalogue_regex.report' ) + open( newunit=un20, file = 'catalogue_regex.report' ) mismatches = 0 do - read( 10, '(a)', iostat = ierr ) line + read( un, '(a)', iostat = ierr ) line if ( ierr /= 0 ) then exit @@ -43,7 +43,7 @@ program catalogue_regex string = value case( 'expected' ) - write( 20, '(a)' ) '' + write( un20, '(a)' ) '' expected = value @@ -51,69 +51,69 @@ program catalogue_regex if ( status /= 0 ) then mismatches = mismatches + 1 - write( 20, '(a,i0)' ) 'Error compiling the expression: status = ', status - write( 20, '(a,2a)' ) ' Expression: "', expression, '"' + write( un20, '(a,i0)' ) 'Error compiling the expression: status = ', status + write( un20, '(a,2a)' ) ' Expression: "', expression, '"' else call regmatch( re, string, matched, match_start, match_end ) if ( matched ) then - write( 20, '(a,2a)' ) 'Match found:' - write( 20, '(a,2a)' ) ' Expression: "', expression, '"' - write( 20, '(a,2a)' ) ' Input string: "', string, '"' - write( 20, '(a,2a)' ) ' Substring: "', string(match_start:match_end), '"' - write( 20, '(a,2a)' ) ' Expected: "', expected, '"' + write( un20, '(a,2a)' ) 'Match found:' + write( un20, '(a,2a)' ) ' Expression: "', expression, '"' + write( un20, '(a,2a)' ) ' Input string: "', string, '"' + write( un20, '(a,2a)' ) ' Substring: "', string(match_start:match_end), '"' + write( un20, '(a,2a)' ) ' Expected: "', expected, '"' if ( expected == string(match_start:match_end) ) then - write( 20, '(a,2a)' ) ' Success!' + write( un20, '(a,2a)' ) ' Success!' else mismatches = mismatches + 1 - write( 20, '(a,2a)' ) ' MISMATCH!' + write( un20, '(a,2a)' ) ' MISMATCH!' endif else mismatches = mismatches + 1 - write( 20, '(a,2a)' ) 'NO match found:' - write( 20, '(a,2a)' ) ' Expression: "', expression, '"' - write( 20, '(a,2a)' ) ' Input string: "', string, '"' - write( 20, '(a,2a)' ) ' Substring: (none)' - write( 20, '(a,2a)' ) ' Expected: "', expected, '"' + write( un20, '(a,2a)' ) 'NO match found:' + write( un20, '(a,2a)' ) ' Expression: "', expression, '"' + write( un20, '(a,2a)' ) ' Input string: "', string, '"' + write( un20, '(a,2a)' ) ' Substring: (none)' + write( un20, '(a,2a)' ) ' Expected: "', expected, '"' endif endif case( 'error-exp' ) - write( 20, '(a)' ) '' + write( un20, '(a)' ) '' call regcomp( re, expression, status ) if ( status /= 0 ) then - write( 20, '(a)' ) 'Error detected as expected:' - write( 20, '(a,2a)' ) ' Expression: "', expression, '"' + write( un20, '(a)' ) 'Error detected as expected:' + write( un20, '(a,2a)' ) ' Expression: "', expression, '"' else mismatches = mismatches + 1 - write( 20, '(a)' ) 'An error was expected but not detected:' - write( 20, '(a,2a)' ) ' Expression: "', expression, '"' + write( un20, '(a)' ) 'An error was expected but not detected:' + write( un20, '(a,2a)' ) ' Expression: "', expression, '"' endif case( 'no-match' ) - write( 20, '(a)' ) '' + write( un20, '(a)' ) '' call regcomp( re, expression, status ) if ( status /= 0 ) then mismatches = mismatches + 1 - write( 20, '(a,i0)' ) 'Error compiling the expression: status = ', status - write( 20, '(a,2a)' ) ' Expression: "', expression, '"' + write( un20, '(a,i0)' ) 'Error compiling the expression: status = ', status + write( un20, '(a,2a)' ) ' Expression: "', expression, '"' else call regmatch( re, string, matched, match_start, match_end ) if ( matched ) then mismatches = mismatches + 1 - write( 20, '(a,2a)' ) 'Match found where none expected:' - write( 20, '(a,2a)' ) ' Expression: "', expression, '"' - write( 20, '(a,2a)' ) ' Input string: "', string, '"' - write( 20, '(a,2a)' ) ' Substring: "', string(match_start:match_end), '"' - write( 20, '(a,2a)' ) ' Expected: (none)' + write( un20, '(a,2a)' ) 'Match found where none expected:' + write( un20, '(a,2a)' ) ' Expression: "', expression, '"' + write( un20, '(a,2a)' ) ' Input string: "', string, '"' + write( un20, '(a,2a)' ) ' Substring: "', string(match_start:match_end), '"' + write( un20, '(a,2a)' ) ' Expected: (none)' else - write( 20, '(a,2a)' ) 'No match found, as expected:' - write( 20, '(a,2a)' ) ' Expression: "', expression, '"' - write( 20, '(a,2a)' ) ' Input string: "', string, '"' - write( 20, '(a,2a)' ) ' Expected: (none)' + write( un20, '(a,2a)' ) 'No match found, as expected:' + write( un20, '(a,2a)' ) ' Expression: "', expression, '"' + write( un20, '(a,2a)' ) ' Input string: "', string, '"' + write( un20, '(a,2a)' ) ' Expected: (none)' endif endif @@ -123,7 +123,7 @@ program catalogue_regex end select enddo - write( 20, '(/,a,i0)' ) 'Number of mismatches or other errors: ', mismatches + write( un20, '(/,a,i0)' ) 'Number of mismatches or other errors: ', mismatches write( *, '(a)' ) 'Program completed' contains @@ -163,9 +163,9 @@ subroutine extract_information( line, keyword, value ) if ( k2 > 0 ) then value = line(k1+1:k2-1) else - write( 20, '(a)' ) 'Error interpreting the input line:' - write( 20, '(2a)' ) ' "', trim(line), '"' - write( 20, '(2a)' ) 'Program stopped' + write( un20, '(a)' ) 'Error interpreting the input line:' + write( un20, '(2a)' ) ' "', trim(line), '"' + write( un20, '(2a)' ) 'Program stopped' write( *, '(2a)' ) 'Program stopped - error reading input. Please check' error stop endif From d78f89ab89e65e95923ed619ea0f0cc1143b7053 Mon Sep 17 00:00:00 2001 From: jaya sathvik Date: Sun, 19 Apr 2026 16:18:35 +0530 Subject: [PATCH 104/104] regex: add CMake dependency for stdlib_core --- src/regex/CMakeLists.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/src/regex/CMakeLists.txt b/src/regex/CMakeLists.txt index 2408bbe8a..05a711d6e 100644 --- a/src/regex/CMakeLists.txt +++ b/src/regex/CMakeLists.txt @@ -9,3 +9,4 @@ set(regex_f90Files ) configure_stdlib_target(${PROJECT_NAME}_regex regex_f90Files regex_fppFiles regex_cppFiles) +target_link_libraries(${PROJECT_NAME}_regex PUBLIC ${PROJECT_NAME}_core)