Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Poisson solver #32

Merged
merged 26 commits into from Mar 7, 2024
Merged
Show file tree
Hide file tree
Changes from 24 commits
Commits
Show all changes
26 commits
Select commit Hold shift + click to select a range
56688de
Add a Poisson solver selection mechanism.
semi-h Feb 15, 2024
f037d11
Implement a dedicated class for the FFT based Poisson solver.
semi-h Feb 15, 2024
2fdb1b0
Store compact scheme coefficients in tdsops class.
semi-h Feb 15, 2024
a306ee4
Store domain length and dx in dirps_t.
semi-h Feb 19, 2024
c7ff92a
Change the way we initialise poisson_fft class.
semi-h Feb 19, 2024
fc3810f
Construct the waves array for all periodic BCs.
semi-h Feb 19, 2024
991bb92
Create cufft plans.
semi-h Feb 19, 2024
2756ee7
Add postprocessing in spectral space.
semi-h Feb 19, 2024
d5c159a
Store plans at poisson_fft class level.
semi-h Feb 20, 2024
2f4ad61
Add forward FFT transforms.
semi-h Feb 21, 2024
3d46312
Add backward FFT transforms.
semi-h Feb 21, 2024
a7c25d9
Fix allocation size of the waves array on GPU.
semi-h Feb 21, 2024
95c1b36
Change shape of the c_x/y/z arrays.
semi-h Feb 21, 2024
0c5d456
Add postprocessing kernel call.
semi-h Feb 21, 2024
aac5af2
Add complex reordering functions.
semi-h Feb 22, 2024
279666d
Call complex reordering functions in forward and backward FFTs.
semi-h Feb 22, 2024
eaaf2c2
No need to initialise backends with dirps any more.
semi-h Feb 22, 2024
a262347
Cleanups.
semi-h Feb 22, 2024
9824841
Cleanup and style fix.
semi-h Mar 1, 2024
a5e2a1a
Add reshape subroutines for transposing pencil groups.
semi-h Mar 1, 2024
cbb2dcd
Use reshapes to carry out local transposes.
semi-h Mar 1, 2024
95164c3
Change the Poisson solver selection mechanism.
semi-h Mar 1, 2024
68b47d0
Add initial conditions for TGV.
semi-h Mar 1, 2024
0a4be35
Output enstrophy and divergence of u.
semi-h Mar 5, 2024
f8a1ecd
Cleanups and comments.
semi-h Mar 6, 2024
370d1c4
Comment.
semi-h Mar 7, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
5 changes: 5 additions & 0 deletions src/CMakeLists.txt
Expand Up @@ -2,12 +2,14 @@ set(SRC
allocator.f90
backend.f90
common.f90
poisson_fft.f90
solver.f90
tdsops.f90
time_integrator.f90
omp/backend.f90
omp/common.f90
omp/kernels_dist.f90
omp/poisson_fft.f90
omp/sendrecv.f90
omp/exec_dist.f90
)
Expand All @@ -17,7 +19,9 @@ set(CUDASRC
cuda/common.f90
cuda/exec_dist.f90
cuda/kernels/distributed.f90
cuda/kernels/complex.f90
cuda/kernels/reorder.f90
cuda/poisson_fft.f90
cuda/sendrecv.f90
cuda/tdsops.f90
)
Expand All @@ -40,6 +44,7 @@ if(${CMAKE_Fortran_COMPILER_ID} STREQUAL "PGI" OR
set(CMAKE_Fortran_FLAGS_DEBUG "-g -O0 -traceback -Mbounds -Mchkptr -Ktrap=fp")
set(CMAKE_Fortran_FLAGS_RELEASE "-O3 -fast")
target_link_options(x3d2 INTERFACE "-cuda")
target_link_options(x3d2 INTERFACE "-lcufft")

target_compile_options(xcompact PRIVATE "-DCUDA")
# target_link_options(xcompact INTERFACE "-cuda")
Expand Down
14 changes: 14 additions & 0 deletions src/backend.f90
@@ -1,6 +1,7 @@
module m_base_backend
use m_allocator, only: allocator_t, field_t
use m_common, only: dp
use m_poisson_fft, only: poisson_fft_t
use m_tdsops, only: tdsops_t, dirps_t

implicit none
Expand All @@ -24,6 +25,7 @@ module m_base_backend
integer :: nx_loc, ny_loc, nz_loc
class(allocator_t), pointer :: allocator
class(dirps_t), pointer :: xdirps, ydirps, zdirps
class(poisson_fft_t), pointer :: poisson_fft
contains
procedure(transeq_ders), deferred :: transeq_x
procedure(transeq_ders), deferred :: transeq_y
Expand All @@ -37,6 +39,7 @@ module m_base_backend
procedure(get_field), deferred :: get_field
procedure(set_field), deferred :: set_field
procedure(alloc_tdsops), deferred :: alloc_tdsops
procedure(init_poisson_fft), deferred :: init_poisson_fft
end type base_backend_t

abstract interface
Expand Down Expand Up @@ -188,4 +191,15 @@ subroutine alloc_tdsops(self, tdsops, n, dx, operation, scheme, n_halo, &
end subroutine alloc_tdsops
end interface

abstract interface
  subroutine init_poisson_fft(self, xdirps, ydirps, zdirps)
    !! Deferred interface for setting up the FFT based Poisson solver of
    !! a backend.
    !!
    !! Implementations receive the backend instance and one `dirps_t`
    !! object per direction (x, y and z).
    import :: base_backend_t, dirps_t
    implicit none

    class(base_backend_t) :: self
    type(dirps_t), intent(in) :: xdirps
    type(dirps_t), intent(in) :: ydirps
    type(dirps_t), intent(in) :: zdirps
  end subroutine init_poisson_fft
end interface

end module m_base_backend
5 changes: 5 additions & 0 deletions src/common.f90
Expand Up @@ -7,14 +7,19 @@ module m_common
! Integer identifiers for the reordering operations.
! NOTE(review): the names and values suggest a "from -> to" direction pair
! encoded as two digits with X=1, Y=2, Z=3 (e.g. RDR_X2Y = 12) - confirm
! against the code that consumes them.
integer, parameter :: RDR_X2Y = 12, RDR_X2Z = 13, RDR_Y2X = 21, &
RDR_Y2Z = 23, RDR_Z2X = 31, RDR_Z2Y = 32

! Integer identifiers for the selectable Poisson solver types.
! NOTE(review): presumably FFT = FFT based solver and CG = conjugate
! gradient solver - confirm against the solver selection code.
integer, parameter :: POISSON_SOLVER_FFT = 0, POISSON_SOLVER_CG = 1

type :: globs_t
  !! Collection of global run parameters.
  !!
  !! NOTE(review): the per-component remarks below are inferred from the
  !! component names only - confirm against the code that fills them in.

  ! Sizes: presumably global (nx, ny, nz) and per-process (*_loc)
  integer :: nx, ny, nz
  integer :: nx_loc, ny_loc, nz_loc

  ! Group counts per direction
  integer :: n_groups_x, n_groups_y, n_groups_z

  ! Presumably domain lengths and grid spacings per direction
  real(dp) :: Lx, Ly, Lz
  real(dp) :: dx, dy, dz

  ! Presumably viscosity and time step size
  real(dp) :: nu, dt

  ! Presumably iteration count and output frequency
  integer :: n_iters, n_output

  ! Process counts per direction; each defaults to 1
  integer :: nproc_x = 1, nproc_y = 1, nproc_z = 1

  ! Boundary condition labels at the start (_s) and end (_e) of each
  ! direction, stored as fixed-length strings of 20 characters
  character(len=20) :: BC_x_s, BC_x_e, BC_y_s, BC_y_e, BC_z_s, BC_z_e

  ! Poisson solver selector; presumably one of the POISSON_SOLVER_*
  ! constants
  integer :: poisson_solver_type
end type globs_t

contains
Expand Down
19 changes: 19 additions & 0 deletions src/cuda/backend.f90
Expand Up @@ -7,11 +7,13 @@ module m_cuda_backend
use m_base_backend, only: base_backend_t
use m_common, only: dp, globs_t, &
RDR_X2Y, RDR_X2Z, RDR_Y2X, RDR_Y2Z, RDR_Z2X, RDR_Z2Y
use m_poisson_fft, only: poisson_fft_t
use m_tdsops, only: dirps_t, tdsops_t

use m_cuda_allocator, only: cuda_allocator_t, cuda_field_t
use m_cuda_common, only: SZ
use m_cuda_exec_dist, only: exec_dist_transeq_3fused, exec_dist_tds_compact
use m_cuda_poisson_fft, only: cuda_poisson_fft_t
use m_cuda_sendrecv, only: sendrecv_fields, sendrecv_3fields
use m_cuda_tdsops, only: cuda_tdsops_t
use m_cuda_kernels_dist, only: transeq_3fused_dist, transeq_3fused_subs
Expand Down Expand Up @@ -45,6 +47,7 @@ module m_cuda_backend
procedure :: scalar_product => scalar_product_cuda
procedure :: set_field => set_field_cuda
procedure :: get_field => get_field_cuda
procedure :: init_poisson_fft => init_cuda_poisson_fft
procedure :: transeq_cuda_dist
procedure :: transeq_cuda_thom
procedure :: tds_solve_dist
Expand All @@ -63,6 +66,7 @@ function init(globs, allocator) result(backend)
class(allocator_t), target, intent(inout) :: allocator
type(cuda_backend_t) :: backend

type(cuda_poisson_fft_t) :: cuda_poisson_fft
integer :: n_halo, n_block

select type(allocator)
Expand Down Expand Up @@ -600,5 +604,20 @@ subroutine get_field_cuda(self, arr, f)

end subroutine get_field_cuda

subroutine init_cuda_poisson_fft(self, xdirps, ydirps, zdirps)
  !! Create the CUDA Poisson FFT object and attach it to the backend.
  !!
  !! `self%poisson_fft` is first allocated with the dynamic type
  !! `cuda_poisson_fft_t`, and is then assigned the value returned by
  !! `cuda_poisson_fft_t(xdirps, ydirps, zdirps)`.
  implicit none

  class(cuda_backend_t) :: self
  type(dirps_t), intent(in) :: xdirps
  type(dirps_t), intent(in) :: ydirps
  type(dirps_t), intent(in) :: zdirps

  ! Fix the dynamic type of the polymorphic component first ...
  allocate (cuda_poisson_fft_t :: self%poisson_fft)

  ! ... then narrow to that type so the concrete value can be assigned.
  select type (fft => self%poisson_fft)
  type is (cuda_poisson_fft_t)
    fft = cuda_poisson_fft_t(xdirps, ydirps, zdirps)
  end select

end subroutine init_cuda_poisson_fft

end module m_cuda_backend