Merge pull request DJ4Earth#15 from milankl/mk/speed

swilliamson7 · web-flow · commit 6cbc0bf421cf · 2023-05-17T14:23:55.000-05:00
Speedups
diff --git a/explicit_solver/ExplicitSolver.jl b/explicit_solver/ExplicitSolver.jl
@@ -0,0 +1,14 @@
+module ExplicitSolver   # loads the dependencies, then the solver source files in order
+
+using Plots, SparseArrays, Parameters, UnPack
+using JLD2, LinearAlgebra
+using Enzyme, Checkpointing, Zygote
+
+include("init_structs.jl")                  # struct definitions (e.g. Params)
+include("init_params.jl")                   # def_params
+include("build_grid.jl")
+include("build_discrete_operators.jl")      # build_advec and the @inplacemul macro
+include("advance.jl")                       # advance, integrate
+include("compute_time_deriv.jl")            # uses @inplacemul: keep after build_discrete_operators.jl
+
+end # module ExplicitSolver
diff --git a/explicit_solver/advance.jl b/explicit_solver/advance.jl
@@ -16,11 +16,7 @@ function advance(u_v_eta::gyre_vector,
     ) 
 
     nx = grid.nx 
-    dt = params.dt
-
-    # we now use RK4 as the timestepper, here I'm storing the coefficients needed for this 
-    rk_a = [1/6, 1/3, 1/3, 1/6]
-    rk_b = [1/2, 1/2, 1.]
+    (;dt, rk_a, rk_b) = params
 
     rhs.umid .= u_v_eta.u
     rhs.vmid .= u_v_eta.v
@@ -34,7 +30,7 @@ function advance(u_v_eta::gyre_vector,
     rhs.v1 .= u_v_eta.v
     rhs.eta1 .= u_v_eta.eta
 
-    for j in 1:4
+    @inbounds for j in 1:4
 
         comp_u_v_eta_t(nx, rhs, params, interp, grad, advec)
 
@@ -50,9 +46,14 @@ function advance(u_v_eta::gyre_vector,
 
     end
 
-    @assert all(x -> x < 7.0, rhs.u0)
-    @assert all(x -> x < 7.0, rhs.v0)
-    @assert all(x -> x < 7.0, rhs.eta0)
+    # Diffusion and bottom friction as Euler forward
+    dissipative_terms!(nx, rhs, params, interp, grad, advec)
+    rhs.u0 .+= dt .* rhs.u_t
+    rhs.v0 .+= dt .* rhs.v_t
+
+    # @assert all(x -> x < 7.0, rhs.u0)
+    # @assert all(x -> x < 7.0, rhs.v0)
+    # @assert all(x -> x < 7.0, rhs.eta0)
 
     copyto!(u_v_eta.u, rhs.u0)
     copyto!(u_v_eta.v, rhs.v0)
@@ -152,7 +153,7 @@ function integrate(T, nx, ny; Lx = 3840e3, Ly = 3840e3)
     # u_v_eta_mat = vec_to_mat(u_v_eta.u, u_v_eta.v, u_v_eta.eta, grid_params)
     
     return u_v_eta
-    
+    # return (u_v_eta, grid_params, rhs_terms, gyre_params, interp_ops, grad_ops, advec_ops)
 end
 
 # ****IMPORTANT**** not yet sure if I'm moving between high and low res grids, need to check with Patrick
diff --git a/explicit_solver/build_discrete_operators.jl b/explicit_solver/build_discrete_operators.jl
@@ -369,3 +369,23 @@ function build_advec(grid)
     return advec_ops
 
 end
+
+"""
+    @inplacemul c = A*b
+
+Macro to translate c = A*b with `A::SparseMatrixCSC`, `b` and `c` `Vector`s into
+`SparseArrays.mul!(c,A,b,true,false)` to perform the sparse matrix - 
+dense vector multiplication in-place."""
+macro inplacemul(ex)
+    # accept exactly `c = A * b`; reject e.g. `A * b * d`, whose extra factor was silently dropped
+    msg = "@inplacemul requires expression a = b*c"
+    Meta.isexpr(ex, :(=), 2) || throw(ArgumentError(msg))
+    call = ex.args[2]                       # right-hand side, must be the binary call A * b
+    (Meta.isexpr(call, :call, 3) && call.args[1] == :(*)) || throw(ArgumentError(msg))
+    return quote
+        local c = $(esc(ex.args[1]))        # output dense vector
+        local A = $(esc(call.args[2]))      # input sparse matrix
+        local b = $(esc(call.args[3]))      # input dense vector
+        SparseArrays.mul!(c,A,b,true,false) # c = β*c + α*A*b with α=1, β=0, i.e. c = A*b
+    end
+end
diff --git a/explicit_solver/compute_time_deriv.jl b/explicit_solver/compute_time_deriv.jl
@@ -8,48 +8,122 @@ function comp_u_v_eta_t(nx::Int,
         interp::Interps, 
         grad::Derivatives, 
         advec::Advection
-    ) 
+    )
+
+    # unpack stuff
+    u = rhs.u1
+    v = rhs.v1
+    eta = rhs.eta1
+
+    (;ITu, ITv, ITq, IuT, IvT) = interp                 # interpolation operators
+    (;Iuv, Ivu, Iqu, Iqv) = interp       
+    (;GTx, GTy, Gux, Guy, Gvx, Gvy) = grad              # gradient operators
+    (;h, h_u, h_v, h_q, U, V, p, q) = rhs               # diagnostic variables
+    (;kinetic) = rhs
+    (;GTx_p, GTy_p, Gux_U, Gvy_V, Gvx_v1, Guy_u1) = rhs
+    (;adv_u, adv_v) = rhs
+    (;IuT_u1, IvT_v1, ITu_ksq,ITv_ksq) = rhs
+    (;u_t, v_t, eta_t) = rhs                            # tendencies
+    (;H, coriolis, g, wind_stress) = params
+
+    h .= eta .+ H 
+    
+    @inplacemul h_q = ITq * h
+    @inplacemul h_u = ITu * h
+    @inplacemul h_v = ITv * h
+    U .= u .* h_u                  # volume fluxes U,V
+    V .= v .* h_v 
+    
+    # kinetic energy u² + v²
+    u²_T, u² = IuT_u1, ITu_ksq      # reuse and rename arrays for u²
+    v²_T, v² = IvT_v1, ITv_ksq      # and v², _T is on T-grid
+    u² .= u.^2
+    v² .= v.^2
+    @inplacemul u²_T = IuT * u² 
+    @inplacemul v²_T = IvT * v²
+    kinetic .= u²_T .+ v²_T
+    
+    # Kloewer defined new terms q and p corresponding to potential vorticity and 
+    # Bernoulli potential respectively. To avoid errors in my mimic I'm following 
+    # along and doing the same 
+    @inplacemul Guy_u1 = Guy * u
+    @inplacemul Gvx_v1 = Gvx * v
+    
+    q .= (coriolis .+ Gvx_v1 .- Guy_u1) ./ h_q 
+    p .= 0.5 .* kinetic .+ g .* h
 
-    rhs.h .= rhs.eta1 .+ params.H 
+    # deal with the advection term 
+    # comp_advection(nx, rhs, advec)    # Arakawa and Lamb advection scheme
 
-    rhs.h_u .= interp.ITu * rhs.h
-    rhs.h_v .= interp.ITv * rhs.h
-    rhs.h_q .= interp.ITq * rhs.h
+    # Sadourny, 1975 enstrophy conserving advection scheme
+    V_u = ITu_ksq                # reuse and rename array
+    @inplacemul V_u = Ivu * V
+    @inplacemul adv_u = Iqu * q  # u-component qhv
+    adv_u .*= V_u
 
-    rhs.U .= rhs.u1 .* rhs.h_u 
-    rhs.V .= rhs.v1 .* rhs.h_v 
+    U_v = ITv_ksq                # reuse and rename array
+    @inplacemul U_v = Iuv * U         
+    @inplacemul adv_v = Iqv * q  # v-component -qhu: adv_v must be refreshed from q before scaling
+    adv_v .*= .-U_v
 
-    rhs.kinetic .= interp.IuT * (rhs.u1.^2) .+ interp.IvT * (rhs.v1.^2)
+    # Bernoulli gradient ∇p = ∇(1/2(u²+v²) + gh)
+    @inplacemul GTx_p = GTx * p
+    @inplacemul GTy_p = GTy * p
 
-    # Kloewer defined new terms q and p corresponding to potential vorticity and 
-    # Bernoulli potential respectively. To avoid errors in my mimic I'm following 
-    # along and doing the same 
-    rhs.q .= (params.coriolis .+ grad.Gvx * rhs.v1 .- grad.Guy * rhs.u1) ./ rhs.h_q 
-    rhs.p .= 0.5 .* rhs.kinetic .+ params.g .* rhs.h
+    # momentum equations
+    u_t .= adv_u .- GTx_p .+ wind_stress ./ h_u
+    v_t .= adv_v .- GTy_p
 
-    # bottom friction
-    rhs.kinetic_sq .= sqrt.(rhs.kinetic)
-    rhs.bfric_u .= params.bottom_drag .* ((interp.ITu * rhs.kinetic_sq) .* rhs.u1) ./ rhs.h_u
-    rhs.bfric_v .= params.bottom_drag .* ((interp.ITv * rhs.kinetic_sq) .* rhs.v1) ./ rhs.h_v
+    # continuity equations
+    @inplacemul Gux_U = Gux * U    # volume flux divergence dUdx + dVdy
+    @inplacemul Gvy_V = Gvy * V
+    @. eta_t = -(Gux_U + Gvy_V)
 
-    # deal with the advection term 
-    comp_advection(nx, rhs, advec)
+    return nothing
+end 
 
-    # rhs.Mu .= params.A_h .* (grad.LLu * rhs.u1)
-    # rhs.Mv .= params.A_h .* (grad.LLv * rhs.v1) 
-    
-    rhs.Mu .= (interp.ITu * params.nu) .* (grad.LLu * rhs.u1)
-    rhs.Mv .= (interp.ITv * params.nu) .* (grad.LLv * rhs.v1) 
+function dissipative_terms!(nx::Int,     # NOTE: nx and advec are not used in this body
+        rhs::RHS_terms, 
+        params::Params, 
+        interp::Interps, 
+        grad::Derivatives, 
+        advec::Advection
+    )   # overwrites rhs.u_t and rhs.v_t with -(diffusion + bottom friction); returns nothing
 
-    rhs.u_t .= rhs.adv_u .- grad.GTx * rhs.p .+ params.wind_stress ./ rhs.h_u .- rhs.Mu .- rhs.bfric_u
+    # unpack stuff; the caller (advance) afterwards applies u0 .+= dt .* u_t, same for v
+    u = rhs.u0                          # calculate based on prognostics at
+    v = rhs.v0                          # t + dt of the non-dissipative RHS
 
-    rhs.v_t .= rhs.adv_v .- grad.GTy * rhs.p .- rhs.Mv .- rhs.bfric_v 
+    (;ITu, ITv) = interp                # interpolation operators   
+    (;LLu, LLv) = grad                  # gradient operators
+    (;h_u, h_v) = rhs                   # diagnostic variables
+    (;kinetic, kinetic_sq, Mu, Mv, nu_u, nu_v) = rhs   # work arrays, overwritten below (kinetic is only read)
+    (;bfric_u, bfric_v) = rhs
+    (;ITu_ksq,ITv_ksq) = rhs            # scratch buffers, also reused under other names in comp_u_v_eta_t
+    (;u_t, v_t) = rhs                   # tendencies
+    (;nu, bottom_drag) = params
 
-    rhs.eta_t .= - (grad.Gux * rhs.U .+ grad.Gvy * rhs.V)
+    # bottom friction
+    kinetic_sq .= sqrt.(kinetic)        # kinetic and h_u, h_v are read as stored in rhs, not recomputed here
+    @inplacemul ITu_ksq = ITu * kinetic_sq
+    @inplacemul ITv_ksq = ITv * kinetic_sq
+    bfric_u .= bottom_drag .* ITu_ksq .* u ./ h_u
+    bfric_v .= bottom_drag .* ITv_ksq .* v ./ h_v
+
+    # diffusion term ν∇⁴(u,v)
+    @inplacemul nu_u = ITu * nu         # nu is interpolated with ITu / ITv on every call
+    @inplacemul nu_v = ITv * nu
+    @inplacemul Mu = LLu * u
+    @inplacemul Mv = LLv * v
+    Mu .*= nu_u                         # Mu = nu_u .* (LLu * u)
+    Mv .*= nu_v                         # Mv = nu_v .* (LLv * v)
+
+    # tendencies for bottom friction and diffusion (both enter with a minus sign)
+    u_t .= .- Mu .- bfric_u             # replaces whatever u_t held before this call
+    v_t .= .- Mv .- bfric_v 
 
     return nothing
-
-end 
+end
 
 function comp_u_v_eta_t(nx::Int, 
         rhs::SWM_pde, 
diff --git a/explicit_solver/init_params.jl b/explicit_solver/init_params.jl
@@ -55,8 +55,14 @@ function def_params(grid)
     # removing the requirement that dt be an integer, not sure why that's there 
     dt = (0.9 * min(dx, dy)) / (sqrt(g * H))   # CFL condition for dt [seconds]
 
+    # we now use RK4 as the timestepper, here I'm storing the coefficients needed for this 
+    rk_a = [1/6, 1/3, 1/3, 1/6]
+    rk_b = [1/2, 1/2, 1.]
+
     gyre_params = Params(
     dt,
+    rk_a,
+    rk_b,
     g, 
     f0, 
     beta, 
diff --git a/explicit_solver/init_structs.jl b/explicit_solver/init_structs.jl
@@ -106,6 +106,8 @@ end
 # Parameters that appear in the model in various places
 struct Params 
     dt::Float64                     # timestep
+    rk_a::Vector{Float64}           # Runge Kutta 4th order coefficients
+    rk_b::Vector{Float64}
     g::Float64                      # gravity
     f0::Float64                     # Coriolis parameter
     beta::Float64                   # Coriolis parameter
@@ -255,8 +257,8 @@ end
     bfric_u::Vector{Float64} = zeros(Nu)
     bfric_v::Vector{Float64} = zeros(Nv)
 
-    LLu_u1::Vector{Float64} = zeros(Nu)
-    LLv_v1::Vector{Float64} = zeros(Nv)
+    nu_u::Vector{Float64} = zeros(Nu)
+    nu_v::Vector{Float64} = zeros(Nv)
     Mu::Vector{Float64} = zeros(Nu)
     Mv::Vector{Float64} = zeros(Nv)