From 3efbcb17ab8f10280094f66549faab62052217d5 Mon Sep 17 00:00:00 2001
From: Francesca Mele <francesca.mele@cmcc.it>
Date: Mon, 19 Sep 2022 13:18:31 +0200
Subject: [PATCH] extra optimization for ldfslp - #79

---
 src/OCE/LDF/ldfslp.F90 | 98 ++++++++++++++++++++----------------------
 1 file changed, 46 insertions(+), 52 deletions(-)

diff --git a/src/OCE/LDF/ldfslp.F90 b/src/OCE/LDF/ldfslp.F90
index cfed0ff8f..a4bd5ddeb 100644
--- a/src/OCE/LDF/ldfslp.F90
+++ b/src/OCE/LDF/ldfslp.F90
@@ -118,9 +118,12 @@ CONTAINS
       REAL(wp) ::   zcj, zfj, zav, zbv, zaj, zbj   !   -      -
       REAL(wp) ::   zck, zfk,      zbw             !   -      -
       REAL(wp) ::   zdepu, zdepv                   !   -      -
+      REAL(wp) ::   zuslp, zvslp                   !   -      -
+      REAL(wp) ::   zwslpi, zwslpj                 !   -      -
       REAL(wp), DIMENSION(A2D(0))     ::  zslpml_hmlpu, zslpml_hmlpv
-      REAL(wp), DIMENSION(jpi,jpj,jpk) ::  zgru, zwz, zdzr
-      REAL(wp), DIMENSION(jpi,jpj,jpk) ::  zgrv, zww
+      REAL(wp), DIMENSION(A2D(1),jpk) ::  zdzr
+      REAL(wp), DIMENSION(A2D(1),jpk) ::  zgru, zgrv
+      REAL(wp), DIMENSION(jpi,jpj,jpk) ::  zwz, zww
       !!----------------------------------------------------------------------
       !
       IF( ln_timing )   CALL timing_start('ldf_slp')
@@ -152,15 +155,15 @@ CONTAINS
       ENDIF
       !
       zdzr(:,:,1) = 0._wp           !==   Local vertical density gradient at T-point   == !   (evaluated from N^2)
-      DO jk = 2, jpkm1
+      DO_3D( 1, 1, 1, 1, 2, jpkm1 )
          !                                ! zdzr = d/dz(prd)= - ( prd ) / grav * mk(pn2) -- at t point
          !                                !   trick: tmask(ik  )  = 0   =>   all pn2   = 0   =>   zdzr = 0
          !                                !    else  tmask(ik+1)  = 0   =>   pn2(ik+1) = 0   =>   zdzr divides by 1
          !                                !          umask(ik+1) /= 0   =>   all pn2  /= 0   =>   zdzr divides by 2
          !                                ! NB: 1/(tmask+1) = (1-.5*tmask)  substitute a / by a *  ==> faster
-         zdzr(:,:,jk) = zm1_g * ( prd(:,:,jk) + 1._wp )              &
-            &                 * ( pn2(:,:,jk) + pn2(:,:,jk+1) ) * ( 1._wp - 0.5_wp * tmask(:,:,jk+1) )
-      END DO
+         zdzr(ji,jj,jk) = zm1_g * ( prd(ji,jj,jk) + 1._wp )              &
+            &                 * ( pn2(ji,jj,jk) + pn2(ji,jj,jk+1) ) * ( 1._wp - 0.5_wp * tmask(ji,jj,jk+1) )
+      END_3D
       !
       !                             !==   Slopes just below the mixed layer   ==!
       CALL ldf_slp_mxl( prd, pn2, zgru, zgrv, zdzr, Kmm )        ! output: uslpml, vslpml, wslpiml, wslpjml
@@ -229,28 +232,23 @@ CONTAINS
       CALL lbc_lnk( 'ldfslp', zwz, 'U', -1.0_wp,  zww, 'V', -1.0_wp )      ! lateral boundary conditions
       !
       !                                    !* horizontal Shapiro filter
-      DO jk = 2, jpkm1
-         DO_2D( 0, 0, 0, 0 )                                 ! rows jj=2 and =jpjm1 only
-            uslp(ji,jj,jk) = z1_16 * (        zwz(ji-1,jj-1,jk) + zwz(ji+1,jj-1,jk)      &
-               &                       +      zwz(ji-1,jj+1,jk) + zwz(ji+1,jj+1,jk)      &
-               &                       + 2.*( zwz(ji  ,jj-1,jk) + zwz(ji-1,jj  ,jk)      &
-               &                       +      zwz(ji+1,jj  ,jk) + zwz(ji  ,jj+1,jk) )    &
-               &                       + 4.*  zwz(ji  ,jj  ,jk)                       )
-            vslp(ji,jj,jk) = z1_16 * (        zww(ji-1,jj-1,jk) + zww(ji+1,jj-1,jk)      &
-               &                       +      zww(ji-1,jj+1,jk) + zww(ji+1,jj+1,jk)      &
-               &                       + 2.*( zww(ji  ,jj-1,jk) + zww(ji-1,jj  ,jk)      &
-               &                       +      zww(ji+1,jj  ,jk) + zww(ji  ,jj+1,jk) )    &
-               &                       + 4.*  zww(ji,jj    ,jk)                       )
-         END_2D
+      DO_3D( 0, 0, 0, 0, 2, jpkm1 )                                 ! rows jj=2 and =jpjm1 only      
+         zuslp = z1_16 * (        zwz(ji-1,jj-1,jk) + zwz(ji+1,jj-1,jk)      &
+            &              +      zwz(ji-1,jj+1,jk) + zwz(ji+1,jj+1,jk)      &
+            &              + 2.*( zwz(ji  ,jj-1,jk) + zwz(ji-1,jj  ,jk)      &
+            &              +      zwz(ji+1,jj  ,jk) + zwz(ji  ,jj+1,jk) )    &
+            &              + 4.*  zwz(ji  ,jj  ,jk)                       )
+         zvslp = z1_16 * (        zww(ji-1,jj-1,jk) + zww(ji+1,jj-1,jk)      &
+            &              +      zww(ji-1,jj+1,jk) + zww(ji+1,jj+1,jk)      &
+            &              + 2.*( zww(ji  ,jj-1,jk) + zww(ji-1,jj  ,jk)      &
+            &              +      zww(ji+1,jj  ,jk) + zww(ji  ,jj+1,jk) )    &
+            &              + 4.*  zww(ji,jj    ,jk)                       )
          !                                 !* decrease along coastal boundaries
-         DO_2D( 0, 0, 0, 0 )
-            uslp(ji,jj,jk) = uslp(ji,jj,jk) * ( umask(ji,jj+1,jk) + umask(ji,jj-1,jk  ) ) * 0.5_wp   &
-               &                            * ( umask(ji,jj  ,jk) + umask(ji,jj  ,jk+1) ) * 0.5_wp
-            vslp(ji,jj,jk) = vslp(ji,jj,jk) * ( vmask(ji+1,jj,jk) + vmask(ji-1,jj,jk  ) ) * 0.5_wp   &
-               &                            * ( vmask(ji  ,jj,jk) + vmask(ji  ,jj,jk+1) ) * 0.5_wp
-         END_2D
-      END DO
-
+         uslp(ji,jj,jk) = zuslp * ( umask(ji,jj+1,jk) + umask(ji,jj-1,jk  ) ) * 0.5_wp   &
+            &                   * ( umask(ji,jj  ,jk) + umask(ji,jj  ,jk+1) ) * 0.5_wp
+         vslp(ji,jj,jk) = zvslp * ( vmask(ji+1,jj,jk) + vmask(ji-1,jj,jk  ) ) * 0.5_wp   &
+               &                * ( vmask(ji  ,jj,jk) + vmask(ji  ,jj,jk+1) ) * 0.5_wp
+      END_3D
 
       ! II.  slopes at w point           | wslpi = mij( d/di( prd ) / d/dz( prd )
       ! ===========================      | wslpj = mij( d/dj( prd ) / d/dz( prd )
@@ -289,29 +287,25 @@ CONTAINS
       CALL lbc_lnk( 'ldfslp', zwz, 'T', -1.0_wp,  zww, 'T', -1.0_wp )      ! lateral boundary conditions
       !
       !                                           !* horizontal Shapiro filter
-      DO jk = 2, jpkm1
-         DO_2D( 0, 0, 0, 0 )                             ! rows jj=2 and =jpjm1 only
-            zcofw = wmask(ji,jj,jk) * z1_16
-            wslpi(ji,jj,jk) = (         zwz(ji-1,jj-1,jk) + zwz(ji+1,jj-1,jk)     &
-                 &               +      zwz(ji-1,jj+1,jk) + zwz(ji+1,jj+1,jk)     &
-                 &               + 2.*( zwz(ji  ,jj-1,jk) + zwz(ji-1,jj  ,jk)     &
-                 &               +      zwz(ji+1,jj  ,jk) + zwz(ji  ,jj+1,jk) )   &
-                 &               + 4.*  zwz(ji  ,jj  ,jk)                         ) * zcofw
-
-            wslpj(ji,jj,jk) = (         zww(ji-1,jj-1,jk) + zww(ji+1,jj-1,jk)     &
-                 &               +      zww(ji-1,jj+1,jk) + zww(ji+1,jj+1,jk)     &
-                 &               + 2.*( zww(ji  ,jj-1,jk) + zww(ji-1,jj  ,jk)     &
-                 &               +      zww(ji+1,jj  ,jk) + zww(ji  ,jj+1,jk) )   &
-                 &               + 4.*  zww(ji  ,jj  ,jk)                         ) * zcofw
-         END_2D
-         !                                        !* decrease in vicinity of topography
-         DO_2D( 0, 0, 0, 0 )
-            zck =   ( umask(ji,jj,jk) + umask(ji-1,jj,jk) )   &
-               &  * ( vmask(ji,jj,jk) + vmask(ji,jj-1,jk) ) * 0.25
-            wslpi(ji,jj,jk) = wslpi(ji,jj,jk) * zck
-            wslpj(ji,jj,jk) = wslpj(ji,jj,jk) * zck
-         END_2D
-      END DO
+      DO_3D( 0, 0, 0, 0, 2, jpkm1 )                             ! rows jj=2 and =jpjm1 only
+         zcofw = wmask(ji,jj,jk) * z1_16
+         zwslpi = (         zwz(ji-1,jj-1,jk) + zwz(ji+1,jj-1,jk)     &
+              &      +      zwz(ji-1,jj+1,jk) + zwz(ji+1,jj+1,jk)     &
+              &      + 2.*( zwz(ji  ,jj-1,jk) + zwz(ji-1,jj  ,jk)     &
+              &      +      zwz(ji+1,jj  ,jk) + zwz(ji  ,jj+1,jk) )   &
+              &      + 4.*  zwz(ji  ,jj  ,jk)                         ) * zcofw
+
+         zwslpj = (         zww(ji-1,jj-1,jk) + zww(ji+1,jj-1,jk)     &
+              &      +      zww(ji-1,jj+1,jk) + zww(ji+1,jj+1,jk)     &
+              &      + 2.*( zww(ji  ,jj-1,jk) + zww(ji-1,jj  ,jk)     &
+              &      +      zww(ji+1,jj  ,jk) + zww(ji  ,jj+1,jk) )   &
+              &      + 4.*  zww(ji  ,jj  ,jk)                         ) * zcofw
+         !                                     !* decrease in vicinity of topography
+         zck =   ( umask(ji,jj,jk) + umask(ji-1,jj,jk) )   &
+            &  * ( vmask(ji,jj,jk) + vmask(ji,jj-1,jk) ) * 0.25
+         wslpi(ji,jj,jk) = zwslpi * zck
+         wslpj(ji,jj,jk) = zwslpj * zck
+      END_3D
 
       ! IV. Lateral boundary conditions
       ! ===============================
@@ -572,8 +566,8 @@ CONTAINS
       !!----------------------------------------------------------------------
       REAL(wp), DIMENSION(:,:,:), INTENT(in) ::   prd            ! in situ density
       REAL(wp), DIMENSION(:,:,:), INTENT(in) ::   pn2            ! Brunt-Vaisala frequency (locally ref.)
-      REAL(wp), DIMENSION(:,:,:), INTENT(in) ::   p_gru, p_grv   ! i- & j-gradient of density (u- & v-pts)
-      REAL(wp), DIMENSION(:,:,:), INTENT(in) ::   p_dzr          ! z-gradient of density      (T-point)
+      REAL(wp), DIMENSION(A2D(1),jpk), INTENT(in) ::   p_gru, p_grv   ! i- & j-gradient of density (u- & v-pts)
+      REAL(wp), DIMENSION(A2D(1),jpk), INTENT(in) ::   p_dzr          ! z-gradient of density      (T-point)
       INTEGER , INTENT(in)                   ::   Kmm            ! ocean time level indices
       !!
       INTEGER  ::   ji , jj , jk                   ! dummy loop indices
-- 
GitLab