From b08da96876511c088eacfd7c56018e6b4fe6bde5 Mon Sep 17 00:00:00 2001
From: Francesca Mele <francesca.mele@cmcc.it>
Date: Mon, 26 Sep 2022 11:42:44 +0200
Subject: [PATCH] fix loop fusion, #79

---
 src/OCE/DYN/dynvor.F90 | 66 +++++++++++++++++++-----------------------
 1 file changed, 30 insertions(+), 36 deletions(-)

diff --git a/src/OCE/DYN/dynvor.F90 b/src/OCE/DYN/dynvor.F90
index b7a84124a..b90b4bc3e 100644
--- a/src/OCE/DYN/dynvor.F90
+++ b/src/OCE/DYN/dynvor.F90
@@ -943,6 +943,34 @@ CONTAINS
             CALL ctl_stop('STOP','dyn_vor: wrong value for kvor'  )
          END SELECT
          !
+#if defined key_loop_fusion
+         DO_2D( 0, 0, 0, 0 )
+            !                                   !==  horizontal fluxes  ==!
+            zwx         = e2u(ji  ,jj  ) * e3u(ji  ,jj  ,jk,Kmm) * pu(ji  ,jj  ,jk)
+            zwx_im1     = e2u(ji-1,jj  ) * e3u(ji-1,jj  ,jk,Kmm) * pu(ji-1,jj  ,jk)
+            zwx_jp1     = e2u(ji  ,jj+1) * e3u(ji  ,jj+1,jk,Kmm) * pu(ji  ,jj+1,jk)
+            zwx_im1_jp1 = e2u(ji-1,jj+1) * e3u(ji-1,jj+1,jk,Kmm) * pu(ji-1,jj+1,jk)
+            zwy         = e1v(ji  ,jj  ) * e3v(ji  ,jj  ,jk,Kmm) * pv(ji  ,jj  ,jk)
+            zwy_ip1     = e1v(ji+1,jj  ) * e3v(ji+1,jj  ,jk,Kmm) * pv(ji+1,jj  ,jk)
+            zwy_jm1     = e1v(ji  ,jj-1) * e3v(ji  ,jj-1,jk,Kmm) * pv(ji  ,jj-1,jk)
+            zwy_ip1_jm1 = e1v(ji+1,jj-1) * e3v(ji+1,jj-1,jk,Kmm) * pv(ji+1,jj-1,jk)
+            !                                   !==  compute and add the vorticity term trend  =!
+            ztne     = zwz(ji-1,jj  ) + zwz(ji  ,jj  ) + zwz(ji  ,jj-1)
+            ztnw     = zwz(ji-1,jj-1) + zwz(ji-1,jj  ) + zwz(ji  ,jj  )
+            ztnw_ip1 = zwz(ji  ,jj-1) + zwz(ji  ,jj  ) + zwz(ji+1,jj  )
+            ztse     = zwz(ji  ,jj  ) + zwz(ji  ,jj-1) + zwz(ji-1,jj-1)
+            ztse_jp1 = zwz(ji  ,jj+1) + zwz(ji  ,jj  ) + zwz(ji-1,jj  )
+            ztsw_jp1 = zwz(ji  ,jj  ) + zwz(ji-1,jj  ) + zwz(ji-1,jj+1)
+            ztsw_ip1 = zwz(ji+1,jj-1) + zwz(ji  ,jj-1) + zwz(ji  ,jj  )
+            !
+            zua = + r1_12 * r1_e1u(ji,jj) * (  ztne * zwy + ztnw_ip1 * zwy_ip1   &
+               &                             + ztse * zwy_jm1 + ztsw_ip1 * zwy_ip1_jm1 )
+            zva = - r1_12 * r1_e2v(ji,jj) * (  ztsw_jp1 * zwx_im1_jp1 + ztse_jp1 * zwx_jp1   &
+               &                             + ztnw * zwx_im1 + ztne * zwx )
+            pu_rhs(ji,jj,jk) = pu_rhs(ji,jj,jk) + zua
+            pv_rhs(ji,jj,jk) = pv_rhs(ji,jj,jk) + zva
+         END_2D
+#else
          !                                   !==  horizontal fluxes  ==!
          DO_2D( 1, 1, 1, 1 )
             zwx(ji,jj) = e2u(ji,jj) * e3u(ji,jj,jk,Kmm) * pu(ji,jj,jk)
@@ -965,6 +993,7 @@ CONTAINS
             pu_rhs(ji,jj,jk) = pu_rhs(ji,jj,jk) + zua
             pv_rhs(ji,jj,jk) = pv_rhs(ji,jj,jk) + zva
          END_2D
+#endif
       END DO
       !                                                ! ===============
       !                                                !   End of slab
@@ -1000,14 +1029,8 @@ CONTAINS
       REAL(wp) ::   zua, zva     ! local scalars
       REAL(wp) ::   zmsk, ze3f   ! local scalars
       REAL(wp), DIMENSION(A2D(1))       ::   z1_e3f
-#if defined key_loop_fusion
-      REAL(wp) ::   ztne, ztnw, ztnw_ip1, ztse, ztse_jp1, ztsw_jp1, ztsw_ip1
-      REAL(wp) ::   zwx, zwx_im1, zwx_jp1, zwx_im1_jp1
-      REAL(wp) ::   zwy, zwy_ip1, zwy_jm1, zwy_ip1_jm1
-#else
       REAL(wp), DIMENSION(A2D(1))       ::   zwx , zwy
       REAL(wp), DIMENSION(A2D(1))       ::   ztnw, ztne, ztsw, ztse
-#endif
       REAL(wp), DIMENSION(A2D(1),jpkm1) ::   zwz   ! 3D workspace, jpkm1 -> jpkm1 -> avoid lbc_lnk on jpk that is not defined
       !!----------------------------------------------------------------------
       !
@@ -1106,39 +1129,11 @@ CONTAINS
       END DO                                           !   End of slab
       !                                                ! ===============
       !
-      IF (nn_hls==1) CALL lbc_lnk( 'dynvor', zwz, 'F', 1.0_wp )
+      CALL lbc_lnk( 'dynvor', zwz, 'F', 1.0_wp )
       !
       !                                                ! ===============
       !                                                ! Horizontal slab
       !                                                ! ===============
-#if defined key_loop_fusion
-      DO_3D( 0, 0, 0, 0, 1, jpkm1 )
-         !                                   !==  horizontal fluxes  ==!
-         zwx         = e2u(ji  ,jj  ) * e3u(ji  ,jj  ,jk,Kmm) * pu(ji  ,jj  ,jk)
-         zwx_im1     = e2u(ji-1,jj  ) * e3u(ji-1,jj  ,jk,Kmm) * pu(ji-1,jj  ,jk)
-         zwx_jp1     = e2u(ji  ,jj+1) * e3u(ji  ,jj+1,jk,Kmm) * pu(ji  ,jj+1,jk)
-         zwx_im1_jp1 = e2u(ji-1,jj+1) * e3u(ji-1,jj+1,jk,Kmm) * pu(ji-1,jj+1,jk)
-         zwy         = e1v(ji  ,jj  ) * e3v(ji  ,jj  ,jk,Kmm) * pv(ji  ,jj  ,jk)
-         zwy_ip1     = e1v(ji+1,jj  ) * e3v(ji+1,jj  ,jk,Kmm) * pv(ji+1,jj  ,jk)
-         zwy_jm1     = e1v(ji  ,jj-1) * e3v(ji  ,jj-1,jk,Kmm) * pv(ji  ,jj-1,jk)
-         zwy_ip1_jm1 = e1v(ji+1,jj-1) * e3v(ji+1,jj-1,jk,Kmm) * pv(ji+1,jj-1,jk)
-         !                                   !==  compute and add the vorticity term trend  =!
-         ztne     = zwz(ji-1,jj  ,jk) + zwz(ji  ,jj  ,jk) + zwz(ji  ,jj-1,jk)
-         ztnw     = zwz(ji-1,jj-1,jk) + zwz(ji-1,jj  ,jk) + zwz(ji  ,jj  ,jk)
-         ztnw_ip1 = zwz(ji  ,jj-1,jk) + zwz(ji  ,jj  ,jk) + zwz(ji+1,jj  ,jk)
-         ztse     = zwz(ji  ,jj  ,jk) + zwz(ji  ,jj-1,jk) + zwz(ji-1,jj-1,jk)
-         ztse_jp1 = zwz(ji  ,jj+1,jk) + zwz(ji  ,jj  ,jk) + zwz(ji-1,jj  ,jk)
-         ztsw_jp1 = zwz(ji  ,jj  ,jk) + zwz(ji-1,jj  ,jk) + zwz(ji-1,jj+1,jk)
-         ztsw_ip1 = zwz(ji+1,jj-1,jk) + zwz(ji  ,jj-1,jk) + zwz(ji  ,jj  ,jk)
-         !
-         zua = + r1_12 * r1_e1u(ji,jj) * (  ztne * zwy + ztnw_ip1 * zwy_ip1   &
-            &                             + ztse * zwy_jm1 + ztsw_ip1 * zwy_ip1_jm1 )
-         zva = - r1_12 * r1_e2v(ji,jj) * (  ztsw_jp1 * zwx_im1_jp1 + ztse_jp1 * zwx_jp1   &
-            &                             + ztnw * zwx_im1 + ztne * zwx )
-         pu_rhs(ji,jj,jk) = pu_rhs(ji,jj,jk) + zua
-         pv_rhs(ji,jj,jk) = pv_rhs(ji,jj,jk) + zva
-      END_3D
-#else
       DO jk = 1, jpkm1
          !
          !                                   !==  horizontal fluxes  ==!
@@ -1164,7 +1159,6 @@ CONTAINS
             pv_rhs(ji,jj,jk) = pv_rhs(ji,jj,jk) + zva
          END_2D
       END DO
-#endif
          !                                             ! ===============
          !                                             !   End of slab
       !                                                ! ===============
-- 
GitLab