diff --git a/cfgs/SHARED/field_def_nemo-oce.xml b/cfgs/SHARED/field_def_nemo-oce.xml
index 750a3f93b7febc3ed0cac89ff86405f615874f15..6ad835d9c1873d0c1c5d856cc88f08da6aa968ac 100644
--- a/cfgs/SHARED/field_def_nemo-oce.xml
+++ b/cfgs/SHARED/field_def_nemo-oce.xml
@@ -111,7 +111,7 @@ that are available in the tidal-forcing implementation (see
   <field_group id="grid_T" grid_ref="grid_T_2D" >
     <field id="e3t"          long_name="T-cell thickness"                    standard_name="cell_thickness"        unit="m"   grid_ref="grid_T_3D_inner" />
     <field id="e3ts"         long_name="T-cell thickness"   field_ref="e3t"  standard_name="cell_thickness"        unit="m"   grid_ref="grid_T_SFC_inner"      />
-    <field id="e3t_0"        long_name="Initial T-cell thickness"            standard_name="ref_cell_thickness"    unit="m"   grid_ref="grid_T_3D"       />
+    <field id="e3t_0"        long_name="Initial T-cell thickness"            standard_name="ref_cell_thickness"    unit="m"   grid_ref="grid_T_3D_inner"  />
     <field id="e3tb"         long_name="bottom T-cell thickness"             standard_name="bottom_cell_thickness" unit="m"   grid_ref="grid_T_2D_inner" />
     <field id="e3t_300"      field_ref="e3t"                                                                                  grid_ref="grid_T_zoom_300_inner" detect_missing_value="true" />
     <field id="e3t_vsum300"  field_ref="e3t_300"                                                                              grid_ref="grid_T_vsum_inner"     detect_missing_value="true" />
@@ -168,7 +168,7 @@ that are available in the tidal-forcing implementation (see
     <field id="ssh"          long_name="sea surface height"                                 standard_name="sea_surface_height_above_geoid"             unit="m" />
     <field id="ssh2"         long_name="square of sea surface height"                       standard_name="square_of_sea_surface_height_above_geoid"   unit="m2" > ssh * ssh </field >
     <field id="wetdep"       long_name="wet depth"                                          standard_name="wet_depth"                                  unit="m" />
-    <field id="sshmax"       long_name="max of sea surface height"        field_ref="ssh"   operation="maximum"                                                 />
+    <field id="sshmax"       long_name="max of sea surface height"        field_ref="ssh"   operation="maximum"                                                 grid_ref="grid_T_2D_inner"/>
 
     <field id="mldkz5"       long_name="Turbocline depth (Kz = 5e-4)"                     standard_name="ocean_mixed_layer_thickness_defined_by_vertical_tracer_diffusivity"              unit="m"          grid_ref="grid_T_2D_inner" />
     <field id="mldr10_1"     long_name="Mixed Layer Depth (dsigma = 0.01 wrt 10m)"        standard_name="ocean_mixed_layer_thickness_defined_by_sigma_theta"                              unit="m"          grid_ref="grid_T_2D_inner" />
@@ -588,7 +588,7 @@ that are available in the tidal-forcing implementation (see
     <field id="hu"            long_name="water column height at U point"                         standard_name="water_column_height_U"       unit="m" />
     <field id="e2u"           long_name="U-cell width in meridional direction"                   standard_name="cell_width"                  unit="m"                                 />
     <field id="e3u"           long_name="U-cell thickness"                                       standard_name="cell_thickness"              unit="m"          grid_ref="grid_U_3D_inner"   />
-    <field id="e3u_0"         long_name="Initial U-cell thickness"                               standard_name="ref_cell_thickness"          unit="m"          grid_ref="grid_U_3D"   />
+    <field id="e3u_0"         long_name="Initial U-cell thickness"                               standard_name="ref_cell_thickness"          unit="m"          grid_ref="grid_U_3D_inner"   />
     <field id="uoce"          long_name="ocean current along i-axis"                             standard_name="sea_water_x_velocity"        unit="m/s"        grid_ref="grid_U_3D"   />
     <field id="uoce_e3u"      long_name="ocean current along i-axis  (thickness weighted)"                                                   unit="m/s"        grid_ref="grid_U_3D"    > uoce * e3u </field>
     <field id="uoce_e3u_vsum" long_name="ocean current along i-axis * e3u summed on the vertical"  field_ref="uoce_e3u"                      unit="m3/s"       grid_ref="grid_U_vsum" />
@@ -651,7 +651,7 @@ that are available in the tidal-forcing implementation (see
   <field_group id="grid_V"   grid_ref="grid_V_2D">
     <field id="e1v"          long_name="V-cell width in longitudinal direction"                 standard_name="cell_width"                  unit="m"                               />
     <field id="e3v"          long_name="V-cell thickness"                                       standard_name="cell_thickness"              unit="m"          grid_ref="grid_V_3D_inner" />
-    <field id="e3v_0"        long_name="Initial V-cell thickness"                               standard_name="ref_cell_thickness"          unit="m"          grid_ref="grid_V_3D" />
+    <field id="e3v_0"        long_name="Initial V-cell thickness"                               standard_name="ref_cell_thickness"          unit="m"          grid_ref="grid_V_3D_inner" />
     <field id="hv"            long_name="water column height at V point"                        standard_name="water_column_height_V"       unit="m" />
     <field id="voce"         long_name="ocean current along j-axis"                             standard_name="sea_water_y_velocity"        unit="m/s"        grid_ref="grid_V_3D" />
     <field id="voce_e3v"     long_name="ocean current along j-axis  (thickness weighted)"                                                   unit="m/s"        grid_ref="grid_V_3D"  > voce * e3v </field>
@@ -706,7 +706,7 @@ that are available in the tidal-forcing implementation (see
 
   <field_group id="grid_W" grid_ref="grid_W_3D">
     <field id="e3w"          long_name="W-cell thickness"                              standard_name="cell_thickness"                         unit="m"  grid_ref="grid_W_3D_inner"  />
-    <field id="woce"         long_name="ocean vertical velocity"                       standard_name="upward_sea_water_velocity"              unit="m/s"  />
+    <field id="woce"         long_name="ocean vertical velocity"                       standard_name="upward_sea_water_velocity"              unit="m/s"  grid_ref="grid_W_3D_inner"/>
     <field id="woce_e3w"     long_name="ocean vertical velocity * e3w"                                                                        unit="m2/s"  > woce * e3w </field>
     <field id="wocetr_eff"   long_name="effective ocean vertical transport"                                                                   unit="m3/s" />
 
@@ -766,7 +766,7 @@ that are available in the tidal-forcing implementation (see
   <!-- F grid -->
   <field_group id="grid_F" grid_ref="grid_F_2D">
     <field id="e3f"          long_name="F-cell thickness"                      standard_name="cell_thickness"                       unit="m"       grid_ref="grid_F_3D_inner" />
-    <field id="e3f_0"        long_name="F-cell thickness"                      standard_name="cell_thickness"                       unit="m"       grid_ref="grid_F_3D"       />
+    <field id="e3f_0"        long_name="F-cell thickness"                      standard_name="cell_thickness"                       unit="m"       grid_ref="grid_F_3D_inner" />
     <field id="hf"           long_name="water column height at F point"        standard_name="water_column_height_F"                unit="m"                                  />
     <field id="ssKEf"        long_name="surface kinetic energy at F point"     standard_name="specific_kinetic_energy_of_sea_water" unit="m2/s2"   grid_ref="grid_F_2D_inner" />
     <field id="ssrelvor"     long_name="surface relative vorticity"            standard_name="relative_vorticity"                   unit="1/s"     grid_ref="grid_F_2D_inner" />
@@ -1235,26 +1235,26 @@ that are available in the tidal-forcing implementation (see
 
   <!-- 25h diagnostic output -->
   <field_group id="25h_grid_T" grid_ref="grid_T_3D_inner" operation="instant">
-    <field id="temper25h"         name="potential temperature 25h mean"    unit="degC" />
-    <field id="tempis25h"         name="insitu temperature 25h mean"    unit="degC" />
-    <field id="salin25h"          name="salinity 25h mean"                 unit="psu"  />
-    <field id="ssh25h"            name="sea surface height 25h mean"  grid_ref="grid_T_2D_inner"      unit="m"    />
+    <field id="temper25h"         long_name="potential temperature 25h mean"   unit="degC" />
+    <field id="tempis25h"         long_name="insitu temperature 25h mean"    unit="degC" />
+    <field id="salin25h"          long_name="salinity 25h mean"                 unit="psu"  />
+    <field id="ssh25h"            long_name="sea surface height 25h mean"  grid_ref="grid_T_2D_inner"      unit="m"    />
   </field_group>
 
   <field_group id="25h_grid_U" grid_ref="grid_U_3D_inner" operation="instant" >
-    <field id="vozocrtx25h"         name="i current 25h mean"    unit="m/s"   />
+    <field id="vozocrtx25h"       long_name="i current 25h mean"    unit="m/s"   />
   </field_group>
 
   <field_group id="25h_grid_V" grid_ref="grid_V_3D_inner" operation="instant">
-    <field id="vomecrty25h"         name="j current 25h mean"    unit="m/s"    />
+    <field id="vomecrty25h"       long_name="j current 25h mean"    unit="m/s"    />
   </field_group>
 
   <field_group id="25h_grid_W" grid_ref="grid_W_3D_inner" operation="instant">
-    <field id="vovecrtz25h"         name="k current 25h mean"                 unit="m/s"      />
-    <field id="avt25h"              name="vertical diffusivity25h mean"       unit="m2/s" />
-    <field id="avm25h"              name="vertical viscosity 25h mean"        unit="m2/s" />
-    <field id="tke25h"              name="turbulent kinetic energy 25h mean" />
-    <field id="mxln25h"             name="mixing length 25h mean"             unit="m" />
+    <field id="vovecrtz25h"       long_name="k current 25h mean"                 unit="m/s"      />
+    <field id="avt25h"            long_name="vertical diffusivity25h mean"       unit="m2/s" />
+    <field id="avm25h"            long_name="vertical viscosity 25h mean"        unit="m2/s" />
+    <field id="tke25h"            long_name="turbulent kinetic energy 25h mean" />
+    <field id="mxln25h"           long_name="mixing length 25h mean"             unit="m" />
   </field_group>
 
   <!--
diff --git a/src/OCE/DIA/dia25h.F90 b/src/OCE/DIA/dia25h.F90
index b9bede46535ba40526e6256636bb09372bc90199..8ef854077eb8d8f81e1ccf5d408d57d5adf0ba46 100644
--- a/src/OCE/DIA/dia25h.F90
+++ b/src/OCE/DIA/dia25h.F90
@@ -96,7 +96,7 @@ CONTAINS
       ! ------------------------- !
       ! 2 - Assign Initial Values !
       ! ------------------------- !
-      cnt_25h = 1  ! sets the first value of sum at timestep 1 (note - should strictly be at timestep zero so before values used where possible)
+      cnt_25h = 2  ! sets the first value of sum at timestep 1 (note - should strictly be at timestep zero so before values used where possible)
       DO_3D( 0, 0, 0, 0, 1, jpk )
          tn_25h (ji,jj,jk) = ts (ji,jj,jk,jp_tem,Kbb)
          sn_25h (ji,jj,jk) = ts (ji,jj,jk,jp_sal,Kbb)
@@ -169,11 +169,12 @@ CONTAINS
       ! Sum of 25 hourly instantaneous values to give a 25h mean from 24hours every day
       IF( MOD( kt, i_steps ) == 0  .AND. kt /= nn_it000 ) THEN
 
-         IF (lwp) THEN
-              WRITE(numout,*) 'dia_wri_tide : Summing instantaneous hourly diagnostics at timestep ',kt
-              WRITE(numout,*) '~~~~~~~~~~~~ '
+         IF( .NOT. l_istiled .OR. ntile == nijtile )  THEN ! Do only for the first tile
+            IF (lwp) THEN
+               WRITE(numout,*) 'dia_wri_tide : Summing instantaneous hourly diagnostics at timestep ',kt
+               WRITE(numout,*) '~~~~~~~~~~~~ '
+            ENDIF
          ENDIF
-
          DO_3D( 0, 0, 0, 0, 1, jpk )
             tn_25h  (ji,jj,jk) = tn_25h  (ji,jj,jk) + ts (ji,jj,jk,jp_tem,Kmm)
             sn_25h  (ji,jj,jk) = sn_25h  (ji,jj,jk) + ts (ji,jj,jk,jp_sal,Kmm)
@@ -197,10 +198,12 @@ CONTAINS
                rmxln_25h(ji,jj,jk) = rmxln_25h(ji,jj,jk) + hmxl_n(ji,jj,jk)
             END_3D
          ENDIF
-         cnt_25h = cnt_25h + 1
          !
-         IF (lwp) THEN
-            WRITE(numout,*) 'dia_tide : Summed the following number of hourly values so far',cnt_25h
+         IF( .NOT. l_istiled .OR. ntile == 1 )  THEN   ! Do only for the first tile
+            cnt_25h = cnt_25h + 1
+            IF (lwp) THEN
+               WRITE(numout,*) 'dia_tide : Summed the following number of hourly values so far',cnt_25h
+            ENDIF
          ENDIF
          !
       ENDIF ! MOD( kt, i_steps ) == 0
@@ -208,25 +211,36 @@ CONTAINS
       ! Write data for 25 hour mean output streams
       IF( cnt_25h == 25 .AND.  MOD( kt, i_steps*24) == 0 .AND. kt /= nn_it000 ) THEN
          !
-         IF(lwp) THEN
-            WRITE(numout,*) 'dia_wri_tide : Writing 25 hour mean tide diagnostics at timestep', kt
-            WRITE(numout,*) '~~~~~~~~~~~~ '
+         IF( .NOT. l_istiled .OR. ntile == 1 )  THEN ! Do only for the first tile
+            IF(lwp) THEN
+               WRITE(numout,*) 'dia_wri_tide : Writing 25 hour mean tide diagnostics at timestep', kt
+               WRITE(numout,*) '~~~~~~~~~~~~ '
+            ENDIF
          ENDIF
          !
-         tn_25h  (:,:,:) = tn_25h  (:,:,:) * r1_25
-         sn_25h  (:,:,:) = sn_25h  (:,:,:) * r1_25
-         sshn_25h(:,:)   = sshn_25h(:,:)   * r1_25
-         un_25h  (:,:,:) = un_25h  (:,:,:) * r1_25
-         vn_25h  (:,:,:) = vn_25h  (:,:,:) * r1_25
-         wn_25h  (:,:,:) = wn_25h  (:,:,:) * r1_25
-         avt_25h (:,:,:) = avt_25h (:,:,:) * r1_25
-         avm_25h (:,:,:) = avm_25h (:,:,:) * r1_25
+         DO_3D( 0, 0, 0, 0, 1, jpk )
+            tn_25h  (ji,jj,jk) = tn_25h  (ji,jj,jk) * r1_25
+            sn_25h  (ji,jj,jk) = sn_25h  (ji,jj,jk) * r1_25
+            un_25h  (ji,jj,jk) = un_25h  (ji,jj,jk) * r1_25
+            vn_25h  (ji,jj,jk) = vn_25h  (ji,jj,jk) * r1_25
+            wn_25h  (ji,jj,jk) = wn_25h  (ji,jj,jk) * r1_25
+            avt_25h (ji,jj,jk) = avt_25h (ji,jj,jk) * r1_25
+            avm_25h (ji,jj,jk) = avm_25h (ji,jj,jk) * r1_25
+         END_3D
+         DO_2D( 0, 0, 0, 0 )
+            sshn_25h(ji,jj)    = sshn_25h(ji,jj)    * r1_25
+         END_2D
+         !
          IF( ln_zdftke ) THEN
-            en_25h(:,:,:) = en_25h(:,:,:) * r1_25
+            DO_3D( 0, 0, 0, 0, 1, jpk)
+               en_25h(ji,jj,jk) = en_25h(ji,jj,jk) * r1_25
+            END_3D
          ENDIF
          IF( ln_zdfgls ) THEN
-            en_25h   (:,:,:) = en_25h   (:,:,:) * r1_25
-            rmxln_25h(:,:,:) = rmxln_25h(:,:,:) * r1_25
+            DO_3D( 0, 0, 0, 0, 1, jpk)
+               en_25h   (ji,jj,jk) = en_25h   (ji,jj,jk) * r1_25
+               rmxln_25h(ji,jj,jk) = rmxln_25h(ji,jj,jk) * r1_25
+            END_3D
          ENDIF
          !
          IF(lwp)  WRITE(numout,*) 'dia_wri_tide : Mean calculated by dividing 25 hour sums and writing output'
@@ -311,9 +325,11 @@ CONTAINS
                rmxln_25h(ji,jj,jk) = hmxl_n(ji,jj,jk)
             END_3D
          ENDIF
-         cnt_25h = 1
-         IF(lwp)  WRITE(numout,*) 'dia_wri_tide :   &
-            &    After 25hr mean write, reset sum to current value and cnt_25h to one for overlapping average', cnt_25h
+         IF( .NOT. l_istiled .OR. ntile == nijtile )  THEN ! Do only for the first tile
+            cnt_25h = 1
+            IF(lwp)  WRITE(numout,*) 'dia_wri_tide :   &
+               &    After 25hr mean write, reset sum to current value and cnt_25h to one for overlapping average', cnt_25h
+         ENDIF
       ENDIF !  cnt_25h .EQ. 25 .AND.  MOD( kt, i_steps * 24) == 0 .AND. kt .NE. nn_it000
       !
    END SUBROUTINE dia_25h 
diff --git a/src/OCE/DIA/diaar5.F90 b/src/OCE/DIA/diaar5.F90
index a8eaa2c041b4f920f7af9c9c0ba46485f8b16764..d30024172905ff713b76036e937b2d05ae3de20c 100644
--- a/src/OCE/DIA/diaar5.F90
+++ b/src/OCE/DIA/diaar5.F90
@@ -11,6 +11,7 @@ MODULE diaar5
    !!----------------------------------------------------------------------
    USE oce            ! ocean dynamics and active tracers
    USE dom_oce        ! ocean space and time domain
+   USE domtile
    USE eosbn2         ! equation of state                (eos_bn2 routine)
    USE phycst         ! physical constant
    USE in_out_manager  ! I/O manager
@@ -28,6 +29,7 @@ MODULE diaar5
    PRIVATE
 
    PUBLIC   dia_ar5        ! routine called in step.F90 module
+   PUBLIC   dia_ar5_init
    PUBLIC   dia_ar5_alloc  ! routine called in nemogcm.F90 module
    PUBLIC   dia_ar5_hst    ! heat/salt transport
 
@@ -74,41 +76,50 @@ CONTAINS
       INTEGER, INTENT( in ) ::   Kmm  ! ocean time level index
       !
       INTEGER  ::   ji, jj, jk, iks, ikb                      ! dummy loop arguments
-      REAL(wp) ::   zvolssh, zvol, zssh_steric, zztmp, zarho, ztemp, zsal, zmass, zsst
+      REAL(wp) ::   zvol, zztmp
       REAL(wp) ::   zaw, zbw, zrw, ztf
+      REAL(wp)       ::                   zarho,           ztemp,           zsal
+      REAL(wp), SAVE :: zsvolssh, zssst, zsarho, zsarho2, zstemp, zstemp2, zssal, zssshice    ! Working sums
       !
-      REAL(wp), ALLOCATABLE, DIMENSION(:,:)       :: zarea_ssh   ! 2D workspace
-      REAL(wp), ALLOCATABLE, DIMENSION(:,:)       :: z2d         ! 2D workspace
-      REAL(wp), ALLOCATABLE, DIMENSION(:,:,:)     :: z3d, zrhd   ! 3D workspace
-      REAL(wp), ALLOCATABLE, DIMENSION(:,:,:,:,:) :: ztsn        ! 5D workspace
+      REAL(wp), ALLOCATABLE, DIMENSION(:,:)       :: z2d, zarea_ssh   ! 2D workspace
+      REAL(wp), ALLOCATABLE, DIMENSION(:,:,:)     :: z3d, zrhd        ! 3D workspace
+      REAL(wp), ALLOCATABLE, DIMENSION(:,:,:,:,:) :: ztsn             ! 5D workspace
       !!--------------------------------------------------------------------
       IF( ln_timing )   CALL timing_start('dia_ar5')
 
-      IF( kt == nit000 )     CALL dia_ar5_init
-
-      IF( l_ar5 ) THEN
-         ALLOCATE( zarea_ssh(A2D(0)), z2d(A2D(0)), z3d(A2D(0),jpk) )
-         ALLOCATE( zrhd(A2D(0),jpk) )
-         ALLOCATE( ztsn(A2D(0),jpk,jpts,jpt) )
-         zarea_ssh(:,:) = e1e2t(A2D(0)) * ssh(A2D(0),Kmm)
-         ztsn(:,:,:,:,:) = 0._wp
-         zrhd(:,:,:) = 0._wp
+      IF( .NOT. l_istiled .OR. ntile == 1 )  THEN  ! Do only for the first tile
+         zsvolssh = 0._wp
+         zstemp = 0._wp
+         zssal = 0._wp
+         zstemp2 = 0._wp
+         zssst = 0._wp
+         zsarho = 0._wp
+         zsarho2 = 0._wp
+         zssshice = 0._wp
       ENDIF
       !
       CALL iom_put( 'e2u'      , e2u  (:,:) )
       CALL iom_put( 'e1v'      , e1v  (:,:) )
       CALL iom_put( 'areacello', e1e2t(:,:) )
       !
+      ALLOCATE( zarea_ssh(T2D(0)), z2d(T2D(0)) )
+      ALLOCATE( z3d(T2D(0),jpk) )
+      ALLOCATE( ztsn(T2D(0),jpk,jpts,jpt) )
+      !
+      DO_2D( 0, 0, 0, 0 )
+         zarea_ssh(ji,jj) = e1e2t(ji,jj) * ssh(ji,jj,Kmm)
+      END_2D
+      !
       IF( iom_use( 'volcello' ) .OR. iom_use( 'masscello' )  ) THEN
          z3d(:,:,jpk) = 0._wp        ! ocean volume ; rhd is used as workspace
          DO_3D( 0, 0, 0, 0, 1, jpkm1 )
             z3d(ji,jj,jk) = e1e2t(ji,jj) * e3t(ji,jj,jk,Kmm) * tmask(ji,jj,jk)
          END_3D
-         CALL iom_put( 'volcello'  , z3d(:,:,:)  )  ! WARNING not consistent with CMIP DR where volcello is at ca. 2000
+         CALL iom_put( 'volcello'  , z3d(:,:,:) )  ! WARNING not consistent with CMIP DR where volcello is at ca. 2000
          DO_3D( 0, 0, 0, 0, 1, jpk )
             z3d(ji,jj,jk) =  rho0 * e3t(ji,jj,jk,Kmm) * tmask(ji,jj,jk)
          END_3D
-         CALL iom_put( 'masscello' , z3d (:,:,:) )   ! ocean mass
+         CALL iom_put( 'masscello' , z3d(:,:,:) )   ! ocean mass
       ENDIF
       !
       IF( iom_use( 'e3tb' ) )  THEN    ! bottom layer thickness
@@ -116,38 +127,52 @@ CONTAINS
             ikb = mbkt(ji,jj)
             z2d(ji,jj) = e3t(ji,jj,ikb,Kmm)
          END_2D
-         CALL iom_put( 'e3tb', z2d )
+         CALL iom_put( 'e3tb', z2d(:,:) )
       ENDIF
       !
-      IF( iom_use( 'voltot' ) .OR. iom_use( 'sshtot' )  .OR. iom_use( 'sshdyn' )  ) THEN
-         !                                         ! total volume of liquid seawater
-         zvolssh = glob_sum( 'diaar5', zarea_ssh(:,:) )
-         zvol    = vol0 + zvolssh
-
-         CALL iom_put( 'voltot', zvol               )
-         CALL iom_put( 'sshtot', zvolssh / area_tot )
-         CALL iom_put( 'sshdyn', ssh(:,:,Kmm) - (zvolssh / area_tot) )
-         !
+      IF( iom_use( 'masstot' ) .OR. iom_use( 'temptot' )  .OR. iom_use( 'saltot' ) .OR. &
+         & iom_use( 'voltot' ) .OR. iom_use( 'sshtot' )  .OR. iom_use( 'sshdyn' )  ) THEN
+         !                                               ! Sum over tiles (local_sum) and then MPI domains (glob_sum)
+         zsvolssh = local_sum( zarea_ssh(:,:), zsvolssh )
+         IF( .NOT. l_istiled .OR. ntile == nijtile )  THEN  ! Do only for the last tile
+            !                                         ! total volume of liquid seawater
+            zztmp = glob_sum( 'diaar5', zsvolssh )
+            zvol    = vol0 + zztmp
+            IF( iom_use( 'voltot' ) .OR. iom_use( 'sshtot' )  .OR. iom_use( 'sshdyn' )  ) THEN
+               CALL iom_put( 'voltot', zvol               )
+               CALL iom_put( 'sshtot', zztmp / area_tot )
+               CALL iom_put( 'sshdyn', ssh(:,:,Kmm) - (zztmp / area_tot) )
+            ENDIF
+         ENDIF
       ENDIF
 
       IF( iom_use( 'sshice' ) ) THEN
          !                                         ! total volume of ice+snow 
          IF( nn_ice == 0 ) THEN
-            zvolssh = 0._wp
+            zztmp = 0._wp
          ELSE
             ztf = REAL(MOD( kt-1, nn_fsbc ), wp) / REAL(nn_fsbc, wp)
-            z2d = ztf * snwice_mass(:,:) + (1._wp - ztf) * snwice_mass_b(:,:)
-            zvolssh = glob_sum( 'diaar5', e1e2t(:,:) * z2d(:,:) * r1_rho0 )
+            DO_2D( 0, 0, 0, 0 )
+               z2d(ji,jj) = ztf * snwice_mass(ji,jj) + (1._wp - ztf) * snwice_mass_b(ji,jj)
+            END_2D
+            !                                                                 ! Sum over tiles (local_sum) and then MPI domains (glob_sum)
+            zssshice = local_sum( e1e2t(T2D(0)) * z2d(:,:) * r1_rho0, zssshice )
          ENDIF
 
-         CALL iom_put( 'sshice', zvolssh / area_tot )
+         IF( .NOT. l_istiled .OR. ntile == nijtile )  THEN  ! Do only for the last tile
+            IF( nn_ice /= 0 )   zztmp = glob_sum( 'diaar5', zssshice )
+            CALL iom_put( 'sshice', zztmp / area_tot )
+         ENDIF
          !
       ENDIF
 
       IF( iom_use( 'botpres' ) .OR. iom_use( 'sshthster' )  .OR. iom_use( 'sshsteric' )  ) THEN
+         ALLOCATE( zrhd(T2D(0),jpk) )
          !
-         ztsn(:,:,:,jp_tem,Kmm) = ts(A2D(0),:,jp_tem,Kmm)                ! thermosteric ssh
-         ztsn(:,:,:,jp_sal,Kmm) = sn0(:,:,:)
+         DO_3D( 0, 0, 0, 0, 1, jpk )
+            ztsn(ji,jj,jk,jp_tem,Kmm) = ts(ji,jj,jk,jp_tem,Kmm)                    ! thermosteric ssh
+            ztsn(ji,jj,jk,jp_sal,Kmm) = sn0(ji,jj,jk)
+         END_3D
          CALL eos( ztsn, Kmm, zrhd, kbnd=0 )                           ! now in situ density using initial salinity
          !
          z2d(:,:) = 0._wp                        ! no atmospheric surface pressure, levitating sea-ice
@@ -169,13 +194,16 @@ CONTAINS
 !!gm   riceload should be added in both ln_linssh=T or F, no?
 !!gm
          END IF
-         !
-         zarho = glob_sum( 'diaar5', e1e2t(A2D(0)) * z2d(:,:) )
-         zssh_steric = - zarho / area_tot
-         CALL iom_put( 'sshthster', zssh_steric )
-
+         DEALLOCATE( zrhd )
+         !                                                     ! Sum over tiles (local_sum) and then MPI domains (glob_sum)
+         zsarho = local_sum( e1e2t(T2D(0)) * z2d(:,:), zsarho)
+         IF( .NOT. l_istiled .OR. ntile == nijtile )  THEN  ! Do only for the last tile
+            zarho = glob_sum( 'diaar5', zsarho )
+            zztmp = - zarho / area_tot
+            CALL iom_put( 'sshthster', zztmp )
+         ENDIF
          !                                         ! steric sea surface height
-         z2d(:,:) = 0._wp                        ! no atmospheric surface pressure, levitating sea-ice
+         z2d(:,:) = 0._wp                          ! no atmospheric surface pressure, levitating sea-ice
          DO_3D( 0, 0, 0, 0, 1, jpkm1 )
             z2d(ji,jj) = z2d(ji,jj) + e3t(ji,jj,jk,Kmm) * rhd(ji,jj,jk)
          END_3D
@@ -191,27 +219,32 @@ CONTAINS
                END_2D
             END IF
          END IF
-         !
-         zarho = glob_sum( 'diaar5', e1e2t(A2D(0)) * z2d(:,:) )
-         zssh_steric = - zarho / area_tot
-         CALL iom_put( 'sshsteric', zssh_steric )
-         !                                         ! ocean bottom pressure
+         !                                                        ! Sum over tiles (local_sum) and then MPI domains (glob_sum)
+         zsarho2 = local_sum( e1e2t(T2D(0)) * z2d(:,:), zsarho2)
+         IF( .NOT. l_istiled .OR. ntile == nijtile )  THEN  ! Do only for the last tile
+            zarho = glob_sum( 'diaar5', zsarho2 )
+            zztmp = - zarho / area_tot
+            CALL iom_put( 'sshsteric', zztmp )
+         END IF
+         !                                            ! ocean bottom pressure
          zztmp = rho0 * grav * 1.e-4_wp               ! recover pressure from pressure anomaly and cover to dbar = 1.e4 Pa
-         z2d(:,:) = zztmp * ( z2d(:,:) + ssh(A2D(0),Kmm) + thick0(:,:) )
+         DO_2D( 0, 0, 0, 0 )
+            z2d(ji,jj) = zztmp * ( z2d(ji,jj) + ssh(ji,jj,Kmm) + thick0(ji,jj) )
+         END_2D
          CALL iom_put( 'botpres', z2d )
          !
       ENDIF
 
       IF( iom_use( 'masstot' ) .OR. iom_use( 'temptot' )  .OR. iom_use( 'saltot' )  ) THEN
-          !                                         ! Mean density anomalie, temperature and salinity
-          ztsn(:,:,:,:,Kmm) = 0._wp                 ! ztsn(:,:,1,jp_tem/sal) is used here as 2D Workspace for temperature & salinity
-          DO_3D( 0, 0, 0, 0, 1, jpkm1 )
-             zztmp = e1e2t(ji,jj) * e3t(ji,jj,jk,Kmm)
-             ztsn(ji,jj,1,jp_tem,Kmm) = ztsn(ji,jj,1,jp_tem,Kmm) + zztmp * ts(ji,jj,jk,jp_tem,Kmm)
-             ztsn(ji,jj,1,jp_sal,Kmm) = ztsn(ji,jj,1,jp_sal,Kmm) + zztmp * ts(ji,jj,jk,jp_sal,Kmm)
-          END_3D
+         !                                         ! Mean density anomalie, temperature and salinity
+         ztsn(:,:,:,:,Kmm) = 0._wp                 ! ztsn(:,:,1,jp_tem/sal) is used here as 2D Workspace for temperature & salinity
+         DO_3D( 0, 0, 0, 0, 1, jpkm1 )
+            zztmp = e1e2t(ji,jj) * e3t(ji,jj,jk,Kmm)
+            ztsn(ji,jj,1,jp_tem,Kmm) = ztsn(ji,jj,1,jp_tem,Kmm) + zztmp * ts(ji,jj,jk,jp_tem,Kmm)
+            ztsn(ji,jj,1,jp_sal,Kmm) = ztsn(ji,jj,1,jp_sal,Kmm) + zztmp * ts(ji,jj,jk,jp_sal,Kmm)
+         END_3D
 
-          IF( ln_linssh ) THEN
+         IF( ln_linssh ) THEN
             IF( ln_isfcav ) THEN
                DO_2D( 0, 0, 0, 0 )
                   iks = mikt(ji,jj)
@@ -223,26 +256,30 @@ CONTAINS
                   ztsn(ji,jj,1,jp_tem,Kmm) = ztsn(ji,jj,1,jp_tem,Kmm) + zarea_ssh(ji,jj) * ts(ji,jj,1,jp_tem,Kmm)
                   ztsn(ji,jj,1,jp_sal,Kmm) = ztsn(ji,jj,1,jp_sal,Kmm) + zarea_ssh(ji,jj) * ts(ji,jj,1,jp_sal,Kmm)
                END_2D
-            END IF
+            ENDIF
+         ENDIF
+         !                                                  ! Sum over tiles (local_sum) and then MPI domains (glob_sum)
+         zstemp = local_sum( ztsn(:,:,1,jp_tem,Kmm), zstemp )
+         zssal = local_sum( ztsn(:,:,1,jp_sal,Kmm), zssal )
+         IF( .NOT. l_istiled .OR. ntile == nijtile )  THEN  ! Do only for the last tile
+            ztemp = glob_sum( 'diaar5', zstemp )
+            zsal  = glob_sum( 'diaar5', zssal )
+            zztmp = rho0 * ( zarho + zvol )
+            !
+            CALL iom_put( 'masstot', zztmp )
+            CALL iom_put( 'temptot', ztemp / zvol )
+            CALL iom_put( 'saltot' , zsal  / zvol )
          ENDIF
-         !
-         ztemp = glob_sum( 'diaar5', ztsn(:,:,1,jp_tem,Kmm) )
-         zsal  = glob_sum( 'diaar5', ztsn(:,:,1,jp_sal,Kmm) )
-         zmass = rho0 * ( zarho + zvol )
-         !
-         CALL iom_put( 'masstot', zmass )
-         CALL iom_put( 'temptot', ztemp / zvol )
-         CALL iom_put( 'saltot' , zsal  / zvol )
          !
       ENDIF
 
       IF( ln_teos10 ) THEN        ! ! potential temperature (TEOS-10 case)
          IF( iom_use( 'toce_pot') .OR. iom_use( 'temptot_pot' ) .OR. iom_use( 'sst_pot' )  &
-                                  .OR. iom_use( 'ssttot' ) .OR.  iom_use( 'tosmint_pot' ) ) THEN
+            &                     .OR. iom_use( 'ssttot' ) .OR.  iom_use( 'tosmint_pot' ) ) THEN
             !
             z3d(:,:,jpk) = 0._wp
             DO jk = 1, jpkm1
-               CALL eos_pt_from_ct( ts(:,:,jk,jp_tem,Kmm), ts(:,:,jk,jp_sal,Kmm), z3d(:,:,jk), kbnd=0 )
+               CALL eos_pt_from_ct( ts(T2D(0),jk,jp_tem,Kmm), ts(T2D(0),jk,jp_sal,Kmm), z3d(:,:,jk), kbnd=0 )
             END DO
             !
             CALL iom_put( 'toce_pot', z3d(:,:,:) )  ! potential temperature (TEOS-10 case)
@@ -251,36 +288,49 @@ CONTAINS
             IF( iom_use( 'temptot_pot' ) ) THEN   ! Output potential temperature in case we use TEOS-10
                z2d(:,:) = 0._wp
                DO_3D( 0, 0, 0, 0, 1, jpkm1 )
-                 z2d(ji,jj) = z2d(ji,jj) + e1e2t(ji,jj) * e3t(ji,jj,jk,Kmm) * z3d(ji,jj,jk)
+                  z2d(ji,jj) = z2d(ji,jj) + e1e2t(ji,jj) * e3t(ji,jj,jk,Kmm) * z3d(ji,jj,jk)
                END_3D
-               ztemp = glob_sum( 'diaar5', z2d(:,:)  )
-               CALL iom_put( 'temptot_pot', ztemp / zvol )
-             ENDIF
-             !
-             IF( iom_use( 'ssttot' ) ) THEN   ! Output potential temperature in case we use TEOS-10
-               zsst = glob_sum( 'diaar5',  e1e2t(A2D(0)) * z3d(:,:,1)  )
-               CALL iom_put( 'ssttot', zsst / area_tot )
-             ENDIF
-             ! Vertical integral of temperature
-             IF( iom_use( 'tosmint_pot') ) THEN
+               !                                      ! Sum over tiles (local_sum) and then MPI domains (glob_sum)
+               zstemp2 = local_sum( z2d(:,:), zstemp2 )
+               IF( .NOT. l_istiled .OR. ntile == nijtile ) THEN   ! Do only for the last tile
+                  ztemp = glob_sum( 'diaar5', zstemp2 )
+                  CALL iom_put( 'temptot_pot', ztemp / zvol )
+               ENDIF
+            ENDIF
+            !
+            IF( iom_use( 'ssttot' ) ) THEN   ! Output potential temperature in case we use TEOS-10
+               !                                                     ! Sum over tiles (local_sum) and then MPI domains (glob_sum)
+               zssst = local_sum( e1e2t(T2D(0)) * z3d(:,:,1), zssst )
+               IF( .NOT. l_istiled .OR. ntile == nijtile ) THEN   ! Do only for the last tile
+                  zztmp = glob_sum( 'diaar5',  zssst )
+                  CALL iom_put( 'ssttot', zztmp / area_tot )
+               ENDIF
+            ENDIF
+            !
+            ! Vertical integral of temperature
+            IF( iom_use( 'tosmint_pot') ) THEN
                z2d(:,:) = 0._wp
                DO_3D( 0, 0, 0, 0, 1, jpkm1 )
                   z2d(ji,jj) = z2d(ji,jj) + rho0 * e3t(ji,jj,jk,Kmm) *  z3d(ji,jj,jk)
                END_3D
-               CALL iom_put( 'tosmint_pot', z2d )
+               CALL iom_put( 'tosmint_pot', z2d(:,:) )
             ENDIF
-        ENDIF
+         ENDIF
       ELSE
-         IF( iom_use('ssttot') ) THEN   ! Output sst in case we use EOS-80
-            zsst  = glob_sum( 'diaar5', e1e2t(:,:) * ts(:,:,1,jp_tem,Kmm) )
-            CALL iom_put('ssttot', zsst / area_tot )
+         !                                                                 ! Sum over tiles (local_sum) and then MPI domains (glob_sum)
+         zssst = local_sum( e1e2t(T2D(0)) * ts(T2D(0),1,jp_tem,Kmm), zssst )
+         IF( .NOT. l_istiled .OR. ntile == nijtile ) THEN                       ! Do only on the last tile
+            IF( iom_use('ssttot') ) THEN   ! Output sst in case we use EOS-80
+               zztmp  = glob_sum( 'diaar5', zssst )
+               CALL iom_put('ssttot', zztmp / area_tot )
+            ENDIF
          ENDIF
       ENDIF
 
       IF( iom_use( 'tnpeo' )) THEN
-        ! Work done against stratification by vertical mixing
-        ! Exclude points where rn2 is negative as convection kicks in here and
-        ! work is not being done against stratification
+         ! Work done against stratification by vertical mixing
+         ! Exclude points where rn2 is negative as convection kicks in here and
+         ! work is not being done against stratification
          z2d(:,:) = 0._wp
          IF( ln_zdfddm ) THEN
             DO_3D( 0, 0, 0, 0, 2, jpk )
@@ -295,18 +345,17 @@ CONTAINS
                      &                   - avs(ji,jj,jk) * zbw * (ts(ji,jj,jk-1,jp_sal,Kmm) - ts(ji,jj,jk,jp_sal,Kmm) ) )
                ENDIF
             END_3D
-          ELSE
+         ELSE
             DO_3D( 0, 0, 0, 0, 1, jpk )
                z2d(ji,jj) = z2d(ji,jj) + avt(ji,jj,jk) * MIN(0._wp,rn2(ji,jj,jk)) * rho0 * e3w(ji,jj,jk,Kmm)
             END_3D
          ENDIF
-          CALL iom_put( 'tnpeo', z2d )
+         CALL iom_put( 'tnpeo', z2d(:,:) )
       ENDIF
 
-      IF( l_ar5 ) THEN
-        DEALLOCATE( zarea_ssh , z2d, z3d )
-        DEALLOCATE( ztsn )
-      ENDIF
+      DEALLOCATE( z2d, zarea_ssh )
+      DEALLOCATE( z3d )
+      DEALLOCATE( ztsn )
       !
       IF( ln_timing )   CALL timing_stop('dia_ar5')
       !
@@ -315,7 +364,7 @@ CONTAINS
 
    SUBROUTINE dia_ar5_hst( ktra, cptr, puflx, pvflx )
       !!----------------------------------------------------------------------
-      !!                    ***  ROUTINE dia_ar5_htr ***
+      !!                    ***  ROUTINE dia_ar5_hst ***
       !!----------------------------------------------------------------------
       !! Wrapper for heat transport calculations
       !! Called from all advection and/or diffusion routines
diff --git a/src/OCE/DIA/diahth.F90 b/src/OCE/DIA/diahth.F90
index a20b476aa6a46ad0008c17403664345465a67be2..dc71bfa7d5f5cc89d5c39c6a289f962f8200362f 100644
--- a/src/OCE/DIA/diahth.F90
+++ b/src/OCE/DIA/diahth.F90
@@ -25,18 +25,8 @@ MODULE diahth
    PRIVATE
 
    PUBLIC   dia_hth       ! routine called by step.F90
-   PUBLIC   dia_hth_alloc ! routine called by nemogcm.F90
 
    LOGICAL, SAVE  ::   l_hth     !: thermocline-20d depths flag
-   
-   ! note: following variables should move to local variables once iom_put is always used 
-   REAL(wp), PUBLIC, ALLOCATABLE, SAVE, DIMENSION(:,:) ::   hth    !: depth of the max vertical temperature gradient [m]
-   REAL(wp), PUBLIC, ALLOCATABLE, SAVE, DIMENSION(:,:) ::   hd20   !: depth of 20 C isotherm                         [m]
-   REAL(wp), PUBLIC, ALLOCATABLE, SAVE, DIMENSION(:,:) ::   hd26   !: depth of 26 C isotherm                         [m]
-   REAL(wp), PUBLIC, ALLOCATABLE, SAVE, DIMENSION(:,:) ::   hd28   !: depth of 28 C isotherm                         [m]
-   REAL(wp), PUBLIC, ALLOCATABLE, SAVE, DIMENSION(:,:) ::   htc3   !: heat content of first 300 m                    [W]
-   REAL(wp), PUBLIC, ALLOCATABLE, SAVE, DIMENSION(:,:) ::   htc7   !: heat content of first 700 m                    [W]
-   REAL(wp), PUBLIC, ALLOCATABLE, SAVE, DIMENSION(:,:) ::   htc20  !: heat content of first 2000 m                   [W]
 
 
    !! * Substitutions
@@ -49,20 +39,6 @@ MODULE diahth
    !!----------------------------------------------------------------------
 CONTAINS
 
-   FUNCTION dia_hth_alloc()
-      !!---------------------------------------------------------------------
-      INTEGER :: dia_hth_alloc
-      !!---------------------------------------------------------------------
-      !
-      ALLOCATE( hth(A2D(0)), hd20(A2D(0)), hd26(A2D(0)), hd28(A2D(0)), &
-         &      htc3(A2D(0)), htc7(A2D(0)), htc20(A2D(0)), STAT=dia_hth_alloc )
-      !
-      CALL mpp_sum ( 'diahth', dia_hth_alloc )
-      IF(dia_hth_alloc /= 0)   CALL ctl_stop( 'STOP', 'dia_hth_alloc: failed to allocate arrays.' )
-      !
-   END FUNCTION dia_hth_alloc
-
-
    SUBROUTINE dia_hth( kt, Kmm )
       !!---------------------------------------------------------------------
       !!                  ***  ROUTINE dia_hth  ***
@@ -92,39 +68,44 @@ CONTAINS
       REAL(wp)                    ::   ztem2 = 0.2_wp        ! temperature criterion for mixed layer depth
       REAL(wp)                    ::   zztmp, zzdep          ! temporary scalars inside do loop
       REAL(wp)                    ::   zu, zv, zw, zut, zvt  ! temporary workspace
-      REAL(wp), DIMENSION(A2D(0)) ::   zabs2      ! MLD: abs( tn - tn(10m) ) = ztem2
-      REAL(wp), DIMENSION(A2D(0)) ::   ztm2       ! Top of thermocline: tn = tn(10m) - ztem2
-      REAL(wp), DIMENSION(A2D(0)) ::   zrho10_3   ! MLD: rho = rho10m + zrho3
-      REAL(wp), DIMENSION(A2D(0)) ::   zpycn      ! pycnocline: rho = rho10m + (dr/dT)(T,S,10m)*(-0.2 degC)
-      REAL(wp), DIMENSION(A2D(0)) ::   ztinv      ! max of temperature inversion
-      REAL(wp), DIMENSION(A2D(0)) ::   zdepinv    ! depth of temperature inversion
-      REAL(wp), DIMENSION(A2D(0)) ::   zrho0_3    ! MLD rho = rho(surf) = 0.03
-      REAL(wp), DIMENSION(A2D(0)) ::   zrho0_1    ! MLD rho = rho(surf) = 0.01
-      REAL(wp), DIMENSION(A2D(0)) ::   zmaxdzT    ! max of dT/dz
-      REAL(wp), DIMENSION(A2D(0)) ::   zdelr      ! delta rho equivalent to deltaT = 0.2
-      !!----------------------------------------------------------------------
+      REAL(wp)                    ::   ztemp = 0._wp
+      REAL(wp), ALLOCATABLE, DIMENSION(:,:) ::   zabs2      ! MLD: abs( tn - tn(10m) ) = ztem2
+      REAL(wp), ALLOCATABLE, DIMENSION(:,:) ::   ztm2       ! Top of thermocline: tn = tn(10m) - ztem2
+      REAL(wp), ALLOCATABLE, DIMENSION(:,:) ::   zrho10_3   ! MLD: rho = rho10m + zrho3
+      REAL(wp), ALLOCATABLE, DIMENSION(:,:) ::   zpycn      ! pycnocline: rho = rho10m + (dr/dT)(T,S,10m)*(-0.2 degC)
+      REAL(wp), ALLOCATABLE, DIMENSION(:,:) ::   ztinv      ! max of temperature inversion
+      REAL(wp), ALLOCATABLE, DIMENSION(:,:) ::   zdepinv    ! depth of temperature inversion
+      REAL(wp), ALLOCATABLE, DIMENSION(:,:) ::   zrho0_3    ! MLD rho = rho(surf) = 0.03
+      REAL(wp), ALLOCATABLE, DIMENSION(:,:) ::   zrho0_1    ! MLD rho = rho(surf) = 0.01
+      REAL(wp), ALLOCATABLE, DIMENSION(:,:) ::   zmaxdzT    ! max of dT/dz
+      REAL(wp), ALLOCATABLE, DIMENSION(:,:) ::   zdelr      ! delta rho equivalent to deltaT = 0.2
+      REAL(wp), ALLOCATABLE, DIMENSION(:,:) ::   zhth       ! depth of the max vertical temperature gradient [m]
+      REAL(wp), ALLOCATABLE, DIMENSION(:,:) ::   z2d        ! 2D working array
+      !!---------------------------------------------------------------------
       IF( ln_timing )   CALL timing_start('dia_hth')
-
-      IF( kt == nit000 ) THEN
-         !
-         l_hth = iom_use( 'mlddzt'   ) .OR. iom_use( 'mldr0_3'  ) .OR. iom_use( 'mldr0_1'  )    .OR.  & 
-            &    iom_use( 'mld_dt02' ) .OR. iom_use( 'topthdep' ) .OR. iom_use( 'mldr10_3' )    .OR.  &    
-            &    iom_use( '20d'      ) .OR. iom_use( '26d'      ) .OR. iom_use( '28d'      )    .OR.  &    
-            &    iom_use( 'hc300'    ) .OR. iom_use( 'hc700'    ) .OR. iom_use( 'hc2000'   )    .OR.  &    
-            &    iom_use( 'pycndep'  ) .OR. iom_use( 'tinv'     ) .OR. iom_use( 'depti'    )
-         !
-         !                                      ! allocate dia_hth array
-         IF( l_hth ) THEN 
-            IF( dia_hth_alloc() /= 0 )   CALL ctl_stop( 'STOP', 'dia_hth : unable to allocate standard arrays' )
-            IF(lwp) WRITE(numout,*)
-            IF(lwp) WRITE(numout,*) 'dia_hth : diagnostics of the thermocline depth'
-            IF(lwp) WRITE(numout,*) '~~~~~~~ '
-            IF(lwp) WRITE(numout,*)
+      IF( .NOT. l_istiled .OR. ntile == 1) THEN   ! Do only for the first tile
+         IF( kt == nit000 ) THEN
+            !
+            l_hth = iom_use( 'mlddzt'   ) .OR. iom_use( 'mldr0_3'  ) .OR. iom_use( 'mldr0_1'  )    .OR.  &
+               &    iom_use( 'mld_dt02' ) .OR. iom_use( 'topthdep' ) .OR. iom_use( 'mldr10_3' )    .OR.  & 
+               &    iom_use( '20d'      ) .OR. iom_use( '26d'      ) .OR. iom_use( '28d'      )    .OR.  & 
+               &    iom_use( 'hc300'    ) .OR. iom_use( 'hc700'    ) .OR. iom_use( 'hc2000'   )    .OR.  & 
+               &    iom_use( 'pycndep'  ) .OR. iom_use( 'tinv'     ) .OR. iom_use( 'depti'    )
+            !
+            !                                      ! allocate dia_hth array
+            IF( l_hth ) THEN
+               IF(lwp) WRITE(numout,*)
+               IF(lwp) WRITE(numout,*) 'dia_hth : diagnostics of the thermocline depth'
+               IF(lwp) WRITE(numout,*) '~~~~~~~ '
+               IF(lwp) WRITE(numout,*)
+            ENDIF
          ENDIF
       ENDIF
-
+      !
       IF( l_hth ) THEN
          !
+         ALLOCATE( z2d(T2D(0)), zhth(T2D(0)), zabs2(T2D(0)), ztm2(T2D(0)), zrho10_3(T2D(0)), zpycn(T2D(0)), &
+               & ztinv(T2D(0)), zdepinv(T2D(0)), zrho0_3(T2D(0)), zrho0_1(T2D(0)), zmaxdzT(T2D(0)), zdelr(T2D(0)))
          ! initialization
          IF( iom_use( 'tinv'   ) )   ztinv  (:,:) = 0._wp  
          IF( iom_use( 'depti'  ) )   zdepinv(:,:) = 0._wp  
@@ -133,7 +114,7 @@ CONTAINS
             &                    .OR. iom_use( 'mldr10_3' ) .OR. iom_use( 'pycndep'  ) ) THEN
             DO_2D( 0, 0, 0, 0 )
                zztmp = gdepw(ji,jj,mbkt(ji,jj)+1,Kmm) 
-               hth     (ji,jj) = zztmp
+               zhth    (ji,jj) = zztmp
                zabs2   (ji,jj) = zztmp
                ztm2    (ji,jj) = zztmp
                zrho10_3(ji,jj) = zztmp
@@ -166,7 +147,7 @@ CONTAINS
 
                IF( zztmp > zmaxdzT(ji,jj) ) THEN                        
                    zmaxdzT(ji,jj) = zztmp   
-                   hth    (ji,jj) = zzdep                ! max and depth of dT/dz
+                   zhth    (ji,jj) = zzdep                ! max and depth of dT/dz
                ENDIF
          
                IF( nla10 > 1 ) THEN 
@@ -176,7 +157,7 @@ CONTAINS
                ENDIF
             END_3D
          
-            CALL iom_put( 'mlddzt', hth )            ! depth of the thermocline
+            CALL iom_put( 'mlddzt', zhth )            ! depth of the thermocline
             IF( nla10 > 1 ) THEN 
                CALL iom_put( 'mldr0_3', zrho0_3 )   ! MLD delta rho(surf) = 0.03
                CALL iom_put( 'mldr0_1', zrho0_1 )   ! MLD delta rho(surf) = 0.01
@@ -245,51 +226,46 @@ CONTAINS
          !  Depth of 20C/26C/28C isotherm  !
          ! ------------------------------- !
          IF( iom_use ('20d') ) THEN  ! depth of the 20 isotherm
-            ztem2 = 20.
-            CALL dia_hth_dep( Kmm, ztem2, hd20 )  
-            CALL iom_put( '20d', hd20 )    
+            CALL dia_hth_dep( Kmm, 20., z2d )
+            CALL iom_put( '20d', z2d )
          ENDIF
          !
          IF( iom_use ('26d') ) THEN  ! depth of the 26 isotherm
-            ztem2 = 26.
-            CALL dia_hth_dep( Kmm, ztem2, hd26 )  
-            CALL iom_put( '26d', hd26 )    
+            CALL dia_hth_dep( Kmm, 26., z2d )
+            CALL iom_put( '26d', z2d )
          ENDIF
          !
          IF( iom_use ('28d') ) THEN  ! depth of the 28 isotherm
-            ztem2 = 28.
-            CALL dia_hth_dep( Kmm, ztem2, hd28 )  
-            CALL iom_put( '28d', hd28 )    
+            CALL dia_hth_dep( Kmm, 28., z2d )
+            CALL iom_put( '28d', z2d )
          ENDIF
         
          ! ----------------------------- !
          !  Heat content of first 300 m  !
          ! ----------------------------- !
          IF( iom_use ('hc300') ) THEN  
-            zzdep = 300.
-            CALL  dia_hth_htc( Kmm, zzdep, ts(:,:,:,jp_tem,Kmm), htc3 )
-            CALL iom_put( 'hc300', rho0_rcp * htc3 )  ! vertically integrated heat content (J/m2)
+            CALL  dia_hth_htc( Kmm, 300., ts(:,:,:,jp_tem,Kmm), z2d )
+            CALL iom_put( 'hc300', rho0_rcp * z2d )  ! vertically integrated heat content (J/m2)
          ENDIF
          !
          ! ----------------------------- !
          !  Heat content of first 700 m  !
          ! ----------------------------- !
          IF( iom_use ('hc700') ) THEN  
-            zzdep = 700.
-            CALL  dia_hth_htc( Kmm, zzdep, ts(:,:,:,jp_tem,Kmm), htc7 )
-            CALL iom_put( 'hc700', rho0_rcp * htc7 )  ! vertically integrated heat content (J/m2)
-  
+            CALL dia_hth_htc( Kmm, 700., ts(:,:,:,jp_tem,Kmm), z2d )
+            CALL iom_put( 'hc700', rho0_rcp * z2d )  ! vertically integrated heat content (J/m2)
          ENDIF
          !
          ! ----------------------------- !
          !  Heat content of first 2000 m  !
          ! ----------------------------- !
          IF( iom_use ('hc2000') ) THEN  
-            zzdep = 2000.
-            CALL  dia_hth_htc( Kmm, zzdep, ts(:,:,:,jp_tem,Kmm), htc20 )
-            CALL iom_put( 'hc2000', rho0_rcp * htc20 )  ! vertically integrated heat content (J/m2)  
+            CALL dia_hth_htc( Kmm, 2000., ts(:,:,:,jp_tem,Kmm), z2d )
+            CALL iom_put( 'hc2000', rho0_rcp * z2d )  ! vertically integrated heat content (J/m2)
          ENDIF
          !
+         DEALLOCATE( z2d, zhth, zabs2, ztm2, zrho10_3, zpycn, ztinv, &
+                   & zdepinv, zrho0_3, zrho0_1, zmaxdzT, zdelr )
       ENDIF
 
       !
@@ -301,11 +277,11 @@ CONTAINS
       !
       INTEGER , INTENT(in) :: Kmm      ! ocean time level index
       REAL(wp), INTENT(in) :: ptem
-      REAL(wp), DIMENSION(A2D(0)), INTENT(out) :: pdept     
+      REAL(wp), DIMENSION(T2D(0)), INTENT(out) :: pdept
       !
       INTEGER  :: ji, jj, jk, iid
       REAL(wp) :: zztmp, zzdep
-      INTEGER, DIMENSION(A2D(0)) :: iktem
+      INTEGER, DIMENSION(T2D(0)) :: iktem
       
       ! --------------------------------------- !
       ! search deepest level above ptem         !
@@ -346,11 +322,11 @@ CONTAINS
       INTEGER , INTENT(in) ::   Kmm      ! ocean time level index
       REAL(wp), INTENT(in) ::   pdep     ! depth over the heat content
       REAL(wp), DIMENSION(jpi,jpj,jpk), INTENT(in)    ::   pt   
-      REAL(wp), DIMENSION(A2D(0)),      INTENT(inout) ::   phtc  
+      REAL(wp), DIMENSION(T2D(0))     , INTENT(inout)  ::   phtc
       !
       INTEGER  ::   ji, jj, jk, ik
-      REAL(wp), DIMENSION(A2D(0)) ::   zthick
-      INTEGER , DIMENSION(A2D(0)) ::   ilevel
+      REAL(wp), DIMENSION(T2D(0)) ::   zthick
+      INTEGER , DIMENSION(T2D(0)) ::   ilevel
 
 
       ! surface boundary condition
diff --git a/src/OCE/DIA/diaptr.F90 b/src/OCE/DIA/diaptr.F90
index e04e88fef9ff09e86d7e560d63ad15ab00279967..d8991121c15300db1a00c37bec7f2552ea1f8925 100644
--- a/src/OCE/DIA/diaptr.F90
+++ b/src/OCE/DIA/diaptr.F90
@@ -96,7 +96,7 @@ CONTAINS
          ENDIF
 
          ! Calculate diagnostics only when zonal integrals have finished
-         IF( .NOT. l_istiled .OR. ntile == nijtile ) CALL dia_ptr_iom(kt, Kmm, pvtr)
+         IF( .NOT. l_istiled .OR. ntile == nijtile ) CALL dia_ptr_iom(kt, Kmm, pvtr)         ! Do only for the last tile
       ENDIF
 
       IF( ln_timing )   CALL timing_stop('dia_ptr')
@@ -133,7 +133,7 @@ CONTAINS
             ALLOCATE( z4d1(A2D(0),jpk,nbasin) )
             !
             DO jn = 1, nbasin                                    ! by sub-basins
-               z4d1(Nis0,:,:,jn) =  pvtr_int(:,:,jp_vtr,jn)                  ! zonal cumulative effective transport excluding closed seas
+               z4d1(Nis0,:,:,jn) =  pvtr_int(:,:,jn,jp_vtr)                  ! zonal cumulative effective transport excluding closed seas
                DO jk = jpkm1, 1, -1
                   z4d1(Nis0,:,jk,jn) = z4d1(Nis0,:,jk+1,jn) - z4d1(Nis0,:,jk,jn)    ! effective j-Stream-Function (MSF)
                END DO
@@ -150,26 +150,30 @@ CONTAINS
                &      zt_jk(A1Dj(0),jpk,nbasin), zs_jk( A1Dj(0),jpk,nbasin) )
             !
             DO jn = 1, nbasin
-               sjk(:,:,jn) = pvtr_int(:,:,jp_msk,jn)
+               sjk(:,:,jn) = pvtr_int(:,:,jn,jp_msk)
                r1_sjk(:,:,jn) = 0._wp
                WHERE( sjk(:,:,jn) /= 0._wp )   r1_sjk(:,:,jn) = 1._wp / sjk(:,:,jn)
                ! i-mean T and S, j-Stream-Function, basin
-               zt_jk(:,:,jn) = pvtr_int(:,:,jp_tem,jn) * r1_sjk(:,:,jn)
-               zs_jk(:,:,jn) = pvtr_int(:,:,jp_sal,jn) * r1_sjk(:,:,jn)
-               v_msf(:,:,jn) = pvtr_int(:,:,jp_vtr,jn)
-               hstr_ove(:,jp_tem,jn) = SUM( v_msf(:,:,jn)*zt_jk(:,:,jn), 2 )
-               hstr_ove(:,jp_sal,jn) = SUM( v_msf(:,:,jn)*zs_jk(:,:,jn), 2 )
+               DO jk = 1, jpk
+                  DO jj = Njs0, Nje0
+                     zt_jk(jj,jk,jn) = pvtr_int(jj,jk,jn,jp_tem) * r1_sjk(jj,jk,jn)
+                     zs_jk(jj,jk,jn) = pvtr_int(jj,jk,jn,jp_sal) * r1_sjk(jj,jk,jn)
+                     v_msf(jj,jk,jn) = pvtr_int(jj,jk,jn,jp_vtr)
+                  END DO
+               END DO
+               hstr_ove(:,jn,jp_tem) = SUM( v_msf(:,:,jn)*zt_jk(:,:,jn), 2 )
+               hstr_ove(:,jn,jp_sal) = SUM( v_msf(:,:,jn)*zs_jk(:,:,jn), 2 )
                !
             ENDDO
             DO jn = 1, nbasin
-               z3dtr(Nis0,:,jn) = hstr_ove(:,jp_tem,jn) * rc_pwatt  !  (conversion in PW)
+               z3dtr(Nis0,:,jn) = hstr_ove(:,jn,jp_tem) * rc_pwatt  !  (conversion in PW)
                DO ji = Nis0+1, Nie0
                   z3dtr(ji,:,jn) = z3dtr(Nis0,:,jn)
                ENDDO
             ENDDO
             CALL iom_put( 'sophtove', z3dtr )
             DO jn = 1, nbasin
-               z3dtr(Nis0,:,jn) = hstr_ove(:,jp_sal,jn) * rc_ggram !  (conversion in Gg)
+               z3dtr(Nis0,:,jn) = hstr_ove(:,jn,jp_sal) * rc_ggram !  (conversion in Gg)
                DO ji = Nis0+1, Nie0
                   z3dtr(ji,:,jn) = z3dtr(Nis0,:,jn)
                ENDDO
@@ -184,26 +188,26 @@ CONTAINS
             ALLOCATE( sjk(A1Dj(0),1,nbasin), r1_sjk(A1Dj(0),1,nbasin) )
             !
             DO jn = 1, nbasin
-               sjk(:,1,jn) = SUM( pvtr_int(:,:,jp_msk,jn), 2 )
+               sjk(:,1,jn) = SUM( pvtr_int(:,:,jn,jp_msk), 2 )
                r1_sjk(:,1,jn) = 0._wp
                WHERE( sjk(:,1,jn) /= 0._wp )   r1_sjk(:,1,jn) = 1._wp / sjk(:,1,jn)
                !
-               zvsum(:) =    SUM( pvtr_int(:,:,jp_vtr,jn), 2 )
-               ztsum(:) =    SUM( pvtr_int(:,:,jp_tem,jn), 2 )
-               zssum(:) =    SUM( pvtr_int(:,:,jp_sal,jn), 2 )
-               hstr_btr(:,jp_tem,jn) = zvsum(:) * ztsum(:) * r1_sjk(:,1,jn)
-               hstr_btr(:,jp_sal,jn) = zvsum(:) * zssum(:) * r1_sjk(:,1,jn)
+               zvsum(:) =    SUM( pvtr_int(:,:,jn,jp_vtr), 2 )
+               ztsum(:) =    SUM( pvtr_int(:,:,jn,jp_tem), 2 )
+               zssum(:) =    SUM( pvtr_int(:,:,jn,jp_sal), 2 )
+               hstr_btr(:,jn,jp_tem) = zvsum(:) * ztsum(:) * r1_sjk(:,1,jn)
+               hstr_btr(:,jn,jp_sal) = zvsum(:) * zssum(:) * r1_sjk(:,1,jn)
                !
             ENDDO
             DO jn = 1, nbasin
-               z3dtr(Nis0,:,jn) = hstr_btr(:,jp_tem,jn) * rc_pwatt  !  (conversion in PW)
+               z3dtr(Nis0,:,jn) = hstr_btr(:,jn,jp_tem) * rc_pwatt  !  (conversion in PW)
                DO ji = Nis0+1, Nie0
                   z3dtr(ji,:,jn) = z3dtr(Nis0,:,jn)
                ENDDO
             ENDDO
             CALL iom_put( 'sophtbtr', z3dtr )
             DO jn = 1, nbasin
-               z3dtr(Nis0,:,jn) = hstr_btr(:,jp_sal,jn) * rc_ggram !  (conversion in Gg)
+               z3dtr(Nis0,:,jn) = hstr_btr(:,jn,jp_sal) * rc_ggram !  (conversion in Gg)
                DO ji = Nis0+1, Nie0
                   z3dtr(ji,:,jn) = z3dtr(Nis0,:,jn)
                ENDDO
@@ -221,25 +225,37 @@ CONTAINS
             ALLOCATE( z4d1(A2D(0),jpk,nbasin), z4d2(A2D(0),jpk,nbasin) )
             !
             DO jn = 1, nbasin
-               z4d1(Nis0,:,:,jn) = pzon_int(:,:,jp_msk,jn)
-               DO ji = Nis0+1, Nie0
-                  z4d1(ji,:,:,jn) = z4d1(Nis0,:,:,jn)
+               DO jk = 1, jpk
+                  DO jj = Njs0, Nje0
+                     z4d1(Nis0,jj,jk,jn) = pzon_int(jj,jk,jn,jp_msk)
+                     DO ji = Nis0+1, Nie0
+                        z4d1(ji,jj,jk,jn) = z4d1(Nis0,jj,jk,jn)
+                     ENDDO
+                  END DO
                ENDDO
             ENDDO
             CALL iom_put( 'zosrf', z4d1 )
             !
             DO jn = 1, nbasin
-               z4d2(Nis0,:,:,jn) = pzon_int(:,:,jp_tem,jn) / MAX( z4d1(Nis0,:,:,jn), 10.e-15 )
-               DO ji = Nis0+1, Nie0
-                  z4d2(ji,:,:,jn) = z4d2(Nis0,:,:,jn)
+               DO jk = 1, jpk
+                  DO jj = Njs0, Nje0
+                     z4d2(Nis0,jj,jk,jn) = pzon_int(jj,jk,jn,jp_tem) / MAX( z4d1(Nis0,jj,jk,jn), 10.e-15 )
+                     DO ji = Nis0+1, Nie0
+                        z4d2(ji,jj,jk,jn) = z4d2(Nis0,jj,jk,jn)
+                     END DO
+                  END DO
                ENDDO
             ENDDO
             CALL iom_put( 'zotem', z4d2 )
             !
             DO jn = 1, nbasin
-               z4d2(Nis0,:,:,jn) = pzon_int(:,:,jp_sal,jn) / MAX( z4d1(Nis0,:,:,jn), 10.e-15 )
-               DO ji = Nis0+1, Nie0
-                  z4d2(ji,:,:,jn) = z4d2(Nis0,:,:,jn)
+               DO jk = 1, jpk
+                  DO jj = Njs0, Nje0
+                     z4d2(Nis0,jj,jk,jn) = pzon_int(jj,jk,jn,jp_sal) / MAX( z4d1(Nis0,jj,jk,jn), 10.e-15 )
+                     DO ji = Nis0+1, Nie0
+                        z4d2(ji,jj,jk,jn) = z4d2(Nis0,jj,jk,jn)
+                     ENDDO
+                  ENDDO
                ENDDO
             ENDDO
             CALL iom_put( 'zosal', z4d2 )
@@ -251,14 +267,14 @@ CONTAINS
          IF( iom_use( 'sophtadv' ) .OR. iom_use( 'sopstadv' ) ) THEN
             !
             DO jn = 1, nbasin
-               z3dtr(Nis0,:,jn) = hstr_adv(:,jp_tem,jn) * rc_pwatt  !  (conversion in PW)
+               z3dtr(Nis0,:,jn) = hstr_adv(:,jn,jp_tem) * rc_pwatt  !  (conversion in PW)
                DO ji = Nis0+1, Nie0
                   z3dtr(ji,:,jn) = z3dtr(Nis0,:,jn)
                ENDDO
             ENDDO
             CALL iom_put( 'sophtadv', z3dtr )
             DO jn = 1, nbasin
-               z3dtr(Nis0,:,jn) = hstr_adv(:,jp_sal,jn) * rc_ggram !  (conversion in Gg)
+               z3dtr(Nis0,:,jn) = hstr_adv(:,jn,jp_sal) * rc_ggram !  (conversion in Gg)
                DO ji = Nis0+1, Nie0
                   z3dtr(ji,:,jn) = z3dtr(Nis0,:,jn)
                ENDDO
@@ -269,14 +285,14 @@ CONTAINS
          IF( iom_use( 'sophtldf' ) .OR. iom_use( 'sopstldf' ) ) THEN
             !
             DO jn = 1, nbasin
-               z3dtr(Nis0,:,jn) = hstr_ldf(:,jp_tem,jn) * rc_pwatt  !  (conversion in PW)
+               z3dtr(Nis0,:,jn) = hstr_ldf(:,jn,jp_tem) * rc_pwatt  !  (conversion in PW)
                DO ji = Nis0+1, Nie0
                   z3dtr(ji,:,jn) = z3dtr(Nis0,:,jn)
                ENDDO
             ENDDO
             CALL iom_put( 'sophtldf', z3dtr )
             DO jn = 1, nbasin
-               z3dtr(Nis0,:,jn) = hstr_ldf(:,jp_sal,jn) * rc_ggram !  (conversion in Gg)
+               z3dtr(Nis0,:,jn) = hstr_ldf(:,jn,jp_sal) * rc_ggram !  (conversion in Gg)
                DO ji = Nis0+1, Nie0
                   z3dtr(ji,:,jn) = z3dtr(Nis0,:,jn)
                ENDDO
@@ -287,14 +303,14 @@ CONTAINS
          IF( iom_use( 'sophteiv' ) .OR. iom_use( 'sopsteiv' ) ) THEN
             !
             DO jn = 1, nbasin
-               z3dtr(Nis0,:,jn) = hstr_eiv(:,jp_tem,jn) * rc_pwatt  !  (conversion in PW)
+               z3dtr(Nis0,:,jn) = hstr_eiv(:,jn,jp_tem) * rc_pwatt  !  (conversion in PW)
                DO ji = Nis0+1, Nie0
                   z3dtr(ji,:,jn) = z3dtr(Nis0,:,jn)
                ENDDO
             ENDDO
             CALL iom_put( 'sophteiv', z3dtr )
             DO jn = 1, nbasin
-               z3dtr(Nis0,:,jn) = hstr_eiv(:,jp_sal,jn) * rc_ggram !  (conversion in Gg)
+               z3dtr(Nis0,:,jn) = hstr_eiv(:,jn,jp_sal) * rc_ggram !  (conversion in Gg)
                DO ji = Nis0+1, Nie0
                   z3dtr(ji,:,jn) = z3dtr(Nis0,:,jn)
                ENDDO
@@ -304,14 +320,14 @@ CONTAINS
          !
          IF( iom_use( 'sopstvtr' ) .OR. iom_use( 'sophtvtr' ) ) THEN
              DO jn = 1, nbasin
-                z3dtr(Nis0,:,jn) = hstr_vtr(:,jp_tem,jn) * rc_pwatt  !  (conversion in PW)
+                z3dtr(Nis0,:,jn) = hstr_vtr(:,jn,jp_tem) * rc_pwatt  !  (conversion in PW)
                 DO ji = Nis0+1, Nie0
                    z3dtr(ji,:,jn) = z3dtr(Nis0,:,jn)
                 ENDDO
              ENDDO
              CALL iom_put( 'sophtvtr', z3dtr )
              DO jn = 1, nbasin
-               z3dtr(Nis0,:,jn) = hstr_vtr(:,jp_sal,jn) * rc_ggram !  (conversion in Gg)
+               z3dtr(Nis0,:,jn) = hstr_vtr(:,jn,jp_sal) * rc_ggram !  (conversion in Gg)
                DO ji = Nis0+1, Nie0
                   z3dtr(ji,:,jn) = z3dtr(Nis0,:,jn)
                ENDDO
@@ -369,7 +385,7 @@ CONTAINS
                v_msf(:,:,jn) = ptr_sjk( pvtr(:,:,:), btmsk34(:,:,jn) )
             ENDDO
 
-            CALL ptr_sum( pvtr_int(:,:,jp_vtr,:), v_msf(:,:,:) )
+            CALL ptr_sum( pvtr_int(:,:,:,jp_vtr), v_msf(:,:,:) )
 
             DEALLOCATE( v_msf )
          ENDIF
@@ -394,9 +410,9 @@ CONTAINS
                zs_jk(:,:,jn) = ptr_sjk( zts(:,:,:,jp_sal), btmsk(:,:,jn) )
             ENDDO
 
-            CALL ptr_sum( pvtr_int(:,:,jp_msk,:), sjk(:,:,:)   )
-            CALL ptr_sum( pvtr_int(:,:,jp_tem,:), zt_jk(:,:,:) )
-            CALL ptr_sum( pvtr_int(:,:,jp_sal,:), zs_jk(:,:,:) )
+            CALL ptr_sum( pvtr_int(:,:,:,jp_msk),   sjk(:,:,:) )
+            CALL ptr_sum( pvtr_int(:,:,:,jp_tem), zt_jk(:,:,:) )
+            CALL ptr_sum( pvtr_int(:,:,:,jp_sal), zs_jk(:,:,:) )
 
             DEALLOCATE( zmask, zts, sjk, zt_jk, zs_jk )
          ENDIF
@@ -420,10 +436,10 @@ CONTAINS
                zs_jk(:,:,jn) = ptr_sjk( zts(:,:,:,jp_sal), btmsk(:,:,jn) )
             ENDDO
 
-            CALL ptr_sum( pzon_int(:,:,jp_msk,:), sjk(:,:,:)   )
-            CALL ptr_sum( pzon_int(:,:,jp_tem,:), zt_jk(:,:,:) )
-            CALL ptr_sum( pzon_int(:,:,jp_sal,:), zs_jk(:,:,:) )
-
+            CALL ptr_sum( pzon_int(:,:,:,jp_msk),   sjk(:,:,:) )
+            CALL ptr_sum( pzon_int(:,:,:,jp_tem), zt_jk(:,:,:) )
+            CALL ptr_sum( pzon_int(:,:,:,jp_sal), zs_jk(:,:,:) )
+            !
             DEALLOCATE( zmask, zts, sjk, zt_jk, zs_jk )
          ENDIF
 
@@ -553,17 +569,17 @@ CONTAINS
       ENDDO
       !
       IF( cptr == 'adv' ) THEN
-         IF( ktra == jp_tem )  CALL ptr_sum( hstr_adv(:,jp_tem,:), zsj(:,:) )
-         IF( ktra == jp_sal )  CALL ptr_sum( hstr_adv(:,jp_sal,:), zsj(:,:) )
+         IF( ktra == jp_tem )  CALL ptr_sum( hstr_adv(:,:,jp_tem), zsj(:,:) )
+         IF( ktra == jp_sal )  CALL ptr_sum( hstr_adv(:,:,jp_sal), zsj(:,:) )
       ELSE IF( cptr == 'ldf' ) THEN
-         IF( ktra == jp_tem )  CALL ptr_sum( hstr_ldf(:,jp_tem,:), zsj(:,:) )
-         IF( ktra == jp_sal )  CALL ptr_sum( hstr_ldf(:,jp_sal,:), zsj(:,:) )
+         IF( ktra == jp_tem )  CALL ptr_sum( hstr_ldf(:,:,jp_tem), zsj(:,:) )
+         IF( ktra == jp_sal )  CALL ptr_sum( hstr_ldf(:,:,jp_sal), zsj(:,:) )
       ELSE IF( cptr == 'eiv' ) THEN
-         IF( ktra == jp_tem )  CALL ptr_sum( hstr_eiv(:,jp_tem,:), zsj(:,:) )
-         IF( ktra == jp_sal )  CALL ptr_sum( hstr_eiv(:,jp_sal,:), zsj(:,:) )
+         IF( ktra == jp_tem )  CALL ptr_sum( hstr_eiv(:,:,jp_tem), zsj(:,:) )
+         IF( ktra == jp_sal )  CALL ptr_sum( hstr_eiv(:,:,jp_sal), zsj(:,:) )
       ELSE IF( cptr == 'vtr' ) THEN
-         IF( ktra == jp_tem )  CALL ptr_sum( hstr_vtr(:,jp_tem,:), zsj(:,:) )
-         IF( ktra == jp_sal )  CALL ptr_sum( hstr_vtr(:,jp_sal,:), zsj(:,:) )
+         IF( ktra == jp_tem )  CALL ptr_sum( hstr_vtr(:,:,jp_tem), zsj(:,:) )
+         IF( ktra == jp_sal )  CALL ptr_sum( hstr_vtr(:,:,jp_sal), zsj(:,:) )
       ENDIF
       !
    END SUBROUTINE dia_ptr_hst_t
@@ -582,19 +598,21 @@ CONTAINS
       !!----------------------------------------------------------------------
       REAL(wp), DIMENSION(A1Dj(0),nbasin), INTENT(inout)  ::  phstr  !
       REAL(wp), DIMENSION(T1Dj(0),nbasin), INTENT(in   )  ::  pva    !
-      INTEGER                                             ::  jj
+      INTEGER                                             ::  jj, jn
 #if ! defined key_mpi_off
       INTEGER,  DIMENSION(1)               ::  ish1d
       INTEGER,  DIMENSION(2)               ::  ish2d
       REAL(wp), DIMENSION(:), ALLOCATABLE  ::  zwork
 #endif
 
-      DO_1Dj( 0, 0 )
-         phstr(jj,:) = phstr(jj,:)  + pva(jj,:)
-      END_1D
+      DO jn = 1, nbasin
+         DO_1Dj( 0, 0 )
+            phstr(jj,jn) = phstr(jj,jn)  + pva(jj,jn)
+         END_1D
+      END DO
 
 #if ! defined key_mpi_off
-      IF( .NOT. l_istiled .OR. ntile == nijtile ) THEN
+      IF( .NOT. l_istiled .OR. ntile == nijtile ) THEN   ! Do only for the last tile
          ALLOCATE( zwork(Nj_0*nbasin) )
          ish1d(1) = Nj_0*nbasin
          ish2d(1) = Nj_0 ; ish2d(2) = nbasin
@@ -618,23 +636,25 @@ CONTAINS
       !!
       !! ** Action  : phstr
       !!----------------------------------------------------------------------
-      REAL(wp), DIMENSION(Njs0:Nje0,jpk,nbasin), INTENT(inout)   ::  phstr  !
-      REAL(wp), DIMENSION(T1Dj(0)  ,jpk,nbasin), INTENT(in   )   ::  pva    !
-      INTEGER                                                    ::  jj, jk
+      REAL(wp), DIMENSION(A1Dj(0),jpk,nbasin), INTENT(inout)   ::  phstr  !
+      REAL(wp), DIMENSION(T1Dj(0),jpk,nbasin), INTENT(in   )   ::  pva    !
+      INTEGER                                                  ::  jj, jk, jn
 #if ! defined key_mpi_off
       INTEGER,  DIMENSION(1)               ::  ish1d
       INTEGER,  DIMENSION(3)               ::  ish3d
       REAL(wp), DIMENSION(:), ALLOCATABLE  ::  zwork
 #endif
 
-      DO jk = 1, jpk
-         DO_1Dj( 0, 0 )
-            phstr(jj,jk,:) = phstr(jj,jk,:)  + pva(jj,jk,:)
-         END_1D
+      DO jn = 1, nbasin
+         DO jk = 1, jpk
+            DO_1Dj( 0, 0 )
+               phstr(jj,jk,jn) = phstr(jj,jk,jn) + pva(jj,jk,jn)
+            END_1D
+         END DO
       END DO
 
 #if ! defined key_mpi_off
-      IF( .NOT. l_istiled .OR. ntile == nijtile ) THEN
+      IF( .NOT. l_istiled .OR. ntile == nijtile ) THEN   ! Do only for the last tile
          ALLOCATE( zwork(Nj_0*jpk*nbasin) )
          ish1d(1) = Nj_0*jpk*nbasin
          ish3d(1) = Nj_0 ; ish3d(2) = jpk ; ish3d(3) = nbasin
@@ -660,12 +680,12 @@ CONTAINS
       !
       IF( .NOT. ALLOCATED( btmsk ) ) THEN
          ALLOCATE( btmsk(A2D(nn_hls),nbasin)    , btmsk34(A2D(nn_hls),nbasin)  , &
-            &      hstr_adv(A1Dj(0),jpts,nbasin), hstr_eiv(A1Dj(0),jpts,nbasin), &
-            &      hstr_ove(A1Dj(0),jpts,nbasin), hstr_btr(A1Dj(0),jpts,nbasin), &
-            &      hstr_ldf(A1Dj(0),jpts,nbasin), hstr_vtr(A1Dj(0),jpts,nbasin), STAT=ierr(1)  )
+            &      hstr_adv(A1Dj(0),nbasin,jpts), hstr_eiv(A1Dj(0),nbasin,jpts), &
+            &      hstr_ove(A1Dj(0),nbasin,jpts), hstr_btr(A1Dj(0),nbasin,jpts), &
+            &      hstr_ldf(A1Dj(0),nbasin,jpts), hstr_vtr(A1Dj(0),nbasin,jpts), STAT=ierr(1)  )
             !
-         ALLOCATE( pvtr_int(A1Dj(0),jpk,jpts+2,nbasin), &
-            &      pzon_int(A1Dj(0),jpk,jpts+1,nbasin), STAT=ierr(2) )
+         ALLOCATE( pvtr_int(A1Dj(0),jpk,nbasin,jpts+2), &
+            &      pzon_int(A1Dj(0),jpk,nbasin,jpts+1), STAT=ierr(2) )
          !
          dia_ptr_alloc = MAXVAL( ierr )
          CALL mpp_sum( 'diaptr', dia_ptr_alloc )
diff --git a/src/OCE/DIA/diawri.F90 b/src/OCE/DIA/diawri.F90
index 48ab63b5bb996d797055fb20f3c0a2dd9769ac4b..87b7dbe2bc1ef40e5b24d8b48029710b5bbb92ba 100644
--- a/src/OCE/DIA/diawri.F90
+++ b/src/OCE/DIA/diawri.F90
@@ -122,95 +122,91 @@ CONTAINS
       REAL(wp)::   zztmp , zztmpx   ! local scalar
       REAL(wp)::   zztmp2, zztmpy   !   -      -
       REAL(wp)::   ze3
-      REAL(wp), DIMENSION(A2D(0))          ::   z2d0   ! 2D workspace
-      REAL(wp), DIMENSION(A2D(0)     ,jpk) ::   z3d0   ! 3D workspace
-      REAL(wp), DIMENSION(A2D(nn_hls),jpk) ::   z3d    ! 3D workspace
+      REAL(wp), DIMENSION(T2D(0))     ::   z2d   ! 2D workspace
+      REAL(wp), DIMENSION(T2D(0),jpk) ::   z3d   ! 3D workspace
       !!----------------------------------------------------------------------
       ! 
       IF( ln_timing )   CALL timing_start('dia_wri')
-      ! 
-      ! Output the initial state and forcings
-      IF( ninist == 1 ) THEN                       
-         CALL dia_wri_state( Kmm, 'output.init' )
-         ninist = 0
+      !
+      IF( .NOT. l_istiled .OR. ntile == 1 )  THEN   ! Do only for the first tile
+         ! Output the initial state and forcings
+         IF( ninist == 1 ) THEN
+            CALL dia_wri_state( Kmm, 'output.init' )
+            ninist = 0
+         ENDIF
       ENDIF
 
-      ! initialize arrays
-      z2d0(:,:)   = 0._wp
-      z3d0(:,:,:) = 0._wp
-      z3d (:,:,:) = 0._wp
-      
       ! Output of initial vertical scale factor
       IF( lk_vco_3d ) THEN
-            DO_3D( 0, 0, 0, 0, 1, jpk )
-               z3d(ji,jj,jk) =  e3t_0(ji,jj,jk)
-            END_3D
-            CALL iom_put( "e3t_0", z3d )
-            !
-            DO_3D( 0, 0, 0, 0, 1, jpk )
-               z3d(ji,jj,jk) =  e3u_0(ji,jj,jk)
-            END_3D
-            CALL iom_put( "e3u_0", z3d )
-            !
-            DO_3D( 0, 0, 0, 0, 1, jpk )
-               z3d(ji,jj,jk) =  e3v_0(ji,jj,jk)
-            END_3D
-            CALL iom_put( "e3v_0", z3d )
-            !
-            DO_3D( 0, 0, 0, 0, 1, jpk )
-               z3d(ji,jj,jk) =  e3f_0(ji,jj,jk)
-            END_3D
-            CALL iom_put( "e3f_0", z3d )
-         ELSE
-            CALL iom_put( "e3t_0", e3t_0(:,:,:) )
-            CALL iom_put( "e3u_0", e3u_0(:,:,:) )
-            CALL iom_put( "e3v_0", e3v_0(:,:,:) )
-            CALL iom_put( "e3f_0", e3f_0(:,:,:) )
+         DO_3D( 0, 0, 0, 0, 1, jpk )
+            z3d(ji,jj,jk) =  e3t_0(ji,jj,jk)
+         END_3D
+         CALL iom_put( "e3t_0", z3d )
+         !
+         DO_3D( 0, 0, 0, 0, 1, jpk )
+            z3d(ji,jj,jk) =  e3u_0(ji,jj,jk)
+         END_3D
+         CALL iom_put( "e3u_0", z3d )
+         !
+         DO_3D( 0, 0, 0, 0, 1, jpk )
+            z3d(ji,jj,jk) =  e3v_0(ji,jj,jk)
+         END_3D
+         CALL iom_put( "e3v_0", z3d )
+         !
+         DO_3D( 0, 0, 0, 0, 1, jpk )
+            z3d(ji,jj,jk) =  e3f_0(ji,jj,jk)
+         END_3D
+         CALL iom_put( "e3f_0", z3d )
+      ELSE
+         CALL iom_put( "e3t_0", e3t_0(:,:,:) )
+         CALL iom_put( "e3u_0", e3u_0(:,:,:) )
+         CALL iom_put( "e3v_0", e3v_0(:,:,:) )
+         CALL iom_put( "e3f_0", e3f_0(:,:,:) )
       ENDIF
       !
       IF ( iom_use("tpt_dep") ) THEN
          DO_3D( 0, 0, 0, 0, 1, jpk )
-            z3d0(ji,jj,jk) = gdept(ji,jj,jk,Kmm)
+            z3d(ji,jj,jk) = gdept(ji,jj,jk,Kmm)
          END_3D
-         CALL iom_put( "tpt_dep", z3d0 )
+         CALL iom_put( "tpt_dep", z3d )
       ENDIF
 
       ! --- vertical scale factors --- !
       IF ( iom_use("e3t") .OR. iom_use("e3tdef") ) THEN  ! time-varying e3t
          DO_3D( 0, 0, 0, 0, 1, jpk )
-            z3d0(ji,jj,jk) =  e3t(ji,jj,jk,Kmm)
+            z3d(ji,jj,jk) =  e3t(ji,jj,jk,Kmm)
          END_3D
-         CALL iom_put( "e3t", z3d0 )
+         CALL iom_put( "e3t", z3d )
          IF ( iom_use("e3tdef") ) THEN
             DO_3D( 0, 0, 0, 0, 1, jpk )
-               z3d0(ji,jj,jk) = ( ( z3d0(ji,jj,jk) - e3t_0(ji,jj,jk) ) / e3t_0(ji,jj,jk) * 100._wp * tmask(ji,jj,jk) ) ** 2
+               z3d(ji,jj,jk) = ( ( z3d(ji,jj,jk) - e3t_0(ji,jj,jk) ) / e3t_0(ji,jj,jk) * 100._wp * tmask(ji,jj,jk) ) ** 2
             END_3D
-            CALL iom_put( "e3tdef", z3d0 )
+            CALL iom_put( "e3tdef", z3d )
          ENDIF
-      ENDIF 
+      ENDIF
       IF ( iom_use("e3u") ) THEN                         ! time-varying e3u
          DO_3D( 0, 0, 0, 0, 1, jpk )
-            z3d0(ji,jj,jk) =  e3u(ji,jj,jk,Kmm)
-         END_3D 
-         CALL iom_put( "e3u" , z3d0 )
+            z3d(ji,jj,jk) =  e3u(ji,jj,jk,Kmm)
+         END_3D
+         CALL iom_put( "e3u" , z3d )
       ENDIF
       IF ( iom_use("e3v") ) THEN                         ! time-varying e3v
          DO_3D( 0, 0, 0, 0, 1, jpk )
-            z3d0(ji,jj,jk) =  e3v(ji,jj,jk,Kmm)
+            z3d(ji,jj,jk) =  e3v(ji,jj,jk,Kmm)
          END_3D
-         CALL iom_put( "e3v" , z3d0 )
+         CALL iom_put( "e3v" , z3d )
       ENDIF
       IF ( iom_use("e3w") ) THEN                         ! time-varying e3w
          DO_3D( 0, 0, 0, 0, 1, jpk )
-            z3d0(ji,jj,jk) =  e3w(ji,jj,jk,Kmm)
+            z3d(ji,jj,jk) =  e3w(ji,jj,jk,Kmm)
          END_3D
-         CALL iom_put( "e3w" , z3d0 )
+         CALL iom_put( "e3w" , z3d )
       ENDIF
       IF ( iom_use("e3f") ) THEN                         ! time-varying e3f caution here at Kaa
          DO_3D( 0, 0, 0, 0, 1, jpk )
-            z3d0(ji,jj,jk) =  e3f(ji,jj,jk)
+            z3d(ji,jj,jk) =  e3f(ji,jj,jk)
          END_3D
-         CALL iom_put( "e3f" , z3d0 )
+         CALL iom_put( "e3f" , z3d )
       ENDIF
 
       IF ( iom_use("ssh") ) THEN
@@ -222,7 +218,7 @@ CONTAINS
       ENDIF
 
       IF( iom_use("wetdep") )    CALL iom_put( "wetdep" , ht_0(:,:) + ssh(:,:,Kmm) )   ! wet depth
-         
+
 #if defined key_qco
       IF( iom_use("ht") )   CALL iom_put( "ht" , ht(:,:,Kmm) )   ! water column at t-point
       IF( iom_use("hu") )   CALL iom_put( "hu" , hu(:,:,Kmm) )   ! water column at u-point
@@ -230,26 +226,26 @@ CONTAINS
       IF( iom_use("hf") )   CALL iom_put( "hf" , hf_0(:,:)*( 1._wp + r3f(:,:) ) )   ! water column at f-point (caution here at Naa)
 #endif
 
-      ! --- tracers T&S --- !      
+      ! --- tracers T&S --- !
       CALL iom_put( "toce", ts(:,:,:,jp_tem,Kmm) )    ! 3D temperature
       CALL iom_put(  "sst", ts(:,:,1,jp_tem,Kmm) )    ! surface temperature
 
       IF ( iom_use("sbt") ) THEN
          DO_2D( 0, 0, 0, 0 )
             ikbot = mbkt(ji,jj)
-            z2d0(ji,jj) = ts(ji,jj,ikbot,jp_tem,Kmm)
+            z2d(ji,jj) = ts(ji,jj,ikbot,jp_tem,Kmm)
          END_2D
-         CALL iom_put( "sbt", z2d0 )                ! bottom temperature
+         CALL iom_put( "sbt", z2d )                ! bottom temperature
       ENDIF
-      
+
       CALL iom_put( "soce", ts(:,:,:,jp_sal,Kmm) )    ! 3D salinity
       CALL iom_put(  "sss", ts(:,:,1,jp_sal,Kmm) )    ! surface salinity
       IF ( iom_use("sbs") ) THEN
          DO_2D( 0, 0, 0, 0 )
             ikbot = mbkt(ji,jj)
-            z2d0(ji,jj) = ts(ji,jj,ikbot,jp_sal,Kmm)
+            z2d(ji,jj) = ts(ji,jj,ikbot,jp_sal,Kmm)
          END_2D
-         CALL iom_put( "sbs", z2d0 )                ! bottom salinity
+         CALL iom_put( "sbs", z2d )                ! bottom salinity
       ENDIF
 
       IF( .NOT.lk_SWE )   CALL iom_put( "rhop", rhop(:,:,:) )          ! 3D potential density (sigma0)
@@ -257,16 +253,15 @@ CONTAINS
       ! --- momentum --- !
       IF ( iom_use("taubot") ) THEN                ! bottom stress
          zztmp = rho0 * 0.25_wp
-         z2d0(:,:) = 0._wp
          DO_2D( 0, 0, 0, 0 )
             zztmp2 = (  ( rCdU_bot(ji+1,jj)+rCdU_bot(ji  ,jj) ) * uu(ji  ,jj,mbku(ji  ,jj),Kmm)  )**2   &
                &   + (  ( rCdU_bot(ji  ,jj)+rCdU_bot(ji-1,jj) ) * uu(ji-1,jj,mbku(ji-1,jj),Kmm)  )**2   &
                &   + (  ( rCdU_bot(ji,jj+1)+rCdU_bot(ji,jj  ) ) * vv(ji,jj  ,mbkv(ji,jj  ),Kmm)  )**2   &
                &   + (  ( rCdU_bot(ji,jj  )+rCdU_bot(ji,jj-1) ) * vv(ji,jj-1,mbkv(ji,jj-1),Kmm)  )**2
-            z2d0(ji,jj) = zztmp * SQRT( zztmp2 ) * tmask(ji,jj,1)
+            z2d(ji,jj) = zztmp * SQRT( zztmp2 ) * tmask(ji,jj,1)
             !
          END_2D
-         CALL iom_put( "taubot", z2d0 )
+         CALL iom_put( "taubot", z2d )
       ENDIF
          
       CALL iom_put( "uoce", uu(:,:,:,Kmm) )            ! 3D i-current
@@ -274,9 +269,9 @@ CONTAINS
       IF ( iom_use("sbu") ) THEN
          DO_2D( 0, 0, 0, 0 )
             ikbot = mbku(ji,jj)
-            z2d0(ji,jj) = uu(ji,jj,ikbot,Kmm)
+            z2d(ji,jj) = uu(ji,jj,ikbot,Kmm)
          END_2D
-         CALL iom_put( "sbu", z2d0 )                ! bottom i-current
+         CALL iom_put( "sbu", z2d )                ! bottom i-current
       ENDIF
       
       CALL iom_put( "voce", vv(:,:,:,Kmm) )            ! 3D j-current
@@ -284,36 +279,37 @@ CONTAINS
       IF ( iom_use("sbv") ) THEN
          DO_2D( 0, 0, 0, 0 )
             ikbot = mbkv(ji,jj)
-            z2d0(ji,jj) = vv(ji,jj,ikbot,Kmm)
+            z2d(ji,jj) = vv(ji,jj,ikbot,Kmm)
          END_2D
-         CALL iom_put( "sbv", z2d0 )                ! bottom j-current
+         CALL iom_put( "sbv", z2d )                ! bottom j-current
       ENDIF
-
       !                                            ! vertical velocity
-      IF( ln_zad_Aimp ) THEN
-         IF( iom_use('woce') ) THEN
+      IF( iom_use('woce') ) THEN
+         IF( ln_zad_Aimp ) THEN  ! explicit plus implicit parts
             DO_3D( 0, 0, 0, 0, 1, jpk )
                z3d(ji,jj,jk) = ww(ji,jj,jk) + wi(ji,jj,jk)
             END_3D
-            CALL iom_put( "woce", z3d )   ! explicit plus implicit parts
+         ELSE
+            DO_3D( 0, 0, 0, 0, 1, jpk )
+               z3d(ji,jj,jk) = ww(ji,jj,jk)
+            END_3D
          ENDIF
-      ELSE
-         CALL iom_put( "woce", ww )
+         CALL iom_put( "woce", z3d )
       ENDIF
 
       IF( iom_use('w_masstr') .OR. iom_use('w_masstr2') ) THEN   ! vertical mass transport & its square value
          !                     ! Caution: in the VVL case, it only correponds to the baroclinic mass transport.
          IF( ln_zad_Aimp ) THEN
             DO_3D( 0, 0, 0, 0, 1, jpk )
-               z3d0(ji,jj,jk) = rho0 * e1e2t(ji,jj) * ( ww(ji,jj,jk) + wi(ji,jj,jk) )
+               z3d(ji,jj,jk) = rho0 * e1e2t(ji,jj) * ( ww(ji,jj,jk) + wi(ji,jj,jk) )
             END_3D
          ELSE
             DO_3D( 0, 0, 0, 0, 1, jpk )
-               z3d0(ji,jj,jk) = rho0 * e1e2t(ji,jj) * ww(ji,jj,jk)
+               z3d(ji,jj,jk) = rho0 * e1e2t(ji,jj) * ww(ji,jj,jk)
             END_3D
          ENDIF
-         CALL iom_put( "w_masstr" , z3d0 )
-         IF( iom_use('w_masstr2') )   CALL iom_put( "w_masstr2", z3d0 * z3d0 )
+         CALL iom_put( "w_masstr" , z3d )
+         IF( iom_use('w_masstr2') )   CALL iom_put( "w_masstr2", z3d * z3d )
       ENDIF
 
       CALL iom_put( "avt" , avt )        ! T vert. eddy diff. coef.
@@ -328,220 +324,219 @@ CONTAINS
             zztmp  = ts(ji,jj,1,jp_sal,Kmm)
             zztmpx = (ts(ji+1,jj,1,jp_sal,Kmm) - zztmp) * r1_e1u(ji,jj) + (zztmp - ts(ji-1,jj  ,1,jp_sal,Kmm)) * r1_e1u(ji-1,jj)
             zztmpy = (ts(ji,jj+1,1,jp_sal,Kmm) - zztmp) * r1_e2v(ji,jj) + (zztmp - ts(ji  ,jj-1,1,jp_sal,Kmm)) * r1_e2v(ji,jj-1)
-            z2d0(ji,jj) = 0.25_wp * ( zztmpx * zztmpx + zztmpy * zztmpy )   &
+            z2d(ji,jj) = 0.25_wp * ( zztmpx * zztmpx + zztmpy * zztmpy )   &
                &                 * umask(ji,jj,1) * umask(ji-1,jj,1) * vmask(ji,jj,1) * vmask(ji,jj-1,1)
          END_2D
-         CALL iom_put( "sssgrad2",  z2d0 )          ! square of module of sss gradient
+         CALL iom_put( "sssgrad2",  z2d )          ! square of module of sss gradient
          IF ( iom_use("sssgrad") ) THEN
             DO_2D( 0, 0, 0, 0 )
-               z2d0(ji,jj) = SQRT( z2d0(ji,jj) )
+               z2d(ji,jj) = SQRT( z2d(ji,jj) )
             END_2D
-            CALL iom_put( "sssgrad",  z2d0 )        ! module of sss gradient
+            CALL iom_put( "sssgrad",  z2d )        ! module of sss gradient
          ENDIF
       ENDIF
-         
+
       IF ( iom_use("sstgrad") .OR. iom_use("sstgrad2") ) THEN
          DO_2D( 0, 0, 0, 0 )                       ! sst gradient
             zztmp  = ts(ji,jj,1,jp_tem,Kmm)
             zztmpx = ( ts(ji+1,jj,1,jp_tem,Kmm) - zztmp ) * r1_e1u(ji,jj) + ( zztmp - ts(ji-1,jj  ,1,jp_tem,Kmm) ) * r1_e1u(ji-1,jj)
             zztmpy = ( ts(ji,jj+1,1,jp_tem,Kmm) - zztmp ) * r1_e2v(ji,jj) + ( zztmp - ts(ji  ,jj-1,1,jp_tem,Kmm) ) * r1_e2v(ji,jj-1)
-            z2d0(ji,jj) = 0.25_wp * ( zztmpx * zztmpx + zztmpy * zztmpy )   &
+            z2d(ji,jj) = 0.25_wp * ( zztmpx * zztmpx + zztmpy * zztmpy )   &
                &                 * umask(ji,jj,1) * umask(ji-1,jj,1) * vmask(ji,jj,1) * vmask(ji,jj-1,1)
          END_2D
-         CALL iom_put( "sstgrad2",  z2d0 )          ! square of module of sst gradient
+         CALL iom_put( "sstgrad2",  z2d )          ! square of module of sst gradient
          IF ( iom_use("sstgrad") ) THEN
             DO_2D( 0, 0, 0, 0 )
-               z2d0(ji,jj) = SQRT( z2d0(ji,jj) )
+               z2d(ji,jj) = SQRT( z2d(ji,jj) )
             END_2D
-            CALL iom_put( "sstgrad",  z2d0 )        ! module of sst gradient
+            CALL iom_put( "sstgrad",  z2d )        ! module of sst gradient
          ENDIF
       ENDIF
-         
+
       ! heat and salt contents
       IF( iom_use("heatc") ) THEN
-         z2d0(:,:)  = 0._wp
+         z2d(:,:)  = 0._wp
          DO_3D( 0, 0, 0, 0, 1, jpkm1 )
-            z2d0(ji,jj) = z2d0(ji,jj) + e3t(ji,jj,jk,Kmm) * ts(ji,jj,jk,jp_tem,Kmm) * tmask(ji,jj,jk)
+            z2d(ji,jj) = z2d(ji,jj) + e3t(ji,jj,jk,Kmm) * ts(ji,jj,jk,jp_tem,Kmm) * tmask(ji,jj,jk) * rho0_rcp
          END_3D
-         CALL iom_put( "heatc", rho0_rcp * z2d0 )   ! vertically integrated heat content (J/m2)
+         CALL iom_put( "heatc", z2d )   ! vertically integrated heat content (J/m2)
       ENDIF
 
       IF( iom_use("saltc") ) THEN
-         z2d0(:,:)  = 0._wp
+         z2d(:,:)  = 0._wp
          DO_3D( 0, 0, 0, 0, 1, jpkm1 )
-            z2d0(ji,jj) = z2d0(ji,jj) + e3t(ji,jj,jk,Kmm) * ts(ji,jj,jk,jp_sal,Kmm) * tmask(ji,jj,jk)
+            z2d(ji,jj) = z2d(ji,jj) + e3t(ji,jj,jk,Kmm) * ts(ji,jj,jk,jp_sal,Kmm) * tmask(ji,jj,jk) * rho0
          END_3D
-         CALL iom_put( "saltc", rho0 * z2d0 )       ! vertically integrated salt content (PSU*kg/m2)
+         CALL iom_put( "saltc", z2d )       ! vertically integrated salt content (PSU*kg/m2)
       ENDIF
       !
       IF( iom_use("salt2c") ) THEN
-         z2d0(:,:)  = 0._wp
+         z2d(:,:)  = 0._wp
          DO_3D( 0, 0, 0, 0, 1, jpkm1 )
-            z2d0(ji,jj) = z2d0(ji,jj) + e3t(ji,jj,jk,Kmm) * ts(ji,jj,jk,jp_sal,Kmm) * ts(ji,jj,jk,jp_sal,Kmm) * tmask(ji,jj,jk)
+            z2d(ji,jj) = z2d(ji,jj) + e3t(ji,jj,jk,Kmm) * ts(ji,jj,jk,jp_sal,Kmm) * ts(ji,jj,jk,jp_sal,Kmm) * tmask(ji,jj,jk) * rho0
          END_3D
-         CALL iom_put( "salt2c", rho0 * z2d0 )      ! vertically integrated square of salt content (PSU2*kg/m2)
+         CALL iom_put( "salt2c", z2d )      ! vertically integrated square of salt content (PSU2*kg/m2)
       ENDIF
       !
       IF ( iom_use("ke") .OR. iom_use("ke_int") ) THEN
          DO_3D( 0, 0, 0, 0, 1, jpk )
             zztmpx = uu(ji-1,jj  ,jk,Kmm) + uu(ji,jj,jk,Kmm)
             zztmpy = vv(ji  ,jj-1,jk,Kmm) + vv(ji,jj,jk,Kmm)
-            z3d0(ji,jj,jk) = 0.25_wp * ( zztmpx*zztmpx + zztmpy*zztmpy )
+            z3d(ji,jj,jk) = 0.25_wp * ( zztmpx*zztmpx + zztmpy*zztmpy )
          END_3D
-         CALL iom_put( "ke", z3d0 )                 ! kinetic energy
+         CALL iom_put( "ke", z3d )                 ! kinetic energy
 
-         z2d0(:,:)  = 0._wp
+         z2d(:,:)  = 0._wp
          DO_3D( 0, 0, 0, 0, 1, jpkm1 )
-            z2d0(ji,jj) = z2d0(ji,jj) + e3t(ji,jj,jk,Kmm) * z3d0(ji,jj,jk) * e1e2t(ji,jj) * tmask(ji,jj,jk)
+            z2d(ji,jj) = z2d(ji,jj) + e3t(ji,jj,jk,Kmm) * z3d(ji,jj,jk) * e1e2t(ji,jj) * tmask(ji,jj,jk)
          END_3D
-         CALL iom_put( "ke_int", z2d0 )             ! vertically integrated kinetic energy
+         CALL iom_put( "ke_int", z2d )             ! vertically integrated kinetic energy
       ENDIF
       !
       IF ( iom_use("sKE") ) THEN                   ! surface kinetic energy at T point
-         z2d0(:,:) = 0._wp
          DO_2D( 0, 0, 0, 0 )
-            z2d0(ji,jj) = 0.25_wp * ( uu(ji  ,jj,1,Kmm) * uu(ji  ,jj,1,Kmm) * e1e2u(ji  ,jj) * e3u(ji  ,jj,1,Kmm)  &
+            z2d(ji,jj) = 0.25_wp * ( uu(ji  ,jj,1,Kmm) * uu(ji  ,jj,1,Kmm) * e1e2u(ji  ,jj) * e3u(ji  ,jj,1,Kmm)  &
                &                   + uu(ji-1,jj,1,Kmm) * uu(ji-1,jj,1,Kmm) * e1e2u(ji-1,jj) * e3u(ji-1,jj,1,Kmm)  &
-               &                   + vv(ji,jj  ,1,Kmm) * vv(ji,jj  ,1,Kmm) * e1e2v(ji,jj  ) * e3v(ji,jj  ,1,Kmm)  & 
+               &                   + vv(ji,jj  ,1,Kmm) * vv(ji,jj  ,1,Kmm) * e1e2v(ji,jj  ) * e3v(ji,jj  ,1,Kmm)  &
                &                   + vv(ji,jj-1,1,Kmm) * vv(ji,jj-1,1,Kmm) * e1e2v(ji,jj-1) * e3v(ji,jj-1,1,Kmm)  )  &
                &                 * r1_e1e2t(ji,jj) / e3t(ji,jj,1,Kmm) * ssmask(ji,jj)
          END_2D
-         IF ( iom_use("sKE" ) )  CALL iom_put( "sKE" , z2d0 )
+         IF ( iom_use("sKE" ) )  CALL iom_put( "sKE" , z2d )
       ENDIF
-      !    
+      !
       IF ( iom_use("ssKEf") ) THEN                 ! surface kinetic energy at F point
-         z2d0(:,:) = 0._wp                          ! CAUTION : only valid in SWE, not with bathymetry
+         ! CAUTION : only valid in SWE, not with bathymetry
          DO_2D( 0, 0, 0, 0 )
-            z2d0(ji,jj) = 0.25_wp * ( uu(ji,jj  ,1,Kmm) * uu(ji,jj  ,1,Kmm) * e1e2u(ji,jj  ) * e3u(ji,jj  ,1,Kmm)  &
+            z2d(ji,jj) = 0.25_wp * ( uu(ji,jj  ,1,Kmm) * uu(ji,jj  ,1,Kmm) * e1e2u(ji,jj  ) * e3u(ji,jj  ,1,Kmm)  &
                &                   + uu(ji,jj+1,1,Kmm) * uu(ji,jj+1,1,Kmm) * e1e2u(ji,jj+1) * e3u(ji,jj+1,1,Kmm)  &
-               &                   + vv(ji  ,jj,1,Kmm) * vv(ji,jj  ,1,Kmm) * e1e2v(ji  ,jj) * e3v(ji  ,jj,1,Kmm)  & 
+               &                   + vv(ji  ,jj,1,Kmm) * vv(ji,jj  ,1,Kmm) * e1e2v(ji  ,jj) * e3v(ji  ,jj,1,Kmm)  &
                &                   + vv(ji+1,jj,1,Kmm) * vv(ji+1,jj,1,Kmm) * e1e2v(ji+1,jj) * e3v(ji+1,jj,1,Kmm)  )  &
                &                 * r1_e1e2f(ji,jj) / e3f(ji,jj,1) * ssfmask(ji,jj)
          END_2D
-         CALL iom_put( "ssKEf", z2d0 )
+         CALL iom_put( "ssKEf", z2d )
       ENDIF
       !
       CALL iom_put( "hdiv", hdiv )                 ! Horizontal divergence
       !
       IF( iom_use("u_masstr") .OR. iom_use("u_masstr_vint") .OR. iom_use("u_heattr") .OR. iom_use("u_salttr") ) THEN
-         
+
          DO_3D( 0, 0, 0, 0, 1, jpk )
-            z3d0(ji,jj,jk) = rho0 * uu(ji,jj,jk,Kmm) * e2u(ji,jj) * e3u(ji,jj,jk,Kmm) * umask(ji,jj,jk)
+            z3d(ji,jj,jk) = rho0 * uu(ji,jj,jk,Kmm) * e2u(ji,jj) * e3u(ji,jj,jk,Kmm) * umask(ji,jj,jk)
          END_3D
-         CALL iom_put( "u_masstr"     , z3d0 )      ! mass transport in i-direction
-         
+         CALL iom_put( "u_masstr"     , z3d )      ! mass transport in i-direction
+
          IF( iom_use("u_masstr_vint") ) THEN
-            z2d0(:,:) = 0._wp
+            z2d(:,:) = 0._wp
             DO_3D( 0, 0, 0, 0, 1, jpkm1 )
-               z2d0(ji,jj) = z2d0(ji,jj) + z3d0(ji,jj,jk)
+               z2d(ji,jj) = z2d(ji,jj) + z3d(ji,jj,jk)
             END_3D
-            CALL iom_put( "u_masstr_vint", z2d0 )   ! mass transport in i-direction vertical sum
+            CALL iom_put( "u_masstr_vint", z2d )   ! mass transport in i-direction vertical sum
          ENDIF
          IF( iom_use("u_heattr") ) THEN
-            z2d0(:,:) = 0._wp
+            z2d(:,:) = 0._wp
             zztmp = 0.5_wp * rcp
             DO_3D( 0, 0, 0, 0, 1, jpkm1 )
-               z2d0(ji,jj) = z2d0(ji,jj) + zztmp * z3d0(ji,jj,jk) * ( ts(ji,jj,jk,jp_tem,Kmm) + ts(ji+1,jj,jk,jp_tem,Kmm) )
+               z2d(ji,jj) = z2d(ji,jj) + zztmp * z3d(ji,jj,jk) * ( ts(ji,jj,jk,jp_tem,Kmm) + ts(ji+1,jj,jk,jp_tem,Kmm) )
             END_3D
-            CALL iom_put( "u_heattr", z2d0 )        ! heat transport in i-direction
+            CALL iom_put( "u_heattr", z2d )        ! heat transport in i-direction
          ENDIF
          IF( iom_use("u_salttr") ) THEN
-            z2d0(:,:) = 0._wp
+            z2d(:,:) = 0._wp
             DO_3D( 0, 0, 0, 0, 1, jpkm1 )
-               z2d0(ji,jj) = z2d0(ji,jj) +   0.5 * z3d0(ji,jj,jk) * ( ts(ji,jj,jk,jp_sal,Kmm) + ts(ji+1,jj,jk,jp_sal,Kmm) )
+               z2d(ji,jj) = z2d(ji,jj) +   0.5 * z3d(ji,jj,jk) * ( ts(ji,jj,jk,jp_sal,Kmm) + ts(ji+1,jj,jk,jp_sal,Kmm) )
             END_3D
-            CALL iom_put( "u_salttr", z2d0 )        ! heat transport in i-direction
+            CALL iom_put( "u_salttr", z2d )        ! heat transport in i-direction
          ENDIF
-         
+
       ENDIF
-      
+
       IF( iom_use("v_masstr") .OR. iom_use("v_heattr") .OR. iom_use("v_salttr") ) THEN
-         
+
          DO_3D( 0, 0, 0, 0, 1, jpk )
-            z3d0(ji,jj,jk) = rho0 * vv(ji,jj,jk,Kmm) * e1v(ji,jj) * e3v(ji,jj,jk,Kmm) * vmask(ji,jj,jk)
+            z3d(ji,jj,jk) = rho0 * vv(ji,jj,jk,Kmm) * e1v(ji,jj) * e3v(ji,jj,jk,Kmm) * vmask(ji,jj,jk)
          END_3D
-         CALL iom_put( "v_masstr", z3d0 )           ! mass transport in j-direction
-         
+         CALL iom_put( "v_masstr", z3d )           ! mass transport in j-direction
+
          IF( iom_use("v_heattr") ) THEN
-            z2d0(:,:) = 0._wp
+            z2d(:,:) = 0._wp
             zztmp = 0.5_wp * rcp
             DO_3D( 0, 0, 0, 0, 1, jpkm1 )
-               z2d0(ji,jj) = z2d0(ji,jj) + zztmp * z3d0(ji,jj,jk) * ( ts(ji,jj,jk,jp_tem,Kmm) + ts(ji,jj+1,jk,jp_tem,Kmm) )
+               z2d(ji,jj) = z2d(ji,jj) + zztmp * z3d(ji,jj,jk) * ( ts(ji,jj,jk,jp_tem,Kmm) + ts(ji,jj+1,jk,jp_tem,Kmm) )
             END_3D
-            CALL iom_put( "v_heattr", z2d0 )        !  heat transport in j-direction
+            CALL iom_put( "v_heattr", z2d )        !  heat transport in j-direction
          ENDIF
          IF( iom_use("v_salttr") ) THEN
-            z2d0(:,:) = 0._wp
+            z2d(:,:) = 0._wp
             DO_3D( 0, 0, 0, 0, 1, jpkm1 )
-               z2d0(ji,jj) = z2d0(ji,jj) +   0.5 * z3d0(ji,jj,jk) * ( ts(ji,jj,jk,jp_sal,Kmm) + ts(ji,jj+1,jk,jp_sal,Kmm) )
+               z2d(ji,jj) = z2d(ji,jj) +   0.5 * z3d(ji,jj,jk) * ( ts(ji,jj,jk,jp_sal,Kmm) + ts(ji,jj+1,jk,jp_sal,Kmm) )
             END_3D
-            CALL iom_put( "v_salttr", z2d0 )        !  heat transport in j-direction
+            CALL iom_put( "v_salttr", z2d )        !  heat transport in j-direction
          ENDIF
 
       ENDIF
 
       IF( iom_use("tosmint") ) THEN
-         z2d0(:,:) = 0._wp
+         z2d(:,:) = 0._wp
          DO_3D( 0, 0, 0, 0, 1, jpkm1 )
-            z2d0(ji,jj) = z2d0(ji,jj) + rho0 * e3t(ji,jj,jk,Kmm) * ts(ji,jj,jk,jp_tem,Kmm)
+            z2d(ji,jj) = z2d(ji,jj) + rho0 * e3t(ji,jj,jk,Kmm) * ts(ji,jj,jk,jp_tem,Kmm)
          END_3D
-         CALL iom_put( "tosmint", z2d0 )            ! Vertical integral of temperature
+         CALL iom_put( "tosmint", z2d )            ! Vertical integral of temperature
       ENDIF
       IF( iom_use("somint") ) THEN
-         z2d0(:,:) = 0._wp
+         z2d(:,:) = 0._wp
          DO_3D( 0, 0, 0, 0, 1, jpkm1 )
-            z2d0(ji,jj) = z2d0(ji,jj) + rho0 * e3t(ji,jj,jk,Kmm) * ts(ji,jj,jk,jp_sal,Kmm)
+            z2d(ji,jj) = z2d(ji,jj) + rho0 * e3t(ji,jj,jk,Kmm) * ts(ji,jj,jk,jp_sal,Kmm)
          END_3D
-         CALL iom_put( "somint", z2d0 )             ! Vertical integral of salinity
+         CALL iom_put( "somint", z2d )             ! Vertical integral of salinity
       ENDIF
 
       CALL iom_put( "bn2", rn2 )                   ! Brunt-Vaisala buoyancy frequency (N^2)
-      
+
       IF (ln_dia25h)   CALL dia_25h( kt, Kmm )     ! 25h averaging
-      
+
       ! Output of surface vorticity terms
       !
       IF ( iom_use("ssplavor")    .OR. iom_use("ssrelvor")    .OR. iom_use("ssEns")    .OR.   &
          & iom_use("ssrelpotvor") .OR. iom_use("ssabspotvor") ) THEN
          !
          DO_2D( 0, 0, 0, 0 )
-            z2d0(ji,jj) = ff_f(ji,jj)
+            z2d(ji,jj) = ff_f(ji,jj)
          END_2D
-         CALL iom_put( "ssplavor", z2d0 )           ! planetary vorticity ( f )
+         CALL iom_put( "ssplavor", z2d )           ! planetary vorticity ( f )
 
          DO_2D( 0, 0, 0, 0 )
-            z2d0(ji,jj) = (   e2v(ji+1,jj  ) * vv(ji+1,jj  ,1,Kmm) - e2v(ji,jj) * vv(ji,jj,1,Kmm)    &
+            z2d(ji,jj) = (   e2v(ji+1,jj  ) * vv(ji+1,jj  ,1,Kmm) - e2v(ji,jj) * vv(ji,jj,1,Kmm)    &
             &              - e1u(ji  ,jj+1) * uu(ji  ,jj+1,1,Kmm) + e1u(ji,jj) * uu(ji,jj,1,Kmm)  ) * r1_e1e2f(ji,jj)
          END_2D
-         CALL iom_put( "ssrelvor", z2d0 )           ! relative vorticity ( zeta )
+         CALL iom_put( "ssrelvor", z2d )           ! relative vorticity ( zeta )
          !
          IF ( iom_use("ssEns") .OR. iom_use("ssrelpotvor") .OR. iom_use("ssabspotvor") ) THEN
-            DO_2D( 0, 0, 0, 0 )  
+            DO_2D( 0, 0, 0, 0 )
                ze3 = (  e3t(ji,jj+1,1,Kmm) * e1e2t(ji,jj+1) + e3t(ji+1,jj+1,1,Kmm) * e1e2t(ji+1,jj+1)    &
-                  &   + e3t(ji,jj  ,1,Kmm) * e1e2t(ji,jj  ) + e3t(ji+1,jj  ,1,Kmm) * e1e2t(ji+1,jj  )  ) * r1_e1e2f(ji,jj)
+                  &    + e3t(ji,jj  ,1,Kmm) * e1e2t(ji,jj  ) + e3t(ji+1,jj  ,1,Kmm) * e1e2t(ji+1,jj  )  ) * r1_e1e2f(ji,jj)
                IF( ze3 /= 0._wp ) THEN   ;   ze3 = 4._wp / ze3
                ELSE                      ;   ze3 = 0._wp
                ENDIF
-               z2d0(ji,jj) = ze3 * z2d0(ji,jj)
+               z2d(ji,jj) = ze3 * z2d(ji,jj)
             END_2D
-            CALL iom_put( "ssrelpotvor", z2d0 )     ! relative potential vorticity (zeta/h)
+            CALL iom_put( "ssrelpotvor", z2d )     ! relative potential vorticity (zeta/h)
             !
             IF ( iom_use("ssEns") .OR. iom_use("ssabspotvor") ) THEN
                DO_2D( 0, 0, 0, 0 )
                   ze3 = (  e3t(ji,jj+1,1,Kmm) * e1e2t(ji,jj+1) + e3t(ji+1,jj+1,1,Kmm) * e1e2t(ji+1,jj+1)    &
-                     &   + e3t(ji,jj  ,1,Kmm) * e1e2t(ji,jj  ) + e3t(ji+1,jj  ,1,Kmm) * e1e2t(ji+1,jj  )  ) * r1_e1e2f(ji,jj)
+                     &    + e3t(ji,jj  ,1,Kmm) * e1e2t(ji,jj  ) + e3t(ji+1,jj  ,1,Kmm) * e1e2t(ji+1,jj  )  ) * r1_e1e2f(ji,jj)
                   IF( ze3 /= 0._wp ) THEN   ;   ze3 = 4._wp / ze3
                   ELSE                      ;   ze3 = 0._wp
                   ENDIF
-                  z2d0(ji,jj) = ze3 * ff_f(ji,jj) + z2d0(ji,jj)
+                  z2d(ji,jj) = ze3 * ff_f(ji,jj) + z2d(ji,jj)
                END_2D
-               CALL iom_put( "ssabspotvor", z2d0 )  ! absolute potential vorticity ( q )
+               CALL iom_put( "ssabspotvor", z2d )  ! absolute potential vorticity ( q )
                !
                IF ( iom_use("ssEns") ) THEN
                   DO_2D( 0, 0, 0, 0 )  
-                     z2d0(ji,jj) = 0.5_wp * z2d0(ji,jj) * z2d0(ji,jj)
+                     z2d(ji,jj) = 0.5_wp * z2d(ji,jj) * z2d(ji,jj)
                   END_2D
-                  CALL iom_put( "ssEns", z2d0 )     ! potential enstrophy ( 1/2*q2 )
+                  CALL iom_put( "ssEns", z2d )     ! potential enstrophy ( 1/2*q2 )
                ENDIF
             ENDIF
          ENDIF
@@ -608,8 +603,8 @@ CONTAINS
       INTEGER  ::   jn, ierror                               ! local integers
       REAL(wp) ::   zsto, zout, zmax, zjulian                ! local scalars
       !
-      REAL(wp), DIMENSION(jpi,jpj    ) :: z2d0     ! 2D workspace
-      REAL(wp), DIMENSION(jpi,jpj,jpk) :: z3d0     ! 3D workspace
+      REAL(wp), DIMENSION(jpi,jpj    ) :: z2d     ! 2D workspace
+      REAL(wp), DIMENSION(jpi,jpj,jpk) :: z3d     ! 3D workspace
       REAL(wp), DIMENSION(:,:,:), ALLOCATABLE :: zw3d_abl   ! ABL 3D workspace
       !!----------------------------------------------------------------------
       !
@@ -961,21 +956,21 @@ CONTAINS
 
       IF( .NOT.ln_linssh ) THEN
          DO_3D( 0, 0, 0, 0, 1, jpk )
-            z3d0(ji,jj,jk) = ts(ji,jj,jk,jp_tem,Kmm) * e3t(ji,jj,jk,Kmm)
+            z3d(ji,jj,jk) = ts(ji,jj,jk,jp_tem,Kmm) * e3t(ji,jj,jk,Kmm)
          END_3D
-         CALL histwrite( nid_T, "votemper", it, z3d0, ndim_T , ndex_T  )   ! heat content
+         CALL histwrite( nid_T, "votemper", it, z3d, ndim_T , ndex_T  )   ! heat content
          DO_3D( 0, 0, 0, 0, 1, jpk )
-            z3d0(ji,jj,jk) = ts(ji,jj,jk,jp_sal,Kmm) * e3t(ji,jj,jk,Kmm)
+            z3d(ji,jj,jk) = ts(ji,jj,jk,jp_sal,Kmm) * e3t(ji,jj,jk,Kmm)
          END_3D
-         CALL histwrite( nid_T, "vosaline", it, z3d0, ndim_T , ndex_T  )   ! salt content
+         CALL histwrite( nid_T, "vosaline", it, z3d, ndim_T , ndex_T  )   ! salt content
          DO_2D( 0, 0, 0, 0 )
-            z2d0(ji,jj   ) = ts(ji,jj, 1,jp_tem,Kmm) * e3t(ji,jj, 1,Kmm)
+            z2d(ji,jj   ) = ts(ji,jj, 1,jp_tem,Kmm) * e3t(ji,jj, 1,Kmm)
          END_2D
-         CALL histwrite( nid_T, "sosstsst", it, z2d0, ndim_hT, ndex_hT )   ! sea surface heat content
+         CALL histwrite( nid_T, "sosstsst", it, z2d, ndim_hT, ndex_hT )   ! sea surface heat content
          DO_2D( 0, 0, 0, 0 )
-            z2d0(ji,jj   ) = ts(ji,jj, 1,jp_sal,Kmm) * e3t(ji,jj, 1,Kmm)
+            z2d(ji,jj   ) = ts(ji,jj, 1,jp_sal,Kmm) * e3t(ji,jj, 1,Kmm)
          END_2D
-         CALL histwrite( nid_T, "sosaline", it, z2d0, ndim_hT, ndex_hT )   ! sea surface salinity content
+         CALL histwrite( nid_T, "sosaline", it, z2d, ndim_hT, ndex_hT )   ! sea surface salinity content
       ELSE
          CALL histwrite( nid_T, "votemper", it, ts(:,:,:,jp_tem,Kmm) , ndim_T , ndex_T  )   ! temperature
          CALL histwrite( nid_T, "vosaline", it, ts(:,:,:,jp_sal,Kmm) , ndim_T , ndex_T  )   ! salinity
@@ -984,41 +979,41 @@ CONTAINS
       ENDIF
       IF( .NOT.ln_linssh ) THEN
          DO_3D( 0, 0, 0, 0, 1, jpk )
-           z3d0(ji,jj,jk) = e3t(ji,jj,jk,Kmm)     ! 3D workspace for qco substitution
+           z3d(ji,jj,jk) = e3t(ji,jj,jk,Kmm)     ! 3D workspace for qco substitution
          END_3D
-         CALL histwrite( nid_T, "vovvle3t", it, z3d0        , ndim_T , ndex_T  )   ! level thickness
+         CALL histwrite( nid_T, "vovvle3t", it, z3d        , ndim_T , ndex_T  )   ! level thickness
          DO_3D( 0, 0, 0, 0, 1, jpk )
-           z3d0(ji,jj,jk) = gdept(ji,jj,jk,Kmm)   ! 3D workspace for qco substitution
+           z3d(ji,jj,jk) = gdept(ji,jj,jk,Kmm)   ! 3D workspace for qco substitution
          END_3D
-         CALL histwrite( nid_T, "vovvldep", it, z3d0        , ndim_T , ndex_T  )   ! t-point depth
+         CALL histwrite( nid_T, "vovvldep", it, z3d        , ndim_T , ndex_T  )   ! t-point depth
          DO_3D( 0, 0, 0, 0, 1, jpk )
-            z3d0(ji,jj,jk) = ( ( e3t(ji,jj,jk,Kmm) - e3t_0(ji,jj,jk) ) / e3t_0(ji,jj,jk) * 100._wp * tmask(ji,jj,jk) ) ** 2
+            z3d(ji,jj,jk) = ( ( e3t(ji,jj,jk,Kmm) - e3t_0(ji,jj,jk) ) / e3t_0(ji,jj,jk) * 100._wp * tmask(ji,jj,jk) ) ** 2
          END_3D         
-         CALL histwrite( nid_T, "vovvldef", it, z3d0        , ndim_T , ndex_T  )   ! level thickness deformation
+         CALL histwrite( nid_T, "vovvldef", it, z3d        , ndim_T , ndex_T  )   ! level thickness deformation
       ENDIF
       CALL histwrite( nid_T, "sossheig", it, ssh(:,:,Kmm)  , ndim_hT, ndex_hT )   ! sea surface height
       DO_2D( 0, 0, 0, 0 )
-         z2d0(ji,jj) = emp(ji,jj) - rnf(ji,jj)
+         z2d(ji,jj) = emp(ji,jj) - rnf(ji,jj)
       END_2D
-      CALL histwrite( nid_T, "sowaflup", it, z2d0           , ndim_hT, ndex_hT )   ! upward water flux
+      CALL histwrite( nid_T, "sowaflup", it, z2d           , ndim_hT, ndex_hT )   ! upward water flux
       CALL histwrite( nid_T, "sorunoff", it, rnf           , ndim_hT, ndex_hT )   ! river runoffs
       CALL histwrite( nid_T, "sosfldow", it, sfx           , ndim_hT, ndex_hT )   ! downward salt flux 
                                                                                   ! (includes virtual salt flux beneath ice 
                                                                                   ! in linear free surface case)
       IF( ln_linssh ) THEN
          DO_2D( 0, 0, 0, 0 )
-            z2d0(ji,jj) = emp (ji,jj) * ts(ji,jj,1,jp_tem,Kmm)
+            z2d(ji,jj) = emp (ji,jj) * ts(ji,jj,1,jp_tem,Kmm)
          END_2D
-         CALL histwrite( nid_T, "sosst_cd", it, z2d0, ndim_hT, ndex_hT )          ! c/d term on sst
+         CALL histwrite( nid_T, "sosst_cd", it, z2d, ndim_hT, ndex_hT )          ! c/d term on sst
          DO_2D( 0, 0, 0, 0 )
-            z2d0(ji,jj) = emp (ji,jj) * ts(ji,jj,1,jp_sal,Kmm)
+            z2d(ji,jj) = emp (ji,jj) * ts(ji,jj,1,jp_sal,Kmm)
          END_2D
-         CALL histwrite( nid_T, "sosss_cd", it, z2d0, ndim_hT, ndex_hT )          ! c/d term on sss
+         CALL histwrite( nid_T, "sosss_cd", it, z2d, ndim_hT, ndex_hT )          ! c/d term on sss
       ENDIF
       DO_2D( 0, 0, 0, 0 )
-         z2d0(ji,jj) = qsr(ji,jj) + qns(ji,jj)
+         z2d(ji,jj) = qsr(ji,jj) + qns(ji,jj)
       END_2D
-      CALL histwrite( nid_T, "sohefldo", it, z2d0           , ndim_hT, ndex_hT )   ! total heat flux
+      CALL histwrite( nid_T, "sohefldo", it, z2d           , ndim_hT, ndex_hT )   ! total heat flux
       CALL histwrite( nid_T, "soshfldo", it, qsr           , ndim_hT, ndex_hT )   ! solar heat flux
       IF( ALLOCATED(hmld) ) THEN   ! zdf_mxl not called by SWE
          CALL histwrite( nid_T, "somixhgt", it, hmld          , ndim_hT, ndex_hT )   ! turbocline depth
@@ -1077,9 +1072,9 @@ CONTAINS
          CALL histwrite( nid_T, "sohefldp", it, qrp           , ndim_hT, ndex_hT )   ! heat flux damping
          CALL histwrite( nid_T, "sowafldp", it, erp           , ndim_hT, ndex_hT )   ! freshwater flux damping
          DO_2D( 0, 0, 0, 0 )
-            z2d0(ji,jj) = erp(ji,jj) * ts(ji,jj,1,jp_sal,Kmm) * tmask(ji,jj,1)
+            z2d(ji,jj) = erp(ji,jj) * ts(ji,jj,1,jp_sal,Kmm) * tmask(ji,jj,1)
          END_2D
-         CALL histwrite( nid_T, "sosafldp", it, z2d0           , ndim_hT, ndex_hT )   ! salt flux damping
+         CALL histwrite( nid_T, "sosafldp", it, z2d           , ndim_hT, ndex_hT )   ! salt flux damping
       ENDIF
 !      zw2d(:,:) = FLOAT( nmln(:,:) ) * tmask(:,:,1)
 !      CALL histwrite( nid_T, "sobowlin", it, zw2d          , ndim_hT, ndex_hT )   ! ???
@@ -1099,9 +1094,9 @@ CONTAINS
 
       IF( ln_zad_Aimp ) THEN
          DO_3D( 0, 0, 0, 0, 1, jpk )
-           z3d0(ji,jj,jk) = ww(ji,jj,jk) + wi(ji,jj,jk)
+           z3d(ji,jj,jk) = ww(ji,jj,jk) + wi(ji,jj,jk)
          END_3D         
-         CALL histwrite( nid_W, "vovecrtz", it, z3d0         , ndim_T, ndex_T )    ! vert. current
+         CALL histwrite( nid_W, "vovecrtz", it, z3d         , ndim_T, ndex_T )    ! vert. current
       ELSE
          CALL histwrite( nid_W, "vovecrtz", it, ww          , ndim_T, ndex_T )    ! vert. current
       ENDIF
@@ -1150,8 +1145,8 @@ CONTAINS
       !!
       INTEGER ::   ji, jj, jk       ! dummy loop indices
       INTEGER ::   inum
-      REAL(wp), DIMENSION(A2D(0))     :: z2d0
-      REAL(wp), DIMENSION(A2D(0),jpk) :: z3d0
+      REAL(wp), DIMENSION(A2D(0))     :: z2d
+      REAL(wp), DIMENSION(A2D(0),jpk) :: z3d
       !!----------------------------------------------------------------------
       ! 
       IF(lwp) THEN
@@ -1170,9 +1165,9 @@ CONTAINS
       CALL iom_rstput( 0, 0, inum, 'vomecrty', vv(:,:,:,Kmm)        )    ! now j-velocity
       IF( ln_zad_Aimp ) THEN
          DO_3D( 0, 0, 0, 0, 1, jpk )
-           z3d0(ji,jj,jk) = ww(ji,jj,jk) + wi(ji,jj,jk)
+           z3d(ji,jj,jk) = ww(ji,jj,jk) + wi(ji,jj,jk)
          END_3D
-         CALL iom_rstput( 0, 0, inum, 'vovecrtz', z3d0           )    ! now k-velocity
+         CALL iom_rstput( 0, 0, inum, 'vovecrtz', z3d            )    ! now k-velocity
       ELSE
          CALL iom_rstput( 0, 0, inum, 'vovecrtz', ww             )    ! now k-velocity
       ENDIF
@@ -1208,26 +1203,26 @@ CONTAINS
          CALL iom_rstput( 0, 0, inum,  'ahmf', ahmf              )    ! ahmf at v-point
       ENDIF
       DO_2D( 0, 0, 0, 0 )
-         z2d0(ji,jj) = emp(ji,jj) - rnf(ji,jj)
+         z2d(ji,jj) = emp(ji,jj) - rnf(ji,jj)
       END_2D
-      CALL iom_rstput( 0, 0, inum, 'sowaflup', z2d0              )    ! freshwater budget
+      CALL iom_rstput( 0, 0, inum, 'sowaflup', z2d               )    ! freshwater budget
       DO_2D( 0, 0, 0, 0 )
-         z2d0(ji,jj) = qsr(ji,jj) + qns(ji,jj)
+         z2d(ji,jj) = qsr(ji,jj) + qns(ji,jj)
       END_2D
-      CALL iom_rstput( 0, 0, inum, 'sohefldo', z2d0              )    ! total heat flux
+      CALL iom_rstput( 0, 0, inum, 'sohefldo', z2d               )    ! total heat flux
       CALL iom_rstput( 0, 0, inum, 'soshfldo', qsr               )    ! solar heat flux
       CALL iom_rstput( 0, 0, inum, 'soicecov', fr_i              )    ! ice fraction
       CALL iom_rstput( 0, 0, inum, 'sozotaux', utau              )    ! i-wind stress
       CALL iom_rstput( 0, 0, inum, 'sometauy', vtau              )    ! j-wind stress
       IF(  .NOT.ln_linssh  ) THEN
          DO_3D( 0, 0, 0, 0, 1, jpk )
-           z3d0(ji,jj,jk) = gdept(ji,jj,jk,Kmm)   ! 3D workspace for qco substitution
+           z3d(ji,jj,jk) = gdept(ji,jj,jk,Kmm)   ! 3D workspace for qco substitution
          END_3D
-         CALL iom_rstput( 0, 0, inum, 'vovvldep', z3d0           )    !  T-cell depth
+         CALL iom_rstput( 0, 0, inum, 'vovvldep', z3d            )    !  T-cell depth
          DO_3D( 0, 0, 0, 0, 1, jpk )
-           z3d0(ji,jj,jk) = e3t(ji,jj,jk,Kmm)     ! 3D workspace for qco substitution
+           z3d(ji,jj,jk) = e3t(ji,jj,jk,Kmm)     ! 3D workspace for qco substitution
          END_3D
-         CALL iom_rstput( 0, 0, inum, 'vovvle3t', z3d0           )    !  T-cell thickness
+         CALL iom_rstput( 0, 0, inum, 'vovvle3t', z3d            )    !  T-cell thickness
       END IF
       IF( ln_wave .AND. ln_sdw ) THEN
          CALL iom_rstput( 0, 0, inum, 'sdzocrtx', usd            )    ! now StokesDrift i-velocity
diff --git a/src/OCE/lib_fortran.F90 b/src/OCE/lib_fortran.F90
index 0a4ac708ded389bce46657c7d4eb9ceef643c8c4..ed6c8eea3ad8cf423b92195977540d8054cfeed5 100644
--- a/src/OCE/lib_fortran.F90
+++ b/src/OCE/lib_fortran.F90
@@ -18,6 +18,7 @@ MODULE lib_fortran
    !!----------------------------------------------------------------------
    USE par_oce         ! Ocean parameter
    USE dom_oce         ! ocean domain
+   USE domutl, ONLY : is_tile
    USE in_out_manager  ! I/O manager
    USE lib_mpp         ! distributed memory computing
    USE lbclnk          ! ocean lateral boundary conditions
@@ -37,7 +38,7 @@ MODULE lib_fortran
 #endif
 
    INTERFACE glob_sum
-      MODULE PROCEDURE glob_sum_1d, glob_sum_2d, glob_sum_3d
+      MODULE PROCEDURE glob_sum_0d, glob_sum_1d, glob_sum_2d, glob_sum_3d
    END INTERFACE
    INTERFACE local_sum
       MODULE PROCEDURE local_sum_2d, local_sum_3d
@@ -79,6 +80,9 @@ MODULE lib_fortran
 CONTAINS
 
 #  define GLOBSUM_CODE
+#     define DIM_0d
+#        include "lib_fortran_generic.h90"
+#     undef DIM_0d
 #     define DIM_1d
 #        include "lib_fortran_generic.h90"
 #     undef DIM_1d
diff --git a/src/OCE/lib_fortran_generic.h90 b/src/OCE/lib_fortran_generic.h90
index 2b84ed31aa187d201ea01f5b4619f2bbc6f8b005..4959cc0a15a15c0969b641dc53b66fcd5c62b482 100644
--- a/src/OCE/lib_fortran_generic.h90
+++ b/src/OCE/lib_fortran_generic.h90
@@ -3,6 +3,10 @@
 /*   DEFINE COMMON VARIABLES   */
 /*-----------------------------*/
 /**/
+#   if defined DIM_0d
+#      define XD                  0d
+#      define ARRAY_IN(i,j,k,l)   ptab
+#   endif
 #   if defined DIM_1d
 #      define XD                  1d
 #      define ARRAY_IN(i,j,k,l)   ptab(i)
@@ -49,29 +53,38 @@
 /**/
 !
 #   if defined LOCALONLY
-FUNCTION TYPENAME/**/_sum/**/ISVEC/**/_/**/XD/**/(         ptab ) RESULT( ptmp )
+   FUNCTION TYPENAME/**/_sum/**/ISVEC/**/_/**/XD/**/(         ptab, psum ) RESULT( ptmp )
       !!----------------------------------------------------------------------
 #   else
-FUNCTION TYPENAME/**/_sum/**/ISVEC/**/_/**/XD/**/( cdname, ptab ) RESULT( ptmp )
+   FUNCTION TYPENAME/**/_sum/**/ISVEC/**/_/**/XD/**/( cdname, ptab, psum ) RESULT( ptmp )
       !!----------------------------------------------------------------------
-      CHARACTER(len=*), INTENT(in   ) ::   cdname  ! name of the calling subroutine
+      CHARACTER(len=*), INTENT(in   )           ::   cdname  ! name of the calling subroutine
 #   endif
-      REAL(wp)        , INTENT(in   ) ::   ARRAY_IN(:,:,:,:)   ! array on which operation is applied
+      REAL(wp)        , INTENT(in   )           ::   ARRAY_IN(:,:,:,:)   ! array on which operation is applied
+      REAL(wp)        , INTENT(in   ), OPTIONAL ::   psum   ! existing sum to add to
       !
 #   if defined VEC
       REAL(wp)   , DIMENSION(LAST_SIZE) ::   ptmp
       COMPLEX(dp), DIMENSION(LAST_SIZE) ::   ctmp
 #   else
-      REAL(wp)   ::  ptmp
-      COMPLEX(dp)::   ctmp
+      REAL(wp)    ::   ptmp
+      COMPLEX(dp) ::   ctmp
 #   endif
       INTEGER    ::    ji,  jj,  jk,  jl        ! dummy loop indices
       INTEGER    ::   ipi, ipj, ipk, ipl        ! dimensions
-      INTEGER    ::   iisht, ijsht
+      INTEGER    ::   iilsht, ijlsht            ! loop shift indices
+      INTEGER    ::   iiasht, ijasht            ! array shift indices
       !!-----------------------------------------------------------------------
       !
-#  if defined DIM_1d
-      ctmp = CMPLX( 0.e0, 0.e0, dp )   ! warning ctmp is cumulated
+      IF( PRESENT( psum )) THEN           ! warning ctmp is cumulated
+         ctmp = CMPLX( psum, 0.e0, dp )
+      ELSE
+         ctmp = CMPLX( 0.e0, 0.e0, dp )
+      END IF
+
+#  if defined DIM_0d
+      CALL DDPDD( CMPLX( ptab, 0.e0, dp), ctmp )
+#  elif defined DIM_1d
       DO ji = 1, SIZE(ptab,1)
          CALL DDPDD( CMPLX( ptab(ji), 0.e0, dp ), ctmp )
       END DO
@@ -82,16 +95,21 @@ FUNCTION TYPENAME/**/_sum/**/ISVEC/**/_/**/XD/**/( cdname, ptab ) RESULT( ptmp )
       ipk = K_SIZE(ptab)   ! 3rd dimension
       ipl = L_SIZE(ptab)   ! 4th dimension
       !
-      iisht = ( jpi - ipi ) / 2
-      ijsht = ( jpj - ipj ) / 2   ! should be the same as iisht...
-      !
-      ctmp = CMPLX( 0.e0, 0.e0, dp )   ! warning ctmp is cumulated
+      IF( .NOT. is_tile(SIZE(ptab,1), SIZE(ptab,2)) ) THEN
+         iilsht = ( jpi - ipi ) / 2
+         ijlsht = ( jpj - ipj ) / 2   ! should be the same as iisht...
+      ELSE ! Tile sized array
+         iilsht = ( ntei - ntsi + 1 - ipi ) / 2 + nn_hls
+         ijlsht = ( ntej - ntsj + 1 - ipj ) / 2 + nn_hls
+      END IF
+      iiasht = iilsht + ntsi - 1 - nn_hls
+      ijasht = ijlsht + ntsj - 1 - nn_hls
       !
       DO jl = 1, ipl
          DO jk = 1, ipk
             DO_2D( 0, 0, 0, 0 )
                ! warning tmask_i is defined over the full MPI domain but maybe not ptab
-#   define ARRAY_LOOP             ARRAY_IN(ji-iisht,jj-ijsht,jk,jl) * tmask_i(ji,jj)
+#   define ARRAY_LOOP             ARRAY_IN(ji-iiasht,jj-ijasht,jk,jl) * tmask_i(ji,jj)
 #   if   defined VEC && defined DIM_3d
                CALL DDPDD( CMPLX( ARRAY_LOOP, 0.e0, dp ), ctmp(jk) )
 #   endif
diff --git a/src/OCE/stpmlf.F90 b/src/OCE/stpmlf.F90
index 9d7c7c0adff311c450dc5c9bdccaa8cde095c9c9..bd1caeb06d2ec486e9335149ff90b2917958e1a6 100644
--- a/src/OCE/stpmlf.F90
+++ b/src/OCE/stpmlf.F90
@@ -151,6 +151,7 @@ CONTAINS
                              CALL iom_setkt( kstp - nit000 + 1, cw_ablrst_cxt )
          ENDIF
       ENDIF
+      IF( kstp == nit000 )   CALL dia_ar5_init    ! AR5 diagnostics
       IF( kstp /= nit000 )   CALL day( kstp )         ! Calendar (day was already called at nit000 in day_init)
                              CALL iom_setkt( kstp - nit000 + 1,      cxios_context          )   ! tell IOM we are at time step kstp
       IF( ln_crs         )   CALL iom_setkt( kstp - nit000 + 1, TRIM(cxios_context)//"_crs" )   ! tell IOM we are at time step kstp
@@ -279,11 +280,19 @@ CONTAINS
       !<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
       IF( ln_floats  )   CALL flo_stp   ( kstp, Nbb, Nnn )      ! drifting Floats
       IF( ln_diacfl  )   CALL dia_cfl   ( kstp,      Nnn )      ! Courant number diagnostics
-                         CALL dia_hth   ( kstp,      Nnn )      ! Thermocline depth (20 degres isotherm depth)
       IF( ln_diadct  )   CALL dia_dct   ( kstp,      Nnn )      ! Transports
+
+      IF( ln_tile ) CALL dom_tile_start         ! Loop for DIA modules with tiling
+
+      DO jtile = 1, nijtile
+         IF( ln_tile ) CALL dom_tile( ntsi, ntsj, ntei, ntej, ktile = jtile )
+                         CALL dia_hth   ( kstp,      Nnn )      ! Thermocline depth (20 degres isotherm depth)
                          CALL dia_ar5   ( kstp,      Nnn )      ! ar5 diag
                          CALL dia_ptr   ( kstp,      Nnn )      ! Poleward adv/ldf TRansports diagnostics
                          CALL dia_wri   ( kstp,      Nnn )      ! ocean model: outputs
+      END DO
+      IF( ln_tile ) CALL dom_tile_stop
+
       IF( ln_crs     )   CALL crs_fld   ( kstp,      Nnn )      ! ocean model: online field coarsening & output
       IF( lk_diadetide ) CALL dia_detide( kstp )                ! Weights computation for daily detiding of model diagnostics
       IF( lk_diamlr  )   CALL dia_mlr                           ! Update time used in multiple-linear-regression analysis