MODULE lib_mpp
   !!======================================================================
   !!                       ***  MODULE  lib_mpp  ***
   !! Ocean numerics:  massively parallel processing library
   !!=====================================================================
   !! History :  OPA  !  1994  (M. Guyon, J. Escobar, M. Imbard)  Original code
   !!            7.0  !  1997  (A.M. Treguier)  SHMEM additions
   !!            8.0  !  1998  (M. Imbard, J. Escobar, L. Colombet ) SHMEM and MPI
   !!                 !  1998  (J.M. Molines) Open boundary conditions
   !!   NEMO     1.0  !  2003  (J.M. Molines, G. Madec)  F90, free form
   !!                 !  2003  (J.M. Molines) add mpp_ini_north(_3d,_2d)
   !!             -   !  2004  (R. Bourdalle Badie)  isend option in mpi
   !!                 !  2004  (J.M. Molines) minloc, maxloc
   !!             -   !  2005  (G. Madec, S. Masson)  npolj=5,6 F-point & ice cases
   !!             -   !  2005  (R. Redler) Replacement of MPI_COMM_WORLD except for MPI_Abort
   !!             -   !  2005  (R. Benshila, G. Madec)  add extra halo case
   !!             -   !  2008  (R. Benshila) add mpp_ini_ice
   !!            3.2  !  2009  (R. Benshila) SHMEM suppression, north fold in lbc_nfd
   !!            3.2  !  2009  (O. Marti)    add mpp_ini_znl
   !!            4.0  !  2011  (G. Madec)  move ctl_ routines from in_out_manager
   !!            3.5  !  2012  (S.Mocavero, I. Epicoco) Add mpp_lnk_bdy_3d/2d routines to optimize the BDY comm.
   !!            3.5  !  2013  (C. Ethe, G. Madec)  message passing arrays as local variables
   !!            3.5  !  2013  (S.Mocavero, I.Epicoco - CMCC) north fold optimizations
   !!            3.6  !  2015  (O. Tintó and M. Castrillo - BSC) Added '_multiple' case for 2D lbc and max
   !!            4.0  !  2017  (G. Madec) automatic allocation of array argument (use any 3rd dimension)
   !!             -   !  2017  (G. Madec) create generic.h90 files to generate all lbc and north fold routines
   !!----------------------------------------------------------------------

   !!----------------------------------------------------------------------
   !!   ctl_stop      : stop the run and print the error message(s)
   !!   ctl_warn      : print warning message(s) without stopping the run
   !!   ctl_opn       : Open file and check if required file is available.
   !!   ctl_nam       : print information when an error occurs while reading a namelist
   !!   load_nml      : Read, condense and buffer namelist file into character array for use as an internal file
   !!----------------------------------------------------------------------
   !!----------------------------------------------------------------------
   !!   mpp_start     : get the local communicator, its size and rank
   !!   mpp_lnk       : interface (defined in lbclnk) for message passing of 2d or 3d arrays (mpp_lnk_2d, mpp_lnk_3d)
   !!   mpp_lnk_icb   : interface for message passing of 2d arrays with extra halo for icebergs (mpp_lnk_2d_icb)
   !!   mpprecv       : receive a message-passing array (blocking mpi_recv)
   !!   mppsend       : send a message-passing array (non-blocking mpi_isend)
   !!   mppscatter    : scatter slices of a (jpi,jpj,jpnij) work array onto the local subdomain arrays
   !!   mppgather     : gather the local subdomain arrays into a (jpi,jpj,jpnij) work array
   !!   mpp_min       : generic interface for mppmin_int , mppmin_a_int , mppmin_real, mppmin_a_real
   !!   mpp_max       : generic interface for mppmax_int , mppmax_a_int , mppmax_real, mppmax_a_real
   !!   mpp_sum       : generic interface for mppsum_int , mppsum_a_int , mppsum_real, mppsum_a_real
   !!   mpp_minloc    : generic interface for the value and location of the minimum of a 2D/3D array
   !!   mpp_maxloc    : generic interface for the value and location of the maximum of a 2D/3D array
   !!   mppsync       : MPI barrier over the ocean communicator
   !!   mppstop       : finalize (or abort) the MPI execution
   !!   mpp_ini_northgather : initialisation of north fold with gathering of the communications
   !!   mpp_lbc_north_icb : alternative to mpp_nfd for extra outer halo with icebergs
   !!   mpp_bcast_nml : broadcast/receive namelist character buffer from reading process to all others
   !!----------------------------------------------------------------------
   USE dom_oce        ! ocean space and time domain
   USE in_out_manager ! I/O manager
#if ! defined key_mpi_off
   USE MPI
#endif

   IMPLICIT NONE
   PRIVATE
   !
   PUBLIC   ctl_stop, ctl_warn, ctl_opn, ctl_nam, load_nml
   PUBLIC   mpp_start, mppstop, mppsync, mpp_comm_free
   PUBLIC   mpp_ini_northgather
   PUBLIC   mpp_min, mpp_max, mpp_sum, mpp_minloc, mpp_maxloc
   PUBLIC   mpp_delay_max, mpp_delay_sum, mpp_delay_rcv
   PUBLIC   mppscatter, mppgather
   PUBLIC   mpp_ini_znl
   PUBLIC   mpp_ini_nc
   PUBLIC   mppsend, mpprecv                          ! needed by TAM and ICB routines
   PUBLIC   mppsend_sp, mpprecv_sp                          ! needed by TAM and ICB routines
   PUBLIC   mppsend_dp, mpprecv_dp                          ! needed by TAM and ICB routines
   PUBLIC   mpp_report
   PUBLIC   mpp_bcast_nml
   PUBLIC   tic_tac
#if defined key_mpi_off
   PUBLIC   MPI_wait
   PUBLIC   MPI_waitall
   PUBLIC   MPI_Wtime
#endif

   !! * Interfaces
   !! Define generic interfaces for these routines, as they are sometimes called
   !! with scalar arguments instead of array arguments, which causes compilation
   !! problems on AIX as well as NEC and SGI systems (OK on COMPAQ).
   INTERFACE mpp_min
      MODULE PROCEDURE mppmin_a_int, mppmin_int
      MODULE PROCEDURE mppmin_a_real_sp, mppmin_real_sp
      MODULE PROCEDURE mppmin_a_real_dp, mppmin_real_dp
   END INTERFACE
   INTERFACE mpp_max
      MODULE PROCEDURE mppmax_a_int, mppmax_int
      MODULE PROCEDURE mppmax_a_real_sp, mppmax_real_sp
      MODULE PROCEDURE mppmax_a_real_dp, mppmax_real_dp
   END INTERFACE
   INTERFACE mpp_sum
      MODULE PROCEDURE mppsum_a_int, mppsum_int
      MODULE PROCEDURE mppsum_realdd, mppsum_a_realdd
      MODULE PROCEDURE mppsum_a_real_sp, mppsum_real_sp
      MODULE PROCEDURE mppsum_a_real_dp, mppsum_real_dp
   END INTERFACE
   INTERFACE mpp_minloc
      MODULE PROCEDURE mpp_minloc2d_sp ,mpp_minloc3d_sp
      MODULE PROCEDURE mpp_minloc2d_dp ,mpp_minloc3d_dp
   END INTERFACE
   INTERFACE mpp_maxloc
      MODULE PROCEDURE mpp_maxloc2d_sp ,mpp_maxloc3d_sp
      MODULE PROCEDURE mpp_maxloc2d_dp ,mpp_maxloc3d_dp
   END INTERFACE
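
   !! Illustrative usage of the generic reduction interfaces above (a minimal
   !! sketch kept as comments; the variable names zmax, zsum1d and itot are
   !! hypothetical, 'lib_mpp' is only a label used for the communication report):
   !!    REAL(wp)               ::   zmax     ! scalar argument   -> mppmax_real
   !!    REAL(wp), DIMENSION(5) ::   zsum1d   ! 1D array argument -> mppsum_a_real
   !!    INTEGER                ::   itot
   !!    CALL mpp_max( 'lib_mpp', zmax   )    ! max over all processes
   !!    CALL mpp_sum( 'lib_mpp', zsum1d )    ! element-wise sum over all processes
   !!    CALL mpp_sum( 'lib_mpp', itot   )    ! integer sum over all processes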

   TYPE, PUBLIC ::   PTR_4D_sp   !: array of 4D pointers (used in lbclnk and lbcnfd)
      REAL(sp), DIMENSION (:,:,:,:), POINTER ::   pt4d
   END TYPE PTR_4D_sp

   TYPE, PUBLIC ::   PTR_4D_dp   !: array of 4D pointers (used in lbclnk and lbcnfd)
      REAL(dp), DIMENSION (:,:,:,:), POINTER ::   pt4d
   END TYPE PTR_4D_dp

   !! ========================= !!
   !!  MPI  variable definition !!
   !! ========================= !!
#if ! defined key_mpi_off
   LOGICAL, PUBLIC, PARAMETER ::   lk_mpp = .TRUE.    !: mpp flag
#else
   INTEGER, PUBLIC, PARAMETER ::   MPI_STATUS_SIZE = 1
   INTEGER, PUBLIC, PARAMETER ::   MPI_REAL = 4
   INTEGER, PUBLIC, PARAMETER ::   MPI_DOUBLE_PRECISION = 8
   INTEGER, PUBLIC, PARAMETER ::   MPI_REQUEST_NULL = 1
   LOGICAL, PUBLIC, PARAMETER ::   lk_mpp = .FALSE.    !: mpp flag
   INTEGER, PUBLIC, DIMENSION(MPI_STATUS_SIZE) ::   MPI_STATUS_IGNORE = 1     ! out from mpi_wait
   INTEGER, PUBLIC, DIMENSION(MPI_STATUS_SIZE) ::   MPI_STATUSES_IGNORE = 1   ! out from mpi_waitall
#endif

   INTEGER, PUBLIC ::   mppsize        ! number of processes
   INTEGER, PUBLIC ::   mpprank        ! process number  [ 0 - size-1 ]
!$AGRIF_DO_NOT_TREAT
   INTEGER, PUBLIC ::   mpi_comm_oce   ! opa local communicator
!$AGRIF_END_DO_NOT_TREAT

   INTEGER :: MPI_SUMDD

   ! Neighbour information
   INTEGER,    PARAMETER, PUBLIC ::   n_hlsmax = 3
   INTEGER, DIMENSION(         8), PUBLIC ::   mpinei      !: 8-neighbour MPI indices (starting at 0, -1 if no neighbour)
   INTEGER, DIMENSION(n_hlsmax,8), PUBLIC ::   mpiSnei     !: 8-neighbour Send MPI indices (starting at 0, -1 if no neighbour)
   INTEGER, DIMENSION(n_hlsmax,8), PUBLIC ::   mpiRnei     !: 8-neighbour Recv MPI indices (starting at 0, -1 if no neighbour)
   INTEGER,    PARAMETER, PUBLIC ::   jpwe = 1   !: WEst
   INTEGER,    PARAMETER, PUBLIC ::   jpea = 2   !: EAst
   INTEGER,    PARAMETER, PUBLIC ::   jpso = 3   !: SOuth
   INTEGER,    PARAMETER, PUBLIC ::   jpno = 4   !: NOrth
   INTEGER,    PARAMETER, PUBLIC ::   jpsw = 5   !: South-West
   INTEGER,    PARAMETER, PUBLIC ::   jpse = 6   !: South-East
   INTEGER,    PARAMETER, PUBLIC ::   jpnw = 7   !: North-West
   INTEGER,    PARAMETER, PUBLIC ::   jpne = 8   !: North-East

   LOGICAL, DIMENSION(8), PUBLIC ::   l_SelfPerio  !   should we explicitly take care of I/J periodicity
   LOGICAL,               PUBLIC ::   l_IdoNFold

   ! variables used for zonal integration
   INTEGER, PUBLIC ::   ncomm_znl         !: communicator made by the processors on the same zonal average
   LOGICAL, PUBLIC ::   l_znl_root        !: True on the 'left'most processor on the same row
   INTEGER         ::   ngrp_znl          !: group ID for the znl processors
   INTEGER         ::   ndim_rank_znl     !: number of processors on the same zonal average
   INTEGER, DIMENSION(:), ALLOCATABLE, SAVE ::   nrank_znl  ! dimension ndim_rank_znl, number of the procs into the same znl domain

   ! variables used for MPI3 neighbourhood collectives
   INTEGER, DIMENSION(n_hlsmax), PUBLIC ::   mpi_nc_com4       ! MPI3 neighbourhood collectives communicator
   INTEGER, DIMENSION(n_hlsmax), PUBLIC ::   mpi_nc_com8       ! MPI3 neighbourhood collectives communicator (with diagonals)

   ! North fold condition in mpp_mpi with jpni > 1 (PUBLIC for TAM)
   INTEGER, PUBLIC ::   ngrp_world        !: group ID for the world processors
   INTEGER, PUBLIC ::   ngrp_opa          !: group ID for the opa processors
   INTEGER, PUBLIC ::   ngrp_north        !: group ID for the northern processors (to be fold)
   INTEGER, PUBLIC ::   ncomm_north       !: communicator made by the processors belonging to ngrp_north
   INTEGER, PUBLIC ::   ndim_rank_north   !: number of 'sea' processors in the northern line (can be /= jpni !)
   INTEGER, PUBLIC ::   njmppmax          !: value of njmpp for the processors of the northern line
   INTEGER, PUBLIC ::   north_root        !: number (in the comm_opa) of proc 0 in the northern comm
   INTEGER, PUBLIC, DIMENSION(:), ALLOCATABLE, SAVE ::   nrank_north   !: dimension ndim_rank_north

   ! Communications summary report
   CHARACTER(len=lca), DIMENSION(:), ALLOCATABLE ::   crname_lbc                   !: names of lbc_lnk calling routines
   CHARACTER(len=lca), DIMENSION(:), ALLOCATABLE ::   crname_glb                   !: names of global comm calling routines
   CHARACTER(len=lca), DIMENSION(:), ALLOCATABLE ::   crname_dlg                   !: names of delayed global comm calling routines
   INTEGER, PUBLIC                               ::   ncom_stp = 0                 !: copy of time step # istp
   INTEGER, PUBLIC                               ::   ncom_fsbc = 1                !: copy of sbc time step # nn_fsbc
   INTEGER, PUBLIC                               ::   ncom_freq                    !: frequency of comm diagnostic
   INTEGER, PUBLIC , DIMENSION(:,:), ALLOCATABLE ::   ncomm_sequence               !: size of communicated arrays (halos)
   INTEGER, PARAMETER, PUBLIC                    ::   ncom_rec_max = 5000          !: max number of communication records
   INTEGER, PUBLIC                               ::   n_sequence_lbc = 0           !: # of communicated arrays via lbc
   INTEGER, PUBLIC                               ::   n_sequence_glb = 0           !: # of global communications
   INTEGER, PUBLIC                               ::   n_sequence_dlg = 0           !: # of delayed global communications
   INTEGER, PUBLIC                               ::   numcom = -1                  !: logical unit for the communication report
   LOGICAL, PUBLIC                               ::   l_full_nf_update = .TRUE.    !: logical for a full (2-line) update of bc at the north fold
   INTEGER,                    PARAMETER, PUBLIC ::   nbdelay = 2       !: number of delayed operations
   !: name (used as id) of allreduce-delayed operations
   ! Warning: we must use the same character length in an array constructor (at least for gcc compiler)
   CHARACTER(len=32), DIMENSION(nbdelay), PUBLIC ::   c_delaylist = (/ 'cflice', 'fwb   ' /)
   !: component name where the allreduce-delayed operation is performed
   CHARACTER(len=3),  DIMENSION(nbdelay), PUBLIC ::   c_delaycpnt = (/ 'ICE'   , 'OCE' /)
   TYPE, PUBLIC ::   DELAYARR
      REAL(   wp), POINTER, DIMENSION(:) ::  z1d => NULL()
      COMPLEX(dp), POINTER, DIMENSION(:) ::  y1d => NULL()
   END TYPE DELAYARR
   TYPE( DELAYARR ), DIMENSION(nbdelay), PUBLIC, SAVE  ::   todelay         !: must have SAVE for default initialization of DELAYARR
   INTEGER,          DIMENSION(nbdelay), PUBLIC        ::   ndelayid = -1   !: mpi request id of the delayed operations

   ! timing summary report
   REAL(dp), DIMENSION(2), PUBLIC ::  waiting_time = 0._dp
   REAL(dp)              , PUBLIC ::  compute_time = 0._dp, elapsed_time = 0._dp

   REAL(wp), DIMENSION(:), ALLOCATABLE, SAVE ::   tampon   ! buffer in case of bsend

   LOGICAL, PUBLIC ::   ln_nnogather                !: namelist control of northfold comms
   INTEGER, PUBLIC ::   nn_comm                     !: namelist control of comms

   INTEGER, PUBLIC, PARAMETER ::   jpfillnothing = 1
   INTEGER, PUBLIC, PARAMETER ::   jpfillcst     = 2
   INTEGER, PUBLIC, PARAMETER ::   jpfillcopy    = 3
   INTEGER, PUBLIC, PARAMETER ::   jpfillperio   = 4
   INTEGER, PUBLIC, PARAMETER ::   jpfillmpi     = 5

   !! * Substitutions
#  include "do_loop_substitute.h90"
   !!----------------------------------------------------------------------
   !! NEMO/OCE 4.0 , NEMO Consortium (2018)
   !! $Id: lib_mpp.F90 15267 2021-09-17 09:04:34Z smasson $
   !! Software governed by the CeCILL license (see ./LICENSE)
   !!----------------------------------------------------------------------
CONTAINS

   SUBROUTINE mpp_start( localComm )
      !!----------------------------------------------------------------------
      !!                  ***  routine mpp_start  ***
      !!
      !! ** Purpose :   get mpi_comm_oce, mpprank and mppsize
      !!----------------------------------------------------------------------
      INTEGER         , OPTIONAL   , INTENT(in   ) ::   localComm    !
      !
      INTEGER ::   ierr
      LOGICAL ::   llmpi_init
      !!----------------------------------------------------------------------
#if ! defined key_mpi_off
      !
      CALL mpi_initialized ( llmpi_init, ierr )
      IF( ierr /= MPI_SUCCESS ) CALL ctl_stop( 'STOP', ' lib_mpp: Error in routine mpi_initialized' )

      IF( .NOT. llmpi_init ) THEN
         IF( PRESENT(localComm) ) THEN
            WRITE(ctmp1,*) ' lib_mpp: You cannot provide a local communicator '
            WRITE(ctmp2,*) '          without calling MPI_Init before ! '
            CALL ctl_stop( 'STOP', ctmp1, ctmp2 )
         ENDIF
         CALL mpi_init( ierr )
         IF( ierr /= MPI_SUCCESS ) CALL ctl_stop( 'STOP', ' lib_mpp: Error in routine mpi_init' )
      ENDIF

      IF( PRESENT(localComm) ) THEN
         IF( Agrif_Root() ) THEN
            mpi_comm_oce = localComm
         ENDIF
      ELSE
         CALL mpi_comm_dup( mpi_comm_world, mpi_comm_oce, ierr)
         IF( ierr /= MPI_SUCCESS ) CALL ctl_stop( 'STOP', ' lib_mpp: Error in routine mpi_comm_dup' )
      ENDIF

# if defined key_agrif
      IF( Agrif_Root() ) THEN
         CALL Agrif_MPI_Init(mpi_comm_oce)
      ELSE
         CALL Agrif_MPI_set_grid_comm(mpi_comm_oce)
      ENDIF
# endif

      CALL mpi_comm_rank( mpi_comm_oce, mpprank, ierr )
      CALL mpi_comm_size( mpi_comm_oce, mppsize, ierr )
      !
      CALL MPI_OP_CREATE(DDPDD_MPI, .TRUE., MPI_SUMDD, ierr)
      !
#else
      IF( PRESENT( localComm ) ) mpi_comm_oce = localComm
      mppsize = 1
      mpprank = 0
#endif
   END SUBROUTINE mpp_start
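
   !! Minimal usage sketch for mpp_start (kept as comments; icpl_comm is a
   !! hypothetical communicator name, e.g. one provided by an external coupler):
   !!    CALL mpp_start()                          ! NEMO calls MPI_Init and duplicates MPI_COMM_WORLD
   !!    CALL mpp_start( localComm = icpl_comm )   ! or reuse a communicator when MPI_Init was already called
   !!    IF( mpprank == 0 )   WRITE(*,*) 'running on ', mppsize, ' MPI processes'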


   SUBROUTINE mppsend( ktyp, pmess, kbytes, kdest, md_req )
      !!----------------------------------------------------------------------
      !!                  ***  routine mppsend  ***
      !!
      !! ** Purpose :   Send a message-passing array
      !!
      !!----------------------------------------------------------------------
      REAL(wp), INTENT(inout) ::   pmess(*)   ! array of real
      INTEGER , INTENT(in   ) ::   kbytes     ! size of the array pmess
      INTEGER , INTENT(in   ) ::   kdest      ! receive process number
      INTEGER , INTENT(in   ) ::   ktyp       ! tag of the message
      INTEGER , INTENT(inout) ::   md_req     ! argument for isend
      !!
      INTEGER ::   iflag
      INTEGER :: mpi_working_type
      !!----------------------------------------------------------------------
      !
#if ! defined key_mpi_off
      IF (wp == dp) THEN
         mpi_working_type = mpi_double_precision
      ELSE
         mpi_working_type = mpi_real
      END IF
      CALL mpi_isend( pmess, kbytes, mpi_working_type, kdest , ktyp, mpi_comm_oce, md_req, iflag )
#endif
      !
   END SUBROUTINE mppsend


   SUBROUTINE mppsend_dp( ktyp, pmess, kbytes, kdest, md_req )
      !!----------------------------------------------------------------------
      !!                  ***  routine mppsend_dp  ***
      !!
      !! ** Purpose :   Send a message-passing array (double precision)
      !!
      !!----------------------------------------------------------------------
      REAL(dp), INTENT(inout) ::   pmess(*)   ! array of real
      INTEGER , INTENT(in   ) ::   kbytes     ! size of the array pmess
      INTEGER , INTENT(in   ) ::   kdest      ! receive process number
      INTEGER , INTENT(in   ) ::   ktyp       ! tag of the message
      INTEGER , INTENT(inout) ::   md_req     ! argument for isend
      !!
      INTEGER ::   iflag
      !!----------------------------------------------------------------------
      !
#if ! defined key_mpi_off
      CALL mpi_isend( pmess, kbytes, mpi_double_precision, kdest , ktyp, mpi_comm_oce, md_req, iflag )
#endif
      !
   END SUBROUTINE mppsend_dp


   SUBROUTINE mppsend_sp( ktyp, pmess, kbytes, kdest, md_req )
      !!----------------------------------------------------------------------
      !!                  ***  routine mppsend_sp  ***
      !!
      !! ** Purpose :   Send a message-passing array (single precision)
      !!
      !!----------------------------------------------------------------------
      REAL(sp), INTENT(inout) ::   pmess(*)   ! array of real
      INTEGER , INTENT(in   ) ::   kbytes     ! size of the array pmess
      INTEGER , INTENT(in   ) ::   kdest      ! receive process number
      INTEGER , INTENT(in   ) ::   ktyp       ! tag of the message
      INTEGER , INTENT(inout) ::   md_req     ! argument for isend
      !!
      INTEGER ::   iflag
      !!----------------------------------------------------------------------
      !
#if ! defined key_mpi_off
      CALL mpi_isend( pmess, kbytes, mpi_real, kdest , ktyp, mpi_comm_oce, md_req, iflag )
#endif
      !
   END SUBROUTINE mppsend_sp


   SUBROUTINE mpprecv( ktyp, pmess, kbytes, ksource )
      !!----------------------------------------------------------------------
      !!                  ***  routine mpprecv  ***
      !!
      !! ** Purpose :   Receive a message-passing array
      !!
      !!----------------------------------------------------------------------
      REAL(wp), INTENT(inout) ::   pmess(*)   ! array of real
      INTEGER , INTENT(in   ) ::   kbytes     ! size of the array pmess
      INTEGER , INTENT(in   ) ::   ktyp       ! Tag of the received message
      INTEGER, OPTIONAL, INTENT(in) :: ksource    ! source process number
      !!
      INTEGER :: istatus(mpi_status_size)
      INTEGER :: iflag
      INTEGER :: use_source
      INTEGER :: mpi_working_type
      !!----------------------------------------------------------------------
      !
#if ! defined key_mpi_off
      ! If a specific process number has been passed to the receive call,
      ! use that one. Default is to use mpi_any_source
      use_source = mpi_any_source
      IF( PRESENT(ksource) )   use_source = ksource
      !
      IF (wp == dp) THEN
         mpi_working_type = mpi_double_precision
      ELSE
         mpi_working_type = mpi_real
      END IF
      CALL mpi_recv( pmess, kbytes, mpi_working_type, use_source, ktyp, mpi_comm_oce, istatus, iflag )
#endif
      !
   END SUBROUTINE mpprecv

   SUBROUTINE mpprecv_dp( ktyp, pmess, kbytes, ksource )
      !!----------------------------------------------------------------------
      !!                  ***  routine mpprecv_dp  ***
      !!
      !! ** Purpose :   Receive a message-passing array (double precision)
      !!
      !!----------------------------------------------------------------------
      REAL(dp), INTENT(inout) ::   pmess(*)   ! array of real
      INTEGER , INTENT(in   ) ::   kbytes     ! size of the array pmess
      INTEGER , INTENT(in   ) ::   ktyp       ! Tag of the received message
      INTEGER, OPTIONAL, INTENT(in) :: ksource    ! source process number
      !!
      INTEGER :: istatus(mpi_status_size)
      INTEGER :: iflag
      INTEGER :: use_source
      !!----------------------------------------------------------------------
      !
#if ! defined key_mpi_off
      ! If a specific process number has been passed to the receive call,
      ! use that one. Default is to use mpi_any_source
      use_source = mpi_any_source
      IF( PRESENT(ksource) )   use_source = ksource
      !
      CALL mpi_recv( pmess, kbytes, mpi_double_precision, use_source, ktyp, mpi_comm_oce, istatus, iflag )
#endif
      !
   END SUBROUTINE mpprecv_dp


   SUBROUTINE mpprecv_sp( ktyp, pmess, kbytes, ksource )
      !!----------------------------------------------------------------------
      !!                  ***  routine mpprecv_sp  ***
      !!
      !! ** Purpose :   Receive a message-passing array (single precision)
      !!
      !!----------------------------------------------------------------------
      REAL(sp), INTENT(inout) ::   pmess(*)   ! array of real
      INTEGER , INTENT(in   ) ::   kbytes     ! size of the array pmess
      INTEGER , INTENT(in   ) ::   ktyp       ! Tag of the received message
      INTEGER, OPTIONAL, INTENT(in) :: ksource    ! source process number
      !!
      INTEGER :: istatus(mpi_status_size)
      INTEGER :: iflag
      INTEGER :: use_source
      !!----------------------------------------------------------------------
      !
#if ! defined key_mpi_off
      ! If a specific process number has been passed to the receive call,
      ! use that one. Default is to use mpi_any_source
      use_source = mpi_any_source
      IF( PRESENT(ksource) )   use_source = ksource
      !
      CALL mpi_recv( pmess, kbytes, mpi_real, use_source, ktyp, mpi_comm_oce, istatus, iflag )
#endif
      !
   END SUBROUTINE mpprecv_sp
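
   !! Minimal sketch of a matched mppsend/mpprecv pair (kept as comments; zbuf,
   !! itag, ireq and the ranks are hypothetical).  mppsend posts a non-blocking
   !! mpi_isend, so the request must be completed before the buffer is reused:
   !!    REAL(wp), DIMENSION(100) ::   zbuf
   !!    INTEGER ::   itag, ireq, ierr, istatus(MPI_STATUS_SIZE)
   !!    itag = 5
   !!    IF(     mpprank == 0 ) THEN
   !!       CALL mppsend( itag, zbuf, 100, 1, ireq )   ! send 100 reals to rank 1
   !!       CALL mpi_wait( ireq, istatus, ierr )       ! complete the isend before reusing zbuf
   !!    ELSEIF( mpprank == 1 ) THEN
   !!       CALL mpprecv( itag, zbuf, 100, 0 )         ! blocking receive from rank 0
   !!    ENDIF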


   SUBROUTINE mppgather( ptab, kp, pio )
      !!----------------------------------------------------------------------
      !!                   ***  routine mppgather  ***
      !!
      !! ** Purpose :   Transfer from a local subdomain array to a work
      !!     array which is distributed following the vertical levels.
      !!
      !!----------------------------------------------------------------------
      REAL(wp), DIMENSION(jpi,jpj)      , INTENT(in   ) ::   ptab   ! subdomain input array
      INTEGER                           , INTENT(in   ) ::   kp     ! root process of the gather
      REAL(wp), DIMENSION(jpi,jpj,jpnij), INTENT(  out) ::   pio    ! gathered work array
      !!
      INTEGER :: itaille, ierror   ! temporary integer
      !!---------------------------------------------------------------------
      !
      itaille = jpi * jpj
#if ! defined key_mpi_off
      CALL mpi_gather( ptab, itaille, mpi_double_precision, pio, itaille     ,   &
         &                            mpi_double_precision, kp , mpi_comm_oce, ierror )
#else
      pio(:,:,1) = ptab(:,:)
#endif
      !
   END SUBROUTINE mppgather


   SUBROUTINE mppscatter( pio, kp, ptab )
      !!----------------------------------------------------------------------
      !!                  ***  routine mppscatter  ***
      !!
      !! ** Purpose :   Transfer from a work array which is distributed
      !!      following the vertical levels to the local subdomain array.
      !!
      !!----------------------------------------------------------------------
      REAL(wp), DIMENSION(jpi,jpj,jpnij)  ::   pio    ! work array to scatter (input)
      INTEGER                             ::   kp     ! root process of the scatter
      REAL(wp), DIMENSION(jpi,jpj)        ::   ptab   ! local subdomain array (output)
      !!
      INTEGER :: itaille, ierror   ! temporary integer
      !!---------------------------------------------------------------------
      !
      itaille = jpi * jpj
      !
#if ! defined key_mpi_off
      CALL mpi_scatter( pio, itaille, mpi_double_precision, ptab, itaille     ,   &
         &                            mpi_double_precision, kp  , mpi_comm_oce, ierror )
#else
      ptab(:,:) = pio(:,:,1)
#endif
      !
   END SUBROUTINE mppscatter
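
   !! Minimal sketch of mppgather/mppscatter (kept as comments; zloc and zglo are
   !! hypothetical array names).  The work array holds one (jpi,jpj) slice per
   !! process and the second argument is the root rank of the collective:
   !!    REAL(wp), DIMENSION(jpi,jpj)       ::   zloc   ! local subdomain field
   !!    REAL(wp), DIMENSION(jpi,jpj,jpnij) ::   zglo   ! one slice per process
   !!    CALL mppgather ( zloc, 0, zglo )   ! collect every subdomain on rank 0
   !!    CALL mppscatter( zglo, 0, zloc )   ! redistribute the slices from rank 0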


   SUBROUTINE mpp_delay_sum( cdname, cdelay, y_in, pout, ldlast, kcom )
      !!----------------------------------------------------------------------
      !!                   ***  routine mpp_delay_sum  ***
      !!
      !! ** Purpose :   perform a delayed mpp_sum; the result is received on the next call
      !!
      !!----------------------------------------------------------------------
      CHARACTER(len=*), INTENT(in   )               ::   cdname  ! name of the calling subroutine
      CHARACTER(len=*), INTENT(in   )               ::   cdelay  ! name (used as id) of the delayed operation
      COMPLEX(dp),      INTENT(in   ), DIMENSION(:) ::   y_in
      REAL(wp),         INTENT(  out), DIMENSION(:) ::   pout
      LOGICAL,          INTENT(in   )               ::   ldlast  ! true if this is the last time we call this routine
      INTEGER,          INTENT(in   ), OPTIONAL     ::   kcom
      !!
      INTEGER ::   ji, isz
      INTEGER ::   idvar
      INTEGER ::   ierr, ilocalcomm
      COMPLEX(dp), ALLOCATABLE, DIMENSION(:) ::   ytmp
      !!----------------------------------------------------------------------
#if ! defined key_mpi_off
      ilocalcomm = mpi_comm_oce
      IF( PRESENT(kcom) )   ilocalcomm = kcom

      isz = SIZE(y_in)

      IF( narea == 1 .AND. numcom == -1 ) CALL mpp_report( cdname, ld_dlg = .TRUE. )

      idvar = -1
      DO ji = 1, nbdelay
         IF( TRIM(cdelay) == TRIM(c_delaylist(ji)) ) idvar = ji
      END DO
      IF ( idvar == -1 )   CALL ctl_stop( 'STOP',' mpp_delay_sum : please add a new delayed exchange for '//TRIM(cdname) )

      IF ( ndelayid(idvar) == 0 ) THEN         ! first call    with restart: %z1d defined in iom_delay_rst
         !                                       --------------------------
         IF ( SIZE(todelay(idvar)%z1d) /= isz ) THEN                  ! Check dimension coherence
            IF(lwp) WRITE(numout,*) ' WARNING: the number of delayed variables in the restart file does not match the model'
            DEALLOCATE(todelay(idvar)%z1d)
            ndelayid(idvar) = -1                                      ! do as if we had no restart
         ELSE
            ALLOCATE(todelay(idvar)%y1d(isz))
            todelay(idvar)%y1d(:) = CMPLX(todelay(idvar)%z1d(:), 0., wp)   ! create %y1d, complex variable needed by mpi_sumdd
            ndelayid(idvar) = MPI_REQUEST_NULL                             ! initialised request to a valid value
         END IF
      ENDIF

      IF( ndelayid(idvar) == -1 ) THEN         ! first call without restart: define %y1d and %z1d from y_in with blocking allreduce
         !                                       --------------------------
         ALLOCATE(todelay(idvar)%z1d(isz), todelay(idvar)%y1d(isz))   ! allocate also %z1d as used for the restart
         CALL mpi_allreduce( y_in(:), todelay(idvar)%y1d(:), isz, MPI_DOUBLE_COMPLEX, mpi_sumdd, ilocalcomm, ierr )   ! get %y1d
         ndelayid(idvar) = MPI_REQUEST_NULL
      ENDIF

      CALL mpp_delay_rcv( idvar )         ! make sure %z1d is received

      ! send back pout from todelay(idvar)%z1d defined at previous call
      pout(:) = todelay(idvar)%z1d(:)

      ! send y_in into todelay(idvar)%y1d with a non-blocking communication
# if defined key_mpi2
      IF( ln_timing ) CALL tic_tac( .TRUE., ld_global = .TRUE.)
      CALL  mpi_allreduce( y_in(:), todelay(idvar)%y1d(:), isz, MPI_DOUBLE_COMPLEX, mpi_sumdd, ilocalcomm, ierr )
      ndelayid(idvar) = MPI_REQUEST_NULL
      IF( ln_timing ) CALL tic_tac(.FALSE., ld_global = .TRUE.)
# else
      CALL mpi_iallreduce( y_in(:), todelay(idvar)%y1d(:), isz, MPI_DOUBLE_COMPLEX, mpi_sumdd, ilocalcomm, ndelayid(idvar), ierr )
# endif
#else
      pout(:) = REAL(y_in(:), wp)
#endif

   END SUBROUTINE mpp_delay_sum
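
   !! Minimal sketch of the delayed global sum (kept as comments; y_loc, z_glb,
   !! 'my_routine', kt and nitend are hypothetical/illustrative).  The value
   !! returned at a given call is the reduction posted at the previous one, which
   !! hides the allreduce latency; the id ('fwb' here) must appear in c_delaylist:
   !!    COMPLEX(dp), DIMENSION(1) ::   y_loc   ! local contribution (real part = value)
   !!    REAL(wp)   , DIMENSION(1) ::   z_glb   ! global sum from the previous call
   !!    CALL mpp_delay_sum( 'my_routine', 'fwb', y_loc, z_glb, kt == nitend )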


   SUBROUTINE mpp_delay_max( cdname, cdelay, p_in, pout, ldlast, kcom )
      !!----------------------------------------------------------------------
      !!                   ***  routine mpp_delay_max  ***
      !!
      !! ** Purpose :   perform a delayed mpp_max; the result is received on the next call
      !!
      !!----------------------------------------------------------------------
      CHARACTER(len=*), INTENT(in   )                 ::   cdname  ! name of the calling subroutine
      CHARACTER(len=*), INTENT(in   )                 ::   cdelay  ! name (used as id) of the delayed operation
      REAL(wp),         INTENT(in   ), DIMENSION(:)   ::   p_in    !
      REAL(wp),         INTENT(  out), DIMENSION(:)   ::   pout    !
      LOGICAL,          INTENT(in   )                 ::   ldlast  ! true if this is the last time we call this routine
      INTEGER,          INTENT(in   ), OPTIONAL       ::   kcom
      !!
      INTEGER ::   ji, isz
      INTEGER ::   idvar
      INTEGER ::   ierr, ilocalcomm
      INTEGER ::   MPI_TYPE
      !!----------------------------------------------------------------------

#if ! defined key_mpi_off
      IF( wp == dp ) THEN
         MPI_TYPE = MPI_DOUBLE_PRECISION
      ELSEIF( wp == sp ) THEN
         MPI_TYPE = MPI_REAL
      ELSE
         CALL ctl_stop( "Error defining type, wp is neither dp nor sp" )
      ENDIF

      ilocalcomm = mpi_comm_oce
      IF( PRESENT(kcom) )   ilocalcomm = kcom

      isz = SIZE(p_in)

      IF( narea == 1 .AND. numcom == -1 ) CALL mpp_report( cdname, ld_dlg = .TRUE. )

      idvar = -1
      DO ji = 1, nbdelay
         IF( TRIM(cdelay) == TRIM(c_delaylist(ji)) ) idvar = ji
      END DO
      IF ( idvar == -1 )   CALL ctl_stop( 'STOP',' mpp_delay_max : please add a new delayed exchange for '//TRIM(cdname) )

      IF ( ndelayid(idvar) == 0 ) THEN         ! first call    with restart: %z1d defined in iom_delay_rst
         !                                       --------------------------
         IF ( SIZE(todelay(idvar)%z1d) /= isz ) THEN                  ! Check dimension coherence
            IF(lwp) WRITE(numout,*) ' WARNING: the number of delayed variables in the restart file does not match the model'
            DEALLOCATE(todelay(idvar)%z1d)
            ndelayid(idvar) = -1                                      ! do as if we had no restart
         ELSE
            ndelayid(idvar) = MPI_REQUEST_NULL
         END IF
      ENDIF

      IF( ndelayid(idvar) == -1 ) THEN         ! first call without restart: define %z1d from p_in with a blocking allreduce
         !                                       --------------------------
         ALLOCATE(todelay(idvar)%z1d(isz))
         CALL mpi_allreduce( p_in(:), todelay(idvar)%z1d(:), isz, MPI_TYPE, mpi_max, ilocalcomm, ierr )   ! get %z1d
         ndelayid(idvar) = MPI_REQUEST_NULL
      ENDIF

      CALL mpp_delay_rcv( idvar )         ! make sure %z1d is received

      ! send back pout from todelay(idvar)%z1d defined at previous call
      pout(:) = todelay(idvar)%z1d(:)

      ! send p_in into todelay(idvar)%z1d with a non-blocking communication
      ! (PM) Should we get rid of the MPI2 option? MPI3 was released in 2013. Who is still using MPI2?
# if defined key_mpi2
      IF( ln_timing ) CALL tic_tac( .TRUE., ld_global = .TRUE.)
      CALL  mpi_allreduce( p_in(:), todelay(idvar)%z1d(:), isz, MPI_TYPE, mpi_max, ilocalcomm, ierr )
      IF( ln_timing ) CALL tic_tac(.FALSE., ld_global = .TRUE.)
# else
      CALL mpi_iallreduce( p_in(:), todelay(idvar)%z1d(:), isz, MPI_TYPE, mpi_max, ilocalcomm, ndelayid(idvar), ierr )
# endif
#else
      pout(:) = p_in(:)
#endif

   END SUBROUTINE mpp_delay_max


   SUBROUTINE mpp_delay_rcv( kid )
      !!----------------------------------------------------------------------
      !!                   ***  routine mpp_delay_rcv  ***
      !!
      !! ** Purpose :  force barrier for delayed mpp (needed for restart)
      !!
      !!----------------------------------------------------------------------
      INTEGER,INTENT(in   )      ::  kid
      INTEGER ::   ierr
      !!----------------------------------------------------------------------
#if ! defined key_mpi_off
      IF( ln_timing ) CALL tic_tac( .TRUE., ld_global = .TRUE.)
      ! testing ndelayid(kid) is useless as mpi_wait returns immediately if the request handle is MPI_REQUEST_NULL
      CALL mpi_wait( ndelayid(kid), MPI_STATUS_IGNORE, ierr ) ! after this ndelayid(kid) = MPI_REQUEST_NULL
      IF( ln_timing ) CALL tic_tac( .FALSE., ld_global = .TRUE.)
      IF( ASSOCIATED(todelay(kid)%y1d) )   todelay(kid)%z1d(:) = REAL(todelay(kid)%y1d(:), wp)  ! define %z1d from %y1d
#endif
   END SUBROUTINE mpp_delay_rcv

   SUBROUTINE mpp_bcast_nml( cdnambuff , kleng )
      CHARACTER(LEN=:)    , ALLOCATABLE, INTENT(INOUT) :: cdnambuff
      INTEGER                          , INTENT(INOUT) :: kleng
      !!----------------------------------------------------------------------
      !!                  ***  routine mpp_bcast_nml  ***
      !!
      !! ** Purpose :   broadcast namelist character buffer
      !!
      !!----------------------------------------------------------------------
      !!
      INTEGER ::   iflag
      !!----------------------------------------------------------------------
      !
#if ! defined key_mpi_off
      call MPI_BCAST(kleng, 1, MPI_INTEGER, 0, mpi_comm_oce, iflag)
      call MPI_BARRIER(mpi_comm_oce, iflag)
!$AGRIF_DO_NOT_TREAT
      IF ( .NOT. ALLOCATED(cdnambuff) ) ALLOCATE( CHARACTER(LEN=kleng) :: cdnambuff )
!$AGRIF_END_DO_NOT_TREAT
      call MPI_BCAST(cdnambuff, kleng, MPI_CHARACTER, 0, mpi_comm_oce, iflag)
      call MPI_BARRIER(mpi_comm_oce, iflag)
#endif
      !
   END SUBROUTINE mpp_bcast_nml
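
   !! Minimal usage sketch for mpp_bcast_nml (kept as comments; the local names
   !! are illustrative): the reading rank fills the buffer and its length, the
   !! other ranks allocate and receive a copy.
   !!    CHARACTER(LEN=:), ALLOCATABLE ::   cdnambuff
   !!    INTEGER                       ::   ilen
   !!    IF( mpprank == 0 )   ilen = LEN( cdnambuff )   ! buffer filled beforehand on the reading rank
   !!    CALL mpp_bcast_nml( cdnambuff, ilen )          ! broadcast to all ranks (allocates where needed)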


   !!----------------------------------------------------------------------
   !!    ***  mppmax_a_int, mppmax_int, mppmax_a_real, mppmax_real  ***
   !!
   !!----------------------------------------------------------------------
   !!
#  define OPERATION_MAX
#  define INTEGER_TYPE
#  define DIM_0d
#     define ROUTINE_ALLREDUCE           mppmax_int
#     include "mpp_allreduce_generic.h90"
#     undef ROUTINE_ALLREDUCE
#  undef DIM_0d
#  define DIM_1d
#     define ROUTINE_ALLREDUCE           mppmax_a_int
#     include "mpp_allreduce_generic.h90"
#     undef ROUTINE_ALLREDUCE
#  undef DIM_1d
#  undef INTEGER_TYPE
!
   !!
   !!   ----   SINGLE PRECISION VERSIONS
   !!
#  define SINGLE_PRECISION
#  define REAL_TYPE
#  define DIM_0d
#     define ROUTINE_ALLREDUCE           mppmax_real_sp
#     include "mpp_allreduce_generic.h90"
#     undef ROUTINE_ALLREDUCE
#  undef DIM_0d
#  define DIM_1d
#     define ROUTINE_ALLREDUCE           mppmax_a_real_sp
#     include "mpp_allreduce_generic.h90"
#     undef ROUTINE_ALLREDUCE
#  undef DIM_1d
#  undef SINGLE_PRECISION
   !!
   !!
   !!   ----   DOUBLE PRECISION VERSIONS
   !!
!
#  define DIM_0d
#     define ROUTINE_ALLREDUCE           mppmax_real_dp
#     include "mpp_allreduce_generic.h90"
#     undef ROUTINE_ALLREDUCE
#  undef DIM_0d
#  define DIM_1d
#     define ROUTINE_ALLREDUCE           mppmax_a_real_dp
#     include "mpp_allreduce_generic.h90"
#     undef ROUTINE_ALLREDUCE
#  undef DIM_1d
#  undef REAL_TYPE
#  undef OPERATION_MAX
   !!----------------------------------------------------------------------
   !!    ***  mppmin_a_int, mppmin_int, mppmin_a_real, mppmin_real  ***
   !!
   !!----------------------------------------------------------------------
   !!
#  define OPERATION_MIN
#  define INTEGER_TYPE
#  define DIM_0d
#     define ROUTINE_ALLREDUCE           mppmin_int
#     include "mpp_allreduce_generic.h90"
#     undef ROUTINE_ALLREDUCE
#  undef DIM_0d
#  define DIM_1d
#     define ROUTINE_ALLREDUCE           mppmin_a_int
#     include "mpp_allreduce_generic.h90"
#     undef ROUTINE_ALLREDUCE
#  undef DIM_1d
#  undef INTEGER_TYPE
!
   !!
   !!   ----   SINGLE PRECISION VERSIONS
   !!
#  define SINGLE_PRECISION
#  define REAL_TYPE
#  define DIM_0d
#     define ROUTINE_ALLREDUCE           mppmin_real_sp
#     include "mpp_allreduce_generic.h90"
#     undef ROUTINE_ALLREDUCE
#  undef DIM_0d
#  define DIM_1d
#     define ROUTINE_ALLREDUCE           mppmin_a_real_sp
#     include "mpp_allreduce_generic.h90"
#     undef ROUTINE_ALLREDUCE
#  undef DIM_1d
#  undef SINGLE_PRECISION
   !!
   !!   ----   DOUBLE PRECISION VERSIONS
   !!

#  define DIM_0d
#     define ROUTINE_ALLREDUCE           mppmin_real_dp
#     include "mpp_allreduce_generic.h90"
#     undef ROUTINE_ALLREDUCE
#  undef DIM_0d
#  define DIM_1d
#     define ROUTINE_ALLREDUCE           mppmin_a_real_dp
#     include "mpp_allreduce_generic.h90"
#     undef ROUTINE_ALLREDUCE
#  undef DIM_1d
#  undef REAL_TYPE
#  undef OPERATION_MIN

   !!----------------------------------------------------------------------
   !!    ***  mppsum_a_int, mppsum_int, mppsum_a_real, mppsum_real  ***
   !!
   !!   Global sum of 1D array or a variable (integer, real or complex)
   !!----------------------------------------------------------------------
   !!
#  define OPERATION_SUM
#  define INTEGER_TYPE
#  define DIM_0d
#     define ROUTINE_ALLREDUCE           mppsum_int
#     include "mpp_allreduce_generic.h90"
#     undef ROUTINE_ALLREDUCE
#  undef DIM_0d
#  define DIM_1d
#     define ROUTINE_ALLREDUCE           mppsum_a_int
#     include "mpp_allreduce_generic.h90"
#     undef ROUTINE_ALLREDUCE
#  undef DIM_1d
#  undef INTEGER_TYPE

   !!
   !!   ----   SINGLE PRECISION VERSIONS
   !!
#  define OPERATION_SUM
#  define SINGLE_PRECISION
#  define REAL_TYPE
#  define DIM_0d
#     define ROUTINE_ALLREDUCE           mppsum_real_sp
#     include "mpp_allreduce_generic.h90"
#     undef ROUTINE_ALLREDUCE
#  undef DIM_0d
#  define DIM_1d
#     define ROUTINE_ALLREDUCE           mppsum_a_real_sp
#     include "mpp_allreduce_generic.h90"
#     undef ROUTINE_ALLREDUCE
#  undef DIM_1d
#  undef REAL_TYPE
#  undef OPERATION_SUM

#  undef SINGLE_PRECISION

   !!
   !!   ----   DOUBLE PRECISION VERSIONS
   !!
#  define OPERATION_SUM
#  define REAL_TYPE
#  define DIM_0d
#     define ROUTINE_ALLREDUCE           mppsum_real_dp
#     include "mpp_allreduce_generic.h90"
#     undef ROUTINE_ALLREDUCE
#  undef DIM_0d
#  define DIM_1d
#     define ROUTINE_ALLREDUCE           mppsum_a_real_dp
#     include "mpp_allreduce_generic.h90"
#     undef ROUTINE_ALLREDUCE
#  undef DIM_1d
#  undef REAL_TYPE
#  undef OPERATION_SUM

#  define OPERATION_SUM_DD
#  define COMPLEX_TYPE
#  define DIM_0d
#     define ROUTINE_ALLREDUCE           mppsum_realdd
#     include "mpp_allreduce_generic.h90"
#     undef ROUTINE_ALLREDUCE
#  undef DIM_0d
#  define DIM_1d
#     define ROUTINE_ALLREDUCE           mppsum_a_realdd
#     include "mpp_allreduce_generic.h90"
#     undef ROUTINE_ALLREDUCE
#  undef DIM_1d
#  undef COMPLEX_TYPE
#  undef OPERATION_SUM_DD

   !!----------------------------------------------------------------------
   !!    ***  mpp_minloc2d, mpp_minloc3d, mpp_maxloc2d, mpp_maxloc3d
   !!
   !!----------------------------------------------------------------------
   !!
   !!
   !!   ----   SINGLE PRECISION VERSIONS
   !!
#  define SINGLE_PRECISION
#  define OPERATION_MINLOC
#  define DIM_2d
#     define ROUTINE_LOC           mpp_minloc2d_sp
#     include "mpp_loc_generic.h90"
#     undef ROUTINE_LOC
#  undef DIM_2d
#  define DIM_3d
#     define ROUTINE_LOC           mpp_minloc3d_sp
#     include "mpp_loc_generic.h90"
#     undef ROUTINE_LOC
#  undef DIM_3d
#  undef OPERATION_MINLOC

#  define OPERATION_MAXLOC
#  define DIM_2d
#     define ROUTINE_LOC           mpp_maxloc2d_sp
#     include "mpp_loc_generic.h90"
#     undef ROUTINE_LOC
#  undef DIM_2d
#  define DIM_3d
#     define ROUTINE_LOC           mpp_maxloc3d_sp
#     include "mpp_loc_generic.h90"
#     undef ROUTINE_LOC
#  undef DIM_3d
#  undef OPERATION_MAXLOC
#  undef SINGLE_PRECISION
   !!
   !!   ----   DOUBLE PRECISION VERSIONS
   !!
#  define OPERATION_MINLOC
#  define DIM_2d
#     define ROUTINE_LOC           mpp_minloc2d_dp
#     include "mpp_loc_generic.h90"
#     undef ROUTINE_LOC
#  undef DIM_2d
#  define DIM_3d
#     define ROUTINE_LOC           mpp_minloc3d_dp
#     include "mpp_loc_generic.h90"
#     undef ROUTINE_LOC
#  undef DIM_3d
#  undef OPERATION_MINLOC

#  define OPERATION_MAXLOC
#  define DIM_2d
#     define ROUTINE_LOC           mpp_maxloc2d_dp
#     include "mpp_loc_generic.h90"
#     undef ROUTINE_LOC
#  undef DIM_2d
#  define DIM_3d
#     define ROUTINE_LOC           mpp_maxloc3d_dp
#     include "mpp_loc_generic.h90"
#     undef ROUTINE_LOC
#  undef DIM_3d
#  undef OPERATION_MAXLOC


   SUBROUTINE mppsync()
      !!----------------------------------------------------------------------
      !!                  ***  routine mppsync  ***
      !!
      !! ** Purpose :   synchronise all processes (MPI barrier)
      !!
      !!-----------------------------------------------------------------------
      INTEGER :: ierror
      !!-----------------------------------------------------------------------
      !
#if ! defined key_mpi_off
      CALL mpi_barrier( mpi_comm_oce, ierror )
#endif
      !
   END SUBROUTINE mppsync


   SUBROUTINE mppstop( ld_abort )
      !!----------------------------------------------------------------------
      !!                  ***  routine mppstop  ***
      !!
      !! ** purpose :   Stop the massively parallel processing (MPI finalize or abort)
      !!
      !!----------------------------------------------------------------------
      LOGICAL, OPTIONAL, INTENT(in) :: ld_abort    ! if true, abort MPI instead of a clean finalize
      LOGICAL ::   ll_abort
      INTEGER ::   info, ierr
      !!----------------------------------------------------------------------
      ll_abort = .FALSE.
      IF( PRESENT(ld_abort) ) ll_abort = ld_abort
      !
#if ! defined key_mpi_off
      IF(ll_abort) THEN
         CALL mpi_abort( MPI_COMM_WORLD, 123, info )
      ELSE
         CALL mppsync
         CALL mpi_finalize( info )
      ENDIF
#endif
      IF( ll_abort ) STOP 123
      !
   END SUBROUTINE mppstop


   SUBROUTINE mpp_comm_free( kcom )
      !!----------------------------------------------------------------------
      INTEGER, INTENT(inout) ::   kcom
      !!
      INTEGER :: ierr
      !!----------------------------------------------------------------------
      !
#if ! defined key_mpi_off
      CALL MPI_COMM_FREE(kcom, ierr)
#endif
      !
   END SUBROUTINE mpp_comm_free


   SUBROUTINE mpp_ini_znl( kumout )
      !!----------------------------------------------------------------------
      !!               ***  routine mpp_ini_znl  ***
      !!
      !! ** Purpose :   Initialize special communicator for computing zonal sum
      !!
      !! ** Method  : - Look for processors in the same row
      !!              - Put their number in nrank_znl
      !!              - Create group for the znl processors
      !!              - Create a communicator for znl processors
      !!              - Determine if processor should write znl files
      !!
      !! ** output
      !!      ndim_rank_znl = number of processors on the same row
      !!      ngrp_znl = group ID for the znl processors
      !!      ncomm_znl = communicator for the znl procs.
      !!      l_znl_root = true on the first processor (i=1) of the row
      !!
      !!----------------------------------------------------------------------
      INTEGER, INTENT(in) ::   kumout   ! ocean.output logical units
      !
      INTEGER :: jproc      ! dummy loop integer
      INTEGER :: ierr, ii   ! local integer
      INTEGER, ALLOCATABLE, DIMENSION(:) ::   kwork
      !!----------------------------------------------------------------------
#if ! defined key_mpi_off
      !-$$     WRITE (numout,*) 'mpp_ini_znl ', mpprank, ' - ngrp_world     : ', ngrp_world
      !-$$     WRITE (numout,*) 'mpp_ini_znl ', mpprank, ' - mpi_comm_world : ', mpi_comm_world
      !-$$     WRITE (numout,*) 'mpp_ini_znl ', mpprank, ' - mpi_comm_oce   : ', mpi_comm_oce
      !
      ALLOCATE( kwork(jpnij), STAT=ierr )
      IF( ierr /= 0 ) CALL ctl_stop( 'STOP', 'mpp_ini_znl : failed to allocate 1D array of length jpnij')

      IF( jpnj == 1 ) THEN
         ngrp_znl  = ngrp_world
         ncomm_znl = mpi_comm_oce
      ELSE
         !
         CALL MPI_ALLGATHER ( njmpp, 1, mpi_integer, kwork, 1, mpi_integer, mpi_comm_oce, ierr )
         !-$$        WRITE (numout,*) 'mpp_ini_znl ', mpprank, ' - kwork pour njmpp : ', kwork
         !-$$        CALL flush(numout)
         !
         ! Count number of processors on the same row
         ndim_rank_znl = 0
         DO jproc=1,jpnij
            IF ( kwork(jproc) == njmpp ) THEN
               ndim_rank_znl = ndim_rank_znl + 1
            ENDIF
         END DO
         !-$$        WRITE (numout,*) 'mpp_ini_znl ', mpprank, ' - ndim_rank_znl : ', ndim_rank_znl
         !-$$        CALL flush(numout)
         ! Allocate the right size to nrank_znl
         IF (ALLOCATED (nrank_znl)) DEALLOCATE(nrank_znl)
         ALLOCATE(nrank_znl(ndim_rank_znl))
         ii = 0
         nrank_znl (:) = 0
         DO jproc=1,jpnij
            IF ( kwork(jproc) == njmpp) THEN
               ii = ii + 1
               nrank_znl(ii) = jproc -1
            ENDIF
         END DO
         !-$$        WRITE (numout,*) 'mpp_ini_znl ', mpprank, ' - nrank_znl : ', nrank_znl
         !-$$        CALL flush(numout)

         ! Create the opa group
         CALL MPI_COMM_GROUP(mpi_comm_oce,ngrp_opa,ierr)
         !-$$        WRITE (numout,*) 'mpp_ini_znl ', mpprank, ' - ngrp_opa : ', ngrp_opa
         !-$$        CALL flush(numout)

         ! Create the znl group from the opa group
         CALL MPI_GROUP_INCL  ( ngrp_opa, ndim_rank_znl, nrank_znl, ngrp_znl, ierr )
         !-$$        WRITE (numout,*) 'mpp_ini_znl ', mpprank, ' - ngrp_znl ', ngrp_znl
         !-$$        CALL flush(numout)

         ! Create the znl communicator from the opa communicator, ie the pool of procs in the same row
         CALL MPI_COMM_CREATE ( mpi_comm_oce, ngrp_znl, ncomm_znl, ierr )
         !-$$        WRITE (numout,*) 'mpp_ini_znl ', mpprank, ' - ncomm_znl ', ncomm_znl
         !-$$        CALL flush(numout)
         !
      END IF

      ! Determine if the processor is the first one (starting from i=1) on the row
      IF ( jpni == 1 ) THEN
         l_znl_root = .TRUE.
      ELSE
         l_znl_root = .FALSE.
         kwork (1) = nimpp
         CALL mpp_min ( 'lib_mpp', kwork(1), kcom = ncomm_znl)
         IF ( nimpp == kwork(1)) l_znl_root = .TRUE.
      END IF

      DEALLOCATE(kwork)
#endif

   END SUBROUTINE mpp_ini_znl
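
   !! Minimal sketch of how the zonal communicator is used once mpp_ini_znl has
   !! been called (kept as comments; zzonal is a hypothetical array name):
   !!    REAL(wp), DIMENSION(jpj) ::   zzonal   ! partial zonal sums on this process row
   !!    CALL mpp_sum( 'lib_mpp', zzonal, kcom = ncomm_znl )   ! reduce over the row only
   !!    IF( l_znl_root )   WRITE(numout,*) 'zonal mean available on the row root'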

   
   SUBROUTINE mpp_ini_nc( khls )
      !!----------------------------------------------------------------------
      !!               ***  routine mpp_ini_nc  ***
      !!
      !! ** Purpose :   Initialize special communicators for MPI3 neighbourhood
      !!                collectives
      !!
      !! ** Method  : - Create graph communicators starting from the processes
      !!                distribution along i and j directions
      !!
      !! ** output
      !!         mpi_nc_com4 = MPI3 neighbourhood collectives communicator
      !!         mpi_nc_com8 = MPI3 neighbourhood collectives communicator (with diagonals)
      !!----------------------------------------------------------------------
      INTEGER,             INTENT(in   ) ::   khls        ! halo size, default = nn_hls
      !
      INTEGER, DIMENSION(:), ALLOCATABLE :: iSnei4, iRnei4, iSnei8, iRnei8
      INTEGER                            :: iScnt4, iRcnt4, iScnt8, iRcnt8
      INTEGER                            :: ierr
      LOGICAL, PARAMETER                 :: ireord = .FALSE.
      !!----------------------------------------------------------------------
#if ! defined key_mpi_off && ! defined key_mpi2
      
      iScnt4 = COUNT( mpiSnei(khls,1:4) >= 0 )
      iRcnt4 = COUNT( mpiRnei(khls,1:4) >= 0 )
      iScnt8 = COUNT( mpiSnei(khls,1:8) >= 0 )
      iRcnt8 = COUNT( mpiRnei(khls,1:8) >= 0 )

      ALLOCATE( iSnei4(iScnt4), iRnei4(iRcnt4), iSnei8(iScnt8), iRnei8(iRcnt8) )   ! ok if icnt4 or icnt8 = 0

      iSnei4 = PACK( mpiSnei(khls,1:4), mask = mpiSnei(khls,1:4) >= 0 )
      iRnei4 = PACK( mpiRnei(khls,1:4), mask = mpiRnei(khls,1:4) >= 0 )
      iSnei8 = PACK( mpiSnei(khls,1:8), mask = mpiSnei(khls,1:8) >= 0 )
      iRnei8 = PACK( mpiRnei(khls,1:8), mask = mpiRnei(khls,1:8) >= 0 )

      CALL MPI_Dist_graph_create_adjacent( mpi_comm_oce, iScnt4, iSnei4, MPI_UNWEIGHTED, iRcnt4, iRnei4, MPI_UNWEIGHTED,   &
         &                                 MPI_INFO_NULL, ireord, mpi_nc_com4(khls), ierr )
      CALL MPI_Dist_graph_create_adjacent( mpi_comm_oce, iScnt8, iSnei8, MPI_UNWEIGHTED, iRcnt8, iRnei8, MPI_UNWEIGHTED,   &
         &                                 MPI_INFO_NULL, ireord, mpi_nc_com8(khls), ierr)

      DEALLOCATE( iSnei4, iRnei4, iSnei8, iRnei8 )
#endif
   END SUBROUTINE mpp_ini_nc
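
   !! Hedged sketch of the intended use of the graph communicators built above
   !! (kept as comments; the buffers, counts and displacements are placeholders
   !! to be built by the halo exchange, see lbclnk):
   !!    CALL MPI_Neighbor_alltoallv( zsnd, isndcnt, isnddsp, MPI_DOUBLE_PRECISION,   &
   !!       &                         zrcv, ircvcnt, ircvdsp, MPI_DOUBLE_PRECISION,   &
   !!       &                         mpi_nc_com8(nn_hls), ierr )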


   SUBROUTINE mpp_ini_northgather
      !!----------------------------------------------------------------------
      !!               ***  routine mpp_ini_northgather  ***
      !!
      !! ** Purpose :   Initialize special communicator for north folding
      !!      condition together with global variables needed in the mpp folding
      !!
      !! ** Method  : - Look for northern processors
      !!              - Put their number in nrank_north
      !!              - Create groups for the world processors and the north processors
      !!              - Create a communicator for northern processors
      !!
      !! ** output
      !!      ndim_rank_north = number of processors in the northern line
      !!      nrank_north (ndim_rank_north) = number  of the northern procs.
      !!      ngrp_world = group ID for the world processors
      !!      ngrp_north = group ID for the northern processors
      !!      ncomm_north = communicator for the northern procs.
      !!      north_root = number (in the world) of proc 0 in the northern comm.
      !!
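      !! ** Note    :   illustrative sketch only (not executed here); ncomm_north can
      !!      then be used for collectives restricted to the northern row, e.g. with
      !!      hypothetical buffers ztabl, ztabg and count icount:
      !!         IF( ncomm_north /= MPI_COMM_NULL )   &
      !!            CALL MPI_ALLGATHER( ztabl, icount, MPI_DOUBLE_PRECISION,   &
      !!               &                ztabg, icount, MPI_DOUBLE_PRECISION, ncomm_north, ierr )
      !!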
      !!----------------------------------------------------------------------
      INTEGER ::   ierr
      INTEGER ::   jjproc
      INTEGER ::   ii, ji
      !!----------------------------------------------------------------------
      !
#if ! defined key_mpi_off
      !
      ! Count how many procs lie on the northern boundary
      ndim_rank_north = 0
      DO jjproc = 1, jpni
         IF( nfproc(jjproc) /= -1 )   ndim_rank_north = ndim_rank_north + 1
      END DO
      !
      ! Allocate the right size to nrank_north
      IF (ALLOCATED (nrank_north)) DEALLOCATE(nrank_north)
      ALLOCATE( nrank_north(ndim_rank_north) )

      ! Fill the nrank_north array with the proc. numbers of the northern procs.
      ! Note : ranks start at 0 in MPI
      ii = 0
      DO ji = 1, jpni
         IF ( nfproc(ji) /= -1   ) THEN
            ii=ii+1
            nrank_north(ii)=nfproc(ji)
         END IF
      END DO
      !
      ! create the world group
      CALL MPI_COMM_GROUP( mpi_comm_oce, ngrp_world, ierr )
      !
      ! Create the North group from the world group
      CALL MPI_GROUP_INCL( ngrp_world, ndim_rank_north, nrank_north, ngrp_north, ierr )
      !
      ! Create the North communicator, i.e. the pool of procs in the north group
      CALL MPI_COMM_CREATE( mpi_comm_oce, ngrp_north, ncomm_north, ierr )
      !
#endif
   END SUBROUTINE mpp_ini_northgather


   SUBROUTINE DDPDD_MPI( ydda, yddb, ilen, itype )
      !!---------------------------------------------------------------------
      !!   Routine DDPDD_MPI: used by reduction operator MPI_SUMDD
      !!
      !!   Modification of original codes written by David H. Bailey
      !!   This subroutine computes yddb(i) = ydda(i)+yddb(i)
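      !!
      !!   Note (illustrative sketch only, not executed here): this routine is the
      !!   user function typically attached to the MPI_SUMDD reduction operator,
      !!      CALL MPI_OP_CREATE( DDPDD_MPI, .TRUE., MPI_SUMDD, ierr )
      !!   after which a reproducible global sum can be obtained with, e.g.,
      !!      CALL MPI_ALLREDUCE( zlocal, zglobal, 1, MPI_DOUBLE_COMPLEX, MPI_SUMDD, mpi_comm_oce, ierr )
      !!   where zlocal and zglobal are hypothetical COMPLEX(dp) scalars.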
      !!---------------------------------------------------------------------
      INTEGER                     , INTENT(in)    ::   ilen, itype
      COMPLEX(dp), DIMENSION(ilen), INTENT(in)    ::   ydda
      COMPLEX(dp), DIMENSION(ilen), INTENT(inout) ::   yddb
      !
      REAL(dp) :: zerr, zt1, zt2    ! local work variables
      INTEGER  :: ji, ztmp           ! local scalar
      !!---------------------------------------------------------------------
      !
      ztmp = itype   ! avoid compilation warning
      !
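      ! Each complex element packs a double-double number: the real part carries the
      ! high-order word of the running sum and the imaginary part the low-order word
      ! (the accumulated rounding error), which greatly reduces the dependence of the
      ! result on the summation order.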
      DO ji=1,ilen
         ! Compute ydda + yddb using Knuth's trick.
         zt1  = real(ydda(ji)) + real(yddb(ji))
         zerr = zt1 - real(ydda(ji))
         zt2  = ((real(yddb(ji)) - zerr) + (real(ydda(ji)) - (zt1 - zerr))) &
                + aimag(ydda(ji)) + aimag(yddb(ji))

         ! The result is zt1 + zt2, after normalization.
         yddb(ji) = cmplx ( zt1 + zt2, zt2 - ((zt1 + zt2) - zt1),wp )
      END DO
      !
   END SUBROUTINE DDPDD_MPI


   SUBROUTINE mpp_report( cdname, kpk, kpl, kpf, ld_lbc, ld_glb, ld_dlg )
      !!----------------------------------------------------------------------
      !!                  ***  routine mpp_report  ***
      !!
      !! ** Purpose :   report the use of mpp routines per time step
      !!
      !!----------------------------------------------------------------------
      CHARACTER(len=*),           INTENT(in   ) ::   cdname      ! name of the calling subroutine
      INTEGER         , OPTIONAL, INTENT(in   ) ::   kpk, kpl, kpf
      LOGICAL         , OPTIONAL, INTENT(in   ) ::   ld_lbc, ld_glb, ld_dlg
      !!
      CHARACTER(len=128)                        ::   ccountname  ! name of a subroutine to count communications
      LOGICAL ::   ll_lbc, ll_glb, ll_dlg
      INTEGER ::    ji,  jj,  jk,  jh, jf, jcount   ! dummy loop indices
      !!----------------------------------------------------------------------
#if ! defined key_mpi_off
      !
      ll_lbc = .FALSE.
      IF( PRESENT(ld_lbc) ) ll_lbc = ld_lbc
      ll_glb = .FALSE.
      IF( PRESENT(ld_glb) ) ll_glb = ld_glb
      ll_dlg = .FALSE.
      IF( PRESENT(ld_dlg) ) ll_dlg = ld_dlg
      !
      ! reporting frequency: use the surface boundary condition frequency (ncom_fsbc)
      ncom_freq = ncom_fsbc
      !
      IF ( ncom_stp == nit000+ncom_freq ) THEN   ! avoid counting extra communications from potential initializations at nit000
         IF( ll_lbc ) THEN
            IF( .NOT. ALLOCATED(ncomm_sequence) ) ALLOCATE( ncomm_sequence(ncom_rec_max,2) )
            IF( .NOT. ALLOCATED(    crname_lbc) ) ALLOCATE(     crname_lbc(ncom_rec_max  ) )
            n_sequence_lbc = n_sequence_lbc + 1
            IF( n_sequence_lbc > ncom_rec_max ) CALL ctl_stop( 'STOP', 'lib_mpp, increase ncom_rec_max' )   ! deadlock
            crname_lbc(n_sequence_lbc) = cdname     ! keep the name of the calling routine
            ncomm_sequence(n_sequence_lbc,1) = kpk*kpl   ! size of 3rd and 4th dimensions
            ncomm_sequence(n_sequence_lbc,2) = kpf       ! number of arrays to be treated (multi)
         ENDIF
         IF( ll_glb ) THEN
            IF( .NOT. ALLOCATED(crname_glb) ) ALLOCATE( crname_glb(ncom_rec_max) )
            n_sequence_glb = n_sequence_glb + 1
            IF( n_sequence_glb > ncom_rec_max ) CALL ctl_stop( 'STOP', 'lib_mpp, increase ncom_rec_max' )   ! deadlock
            crname_glb(n_sequence_glb) = cdname     ! keep the name of the calling routine
         ENDIF
         IF( ll_dlg ) THEN
            IF( .NOT. ALLOCATED(crname_dlg) ) ALLOCATE( crname_dlg(ncom_rec_max) )
            n_sequence_dlg = n_sequence_dlg + 1
            IF( n_sequence_dlg > ncom_rec_max ) CALL ctl_stop( 'STOP', 'lib_mpp, increase ncom_rec_max' )   ! deadlock
            crname_dlg(n_sequence_dlg) = cdname     ! keep the name of the calling routine
         ENDIF
      ELSE IF ( ncom_stp == nit000+2*ncom_freq ) THEN
         CALL ctl_opn( numcom, 'communication_report.txt', 'REPLACE', 'FORMATTED', 'SEQUENTIAL', -1, numout, .FALSE., narea )
         WRITE(numcom,*) ' '
         WRITE(numcom,*) ' ------------------------------------------------------------'
         WRITE(numcom,*) ' Communication pattern report (second oce+sbc+top time step):'
         WRITE(numcom,*) ' ------------------------------------------------------------'
         WRITE(numcom,*) ' '
         WRITE(numcom,'(A,I4)') ' Exchanged halos : ', n_sequence_lbc
         jj = 0; jk = 0; jf = 0; jh = 0
         DO ji = 1, n_sequence_lbc
            IF ( ncomm_sequence(ji,1) .GT. 1 ) jk = jk + 1
            IF ( ncomm_sequence(ji,2) .GT. 1 ) jf = jf + 1
            IF ( ncomm_sequence(ji,1) .GT. 1 .AND. ncomm_sequence(ji,2) .GT. 1 ) jj = jj + 1
            jh = MAX (jh, ncomm_sequence(ji,1)*ncomm_sequence(ji,2))
         END DO
         WRITE(numcom,'(A,I3)') ' 3D Exchanged halos : ', jk
         WRITE(numcom,'(A,I3)') ' Multi arrays exchanged halos : ', jf
         WRITE(numcom,'(A,I3)') '   from which 3D : ', jj
         WRITE(numcom,'(A,I10)') ' Array max size : ', jh*jpi*jpj
         WRITE(numcom,*) ' '
         WRITE(numcom,*) ' lbc_lnk called'
         DO ji = 1, n_sequence_lbc - 1
            IF ( crname_lbc(ji) /= 'already counted' ) THEN
               ccountname = crname_lbc(ji)
               crname_lbc(ji) = 'already counted'
               jcount = 1
               DO jj = ji + 1, n_sequence_lbc
                  IF ( ccountname ==  crname_lbc(jj) ) THEN
                     jcount = jcount + 1
                     crname_lbc(jj) = 'already counted'
                  END IF
               END DO
               WRITE(numcom,'(A, I4, A, A)') ' - ', jcount,' times by subroutine ', TRIM(ccountname)
            END IF
         END DO
         IF ( crname_lbc(n_sequence_lbc) /= 'already counted' ) THEN
            WRITE(numcom,'(A, I4, A, A)') ' - ', 1,' times by subroutine ', TRIM(crname_lbc(n_sequence_lbc))
         END IF
         WRITE(numcom,*) ' '
         IF ( n_sequence_glb > 0 ) THEN
            WRITE(numcom,'(A,I4)') ' Global communications : ', n_sequence_glb
            jj = 1
            DO ji = 2, n_sequence_glb
               IF( crname_glb(ji-1) /= crname_glb(ji) ) THEN
                  WRITE(numcom,'(A, I4, A, A)') ' - ', jj,' times by subroutine ', TRIM(crname_glb(ji-1))
                  jj = 0
               END IF
               jj = jj + 1
            END DO
            WRITE(numcom,'(A, I4, A, A)') ' - ', jj,' times by subroutine ', TRIM(crname_glb(n_sequence_glb))
            DEALLOCATE(crname_glb)
         ELSE
            WRITE(numcom,*) ' No MPI global communication '
         ENDIF
         WRITE(numcom,*) ' '
         IF ( n_sequence_dlg > 0 ) THEN
            WRITE(numcom,'(A,I4)') ' Delayed global communications : ', n_sequence_dlg
            jj = 1
            DO ji = 2, n_sequence_dlg
               IF( crname_dlg(ji-1) /= crname_dlg(ji) ) THEN
                  WRITE(numcom,'(A, I4, A, A)') ' - ', jj,' times by subroutine ', TRIM(crname_dlg(ji-1))
                  jj = 0
               END IF
               jj = jj + 1
            END DO
            WRITE(numcom,'(A, I4, A, A)') ' - ', jj,' times by subroutine ', TRIM(crname_dlg(n_sequence_dlg))
            DEALLOCATE(crname_dlg)
         ELSE
            WRITE(numcom,*) ' No MPI delayed global communication '
         ENDIF
         WRITE(numcom,*) ' '
         WRITE(numcom,*) ' -----------------------------------------------'
         WRITE(numcom,*) ' '
         DEALLOCATE(ncomm_sequence)
         DEALLOCATE(crname_lbc)
      ENDIF
#endif
   END SUBROUTINE mpp_report


   SUBROUTINE tic_tac (ld_tic, ld_global)

    LOGICAL,           INTENT(IN) :: ld_tic
    LOGICAL, OPTIONAL, INTENT(IN) :: ld_global
    REAL(dp), DIMENSION(2), SAVE :: tic_wt
    REAL(dp),               SAVE :: tic_ct = 0._dp
    INTEGER :: ii
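    ! Expected usage (by convention in the callers): tic_tac(.TRUE.) just before a
    ! blocking MPI call and tic_tac(.FALSE.) just after it, so that waiting_time
    ! cumulates the time spent inside communications and compute_time the time spent
    ! between them; index 1 is used for point-to-point exchanges, index 2 for global ones.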
#if ! defined key_mpi_off

    IF( ncom_stp <= nit000 ) RETURN
    IF( ncom_stp == nitend ) RETURN
    ii = 1
    IF( PRESENT( ld_global ) ) THEN
       IF( ld_global ) ii = 2
    END IF

    IF ( ld_tic ) THEN
       tic_wt(ii) = MPI_Wtime()                                                    ! start count tic->tac (waiting time)
       IF ( tic_ct > 0.0_dp ) compute_time = compute_time + MPI_Wtime() - tic_ct   ! cumulate count tac->tic
    ELSE
       waiting_time(ii) = waiting_time(ii) + MPI_Wtime() - tic_wt(ii)              ! cumulate count tic->tac
       tic_ct = MPI_Wtime()                                                        ! start count tac->tic (waiting time)
    ENDIF
#endif

   END SUBROUTINE tic_tac

#if defined key_mpi_off
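   !!----------------------------------------------------------------------
   !!   Dummy MPI substitutes used when MPI is disabled (key_mpi_off): they
   !!   only satisfy the interfaces used above and return flag values.
   !!----------------------------------------------------------------------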
   SUBROUTINE mpi_wait(request, status, ierror)
      INTEGER                            , INTENT(in   ) ::   request
      INTEGER, DIMENSION(MPI_STATUS_SIZE), INTENT(  out) ::   status
      INTEGER                            , INTENT(  out) ::   ierror
      IF (.FALSE.) THEN   ! to avoid compilation warning
         status(:) = -1
         ierror = -1
      ENDIF
   END SUBROUTINE mpi_wait

   SUBROUTINE mpi_waitall(count, request, status, ierror)
      INTEGER                            , INTENT(in   ) :: count
      INTEGER, DIMENSION(count)          , INTENT(in   ) :: request
      INTEGER, DIMENSION(MPI_STATUS_SIZE), INTENT(  out) :: status
      INTEGER                            , INTENT(  out) :: ierror
      IF (.FALSE.) THEN   ! to avoid compilation warning
         status(:) = -1
         ierror = -1
      ENDIF
   END SUBROUTINE mpi_waitall

   FUNCTION MPI_Wtime()
      REAL(wp) ::  MPI_Wtime
      MPI_Wtime = -1.
   END FUNCTION MPI_Wtime
#endif

   !!----------------------------------------------------------------------
   !!   ctl_stop, ctl_warn, get_unit, ctl_opn, ctl_nam, load_nml   routines
   !!----------------------------------------------------------------------

   SUBROUTINE ctl_stop( cd1, cd2, cd3, cd4, cd5 ,   &
      &                 cd6, cd7, cd8, cd9, cd10 )
      !!----------------------------------------------------------------------
      !!                  ***  ROUTINE  ctl_stop  ***
      !!
      !! ** Purpose :   print an error message in the ocean.output file and
      !!                increment the error number (nstop) by one.
      !!----------------------------------------------------------------------
      CHARACTER(len=*), INTENT(in   )           ::   cd1
      CHARACTER(len=*), INTENT(in   ), OPTIONAL ::        cd2, cd3, cd4, cd5
      CHARACTER(len=*), INTENT(in   ), OPTIONAL ::   cd6, cd7, cd8, cd9, cd10
      !
      CHARACTER(LEN=8) ::   clfmt            ! writing format
      INTEGER          ::   inum
      !!----------------------------------------------------------------------
      !
      nstop = nstop + 1
      !
      IF( cd1 == 'STOP' .AND. narea /= 1 ) THEN    ! Immediate stop: add an error message in the 'ocean.output' file
         CALL ctl_opn( inum, 'ocean.output', 'APPEND', 'FORMATTED', 'SEQUENTIAL', -1, 6, .FALSE. )
         WRITE(inum,*)
         WRITE(inum,*) ' ==>>>   Look for "E R R O R" messages in all existing *ocean.output* files'
         CLOSE(inum)
      ENDIF
      IF( numout == 6 ) THEN                       ! force to open ocean.output file if not already opened
         CALL ctl_opn( numout, 'ocean.output', 'REPLACE', 'FORMATTED', 'SEQUENTIAL', -1, -1, .FALSE., narea )
      ENDIF
      !
                            WRITE(numout,*)
                            WRITE(numout,*) ' ===>>> : E R R O R'
                            WRITE(numout,*)
                            WRITE(numout,*) '         ==========='
                            WRITE(numout,*)
                            WRITE(numout,*) TRIM(cd1)
      IF( PRESENT(cd2 ) )   WRITE(numout,*) TRIM(cd2)
      IF( PRESENT(cd3 ) )   WRITE(numout,*) TRIM(cd3)
      IF( PRESENT(cd4 ) )   WRITE(numout,*) TRIM(cd4)
      IF( PRESENT(cd5 ) )   WRITE(numout,*) TRIM(cd5)
      IF( PRESENT(cd6 ) )   WRITE(numout,*) TRIM(cd6)
      IF( PRESENT(cd7 ) )   WRITE(numout,*) TRIM(cd7)
      IF( PRESENT(cd8 ) )   WRITE(numout,*) TRIM(cd8)
      IF( PRESENT(cd9 ) )   WRITE(numout,*) TRIM(cd9)
      IF( PRESENT(cd10) )   WRITE(numout,*) TRIM(cd10)
                            WRITE(numout,*)
      !
                               CALL FLUSH(numout    )
      IF( numstp     /= -1 )   CALL FLUSH(numstp    )
      IF( numrun     /= -1 )   CALL FLUSH(numrun    )
      IF( numevo_ice /= -1 )   CALL FLUSH(numevo_ice)
      !
      IF( cd1 == 'STOP' ) THEN
         WRITE(numout,*)
         WRITE(numout,*)  'huge E-R-R-O-R : immediate stop'
         WRITE(numout,*)
         CALL FLUSH(numout)
         CALL SLEEP(60)   ! make sure that all output and abort files are written by all cores. 60s should be enough...
         CALL mppstop( ld_abort = .true. )
      ENDIF
      !
   END SUBROUTINE ctl_stop


   SUBROUTINE ctl_warn( cd1, cd2, cd3, cd4, cd5,   &
      &                 cd6, cd7, cd8, cd9, cd10 )
      !!----------------------------------------------------------------------
      !!                  ***  ROUTINE  ctl_warn  ***
      !!
      !! ** Purpose :   print a warning message in the ocean.output file and
      !!                increment the warning number (nwarn) by one.
      !!----------------------------------------------------------------------
      CHARACTER(len=*), INTENT(in), OPTIONAL ::  cd1, cd2, cd3, cd4, cd5
      CHARACTER(len=*), INTENT(in), OPTIONAL ::  cd6, cd7, cd8, cd9, cd10
      !!----------------------------------------------------------------------
      !
      nwarn = nwarn + 1
      !
      IF(lwp) THEN
                               WRITE(numout,*)
                               WRITE(numout,*) ' ===>>> : W A R N I N G'
                               WRITE(numout,*)
                               WRITE(numout,*) '         ==============='
                               WRITE(numout,*)
         IF( PRESENT(cd1 ) )   WRITE(numout,*) TRIM(cd1)
         IF( PRESENT(cd2 ) )   WRITE(numout,*) TRIM(cd2)
         IF( PRESENT(cd3 ) )   WRITE(numout,*) TRIM(cd3)
         IF( PRESENT(cd4 ) )   WRITE(numout,*) TRIM(cd4)
         IF( PRESENT(cd5 ) )   WRITE(numout,*) TRIM(cd5)
         IF( PRESENT(cd6 ) )   WRITE(numout,*) TRIM(cd6)
         IF( PRESENT(cd7 ) )   WRITE(numout,*) TRIM(cd7)
         IF( PRESENT(cd8 ) )   WRITE(numout,*) TRIM(cd8)
         IF( PRESENT(cd9 ) )   WRITE(numout,*) TRIM(cd9)
         IF( PRESENT(cd10) )   WRITE(numout,*) TRIM(cd10)
                               WRITE(numout,*)
      ENDIF
      CALL FLUSH(numout)
      !
   END SUBROUTINE ctl_warn


   SUBROUTINE ctl_opn( knum, cdfile, cdstat, cdform, cdacce, klengh, kout, ldwp, karea )
      !!----------------------------------------------------------------------
      !!                  ***  ROUTINE ctl_opn  ***
      !!
      !! ** Purpose :   Open file and check if required file is available.
      !!
      !! ** Method  :   Fortran OPEN
      !!----------------------------------------------------------------------
      INTEGER          , INTENT(  out) ::   knum      ! logical unit to open
      CHARACTER(len=*) , INTENT(in   ) ::   cdfile    ! file name to open
      CHARACTER(len=*) , INTENT(in   ) ::   cdstat    ! disposition specifier
      CHARACTER(len=*) , INTENT(in   ) ::   cdform    ! formatting specifier
      CHARACTER(len=*) , INTENT(in   ) ::   cdacce    ! access specifier
      INTEGER          , INTENT(in   ) ::   klengh    ! record length
      INTEGER          , INTENT(in   ) ::   kout      ! number of logical units for write
      LOGICAL          , INTENT(in   ) ::   ldwp      ! boolean term for print
      INTEGER, OPTIONAL, INTENT(in   ) ::   karea     ! proc number
      !
      CHARACTER(len=80) ::   clfile
      CHARACTER(LEN=10) ::   clfmt            ! writing format
      INTEGER           ::   iost
      INTEGER           ::   idg              ! number of digits
      !!----------------------------------------------------------------------
      !
      ! adapt filename
      ! ----------------
      clfile = TRIM(cdfile)
      IF( PRESENT( karea ) ) THEN
         IF( karea > 1 ) THEN
            ! Warning: jpnij may not be defined yet when ctl_opn is called -> use mppsize instead of jpnij
            idg = MAX( INT(LOG10(REAL(MAX(1,mppsize-1),wp))) + 1, 4 )      ! how many digits do we need to write? min=4, max=9
            WRITE(clfmt, "('(a,a,i', i1, '.', i1, ')')") idg, idg          ! '(a,a,ix.x)'
            WRITE(clfile, clfmt) TRIM(clfile), '_', karea-1
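            ! e.g. (hypothetical values) with mppsize = 512 and karea = 8:
            ! idg = 4, clfmt = '(a,a,i4.4)' and 'ocean.output' becomes 'ocean.output_0007'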
         ENDIF
      ENDIF
#if defined key_agrif
      IF( .NOT. Agrif_Root() )   clfile = TRIM(Agrif_CFixed())//'_'//TRIM(clfile)
      knum=Agrif_Get_Unit()
#else
      knum=get_unit()
#endif
      IF( TRIM(cdfile) == '/dev/null' )   clfile = TRIM(cdfile)   ! force the use of /dev/null
      !
      IF(       cdacce(1:6) == 'DIRECT' )  THEN   ! cdacce always has more than 6 characters
         OPEN( UNIT=knum, FILE=clfile, FORM=cdform, ACCESS=cdacce, STATUS=cdstat, RECL=klengh         , ERR=100, IOSTAT=iost )
      ELSE IF( TRIM(cdstat) == 'APPEND' )  THEN   ! cdstat can have fewer than 6 characters
         OPEN( UNIT=knum, FILE=clfile, FORM=cdform, ACCESS=cdacce, STATUS='UNKNOWN', POSITION='APPEND', ERR=100, IOSTAT=iost )
      ELSE
         OPEN( UNIT=knum, FILE=clfile, FORM=cdform, ACCESS=cdacce, STATUS=cdstat                      , ERR=100, IOSTAT=iost )
      ENDIF
      IF( iost /= 0 .AND. TRIM(clfile) == '/dev/null' ) &   ! for windows
         &  OPEN(UNIT=knum,FILE='NUL', FORM=cdform, ACCESS=cdacce, STATUS=cdstat                      , ERR=100, IOSTAT=iost )
      IF( iost == 0 ) THEN
         IF(ldwp .AND. kout > 0) THEN
            WRITE(kout,*) '     file   : ', TRIM(clfile),' open ok'
            WRITE(kout,*) '     unit   = ', knum
            WRITE(kout,*) '     status = ', cdstat
            WRITE(kout,*) '     form   = ', cdform
            WRITE(kout,*) '     access = ', cdacce
            WRITE(kout,*)
         ENDIF
      ENDIF
100   CONTINUE
      IF( iost /= 0 ) THEN
         WRITE(ctmp1,*) ' ===>>>> : bad opening file: ', TRIM(clfile)
         WRITE(ctmp2,*) ' =======   ===  '
         WRITE(ctmp3,*) '           unit   = ', knum
         WRITE(ctmp4,*) '           status = ', cdstat
         WRITE(ctmp5,*) '           form   = ', cdform
         WRITE(ctmp6,*) '           access = ', cdacce
         WRITE(ctmp7,*) '           iostat = ', iost
         WRITE(ctmp8,*) '           we stop. verify the file '
         CALL ctl_stop( 'STOP', ctmp1, ctmp2, ctmp3, ctmp4, ctmp5, ctmp6, ctmp7, ctmp8 )
      ENDIF
      !
   END SUBROUTINE ctl_opn


   SUBROUTINE ctl_nam ( kios, cdnam )
      !!----------------------------------------------------------------------
      !!                  ***  ROUTINE ctl_nam  ***
      !!
      !! ** Purpose :   print information when an error occurs while reading a namelist
      !!
      !! ** Method  :   interpret the Fortran IOSTAT value returned by the namelist READ
      !!----------------------------------------------------------------------
      INTEGER                                , INTENT(inout) ::   kios    ! IO status after reading the namelist
      CHARACTER(len=*)                       , INTENT(in   ) ::   cdnam   ! group name of namelist for which error occurs
      !
      CHARACTER(len=5) ::   clios   ! string to convert iostat in character for print
      !!----------------------------------------------------------------------
      !
      WRITE (clios, '(I5.0)')   kios
      IF( kios < 0 ) THEN
         CALL ctl_warn( 'end of record or file while reading namelist '   &
            &           // TRIM(cdnam) // ' iostat = ' // TRIM(clios) )
      ENDIF
      !
      IF( kios > 0 ) THEN
         CALL ctl_stop( 'misspelled variable in namelist '   &
            &           // TRIM(cdnam) // ' iostat = ' // TRIM(clios) )
      ENDIF
      kios = 0
      !
   END SUBROUTINE ctl_nam


   INTEGER FUNCTION get_unit()
      !!----------------------------------------------------------------------
      !!                  ***  FUNCTION  get_unit  ***
      !!
      !! ** Purpose :   return the index of an unused logical unit
      !!----------------------------------------------------------------------
      LOGICAL :: llopn
      !!----------------------------------------------------------------------
      !
      get_unit = 15   ! choose a unit that is big enough so that it is not already used in NEMO
      llopn = .TRUE.
      DO WHILE( (get_unit < 9999) .AND. llopn )
         get_unit = get_unit + 1
         INQUIRE( unit = get_unit, opened = llopn )
      END DO
      IF( (get_unit == 9999) .AND. llopn ) THEN
         CALL ctl_stop( 'STOP', 'get_unit: All logical units until 9999 are used...' )
      ENDIF
      !
   END FUNCTION get_unit

   SUBROUTINE load_nml( cdnambuff , cdnamfile, kout, ldwp)
      CHARACTER(LEN=:)    , ALLOCATABLE, INTENT(INOUT) :: cdnambuff
      CHARACTER(LEN=*), INTENT(IN )                :: cdnamfile
      CHARACTER(LEN=256)                           :: chline
      CHARACTER(LEN=1)                             :: csp
      INTEGER, INTENT(IN)                          :: kout
      LOGICAL, INTENT(IN)                          :: ldwp  !: .true. only for the root broadcaster
      INTEGER                                      :: itot, iun, iltc, inl, ios, itotsav
      !
      !csp = NEW_LINE('A')
      ! a new-line character is the best separator but some systems (e.g. Cray)
      ! seem to terminate namelist reads from internal files early if they
      ! encounter new-lines. Use a single space for safety.
      csp = ' '
      !
      ! Check if the namelist buffer has already been allocated. Return if it has.
      !
      IF ( ALLOCATED( cdnambuff ) ) RETURN
      IF( ldwp ) THEN
         !
         ! Open namelist file
         !
         CALL ctl_opn( iun, cdnamfile, 'OLD', 'FORMATTED', 'SEQUENTIAL', -1, kout, ldwp )
         !
         ! First pass: count characters excluding comments and trimmable white space
         !
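         ! e.g. the hypothetical line 'ln_zco = .TRUE.   ! z-coordinate' keeps the 18
         ! characters before the '!' plus one separator, i.e. it adds inl = 19 to itot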
         itot=0
     10  READ(iun,'(A256)',END=20,ERR=20) chline
         iltc = LEN_TRIM(chline)
         IF ( iltc.GT.0 ) THEN
          inl = INDEX(chline, '!')
          IF( inl.eq.0 ) THEN
           itot = itot + iltc + 1                                ! +1 for the separator character
          ELSEIF( inl.GT.0 .AND. LEN_TRIM( chline(1:inl-1) ).GT.0 ) THEN
           itot = itot + inl                                  !  includes +1 for the separator character
          ENDIF
         ENDIF
         GOTO 10
     20  CONTINUE
         !
         ! Allocate text cdnambuff for condensed namelist
         !
!$AGRIF_DO_NOT_TREAT
         ALLOCATE( CHARACTER(LEN=itot) :: cdnambuff )
!$AGRIF_END_DO_NOT_TREAT
         itotsav = itot
         !
         ! Second pass: read and transfer pruned characters into cdnambuff
         !
         REWIND(iun)
         itot=1
     30  READ(iun,'(A256)',END=40,ERR=40) chline
         iltc = LEN_TRIM(chline)
         IF ( iltc.GT.0 ) THEN
          inl = INDEX(chline, '!')
          IF( inl.eq.0 ) THEN
           inl = iltc
          ELSE
           inl = inl - 1
          ENDIF
          IF( inl.GT.0 .AND. LEN_TRIM( chline(1:inl) ).GT.0 ) THEN
             cdnambuff(itot:itot+inl-1) = chline(1:inl)
             WRITE( cdnambuff(itot+inl:itot+inl), '(a)' ) csp
             itot = itot + inl + 1
          ENDIF
         ENDIF
         GOTO 30
     40  CONTINUE
         itot = itot - 1
         IF( itotsav .NE. itot ) WRITE(*,*) 'WARNING in load_nml. Allocated ',itotsav,' for read buffer; but used ',itot
         !
         ! Close namelist file
         !
         CLOSE(iun)
         !write(*,'(32A)') cdnambuff
      ENDIF
#if ! defined key_mpi_off
      CALL mpp_bcast_nml( cdnambuff, itot )
#endif
   END SUBROUTINE load_nml


   !!----------------------------------------------------------------------
END MODULE lib_mpp