!#################################################################
!
! Parallelisation stuff.
! Dummy variables and routines for single processor case.
!
! npes                 |     1    |             3
! ---------------------+----------+-----------------------------------------
! myid                 |     0    |       0            1            2
!                      |          |
! root                 |     0    |       0            0            0
! root_k               |     0    |       0            0            0
! root_t               |     0    |       0            0            0
!                      |          |
! lm                   |    25    |      25           25           25
! lmloc                |    25    |       8            8            9
! lmar(0:npes-1)       |  (/25/)  |   (/8,8,9/)    (/8,8,9/)    (/8,8,9/)
! offsetl              |     0    |       0            8           16
!                      |          |
! ntrace               |    42    |      42           42           42
!                      |          |
! ntracet              |    26    |      26           26           26
! ntracetloc           |    26    |       8            9            9
! ntracet_ar(0:npes-1) |  (/26/)  |   (/8,9,9/)    (/8,9,9/)    (/8,9,9/)
! offsetn              |     0    |       0            8           17
!                      |          |
! tracer_active(1:26)  |  T  1  0 |     T  1  0       F    0      F     0 
! tracer_loc(1:26)     |  :  :  : |     :     :       :    :      :     : 
! tracer_id(1:26)      |  T  8  0 |     T  8  0       F    0      F     0 
!                      |  T  9  0 |     F     1       T  1 1      F     1 
!                      |  :  :  : |     :     :       :  : :      :     : 
!                      |  T 17  0 |     F     1       T  9 1      F     1 
!                      |  T 18  0 |     F     2       F    2      T  1  2 
!                      |  :  :  : |     :     :       :    :      :  :  : 
!                      |  T 26  0 |     F     2       F    2      T  9  2 
!                      |          |
! 
!
!  Error handling in MPI programs
!  -----------------------------------
!
!   Change the error handler used by a communicator:
!
!      subroutine MPI_ErrHandler_Set( comm, ErrHandler, iError )
!        integer, intent(in)    ::  comm
!        integer, intent(in)    ::  ErrHandler
!        integer, intent(in)    ::  iError
!      end subroutine MPI_ErrHandler_Set
!
!  Standard error handlers :
!      MPI_ERRORS_ARE_FATAL       ! default
!      MPI_ERRORS_RETURN 
!
!
!### macro's #####################################################
!
#define TRACEBACK write (gol,'("in ",a," (",a,", line",i5,")")') rname, __FILE__, __LINE__; call goErr
#define IF_NOTOK_RETURN(action) if (status/=0) then; TRACEBACK; action; return; end if
#define IF_ERROR_RETURN(action) if (status> 0) then; TRACEBACK; action; return; end if
!
#include "tm5.inc"
!
!#################################################################

module ParTools

  use GO        , only : gol, goPr, goErr
#ifdef MPI
  use mpi_const , only : npes, myid
  use mpi_const , only : root, root_k, root_t
  use mpi_const , only : tracer_active, tracer_loc, ntracetloc, ntracet_ar
  use mpi_const , only : proc_tracer
  use mpi_const , only : lmloc, lmar
  use mpi_const , only : which_par, previous_par
  use mpi_const , only : allocate_mass
  use mpi_const , only : localComm, MPI_INFO_NULL
  use mpi_const , only : MPI_CHARACTER, MPI_INTEGER
#else
  use dims      , only : nregions
#endif 
  use chem_param, only : ntracet
  
  implicit none

  ! --- in/out -----------------------------------
  
  private
  
  public  ::  localComm
  public  ::  npes, myid
  public  ::  proc_tracer
  public  ::  procname
  public  ::  root, root_k, root_t
  public  ::  lmloc, lmar, offsetl
  public  ::  ntracetloc, ntracet_ar, offsetn
  public  ::  tracer_loc, tracer_active, tracer_id
  public  ::  which_par, previous_par
  public  ::  allocate_mass
  
  public  ::  MPI_INFO_NULL
  public  ::  PAR_OPER_SUM
  
  public  ::  MPI_CHARACTER, MPI_INTEGER

  public  ::  TM5_MPI_Init, TM5_MPI_Done
  public  ::  TM5_MPI_Abort
  public  ::  Par_Init, Par_Done
  public  ::  Par_Barrier
  public  ::  Par_StopMPI
  public  ::  Par_Broadcast_Status
  public  ::  Par_Broadcast
  public  ::  Par_Reduce, Par_AllReduce
  public  ::  Par_Gather_Tracer_t
  public  ::  Par_Gather_Tracer_k
  public  ::  Par_Scatter_After_Read_t
  public  ::  Par_Scatter_Over_Levels
  public  ::  Par_Gather_From_Levels
  public  ::  Par_Swap_Levels

  
  ! --- const --------------------------------------

  character(len=*), parameter  ::  mname = 'ParTools'
  
  ! operations 
  integer, parameter        ::  PAR_OPER_SUM = 100

  ! --- var --------------------------------------

#ifdef MPI
#else

  ! dummy constants:
  integer, parameter  ::  MPI_INFO_NULL = 0
  integer, parameter  ::  MPI_CHARACTER = 0
  integer, parameter  ::  MPI_INTEGER   = 0

  ! communicator:
  integer           ::  localComm

  ! same as used from mpi const:
  integer           ::  npes
  integer           ::  myid     ! PE number in tm5 communicator (always one if not MPI)
  integer           ::  root     ! myid of root in tm5 communicator
  integer           ::  root_k   ! myid of root in com_lev
  integer           ::  root_t   ! myid root in com_trac

  ! nr of levels at this PE
  integer               ::  lmloc

  ! number of levels actually assigned to each PE
  integer, allocatable  ::  lmar(:)
  
  ! nr of tracers and transported tracers at this PE
  integer               ::  ntracetloc    

  ! nr of transported tracers to each PE
  integer, allocatable  ::  ntracet_ar(:)

  ! tracer_active  : determines whether tracer is active on processer 
  logical           ::  tracer_active(ntracet)

  ! tracer_loc  : determines location in the local array 
  integer           ::  tracer_loc(ntracet)
  
  ! proc_tracer  : determines which PE handles each tracer
  integer           ::  proc_tracer(ntracet)

  ! parrallel regime : either 'levels' or 'tracer'
  character(len=6)        ::  which_par  
  character(len=6)        ::  previous_par(nregions)

  ! switch to allocate and deallocate mass after each swap
  logical, parameter      ::  allocate_mass = .false.

#endif

  ! level offset: lglob = offsetl+lloc
  integer           ::  offsetl
  
  ! chemical offset: itracer_global = offsetn + itracer_local
  integer           ::  offsetn

  ! character keys for each processor
  character(len=6)  ::  procname
  
  ! processor id's for each tracer:
  integer           ::  tracer_id(ntracet)

  
  ! --- interfaces -----------------------------------
  
  interface Par_Broadcast
    module procedure Par_Broadcast_i
    module procedure Par_Broadcast_s
    module procedure Par_Broadcast_r2
    module procedure Par_Broadcast_r3
  end interface

  interface Par_Reduce
    module procedure Par_Reduce_r2
  end interface

  interface Par_Gather_Tracer_k
    module procedure Par_Gather_Tracer_k_3d
    module procedure Par_Gather_Tracer_k_4d
  end interface

  interface Par_Scatter_Over_Levels
    module procedure Par_Scatter_Over_Levels__r3
  end interface

  interface Par_Gather_From_Levels
    module procedure Par_Gather_From_Levels__r3
  end interface

  interface Par_Swap_Levels
    module procedure Par_Swap_Levels__r3
  end interface


contains


  ! ===================================================
  
  
  subroutine TM5_MPI_Init( status, comm )
  
#ifdef MPI
    use mpi_const, only : MPI_COMM_WORLD, localComm
    use mpi_const, only : MPI_SUCCESS
#endif

    ! --- in/out ----------------------------------
    
    integer, intent(out)            ::  status
    integer, intent(in), optional   ::  comm
    
    ! --- const ------------------------------

    character(len=*), parameter ::  rname = mname//'/TM5_MPI_Init'
    
    ! --- begin -----------------------------------

#ifdef MPI

    ! communicator provided, for example by prism coupler ?
    if ( present(comm) ) then
    
      ! store the local communicator:
      localComm = comm
      
    else
    
      ! init mpi here to set MPI_COMM_WORLD etc
      call MPI_INIT( status )
      if (status/=MPI_SUCCESS) then
        write (gol,'("from MPI_INIT : ",i6)') status; call goErr
        TRACEBACK; status=1; return
      end if
      
      ! store the 'local' communicator:
      localComm = MPI_COMM_WORLD
      
    end if
    
    ! obtain number of proceses:
    call MPI_COMM_SIZE( localComm, npes, status )
    if (status/=MPI_SUCCESS) then
      write (gol,'("from MPI_COMM_SIZE : ",i6)') status; call goErr
      TRACEBACK; status=1; return
    end if

    ! obtain proces number:
    call MPI_COMM_RANK( localComm, myid, status )
    if (status/=MPI_SUCCESS) then
      write (gol,'("from MPI_COMM_RANK : ",i6)') status; call goErr
      TRACEBACK; status=1; return
    end if
    
    ! set root in localComm to PE 0
    root = 0

#else

    ! dummy comunicator:
    localComm = 0
    
    ! single processor:
    npes = 1

    ! dummy id for root processor
    root = 0

    ! only one processor, so this is always root ...
    myid = root

#endif

    ! ok
    status = 0
    
  end subroutine TM5_MPI_Init


  ! ***
      

  subroutine TM5_MPI_Done( status, comm )
  
#ifdef MPI
    use mpi_const, only : MPI_SUCCESS
#endif

    ! --- in/out ----------------------------------
    
    integer, intent(out)            ::  status
    integer, intent(in), optional   ::  comm
    
    ! --- const ------------------------------

    character(len=*), parameter ::  rname = mname//'/TM5_MPI_Done'
    
    ! --- begin -----------------------------------

#ifdef MPI

    ! local communicator ?
    if ( present(comm) ) then
    
      ! nothing to finalize ...
      
    else
    
      ! shut-down communication:
      write (*,'("call MPI_Finalize from proces ",i6)') myid
      call MPI_Finalize( status )
      if (status/=MPI_SUCCESS) then
        write (*,'("ERROR - from MPI_Finalize : ",i6)') status
        TRACEBACK; status=1; return
      end if
      
    end if

#endif

    ! ok
    status = 0

  end subroutine TM5_MPI_Done


  ! ***
      

  subroutine TM5_MPI_Abort( errorcode, status )
  
#ifdef MPI
    use mpi_const, only : MPI_SUCCESS
#else
    use GO, only : goExit
#endif

    ! --- in/out ----------------------------------
    
    integer, intent(in)             ::  errorcode
    integer, intent(out)            ::  status
    
    ! --- const ------------------------------

    character(len=*), parameter ::  rname = mname//'/TM5_MPI_Abort'
    
    ! --- begin -----------------------------------

#ifdef MPI

    ! (pls, 8-4-2011) Sometimes the code does not return from MPI_Abort, for
    ! example when a problem reading restart files occurs. From the doc:
    !-------------------------------------------------
    !"Before the error value is returned, the current MPI error handler is
    ! called. By default, this error handler aborts the MPI job, except for
    ! I/O function errors."
    !-------------------------------------------------
    ! so, the only way to nicely abort is to close files when an i/o error
    ! occurs. Done with a new macro for problems with reading/writing restart
    ! (see tm5_restart.F90). Check if there your module/routine is prone to
    ! i/o error, and apply a similar patch.
    
    ! emergency break ...
    call MPI_Abort( localComm, errorcode, status )
    if (status/=MPI_SUCCESS) then
      write (*,'("ERROR - from MPI_Abort : ",i6)') status
      TRACEBACK; status=1; return
    end if

#else

    ! system exit:
    call goExit( errorcode )

#endif

    ! ok
    status = 0

  end subroutine TM5_MPI_Abort


  ! ***


  !
  ! Inititialisation (former mpi_comm/startmpi)
  !
  
  subroutine Par_Init( status )
  
    use Dims, only : lm
#ifdef MPI
    use mpi_const, only : MPI_DOUBLE_PRECISION
    use mpi_const, only : my_real
    use mpi_comm, only : initialize_domains
#endif
    use chem_param, only : ntrace, ntracet, names

    ! --- in/out ---------------------------------
    
    integer, intent(out)          ::  status
    
    ! --- const ------------------------------

    character(len=*), parameter ::  rname = mname//'/Par_Init'
    
    ! --- local ----------------------------------
    
    integer        ::  id
    integer        ::  n
    
    ! --- begin ----------------------------------
    
    ! allocate arrays
    allocate( lmar      (0:npes-1) )
    allocate( ntracet_ar(0:npes-1) )

#ifdef MPI

    !WP! my_real comes from mpif.h, other declarations may be needed sometimes
    my_real = MPI_DOUBLE_PRECISION 

    call initialize_domains( status )
    if (status/=0) then; TRACEBACK; status=1; return; end if

#else

    ! only one processor, so this is always root ...
    root_k = root
    root_t = root 

    ! *** levels

    ! nr of levels at this PE
    ! (take levels from region 1)
    lmloc = lm(1)

    ! number of levels actually assigned to each PE
    ! (take levels from region 1)
    lmar(0) = lm(1)
  
    ! *** tracers

    ! tracer_active  : determines whether tracer is active on processer 
    tracer_active = .true.
 
    ! number of transported tracers
    ntracetloc = ntracet
 
    ! tracer location
    tracer_loc = -9
    do n = 1, ntracet
      tracer_loc(n) = n
    end do
    
    ! ***

#endif

    ! initially 'parallel' over tracers:
    which_par = 'tracer'
    previous_par(:) = 'tracer'

    ! level offset: lglob = offsetl+lloc
    offsetl = 0
    if ( myid > 0 ) offsetl = sum(lmar(0:myid-1))
    
    ! tracer offset: itracer_global = offsetn + itracer_local
    offsetn = 0
    if ( myid > 0 ) offsetn = sum(ntracet_ar(0:myid-1))
    
    ! fill processor id for each tracer:
    tracer_id = -1
    do n = 1, ntracet
      if ( n <= ntracet_ar(0) ) then
        tracer_id(n) = 0
      else
        do id = 1, npes-1
          if ( (n > sum(ntracet_ar(0:id-1))) .and. &
               (n <= sum(ntracet_ar(0:id))) ) tracer_id(n) = id
        end do
      end if
    end do
    
    ! info ...
    if ( myid == root ) then
      write (gol,'(" ")'); call goPr
      write (gol,'("Tracer distribution:")'); call goPr
      write (gol,'("    n  tracer  pe")'); call goPr
      write (gol,'("  ---  ------ ---")'); call goPr
      do n = 1, ntracet
        write (gol,'("  ",i3,a8,i4)') n, trim(names(n)), tracer_id(n); call goPr
      end do
      do n = ntracet+1, ntrace
        write (gol,'("  ",i3,a8," all")') n, trim(names(n)); call goPr
      end do
      write (gol,'("  ---  ------ ---")'); call goPr
      write (gol,'(" ")'); call goPr
    end if

    ! set processor names: pe0000, pe0001, ...
    write (procname,'("pe",i4.4)') myid

    ! wait ...
    call Par_Barrier

    ! ok
    status = 0

  end subroutine Par_Init
  
  
  ! ***
  
  
  subroutine Par_Done( status )
  
    ! --- in/out ------------------------------
    
    integer, intent(out)          ::  status
    
    ! --- const ------------------------------

    character(len=*), parameter ::  rname = mname//'/Par_Done'
    
    ! --- begin ------------------------------

    ! deallocate arrays
    deallocate( lmar       )
    deallocate( ntracet_ar )

    ! ok
    status = 0

  end subroutine Par_Done
  

  ! ***
  
  
  subroutine Par_Barrier

#ifdef MPI
    use mpi_const, only : localComm
    use mpi_const, only : MPI_SUCCESS
#endif

    ! --- local ------------------------------------
    
    integer  ::  status
    
    ! --- begin ------------------------------------
      
#ifdef MPI   
    call mpi_barrier( localComm, status )
    if (status/=MPI_SUCCESS) write (*,'("WARNING - error from mpi_barrier; continue ...")')
#endif

  end subroutine Par_Barrier
    

  ! ***
  
  
  subroutine Par_StopMPI

#ifdef MPI
    use mpi_const, only : MPI_SUCCESS
#endif

    ! --- local ------------------------------------
    
    integer  ::  status
    
    ! --- begin ------------------------------------
    
    write (*,'("WARNING - (par_)stopmpi should be avoided; please trace back to main program ...")')
    
    ! deallocate arrays
    call Par_Done(status)
    if (status/=0) write (*,'("WARNING - error from par_done; continue ...")')
     
#ifdef MPI
    ! shut down mpi communication:
    call mpi_finalize(status)
    if (status/=MPI_SUCCESS) write (*,'("WARNING - error from mpi_finalize; continue ...")')
#endif

    ! fortran stop ....
    stop 'Fortran STOP in Par_StopMPI ...'

  end subroutine Par_StopMPI


  ! **************************************************************
  

!  ! check status argument on all processors;
!  ! return status is maximum of all input status's
!  ! message is issued to identify processes with non-zero input status
!  
!  subroutine Par_Check_Status( status )
!  
!    ! --- in/out ---------------------------------
!    
!    integer, intent(inout)  ::  status
!    
!    ! --- const --------------------------------------
!    
!    character(len=*), parameter  ::  rname = mname//'/Par_Check_Status'
!    
!    ! --- local -----------------------------------
!    
!    integer         ::  ierr
!    integer         ::  status_r
!    
!    ! --- begin ----------------------------------
!    
!    if ( npes > 1 ) then
!    
!      ! display message if input status on this proces non zero:
!      if ( status /= 0 ) then
!        write (gol,'("non zero status found on proces ",i6)') myid; call goErr
!      end if
!    
!      ! collect maximum status on each process:
!#ifdef MPI
!      call MPI_AllReduce( status, status_r, 1, MPI_INTEGER, MPI_MAX, localComm, ierr )
!      if (ierr/=MPI_SUCCESS) write (*,'("WARNING - error from MPI_AllReduce; continue ...")')
!#else
!      write (gol,'("please implement for non-mpi parallel library.")'); call goErr
!      TRACEBACK; status=1; return
!#endif
!      
!      ! set return status to maximum:
!      status = status_r
!
!    end if
!    
!  end subroutine Par_Check_Status
  

  ! **************************************************************
  

  subroutine Par_Broadcast_Status( istat, id )
  
#ifdef MPI
    use mpi_const, only : MPI_INTEGER, localComm
#endif

    ! --- in/out -------------------------------------
    
    integer, intent(inout)            ::  istat
    integer, intent(in)               ::  id
    
    ! --- const --------------------------------------
    
    character(len=*), parameter  ::  rname = mname//'/Par_Broadcast_Status'
    
    ! --- local ------------------------------------
    
    integer          ::  status
    
    ! --- begin ------------------------------------
    
    ! send the input status to all other processes:
    call Par_Broadcast( istat, id, status )
    if (status/=0) then
      write (gol,'("broadcasting status")'); call goErr
      TRACEBACK; istat=1; return
    end if
    
    ! each process has now return status 'istat' ...
    
  end subroutine Par_Broadcast_Status


  ! ***
  

  subroutine Par_Broadcast_i( i, id, status )
  
#ifdef MPI
    use mpi_const, only : MPI_INTEGER, localComm
#endif

    ! --- in/out -------------------------------------
    
    integer, intent(inout)            ::  i
    integer, intent(in)               ::  id
    integer, intent(out)              ::  status
    
    ! --- const --------------------------------------
    
    character(len=*), parameter  ::  rname = mname//'/Par_Broadcast_i'
    
    ! --- begin ------------------------------------

    if ( npes > 1 ) then
#ifdef MPI
      call MPI_BCast( i, 1, MPI_INTEGER, id, localComm, status )
      IF_NOTOK_RETURN(status=1)
#else
      write (gol,'("please implement for non-mpi parallel library.")'); call goErr
      TRACEBACK; status=1; return
#endif
    end if
    
    ! ok
    status = 0
    
  end subroutine Par_Broadcast_i


  ! ***
  

  subroutine Par_Broadcast_s( s, id, status )
  
#ifdef MPI
    use mpi_const, only : MPI_CHARACTER, localComm
#endif

    ! --- in/out -------------------------------------
    
    character(len=*), intent(inout)   ::  s
    integer, intent(in)               ::  id
    integer, intent(out)              ::  status
    
    ! --- const --------------------------------------
    
    character(len=*), parameter  ::  rname = mname//'/Par_Broadcast_s'
    
    ! --- begin ------------------------------------

    if ( npes > 1 ) then
#ifdef MPI
      call MPI_BCast( s, len(s), MPI_CHARACTER, id, localComm, status )
      if (status/=0) then; write (*,'("ERROR from MPI_BCAST ",a)') rname; status=1; return; end if
#else
      write (gol,'("please implement for non-mpi parallel library.")'); call goErr
      TRACEBACK; status=1; return
#endif
    end if
    
    ! ok
    status = 0
    
  end subroutine Par_Broadcast_s


  ! ***
  

  subroutine Par_Broadcast_r2( x, id, status )
  
#ifdef MPI
    use mpi_const, only : MPI_REAL8, localComm
#endif

    ! --- in/out -------------------------------------
    
    real(8), intent(inout)   ::  x(:,:)
    integer, intent(in)      ::  id
    integer, intent(out)     ::  status
    
    ! --- const --------------------------------------
    
    character(len=*), parameter  ::  rname = mname//'/Par_Broadcast_r2'
    
    ! --- begin ------------------------------------

    if ( npes > 1 ) then
#ifdef MPI
      call mpi_bcast( x, size(x), MPI_REAL8, id, localComm, status )
      if (status/=0) then; TRACEBACK; status=1; return; end if
#else
      write (gol,'("please implement for non-mpi parallel library.")'); call goErr
      TRACEBACK; status=1; return
#endif
    end if
    
    ! synchronize ...
    call Par_Barrier
    
    ! ok
    status = 0
    
  end subroutine Par_Broadcast_r2


  ! ***
  
  
  subroutine Par_Broadcast_r3( x, id, status )
  
#ifdef MPI
    use mpi_const, only : MPI_REAL8, localComm
#endif

    ! --- in/out -------------------------------------
    
    real(8), intent(inout)   ::  x(:,:,:)
    integer, intent(in)      ::  id
    integer, intent(out)     ::  status
    
    ! --- const --------------------------------------
    
    character(len=*), parameter  ::  rname = mname//'/Par_Broadcast_r3'
    
    ! --- begin ------------------------------------

    if ( npes > 1 ) then
#ifdef MPI
      call mpi_bcast( x, size(x), MPI_REAL8, id, localComm, status )
      if (status/=0) then; TRACEBACK; status=1; return; end if
#else
      write (gol,'("please implement for non-mpi parallel library.")'); call goErr
      TRACEBACK; status=1; return
#endif
    end if
    
    ! synchronize ...
    call Par_Barrier
    
    ! ok
    status = 0
    
  end subroutine Par_Broadcast_r3


  ! *********************************************************
  
  
  subroutine Par_AllReduce( x_l, x, command, status )
  
#ifdef MPI
    use mpi_const, only : MPI_MAX
    use mpi_const, only : MPI_REAL8, localComm
#endif

    ! --- in/out -------------------------------------
    
    real(8), intent(in)               ::  x_l
    real(8), intent(out)              ::  x
    character(len=*), intent(in)      ::  command
    integer, intent(out)              ::  status
    
    ! --- const --------------------------------------
    
    character(len=*), parameter  ::  rname = mname//'/Par_AllReduce'
    
    ! --- local -----------------------------------
    
    integer           ::  icommand
    
    ! --- begin ------------------------------------

    if ( npes > 1 ) then

#ifdef MPI
      
      ! determine correct mpi constant
      select case ( command )
        case ( 'max', 'MAX' )
          icommand = MPI_MAX
        case default
          write (*,'("ERROR - unsupported command for mpi : ",a)') command
          TRACEBACK; status=1; return
      end select

      ! reduce local values to global value:
      call MPI_AllReduce( x_l, x, 1, MPI_REAL8, &
                          icommand, localComm, status ) 
      if (status/=0) then
        write (*,'("ERROR - error from MPI_AllReduce : ",i6)') status
        TRACEBACK; status=1; return
      end if

#else

      write (gol,'("please implement for non-mpi parallel library.")'); call goErr
      TRACEBACK; status=1; return

#endif

    else
    
      ! only one processor; just copy:
      x = x_l

    end if
    
    ! ok
    status = 0
    
  end subroutine Par_AllReduce


  ! *********************************************************
  
  
  !
  ! gather an array  
  !    tracer(1-ih,im+ih,1-jh,jm+jh,1-lh,lm+lh,ntracer) 
  ! on root_t
  ! from local arrays  
  !    tracerloc(1-ih,im+ih,1-jh,jm+jh,1-lh:lm+lh,ntracerloc) 
  ! on all PE's where  
  ! the array is distributed over the n-index
  !

  subroutine Par_Gather_Tracer_t( tracer, im,jm,lm, ih,jh,lh, ntracer, tracerloc, broadcast )

#ifdef MPI   
    use mpi_comm, only : gather_tracer_t
#endif
    
    ! --- in/out ----------------------------------
    
    real, intent(inout)    ::  tracer(:,:,:,:)  
    integer, intent(in)    ::  im,jm,lm
    integer, intent(in)    ::  ih,jh,lh
    integer, intent(in)    ::  ntracer
    real, intent(in)       ::  tracerloc(:,:,:,:)
    logical, intent(in)    ::  broadcast

    ! --- const --------------------------------------
    
    character(len=*), parameter  ::  rname = mname//'/ Par_Gather_Tracer_t'
    
    ! --- begin -----------------------------------

#ifdef MPI   
    call gather_tracer_t( tracer, im,jm,lm, ih,jh,lh, ntracer, tracerloc, broadcast )
#else
    tracer = tracerloc
#endif

  end subroutine Par_Gather_Tracer_t


  ! *********************************************************
  
  
  subroutine Par_Gather_Tracer_k_3d( tracer, im,jm,lm, ih,jh,lh, &
                                       tracerloc, broadcast, status )

#ifdef MPI
    use mpi_comm, only : Gather_Tracer_k_3d
#endif

    ! --- in/out -----------------------------
    
    real, intent(inout)       ::  tracer(:,:,:)
    integer, intent(in)       ::  im,jm,lm
    integer, intent(in)       ::  ih,jh,lh
    real, intent(in)          ::  tracerloc(:,:,:)
    logical, intent(in)       ::  broadcast
    integer, intent(out)      ::  status
    
    ! --- const --------------------------------------
    
    character(len=*), parameter  ::  rname = mname//'/Par_Gather_Tracer_k_3d'
    
    ! --- begin --------------------------------
    
    if ( npes == 1 ) then
    
      ! just copy ...
      tracer = tracerloc
      
    else
    
#ifdef MPI

    call Gather_Tracer_k_3d( tracer, im,jm,lm, ih,jh,lh, tracerloc, broadcast )

#else

      write (gol,'("ERROR - please implement for non-mpi parallel library.")'); call goErr
      write (gol,'("ERROR in ",a)') rname; call goErr; status=1; return

#endif

    end if

    ! ok
    status = 0

  end subroutine Par_Gather_Tracer_k_3d


  ! ***
  

  subroutine Par_Gather_Tracer_k_4d( tracer, im,jm,lm, ih,jh,lh, ntracer, &
                                        tracerloc, broadcast, status )

#ifdef MPI
    use mpi_comm, only : Gather_Tracer_k
#endif

    ! --- in/out -----------------------------
    
    real, intent(inout)       ::  tracer(:,:,:,:)
    integer, intent(in)       ::  im,jm,lm
    integer, intent(in)       ::  ih,jh,lh
    integer, intent(in)       ::  ntracer
    real, intent(in)          ::  tracerloc(:,:,:,:)
    logical, intent(in)       ::  broadcast
    integer, intent(out)      ::  status
    
    ! --- const --------------------------------------
    
    character(len=*), parameter  ::  rname = mname//'/Par_Gather_Tracer_k_4d'
    
    ! --- begin --------------------------------
    
    if ( npes == 1 ) then
    
      ! just copy ...
      tracer = tracerloc
      
    else
    
#ifdef MPI

      call Gather_Tracer_k( tracer, im,jm,lm, ih,jh,lh, ntracer, tracerloc, broadcast )

#else

      write (gol,'("ERROR - please implement for non-mpi parallel library.")'); call goErr
      write (gol,'("ERROR in ",a)') rname; call goErr; status=1; return

#endif

    end if

    ! ok
    status = 0

  end subroutine Par_Gather_Tracer_k_4d


  ! *********************************************************
  

  !
  ! scatter an array
  !    tracer    (1-ih,im+ih,1-jh,jm+jh,1-lh,lm+lh,ntracer)
  ! on root (pe=0)
  ! over local arrays
  !    tracerloc(1-ih,im+ih,1-jh,jm+jh,1,  ,lmloc,ntracer) 
  ! on all PE's so that 
  ! the array is distributed over the l-index
  !

  subroutine Par_Scatter_After_Read_t( tracer_read, im,jm,lm, ih,jh,lh, tracerloc, send_id )

#ifdef MPI   
    use mpi_comm, only : Scatter_After_Read_t
#endif
    
    ! --- in/out -----------------------------------

    real, intent(in)            ::  tracer_read(:,:,:,:)
    integer,intent(in)          ::  im,jm,lm
    integer,intent(in)          ::  ih,jh,lh
    real, intent(out)           ::  tracerloc (:,:,:,:)  
    integer,intent(in)          ::  send_id 
    
    ! --- const --------------------------------------
    
    character(len=*), parameter  ::  rname = mname//'/Par_Scatter_After_Read_t'
    
    ! --- begin ---------------------------------
    
    if ( npes == 1 ) then
    
      ! just copy ...
      tracerloc = tracer_read
      
    else
    
#ifdef MPI

      call scatter_after_read_t( tracer_read, im,jm,lm, ih,jh,lh, tracerloc, send_id )

#else

      write (gol,'("ERROR - please implement for non-mpi parallel library.")'); call goErr
      write (gol,'("ERROR in ",a)') rname; call goErr; stop

#endif

    end if

  end subroutine Par_Scatter_After_Read_t


  ! ***
  
  
  !
  ! Distribute array 'x_send(:,:,lm)' on pe 'id_send'
  ! over arrays 'x_recv(:,:,lmloc)' on all pe's .
  !
  
  subroutine Par_Scatter_Over_Levels__r3( x_send, id_send, x_recv, status )
  
#ifdef MPI
    use mpi_const, only : MPI_REAL8
    use mpi_const, only : localComm
#endif

    ! --- in/out -------------------------------------
    
    real(8), intent(in)      ::  x_send(:,:,:)
    real(8), intent(out)     ::  x_recv(:,:,:)
    integer, intent(in)      ::  id_send
    integer, intent(out)     ::  status
    
    ! --- const --------------------------------------
    
    character(len=*), parameter  ::  rname = mname//'/Par_Scatter_Over_Levels__r3'

    integer, parameter  ::  ndim = 3
    
    ! --- local ------------------------------------

#ifdef MPI
    integer           ::  sendcounts(0:npes-1)
    integer           ::  displ(0:npes-1)
    integer           ::  recvcount
    
    integer           ::  n
#endif    
    ! --- begin ------------------------------------

    if ( npes == 1 ) then
    
      x_recv = x_send
      
    else

#ifdef MPI

      sendcounts = size(x_send,1) * size(x_send,2) * lmar

      displ(0) = 0
      do n = 1, npes-1
        displ(n) = sum(sendcounts(0:n-1))
      end do
      
      recvcount  = size(x_recv,1) * size(x_recv,2) * lmloc

      call MPI_SCATTERV( x_send, sendcounts, displ, MPI_REAL8, & 
                         x_recv, recvcount        , MPI_REAL8, &
                         id_send, localComm, status )
      IF_NOTOK_RETURN(status=1)

#else

      write (gol,'("please implement for non-mpi parallel library.")'); call goErr
      TRACEBACK; status=1; return

#endif

    end if
    
    ! ok
    status = 0
    
  end subroutine Par_Scatter_Over_Levels__r3


  ! ***
  
  
  !
  ! Collect arrays 'x_send(:,:,lmloc)' on all pe's 
  ! into 'x_recv(:,:,lm)' on pe 'id_recv' .
  !
  
  subroutine Par_Gather_From_Levels__r3( x_send, x_recv, id_recv, status )
  
#ifdef MPI
    use mpi_const, only : MPI_REAL8
    use mpi_const, only : localComm
#endif

    ! --- in/out -------------------------------------
    
    real(8), intent(in)      ::  x_send(:,:,:)
    real(8), intent(out)     ::  x_recv(:,:,:)
    integer, intent(in)      ::  id_recv
    integer, intent(out)     ::  status
    
    ! --- const --------------------------------------
    
    character(len=*), parameter  ::  rname = mname//'/Par_Gather_From_Levels__r3'

    integer, parameter  ::  ndim = 3
    
    ! --- local ------------------------------------

#ifdef MPI
    integer           ::  recvcounts(0:npes-1)
    integer           ::  displ(0:npes-1)
    integer           ::  sendcount    
    integer           ::  n
#endif
    
    ! --- begin ------------------------------------

    if ( npes == 1 ) then
    
      x_recv = x_send
      
    else

#ifdef MPI

      recvcounts = size(x_recv,1) * size(x_recv,2) * lmar

      displ(0) = 0
      do n = 1, npes-1
        displ(n) = sum(recvcounts(0:n-1))
      end do
      
      sendcount  = size(x_send,1) * size(x_send,2) * lmloc

      call MPI_GATHERV( x_send, sendcount        , MPI_REAL8, & 
                        x_recv, recvcounts, displ, MPI_REAL8, &
                        id_recv, localComm, status )
      IF_NOTOK_RETURN(status=1)

#else

      write (gol,'("please implement for non-mpi parallel library.")'); call goErr
      TRACEBACK; status=1; return

#endif

    end if
    
    ! ok
    status = 0
    
  end subroutine Par_Gather_From_Levels__r3


  !
  ! Swap arrays 'x_recv(:,:,lmloc)' on all pe's over levels.
  !
  
  subroutine Par_Swap_Levels__r3( x_k, status )
  
    use dims, only : lm
  
    ! --- in/out -------------------------------------
    
    real(8), intent(inout)     ::  x_k(:,:,:)
    integer, intent(out)       ::  status
    
    ! --- const --------------------------------------
    
    character(len=*), parameter  ::  rname = mname//'/Par_Swap_Levels__r3'

    ! --- local ------------------------------------
    
    real(8), allocatable  ::  x_t1(:,:,:)
    real(8), allocatable  ::  x_t2(:,:,:)
    integer               ::  k

    ! --- begin ------------------------------------

    ! only if parallel; otherwise, leave x_k unchanged
    if ( npes > 1 ) then
    
      ! storage for full 3D field:
      allocate( x_t1(size(x_k,1),size(x_k,2),lm(1)) )
      allocate( x_t2(size(x_k,1),size(x_k,2),lm(1)) )
    
      ! collect on root:
      call Par_Gather_From_Levels( x_k, x_t1, root_k, status )
      IF_NOTOK_RETURN(status=1)
      
      ! swap levels:
      do k = 1, lm(1)
        x_t2(:,:,k) = x_t1(:,:,lm(1)+1-k)
      end do
      
      ! distribute swapped levels over processors:
      call Par_Scatter_Over_Levels( x_t2, root_k, x_k, status )
      IF_NOTOK_RETURN(status=1)
      
      ! clear
      deallocate( x_t1 )
      deallocate( x_t2 )
      
    end if
    
    ! ok
    status = 0
    
  end subroutine Par_Swap_Levels__r3
      


  ! =========================================================================
  
    
  
  subroutine Par_Reduce_r2( send, recv, oper, id_recv, status, all )

#ifdef MPI  
    use mpi_const, only : MPI_SUCCESS, MPI_SUM
    use mpi_const, only : my_real
#endif
  
    ! --- in/out -----------------------------
    
    real, intent(in)          ::  send(:,:)
    real, intent(out)         ::  recv(:,:)
    integer, intent(in)       ::  oper
    integer, intent(in)       ::  id_recv
    integer, intent(out)      ::  status
    logical, optional, intent(in) :: all  ! switch from mpi_reduce to mpi_allreduce

    ! !HISTORY
    !   21 Aug 2012 - P. Le Sager - added option to reduce to all

    
    ! --- const -----------------------------
    
    character(len=*), parameter  ::  rname = mname//'/Par_Reduce_r2'
    
    ! --- local -----------------------------
    
    integer         ::  mpi_op
    integer         ::  slen
    logical :: isall
    
    ! --- begin ------------------------------
    
    ! parallel ?
    if ( npes == 1 ) then
    
      ! just copy
      recv = send
      
    else

       isall=.false.
       if (present(all)) isall=all
    
#ifdef MPI

      ! select mpi reduce operation:
      select case ( oper )
        case ( PAR_OPER_SUM ) ; mpi_op = MPI_SUM
        case default
          write (gol,'("unsupported oper : ",i6)') oper; call goErr
          TRACEBACK; status=1; return
      end select

      if (isall) then
         
         ! reduce to all processors
         call MPI_AllReduce( send, recv, size(send), MY_REAL, &
              mpi_op, localComm, status )
         if (status/=MPI_SUCCESS) then
            call MPI_Error_String( status, gol, slen ); call goErr
            TRACEBACK; status=1; return
         end if


      else
         
         ! reduce to processor id_send
         call MPI_Reduce( send, recv, size(send), MY_REAL, &
              mpi_op, id_recv, localComm, status )
         if (status/=MPI_SUCCESS) then
            call MPI_Error_String( status, gol, slen ); call goErr
            TRACEBACK; status=1; return
         end if
         
      end if
#else

      write (gol,'("please implement for non mpi library")'); call goErr
      TRACEBACK; status=1; return
        
#endif

    end if  ! parallel ?

    ! ok
    status = 0
    
  end subroutine Par_Reduce_r2
  


end module ParTools
