*ID GBCCF406
*/ U.M. 4.6 unix / source code change form / header   version 06/01/99
*/ CODE WRITERS MUST READ THE ACCOMPANYING INSTRUCTIONS FOR THIS BUILD:
*/  - See http://fr0800/umdoc/hegui/t3e4.6.html#chgfinst
*/ 
*/SOC: Introduce Load Balancing into the Tracer Advection Code
*/
*/ The modset redistributes rows amongst the processors to improve
*/ the load balaning during Tracer Advection.  The modset is only
*/ applicable for Global Models.  This modset only works with
*/ ADVCTL1E, not the original version.
*/
*/ Has an entry been lodged in the Problem Reporting System? [N]      
*/
*/ THIS CODE IS INTENDED FOR INCLUSION IN THE 4.6 BUILD      [Y]
*/ .....................................................................
*/   Author[s]:-> E-mail:-> bcarruthers@meto.gov.uk 
*/ Reviewer[s]:-> E-mail:-> @meto.gov.uk
*/
*/    "I have checked this change. When provided, the advance design 
*/  specification was agreed and adequate, and the new code conforms to
*/  Unified Model standards."
*/
*/  DESIGN SPEC. WAS REVIEWED ON: ......      REVIEWER[S] SIGNATURES
*/                                            ----------------------
*/    DATE CODE REVIEWED: ......
*/  .....................................................................
*/
*/  WILL CHANGES AFFECT ANCILLARY FILES?         [N]
*/  ARE ANY CHANGES TO STASHMASTER FILES NEEDED? [N] 
*/  USER INTERFACE ACTION REQUIRED?              [N]
*/ 
*/  TESTED IN CONFIGURATIONS:-> Global Climate and High Resolution
*/  TESTS RUN BY [PERSON]:-> Bob Carruthers
*/ 
*/  WILL THE CHANGES SLOW DOWN THE MODEL?        [N]
*/  -> Further details
*/  CHANGES WILL INCREASE MEMORY CONSUMPTION?    [Y]   
*/  -> Further details
*/
*/ | Re-start dumps bit compare with those created without the change 
*/ V MARK [Y| ] BELOW; leave rest of lines untouched.
*/
*/   Control Code    loses bit comparison
*/   Atmosphere (assuming same science options chosen)   loses b.c.
*/   Ocean       loses bit comparison
*/   Wave        loses bit comparison
*/   Reconfiguration   loses bit comparison
*/   Diagnostics      lose bit comparison
*/ For Y2K compliance checking:  
*/ DOES THIS CHANGE INTERACT WITH DATE CALCULATIONS IN ANY WAY? [N]   
*/ 
*/  SECTIONS (TO BE) CHANGED:
*/
*/  SECTIONS (TO BE) DELETED? 
*/
*/  NEW SECTIONS?  Fill in form http://www-hc/~hadmk/STASHmaster_change.html,
*/  and give section numbers below:
*/  
*/  *DEFS ADDED OR REMOVED: 
*/
*/  **Existing** decks being changed [with *I, *D, *B directives]
*/ ->
*/   TRACAD1A
*/   ADVCTL1E
*/   ATMDYN1
*/   SETTRA1A
*/
*/  Decks being created or purged [with *DECK, *COMDECK, *PURGEDK]
*/ *......K  Deck name   Section#.vr
*/ -> 
*/  TRACMAP
*/  ALL_RW1A
*/  SRT_SW1A
*/  C_L_U1A
*/......................................................................
*/ ANY REFERENCES TO EXTERNAL DOCUMENTS-> instead of design spec.
*/  ...OR ... ADVANCE DESIGN SPECIFICATION (optional) 
*/ ->    
*//////////////////////////////////////////////////////////////////////// 
*/
*/
*/
*DC TRACAD1A
*B TRACAD1A.33
*IF DEF,MPP,AND,DEF,A11_1A,AND,DEF,A12_1E,AND,DEF,GLOBAL
      SUBROUTINE TRAC_ADV_L(
     &                    first_level, last_level,
     &                    send_map,
     &                    recv_map,
     &                    n_send, n_recv,
     &                    last_row, global_row, 
     &                    max_advected_rows, 
     &                    send_dim1, send_dim2,
     &                    recv_dim1, recv_dim2,
     &                    FIELD,N_SWEEP,U_MEAN,V_MEAN,U_FIELD,P_FIELD,
*ELSE
*I ARB1F402.378
*ENDIF
*I ARB1F402.385
*IF DEF,MPP,AND,DEF,A11_1A,AND,DEF,A12_1E,AND,DEF,GLOBAL
     &,first_level         !IN First level over which to advect
     &,last_level          !IN Last level over which to advect
     &,send_dim1           !IN First dimension of send map
     &,send_dim2           !IN Second dimension of send map
     &,recv_dim1           !IN First dimension of recv map
     &,recv_dim2           !IN Second dimension of recv map
     &,send_map(send_dim1, send_dim2)
                           !IN The calculated send map
     &,recv_map(recv_dim1, recv_dim2)
                           !IN The calculated receive map
     &,n_send              !IN The number of send entries found
     &,n_recv              !IN The number of receive entries 
                           !   found
     &,last_row            !IN The Maximum number of rows for 
                           !   this PE
     &,global_row((glsize(2)-2)*last_level)
     &                     !IN The global index to the
                           !   rows on this PE
     &,max_advected_rows   !IN Maximum number of rows to be 
                           !   advected on this processor
     &,N_SWEEP((glsize(2)-2)*(last_level-first_level+1))
                           !IN Number of sweeps to be done
                           !   East-West for each row in full
                           !   domain
*ELSE
*I ARB1F402.387
*ENDIF
*I TRACAD1A.55
*IF DEF,MPP,AND,DEF,A11_1A,AND,DEF,A12_1E,AND,DEF,GLOBAL
     & U_MEAN(U_FIELD, first_level:last_level)
                           !IN ADVECTING U FIELD, MASS-WEIGHTED.
     &,V_MEAN(U_FIELD, first_level:last_level)
                           !IN ADVECTING V FIELD, MASS-WEIGHTED.
     &,FIELD(P_FIELD, first_level:last_level)
                           !IN FIELD TO BE ADVECTED.
*ELSE
*I TRACAD1A.58
*ENDIF
*B TRACAD1A.66
*IF DEF,MPP,AND,DEF,A11_1A,AND,DEF,A12_1E,AND,DEF,GLOBAL
     &,RS(P_FIELD, first_level:last_level)
                           !IN RS_FIELD
*ELSE
*I TRACAD1A.66
*ENDIF
*B TRACAD1A.68
*IF DEF,MPP,AND,DEF,A11_1A,AND,DEF,A12_1E,AND,DEF,GLOBAL
     &,DELTA_AK(first_level:last_level)
                           !IN
     &,DELTA_BK(first_level:last_level)
                           !IN
*ELSE
*I TRACAD1A.69
*ENDIF
*I TRACAD1A.76
*IF DEF,MPP,AND,DEF,A11_1A,AND,DEF,A12_1E,AND,DEF,GLOBAL
     & FLUX_DELTA_T(P_FIELD, first_level:last_level)
                           ! FLUX * ADVECTION TIMESTEP
     &,B1(P_FIELD, first_level:last_level)
                           ! ARGUMENT OF B_TERM
     &,B2(P_FIELD, first_level:last_level)
                           ! ARGUMENT OF B_TERM
     &,B_TERM(P_FIELD, first_level:last_level)!
     &,COURANT(P_FIELD, first_level:last_level)
                           ! COURANT NUMBER
     &,ABS_COURANT(P_FIELD, first_level:last_level)
                           ! ABSOLUTE VALUE OF COURANT NUMBER
     &,COURANT_MW(P_FIELD, first_level:last_level)
                           ! MASS WEIGHTED COURANT NUMBER
     &,RS_SQUARED_DELTAP(P_FIELD, first_level:last_level)
                           ! MASS * RADIUS OF EARTH
     &,MW(P_FIELD, first_level:last_level)
                           ! MASS WEIGHTING ASSOCIATED WITH
     &                     ! GRID BOX BOUNDARY FLUXES
     &,MW_RECIP(P_FIELD, first_level:last_level)
                           ! 1./MW
     &,RS_SQUARED_DELTAP_RECIP(P_FIELD, first_level:last_level)
                           ! HOLDS 1./RS_SQUARED_DELTAP
     &,FIELD_INC(P_FIELD, first_level:last_level)
                           ! HOLDS INCREMENT TO FIELD.
     &,WORK(P_FIELD)       ! General work-space.
     &,B_SWITCH(P_FIELD, first_level:last_level)
                           ! Entropy condition switch.
     &,SHIFT_N(ROW_LENGTH, 2, first_level:last_level)
                           ! Local copy of polar rows for 180 deg
     &,B2_SHIFT_N(ROW_LENGTH, first_level:last_level)
                           ! rotational shift by GCG_RVECSHIFT
     &,SHIFT_S(ROW_LENGTH, 2, first_level:last_level)
                           ! Local copy of polar rows for 180 deg
     &,B2_SHIFT_S(ROW_LENGTH, first_level:last_level)
                           ! rotational shift by GCG_RVECSHIFT
*ELSE
*I ARB1F402.395
*ENDIF
*B TRACAD1A.104
*IF DEF,MPP,AND,DEF,A11_1A,AND,DEF,A12_1E,AND,DEF,GLOBAL
     & ,START_P(2, first_level:last_level)
                        ! Start point for loop with I_CNTL =1 or 2
     & ,END_P(2, first_level:last_level)
                        ! End point for loop with I_CNTL=1 or 2
*ELSE
*I TRACAD1A.105
*ENDIF
*B TRACAD1A.107
*IF DEF,MPP,AND,DEF,A11_1A,AND,DEF,A12_1E,AND,DEF,GLOBAL
     & ,N_HEMI(first_level:last_level)
                        ! Number of hemispheres east-west advection
     &                  ! being performed in.
*ELSE
*I TRACAD1A.107
*ENDIF
*I ARB1F402.399
*IF DEF,MPP,AND,DEF,A11_1A,AND,DEF,A12_1E,AND,DEF,GLOBAL
     & ,k               ! Loop index over levels
     & ,kl              ! Offset for level K
*ENDIF
*B TRACAD1A.111
*IF DEF,MPP,AND,DEF,A11_1A,AND,DEF,A12_1E,AND,DEF,GLOBAL
     & NORTH_POLE_INC(first_level:last_level)
     &,SOUTH_POLE_INC(first_level:last_level)
*ELSE
*I TRACAD1A.112
*ENDIF
*I ARB1F402.401
*IF DEF,MPP,AND,DEF,A11_1A,AND,DEF,A12_1E,AND,DEF,GLOBAL
C
C----------------------------------------------------------------------
C      NEW VARIABLES NEEDED FOR LOAD BALANCING CODE.


      INTEGER OFF,SWEEP

      INTEGER ISTAT,FLAG

*CALL GCCOM

! 2d arrays needed for communication.
      
      REAL RE_FIELD(GLOBAL_ROW_LENGTH,max_advected_rows)
      REAL RE_RS_SQUARED_DELTAP(GLOBAL_ROW_LENGTH,max_advected_rows)
      REAL RE_RS_SQUARED_DELTAP_RECIP(GLOBAL_ROW_LENGTH,
     &     max_advected_rows)
      REAL RE_COURANT_MW(GLOBAL_ROW_LENGTH,max_advected_rows)

      INTEGER slen,rlen

! 1d arrays

      real re_mw(global_row_length)
      real re_courant(global_row_length)
      real re_abs_courant(global_row_length)
      real re_mw_recip(global_row_length)
      real re_work(global_row_length)
      real re_b1(global_row_length)
      real re_b2(global_row_length)
      real re_b_term(global_row_length)
      real re_flux_delta_t(global_row_length)
      real re_b_switch(global_row_length)
      real re_field_inc(global_row_length)
*ENDIF
*I TRACAD1A.129
*IF DEF,MPP,AND,DEF,A11_1A,AND,DEF,A12_1E,AND,DEF,GLOBAL

      flag = GC_NONE

      rlen = GLOBAL_ROW_LENGTH*LAST_ROW
      slen = P_FIELD*(last_level-first_level+1)
*ENDIF
*B ARB1F402.407
*IF DEF,MPP,AND,DEF,A11_1A,AND,DEF,A12_1E,AND,DEF,GLOBAL
      max_sweeps=0
      do k=first_level, last_level
        max_sweeps = max(max_sweeps,
     &                   n_sweep((k-first_level  )*(glsize(2)-2)+1),
     &                   n_sweep((k-first_level+1)*(glsize(2)-2)))
      end do
*ELSE
*I ARB1F402.407
*ENDIF
*I TRACAD1A.142
*IF DEF,MPP,AND,DEF,A11_1A,AND,DEF,A12_1E,AND,DEF,GLOBAL

C----------------------------------------------------------------------
CL    SECTION 1.1    CALCULATE COURANT NUMBER
C----------------------------------------------------------------------

      do k=first_level, last_level
        do  j = datastart(2)+first_row-1,datastart(2)+p_last_row-1
          j1 = (j-datastart(2))*row_length
! Because of unused 1st and last rows of while doing tracer advection
          if(j-1.lt.2 .or. j-1.ge.glsize(2)) then
            write(0,*)'TRAC_ADV_L: J is out of Range ',2,j-1,glsize(2)-1
            call abort()
          endif
          r_sweep = 1.0/n_sweep((k-1)*(glsize(2)-2)+j-2)
          do i = 1,row_length
            courant_mw(j1+i, k) = 
     &       ((0.5*(u_mean(j1+i-row_length, k) + u_mean(j1+i, k))) * 
     &         sec_p_latitude(j1+i)) *
     &       ((advection_timestep * longitude_step_inverse) * r_sweep)
          end do ! loop over one row
        end do ! loop over j - rows

        do i=start_p_update-row_length,end_p_update+row_length
          rs_squared_deltap(i, k) = rs(i, k)*(rs(i, k)*
     &                             (delta_ak(k)+delta_bk(k)*pstar(i)))
        end do
!
! Put some data (not updated by advection) into the haloes, polar row
! if present, and the first row of data which is not updated here
!
        do i=first_fld_pt, first_row*row_length
          rs_squared_deltap_recip(i, k) = 1.0/rs_squared_deltap(i, k)
        end do

!
! Put some data (not updated by advection) into the haloes, polar row
! if present, and the last row of data which is not updated here
!
        do i=(p_last_row-1)*row_length+1, end_p_update+row_length
          rs_squared_deltap_recip(i, k) = 1.0/rs_squared_deltap(i, k)
        end do

      end do ! end of loop over levels

! Redistribute the arrays to the processors

      flag=0  ! This is currently ignored at GCG v1.1

      CALL GC_SETOPT(GC_SHM_DIR,GC_SHM_PUT,info)  ! gather operation
      istat=GC_NONE

      CALL GCG_RALLTOALLE(
     &       FIELD(1,first_level),SEND_MAP(1,1),N_SEND,slen,
     &       RE_FIELD,RECV_MAP(1,1),N_RECV,rlen,
     &       gc_all_proc_group,flag,istat)
      IF(istat.ne.0) THEN
         CALL GC_ABORT(mype,nproc,'failed in data reordering')
      endif

      CALL GCG_RALLTOALLE(
     &       RS_SQUARED_DELTAP(1,first_level),SEND_MAP(1,1),N_SEND,slen,
     &       RE_RS_SQUARED_DELTAP,RECV_MAP(1,1),N_RECV,rlen,
     &       gc_all_proc_group,flag,istat)
      IF(istat.ne.0) THEN
         CALL GC_ABORT(mype,nproc,'failed in data reordering')
      endif


      CALL GCG_RALLTOALLE(
     &       COURANT_MW(1,first_level),SEND_MAP(1,1),N_SEND,slen,
     &       RE_COURANT_MW,RECV_MAP(1,1),N_RECV,rlen,
     &       gc_all_proc_group,flag,istat)
      IF(istat.ne.0) THEN
         CALL GC_ABORT(mype,nproc,'failed in data reordering')
      endif

! End of redistribution.

!
! Complete the calculation of 'rs_squared_deltap_recip' and
! The Courant Number
!
      do j=1,last_row
        do i=1,global_row_length
          re_rs_squared_deltap_recip(i, j)=1.0/
     &     re_rs_squared_deltap(i, j)
        end do

        do i=1,global_row_length -1
          re_mw(i) = 0.5*(re_rs_squared_deltap(i, j) +
     &                    re_rs_squared_deltap(i+1, j))
          ii=min(0.0,sign(1.0, re_courant_mw(i, j)))
          re_courant(i) = re_courant_mw(i, j)*
     &                    re_rs_squared_deltap_recip(i-ii, j)
        end do

C End Points
        re_mw(global_row_length) =
     &   0.5*(re_rs_squared_deltap(global_row_length, j) +
     &        re_rs_squared_deltap(1, j))

C End Points
        re_courant(global_row_length) =
     &   re_courant_mw(global_row_length, j)*
     &   re_rs_squared_deltap_recip(global_row_length, j)
        if(re_courant_mw(global_row_length, j).lt.0)
     &   re_courant(global_row_length) =
     &   re_courant_mw(global_row_length, j)*
     &   re_rs_squared_deltap_recip(1, j)


C SET ABSOLUTE VALUE OF COURANT NUMBER. THIS LOOP IS SEPARATE   
C TO LOOPS CALCULATING COURANT NUMBER SINCE INCLUDING IT THERE
C PREVENTS TOTAL OPTIMISATION.

        do i=1,global_row_length
          re_abs_courant(i) = abs(re_courant(i))
          re_mw_recip(i) = 1.0/re_mw(i)
        end do

C----------------------------------------------------------------------
CL END OF INITIALISATIONS
C----------------------------------------------------------------------

CL    PERFORM N_SWEEPS OF ADVECTION ON EACH ROW.
CL    LOOP OVER NUMBER OF SWEEPS REQUIRED.

        do sweep=1,n_sweep(global_row(j))

C----------------------------------------------------------------------
CL    SECTION 1.2    CALCULATE FLUX_DELTA_T AND B1
C----------------------------------------------------------------------

C CALCULATE TERM AT ALL POINTS

          do i=1,global_row_length-1
            re_flux_delta_t(i) = (re_field(i+1, j)-re_field(i, j))*
     &                            re_courant_mw(i, j)
            re_field_inc(i) = 0.0
            re_b1(i)=re_flux_delta_t(i)*(0.5*(1.0-re_abs_courant(i)))
          end do

C RECALULATE LAST VALUES ON EACH ROW

          re_flux_delta_t(global_row_length) = (re_field(1, j) -
     &     re_field(global_row_length, j)) *
     &     re_courant_mw(global_row_length, j)
          re_b1(global_row_length)=re_flux_delta_t(global_row_length)*
     &     (0.5*(1.0-re_abs_courant(global_row_length)))


C----------------------------------------------------------------------
CL    SECTION 1.3    CALCULATE B1 AND B2
C----------------------------------------------------------------------


C Global Model
C Loop over all Points

          off = sign(1.0,re_courant_mw(1, j))
          if(off.eq.-1) then
            re_b2(1)= re_flux_delta_t(global_row_length)*(0.5*
     &       (re_mw(1)*re_mw_recip(global_row_length)-
     &       re_abs_courant(global_row_length)))
            re_b_switch(1) = sign(1.0,re_courant(1)*
     &       re_courant(global_row_length))
          else
            re_b2(1) = re_flux_delta_t(2)*(0.5*
     &       (re_mw(1)*re_mw_recip(2)-re_abs_courant(2)))
            re_b_switch(1) = sign(1.0, re_courant(1)*re_courant(2))
          endif

          do i=2,global_row_length-1
            off = sign(1.0,re_courant_mw(i, j))
            re_b2(i) = re_flux_delta_t(i+off)*(0.5*
     &       (re_mw(i)*re_mw_recip(i+off)-re_abs_courant(i+off)))
            re_b_switch(i) = sign(1.0, re_courant(i)*re_courant(i+off))
          end do

          off = sign(1.0,re_courant_mw(global_row_length, j))

          if(off.eq.-1) then
            re_b2(global_row_length) =
     &       re_flux_delta_t(global_row_length-1)*
     &       (0.5*(re_mw(global_row_length)*
     &       re_mw_recip(global_row_length-1)-
     &       re_abs_courant(global_row_length-1)))

            re_b_switch(global_row_length) =
     &       sign(1.0,re_courant(global_row_length)*
     &       re_courant(global_row_length-1))
          else
            re_b2(global_row_length) =
     &       re_flux_delta_t(1) * (0.5*(re_mw(global_row_length)*
     &       re_mw_recip(1) - re_abs_courant(1)))

            re_b_switch(global_row_length) =
     &       sign(1.0,re_courant(global_row_length)*re_courant(1))
          endif


C----------------------------------------------------------------------
CL    SECTION 1.4    CALCULATE B_TERM
C----------------------------------------------------------------------

          if(l_superbee) then
            do i=1, global_row_length
              if(abs(re_b2(i)).GT.1E-8) then
                re_b_switch(i) = re_b_switch(i)*(re_b1(i)/re_b2(i))
                if(re_b_switch(i).gt.0.5 .and.
     &           re_b_switch(i).lt.2.0) then
                  re_b_term(i) = re_b2(i)*
     &             max(re_b_switch(i),1.0)
                else if(re_b_switch(i).le.0.0) then
                  re_b_term(i) = 0.0
                else
                  re_b_term(i) = 2.0*(re_b2(i)*min(re_b_switch(i),1.0))
                endif
              else
                re_b_switch(i) = 0.0
                re_b_term(i) = 0.0
              endif
            end do
          else
            do i=1, global_row_length
              re_b_term(i) = 0.0
              if(re_b1(i)*(re_b2(i)*re_b_switch(i)).gt.0.0)
     &         re_b_term(i) = 2.0*(re_b1(i)*(re_b2(i)*
     &         (re_b_switch(i)/(re_b1(i)+(re_b2(i)*re_b_switch(i))))))
            end do
          endif

C----------------------------------------------------------------------
CL    SECTION 1.5    CALCULATE INCREMENTS TO FIELD
C----------------------------------------------------------------------

          do i=1, global_row_length
            re_field_inc(i) = - re_b_term(i)
            if(re_courant_mw(i, j).ge.0.0)
     &       re_field_inc(i) = re_field_inc(i)+
     &       (2.0*re_b_term(i)-re_flux_delta_t(i))
          end do

          do i=2,global_row_length
            re_field_inc(i) = re_field_inc(i) - re_b_term(i-1)
            if(re_courant_mw(i-1, j).lt.0.0)
     &       re_field_inc(i) = re_field_inc(i)+
     &       (2.0*re_b_term(i-1)-re_flux_delta_t(i-1))
          end do

          re_field_inc(1) = - re_b_term(1)
          if(re_courant_mw(1, j).ge.0.0)
     &     re_field_inc(1) = re_field_inc(1)+
     &     (2.0*re_b_term(1)-re_flux_delta_t(1))
          re_field_inc(1) = re_field_inc(1) -
     &     re_b_term(global_row_length)
          if(re_courant_mw(global_row_length, j).lt.0.0)
     &     re_field_inc(1) = re_field_inc(1) + 
     &     (2.0*re_b_term(global_row_length)-
     &     re_flux_delta_t(global_row_length))


C----------------------------------------------------------------------
CL    SECTION 1.6    UPDATE FIELD
C----------------------------------------------------------------------

C UPDATE MASS WEIGHTING FIELDS

          re_work(1) = re_courant_mw(global_row_length, j) -
     &     re_courant_mw(1, j)
          do i=2,global_row_length
            re_work(i) = re_courant_mw(i-1, j) - re_courant_mw(i, j)
          end do

          do i=1, global_row_length
            re_rs_squared_deltap(i, j) = re_rs_squared_deltap(i, j)+
     &       re_work(i)
            re_rs_squared_deltap_recip(i, j) = 1.0/
     &       re_rs_squared_deltap(i, j)
          end do

          do i=1, global_row_length
            re_field(i, j) = re_field(i, j) + re_field_inc(i)*
     &       re_rs_squared_deltap_recip(i, j)
          end do

! END LOOP OVER SWEEPS
        end do
! END LOOP OVER ROWS
      end do

      flag=0  ! This is currently ignored at GCG v1.1

      CALL GC_SETOPT(GC_SHM_DIR,GC_SHM_PUT,info)  ! gather operation
      istat=GC_NONE

      CALL GCG_RALLTOALLE(
     &     RE_RS_SQUARED_DELTAP,RECV_MAP(1,1),N_RECV,rlen,
     &     RS_SQUARED_DELTAP(1,first_level),SEND_MAP(1,1),
     &     N_SEND,slen, gc_all_proc_group,flag,istat)

      IF(istat.ne.0) THEN
         CALL GC_ABORT(mype,nproc,'failed in data gathering')
      endif

      CALL GCG_RALLTOALLE(
     &     RE_RS_SQUARED_DELTAP_RECIP,RECV_MAP(1,1),N_RECV,rlen,
     &     RS_SQUARED_DELTAP_RECIP(1,first_level),SEND_MAP(1,1),
     &     N_SEND,slen, gc_all_proc_group,flag,istat)

      IF(istat.ne.0) THEN
         CALL GC_ABORT(mype,nproc,'failed in data gathering')
      endif

      CALL GCG_RALLTOALLE(
     &     RE_FIELD,RECV_MAP(1,1),N_RECV,rlen,
     &     FIELD(1,first_level),SEND_MAP(1,1),N_SEND,slen,
     &     gc_all_proc_group,flag,istat)

      IF(istat.ne.0) THEN
         CALL GC_ABORT(mype,nproc,'failed in data gathering')
      endif

! Swap all halo points to ensure updated arrays are fully up-to-date
!  for North-South sweep.

      CALL SWAPBOUNDS(FIELD(1, first_level), ROW_LENGTH, ROWS,
     &                EW_Halo, NS_Halo, (last_level-first_level+1))
      CALL SWAPBOUNDS(RS_SQUARED_DELTAP(1, first_level), ROW_LENGTH,
     &                ROWS,
     &                EW_Halo, NS_Halo, (last_level-first_level+1))
      CALL SWAPBOUNDS(RS_SQUARED_DELTAP_RECIP(1, first_level),
     &                ROW_LENGTH, ROWS,
     &                EW_Halo, NS_Halo, (last_level-first_level+1))

!!! Might be better to recompute RS_SQUARED_DELTAP_RECIP haloes.

CL---------------------------------------------------------------------
CL    SECTION 2.     CALCULATE FIELD INCREMENTS FOR V ADVECTION
CL---------------------------------------------------------------------

      do k=first_level, last_level

C----------------------------------------------------------------------
CL    SECTION 2.1    CALCULATE COURANT NUMBER
C----------------------------------------------------------------------

        DO  I = FIRST_VALID_PT+1,LAST_U_VALID_PT
          COURANT_MW(I, K) = (0.5*(V_MEAN(I, K)+V_MEAN(I-1, K)))*
     &                       (ADVECTION_TIMESTEP*LATITUDE_STEP_INVERSE)
        END DO

      end do

! Do EW Swapbounds to fill in west halo points.
      CALL SWAPBOUNDS(COURANT_MW(1, first_level), ROW_LENGTH, ROWS,
     &                EW_Halo, 0, (last_level-first_level+1))

      do k=first_level, last_level

        DO  I = FIRST_VALID_PT,LAST_U_FLD_PT
          MW(I, K)=0.5*(RS_SQUARED_DELTAP(I, K)*COS_P_LATITUDE(I)+
     &     RS_SQUARED_DELTAP(I+ROW_LENGTH, K)*
     &     COS_P_LATITUDE(I+ROW_LENGTH))
! Split this loop to try and retain MPP & nonMPP bit comparison
          COURANT(I, K) = COURANT_MW(I, K)
     &                   *RS_SQUARED_DELTAP_RECIP(I, K)
          COURANT(I, K) = COURANT(I, K)*SEC_P_LATITUDE(I)
          IF (COURANT_MW(I, K).GT.0.) THEN
            COURANT(I, K) = COURANT_MW(I, K)*
     &                     (SEC_P_LATITUDE(I+ROW_LENGTH)*
     &                      RS_SQUARED_DELTAP_RECIP(I+ROW_LENGTH, K))
          ENDIF
        END DO

C ABSOLUTE VALUE OF COURANT NUMBER CALCULATED IN THIS LOOP AS PUTTING
C IT IN PREVIOUS LOOP PREVENTS FULL OPTIMISATION.

      end do

! Do NS Swapbounds to fill in south halo points.
      CALL SWAPBOUNDS(MW(1, first_level), ROW_LENGTH, ROWS,
     &                0, NS_Halo, (last_level-first_level+1))
      CALL SWAPBOUNDS(COURANT(1, first_level), ROW_LENGTH, ROWS,
     &                0, NS_Halo, (last_level-first_level+1))

      do k=first_level, last_level

        DO  I = FIRST_VALID_PT,LAST_U_VALID_PT
          ABS_COURANT(I, K) = ABS(COURANT(I, K))
          MW_RECIP(I, K) = 1./MW(I, K)
        END DO

C----------------------------------------------------------------------
CL    SECTION 2.2    CALCULATE FLUX_DELTA_T AND B1
C----------------------------------------------------------------------

        DO  I = FIRST_VALID_PT,LAST_U_FLD_PT
          FLUX_DELTA_T(I, K) = COURANT_MW(I, K) *
     &                        (FIELD(I, K)-FIELD(I+ROW_LENGTH, K))
        END DO

      end do

! Do NS Swapbounds to fill in south halo points.
      CALL SWAPBOUNDS(FLUX_DELTA_T(1, first_level), ROW_LENGTH, ROWS,
     &                0, NS_Halo, (last_level-first_level+1))

      do k=first_level, last_level

        DO  I = FIRST_VALID_PT,LAST_U_FLD_PT
          B1(I, K) = FLUX_DELTA_T(I, K)*(0.5*(1.0-ABS_COURANT(I, K)))
        END DO

C----------------------------------------------------------------------
CL    SECTION 2.3    CALCULATE B1 AND B2
C----------------------------------------------------------------------

C CALCULATE FLUXES AT VELOCITY POINTS.
C FIRST LOOP OVER ALL POINTS NOT AT NORTHERN OR SOUTHERN BOUNDARY.

!DIR$ NOUNROLL
        real_row_length=row_length
cdir$ unroll
        DO  I = START_POINT_NO_HALO,END_U_POINT_NO_HALO
          ii=nint(sign(real_row_length, COURANT_MW(I, K)))
          B2(I, K) = FLUX_DELTA_T(I-II, K)*(0.5*(MW(I, K)
     &              *MW_RECIP(I-II, K) - ABS_COURANT(I-II, K)))
        END DO
c
        DO  I = START_POINT_NO_HALO,END_U_POINT_NO_HALO
          ii=nint(sign(real_row_length, COURANT_MW(I, K)))
          B_SWITCH(I, K) = sign(1.,COURANT(I, K)*COURANT(I-II, K))
        END DO

      end do

      IF (at_top_of_LPG) THEN

        do k=first_level, last_level
! Needs values over top of pole on another processor
          HALF_RL = GLOBAL_ROW_LENGTH/2
! Copy North polar row into copy arrays
          DO  I = 1,ROW_LENGTH
            SHIFT_N(I, 1, K) = MW(TOP_ROW_START+I-1, K)
            SHIFT_N(I, 2, K) = COURANT(TOP_ROW_START+I-1, K)
          END DO

        end do

! Rotate these arrays by half the global row length to get values on
!  opposite side of the pole.
        CALL GCG_RVECSHIFT(ROW_LENGTH, ROW_LENGTH-2*Offx, 1+Offx,
     &                     2*(last_level-first_level+1),
     &                     HALF_RL, .TRUE., SHIFT_N(1, 1, first_level),
     &                     GC_ROW_GROUP, info)

        do k=first_level, last_level

          DO  I = 1,ROW_LENGTH
            B2_SHIFT_N(I, K) =
     &       FLUX_DELTA_T(TOP_ROW_START+I-1, K)*(0.5*(SHIFT_N(I, 1, K)
     &      *MW_RECIP(TOP_ROW_START+I-1, K)
     &      -ABS_COURANT(TOP_ROW_START+I-1, K)))
            B_SWITCH(TOP_ROW_START+I-1, K) =
     &       sign(1.0,COURANT(TOP_ROW_START+I-1, K)*SHIFT_N(I, 2, K))
          END DO

        end do

        CALL GCG_RVECSHIFT(ROW_LENGTH, ROW_LENGTH-2*Offx, 1+Offx,
     &                     (last_level-first_level+1), 
     &                     HALF_RL, .TRUE., B2_SHIFT_N(1, first_level),
     &                     GC_ROW_GROUP, info)

        do k=first_level, last_level

          DO  I = 1,ROW_LENGTH
            B2(TOP_ROW_START+I-1, K) = B2_SHIFT_N(I, K)
          END DO

          DO  I = TOP_ROW_START,TOP_ROW_START+ROW_LENGTH-1
            IF (COURANT_MW(I, K) .lt. 0.0) THEN
              B2(I, K) = FLUX_DELTA_T(I+ROW_LENGTH, K)*(0.5*(MW(I, K)
     &               *MW_RECIP(I+ROW_LENGTH, K)
     &               -ABS_COURANT(I+ROW_LENGTH, K)))
              B_SWITCH(I, K) = sign(1.,COURANT(I, K)
     &                     *COURANT(I+ROW_LENGTH, K))
            endif
          END DO

        end do

      endif ! at_top_of_LPG

      IF (at_base_of_LPG) THEN

        do k=first_level, last_level

          DO  I = U_BOT_ROW_START,LAST_U_VALID_PT
            B2(I, K) = FLUX_DELTA_T(I-ROW_LENGTH, K)*(0.5*(MW(I, K)
     &             *MW_RECIP(I-ROW_LENGTH, K)
     &             -ABS_COURANT(I-ROW_LENGTH, K)))
            B_SWITCH(I, K) = sign(1.0,COURANT(I, K)
     &                   *COURANT(I-ROW_LENGTH, K))
          END DO

          HALF_RL = GLOBAL_ROW_LENGTH/2
          DO  I = 1,ROW_LENGTH
            SHIFT_S(I, 1, K) = MW(U_BOT_ROW_START+I-1, K)
            SHIFT_S(I, 2, K) = COURANT(U_BOT_ROW_START+I-1, K)
          END DO

        end do

        CALL GCG_RVECSHIFT(ROW_LENGTH, ROW_LENGTH-2*Offx, 1+Offx, 
     &                     2*(last_level-first_level+1),
     &                     HALF_RL, .TRUE., SHIFT_S(1, 1, first_level),
     &                     GC_ROW_GROUP, info)

        do k=first_level, last_level

          DO  I = 1,ROW_LENGTH
            B2_SHIFT_S(I, K) =
     &       FLUX_DELTA_T(U_BOT_ROW_START+I-1, K)*(0.5*(SHIFT_S(I, 1, K)
     &      *MW_RECIP(U_BOT_ROW_START+I-1, K)
     &      -ABS_COURANT(U_BOT_ROW_START+I-1, K)))
            IF (COURANT_MW(U_BOT_ROW_START+I-1, K) .lt. 0.0) THEN
              B_SWITCH(U_BOT_ROW_START+I-1, K) = sign(1.0,
     &         COURANT(U_BOT_ROW_START+I-1, K)*SHIFT_S(I, 2, K))
            endif
          END DO

        end do

        CALL GCG_RVECSHIFT(ROW_LENGTH, ROW_LENGTH-2*Offx, 1+Offx,
     &                     (last_level-first_level+1),
     &                     HALF_RL, .TRUE., B2_SHIFT_S(1, first_level),
     &                     GC_ROW_GROUP, info)

        do k=first_level, last_level

          DO  I = 1,ROW_LENGTH
            IF (COURANT_MW(U_BOT_ROW_START+I-1, K) .lt. 0.0) THEN
              B2(U_BOT_ROW_START+I-1, K) = B2_SHIFT_S(I, K)
            endif
          END DO

        end do

      endif ! at_base_of LPG


C----------------------------------------------------------------------
CL    SECTION 2.4    CALCULATE B_TERM
C----------------------------------------------------------------------

      do k=first_level, last_level

        IF (L_SUPERBEE) THEN

CL    SUPERBEE LIMITER.

          DO  I = FIRST_FLD_PT,LAST_U_FLD_PT
            IF(ABS(B2(I, K)).GT.1.0E-8) THEN
              B_SWITCH(I, K) = B_SWITCH(I, K)*(B1(I, K)/B2(I, K))
              IF (B_SWITCH(I, K).GT.0.5.AND.B_SWITCH(I, K).LT.2.0) THEN
                B_TERM(I, K) = B2(I, K) * MAX(B_SWITCH(I, K),1.0)
              ELSE IF (B_SWITCH(I, K).LE.0.0) THEN
                B_TERM(I, K) = 0.0
              ELSE
                B_TERM(I, K) = 2.0 * (B2(I, K)*MIN(B_SWITCH(I, K),1.0))
              endif
            ELSE
              B_SWITCH(I, K) = 0.
              B_TERM(I, K) = 0.0
            endif
          END DO

        ELSE

CL    VAN LEER LIMITER.

C LOOP OVER ALL POINTS
          DO  I = FIRST_FLD_PT,LAST_U_FLD_PT
            B_TERM(I, K) = 0.0
            IF (B1(I, K)*(B2(I, K)*B_SWITCH(I, K)).GT.0.0)
     &        B_TERM(I, K) = 2.0*(B1(I, K)*(B2(I, K)*(B_SWITCH(I, K)/
     &                           (B1(I, K)+(B2(I, K)*B_SWITCH(I, K))))))
          END DO

        endif

      end do

! Do NS Swapbounds to fill in halo points.
      CALL SWAPBOUNDS(B_TERM(1, first_level), ROW_LENGTH, ROWS,
     &                0, NS_Halo, (last_level-first_level+1))

C----------------------------------------------------------------------
CL    SECTION 2.5    CALCULATE INCREMENTS TO FIELD
CL---------------------------------------------------------------------

      do k=first_level, last_level

        DO I=1,P_FIELD
          FIELD_INC(I, K)=0.0
        END DO

cdir$ unroll
        do  i = first_fld_pt, last_u_fld_pt
          field_inc(i, k) = field_inc(i, k) - b_term(i, k)
          if (courant_mw(i, k).lt.0.0)
     &     field_inc(i, k) = field_inc(i, k) + 
     &     (2.*b_term(i, k) - flux_delta_t(i, k))
        end do
cdir$ ivdep
        do  i = first_valid_pt, last_u_fld_pt
          field_inc(i+row_length, k) = field_inc(i+row_length, k) -
     &                                 b_term(i, k)
          if (courant_mw(i, k).ge.0.0)
     &     field_inc(i+row_length, k) = field_inc(i+row_length, k) +
     &     (2.*b_term(i, k) - flux_delta_t(i, k))
        end do

C----------------------------------------------------------------------
CL    SECTION 2.6    CALCULATE POLAR INCREMENTS
CL---------------------------------------------------------------------

C CALCULATE AVERAGE POLAR INCREMENT
        NORTH_POLE_INC(K) = 0.0
        SOUTH_POLE_INC(K) = 0.0

      end do

! Use reproducible vector sum of points on polar rows.
      IF (at_top_of_LPG) THEN
        CALL GCG_RVECSUMR(P_FIELD, ROW_LENGTH-2*EW_Halo,
     &                    TOP_ROW_START+EW_Halo, 
     &                    (last_level-first_level+1), 
     &                    FIELD_INC(1, first_level),
     &                    GC_ROW_GROUP, info, 
     &                    NORTH_POLE_INC(first_level))
      endif
! Because values are summed in reverse order in non-MPP loop
! SOUTH_POLE_INC will probably not bit-compare
      IF (at_base_of_LPG) THEN
        CALL GCG_RVECSUMR(P_FIELD, ROW_LENGTH-2*EW_Halo,
     &                    P_BOT_ROW_START+EW_Halo, 
     &                    (last_level-first_level+1), 
     &                    FIELD_INC(1, first_level),
     &                    GC_ROW_GROUP, info, 
     &                    SOUTH_POLE_INC(first_level))
      endif

      do k=first_level, last_level

        NORTH_POLE_INC(K) = NORTH_POLE_INC(K) * (ROW_LENGTH_RECIP * 2.0)
        SOUTH_POLE_INC(K) = SOUTH_POLE_INC(K) * (ROW_LENGTH_RECIP * 2.0)

        IF (at_top_of_LPG) THEN
          DO  I = TOP_ROW_START,TOP_ROW_START+ROW_LENGTH-1
            FIELD_INC(I, K) =  NORTH_POLE_INC(K)
          END DO
        endif
        IF (at_base_of_LPG) THEN
          DO  J = P_BOT_ROW_START,P_BOT_ROW_START+ROW_LENGTH-1
            FIELD_INC(J, K) = SOUTH_POLE_INC(K)
          END DO
        endif

C----------------------------------------------------------------------
CL    SECTION 2.7    UPDATE FIELD
CL---------------------------------------------------------------------

C UPDATE MASS WEIGHTING
        DO  I = START_POINT_NO_HALO,END_P_POINT_NO_HALO
          RS_SQUARED_DELTAP(I, K) = RS_SQUARED_DELTAP(I, K) +
     &          (COURANT_MW(I, K)-COURANT_MW(I-ROW_LENGTH, K))*
     &           SEC_P_LATITUDE(I)
        END DO

C     POLAR VALUES
        NORTH_POLE_INC(K) = 0.
        SOUTH_POLE_INC(K) = 0.

      end do

! Use reproducible vector sum of points on polar rows.
      IF (at_top_of_LPG) THEN
        CALL GCG_RVECSUMR(P_FIELD, ROW_LENGTH-2*EW_Halo,
     &                    TOP_ROW_START+EW_Halo, 
     &                    (last_level-first_level+1), 
     &                    COURANT_MW(1, first_level),
     &                    GC_ROW_GROUP, info,
     &                    NORTH_POLE_INC(first_level))
      endif
      IF (at_base_of_LPG) THEN
        CALL GCG_RVECSUMR(P_FIELD, ROW_LENGTH-2*EW_Halo,
     &                    U_BOT_ROW_START+EW_Halo, 
     &                    (last_level-first_level+1), 
     &                    COURANT_MW(1, first_level),
     &                    GC_ROW_GROUP, info, 
     &                    SOUTH_POLE_INC(first_level))

        do k=first_level, last_level
          SOUTH_POLE_INC(K) = -SOUTH_POLE_INC(K)
        end do

      endif

      do k=first_level, last_level

        NORTH_POLE_INC(K) = (NORTH_POLE_INC(K)*
     &                       SEC_P_LATITUDE(TOP_ROW_START))*
     &                      (ROW_LENGTH_RECIP*2.0)
        SOUTH_POLE_INC(K) = (SOUTH_POLE_INC(K)*
     &                       SEC_P_LATITUDE(P_BOT_ROW_START))*
     &                      (ROW_LENGTH_RECIP*2.0)
        IF (at_top_of_LPG) THEN
          DO  I = TOP_ROW_START,TOP_ROW_START+ROW_LENGTH-1
            RS_SQUARED_DELTAP(I, K) = RS_SQUARED_DELTAP(I, K) +
     &       NORTH_POLE_INC(K)
          END DO
        endif
        IF (at_base_of_LPG) THEN
          DO  J = P_BOT_ROW_START,P_BOT_ROW_START+ROW_LENGTH-1
            RS_SQUARED_DELTAP(J, K) = RS_SQUARED_DELTAP(J, K) +
     &       SOUTH_POLE_INC(K)
          END DO
        endif

C ADD INCREMENTS TO FIELD
        DO  I = FIRST_FLD_PT,LAST_P_FLD_PT
          FIELD(I, K) = FIELD(I, K)+
     &     FIELD_INC(I, K)*(SEC_P_LATITUDE(I) / RS_SQUARED_DELTAP(I, K))
        END DO

      end do

*ELSE
*I GSM6F404.9
*ENDIF
*/
*/
*/
*COMDECK TRACMAP
*IF DEF,MPP,AND,DEF,A11_1A,AND,DEF,A12_1E,AND,DEF,GLOBAL

      integer
     &  n_trac_decomp
     & ,trac_decomp                    ! Current tracer decomposition
     & ,send_map_dim1                  ! First dimension of send map
     & ,recv_map_dim1                  ! First dimension of recv map
     & ,send_map_dim2                  ! Second dimension of send map
     & ,recv_map_dim2                  ! Second dimension of recv map
     & ,trac_decomp_p_levels           ! Value of 'trac_decomp' for
                                       ! Tracer Advection over p_plevels
                                       ! - normally 1.
     & ,trac_decomp_q_levels           ! Value of 'trac_decomp' for
                                       ! Tracer Advection over q_plevels
                                       ! - either 1 or 2
     & ,trac_decomp_tracer             ! Value of 'trac_decomp' for
                                       ! Tracer Advection of Tracers
                                       ! - normally 3.
     & ,trac_decomp_murk               ! Value of 'trac_decomp' for
                                       ! Murk Advection
                                       ! - normally 4.
      parameter (n_trac_decomp=4)      ! Number of Tracer Advection
                                       ! decompositions, as follws:
                                       !
                                       ! 1.  1 to P_LEVELS
                                       ! 2.  1 to Q_LEVELS
                                       ! 3.  Tracer Advection Levels
                                       ! 4.  Aerosol Levels

      parameter (send_map_dim1=7, recv_map_dim1=7)

      integer trac_decomp_size(n_trac_decomp)
                                       ! Number of levels for each
                                       ! decomposition
      integer trac_decomp_f_level(n_trac_decomp)
                                       ! First level at which tracer
                                       ! advection is used for this
                                       ! decomposition

      integer, allocatable, target :: the_send_map(:, :, :)
                                       ! The map to send decomposed
                                       ! rows to a single PE for Load-
                                       ! Balancing Tracer Advection
      integer, allocatable, target :: the_recv_map(:, :, :)
                                       ! Pointer pass the address of
                                       ! srecv map via common

      integer, pointer :: send_map(:, :, :)
                                       ! Pointer pass the address of
                                       ! send map via common
      integer, pointer :: recv_map(:, :, :)
                                       ! The map to receive single
                                       ! rows back to decomposed rows
                                       ! for Tracer Advection Load-
                                       ! Balancing
      integer n_send(n_trac_decomp)    ! The number of send entries
      integer n_recv(n_trac_decomp)    ! The number of receive entries

      common /send_recv/ send_map, recv_map, n_send, n_recv

      integer max_advected_rows(n_trac_decomp)
                                       ! The maximum number
                                       ! of rows on my PE
      logical trac_decomp_valid(n_trac_decomp)
                                       ! Indicates if Tracer
                                       ! Decomposition is
                                       ! currently valid.

      logical trac_alloc_valid(n_trac_decomp)
                                       ! Indicates if the send/recv
                                       ! map has been calculated for
                                       ! a decomposition
      integer last_row(n_trac_decomp)  ! Number of rows to
                                       ! Advect on this PE
      integer, allocatable, target :: the_global_row(:, :)
     &                                 ! The global index to the
                                       ! rows on this PE
      integer, pointer :: global_row(:, :)
                                       ! A pointer to the global index 
                                       ! to the rows on this PE

      common /trac_size/ 
     & global_row, last_row, max_advected_rows,
     & trac_decomp, 
     & trac_decomp_p_levels, trac_decomp_q_levels,
     & trac_decomp_tracer, trac_decomp_murk,
     4 trac_decomp_size, trac_decomp_f_level, 
     & trac_decomp_valid, trac_alloc_valid

*ENDIF
*/
*/
*/
*DC ADVCTL1E
*C ADVCTL1E
*I ADVCTL1E.41
!     4.6    11/12/98 Add code to support load-balancing
!                     in tracer advection.
!                       Author:  EPCC and Bob Carruthers
*B ADVCTL1E.91
*IF DEF,MPP,AND,DEF,A11_1A,AND,DEF,A12_1E,AND,DEF,GLOBAL
     *, NSWEEP(2:glsize(2)-1,P_LEVELS) !IN No.of EW sweeps for all rows.
*ELSE
*I ADVCTL1E.91
*ENDIF
*I ADVCTL1E.169
*IF DEF,MPP,AND,DEF,A11_1A,AND,DEF,A12_1E,AND,DEF,GLOBAL

*CALL TRACMAP

*CALL T3ECLKTK

      integer t1, t2, t3

*ENDIF
*B ADVCTL1E.359
*IF DEF,MPP,AND,DEF,A11_1A,AND,DEF,A12_1E,AND,DEF,GLOBAL

      trac_decomp=trac_decomp_p_levels

      k=1
      CALL TRAC_ADV_L(1, P_LEVELS,
     &                send_map(1, 1, trac_decomp), 
     &                recv_map(1, 1, trac_decomp), 
     &                n_send(trac_decomp), n_recv(trac_decomp), 
     &                last_row(trac_decomp),
     &                global_row(1, trac_decomp), 
     &                max_advected_rows(trac_decomp), 
     &                send_map_dim1, send_map_dim2,
     &                recv_map_dim1, recv_map_dim2,
     &                THETAL(1,k),NSWEEP(2,k),U_MEAN(1,k),V_MEAN(1,k),
*ELSE
*I ADVCTL1E.360
*ENDIF
*B ADVCTL1E.367
*IF DEF,MPP,AND,DEF,A11_1A,AND,DEF,A12_1E,AND,DEF,GLOBAL
! END DO not needed
*ELSE
*I ADVCTL1E.367
*ENDIF
*B ADVCTL1E.484
*IF DEF,MPP,AND,DEF,A11_1A,AND,DEF,A12_1E,AND,DEF,GLOBAL

      trac_decomp=trac_decomp_q_levels

      k=1
      CALL TRAC_ADV_L(1, Q_LEVELS,
     &                send_map(1, 1, trac_decomp), 
     &                recv_map(1, 1, trac_decomp), 
     &                n_send(trac_decomp), n_recv(trac_decomp), 
     &                last_row(trac_decomp), 
     &                global_row(1, trac_decomp), 
     &                max_advected_rows(trac_decomp), 
     &                send_map_dim1, send_map_dim2,
     &                recv_map_dim1, recv_map_dim2,
     &                QT(1,k),NSWEEP(2,k),U_MEAN(1,k),V_MEAN(1,k),
*ELSE
*I ADVCTL1E.485
*ENDIF
*B ADVCTL1E.492
*IF DEF,MPP,AND,DEF,A11_1A,AND,DEF,A12_1E,AND,DEF,GLOBAL
! END DO not needed
*ELSE
*I ADVCTL1E.492
*ENDIF
*/
*/
*/
*DC ATMDYN1
*C ATMDYN1
*I ADM2F404.37
!    4.6   11/12/98   Add code to support load-balancing
!                     in tracer advection.
!                       Author:  EPCC and Bob Carruthers
*B ARB1F402.75
*IF DEF,MPP,AND,DEF,A11_1A,AND,DEF,A12_1E,AND,DEF,GLOBAL
     &       TRACER_EW_SWEEPS(2:glsize(2)-1,P_LEVELSDA)

*CALL TRACMAP

      integer 
     & row_decomp((glsize(2)-2)*glsize(3), n_trac_decomp)
                              ! Holds Load Balance decomposition
                              ! data - which processor owns a row

*CALL T3ECLKTK

      integer t1, t2, t3
*ELSE
*I ARB1F402.75
*ENDIF
*D ARB1F403.76,ARB1F403.83
          do  k = 1,p_levels
            write(6,*)' ATMDYN; level, tracer_ew_sweeps ',k,
*IF DEF,MPP
     &       0, (tracer_ew_sweeps(i,k),i=2,glsize(2)-1), 0
*ELSE
     &       (tracer_ew_sweeps(i,k),i=1,p_rows)
*ENDIF
          end do
*I ARB1F403.85
*IF DEF,MPP,AND,DEF,A11_1A,AND,DEF,A12_1E,AND,DEF,GLOBAL
!
! Only compute the Load Balance data for Tracer Advection on the
! first sweep of the dynamics
!
        if(i_count.eq.1 .and. i_loop.eq.1) then
          do trac_decomp=1, n_trac_decomp
            trac_decomp_valid(trac_decomp)=.false.
            trac_alloc_valid(trac_decomp)=.false.
          end do
        endif

!
! Setup the constants for Tracer Advection over p_levels
!
        trac_decomp_p_levels=1
        trac_decomp=trac_decomp_p_levels
        trac_decomp_f_level(trac_decomp)=1
        trac_decomp_size(trac_decomp)=p_levels
!
! Check if we are doing Tracer Advection over P_LEVELS
!
        if(l_tracer_thetal_qt .or. l_sulpc_so2 ) then
! Check if the p_levels data needs computing
          if(.not.trac_decomp_valid(trac_decomp)) then
!            t1=irtc()
            call allocate_rows(
     &       tracer_ew_sweeps(2, trac_decomp_f_level(trac_decomp)),
     &       glsize(2), trac_decomp_size(trac_decomp),
     &       row_decomp(1, trac_decomp), 
     &       nproc, mype, nproc_x, 
     &       max_advected_rows(trac_decomp))
!            t2=irtc()-t1
!            if(mype.eq.0) then
!              write(0,'(''Time to Allocate Rows =    '',f10.6,
!     &                  '' Seconds'')') real(t2)/ticks_per_second
!            endif
            trac_decomp_valid(trac_decomp)=.true.
          endif ! .not.trac_decomp_valid(trac_decomp)
        endif ! l_tracer_thetal_qt, etc (1...p_levels)
!
! Setup the constants for Tracer Advection over q_levels
!
        trac_decomp_q_levels=2
        trac_decomp=trac_decomp_q_levels
        trac_decomp_f_level(trac_decomp)=1
        trac_decomp_size(trac_decomp)=q_levels
!
! Check if we are doing Tracer Advection over Q_LEVELS
!
        if(l_tracer_thetal_qt .or. l_lspice) then
!
! Check if the Tracer Advection over Q_LEVELS can be mapped on to
! that for P_LEVELS
!
          if(q_levels.eq.p_levels) then
            trac_decomp_q_levels=1
          else
            trac_decomp_q_levels=2
          endif
          trac_decomp=trac_decomp_q_levels

! Check if the q_levels data needs computing
          if(.not.trac_decomp_valid(trac_decomp)) then
!            t1=irtc()
            call allocate_rows(
     &       tracer_ew_sweeps(2, trac_decomp_f_level(trac_decomp)),
     &       glsize(2), trac_decomp_size(trac_decomp),
     &       row_decomp(1, trac_decomp), 
     &       nproc, mype, nproc_x, 
     &       max_advected_rows(trac_decomp))
!            t2=irtc()-t1
!            if(mype.eq.0) then
!              write(0,'(''Time to Allocate Rows =    '',f10.6,
!     &                  '' Seconds'')') real(t2)/ticks_per_second
!            endif
            trac_decomp_valid(trac_decomp)=.true.
          endif ! .not.trac_decomp_valid(trac_decomp)
        endif ! l_tracer_thetal_qt, etc (1...q_levels)
!
! Setup the constants for Tracer Advection over q_levels
!
        trac_decomp_tracer=3
        trac_decomp=trac_decomp_tracer
        start_level=1+p_levels-tr_levels
        end_level=start_level+trac_adv_levels-1
        trac_decomp_f_level(trac_decomp)=start_level
        trac_decomp_size(trac_decomp)=end_level-start_level+1
!
! Check if we are doing Tracer Advection
!
        if (tr_vars.ne.0) then
!
! See if the tracer advection can be mapped on to the p_levels or
! q_levels - if not, select its own range, etc
!
          if(start_level.eq.1) then
            if(end_level.eq.p_levels) then
              trac_decomp_tracer=1
            else if(end_level.eq.q_levels) then
              trac_decomp_tracer=2
            else
              trac_decomp_tracer=3
            endif
          else
            trac_decomp_tracer=3
          endif
          trac_decomp=trac_decomp_tracer

! Check if the data for Tracers needs computing
          if(.not.trac_decomp_valid(trac_decomp)) then
!            t1=irtc()
            call allocate_rows(
     &       tracer_ew_sweeps(2, trac_decomp_f_level(trac_decomp)),
     &       glsize(2), trac_decomp_size(trac_decomp),
     &       row_decomp(1, trac_decomp), 
     &       nproc, mype, nproc_x, 
     &       max_advected_rows(trac_decomp))
!            t2=irtc()-t1
!            if(mype.eq.0) then
!              write(0,'(''Time to Allocate Rows =    '',f10.6,
!     &                  '' Seconds'')') real(t2)/ticks_per_second
!            endif
            trac_decomp_valid(trac_decomp)=.true.
          endif ! .not.trac_decomp_valid(trac_decomp)
        endif ! Tracers
!
! Setup the constants for Murk Advection over 'A_INTHD(13)' levels
!
        trac_decomp_murk=4
        trac_decomp=trac_decomp_murk
        start_level=1
        end_level=a_inthd(13)
        trac_decomp_f_level(trac_decomp)=start_level
        trac_decomp_size(trac_decomp)=end_level-start_level+1
!
! Check if we are doing murk Advection
!
        if (l_murk) then
!
! See if the murk advection can be mapped on to the p_levels, 
! q_levels, or murk advection levels - if not, select its own range
!
          if(start_level.eq.1) then
            if(end_level.eq.p_levels) then
              trac_decomp_murk=1
            else if(end_level.eq.q_levels) then
              trac_decomp_murk=2
            else
              trac_decomp_murk=4
            endif
          else
            trac_decomp_murk=4
          endif
          trac_decomp=trac_decomp_murk

! Check if the data for murk needs computing
          if(.not.trac_decomp_valid(trac_decomp)) then
!            t1=irtc()
            call allocate_rows(
     &       tracer_ew_sweeps(2, trac_decomp_f_level(trac_decomp)),
     &       glsize(2), trac_decomp_size(trac_decomp),
     &       row_decomp(1, trac_decomp), 
     &       nproc, mype, nproc_x, 
     &       max_advected_rows(trac_decomp))
!            t2=irtc()-t1
!            if(mype.eq.0) then
!              write(0,'(''Time to Allocate Rows =    '',f10.6,
!     &                  '' Seconds'')') real(t2)/ticks_per_second
!            endif
            trac_decomp_valid(trac_decomp)=.true.
          endif ! .not.trac_decomp_valid(trac_decomp)
        endif ! murk

! Check we if we need to allocate the send map
        if(.not.allocated(the_send_map)) then
! I cannot send more rows than I hold!
          send_map_dim2=(p_field/row_length)*p_levels
          allocate(the_send_map(send_map_dim1, send_map_dim2, 
     &     n_trac_decomp))
          send_map=>the_send_map
        endif

! Check we if we need to allocate the recv map
        if(.not.allocated(the_recv_map)) then
! For the receive map, work out the maximum number of rows I will
! compute.  I cannot receive more rows than I am to compute!
          recv_map_dim2=0
! Find the maximum number of rows I can receive
          do trac_decomp=1, n_trac_decomp
            if(trac_decomp_valid(trac_decomp)) then
! Each advected row comes from 'nproc_x' processors
              recv_map_dim2=max(recv_map_dim2,
     &         max_advected_rows(trac_decomp)*nproc_x)
            endif
          end do
          allocate(the_recv_map(recv_map_dim1, recv_map_dim2, 
     &     n_trac_decomp))
          recv_map=>the_recv_map
        endif

! Check we if we need to allocate the global_row array
        if(.not.allocated(the_global_row)) then
          allocate(the_global_row((glsize(2)-2)*p_levels, 
     &     n_trac_decomp))
          global_row=>the_global_row
        endif

!
! Now loop over decompositions, working out the send/recv maps
!
        do trac_decomp=1, n_trac_decomp
! Do we need to compute this map?
          if(trac_decomp_valid(trac_decomp) .and.
     &     (.not.trac_alloc_valid(trac_decomp))) then
!            t1=irtc()
! Compute the send/recv map for this decomposition
            call calc_lookup(
     &       glsize(2), trac_decomp_size(trac_decomp),
     &       row_decomp(1, trac_decomp), 
     &       first_fld_pt,
     &       send_map(1, 1, trac_decomp), 
     &       recv_map(1, 1, trac_decomp), 
     &       n_send(trac_decomp), n_recv(trac_decomp), 
     &       last_row(trac_decomp), global_row(1, trac_decomp), 
     &       send_map_dim1, send_map_dim2,
     &       recv_map_dim1, recv_map_dim2)
!            t3=irtc()-t1
!            if(mype.eq.0) then
!              write(0,'(''Time to Calculate Lookup = '',f10.6,
!     &                  '' Seconds'')') 
!     &         real(t3)/ticks_per_second
!            endif
! indicate that this decomposition has a send/recv table
            trac_alloc_valid(trac_decomp)=.true.
          endif ! trac_decomp_valid .and .not.trac_alloc_valid 
        end do ! do trac_decomp=1, n_trac_decomp

*ENDIF
*B ATMDYN1.402
*IF DEF,MPP,AND,DEF,A11_1A,AND,DEF,A12_1E,AND,DEF,GLOBAL

         trac_decomp=trac_decomp_tracer

         k=start_level
          CALL TRAC_ADV_L(start_level, end_level,
     &                  send_map(1, 1, trac_decomp), 
     &                  recv_map(1, 1, trac_decomp), 
     &                  n_send(trac_decomp), n_recv(trac_decomp), 
     &                  last_row(trac_decomp), 
     &                  global_row(1, trac_decomp),
     &                  max_advected_rows(trac_decomp),
     &                  send_map_dim1, send_map_dim2,
     &                  recv_map_dim1, recv_map_dim2,
     &                  D1(JTRACER(K,VAR)),TRACER_EW_SWEEPS(2,k),
*ELSE
*I ATMDYN1.404
*ENDIF
*B ATMDYN1.413
*IF DEF,MPP,AND,DEF,A11_1A,AND,DEF,A12_1E,AND,DEF,GLOBAL
! END DO not needed
*ELSE
*I ATMDYN1.413
*ENDIF
*I APC1F304.33
*IF DEF,MPP,AND,DEF,A11_1A,AND,DEF,A12_1E,AND,DEF,GLOBAL

            trac_decomp=trac_decomp_murk

            k=1
              CALL TRAC_ADV_L(start_level, end_level,
     &                  send_map(1, 1, trac_decomp), 
     &                  recv_map(1, 1, trac_decomp), 
     &                  n_send(trac_decomp), n_recv(trac_decomp), 
     &                  last_row(trac_decomp), 
     &                  global_row(1, trac_decomp),
     &                  max_advected_rows(trac_decomp),
     &                  send_map_dim1, send_map_dim2,
     &                  recv_map_dim1, recv_map_dim2,
     &                  D1(JMURK(K)),TRACER_EW_SWEEPS(2,K),
*ELSE
*I APC1F304.36
*ENDIF
*B APC1F304.45
*IF DEF,MPP,AND,DEF,A11_1A,AND,DEF,A12_1E,AND,DEF,GLOBAL
! END DO not needed
*ELSE
*I APC1F304.45
*ENDIF
*I ADM2F404.151
*IF DEF,MPP,AND,DEF,A11_1A,AND,DEF,A12_1E,AND,DEF,GLOBAL

          trac_decomp=trac_decomp_q_levels

          k=1
            CALL TRAC_ADV_L(1, q_levels,
     &                  send_map(1, 1, trac_decomp), 
     &                  recv_map(1, 1, trac_decomp), 
     &                  n_send(trac_decomp), n_recv(trac_decomp), 
     &                  last_row(trac_decomp), 
     &                  global_row(1, trac_decomp),
     &                  max_advected_rows(trac_decomp),
     &                  send_map_dim1, send_map_dim2,
     &                  recv_map_dim1, recv_map_dim2,
     &                  D1(JQCF(K)),TRACER_EW_SWEEPS(2,k),
*ELSE
*I ADM2F404.154
*ENDIF
*B ADM2F404.164
*IF DEF,MPP,AND,DEF,A11_1A,AND,DEF,A12_1E,AND,DEF,GLOBAL
! END DO not needed
*ELSE
*I ADM2F404.164
*ENDIF
*B AWO2F401.45
*IF DEF,MPP,AND,DEF,A11_1A,AND,DEF,A12_1E,AND,DEF,GLOBAL

            trac_decomp=trac_decomp_p_levels

            k=1
              CALL TRAC_ADV_L(start_level, end_level,
     &                  send_map(1, 1, trac_decomp), 
     &                  recv_map(1, 1, trac_decomp), 
     &                  n_send(trac_decomp), n_recv(trac_decomp), 
     &                  last_row(trac_decomp), 
     &                  global_row(1, trac_decomp),
     &                  max_advected_rows(trac_decomp),
     &                  send_map_dim1, send_map_dim2,
     &                  recv_map_dim1, recv_map_dim2,
     &                  D1(JSO2(K)),TRACER_EW_SWEEPS(2,k),
*ELSE
*I AWO2F401.46
*ENDIF
*B AWO2F401.54
*IF DEF,MPP,AND,DEF,A11_1A,AND,DEF,A12_1E,AND,DEF,GLOBAL
! END DO not needed
*ELSE
*I AWO2F401.54
*ENDIF
*B AWO2F405.12
*IF DEF,MPP,AND,DEF,A11_1A,AND,DEF,A12_1E,AND,DEF,GLOBAL

            trac_decomp=trac_decomp_p_levels

            k=1
              CALL TRAC_ADV_L(start_level, end_level,
     &                  send_map(1, 1, trac_decomp), 
     &                  recv_map(1, 1, trac_decomp), 
     &                  n_send(trac_decomp), n_recv(trac_decomp), 
     &                  last_row(trac_decomp), 
     &                  global_row(1, trac_decomp),
     &                  max_advected_rows(trac_decomp),
     &                  send_map_dim1, send_map_dim2,
     &                  recv_map_dim1, recv_map_dim2,
     &                  D1(JNH3(K)),TRACER_EW_SWEEPS(2,k),
*ELSE
*I AWO2F405.13
*ENDIF
*B AWO2F405.22
*IF DEF,MPP,AND,DEF,A11_1A,AND,DEF,A12_1E,AND,DEF,GLOBAL
! END DO not needed
*ELSE
*I AWO2F405.22
*ENDIF
*B AWO2F401.84
*IF DEF,MPP,AND,DEF,A11_1A,AND,DEF,A12_1E,AND,DEF,GLOBAL

            trac_decomp=trac_decomp_p_levels

            k=1
              CALL TRAC_ADV_L(start_level, end_level,
     &                  send_map(1, 1, trac_decomp), 
     &                  recv_map(1, 1, trac_decomp), 
     &                  n_send(trac_decomp), n_recv(trac_decomp), 
     &                  last_row(trac_decomp), 
     &                  global_row(1, trac_decomp),
     &                  max_advected_rows(trac_decomp),
     &                  send_map_dim1, send_map_dim2,
     &                  recv_map_dim1, recv_map_dim2,
     &                  D1(JSO4_AITKEN(K)),TRACER_EW_SWEEPS(2,k),
*ELSE
*I AWO2F401.85
*ENDIF
*B AWO2F401.93
*IF DEF,MPP,AND,DEF,A11_1A,AND,DEF,A12_1E,AND,DEF,GLOBAL
! END DO not needed
*ELSE
*I AWO2F401.93
*ENDIF
*B AWO2F401.123
*IF DEF,MPP,AND,DEF,A11_1A,AND,DEF,A12_1E,AND,DEF,GLOBAL

            trac_decomp=trac_decomp_p_levels

            k=1
              CALL TRAC_ADV_L(start_level, end_level,
     &                  send_map(1, 1, trac_decomp), 
     &                  recv_map(1, 1, trac_decomp), 
     &                  n_send(trac_decomp), n_recv(trac_decomp), 
     &                  last_row(trac_decomp), 
     &                  global_row(1, trac_decomp),
     &                  max_advected_rows(trac_decomp),
     &                  send_map_dim1, send_map_dim2,
     &                  recv_map_dim1, recv_map_dim2,
     &                  D1(JSO4_ACCU(K)),TRACER_EW_SWEEPS(2,k),
*ELSE
*I AWO2F401.124
*ENDIF
*B AWO2F401.132
*IF DEF,MPP,AND,DEF,A11_1A,AND,DEF,A12_1E,AND,DEF,GLOBAL
! END DO not needed
*ELSE
*I AWO2F401.132
*ENDIF
*B AWO2F401.162
*IF DEF,MPP,AND,DEF,A11_1A,AND,DEF,A12_1E,AND,DEF,GLOBAL

            trac_decomp=trac_decomp_p_levels

            k=1
              CALL TRAC_ADV_L(start_level, end_level,
     &                  send_map(1, 1, trac_decomp), 
     &                  recv_map(1, 1, trac_decomp), 
     &                  n_send(trac_decomp), n_recv(trac_decomp), 
     &                  last_row(trac_decomp), 
     &                  global_row(1, trac_decomp),
     &                  max_advected_rows(trac_decomp),
     &                  send_map_dim1, send_map_dim2,
     &                  recv_map_dim1, recv_map_dim2,
     &                  D1(JSO4_DISS(K)),TRACER_EW_SWEEPS(2,k),
*ELSE
*I AWO2F401.163
*ENDIF
*B AWO2F401.171
*IF DEF,MPP,AND,DEF,A11_1A,AND,DEF,A12_1E,AND,DEF,GLOBAL
! END DO not needed
*ELSE
*I AWO2F401.171
*ENDIF
*B AWO2F401.203
*IF DEF,MPP,AND,DEF,A11_1A,AND,DEF,A12_1E,AND,DEF,GLOBAL

            trac_decomp=trac_decomp_p_levels

            k=1
              CALL TRAC_ADV_L(start_level, end_level,
     &                  send_map(1, 1, trac_decomp), 
     &                  recv_map(1, 1, trac_decomp), 
     &                  n_send(trac_decomp), n_recv(trac_decomp), 
     &                  last_row(trac_decomp), 
     &                  global_row(1, trac_decomp),
     &                  max_advected_rows(trac_decomp),
     &                  send_map_dim1, send_map_dim2,
     &                  recv_map_dim1, recv_map_dim2,
     &                  D1(JDMS(K)),TRACER_EW_SWEEPS(2,k),
*ELSE
*I AWO2F401.204
*ENDIF
*B AWO2F401.212
*IF DEF,MPP,AND,DEF,A11_1A,AND,DEF,A12_1E,AND,DEF,GLOBAL
! END DO not needed
*ELSE
*I AWO2F401.212
*ENDIF
*B AWO2F405.87
*IF DEF,MPP,AND,DEF,A11_1A,AND,DEF,A12_1E,AND,DEF,GLOBAL

            trac_decomp=trac_decomp_p_levels

            k=1
              CALL TRAC_ADV_L(start_level, end_level,
     &                  send_map(1, 1, trac_decomp),
     &                  recv_map(1, 1, trac_decomp),
     &                  n_send(trac_decomp), n_recv(trac_decomp),
     &                  last_row(trac_decomp),
     &                  global_row(1, trac_decomp),
     &                  max_advected_rows(trac_decomp),
     &                  send_map_dim1, send_map_dim2,
     &                  recv_map_dim1, recv_map_dim2,
     &                  D1(JSOOT_NEW(K)),TRACER_EW_SWEEPS(2,k),
*ELSE
*I AWO2F405.88
*ENDIF
*B AWO2F405.98
*IF DEF,MPP,AND,DEF,A11_1A,AND,DEF,A12_1E,AND,DEF,GLOBAL
! END DO not needed
*ELSE
*I AWO2F405.98
*ENDIF
*B AWO2F405.162
*IF DEF,MPP,AND,DEF,A11_1A,AND,DEF,A12_1E,AND,DEF,GLOBAL

            trac_decomp=trac_decomp_p_levels

            k=1
              CALL TRAC_ADV_L(start_level, end_level,
     &                  send_map(1, 1, trac_decomp),
     &                  recv_map(1, 1, trac_decomp),
     &                  n_send(trac_decomp), n_recv(trac_decomp),
     &                  last_row(trac_decomp),
     &                  global_row(1, trac_decomp),
     &                  max_advected_rows(trac_decomp),
     &                  send_map_dim1, send_map_dim2,
     &                  recv_map_dim1, recv_map_dim2,
     &                  D1(JSOOT_AGD(K)),TRACER_EW_SWEEPS(2,k),
*ELSE
*I AWO2F405.163
*ENDIF
*B AWO2F405.173
*IF DEF,MPP,AND,DEF,A11_1A,AND,DEF,A12_1E,AND,DEF,GLOBAL
! END DO not needed
*ELSE
*I AWO2F405.173
*ENDIF
*B AWO2F405.237
*IF DEF,MPP,AND,DEF,A11_1A,AND,DEF,A12_1E,AND,DEF,GLOBAL

            trac_decomp=trac_decomp_p_levels

            k=1
              CALL TRAC_ADV_L(start_level, end_level,
     &                  send_map(1, 1, trac_decomp),
     &                  recv_map(1, 1, trac_decomp),
     &                  n_send(trac_decomp), n_recv(trac_decomp),
     &                  last_row(trac_decomp),
     &                  global_row(1, trac_decomp),
     &                  max_advected_rows(trac_decomp),
     &                  send_map_dim1, send_map_dim2,
     &                  recv_map_dim1, recv_map_dim2,
     &                  D1(JSOOT_CLD(K)),TRACER_EW_SWEEPS(2,k),
*ELSE
*I AWO2F405.238
*ENDIF
*B AWO2F405.248
*IF DEF,MPP,AND,DEF,A11_1A,AND,DEF,A12_1E,AND,DEF,GLOBAL
! END DO not needed
*ELSE
*I AWO2F405.248
*ENDIF
*B ACN2F405.151
*IF DEF,MPP,AND,DEF,A11_1A,AND,DEF,A12_1E,AND,DEF,GLOBAL

            trac_decomp=trac_decomp_p_levels

            k=1
              CALL TRAC_ADV_L(start_level, end_level,
     &                  send_map(1, 1, trac_decomp),
     &                  recv_map(1, 1, trac_decomp),
     &                  n_send(trac_decomp), n_recv(trac_decomp),
     &                  last_row(trac_decomp),
     &                  global_row(1, trac_decomp),
     &                  max_advected_rows(trac_decomp),
     &                  send_map_dim1, send_map_dim2,
     &                  recv_map_dim1, recv_map_dim2,
     &                  D1(JCO2(K)),TRACER_EW_SWEEPS(2,k),
*ELSE
*I ACN2F405.152
*ENDIF
*B ACN2F405.161
*IF DEF,MPP,AND,DEF,A11_1A,AND,DEF,A12_1E,AND,DEF,GLOBAL
! END DO not needed
*ELSE
*I ACN2F405.161
*ENDIF
*I GPB3F403.30
*IF DEF,MPP,AND,DEF,A11_1A,AND,DEF,A12_1E,AND,DEF,GLOBAL

! Check we if we need to de-allocate the send map
        if(allocated(the_send_map)) then
          deallocate(the_send_map)
        endif
! Check we if we need to de-allocate the recv map
        if(allocated(the_recv_map)) then
          deallocate(the_recv_map)
        endif
! Check we if we need to de-allocate the global_row array
        if(allocated(the_global_row)) then
          deallocate(the_global_row)
        endif
*ENDIF
*/
*/
*/
*DC SETTRA1A
*C SETTRA1A
*I ARB1F403.7
!    4.6  11/12/98  Add code to support load-balancing
!                   in tracer advection.
!                     Author:  EPCC and Bob Carruthers
*B ARB1F402.283
*IF DEF,MPP,AND,DEF,A11_1A,AND,DEF,A12_1E,AND,DEF,GLOBAL
     & TRACER_EW_SWEEPS(2:glsize(2)-1,P_LEVELS) ! OUT.
*ELSE
*I ARB1F402.283
*ENDIF
*D ARB1F402.296
     &, i_global_row ! row number in global array ALL_EW_SWEEPS
*D ARB1F402.338
        i_global_row = J+datastart(2)-Offy-1
*D ARB1F402.340
          ALL_EW_SWEEPS(K,i_global_row) = LOCAL_EW_SWEEPS(K,J)
*B ARB1F402.371
*IF DEF,MPP,AND,DEF,A11_1A,AND,DEF,A12_1E,AND,DEF,GLOBAL
      DO  J = 2,glsize(2)-1
*ELSE
*I ARB1F402.371
*ENDIF
*/
*/
*/
*DECK ALL_RW1A
*IF DEF,A11_1A,AND,DEF,A12_1E,AND,DEF,GLOBAL,AND,DEF,MPP
C ******************************COPYRIGHT******************************
C (c) CROWN COPYRIGHT 1999, METEOROLOGICAL OFFICE, All Rights Reserved.
C
C Use, duplication or disclosure of this code is subject to the
C restrictions as set forth in the contract.
C
C                Meteorological Office
C                London Road
C                BRACKNELL
C                Berkshire UK
C                RG12 2SZ
C 
C If no contract has been raised with this copy of the code, the use,
C duplication or disclosure of it is strictly prohibited.  Permission
C to do so must first be obtained in writing from the Head of Numerical
C Modelling at the above address.
C ******************************COPYRIGHT******************************
C
CLL Subroutine ALLOCATE_ROWS ------------------------------------------
CLL
CLL Purpose :
CLL  
CLL  Sort the rows based on the number sweeps required for Tracer
CLL  Advection, and then allocate each row in turn to the processor 
CLL  which has the smallest number of sweeps to perform currently.
CLL
CLL
CLL  Author:  Original version from EPCC, and then heavily modified
CLL           to work over rows and levels.
CLL             Bob Carruthers, Cray Research.   Date: 29 July 1999
CLL
CLL Version Date      Modification history
CLL
CLL  -------------------------------------------------------------------
C*L  Interface and arguments: ------------------------------------------

C
C*L   ARGUMENTS:-------------------------------------------------------

      subroutine allocate_rows(sweep, nrows, nlevels, row_decomp, 
     &     nproc, my_pe, nproc_x, max_advected_rows)
      
      IMPLICIT NONE

      INTEGER 
     &  nrows              ! IN   Number of rows
     & ,nlevels            ! IN   Number of levels over which to apply
                           !      Tracer Advection
     & ,nproc              ! IN   The total number of Processors
     & ,my_pe              ! IN   My PE Number
     & ,nproc_x            ! IN   The number of processors along X (E-W)
     & ,max_advected_rows  ! OUT  The Maximum Number of rows to be 
                           !      advected by any one processor
     & ,sweep((nrows-2)*nlevels)
                           ! IN   The number of sweeps reqired for each
                           !      row
     & ,row_decomp((nrows-2)*nlevels)
                           ! OUT  The processor that will advect each
                           !      row

C*---------------------------------------------------------------------

C*L   DEFINE ARRAYS AND VARIABLES USED IN THIS ROUTINE-----------------

      INTEGER 
     &  order((nrows-2)*nlevels)
                           ! This array gives the position of the sorted
                           ! rows.  For example, 'order(2)' gives the
                           ! position of the largest, 'order(3)' the
                           ! next largest, etc.
     & ,proc               ! Loop index over processors.
     & ,proc_sweep(0:nproc-1)
                           ! Current Number of sweeps per
                           ! processor
     & ,proc_rows(0:nproc-1)
                           ! Current Number of rows per
                           ! processor
     & ,level              ! Loop index over levels
     & ,i,j,k
     & ,flag               ! Indicates if a row has been allocated
                           ! (ALLOCATED) to a processor or not
                           ! (NOT_ALLOCATED)
     & ,ALLOCATED          ! Allocated value for FLAG
     & ,NOT_ALLOCATED      ! Not allocated value for FLAG
     & ,min_proc           ! Current processor with the minimum
                           ! number of sweeps
     & ,min_proc_sweep     ! Current Minimum number of sweeps
     & ,n

      parameter (ALLOCATED = 0, NOT_ALLOCATED = 1)

*CALL T3ECLKTK

      integer t1, t2

! must reorder the rows - put into decreasing order in
! array order (which contains row)
! Note that the polar rows are not advected E-W and so should
! not be allocated

      do i=0, nproc-1
        proc_sweep(i) = 0
        proc_rows(i)=0
      end do

!      t1=irtc()
      call sort_sweep(nrows, nlevels, sweep, order)
!      t2=irtc()-t1

!  now allocate the new distribution

      do i=1, (nrows-2)*nlevels
        row_decomp(i)=-999999
      end do

!
! In turn, allocate each row to the next processor with
! the smallest number of rows to process already
!
      do i=1, (nrows-2)*nlevels
! Set MIN_PROC_SWEEP to some ridiculously huge number
        min_proc_sweep = 999999
! Loop over processors
        do proc=0, nproc-1
          if(proc_sweep(proc).lt.min_proc_sweep) then
            min_proc = proc
            min_proc_sweep=proc_sweep(proc)
          endif
        end do ! end loop over processors

        if(min_proc_sweep.ne.999999) then
          row_decomp(order(i)) = min_proc
          proc_sweep(min_proc) = proc_sweep(min_proc)
     &                         + sweep(order(i))
          proc_rows(min_proc)=proc_rows(min_proc)+1
        else

! The allocation has failed
! At the moment call abort
! What we should do is resort to a static allocation instead.
          CALL GC_ABORT(my_pe,nproc,'failed in data allocation')

        endif
      end do ! end loop over rows and levels

!
! Compute the maximum number of rows that a processor can advect
!
      max_advected_rows=0
      do i=0, nproc-1
        max_advected_rows=max(max_advected_rows, proc_rows(i))
!         n=0
!         do j=1, (nrows-2)*nlevels
!           if(row_decomp(j).eq.i-1) n=n+1
!         end do ! end loop over processors
!         max_advected_rows=max(max_advected_rows, n)
      end do ! end loop over levels

!      if(my_pe.eq.0) then
!        write(0,*) 'ALLOCATE_ROWS: MAX_SWEEPS/PE = ',
!     2   max_sweeps
!        write(0,*) 'ALLOCATE_ROWS: MAX_ADVECTED_ROWS/PE = ',
!     2   max_advected_rows
!        do i=1, (nrows-2)*nlevels
!          write(0,*) i, sweep(i), order(i), sweep(order(i))
!        end do
!        write(0,'(''Time to Sort Rows =        '',f10.6,'' Seconds'')
!     &   ') real(t2)/ticks_per_second
!      endif
      END
*ENDIF
*/
*/
*/
*DECK SRT_SW1A
*IF DEF,A11_1A,AND,DEF,A12_1E,AND,DEF,GLOBAL,AND,DEF,MPP
C ******************************COPYRIGHT******************************
C (c) CROWN COPYRIGHT 1999, METEOROLOGICAL OFFICE, All Rights Reserved.
C
C Use, duplication or disclosure of this code is subject to the
C restrictions as set forth in the contract.
C
C                Meteorological Office
C                London Road
C                BRACKNELL
C                Berkshire UK
C                RG12 2SZ
C 
C If no contract has been raised with this copy of the code, the use,
C duplication or disclosure of it is strictly prohibited.  Permission
C to do so must first be obtained in writing from the Head of Numerical
C Modelling at the above address.
C ******************************COPYRIGHT******************************
C
CLL Subroutine SORT_SWEEP ---------------------------------------------
CLL
CLL Purpose :
CLL  
CLL   Modified HPSORT from Numerical Recipes in FORTRAN to sort
CLL   the rows so that the first entry contains the row with
CLL   the maximum number of sweeps required, the second entry
CLL   contains the row with the next largest, etc.  Rather than
CLL   move the rows about in 'sweep', this routine produces
CLL   the order vector 'order', such that order(1) holds the
CLL   position in 'sweep' of the row with the largest number
CLL   of sweeps, and so on.  The sort is over all the rows
CLL   (minus the poar rows), and all the levels.
CLL  
CLL   The original routine moved the data, but 'order' gives access
CLL   to other Variables apart from 'sweep'.
CLL  
CLL
CLL  Author:  Bob Carruthers, Cray Research.   Date: 29 July 1999
CLL
CLL Version Date      Modification history
CLL
CLL  -------------------------------------------------------------------
C*L  Interface and arguments: ------------------------------------------

      subroutine sort_sweep(nrows, nlevels, sweep, order)
      implicit none
c
      integer
     &  nrows              ! IN   Number of rows
     & ,nlevels            ! IN   Number of levels over which to apply
                           !      Tracer Advection
     & ,sweep((nrows-2)*nlevels)
                           ! IN   The number of sweeps required for each
                           !      row
     & ,order((nrows-2)*nlevels)
                           ! OUT  This array gives the position of the
                           !      sorted rows.  For example, 'order(1)'
                           !      gives the position of the largest,
                           !      'order(2)' the next largest, etc.

C*---------------------------------------------------------------------

C*L   DEFINE ARRAYS AND VARIABLES USED IN THIS ROUTINE-----------------

      integer
     &  i,ir,j,l,n,i_order

      n=(nrows-2)*nlevels

      do i=1, n
        order(i)=i
      end do

      if(n.lt.2) return

      l=n/2+1
      ir=n

10    continue

      if(l.gt.1) then
        l=l-1
        i_order=order(l)
      else
        i_order=order(ir)
        order(ir)=order(1)
        ir=ir-1
        if(ir.eq.1) then
          order(1)=i_order
          return
        endif
      endif

      i=l
      j=l+1

20    continue

      if(j.le.ir) then
        if(j.lt.ir) then
          if(sweep(order(j)).lt.sweep(order(j+1))) j=j+1
        endif
        if(sweep(i_order).lt.sweep(order(j))) then
          order(i)=order(j)
          i=j
          j=j+j
        else
          j=ir+1
        endif
        goto 20
      endif
      order(i)=i_order
      goto 10
      end

*ENDIF
*/
*/
*/
*DECK C_L_U1A
*IF DEF,A11_1A,AND,DEF,A12_1E,AND,DEF,GLOBAL,AND,DEF,MPP
C ******************************COPYRIGHT******************************
C (c) CROWN COPYRIGHT 1999, METEOROLOGICAL OFFICE, All Rights Reserved.
C
C Use, duplication or disclosure of this code is subject to the
C restrictions as set forth in the contract.
C
C                Meteorological Office
C                London Road
C                BRACKNELL
C                Berkshire UK
C                RG12 2SZ
C 
C If no contract has been raised with this copy of the code, the use,
C duplication or disclosure of it is strictly prohibited.  Permission
C to do so must first be obtained in writing from the Head of Numerical
C Modelling at the above address.
C ******************************COPYRIGHT******************************
C
CLL Subroutine CALC_LOOKUP --------------------------------------------
CLL
CLL Purpose :
CLL  
CLL  Calculate the lookup tables required to redistribute rows
CLL  to achieve better load balance during Tracer Advection.
CLL  
CLL
CLL  Author:  Original version from EPCC, and then heavily modified
CLL           to work over rows and levels.
CLL             Bob Carruthers, Cray Research.   Date: 29 July 1999
CLL
CLL Version Date      Modification history
CLL
CLL  -------------------------------------------------------------------
C*L  Interface and arguments: ------------------------------------------
C
C*L   ARGUMENTS:-------------------------------------------------------

      subroutine calc_lookup(nrows, nlevels, row_decomp, first_field_pt,
     & send_map, recv_map, n_send, n_recv, last_row, global_row,
     & send_dim1, send_dim2, recv_dim1, recv_dim2)

      implicit none

      integer 
     &  nrows              ! IN   Number of rows
     & ,nlevels            ! IN   Number of levels over which to apply
     & ,row_decomp((nrows-2)*nlevels)
                           ! IN   The processor that will advect each
                           !      row
     & ,first_field_pt     ! IN   Position of the first data point in
                           !      the field to be distributed
     & ,send_dim1          ! IN   First dimension of send map
     & ,send_dim2          ! IN   Second dimension of send map
     & ,recv_dim1          ! IN   First dimension of recv map
     & ,recv_dim2          ! IN   Second dimension of recv map
     & ,send_map(send_dim1, send_dim2)
                           ! OUT  The calculated send map
     & ,recv_map(recv_dim1, recv_dim2)
                           ! OUT  The calculated receive map
     & ,n_recv             ! OUT  The number of receive entries found
     & ,n_send             ! OUT  The number of send entries found
     & ,last_row           ! OUT  The Maximum number of rows for this
                           !      PE
     & ,global_row((nrows-2)*nlevels)
                           ! OUT  The global index to the rows on this
                           !      PE

*CALL PARVARS   

C*---------------------------------------------------------------------

C*L   DEFINE ARRAYS AND VARIABLES USED IN THIS ROUTINE-----------------
      
      integer 
     &  proc                ! Index loop over processors
     & ,i,J
     & ,local_row(0:maxproc)
                            ! Number of rows for each processor
     & ,row_in_level        ! Current row number (2 to nrows-1)
     & ,row_index_over_levels
                            ! Current row number over all levels
                            ! (1 to (nrows-2)*nlevels))
     & ,which_proc          ! Current processor number in search to
                            ! find which PE has part of a row
     & ,row_length          ! Row length, including Haloes, on PE
                            ! that has a part of the current row
     & ,level               ! Current level in the range 0 to nlevels-1
     & ,recv_base           ! Base address in the full on the PE
                            ! doing the sweeps.
     & ,send_base           ! Base address in the distributed row

!   END OF DECLARATIONS

      n_send = 0
      n_recv = 0
      do i=0, maxproc
        local_row(i) = 0
      end do
      last_row = 0

! Find the processor that will receive the row

      do row_index_over_levels=1, (nrows-2)*nlevels
        proc=row_decomp(row_index_over_levels)
        local_row(proc) = local_row(proc)+1

        if(proc.eq.mype) then
          last_row = last_row+1
          global_row(last_row) = row_index_over_levels
        endif

        IF(last_row.gt.(nrows-2)*nlevels) then
          call gc_abort(mype,nproc,'overfill on tracer
     &              lookup table')
        endif

! Find the processors that contain this row

        level=(row_index_over_levels-1)/(nrows-2)
        row_in_level=row_index_over_levels-level*(nrows-2)+1

        do which_proc=0, nproc-1
          if(row_in_level.ge.g_datastart(2, which_proc) .and.
     &       row_in_level.lt.g_datastart(2, which_proc)+
     &              g_blsizep(2, which_proc)) then

! 'which_proc' contains part of this row

            if(which_proc.eq.mype) then
              n_send = n_send+1
            endif

            if(proc.eq.mype) then
              n_recv = n_recv+1
            endif

            row_length = g_blsizep(1, which_proc)+2*offx
                  
! Calculate position of data to send 
                  
            send_base = 1 +
     &                 (row_in_level-g_datastart(2, which_proc)+offy)
     &                 *row_length
     &                 +Offx
     &                 +level*g_lasize(1, which_proc)
     &                       *g_lasize(2, which_proc)


! Calculate position of data to recieve
                  
             recv_base = (local_row(proc)-1)*glsize(1)+
     &                    g_datastart(1, which_proc)

! Calculate the appropriate entry in the send map

            if(which_proc.eq.mype) then
              send_map(1,n_send) = proc
              send_map(2,n_send) = send_base
              send_map(3,n_send) = 1
              send_map(4,n_send) = row_length
              send_map(5,n_send) = g_blsizep(1, which_proc)
              send_map(6,n_send) = recv_base
              send_map(7,n_send) = glsize(1)
            endif

! Calculate the appropriate entry in the receive map

            if(proc.eq.mype) then
              recv_map(1,n_recv) = which_proc
              recv_map(2,n_recv) = recv_base
              recv_map(3,n_recv) = 1
              recv_map(4,n_recv) = glsize(1)
              recv_map(5,n_recv) = g_blsizep(1, which_proc)
              recv_map(6,n_recv) = send_base
              recv_map(7,n_recv) = row_length
            endif
          endif ! end if 'which_proc' has part of this row
        end do ! end of loop over processors - 'which_proc'
      end do ! end over global_rows - 'row_index_over_levels'

      return 

      end
*ENDIF
*/
*/
*/
