MODULE letkf_tools
!=======================================================================
!
! [PURPOSE:] Module for LETKF with AFES
!
! [HISTORY:]
!   01/26/2009 Takemasa Miyoshi  created
!
!=======================================================================
!$USE OMP_LIB
  USE common
  USE common_mpi
  USE common_obs
  USE common_miroc
  USE common_mpi_miroc
  USE common_obs_miroc
  USE common_tvs_miroc
  USE common_letkf
  use letkf_obs

  IMPLICIT NONE

  PRIVATE
  PUBLIC ::  das_letkf

  INTEGER,SAVE :: nobstotal

  INTEGER,PARAMETER :: nlev_dampinfl = 15 ! levels for vertical inflation damping
  REAL(r_size),PARAMETER :: sp_inflation = 0.15d0 !SQRT of cov infl
  REAL(r_size),PARAMETER :: sp_infl_additive = 0.d0 !additive inflation
  LOGICAL,PARAMETER :: msw_vbc = .FALSE.

CONTAINS
!-----------------------------------------------------------------------
! Data Assimilation
!-----------------------------------------------------------------------
SUBROUTINE das_letkf(gues3d,gues2d,anal3d,anal2d)
  IMPLICIT NONE
  REAL(r_size),INTENT(INOUT) :: gues3d(nij1,nlev,nbv,nv3d) ! background  ensemble
  REAL(r_size),INTENT(INOUT) :: gues2d(nij1,nbv,nv2d)      !  output:  destroyed
  REAL(r_size),INTENT(OUT) :: anal3d(nij1,nlev,nbv,nv3d) ! analysis ensemble
  REAL(r_size),INTENT(OUT) :: anal2d(nij1,nbv,nv2d)
  REAL(r_size),ALLOCATABLE :: mean3d(:,:,:)
  REAL(r_size),ALLOCATABLE :: mean2d(:,:)
  REAL(r_size),ALLOCATABLE :: hdxf(:,:)
  REAL(r_size),ALLOCATABLE :: rdiag(:)
  REAL(r_size),ALLOCATABLE :: dep(:)
  REAL(r_size),ALLOCATABLE :: work3d(:,:,:)
  REAL(r_size),ALLOCATABLE :: work2d(:,:)
  REAL(r_size),ALLOCATABLE :: logpfm(:,:)
  REAL(r_size),ALLOCATABLE :: parm_infl(:,:) ! spread inflation parameter
  REAL(r_size) :: trans(nbv,nbv)
  INTEGER :: ij,ilev,n,m,i,k,nobsl

  WRITE(6,'(A)') 'Hello from das_letkf'
  nobstotal = nobs !+ ntvs
  WRITE(6,'(A,I8)') 'Target observation numbers : NOBS=',nobs!,', NTVS=',ntvs
!$OMP PARALLEL WORKSHARE
  anal3d = gues3d
  anal2d = gues2d
!$OMP END PARALLEL WORKSHARE
  !
  ! In case of no obs
  !
  IF(nobstotal == 0) THEN
    WRITE(6,'(A)') 'No observation assimilated'
    RETURN
  END IF
  !
  ! FCST PERTURBATIONS
  !
  ALLOCATE(mean3d(nij1,nlev,nv3d))
  ALLOCATE(mean2d(nij1,nv2d))
  CALL ensmean_grd(nbv,nij1,gues3d,gues2d,mean3d,mean2d)
  DO n=1,nv3d
!$OMP PARALLEL DO PRIVATE(i,j,k)
    DO m=1,nbv
      DO k=1,nlev
        DO i=1,nij1
          gues3d(i,k,m,n) = gues3d(i,k,m,n) - mean3d(i,k,n)
        END DO
      END DO
    END DO
!$OMP END PARALLEL DO
  END DO
  DO n=1,nv2d
!$OMP PARALLEL DO PRIVATE(i,j,k)
    DO m=1,nbv
      DO i=1,nij1
        gues2d(i,m,n) = gues2d(i,m,n) - mean2d(i,n)
      END DO
    END DO
!$OMP END PARALLEL DO
  END DO
  !
  ! MULTIPLICATIVE INFLATION
  !
  IF(sp_inflation > 0.0d0) THEN
    ALLOCATE(parm_infl(nij1,nlev))
    DO k=1,nlev
      DO i=1,nij1
        parm_infl(i,k) = 1.0d0 + sp_inflation * MIN(1.0d0,REAL(nlev-k,r_size)/REAL(nlev_dampinfl,r_size))
      END DO
    END DO
    DO n=1,nv3d
      DO m=1,nbv
        DO k=1,nlev
          DO i=1,nij1
            gues3d(i,k,m,n) = gues3d(i,k,m,n) * parm_infl(i,k)
          END DO
        END DO
      END DO
    END DO
    DO n=1,nv2d
      DO m=1,nbv
        DO i=1,nij1
          gues2d(i,m,n) = gues2d(i,m,n) * parm_infl(i,1)
        END DO
      END DO
    END DO
    DEALLOCATE(parm_infl)
  END IF
  !
  ! p_full for background ensemble mean
  !
  ALLOCATE(logpfm(nij1,nlev))
  CALL calc_pfull(nij1,1,mean2d(:,iv2d_ps),logpfm)
!$OMP PARALLEL WORKSHARE
  logpfm = DLOG(logpfm)
!$OMP END PARALLEL WORKSHARE
  !
  ! MAIN ASSIMILATION LOOP
  !
!$OMP PARALLEL PRIVATE(ij,ilev,n,i,k,hdxf,rdiag,dep,trans,nobsl)
  ALLOCATE( hdxf(1:nobstotal,1:nbv),rdiag(1:nobstotal),dep(1:nobstotal) )
!--- For ILEV = 1
! Remark by YS:
!   Incomprehensible error was occured when
!   the ilev loop was iterated from 1 to NLEV.
!---
  ilev=1
write(6,*) 'ilev =',ilev
!$OMP DO SCHEDULE(DYNAMIC)
    DO ij=1,nij1
      CALL obs_local(ij,ilev,hdxf,rdiag,dep,nobsl,logpfm)
      IF( nobsl /= 0 ) THEN
        CALL letkf_core(nobstotal,nobsl,hdxf,rdiag,dep,0.0d0,trans)
        DO n=1,nv3d
          DO m=1,nbv
            anal3d(ij,ilev,m,n) = mean3d(ij,ilev,n)
            DO k=1,nbv
              anal3d(ij,ilev,m,n) = anal3d(ij,ilev,m,n) &
                & + gues3d(ij,ilev,k,n) * trans(k,m)
            END DO
          END DO
        END DO
        DO n=1,nv2d
          DO m=1,nbv
            anal2d(ij,m,n)  = mean2d(ij,n)
            DO k=1,nbv
              anal2d(ij,m,n) = anal2d(ij,m,n) + gues2d(ij,k,n) * trans(k,m)
            END DO
          END DO
        END DO
      END IF
    END DO
!$OMP END DO
!--- For ILEV = 2 - NLEV
!$OMP DO SCHEDULE(DYNAMIC)
  DO ilev=2,nlev
write(6,*) 'ilev =',ilev
    DO ij=1,nij1
      CALL obs_local(ij,ilev,hdxf,rdiag,dep,nobsl,logpfm)
      IF( nobsl /= 0 ) THEN
        CALL letkf_core(nobstotal,nobsl,hdxf,rdiag,dep,0.0d0,trans)
        DO n=1,nv3d
          DO m=1,nbv
            anal3d(ij,ilev,m,n) = mean3d(ij,ilev,n)
            DO k=1,nbv
              anal3d(ij,ilev,m,n) = anal3d(ij,ilev,m,n) &
                & + gues3d(ij,ilev,k,n) * trans(k,m)
            END DO
          END DO
        END DO
      END IF
    END DO
  END DO
!$OMP END DO
  DEALLOCATE(hdxf,rdiag,dep)
!$OMP END PARALLEL
  !
  ! Additive inflation
  !
  IF(sp_infl_additive > 0.0d0) THEN
    CALL read_ens_mpi('addi',nbv,gues3d,gues2d)
    ALLOCATE( work3d(nij1,nlev,nv3d) )
    ALLOCATE( work2d(nij1,nv2d) )
    CALL ensmean_grd(nbv,nij1,gues3d,gues2d,work3d,work2d)
    DO n=1,nv3d
!$OMP PARALLEL DO PRIVATE(i,j,k)
      DO m=1,nbv
        DO k=1,nlev
          DO i=1,nij1
            gues3d(i,k,m,n) = gues3d(i,k,m,n) - work3d(i,k,n)
          END DO
        END DO
      END DO
!$OMP END PARALLEL DO
    END DO
    DO n=1,nv2d
!$OMP PARALLEL DO PRIVATE(i,j,k)
      DO m=1,nbv
        DO i=1,nij1
          gues2d(i,m,n) = gues2d(i,m,n) - work2d(i,n)
        END DO
      END DO
!$OMP END PARALLEL DO
    END DO

    DEALLOCATE(work3d,work2d)
    WRITE(6,'(A)') '===== Additive covariance inflation ====='
    WRITE(6,'(A,F10.4)') '  parameter:',sp_infl_additive
    WRITE(6,'(A)') '========================================='
!    parm = 0.7d0
!    DO ilev=1,nlev
!      parm_infl_damp(ilev) = 1.0d0 + parm &
!        & + parm * REAL(1-ilev,r_size)/REAL(nlev_dampinfl,r_size)
!      parm_infl_damp(ilev) = MAX(parm_infl_damp(ilev),1.0d0)
!    END DO
    DO n=1,nv3d
      DO m=1,nbv
        DO ilev=1,nlev
          DO ij=1,nij1
            anal3d(ij,ilev,m,n) = anal3d(ij,ilev,m,n) &
              & + gues3d(ij,ilev,m,n) * sp_infl_additive
          END DO
        END DO
      END DO
    END DO
    DO n=1,nv2d
      DO m=1,nbv
        DO ij=1,nij1
          anal2d(ij,m,n) = anal2d(ij,m,n) + gues2d(ij,m,n) * sp_infl_additive
        END DO
      END DO
    END DO
  END IF

  DEALLOCATE(logpfm,mean3d,mean2d)
  RETURN
END SUBROUTINE das_letkf
!-----------------------------------------------------------------------
! Project global observations to local
!     (hdxf_g,dep_g,rdiag_g) -> (hdxf,dep,rdiag)
!-----------------------------------------------------------------------
SUBROUTINE obs_local(ij,ilev,hdxf,rdiag,dep,nobsl,logpfm)
  IMPLICIT NONE
  INTEGER,INTENT(IN) :: ij,ilev
  REAL(r_size),INTENT(IN) :: logpfm(nij1,nlev)
  REAL(r_size),INTENT(OUT) :: hdxf(nobstotal,nbv)
  REAL(r_size),INTENT(OUT) :: rdiag(nobstotal)
  REAL(r_size),INTENT(OUT) :: dep(nobstotal)
  INTEGER,INTENT(OUT) :: nobsl
  REAL(r_size) :: minlon,maxlon,minlat,maxlat,dist,dlev
  REAL(r_size) :: tmplon,tmplat,tmperr!,tmpwgt(nlev)
  INTEGER :: tmpqc
  INTEGER,ALLOCATABLE:: nobs_use(:)
  INTEGER,ALLOCATABLE:: ntvs_use_prof(:),ntvs_use_inst(:),ntvs_use_slot(:)
  INTEGER :: imin,imax,jmin,jmax,im,ichan
  integer :: jmin2,jmax2
  INTEGER :: n,nn,tvnn,m
!
! INITIALIZE
!
  IF( nobs > 0 ) THEN
    ALLOCATE(nobs_use(nobs))
  END IF
  IF( ntvs > 0 ) THEN
    ALLOCATE(ntvs_use_prof(ntvs))
    ALLOCATE(ntvs_use_inst(ntvs))
    ALLOCATE(ntvs_use_slot(ntvs))
  END IF
!
! data search
!
  minlon = lon1(ij) - dlon_zero(ij)
  maxlon = lon1(ij) + dlon_zero(ij)
  minlat = lat1(ij) - dlat_zero
  maxlat = lat1(ij) + dlat_zero
  if(maxlon - minlon >= 360.d0) then
    minlon=0.d0                                                               
    maxlon=360.d0                                                             
  end if !near pole, overlap of the right wing and the left wing may be occur.
  DO jmin=0,nlat-1
    IF(minlat < lat(jmin+1)) EXIT
  END DO
  DO jmax=0,nlat-1
    IF(maxlat < lat(jmax+1)) EXIT
  END DO
  jmin2 = max(jmin,1)     
  jmax2 = min(jmax,nlat-1)
  nn = 1
  tvnn = 1
  IF(minlon >= 0 .AND. maxlon <= 360.0) THEN
    DO imin=1,nlon-1
      IF(minlon < lon(imin+1)) EXIT
    END DO
    DO imax=1,nlon-1
      IF(maxlon < lon(imax+1)) EXIT
    END DO
    IF( nobs > 0 ) &
    & CALL obs_local_sub(imin,imax,jmin,jmax,nn,nobs_use)
    IF( ntvs > 0 ) &
    & CALL tvs_local_sub(imin,imax,jmin2,jmax2,tvnn, &
    &                    ntvs_use_prof,ntvs_use_inst,ntvs_use_slot)
  ELSE IF(minlon >= 0 .AND. maxlon > 360.0) THEN
    DO imin=1,nlon-1
      IF(minlon < lon(imin+1)) EXIT
    END DO
    maxlon = maxlon - 360.0d0
    IF(maxlon > 360.0d0) THEN
      imin = 1
      imax = nlon
      IF( nobs > 0 ) &
      & CALL obs_local_sub(imin,imax,jmin,jmax,nn,nobs_use)
      IF( ntvs > 0 ) &
      & CALL tvs_local_sub(imin,imax,jmin2,jmax2,tvnn, &
      &                    ntvs_use_prof,ntvs_use_inst,ntvs_use_slot)
    ELSE
      DO imax=1,nlon-1
        IF(maxlon < lon(imax+1)) EXIT
      END DO
      IF(imax > imin) THEN
        imin = 1
        imax = nlon
        IF( nobs > 0 ) &
        & CALL obs_local_sub(imin,imax,jmin,jmax,nn,nobs_use)
        IF( ntvs > 0 ) &
        & CALL tvs_local_sub(imin,imax,jmin2,jmax2,tvnn, &
        &                    ntvs_use_prof,ntvs_use_inst,ntvs_use_slot)
      ELSE
        imin = 1
        IF( nobs > 0 ) &
        & CALL obs_local_sub(imin,imax,jmin,jmax,nn,nobs_use)
        IF( ntvs > 0 ) &
        & CALL tvs_local_sub(imin,imax,jmin2,jmax2,tvnn, &
        &                    ntvs_use_prof,ntvs_use_inst,ntvs_use_slot)
        DO imin=1,nlon-1
          IF(minlon < lon(imin+1)) EXIT
        END DO
        imax = nlon
        IF( nobs > 0 ) &
        & CALL obs_local_sub(imin,imax,jmin2,jmax2,nn,nobs_use)
        IF( ntvs > 0 ) &
        & CALL tvs_local_sub(imin,imax,jmin2,jmax2,tvnn, &
        &                    ntvs_use_prof,ntvs_use_inst,ntvs_use_slot)
      END IF
    END IF
  ELSE IF(minlon < 0 .AND. maxlon <= 360.0d0) THEN
    DO imax=1,nlon-1
      IF(maxlon < lon(imax+1)) EXIT
    END DO
    minlon = minlon + 360.0d0
    IF(minlon < 0) THEN
      imin = 1
      imax = nlon
      IF( nobs > 0 ) &
      & CALL obs_local_sub(imin,imax,jmin,jmax,nn,nobs_use)
      IF( ntvs > 0 ) &
      & CALL tvs_local_sub(imin,imax,jmin2,jmax2,tvnn, &
      &                    ntvs_use_prof,ntvs_use_inst,ntvs_use_slot)
    ELSE
      DO imin=1,nlon-1
        IF(minlon < lon(imin+1)) EXIT
      END DO
      IF(imin < imax) THEN
        imin = 1
        imax = nlon
        IF( nobs > 0 ) &
        & CALL obs_local_sub(imin,imax,jmin,jmax,nn,nobs_use)
        IF( ntvs > 0 ) &
        & CALL tvs_local_sub(imin,imax,jmin2,jmax2,tvnn, &
        &                    ntvs_use_prof,ntvs_use_inst,ntvs_use_slot)
      ELSE
        imin = 1
        IF( nobs > 0 ) &
        & CALL obs_local_sub(imin,imax,jmin,jmax,nn,nobs_use)
        IF( ntvs > 0 ) &
        & CALL tvs_local_sub(imin,imax,jmin2,jmax2,tvnn, &
        &                    ntvs_use_prof,ntvs_use_inst,ntvs_use_slot)
        DO imin=1,nlon-1
          IF(minlon < lon(imin+1)) EXIT
        END DO
        imax = nlon
        IF( nobs > 0 ) &
        & CALL obs_local_sub(imin,imax,jmin2,jmax2,nn,nobs_use)
        IF( ntvs > 0 ) &
        & CALL tvs_local_sub(imin,imax,jmin2,jmax2,tvnn, &
        &                    ntvs_use_prof,ntvs_use_inst,ntvs_use_slot)
      END IF
    END IF
  ELSE
!!$    maxlon = maxlon - 360.0d0
!!$    minlon = minlon + 360.0d0
!!$    IF(maxlon > 360.0 .OR. minlon < 0) THEN
!!$      imin = 1
!!$      imax = nlon
!!$      IF( nobs > 0 ) &
!!$      & CALL obs_local_sub(imin,imax,jmin,jmax,nn,nobs_use)
!!$      IF( ntvs > 0 ) &
!!$      & CALL tvs_local_sub(imin,imax,jmin,jmax,tvnn, &
!!$      &                    ntvs_use_prof,ntvs_use_inst,ntvs_use_slot)
!!$    ELSE
!!$      DO imin=1,nlon-1
!!$        IF(minlon < lon(imin+1)) EXIT
!!$      END DO
!!$      DO imax=1,nlon-1
!!$        IF(maxlon < lon(imax+1)) EXIT
!!$      END DO
!!$      IF(imin > imax) THEN
        imin = 1
        imax = nlon
        IF( nobs > 0 ) &
        & CALL obs_local_sub(imin,imax,jmin,jmax,nn,nobs_use)
        IF( ntvs > 0 ) &
        & CALL tvs_local_sub(imin,imax,jmin2,jmax2,tvnn, &
        &                    ntvs_use_prof,ntvs_use_inst,ntvs_use_slot)
!!$      ELSE
!!$        IF( nobs > 0 ) &
!!$        & CALL obs_local_sub(imin,imax,jmin,jmax,nn,nobs_use)
!!$        IF( ntvs > 0 ) &
!!$        & CALL tvs_local_sub(imin,imax,jmin,jmax,tvnn, &
!!$        &                    ntvs_use_prof,ntvs_use_inst,ntvs_use_slot)
!!$      END IF
!!$    END IF
  END IF
  nn = nn-1
  tvnn = tvnn -1
  IF( nn < 1 .AND. tvnn < 1 ) THEN
!TVSTVS  IF(nn < 1) THEN
    nobsl = 0
    RETURN
  END IF
!
! CONVENTIONAL
!
  nobsl = 0
  IF(nn > 0) THEN
    DO n=1,nn
      IF(NINT(obselm(nobs_use(n))) == id_ps_obs .AND. ilev > 1) THEN
        dlev = ABS(LOG(obsdat(nobs_use(n))) - logpfm(ij,ilev))
      ELSE IF(NINT(obselm(nobs_use(n))) /= id_ps_obs) THEN
        dlev = ABS(LOG(obslev(nobs_use(n))) - logpfm(ij,ilev))
      ELSE
        dlev = 0.0d0
      END IF
      IF(dlev > dist_zerov) CYCLE

      tmplon=obslon(nobs_use(n))
      tmplat=obslat(nobs_use(n))
!CDIR IEXPAND
      CALL com_distll_1( tmplon, tmplat,lon1(ij), lat1(ij), dist)
      IF(dist > dist_zero ) CYCLE

      nobsl = nobsl + 1
!CDIR EXPAND=100
      DO m=1,nbv
        hdxf(nobsl,m) = obshdxf(nobs_use(n),m)
      END DO
      dep(nobsl)    = obsdep(nobs_use(n))
      !
      ! Observational localization
      !
      tmperr=obserr(nobs_use(n))
      rdiag(nobsl) = tmperr * tmperr &
        & * exp(0.5d0 * ((dist/sigma_obs)**2 + (dlev/sigma_obsv)**2))
    END DO
  END IF
!
! ATOVS
!
  IF(tvnn > 0) THEN
    DO n=1,tvnn
      tmplon=tvslon(ntvs_use_prof(n),ntvs_use_inst(n),ntvs_use_slot(n))
      tmplat=tvslat(ntvs_use_prof(n),ntvs_use_inst(n),ntvs_use_slot(n))
      CALL com_distll_1( tmplon, tmplat, lon1(ij), lat1(ij), dist)
      IF( dist > dist_zero) CYCLE

      DO ichan=1,ntvsch(ntvs_use_inst(n))
        dlev = &
        abs(log(tvslev(ichan,ntvs_use_prof(n),ntvs_use_inst(n),ntvs_use_slot(n)))-logpfm(ij,ilev))
        if(dlev>dist_zerov) cycle
!        select case(nint(tvselm(ntvs_use_prof(n),ntvs_use_inst(n),ntvs_use_slot(n))))
!        case(id_bt_obs)
!          iobs=8
!        end select
        tmperr=tvserr(ichan,ntvs_use_prof(n),ntvs_use_inst(n),ntvs_use_slot(n))
        tmpqc=tvsqc(ichan,ntvs_use_prof(n),ntvs_use_inst(n),ntvs_use_slot(n))
        !tmpwgt(:)=tvswgt(:,ichan, &
        !                 & ntvs_use_prof(n), &
        !                 & ntvs_use_inst(n), &
        !                 & ntvs_use_slot(n))
        !IF( tmpqc == 1 .AND. tmpwgt(ilev) > 0.05D0 ) THEN
        if( tmpqc == 1 ) then
          nobsl = nobsl + 1
          DO im = 1, nbv
            hdxf(nobsl,im) = tvshdxf(im,ichan, &
                              & ntvs_use_prof(n), &
                              & ntvs_use_inst(n), &
                              & ntvs_use_slot(n))
          END DO
          dep(nobsl)    = tvsdep(ichan, &
                              & ntvs_use_prof(n), &
                              & ntvs_use_inst(n), &
                              & ntvs_use_slot(n))
          rdiag(nobsl)  = tmperr * tmperr &
                        & * exp(0.5d0 * ((dist/sigma_obs)**2 &
!                        & / (tmpwgt(ilev) * tmpwgt(ilev))
                        & +(dlev/sigma_obsv)**2))
        END IF
      END DO
    END DO
  END IF
!
! DEBUG
! IF( ILEV == 1 .AND. ILON == 1 ) &
! & WRITE(6,*) 'ILEV,ILON,ILAT,NN,TVNN,NOBSL=',ilev,ij,nn,tvnn,nobsl
!
  IF( nobsl > nobstotal ) THEN
    WRITE(6,'(A,I5,A,I5)') 'FATAL ERROR, NOBSL=',nobsl,' >  NOBSTOTAL=',nobstotal
    WRITE(6,*) 'IJ,NN,TVNN=', ij, nn, tvnn
    STOP 99
  END IF
!
  IF( nobs > 0 ) THEN
    DEALLOCATE(nobs_use)
  END IF
  IF( ntvs > 0 ) THEN
    DEALLOCATE(ntvs_use_prof)
    DEALLOCATE(ntvs_use_inst)
    DEALLOCATE(ntvs_use_slot)
  END IF
!
  RETURN
END SUBROUTINE obs_local

SUBROUTINE obs_local_sub(imin,imax,jmin,jmax,nn,nobs_use)
  INTEGER,INTENT(IN) :: imin,imax,jmin,jmax
  INTEGER,INTENT(INOUT) :: nn, nobs_use(nobs)
  INTEGER :: j,n,ib,ie,ip

  DO j=jmin,jmax
    IF(imin > 1) THEN
      ib = nobsgrd(imin-1,j)+1
    ELSE
!      IF(j > 1) THEN
      if(j>0) then
        ib = nobsgrd(nlon,j-1)+1
      ELSE
        ib = 1
      END IF
    END IF
    ie = nobsgrd(imax,j)
!Degenerate grids on the poles
    if(j == 0) then           
      ib = 1                  
      ie = nobsgrd(nlon,j)    
    elseif(j == nlat) then    
      ib = nobsgrd(nlon,j-1)+1
      ie = nobsgrd(nlon,j)    
    end if  
    n = ie - ib + 1
    IF(n == 0) CYCLE
    DO ip=ib,ie
      IF(nn > nobs) THEN
        WRITE(6,*) 'FATALERROR, NN > NOBS', NN, NOBS
      END IF
      nobs_use(nn) = ip
      nn = nn + 1
    END DO
  END DO

  RETURN
END SUBROUTINE obs_local_sub

SUBROUTINE  tvs_local_sub(imin,imax,jmin,jmax,nn,ntvs_prof,ntvs_inst,ntvs_slot)
  INTEGER,INTENT(IN) :: imin,imax,jmin,jmax
  INTEGER,INTENT(INOUT) :: nn, ntvs_prof(ntvs), ntvs_inst(ntvs), ntvs_slot(ntvs)
  INTEGER :: j,n,ib,ie,ip
  INTEGER :: islot, iinst

  DO j=jmin,jmax
    DO islot=1,nslots
      DO iinst=1,ninstrument
        IF(imin > 1) THEN
          ib = ntvsgrd(imin-1,j,iinst,islot)+1
        ELSE
!          IF(j > 1) THEN
          if(j > 0) then
            ib = ntvsgrd(nlon,j-1,iinst,islot)+1
          ELSE
            ib = 1
          END IF
        END IF
        ie = ntvsgrd(imax,j,iinst,islot)
        if(j == 0) then
          ib = 1
          ie = ntvsgrd(imax,j,iinst,islot)
        elseif(j == nlat) then
          ib = ntvsgrd(imax,j-1,iinst,islot)+1
          ie = ntvsgrd(imax,j,iinst,islot)
        end if
        n = ie - ib + 1
        IF(n == 0) CYCLE
        DO ip=ib,ie
          IF(nn > nobs) THEN
            WRITE(6,*) 'FATALERROR, NN > NTVS', NN, NTVS
          END IF
          ntvs_prof(nn)=ip
          ntvs_inst(nn)=iinst
          ntvs_slot(nn)=islot
          nn = nn + 1
        END DO
      END DO
    END DO
  END DO
  RETURN
END SUBROUTINE tvs_local_sub
!-----------------------------------------------------------------------
! Data Assimilation for VARBC
!-----------------------------------------------------------------------
!SUBROUTINE das_vbc(um,vm,tm,qm,qlm,psm,vbcf,vbca)
!  USE common_mtx
!  IMPLICIT NONE
!  REAL(r_size),INTENT(IN) :: um(nij1,nlev)
!  REAL(r_size),INTENT(IN) :: vm(nij1,nlev)
!   REAL(r_size),INTENT(IN) :: tm(nij1,nlev)
!   REAL(r_size),INTENT(IN) :: qm(nij1,nlev)
!   REAL(r_size),INTENT(IN) :: qlm(nij1,nlev)
!   REAL(r_size),INTENT(IN) :: psm(nij1)
!   REAL(r_size),INTENT(INOUT) :: vbcf(maxvbc,maxtvsch,ninstrument)
!   REAL(r_size),INTENT(OUT)   :: vbca(maxvbc,maxtvsch,ninstrument)
!   REAL(r_sngl) :: u4(nlon,nlat,nlev)
!   REAL(r_sngl) :: v4(nlon,nlat,nlev)
!   REAL(r_sngl) :: t4(nlon,nlat,nlev)
!   REAL(r_sngl) :: q4(nlon,nlat,nlev)
!   REAL(r_sngl) :: ql4(nlon,nlat,nlev)
!   REAL(r_sngl) :: ps4(nlon,nlat)
!   REAL(r_size) :: u(nlon,nlat,nlev)
!   REAL(r_size) :: v(nlon,nlat,nlev)
!   REAL(r_size) :: t(nlon,nlat,nlev)
!   REAL(r_size) :: q(nlon,nlat,nlev)
!   REAL(r_size) :: ql(nlon,nlat,nlev)
!   REAL(r_size) :: ps(nlon,nlat)
!   REAL(r_size) :: p_full(nlon,nlat,nlev)
!   REAL(r_size),ALLOCATABLE :: hx(:,:,:,:)
!   REAL(r_size),ALLOCATABLE :: pred(:,:,:,:,:)
!   INTEGER,ALLOCATABLE :: tmpqc(:,:,:)
!   REAL(r_size),ALLOCATABLE :: tmpwgt(:,:,:,:)
!   REAL(r_size) :: a(maxvbc,maxvbc)
!   REAL(r_size) :: b(maxvbc)
!   REAL(r_size) :: ainv(maxvbc,maxvbc)
!   INTEGER:: ntvschan1(maxtvsch,ninstrument)
!   INTEGER:: i,j,k,n,islot,nn

!   PRINT *,'Hello from das_vbc'

!   IF(ntvs == 0) THEN
!     PRINT *,'No radiance data: das_vbc skipped..'
! !$OMP PARALLEL WORKSHARE
!     vbca = vbcf
! !$OMP END PARALLEL WORKSHARE
!     RETURN
!   END IF

!   CALL gather_grd_mpi(0,um,vm,tm,qm,qlm,psm,u4,v4,t4,q4,ql4,ps4)
!   n = nlon*nlat*nlev
!   CALL MPI_BARRIER(MPI_COMM_WORLD,i)
!   CALL MPI_BCAST(u4(1,1,1),n,MPI_REAL,0,MPI_COMM_WORLD,i)
!   CALL MPI_BCAST(v4(1,1,1),n,MPI_REAL,0,MPI_COMM_WORLD,i)
!   CALL MPI_BCAST(t4(1,1,1),n,MPI_REAL,0,MPI_COMM_WORLD,i)
!   CALL MPI_BCAST(q4(1,1,1),n,MPI_REAL,0,MPI_COMM_WORLD,i)
!   CALL MPI_BCAST(ql4(1,1,1),n,MPI_REAL,0,MPI_COMM_WORLD,i)
!   n = nlon*nlat
!   CALL MPI_BCAST(ps4(1,1),n,MPI_REAL,0,MPI_COMM_WORLD,i)
! !$OMP PARALLEL WORKSHARE
!   u = REAL(u4,r_size)
!   v = REAL(v4,r_size)
!   t = REAL(t4,r_size)
!   q = REAL(q4,r_size)
!   ql = REAL(ql4,r_size)
!   ps = REAL(ps4,r_size)
! !$OMP END PARALLEL WORKSHARE
!   CALL calc_pfull(ps,p_full)

!   ALLOCATE( hx(maxtvsch,maxtvsprof,ninstrument,nslots) )
!   ALLOCATE( pred(maxvbc,maxtvsch,maxtvsprof,ninstrument,nslots) )
!   ALLOCATE( tmpqc(maxtvsch,maxtvsprof,ninstrument) )
!   ALLOCATE( tmpwgt(nlev,maxtvsch,maxtvsprof,ninstrument) )
!   DO islot=1,nslots
! !    IF(SUM(ntvsprofslots(:,islot)) == 0) CYCLE
!     ntvsprof(:) = ntvsprofslots(:,islot)
!     CALL Trans_XtoY_tvs(u,v,t,q,ql,ps,p_full, &
!       & tvslon(:,:,islot),tvslat(:,:,islot),tvszenith(:,:,islot),&
!       & tvsskin(:,:,islot),tvsstmp(:,:,islot),tvsclw(:,:,islot),&
!       & tvsemis(:,:,:,islot),tmpqc,hx(:,:,:,islot),tmpwgt,pred(:,:,:,:,islot))
!   END DO
!   DEALLOCATE(tmpqc,tmpwgt)

! !$OMP PARALLEL PRIVATE(j,k,n,a,b,ainv)
! !$OMP WORKSHARE
!   vbca = 0.0d0
! !$OMP END WORKSHARE
! !$OMP DO SCHEDULE(DYNAMIC)
!   DO k=1,ninstrument
!     DO j=1,maxtvsch
!       !
!       ! Parallel processing
!       !
!       IF(MOD(j+maxtvsch*(k-1)-1,nprocs) /= myrank) CYCLE
!       !
!       ! DATA NUMBER
!       !
!       ntvschan(j,k) = SUM(tvsqc(j,:,k,:))
!       IF(msw_vbc .AND. ntvschan(j,k) /= 0 ) THEN
!         PRINT '(3A,I3,A,I6)',' >> VBC executed for instrument,channel,ntvsl: ',&
!                             & tvsname(k),',',tvsch(j,k),',',ntvschan(j,k)
!         CALL vbc_local(j,k,ntvschan(j,k),hx,pred,a,b)
!         CALL mtx_inv(maxvbc,a,ainv)
!         vbca(:,j,k) = vbcf(:,j,k)
!         DO n=1,maxvbc
!           vbca(:,j,k) = vbca(:,j,k) - ainv(:,n)*b(n) !ATTN: sign for beta
!         END DO
!       ELSE
!         PRINT '(3A,I3,A,I6)',' !! NO VBC executed for instrument,channel,ntvsl: ',&
!                             & tvsname(k),',',tvsch(j,k),',',ntvschan(j,k)
!         vbca(:,j,k) = vbcf(:,j,k)
!       END IF
!     END DO
!   END DO
! !$OMP END DO
! !$OMP WORKSHARE
!   vbcf = vbca
!   ntvschan1 = ntvschan
! !$OMP END WORKSHARE
! !$OMP END PARALLEL
!   DEALLOCATE(hx,pred)
!   n = maxvbc*maxtvsch*ninstrument
!   CALL MPI_BARRIER(MPI_COMM_WORLD,j)
!   CALL MPI_ALLREDUCE(vbcf,vbca,n,MPI_DOUBLE_PRECISION,MPI_SUM,MPI_COMM_WORLD,j)
!   n = maxtvsch*ninstrument
!   CALL MPI_BARRIER(MPI_COMM_WORLD,j)
!   CALL MPI_ALLREDUCE(ntvschan1,ntvschan,n,MPI_INTEGER,MPI_SUM,MPI_COMM_WORLD,j)

!   RETURN
! END SUBROUTINE das_vbc
!-----------------------------------------------------------------------
!  (in) ichan: channnel
!  (in) iinst: sensor
!  (out) a = B_beta^-1 + p R^-1 p^T
!  (out) b = p R^-1 d
!-----------------------------------------------------------------------
SUBROUTINE vbc_local(ichan,iinst,ntvsl,hx,pred,a,b)
  IMPLICIT NONE
  INTEGER,PARAMETER :: msw=1
  INTEGER,PARAMETER :: nmin=400
  INTEGER,INTENT(IN) :: ichan,iinst,ntvsl
  REAL(r_size),INTENT(IN) :: hx(maxtvsch,maxtvsprof,ninstrument,nslots)
  REAL(r_size),INTENT(IN) :: pred(maxvbc,maxtvsch,maxtvsprof,ninstrument,nslots)
  REAL(r_size),INTENT(OUT) :: a(maxvbc,maxvbc)
  REAL(r_size),INTENT(OUT) :: b(maxvbc)
  REAL(r_size) :: dep,dep0
  REAL(r_size) :: bias,bias0
  REAL(r_size) :: r,tmp
  INTEGER:: islot, iprof, i,j,n

  a = 0.0d0
  b = 0.0d0
  dep = 0.0d0
  dep0 = 0.0d0
  bias = 0.0d0
  bias0 = 0.0d0
  n = 0
  DO islot=1,nslots
    DO iprof=1,maxtvsprof
      IF(tvsqc(ichan,iprof,iinst,islot)/=1) CYCLE
      !
      ! R
      !
      r = tvserr(ichan,iprof,iinst,islot)**2
      !
      ! p R^-1 p^T
      !
      DO j=1,maxvbc
        DO i=1,maxvbc
          a(i,j) = a(i,j) &
               & + pred(i,ichan,iprof,iinst,islot) &
               & * pred(j,ichan,iprof,iinst,islot) / r
        END DO
      END DO
      !
      ! B_beta^-1
      !
      IF(msw == 1) THEN ! Y.Sato
        IF(ntvsl < nmin) THEN
          tmp = REAL(nmin,r_size) / r

        ELSE
          tmp = (REAL(ntvsl,r_size) &
            & / (LOG10(REAL(ntvsl,r_size)/REAL(nmin,r_size))+1.0d0)) / r
        END IF
      ELSE IF(msw == 2) THEN ! D.Dee
        tmp = REAL(ntvsl,r_size) / r
      ELSE ! Constant
        tmp = 100.0d0
      END IF
      DO i=1,maxvbc
        a(i,i) = a(i,i) + tmp
      END DO
      !
      ! p R^-1 d
      !
      b(:) = b(:) + pred(:,ichan,iprof,iinst,islot) / r &
                & *(tvsdat(ichan,iprof,iinst,islot)-hx(ichan,iprof,iinst,islot))
      bias = bias+tvsdat(ichan,iprof,iinst,islot)-hx(ichan,iprof,iinst,islot)
      dep = dep+(tvsdat(ichan,iprof,iinst,islot)-hx(ichan,iprof,iinst,islot))**2
      bias0= bias0+tvsdep(ichan,iprof,iinst,islot)
      dep0= dep0+tvsdep(ichan,iprof,iinst,islot)**2
      n = n+1
    END DO
  END DO

  dep = SQRT(dep / REAL(n,r_size))
  dep0 = SQRT(dep0 / REAL(n,r_size))
  bias = bias / REAL(n,r_size)
  bias0 = bias0 / REAL(n,r_size)
  PRINT '(2A,I3,4F12.4)',' >> D monit: ',tvsname(iinst),tvsch(ichan,iinst),bias0,bias,dep0,dep

  RETURN
END SUBROUTINE vbc_local

END MODULE letkf_tools
