diffusion_mod sourceΒΆ

   1module diffusion_mod
   2  ! MODULE diffusion_mod (prefix='diff' category='4. Data Object transformations')
   3  !
   4  !:Purpose:  Diffusion operator and storage of related diffusion configuration used to model
   5  !           background-error horizontal correlations. Both explicit and implicit
   6  !           formulations are included, with implicit being preferred when using large
   7  !           correlation length scales relative to the grid spacing. For implicit the
   8  !           MPI topology must have npex=1, i.e. 1xNPEYxNUMTHREADS. A 2D MPI topology
   9  !           can be used for the explicit formulation.
  10  !
  11  !:Reference:  Weaver, A. T., and P. Courtier, 2001: Correlation modelling on
  12  !             the sphere using a generalized diffusion equation.
  13  !             Q. J. R. Meteorol. Soc., 127, 1815-1846.
  14  !
  15  !:Basic equations: * Lcorr^2 = 2*k*dt*numt   (1)
  16  !                  * stab    = k*dt/dx^2     (2)
  17  !
  18  use midasMpi_mod
  19  use horizontalCoord_mod
  20  use verticalCoord_mod
  21  use oceanMask_mod
  22  use earthConstants_mod
  23  use randomNumber_mod
  24  use utilities_mod
  25  use gridStateVector_mod
  26  use gridStateVectorFileIO_mod
  27
  28  implicit none
  29  save
  30  private
  31
  32  ! Public subroutines and functions
  33  public :: diff_setup, diff_finalize, diff_Csqrt, diff_Csqrtadj
  34
  35  type struct_diff
  36    ! number of grid points in x and y directions (includes perimeter of land points)
  37    integer :: ni, nj
  38    integer :: myLonBeg, myLonEnd, myLatBeg, myLatEnd
  39    integer :: lonPerPE, lonPerPEmax, latPerPE, latPerPEmax
  40    integer :: myLonBeg_transpose, myLonEnd_transpose
  41    integer :: lonPerPE_transpose, lonPerPEmax_transpose
  42    integer :: latIndexIgnore
  43    integer :: numt
  44    integer :: numIterImp
  45    real(8) :: dt
  46    real(8), allocatable :: cosyhalf(:), cosyinv(:), cosyinvsq(:)
  47    real(8), allocatable :: mhalfx(:,:), mhalfy(:,:)
  48    real(8), allocatable :: khalfx(:,:), khalfy(:,:)
  49    real(8) :: dlon, dlat        ! grid spacing in radians
  50    ! These are used in subroutines diff_Csqrt and diff_Csqrtadj
  51    real(8), allocatable :: Winv(:,:), Wsqrt(:,:), Winvsqrt(:,:)
  52    real(8), allocatable :: Lambda(:,:)
  53    real(8), allocatable :: diff1x_ap(:,:),diff1x_bp_inv(:,:)
  54    real(8), allocatable :: diff1x_c(:,:)
  55    real(8), allocatable :: diff1y_ap(:,:),diff1y_bp_inv(:,:)
  56    real(8), allocatable :: diff1y_c(:,:)
  57    logical :: useImplicit
  58  end type struct_diff
  59
  60  integer, parameter :: nMaxDiff = 10
  61  integer            :: nDiffAlreadyAllocated = 0
  62  type(struct_diff)  :: diff(nMaxDiff)
  63
  64contains
  65
  66  !----------------------------------------------------------------------------------------
  67  ! diff_finalize
  68  !----------------------------------------------------------------------------------------
  69  integer function diff_setup (variableIndex, bdiff_varNameList, hco, vco, &
  70                               corr_len, stab, numberSamples, useImplicit, &
  71                               latIndexIgnore)
  72    !
  73    !:Purpose: set up diffusion operator
  74    !
  75    implicit none
  76
  77    ! Arguments:
  78    integer,                   intent(in) :: variableIndex        ! Variable index in bdiff_varNameList(:)
  79    character(len=4),          intent(in) :: bdiff_varNameList(:) ! list of 2D analysis variables  
  80    type(struct_hco), pointer, intent(in) :: hco                  ! Horizontal grid structure
  81    type(struct_vco), pointer, intent(in) :: vco                  ! Vertical grid structure
  82    real,                      intent(in) :: corr_len             ! Horizontal corr. length scale (km); if -1, read 2D field from file
  83    real,                      intent(in) :: stab                 ! Stability criteria (definitely < 0.5)
  84    integer,                   intent(in) :: numberSamples        ! Number of samples to estimate normalization factors by randomization
  85    logical,                   intent(in) :: useImplicit          ! Indicate to use the implicit formulation
  86    integer,                   intent(in) :: latIndexIgnore       ! Number of grid points to ignore near poles
  87
  88    ! Locals:    
  89    real(8), allocatable :: latr(:) ! latitudes on the analysis rotated grid, in radians
  90    real(4), allocatable :: buf2d(:,:)
  91    integer :: lonIndex, latIndex, sampleIndex, timeStep
  92    real(8) :: mindxy, maxL, currentMin, currentLatSpacing, currentLonSpacing
  93    real(8) :: a,b
  94    real(8), allocatable :: Lcorr(:,:)
  95    real(8), allocatable :: kappa(:,:)
  96    real(8), allocatable :: W(:,:)
  97    real(8), allocatable :: m(:,:)
  98    real(8), allocatable :: xin(:,:), xin_transpose(:,:)
  99    real(8), allocatable :: lambdaLocal(:,:)    ! auxiliary variable to MPI_ALLREDUCE of diff%Lambda
 100    real(4), allocatable :: lambdaLocal_r4(:,:) ! auxiliary variable to save diff%Lambda in fst file
 101
 102    ! diff_norm_fact is the name of the RPN format file for the normalization factors.
 103    character(len=*), parameter :: diff_norm_fact = './diffusmod.std'
 104
 105    ! Variables and functions required to write to RPN Standard files.
 106    integer  :: nmax
 107    integer  :: std_unit, ierr, ikey, npak, datyp
 108    integer  :: nii, njj, nkk
 109    integer  :: dateo
 110    integer  :: deet, npas
 111    integer  :: ip1, ip2, ip3
 112    integer  :: ig1, ig2, ig3, ig4
 113    character(len=1) :: grtyp
 114    character(len=2) :: typvar
 115    character(len=12) :: etiket
 116    logical  :: rewrit, file_exist
 117    real     :: dumwrk(1)
 118
 119    integer  :: fnom,fstouv,fstecr,fstlir,fstfrm,fclos
 120    external :: fnom,fstouv,fstecr,fstlir,fstfrm,fclos
 121
 122    integer  :: diffID
 123
 124    integer :: ni, nj
 125    integer :: nsize
 126    integer :: myLonBeg, myLonEnd, myLatBeg, myLatEnd
 127    integer :: seed                                   ! Seed for random number generator
 128
 129    character(len=*), parameter :: correlationLengthFileName = './bgstddev'
 130    type(struct_gsv)            :: statevector
 131    type(struct_ocm)            :: oceanMask
 132    real(4),pointer             :: field3D_r4_ptr(:,:,:)
 133
 134    if ( nDiffAlreadyAllocated == nMaxDiff ) then
 135      write(*,*) 'diff_setup: the maximum number of diffusion operators have already been allocated! ',nMaxDiff
 136      call utl_abort('diff_setup')
 137    end if
 138
 139    nDiffAlreadyAllocated = nDiffAlreadyAllocated + 1
 140    diffID = nDiffAlreadyAllocated
 141
 142    ni = hco%ni
 143    nj = hco%nj
 144    diff(diffID)%dlon = hco%dlon
 145    diff(diffID)%dlat = hco%dlat
 146    diff(diffID)%useImplicit = useImplicit
 147    diff(diffID)%latIndexIgnore = latIndexIgnore
 148
 149    ! domain partionning
 150    diff(diffID)%ni = ni
 151    diff(diffID)%nj = nj
 152    call mmpi_setup_latbands(diff(diffID)%nj, diff(diffID)%latPerPE,  &
 153         diff(diffID)%latPerPEmax, diff(diffID)%myLatBeg, diff(diffID)%myLatEnd)
 154    call mmpi_setup_lonbands(diff(diffID)%ni, diff(diffID)%lonPerPE,  &
 155         diff(diffID)%lonPerPEmax, diff(diffID)%myLonBeg, diff(diffID)%myLonEnd)
 156
 157    ! also, determine lonIndex begin and end for when array is transposed (for implicit diffusion only)
 158    call mmpi_setup_latbands(diff(diffID)%ni, diff(diffID)%lonPerPE_transpose,  &
 159         diff(diffID)%lonPerPEmax_transpose, diff(diffID)%myLonBeg_transpose, diff(diffID)%myLonEnd_transpose)
 160	 
 161    myLonBeg = diff(diffID)%myLonBeg
 162    myLonEnd = diff(diffID)%myLonEnd
 163    myLatBeg = diff(diffID)%myLatBeg
 164    myLatEnd = diff(diffID)%myLatEnd
 165    
 166    write(*,*) 'diff_setup: ***** Starting using the following parameters: *****'
 167    write(*,*) 'diff_setup: Variable : ', bdiff_varNameList( variableIndex ) 
 168    write(*,*) 'diff_setup: Horizontal correlation length scale (km): ', corr_len 
 169    write(*,*) 'diff_setup: Stability criteria: ', stab
 170    write(*,*) 'diff_setup: Indicate implicit diffusion operator (.true.) or explicit version (.false.).: ', useImplicit
 171    write(*,*) 'diff_setup: ni/nj: ', ni, nj   
 172    write(*,*) 'diff_setup: ### MPI domain partitionning ###:'
 173    write(*,*) 'diff_setup: [ myLonBeg, myLonEnd ]: [ ', myLonBeg, ' ', myLonEnd, ' ]'  
 174    write(*,*) 'diff_setup: [ myLatBeg, myLatEnd ]: [ ', myLatBeg, ' ', myLatEnd, ' ]'
 175
 176    ! For implicit diffusion we only allow decomposition by latitude bands
 177    if (useImplicit .and.  mmpi_npex > 1) then
 178      call utl_abort('diff_setup: Error: for implicit diffusion NPEX must be 1 (i.e. 1xNPEYxNUMTHREADS)' )
 179    end if
 180    
 181    allocate(diff(diffID)%cosyhalf(nj        ), diff(diffID)%cosyinv(nj)       , diff(diffID)%cosyinvsq(nj))
 182    allocate(diff(diffID)%Winv    (ni    , nj), diff(diffID)%Wsqrt  (ni, nj)   , diff(diffID)%Winvsqrt(ni, nj))
 183    allocate(diff(diffID)%khalfx  (ni - 1, nj), diff(diffID)%khalfy (ni, nj - 1))
 184    allocate(diff(diffID)%mhalfx  (ni - 1, nj), diff(diffID)%mhalfy (ni, nj - 1))
 185    allocate(diff(diffID)%Lambda  (ni    , nj))
 186    allocate(lambdaLocal          (ni    , nj))
 187    allocate(lambdaLocal_r4       (ni    , nj))
 188
 189    allocate(latr(nj))
 190    allocate(Lcorr(ni, nj))
 191    allocate(kappa(ni, nj))
 192    allocate(    W(ni, nj))
 193    allocate(    m(ni, nj))
 194    allocate(  xin(myLonBeg : myLonEnd, myLatBeg : myLatEnd))
 195
 196    allocate(diff(diffID)%diff1x_ap    (ni, nj))
 197    allocate(diff(diffID)%diff1x_bp_inv(ni, nj))
 198    allocate(diff(diffID)%diff1x_c     (ni, nj))
 199    allocate(diff(diffID)%diff1y_ap    (nj, ni))
 200    allocate(diff(diffID)%diff1y_bp_inv(nj, ni))
 201    allocate(diff(diffID)%diff1y_c     (nj, ni))
 202
 203    latr(:) = hco%lat(:)
 204
 205    diff(diffID)%cosyinv(:)   = 1.0d0 / cos(latr(:))
 206    diff(diffID)%cosyinvsq(:) = diff(diffID)%cosyinv(:) * diff(diffID)%cosyinv(:)
 207
 208    ! cosinus of latitudes on staggered grid
 209    diff(diffID)%cosyhalf(:) = cos(latr(:) + 0.5d0 * diff(diffID)%dlat)
 210
 211    ! Get mask from analysisgrid file
 212    call ocm_readMaskFromFile(oceanMask, hco, vco, './analysisgrid')
 213
 214    ! land mask (1=water, 0=land)
 215    if (oceanMask%maskPresent) then
 216      do latIndex = 1, nj
 217        do lonIndex = 1, ni
 218          
 219          if (oceanMask%mask (lonIndex, latIndex, 1)) then
 220            m (lonIndex, latIndex) = 1.0d0
 221          else
 222            m (lonIndex, latIndex) = 0.0d0
 223          end if
 224       
 225        end do
 226      end do
 227      call ocm_deallocate(oceanMask)
 228    else
 229      m(:,:) = 1.0d0
 230    end if
 231   
 232    m( :, 1 ) = 0.0d0
 233    m( 1, : ) = 0.0d0
 234    m( :, nj) = 0.0d0
 235    m(ni, : ) = 0.0d0
 236
 237    if (latIndexIgnore > 0) then
 238      m( :, 1 : 1+latIndexIgnore )   = 0.0d0
 239      m( :, nj-latIndexIgnore : nj ) = 0.0d0
 240    end if
 241
 242    ! define mask on staggered grids
 243    do latIndex = 1, nj
 244      do lonIndex = 1, ni - 1
 245        if (sum(m(lonIndex : lonIndex + 1, latIndex)) < 2.0d0) then
 246          diff(diffID)%mhalfx(lonIndex, latIndex) = 0.0d0
 247        else
 248          diff(diffID)%mhalfx(lonIndex, latIndex) = 1.0d0
 249        end if
 250      end do
 251    end do
 252
 253    do latIndex = 1, nj - 1
 254      do lonIndex = 1, ni
 255        if (sum(m (lonIndex, latIndex : latIndex + 1)) < 2.0d0) then
 256          diff(diffID)%mhalfy (lonIndex, latIndex) = 0.0d0
 257        else
 258          diff(diffID)%mhalfy (lonIndex, latIndex) = 1.0d0
 259        end if
 260      end do
 261    end do
 262
 263    ! minimum grid spacing over the domain
 264    mindxy = 100000.0d0
 265    do latIndex = 1, nj - 1
 266      do lonIndex = 1, ni - 1
 267
 268        if ((diff (diffID)%mhalfy (lonIndex, latIndex) == 1.0d0) .and. (diff(diffID)%mhalfx(lonIndex, latIndex) == 1.0d0)) then
 269
 270          currentLonSpacing = cos(latr( latIndex)) * diff(diffID)%dlon
 271          currentLatSpacing =                        diff(diffID)%dlat
 272          currentMin = min (currentLatSpacing, currentLonSpacing)  
 273
 274          if (currentMin < mindxy) then
 275            mindxy = currentMin
 276          end if
 277
 278        end if
 279
 280      end do
 281    end do
 282
 283    mindxy = min( mindxy, diff(diffID)%dlat )
 284    write(*,*) 'diff_setup: minimim grid spacing: mindxy = ', mindxy
 285
 286    if ( corr_len == -1 ) then
 287
 288      write(*,*) 'diff_setup: Correlation length scale 2D field will be read from the file: ', correlationLengthFileName
 289      call gsv_allocate(statevector, 1, hco, vco, dateStamp_opt=-1, dataKind_opt=4, &
 290                       hInterpolateDegree_opt='LINEAR', varNames_opt=bdiff_varNameList, mpi_local_opt=.false.)
 291      call gsv_zero(statevector)
 292      call gio_readFromFile(statevector, correlationLengthFileName, 'CORRLEN', ' ', unitConversion_opt = .false.)
 293      call gsv_getField( statevector, field3D_r4_ptr, bdiff_varNameList( variableIndex ) )
 294      Lcorr(:,:) = dble( field3D_r4_ptr( :, :, 1 ) )       
 295      write(*,*) 'diff_setup: correlation length scale 2D field for variable ', bdiff_varNameList(variableIndex),' min/max: ', &
 296                 minval(Lcorr(:,:)), maxval(Lcorr(:,:))
 297      call gsv_deallocate(statevector)
 298
 299    else
 300      
 301      Lcorr(:,:) = corr_len
 302
 303    end if
 304
 305    Lcorr(:,:) =  Lcorr(:,:) / (ec_rayt / 1000.0) ! lengthscale in radians
 306    maxL = maxval(Lcorr(2 : ni - 1, 2 : nj - 1 )) ! maximum lengthscale over domain
 307
 308    ! set main parameters for diffusion operator
 309    kappa(:,:) = Lcorr(:,:)**2                                              ! arbitrarily set k to L^2 (in radians)
 310    if ( useImplicit ) then
 311      ! specify number of timesteps and timestep length for implicit 1D diffusion
 312      diff(diffID)%numIterImp = 10
 313      diff(diffID)%numt = 5
 314      diff(diffID)%dt   = 1.0d0 / (2.0d0 * dble(2 * diff(diffID)%numIterImp) - 3.0d0)
 315      diff(diffID)%dt   = diff(diffID)%dt / dble(diff(diffID)%numt)
 316    else
 317      ! specify number of timesteps and timestep length for explicit 2D diffusion
 318      diff(diffID)%dt = stab * (mindxy**2) / (maxL**2)              ! determine dt from stability criteria (2)
 319      ! diff(diffID)%numt = 1.0d0/(2.0d0*diff(diffID)%dt)           ! determine number of timesteps from (1)
 320      diff(diffID)%numt = ceiling(1.0d0/(4.0d0*diff(diffID)%dt))*2  ! make sure it is an even integer
 321      diff(diffID)%dt = 1.0d0 / ( 2.0d0 * dble( diff(diffID)%numt)) ! recompute dt
 322    end if
 323
 324    ! interpolate diffusion coefficient onto 2 staggered lat-lon grids
 325    do latIndex = 1, nj
 326      do lonIndex = 1, ni - 1
 327        diff(diffID)%khalfx(lonIndex, latIndex) = (kappa(lonIndex, latIndex) + kappa(lonIndex + 1, latIndex)) / 2.0d0
 328      end do
 329    end do
 330    do latIndex = 1, nj - 1
 331      do lonIndex = 1, ni
 332        diff(diffID)%khalfy(lonIndex, latIndex) = (kappa(lonIndex, latIndex) + kappa(lonIndex, latIndex + 1)) / 2.0d0
 333      end do
 334    end do
 335
 336    ! print this stuff in listing file for user information:
 337    write(*,*)
 338    write(*,*) 'diff_setup: number of timesteps: ', diff(diffID)%numt 
 339    if (.not. useImplicit) write(*,*) 'diff_setup: stability: ', maxval(kappa) * diff(diffID)%dt / (mindxy**2)
 340    write(*,*)
 341
 342    ! this is the matrix necessary for defining the inner product: for lat-lon grid, only cos(y)
 343    ! Actually, only the diagonal of the matrix is stored in the array W, since the matrix is diagonal.
 344    W(1,:) = cos(latr(:))
 345    do lonIndex = 2, ni
 346      W(lonIndex, :) = W(1, :)
 347    end do
 348    diff(diffID)%Winv(:,:)     = 1.0d0 / W(:,:)
 349    diff(diffID)%Wsqrt(:,:)    = sqrt( W(:,:) )
 350    diff(diffID)%Winvsqrt(:,:) = 1.0d0 / diff(diffID)%Wsqrt(:,:)
 351
 352    ! compute the LU decomposition for the implicit 1D diffusion
 353    diff(diffID)%diff1x_ap(:,:) = 0.0d0
 354    diff(diffID)%diff1x_bp_inv(:,:) = 0.0d0      
 355    diff(diffID)%diff1x_c(:,:) = 0.0d0
 356    diff(diffID)%diff1y_ap(:,:) = 0.0d0
 357    diff(diffID)%diff1y_bp_inv(:,:) = 0.0d0      
 358    diff(diffID)%diff1y_c(:,:) = 0.0d0
 359
 360    !$OMP PARALLEL DO PRIVATE(latIndex,lonIndex,a,b)
 361    do latIndex = 2, nj - 1
 362      lonIndex = 2
 363      diff(diffID)%diff1x_bp_inv(lonIndex, latIndex) = 1.0d0 / (1.0d0 + &
 364          diff(diffID)%dt * diff(diffID)%cosyinvsq(latIndex) * (diff(diffID)%mhalfx(lonIndex, latIndex) * diff(diffID)%khalfx(lonIndex, latIndex) + &
 365          diff(diffID)%mhalfx(lonIndex - 1, latIndex) * diff(diffID)%khalfx(lonIndex - 1, latIndex)) / (diff(diffID)%dlon * diff(diffID)%dlon))
 366      do lonIndex = 3, ni - 1
 367        ! elements of the tri-diagonal coefficient matrix
 368        a = - diff(diffID)%dt * diff(diffID)%cosyinvsq(latIndex) * diff(diffID)%mhalfx(lonIndex - 1, latIndex) * &
 369            diff(diffID)%khalfx(lonIndex - 1, latIndex ) / (diff(diffID)%dlon * diff(diffID)%dlon)
 370        b = 1 + diff(diffID)%dt * diff(diffID)%cosyinvsq(latIndex) * (diff(diffID)%mhalfx(lonIndex, latIndex) * &
 371            diff(diffID)%khalfx(lonIndex, latIndex) + &
 372            diff(diffID)%mhalfx(lonIndex - 1, latIndex) * diff(diffID)%khalfx(lonIndex - 1, latIndex)) / (diff(diffID)%dlon * diff(diffID)%dlon)
 373        diff(diffID)%diff1x_c(lonIndex, latIndex) = - diff(diffID)%dt * diff(diffID)%cosyinvsq(latIndex) * diff(diffID)%mhalfx(lonIndex, latIndex) * &
 374            diff(diffID)%khalfx(lonIndex, latIndex) / (diff(diffID)%dlon * diff(diffID)%dlon)
 375        diff(diffID)%diff1x_ap(lonIndex, latIndex) = a * diff(diffID)%diff1x_bp_inv(lonIndex - 1, latIndex)
 376        diff(diffID)%diff1x_bp_inv( lonIndex, latIndex ) = 1.0d0 / (b - a * diff(diffID)%diff1x_c(lonIndex - 1, latIndex) * &
 377                                                           diff(diffID)%diff1x_bp_inv(lonIndex - 1, latIndex))
 378      end do
 379    end do
 380    !$OMP END PARALLEL DO
 381
 382    !$OMP PARALLEL DO PRIVATE(latIndex,lonIndex,a,b)
 383    do lonIndex = 2, ni - 1
 384      latIndex = 2
 385      diff(diffID)%diff1y_bp_inv( latIndex, lonIndex ) = 1.0d0 / (1.0d0 + &
 386           diff(diffID)%dt * diff(diffID)%cosyinvsq(latIndex) * (diff(diffID)%cosyhalf(latIndex) * diff(diffID)%mhalfy(lonIndex, latIndex) * &
 387           diff(diffID)%khalfy( lonIndex, latIndex ) + &
 388           diff(diffID)%cosyhalf(latIndex - 1) * diff(diffID)%mhalfy(lonIndex, latIndex - 1) * diff(diffID)%khalfy(lonIndex, latIndex - 1)) / &
 389           (diff(diffID)%dlat * diff(diffID)%dlat))
 390      do latIndex = 3, nj-1
 391        ! elements of the tri-diagonal coefficient matrix
 392        a = - diff(diffID)%dt*diff(diffID)%cosyinv(latIndex)*diff(diffID)%cosyhalf(latIndex-1)* &
 393            diff(diffID)%mhalfy(lonIndex,latIndex-1)*diff(diffID)%khalfy(lonIndex,latIndex-1)/(diff(diffID)%dlat*diff(diffID)%dlat)
 394        b = 1 + diff(diffID)%dt*diff(diffID)%cosyinv(latIndex)*(diff(diffID)%cosyhalf(latIndex)*diff(diffID)%mhalfy(lonIndex,latIndex) * &
 395            diff(diffID)%khalfy(lonIndex,latIndex) + &
 396            diff(diffID)%cosyhalf( latIndex - 1 ) * diff(diffID)%mhalfy( lonIndex, latIndex - 1) * diff(diffID)%khalfy(lonIndex, latIndex - 1)) / &
 397            (diff(diffID)%dlat * diff(diffID)%dlat)
 398        diff(diffID)%diff1y_c( latIndex, lonIndex ) = - diff(diffID)%dt * diff(diffID)%cosyinv(latIndex) * diff(diffID)%cosyhalf(latIndex) * &
 399            diff(diffID)%mhalfy( lonIndex, latIndex ) * diff(diffID)%khalfy( lonIndex, latIndex ) / (diff(diffID)%dlat * diff(diffID)%dlat)
 400        diff(diffID)%diff1y_ap(latIndex,lonIndex) = a * diff(diffID)%diff1y_bp_inv(latIndex - 1, lonIndex)
 401        diff(diffID)%diff1y_bp_inv(latIndex, lonIndex) = 1.0d0 / (b - a * diff(diffID)%diff1y_c(latIndex - 1, lonIndex) * &
 402                                                         diff(diffID)%diff1y_bp_inv(latIndex - 1, lonIndex))
 403      end do
 404    end do
 405    !$OMP END PARALLEL DO
 406
 407    std_unit = 0
 408    inquire (file=diff_norm_fact, exist=file_exist)
 409        
 410    if ( file_exist ) then
 411
 412      write(*,*) 'diff_setup: file containing normalization factors exists: ', trim(diff_norm_fact)
 413       
 414      ierr = fnom(std_unit, diff_norm_fact, 'RND+R/O', 0)
 415      nmax = fstouv(std_unit, 'RND')
 416
 417      allocate(buf2d(ni, nj))
 418      ! Looking for FST record parameters..
 419      dateo = -1
 420      etiket = ''
 421      ip1 = -1
 422      ip2 = -1
 423      ip3 = -1
 424      grtyp = ' '
 425      ikey = utl_fstlir_r4( buf2d, std_unit, nii, njj, nkk, dateo, etiket, ip1, ip2, ip3, grtyp, 'LAMB')
 426      write(*,*) 'diff_setup: nii / njj: ', nii, njj
 427      write(*,*) 'diff_setup: ni  / nj : ', ni, nj
 428      if (ikey <= 0) write(*,*) 'diff_setup: Attention! ikey = ', ikey
 429
 430      ierr = fstfrm(std_unit)
 431      ierr = fclos(std_unit)
 432
 433      diff(diffID)%Lambda = dble(buf2d)
 434
 435      deallocate(buf2d)
 436
 437    end if
 438
 439    if (nii /= ni .or. njj /= nj .or. ikey <= 0 .or. ( .not. file_exist)) then
 440
 441      if (.not. file_exist) then
 442        write(*,*) 'diff_setup: file does not exist: ', trim(diff_norm_fact)
 443        write(*,*) 'diff_setup: normalization factors will be computed...'
 444      end if
 445      
 446      seed = 1 
 447      call rng_setup(abs(seed + mmpi_myid))
 448
 449      write(*,*) 'diff_setup: Number of samples, ni * nj: ', numberSamples, ni * nj
 450
 451      ! compute normalization:  Lambda = inverse stddev of (Diffuse * W^-1/2)
 452      write(*,*) 'diff_setup: Estimate normalization factors for diffusion using randomization method...'
 453      write(*,*) 'diff_setup: will use ', numberSamples,' samples.',' ni and nj: ', ni, nj
 454      call flush(6)
 455      diff(diffID)%Lambda = 0.0d0
 456      lambdaLocal(:,:)    = 0.0d0
 457      lambdaLocal_r4(:,:) = 0.0d0
 458
 459      SAMPLE: do sampleIndex = 1, numberSamples
 460
 461        if (modulo(sampleIndex, 100) == 0) write(*,*) 'diff_setup: Computing sample: ', sampleIndex
 462
 463        do latIndex = myLatBeg, myLatEnd
 464          do lonIndex = myLonBeg, myLonEnd
 465            xin(lonIndex, latIndex) = diff(diffID)%Winvsqrt(lonIndex, latIndex) * rng_gaussian()
 466          end do
 467        end do
 468
 469        if (useImplicit) then
 470
 471          allocate(xin_transpose(diff(diffID)%myLonBeg_transpose:diff(diffID)%myLonEnd_transpose,diff(diffID)%nj))
 472          do timeStep = 1, diff(diffID)%numt
 473            call diffusion1x_implicit( diffID, xin, xin )
 474            call transposeLatToLonBands( diffID, xin, xin_transpose )
 475            call diffusion1y_implicit( diffID, xin_transpose, xin_transpose )
 476            call transposeLonToLatBands(diffID, xin_transpose, xin)
 477          end do
 478          deallocate(xin_transpose)
 479
 480        else
 481
 482          call diffusion_explicit(diffID, xin, xin)
 483
 484        end if
 485
 486        do latIndex = myLatBeg, myLatEnd
 487          do lonIndex = myLonBeg, myLonEnd
 488
 489            diff(diffID)%Lambda(lonIndex, latIndex) = diff(diffID)%Lambda(lonIndex, latIndex) + &
 490                                                      xin(lonIndex, latIndex) * xin(lonIndex, latIndex)
 491
 492	  end do
 493	end do
 494
 495      end do SAMPLE
 496
 497      do latIndex = myLatBeg, myLatEnd
 498        do lonIndex = myLonBeg, myLonEnd
 499          
 500          ! normalization: inverse of rms of ens
 501          diff(diffID)%Lambda(lonIndex, latIndex) = sqrt(diff(diffID)%Lambda(lonIndex, latIndex) / dble(numberSamples - 1))
 502
 503          if (diff(diffID)%Lambda(lonIndex, latIndex) > 0.0d0) then
 504            diff(diffID)%Lambda(lonIndex, latIndex) = 1.0d0 / diff(diffID)%Lambda(lonIndex, latIndex)
 505          end if
 506
 507        end do
 508      end do
 509
 510      lambdaLocal(myLonBeg : myLonEnd, myLatBeg : myLatEnd) = diff(diffID)%Lambda(myLonBeg : myLonEnd, myLatBeg : myLatEnd)    
 511      nsize = ni * nj
 512      call rpn_comm_allreduce(lambdaLocal, diff(diffID)%Lambda, nsize, "mpi_double_precision", "mpi_sum", "GRID", ierr)
 513
 514      if (mmpi_myid == 0) then
 515 
 516        ! preparing real(4) array to save into fst file avoiding huge values
 517        do latIndex = 1, nj
 518          do lonIndex = 1, ni
 519            lambdaLocal_r4(lonIndex, latIndex) = min(huge(real(diff(diffID)%Lambda(lonIndex, latIndex))),&
 520                                                     diff(diffID)%Lambda(lonIndex, latIndex))
 521          end do
 522        end do 	  
 523
 524	write(*,*) 'diff_setup: Save normalization coefficient on proc ', mmpi_myid, ' into the file: ', trim(diff_norm_fact)
 525        npak = 0
 526        dateo = 0
 527        deet = 0
 528        npas = 0
 529        ip1 = 0
 530        ip2 = 0
 531        ip3 = 0
 532        typvar = 'X'
 533        grtyp = 'X'
 534        ig1 = 0
 535        ig2 = 0
 536        ig3 = 0
 537        ig4 = 0
 538        datyp = 1
 539        rewrit = .FALSE.
 540
 541        if (useImplicit) then
 542          write (etiket, FMT='(''KM'',i3.3,''IMPLICI'')') int(corr_len)
 543        else
 544          write (etiket, FMT='(''KM'',i3.3,''STAB'',f3.1)') int(corr_len), stab
 545        end if
 546
 547        ierr = fnom(std_unit, diff_norm_fact, 'RND', 0)
 548        nmax = fstouv(std_unit, 'RND')
 549        ierr = fstecr(lambdaLocal_r4, dumwrk, npak, std_unit, dateo, deet, &
 550                     npas, ni, nj, 1, ip1, ip2, ip3, typvar, 'LAMB', etiket, grtyp, &
 551                     ig1, ig2, ig3, ig4, datyp, rewrit)
 552
 553        ierr = fstfrm( std_unit )
 554        ierr = fclos( std_unit )
 555
 556      end if 
 557
 558    end if
 559
 560    deallocate(xin)
 561    deallocate(m)
 562    deallocate(W)
 563    deallocate(kappa)
 564    deallocate(Lcorr)
 565    deallocate(latr)
 566    deallocate(lambdaLocal)
 567    deallocate(lambdaLocal_r4)
 568
 569    diff_setup = diffID
 570
 571    write(*,*) 'diff_setup: ***** END *****'
 572
 573  end function diff_setup
 574
 575  !----------------------------------------------------------------------------------------
 576  ! diff_finalize
 577  !----------------------------------------------------------------------------------------
 578  subroutine diff_finalize(diffID)
 579    !
 580    !:Purpose: To finalize the diffusion operator module, and to free up memory.
 581    !
 582    implicit none
 583
 584    ! Arguments:
 585    integer, intent(in)  :: diffID
 586
 587    write(*,*) 'diff_finalize: deallocating arrays fordiffID= ', diffID
 588
 589    deallocate( diff(diffID)%diff1y_c      )
 590    deallocate( diff(diffID)%diff1y_bp_inv )
 591    deallocate( diff(diffID)%diff1y_ap     )
 592    deallocate( diff(diffID)%diff1x_c      )
 593    deallocate( diff(diffID)%diff1x_bp_inv )
 594    deallocate( diff(diffID)%diff1x_ap     )
 595
 596    deallocate( diff(diffID)%Lambda )
 597    deallocate( diff(diffID)%mhalfy, diff(diffID)%mhalfx )
 598    deallocate( diff(diffID)%khalfy, diff(diffID)%khalfx )
 599    deallocate( diff(diffID)%Winvsqrt, diff(diffID)%Wsqrt, diff(diffID)%Winv )
 600    deallocate( diff(diffID)%cosyinvsq, diff(diffID)%cosyinv, diff(diffID)%cosyhalf )
 601
 602  end subroutine diff_finalize
 603
 604
 605  subroutine diffusion_explicit( diffID, xin, xout )
 606    !
 607    !:Purpose: To compute Lsqrt*xin (diffusion over numt/2 timesteps), and to
 608    !          specify initial conditions
 609    implicit none
 610
 611    ! Arguments:
 612    integer, intent(in)  :: diffID
 613    real(8), intent(in)  :: xin ( :, : )
 614    real(8), intent(out) :: xout( :, : )
 615    
 616    ! Locals:
 617    integer :: timeIndex, latIndex, lonIndex
 618    integer :: myLonBeg, myLonEnd, myLatBeg, myLatEnd
 619    integer :: myLonBegNoB, myLonEndNoB, myLatBegNoB, myLatEndNoB
 620    integer :: lonPerPE, latPerPE
 621    integer :: sendToPE, recvFromPE, sendTag, recvTag, status, ierr
 622    real(8) :: xhalo( (diff(diffID)%myLonBeg-1):(diff(diffID)%myLonEnd+1), (diff(diffID)%myLatBeg-1):(diff(diffID)%myLatEnd+1) )
 623    real(8) :: xlast( (diff(diffID)%myLonBeg-1):(diff(diffID)%myLonEnd+1), (diff(diffID)%myLatBeg-1):(diff(diffID)%myLatEnd+1) )
 624    real(8), allocatable :: sendBufLon(:), recvBufLon(:), sendBufLat(:), recvBufLat(:)
 625
 626    lonPerPE = diff(diffID)%lonPerPE
 627    latPerPE = diff(diffID)%latPerPE
 628
 629    myLonBeg = diff(diffID)%myLonBeg
 630    myLonEnd = diff(diffID)%myLonEnd
 631    myLatBeg = diff(diffID)%myLatBeg
 632    myLatEnd = diff(diffID)%myLatEnd
 633
 634    ! remove global border from range of grid points where output is calculated
 635    myLonBegNoB = max(myLonBeg, 2)
 636    myLonEndNoB = min(myLonEnd, diff(diffID)%ni-1)
 637    myLatBegNoB = max(myLatBeg, 2)
 638    myLatEndNoB = min(myLatEnd, diff(diffID)%nj-1)
 639
 640    ! setup for mpi halo exchange
 641    allocate(sendBufLon(lonPerPE))
 642    allocate(recvBufLon(lonPerPE))
 643    allocate(sendBufLat(latPerPE))
 644    allocate(recvBufLat(latPerPE))
 645
 646    xhalo(:,:) = 0.0d0
 647    xlast(:,:) = 0.0d0
 648    xlast( myLonBeg:myLonEnd, myLatBeg:myLatEnd ) = xin(:,:)
 649    
 650    ! iterate difference equations
 651    TIME: do timeIndex = 1, diff(diffID)%numt / 2
 652
 653      ! exchange 4 arrays of halo information between mpi tasks
 654
 655      ! halo array #1: send western edge interior, recv eastern edge halo
 656      if (mmpi_npex > 1) then
 657        sendToPE   = mmpi_myidx - 1
 658        recvFromPE = mmpi_myidx + 1
 659        sendTag = (mmpi_myidx)*10000 + (mmpi_myidx - 1)
 660        recvTag = (mmpi_myidx + 1)*10000 + (mmpi_myidx)
 661        if (mmpi_myidx == 0) then
 662          ! western-most tile only recv
 663          call rpn_comm_recv( recvBufLat, latPerPE, "mpi_real8", recvFromPE, recvTag, "EW", status, ierr )
 664          xlast(myLonEnd+1,myLatBeg:myLatEnd) = recvBufLat(:)
 665        else if (mmpi_myidx == (mmpi_npex-1)) then
 666          ! eastern-most tile only send
 667          sendBufLat(:) = xlast(myLonBeg,myLatBeg:myLatEnd)
 668          call rpn_comm_send( sendBufLat, latPerPE, "mpi_real8", sendToPE, sendTag, "EW", ierr )
 669        else
 670          ! interior tiles both send and recv
 671          sendBufLat(:) = xlast(myLonBeg,myLatBeg:myLatEnd)
 672          call rpn_comm_sendrecv( sendBufLat, latPerPE, "mpi_real8", sendToPE,   sendTag, &
 673                                  recvBufLat, latPerPE, "mpi_real8", recvFromPE, recvTag, &
 674                                  "EW", status, ierr )
 675          xlast(myLonEnd+1,myLatBeg:myLatEnd) = recvBufLat(:)
 676        end if
 677      end if
 678
 679      ! halo array #2: send eastern edge interior, recv western edge halo
 680      if (mmpi_npex > 1) then
 681        sendToPE   = mmpi_myidx + 1
 682        recvFromPE = mmpi_myidx - 1
 683        sendTag = (mmpi_myidx)*10000 + (mmpi_myidx + 1)
 684        recvTag = (mmpi_myidx - 1)*10000 + (mmpi_myidx)
 685        if (mmpi_myidx == (mmpi_npex-1)) then
 686          ! eastern-most tile only recv
 687          call rpn_comm_recv( recvBufLat, latPerPE, "mpi_real8", recvFromPE, recvTag, "EW", status, ierr )
 688          xlast(myLonBeg-1,myLatBeg:myLatEnd) = recvBufLat(:)
 689        else if (mmpi_myidx == 0) then
 690          ! western-most tile only send
 691          sendBufLat(:) = xlast(myLonEnd,myLatBeg:myLatEnd)
 692          call rpn_comm_send( sendBufLat, latPerPE, "mpi_real8", sendToPE, sendTag, "EW", ierr )
 693        else
 694          ! interior tiles both send and recv
 695          sendBufLat(:) = xlast(myLonEnd,myLatBeg:myLatEnd)
 696          call rpn_comm_sendrecv( sendBufLat, latPerPE, "mpi_real8", sendToPE,   sendTag, &
 697                                  recvBufLat, latPerPE, "mpi_real8", recvFromPE, recvTag, &
 698                                  "EW", status, ierr )
 699          xlast(myLonBeg-1,myLatBeg:myLatEnd) = recvBufLat(:)
 700        end if
 701      end if
 702
 703      ! halo array #3: send southern edge interior, recv northern edge halo
 704      if (mmpi_npey > 1) then
 705        sendToPE   = mmpi_myidy - 1
 706        recvFromPE = mmpi_myidy + 1
 707        sendTag = (mmpi_myidy)*10000 + (mmpi_myidy - 1)
 708        recvTag = (mmpi_myidy + 1)*10000 + (mmpi_myidy)
 709        if (mmpi_myidy == 0) then
 710          ! southern-most tile only recv
 711          call rpn_comm_recv( recvBufLon, lonPerPE, "mpi_real8", recvFromPE, recvTag, "NS", status, ierr )
 712          xlast(myLonBeg:myLonEnd,myLatEnd+1) = recvBufLon(:)
 713        else if (mmpi_myidy == (mmpi_npey-1)) then
 714          ! northern-most tile only send
 715          sendBufLon(:) = xlast(myLonBeg:myLonEnd,myLatBeg)
 716          call rpn_comm_send( sendBufLon, lonPerPE, "mpi_real8", sendToPE, sendTag, "NS", ierr )
 717        else
 718          ! interior tiles both send and recv
 719          sendBufLon(:) = xlast(myLonBeg:myLonEnd,myLatBeg)
 720          call rpn_comm_sendrecv( sendBufLon, lonPerPE, "mpi_real8", sendToPE,   sendTag, &
 721                                  recvBufLon, lonPerPE, "mpi_real8", recvFromPE, recvTag, &
 722                                  "NS", status, ierr )
 723          xlast(myLonBeg:myLonEnd,myLatEnd+1) = recvBufLon(:)
 724        end if
 725      end if
 726
 727      ! halo array #4: send northern edge interior, recv southern edge halo
 728      if (mmpi_npey > 1) then
 729        sendToPE   = mmpi_myidy + 1
 730        recvFromPE = mmpi_myidy - 1
 731        sendTag = (mmpi_myidy)*10000 + (mmpi_myidy + 1)
 732        recvTag = (mmpi_myidy - 1)*10000 + (mmpi_myidy)
 733        if (mmpi_myidy == (mmpi_npey-1)) then
 734          ! northern-most tile only recv
 735          call rpn_comm_recv( recvBufLon, lonPerPE, "mpi_real8", recvFromPE, recvTag, "NS", status, ierr )
 736          xlast(myLonBeg:myLonEnd,myLatBeg-1) = recvBufLon(:)
 737        else if (mmpi_myidy == 0) then
 738          ! southern-most tile only send
 739          sendBufLon(:) = xlast(myLonBeg:myLonEnd,myLatEnd)
 740          call rpn_comm_send( sendBufLon, lonPerPE, "mpi_real8", sendToPE, sendTag, "NS", ierr )
 741        else
 742          ! interior tiles both send and recv
 743          sendBufLon(:) = xlast(myLonBeg:myLonEnd,myLatEnd)
 744          call rpn_comm_sendrecv( sendBufLon, lonPerPE, "mpi_real8", sendToPE,   sendTag, &
 745                                  recvBufLon, lonPerPE, "mpi_real8", recvFromPE, recvTag, &
 746                                  "NS", status, ierr )
 747          xlast(myLonBeg:myLonEnd,myLatBeg-1) = recvBufLon(:)
 748        end if
 749      end if
 750
 751      !$OMP PARALLEL DO PRIVATE(latIndex,lonIndex)
 752      do latIndex = myLatBegNoB, myLatEndNoB
 753        do lonIndex = myLonBegNoB, myLonEndNoB
 754          xhalo(lonIndex,latIndex) = xlast(lonIndex,latIndex) + diff(diffID)%dt *                                                             &
 755                ( diff(diffID)%cosyinvsq(latIndex) *                                                                                          &
 756                  ( diff(diffID)%mhalfx(lonIndex,latIndex) * diff(diffID)%khalfx(lonIndex,latIndex) *                                         &
 757                    ( xlast(lonIndex+1,latIndex) - xlast(lonIndex,latIndex) ) / diff(diffID)%dlon -                                           &
 758                    diff(diffID)%mhalfx(lonIndex-1,latIndex) * diff(diffID)%khalfx(lonIndex-1,latIndex) *                                     &
 759                    ( xlast(lonIndex,latIndex) - xlast(lonIndex-1,latIndex) ) / diff(diffID)%dlon                                             &
 760                  ) / diff(diffID)%dlon +                                                                                                     &
 761                  diff(diffID)%cosyinv(latIndex) *                                                                                            &
 762                  ( diff(diffID)%cosyhalf(latIndex) * diff(diffID)%mhalfy(lonIndex,latIndex) * diff(diffID)%khalfy(lonIndex,latIndex) *       &
 763                    ( xlast(lonIndex,latIndex+1) - xlast(lonIndex,latIndex) ) / diff(diffID)%dlat -                                           &
 764                    diff(diffID)%cosyhalf(latIndex-1) * diff(diffID)%mhalfy(lonIndex,latIndex-1) * diff(diffID)%khalfy(lonIndex,latIndex-1) * &
 765                    ( xlast(lonIndex,latIndex) - xlast(lonIndex,latIndex-1) ) / diff(diffID)%dlat                                             &
 766                  ) / diff(diffID)%dlat                                                                                                       &
 767                )
 768        end do
 769      end do
 770      !$OMP END PARALLEL DO
 771
 772      xlast(:,:) = xhalo(:,:)
 773
 774    end do TIME
 775
 776    xout(:,:) = xlast( myLonBeg:myLonEnd, myLatBeg:myLatEnd )
 777
 778    deallocate(sendBufLon)
 779    deallocate(recvBufLon)
 780    deallocate(sendBufLat)
 781    deallocate(recvBufLat)
 782
 783  end subroutine diffusion_explicit
 784
 785
 786  subroutine diff_Csqrt( diffID, xin, xout )
 787
 788    implicit none
 789
 790    ! Arguments
 791    integer, intent(in)  :: diffID
 792    real(8), intent(in)  :: xin ( diff(diffID)%myLonBeg:diff(diffID)%myLonEnd, diff(diffID)%myLatBeg:diff(diffID)%myLatEnd )
 793    real(8), intent(out) :: xout( diff(diffID)%myLonBeg:diff(diffID)%myLonEnd, diff(diffID)%myLatBeg:diff(diffID)%myLatEnd )
 794
 795    ! Locals:
 796    integer              :: timeStep, myLatBegIgnore, myLatEndIgnore
 797    real(8), allocatable :: xout_transpose(:,:)
 798
 799    ! compute Csqrt
 800
 801    ! this is the C^1/2 required for the forward model: Csqrt = Lambda * Diffuse * W^-1/2
 802    xout(:,:) = xin(:,:) *  &
 803                diff(diffID)%Winvsqrt(diff(diffID)%myLonBeg:diff(diffID)%myLonEnd, &
 804                                          diff(diffID)%myLatBeg:diff(diffID)%myLatEnd)
 805    if (diff(diffID)%latIndexIgnore > 0) then
 806      ! south pole
 807      myLatBegIgnore = max(diff(diffID)%myLatBeg,1)
 808      myLatEndIgnore = max(diff(diffID)%myLatBeg,min(diff(diffID)%myLatEnd,diff(diffID)%latIndexIgnore))
 809      xout(:,myLatBegIgnore:myLatEndIgnore) = 0.0d0
 810      ! north pole
 811      myLatBegIgnore = min(diff(diffID)%myLatEnd,max(diff(diffID)%myLatBeg,diff(diffID)%nj-diff(diffID)%latIndexIgnore))
 812      myLatEndIgnore = min(diff(diffID)%myLatEnd,diff(diffID)%nj)
 813      xout(:,myLatBegIgnore:myLatEndIgnore) = 0.0d0
 814    end if
 815    write(*,*) 'min/maxval xout=', minval(xout),maxval(xout)
 816
 817    if ( diff(diffID)%useImplicit ) then
 818      allocate(xout_transpose(diff(diffID)%myLonBeg_transpose:diff(diffID)%myLonEnd_transpose,diff(diffID)%nj))
 819      do timeStep = 1, diff(diffID)%numt
 820        call diffusion1x_implicit( diffID, xout, xout )
 821
 822        call transposeLatToLonBands( diffID, xout, xout_transpose )
 823        call diffusion1y_implicit( diffID, xout_transpose, xout_transpose )
 824        call transposeLonToLatBands( diffID, xout_transpose, xout )
 825      end do
 826      deallocate(xout_transpose)
 827    else
 828      call diffusion_explicit ( diffID, xout, xout )
 829    end if
 830
 831    xout(:,:) = xout(:,:) *  &
 832                diff(diffID)%Lambda(diff(diffID)%myLonBeg:diff(diffID)%myLonEnd, &
 833                                      diff(diffID)%myLatBeg:diff(diffID)%myLatEnd)
 834
 835  end subroutine diff_Csqrt
 836
 837
 838  subroutine diff_Csqrtadj( diffID, xin, xout )
 839
 840    implicit none
 841
 842    ! Arguments:
 843    integer, intent(in)  :: diffID
 844    real(8), intent(in)  :: xin ( diff(diffID)%myLonBeg:diff(diffID)%myLonEnd, diff(diffID)%myLatBeg:diff(diffID)%myLatEnd )
 845    real(8), intent(out) :: xout( diff(diffID)%myLonBeg:diff(diffID)%myLonEnd, diff(diffID)%myLatBeg:diff(diffID)%myLatEnd )
 846
 847    ! Locals:
 848    integer              :: timeStep
 849    real(8), allocatable :: xout_transpose(:,:)
 850
 851    ! compute Csqrtadj
 852
 853    ! this is the (C^1/2)^T required for the adjoint: Csqrt^T = W^1/2 * Diffuse * W^-1 * Lambda
 854    xout(:,:) = xin (:,:) * diff(diffID)%Lambda(diff(diffID)%myLonBeg:diff(diffID)%myLonEnd, &
 855                                                diff(diffID)%myLatBeg:diff(diffID)%myLatEnd) 
 856    xout(:,:) = xout(:,:) * diff(diffID)%Winv(diff(diffID)%myLonBeg:diff(diffID)%myLonEnd, &
 857                                              diff(diffID)%myLatBeg:diff(diffID)%myLatEnd) 
 858    if ( diff(diffID)%useImplicit ) then
 859      allocate(xout_transpose(diff(diffID)%myLonBeg_transpose:diff(diffID)%myLonEnd_transpose,diff(diffID)%nj))
 860      do timeStep = 1, diff(diffID)%numt
 861        call transposeLatToLonBands( diffID, xout, xout_transpose )
 862        call diffusion1y_implicit( diffID, xout_transpose, xout_transpose )
 863        call transposeLonToLatBands( diffID, xout_transpose, xout )
 864
 865        call diffusion1x_implicit( diffID, xout, xout )
 866      end do
 867      deallocate(xout_transpose)
 868    else
 869      call diffusion_explicit( diffID, xout, xout )
 870    end if
 871
 872    xout(:,:) = xout(:,:) * &
 873                diff(diffID)%Wsqrt(diff(diffID)%myLonBeg:diff(diffID)%myLonEnd, &
 874                                     diff(diffID)%myLatBeg:diff(diffID)%myLatEnd)
 875
 876  end subroutine diff_Csqrtadj
 877
 878
 879  subroutine transposeLatToLonBands(diffID, xin, xout)
 880    !
 881    !:Purpose: Perform an MPI transposition for a 2D array between latitude
 882    !          bands and longitude bands.
 883    !
 884    implicit none
 885
 886    ! Arguments:
 887    integer, intent(in)  :: diffID
 888    real(8), intent(in)  :: xin ( diff(diffID)%ni, diff(diffID)%myLatBeg:diff(diffID)%myLatEnd )
 889    real(8), intent(out) :: xout( diff(diffID)%myLonBeg_transpose:diff(diffID)%myLonEnd_transpose, diff(diffID)%nj )
 890
 891    ! Locals:
 892    integer :: yourid, ierr, nsize, numLonPoints, numLatPoints
 893    integer :: allLonBeg(mmpi_nprocs), allLonEnd(mmpi_nprocs)
 894    integer :: allLatBeg(mmpi_nprocs), allLatEnd(mmpi_nprocs)
 895    real(8), allocatable :: xsend(:,:,:),xrecv(:,:,:)
 896
 897    call rpn_comm_allgather(diff(diffID)%myLonBeg_transpose,1,'mpi_integer',       &
 898                            allLonBeg                      ,1,'mpi_integer','GRID',ierr)
 899    call rpn_comm_allgather(diff(diffID)%myLonEnd_transpose,1,'mpi_integer',       &
 900                            allLonEnd                      ,1,'mpi_integer','GRID',ierr)
 901    call rpn_comm_allgather(diff(diffID)%myLatBeg,1,'mpi_integer',       &
 902                            allLatBeg            ,1,'mpi_integer','GRID',ierr)
 903    call rpn_comm_allgather(diff(diffID)%myLatEnd,1,'mpi_integer',       &
 904                            allLatEnd            ,1,'mpi_integer','GRID',ierr)
 905
 906    allocate(xsend(diff(diffID)%lonPerPEmax_transpose,diff(diffID)%latPerPEmax, mmpi_nprocs))
 907    allocate(xrecv(diff(diffID)%lonPerPEmax_transpose,diff(diffID)%latPerPEmax, mmpi_nprocs))
 908
 909    xout(:,:) = 0.0d0
 910    xsend(:,:,:) = 0.0d0
 911    xrecv(:,:,:) = 0.0d0
 912
 913    !$OMP PARALLEL DO PRIVATE(yourid, numLonPoints, numLatPoints)
 914    do yourid = 1, mmpi_nprocs
 915      numLonPoints = allLonEnd(yourid) - allLonBeg(yourid) + 1
 916      numLatPoints = size(xin,2)
 917      xsend(1:numLonPoints,1:numLatPoints,yourid) = xin(allLonBeg(yourid):allLonEnd(yourid),:) 
 918    end do
 919    !$OMP END PARALLEL DO
 920
 921    nsize = diff(diffID)%lonPerPEmax_transpose * diff(diffID)%latPerPEmax
 922    if (mmpi_nprocs > 1) then
 923      call rpn_comm_alltoall(xsend, nsize, 'mpi_real8',  &
 924                             xrecv, nsize, 'mpi_real8', 'grid', ierr)
 925    else
 926      xrecv(:,:,1) = xsend(:,:,1)
 927    end if
 928
 929    !$OMP PARALLEL DO PRIVATE(yourid, numLonPoints, numLatPoints)
 930    do yourid = 1, mmpi_nprocs
 931      numLonPoints = size(xout,1)
 932      numLatPoints = allLatEnd(yourid) - allLatBeg(yourid) + 1
 933      xout(:,allLatBeg(yourid):allLatEnd(yourid)) = xrecv(1:numLonPoints,1:numLatPoints,yourid)
 934    end do
 935    !$OMP END PARALLEL DO
 936
 937    deallocate(xsend)
 938    deallocate(xrecv)
 939
 940  end subroutine transposeLatToLonBands
 941
 942  
 943  subroutine transposeLonToLatBands(diffID, xin, xout)
 944    !
 945    !:Purpose: Perform an MPI transposition for a 2D array between longitude
 946    !          bands and latitude bands.
 947    !
 948    implicit none
 949
 950    ! Arguments:
 951    integer, intent(in)  :: diffID
 952    real(8), intent(in)  :: xin ( diff(diffID)%myLonBeg_transpose:diff(diffID)%myLonEnd_transpose, diff(diffID)%nj )
 953    real(8), intent(out) :: xout( diff(diffID)%ni, diff(diffID)%myLatBeg:diff(diffID)%myLatEnd )
 954
 955    ! Locals:
 956    integer :: yourid, ierr, nsize, numLonPoints, numLatPoints
 957    integer :: allLonBeg(mmpi_nprocs), allLonEnd(mmpi_nprocs)
 958    integer :: allLatBeg(mmpi_nprocs), allLatEnd(mmpi_nprocs)
 959    real(8), allocatable :: xsend(:,:,:),xrecv(:,:,:)
 960
 961    call rpn_comm_allgather(diff(diffID)%myLonBeg_transpose,1,'mpi_integer',       &
 962                            allLonBeg                      ,1,'mpi_integer','GRID',ierr)
 963    call rpn_comm_allgather(diff(diffID)%myLonEnd_transpose,1,'mpi_integer',       &
 964                            allLonEnd                      ,1,'mpi_integer','GRID',ierr)
 965    call rpn_comm_allgather(diff(diffID)%myLatBeg,1,'mpi_integer',       &
 966                            allLatBeg            ,1,'mpi_integer','GRID',ierr)
 967    call rpn_comm_allgather(diff(diffID)%myLatEnd,1,'mpi_integer',       &
 968                            allLatEnd            ,1,'mpi_integer','GRID',ierr)
 969
 970    allocate(xsend(diff(diffID)%lonPerPEmax_transpose,diff(diffID)%latPerPEmax, mmpi_nprocs))
 971    allocate(xrecv(diff(diffID)%lonPerPEmax_transpose,diff(diffID)%latPerPEmax, mmpi_nprocs))
 972
 973    xout(:,:) = 0.0d0
 974    xsend(:,:,:) = 0.0d0
 975    xrecv(:,:,:) = 0.0d0
 976
 977    !$OMP PARALLEL DO PRIVATE(yourid, numLonPoints, numLatPoints)
 978    do yourid = 1, mmpi_nprocs
 979      numLonPoints = size(xin,1)
 980      numLatPoints = allLatEnd(yourid) - allLatBeg(yourid) + 1
 981      xsend(1:numLonPoints,1:numLatPoints,yourid) = xin(:,allLatBeg(yourid):allLatEnd(yourid))
 982    end do
 983    !$OMP END PARALLEL DO
 984
 985    nsize = diff(diffID)%lonPerPEmax_transpose * diff(diffID)%latPerPEmax
 986    if (mmpi_nprocs > 1) then
 987      call rpn_comm_alltoall(xsend, nsize, 'mpi_real8',  &
 988                             xrecv, nsize, 'mpi_real8', 'grid', ierr)
 989    else
 990      xrecv(:,:,1) = xsend(:,:,1)
 991    end if
 992
 993    !$OMP PARALLEL DO PRIVATE(yourid, numLonPoints, numLatPoints)
 994    do yourid = 1, mmpi_nprocs
 995      numLonPoints = allLonEnd(yourid) - allLonBeg(yourid) + 1
 996      numLatPoints = size(xout,2)
 997      xout(allLonBeg(yourid):allLonEnd(yourid),:) = xrecv(1:numLonPoints,1:numLatPoints,yourid) 
 998    end do
 999    !$OMP END PARALLEL DO
1000
1001    deallocate(xsend)
1002    deallocate(xrecv)
1003
1004  end subroutine transposeLonToLatBands
1005
1006  
1007  subroutine diffusion1x_implicit(diffID, xin, xout)
1008    !
1009    !:Purpose: To compute Lsqrt*xin (diffusion over 1 timestep, loop over
1010    !          timesteps is external to the subroutine).
1011    !
1012    implicit none
1013
1014    ! Arguments:
1015    integer, intent(in)  :: diffID
1016    real(8), intent(in)  :: xin ( diff(diffID)%ni, diff(diffID)%myLatBeg:diff(diffID)%myLatEnd )
1017    real(8), intent(out) :: xout( diff(diffID)%ni, diff(diffID)%myLatBeg:diff(diffID)%myLatEnd )
1018
1019    ! Locals:
1020    integer :: latIndex, lonIndex, iterIndex
1021    real(8) :: xlast( diff(diffID)%ni, diff(diffID)%myLatBeg:diff(diffID)%myLatEnd )
1022    real(8) :: dp( diff(diffID)%ni )
1023
1024    !$OMP PARALLEL DO PRIVATE( latIndex, lonIndex )
1025    do latIndex = diff(diffID)%myLatBeg, diff(diffID)%myLatEnd
1026      do lonIndex = 1, diff (diffID)%ni
1027        xlast ( lonIndex, latIndex ) = xin ( lonIndex, latIndex )
1028      end do
1029    end do
1030    !$OMP END PARALLEL DO
1031
1032    do iterIndex = 1, diff(diffID)%numIterImp
1033      
1034      !$OMP PARALLEL DO PRIVATE(latIndex,lonIndex,dp)
1035      do latIndex =  max(2, diff(diffID)%myLatBeg), min(diff(diffID)%nj-1, diff(diffID)%myLatEnd)
1036        lonIndex = 2
1037        dp( lonIndex ) = xlast ( lonIndex, latIndex )
1038        do lonIndex = 3, diff (diffID)%ni - 1
1039          dp( lonIndex ) = xlast( lonIndex, latIndex ) - diff(diffID)%diff1x_ap( lonIndex, latIndex ) * dp( lonIndex - 1 )
1040        end do
1041        lonIndex = diff(diffID)%ni - 1
1042        xout( lonIndex, latIndex ) = dp( lonIndex ) * diff(diffID)%diff1x_bp_inv( lonIndex, latIndex )
1043        do lonIndex = diff(diffID)%ni - 2, 2, -1
1044          xout( lonIndex, latIndex ) = ( dp( lonIndex ) - diff(diffID)%diff1x_c( lonIndex, latIndex ) * xout( lonIndex + 1, latIndex ) ) * &
1045                                       diff(diffID)%diff1x_bp_inv( lonIndex, latIndex )
1046        end do
1047      end do
1048      !$OMP END PARALLEL DO
1049
1050      !$OMP PARALLEL DO PRIVATE( latIndex, lonIndex )
1051      do latIndex = diff(diffID)%myLatBeg, diff(diffID)%myLatEnd
1052        do lonIndex = 1, diff (diffID)%ni
1053          xlast ( lonIndex, latIndex ) = xout ( lonIndex, latIndex )
1054        end do
1055      end do
1056      !$OMP END PARALLEL DO
1057
1058    end do
1059
1060    do latIndex = diff(diffID)%myLatBeg, diff(diffID)%myLatEnd
1061      xout ( 1, latIndex )               = xin ( 1, latIndex )
1062      xout ( diff(diffID)%ni, latIndex ) = xin ( diff(diffID)%ni, latIndex )
1063    end do
1064
1065  end subroutine diffusion1x_implicit
1066
1067
1068  subroutine diffusion1y_implicit(diffID, xin, xout)
1069    !
1070    !:Purpose: To compute Lsqrt*xin (diffusion over 1 timestep, loop over 
1071    !          timesteps is external to the subroutine).
1072    !
1073    implicit none
1074
1075    ! Arguments:
1076    integer, intent(in)  :: diffID
1077    real(8), intent(in)  :: xin (diff(diffID)%myLonBeg_transpose:diff(diffID)%myLonEnd_transpose, diff(diffID)%nj)
1078    real(8), intent(out) :: xout(diff(diffID)%myLonBeg_transpose:diff(diffID)%myLonEnd_transpose, diff(diffID)%nj)
1079
1080    ! Locals:
1081    integer :: latIndex, lonIndex, iterIndex
1082    real(8) :: xlast(diff(diffID)%nj, diff(diffID)%myLonBeg_transpose:diff(diffID)%myLonEnd_transpose)
1083    real(8) :: dp(diff(diffID)%nj)
1084
1085    ! NOTE:for improved efficiency, the 2D fields used internally are
1086    !      ordered (diff(diffID)%nj,diff(diffID)%ni) and
1087    !      NOT (diff(diffID)%ni,diff(diffID)%nj) as in the rest of the code!
1088
1089    !$OMP PARALLEL DO PRIVATE(latIndex,lonIndex)
1090    do latIndex = 1, diff (diffID)%nj
1091      do lonIndex = diff(diffID)%myLonBeg_transpose, diff(diffID)%myLonEnd_transpose
1092        xlast ( latIndex, lonIndex ) = xin ( lonIndex, latIndex )
1093      end do
1094    end do
1095    !$OMP END PARALLEL DO
1096
1097    do iterIndex = 1, diff(diffID)%numIterImp
1098
1099      !$OMP PARALLEL DO PRIVATE(latIndex,lonIndex,dp)
1100      do lonIndex = max(2, diff(diffID)%myLonBeg_transpose), min(diff(diffID)%ni-1, diff(diffID)%myLonEnd_transpose)
1101        latIndex = 2
1102        dp ( latIndex ) = xlast ( latIndex, lonIndex )
1103        do latIndex = 3, diff (diffID)%nj - 1
1104          dp ( latIndex ) = xlast ( latIndex, lonIndex ) - diff (diffID)%diff1y_ap ( latIndex, lonIndex ) * dp ( latIndex - 1 )
1105        end do
1106        latIndex = diff (diffID)%nj - 1
1107        xout ( lonIndex, latIndex ) = dp ( latIndex ) * diff (diffID)%diff1y_bp_inv ( latIndex, lonIndex )
1108        do latIndex = diff(diffID)%nj - 2, 2, -1
1109          xout ( lonIndex, latIndex ) = ( dp ( latIndex ) - diff(diffID)%diff1y_c ( latIndex, lonIndex ) * xout ( lonIndex, latIndex + 1 ) ) * &
1110                                        diff (diffID)%diff1y_bp_inv ( latIndex, lonIndex )
1111        end do
1112      end do
1113      !$OMP END PARALLEL DO
1114
1115      !$OMP PARALLEL DO PRIVATE(latIndex,lonIndex)
1116      do latIndex = 1, diff (diffID)%nj
1117        do lonIndex = diff(diffID)%myLonBeg_transpose, diff(diffID)%myLonEnd_transpose
1118          xlast ( latIndex, lonIndex ) = xout ( lonIndex, latIndex )
1119        end do
1120      end do
1121      !$OMP END PARALLEL DO
1122
1123    end do
1124
1125    do lonIndex = diff(diffID)%myLonBeg_transpose, diff(diffID)%myLonEnd_transpose
1126      xout( lonIndex, 1 )                    = xin ( lonIndex, 1 )
1127      xout( lonIndex, diff (diffID)%nj ) = xin ( lonIndex, diff (diffID)%nj )
1128    end do
1129
1130  end subroutine diffusion1y_implicit
1131
1132end module diffusion_mod