quasiNewton_mod sourceΒΆ

   1module quasiNewton_mod
   2  ! MODULE quasiNewton_mod (prefix='qna' category='1. High-level functionality')
   3  !
   4  !:Purpose: The n1qn3 routine, and its supporting subroutines. The original code was
   5  !          modified to support MPI parallelization.
   6  !
   7      use midasMpi_mod
   8      implicit none
   9      save 
  10      private
  11
  12      public :: qna_n1qn3
  13
  14      contains
  15      
  16      subroutine dcube(t,f,fp,ta,fa,fpa,tlower,tupper)
  17!
  18!:Purpose: [to be completed]
  19!
  20
  21!     
  22!     --- arguments
  23!     
  24      real(8) sign,den,anum,t,f,fp,ta,fa,fpa,tlower,tupper
  25!     
  26!     --- variables locales
  27!     
  28      real(8) z1,b,discri
  29!     
  30!     Using f and fp at t and ta, computes new t by cubic formula
  31!     safeguarded inside [tlower,tupper].
  32!     
  33      z1=fp+fpa-3.d0*(fa-f)/(ta-t)
  34      b=z1+fp
  35!     
  36!     first compute the discriminant (without overflow)
  37!     
  38      if (dabs(z1).le.1.d0) then
  39         discri=z1*z1-fp*fpa
  40      else
  41         discri=fp/z1
  42         discri=discri*fpa
  43         discri=z1-discri
  44         if (z1.ge.0.d0 .and. discri.ge.0.d0) then
  45            discri=dsqrt(z1)*dsqrt(discri)
  46            go to 120
  47         endif
  48         if (z1.le.0.d0 .and. discri.le.0.d0) then
  49            discri=dsqrt(-z1)*dsqrt(-discri)
  50            go to 120
  51         endif
  52         discri=-1.d0
  53      endif
  54      if (discri.lt.0.d0) then
  55         if (fp.lt.0.d0) t=tupper
  56         if (fp.ge.0.d0) t=tlower
  57         go to 900
  58      endif
  59!     
  60!     discriminant nonnegative, compute solution (without overflow)
  61!     
  62      discri=dsqrt(discri)
  63 120  if (t-ta.lt.0.d0) discri=-discri
  64      sign=(t-ta)/dabs(t-ta)
  65      if (b*sign.gt.0.d+0) then
  66         t=t+fp*(ta-t)/(b+discri)
  67      else
  68         den=z1+b+fpa
  69         anum=b-discri
  70         if (dabs((t-ta)*anum).lt.(tupper-tlower)*dabs(den)) then
  71            t=t+anum*(ta-t)/den
  72         else
  73            t=tupper
  74         endif
  75      endif
  76 900  t=dmax1(t,tlower)
  77      t=dmin1(t,tupper)
  78!     return
  79      end subroutine
  80
  81      subroutine ddd (prosca,dtonb,dtcab,n,sscale,nm,depl,aux,jmin,jmax, &
  82                      precos,diag,ybar,sbar,izs,rzs,dzs)
  83      !
  84      !:Purpose: Calcule le produit H.g ou
  85      !
  86      !          * H est une matrice construite par la formule de bfgs inverse
  87      !            a nm memoires a partir de la matrice diagonale diag
  88      !            dans un espace hilbertien dont le produit scalaire
  89      !            est donne par prosca
  90      !            (cf. J. Nocedal, Math. of Comp. 35/151 (1980) 773-782)
  91      !          * g est un vecteur de dimension n (en general le gradient)
  92      !
  93      !          * la matrice diag apparait donc comme un preconditionneur diagonal
  94      !
  95      !          * depl = g (en entree), = H g (en sortie)
  96      !
  97      !          * la matrice H est memorisee par les vecteurs des tableaux
  98      !            ybar, sbar et les pointeurs jmin, jmax
  99      !
 100      !          * alpha(nm) est une zone de travail
 101      !
 102      !          * izs(1),rzs(1),dzs(1) sont des zones de travail pour prosca
 103      !
 104
 105      ! arguments
 106      logical sscale
 107      integer n,nm,jmin,jmax,izs(1)
 108      real rzs(1)
 109      real(8) depl(n),precos,diag(n),alpha(nm),ybar(n,1), &
 110          sbar(n,1),aux(n),dzs(1)
 111      external prosca,dtonb,dtcab
 112!
 113!         variables locales
 114!
 115      integer jfin,i,j,jp
 116      real(8) r,ps
 117!
 118      jfin=jmax
 119      if (jfin.lt.jmin) jfin=jmax+nm
 120!
 121!         phase de descente
 122!
 123      do 100 j=jfin,jmin,-1
 124          jp=j
 125          if (jp.gt.nm) jp=jp-nm
 126          call prosca (n,depl,sbar(1,jp),ps,izs,rzs,dzs)
 127          r=ps
 128          alpha(jp)=r
 129          !$OMP PARALLEL DO DEFAULT(SHARED) PRIVATE(I) 
 130          do 20 i=1,n
 131              depl(i)=depl(i)-r*ybar(i,jp)
 13220        continue
 133          !$OMP END PARALLEL DO
 134100   continue
 135!
 136!         preconditionnement
 137!
 138      if (sscale) then
 139          !$OMP PARALLEL DO DEFAULT(SHARED) PRIVATE(I) 
 140          do 150 i=1,n
 141              depl(i)=depl(i)*precos
 142  150     continue
 143          !$OMP END PARALLEL DO
 144      else
 145          call dtonb (n,depl,aux,izs,rzs,dzs)
 146          !$OMP PARALLEL DO DEFAULT(SHARED) PRIVATE(I) 
 147          do 151 i=1,n
 148              aux(i)=aux(i)*diag(i)
 149  151     continue
 150          !$OMP END PARALLEL DO
 151          call dtcab (n,aux,depl,izs,rzs,dzs)
 152      endif
 153!
 154!         remontee
 155!
 156      do 200 j=jmin,jfin
 157          jp=j
 158          if (jp.gt.nm) jp=jp-nm
 159          call prosca (n,depl,ybar(1,jp),ps,izs,rzs,dzs)
 160          r=alpha(jp)-ps
 161          !$OMP PARALLEL DO DEFAULT(SHARED) PRIVATE(I) 
 162          do 120 i=1,n
 163              depl(i)=depl(i)+r*sbar(i,jp)
 164120       continue
 165          !$OMP END PARALLEL DO
 166200   continue
 167
 168      end subroutine ddd
 169
 170      subroutine ddds (prosca,dtonb,dtcab,n,sscale,nm,depl,aux,jmin, &
 171                       jmax,precos,diag,ybar,sbar,izs,rzs,dzs)
 172      !
 173      !
 174      !:Purpose: This subroutine has the same role as ddd (computation of the
 175      !          product H.g). It supposes however that the (y,s) pairs are not
 176      !          stored in core memory, but on a devise chosen by the user.
 177      !
 178
 179!
 180!         arguments
 181!
 182      logical sscale
 183      integer n,nm,jmin,jmax,izs(1)
 184      real rzs(1)
 185      real(8) depl(n),precos,diag(n),alpha(nm),ybar(n),sbar(n), &
 186          aux(n),dzs(1)
 187      external prosca,dtonb,dtcab
 188!
 189!         variables locales
 190!
 191      integer jfin,i,j,jp
 192      real(8) r,ps
 193!
 194      jfin=jmax
 195      if (jfin.lt.jmin) jfin=jmax+nm
 196!
 197!         phase de descente
 198!
 199      do 100 j=jfin,jmin,-1
 200          jp=j
 201          if (jp.gt.nm) jp=jp-nm
 202          call prosca (n,depl,sbar,ps,izs,rzs,dzs)
 203          r=ps
 204          alpha(jp)=r
 205          do 20 i=1,n
 206              depl(i)=depl(i)-r*ybar(i)
 20720        continue
 208100   continue
 209!
 210!         preconditionnement
 211!
 212      if (sscale) then
 213          do 150 i=1,n
 214              depl(i)=depl(i)*precos
 215  150     continue
 216      else
 217          call dtonb (n,depl,aux,izs,rzs,dzs)
 218          do 151 i=1,n
 219              aux(i)=aux(i)*diag(i)
 220  151     continue
 221          call dtcab (n,aux,depl,izs,rzs,dzs)
 222      endif
 223!
 224!         remontee
 225!
 226      do 200 j=jmin,jfin
 227          jp=j
 228          if (jp.gt.nm) jp=jp-nm
 229          call prosca (n,depl,ybar(1),ps,izs,rzs,dzs)
 230          r=alpha(jp)-ps
 231          do 120 i=1,n
 232              depl(i)=depl(i)+r*sbar(i)
 233120       continue
 234200   continue
 235
 236      end subroutine ddds
 237
 238      subroutine mupdts (sscale,inmemo,n,m,nrz)
 239
 240!
 241!         arguments
 242!
 243      logical sscale,inmemo
 244      integer n,m,nrz
 245!----
 246!
 247!     On entry:
 248!       sscale: .true. if scalar initial scaling,
 249!               .false. if diagonal initial scaling
 250!       n:      number of variables
 251!
 252!     This routine has to return:
 253!       m:      the number of updates to form the approximate Hessien H,
 254!       inmemo: .true., if the vectors y and s used to form H are stored
 255!                  in core memory,
 256!               .false. otherwise (storage of y and s on disk, for
 257!                  instance).
 258!
 259!----
 260!
 261      if (n.eq.0) then
 262         m=0
 263      else
 264         if (sscale) then
 265            m=(nrz-3*n)/(2*n)
 266         else
 267            m=(nrz-4*n)/(2*n)
 268         endif
 269      endif
 270      inmemo=.true.
 271      ! return
 272      end subroutine mupdts
 273
 274      subroutine qna_n1qn3 (simul,prosca,dtonb,dtcab,n,x,f,g,dxmin,df1, &
 275           epsg,impres,io,mode,niter,nsim,iz,dz,ndz, &
 276           izs,rzs,dzs)
 277      !     
 278      !:Purpose: N1QN3, Version 2.0c, June 1995
 279      !          Jean Charles Gilbert, Claude Lemarechal, INRIA.
 280      !     
 281      !          Double precision version of M1QN3.
 282      !     
 283      !          N1qn3 has two running modes: the SID (Scalar Initial Scaling) mode
 284      !          and the DIS (Diagonal Initial Scaling) mode. Both do not require
 285      !          the same amount of storage, the same subroutines, ...
 286      !          In the description below, items that differ in the DIS mode with
 287      !          respect to the SIS mode are given in brakets.
 288      !     
 289      !          Use the following subroutines:
 290      !
 291      !          * N1QN3A
 292      !          * DDD, DDDS
 293      !          * NLIS0 + DCUBE (Dec 88)
 294      !          * MUPDT
 295      !     
 296      !          The following routines are proposed to the user in case the
 297      !          Euclidean scalar product is used:
 298      !          DUCLID, DTONBE, DTCABE.
 299      !     
 300      !          La sous-routine N1QN3 est une interface entre le programme
 301      !          appelant et la sous-routine N1QN3A, le minimiseur proprement dit.
 302      !     
 303      !          Le module PROSCA est sense realiser le produit scalaire de deux
 304      !          vecteurs de Rn; le module DTONB est sense realiser le changement
 305      !          de coordonnees correspondant au changement de bases: base
 306      !          euclidienne -> base orthonormale (pour le produit scalaire
 307      !          PROSCA); le module CTBAB fait la transformation inverse: base
 308      !          orthonormale -> base euclidienne.
 309      !     
 310      !          Iz is an integer working zone for N1QN3A, its dimension is 5.
 311      !          It is formed of 5 scalars that are set by the optimizer:
 312      !
 313      !          - the dimension of the problem,
 314      !          - a identifier of the scaling mode,
 315      !          - the number of updates,
 316      !          - two pointers.
 317      !     
 318      !          Dz est la zone de travail pour N1QN3A, de dimension ndz.
 319      !          Elle est subdivisee en
 320      !
 321      !          - 3 [ou 4] vecteurs de dimension n: d,gg,[diag,]aux
 322      !          - m vecteurs de dimension n: ybar
 323      !          - m vecteurs de dimension n: sbar
 324      !     
 325      !          m est alors le plus grand entier tel que
 326      !
 327      !          - m*(2*n+1)+3*n .le. ndz [m*(2*n+1)+4*n .le. ndz)]
 328      !          - soit m := (ndz-3*n) / (2*n+1) [m := (ndz-4*n) / (2*n+1)].
 329      !          Il faut avoir m >= 1, donc ndz >= 5n+1 [ndz >= 6n+1].
 330      !     
 331      !          A chaque iteration la metrique est formee a partir d'un multiple
 332      !          de l'identite [d'une matrice diagonale] D qui est mise a jour m
 333      !          fois par la formule de BFGS en utilisant les m couples {y,s} les
 334      !          plus recents.
 335      !     
 336
 337!     
 338!     arguments
 339!     
 340      integer n,impres,io,mode,niter,nsim,iz(5),ndz,izs(1)
 341      real rzs(1)
 342      real(8) x(n),f,g(n),dxmin,df1,epsg,dz(ndz),dzs(1)
 343      external simul,prosca,dtonb,dtcab
 344!     
 345!     variables locales
 346!     
 347      logical inmemo,sscale
 348      integer ntravu,id,igg,idiag,iaux,iybar,isbar,m,mmemo,ntotal,ierr
 349      integer m_max
 350      real(8) d1,d2,ps
 351!     
 352!---- impressions initiales et controle des arguments
 353!     
 354      write(io,*) '--------------------------------------------------'
 355      write(io,*) 'N1QN3: calling modified MPI version of modulopt!!!'
 356      write(io,*) '--------------------------------------------------'
 357
 358      call rpn_comm_allreduce(n,ntotal,1,"mpi_integer", &
 359                              "mpi_max","GRID",ierr)
 360
 361      if (impres.ge.1) &
 362           write (io,900) n,dxmin,df1,epsg,niter,nsim,impres
 363 900  format (/" N1QN3 (Version 2.0c, June 1995): entry point"/ &
 364           5x,"dimension of the problem (n):",i9/  &
 365           5x,"absolute precision on x (dxmin):",d9.2/ &
 366           5x,"expected decrease for f (df1):",d9.2/ &
 367           5x,"relative precision on g (epsg):",d9.2/ &
 368           5x,"maximal number of iterations (niter):",i6/ &
 369           5x,"maximal number of simulations (nsim):",i6/ &
 370           5x,"printing level (impres):",i4)
 371      if (ntotal.le.0.or.niter.le.0.or.nsim.le.0.or.dxmin.le.0.d+0.or. &
 372           epsg.le.0.d+0.or.epsg.gt.1.d+0.or.mode.lt.0.or.  &
 373           mode.gt.3) then
 374         mode=2
 375         write (io,901)
 376 901     format (/" >>> n1qn3: inconsistent call")
 377         return
 378      endif
 379!     
 380!---- what method
 381!     
 382      if (mod(mode,2).eq.0) then
 383         if (impres.ge.1) write (io,920)
 384 920     format (/" n1qn3: Diagonal Initial Scaling mode")
 385         sscale=.false.
 386      else
 387         if (impres.ge.1) write (io,921)
 388 921     format (/" n1qn3: Scalar Initial Scaling mode")
 389         sscale=.true.
 390      endif
 391!     
 392      if ( n.gt.0 .and. &
 393           ((ndz.lt.5*n+1).or.((.not.sscale).and.(ndz.lt.6*n+1))) ) then
 394         mode=2
 395         write (io,902)
 396 902     format (/" >>> n1qn3: not enough memory allocated")
 397         return
 398      endif
 399!     
 400!---- Compute m
 401!     
 402      call mupdts (sscale,inmemo,n,m,ndz)
 403      call rpn_comm_allreduce(m,m_max,1,"mpi_integer", &
 404                              "mpi_max","GRID",ierr)
 405      if (m.ne.m_max) then
 406         write(io,*) 'replacing value of m, ',m,' with ',m_max
 407         m=m_max
 408      endif
 409!     
 410!     --- Check the value of m (if (y,s) pairs in core, m will be >= 1)
 411!     
 412      if (m.lt.1) then
 413         mode=2
 414         write (io,9020)
 415 9020    format (/" >>> n1qn3: m is set too small in mupdts")
 416         return
 417      endif
 418!     
 419!     --- mmemo = number of (y,s) pairs in core memory
 420!     
 421      mmemo=1
 422      if (inmemo) mmemo=m
 423!     
 424      ntravu=2*(2+mmemo)*n
 425      if (sscale) ntravu=ntravu-n
 426      write (io,903) ndz,ntravu,m
 427 903  format (/5x,"allocated memory (ndz) :",i9/ &
 428           5x,"used memory :           ",i9/ &
 429           5x,"number of updates :     ",i9)
 430      if (ndz.lt.ntravu) then
 431         mode=2
 432         write (io,902)
 433         return
 434      endif
 435!     
 436      if (impres.ge.1) then
 437         if (inmemo) then
 438            write (io,907)
 439         else
 440            write (io,908)
 441         endif
 442      endif
 443 907  format (5x,"(y,s) pairs are stored in core memory")
 444 908  format (5x,"(y,s) pairs are stored by the user")
 445!     
 446!---- cold start or warm restart ?
 447!     check iz: iz(1)=n, iz(2)=(0 if DIS, 1 if SIS),
 448!     iz(3)=m, iz(4)=jmin, iz(5)=jmax
 449!     
 450      if (mode/2.eq.0) then
 451         if (impres.ge.1) write (io,922)
 452      else
 453         iaux=0
 454         if (sscale) iaux=1
 455!         if (iz(1).ne.n.or.iz(2).ne.iaux.or.iz(3).ne.m.or.iz(4).lt.1 &
 456!              .or.iz(5).lt.1.or.iz(4).gt.iz(3).or.iz(5).gt.iz(3)) then
 457         if (iz(2).ne.iaux.or.iz(3).ne.m.or.iz(4).lt.1 &
 458              .or.iz(5).lt.1.or.iz(4).gt.iz(3).or.iz(5).gt.iz(3)) then
 459            mode=2
 460            write(*,*) 'iz=',iz(:)
 461            write(*,*) 'n,iaux,m=',n,iaux,m
 462            if (impres.ge.1) write (io,923)
 463            return
 464         endif
 465         if (impres.ge.1) write (io,924)
 466      endif
 467 922  format (/" n1qn3: cold start"/,1x)
 468 923  format (/" >>> n1qn3: inconsistent iz for a warm restart")
 469 924  format (/" n1qn3: warm restart"/,1x)
 470      iz(1)=n
 471      iz(2)=0
 472      if (sscale) iz(2)=1
 473      iz(3)=m
 474!     
 475!---- split the working zone dz
 476!     
 477      idiag=1
 478      iybar=idiag+n
 479      if (sscale) iybar=1
 480      isbar=iybar+n*mmemo
 481      id=isbar+n*mmemo
 482      igg=id+n
 483      iaux=igg+n
 484!     
 485!---- call the optimization code
 486!     
 487      call n1qn3a (simul,prosca,dtonb,dtcab,n,x,f,g,dxmin,df1,epsg, &
 488           impres,io,mode,niter,nsim,inmemo,iz(3),iz(4),iz(5), &
 489           dz(id),dz(igg),dz(idiag),dz(iaux), &
 490           dz(iybar),dz(isbar),izs,rzs,dzs)
 491!     
 492!---- impressions finales
 493!     
 494      if (impres.ge.1) write (io,905) mode,niter,nsim,epsg
 495 905  format (/,1x,79("-")/  &
 496           /" n1qn3: output mode is ",i2 &
 497           /5x,"number of iterations: ",i4 &
 498           /5x,"number of simulations: ",i6 &
 499           /5x,"realized relative precision on g: ",d9.2)
 500      call prosca (n,x,x,ps,izs,rzs,dzs)
 501      d1=dsqrt(ps)
 502      call prosca (n,g,g,ps,izs,rzs,dzs)
 503      d2=dsqrt(ps)
 504      if (impres.ge.1) write (io,906) d1,f,d2
 505 906  format (5x,"norm of x = ",d15.8 &
 506           /5x,"f         = ",d15.8 &
 507           /5x,"norm of g = ",d15.8) 
 508      !return
 509      end subroutine qna_n1qn3
 510
 511      subroutine n1qn3a (simul,prosca,dtonb,dtcab,n,x,f,g,dxmin,df1, &
 512                         epsg,impres,io,mode,niter,nsim,inmemo,m,jmin, &
 513                         jmax,d,gg,diag,aux,ybar,sbar,izs,rzs,dzs)
 514     !
 515     !:Purpose: Code d'optimisation proprement dit.
 516     !
 517
 518!
 519!         arguments
 520!
 521      logical inmemo
 522      integer n,impres,io,mode,niter,nsim,m,jmin,jmax,izs(1)
 523      real rzs(1)
 524      real(8) x(n),f,g(n),dxmin,df1,epsg,d(n),gg(n),diag(n), &
 525          aux(n),ybar(n,1),sbar(n,1),dzs(1)
 526      external simul,prosca,dtonb,dtcab
 527!
 528!         variables locales
 529!
 530      logical sscale,cold,warm,ntotal
 531      integer i,itmax,moderl,isim,jcour,indic,ierr,impresmax
 532      real(8) d1,t,tmin,tmin_mpiglobal,tmax,gnorm,eps1,ff, &
 533           preco,precos,ys,den, &
 534           dk,dk1,ps,ps2,hp0
 535!
 536!         parametres
 537!
 538      real(8) rm1,rm2
 539      parameter (rm1=0.0001d+0,rm2=0.9d+0)
 540      real(8) pi
 541      parameter (pi=3.1415927d+0)
 542      real(8) rmin
 543!
 544!---- initialisation
 545!
 546      rmin=1.d-20
 547!
 548      sscale=.true.
 549      if (mod(mode,2).eq.0) sscale=.false.
 550!
 551      warm=.false.
 552      if (mode/2.eq.1) warm=.true.
 553      cold=.not.warm
 554!
 555      itmax=niter
 556      niter=0
 557      isim=1
 558      eps1=1.d+0
 559!
 560      call rpn_comm_allreduce(impres,impresmax,1,"mpi_integer", &
 561                              "mpi_max","GRID",ierr)
 562      call rpn_comm_allreduce(n,ntotal,1,"mpi_integer", &
 563                              "mpi_max","GRID",ierr)
 564!
 565      call prosca (n,g,g,ps,izs,rzs,dzs)
 566      gnorm=dsqrt(ps)
 567      if (impres.ge.1) write (io,900) f,gnorm
 568  900 format (5x,"f         = ",d15.8 &
 569             /5x,"norm of g = ",d15.8)
 570      if (gnorm.lt.rmin) then
 571          mode=2
 572          if (impres.ge.1) write (io,901)
 573          goto 1000
 574      endif
 575  901 format (/" >>> n1qn3a: initial gradient is too small")
 576!
 577!     --- initialisation pour ddd
 578!
 579      if (cold) then
 580          jmin=1
 581          jmax=0
 582      endif
 583      jcour=1
 584      if (inmemo) jcour=jmax
 585!
 586!     --- mise a l'echelle de la premiere direction de descente
 587!
 588      if (cold) then
 589!
 590!         --- use Fletcher's scaling and initialize diag to 1.
 591!
 592          precos=2.d+0*df1/gnorm**2
 593          do 10 i=1,n
 594              d(i)=-g(i)*precos
 595              diag(i)=1.d+0
 596   10     continue
 597          if (impres.ge.5) write(io,902) precos
 598  902     format (/" n1qn3a: descent direction -g: precon = ",d10.3)
 599      else
 600!
 601!         --- use the matrix stored in [diag and] the (y,s) pairs
 602!
 603          if (sscale) then
 604              call prosca (n,ybar(1,jcour),ybar(1,jcour),ps,izs,rzs,dzs)
 605              precos=1.d+0/ps
 606          endif
 607          do 11 i=1,n
 608              d(i)=-g(i)
 609  11      continue
 610          if (inmemo) then
 611              call ddd (prosca,dtonb,dtcab,n,sscale,m,d,aux,jmin,jmax, &
 612                       precos,diag,ybar,sbar,izs,rzs,dzs)
 613          else
 614              call ddds (prosca,dtonb,dtcab,n,sscale,m,d,aux,jmin,jmax, &
 615                         precos,diag,ybar,sbar,izs,rzs,dzs)
 616          endif
 617      endif
 618!
 619      if (impres.eq.3) then
 620          write(io,903)
 621          write(io,904)
 622      endif
 623      if (impres.eq.4) write(io,903)
 624  903 format (/,1x,79("-"))
 625  904 format (1x)
 626!
 627!     --- initialisation pour nlis0
 628!
 629      tmax=1.d+20
 630      call prosca (n,d,g,hp0,izs,rzs,dzs)
 631      if (hp0.ge.0.d+0) then
 632          mode=7
 633          if (impres.ge.1) write (io,905) niter,hp0
 634          goto 1000
 635      endif
 636  905 format (/" >>> n1qn3 (iteration ",i2,"): " &
 637              /5x," the search direction d is not a ", &
 638               "descent direction: (g,d) = ",d12.5)
 639!
 640!     --- compute the angle (-g,d)
 641!
 642      if (warm.and.impresmax.ge.5) then
 643          call prosca (n,g,g,ps,izs,rzs,dzs)
 644          ps=dsqrt(ps)
 645          call prosca (n,d,d,ps2,izs,rzs,dzs)
 646          ps2=dsqrt(ps2)
 647          ps=hp0/ps/ps2
 648          ps=dmin1(-ps,1.d+0)
 649          ps=dacos(ps)
 650          d1=ps*180.d+0/pi
 651          if(impres.ge.5) write (io,906) sngl(d1)
 652      endif
 653  906 format (/" n1qn3: descent direction d: ", &
 654              "angle(-g,d) = ",f5.1," degrees")
 655!
 656!---- Debut de l'iteration. on cherche x(k+1) de la forme x(k) + t*d,
 657!     avec t > 0. On connait d.
 658!
 659!         Debut de la boucle: etiquette 100,
 660!         Sortie de la boucle: goto 1000.
 661!
 662100   niter=niter+1
 663      if (impres.lt.0) then
 664          if (mod(niter,-impres).eq.0) then
 665              indic=1
 666              call simul (indic,n,x,f,g,izs,rzs,dzs)
 667          endif
 668      endif
 669      if (impres.ge.5) write(io,903)
 670      if (impres.ge.4) write(io,904)
 671      if (impres.ge.3) write (io,910) niter,isim,f,hp0
 672  910 format (" n1qn3: iter ",i3,", simul ",i3, &
 673              ", f=",d15.8,", h'(0)=",d12.5)
 674      !$OMP PARALLEL DO DEFAULT(SHARED) PRIVATE(I) 
 675      do 101 i=1,n
 676          gg(i)=g(i)
 677101   continue
 678      !$OMP END PARALLEL DO
 679      ff=f
 680!
 681!     --- recherche lineaire et nouveau point x(k+1)
 682!
 683      if (impres.ge.5) write (io,911)
 684  911 format (/" n1qn3: line search")
 685!
 686!         --- calcul de tmin
 687!
 688      tmin=0.d+0
 689      do 200 i=1,n
 690          tmin=max(tmin,dabs(d(i)))
 691200   continue
 692      call rpn_comm_allreduce(tmin,tmin_mpiglobal,1, &
 693                              "mpi_double_precision", &
 694                              "mpi_max","GRID",ierr)
 695      tmin = tmin_mpiglobal
 696
 697      tmin=dxmin/tmin
 698      t=1.d+0
 699      d1=hp0
 700!
 701      call nlis0 (n,simul,prosca,x,f,d1,t,tmin,tmax,d,g,rm2,rm1, &
 702                 impres,io,moderl,isim,nsim,aux,izs,rzs,dzs)
 703!
 704!         --- nlis0 renvoie les nouvelles valeurs de x, f et g
 705!
 706      if (moderl.ne.0) then
 707          if (moderl.lt.0) then
 708!
 709!             --- calcul impossible
 710!                 t, g: ou les calculs sont impossibles
 711!                 x, f: ceux du t_gauche (donc f <= ff)
 712!
 713              mode=moderl
 714          elseif (moderl.eq.1) then
 715!
 716!             --- descente bloquee sur tmax
 717!                 [sortie rare (!!) d'apres le code de nlis0]
 718!
 719              mode=3
 720              if (impres.ge.1) write(io,912) niter
 721  912         format (/" >>> n1qn3 (iteration ",i3, &
 722                      "): line search blocked on tmax: ", &
 723                      "decrease the scaling")
 724          elseif (moderl.eq.4) then
 725!
 726!             --- nsim atteint
 727!                 x, f: ceux du t_gauche (donc f <= ff)
 728!
 729              mode=5
 730          elseif (moderl.eq.5) then
 731!
 732!             --- arret demande par l'utilisateur (indic = 0)
 733!                 x, f: ceux en sortie du simulateur
 734!
 735              mode=0
 736          elseif (moderl.eq.6) then
 737!
 738!             --- arret sur dxmin ou appel incoherent
 739!                 x, f: ceux du t_gauche (donc f <= ff)
 740!
 741              mode=6
 742          endif
 743          goto 1000
 744      endif
 745!
 746! NOTE: stopping tests are now done after having updated the matrix, so
 747! that update information can be stored in case of a later warm restart
 748!
 749!     --- mise a jour de la matrice
 750!
 751      if (m.gt.0) then
 752!
 753!         --- mise a jour des pointeurs
 754!
 755          jmax=jmax+1
 756          if (jmax.gt.m) jmax=jmax-m
 757          if ((cold.and.niter.gt.m).or.(warm.and.jmin.eq.jmax)) then
 758              jmin=jmin+1
 759              if (jmin.gt.m) jmin=jmin-m
 760          endif
 761          if (inmemo) jcour=jmax
 762!
 763!         --- y, s et (y,s)
 764!
 765          !$OMP PARALLEL DO DEFAULT(SHARED) PRIVATE(I) 
 766          do 400 i=1,n
 767              sbar(i,jcour)=t*d(i)
 768              ybar(i,jcour)=g(i)-gg(i)
 769400       continue
 770          !$OMP END PARALLEL DO
 771          if (impresmax.ge.5) then
 772              call prosca (n,sbar(1,jcour),sbar(1,jcour),ps,izs,rzs,dzs)
 773              dk1=dsqrt(ps)
 774              if (impres.ge.5.and.niter.gt.1) write (io,930) dk1/dk
 775  930         format (/" n1qn3: convergence rate, s(k)/s(k-1) = ", &
 776                      d12.5)
 777              dk=dk1
 778          endif
 779          call prosca (n,ybar(1,jcour),sbar(1,jcour),ys,izs,rzs,dzs)
 780          if (ys.le.0.d+0) then
 781              mode=7
 782              if (impres.ge.1) write (io,931) niter,ys
 783  931         format (/" >>> n1qn3 (iteration ",i2, &
 784                      "): the scalar product (y,s) = ",d12.5 &
 785                      /27x,"is not positive")
 786              goto 1000
 787          endif
 788!
 789!         --- ybar et sbar
 790!
 791          d1=dsqrt(1.d+0/ys)
 792          !$OMP PARALLEL DO DEFAULT(SHARED) PRIVATE(I) 
 793          do 410 i=1,n
 794              sbar(i,jcour)=d1*sbar(i,jcour)
 795              ybar(i,jcour)=d1*ybar(i,jcour)
 796  410     continue
 797          !$OMP END PARALLEL DO
 798!
 799!         --- compute the scalar or diagonal preconditioner
 800!
 801          if (impres.ge.5) write(io,932)
 802  932     format (/" n1qn3: matrix update:")
 803!
 804!             --- Here is the Oren-Spedicato factor, for scalar scaling
 805!
 806          if (sscale) then
 807              call prosca (n,ybar(1,jcour),ybar(1,jcour),ps,izs,rzs,dzs)
 808              precos=1.d+0/ps
 809!
 810              if (impres.ge.5) write (io,933) precos
 811  933         format (5x,"Oren-Spedicato factor = ",d10.3)
 812!
 813!             --- Scale the diagonal to Rayleigh's ellipsoid.
 814!                 Initially (niter.eq.1) and for a cold start, this is
 815!                 equivalent to an Oren-Spedicato scaling of the
 816!                 identity matrix.
 817!
 818          else
 819              call dtonb (n,ybar(1,jcour),aux,izs,rzs,dzs)
 820              ps=0.d0
 821              do 420 i=1,n
 822                  ps=ps+diag(i)*aux(i)*aux(i)
 823  420         continue
 824              call mmpi_allreduce_sumreal8scalar(ps,"GRID")
 825              d1=1.d0/ps
 826              if (impres.ge.5) then
 827                  write (io,934) d1
 828  934             format(5x,"fitting the ellipsoid: factor = ",d10.3)
 829              endif
 830              !$OMP PARALLEL DO DEFAULT(SHARED) PRIVATE(I) 
 831              do 421 i=1,n
 832                  diag(i)=diag(i)*d1
 833  421         continue
 834              !$OMP END PARALLEL DO
 835!
 836!             --- update the diagonal
 837!                 (gg is used as an auxiliary vector)
 838!
 839              call dtonb (n,sbar(1,jcour),gg,izs,rzs,dzs)
 840              ps=0.d0
 841              do 430 i=1,n
 842                  ps=ps+gg(i)*gg(i)/diag(i)
 843  430         continue
 844              call mmpi_allreduce_sumreal8scalar(ps,"GRID")
 845              den=ps
 846              !$OMP PARALLEL DO DEFAULT(SHARED) PRIVATE(I) 
 847              do 431 i=1,n
 848                  diag(i)=1.d0/ &
 849                         (1.d0/diag(i)+aux(i)**2-(gg(i)/diag(i))**2/den)
 850                  if (diag(i).le.0.d0) then
 851                      if (impres.ge.5) write (io,935) i,diag(i),rmin
 852                      diag(i)=rmin
 853                  endif
 854  431         continue
 855              !$OMP END PARALLEL DO
 856  935         format (/" >>> n1qn3-WARNING: diagonal element ",i8, &
 857                       " is negative (",d10.3,"), reset to ",d10.3)
 858!
 859              if (impresmax.ge.5) then
 860                  ps=0.d0
 861                  do 440 i=1,n
 862                      ps=ps+diag(i)
 863  440             continue
 864                  call mmpi_allreduce_sumreal8scalar(ps,"GRID")
 865                  ps=ps/ntotal
 866                  preco=ps
 867!
 868                  ps2=0.d0
 869                  do 441 i=1,n
 870                      ps2=ps2+(diag(i)-ps)**2
 871  441             continue
 872                  call mmpi_allreduce_sumreal8scalar(ps2,"GRID")
 873                  ps2=dsqrt(ps2/ntotal)
 874                  if (impres.ge.5) write (io,936) preco,ps2
 875  936             format (5x,"updated diagonal: average value = ",d10.3, &
 876                         ", sqrt(variance) = ",d10.3)
 877              endif
 878          endif
 879      endif
 880!
 881!     --- tests d'arret
 882!
 883      call prosca(n,g,g,ps,izs,rzs,dzs)
 884      eps1=ps
 885      eps1=dsqrt(eps1)/gnorm
 886!
 887      if (impres.ge.5) write (io,940) eps1
 888  940 format (/" n1qn3: stopping criterion on g: ",d12.5)
 889      if (eps1.lt.epsg) then
 890          mode=1
 891          goto 1000
 892      endif
 893      if (niter.eq.itmax) then
 894          mode=4
 895          if (impres.ge.1) write (io,941) niter
 896  941     format (/" >>> n1qn3 (iteration ",i3, &
 897                  "): maximal number of iterations")
 898          goto 1000
 899      endif
 900      if (isim.gt.nsim) then
 901          mode=5
 902          if (impres.ge.1) write (io,942) niter,isim
 903  942     format (/" >>> n1qn3 (iteration ",i3,"): ",i6, &
 904                  " simulations (maximal number reached)")
 905          goto 1000
 906      endif
 907!
 908!     --- calcul de la nouvelle direction de descente d = - H.g
 909!
 910      if (m.eq.0) then
 911          preco=2.d0*(ff-f)/(eps1*gnorm)**2
 912          !$OMP PARALLEL DO DEFAULT(SHARED) PRIVATE(I) 
 913          do 500 i=1,n
 914              d(i)=-g(i)*preco
 915  500     continue
 916          !$OMP END PARALLEL DO
 917      else
 918          !$OMP PARALLEL DO DEFAULT(SHARED) PRIVATE(I) 
 919          do 510 i=1,n
 920              d(i)=-g(i)
 921  510     continue
 922          !$OMP END PARALLEL DO
 923          if (inmemo) then
 924              call ddd (prosca,dtonb,dtcab,n,sscale,m,d,aux,jmin,jmax, &
 925                        precos,diag,ybar,sbar,izs,rzs,dzs)
 926          else
 927              call ddds (prosca,dtonb,dtcab,n,sscale,m,d,aux,jmin,jmax, &
 928                         precos,diag,ybar,sbar,izs,rzs,dzs)
 929          endif
 930      endif
 931!
 932!         --- test: la direction d est-elle de descente ?
 933!             hp0 sera utilise par nlis0
 934!
 935      call prosca (n,d,g,hp0,izs,rzs,dzs)
 936      if (hp0.ge.0.d+0) then
 937          mode=7
 938          if (impres.ge.1) write (io,905) niter,hp0
 939          goto 1000
 940      endif
 941      if (impresmax.ge.5) then
 942          call prosca (n,g,g,ps,izs,rzs,dzs)
 943          ps=dsqrt(ps)
 944          call prosca (n,d,d,ps2,izs,rzs,dzs)
 945          ps2=dsqrt(ps2)
 946          ps=hp0/ps/ps2
 947          ps=dmin1(-ps,1.d+0)
 948          ps=dacos(ps)
 949          d1=ps
 950          d1=d1*180.d0/pi
 951          if (impres.ge.5) write (io,906) sngl(d1)
 952      endif
 953!
 954!---- on poursuit les iterations
 955!
 956      goto 100
 957!
 958!---- retour
 959!
 960 1000 continue
 961      nsim=isim
 962      epsg=eps1
 963
 964      end subroutine n1qn3a
 965
 966      subroutine nlis0 (n,simul,prosca,xn,fn,fpn,t,tmin,tmax,d,g, &
 967                        amd,amf,imp,io,logic,nap,napmax,x,izs,rzs,dzs)
 968      !
 969      !:Purpose: en sortie logic =
 970      !
 971      !          * 0          descente serieuse
 972      !          * 1          descente bloquee
 973      !          * 4          nap > napmax
 974      !          * 5          retour a l'utilisateur
 975      !          * 6          fonction et gradient pas d'accord
 976      !          * < 0        contrainte implicite active
 977      !
 978
 979!!
 980!!--- arguments
 981!!
 982      external simul,prosca
 983      integer n,imp,io,logic,nap,napmax,izs(*)
 984      real rzs(*)
 985      real(8) xn(n),fn,fpn,t,tmin,tmax,d(n),g(n),amd,amf,x(n),dzs(*)
 986!
 987! --- variables locales
 988!
 989      logical lfound,lfound2
 990      integer i,indic,indica,indicd,ierr,ntotal
 991      real(8) tesf,tesd,tg,fg,fpg,td,ta,fa,fpa,d2,f,fp,ffn,fd, &
 992       fpd,z,test,barmin,barmul,barmax,barr,gauche,droite,taa,ps
 993!
 994 1000 format (/4x,9h nlis0   ,4x,4hfpn=,d10.3,4h d2=,d9.2, &
 995       7h  tmin=,d9.2,6h tmax=,d9.2)
 996 1001 format (/4x,6h mlis0,3x,"stop on tmin",8x,  &
 997         "step",11x,"functions",5x,"derivatives")
 998 1002 format (4x,6h nlis0,37x,d10.3,2d11.3)
 999 1003 format (4x,6h nlis0,d14.3,2d11.3)
1000 1004 format (4x,6h nlis0,37x,d10.3,7h indic=,i3)
1001 1005 format (4x,6h nlis0,14x,2d18.8,d11.3)
1002 1006 format (4x,6h nlis0,14x,d18.8,12h      indic=,i3)
1003 1007 format (/4x,6h mlis0,10x,"tmin forced to tmax")
1004 1008 format (/4x,6h mlis0,10x,"inconsistent call")
1005      call rpn_comm_allreduce(n,ntotal,1,"mpi_integer", &
1006                              "mpi_max","GRID",ierr)
1007      if (ntotal.gt.0 .and. fpn.lt.0.d0 .and. t.gt.0.d0 &
1008       .and. tmax.gt.0.d0 .and. amf.gt.0.d0 &
1009       .and. amd.gt.amf .and. amd.lt.1.d0) go to 5
1010      logic=6
1011      go to 999
1012    5 tesf=amf*fpn
1013      tesd=amd*fpn
1014      barmin=0.01d0
1015      barmul=5.d0
1016      barmax=0.3d0
1017      barr=barmin
1018      td=0.d0
1019      tg=0.d0
1020      fg=fn
1021      fpg=fpn
1022      ta=0.d0
1023      fa=fn
1024      fpa=fpn
1025      call prosca (n,d,d,ps,izs,rzs,dzs)
1026      d2=ps
1027!
1028!               elimination d'un t initial ridiculement petit
1029!
1030      if (t.gt.tmin) go to 20
1031      t=tmin
1032      if (t.le.tmax) go to 20
1033      if (imp.gt.0) write (io,1007)
1034      tmin=tmax
1035   20 if (fn+t*fpn.lt.fn+0.9d0*t*fpn) go to 30
1036      t=2.d0*t
1037      go to 20
1038   30 indica=1
1039      logic=0
1040      if (t.gt.tmax) then
1041          t=tmax
1042          logic=1
1043      endif
1044      if (imp.ge.4) write (io,1000) fpn,d2,tmin,tmax
1045!
1046!     --- nouveau x
1047!
1048      do 50 i=1,n
1049          x(i)=xn(i)+t*d(i)
1050   50 continue
1051!
1052! --- boucle
1053!
1054  100 nap=nap+1
1055      if(nap.gt.napmax) then
1056          logic=4
1057          fn=fg
1058          do 120 i=1,n
1059              xn(i)=xn(i)+tg*d(i)
1060  120     continue
1061          go to 999
1062      endif
1063      indic=4
1064!
1065!     --- appel simulateur
1066!
1067      call simul(indic,n,x,f,g,izs,rzs,dzs)
1068      if(indic.eq.0) then
1069!
1070!         --- arret demande par l'utilisateur
1071!
1072          logic=5
1073          fn=f
1074          do 170 i=1,n
1075              xn(i)=x(i)
1076  170     continue
1077          go to 999
1078      endif
1079      if(indic.lt.0) then
1080!
1081!         --- les calculs n'ont pas pu etre effectues par le simulateur
1082!
1083          td=t
1084          indicd=indic
1085          logic=0
1086          if (imp.ge.4) write (io,1004) t,indic
1087          t=tg+0.1d0*(td-tg)
1088          go to 905
1089      endif
1090!
1091!     --- les tests elementaires sont faits, on y va
1092!
1093      call prosca (n,d,g,ps,izs,rzs,dzs)
1094      fp=ps
1095!
1096!     --- premier test de Wolfe
1097!
1098      ffn=f-fn
1099      if(ffn.gt.t*tesf) then
1100          td=t
1101          fd=f
1102          fpd=fp
1103          indicd=indic
1104          logic=0
1105          if(imp.ge.4) write (io,1002) t,ffn,fp
1106          go to 500
1107      endif
1108!
1109!     --- test 1 ok, donc deuxieme test de Wolfe
1110!
1111      if(imp.ge.4) write (io,1003) t,ffn,fp
1112      if(fp.gt.tesd) then
1113          logic=0
1114          go to 320
1115      endif
1116      if (logic.eq.0) go to 350
1117!
1118!     --- test 2 ok, donc pas serieux, on sort
1119!
1120  320 fn=f
1121      do 330 i=1,n
1122          xn(i)=x(i)
1123  330 continue
1124      go to 999
1125!
1126!
1127!
1128  350 tg=t
1129      fg=f
1130      fpg=fp
1131      if(td.ne.0.d0) go to 500
1132!
1133!              extrapolation
1134!
1135      taa=t
1136      gauche=(1.d0+barmin)*t
1137      droite=10.d0*t
1138      call dcube (t,f,fp,ta,fa,fpa,gauche,droite)
1139      ta=taa
1140      if(t.lt.tmax) go to 900
1141      logic=1
1142      t=tmax
1143      go to 900
1144!
1145!              interpolation
1146!
1147  500 if(indica.le.0) then
1148          ta=t
1149          t=0.9d0*tg+0.1d0*td
1150          go to 900
1151      endif
1152      test=barr*(td-tg)
1153      gauche=tg+test
1154      droite=td-test
1155      taa=t
1156      call dcube (t,f,fp,ta,fa,fpa,gauche,droite)
1157      ta=taa
1158      if (t.gt.gauche .and. t.lt.droite) then
1159          barr=dmax1(barmin,barr/barmul)
1160!         barr=barmin
1161        else
1162          barr=dmin1(barmul*barr,barmax)
1163      endif
1164!
1165! --- fin de boucle
1166!     - t peut etre bloque sur tmax
1167!       (venant de l'extrapolation avec logic=1)
1168!
1169  900 fa=f
1170      fpa=fp
1171  905 indica=indic
1172!
1173! --- faut-il continuer ?
1174!
1175      if (td.eq.0.d0) go to 950
1176      if (td-tg.lt.tmin) go to 920
1177!
1178!     --- limite de precision machine (arret de secours) ?
1179!
1180      lfound=.false.
1181      do i=1,n
1182          z=xn(i)+t*d(i)
1183          if (z.ne.xn(i).and.z.ne.x(i)) then
1184            lfound=.true.
1185            exit
1186          endif
1187      enddo
1188      call rpn_comm_allreduce(lfound,lfound2,1,"mpi_logical", &
1189           "mpi_lor","GRID",ierr)
1190      if(lfound2) go to 950
1191!
1192! --- arret sur dxmin ou de secours
1193!
1194  920 logic=6
1195!
1196!     si indicd<0, derniers calculs non faits par simul
1197!
1198      if (indicd.lt.0) logic=indicd
1199!
1200!     si tg=0, xn = xn_depart,
1201!     sinon on prend xn=x_gauche qui fait decroitre f
1202!
1203      if (tg.eq.0.d0) go to 940
1204      fn=fg
1205      do 930 i=1,n
1206  930 xn(i)=xn(i)+tg*d(i)
1207  940 if (imp.le.0) go to 999
1208      write (io,1001)
1209      write (io,1005) tg,fg,fpg
1210      if (logic.eq.6) write (io,1005) td,fd,fpd
1211      if (logic.eq.7) write (io,1006) td,indicd
1212      go to 999
1213!
1214!               recopiage de x et boucle
1215!
1216  950 do 960 i=1,n
1217  960 x(i)=xn(i)+t*d(i)
1218      go to 100
1219  999 continue
1220      end subroutine nlis0
1221
1222end module quasiNewton_mod