1module quasiNewton_mod
2 ! MODULE quasiNewton_mod (prefix='qna' category='1. High-level functionality')
3 !
4 !:Purpose: The n1qn3 routine, and its supporting subroutines. The original code was
5 ! modified to support MPI parallelization.
6 !
7 use midasMpi_mod
8 implicit none
9 save
10 private
11
12 public :: qna_n1qn3
13
14 contains
15
16 subroutine dcube(t,f,fp,ta,fa,fpa,tlower,tupper)
17!
18!:Purpose: [to be completed]
19!
20
21!
22! --- arguments
23!
24 real(8) sign,den,anum,t,f,fp,ta,fa,fpa,tlower,tupper
25!
26! --- variables locales
27!
28 real(8) z1,b,discri
29!
30! Using f and fp at t and ta, computes new t by cubic formula
31! safeguarded inside [tlower,tupper].
32!
33 z1=fp+fpa-3.d0*(fa-f)/(ta-t)
34 b=z1+fp
35!
36! first compute the discriminant (without overflow)
37!
38 if (dabs(z1).le.1.d0) then
39 discri=z1*z1-fp*fpa
40 else
41 discri=fp/z1
42 discri=discri*fpa
43 discri=z1-discri
44 if (z1.ge.0.d0 .and. discri.ge.0.d0) then
45 discri=dsqrt(z1)*dsqrt(discri)
46 go to 120
47 endif
48 if (z1.le.0.d0 .and. discri.le.0.d0) then
49 discri=dsqrt(-z1)*dsqrt(-discri)
50 go to 120
51 endif
52 discri=-1.d0
53 endif
54 if (discri.lt.0.d0) then
55 if (fp.lt.0.d0) t=tupper
56 if (fp.ge.0.d0) t=tlower
57 go to 900
58 endif
59!
60! discriminant nonnegative, compute solution (without overflow)
61!
62 discri=dsqrt(discri)
63 120 if (t-ta.lt.0.d0) discri=-discri
64 sign=(t-ta)/dabs(t-ta)
65 if (b*sign.gt.0.d+0) then
66 t=t+fp*(ta-t)/(b+discri)
67 else
68 den=z1+b+fpa
69 anum=b-discri
70 if (dabs((t-ta)*anum).lt.(tupper-tlower)*dabs(den)) then
71 t=t+anum*(ta-t)/den
72 else
73 t=tupper
74 endif
75 endif
76 900 t=dmax1(t,tlower)
77 t=dmin1(t,tupper)
78! return
79 end subroutine
80
81 subroutine ddd (prosca,dtonb,dtcab,n,sscale,nm,depl,aux,jmin,jmax, &
82 precos,diag,ybar,sbar,izs,rzs,dzs)
83 !
84 !:Purpose: Calcule le produit H.g ou
85 !
86 ! * H est une matrice construite par la formule de bfgs inverse
87 ! a nm memoires a partir de la matrice diagonale diag
88 ! dans un espace hilbertien dont le produit scalaire
89 ! est donne par prosca
90 ! (cf. J. Nocedal, Math. of Comp. 35/151 (1980) 773-782)
91 ! * g est un vecteur de dimension n (en general le gradient)
92 !
93 ! * la matrice diag apparait donc comme un preconditionneur diagonal
94 !
95 ! * depl = g (en entree), = H g (en sortie)
96 !
97 ! * la matrice H est memorisee par les vecteurs des tableaux
98 ! ybar, sbar et les pointeurs jmin, jmax
99 !
100 ! * alpha(nm) est une zone de travail
101 !
102 ! * izs(1),rzs(1),dzs(1) sont des zones de travail pour prosca
103 !
104
105 ! arguments
106 logical sscale
107 integer n,nm,jmin,jmax,izs(1)
108 real rzs(1)
109 real(8) depl(n),precos,diag(n),alpha(nm),ybar(n,1), &
110 sbar(n,1),aux(n),dzs(1)
111 external prosca,dtonb,dtcab
112!
113! variables locales
114!
115 integer jfin,i,j,jp
116 real(8) r,ps
117!
118 jfin=jmax
119 if (jfin.lt.jmin) jfin=jmax+nm
120!
121! phase de descente
122!
123 do 100 j=jfin,jmin,-1
124 jp=j
125 if (jp.gt.nm) jp=jp-nm
126 call prosca (n,depl,sbar(1,jp),ps,izs,rzs,dzs)
127 r=ps
128 alpha(jp)=r
129 !$OMP PARALLEL DO DEFAULT(SHARED) PRIVATE(I)
130 do 20 i=1,n
131 depl(i)=depl(i)-r*ybar(i,jp)
13220 continue
133 !$OMP END PARALLEL DO
134100 continue
135!
136! preconditionnement
137!
138 if (sscale) then
139 !$OMP PARALLEL DO DEFAULT(SHARED) PRIVATE(I)
140 do 150 i=1,n
141 depl(i)=depl(i)*precos
142 150 continue
143 !$OMP END PARALLEL DO
144 else
145 call dtonb (n,depl,aux,izs,rzs,dzs)
146 !$OMP PARALLEL DO DEFAULT(SHARED) PRIVATE(I)
147 do 151 i=1,n
148 aux(i)=aux(i)*diag(i)
149 151 continue
150 !$OMP END PARALLEL DO
151 call dtcab (n,aux,depl,izs,rzs,dzs)
152 endif
153!
154! remontee
155!
156 do 200 j=jmin,jfin
157 jp=j
158 if (jp.gt.nm) jp=jp-nm
159 call prosca (n,depl,ybar(1,jp),ps,izs,rzs,dzs)
160 r=alpha(jp)-ps
161 !$OMP PARALLEL DO DEFAULT(SHARED) PRIVATE(I)
162 do 120 i=1,n
163 depl(i)=depl(i)+r*sbar(i,jp)
164120 continue
165 !$OMP END PARALLEL DO
166200 continue
167
168 end subroutine ddd
169
170 subroutine ddds (prosca,dtonb,dtcab,n,sscale,nm,depl,aux,jmin, &
171 jmax,precos,diag,ybar,sbar,izs,rzs,dzs)
172 !
173 !
174 !:Purpose: This subroutine has the same role as ddd (computation of the
175 ! product H.g). It supposes however that the (y,s) pairs are not
176 ! stored in core memory, but on a devise chosen by the user.
177 !
178
179!
180! arguments
181!
182 logical sscale
183 integer n,nm,jmin,jmax,izs(1)
184 real rzs(1)
185 real(8) depl(n),precos,diag(n),alpha(nm),ybar(n),sbar(n), &
186 aux(n),dzs(1)
187 external prosca,dtonb,dtcab
188!
189! variables locales
190!
191 integer jfin,i,j,jp
192 real(8) r,ps
193!
194 jfin=jmax
195 if (jfin.lt.jmin) jfin=jmax+nm
196!
197! phase de descente
198!
199 do 100 j=jfin,jmin,-1
200 jp=j
201 if (jp.gt.nm) jp=jp-nm
202 call prosca (n,depl,sbar,ps,izs,rzs,dzs)
203 r=ps
204 alpha(jp)=r
205 do 20 i=1,n
206 depl(i)=depl(i)-r*ybar(i)
20720 continue
208100 continue
209!
210! preconditionnement
211!
212 if (sscale) then
213 do 150 i=1,n
214 depl(i)=depl(i)*precos
215 150 continue
216 else
217 call dtonb (n,depl,aux,izs,rzs,dzs)
218 do 151 i=1,n
219 aux(i)=aux(i)*diag(i)
220 151 continue
221 call dtcab (n,aux,depl,izs,rzs,dzs)
222 endif
223!
224! remontee
225!
226 do 200 j=jmin,jfin
227 jp=j
228 if (jp.gt.nm) jp=jp-nm
229 call prosca (n,depl,ybar(1),ps,izs,rzs,dzs)
230 r=alpha(jp)-ps
231 do 120 i=1,n
232 depl(i)=depl(i)+r*sbar(i)
233120 continue
234200 continue
235
236 end subroutine ddds
237
238 subroutine mupdts (sscale,inmemo,n,m,nrz)
239
240!
241! arguments
242!
243 logical sscale,inmemo
244 integer n,m,nrz
245!----
246!
247! On entry:
248! sscale: .true. if scalar initial scaling,
249! .false. if diagonal initial scaling
250! n: number of variables
251!
252! This routine has to return:
253! m: the number of updates to form the approximate Hessien H,
254! inmemo: .true., if the vectors y and s used to form H are stored
255! in core memory,
256! .false. otherwise (storage of y and s on disk, for
257! instance).
258!
259!----
260!
261 if (n.eq.0) then
262 m=0
263 else
264 if (sscale) then
265 m=(nrz-3*n)/(2*n)
266 else
267 m=(nrz-4*n)/(2*n)
268 endif
269 endif
270 inmemo=.true.
271 ! return
272 end subroutine mupdts
273
274 subroutine qna_n1qn3 (simul,prosca,dtonb,dtcab,n,x,f,g,dxmin,df1, &
275 epsg,impres,io,mode,niter,nsim,iz,dz,ndz, &
276 izs,rzs,dzs)
277 !
278 !:Purpose: N1QN3, Version 2.0c, June 1995
279 ! Jean Charles Gilbert, Claude Lemarechal, INRIA.
280 !
281 ! Double precision version of M1QN3.
282 !
283 ! N1qn3 has two running modes: the SID (Scalar Initial Scaling) mode
284 ! and the DIS (Diagonal Initial Scaling) mode. Both do not require
285 ! the same amount of storage, the same subroutines, ...
286 ! In the description below, items that differ in the DIS mode with
287 ! respect to the SIS mode are given in brakets.
288 !
289 ! Use the following subroutines:
290 !
291 ! * N1QN3A
292 ! * DDD, DDDS
293 ! * NLIS0 + DCUBE (Dec 88)
294 ! * MUPDT
295 !
296 ! The following routines are proposed to the user in case the
297 ! Euclidean scalar product is used:
298 ! DUCLID, DTONBE, DTCABE.
299 !
300 ! La sous-routine N1QN3 est une interface entre le programme
301 ! appelant et la sous-routine N1QN3A, le minimiseur proprement dit.
302 !
303 ! Le module PROSCA est sense realiser le produit scalaire de deux
304 ! vecteurs de Rn; le module DTONB est sense realiser le changement
305 ! de coordonnees correspondant au changement de bases: base
306 ! euclidienne -> base orthonormale (pour le produit scalaire
307 ! PROSCA); le module CTBAB fait la transformation inverse: base
308 ! orthonormale -> base euclidienne.
309 !
310 ! Iz is an integer working zone for N1QN3A, its dimension is 5.
311 ! It is formed of 5 scalars that are set by the optimizer:
312 !
313 ! - the dimension of the problem,
314 ! - a identifier of the scaling mode,
315 ! - the number of updates,
316 ! - two pointers.
317 !
318 ! Dz est la zone de travail pour N1QN3A, de dimension ndz.
319 ! Elle est subdivisee en
320 !
321 ! - 3 [ou 4] vecteurs de dimension n: d,gg,[diag,]aux
322 ! - m vecteurs de dimension n: ybar
323 ! - m vecteurs de dimension n: sbar
324 !
325 ! m est alors le plus grand entier tel que
326 !
327 ! - m*(2*n+1)+3*n .le. ndz [m*(2*n+1)+4*n .le. ndz)]
328 ! - soit m := (ndz-3*n) / (2*n+1) [m := (ndz-4*n) / (2*n+1)].
329 ! Il faut avoir m >= 1, donc ndz >= 5n+1 [ndz >= 6n+1].
330 !
331 ! A chaque iteration la metrique est formee a partir d'un multiple
332 ! de l'identite [d'une matrice diagonale] D qui est mise a jour m
333 ! fois par la formule de BFGS en utilisant les m couples {y,s} les
334 ! plus recents.
335 !
336
337!
338! arguments
339!
340 integer n,impres,io,mode,niter,nsim,iz(5),ndz,izs(1)
341 real rzs(1)
342 real(8) x(n),f,g(n),dxmin,df1,epsg,dz(ndz),dzs(1)
343 external simul,prosca,dtonb,dtcab
344!
345! variables locales
346!
347 logical inmemo,sscale
348 integer ntravu,id,igg,idiag,iaux,iybar,isbar,m,mmemo,ntotal,ierr
349 integer m_max
350 real(8) d1,d2,ps
351!
352!---- impressions initiales et controle des arguments
353!
354 write(io,*) '--------------------------------------------------'
355 write(io,*) 'N1QN3: calling modified MPI version of modulopt!!!'
356 write(io,*) '--------------------------------------------------'
357
358 call rpn_comm_allreduce(n,ntotal,1,"mpi_integer", &
359 "mpi_max","GRID",ierr)
360
361 if (impres.ge.1) &
362 write (io,900) n,dxmin,df1,epsg,niter,nsim,impres
363 900 format (/" N1QN3 (Version 2.0c, June 1995): entry point"/ &
364 5x,"dimension of the problem (n):",i9/ &
365 5x,"absolute precision on x (dxmin):",d9.2/ &
366 5x,"expected decrease for f (df1):",d9.2/ &
367 5x,"relative precision on g (epsg):",d9.2/ &
368 5x,"maximal number of iterations (niter):",i6/ &
369 5x,"maximal number of simulations (nsim):",i6/ &
370 5x,"printing level (impres):",i4)
371 if (ntotal.le.0.or.niter.le.0.or.nsim.le.0.or.dxmin.le.0.d+0.or. &
372 epsg.le.0.d+0.or.epsg.gt.1.d+0.or.mode.lt.0.or. &
373 mode.gt.3) then
374 mode=2
375 write (io,901)
376 901 format (/" >>> n1qn3: inconsistent call")
377 return
378 endif
379!
380!---- what method
381!
382 if (mod(mode,2).eq.0) then
383 if (impres.ge.1) write (io,920)
384 920 format (/" n1qn3: Diagonal Initial Scaling mode")
385 sscale=.false.
386 else
387 if (impres.ge.1) write (io,921)
388 921 format (/" n1qn3: Scalar Initial Scaling mode")
389 sscale=.true.
390 endif
391!
392 if ( n.gt.0 .and. &
393 ((ndz.lt.5*n+1).or.((.not.sscale).and.(ndz.lt.6*n+1))) ) then
394 mode=2
395 write (io,902)
396 902 format (/" >>> n1qn3: not enough memory allocated")
397 return
398 endif
399!
400!---- Compute m
401!
402 call mupdts (sscale,inmemo,n,m,ndz)
403 call rpn_comm_allreduce(m,m_max,1,"mpi_integer", &
404 "mpi_max","GRID",ierr)
405 if (m.ne.m_max) then
406 write(io,*) 'replacing value of m, ',m,' with ',m_max
407 m=m_max
408 endif
409!
410! --- Check the value of m (if (y,s) pairs in core, m will be >= 1)
411!
412 if (m.lt.1) then
413 mode=2
414 write (io,9020)
415 9020 format (/" >>> n1qn3: m is set too small in mupdts")
416 return
417 endif
418!
419! --- mmemo = number of (y,s) pairs in core memory
420!
421 mmemo=1
422 if (inmemo) mmemo=m
423!
424 ntravu=2*(2+mmemo)*n
425 if (sscale) ntravu=ntravu-n
426 write (io,903) ndz,ntravu,m
427 903 format (/5x,"allocated memory (ndz) :",i9/ &
428 5x,"used memory : ",i9/ &
429 5x,"number of updates : ",i9)
430 if (ndz.lt.ntravu) then
431 mode=2
432 write (io,902)
433 return
434 endif
435!
436 if (impres.ge.1) then
437 if (inmemo) then
438 write (io,907)
439 else
440 write (io,908)
441 endif
442 endif
443 907 format (5x,"(y,s) pairs are stored in core memory")
444 908 format (5x,"(y,s) pairs are stored by the user")
445!
446!---- cold start or warm restart ?
447! check iz: iz(1)=n, iz(2)=(0 if DIS, 1 if SIS),
448! iz(3)=m, iz(4)=jmin, iz(5)=jmax
449!
450 if (mode/2.eq.0) then
451 if (impres.ge.1) write (io,922)
452 else
453 iaux=0
454 if (sscale) iaux=1
455! if (iz(1).ne.n.or.iz(2).ne.iaux.or.iz(3).ne.m.or.iz(4).lt.1 &
456! .or.iz(5).lt.1.or.iz(4).gt.iz(3).or.iz(5).gt.iz(3)) then
457 if (iz(2).ne.iaux.or.iz(3).ne.m.or.iz(4).lt.1 &
458 .or.iz(5).lt.1.or.iz(4).gt.iz(3).or.iz(5).gt.iz(3)) then
459 mode=2
460 write(*,*) 'iz=',iz(:)
461 write(*,*) 'n,iaux,m=',n,iaux,m
462 if (impres.ge.1) write (io,923)
463 return
464 endif
465 if (impres.ge.1) write (io,924)
466 endif
467 922 format (/" n1qn3: cold start"/,1x)
468 923 format (/" >>> n1qn3: inconsistent iz for a warm restart")
469 924 format (/" n1qn3: warm restart"/,1x)
470 iz(1)=n
471 iz(2)=0
472 if (sscale) iz(2)=1
473 iz(3)=m
474!
475!---- split the working zone dz
476!
477 idiag=1
478 iybar=idiag+n
479 if (sscale) iybar=1
480 isbar=iybar+n*mmemo
481 id=isbar+n*mmemo
482 igg=id+n
483 iaux=igg+n
484!
485!---- call the optimization code
486!
487 call n1qn3a (simul,prosca,dtonb,dtcab,n,x,f,g,dxmin,df1,epsg, &
488 impres,io,mode,niter,nsim,inmemo,iz(3),iz(4),iz(5), &
489 dz(id),dz(igg),dz(idiag),dz(iaux), &
490 dz(iybar),dz(isbar),izs,rzs,dzs)
491!
492!---- impressions finales
493!
494 if (impres.ge.1) write (io,905) mode,niter,nsim,epsg
495 905 format (/,1x,79("-")/ &
496 /" n1qn3: output mode is ",i2 &
497 /5x,"number of iterations: ",i4 &
498 /5x,"number of simulations: ",i6 &
499 /5x,"realized relative precision on g: ",d9.2)
500 call prosca (n,x,x,ps,izs,rzs,dzs)
501 d1=dsqrt(ps)
502 call prosca (n,g,g,ps,izs,rzs,dzs)
503 d2=dsqrt(ps)
504 if (impres.ge.1) write (io,906) d1,f,d2
505 906 format (5x,"norm of x = ",d15.8 &
506 /5x,"f = ",d15.8 &
507 /5x,"norm of g = ",d15.8)
508 !return
509 end subroutine qna_n1qn3
510
511 subroutine n1qn3a (simul,prosca,dtonb,dtcab,n,x,f,g,dxmin,df1, &
512 epsg,impres,io,mode,niter,nsim,inmemo,m,jmin, &
513 jmax,d,gg,diag,aux,ybar,sbar,izs,rzs,dzs)
514 !
515 !:Purpose: Code d'optimisation proprement dit.
516 !
517
518!
519! arguments
520!
521 logical inmemo
522 integer n,impres,io,mode,niter,nsim,m,jmin,jmax,izs(1)
523 real rzs(1)
524 real(8) x(n),f,g(n),dxmin,df1,epsg,d(n),gg(n),diag(n), &
525 aux(n),ybar(n,1),sbar(n,1),dzs(1)
526 external simul,prosca,dtonb,dtcab
527!
528! variables locales
529!
530 logical sscale,cold,warm,ntotal
531 integer i,itmax,moderl,isim,jcour,indic,ierr,impresmax
532 real(8) d1,t,tmin,tmin_mpiglobal,tmax,gnorm,eps1,ff, &
533 preco,precos,ys,den, &
534 dk,dk1,ps,ps2,hp0
535!
536! parametres
537!
538 real(8) rm1,rm2
539 parameter (rm1=0.0001d+0,rm2=0.9d+0)
540 real(8) pi
541 parameter (pi=3.1415927d+0)
542 real(8) rmin
543!
544!---- initialisation
545!
546 rmin=1.d-20
547!
548 sscale=.true.
549 if (mod(mode,2).eq.0) sscale=.false.
550!
551 warm=.false.
552 if (mode/2.eq.1) warm=.true.
553 cold=.not.warm
554!
555 itmax=niter
556 niter=0
557 isim=1
558 eps1=1.d+0
559!
560 call rpn_comm_allreduce(impres,impresmax,1,"mpi_integer", &
561 "mpi_max","GRID",ierr)
562 call rpn_comm_allreduce(n,ntotal,1,"mpi_integer", &
563 "mpi_max","GRID",ierr)
564!
565 call prosca (n,g,g,ps,izs,rzs,dzs)
566 gnorm=dsqrt(ps)
567 if (impres.ge.1) write (io,900) f,gnorm
568 900 format (5x,"f = ",d15.8 &
569 /5x,"norm of g = ",d15.8)
570 if (gnorm.lt.rmin) then
571 mode=2
572 if (impres.ge.1) write (io,901)
573 goto 1000
574 endif
575 901 format (/" >>> n1qn3a: initial gradient is too small")
576!
577! --- initialisation pour ddd
578!
579 if (cold) then
580 jmin=1
581 jmax=0
582 endif
583 jcour=1
584 if (inmemo) jcour=jmax
585!
586! --- mise a l'echelle de la premiere direction de descente
587!
588 if (cold) then
589!
590! --- use Fletcher's scaling and initialize diag to 1.
591!
592 precos=2.d+0*df1/gnorm**2
593 do 10 i=1,n
594 d(i)=-g(i)*precos
595 diag(i)=1.d+0
596 10 continue
597 if (impres.ge.5) write(io,902) precos
598 902 format (/" n1qn3a: descent direction -g: precon = ",d10.3)
599 else
600!
601! --- use the matrix stored in [diag and] the (y,s) pairs
602!
603 if (sscale) then
604 call prosca (n,ybar(1,jcour),ybar(1,jcour),ps,izs,rzs,dzs)
605 precos=1.d+0/ps
606 endif
607 do 11 i=1,n
608 d(i)=-g(i)
609 11 continue
610 if (inmemo) then
611 call ddd (prosca,dtonb,dtcab,n,sscale,m,d,aux,jmin,jmax, &
612 precos,diag,ybar,sbar,izs,rzs,dzs)
613 else
614 call ddds (prosca,dtonb,dtcab,n,sscale,m,d,aux,jmin,jmax, &
615 precos,diag,ybar,sbar,izs,rzs,dzs)
616 endif
617 endif
618!
619 if (impres.eq.3) then
620 write(io,903)
621 write(io,904)
622 endif
623 if (impres.eq.4) write(io,903)
624 903 format (/,1x,79("-"))
625 904 format (1x)
626!
627! --- initialisation pour nlis0
628!
629 tmax=1.d+20
630 call prosca (n,d,g,hp0,izs,rzs,dzs)
631 if (hp0.ge.0.d+0) then
632 mode=7
633 if (impres.ge.1) write (io,905) niter,hp0
634 goto 1000
635 endif
636 905 format (/" >>> n1qn3 (iteration ",i2,"): " &
637 /5x," the search direction d is not a ", &
638 "descent direction: (g,d) = ",d12.5)
639!
640! --- compute the angle (-g,d)
641!
642 if (warm.and.impresmax.ge.5) then
643 call prosca (n,g,g,ps,izs,rzs,dzs)
644 ps=dsqrt(ps)
645 call prosca (n,d,d,ps2,izs,rzs,dzs)
646 ps2=dsqrt(ps2)
647 ps=hp0/ps/ps2
648 ps=dmin1(-ps,1.d+0)
649 ps=dacos(ps)
650 d1=ps*180.d+0/pi
651 if(impres.ge.5) write (io,906) sngl(d1)
652 endif
653 906 format (/" n1qn3: descent direction d: ", &
654 "angle(-g,d) = ",f5.1," degrees")
655!
656!---- Debut de l'iteration. on cherche x(k+1) de la forme x(k) + t*d,
657! avec t > 0. On connait d.
658!
659! Debut de la boucle: etiquette 100,
660! Sortie de la boucle: goto 1000.
661!
662100 niter=niter+1
663 if (impres.lt.0) then
664 if (mod(niter,-impres).eq.0) then
665 indic=1
666 call simul (indic,n,x,f,g,izs,rzs,dzs)
667 endif
668 endif
669 if (impres.ge.5) write(io,903)
670 if (impres.ge.4) write(io,904)
671 if (impres.ge.3) write (io,910) niter,isim,f,hp0
672 910 format (" n1qn3: iter ",i3,", simul ",i3, &
673 ", f=",d15.8,", h'(0)=",d12.5)
674 !$OMP PARALLEL DO DEFAULT(SHARED) PRIVATE(I)
675 do 101 i=1,n
676 gg(i)=g(i)
677101 continue
678 !$OMP END PARALLEL DO
679 ff=f
680!
681! --- recherche lineaire et nouveau point x(k+1)
682!
683 if (impres.ge.5) write (io,911)
684 911 format (/" n1qn3: line search")
685!
686! --- calcul de tmin
687!
688 tmin=0.d+0
689 do 200 i=1,n
690 tmin=max(tmin,dabs(d(i)))
691200 continue
692 call rpn_comm_allreduce(tmin,tmin_mpiglobal,1, &
693 "mpi_double_precision", &
694 "mpi_max","GRID",ierr)
695 tmin = tmin_mpiglobal
696
697 tmin=dxmin/tmin
698 t=1.d+0
699 d1=hp0
700!
701 call nlis0 (n,simul,prosca,x,f,d1,t,tmin,tmax,d,g,rm2,rm1, &
702 impres,io,moderl,isim,nsim,aux,izs,rzs,dzs)
703!
704! --- nlis0 renvoie les nouvelles valeurs de x, f et g
705!
706 if (moderl.ne.0) then
707 if (moderl.lt.0) then
708!
709! --- calcul impossible
710! t, g: ou les calculs sont impossibles
711! x, f: ceux du t_gauche (donc f <= ff)
712!
713 mode=moderl
714 elseif (moderl.eq.1) then
715!
716! --- descente bloquee sur tmax
717! [sortie rare (!!) d'apres le code de nlis0]
718!
719 mode=3
720 if (impres.ge.1) write(io,912) niter
721 912 format (/" >>> n1qn3 (iteration ",i3, &
722 "): line search blocked on tmax: ", &
723 "decrease the scaling")
724 elseif (moderl.eq.4) then
725!
726! --- nsim atteint
727! x, f: ceux du t_gauche (donc f <= ff)
728!
729 mode=5
730 elseif (moderl.eq.5) then
731!
732! --- arret demande par l'utilisateur (indic = 0)
733! x, f: ceux en sortie du simulateur
734!
735 mode=0
736 elseif (moderl.eq.6) then
737!
738! --- arret sur dxmin ou appel incoherent
739! x, f: ceux du t_gauche (donc f <= ff)
740!
741 mode=6
742 endif
743 goto 1000
744 endif
745!
746! NOTE: stopping tests are now done after having updated the matrix, so
747! that update information can be stored in case of a later warm restart
748!
749! --- mise a jour de la matrice
750!
751 if (m.gt.0) then
752!
753! --- mise a jour des pointeurs
754!
755 jmax=jmax+1
756 if (jmax.gt.m) jmax=jmax-m
757 if ((cold.and.niter.gt.m).or.(warm.and.jmin.eq.jmax)) then
758 jmin=jmin+1
759 if (jmin.gt.m) jmin=jmin-m
760 endif
761 if (inmemo) jcour=jmax
762!
763! --- y, s et (y,s)
764!
765 !$OMP PARALLEL DO DEFAULT(SHARED) PRIVATE(I)
766 do 400 i=1,n
767 sbar(i,jcour)=t*d(i)
768 ybar(i,jcour)=g(i)-gg(i)
769400 continue
770 !$OMP END PARALLEL DO
771 if (impresmax.ge.5) then
772 call prosca (n,sbar(1,jcour),sbar(1,jcour),ps,izs,rzs,dzs)
773 dk1=dsqrt(ps)
774 if (impres.ge.5.and.niter.gt.1) write (io,930) dk1/dk
775 930 format (/" n1qn3: convergence rate, s(k)/s(k-1) = ", &
776 d12.5)
777 dk=dk1
778 endif
779 call prosca (n,ybar(1,jcour),sbar(1,jcour),ys,izs,rzs,dzs)
780 if (ys.le.0.d+0) then
781 mode=7
782 if (impres.ge.1) write (io,931) niter,ys
783 931 format (/" >>> n1qn3 (iteration ",i2, &
784 "): the scalar product (y,s) = ",d12.5 &
785 /27x,"is not positive")
786 goto 1000
787 endif
788!
789! --- ybar et sbar
790!
791 d1=dsqrt(1.d+0/ys)
792 !$OMP PARALLEL DO DEFAULT(SHARED) PRIVATE(I)
793 do 410 i=1,n
794 sbar(i,jcour)=d1*sbar(i,jcour)
795 ybar(i,jcour)=d1*ybar(i,jcour)
796 410 continue
797 !$OMP END PARALLEL DO
798!
799! --- compute the scalar or diagonal preconditioner
800!
801 if (impres.ge.5) write(io,932)
802 932 format (/" n1qn3: matrix update:")
803!
804! --- Here is the Oren-Spedicato factor, for scalar scaling
805!
806 if (sscale) then
807 call prosca (n,ybar(1,jcour),ybar(1,jcour),ps,izs,rzs,dzs)
808 precos=1.d+0/ps
809!
810 if (impres.ge.5) write (io,933) precos
811 933 format (5x,"Oren-Spedicato factor = ",d10.3)
812!
813! --- Scale the diagonal to Rayleigh's ellipsoid.
814! Initially (niter.eq.1) and for a cold start, this is
815! equivalent to an Oren-Spedicato scaling of the
816! identity matrix.
817!
818 else
819 call dtonb (n,ybar(1,jcour),aux,izs,rzs,dzs)
820 ps=0.d0
821 do 420 i=1,n
822 ps=ps+diag(i)*aux(i)*aux(i)
823 420 continue
824 call mmpi_allreduce_sumreal8scalar(ps,"GRID")
825 d1=1.d0/ps
826 if (impres.ge.5) then
827 write (io,934) d1
828 934 format(5x,"fitting the ellipsoid: factor = ",d10.3)
829 endif
830 !$OMP PARALLEL DO DEFAULT(SHARED) PRIVATE(I)
831 do 421 i=1,n
832 diag(i)=diag(i)*d1
833 421 continue
834 !$OMP END PARALLEL DO
835!
836! --- update the diagonal
837! (gg is used as an auxiliary vector)
838!
839 call dtonb (n,sbar(1,jcour),gg,izs,rzs,dzs)
840 ps=0.d0
841 do 430 i=1,n
842 ps=ps+gg(i)*gg(i)/diag(i)
843 430 continue
844 call mmpi_allreduce_sumreal8scalar(ps,"GRID")
845 den=ps
846 !$OMP PARALLEL DO DEFAULT(SHARED) PRIVATE(I)
847 do 431 i=1,n
848 diag(i)=1.d0/ &
849 (1.d0/diag(i)+aux(i)**2-(gg(i)/diag(i))**2/den)
850 if (diag(i).le.0.d0) then
851 if (impres.ge.5) write (io,935) i,diag(i),rmin
852 diag(i)=rmin
853 endif
854 431 continue
855 !$OMP END PARALLEL DO
856 935 format (/" >>> n1qn3-WARNING: diagonal element ",i8, &
857 " is negative (",d10.3,"), reset to ",d10.3)
858!
859 if (impresmax.ge.5) then
860 ps=0.d0
861 do 440 i=1,n
862 ps=ps+diag(i)
863 440 continue
864 call mmpi_allreduce_sumreal8scalar(ps,"GRID")
865 ps=ps/ntotal
866 preco=ps
867!
868 ps2=0.d0
869 do 441 i=1,n
870 ps2=ps2+(diag(i)-ps)**2
871 441 continue
872 call mmpi_allreduce_sumreal8scalar(ps2,"GRID")
873 ps2=dsqrt(ps2/ntotal)
874 if (impres.ge.5) write (io,936) preco,ps2
875 936 format (5x,"updated diagonal: average value = ",d10.3, &
876 ", sqrt(variance) = ",d10.3)
877 endif
878 endif
879 endif
880!
881! --- tests d'arret
882!
883 call prosca(n,g,g,ps,izs,rzs,dzs)
884 eps1=ps
885 eps1=dsqrt(eps1)/gnorm
886!
887 if (impres.ge.5) write (io,940) eps1
888 940 format (/" n1qn3: stopping criterion on g: ",d12.5)
889 if (eps1.lt.epsg) then
890 mode=1
891 goto 1000
892 endif
893 if (niter.eq.itmax) then
894 mode=4
895 if (impres.ge.1) write (io,941) niter
896 941 format (/" >>> n1qn3 (iteration ",i3, &
897 "): maximal number of iterations")
898 goto 1000
899 endif
900 if (isim.gt.nsim) then
901 mode=5
902 if (impres.ge.1) write (io,942) niter,isim
903 942 format (/" >>> n1qn3 (iteration ",i3,"): ",i6, &
904 " simulations (maximal number reached)")
905 goto 1000
906 endif
907!
908! --- calcul de la nouvelle direction de descente d = - H.g
909!
910 if (m.eq.0) then
911 preco=2.d0*(ff-f)/(eps1*gnorm)**2
912 !$OMP PARALLEL DO DEFAULT(SHARED) PRIVATE(I)
913 do 500 i=1,n
914 d(i)=-g(i)*preco
915 500 continue
916 !$OMP END PARALLEL DO
917 else
918 !$OMP PARALLEL DO DEFAULT(SHARED) PRIVATE(I)
919 do 510 i=1,n
920 d(i)=-g(i)
921 510 continue
922 !$OMP END PARALLEL DO
923 if (inmemo) then
924 call ddd (prosca,dtonb,dtcab,n,sscale,m,d,aux,jmin,jmax, &
925 precos,diag,ybar,sbar,izs,rzs,dzs)
926 else
927 call ddds (prosca,dtonb,dtcab,n,sscale,m,d,aux,jmin,jmax, &
928 precos,diag,ybar,sbar,izs,rzs,dzs)
929 endif
930 endif
931!
932! --- test: la direction d est-elle de descente ?
933! hp0 sera utilise par nlis0
934!
935 call prosca (n,d,g,hp0,izs,rzs,dzs)
936 if (hp0.ge.0.d+0) then
937 mode=7
938 if (impres.ge.1) write (io,905) niter,hp0
939 goto 1000
940 endif
941 if (impresmax.ge.5) then
942 call prosca (n,g,g,ps,izs,rzs,dzs)
943 ps=dsqrt(ps)
944 call prosca (n,d,d,ps2,izs,rzs,dzs)
945 ps2=dsqrt(ps2)
946 ps=hp0/ps/ps2
947 ps=dmin1(-ps,1.d+0)
948 ps=dacos(ps)
949 d1=ps
950 d1=d1*180.d0/pi
951 if (impres.ge.5) write (io,906) sngl(d1)
952 endif
953!
954!---- on poursuit les iterations
955!
956 goto 100
957!
958!---- retour
959!
960 1000 continue
961 nsim=isim
962 epsg=eps1
963
964 end subroutine n1qn3a
965
966 subroutine nlis0 (n,simul,prosca,xn,fn,fpn,t,tmin,tmax,d,g, &
967 amd,amf,imp,io,logic,nap,napmax,x,izs,rzs,dzs)
968 !
969 !:Purpose: en sortie logic =
970 !
971 ! * 0 descente serieuse
972 ! * 1 descente bloquee
973 ! * 4 nap > napmax
974 ! * 5 retour a l'utilisateur
975 ! * 6 fonction et gradient pas d'accord
976 ! * < 0 contrainte implicite active
977 !
978
979!!
980!!--- arguments
981!!
982 external simul,prosca
983 integer n,imp,io,logic,nap,napmax,izs(*)
984 real rzs(*)
985 real(8) xn(n),fn,fpn,t,tmin,tmax,d(n),g(n),amd,amf,x(n),dzs(*)
986!
987! --- variables locales
988!
989 logical lfound,lfound2
990 integer i,indic,indica,indicd,ierr,ntotal
991 real(8) tesf,tesd,tg,fg,fpg,td,ta,fa,fpa,d2,f,fp,ffn,fd, &
992 fpd,z,test,barmin,barmul,barmax,barr,gauche,droite,taa,ps
993!
994 1000 format (/4x,9h nlis0 ,4x,4hfpn=,d10.3,4h d2=,d9.2, &
995 7h tmin=,d9.2,6h tmax=,d9.2)
996 1001 format (/4x,6h mlis0,3x,"stop on tmin",8x, &
997 "step",11x,"functions",5x,"derivatives")
998 1002 format (4x,6h nlis0,37x,d10.3,2d11.3)
999 1003 format (4x,6h nlis0,d14.3,2d11.3)
1000 1004 format (4x,6h nlis0,37x,d10.3,7h indic=,i3)
1001 1005 format (4x,6h nlis0,14x,2d18.8,d11.3)
1002 1006 format (4x,6h nlis0,14x,d18.8,12h indic=,i3)
1003 1007 format (/4x,6h mlis0,10x,"tmin forced to tmax")
1004 1008 format (/4x,6h mlis0,10x,"inconsistent call")
1005 call rpn_comm_allreduce(n,ntotal,1,"mpi_integer", &
1006 "mpi_max","GRID",ierr)
1007 if (ntotal.gt.0 .and. fpn.lt.0.d0 .and. t.gt.0.d0 &
1008 .and. tmax.gt.0.d0 .and. amf.gt.0.d0 &
1009 .and. amd.gt.amf .and. amd.lt.1.d0) go to 5
1010 logic=6
1011 go to 999
1012 5 tesf=amf*fpn
1013 tesd=amd*fpn
1014 barmin=0.01d0
1015 barmul=5.d0
1016 barmax=0.3d0
1017 barr=barmin
1018 td=0.d0
1019 tg=0.d0
1020 fg=fn
1021 fpg=fpn
1022 ta=0.d0
1023 fa=fn
1024 fpa=fpn
1025 call prosca (n,d,d,ps,izs,rzs,dzs)
1026 d2=ps
1027!
1028! elimination d'un t initial ridiculement petit
1029!
1030 if (t.gt.tmin) go to 20
1031 t=tmin
1032 if (t.le.tmax) go to 20
1033 if (imp.gt.0) write (io,1007)
1034 tmin=tmax
1035 20 if (fn+t*fpn.lt.fn+0.9d0*t*fpn) go to 30
1036 t=2.d0*t
1037 go to 20
1038 30 indica=1
1039 logic=0
1040 if (t.gt.tmax) then
1041 t=tmax
1042 logic=1
1043 endif
1044 if (imp.ge.4) write (io,1000) fpn,d2,tmin,tmax
1045!
1046! --- nouveau x
1047!
1048 do 50 i=1,n
1049 x(i)=xn(i)+t*d(i)
1050 50 continue
1051!
1052! --- boucle
1053!
1054 100 nap=nap+1
1055 if(nap.gt.napmax) then
1056 logic=4
1057 fn=fg
1058 do 120 i=1,n
1059 xn(i)=xn(i)+tg*d(i)
1060 120 continue
1061 go to 999
1062 endif
1063 indic=4
1064!
1065! --- appel simulateur
1066!
1067 call simul(indic,n,x,f,g,izs,rzs,dzs)
1068 if(indic.eq.0) then
1069!
1070! --- arret demande par l'utilisateur
1071!
1072 logic=5
1073 fn=f
1074 do 170 i=1,n
1075 xn(i)=x(i)
1076 170 continue
1077 go to 999
1078 endif
1079 if(indic.lt.0) then
1080!
1081! --- les calculs n'ont pas pu etre effectues par le simulateur
1082!
1083 td=t
1084 indicd=indic
1085 logic=0
1086 if (imp.ge.4) write (io,1004) t,indic
1087 t=tg+0.1d0*(td-tg)
1088 go to 905
1089 endif
1090!
1091! --- les tests elementaires sont faits, on y va
1092!
1093 call prosca (n,d,g,ps,izs,rzs,dzs)
1094 fp=ps
1095!
1096! --- premier test de Wolfe
1097!
1098 ffn=f-fn
1099 if(ffn.gt.t*tesf) then
1100 td=t
1101 fd=f
1102 fpd=fp
1103 indicd=indic
1104 logic=0
1105 if(imp.ge.4) write (io,1002) t,ffn,fp
1106 go to 500
1107 endif
1108!
1109! --- test 1 ok, donc deuxieme test de Wolfe
1110!
1111 if(imp.ge.4) write (io,1003) t,ffn,fp
1112 if(fp.gt.tesd) then
1113 logic=0
1114 go to 320
1115 endif
1116 if (logic.eq.0) go to 350
1117!
1118! --- test 2 ok, donc pas serieux, on sort
1119!
1120 320 fn=f
1121 do 330 i=1,n
1122 xn(i)=x(i)
1123 330 continue
1124 go to 999
1125!
1126!
1127!
1128 350 tg=t
1129 fg=f
1130 fpg=fp
1131 if(td.ne.0.d0) go to 500
1132!
1133! extrapolation
1134!
1135 taa=t
1136 gauche=(1.d0+barmin)*t
1137 droite=10.d0*t
1138 call dcube (t,f,fp,ta,fa,fpa,gauche,droite)
1139 ta=taa
1140 if(t.lt.tmax) go to 900
1141 logic=1
1142 t=tmax
1143 go to 900
1144!
1145! interpolation
1146!
1147 500 if(indica.le.0) then
1148 ta=t
1149 t=0.9d0*tg+0.1d0*td
1150 go to 900
1151 endif
1152 test=barr*(td-tg)
1153 gauche=tg+test
1154 droite=td-test
1155 taa=t
1156 call dcube (t,f,fp,ta,fa,fpa,gauche,droite)
1157 ta=taa
1158 if (t.gt.gauche .and. t.lt.droite) then
1159 barr=dmax1(barmin,barr/barmul)
1160! barr=barmin
1161 else
1162 barr=dmin1(barmul*barr,barmax)
1163 endif
1164!
1165! --- fin de boucle
1166! - t peut etre bloque sur tmax
1167! (venant de l'extrapolation avec logic=1)
1168!
1169 900 fa=f
1170 fpa=fp
1171 905 indica=indic
1172!
1173! --- faut-il continuer ?
1174!
1175 if (td.eq.0.d0) go to 950
1176 if (td-tg.lt.tmin) go to 920
1177!
1178! --- limite de precision machine (arret de secours) ?
1179!
1180 lfound=.false.
1181 do i=1,n
1182 z=xn(i)+t*d(i)
1183 if (z.ne.xn(i).and.z.ne.x(i)) then
1184 lfound=.true.
1185 exit
1186 endif
1187 enddo
1188 call rpn_comm_allreduce(lfound,lfound2,1,"mpi_logical", &
1189 "mpi_lor","GRID",ierr)
1190 if(lfound2) go to 950
1191!
1192! --- arret sur dxmin ou de secours
1193!
1194 920 logic=6
1195!
1196! si indicd<0, derniers calculs non faits par simul
1197!
1198 if (indicd.lt.0) logic=indicd
1199!
1200! si tg=0, xn = xn_depart,
1201! sinon on prend xn=x_gauche qui fait decroitre f
1202!
1203 if (tg.eq.0.d0) go to 940
1204 fn=fg
1205 do 930 i=1,n
1206 930 xn(i)=xn(i)+tg*d(i)
1207 940 if (imp.le.0) go to 999
1208 write (io,1001)
1209 write (io,1005) tg,fg,fpg
1210 if (logic.eq.6) write (io,1005) td,fd,fpd
1211 if (logic.eq.7) write (io,1006) td,indicd
1212 go to 999
1213!
1214! recopiage de x et boucle
1215!
1216 950 do 960 i=1,n
1217 960 x(i)=xn(i)+t*d(i)
1218 go to 100
1219 999 continue
1220 end subroutine nlis0
1221
1222end module quasiNewton_mod