Literate version of PEXSI driver
Table of Contents
- 1. Introduction
- 2. Auxiliary module for holding the plan
- 3. The PEXSI solver code
- 3.1. Main structure
- 3.2. Routine header
- 3.3. Routine variables
- 3.4. Define communicators
- 3.5. Re-distribute matrices
- 3.6. Set tolerance
- 3.7. Set options
- 3.8. Initialize plan at first scf step
- 3.9. Load H and S matrices
- 3.10. Factorization
- 3.11. Call the solver
- 3.12. Get output matrices and compute energies
- 3.13. Copy information to Siesta side
- 3.14. Clean up
- 3.15. Support routines
- 3.15.1. Inertia-count iteration
- 3.15.2. On-the-fly tolerance determination
- 3.15.3. Decide whether inertia-counting is needed
- 3.15.4. Get bracket for inertia-counting
- 3.15.5. Get bracket for solver
- 3.15.6. Compute current temperature if annealing
- 3.15.7. Linear interpolation routine
- 3.15.8. Error dispatcher
1 Introduction
This is a version of the SIESTA-PEXSI interface for the scf cycle, employing an "expert" mode instead of the simple DFT driver included with the library. It can also deal with spin polarization.
2 Auxiliary module for holding the plan
The plan feature allows, among other things, the re-use of factorization information.
2.1 TODO Maybe integrate the plan handling in the pexsi_solver module.
module m_pexsi

  use precision, only: dp
  use iso_c_binding

  integer(c_intptr_t), public :: plan

  public :: pexsi_initialize_scfloop
  public :: pexsi_finalize_scfloop

  private

CONTAINS

  subroutine pexsi_initialize_scfloop(World_Comm,npPerPole,mpirank,info)
    use f_ppexsi_interface
    integer, intent(in)  :: npPerPole, mpirank
    integer, intent(in)  :: World_Comm
    integer, intent(out) :: info

    integer :: numProcRow, numProcCol
    integer :: outputFileIndex

    call get_row_col(npPerPole,numProcRow,numProcCol)

    ! Set the outputFileIndex to be the pole index.
    ! Starting from PEXSI v0.8.0, the first processor for each pole outputs
    ! information
    if( mod( mpirank, npPerPole ) .eq. 0 ) then
       outputFileIndex = mpirank / npPerPole
    else
       outputFileIndex = -1
    endif

    plan = f_ppexsi_plan_initialize(&
         World_Comm,&
         numProcRow,&
         numProcCol,&
         outputFileIndex,&
         info)

  end subroutine pexsi_initialize_scfloop

  subroutine pexsi_finalize_scfloop()
    use f_ppexsi_interface
    integer :: info

    call f_ppexsi_plan_finalize( plan, info )

  end subroutine pexsi_finalize_scfloop

  subroutine get_row_col(np,nrow,ncol)
    integer, intent(in)  :: np
    integer, intent(out) :: nrow, ncol
    !
    ! Finds the factors nrow and ncol such that nrow*ncol=np,
    ! are as similar as possible, and nrow>=ncol.
    ! For prime np, ncol=1, nrow=np.
    ncol = floor(sqrt(dble(np)))
    do
       nrow = np/ncol
       if (nrow*ncol == np) exit
       ncol = ncol - 1
    enddo
  end subroutine get_row_col

end module m_pexsi
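To make the intended life cycle of the plan concrete, here is a minimal usage sketch. The loop structure and the solve_with_pexsi call are placeholders for illustration, not actual SIESTA routines; in SIESTA itself the initialization happens inside pexsi_solver at the first scf step and the finalization is done by siesta_forces.

! Hypothetical calling sequence (sketch only; solve_with_pexsi is a placeholder)
call pexsi_initialize_scfloop(World_Comm, npPerPole, mpirank, info)  ! once, at scf step 1
do iscf = 1, max_scf_iter
   call solve_with_pexsi(...)   ! each call re-uses the factorization stored in 'plan'
   if (converged) exit
enddo
call pexsi_finalize_scfloop()   ! at the end of the scf cycle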
3 The PEXSI solver code
3.1 Main structure
This is the main structure of the code.
module m_pexsi_solver

  use precision, only : dp

  implicit none

  public :: pexsi_solver

  real(dp), save :: prevDmax  ! For communication of max diff in DM in scf loop
                              ! used in the heuristics for N_el tolerance
  public :: prevDmax

CONTAINS

<<routine-header>>

<<routine-variables>>

! -------- for serial compilation
#ifndef MPI
    call die("PEXSI needs MPI")
#else

<<define-communicators>>
<<re-distribute-matrices>>
<<set-tolerance>>
<<set-options>>
<<initialize-plan-at-first-step>>
<<load-hs-matrices>>
<<factorization>>
<<call-driver>>
<<get-matrices-and-energy>>
<<copy-to-siesta-side>>
<<clean-up>>

#endif

  CONTAINS

<<support-routines>>

  end subroutine pexsi_solver

end module m_pexsi_solver
3.2 Routine header
! This version uses separate distributions for Siesta
! (setup_H et al) and PEXSI.
!
subroutine pexsi_solver(iscf, no_u, no_l, nspin_in, &
     maxnh, numh, listhptr, listh, H, S, qtot, DM, EDM, &
     ef, Entropy, temp, delta_Efermi)

<<used-modules>>

  implicit none

  integer, intent(in)          :: iscf   ! scf step number
  integer, intent(in)          :: maxnh, no_u, no_l, nspin_in
  integer, intent(in), target  :: listh(maxnh), numh(no_l), listhptr(no_l)
  real(dp), intent(in), target :: H(maxnh,nspin_in), S(maxnh)
  real(dp), intent(in)         :: qtot
  real(dp), intent(out), target:: DM(maxnh,nspin_in), EDM(maxnh,nspin_in)
  real(dp), intent(out)        :: ef            ! Fermi energy
  real(dp), intent(out)        :: Entropy       ! Entropy/k, dimensionless
  real(dp), intent(in)         :: temp          ! Electronic temperature
  real(dp), intent(in)         :: delta_Efermi  ! Estimated shift in E_fermi
3.2.1 Used modules
use fdf
use parallel, only    : SIESTA_worker, BlockSize
use parallel, only    : SIESTA_Group, SIESTA_Comm
use m_mpi_utils, only : globalize_sum, globalize_max
use m_mpi_utils, only : broadcast
use units,       only : Kelvin, eV
use m_redist_spmatrix, only: aux_matrix, redistribute_spmatrix
use class_Dist
use alloc,             only: re_alloc, de_alloc
use siesta_options,    only: dDtol
#ifdef MPI
use mpi_siesta
#endif
use f_ppexsi_interface
use iso_c_binding
use m_pexsi, only: plan, pexsi_initialize_scfloop
#ifdef TRACING_SOLVEONLY
use extrae_module
#endif
3.3 Routine variables
The local variables for the routine must be declared in a certain place for the compiler, but it is clearer to introduce them as they are needed. The routine-variables noweb-ref will be used for this throughout this document.
integer  :: ih, i
integer  :: info
logical  :: write_ok
!------------
external :: timer
3.4 Define communicators
World_Comm, which is in principle set to Siesta's copy of MPI_Comm_World, is the global communicator.
Some variables need to be broadcast since they were assigned only by the SIESTA worker subset of nodes. They are renamed for clarity.
integer  :: World_Comm, mpirank, ierr
!
real(dp) :: temperature, numElectronExact
integer  :: norbs, scf_step
real(dp) :: delta_Ef
!
integer  :: nspin
!
! Our global communicator is a duplicate of the passed communicator
!
call MPI_Comm_Dup(true_MPI_Comm_World, World_Comm, ierr)
call mpi_comm_rank( World_Comm, mpirank, ierr )

! NOTE: fdf calls will assign values to the whole processor set,
! but some other variables will have to be re-broadcast (see examples
! below)

call timer("pexsi", 1)

if (SIESTA_worker) then
   ! rename some intent(in) variables, which are only
   ! defined for the Siesta subset
   norbs = no_u
   nspin = nspin_in
   scf_step = iscf
   delta_Ef = delta_Efermi
   numElectronExact = qtot

   ! Note that the energy units for the PEXSI interface are arbitrary, but
   ! H, the interval limits, and the temperature have to be in the
   ! same units. Siesta uses Ry units.
   temperature = temp
   if (mpirank==0) write(6,"(a,f10.2)") &
        "Electronic temperature (K): ", temperature/Kelvin
endif
!
call broadcast(norbs,comm=World_Comm)
call broadcast(scf_step,comm=World_Comm)
call broadcast(delta_Ef,comm=World_Comm)
call broadcast(numElectronExact,World_Comm)
call broadcast(temperature,World_Comm)
call broadcast(nspin,World_Comm)

! Imported from modules, but set only in Siesta side
call broadcast(prevDmax,comm=World_Comm)
call broadcast(dDtol,comm=World_Comm)
Now we need to define the Siesta distribution object and the communicator and distribution object for the first team of PEXSI workers, for the purposes of re-distribution of the relevant matrices. The PEXSI library takes care of further redistribution among teams.
Note that the first team of PEXSI workers starts at the root node. This means that there is overlap between the Siesta workers and the PEXSI workers. While this is in principle more economical (and convenient for information exchange), it can pose problems later on.
I will leave it like that, as I do not yet know how to move information among disjoint communicators (use of an intercommunicator?)
For spin, things are a bit more complicated. We need to make sure that the distributions are defined (via actual ranks) with respect to the same reference bridge communicator. For now, this is World_Comm.
integer :: PEXSI_Pole_Group, PEXSI_Spatial_Group, World_Group
integer, allocatable :: pexsi_pole_ranks_in_world(:)
integer :: nworkers_SIESTA
integer, allocatable :: siesta_ranks_in_world(:)
integer :: PEXSI_Pole_Group_in_World
integer, allocatable :: PEXSI_Pole_ranks_in_World_Spin(:,:)
integer :: PEXSI_Pole_Comm, PEXSI_Spatial_Comm, PEXSI_Spin_Comm
integer :: numNodesTotal
integer :: npPerPole
logical :: PEXSI_worker
!
type(Dist) :: dist1
type(Dist), allocatable, target :: dist2_spin(:)
type(Dist), pointer :: dist2
integer :: pbs, color, global_rank, spin_rank
Define the Siesta distribution. Note that this is known to all nodes.
call MPI_Comm_Group(World_Comm,World_Group, ierr)
call MPI_Group_Size(SIESTA_Group, nworkers_SIESTA, ierr)
allocate(siesta_ranks_in_world(nworkers_SIESTA))
call MPI_Group_translate_ranks( SIESTA_Group, nworkers_SIESTA, &
     (/ (i,i=0,nworkers_SIESTA-1) /), &
     World_Group, siesta_ranks_in_world, ierr )
call newDistribution(dist1,World_Comm,siesta_ranks_in_world, &
     TYPE_BLOCK_CYCLIC,BlockSize,"bc dist")
deallocate(siesta_ranks_in_world)
call MPI_Barrier(World_Comm,ierr)
For possibly spin-polarized calculations, we split the communicator.
call mpi_comm_size( World_Comm, numNodesTotal, ierr )

npPerPole = fdf_get("PEXSI.np-per-pole",4)
if (nspin*npPerPole > numNodesTotal) &
     call die("PEXSI.np-per-pole is too big for MPI size")

! "Row" communicator for independent PEXSI operations on each spin
! The name refers to "spatial" degrees of freedom.
color = mod(mpirank,nspin)    ! {0,1} for nspin = 2, or {0} for nspin = 1
call MPI_Comm_Split(World_Comm, color, mpirank, PEXSI_Spatial_Comm, ierr)

! "Column" communicator for spin reductions
color = mpirank/nspin
call MPI_Comm_Split(World_Comm, color, mpirank, PEXSI_Spin_Comm, ierr)

! Group and Communicator for first-pole team of PEXSI workers
!
call MPI_Comm_Group(PEXSI_Spatial_Comm, PEXSI_Spatial_Group, Ierr)
call MPI_Group_incl(PEXSI_Spatial_Group, npPerPole, &
     (/ (i,i=0,npPerPole-1) /),&
     PEXSI_Pole_Group, Ierr)
call MPI_Comm_create(PEXSI_Spatial_Comm, PEXSI_Pole_Group,&
     PEXSI_Pole_Comm, Ierr)

call mpi_comm_rank( PEXSI_Spatial_Comm, global_rank, ierr )
call mpi_comm_rank( PEXSI_Spin_Comm, spin_rank, ierr )

PEXSI_worker = (global_rank < npPerPole)   ! Could be spin up or spin down

! PEXSI blocksize
pbs = norbs/npPerPole

! Careful with this. For the purposes of matrix transfers,
! we need the ranks of the Pole group
! in the "bridge" communicator/group (World)

allocate(pexsi_pole_ranks_in_world(npPerPole))
call MPI_Comm_Group(World_Comm, World_Group, Ierr)

call MPI_Group_translate_ranks( PEXSI_Pole_Group, npPerPole, &
     (/ (i,i=0,npPerPole-1) /), &
     World_Group, pexsi_pole_ranks_in_world, ierr )

! What we need is to include the actual world ranks
! in the distribution object
allocate (PEXSI_Pole_ranks_in_World_Spin(npPerPole,nspin))
call MPI_AllGather(pexsi_pole_ranks_in_world,npPerPole,MPI_integer,&
     PEXSI_Pole_Ranks_in_World_Spin,npPerPole, &
     MPI_integer,PEXSI_Spin_Comm,ierr)

! Create distributions known to all nodes
allocate(dist2_spin(nspin))
do ispin = 1, nspin
   call newDistribution(dist2_spin(ispin), World_Comm, &
        PEXSI_Pole_Ranks_in_World_Spin(:,ispin), &
        TYPE_PEXSI, pbs, "px dist")
enddo
deallocate(pexsi_pole_ranks_in_world,PEXSI_Pole_Ranks_in_World_Spin)
call MPI_Barrier(World_Comm,ierr)
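As a concrete illustration of the two splits (assumed values: nspin = 2, npPerPole = 2, and 8 MPI ranks in World_Comm), the rank layout would be:

color = mod(rank,2):  PEXSI_Spatial_Comm (spin 1) = world ranks {0,2,4,6}
                      PEXSI_Spatial_Comm (spin 2) = world ranks {1,3,5,7}
color = rank/2:       PEXSI_Spin_Comm pairs       = {0,1}, {2,3}, {4,5}, {6,7}

The first-pole teams (spatial rank < npPerPole) are then world ranks {0,2} for spin 1 and {1,3} for spin 2.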
3.5 Re-distribute matrices
This is slightly unseemly, but it works. The aux_matrix derived types are used to store and retrieve the matrices on either side. The code is in external auxiliary modules.
type(aux_matrix), allocatable, target :: m1_spin(:)
type(aux_matrix) :: m2
type(aux_matrix), pointer :: m1
integer :: nrows, nnz, nnzLocal, numColLocal
integer, pointer, dimension(:) :: colptrLocal=> null(), rowindLocal=>null()
!
real(dp), pointer, dimension(:) :: &
     HnzvalLocal=>null(), SnzvalLocal=>null(), &
     DMnzvalLocal => null(), EDMnzvalLocal => null(), &
     FDMnzvalLocal => null()
!
integer :: ispin, pexsi_spin
pexsi_spin = spin_rank+1  ! {1,2}

! This is done serially on the Siesta side, each time
! filling in the structures in one PEXSI set

allocate(m1_spin(nspin))

do ispin = 1, nspin

   m1 => m1_spin(ispin)

   if (SIESTA_worker) then
      m1%norbs = norbs
      m1%no_l  = no_l
      m1%nnzl  = sum(numH(1:no_l))
      m1%numcols => numH
      m1%cols    => listH
      allocate(m1%vals(2))
      m1%vals(1)%data => S(:)
      m1%vals(2)%data => H(:,ispin)
   endif  ! SIESTA_worker

   call timer("redist_orbs_fwd", 1)

   ! Note that we cannot simply wrap this in a pexsi_spin test, as
   ! there are Siesta nodes in both spin sets.
   ! We must discriminate the PEXSI workers by the distribution info

   dist2 => dist2_spin(ispin)
   call redistribute_spmatrix(norbs,m1,dist1,m2,dist2,World_Comm)

   call timer("redist_orbs_fwd", 2)

   if (PEXSI_worker .and. (pexsi_spin == ispin) ) then

      nrows = m2%norbs          ! or simply 'norbs'
      numColLocal = m2%no_l
      nnzLocal    = m2%nnzl
      call MPI_AllReduce(nnzLocal,nnz,1,MPI_integer,MPI_sum,PEXSI_Pole_Comm,ierr)

      call re_alloc(colptrLocal,1,numColLocal+1,"colptrLocal","pexsi_solver")
      colptrLocal(1) = 1
      do ih = 1,numColLocal
         colptrLocal(ih+1) = colptrLocal(ih) + m2%numcols(ih)
      enddo

      rowindLocal => m2%cols
      SnzvalLocal => m2%vals(1)%data
      HnzvalLocal => m2%vals(2)%data

      call re_alloc(DMnzvalLocal,1,nnzLocal,"DMnzvalLocal","pexsi_solver")
      call re_alloc(EDMnzvalLocal,1,nnzLocal,"EDMnzvalLocal","pexsi_solver")
      call re_alloc(FDMnzvalLocal,1,nnzLocal,"FDMnzvalLocal","pexsi_solver")

      call memory_all("after setting up H+S for PEXSI (PEXSI_workers)",PEXSI_Pole_Comm)

   endif ! PEXSI worker
enddo

! Make these available to all
! (Note that the values are those on process 0, which is in the spin=1 set.
! In fact, they are only needed for calls to the interface, so the broadcast
! could be over PEXSI_Spatial_Comm only.)

call MPI_Bcast(nrows,1,MPI_integer,0,World_Comm,ierr)
call MPI_Bcast(nnz,1,MPI_integer,0,World_Comm,ierr)

call memory_all("after setting up H+S for PEXSI",World_comm)
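The colptrLocal construction above is the standard compressed-sparse-column pointer build: entry j+1 accumulates the nonzero counts of the first j local columns. A small worked example with assumed counts:

! Assumed data, for illustration only
! m2%numcols  = (/ 2, 3, 1 /)      ! nonzeros in each of 3 local columns
! colptrLocal = (/ 1, 3, 6, 7 /)   ! column j occupies entries colptrLocal(j) .. colptrLocal(j+1)-1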
3.6 Set tolerance
These are wrapped in a test for first_call. Some other operations could be done in that case. Maybe we can remove the save'd character of some variables (for example, by moving the setting of the tolerances outside the first_call block). Even the bracket and μ values could perhaps be passed as arguments.
real(dp), save :: PEXSINumElectronToleranceMin, &
                  PEXSINumElectronToleranceMax, &
                  PEXSINumElectronTolerance
logical, save  :: first_call = .true.
real(dp), save :: muMin0, muMax0, mu
real(dp)       :: on_the_fly_tolerance
if (first_call) then

   ! Initial guess of chemical potential and containing interval
   ! When using inertia counts, this interval can be wide.
   ! Note that mu, muMin0 and muMax0 are saved variables

   muMin0 = fdf_get("PEXSI.mu-min",-1.0_dp,"Ry")
   muMax0 = fdf_get("PEXSI.mu-max", 0.0_dp,"Ry")
   mu     = fdf_get("PEXSI.mu",-0.60_dp,"Ry")

   PEXSINumElectronToleranceMin = &
        fdf_get("PEXSI.num-electron-tolerance-lower-bound",0.01_dp)
   PEXSINumElectronToleranceMax = &
        fdf_get("PEXSI.num-electron-tolerance-upper-bound",0.5_dp)

   ! start with largest tolerance
   ! (except if overridden by user)
   PEXSINumElectronTolerance = fdf_get("PEXSI.num-electron-tolerance",&
        PEXSINumElectronToleranceMax)

   first_call = .false.

else
   !
   ! Here we could also check whether we are in the first scf iteration
   ! of a multi-geometry run...
   !
   ! Use a moving tolerance, based on how far DM_out was from DM_in
   ! in the previous iteration (except if overridden by user)

   call get_on_the_fly_tolerance(prevDmax,on_the_fly_tolerance)

   ! Override if tolerance is explicitly specified in the fdf file
   PEXSINumElectronTolerance = fdf_get("PEXSI.num-electron-tolerance",&
        on_the_fly_tolerance)
endif
3.7 Set options
We use the options interface to get a template with default values, and then fill in a few custom options based on fdf variables. Note that the options derived type is of limited usefulness when the simple DFT driver is not used. The most important entries are then the factorization flag and the number of processors per pole.

We also use this section to define other user-level options. This is a bit of a mess, since the logic of the expert interface uses a mixed grab-bag of options entries and orphan entries, such as inertiaMaxIter.
type(f_ppexsi_options) :: options
!
integer :: isSIdentity
integer :: verbosity
integer :: inertiaMaxIter
!
real(dp), save :: energyWidthInertiaTolerance
real(dp) :: pexsi_temperature, two_kT
!
call f_ppexsi_set_default_options( options )

options%muPEXSISafeGuard = fdf_get("PEXSI.mu-pexsi-safeguard",0.05_dp,"Ry")
options%maxPEXSIIter = fdf_get("PEXSI.mu-max-iter",10)

isSIdentity = 0

options%numPole = fdf_get("PEXSI.num-poles",40)
options%gap     = fdf_get("PEXSI.gap",0.0_dp,"Ry")

! deltaE is in theory the spectrum width, but in practice can be much smaller
! than | E_max - mu |. It is found that a deltaE slightly bigger
! than | E_min - mu | is usually good enough.
options%deltaE = fdf_get("PEXSI.delta-E",3.0_dp,"Ry")  ! Lin: 10 Ry...

! Ordering flag:
!   1: Use METIS
!   0: Use PARMETIS/PTSCOTCH
options%ordering = fdf_get("PEXSI.ordering",1)

! Number of processors for symbolic factorization
! Only relevant for PARMETIS/PT_SCOTCH
options%npSymbFact = fdf_get("PEXSI.np-symbfact",1)

verbosity = fdf_get("PEXSI.verbosity",1)
options%verbosity = verbosity

call get_current_temperature(pexsi_temperature)
options%temperature = pexsi_temperature
!
! Set guard smearing for later use
!
two_kT = 2.0_dp * pexsi_temperature

options%numElectronPEXSITolerance = PEXSINumElectronTolerance

! Stop inertia count if mu has not changed much from iteration to iteration.
options%muInertiaTolerance = &
     fdf_get("PEXSI.inertia-mu-tolerance",0.05_dp,"Ry")
! One-sided expansion of interval if correct mu falls outside it
options%muInertiaExpansion = &
     fdf_get("PEXSI.lateral-expansion-inertia",3.0_dp*eV,"Ry")

! Other user options

! Maximum number of iterations for computing the inertia
! in a given scf step (until a proper bracket is obtained)
inertiaMaxIter = fdf_get("PEXSI.inertia-max-iter",5)

! Energy-width termination tolerance for inertia-counting
! By default, it is the same as the mu tolerance, to match
! the criterion in the simple DFT driver
energyWidthInertiaTolerance = &
     fdf_get("PEXSI.inertia-energy-width-tolerance", &
     options%muInertiaTolerance,"Ry")
3.8 Initialize plan at first scf step
Each spin-set of PEXSI processors has its own plan.
if (scf_step == 1) then
   call pexsi_initialize_scfloop(PEXSI_Spatial_Comm,npPerPole,global_rank,info)
   call check_info(info,"initialize_plan")
endif
3.9 Load H and S matrices
In this version H and S are symmetric. We associate them with the plan. (I do not know in detail what happens behind the scenes; presumably no copy is made.)
call f_ppexsi_load_real_symmetric_hs_matrix(&
     plan,&
     options,&
     nrows,&
     nnz,&
     nnzLocal,&
     numColLocal,&
     colptrLocal,&
     rowindLocal,&
     HnzvalLocal,&
     isSIdentity,&
     SnzvalLocal,&
     info)

call check_info(info,"load_real_sym_hs_matrix")
3.10 Factorization
This is only done at the beginning of the scf cycle. Presumably it could be done once and for all for one of the spins, but we have no way to transfer the info from the spin-up to the spin-down manifold.
if (scf_step == 1) then
   ! This is only needed for inertia-counting
   call f_ppexsi_symbolic_factorize_real_symmetric_matrix(&
        plan, &
        options,&
        info)
   call check_info(info,"symbolic_factorize_real_symmetric_matrix")

   call f_ppexsi_symbolic_factorize_complex_symmetric_matrix(&
        plan, &
        options,&
        info)
   call check_info(info,"symbolic_factorize_complex_symmetric_matrix")
endif
options%isSymbolicFactorize = 0 ! We do not need it anymore
3.11 Call the solver
3.11.1 Solver call structure
This used to be too much of a black box, with very little control over the operations. The most glaring shortcoming is the lack of proper handling of the convergence conditions.
The plan for improvement is to call the inertia-counting routine and the Fermi-operator calculator explicitly.
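The μ update in the loop below is a Newton step on the electron count. Given the computed number of electrons N(μ) and its derivative dN/dμ returned by the Fermi-operator call,

    deltaMu = - ( N(μ) - N_exact ) / ( dN/dμ )

and the step is accepted only if mu + deltaMu stays inside the bracket [muMin0, muMax0]; otherwise we fall back to a fresh round of inertia counting.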
real(dp) :: deltaMu
real(dp) :: numElectronDrvMuPEXSI, numElectronPEXSI
real(dp), allocatable :: numElectronSpin(:), numElectronDrvMuSpin(:)
real(dp) :: numElectron_out, numElectronDrvMu_out
integer  :: numTotalPEXSIIter
integer  :: numTotalInertiaIter
!
numTotalInertiaIter = 0
call timer("pexsi-solver", 1)

! This is for the initial phase of the scf loop
if (need_inertia_counting()) then
   call get_bracket_for_inertia_count( )
   call do_inertia_count(plan,muMin0,muMax0,mu)
else
   ! Maybe there is no need for bracket, just for mu estimation
   call get_bracket_for_solver()
endif

numTotalPEXSIIter = 0
allocate(numElectronSpin(nspin),numElectronDrvMuSpin(nspin))

solver_loop: do

   if (numTotalPEXSIIter > options%maxPEXSIIter ) then
      ! Maybe do not die, and trust further DM normalization
      ! to fix the number of electrons for unstable cases
      call die("too many PEXSI iterations")
   endif

   if (mpirank == 0) then
      write (6,"(a,f9.4,a,f9.5)") 'Computing DM for mu(eV): ', mu/eV, &
           ' Tol: ', PEXSINumElectronTolerance
      write (6,"(a,f9.4,f9.5)") 'Bracket: ', muMin0/eV, muMax0/eV
   endif

   call f_ppexsi_calculate_fermi_operator_real(&
        plan,&
        options,&
        mu,&
        numElectronExact,&
        numElectron_out,&
        numElectronDrvMu_out,&
        info)
   call check_info(info,"fermi_operator")

   ! Per spin
   numElectron_out = numElectron_out / nspin
   numElectronDrvMu_out = numElectronDrvMu_out / nspin

   ! Gather the results for both spins on all processors
   call MPI_AllGather(numElectron_out,1,MPI_Double_precision,&
        numElectronSpin,1,MPI_Double_precision,PEXSI_Spin_Comm,ierr)
   call MPI_AllGather(numElectronDrvMu_out,1,MPI_Double_precision,&
        numElectronDrvMuSpin,1,MPI_Double_precision,PEXSI_Spin_Comm,ierr)

   numElectronPEXSI = sum(numElectronSpin(1:nspin))
   numElectronDrvMuPEXSI = sum(numElectronDrvMuSpin(1:nspin))

   if (mpirank == 0) then
      write(6,"(a,f10.4)") "Fermi Operator. mu: ", mu/eV
      if (nspin == 2) then
         write(6,"(a,2f10.4,a,f10.4)") "Fermi Operator. numElectron(Up,Down): ", &
              numElectronSpin(1:nspin), " Total: ", numElectronPEXSI
         write(6,"(a,2f10.4,a,f10.4)") "Fermi Operator. dN_e/dmu(Up,Down): ", &
              numElectronDrvMuSpin(1:nspin)*eV, " Total: ", numElectronDrvMuPEXSI*eV
      else
         write(6,"(a,f10.4)") "Fermi Operator. numElectron: ", numElectronPEXSI
         write(6,"(a,f10.4)") "Fermi Operator. dN_e/dmu: ", numElectronDrvMuPEXSI*eV
      endif
   endif

   numTotalPEXSIIter = numTotalPEXSIIter + 1

   if (abs(numElectronPEXSI-numElectronExact) > PEXSINumElectronTolerance) then

      deltaMu = - (numElectronPEXSI - numElectronExact) / numElectronDrvMuPEXSI

      ! The simple DFT driver uses the size of the jump to flag problems:
      !   if (abs(deltaMu) > options%muPEXSISafeGuard) then

      if ( ((mu + deltaMu) < muMin0) .or. ((mu + deltaMu) > muMax0) ) then
         if (mpirank == 0) then
            write(6,"(a,f9.3,a)") "DeltaMu: ", deltaMu, " is too big. Falling back to IC"
         endif
         ! We must choose a new starting bracket, otherwise we will fall into the same
         ! cycle of values
         call do_inertia_count(plan,muMin0,muMax0,mu)
         cycle solver_loop
      endif

      mu = mu + deltaMu
      cycle solver_loop

   else  ! Converged
      if (mpirank == 0) then
         write(6,"(a,f10.4)") "PEXSI solver converged. mu: ", mu/eV
      endif
      exit solver_loop
   endif

end do solver_loop

deallocate(numElectronSpin,numElectronDrvMuSpin)
call timer("pexsi-solver", 2)
3.12 Get output matrices and compute energies
This section is still done by the PEXSI group processors. But note that the energies are not set if we do not use the simple DFT driver.
real(dp) :: bs_energy, eBandH, free_bs_energy
real(dp) :: buffer1
if ( PEXSI_worker ) then

   call f_ppexsi_retrieve_real_symmetric_dft_matrix(&
        plan,&
        DMnzvalLocal,&
        EDMnzvalLocal,&
        FDMnzvalLocal,&
        eBandH,&          ! Will not be available
        bs_energy,&
        free_bs_energy,&
        info)
   call check_info(info,"retrieve_real_symmetric_dft_matrix")

   if (nspin == 2) then
      ! The matrices have to be divided by two...
      DMnzvalLocal(:)  = 0.5_dp * DMnzvalLocal(:)
      EDMnzvalLocal(:) = 0.5_dp * EDMnzvalLocal(:)
      !!! Watch out with this. Internals??
      FDMnzvalLocal(:) = 0.5_dp * FDMnzvalLocal(:)
   endif
endif

if ((mpirank == 0) .and. (verbosity >= 1)) then
   write(6,"(a,i3)") " #&s Number of solver iterations: ", numTotalPEXSIIter
   write(6,"(a,i3)") " #&s Number of inertia iterations: ", numTotalInertiaIter
   write(6,"(a,f12.5,f12.4,2x,a2)") "mu, N_e:", mu/eV, &
        numElectronPEXSI, "&s"
endif

if (PEXSI_worker) then

   free_bs_energy = 0.0_dp
   bs_energy = 0.0_dp
   eBandH = 0.0_dp
   do i = 1,nnzLocal
      free_bs_energy = free_bs_energy + SnzvalLocal(i) * ( FDMnzvalLocal(i) )
      bs_energy = bs_energy + SnzvalLocal(i) * ( EDMnzvalLocal(i) )
      eBandH = eBandH + HnzvalLocal(i) * ( DMnzvalLocal(i) )
   enddo

   ! First, reduce over the Pole_comm
   call globalize_sum( free_bs_energy, buffer1, comm=PEXSI_Pole_Comm )
   free_bs_energy = buffer1
   call globalize_sum( bs_energy, buffer1, comm=PEXSI_Pole_Comm )
   bs_energy = buffer1
   call globalize_sum( eBandH, buffer1, comm=PEXSI_Pole_Comm )
   eBandH = buffer1

   ! Now, reduce over both spins
   call globalize_sum( free_bs_energy, buffer1, comm=PEXSI_Spin_Comm )
   ! Note that we need an extra term: mu*N for the free energy
   free_bs_energy = buffer1 + mu*numElectronPEXSI
   call globalize_sum( bs_energy, buffer1, comm=PEXSI_Spin_Comm )
   bs_energy = buffer1
   call globalize_sum( eBandH, buffer1, comm=PEXSI_Spin_Comm )
   eBandH = buffer1

   ! This output block will be executed only if World's root node is
   ! in one of the leading pole groups. This might not be so
   if ((mpirank == 0) .and. (verbosity >= 2)) then
      write(6,"(a,f12.4)") "#&s Tr(S*EDM) (eV) = ", bs_energy/eV
      write(6,"(a,f12.4)") "#&s Tr(H*DM) (eV) = ", eBandH/eV
      write(6,"(a,f12.4)") "#&s Tr(S*FDM) + mu*N (eV) = ", (free_bs_energy)/eV
   endif

   ef = mu

   ! Note that we use the S*EDM version of the band-structure energy
   ! to estimate the entropy, by comparing it to S*FDM. This looks
   ! consistent, but note that the EDM is not used in Siesta to
   ! estimate the total energy, only the DM (via the density) (that
   ! is, the XC and Hartree correction terms to Ebs going into Etot
   ! are estimated using the DM)

   Entropy = - (free_bs_energy - bs_energy) / temp

   ! ef and Entropy are now known to the leading-pole processes

endif ! PEXSI_worker
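The entropy estimate at the end of this block follows from the identity F = E - TS for the electronic free energy: with F ≈ Tr(S*FDM) + μN and E ≈ Tr(S*EDM),

    Entropy = - ( free_bs_energy - bs_energy ) / temp = S/k

which is dimensionless, as the routine header requires, since temp is kT in Ry.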
3.13 Copy information to Siesta side
do ispin = 1, nspin

   m1 => m1_spin(ispin)

   if (PEXSI_worker .and. (pexsi_spin == ispin)) then
      ! Prepare m2 to transfer

      call de_alloc(FDMnzvalLocal,"FDMnzvalLocal","pexsi_solver")
      call de_alloc(colPtrLocal,"colPtrLocal","pexsi_solver")

      call de_alloc(m2%vals(1)%data,"m2%vals(1)%data","pexsi_solver")
      call de_alloc(m2%vals(2)%data,"m2%vals(2)%data","pexsi_solver")

      m2%vals(1)%data => DMnzvalLocal(1:nnzLocal)
      m2%vals(2)%data => EDMnzvalLocal(1:nnzLocal)
   endif

   ! Prepare m1 to receive the results
   if (SIESTA_worker) then
      nullify(m1%vals(1)%data)   ! formerly pointing to S
      nullify(m1%vals(2)%data)   ! formerly pointing to H
      deallocate(m1%vals)
      nullify(m1%numcols)        ! formerly pointing to numH
      nullify(m1%cols)           ! formerly pointing to listH
   endif

   call timer("redist_orbs_bck", 1)
   dist2 => dist2_spin(ispin)
   call redistribute_spmatrix(norbs,m2,dist2,m1,dist1,World_Comm)
   call timer("redist_orbs_bck", 2)

   if (PEXSI_worker .and. (pexsi_spin == ispin)) then
      call de_alloc(DMnzvalLocal, "DMnzvalLocal", "pexsi_solver")
      call de_alloc(EDMnzvalLocal,"EDMnzvalLocal","pexsi_solver")

      nullify(m2%vals(1)%data)   ! formerly pointing to DM
      nullify(m2%vals(2)%data)   ! formerly pointing to EDM
      deallocate(m2%vals)        ! allocated in the direct transfer
      call de_alloc(m2%numcols,"m2%numcols","pexsi_solver")
      call de_alloc(m2%cols,   "m2%cols",   "pexsi_solver")
   endif

   ! We assume that Siesta's root node also belongs to one of the
   ! leading-pole PEXSI communicators.
   ! Note that by wrapping the broadcasts for SIESTA_workers we
   ! do not make ef and Entropy known to the non-leading PEXSI processes.

   if (SIESTA_worker) then
      call broadcast(ef,comm=SIESTA_Comm)
      call broadcast(Entropy,comm=SIESTA_Comm)

      ! In future, m1%vals(1,2) could be pointing to DM and EDM,
      ! and the 'redistribute' routine check whether the vals arrays are
      ! associated, to use them instead of allocating them.

      DM(:,ispin)  = m1%vals(1)%data(:)
      EDM(:,ispin) = m1%vals(2)%data(:)

      ! Check no_l
      if (no_l /= m1%no_l) then
         call die("Mismatch in no_l")
      endif
      ! Check listH
      if (any(listH(:) /= m1%cols(:))) then
         call die("Mismatch in listH")
      endif

      call de_alloc(m1%vals(1)%data,"m1%vals(1)%data","pexsi_solver")
      call de_alloc(m1%vals(2)%data,"m1%vals(2)%data","pexsi_solver")
      deallocate(m1%vals)        ! allocated in the direct transfer
      call de_alloc(m1%numcols,"m1%numcols","pexsi_solver")
      call de_alloc(m1%cols,   "m1%cols",   "pexsi_solver")
   endif
enddo

call timer("pexsi", 2)
3.14 Clean up
We cannot finalize the plan now if we are going to reuse it in subsequent iterations… This is done by siesta_forces at the end of the scf cycle.
Here we only clean up the communicators and groups and the distributions.
call delete(dist1)
do ispin = 1, nspin
   call delete(dist2_spin(ispin))
enddo
deallocate(dist2_spin)
deallocate(m1_spin)

call MPI_Comm_Free(PEXSI_Spatial_Comm, ierr)
! Only the processes in the pole groups got a valid communicator
! from MPI_Comm_create; freeing MPI_COMM_NULL is erroneous
if (PEXSI_Pole_Comm /= MPI_COMM_NULL) then
   call MPI_Comm_Free(PEXSI_Pole_Comm, ierr)
endif
call MPI_Comm_Free(PEXSI_Spin_Comm, ierr)
call MPI_Comm_Free(World_Comm, ierr)
!
call MPI_Group_Free(PEXSI_Spatial_Group, ierr)
call MPI_Group_Free(PEXSI_Pole_Group, ierr)
call MPI_Group_Free(World_Group, ierr)
3.15 Support routines
Several support routines are collected here:
<<inertia-count-iteration>>
<<get-on-the-fly-tolerance>>
<<need-inertia-counting>>
<<get-bracket-for-inertia-count>>
<<get-bracket-for-solver>>
<<get-current-temperature>>
<<linear-interpolation-routine>>
<<check-info>>
3.15.1 Inertia-count iteration
subroutine do_inertia_count(plan,muMin0,muMax0,muInertia)
  use iso_c_binding, only : c_intptr_t
  use m_convergence

  integer(c_intptr_t)     :: plan
  real(dp), intent(inout) :: muMin0, muMax0
  real(dp), intent(out)   :: muInertia

  real(dp) :: muMinInertia, muMaxInertia
  integer  :: nInertiaRounds
  real(dp), parameter :: eps_inertia = 0.1_dp
  type(converger_t) :: conv_mu
  logical  :: bad_lower_bound, bad_upper_bound
  logical  :: interval_problem, one_more_round
  real(dp) :: inertia_electron_width
  real(dp) :: inertia_original_electron_width
  real(dp) :: inertia_energy_width
  real(dp) :: muLower, muUpper
  integer  :: numMinICountShifts, numShift
  real(dp), allocatable :: shiftList(:), inertiaList(:)
  real(dp), allocatable :: inertiaList_out(:)
  integer  :: imin, imax

<<determine-number-of-shifts>>

  nInertiaRounds = 0
  refine_interval: do

<<refine-interval-by-inertia-count>>

     numTotalInertiaIter = numTotalInertiaIter + 1
  enddo refine_interval

  deallocate(shiftList,inertiaList)

end subroutine do_inertia_count
- Determine number of inertia-count shifts
This is based on the total number of processors available, in such a way that each group of npPerPole processors deals with a shift (a worked example follows the code).

! Minimum number of sampling points for inertia counts
numMinICountShifts = fdf_get("PEXSI.inertia-min-num-shifts", 10)

numShift = numNodesTotal/npPerPole
do
   if (numShift < numMinICountShifts) then
      numShift = numShift + numNodesTotal/npPerPole
   else
      exit
   endif
enddo

allocate(shiftList(numShift), inertiaList(numShift))
allocate(inertiaList_out(numShift))
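For example (assumed values): with numNodesTotal = 64 and npPerPole = 16 there are 4 pole groups, so numShift grows as 4 → 8 → 12 until it meets the default minimum of 10 shifts; each pole group then evaluates 3 of the 12 shifts.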
- Refine interval by inertia count
This is the body of the old expert inertia-count loop. We begin by setting up the shift list and calling the workhorse routine (which does nothing extra inside; it just computes the T = 0 K inertia).
options%muMin0 = muMin0
options%muMax0 = muMax0

if (mpirank == 0) then
   write (6,"(a,2f9.4,a,a,i4)") 'Calling inertiaCount: [', &
        muMin0/eV, muMax0/eV, "] (eV)", &
        " Nshifts: ", numShift
endif

call timer("pexsi-inertia-ct", 1)

do i = 1, numShift
   shiftList(i) = muMin0 + (i-1) * (muMax0-muMin0)/(numShift-1)
enddo

call f_ppexsi_inertia_count_real_symmetric_matrix(&
     plan,&
     options,&
     numShift,&
     shiftList,&
     inertiaList_out,&
     info)
call check_info(info,"inertia-count")

! All-Reduce to add the (two) spin inertias
! so that all processors have the complete inertiaList(:)
call MPI_AllReduce(inertiaList_out, inertiaList, &
     numShift, MPI_Double_precision, &
     MPI_Sum, PEXSI_Spin_Comm, ierr)

! If nspin=1, each state is doubly occupied
!
inertiaList(:) = 2 * inertiaList(:) / nspin

call timer("pexsi-inertia-ct", 2)
Now we need to make sure that the results make sense. First, that we had μ in our starting interval. If not, we immediately expand the bounds and go back to the top of the loop.
interval_problem = .false.
if (mpirank == 0) then
   bad_lower_bound = (inertiaList(1) > (numElectronExact - 0.1))
   bad_upper_bound = (inertiaList(numShift) < (numElectronExact + 0.1))
endif
call broadcast(bad_lower_bound,comm=World_Comm)
call broadcast(bad_upper_bound,comm=World_Comm)

if (bad_lower_bound) then
   interval_problem = .true.
   muMin0 = muMin0 - options%muInertiaExpansion   ! 0.5
   if (mpirank==0) then
      write (6,"(a,2f12.4,a,2f10.4)") 'Wrong inertia-count interval (lower end). Counts: ', &
           inertiaList(1), inertiaList(numShift), &
           ' New interval: ', muMin0/eV, muMax0/eV
   endif
endif
if (bad_upper_bound) then
   interval_problem = .true.
   muMax0 = muMax0 + options%muInertiaExpansion   ! 0.5
   if (mpirank==0) then
      write (6,"(a,2f12.4,a,2f10.4)") 'Wrong inertia-count interval (upper end). Counts: ', &
           inertiaList(1), inertiaList(numShift), &
           ' New interval: ', muMin0/eV, muMax0/eV
   endif
endif

if (interval_problem) then
   ! do nothing more, stay in loop
   cycle refine_interval
endif
If we did have μ in the interval, we consider this a bona-fide inertia-count iteration and update the counter.
Next, we scan the list of inertia values to obtain a new interval for μ. For now, we use the T=0 values, without any broadening.
nInertiaRounds = nInertiaRounds + 1

imin = 1; imax = numShift
do i = 1, numShift
   if (inertiaList(i) < numElectronExact - eps_inertia) then
      imin = max(imin,i)
   endif
   if (inertiaList(i) > numElectronExact + eps_inertia) then
      imax = min(imax,i)
   endif
enddo

muMaxInertia = shiftList(imax)
muMinInertia = shiftList(imin)

! Get the band edges by interpolation
muLower = interpolate(inertiaList,shiftList,numElectronExact-eps_inertia)
muUpper = interpolate(inertiaList,shiftList,numElectronExact+eps_inertia)

muInertia = 0.5_dp * (muUpper + muLower)

if (mpirank == 0) then
   write (6,"(a,i3,f10.4,i3,f10.4)") 'imin, muMinInertia, imax, muMaxInertia: ',&
        imin, muMinInertia/eV, imax, muMaxInertia/eV
   write (6,"(a,2f10.4,a,f10.4)") 'muLower, muUpper: ', muLower/eV, muUpper/eV, &
        ' mu estimated: ', muInertia/eV
endif
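As a small worked example (assumed numbers: numElectronExact = 10, eps_inertia = 0.1): if shiftList = (-1.0, -0.5, 0.0, 0.5) Ry and inertiaList = (6, 8, 12, 14), the scan gives imin = 2 (the last shift counting fewer than 9.9 states) and imax = 3 (the first shift counting more than 10.1), so the bracket becomes [-0.5, 0.0] Ry, and muLower/muUpper interpolate to the energies where the count crosses 9.9 and 10.1.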
Now we have to decide whether we are satisfied with the estimation of μ and the new bracket. We have several possible criteria: the actual width in energy of the bracket, the width in "states" (both of the original interval and the narrower interval), and the behavior of μ itself with successive iterations.
The problem with the "states" measures is that they depend on the size of the system. Energy measures (width and convergence of μ) are more universal. Setting the change in μ to 0.8 eV is the default, and double that for the interval width.
We also exit the loop if we have done too many IC iterations.
Note: We could probably do this on all processors, without the need to wrap the tests in (mpirank == 0) blocks and broadcast afterwards. But proper documentation in the PEXSI library about these issues is lacking.

if (mpirank==0) then

   inertia_energy_width = (muMaxInertia - muMinInertia)
   ! Note that this is the width of the starting interval...
   inertia_original_electron_width = (inertiaList(numShift) - inertiaList(1))
   inertia_electron_width = (inertiaList(imax) - inertiaList(imin))

   write (6,"(a,2f9.4,a,f9.4,3(a,f10.3))") ' -- new bracket (eV): [', &
        muMinInertia/eV, muMaxInertia/eV, &
        "] estimated mu: ", muInertia/eV, &
        " Nel width: ", inertia_electron_width, &
        " (Base: ", inertia_original_electron_width, &
        " ) E width: ", inertia_energy_width/eV

   if (nInertiaRounds == 1) then
      call reset(conv_mu)
      call set_tolerance(conv_mu,options%muInertiaTolerance)
   endif
   call add_value(conv_mu, muInertia)

   one_more_round = .true.

!!$   if (inertia_original_electron_width < inertiaNumElectronTolerance) then
!!$      write (6,"(a)") 'Leaving inertia loop: electron tolerance'
!!$      one_more_round = .false.
!!$   endif
!!$   if (inertia_electron_width < inertiaMinNumElectronTolerance) then
!!$      write (6,"(a)") 'Leaving inertia loop: minimum workable electron tolerance'
!!$      one_more_round = .false.
!!$   endif

   ! This is the first clause of Lin's criterion
   ! in the simple DFT driver. The second clause is the same as the next one
   ! when the energy-width tolerance is the same as the mu tolerance (my default)
   ! I am not sure about the basis for this
   if (abs(muMaxInertia - numElectronExact) < eps_inertia) then
      write (6,"(a,f12.6)") "Leaving inertia loop: |muMaxInertia-N_e|: ", &
           abs(muMaxInertia - numElectronExact)
      one_more_round = .false.
   endif

   if (inertia_energy_width < energyWidthInertiaTolerance) then
      write (6,"(a,f12.6)") 'Leaving inertia loop: energy width tolerance: ', &
           energyWidthInertiaTolerance/eV
      one_more_round = .false.
   endif
   if (is_converged(conv_mu)) then
      write (6,"(a,f12.6)") 'Leaving inertia loop: mu tolerance: ', options%muInertiaTolerance/eV
      one_more_round = .false.
   endif
   if (nInertiaRounds == inertiaMaxIter) then
      write (6,"(a)") 'Leaving inertia loop: too many rounds'
      one_more_round = .false.
   endif
endif

call broadcast(one_more_round,comm=World_Comm)

if (one_more_round) then
   ! stay in loop
   ! These values should be guarded, in case the refined interval
   ! is too tight. Use 2*kT
   !
   muMin0 = muMinInertia - two_kT
   muMax0 = muMaxInertia + two_kT
else
   exit refine_interval
endif
3.15.2 On-the-fly tolerance determination
!
! This routine encodes the heuristics to compute the
! tolerance dynamically.
!
subroutine get_on_the_fly_tolerance(dDmax,tolerance)
  real(dp), intent(in)  :: dDmax
  real(dp), intent(out) :: tolerance

  real(dp) :: tolerance_preconditioner
  real(dp) :: tolerance_target_factor, tolerance_exp
  real(dp), save :: previous_tolerance
  logical :: new_algorithm

  new_algorithm = fdf_get("PEXSI.dynamical-tolerance",.false.)
  !
  if (new_algorithm) then

     ! By default, the tolerance goes to the (minimum) target
     ! at a level 5 times dDtol
     tolerance_target_factor = fdf_get("PEXSI.tolerance-target-factor",5.0_dp)
     !
     ! This can range in a (0.5,2.0) interval, approximately
     tolerance_preconditioner = fdf_get("PEXSI.tolerance-preconditioner",1.0_dp)

     if (scf_step > 1 ) then

        tolerance_exp = log10(dDmax/(tolerance_target_factor*dDtol))
        !
        ! range = log10(PEXSINumElectronToleranceMax/PEXSINumElectronToleranceMin)
        tolerance_exp = max(tolerance_exp,0.0_dp)*tolerance_preconditioner

        tolerance = PEXSINumElectronToleranceMin * 10.0_dp**tolerance_exp
        tolerance = min(tolerance,PEXSINumElectronToleranceMax)

        if (tolerance > previous_tolerance) then
           if (mpirank==0) write(6,"(a,f10.2)") &
                "Will not raise PEXSI solver tolerance to: ", &
                tolerance
           tolerance = previous_tolerance
        endif
        previous_tolerance = tolerance

     else
        ! No heuristics for now for first step
        ! Note that this should really change in MD or geometry optimization
        previous_tolerance = huge(1.0_dp)
        tolerance = PEXSINumElectronToleranceMax
     endif

  else

     tolerance = max(PEXSINumElectronToleranceMin, &
          min(dDmax*1.0, PEXSINumElectronToleranceMax))

  endif

  if (mpirank==0) write(6,"(a,f10.2)") &
       "Current PEXSI solver tolerance: ", tolerance

end subroutine get_on_the_fly_tolerance
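A worked example of the heuristic (assumed values: dDtol = 1.0e-4, tolerance_target_factor = 5, preconditioner = 1, tolerance bounds 0.01 and 0.5): prevDmax = 0.1 gives tolerance_exp = log10(0.1/5.0e-4) ≈ 2.3, so tolerance = min(0.01 * 10**2.3, 0.5) = 0.5; prevDmax = 1.0e-3 gives tolerance_exp ≈ 0.3 and tolerance ≈ 0.02; for prevDmax ≤ 5.0e-4 the exponent is clamped to zero and the minimum tolerance 0.01 is used.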
3.15.3 Decide whether inertia-counting is needed
!------------------------------------------------------------------
! This function will determine whether an initial inertia-counting
! stage is needed, based on user input and the level of convergence
!
! Variables used through host association for now:
!
!    scf_step
!    prevDmax, safe_dDmax_NoInertia
!
! Some logging output is done, so this function is not pure.

function need_inertia_counting() result(do_inertia_count)
  logical :: do_inertia_count

  real(dp) :: safe_dDmax_NoInertia
  integer  :: isInertiaCount, numInertiaCounts

  ! Use inertia counts?
  ! The use of this input variable is deprecated. Warn the user
  ! only if there is a disagreement.
  isInertiaCount = fdf_get("PEXSI.inertia-count",-1)
  ! For how many scf steps?
  numInertiaCounts = fdf_get("PEXSI.inertia-counts",3)

  if ((isInertiaCount == 0) .and. (numInertiaCounts > 0)) then
     if (mpirank == 0) write(6,"(a)") &
          "Warning: Inertia-counts turned off by legacy parameter" // &
          " PEXSI.inertia-count"
     numInertiaCounts = 0
  endif

  safe_dDmax_NoInertia = fdf_get("PEXSI.safe-dDmax-no-inertia",0.05_dp)

  do_inertia_count = .false.
  write_ok = ((mpirank == 0) .and. (verbosity >= 1))

  if (numInertiaCounts > 0) then
     if (scf_step .le. numInertiaCounts) then
        if (write_ok) write(6,"(a,i4)") &
             "&o Inertia-count step scf_step<numIC", scf_step
        do_inertia_count = .true.
     endif
  else if (numInertiaCounts < 0) then
     if (scf_step <= -numInertiaCounts) then
        if (write_ok) write(6,"(a,i4)") &
             "&o Inertia-count step scf_step<-numIC ", scf_step
        do_inertia_count = .true.
     else if (prevDmax > safe_dDmax_NoInertia) then
        if (write_ok) write(6,"(a,i4)") &
             "&o Inertia-count step as prevDmax > safe_Dmax ", scf_step
        do_inertia_count = .true.
     endif
  endif

end function need_inertia_counting
3.15.4 Get bracket for inertia-counting
!---------------------------------------------------------------
! Chooses the proper interval for the call to the driver
! in case we need a stage of inertia counting
!
subroutine get_bracket_for_inertia_count()

  real(dp) :: safe_width_ic
  real(dp) :: safe_dDmax_Ef_inertia

  safe_width_ic = fdf_get("PEXSI.safe-width-ic-bracket",4.0_dp*eV,"Ry")
  safe_dDmax_Ef_Inertia = fdf_get("PEXSI.safe-dDmax-ef-inertia",0.1_dp)

  write_ok = ((mpirank == 0) .and. (verbosity >= 1))

  ! Proper bracketing
  if (scf_step > 1) then
     if (prevDmax < safe_dDmax_Ef_inertia) then
        ! Shift brackets using estimate of Ef change from previous iteration
        !
        if (write_ok) write(6,"(a)") &
             "&o Inertia-count bracket shifted by Delta_Ef"
        ! This might be risky, if the final interval of the previous iteration
        ! is too narrow. We should broaden it by o(kT)
        ! The usefulness of delta_Ef is thus debatable...
        muMin0 = muMin0 + delta_Ef - two_kT
        muMax0 = muMax0 + delta_Ef + two_kT
     else
        ! Use a large enough interval around the previous estimation of
        ! mu (the gap edges are not available...)
        if (write_ok) write(6,"(a)") "&o Inertia-count safe bracket"
        ! muMin0 = min(muLowerEdge - 0.5*safe_width_ic, muMinInertia)
        muMin0 = min(mu - 0.5*safe_width_ic, muMin0)
        ! muMax0 = max(muUpperEdge + 0.5*safe_width_ic, muMaxInertia)
        muMax0 = max(mu + 0.5*safe_width_ic, muMax0)
     endif
  else
     if (write_ok) write(6,"(a)") &
          "&o Inertia-count called with iscf=1 parameters"
  endif

end subroutine get_bracket_for_inertia_count
3.15.5 Get bracket for solver
subroutine get_bracket_for_solver()

  real(dp) :: safe_width_solver
  real(dp) :: safe_dDmax_Ef_solver

  safe_width_solver = fdf_get("PEXSI.safe-width-solver-bracket",4.0_dp*eV,"Ry")
  safe_dDmax_Ef_solver = fdf_get("PEXSI.safe-dDmax-ef-solver",0.05_dp)

  write_ok = ((mpirank == 0) .and. (verbosity >= 1))

  ! Do nothing for now
  ! No setting of muMin0 and muMax0 yet, pending clarification of flow

  if (scf_step > 1) then
     if (prevDmax < safe_dDmax_Ef_solver) then
        if (write_ok) write(6,"(a)") "&o Solver mu shifted by delta_Ef"
        mu = mu + delta_Ef
     endif
     ! Always provide a safe bracket around mu, in case we need to fall back
     ! to executing a cycle of inertia-counting
     if (write_ok) write(6,"(a)") "&o Safe solver bracket around mu"
     muMin0 = mu - 0.5*safe_width_solver
     muMax0 = mu + 0.5*safe_width_solver
  else
     if (write_ok) write(6,"(a)") "&o Solver called with iscf=1 parameters"
     ! do nothing. Keep mu, muMin0 and muMax0 as they are inherited
  endif

end subroutine get_bracket_for_solver
3.15.6 Compute current temperature if annealing
We use a saved variable for keeping track of the previous temperature.
real(dp), save :: previous_pexsi_temperature
!------------------------------------------------------
! If using the "annealing" feature, this routine computes
! the current temperature to use in the PEXSI solver
!
subroutine get_current_temperature(pexsi_temperature)

  real(dp), intent(out) :: pexsi_temperature

  logical  :: use_annealing
  real(dp) :: annealing_preconditioner, temp_factor
  real(dp) :: annealing_target_factor

  use_annealing = fdf_get("PEXSI.use-annealing",.false.)

  if (use_annealing) then

     annealing_preconditioner = fdf_get("PEXSI.annealing-preconditioner",1.0_dp)
     ! By default, the temperature goes to the target at a level 10 times dDtol
     annealing_target_factor = fdf_get("PEXSI.annealing-target-factor",10.0_dp)

     if (scf_step > 1 ) then

        ! Examples for target_factor = 10, dDtol = 0.0001:
        !   prevDmax = 0.1, preconditioner = 1, factor = 3
        !   prevDmax = 0.1, preconditioner = 2, factor = 5
        !   prevDmax = 0.1, preconditioner = 3, factor = 7
        !   prevDmax <= 0.001, factor = 1

        temp_factor = (log10(prevDmax/(annealing_target_factor*dDtol)))
        temp_factor = 1 + annealing_preconditioner * max(0.0_dp, temp_factor)

        pexsi_temperature = temp_factor * temperature

        if (pexsi_temperature > previous_pexsi_temperature) then
           if (mpirank==0) write(6,"(a,f10.2)") &
                "Will not raise PEXSI temperature to: ", &
                pexsi_temperature/Kelvin
           pexsi_temperature = previous_pexsi_temperature
        endif
        previous_pexsi_temperature = pexsi_temperature

     else
        ! No heuristics for now for first step
        previous_pexsi_temperature = huge(1.0_dp)
        pexsi_temperature = temperature
        ! Keep in mind for the future if modifying T at the 1st step
        ! previous_pexsi_temperature = pexsi_temperature
     endif

  else
     pexsi_temperature = temperature
  endif

  if (mpirank==0) write(6,"(a,f10.2)") &
       "Current PEXSI temperature (K): ", pexsi_temperature/Kelvin

end subroutine get_current_temperature
3.15.7 Linear interpolation routine
A very simple routine.
function interpolate(xx,yy,x) result(val)
  !
  ! Interpolate linearly in the (monotonically increasing!) arrays xx and yy
  !
  integer, parameter :: dp = selected_real_kind(10,100)
  real(dp), intent(in) :: xx(:), yy(:)
  real(dp), intent(in) :: x
  real(dp)             :: val

  integer :: i, n

  n = size(xx)
  if (size(yy) /= n) call die("Mismatch in array sizes in interpolate")

  if ( (x < xx(1)) .or. (x > xx(n)) ) then
     call die("Interpolate: x not in range")
  endif

  do i = 2, n
     if (x <= xx(i)) then
        val = yy(i-1) + (x-xx(i-1)) * (yy(i)-yy(i-1))/(xx(i)-xx(i-1))
        exit
     endif
  enddo

end function interpolate
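A minimal usage sketch with assumed data, mirroring how the routine is called above to locate the band edges from the inertia counts:

! Assumed data, for illustration only
real(dp) :: counts(4) = (/ 6.0_dp, 8.0_dp, 12.0_dp, 14.0_dp /)  ! inertia counts (monotonic)
real(dp) :: shifts(4) = (/ -1.0_dp, -0.5_dp, 0.0_dp, 0.5_dp /)  ! corresponding shifts (Ry)
real(dp) :: muLower
muLower = interpolate(counts, shifts, 9.9_dp)
! yields -0.5 + (9.9-8.0)*(0.0-(-0.5))/(12.0-8.0) = -0.2625 Ry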
3.15.8 Error dispatcher
subroutine check_info(info,str)
  integer, intent(in) :: info
  character(len=*), intent(in) :: str

  if (mpirank == 0) then
     if (info /= 0) then
        write(6,*) trim(str) // " info : ", info
        call die("Error exit from " // trim(str) // " routine")
     endif
     call pxfflush(6)
  endif
end subroutine check_info