Literate version of PEXSI driver

1 Introduction

This is a version of the SIESTA-PEXSI interface for the scf cycle, employing the library's "expert" mode instead of the simple DFT driver that comes with it. It can also handle spin polarization.

2 Auxiliary module for holding the plan

The plan feature allows, among other things, the re-use of factorization information.

2.1 TODO Maybe integrate the plan handling in the pexsi_solver module.

  module m_pexsi
  use precision, only: dp
  use iso_c_binding

  integer(c_intptr_t), public :: plan

  public :: pexsi_initialize_scfloop
  public :: pexsi_finalize_scfloop

  private

  CONTAINS

    subroutine pexsi_initialize_scfloop(World_Comm,npPerPole,mpirank,info)
      use f_ppexsi_interface
      integer, intent(in) :: npPerPole, mpirank
      integer, intent(in) :: World_Comm
      integer, intent(out):: info

      integer :: numProcRow, numProcCol
      integer :: outputFileIndex

      call get_row_col(npPerPole,numProcRow,numProcCol)

      ! Set the outputFileIndex to be the pole index.
      ! Starting from PEXSI v0.8.0, the first processor for each pole outputs
      ! information

      if ( mod(mpirank, npPerPole) == 0 ) then
        outputFileIndex = mpirank / npPerPole
      else
        outputFileIndex = -1
      endif

      plan = f_ppexsi_plan_initialize(&
        World_Comm,&
        numProcRow,&
        numProcCol,&
        outputFileIndex,&
        info) 

    end subroutine pexsi_initialize_scfloop


    subroutine pexsi_finalize_scfloop()
      use f_ppexsi_interface

      integer :: info

      call f_ppexsi_plan_finalize( plan, info )

    end subroutine pexsi_finalize_scfloop

    subroutine get_row_col(np,nrow,ncol)
      integer, intent(in)  :: np
      integer, intent(out) :: nrow, ncol
      !
      ! Finds factors nrow and ncol such that nrow*ncol = np,
      ! nrow and ncol are as similar as possible, and nrow >= ncol.
      ! For prime np: ncol = 1, nrow = np.
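      ! Example: np = 12 gives nrow = 4, ncol = 3; np = 7 gives nrow = 7, ncol = 1.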

      ncol  = floor(sqrt(dble(np)))
      do
        nrow = np/ncol
        if (nrow*ncol == np) exit
        ncol = ncol - 1
      enddo
    end subroutine get_row_col

end module m_pexsi
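
The intended lifecycle is then as follows (a minimal sketch of a hypothetical caller; World_Comm, npPerPole, mpirank, scf_step, and die are assumed to be available, as they are in the solver routine of Section 3): the plan is created once at the first scf step and finalized only after the scf loop, so that factorization information can be reused across iterations.

  use m_pexsi, only: pexsi_initialize_scfloop, pexsi_finalize_scfloop

  if (scf_step == 1) then
     call pexsi_initialize_scfloop(World_Comm, npPerPole, mpirank, info)
     if (info /= 0) call die("Could not initialize PEXSI plan")
  endif
  ! ... solver calls re-using the saved 'plan' at each scf step ...
  ! After scf convergence (done by siesta_forces in practice):
  call pexsi_finalize_scfloop()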

3 The PEXSI solver code

3.1 Main structure

This is the main structure of the code.

module m_pexsi_solver
 use precision, only  : dp

 implicit none

 public :: pexsi_solver

 real(dp), save :: prevDmax  ! For communication of max diff in DM in scf loop
                             ! used in the heuristics for N_el tolerance
 public :: prevDmax

CONTAINS

<<routine-header>>
<<routine-variables>>
!  --------  for serial compilation
#ifndef MPI
    call die("PEXSI needs MPI")
#else
<<define-communicators>>
<<re-distribute-matrices>>
<<set-tolerance>>
<<set-options>>
<<initialize-plan-at-first-step>>
<<load-hs-matrices>>
<<factorization>>
<<call-driver>>
<<get-matrices-and-energy>>
<<copy-to-siesta-side>>
<<clean-up>>
#endif

CONTAINS

<<support-routines>>

end subroutine pexsi_solver
end module m_pexsi_solver

3.2 Routine header

! This version uses separate distributions for Siesta 
! (setup_H et al) and PEXSI.
!
subroutine pexsi_solver(iscf, no_u, no_l, nspin_in,  &
     maxnh, numh, listhptr, listh, H, S, qtot, DM, EDM, &
     ef, Entropy, temp, delta_Efermi)

<<used-modules>>

  implicit          none

  integer, intent(in)  :: iscf  ! scf step number
  integer, intent(in)  :: maxnh, no_u, no_l, nspin_in
  integer, intent(in), target  :: listh(maxnh), numh(no_l), listhptr(no_l)
  real(dp), intent(in), target :: H(maxnh,nspin_in), S(maxnh)
  real(dp), intent(in) :: qtot
  real(dp), intent(out), target:: DM(maxnh,nspin_in), EDM(maxnh,nspin_in)
  real(dp), intent(out)        :: ef  ! Fermi energy
  real(dp), intent(out)        :: Entropy ! Entropy/k, dimensionless
  real(dp), intent(in)         :: temp   ! Electronic temperature
  real(dp), intent(in)         :: delta_Efermi  ! Estimated shift in E_fermi

3.2.1 Used modules

    use fdf
    use parallel, only   : SIESTA_worker, BlockSize
    use parallel, only   : SIESTA_Group, SIESTA_Comm
    use m_mpi_utils, only: globalize_sum, globalize_max
    use m_mpi_utils, only: broadcast
    use units,       only: Kelvin, eV
    use m_redist_spmatrix, only: aux_matrix, redistribute_spmatrix
    use class_Dist
    use alloc,             only: re_alloc, de_alloc
    use siesta_options,    only: dDtol
#ifdef MPI
    use mpi_siesta
#endif
use f_ppexsi_interface
use iso_c_binding
use m_pexsi, only: plan, pexsi_initialize_scfloop

#ifdef TRACING_SOLVEONLY
      use extrae_module
#endif

3.3 Routine variables

The local variables for the routine must be declared in a specific place for the compiler, but it is clearer to introduce them as they are needed. The routine-variables noweb-ref is used for this throughout the document.

integer        :: ih, i
integer        :: info
logical        :: write_ok
!------------
external         :: timer

3.4 Define communicators

World_Comm, which is in principle set to Siesta's copy of MPI_Comm_World, is the global communicator.

Some variables need to be broadcast since they were assigned only by the SIESTA worker subset of nodes. They are renamed for clarity.

integer          :: World_Comm, mpirank, ierr
!
real(dp)  :: temperature, numElectronExact
integer   :: norbs, scf_step
real(dp)  :: delta_Ef
!
integer   :: nspin
!
! Our global communicator is a duplicate of the passed communicator
!
call MPI_Comm_Dup(true_MPI_Comm_World, World_Comm, ierr)
call mpi_comm_rank( World_Comm, mpirank, ierr )

! NOTE:  fdf calls will assign values to the whole processor set,
! but some other variables will have to be re-broadcast (see examples
! below)

call timer("pexsi", 1)  

if (SIESTA_worker) then

   ! rename some intent(in) variables, which are only
   ! defined for the Siesta subset

   norbs = no_u
   nspin = nspin_in
   scf_step = iscf
   delta_Ef = delta_Efermi
   numElectronExact = qtot 

   ! Note that the energy units for the PEXSI interface are arbitrary, but
   ! H, the interval limits, and the temperature have to be in the
   ! same units. Siesta uses Ry units.

   temperature      = temp

   if (mpirank==0) write(6,"(a,f10.2)") &
               "Electronic temperature (K): ", temperature/Kelvin
endif
!
call broadcast(norbs,comm=World_Comm)
call broadcast(scf_step,comm=World_Comm)
call broadcast(delta_Ef,comm=World_Comm)
call broadcast(numElectronExact,comm=World_Comm)
call broadcast(temperature,comm=World_Comm)
call broadcast(nspin,comm=World_Comm)
! Imported from modules, but set only in Siesta side
call broadcast(prevDmax,comm=World_Comm)
call broadcast(dDtol,comm=World_Comm)

Now we need to define the Siesta distribution object, as well as the communicator and distribution object for the first team of PEXSI workers, for the purposes of re-distributing the relevant matrices. The PEXSI library takes care of further redistribution among teams.

Note that the first team of PEXSI workers starts at the root node. This means that there is overlap between the Siesta workers and the PEXSI workers. While this is in principle more economical (and convenient for information exchange), it can pose problems later on.

I will leave it like that, as I do not yet know how to move information among disjoint communicators (use of an intercommunicator?)

For spin, things are a bit more complicated. We need to make sure that the distributions are defined (via actual ranks) with respect to the same reference bridge communicator. For now, this is World_Comm.
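
As a small standalone illustration of the split logic used below (a serial sketch, assuming 8 total ranks and nspin = 2; it merely evaluates the same color formulas, outside of MPI):

program split_demo
  implicit none
  integer :: rank, nspin, spatial_color, spin_color
  nspin = 2
  do rank = 0, 7
     ! Same color formulas as in the driver below
     spatial_color = mod(rank, nspin)   ! spin sets {0,2,4,6} and {1,3,5,7}
     spin_color    = rank / nspin       ! spin-reduction pairs (0,1), (2,3), ...
     write(*,"(a,i2,a,i2,a,i2)") "rank ", rank, ": spatial color ", &
          spatial_color, ", spin color ", spin_color
  end do
end program split_demo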

integer :: PEXSI_Pole_Group, PEXSI_Spatial_Group, World_Group
integer, allocatable :: pexsi_pole_ranks_in_world(:)
integer  :: nworkers_SIESTA
integer, allocatable :: siesta_ranks_in_world(:)
integer :: PEXSI_Pole_Group_in_World
integer, allocatable :: PEXSI_Pole_ranks_in_World_Spin(:,:)
integer :: PEXSI_Pole_Comm, PEXSI_Spatial_Comm, PEXSI_Spin_Comm
integer :: numNodesTotal
integer :: npPerPole
logical  :: PEXSI_worker
!
type(Dist)   :: dist1
type(Dist), allocatable, target   :: dist2_spin(:)
type(Dist), pointer :: dist2
integer  :: pbs, color, global_rank, spin_rank

Define the Siesta distribution. Note that this is known to all nodes.

call MPI_Comm_Group(World_Comm,World_Group, ierr)
call MPI_Group_Size(SIESTA_Group, nworkers_SIESTA, ierr)
allocate(siesta_ranks_in_world(nworkers_SIESTA))
call MPI_Group_translate_ranks( SIESTA_Group, nworkers_SIESTA, &
     (/ (i,i=0,nworkers_SIESTA-1) /), &
     World_Group, siesta_ranks_in_world, ierr )
call newDistribution(dist1,World_Comm,siesta_ranks_in_world, &
                     TYPE_BLOCK_CYCLIC,BlockSize,"bc dist")
deallocate(siesta_ranks_in_world)
call MPI_Barrier(World_Comm,ierr)

For possibly spin-polarized calculations, we split the communicator.

call mpi_comm_size( World_Comm, numNodesTotal, ierr )

npPerPole  = fdf_get("PEXSI.np-per-pole",4)
if (nspin*npPerPole > numNodesTotal) &
          call die("PEXSI.np-per-pole is too big for MPI size")

! "Row" communicator for independent PEXSI operations on each spin
! The name refers to "spatial" degrees of freedom.
color = mod(mpirank,nspin)    ! {0,1} for nspin = 2, or {0} for nspin = 1
call MPI_Comm_Split(World_Comm, color, mpirank, PEXSI_Spatial_Comm, ierr)

! "Column" communicator for spin reductions
color = mpirank/nspin       
call MPI_Comm_Split(World_Comm, color, mpirank, PEXSI_Spin_Comm, ierr)

! Group and Communicator for first-pole team of PEXSI workers
!
call MPI_Comm_Group(PEXSI_Spatial_Comm, PEXSI_Spatial_Group, Ierr)
call MPI_Group_incl(PEXSI_Spatial_Group, npPerPole,   &
     (/ (i,i=0,npPerPole-1) /),&
     PEXSI_Pole_Group, Ierr)
call MPI_Comm_create(PEXSI_Spatial_Comm, PEXSI_Pole_Group,&
     PEXSI_Pole_Comm, Ierr)


call mpi_comm_rank( PEXSI_Spatial_Comm, global_rank, ierr )
call mpi_comm_rank( PEXSI_Spin_Comm, spin_rank, ierr )
PEXSI_worker = (global_rank < npPerPole)   ! Could be spin up or spin down

! PEXSI blocksize 
pbs = norbs/npPerPole
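! (Integer division: in the usual PEXSI block layout, presumably the first
!  npPerPole-1 pole processes hold pbs orbitals each and the last one
!  holds the remainder.)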

! Careful with this. For the purposes of matrix transfers,
! we need the ranks of the Pole group
! in the "bridge" communicator/group (World)

allocate(pexsi_pole_ranks_in_world(npPerPole))
call MPI_Comm_Group(World_Comm, World_Group, Ierr)

call MPI_Group_translate_ranks( PEXSI_Pole_Group, npPerPole, &
     (/ (i,i=0,npPerPole-1) /), &
     World_Group, pexsi_pole_ranks_in_world, ierr )

! What we need is to include the actual world ranks
! in the distribution object
allocate (PEXSI_Pole_ranks_in_World_Spin(npPerPole,nspin))
call MPI_AllGather(pexsi_pole_ranks_in_world,npPerPole,MPI_integer,&
     PEXSI_Pole_Ranks_in_World_Spin,npPerPole, &
     MPI_integer,PEXSI_Spin_Comm,ierr)

! Create distributions known to all nodes
allocate(dist2_spin(nspin))
do ispin = 1, nspin
   call newDistribution(dist2_spin(ispin), World_Comm, &
                        PEXSI_Pole_Ranks_in_World_Spin(:,ispin),  &
                        TYPE_PEXSI, pbs, "px dist")
enddo
deallocate(pexsi_pole_ranks_in_world,PEXSI_Pole_Ranks_in_World_Spin)
call MPI_Barrier(World_Comm,ierr)

3.5 Re-distribute matrices

This is slightly unseemly, but it works. The aux_matrix derived types are used to store and retrieve the matrices on either side. The code is in external auxiliary modules.

type(aux_matrix), allocatable, target :: m1_spin(:)
type(aux_matrix) :: m2
type(aux_matrix), pointer :: m1
integer :: nrows, nnz, nnzLocal, numColLocal
integer, pointer, dimension(:) ::  colptrLocal=> null(), rowindLocal=>null()
!
real(dp), pointer, dimension(:) :: &
        HnzvalLocal=>null(), SnzvalLocal=>null(),  &
        DMnzvalLocal => null() , EDMnzvalLocal => null(), &
        FDMnzvalLocal => null()
!
integer :: ispin, pexsi_spin
pexsi_spin = spin_rank+1  ! {1,2}
! This is done serially on the Siesta side, each time
! filling in the structures in one PEXSI set

allocate(m1_spin(nspin))
do ispin = 1, nspin

   m1 => m1_spin(ispin)

   if (SIESTA_worker) then
      m1%norbs = norbs
      m1%no_l  = no_l
      m1%nnzl  = sum(numH(1:no_l))
      m1%numcols => numH
      m1%cols    => listH
      allocate(m1%vals(2))
      m1%vals(1)%data => S(:)
      m1%vals(2)%data => H(:,ispin)

   endif  ! SIESTA_worker

   call timer("redist_orbs_fwd", 1)

   ! Note that we cannot simply wrap this in a pexsi_spin test, as
   ! there are Siesta nodes in both spin sets.
   ! We must discriminate the PEXSI workers by the distribution info
   dist2 => dist2_spin(ispin)
   call redistribute_spmatrix(norbs,m1,dist1,m2,dist2,World_Comm)

   call timer("redist_orbs_fwd", 2)

   if (PEXSI_worker .and. (pexsi_spin == ispin) ) then

      nrows = m2%norbs          ! or simply 'norbs'
      numColLocal = m2%no_l
      nnzLocal    = m2%nnzl
      call MPI_AllReduce(nnzLocal,nnz,1,MPI_integer,MPI_sum,PEXSI_Pole_Comm,ierr)

      call re_alloc(colptrLocal,1,numColLocal+1,"colptrLocal","pexsi_solver")
      colptrLocal(1) = 1
      do ih = 1,numColLocal
         colptrLocal(ih+1) = colptrLocal(ih) + m2%numcols(ih)
      enddo

      rowindLocal => m2%cols
      SnzvalLocal => m2%vals(1)%data
      HnzvalLocal => m2%vals(2)%data

      call re_alloc(DMnzvalLocal,1,nnzLocal,"DMnzvalLocal","pexsi_solver")
      call re_alloc(EDMnzvalLocal,1,nnzLocal,"EDMnzvalLocal","pexsi_solver")
      call re_alloc(FDMnzvalLocal,1,nnzLocal,"FDMnzvalLocal","pexsi_solver")

      call memory_all("after setting up H+S for PEXSI (PEXSI_workers)",PEXSI_Pole_Comm)

   endif ! PEXSI worker
enddo

! Make these available to all processes.
! (Note that the values are those on process 0, which is in the spin=1 set.)
! In fact, they are only needed for calls to the interface, so the broadcast
! could be over PEXSI_Spatial_Comm only.

call MPI_Bcast(nrows,1,MPI_integer,0,World_Comm,ierr)
call MPI_Bcast(nnz,1,MPI_integer,0,World_Comm,ierr)

call memory_all("after setting up H+S for PEXSI",World_comm)

3.6 Set tolerance

These settings are wrapped in a test for first_call; some other one-time operations could be added there. Maybe we can remove the 'save' attribute of some variables (for example, by moving the setting of the tolerances outside the first_call block). Even the bracket and μ values could perhaps be passed as arguments.

real(dp), save :: PEXSINumElectronToleranceMin, &
            PEXSINumElectronToleranceMax, &
            PEXSINumElectronTolerance
logical, save  :: first_call = .true.
real(dp), save :: muMin0, muMax0, mu
real(dp)       :: on_the_fly_tolerance
if (first_call) then

! Initial guess of chemical potential and containing interval
! When using inertia counts, this interval can be wide.
! Note that mu, muMin0 and muMax0 are saved variables

   muMin0           = fdf_get("PEXSI.mu-min",-1.0_dp,"Ry")
   muMax0           = fdf_get("PEXSI.mu-max", 0.0_dp,"Ry")
   mu               = fdf_get("PEXSI.mu",-0.60_dp,"Ry")

   PEXSINumElectronToleranceMin =  &
         fdf_get("PEXSI.num-electron-tolerance-lower-bound",0.01_dp)
   PEXSINumElectronToleranceMax =  &
         fdf_get("PEXSI.num-electron-tolerance-upper-bound",0.5_dp)

   ! start with the largest tolerance
   ! (except if overridden by the user)
   PEXSINumElectronTolerance = fdf_get("PEXSI.num-electron-tolerance",&
                                       PEXSINumElectronToleranceMax)
   first_call = .false.
else
!
!  Here we could also check whether we are in the first scf iteration
!  of a multi-geometry run...
!
   ! Use a moving tolerance, based on how far DM_out was from DM_in
   ! in the previous iteration (except if overridden by the user)

   call get_on_the_fly_tolerance(prevDmax,on_the_fly_tolerance)

   ! Override if tolerance is explicitly specified in the fdf file
   PEXSINumElectronTolerance =  fdf_get("PEXSI.num-electron-tolerance",&
                                        on_the_fly_tolerance)
endif

3.7 Set options

We use the options interface to get a template with default values, and then fill in a few custom options based on fdf variables. Note that the options derived type is of limited usefulness when the simple DFT driver is not used. The most important entries are then the factorization flag and the number of processors per pole.

We also use this section to define other user-level options. This is a bit of a mess, since the logic of the expert interface uses a mixed grab-bag of options entries and orphan entries, such as inertiaMaxIter.

type(f_ppexsi_options) :: options
!
integer                :: isSIdentity
integer                :: verbosity
integer                :: inertiaMaxIter
!
real(dp), save         :: energyWidthInertiaTolerance
real(dp)               :: pexsi_temperature, two_kT
!
call f_ppexsi_set_default_options( options )

options%muPEXSISafeGuard = fdf_get("PEXSI.mu-pexsi-safeguard",0.05_dp,"Ry")
options%maxPEXSIIter = fdf_get("PEXSI.mu-max-iter",10)

isSIdentity = 0

options%numPole  = fdf_get("PEXSI.num-poles",40)
options%gap      = fdf_get("PEXSI.gap",0.0_dp,"Ry")

! deltaE is in theory the spectrum width, but in practice can be much smaller
! than | E_max - mu |.  It is found that deltaE that is slightly bigger
! than  | E_min - mu | is usually good enough.

options%deltaE     = fdf_get("PEXSI.delta-E",3.0_dp,"Ry") ! Lin: 10 Ry...

! Ordering flag:
!   1: Use METIS
!   0: Use PARMETIS/PTSCOTCH
options%ordering = fdf_get("PEXSI.ordering",1)

! Number of processors for symbolic factorization
! Only relevant for PARMETIS/PT_SCOTCH
options%npSymbFact = fdf_get("PEXSI.np-symbfact",1)

verbosity = fdf_get("PEXSI.verbosity",1)
options%verbosity = verbosity

call get_current_temperature(pexsi_temperature)
options%temperature = pexsi_temperature
!
!  Set guard smearing for later use
!
two_kT = 2.0_dp * pexsi_temperature

options%numElectronPEXSITolerance = PEXSINumElectronTolerance

! Stop inertia count if mu has not changed much from iteration to iteration.

options%muInertiaTolerance =  &
     fdf_get("PEXSI.inertia-mu-tolerance",0.05_dp,"Ry")

! One-sided expansion of interval if correct mu falls outside it
options%muInertiaExpansion =  &
     fdf_get("PEXSI.lateral-expansion-inertia",3.0_dp*eV,"Ry") 


! Other user options

! Maximum number of iterations for computing the inertia                                          
! in a given scf step (until a proper bracket is obtained)                                        
inertiaMaxIter   = fdf_get("PEXSI.inertia-max-iter",5)

! Energy-width termination tolerance for inertia-counting
! By default, it is the same as the mu tolerance, to match
! the criterion in the simple DFT driver
energyWidthInertiaTolerance =  &
     fdf_get("PEXSI.inertia-energy-width-tolerance", &
             options%muInertiaTolerance,"Ry")

3.8 Initialize plan at first scf step

Each spin-set of PEXSI processors has its own plan.

if (scf_step == 1) then
   call pexsi_initialize_scfloop(PEXSI_Spatial_Comm,npPerPole,global_rank,info)
   call check_info(info,"initialize_plan")
endif

3.9 Load H and S matrices

In this version H and S are symmetric. We associate them with the plan (I do not know exactly what happens behind the scenes; presumably no copy is made).

call f_ppexsi_load_real_symmetric_hs_matrix(&
      plan,&
      options,&
      nrows,&
      nnz,&
      nnzLocal,&
      numColLocal,&
      colptrLocal,&
      rowindLocal,&
      HnzvalLocal,&
      isSIdentity,&
      SnzvalLocal,&
      info) 

call check_info(info,"load_real_sym_hs_matrix")

3.10 Factorization

This is only done at the beginning of the scf cycle. Presumably it could be done once and for all for one of the spins, but we have no way to transfer the info from the spin-up to the spin-down manifold.

if (scf_step == 1) then
   ! This is only needed for inertia-counting
   call f_ppexsi_symbolic_factorize_real_symmetric_matrix(&
        plan, &
        options,&
        info)
   call check_info(info,"symbolic_factorize_real_symmetric_matrix")

   call f_ppexsi_symbolic_factorize_complex_symmetric_matrix(&
        plan, &
        options,&
        info)
   call check_info(info,"symbolic_factorize_complex_symmetric_matrix")
endif
options%isSymbolicFactorize = 0 ! We do not need it anymore

3.11 Call the solver

3.11.1 Solver call structure

This was too much of a black box, as there is very little control over the operations. The most glaring shortcoming is the lack of proper handling of the convergence conditions.

The plan for improvement is to call the inertia counting routine, and the fermi-operator calculator, explicitly.

real(dp) :: deltaMu
real(dp) :: numElectronDrvMuPEXSI, numElectronPEXSI
real(dp), allocatable :: numElectronSpin(:), numElectronDrvMuSpin(:)
real(dp) :: numElectron_out, numElectronDrvMu_out
integer :: numTotalPEXSIIter
integer :: numTotalInertiaIter
!
numTotalInertiaIter = 0

call timer("pexsi-solver", 1)

! This is for the initial phase of the scf loop
if (need_inertia_counting()) then

   call get_bracket_for_inertia_count( )  
   call do_inertia_count(plan,muMin0,muMax0,mu)

else

   !  Maybe there is no need for bracket, just for mu estimation
   call get_bracket_for_solver()

endif

numTotalPEXSIIter = 0
allocate(numElectronSpin(nspin),numElectronDrvMuSpin(nspin))

solver_loop: do

   if (numTotalPEXSIIter > options%maxPEXSIIter ) then
      ! Maybe do not die, and trust further DM normalization
      ! to fix the number of electrons for unstable cases
      call die("too many PEXSI iterations")
   endif

   if(mpirank == 0) then
      write (6,"(a,f9.4,a,f9.5)") 'Computing DM for mu(eV): ', mu/eV, &
           ' Tol: ', PEXSINumElectronTolerance
      write (6,"(a,f9.4,f9.5)") 'Bracket: ', muMin0/eV, muMax0/eV
   endif

   call f_ppexsi_calculate_fermi_operator_real(&
        plan,&
        options,&
        mu,&
        numElectronExact,&
        numElectron_out,&
        numElectronDrvMu_out,&
        info)

   call check_info(info,"fermi_operator")

   ! Per spin
   numElectron_out = numElectron_out / nspin
   numElectronDrvMu_out =  numElectronDrvMu_out / nspin

   ! Gather the results for both spins on all processors

   call MPI_AllGather(numElectron_out,1,MPI_Double_precision,&
         numElectronSpin,1,MPI_Double_precision,PEXSI_Spin_Comm,ierr)
   call MPI_AllGather(numElectronDrvMu_out,1,MPI_Double_precision,&
         numElectronDrvMuSpin,1,MPI_Double_precision,PEXSI_Spin_Comm,ierr)

   numElectronPEXSI = sum(numElectronSpin(1:nspin))
   numElectronDrvMuPEXSI = sum(numElectronDrvMuSpin(1:nspin))

   if (mpirank == 0) then
      write(6,"(a,f10.4)") "Fermi Operator. mu: ", mu/eV
      if (nspin == 2) then
         write(6,"(a,2f10.4,a,f10.4)") "Fermi Operator. numElectron(Up,Down): ", &
                        numElectronSpin(1:nspin), " Total: ", numElectronPEXSI
         write(6,"(a,2f10.4,a,f10.4)") "Fermi Operator. dN_e/dmu(Up,Down): ", &
                        numElectronDrvMuSpin(1:nspin)*eV, " Total: ", numElectronDrvMuPEXSI*eV
      else
         write(6,"(a,f10.4)") "Fermi Operator. numElectron: ", numElectronPEXSI
         write(6,"(a,f10.4)") "Fermi Operator. dN_e/dmu: ", numElectronDrvMuPEXSI*eV
      endif
   endif

   numTotalPEXSIIter =  numTotalPEXSIIter + 1

   if (abs(numElectronPEXSI-numElectronExact) > PEXSINumElectronTolerance) then
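
      ! Newton step on N(mu), using the derivative dN/dmu returned by
      ! the fermi-operator call above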

      deltaMu = - (numElectronPEXSI - numElectronExact) / numElectronDrvMuPEXSI
      ! The simple DFT driver uses the size of the jump to flag problems:
      ! if (abs(deltaMu) > options%muPEXSISafeGuard) then

      if ( ((mu + deltaMu) < muMin0) .or. ((mu + deltaMu) > muMax0) ) then
         if (mpirank == 0) then
            write(6,"(a,f9.3,a)") "DeltaMu: ", deltaMu, " is too big. Falling back to IC"
         endif

         ! We must choose a new starting bracket, otherwise we will fall into the same
         ! cycle of values

         call do_inertia_count(plan,muMin0,muMax0,mu)

         cycle solver_loop

      endif
      mu = mu + deltaMu
      cycle solver_loop
   else
      ! Converged
      if (mpirank == 0) then
         write(6,"(a,f10.4)") "PEXSI solver converged. mu: ", mu/eV
      endif
      exit solver_loop
   endif

end do solver_loop
deallocate(numElectronSpin,numElectronDrvMuSpin)
call timer("pexsi-solver", 2)

3.12 Get output matrices and compute energies

This section is still executed by the PEXSI-group processors. Note that the retrieval call does not set the energies when the simple DFT driver is not used, so they are recomputed explicitly below.

real(dp)       :: bs_energy, eBandH, free_bs_energy
real(dp)       :: buffer1
if( PEXSI_worker ) then
   call f_ppexsi_retrieve_real_symmetric_dft_matrix(&
        plan,&
        DMnzvalLocal,&
        EDMnzvalLocal,&
        FDMnzvalLocal,&
        eBandH,&          ! Will not be available
        bs_energy,&
        free_bs_energy,&
        info)
   call check_info(info,"retrieve_real_symmetric_dft_matrix")

   if (nspin == 2) then
      ! The matrices have to be divided by two...
      DMnzvalLocal(:) = 0.5_dp * DMnzvalLocal(:)
      EDMnzvalLocal(:) = 0.5_dp * EDMnzvalLocal(:)
      !!! Watch out with this. Internals??
      FDMnzvalLocal(:) = 0.5_dp * FDMnzvalLocal(:)
   endif

endif

if ((mpirank == 0) .and. (verbosity >= 1)) then
   write(6,"(a,i3)") " #&s Number of solver iterations: ", numTotalPEXSIIter
   write(6,"(a,i3)") " #&s Number of inertia iterations: ", numTotalInertiaIter
   write(6,"(a,f12.5,f12.4,2x,a2)") "mu, N_e:", mu/eV, &
        numElectronPEXSI, "&s"
endif

if (PEXSI_worker) then

   free_bs_energy = 0.0_dp
   bs_energy = 0.0_dp
   eBandH = 0.0_dp
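   ! Since DM, EDM, and FDM share the sparsity pattern of H and S,
   ! traces such as Tr(S*EDM) reduce to dot products over the local
   ! nonzero values, followed by the reductions below.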
   do i = 1,nnzLocal
      free_bs_energy = free_bs_energy + SnzvalLocal(i) * &
           ( FDMnzvalLocal(i) ) 
      bs_energy = bs_energy + SnzvalLocal(i) * &
           ( EDMnzvalLocal(i) )
      eBandH = eBandH + HnzvalLocal(i) * &
           ( DMnzvalLocal(i) )
   enddo

   ! First, reduce over the Pole_comm

   call globalize_sum( free_bs_energy, buffer1, comm=PEXSI_Pole_Comm )
   free_bs_energy = buffer1 
   call globalize_sum( bs_energy, buffer1, comm=PEXSI_Pole_Comm )
   bs_energy = buffer1
   call globalize_sum( eBandH, buffer1, comm=PEXSI_Pole_Comm )
   eBandH = buffer1

   ! Now, reduce over both spins

   call globalize_sum( free_bs_energy, buffer1, comm=PEXSI_Spin_Comm )
   ! Note that we need an extra term: mu*N for the free energy
   free_bs_energy = buffer1 + mu*numElectronPEXSI
   call globalize_sum( bs_energy, buffer1, comm=PEXSI_Spin_Comm )
   bs_energy = buffer1
   call globalize_sum( eBandH, buffer1, comm=PEXSI_Spin_Comm )
   eBandH = buffer1

   ! This output block will be executed only if World's root node is
   ! in one of the leading pole groups, which might not always be the case.

   if ((mpirank == 0) .and. (verbosity >= 2)) then
      write(6, "(a,f12.4)") "#&s Tr(S*EDM) (eV) = ", bs_energy/eV
      write(6,"(a,f12.4)") "#&s Tr(H*DM) (eV) = ", eBandH/eV
      write(6,"(a,f12.4)") "#&s Tr(S*FDM) + mu*N (eV) = ", (free_bs_energy)/eV
   endif

   ef = mu
   ! Note that we use the S*EDM version of the band-structure energy
   ! to estimate the entropy, by comparing it to S*FDM. This looks
   ! consistent, but note that the EDM is not used in Siesta to
   ! estimate the total energy, only the DM (via the density); that
   ! is, the XC and Hartree correction terms to Ebs going into Etot
   ! are estimated using the DM.

   Entropy = - (free_bs_energy - bs_energy) / temp
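   ! (temp is kT in Ry, so Entropy is the dimensionless S/k_B; note
   !  that free_bs_energy includes the mu*N term added above)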

   ! ef and Entropy are now known to the leading-pole processes
endif ! PEXSI_worker

3.13 Copy information to Siesta side

do ispin = 1, nspin

   m1 => m1_spin(ispin)

   if (PEXSI_worker .and. (pexsi_spin == ispin)) then
      ! Prepare m2 to transfer

      call de_alloc(FDMnzvalLocal,"FDMnzvalLocal","pexsi_solver")
      call de_alloc(colPtrLocal,"colPtrLocal","pexsi_solver")

      call de_alloc(m2%vals(1)%data,"m2%vals(1)%data","pexsi_solver")
      call de_alloc(m2%vals(2)%data,"m2%vals(2)%data","pexsi_solver")

      m2%vals(1)%data => DMnzvalLocal(1:nnzLocal)
      m2%vals(2)%data => EDMnzvalLocal(1:nnzLocal)

   endif

   ! Prepare m1 to receive the results
   if (SIESTA_worker) then
      nullify(m1%vals(1)%data)    ! formerly pointing to S
      nullify(m1%vals(2)%data)    ! formerly pointing to H
      deallocate(m1%vals)
      nullify(m1%numcols)         ! formerly pointing to numH
      nullify(m1%cols)            ! formerly pointing to listH
   endif

   call timer("redist_orbs_bck", 1)
   dist2 => dist2_spin(ispin)
   call redistribute_spmatrix(norbs,m2,dist2,m1,dist1,World_Comm)
   call timer("redist_orbs_bck", 2)

   if (PEXSI_worker .and. (pexsi_spin == ispin)) then
      call de_alloc(DMnzvalLocal, "DMnzvalLocal", "pexsi_solver")
      call de_alloc(EDMnzvalLocal,"EDMnzvalLocal","pexsi_solver")

      nullify(m2%vals(1)%data)    ! formerly pointing to DM
      nullify(m2%vals(2)%data)    ! formerly pointing to EDM
      deallocate(m2%vals)
      ! allocated in the direct transfer
      call de_alloc(m2%numcols,"m2%numcols","pexsi_solver")
      call de_alloc(m2%cols,   "m2%cols",   "pexsi_solver")
   endif

   ! We assume that Siesta's root node also belongs to one of the
   ! leading-pole PEXSI communicators.
   ! Note that by wrapping the broadcasts in the SIESTA_worker test we
   ! do not make ef and Entropy known to the non-leading PEXSI processes.

   if (SIESTA_worker) then
      call broadcast(ef,comm=SIESTA_Comm)
      call broadcast(Entropy,comm=SIESTA_Comm)
      ! In future, m1%vals(1,2) could point to DM and EDM, and the
      ! 'redistribute' routine could check whether the vals arrays are
      ! associated and use them instead of allocating new ones.
      DM(:,ispin)  = m1%vals(1)%data(:)    
      EDM(:,ispin) = m1%vals(2)%data(:)    
      ! Check no_l
      if (no_l /= m1%no_l) then
         call die("Mismatch in no_l")
      endif
      ! Check listH
      if (any(listH(:) /= m1%cols(:))) then
         call die("Mismatch in listH")
      endif

      call de_alloc(m1%vals(1)%data,"m1%vals(1)%data","pexsi_solver")
      call de_alloc(m1%vals(2)%data,"m1%vals(2)%data","pexsi_solver")
      deallocate(m1%vals)
      ! allocated in the direct transfer
      call de_alloc(m1%numcols,"m1%numcols","pexsi_solver") 
      call de_alloc(m1%cols,   "m1%cols",   "pexsi_solver")

   endif
enddo
call timer("pexsi", 2)

3.14 Clean up

We cannot finalize the plan now if we are going to reuse it in subsequent iterations… This is done by siesta_forces at the end of the scf cycle.

Here we only clean up the communicators and groups and the distributions.

call delete(dist1)
do ispin = 1, nspin
  call delete(dist2_spin(ispin))
enddo
deallocate(dist2_spin)
deallocate(m1_spin)

call MPI_Comm_Free(PEXSI_Spatial_Comm, ierr)
call MPI_Comm_Free(PEXSI_Pole_Comm, ierr)
call MPI_Comm_Free(PEXSI_Spin_Comm, ierr)
call MPI_Comm_Free(World_Comm, ierr)
!
call MPI_Group_Free(PEXSI_Spatial_Group, ierr)
call MPI_Group_Free(PEXSI_Pole_Group, ierr)
call MPI_Group_Free(World_Group, ierr)

3.15 Support routines

Several support routines, defined in the subsections below.

<<inertia-count-iteration>>
<<get-on-the-fly-tolerance>>
<<need-inertia-counting>>
<<get-bracket-for-inertia-count>>
<<get-bracket-for-solver>>
<<get-current-temperature>>
<<linear-interpolation-routine>>
<<check-info>>

3.15.1 Inertia-count iteration

subroutine do_inertia_count(plan,muMin0,muMax0,muInertia)
  use iso_c_binding, only : c_intptr_t
  use m_convergence

  integer(c_intptr_t)      :: plan
  real(dp), intent(inout)  :: muMin0, muMax0
  real(dp), intent(out)    :: muInertia

  real(dp)            ::   muMinInertia, muMaxInertia
  integer             ::   nInertiaRounds

  real(dp), parameter ::   eps_inertia = 0.1_dp
  type(converger_t)   ::   conv_mu
  logical             ::   bad_lower_bound, bad_upper_bound
  logical             ::   interval_problem, one_more_round
  real(dp)            ::   inertia_electron_width
  real(dp)            ::   inertia_original_electron_width
  real(dp)            ::   inertia_energy_width
  real(dp)            ::   muLower, muUpper
  integer             ::   numMinICountShifts, numShift

  real(dp), allocatable :: shiftList(:), inertiaList(:)
  real(dp), allocatable :: inertiaList_out(:)

  integer :: imin, imax

  <<determine-number-of-shifts>>

  nInertiaRounds = 0

  refine_interval: do
      <<refine-interval-by-inertia-count>>
      numTotalInertiaIter = numTotalInertiaIter + 1
  enddo refine_interval

  deallocate(shiftList,inertiaList)

 end subroutine do_inertia_count
  1. Determine number of inertia-count shifts

    This is based on the total number of processors available, in such a way that each group of npPerPole processors deals with one shift.

    ! Minimum number of sampling points for inertia counts                                            
    numMinICountShifts = fdf_get("PEXSI.inertia-min-num-shifts", 10)
    
    numShift = numNodesTotal/npPerPole
    do
       if (numShift < numMinICountShifts) then
          numShift = numShift + numNodesTotal/npPerPole
       else
          exit
       endif
    enddo
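    ! Example: with 64 ranks and npPerPole = 4 there are 16 pole groups,
    ! so numShift = 16 right away; with 8 ranks and npPerPole = 4 the
    ! loop grows numShift as 2, 4, 6, 8, 10 and stops at 10.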
    
    allocate(shiftList(numShift), inertiaList(numShift))
    allocate(inertiaList_out(numShift))
    
  2. Refine interval by inertia count

    This is the body of the old expert inertia-count loop. We begin by setting up the shift list and calling the workhorse routine (which does nothing extra inside; it just computes the T=0 inertia).

    options%muMin0 = muMin0
    options%muMax0 = muMax0
    
    if (mpirank == 0) then
       write (6,"(a,2f9.4,a,a,i4)") 'Calling inertiaCount: [', &
            muMin0/eV, muMax0/eV, "] (eV)", &
            " Nshifts: ", numShift
    endif
    
    call timer("pexsi-inertia-ct", 1)
    
    do i = 1, numShift
       shiftList(i) = muMin0 + (i-1) * (muMax0-muMin0)/(numShift-1)
    enddo
    
    call f_ppexsi_inertia_count_real_symmetric_matrix(&
         plan,&
         options,&
         numShift,&
         shiftList,&
         inertiaList_out,&
         info) 
    
    call check_info(info,"inertia-count")
    
    ! All-Reduce to add the (two) spin inertias
    ! so that all processors have the complete inertiaList(:)
    call MPI_AllReduce(inertiaList_out, inertiaList, &
         numShift, MPI_Double_precision, &
         MPI_Sum, PEXSI_Spin_Comm, ierr)
    
    ! If nspin=1, each state is doubly occupied
    ! 
    inertiaList(:) = 2 * inertiaList(:) / nspin
    
    
    call timer("pexsi-inertia-ct", 2)
    

    Now we need to make sure that the results make sense. First, that we had μ in our starting interval. If not, we immediately expand the bounds and go back to the top of the loop.

    interval_problem = .false.
    
    if(mpirank == 0) then
       bad_lower_bound = (inertiaList(1) > (numElectronExact - 0.1)) 
       bad_upper_bound = (inertiaList(numShift) < (numElectronExact + 0.1)) 
    endif
    
    call broadcast(bad_lower_bound,comm=World_Comm)
    call broadcast(bad_upper_bound,comm=World_Comm)
    
    if (bad_lower_bound) then
       interval_problem =  .true.
       muMin0 = muMin0 - options%muInertiaExpansion ! 0.5
       if (mpirank==0) then
          write (6,"(a,2f12.4,a,2f10.4)") 'Wrong inertia-count interval (lower end). Counts: ', &
               inertiaList(1), inertiaList(numShift), &
               ' New interval: ', muMin0/eV, muMax0/eV
       endif
    endif
    if (bad_upper_bound) then
       interval_problem =  .true.
       muMax0 = muMax0 + options%muInertiaExpansion ! 0.5
       if (mpirank==0) then
          write (6,"(a,2f12.4,a,2f10.4)") 'Wrong inertia-count interval (upper end). Counts: ', &
               inertiaList(1), inertiaList(numShift), &
               ' New interval: ', muMin0/eV, muMax0/eV
       endif
    endif
    
    if (interval_problem) then
       ! do nothing more, stay in loop
       cycle refine_interval
    endif
    

    If we did have μ in the interval, we consider this a bona-fide inertia-count iteration and update the counter.

    Next, we scan the list of inertia values to obtain a new interval for μ. For now, we use the T=0 values, without any broadening.

    nInertiaRounds = nInertiaRounds + 1
    
    imin = 1; imax = numShift
    
    do i = 1, numShift
       if (inertiaList(i) < numElectronExact - eps_inertia) then
          imin = max(imin,i)
       endif
       if (inertiaList(i) > numElectronExact + eps_inertia) then
          imax = min(imax,i)
       endif
    enddo
    muMaxInertia = shiftList(imax)
    muMinInertia = shiftList(imin)
    
    ! Get the band edges by interpolation
    muLower = interpolate(inertiaList,shiftList,numElectronExact-eps_inertia)
    muUpper = interpolate(inertiaList,shiftList,numElectronExact+eps_inertia)
    
    muInertia = 0.5_dp * (muUpper + muLower)
    
    if (mpirank == 0) then
       write (6,"(a,i3,f10.4,i3,f10.4)") 'imin, muMinInertia, imax, muMaxInertia: ',&
              imin, muMinInertia/eV, imax, muMaxInertia/eV
       write (6,"(a,2f10.4,a,f10.4)") 'muLower, muUpper: ', muLower/eV, muUpper/eV, &
            ' mu estimated: ', muInertia/eV
    endif
    

    Now we have to decide whether we are satisfied with the estimation of μ and the new bracket. We have several possible criteria: the actual width in energy of the bracket, the width in "states" (both of the original interval and the narrower interval), and the behavior of μ itself with successive iterations.

    The problem with the "states" measures is that they depend on the size of the system. Energy measures (width and convergence of μ) are more universal. The default tolerance for the change in μ is 0.05 Ry (about 0.7 eV), and by default the energy-width tolerance takes the same value.

    We also exit the loop if we have done too many IC iterations.

    Note: We can probably do this on all processors, without needing to wrap the tests in (mpirank == 0) blocks and broadcasting afterwards. But proper documentation in the PEXSI library about these issues is lacking.

      if (mpirank==0) then
    
         inertia_energy_width = (muMaxInertia - muMinInertia)
         ! Note that this is the width of the starting interval...
         inertia_original_electron_width = (inertiaList(numShift) - inertiaList(1))
         inertia_electron_width = (inertiaList(imax) - inertiaList(imin))
    
         write (6,"(a,2f9.4,a,f9.4,3(a,f10.3))") ' -- new bracket (eV): [', &
              muMinInertia/eV, muMaxInertia/eV,  &
              "] estimated mu: ", muInertia/eV, &
              " Nel width: ", inertia_electron_width, &
              " (Base: ", inertia_original_electron_width, &
              " ) E width: ", inertia_energy_width/eV
    
         if (nInertiaRounds == 1) then
            call reset(conv_mu)
            call set_tolerance(conv_mu,options%muInertiaTolerance)
         endif
         call add_value(conv_mu, muInertia)
    
    
         one_more_round = .true.
    
    !!$     if (inertia_original_electron_width < inertiaNumElectronTolerance) then
    !!$        write (6,"(a)") 'Leaving inertia loop: electron tolerance'
    !!$        one_more_round = .false.
    !!$     endif
    !!$     if (inertia_electron_width < inertiaMinNumElectronTolerance) then
    !!$        write (6,"(a)") 'Leaving inertia loop: minimum workable electron tolerance'
    !!$        one_more_round = .false.
    !!$     endif
    
         ! This is the first clause of Lin's criterion
         ! in the simple DFT driver. The second clause is the same as the next one
         ! when the energy-width tolerance is the same as the mu tolerance (my default)
         ! I am not sure about the basis for this
         if (abs(muMaxInertia -numElectronExact) < eps_inertia ) then
            write (6,"(a,f12.6)") "Leaving inertia loop: |muMaxInertia-N_e|: ", &
                 abs(muMaxInertia -numElectronExact)
            one_more_round = .false.
         endif
         if (inertia_energy_width < energyWidthInertiaTolerance) then
            write (6,"(a,f12.6)") 'Leaving inertia loop: energy width tolerance: ', &
             energyWidthInertiaTolerance/eV
            one_more_round = .false.
         endif
         if (is_converged(conv_mu)) then
            write (6,"(a,f12.6)") 'Leaving inertia loop: mu tolerance: ', options%muInertiaTolerance/eV
            one_more_round = .false.
         endif
         if (nInertiaRounds == inertiaMaxIter) then
            write (6,"(a)") 'Leaving inertia loop: too many rounds'
            one_more_round = .false.
         endif
      endif
      call broadcast(one_more_round,comm=World_Comm)
    
      if (one_more_round) then
         ! stay in loop
         ! These values should be guarded, in case the refined interval
         ! is too tight. Use 2*kT
         ! 
         muMin0 = muMinInertia - two_kT
         muMax0 = muMaxInertia + two_kT
      else
         exit refine_interval
      endif
    

3.15.2 On-the-fly tolerance determination

!
! This routine encodes the heuristics to compute the
! tolerance dynamically.
!
subroutine get_on_the_fly_tolerance(dDmax,tolerance)
real(dp), intent(in)  :: dDmax
real(dp), intent(out) :: tolerance

real(dp) :: tolerance_preconditioner
real(dp) :: tolerance_target_factor, tolerance_exp
real(dp), save :: previous_tolerance
logical :: new_algorithm

new_algorithm = fdf_get("PEXSI.dynamical-tolerance",.false.)
!
!
if (new_algorithm) then

!   By default, the tolerance goes to the (minimum) target 
!   at a level 5 times dDtol

   tolerance_target_factor = fdf_get("PEXSI.tolerance-target-factor",5.0_dp)

!
!  This can range in a (0.5,2.0) interval, approximately

   tolerance_preconditioner = fdf_get("PEXSI.tolerance-preconditioner",1.0_dp)

   if (scf_step > 1 ) then

      tolerance_exp = log10(dDmax/(tolerance_target_factor*dDtol))
      ! 
  !   range = log10(PEXSINumElectronToleranceMax/PEXSINumElectronToleranceMin)
      tolerance_exp = max(tolerance_exp,0.0_dp)*tolerance_preconditioner
      tolerance = PEXSINumElectronToleranceMin * 10.0_dp**tolerance_exp
      tolerance = min(tolerance,PEXSINumElectronToleranceMax)

      if (tolerance > previous_tolerance) then
         if (mpirank==0) write(6,"(a,f10.2)") &
              "Will not raise PEXSI solver tolerance to: ", &
              tolerance
         tolerance = previous_tolerance
      endif
      previous_tolerance = tolerance
   else
      ! No heuristics for now for first step
      ! Note that this should really change in MD or geometry optimization
      previous_tolerance = huge(1.0_dp)
      tolerance = PEXSINumElectronToleranceMax

   endif
else
   tolerance = max(PEXSINumElectronToleranceMin, &
                   min(dDmax, PEXSINumElectronToleranceMax))
endif

if (mpirank==0) write(6,"(a,f10.2)") &
     "Current PEXSI solver tolerance: ", tolerance

end subroutine get_on_the_fly_tolerance
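
A worked numerical check of the heuristic above (a standalone sketch; the fdf defaults, a preconditioner of 1, and dDtol = 1.0e-4 are assumptions for illustration):

program tolerance_demo
  implicit none
  integer, parameter :: dp = selected_real_kind(10,100)
  real(dp), parameter :: tol_min = 0.01_dp, tol_max = 0.5_dp
  real(dp), parameter :: dDtol = 1.0e-4_dp, target_factor = 5.0_dp
  real(dp) :: dDmax, texp, tol

  dDmax = 0.1_dp       ! far from convergence
  texp  = max(log10(dDmax/(target_factor*dDtol)), 0.0_dp)  ! log10(200) ~ 2.3
  tol   = min(tol_min * 10.0_dp**texp, tol_max)            ! 2.0, clipped to 0.5
  write(*,"(a,f8.4)") "tolerance for dDmax = 0.1:   ", tol

  dDmax = 0.001_dp     ! close to convergence
  texp  = max(log10(dDmax/(target_factor*dDtol)), 0.0_dp)  ! log10(2) ~ 0.3
  tol   = min(tol_min * 10.0_dp**texp, tol_max)            ! ~ 0.02
  write(*,"(a,f8.4)") "tolerance for dDmax = 0.001: ", tol
end program tolerance_demo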

3.15.3 Decide whether inertia-counting is needed

!------------------------------------------------------------------
! This function will determine whether an initial inertia-counting
! stage is needed, based on user input and the level of convergence
!
! Variables used through host association for now:
!
!      scf_step, prevDmax
!      mpirank, verbosity, write_ok
!
! Some logging output is done, so this function is not pure.

function need_inertia_counting() result(do_inertia_count)
logical :: do_inertia_count

real(dp) :: safe_dDmax_NoInertia
integer  :: isInertiaCount, numInertiaCounts

! Use inertia counts?
! The use of this input variable is deprecated. Warn the user
! only if there is a disagreement.

isInertiaCount = fdf_get("PEXSI.inertia-count",-1)
! For how many scf steps?
numInertiaCounts = fdf_get("PEXSI.inertia-counts",3)

if ((isInertiaCount == 0) .and. (numInertiaCounts > 0)) then 
   if (mpirank == 0) write(6,"(a)")  &
        "Warning: Inertia-counts turned off by legacy parameter" // &
        " PEXSI.inertia-count"
   numInertiaCounts = 0
endif

safe_dDmax_NoInertia = fdf_get("PEXSI.safe-dDmax-no-inertia",0.05_dp)

do_inertia_count = .false.

write_ok = ((mpirank == 0) .and. (verbosity >= 1))

if (numInertiaCounts > 0) then
  if (scf_step .le. numInertiaCounts) then
     if (write_ok) write(6,"(a,i4)")  &
      "&o Inertia-count step scf_step<numIC", scf_step
     do_inertia_count = .true.
  endif
else  if (numInertiaCounts < 0) then
   if (scf_step <= -numInertiaCounts) then
      if (write_ok) write(6,"(a,i4)") &
           "&o Inertia-count step scf_step<-numIC ", scf_step
      do_inertia_count = .true.
   else if (prevDmax > safe_dDmax_NoInertia) then
      if (write_ok) write(6,"(a,i4)") &
           "&o Inertia-count step as prevDmax > safe_Dmax ", scf_step
      do_inertia_count = .true.
   endif
endif

end function need_inertia_counting

3.15.4 Get bracket for inertia-counting

!---------------------------------------------------------------
!  Chooses the proper interval for the call to the driver
!  in case we need a stage of inertia counting  
!
subroutine get_bracket_for_inertia_count()

 real(dp)       :: safe_width_ic
 real(dp)       :: safe_dDmax_Ef_inertia

 safe_width_ic = fdf_get("PEXSI.safe-width-ic-bracket",4.0_dp*eV,"Ry")
 safe_dDmax_Ef_Inertia = fdf_get("PEXSI.safe-dDmax-ef-inertia",0.1_dp)

write_ok = ((mpirank == 0) .and. (verbosity >= 1))

 ! Proper bracketing                                                           
 if (scf_step > 1) then
   if (prevDmax < safe_dDmax_Ef_inertia) then
      ! Shift brackets using estimate of Ef change from previous iteration 
      !                                                                    
      if (write_ok) write(6,"(a)") &
         "&o Inertia-count bracket shifted by Delta_Ef"
      ! This might be risky, if the final interval of the previous iteration   
      ! is too narrow. We should broaden it by o(kT)                           
      ! The usefulness of delta_Ef is thus debatable...                        

      muMin0 = muMin0 + delta_Ef - two_kT
      muMax0 = muMax0 + delta_Ef + two_kT
   else
      ! Use a large enough interval around the previous estimation of   
      ! mu (the gap edges are not available...)  
      if (write_ok) write(6,"(a)") "&o Inertia-count safe bracket"
!      muMin0 = min(muLowerEdge - 0.5*safe_width_ic, muMinInertia)
      muMin0 = min(mu - 0.5*safe_width_ic, muMin0)
!      muMax0 = max(muUpperEdge + 0.5*safe_width_ic, muMaxInertia)
      muMax0 = max(mu + 0.5*safe_width_ic, muMax0)
   endif
 else
    if (write_ok) write(6,"(a)") &
       "&o Inertia-count called with iscf=1 parameters"
 endif
end subroutine get_bracket_for_inertia_count

3.15.5 Get bracket for solver

subroutine get_bracket_for_solver()

    real(dp)       :: safe_width_solver
    real(dp)       :: safe_dDmax_Ef_solver

safe_width_solver = fdf_get("PEXSI.safe-width-solver-bracket",4.0_dp*eV,"Ry")
safe_dDmax_Ef_solver = fdf_get("PEXSI.safe-dDmax-ef-solver",0.05_dp)

write_ok = ((mpirank == 0) .and. (verbosity >= 1))

! Note: the bracket logic here is still tentative, pending clarification of the flow

  if (scf_step > 1) then
     if (prevDmax < safe_dDmax_Ef_solver) then
        if (write_ok) write(6,"(a)") "&o Solver mu shifted by delta_Ef"
        mu = mu + delta_Ef
     endif
     ! Always provide a safe bracket around mu, in case we need to fallback
     ! to executing a cycle of inertia-counting
     if (write_ok) write(6,"(a)") "&o Safe solver bracket around mu"
     muMin0 = mu - 0.5*safe_width_solver
     muMax0 = mu + 0.5*safe_width_solver
  else
     if (write_ok) write(6,"(a)") "&o Solver called with iscf=1 parameters"
     ! do nothing. Keep mu, muMin0 and muMax0 as they are inherited
  endif
end subroutine get_bracket_for_solver

3.15.6 Compute current temperature if annealing

We use a saved variable for keeping track of the previous temperature.

real(dp), save :: previous_pexsi_temperature
!------------------------------------------------------
! If using the "annealing" feature, this routine computes
! the current temperature to use in the PEXSI solver
!
subroutine get_current_temperature(pexsi_temperature)
  real(dp), intent(out) :: pexsi_temperature

 logical  :: use_annealing
 real(dp) :: annealing_preconditioner, temp_factor
 real(dp) :: annealing_target_factor

 use_annealing = fdf_get("PEXSI.use-annealing",.false.)
 if (use_annealing) then
   annealing_preconditioner = fdf_get("PEXSI.annealing-preconditioner",1.0_dp)
!   By default, the temperature goes to the target at a level 10 times dDtol
   annealing_target_factor = fdf_get("PEXSI.annealing-target-factor",10.0_dp)

   if (scf_step > 1 ) then

      ! Examples for target_factor = 10, dDtol = 0.0001:
      !   prevDmax = 0.1, preconditioner = 1: factor = 3
      !   prevDmax = 0.1, preconditioner = 2: factor = 5
      !   prevDmax = 0.1, preconditioner = 3: factor = 7
      !   prevDmax <= 0.001: factor = 1

      temp_factor = (log10(prevDmax/(annealing_target_factor*dDtol)))
      temp_factor = 1 + annealing_preconditioner * max(0.0_dp, temp_factor)

      pexsi_temperature = temp_factor * temperature
      if (pexsi_temperature > previous_pexsi_temperature) then
         if (mpirank==0) write(6,"(a,f10.2)") &
              "Will not raise PEXSI temperature to: ", &
              pexsi_temperature/Kelvin
         pexsi_temperature = previous_pexsi_temperature
      endif
      previous_pexsi_temperature = pexsi_temperature
   else
      ! No heuristics for now for first step
      previous_pexsi_temperature = huge(1.0_dp)
      pexsi_temperature = temperature
      !   Keep in mind for the future if modifying T at the 1st step
      !      previous_pexsi_temperature = pexsi_temperature
   endif
 else
    pexsi_temperature = temperature
 endif
if (mpirank==0) write(6,"(a,f10.2)") &
     "Current PEXSI temperature (K): ", pexsi_temperature/Kelvin
end subroutine get_current_temperature

3.15.7 Linear interpolation routine

A very simple routine.

function interpolate(xx,yy,x) result(val)
!
! Interpolate linearly in the (monotonically increasing!) arrays xx and yy
!
integer, parameter :: dp = selected_real_kind(10,100)

real(dp), intent(in) :: xx(:), yy(:)
real(dp), intent(in) :: x
real(dp)             :: val

integer :: i, n

n = size(xx)
if (size(yy) /= n) call die("Mismatch in array sizes in interpolate")

if ( (x < xx(1)) .or. (x > xx(n))) then
   call die("Interpolate: x not in range")
endif

do i = 2, n
   if (x <= xx(i)) then
      val = yy(i-1) + (x-xx(i-1)) * (yy(i)-yy(i-1))/(xx(i)-xx(i-1))
      exit
   endif
enddo

end function interpolate
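
For instance (values invented for illustration):

! With inertiaList = (/ 10.0_dp, 12.0_dp, 16.0_dp /)   (electron counts)
! and  shiftList   = (/ -1.0_dp, -0.8_dp, -0.6_dp /)   (shifts, in Ry),
! interpolate(inertiaList, shiftList, 14.0_dp) returns -0.7 Ry,
! halfway between the second and third shifts.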

3.15.8 Error dispatcher

subroutine check_info(info,str)
integer, intent(in) :: info
character(len=*), intent(in) :: str

    if (mpirank == 0) then
       if (info /= 0) then
          write(6,*) trim(str) // " info : ", info
          call die("Error exit from " // trim(str) // " routine")
       endif
       call pxfflush(6)
    endif
end subroutine check_info

Author: Alberto Garcia

Created: 2015-10-19 Mon 14:22
