matrix_inverse.fpp Source File


Contents

Source Code


Source Code

#include "unused_dummy.inc"

!> A module to hold different routines which can be used to invert a matrix
!>
!> Really we should provide an interface to different versions of these routines for
!> different data types, doing things in-place or not and with assumed arrays and not etc.
!>
!> If our code were needs to do lots of inverts then it may make sense to create a small
!> timing routine which can be used to select the most efficient method for a given size on
!> the current machine etc. This could then be used with a generic invert routine, the basic
!> idea being:
!> subroutine invert(a)
!> :
!> !If this is the first call then do some timing to pick best method
!> if(first) call pick_best_method
!> !This will set a module level variable identifying the best routine
!> select case(best_method)
!> case(0)
!>  call invert_type_1(a)
!> :
!> end select
!> :
!> end subroutine
!> Of course the best method may change with matrix size so we may need to calculate
!> best_method for a number of sizes and then pick the one corresponding to the closest
!> sized array to the one we have.
module mat_inv

  implicit none

  private

  !> A simple wrapper type around an integer to represent different
  !> inversion methods. We could consider deriving from this and
  !> binding the specific inversion methods to the type, but that is
  !> perhaps more complication than required.
  type matrix_inversion_method_type
     private
     integer :: flag = 0
     character(len=16) :: name = "UNSET NAME"
   contains
     procedure, public :: get_flag => matrix_inversion_method_get_flag
     procedure, public :: get_name => matrix_inversion_method_get_name
  end type matrix_inversion_method_type

  type(matrix_inversion_method_type), parameter :: mat_inv_serial_lapack = matrix_inversion_method_type( &
       flag = 1, name = 'Lapack')
  type(matrix_inversion_method_type), parameter :: mat_inv_serial_gauss_jordan = matrix_inversion_method_type( &
       flag = 2, name = 'Serial GJ')
  type(matrix_inversion_method_type), parameter :: mat_inv_serial_block = matrix_inversion_method_type( &
       flag = 3, name = 'Block')
  type(matrix_inversion_method_type), parameter :: mat_inv_omp_gauss_jordan = matrix_inversion_method_type( &
       flag = 4, name = 'OMP GJ')

  !> Array of all available methods
  type(matrix_inversion_method_type), dimension(*), parameter :: inversion_methods = [ &
#ifdef LAPACK
       mat_inv_serial_lapack, &
#endif
       mat_inv_serial_gauss_jordan, &
       mat_inv_serial_block, &
       mat_inv_omp_gauss_jordan &
       ]

  !/Utility
  public :: matrix_inversion_method_type

  !/Flags
  public :: mat_inv_serial_gauss_jordan
  public :: mat_inv_serial_lapack
  public :: mat_inv_serial_block
  public :: mat_inv_omp_gauss_jordan
  public :: inversion_methods
  public :: recommend_inversion_method

  !/Serial
  !> Generic wrapper
  public :: invert_serial
  !> Specific methods
  public :: invert_block
  public :: invert_lapack
  public :: invert_gauss_jordan
  !/OpenMP
  public :: invert_gauss_jordan_omp
  !/MPI

!NOTE: No attempt has been made to optimise any of these routines yet.
contains

  !> Helper routine to get access to the matrix_inversion_method's flag
  !> in a read-only way. Primarily used for testing.
  elemental integer function matrix_inversion_method_get_flag(self) result(flag)
    implicit none
    class(matrix_inversion_method_type), intent(in) :: self
    flag = self%flag
  end function matrix_inversion_method_get_flag

  !> Helper routine to get access to the matrix_inversion_method's name
  !> in a read-only way. Primarily used for testing.
  elemental character(len=16) function matrix_inversion_method_get_name(self) result(name)
    implicit none
    class(matrix_inversion_method_type), intent(in) :: self
    name = self%name
  end function matrix_inversion_method_get_name

  !> Times each available method for the given problem size and returns
  !> the flag of the fastest one.
  function recommend_inversion_method(trial_size, repeats, display_times) result(method)
    use job_manage, only: timer_local
    use file_utils, only: error_unit
    use ran, only: ranf
    use optionals, only: get_option_with_default
    implicit none
    integer, intent(in) :: trial_size
    integer, intent(in), optional :: repeats
    logical, intent(in), optional :: display_times
    type(matrix_inversion_method_type) :: method
    real :: start_time
    integer :: method_index, j, k
    complex, dimension(:, :), allocatable :: array
    integer :: error, number_of_methods, number_of_repeats, repeat
    real, dimension(:), allocatable :: times
    logical :: should_display_times

    ! Initialise to invalid method flag
    method = matrix_inversion_method_type(flag = -1)

    ! Try to allocate matrix of trial_size, if it doesn't work report error
    ! and return.
    allocate(array(trial_size, trial_size), stat = error)

    if (error > 0) then
       write(error_unit(), '("Unable to allocated trial array with size ",I0)') trial_size
       return
    end if

    ! Initialise array
    do j = 1, trial_size
       do k = 1, trial_size
          array(j,k) = cmplx(ranf(), ranf())
       end do

       ! Try to ensure array is diagonally dominant
       array(j, j) = array(j, j) + 1.0
    end do

    number_of_methods = size(inversion_methods)

    allocate(times(number_of_methods))
    times = 0.0

    number_of_repeats = get_option_with_default(repeats, 1)
    should_display_times = get_option_with_default(display_times, .false.)

    ! Time each method
    do method_index = 1, number_of_methods
       start_time = timer_local()
       do repeat = 1, number_of_repeats
          call invert_serial(array, trial_size, inversion_methods(method_index))
       end do
       times(method_index) = timer_local() - start_time
    end do

    ! Report the results if requested
    if (should_display_times) then
       write(*,'("Matrix inversion benchmark : ",I0,&
            &" repeats of square matrices with size ",I0)') number_of_repeats, trial_size
       write(*,'("Method",T18,"Total time (s)")')
       do method_index = 1, number_of_methods
          method = inversion_methods(method_index)
          write(*,'(A16,T18,F12.8)') trim(method%get_name()), times(method_index)
       end do
    end if

    ! Recommend the method with the smallest time
    method = inversion_methods( minloc(times, dim = 1))
  end function recommend_inversion_method

  !>Serial

  !> Provides a wrapper around the various serial inversion routines.
  subroutine invert_serial(a, n, method)
    use mp, only: mp_abort
    implicit none
    integer,intent(in)::n
    complex,dimension(n,n),intent(inout)::a
    type(matrix_inversion_method_type), optional, intent(in) :: method
    type(matrix_inversion_method_type) :: internal_method

    ! Here we could choose to prefer lapack. In the longer term we may
    ! want to have a recommend_method routine which can recommend an
    ! appropriate method for a particular problem size.

#ifdef LAPACK
    internal_method = mat_inv_serial_lapack
#else
    internal_method = mat_inv_serial_block
#endif

    if (present(method)) internal_method = method

    select case (internal_method%flag)
    case(mat_inv_serial_gauss_jordan%flag)
       call invert_gauss_jordan(a, n)
#ifdef LAPACK
    case(mat_inv_serial_lapack%flag)
       call invert_lapack(a, n)
#endif
    case(mat_inv_omp_gauss_jordan%flag)
       call invert_gauss_jordan_omp(a, n)
    case(mat_inv_serial_block%flag)
       call invert_block(a, n)
    case default
       call mp_abort("Unknown inversion method passed to mat_inv::invert_serial", .true.)
    end select

  end subroutine invert_serial

  !> A routine to invert a matrix using lapack routines.
  !>
  !> Requires USE_LAPACK=on and appropriate lapack link information
  !> at build time.
  subroutine invert_lapack(a,n)
    use mp, only: mp_abort
#ifdef LAPACK
    use lapack_wrapper, only: getrf, getri
#endif
    implicit none
    integer, intent(in)::n
    complex, dimension(n, n), intent(inout)::a
#ifdef LAPACK
    complex, dimension(:), allocatable::work
    integer :: lwork
    integer :: info
    integer, dimension(:), allocatable::ipiv

    !Setup
    lwork = n * n

    !Allocates
    allocate(work(lwork), ipiv(n))

    !LU decompose
    call getrf(n, n, a, n, ipiv, info)

    if (info /= 0) then
       call mp_abort("Error during lapack LU factorization in mat_inv::invert_lapack.", .true.)
    endif

    !Invert
    call getri(n, a, n, ipiv, work, lwork, info)

    if (info /= 0) then
       call mp_abort("Error during lapack inversion in mat_inv::invert_lapack.", .true.)
    endif

    !Cleanup
    deallocate(work, ipiv)
#else
    UNUSED_DUMMY(a); UNUSED_DUMMY(n);
    call mp_abort("Attempt to use mat_inv::invert_lapack but built without lapack support.", .true.)
#endif
  end subroutine invert_lapack

  !> Serial Gauss-Jordan elimination
  subroutine invert_gauss_jordan(a,n)
    implicit none
    integer,intent(in):: n
    !Should we make this assumed shape?
    !<DD>Tagged
    complex,dimension(n,n),intent(inout) :: a

    !Is it better to make these allocatable?
    !<DD>Tagged
    complex :: tmp,fac
    integer i, k

    do i=1,n
       fac=1/a(i,i) !This would become inverse if done on blocks
       a(i,i)=1
       a(:,i)=a(:,i)*fac
       !NOTE:These loops don't shrink as i increases so load should
       !be balanced?
       do k=1,i-1
          tmp=a(i,k)
          a(i,k)=0
          a(:,k)=a(:,k)-a(:,i)*tmp
       enddo
       do k=i+1,n
          tmp=a(i,k)
          a(i,k)=0
          a(:,k)=a(:,k)-a(:,i)*tmp
       enddo
    enddo
  end subroutine invert_gauss_jordan

  !> Serial Block inverse
  !>
  !> We split the matrix into four sub-blocks:
  !>
  !>     | A B |
  !>     | C D |
  !>
  !> where A is m x m and D is (n-m) x (n-m).
  !> The inverse can then be written as
  !>
  !>     | A B |^-1 == | M N |
  !>     | C D |       | O P |
  !>
  !> With
  !>
  !>     M = [A-B(D^-1)C]^-1
  !>     N = -MB(D^-1)
  !>     O = -(D^-1)CM
  !>     P = (D^-1) + (D^-1)CMB(D^-1)
  !>
  !> In practice if we define
  !>
  !>     T = -B(D^-1)
  !>     S = -(D^-1)C
  !>
  !> we can then write
  !>
  !>     M = [A + TC]^-1
  !>     N = MT
  !>     O = SM
  !>     P = D^-1 + SN
  !>
  !> We can therefore find the inverse of our matrix using
  !> two inversions of matrices of size ~ m x m ~ (n/2)x(n/2)
  !> two matrix additions and six matrix multiplies.
  !>
  !> The performance of this approach is dependent on an efficient
  !> matrix multiplication implementation. Most compilers will offer
  !> decent implementations and the option to use an external library
  !> such as blas.
  recursive subroutine invert_block(a,n,m_in)
    implicit none
    integer, intent(in):: n
    complex, dimension(n, n), intent(in out), target :: a
    integer, intent(in), optional :: m_in
    integer :: m
    complex, dimension(:, :), allocatable :: tmp
    complex, dimension(:, :), pointer :: aa, bb, cc, dd

    ! If the matrix is small then use direct inversion
    if (n <= 4) then
       call invert_gauss_jordan(a, n)
       return
    end if

    ! Ensure the sub-matrix size is set. We split the length n into
    ! two regions, 1:m and m+1:n.
    if(present(m_in)) then
       m = m_in
    else
       m = max(n/2, 1)
    end if

    ! Here we use pointers to give more useful names to the sub-blocks of a
    ! An associate block could be another option, but currently this leads to
    ! errors in the result.
    aa => a(1:m, 1:m)
    bb => a(1:m, m+1:n)
    cc => a(m+1:n, 1:m)
    dd => a(m+1:n, m+1:n)

    ! First calculate D^-1 and store in place
    call invert_block(dd, n-m)

    !Calculate T = -B(D^-1)
    tmp = -matmul(bb,  dd)

    ! Calculate [A+TC]
    aa = aa + matmul(tmp, cc)

    ! Invert [A+TC] and store in place. M is now done
    call invert_block(aa, m)

    ! Calculate N and store.
    bb = matmul(aa, tmp)

    ! Calculate S = -(D^-1)C
    tmp = -matmul(dd, cc)

    ! Calculate O
    cc = matmul(tmp, aa)

    ! Calculate P
    dd = dd + matmul(tmp, bb)
  end subroutine invert_block

  !> openmp Gauss-Jordan elimination
  subroutine invert_gauss_jordan_omp(a,n)
    implicit none
    integer,intent(in):: n
    !Should we make this assumed shape?
    !<DD>Tagged
    complex,dimension(n,n),intent(inout) :: a

    !Is it better to make these allocatable?
    !<DD>Tagged
    complex :: tmp,fac
    integer i, k

    !Can't omp at top level currently due to data dependencies
    do i=1,n
       fac=1/a(i,i) !This would become inverse if done on blocks
       a(i,i)=1
       a(:,i)=a(:,i)*fac
       !NOTE: Slight difference to serial version to avoid
       !having two separate parallel regions
       !$OMP PARALLEL DO DEFAULT(NONE) PRIVATE(tmp, k) SHARED(a, i, n) SCHEDULE(static)
       do k=1,n
          if(k.eq.i)cycle
          tmp=a(i,k)
          a(i,k)=0
          a(:,k)=a(:,k)-a(:,i)*tmp
       enddo
       !$OMP END PARALLEL DO
    enddo
  end subroutine invert_gauss_jordan_omp
end module mat_inv