#include "unused_dummy.inc" !> A module to hold different routines which can be used to invert a matrix !> !> Really we should provide an interface to different versions of these routines for !> different data types, doing things in-place or not and with assumed arrays and not etc. !> !> If our code were needs to do lots of inverts then it may make sense to create a small !> timing routine which can be used to select the most efficient method for a given size on !> the current machine etc. This could then be used with a generic invert routine, the basic !> idea being: !> subroutine invert(a) !> : !> !If this is the first call then do some timing to pick best method !> if(first) call pick_best_method !> !This will set a module level variable identifying the best routine !> select case(best_method) !> case(0) !> call invert_type_1(a) !> : !> end select !> : !> end subroutine !> Of course the best method may change with matrix size so we may need to calculate !> best_method for a number of sizes and then pick the one corresponding to the closest !> sized array to the one we have. module mat_inv implicit none private !> A simple wrapper type around an integer to represent different !> inversion methods. We could consider deriving from this and !> binding the specific inversion methods to the type, but that is !> perhaps more complication than required. type matrix_inversion_method_type private integer :: flag = 0 character(len=16) :: name = "UNSET NAME" contains procedure, public :: get_flag => matrix_inversion_method_get_flag procedure, public :: get_name => matrix_inversion_method_get_name end type matrix_inversion_method_type type(matrix_inversion_method_type), parameter :: mat_inv_serial_lapack = matrix_inversion_method_type( & flag = 1, name = 'Lapack') type(matrix_inversion_method_type), parameter :: mat_inv_serial_gauss_jordan = matrix_inversion_method_type( & flag = 2, name = 'Serial GJ') type(matrix_inversion_method_type), parameter :: mat_inv_serial_block = matrix_inversion_method_type( & flag = 3, name = 'Block') type(matrix_inversion_method_type), parameter :: mat_inv_omp_gauss_jordan = matrix_inversion_method_type( & flag = 4, name = 'OMP GJ') !> Array of all available methods type(matrix_inversion_method_type), dimension(*), parameter :: inversion_methods = [ & #ifdef LAPACK mat_inv_serial_lapack, & #endif mat_inv_serial_gauss_jordan, & mat_inv_serial_block, & mat_inv_omp_gauss_jordan & ] !/Utility public :: matrix_inversion_method_type !/Flags public :: mat_inv_serial_gauss_jordan public :: mat_inv_serial_lapack public :: mat_inv_serial_block public :: mat_inv_omp_gauss_jordan public :: inversion_methods public :: recommend_inversion_method !/Serial !> Generic wrapper public :: invert_serial !> Specific methods public :: invert_block public :: invert_lapack public :: invert_gauss_jordan !/OpenMP public :: invert_gauss_jordan_omp !/MPI !NOTE: No attempt has been made to optimise any of these routines yet. contains !> Helper routine to get access to the matrix_inversion_method's flag !> in a read-only way. Primarily used for testing. elemental integer function matrix_inversion_method_get_flag(self) result(flag) implicit none class(matrix_inversion_method_type), intent(in) :: self flag = self%flag end function matrix_inversion_method_get_flag !> Helper routine to get access to the matrix_inversion_method's name !> in a read-only way. Primarily used for testing. elemental character(len=16) function matrix_inversion_method_get_name(self) result(name) implicit none class(matrix_inversion_method_type), intent(in) :: self name = self%name end function matrix_inversion_method_get_name !> Times each available method for the given problem size and returns !> the flag of the fastest one. function recommend_inversion_method(trial_size, repeats, display_times) result(method) use job_manage, only: timer_local use file_utils, only: error_unit use ran, only: ranf use optionals, only: get_option_with_default implicit none integer, intent(in) :: trial_size integer, intent(in), optional :: repeats logical, intent(in), optional :: display_times type(matrix_inversion_method_type) :: method real :: start_time integer :: method_index, j, k complex, dimension(:, :), allocatable :: array integer :: error, number_of_methods, number_of_repeats, repeat real, dimension(:), allocatable :: times logical :: should_display_times ! Initialise to invalid method flag method = matrix_inversion_method_type(flag = -1) ! Try to allocate matrix of trial_size, if it doesn't work report error ! and return. allocate(array(trial_size, trial_size), stat = error) if (error > 0) then write(error_unit(), '("Unable to allocated trial array with size ",I0)') trial_size return end if ! Initialise array do j = 1, trial_size do k = 1, trial_size array(j,k) = cmplx(ranf(), ranf()) end do ! Try to ensure array is diagonally dominant array(j, j) = array(j, j) + 1.0 end do number_of_methods = size(inversion_methods) allocate(times(number_of_methods)) times = 0.0 number_of_repeats = get_option_with_default(repeats, 1) should_display_times = get_option_with_default(display_times, .false.) ! Time each method do method_index = 1, number_of_methods start_time = timer_local() do repeat = 1, number_of_repeats call invert_serial(array, trial_size, inversion_methods(method_index)) end do times(method_index) = timer_local() - start_time end do ! Report the results if requested if (should_display_times) then write(*,'("Matrix inversion benchmark : ",I0,& &" repeats of square matrices with size ",I0)') number_of_repeats, trial_size write(*,'("Method",T18,"Total time (s)")') do method_index = 1, number_of_methods method = inversion_methods(method_index) write(*,'(A16,T18,F12.8)') trim(method%get_name()), times(method_index) end do end if ! Recommend the method with the smallest time method = inversion_methods( minloc(times, dim = 1)) end function recommend_inversion_method !>Serial !> Provides a wrapper around the various serial inversion routines. subroutine invert_serial(a, n, method) use mp, only: mp_abort implicit none integer,intent(in)::n complex,dimension(n,n),intent(inout)::a type(matrix_inversion_method_type), optional, intent(in) :: method type(matrix_inversion_method_type) :: internal_method ! Here we could choose to prefer lapack. In the longer term we may ! want to have a recommend_method routine which can recommend an ! appropriate method for a particular problem size. #ifdef LAPACK internal_method = mat_inv_serial_lapack #else internal_method = mat_inv_serial_block #endif if (present(method)) internal_method = method select case (internal_method%flag) case(mat_inv_serial_gauss_jordan%flag) call invert_gauss_jordan(a, n) #ifdef LAPACK case(mat_inv_serial_lapack%flag) call invert_lapack(a, n) #endif case(mat_inv_omp_gauss_jordan%flag) call invert_gauss_jordan_omp(a, n) case(mat_inv_serial_block%flag) call invert_block(a, n) case default call mp_abort("Unknown inversion method passed to mat_inv::invert_serial", .true.) end select end subroutine invert_serial !> A routine to invert a matrix using lapack routines. !> !> Requires USE_LAPACK=on and appropriate lapack link information !> at build time. subroutine invert_lapack(a,n) use mp, only: mp_abort #ifdef LAPACK use lapack_wrapper, only: getrf, getri #endif implicit none integer, intent(in)::n complex, dimension(n, n), intent(inout)::a #ifdef LAPACK complex, dimension(:), allocatable::work integer :: lwork integer :: info integer, dimension(:), allocatable::ipiv !Setup lwork = n * n !Allocates allocate(work(lwork), ipiv(n)) !LU decompose call getrf(n, n, a, n, ipiv, info) if (info /= 0) then call mp_abort("Error during lapack LU factorization in mat_inv::invert_lapack.", .true.) endif !Invert call getri(n, a, n, ipiv, work, lwork, info) if (info /= 0) then call mp_abort("Error during lapack inversion in mat_inv::invert_lapack.", .true.) endif !Cleanup deallocate(work, ipiv) #else UNUSED_DUMMY(a); UNUSED_DUMMY(n); call mp_abort("Attempt to use mat_inv::invert_lapack but built without lapack support.", .true.) #endif end subroutine invert_lapack !> Serial Gauss-Jordan elimination subroutine invert_gauss_jordan(a,n) implicit none integer,intent(in):: n !Should we make this assumed shape? !<DD>Tagged complex,dimension(n,n),intent(inout) :: a !Is it better to make these allocatable? !<DD>Tagged complex :: tmp,fac integer i, k do i=1,n fac=1/a(i,i) !This would become inverse if done on blocks a(i,i)=1 a(:,i)=a(:,i)*fac !NOTE:These loops don't shrink as i increases so load should !be balanced? do k=1,i-1 tmp=a(i,k) a(i,k)=0 a(:,k)=a(:,k)-a(:,i)*tmp enddo do k=i+1,n tmp=a(i,k) a(i,k)=0 a(:,k)=a(:,k)-a(:,i)*tmp enddo enddo end subroutine invert_gauss_jordan !> Serial Block inverse !> !> We split the matrix into four sub-blocks: !> !> | A B | !> | C D | !> !> where A is m x m and D is (n-m) x (n-m). !> The inverse can then be written as !> !> | A B |^-1 == | M N | !> | C D | | O P | !> !> With !> !> M = [A-B(D^-1)C]^-1 !> N = -MB(D^-1) !> O = -(D^-1)CM !> P = (D^-1) + (D^-1)CMB(D^-1) !> !> In practice if we define !> !> T = -B(D^-1) !> S = -(D^-1)C !> !> we can then write !> !> M = [A + TC]^-1 !> N = MT !> O = SM !> P = D^-1 + SN !> !> We can therefore find the inverse of our matrix using !> two inversions of matrices of size ~ m x m ~ (n/2)x(n/2) !> two matrix additions and six matrix multiplies. !> !> The performance of this approach is dependent on an efficient !> matrix multiplication implementation. Most compilers will offer !> decent implementations and the option to use an external library !> such as blas. recursive subroutine invert_block(a,n,m_in) implicit none integer, intent(in):: n complex, dimension(n, n), intent(in out), target :: a integer, intent(in), optional :: m_in integer :: m complex, dimension(:, :), allocatable :: tmp complex, dimension(:, :), pointer :: aa, bb, cc, dd ! If the matrix is small then use direct inversion if (n <= 4) then call invert_gauss_jordan(a, n) return end if ! Ensure the sub-matrix size is set. We split the length n into ! two regions, 1:m and m+1:n. if(present(m_in)) then m = m_in else m = max(n/2, 1) end if ! Here we use pointers to give more useful names to the sub-blocks of a ! An associate block could be another option, but currently this leads to ! errors in the result. aa => a(1:m, 1:m) bb => a(1:m, m+1:n) cc => a(m+1:n, 1:m) dd => a(m+1:n, m+1:n) ! First calculate D^-1 and store in place call invert_block(dd, n-m) !Calculate T = -B(D^-1) tmp = -matmul(bb, dd) ! Calculate [A+TC] aa = aa + matmul(tmp, cc) ! Invert [A+TC] and store in place. M is now done call invert_block(aa, m) ! Calculate N and store. bb = matmul(aa, tmp) ! Calculate S = -(D^-1)C tmp = -matmul(dd, cc) ! Calculate O cc = matmul(tmp, aa) ! Calculate P dd = dd + matmul(tmp, bb) end subroutine invert_block !> openmp Gauss-Jordan elimination subroutine invert_gauss_jordan_omp(a,n) implicit none integer,intent(in):: n !Should we make this assumed shape? !<DD>Tagged complex,dimension(n,n),intent(inout) :: a !Is it better to make these allocatable? !<DD>Tagged complex :: tmp,fac integer i, k !Can't omp at top level currently due to data dependencies do i=1,n fac=1/a(i,i) !This would become inverse if done on blocks a(i,i)=1 a(:,i)=a(:,i)*fac !NOTE: Slight difference to serial version to avoid !having two separate parallel regions !$OMP PARALLEL DO DEFAULT(NONE) PRIVATE(tmp, k) SHARED(a, i, n) SCHEDULE(static) do k=1,n if(k.eq.i)cycle tmp=a(i,k) a(i,k)=0 a(:,k)=a(:,k)-a(:,i)*tmp enddo !$OMP END PARALLEL DO enddo end subroutine invert_gauss_jordan_omp end module mat_inv