Skip to content

Commit

Permalink
minor updates
Browse files Browse the repository at this point in the history
  • Loading branch information
liuyangzhuan committed Dec 4, 2023
1 parent 8187d48 commit 416774d
Show file tree
Hide file tree
Showing 36 changed files with 93 additions and 7 deletions.
16 changes: 12 additions & 4 deletions EXAMPLE/RankBenchmark_Driver.f90
Original file line number Diff line number Diff line change
Expand Up @@ -117,7 +117,7 @@ PROGRAM ButterflyPACK_RankBenchmark
integer,allocatable::Permutation(:)
integer Nunk_loc,Nunk_m_loc, Nunk_n_loc
integer,allocatable::tree(:),tree_m(:),tree_n(:)
complex(kind=8),allocatable::rhs_glo(:,:),rhs_loc(:,:),x_glo(:,:),x_loc(:,:)
complex(kind=8),allocatable::rhs_glo(:,:),rhs_loc(:,:),x_glo(:,:),x_loc(:,:),xin_loc(:,:),xout_loc(:,:)
integer nrhs
type(z_matrixblock) ::blocks
character(len=1024) :: strings,strings1
Expand Down Expand Up @@ -153,7 +153,7 @@ PROGRAM ButterflyPACK_RankBenchmark


quant%tst = 2

quant%wavelen = 0.25d0/8d0

nargs = iargc()
ii=1
Expand All @@ -170,6 +170,8 @@ PROGRAM ButterflyPACK_RankBenchmark
call getarg(ii,strings1)
if(trim(strings)=='--tst')then
read(strings1,*)quant%tst
elseif(trim(strings)=='--wavelen')then
read(strings1,*)quant%wavelen
else
if(ptree%MyID==Main_ID)write(*,*)'ignoring unknown quant: ', trim(strings)
endif
Expand Down Expand Up @@ -199,7 +201,7 @@ PROGRAM ButterflyPACK_RankBenchmark
! Read a full non-square matrix and do a BF compression

ppw=5
quant%wavelen = 0.25d0/8d0

ds = quant%wavelen/ppw
if(quant%tst==1)then ! two colinear plate
Nperdim = z_ceiling_safe(1d0/ds)
Expand Down Expand Up @@ -298,7 +300,13 @@ PROGRAM ButterflyPACK_RankBenchmark
call MPI_Bcast(quant%Permutation_n,quant%Nunk_n,MPI_integer,0,ptree%comm,ierr)

call z_BF_Construct_Element_Compute(blocks, option, stats, msh, ker, ptree)

nrhs=1
allocate(xin_loc(Nunk_n_loc,nrhs))
xin_loc=1
allocate(xout_loc(Nunk_m_loc,nrhs))
call z_BF_Mult('N', xin_loc, xout_loc, Nunk_n_loc, Nunk_m_loc, nrhs, blocks, option, stats, ptree)
deallocate(xin_loc)
deallocate(xout_loc)
!******************************************************************************!

!**** print statistics
Expand Down
1 change: 1 addition & 0 deletions SRC/BPACK_constr.f90
Original file line number Diff line number Diff line change
Expand Up @@ -1550,6 +1550,7 @@ subroutine BF_MD_Construct_Element_Compute(Ndim, blocks, option, stats, msh, ker

! !!!! the following functions have not been tensorized
if (option%verbosity >= 0)call BF_MD_checkError(Ndim, blocks_1, option, msh, ker, stats, ptree, 0, option%verbosity)
stats%Mem_Comp_for=Memory
! call BF_ComputeMemory(blocks, stats%Mem_Comp_for)


Expand Down
36 changes: 36 additions & 0 deletions SRC/Bplus_utilities.f90
Original file line number Diff line number Diff line change
Expand Up @@ -7054,6 +7054,42 @@ subroutine BF_all2all_V_split(block_i, pgno_i, level_i, block_o, pgnos_o, level_

end subroutine BF_all2all_V_split

!> Multiply a single butterfly block with a local block of vectors.
!>
!> The product is delegated to BF_block_MVP_dat (called with BPACK_cone and
!> BPACK_czero as the two scalar arguments), and the result is then divided
!> by option%scale_factor.
!>
!> @param chara    operation selector; 'N' passes (Noutloc, Ninloc) as the block
!>                 dimensions to BF_block_MVP_dat, any other value passes
!>                 (Ninloc, Noutloc) -- presumably the transposed variants,
!>                 TODO confirm against BF_block_MVP_dat
!> @param xin      local input vectors, Ninloc x Ncol
!> @param xout     local output vectors, Noutloc x Ncol; zeroed before the product
!> @param Ninloc   number of local rows of xin (also used as its leading dimension)
!> @param Noutloc  number of local rows of xout (also used as its leading dimension)
!> @param Ncol     number of vectors (columns) in xin and xout
!> @param blocks   the butterfly block handed to BF_block_MVP_dat
!> @param option   only option%scale_factor is read here
!> @param stats    Flop_Tmp, Flop_C_Mult and Time_C_Mult are reset to zero on
!>                 entry; afterwards the wall time of the product call (the
!>                 final scaling is not timed) is added to Time_C_Mult and
!>                 Flop_Tmp is added to Flop_C_Mult
!> @param ptree    process tree handed to BF_block_MVP_dat
subroutine BF_Mult(chara, xin, xout, Ninloc, Noutloc, Ncol, blocks, option, stats, ptree)
   implicit none
   character chara
   integer Ninloc, Noutloc, Ncol
   DT::xin(Ninloc, Ncol), xout(Noutloc, Ncol)
   type(matrixblock)::blocks
   type(Hoption)::option
   type(Hstat)::stats
   type(proctree)::ptree
   real(kind=8) tic, toc

   tic = MPI_Wtime()

   ! start from clean counters and a clean output buffer
   stats%Flop_Tmp = 0
   stats%Flop_C_Mult = 0
   stats%Time_C_Mult = 0
   xout = 0

   ! the two branches differ only in the order of the block dimensions
   select case (chara)
   case ('N')
      call BF_block_MVP_dat(blocks, chara, Noutloc, Ninloc, Ncol, &
                            xin, Ninloc, xout, Noutloc, &
                            BPACK_cone, BPACK_czero, ptree, stats)
   case default
      call BF_block_MVP_dat(blocks, chara, Ninloc, Noutloc, Ncol, &
                            xin, Ninloc, xout, Noutloc, &
                            BPACK_cone, BPACK_czero, ptree, stats)
   end select

   toc = MPI_Wtime()

   ! undo the scaling recorded in option%scale_factor (outside the timed region)
   xout = xout/option%scale_factor

   stats%Time_C_Mult = stats%Time_C_Mult + toc - tic
   stats%Flop_C_Mult = stats%Flop_C_Mult + stats%Flop_Tmp

end subroutine BF_Mult




subroutine BF_block_MVP_dat(blocks, chara, M, N, Nrnd, random1, ldi, random2, ldo, a, b, ptree, stats)
implicit none
Expand Down
Empty file modified example_scripts/pbs_script.sh
100644 → 100755
Empty file.
Empty file modified example_scripts/runYang_fkerreg.sh
100644 → 100755
Empty file.
Empty file modified example_scripts/run_cmake_build_gnu_osx_catalina.sh
100644 → 100755
Empty file.
Empty file modified example_scripts/run_cmake_build_gnu_osx_static.sh
100644 → 100755
Empty file.
Empty file modified example_scripts/run_cmake_build_gnu_osx_zsh.sh
100644 → 100755
Empty file.
Empty file modified example_scripts/run_cmake_build_gnu_summit.sh
100644 → 100755
Empty file.
Empty file modified example_scripts/run_cmake_build_gnu_ubuntu.sh
100644 → 100755
Empty file.
4 changes: 3 additions & 1 deletion example_scripts/run_cmake_build_gnu_ubuntu_mpi4_gcc910.sh
Original file line number Diff line number Diff line change
Expand Up @@ -32,10 +32,12 @@ cmake .. \
-DCMAKE_C_COMPILER=$MPICC \
-DCMAKE_INSTALL_PREFIX=. \
-DCMAKE_INSTALL_LIBDIR=./lib \
-DCMAKE_BUILD_TYPE=Debug\
-DCMAKE_BUILD_TYPE=Release\
-DTPL_ARPACK_LIBRARIES="/home/administrator/Desktop/Software/arpack-ng/build/lib/libarpack.so;/home/administrator/Desktop/Software/arpack-ng/build/lib/libparpack.so" \
-DCMAKE_VERBOSE_MAKEFILE:BOOL=ON

chmod -R 777 ../SRC*

# -DTPL_ARPACK_LIBRARIES="/home/administrator/Desktop/Software/arpack-ng/build/lib/libarpack.so;/home/administrator/Desktop/Software/arpack-ng/build/lib/libparpack.so" \


Expand Down
Empty file modified example_scripts/run_cmake_build_intel_flux.sh
100644 → 100755
Empty file.
Empty file modified example_scripts/run_cmake_build_pgi_summit.sh
100644 → 100755
Empty file.
38 changes: 38 additions & 0 deletions example_scripts/run_cmake_build_pgi_ubuntu_mpi3_magma.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
# Configure a Release build with static libraries (BUILD_SHARED_LIBS=OFF) in
# ../build, using the MPI compiler wrappers and the PGI 19.10 BLAS/LAPACK/
# ScaLAPACK libraries listed below.
# NOTE(review): the relative `cd ..` suggests this is meant to be run from
# inside example_scripts/ -- confirm.
module purge
# module load pgi
# module load scalapack-netlib/pgi-19.10/2.0.2
module load PrgEnv-pgi

cd ..
# Strip trailing carriage returns (CRLF line endings) from the preprocessing script.
sed -i 's/\r$//' PrecisionPreprocessing.sh
# bash PrecisionPreprocessing.sh
mkdir -p build
cd build
# Remove stale CMake/CTest state so the configure step starts from scratch.
rm -rf CMakeCache.txt DartConfiguration.tcl CTestTestfile.cmake cmake_install.cmake CMakeFiles
cmake .. \
-DCMAKE_Fortran_FLAGS="" \
-DCMAKE_CXX_FLAGS="" \
-DBUILD_SHARED_LIBS=OFF \
-DTPL_BLAS_LIBRARIES="/opt/pgi/linux86-64-llvm/19.10/lib/libomp.so;/opt/pgi/linux86-64-llvm/19.10/lib/libblas.so" \
-DTPL_LAPACK_LIBRARIES="/opt/pgi/linux86-64-llvm/19.10/lib/liblapack.so" \
-DTPL_SCALAPACK_LIBRARIES="/opt/pgi/linux86-64-llvm/19.10/lib/scalapack/scalapack-2.0.2/openmpi-3.1.3/lib/libscalapack.a" \
-DCMAKE_Fortran_COMPILER=mpif90 \
-DCMAKE_CXX_COMPILER=mpicxx \
-DCMAKE_C_COMPILER=mpicc \
-DCMAKE_INSTALL_PREFIX=. \
-DCMAKE_BUILD_TYPE=Release \
-DCMAKE_VERBOSE_MAKEFILE:BOOL=ON






#-DCMAKE_Fortran_FLAGS="-ftracer -funswitch-loops -ftree-vectorize -fimplicit-none -fno-range-check -finit-real=nan" \
# -DTPL_BLAS_LIBRARIES="" \
# -DTPL_LAPACK_LIBRARIES="/opt/intel/compilers_and_libraries_2018.1.163/linux/mkl/lib/intel64/libmkl_gf_lp64.so;/opt/intel/compilers_and_libraries_2018.1.163/linux/mkl/lib/intel64/libmkl_intel_thread.so;/opt/intel/compilers_and_libraries_2018.1.163/linux/mkl/lib/intel64/libmkl_core.so;/opt/intel/compilers_and_libraries_2018.1.163/linux/compiler/lib/intel64/libiomp5.so" \
# -DTPL_SCALAPACK_LIBRARIES="/opt/intel/compilers_and_libraries_2018.1.163/linux/mkl/lib/intel64/libmkl_blacs_intelmpi_lp64.so;/opt/intel/compilers_and_libraries_2018.1.163/linux/mkl/lib/intel64/libmkl_scalapack_lp64.so" \
Empty file modified example_scripts/runit.sh
100644 → 100755
Empty file.
Empty file modified example_scripts/runit_cpp.sh
100644 → 100755
Empty file.
Empty file modified example_scripts/runit_em2d.sh
100644 → 100755
Empty file.
Empty file modified example_scripts/runit_em2d_big.sh
100644 → 100755
Empty file.
Empty file modified example_scripts/runit_em2d_big_corner.sh
100644 → 100755
Empty file.
Empty file modified example_scripts/runit_em2d_big_halfcircle.sh
100644 → 100755
Empty file.
Empty file modified example_scripts/runit_em2d_big_halfcircle_100M.sh
100644 → 100755
Empty file.
Empty file modified example_scripts/runit_em2d_big_parallelstrip.sh
100644 → 100755
Empty file.
Empty file modified example_scripts/runit_em2d_big_rectangle.sh
100644 → 100755
Empty file.
Empty file modified example_scripts/runit_em2d_halfcircle_BACA.sh
100644 → 100755
Empty file.
Empty file modified example_scripts/runit_em2d_parallel_test.sh
100644 → 100755
Empty file.
Empty file modified example_scripts/runit_em2d_parallel_test_eigen.sh
100644 → 100755
Empty file.
Empty file modified example_scripts/runit_em2d_parallel_test_hss.sh
100644 → 100755
Empty file.
Empty file modified example_scripts/runit_em2d_parallel_test_summit.sh
100644 → 100755
Empty file.
Empty file modified example_scripts/runit_em2d_parallel_test_summit_pgi.sh
100644 → 100755
Empty file.
Empty file modified example_scripts/runit_em3d.sh
100644 → 100755
Empty file.
Empty file modified example_scripts/runit_em3d_bplus_cori.sh
100644 → 100755
Empty file.
Empty file modified example_scripts/runit_em3d_bplus_summit.sh
100644 → 100755
Empty file.
Empty file modified example_scripts/runit_em3d_sp.sh
100644 → 100755
Empty file.
Empty file modified example_scripts/runit_full_GO.sh
100644 → 100755
Empty file.
Empty file modified example_scripts/runit_krr_test_summit.sh
100644 → 100755
Empty file.
5 changes: 3 additions & 2 deletions example_scripts/runit_osx_ventura.sh
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
export GPTUNEROOT=/Users/liuyangzhuan/Desktop/GPTune/
export MPIRUN="$GPTUNEROOT/openmpi-4.1.5/bin/mpirun"
$MPIRUN --allow-run-as-root -n 4 ../build/EXAMPLE/frankben_t -quant --tst 3 --wavelen 0.0625 -option --nmin_leaf 10 --lrlevel 100 --verbosity 1 --sample_para 1.0 --sample_para_outer 2.0 | tee a.out

# $MPIRUN --allow-run-as-root -n 4 ../build/EXAMPLE/frankben_t -quant --tst 3 --wavelen 0.0625 -option --nmin_leaf 10 --lrlevel 100 --verbosity 1 --sample_para 1.0 --sample_para_outer 1.0 | tee a.out_tensor
$MPIRUN --allow-run-as-root -n 4 ../build/EXAMPLE/frankben_t -quant --tst 2 --wavelen 0.03125 -option --nmin_leaf 10 --lrlevel 100 --verbosity 1 --sample_para 1.0 --sample_para_outer 1.0 | tee a.out_tensor

$MPIRUN --allow-run-as-root -n 4 ../build/EXAMPLE/frankben -quant --tst 2 --wavelen 0.03125 -option --nmin_leaf 200 --lrlevel 100 --verbosity 1 --sample_para 2.0 --sample_para_outer 2.0 | tee a.out_matrix

0 comments on commit 416774d

Please sign in to comment.