将 QUDA 从 CUDA 转码到 HIP
HIP 是 AMD 开源的异构并行计算框架, 类似于 NVIDIA 的 CUDA. QUDA 则是格点 QCD (Lattice QCD) 中广泛使用的 CUDA 库. 我们需要将 QUDA 从 CUDA 平台移植到 AMD 的 HIP 平台. 方便的是, HIP 提供了几个脚本来帮助我们完成这一过程.
目录
QUDA from cuda to hip
There are tools for transcoding QUDA from CUDA to HIP: hipify-perl
and hipify-cmakefile,
for source code and CMake files respectively.
Code Portion
Details
$ sed -i "s/curand_uniform/hiprand_uniform/g" include/*.h
$ sed -i "s/curand_normal/hiprand_normal/g" include/*.h
$ sed -i "s/cuCdiv/hipCdiv/g" include/*.h
$ sed -i "s/cuCdivf/hipCdivf/g" include/*.h
$ sed -i "s/curandStateMRG32k3a/hiprandStateMRG32k3a/g" include/*.h
$ sed -i "s/cuDoubleComplex/hipDoubleComplex/g" include/*.h
$ sed -i "s/cuFloatComplex/hipFloatComplex/g" include/*.h
$ sed -i "s|cuComplex.h|hip/hip_complex.h|g" include/*.h
$ sed -i "s|cudaHostRegisterDefault|hipHostRegisterDefault|g" include/*.h
$ sed -i "s|CUDA_SUCCESS|hipSuccess|g" lib/*.cpp
$ sed -i "s|CUresult|hipError_t|g" lib/*.cpp
$ sed -i "s|cudaHostRegisterDefault|hipHostRegisterDefault|g" lib/*.cpp
$ sed -i "s|cuMemAlloc|hipMalloc|g" lib/*.cpp
$ sed -i "s|cuMemFree|hipFree|g" lib/*.cpp
$ sed -i "s/cuMemFreeHost/hipFreeHost/g" lib/*.cpp
$ sed -i "s|CUdeviceptr|hipDeviceptr_t|g" lib/*.cpp
$ sed -i "s/cudaHostRegisterDefault/hipHostRegisterDefault/g" lib/*.cpp
$ sed -i "s/cudaIpcEventHandle_t/hipIpcEventHandle_t/g" lib/*.cpp
$ sed -i "s/cudaEventInterprocess/hipEventInterprocess/g" lib/*.cpp
$ sed -i "s/cudaIpcOpenEventHandle/hipIpcOpenEventHandle/g" lib/*.cpp
$ sed -i "s/cudaIpcGetEventHandle/hipIpcGetEventHandle/g" lib/*.cpp
$ sed -i "s/cuMemcpyDtoH/hipMemcpyDtoH/g" lib/*.cpp
$ sed -i "s/cuMemcpy/hipMemcpy/g" lib/*.cpp
$ sed -i "s/cuMemcpyDtoHAsync/hipMemcpyDtoHAsync/g" lib/*.cpp
$ sed -i "s/cuCtxSynchronize/hipCtxSynchronize/g" lib/*.cpp
$ sed -i "s/cuEventSynchronize/hipEventSynchronize/g" lib/*.cpp
$ sed -i "s/cuStreamSynchronize/hipStreamSynchronize/g" lib/*.cpp
$ sed -i "s/cuStreamWaitEvent/hipStreamWaitEvent/g" lib/*.cpp
$ sed -i "s/cuEventQuery/hipEventQuery/g" lib/*.cpp
$ sed -i "s/cuEventRecord/hipEventRecord/g" lib/*.cpp
$ sed -i "s/CUDA_MEMCPY2D/hip_Memcpy2D/g" lib/*.cpp
$ sed -i "s/cudaLaunchKernel/hipLaunchKernel/g" lib/*.cpp
$ sed -i "s/CUDA_ERROR_NOT_READY/hipErrorNotReady/g" lib/*.cpp
sed -i "s/COMPILE_LANGUAGE:CUDA/COMPILE_LANGUAGE:HIP/g" lib/CMakeLists.txt
- Add the rocRAND and hipRAND libraries to CMakeLists.txt:
include_directories(SYSTEM ${rocRAND_HOME}/include/)
FIND_LIBRARY(rocrand_LIB rocRAND ${rocRAND_HOME}/lib/)
include_directories(SYSTEM ${hipRAND_HOME}/include/)
FIND_LIBRARY(hiprand_LIB hipRAND ${hipRAND_HOME}/lib/)
- cuComplex.h to hip/hip_complex.h: include/complex_quda.h
- hipEventCreate to hipEventCreateWithFlags: lib/lattice_field.cpp line 369
- lib/tune.cpp: QUDA_HASH to “”
- lib/quda_cuda_api.cpp:
  - line 33: “const void *” to “void *”
  - line 92: add kind to the end
  - line 116: remove const
  - WidthInBytes to widthInBytes
  - Height to height
- lib/lattice_field.cpp: comment out hipIpcGetEventHandle and hipIpcOpenEventHandle
rocRAND
- download googletest
$ git clone https://github.com/google/googletest.git
$ mv googletest googletest-src && tar cvjf googletest-src.tar.bz2 googletest-src
$ tar xf rocRAND-1.8.0.tar.gz && cd rocRAND-1.8.0
$ mkdir build && cd build
$ CC=gcc CXX=g++ cmake3 .. -DCMAKE_HIP_COMPILER_ENV_VAR=hipcc -DHIP_TOOLKIT_INCLUDE=/opt/rocm/hip/include/hip -D__HIP_PLATFORM_HCC__=hcc -DCUDA_cuda_LIBRARY=/opt/rocm/hip/lib/
### An error may occur while cmake downloads googletest; if so, apply the following workaround:
sed -i "6,9d " googletest-download/googletest-download-prefix/tmp/googletest-download-gitclone.cmake
touch googletest-download/googletest-download-prefix/src/googletest-download-stamp/googletest-download-gitclone-lastrun.txt
### then re-run cmake
$ CC=gcc CXX=hcc cmake3 .. -DCMAKE_HIP_COMPILER_ENV_VAR=hipcc -DHIP_TOOLKIT_INCLUDE=/opt/rocm/hip/include/hip -D__HIP_PLATFORM_HCC__=hcc -DCUDA_cuda_LIBRARY=/opt/rocm/hip/lib/
$ make -j4 && make DESTDIR=$HOME/ install
cublas
-
include/blas_cublas.h : <cublas_v2.h> -> <rocblas.h>
-
include/cub_helper.cuh: adding #include<rocblas.h>
-
include/cub_helper.cuh: adding #include<thrust/system/cuda/detail/cub/cub.cuh>
-
hip_helpers/forwarder.hpp
- device/dispatch/dispatch_histogram.cuh
- device/dispatch/dispatch_reduce_by_key.cuh -
adding hip_helpers/forwarder.hpp from here
-
uncomment line 125 of cub_helper.cuh: // __shared__ bool isLastBlockDone;
-
__shared__ bool -> bool
thrust
download thrust
cub
The HIP counterpart of CUB is rocPRIM:
$ tar xf
amdgpu-target
Not finished:
-DQUDA_GPU_ARCH=gfx801
$(TOP_DIR)/CMakeLists.txt
--arch -> --amdgpu-target
PTX
quda compiling
source /work/soft/profile.d/rocm-1.8.5.sh
source /work/soft/profile.d/gcc-8.1.0.env.sh
export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/opt/rocm/rocrand/lib/:/opt/rocm/hiprand/lib:/opt/rocm/lib
export C_INCLUDE_PATH=$C_INCLUDE_PATH:/opt/rocm/rocrand/include:/opt/rocm/hiprand/include:/opt/rocm/include
export CPLUS_INCLUDE_PATH=$CPLUS_INCLUDE_PATH:/opt/rocm/rocrand/include:/opt/rocm/hiprand/include:/opt/rocm/include
export PATH=$PATH:$HOME/opt/rocm/rocrand/bin:$HOME/opt/rocm/hiprand/bin:/opt/rocm/bin
# CC=gcc CXX=hcc cmake3 .. -DCMAKE_HIP_COMPILER_ENV_VAR=hipcc -DHIP_TOOLKIT_INCLUDE=/opt/rocm/hip/include/ -D__HIP_PLATFORM_HCC__=hcc -DCUDA_cuda_LIBRARY=/opt/rocm/hip/lib/ -DCMAKE_HIP_LINK_EXECUTABLE=/opt/rocm/hcc/bin/ -DCMAKE_HIP_CREATE_STATIC_LIBRARY=/opt/rocm/hcc/bin/hipcc_cmake_linker_helper
#rm -rf *; CC=gcc CXX=g++ cmake3 .. -DHIP_TOOLKIT_INCLUDE=/opt/rocm/include -DCUDA_cuda_LIBRARY=/opt/rocm/lib/ -DCMAKE_HIP_LINK_EXECUTABLE=/opt/rocm/bin/hipcc_cmake_linker_helper -DCMAKE_HIP_CREATE_STATIC_LIBRARY=/opt/rocm/bin/hipcc_cmake_linker_helper
rm -rf *; CC=gcc CXX=g++ cmake3 .. -DHIP_TOOLKIT_INCLUDE=/opt/rocm/include -DCUDA_cuda_LIBRARY=/opt/rocm/lib/ -DCMAKE_HIP_LINK_EXECUTABLE=/opt/rocm/bin/hipcc_cmake_linker_helper -DCMAKE_HIP_CREATE_STATIC_LIBRARY="-L/opt/rocm/bin/hipcc_cmake_linker_helper"
Compiling CL2QCD
source /work/soft/profile.d/rocm-1.8.5.sh
source /work/soft/profile.d/gcc-8.1.0.env.sh
rm -rf *;CC=gcc CXX=g++ cmake3 .. -DOpenCL_LIBRARIES="-L/opt/rocm/opencl/lib/x86_64 -lOpenCL -lcltrace" -DOpenCL_INCLUDE_DIR=/opt/rocm/opencl/include -DGMP_INCLUDE_DIR=/usr/include -DMPFR_INCLUDE_DIR=/usr/include/ -DLIBXML2_INCLUDE_DIR=/usr/include/libxml2 -DNettle_INCLUDE_DIR=/usr/include -DGMP_LIBRARIES="-L/usr/lib64 -lgmp" -DMPFR_LIBRARIES="-L/usr/lib64 -lmpfr" -DLIBXML2_LIBRARY=/usr/lib64/libxml2.so.2


