HIP 是 AMD 开源的异构并行计算框架, 类似于 NVIDIA 的 CUDA. QUDA 则是 Lattice QCD 中广泛使用的 CUDA 库. 我们需要将 QUDA 从 CUDA 平台移植到 AMD 的 HIP 平台. 方便的是, HIP 提供了几个脚本来帮助我们完成这一过程.


目录

  1. QUDA from cuda to hip
    1. Code Portion
    2. Details
    3. rocRAND
    4. cublas
    5. thrust
    6. cub
    7. amdgpu-target
    8. PTX
  2. quda compiling
  3. Compiling CL2QCD

QUDA from cuda to hip

HIP provides two tools for translating QUDA from CUDA to HIP: hipify-perl for source code and hipify-cmakefile for CMake files.

Code Portion

Details

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
$ sed -i "s/curand_uniform/hiprand_uniform/g" include/*.h
$ sed -i "s/curand_normal/hiprand_normal/g" include/*.h
$ sed -i "s/cuCdiv/hipCdiv/g" include/*.h
$ sed -i "s/cuCdivf/hipCdivf/g" include/*.h

$ sed -i "s/curandStateMRG32k3a/hiprandStateMRG32k3a/g" include/*.h
$ sed -i "s/cuDoubleComplex/hipDoubleComplex/g" include/*.h
$ sed -i "s/cuFloatComplex/hipFloatComplex/g" include/*.h
$ sed -i "s|cuComplex.h|hip/hip_complex.h|g" include/*.h
$ sed -i "s|cudaHostRegisterDefault|hipHostRegisterDefault|g" include/*.h

$ sed -i "s|CUDA_SUCCESS|hipSuccess|g" lib/*.cpp
$ sed -i "s|CUresult|hipError_t|g" lib/*.cpp
$ sed -i "s|cudaHostRegisterDefault|hipHostRegisterDefault|g" lib/*.cpp
$ sed -i "s|cuMemAlloc|hipMalloc|g" lib/*.cpp
$ sed -i "s|cuMemFree|hipFree|g" lib/*.cpp
$ sed -i "s/cuMemFreeHost/hipFreeHost/g" lib/*.cpp
$ sed -i "s|CUdeviceptr|hipDeviceptr_t|g" lib/*.cpp
$ sed -i "s/cudaHostRegisterDefault/hipHostRegisterDefault/g" lib/*.cpp
$ sed -i "s/cudaIpcEventHandle_t/hipIpcEventHandle_t/g" lib/*.cpp
$ sed -i "s/cudaEventInterprocess/hipEventInterprocess/g" lib/*.cpp
$ sed -i "s/cudaIpcOpenEventHandle/hipIpcOpenEventHandle/g" lib/*.cpp
$ sed -i "s/cudaIpcGetEventHandle/hipIpcGetEventHandle/g" lib/*.cpp
$ sed -i "s/cuMemcpyDtoH/hipMemcpyDtoH/g" lib/*.cpp
$ sed -i "s/cuMemcpy/hipMemcpy/g" lib/*.cpp
$ sed -i "s/cuMemcpyDtoHAsync/hipMemcpyDtoHAsync/g" lib/*.cpp
$ sed -i "s/cuCtxSynchronize/hipCtxSynchronize/g" lib/*.cpp
$ sed -i "s/cuEventSynchronize/hipEventSynchronize/g" lib/*.cpp
$ sed -i "s/cuStreamSynchronize/hipStreamSynchronize/g" lib/*.cpp
$ sed -i "s/cuStreamWaitEvent/hipStreamWaitEvent/g" lib/*.cpp
$ sed -i "s/cuEventQuery/hipEventQuery/g" lib/*.cpp
$ sed -i "s/cuEventRecord/hipEventRecord/g" lib/*.cpp
$ sed -i "s/CUDA_MEMCPY2D/hip_Memcpy2D/g" lib/*.cpp
$ sed -i "s/cudaLaunchKernel/hipLaunchKernel/g" lib/*.cpp
$ sed -i "s/CUDA_ERROR_NOT_READY/hipErrorNotReady/g" lib/*.cpp
  1. sed -i "s/COMPILE_LANGUAGE:CUDA/COMPILE_LANGUAGE:HIP/g" lib/CMakeLists.txt

  2. add the rocRAND and hipRAND libraries to CMakeLists.txt

    1
    2
    3
    4
    include_directories(SYSTEM ${rocRAND_HOME}/include/)
    FIND_LIBRARY(rocrand_LIB rocRAND ${rocRAND_HOME}/lib/)
    include_directories(SYSTEM ${hipRAND_HOME}/include/)
    FIND_LIBRARY(hiprand_LIB hipRAND ${hipRAND_HOME}/lib/)
  3. cuComplex.h to hip/hip_complex.h: include/complex_quda.h

  4. hipEventCreate to hipEventCreateWithFlags: lib/lattice_field.cpp line 369

  5. lib/tune.cpp: change QUDA_HASH to "" (an empty string)

  6. lib/quda_cuda_api.cpp:
    - line 33 “const void *” to “void *”
    - line 92 add kind to the end
    - line 116 remove const
    - WidthInBytes to widthInBytes
    - Height to height

  7. lib/lattice_field.cpp: comment hipIpcGetEventHandle and hipIpcOpenEventHandle

rocRAND

  1. download googletest
1
2
3
4
5
6
7
8
9
10
11
$ git clone https://github.com/google/googletest.git
$ mv googletest googletest-src && tar cvjf googletest-src.tar.bz2 googletest-src
$ tar xf rocRAND-1.8.0.tar.gz && cd rocRAND-1.8.0
$ mkdir build && cd build
$ CC=gcc CXX=g++ cmake3 .. -DCMAKE_HIP_COMPILER_ENV_VAR=hipcc -DHIP_TOOLKIT_INCLUDE=/opt/rocm/hip/include/hip -D__HIP_PLATFORM_HCC__=hcc -DCUDA_cuda_LIBRARY=/opt/rocm/hip/lib/
### an error may occur while cmake downloads googletest; if so, apply the two commands below
sed -i "6,9d" googletest-download/googletest-download-prefix/tmp/googletest-download-gitclone.cmake
touch googletest-download/googletest-download-prefix/src/googletest-download-stamp/googletest-download-gitclone-lastrun.txt
### then re-run cmake
$ CC=gcc CXX=hcc cmake3 .. -DCMAKE_HIP_COMPILER_ENV_VAR=hipcc -DHIP_TOOLKIT_INCLUDE=/opt/rocm/hip/include/hip -D__HIP_PLATFORM_HCC__=hcc -DCUDA_cuda_LIBRARY=/opt/rocm/hip/lib/
$ make -j4 && make DESTDIR=$HOME/ install

cublas

  1. include/blas_cublas.h : <cublas_v2.h> -> <rocblas.h>

  2. include/cub_helper.cuh: add #include <rocblas.h>

  3. include/cub_helper.cuh: add #include <thrust/system/cuda/detail/cub/cub.cuh>

  4. hip_helpers/forwarder.hpp
    - device/dispatch/dispatch_histogram.cuh
    - device/dispatch/dispatch_reduce_by_key.cuh

  5. adding hip_helpers/forwarder.hpp from here

  6. uncomment line 125 of cub_helper.cuh: // __shared__ bool isLastBlockDone;

  7. __shared__ bool -> bool

thrust

download thrust

cub

The HIP counterpart of cub is rocPRIM:

1
$ tar xf

amdgpu-target

Not finished:

1
-DQUDA_GPU_ARCH=gfx801

$(TOP_DIR)/CMakeLists.txt

1
--arch -> --amdgpu-target

PTX

quda compiling

1
2
3
4
5
6
7
8
9
10
source /work/soft/profile.d/rocm-1.8.5.sh
source /work/soft/profile.d/gcc-8.1.0.env.sh
export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/opt/rocm/rocrand/lib/:/opt/rocm/hiprand/lib:/opt/rocm/lib
export C_INCLUDE_PATH=$C_INCLUDE_PATH:/opt/rocm/rocrand/include:/opt/rocm/hiprand/include:/opt/rocm/include
export CPLUS_INCLUDE_PATH=$CPLUS_INCLUDE_PATH:/opt/rocm/rocrand/include:/opt/rocm/hiprand/include:/opt/rocm/include
export PATH=$PATH:$HOME/opt/rocm/rocrand/bin:$HOME/opt/rocm/hiprand/bin:/opt/rocm/bin

# CC=gcc CXX=hcc cmake3 .. -DCMAKE_HIP_COMPILER_ENV_VAR=hipcc -DHIP_TOOLKIT_INCLUDE=/opt/rocm/hip/include/ -D__HIP_PLATFORM_HCC__=hcc -DCUDA_cuda_LIBRARY=/opt/rocm/hip/lib/ -DCMAKE_HIP_LINK_EXECUTABLE=/opt/rocm/hcc/bin/ -DCMAKE_HIP_CREATE_STATIC_LIBRARY=/opt/rocm/hcc/bin/hipcc_cmake_linker_helper
#rm -rf *; CC=gcc CXX=g++ cmake3 .. -DHIP_TOOLKIT_INCLUDE=/opt/rocm/include -DCUDA_cuda_LIBRARY=/opt/rocm/lib/ -DCMAKE_HIP_LINK_EXECUTABLE=/opt/rocm/bin/hipcc_cmake_linker_helper -DCMAKE_HIP_CREATE_STATIC_LIBRARY=/opt/rocm/bin/hipcc_cmake_linker_helper
rm -rf *; CC=gcc CXX=g++ cmake3 .. -DHIP_TOOLKIT_INCLUDE=/opt/rocm/include -DCUDA_cuda_LIBRARY=/opt/rocm/lib/ -DCMAKE_HIP_LINK_EXECUTABLE=/opt/rocm/bin/hipcc_cmake_linker_helper -DCMAKE_HIP_CREATE_STATIC_LIBRARY="-L/opt/rocm/bin/hipcc_cmake_linker_helper"

Compiling CL2QCD

1
2
3
source /work/soft/profile.d/rocm-1.8.5.sh
source /work/soft/profile.d/gcc-8.1.0.env.sh
rm -rf *;CC=gcc CXX=g++ cmake3 .. -DOpenCL_LIBRARIES="-L/opt/rocm/opencl/lib/x86_64 -lOpenCL -lcltrace" -DOpenCL_INCLUDE_DIR=/opt/rocm/opencl/include -DGMP_INCLUDE_DIR=/usr/include -DMPFR_INCLUDE_DIR=/usr/include/ -DLIBXML2_INCLUDE_DIR=/usr/include/libxml2 -DNettle_INCLUDE_DIR=/usr/include -DGMP_LIBRARIES="-L/usr/lib64 -lgmp" -DMPFR_LIBRARIES="-L/usr/lib64 -lmpfr" -DLIBXML2_LIBRARY=/usr/lib64/libxml2.so.2