"examples/vscode:/vscode.git/clone" did not exist on "24563ca654f6574dae93aeece8eeef69e39097e5"
Commit 9f59fa20 authored by Your Name
Browse files

PQScanNo kernel函数增加异步sync操作pass,具体原因未知

parent 9943a4c4
source /opt/dtk-22.04.2/env.sh
cd build cd build
rm hc_* -f rm hc_* -f
export CPLUS_INCLUDE_PATH=${ROCM_PATH}/include:${ROCM_PATH}:/opencl/include export CPLUS_INCLUDE_PATH=${ROCM_PATH}/include:${ROCM_PATH}:/opencl/include
...@@ -5,14 +6,16 @@ export CPLUS_INCLUDE_PATH=${ROCM_PATH}/hiprand/include/:${ROCM_PATH}/rocrand/inc ...@@ -5,14 +6,16 @@ export CPLUS_INCLUDE_PATH=${ROCM_PATH}/hiprand/include/:${ROCM_PATH}/rocrand/inc
export LD_LIBRARY_PATH=${ROCM_PATH}/hipblas/lib/:$LD_LIBRARY_PATH export LD_LIBRARY_PATH=${ROCM_PATH}/hipblas/lib/:$LD_LIBRARY_PATH
export LD_LIBRARY_PATH=${ROCM_PATH}/llvm/lib:$LD_LIBRARY_PATH export LD_LIBRARY_PATH=${ROCM_PATH}/llvm/lib:$LD_LIBRARY_PATH
CXX=hipcc CC=hipcc cmake -DFAISS_ENABLE_GPU=ON -DBUILD_SHARED_LIBS=ON -DBUILD_TESTING=ON -DCMAKE_BUILD_TYPE=DEBUG -DCMAKE_MODULE_PATH="${ROCM_PATH}/hip/cmake/" -DMKL_LIBRARIES=/public/software/compiler/intel-compiler-2017.5.239/mkl/lib/intel64_lin/ .. #CXX=hipcc CC=hipcc cmake -DFAISS_ENABLE_GPU=ON -DBUILD_SHARED_LIBS=ON -DBUILD_TESTING=ON -DCMAKE_BUILD_TYPE=DEBUG -DCMAKE_MODULE_PATH="${ROCM_PATH}/hip/cmake/" -DMKL_LIBRARIES=/public/software/compiler/intel-compiler-2017.5.239/mkl/lib/intel64_lin/ ..
CXX=hipcc CC=hipcc cmake -DFAISS_ENABLE_GPU=ON -DBUILD_SHARED_LIBS=ON -DBUILD_TESTING=ON -DCMAKE_BUILD_TYPE=RELEASE -DCMAKE_MODULE_PATH="${ROCM_PATH}/hip/cmake/" -DMKL_LIBRARIES=/public/software/compiler/intel-compiler-2017.5.239/mkl/lib/intel64_lin/ ..
make -j faiss VERBOSE=1 > hc_faiss.log 2>&1 make -j32 faiss VERBOSE=1 > hc_faiss.log 2>&1
make -j swigfaiss VERBOSE=1 > hc_swigfaiss.log 2>&1 make -j32 swigfaiss VERBOSE=1 > hc_swigfaiss.log 2>&1
export LD_LIBRARY_PATH=/public/home/huchen/faiss/faiss_hc/build/faiss/gpu/test/:/public/home/huchen/faiss/faiss_hc/build/faiss/:/public/home/huchen/faiss/faiss_hc/build/lib/:$LD_LIBRARY_PATH export LD_LIBRARY_PATH=/public/home/huchen/faiss/faiss_gitlab_ok/build/faiss/gpu/test/:/public/home/huchen/faiss/faiss_gitlab_ok/build/faiss/:/public/home/huchen/faiss/faiss_gitlab_ok/build/lib/:$LD_LIBRARY_PATH
make install VERBOSE=1 > hc_install.log 2>&1 make -j32 install VERBOSE=1 > hc_install.log 2>&1
#make test VERBOSE=1 > hc_test.log 2>&1
#make -j32 test VERBOSE=1 > hc_test.log 2>&1
cd - cd -
./test_gpu.sh ./test_gpu.sh
......
...@@ -181,7 +181,7 @@ find_package(HIP) ...@@ -181,7 +181,7 @@ find_package(HIP)
#target_link_libraries(faiss) #target_link_libraries(faiss)
#target_link_libraries(faiss_avx2) #target_link_libraries(faiss_avx2)
#link_directories(/opt/dtk-21.04/hipblas/lib/) #link_directories(/opt/dtk-21.04/hipblas/lib/)
target_link_libraries(faiss PRIVATE /opt/dtk-22.04.1/hipblas/lib/libhipblas.so.0) target_link_libraries(faiss PRIVATE /opt/dtk-22.04.2/hipblas/lib/libhipblas.so.0)
target_link_libraries(faiss_avx2 PRIVATE /opt/dtk-22.04.1/hipblas/lib/libhipblas.so.0) target_link_libraries(faiss_avx2 PRIVATE /opt/dtk-22.04.2/hipblas/lib/libhipblas.so.0)
target_compile_options(faiss PRIVATE --gpu-max-threads-per-block=1024) target_compile_options(faiss PRIVATE --gpu-max-threads-per-block=1024)
target_compile_options(faiss_avx2 PRIVATE --gpu-max-threads-per-block=1024) target_compile_options(faiss_avx2 PRIVATE --gpu-max-threads-per-block=1024)
...@@ -213,10 +213,11 @@ void GpuIndexFlat::addImpl_(int n, const float* x, const Index::idx_t* ids) { ...@@ -213,10 +213,11 @@ void GpuIndexFlat::addImpl_(int n, const float* x, const Index::idx_t* ids) {
data_->add(x, n, resources_->getDefaultStream(config_.device)); data_->add(x, n, resources_->getDefaultStream(config_.device));
this->ntotal += n; this->ntotal += n;
float result_sum = 0; // HC Debug
for(int iii = 0; iii < n; iii++) //float result_sum = 0;
result_sum = x[iii]; //for(int iii = 0; iii < n; iii++)
std::cout << "this->ntotal = " << this->ntotal << " result_sum = " << result_sum << std::endl; // result_sum = x[iii];
//std::cout << "this->ntotal = " << this->ntotal << " result_sum = " << result_sum << std::endl;
} }
void GpuIndexFlat::searchImpl_( void GpuIndexFlat::searchImpl_(
......
...@@ -248,6 +248,8 @@ __global__ void pqScanNoPrecomputedMultiPass( ...@@ -248,6 +248,8 @@ __global__ void pqScanNoPrecomputedMultiPass(
#pragma unroll #pragma unroll
for (int byte = 0; byte < kBytesPerCode32; ++byte) { for (int byte = 0; byte < kBytesPerCode32; ++byte) {
auto code = getByte(code32[word], byte * 8, 8); auto code = getByte(code32[word], byte * 8, 8);
// HC Debug
__syncthreads();
auto offset = codesPerSubQuantizer * auto offset = codesPerSubQuantizer *
(word * kBytesPerCode32 + byte); (word * kBytesPerCode32 + byte);
......
...@@ -73,12 +73,12 @@ void testForSize(int rows, int cols, int k, bool dir, bool warp) { ...@@ -73,12 +73,12 @@ void testForSize(int rows, int cols, int k, bool dir, bool warp) {
if (warp) { if (warp) {
runWarpSelect(gpuVal, gpuOutVal, gpuOutInd, dir, k, 0); runWarpSelect(gpuVal, gpuOutVal, gpuOutInd, dir, k, 0);
hipDeviceSynchronize(); //hipDeviceSynchronize();
std::cout << "runWarpSelect" << std::endl; //std::cout << "runWarpSelect" << std::endl;
float result_sum = 0; //float result_sum = 0;
for(int iii = 0; iii < rows; iii++) //for(int iii = 0; iii < rows; iii++)
result_sum += gpuOutVal.data()[iii]; // result_sum += gpuOutVal.data()[iii];
std::cout << "gpuOutVal is " << result_sum << " gpuOutInd is " << gpuOutInd.data()[0] << std::endl; //std::cout << "gpuOutVal is " << result_sum << " gpuOutInd is " << gpuOutInd.data()[0] << std::endl;
} else { } else {
runBlockSelect(gpuVal, gpuOutVal, gpuOutInd, dir, k, 0); runBlockSelect(gpuVal, gpuOutVal, gpuOutInd, dir, k, 0);
} }
......
...@@ -104,8 +104,8 @@ void compareIndices( ...@@ -104,8 +104,8 @@ void compareIndices(
testDistance.data(), testDistance.data(),
testIndices.data()); testIndices.data());
std::cout << "testDistance.data() " << testDistance.data()[0] << " testIndices.data() " << testIndices.data()[0] << std::endl; //std::cout << "testDistance.data() " << testDistance.data()[0] << " testIndices.data() " << testIndices.data()[0] << std::endl;
std::cout << "refDistance.data() " << refDistance.data()[0] << " refIndices.data() " << refIndices.data()[0] << std::endl; //std::cout << "refDistance.data() " << refDistance.data()[0] << " refIndices.data() " << refIndices.data()[0] << std::endl;
faiss::gpu::compareLists( faiss::gpu::compareLists(
refDistance.data(), refDistance.data(),
......
source /opt/dtk-22.04.2/env.sh
cd build; cd build;
export CPLUS_INCLUDE_PATH=${ROCM_PATH}/include:${ROCM_PATH}:/opencl/include export CPLUS_INCLUDE_PATH=${ROCM_PATH}/include:${ROCM_PATH}:/opencl/include
export LD_LIBRARY_PATH=/opt/intel/compilers_and_libraries_2018.1.163/linux/mkl/lib/intel64_lin/:$LD_LIBRARY_PATH export LD_LIBRARY_PATH=/opt/intel/compilers_and_libraries_2018.1.163/linux/mkl/lib/intel64_lin/:$LD_LIBRARY_PATH
...@@ -5,18 +6,18 @@ export LD_LIBRARY_PATH=/opt/intel/compilers_and_libraries_2018.1.163/linux/mkl/l ...@@ -5,18 +6,18 @@ export LD_LIBRARY_PATH=/opt/intel/compilers_and_libraries_2018.1.163/linux/mkl/l
export CPLUS_INCLUDE_PATH=${ROCM_PATH}/hiprand/include/:${ROCM_PATH}/rocrand/include:$CPLUS_INCLUDE_PATH export CPLUS_INCLUDE_PATH=${ROCM_PATH}/hiprand/include/:${ROCM_PATH}/rocrand/include:$CPLUS_INCLUDE_PATH
export LD_LIBRARY_PATH=${ROCM_PATH}/hipblas/lib/:$LD_LIBRARY_PATH export LD_LIBRARY_PATH=${ROCM_PATH}/hipblas/lib/:$LD_LIBRARY_PATH
export LD_LIBRARY_PATH=${ROCM_PATH}/llvm/lib:$LD_LIBRARY_PATH export LD_LIBRARY_PATH=${ROCM_PATH}/llvm/lib:$LD_LIBRARY_PATH
export LD_LIBRARY_PATH=/public/home/huchen/faiss/faiss_hc/build/faiss/gpu/test/:/public/home/huchen/faiss/faiss_hc/build/faiss/:/public/home/huchen/faiss/faiss_hc/build/lib/:$LD_LIBRARY_PATH export LD_LIBRARY_PATH=/public/home/huchen/faiss/faiss_gitlab_ok/build/faiss/gpu/test/:/public/home/huchen/faiss/faiss_gitlab_ok/build/faiss/:/public/home/huchen/faiss/faiss_gitlab_ok/build/lib/:$LD_LIBRARY_PATH
export HIP_KERNEL_PRINTF=1 #export HIP_KERNEL_PRINTF=1
#make -j95 VERBOSE=1 #make -j95 VERBOSE=1
export HIP_VISIBLE_DEVICES=5 export HIP_VISIBLE_DEVICES=3
export HIP_LAUNCH_BLOCKING=1 #export HIP_LAUNCH_BLOCKING=1
export HIP_LOG_LEVEL=7 #export HIP_LOG_LEVEL=7
export AMD_LOG_LEVEL=4 #export AMD_LOG_LEVEL=4
export AMD_OCL_WAIT_COMMAND=1 #export AMD_OCL_WAIT_COMMAND=1
#ctest ctest
#./faiss/gpu/test/TestGpuIndexFlat > hc_test.log 2>&1 #./faiss/gpu/test/TestGpuIndexFlat > hc_test.log 2>&1
#./faiss/gpu/test/TestGpuIndexFlat #./faiss/gpu/test/TestGpuIndexFlat
...@@ -27,7 +28,13 @@ export AMD_OCL_WAIT_COMMAND=1 ...@@ -27,7 +28,13 @@ export AMD_OCL_WAIT_COMMAND=1
#./faiss/gpu/test/TestGpuIndexIVFPQ #./faiss/gpu/test/TestGpuIndexIVFPQ
#./faiss/gpu/test/TestGpuIndexIVFScalarQuantizer #./faiss/gpu/test/TestGpuIndexIVFScalarQuantizer
ctest -V -R TestGpuIndexIVFPQ.Query_IP_MMCodeDistance #ctest -R TestGpuIndexIVFPQ
#ctest -R TestGpuIndexFlat
#gdb --args ./faiss/gpu/test/TestGpuIndexIVFPQ --gtest_filter=TestGpuIndexIVFPQ.Query_L2_MMCodeDistance
#./faiss/gpu/test/TestGpuIndexIVFPQ --gtest_filter=TestGpuIndexIVFPQ.Query_L2_MMCodeDistance
#./faiss/gpu/test/TestGpuIndexIVFPQ --gtest_filter=TestGpuIndexIVFPQ.Query_IP_MMCodeDistance
#./faiss/gpu/test/TestGpuIndexIVFPQ --gtest_filter=TestGpuIndexIVFPQ.Query_IP_MMCodeDistance > hc_test.log 2>&1
##./faiss/gpu/test/TestGpuIndexFlat ##./faiss/gpu/test/TestGpuIndexFlat
##./faiss/gpu/test/TestCodePacking ##./faiss/gpu/test/TestCodePacking
......
cd build;
export CPLUS_INCLUDE_PATH=${ROCM_PATH}/include:${ROCM_PATH}:/opencl/include
export LD_LIBRARY_PATH=/opt/intel/compilers_and_libraries_2018.1.163/linux/mkl/lib/intel64_lin/:$LD_LIBRARY_PATH
#export PATH=/home/huchen/FAISS/swig-4.0.2-build/bin/:$PATH
export CPLUS_INCLUDE_PATH=${ROCM_PATH}/hiprand/include/:${ROCM_PATH}/rocrand/include:$CPLUS_INCLUDE_PATH
export LD_LIBRARY_PATH=${ROCM_PATH}/hipblas/lib/:$LD_LIBRARY_PATH
export LD_LIBRARY_PATH=${ROCM_PATH}/llvm/lib:$LD_LIBRARY_PATH
export LD_LIBRARY_PATH=/public/home/huchen/faiss/faiss_hc/build/faiss/gpu/test/:/public/home/huchen/faiss/faiss_hc/build/faiss/:/public/home/huchen/faiss/faiss_hc/build/lib/:$LD_LIBRARY_PATH
export HIP_KERNEL_PRINTF=1
#make -j95 VERBOSE=1
export HIP_VISIBLE_DEVICES=5
#ctest
gdb ./faiss/gpu/test/TestGpuIndexFlat
#ctest -R TestGpuIndexIVFFlat.Float32_32_Add_L2
#./faiss/gpu/test/TestGpuIndexIVFFlat
#./faiss/gpu/test/TestGpuIndexIVFPQ
#./faiss/gpu/test/TestGpuIndexIVFScalarQuantizer
#./faiss/gpu/test/TestGpuIndexFlat
#./faiss/gpu/test/TestCodePacking
#./faiss/gpu/test/TestGpuIndexBinaryFlat
#./faiss/gpu/test/TestGpuMemoryException
#./faiss/gpu/test/TestGpuSelect
#./faiss/gpu/test/TestGpuDistance
...@@ -35,7 +35,7 @@ endif() ...@@ -35,7 +35,7 @@ endif()
include(FetchContent) include(FetchContent)
FetchContent_Declare(googletest FetchContent_Declare(googletest
URL "https://gitee.com/alinn66/googletest/releases/download/release-1.10.0/googletest-release-1.10.0.tar.gz") URL "file:///public/home/huchen/faiss/faiss_gitlab_ok/googletest-release-1.10.0.gitee.tar.gz")
set(BUILD_GMOCK CACHE BOOL OFF) set(BUILD_GMOCK CACHE BOOL OFF)
set(INSTALL_GTEST CACHE BOOL OFF) set(INSTALL_GTEST CACHE BOOL OFF)
FetchContent_MakeAvailable(googletest) FetchContent_MakeAvailable(googletest)
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment