我想编译CUDALibrarySamples。cuFFT使用cmake,我想编译并链接1d_c2c
应用程序与静态版本的cufft库(-lcufft_static
)。使用Makefile是微不足道的,我已经添加了-lcufft_static
在
nvcc -x cu $(FLAGS) $(INC) 1d_c2c_example.cpp -o 1d_c2c_example $(LIBS)
然而,我不确定如何使用cmake来做同样的事情。我注意到cmake有静态标志:CUDA_cublasLt_static_LIBRARY
,CUDA_cufft_static_LIBRARY
等。所以我的问题是我如何才能启用它们?提前谢谢你!
我试过了
target_link_libraries(${ROUTINE}_example PRIVATE ${CUDA_cufft_static_LIBRARY})
但似乎不起作用。
根据@paleonix的建议,我做了以下事情:
target_link_libraries(${ROUTINE}_example PRIVATE CUDA::cufft_static CUDA::cudart).
但我得到以下错误:
/usr/bin/ld: /opt/cuda/lib64/libcufft_static.a(cbdouble_32bit_prime_callback_RT_SM35_plus.o): in function __sti____cudaRegisterAll()': cbdouble_32bit_prime_callback_RT_SM35_plus.compute_86.cudafe1.cpp:(.text.startup+0x1d): undefined reference to __cudaRegisterLinkedBinary_61_cbdouble_32bit_prime_callback_RT_SM35_plus_compute_86_cpp1_ii_dc5d5345
我正在尝试为CUDA库示例构建以下示例:
1d_c2c_example.cpp
#include <complex>
#include <iostream>
#include <random>
#include <vector>
#include <cuda_runtime.h>
#include <cufftXt.h>
#include "cufft_utils.h"
int main(int argc, char *argv[]) {
cufftHandle plan;
cudaStream_t stream = NULL;
int n = 8;
int batch_size = 2;
int fft_size = batch_size * n;
using scalar_type = float;
using data_type = std::complex<scalar_type>;
std::vector<data_type> data(fft_size);
for (int i = 0; i < fft_size; i++) {
data[i] = data_type(i, -i);
}
std::printf("Input array:\n");
for (auto &i : data) {
std::printf("%f + %fj\n", i.real(), i.imag());
}
std::printf("=====\n");
cufftComplex *d_data = nullptr;
CUFFT_CALL(cufftCreate(&plan));
CUFFT_CALL(cufftPlan1d(&plan, data.size(), CUFFT_C2C, batch_size));
CUDA_RT_CALL(cudaStreamCreateWithFlags(&stream, cudaStreamNonBlocking));
CUFFT_CALL(cufftSetStream(plan, stream));
// Create device data arrays
CUDA_RT_CALL(cudaMalloc(reinterpret_cast<void **>(&d_data), sizeof(data_type) * data.size()));
CUDA_RT_CALL(cudaMemcpyAsync(d_data, data.data(), sizeof(data_type) * data.size(), cudaMemcpyHostToDevice, stream));
CUFFT_CALL(cufftExecC2C(plan, d_data, d_data, CUFFT_FORWARD));
CUFFT_CALL(cufftExecC2C(plan, d_data, d_data, CUFFT_INVERSE));
CUDA_RT_CALL(cudaMemcpyAsync(data.data(), d_data, sizeof(data_type) * data.size(), cudaMemcpyDeviceToHost, stream));
CUDA_RT_CALL(cudaStreamSynchronize(stream));
/* free resources */
CUDA_RT_CALL(cudaFree(d_data))
CUFFT_CALL(cufftDestroy(plan));
CUDA_RT_CALL(cudaStreamDestroy(stream));
CUDA_RT_CALL(cudaDeviceReset());
return EXIT_SUCCESS;
}
CMakeLists.txt
cmake_minimum_required(VERSION 3.18)
set(ROUTINE 1d_c2c)
project(
"${ROUTINE}_example"
DESCRIPTION "GPU-Accelerated Fast Fourier Transforms"
HOMEPAGE_URL "https://docs.nvidia.com/cuda/cufft/index.html"
LANGUAGES CXX CUDA)
set(CMAKE_CUDA_ARCHITECTURES 80)
find_package(CUDAToolkit REQUIRED)
set(CMAKE_CXX_STANDARD 11)
set(CMAKE_CXX_STANDARD_REQUIRED ON)
if("${CMAKE_BUILD_TYPE}" STREQUAL "")
set(CMAKE_BUILD_TYPE Release)
endif()
set(CMAKE_CUDA_ARCHITECTURES 80)
#if(CMAKE_CUDA_ARCHITECTURES LESS 60)
#set(CMAKE_CUDA_ARCHITECTURES 60 70 75 80 86)
#endif()
set(BUILD_SHARED_LIBS OFF)
set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/bin)
set(CUFFT_LIBRARIES ${CUDA_cufft_LIBRARY} ${CUDA_culibos_LIBRARY} ${CUDA_cudart_LIBRARY})
add_executable(${ROUTINE}_example)
target_include_directories(${ROUTINE}_example
PRIVATE ${CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORIES}
${CMAKE_SOURCE_DIR}/../utils)
target_sources(${ROUTINE}_example
PRIVATE ${PROJECT_SOURCE_DIR}/${ROUTINE}_example.cpp)
set(CMAKE_CUDA_ARCHITECTURES 80)
#target_link_libraries(${ROUTINE}_example PRIVATE ${CUDA_cufft_static_LIBRARY} CUDA::cufft CUDA::cudart)
target_link_libraries(${ROUTINE}_example PRIVATE CUDA::cufft_static CUDA::cudart)
当我移除
find_package(CUDAToolkit REQUIRED)
cmake显示以下错误:
CMake Error at CMakeLists.txt:82 (target_link_libraries):
Target "1d_c2c_example" links to:
CUDA::cufft_static
but the target was not found.
1条答案
按热度按时间owfi6suc1#
在使用
CUDA::cufft_static
后仍然有链接器问题的主要原因是静态cuFFT需要启用可重定位的设备代码。这在CMake中通过CUDA_SEPARABLE_COMPILATION
属性完成。我将收回我的说法,即不应该同时使用
CUDA
和find_package(CUDAToolkit REQUIRED)
。虽然cufft_static
目标在只使用语言时可用,但它不会自动链接culibos
。因此更优雅的解决方案似乎是使用包中的CUDA::cufft_static
。我从链接库中获取了
cufft_utils.h
并将其放入${PROJECT_SOURCE_DIR}/utils
中。如果您的项目结构不同,则必须调整target_include_directories
命令。设置
CMAKE_
变量是一种糟糕的方式。为了让其中一些变量正常工作,您必须在project
命令之前设置它们。但是,例如,CUDA架构和构建类型应该在第一次配置期间通过命令行参数设置到cmake
或使用ccmake
来获得一个漂亮的控制台UI。正如你所看到的,我给了源文件一个
.cu
。让它在.cpp
文件中工作似乎有点复杂。阅读cuFFT文档的这一章是一个很好的起点。虽然可以通过使用find_package(Threads REQUIRED)
然后将${CMAKE_DL_LIBS}
和Threads::Threads
添加到target_link_libraries
命令来消除大多数链接器错误,我无法让可重定位设备代码工作,因为我无法让CMake使用nvcc
进行链接。我尝试在set_target_properties
中设置LINKER_LANGUAGE CUDA
,理论上应该可以工作,但实际上CMake一直使用g++
进行链接(在make VERBOSE=1
中可见)。这可能是CMake中的一个bug。