// Paste-site metadata (not part of the program):
// maxpool_cudnn_0913
// user_5573880
// c_cpp
// a year ago
// 5.5 kB
// 10
// Indexable
#include <cuda_fp16.h>
#include <cuda_runtime.h>
#include <cudnn.h>
#include <stdio.h>

#include <cstdlib>
#include <iostream>
#include <random>
#include <vector>
// Utility function for CUDA and cuDNN error checking
// checkCudaErrors(call): evaluates a CUDA runtime call exactly once. If the
// returned status is not cudaSuccess, prints the file, line and
// cudaGetErrorString() text to std::cerr and terminates the process with
// EXIT_FAILURE.
// The status lives in a macro-private name so that an `err` variable used
// inside `call` is not shadowed, and `call` is parenthesized before use.
#define checkCudaErrors(call)                                               \
  do {                                                                      \
    const cudaError_t cuda_check_status_ = (call);                          \
    if (cuda_check_status_ != cudaSuccess) {                                \
      std::cerr << "CUDA error in " << __FILE__ << ":" << __LINE__ << ": "  \
                << cudaGetErrorString(cuda_check_status_) << std::endl;     \
      std::exit(EXIT_FAILURE);                                              \
    }                                                                       \
  } while (0)
// checkCudnnErrors(call): evaluates a cuDNN call exactly once. If the returned
// status is not CUDNN_STATUS_SUCCESS, prints the file, line and
// cudnnGetErrorString() text to std::cerr and terminates the process with
// EXIT_FAILURE.
// The status lives in a macro-private name so that an `err` variable used
// inside `call` is not shadowed, and `call` is parenthesized before use.
#define checkCudnnErrors(call)                                              \
  do {                                                                      \
    const cudnnStatus_t cudnn_check_status_ = (call);                       \
    if (cudnn_check_status_ != CUDNN_STATUS_SUCCESS) {                      \
      std::cerr << "cuDNN error in " << __FILE__ << ":" << __LINE__ << ": " \
                << cudnnGetErrorString(cudnn_check_status_) << std::endl;   \
      std::exit(EXIT_FAILURE);                                              \
    }                                                                       \
  } while (0)
// Problem configuration for the pooling demo.
constexpr int INPUT_CHANNELS = 3;    // channel count of the input tensor
constexpr int OUTPUT_CHANNELS = 16;  // not referenced by the code visible here
constexpr int INPUT_SIZE = 416;      // input height and width (square)
constexpr int pool_size = 2;         // pooling window edge length
constexpr int stride = 2;            // pooling stride along both axes

static_assert(stride > 0, "pooling stride must be positive");
static_assert(pool_size > 0 && pool_size <= INPUT_SIZE,
              "pooling window must fit inside the input");

// Pooled height and width for zero padding. Derived from the settings above
// instead of being hard-coded, so it cannot drift out of sync with them
// (evaluates to 208 for the current values).
constexpr int OUTPUT_SIZE = (INPUT_SIZE - pool_size) / stride + 1;
// Creates `pooling_desc` and configures it for 2-D max pooling with NaN
// propagation: a square pool_size x pool_size window, zero padding, and the
// same `stride` along both axes. Each cuDNN call is wrapped in
// checkCudnnErrors.
void setupPoolingDescriptor(cudnnPoolingDescriptor_t& pooling_desc,
                            int pool_size, int stride) {
  // Spell the geometry out per axis so the long argument list below is
  // readable at a glance.
  const int window_h = pool_size;
  const int window_w = pool_size;
  const int pad_h = 0;
  const int pad_w = 0;
  const int stride_h = stride;
  const int stride_w = stride;

  checkCudnnErrors(cudnnCreatePoolingDescriptor(&pooling_desc));
  checkCudnnErrors(cudnnSetPooling2dDescriptor(
      pooling_desc, CUDNN_POOLING_MAX, CUDNN_PROPAGATE_NAN, window_h,
      window_w, pad_h, pad_w, stride_h, stride_w));
}
int main() {
// Initialize cuDNN
cudnnHandle_t cudnn;
checkCudnnErrors(cudnnCreate(&cudnn));
// Initialize input and output data
std::vector<float> h_input(INPUT_SIZE * INPUT_SIZE * INPUT_CHANNELS);
std::vector<float> h_output(OUTPUT_SIZE * OUTPUT_SIZE * INPUT_CHANNELS);
// Use a fixed seed for reproducibility
unsigned int seed = 1234;
std::mt19937 gen(seed);
std::uniform_real_distribution<float> dis(-1.0f, 1.0f);
for (auto& val : h_input) val = dis(gen);
half* d_input;
half* d_pool_output;
size_t input_size = INPUT_SIZE * INPUT_SIZE * INPUT_CHANNELS * sizeof(half);
size_t pool_output_size =
OUTPUT_SIZE * OUTPUT_SIZE * INPUT_CHANNELS * sizeof(half);
checkCudaErrors(cudaMalloc(&d_input, input_size));
checkCudaErrors(cudaMalloc(&d_pool_output, pool_output_size));
// Convert input to half precision and copy to device
std::vector<half> h_input_half(h_input.size());
for (size_t i = 0; i < h_input.size(); ++i) {
h_input_half[i] = __float2half(h_input[i]);
}
checkCudaErrors(cudaMemcpy(d_input, h_input_half.data(), input_size,
cudaMemcpyHostToDevice));
// Set up input tensor descriptor
cudnnTensorDescriptor_t input_desc;
checkCudnnErrors(cudnnCreateTensorDescriptor(&input_desc));
checkCudnnErrors(cudnnSetTensor4dDescriptor(
input_desc, CUDNN_TENSOR_NCHW, CUDNN_DATA_HALF, 1, INPUT_CHANNELS,
INPUT_SIZE, INPUT_SIZE // N, C, H, W
));
// Set up output tensor descriptor
cudnnTensorDescriptor_t output_desc;
checkCudnnErrors(cudnnCreateTensorDescriptor(&output_desc));
checkCudnnErrors(cudnnSetTensor4dDescriptor(
output_desc, CUDNN_TENSOR_NCHW, CUDNN_DATA_HALF, 1, INPUT_CHANNELS,
OUTPUT_SIZE, OUTPUT_SIZE // N, C, H, W
));
// Set up pooling descriptor
cudnnPoolingDescriptor_t pooling_desc;
setupPoolingDescriptor(pooling_desc, pool_size, stride);
// Execute MaxPooling
float alpha = 1.0f, beta = 0.0f;
checkCudnnErrors(cudnnPoolingForward(
cudnn, pooling_desc, &alpha, input_desc, d_input, // Input tensor
&beta, output_desc, d_pool_output // Output tensor
));
// Copy result back to host
std::vector<half> h_output_half(h_output.size());
checkCudaErrors(cudaMemcpy(h_output_half.data(), d_pool_output,
pool_output_size, cudaMemcpyDeviceToHost));
for (size_t i = 0; i < h_output.size(); ++i) {
h_output[i] = __half2float(h_output_half[i]);
}
// Output results
std::cout << "MaxPooling output (first few values):" << std::endl;
for (int i = 0; i < 10; ++i) {
std::cout << h_output[i] << " ";
}
std::cout << std::endl;
// Clean up cuDNN resources
checkCudnnErrors(cudnnDestroyPoolingDescriptor(pooling_desc));
checkCudnnErrors(cudnnDestroyTensorDescriptor(input_desc));
checkCudnnErrors(cudnnDestroyTensorDescriptor(output_desc));
checkCudnnErrors(cudnnDestroy(cudnn));
// Clean up CUDA memory
checkCudaErrors(cudaFree(d_input));
checkCudaErrors(cudaFree(d_pool_output));
std::cout << "Program completed." << std::endl;
return 0;
}
// Paste-site footer (not part of the program):
// Editor is loading...
// Leave a Comment