Untitled
unknown
plain_text
a month ago
1.8 kB
7
Indexable
Never
// t1.cu
//
// Multi-GPU smoke test: the host binds one OpenMP thread to each visible GPU,
// prints the device ID as seen from the host, then launches a one-thread
// kernel that prints the device ID as seen from the GPU.
//
// Recorded session for the original paste:
//
//   # nvcc -Xcompiler -fopenmp t1.cu -o t1 -lgomp -rdc=true
//   # OMP_NUM_THREADS=2 ./t1
//   Device ID from the host (thread 0): 0
//   Device ID from the host (thread 1): 1
//   Device ID from the kernel: 1
//   Device ID from the kernel: 0
//
// NOTE(review): -rdc=true is kept from the original build line; the kernel
// calls cudaGetDevice() from device code, which goes through the CUDA device
// runtime and is expected to need relocatable device code — confirm against
// the toolkit version in use.
// The interleaving of the host and kernel lines is not deterministic; the
// recorded session above already shows the kernel lines out of device order.

#include <cuda_runtime.h>

#include <cstdio>
#include <iostream>

#include <omp.h>

// Kernel to print the device ID from the GPU.
//
// Expected launch configuration: <<<1, 1>>> (every launched thread prints one
// line, so larger grids simply print more lines). Uses no shared memory and
// takes no arguments.
__global__ void printGPUDeviceID() {
    int deviceID = -1;  // stays -1 if the query fails, never indeterminate
    cudaError_t status = cudaGetDevice(&deviceID);  // device-side runtime call
    if (status != cudaSuccess) {
        printf("cudaGetDevice failed in the kernel (error code %d)\n",
               static_cast<int>(status));
        return;
    }
    printf("Device ID from the kernel: %d\n", deviceID);
}

// Reports a failed CUDA call for `device_id`; returns true when `status` is an error.
static bool cudaCallFailed(cudaError_t status, const char* what, int device_id) {
    if (status == cudaSuccess) {
        return false;
    }
    printf("CUDA error on device %d (%s): %s\n", device_id, what,
           cudaGetErrorString(status));
    return true;
}

// Binds the calling host thread to `device_id`, prints the host-side view of
// the current device, runs printGPUDeviceID on it and waits for completion.
// Returns true only if every CUDA call succeeded.
static bool runOnDevice(int thread_id, int device_id) {
    // Set the current device for this host thread.
    if (cudaCallFailed(cudaSetDevice(device_id), "cudaSetDevice", device_id)) {
        return false;
    }

    // Get and print the device ID from the host.
    int deviceIDFromHost = -1;
    if (cudaCallFailed(cudaGetDevice(&deviceIDFromHost), "cudaGetDevice", device_id)) {
        return false;
    }
    printf("Device ID from the host (thread %d): %d\n", thread_id, deviceIDFromHost);

    // Launch a kernel to print the device ID from the GPU.
    printGPUDeviceID<<<1, 1>>>();

    // Launch-configuration errors are reported here, not by the launch itself.
    if (cudaCallFailed(cudaGetLastError(), "kernel launch", device_id)) {
        return false;
    }

    // Wait for the GPU to finish; asynchronous execution errors surface here.
    return !cudaCallFailed(cudaDeviceSynchronize(), "cudaDeviceSynchronize", device_id);
}

int main() {
    // Get the number of available devices.
    int num_devices = 0;
    cudaError_t status = cudaGetDeviceCount(&num_devices);
    if (status != cudaSuccess) {
        std::cout << "cudaGetDeviceCount failed: " << cudaGetErrorString(status)
                  << std::endl;
        return 1;
    }

    if (num_devices < 2) {
        std::cout << "This example requires at least two GPUs." << std::endl;
        return 1;
    }

    // One loop iteration per GPU. num_threads() is only a request, so the
    // work is shared with a loop: with schedule(static, 1) and a full team,
    // thread i handles device i (as in the original), and if the runtime
    // grants fewer threads every device is still visited.
    int failures = 0;
    #pragma omp parallel for num_threads(num_devices) schedule(static, 1) reduction(+ : failures)
    for (int device_id = 0; device_id < num_devices; ++device_id) {
        if (!runOnDevice(omp_get_thread_num(), device_id)) {
            ++failures;
        }
    }

    // Exit non-zero if any device reported a CUDA error.
    return failures == 0 ? 0 : 1;
}
Leave a Comment