Untitled
unknown
plain_text
a year ago
1.8 kB
13
Indexable
# cat t1.cu
#include <cuda_runtime.h>
#include <iostream>
#include <omp.h>
// Kernel that prints, from device code, the ID of the GPU it is running on.
//
// Launch layout: every launched thread prints one line, so launch with
// <<<1, 1>>> to get a single line per device. Uses no shared memory and
// takes no arguments.
//
// Note: cudaGetDevice() is called here from device code, i.e. through the
// CUDA device runtime, so the translation unit has to be built with
// relocatable device code enabled (nvcc -rdc=true).
__global__ void printGPUDeviceID() {
    int deviceID = -1;  // sentinel so a failed query never prints garbage

    // Query the current device ID and check the status instead of assuming success.
    cudaError_t status = cudaGetDevice(&deviceID);
    if (status != cudaSuccess) {
        printf("cudaGetDevice failed in kernel: %s\n", cudaGetErrorString(status));
        return;
    }

    printf("Device ID from the kernel: %d\n", deviceID);
}
// Entry point: runs printGPUDeviceID once on every visible CUDA device, each
// device being driven from an OpenMP worker thread, and prints the device ID
// as seen from the host and from the kernel.
//
// Returns 0 when every device completed without error, 1 when fewer than two
// GPUs are available, the device count could not be queried, or any device
// reported a CUDA error.
int main() {
    // Get the number of available devices. Initialise the counter and check
    // the status so a failed query is reported instead of reading garbage.
    int num_devices = 0;
    cudaError_t count_err = cudaGetDeviceCount(&num_devices);
    if (count_err != cudaSuccess) {
        std::cout << "cudaGetDeviceCount failed: " << cudaGetErrorString(count_err) << std::endl;
        return 1;
    }
    if (num_devices < 2) {
        std::cout << "This example requires at least two GPUs." << std::endl;
        return 1;
    }

    // Number of devices that reported an error; summed across threads by the
    // OpenMP reduction below.
    int failures = 0;

    // Iterate over device IDs rather than relying on the team size:
    // num_threads() is only a request, and a smaller team would otherwise
    // leave the higher-numbered devices untouched. schedule(static, 1) keeps
    // the one-device-per-thread mapping whenever the full team is granted.
    #pragma omp parallel for num_threads(num_devices) schedule(static, 1) reduction(+ : failures)
    for (int device_id = 0; device_id < num_devices; ++device_id) {
        int thread_id = omp_get_thread_num();  // OpenMP thread handling this device

        // Set the current device for this thread; every later step runs only
        // if all previous steps succeeded.
        cudaError_t err = cudaSetDevice(device_id);

        // Get and print the device ID from the host.
        int deviceIDFromHost = -1;
        if (err == cudaSuccess) {
            err = cudaGetDevice(&deviceIDFromHost);
        }

        if (err == cudaSuccess) {
            printf("Device ID from the host (thread %d): %d\n", thread_id, deviceIDFromHost);

            // Launch a kernel to print the device ID from the GPU, then pick
            // up launch-time errors (bad configuration, missing device code).
            printGPUDeviceID<<<1, 1>>>();
            err = cudaGetLastError();
        }

        // Wait for the GPU to finish; errors raised while the kernel was
        // executing are reported by the synchronize call itself.
        if (err == cudaSuccess) {
            err = cudaDeviceSynchronize();
        }

        if (err != cudaSuccess) {
            printf("CUDA error on device %d: %s\n", device_id, cudaGetErrorString(err));
            ++failures;
        }
    }

    // Propagate device failures to the caller through the exit status.
    return failures == 0 ? 0 : 1;
}
# nvcc -Xcompiler -fopenmp t1.cu -o t1 -lgomp -rdc=true
# OMP_NUM_THREADS=2 ./t1
Device ID from the host (thread 0): 0
Device ID from the host (thread 1): 1
Device ID from the kernel: 1
Device ID from the kernel: 0
# Editor is loading...
Leave a Comment