1003_matmul_cudnn
user_3093867
python
5 months ago
1.4 kB
3
Indexable
import time

import cupy as cp
import numpy as np


def benchmark_cudnn(left, right, num_iterations=1):
    """Return the average wall-clock time, in seconds, of one GPU matmul.

    Both operands are copied to the GPU once, one untimed warm-up
    multiplication is executed, and then ``cp.matmul`` is run
    ``num_iterations`` times inside the timed region.

    Args:
        left: Left operand; anything ``cp.asarray`` accepts (the script
            below passes a NumPy array).
        right: Right operand, same requirements as ``left``.
        num_iterations: Number of timed multiplications; must be >= 1.

    Returns:
        Elapsed seconds for the timed loop divided by ``num_iterations``.

    Raises:
        ValueError: If ``num_iterations`` is smaller than 1.
    """
    # NOTE(review): the function name and the printed label say "cuDNN", but
    # cp.matmul is presumably dispatched to cuBLAS -- confirm before relying
    # on the label. The name is kept so existing callers keep working.
    if num_iterations < 1:
        raise ValueError(
            f"num_iterations must be >= 1, got {num_iterations}"
        )

    # Transfer data to GPU once, outside the timed region.
    left_gpu = cp.asarray(left)
    right_gpu = cp.asarray(right)

    # Warm-up run. GPU work is queued asynchronously, so wait for it to
    # finish; otherwise its execution time leaks into the measurement.
    cp.matmul(left_gpu, right_gpu)
    cp.cuda.Stream.null.synchronize()

    # Benchmark. perf_counter is monotonic and has a finer resolution than
    # time.time(), which matters for sub-millisecond kernels.
    start_time = time.perf_counter()
    for _ in range(num_iterations):
        cp.matmul(left_gpu, right_gpu)
    # Block until every queued multiplication has actually completed.
    cp.cuda.Stream.null.synchronize()
    end_time = time.perf_counter()

    return (end_time - start_time) / num_iterations


# Generate random input data
left = np.random.randn(1024, 1024).astype(np.float32)
right = np.random.randn(1024, 1024).astype(np.float32)

# Run benchmark
avg_time = benchmark_cudnn(left, right)
print(f"cuDNN average execution time: {avg_time:.6f} seconds")

# Print shapes for verification
print("Left input shape:", left.shape)
print("Right input shape:", right.shape)

# Print a few values from the inputs
print("\nLeft input (first few values):")
print(left.flatten()[:10])
print("\nRight input (first few values):")
print(right.flatten()[:10])

# Perform a single run to get the output shape and values
left_gpu = cp.asarray(left)
right_gpu = cp.asarray(right)
result = cp.asnumpy(cp.matmul(left_gpu, right_gpu))
print("\nOutput shape:", result.shape)
print("\nOutput (first few values):")
print(result.flatten()[:10])
Editor is loading...
Leave a Comment