# Paste-site metadata captured with the script (not part of the program):
#   1003_matmul_cudnn
#   user_3093867
#   python
#   a year ago
#   1.4 kB
#   5
#   Indexable
import cupy as cp
import numpy as np
import time
def benchmark_cudnn(left, right, num_iterations=1):
    """Time ``cp.matmul`` on the GPU and return the mean seconds per call.

    Both operands are copied to the device with ``cp.asarray`` before the
    timer starts, so the host-to-device transfer is not part of the
    measurement.

    NOTE: this function only calls ``cp.matmul``; which backend library the
    multiplication is dispatched to is decided by CuPy, not by this code.

    Args:
        left: Left operand; anything ``cp.asarray`` accepts.
        right: Right operand; anything ``cp.asarray`` accepts.
        num_iterations: Number of timed ``cp.matmul`` calls. Must be >= 1.

    Returns:
        Average wall-clock time of a single ``cp.matmul`` call, in seconds.

    Raises:
        ValueError: If ``num_iterations`` is less than 1.
    """
    # Validate first so a bad count fails fast instead of dividing by zero
    # (or returning a meaningless value) after GPU work was already done.
    if num_iterations < 1:
        raise ValueError(
            f"num_iterations must be at least 1, got {num_iterations!r}"
        )

    # Transfer data to GPU
    left_gpu = cp.asarray(left)
    right_gpu = cp.asarray(right)

    # Warm-up run. Wait for it to finish before starting the clock so that
    # pending warm-up work (GPU execution is asynchronous with respect to the
    # host) is not counted in the timed region.
    cp.matmul(left_gpu, right_gpu)
    cp.cuda.Stream.null.synchronize()

    # Benchmark. perf_counter is monotonic and intended for interval timing,
    # unlike time.time(), which follows the adjustable wall clock.
    start_time = time.perf_counter()
    for _ in range(num_iterations):
        cp.matmul(left_gpu, right_gpu)
    # Block until every queued multiplication has completed before stopping
    # the clock.
    cp.cuda.Stream.null.synchronize()
    elapsed = time.perf_counter() - start_time

    return elapsed / num_iterations
# Driver script: builds two random 1024x1024 float32 matrices, times their
# GPU product with benchmark_cudnn, then prints the inputs and the result.

# Generate random input data
left = np.random.randn(1024, 1024).astype(np.float32)
right = np.random.randn(1024, 1024).astype(np.float32)

# Run benchmark
avg_time = benchmark_cudnn(left, right)
print(f"cuDNN average execution time: {avg_time:.6f} seconds")

# Print shapes for verification
print("Left input shape:", left.shape)
print("Right input shape:", right.shape)

# Print a few values from the inputs
print("\nLeft input (first few values):")
print(left.flatten()[:10])
print("\nRight input (first few values):")
print(right.flatten()[:10])

# Perform a single run to get the output shape and values.
# cp.asnumpy copies the product back to host memory for printing.
left_gpu = cp.asarray(left)
right_gpu = cp.asarray(right)
result = cp.asnumpy(cp.matmul(left_gpu, right_gpu))
print("\nOutput shape:", result.shape)
print("\nOutput (first few values):")
print(result.flatten()[:10])