# 1003_matmul_cudnn
# Pasted snippet (python, 1.4 kB), originally posted by user_3093867.
import cupy as cp
import numpy as np
import time

def benchmark_cudnn(left, right, num_iterations=1):
    """Return the mean wall-clock seconds per ``cp.matmul(left, right)`` call.

    Both operands are copied to the GPU with ``cp.asarray`` and one untimed
    warm-up product is run before the clock starts, so neither the transfer
    nor the warm-up is included in the reported time.

    Args:
        left: Left operand; anything ``cp.asarray`` accepts.
        right: Right operand; anything ``cp.asarray`` accepts.
        num_iterations: Number of timed products. Must be at least 1.

    Returns:
        Seconds elapsed from the start of the timed loop until the null
        stream has been synchronized, divided by ``num_iterations``.

    Raises:
        ValueError: If ``num_iterations`` is less than 1.
    """
    # Reject bad counts up front instead of failing with ZeroDivisionError
    # (0) or silently reporting a negative duration (negative values).
    if num_iterations < 1:
        raise ValueError(
            f"num_iterations must be at least 1, got {num_iterations!r}"
        )

    # Transfer data to GPU
    left_gpu = cp.asarray(left)
    right_gpu = cp.asarray(right)

    # Warm-up run, then wait for it: GPU work is queued asynchronously, so
    # without this synchronize the warm-up (and the transfers above) could
    # still be executing when the timer starts and inflate the measurement.
    cp.matmul(left_gpu, right_gpu)
    cp.cuda.Stream.null.synchronize()

    # Benchmark. perf_counter is monotonic and high-resolution, unlike
    # time.time(), which can jump when the system clock is adjusted.
    start_time = time.perf_counter()
    for _ in range(num_iterations):
        cp.matmul(left_gpu, right_gpu)
    # Wait for every queued product to finish before stopping the clock.
    cp.cuda.Stream.null.synchronize()
    end_time = time.perf_counter()

    return (end_time - start_time) / num_iterations

# Host-side operands: two 1024x1024 standard-normal matrices in float32.
left = np.random.standard_normal((1024, 1024)).astype(np.float32)
right = np.random.standard_normal((1024, 1024)).astype(np.float32)

# Time the GPU product with the benchmark's default iteration count.
avg_time = benchmark_cudnn(left, right)
print(f"cuDNN average execution time: {avg_time:.6f} seconds")

# Echo the operand shapes so the run can be sanity-checked.
print("Left input shape:", left.shape)
print("Right input shape:", right.shape)

# Show the leading ten entries of each operand, in row-major order.
print("\nLeft input (first few values):")
print(left.ravel()[:10])
print("\nRight input (first few values):")
print(right.ravel()[:10])

# One more product, copied back to the host, to inspect shape and values.
left_gpu, right_gpu = cp.asarray(left), cp.asarray(right)
result = cp.asnumpy(cp.matmul(left_gpu, right_gpu))

print("\nOutput shape:", result.shape)
print("\nOutput (first few values):")
print(result.ravel()[:10])
# End of pasted snippet.