CPU (S) v CPU (P) v GPU

mail@pastecode.io avatar
unknown
csharp
3 years ago
3.4 kB
14
Indexable
    /// <summary>
    /// Micro-benchmark that adds two large integer arrays element-wise in three ways —
    /// a serial CPU loop, a parallel CPU loop, and an ILGPU kernel on a CUDA accelerator —
    /// and prints the elapsed milliseconds measured for each.
    /// </summary>
    internal static class Program
    {

        /// <summary>
        /// Generates the input data, runs the three implementations and prints their timings.
        /// A warning line is printed only if the parallel or GPU result differs from the serial one.
        /// </summary>
        private static void Main()
        {
            // One billion ints is roughly 4 GB per array; by the end of the run five such
            // arrays (a, b and the three results) are alive on the host at the same time.
            const int size = 1_000_000_000;
            (int[] a, int[] b) = GetComputeData(size);
            (long tCpuS, int[] cpuS) = CpuComputeSerial(a, b, size);
            (long tCpuP, int[] cpuP) = CpuComputeParallel(a, b, size);
            (long tGpu, int[] gpu) = GpuCompute(a, b, size);

            // The serial result is the reference; a timing is meaningless if the
            // corresponding implementation produced different numbers.
            WarnIfDifferent("CPU(P)", cpuS, cpuP);
            WarnIfDifferent("GPU", cpuS, gpu);

            Console.WriteLine("CPU(S) Time: " + tCpuS);
            Console.WriteLine("CPU(P) Time: " + tCpuP);
            Console.WriteLine("GPU Time: " + tGpu);
        }

        /// <summary>
        /// Adds the first <paramref name="size"/> elements of <paramref name="a"/> and
        /// <paramref name="b"/> with a plain single-threaded loop.
        /// </summary>
        /// <param name="a">First operand array.</param>
        /// <param name="b">Second operand array.</param>
        /// <param name="size">Number of elements to add; must not exceed either array's length.</param>
        /// <returns>Elapsed milliseconds of the loop only, and the array of sums.</returns>
        /// <exception cref="ArgumentNullException">An input array is null.</exception>
        /// <exception cref="ArgumentOutOfRangeException"><paramref name="size"/> is negative or larger than an input array.</exception>
        private static (long ElapsedMs, int[] Result) CpuComputeSerial(int[] a, int[] b, int size)
        {
            EnsureValidInputs(a, b, size);

            // Allocated before the stopwatch starts so only the additions are timed.
            int[] c = new int[size];

            Stopwatch s = Stopwatch.StartNew();
            for (int i = 0; i < size; i++) {
                c[i] = a[i] + b[i];
            }
            s.Stop();
            return (s.ElapsedMilliseconds, c);
        }

        /// <summary>
        /// Adds the first <paramref name="size"/> elements of <paramref name="a"/> and
        /// <paramref name="b"/> using the Task Parallel Library.
        /// </summary>
        /// <param name="a">First operand array.</param>
        /// <param name="b">Second operand array.</param>
        /// <param name="size">Number of elements to add; must not exceed either array's length.</param>
        /// <returns>Elapsed milliseconds of the parallel loop only, and the array of sums.</returns>
        /// <exception cref="ArgumentNullException">An input array is null.</exception>
        /// <exception cref="ArgumentOutOfRangeException"><paramref name="size"/> is negative or larger than an input array.</exception>
        private static (long ElapsedMs, int[] Result) CpuComputeParallel(int[] a, int[] b, int size)
        {
            EnsureValidInputs(a, b, size);

            int[] c = new int[size];

            Stopwatch s = Stopwatch.StartNew();
            // Partitioner.Create rejects an empty range, so skip the loop entirely for size 0.
            if (size > 0) {
                // Hand each worker a contiguous index range and run a plain inner loop over it.
                // Invoking one delegate per element would make delegate-call overhead, not the
                // addition, dominate the measured time for a body this small.
                Parallel.ForEach(System.Collections.Concurrent.Partitioner.Create(0, size), range =>
                {
                    for (int i = range.Item1; i < range.Item2; i++) {
                        c[i] = a[i] + b[i];
                    }
                });
            }
            s.Stop();
            return (s.ElapsedMilliseconds, c);
        }

        /// <summary>
        /// Adds the first <paramref name="size"/> elements of <paramref name="a"/> and
        /// <paramref name="b"/> with an ILGPU kernel on CUDA accelerator 0.
        /// </summary>
        /// <remarks>
        /// The stopwatch covers only the kernel launch and the following synchronize.
        /// Buffer allocation, kernel loading and the copy of the result back to the host
        /// all happen outside the timed region.
        /// </remarks>
        /// <param name="a">First operand array.</param>
        /// <param name="b">Second operand array.</param>
        /// <param name="size">Number of elements to add; must not exceed either array's length.</param>
        /// <returns>Elapsed milliseconds of launch plus synchronize, and the array of sums read back from the device.</returns>
        /// <exception cref="ArgumentNullException">An input array is null.</exception>
        /// <exception cref="ArgumentOutOfRangeException"><paramref name="size"/> is negative or larger than an input array.</exception>
        private static (long ElapsedMs, int[] Result) GpuCompute(int[] a, int[] b, int size)
        {
            EnsureValidInputs(a, b, size);

            // Initialize ILGPU. The using declarations release everything in reverse
            // declaration order (buffers, then accelerator, then context) when the method
            // exits, including when an allocation or the kernel launch throws.
            using Context gpuCtx = Context.CreateDefault();
            using Accelerator accelerator = gpuCtx.CreateCudaAccelerator(0);

            // Load data
            using MemoryBuffer1D<int, Stride1D.Dense> gpuA = accelerator.Allocate1D(a);
            using MemoryBuffer1D<int, Stride1D.Dense> gpuB = accelerator.Allocate1D(b);
            using MemoryBuffer1D<int, Stride1D.Dense> gpuC = accelerator.Allocate1D<int>(size);

            // Compile kernel
            Action<Index1D, ArrayView1D<int, Stride1D.Dense>, ArrayView1D<int, Stride1D.Dense>, ArrayView1D<int, Stride1D.Dense>> kernel = accelerator.LoadAutoGroupedStreamKernel(
                (Index1D i, ArrayView1D<int, Stride1D.Dense> aInGpu, ArrayView1D<int, Stride1D.Dense> bInGpu, ArrayView1D<int, Stride1D.Dense> cInGpu) =>
                {
                    cInGpu[i] = aInGpu[i] + bInGpu[i];
                }
            );

            Stopwatch s = Stopwatch.StartNew();
            // Run
            kernel(size, gpuA, gpuB, gpuC);

            // Wait for the launch to finish so the stopwatch sees the whole execution.
            accelerator.Synchronize();
            s.Stop();

            // Read the result back while the buffer and accelerator are still alive.
            int[] c = gpuC.GetAsArray1D();

            return (s.ElapsedMilliseconds, c);
        }

        /// <summary>
        /// Creates two arrays of <paramref name="size"/> elements filled with the values
        /// returned by <see cref="Random.Next()"/>.
        /// </summary>
        /// <param name="size">Length of each generated array.</param>
        /// <returns>The two generated arrays.</returns>
        private static (int[] A, int[] B) GetComputeData(int size)
        {
            int[] a = new int[size];
            int[] b = new int[size];

            Random random = new();
            for (int i = 0; i < size; i++) {
                a[i] = random.Next();
                b[i] = random.Next();
            }

            return (a, b);
        }

        /// <summary>
        /// Rejects inputs the compute methods cannot index safely.
        /// </summary>
        /// <param name="a">First operand array.</param>
        /// <param name="b">Second operand array.</param>
        /// <param name="size">Requested element count.</param>
        /// <exception cref="ArgumentNullException">An input array is null.</exception>
        /// <exception cref="ArgumentOutOfRangeException"><paramref name="size"/> is negative or larger than an input array.</exception>
        private static void EnsureValidInputs(int[] a, int[] b, int size)
        {
            if (a is null) {
                throw new ArgumentNullException(nameof(a));
            }
            if (b is null) {
                throw new ArgumentNullException(nameof(b));
            }
            if (size < 0 || size > a.Length || size > b.Length) {
                throw new ArgumentOutOfRangeException(nameof(size), size, "size must be between 0 and the length of the shorter input array.");
            }
        }

        /// <summary>
        /// Prints a warning when <paramref name="actual"/> is not element-for-element equal
        /// to <paramref name="expected"/>; prints nothing when they match.
        /// </summary>
        /// <param name="label">Name of the implementation that produced <paramref name="actual"/>.</param>
        /// <param name="expected">Reference result.</param>
        /// <param name="actual">Result to check against the reference.</param>
        private static void WarnIfDifferent(string label, int[] expected, int[] actual)
        {
            if (!expected.AsSpan().SequenceEqual(actual)) {
                Console.WriteLine("WARNING: " + label + " result differs from CPU(S) result");
            }
        }

    }