Test

 avatar
unknown
c_cpp
2 years ago
5.6 kB
9
Indexable
#include <algorithm>
#include <iostream>
#include <utility>
// #include <atomic>

#include "parallel.h"

// #include "get_time.h"

using namespace parlay;

const int BLOCK = 1000;
const int SCAN_CONTROL = 500000;

// std::atomic<int> _scan, _partition, copy, base = 0;
double N;

// declarations ----------------------------
template <class Func>
void up_sweep(const Func& A, int* ps, int s, int t);

void down_sweep(int* ps, int s, int t, int p);

template <class Func>
void scan(const Func& A, int* ps, int n);

template <class T>
std::pair<int, int> partition(T *A, T *B, int* __ps, int n, T pivot);

template <class T>
T find_pivot(T *A, int n);

template <class T>
void _quicksort(T* A, T* B, int* ps, int n);

template <class T>
void quicksort(T *A, size_t n);
// -----------------------------------------

// for testing
template <class T>
void quicksort(T *A, size_t n) {
  // T _A [] = {9, 8, 7, 6, 5, 4, 3, 2, 1, 0};
  //T _A [] = {0, 0, 1, -7, 5, 2, 10, 1, 2, 2};
  //T _A [] = {4, 5, 1, -7, 5, -2, 10, 1, 2, 2};
  //n = 10;
  //A = _A;

  N = n;
  T * B = new T [n];
  int * ps = new int [2*n];

  // timer time;
  _quicksort(A, B, ps, n);
  //time.stop();

  // std::cout << "done. time: " << time.total_time() << std::endl;
  // std::cout << "scan time: " << _scan / 10000.0 << " "
  //           << "partition time " << _partition / 10000.0 << " "
  //           << "base time " << base / 10000.0 << " "
  //           << "copy time " << copy / 10000.0 << std::endl;
  //partition(A, B, ps, n, (T)10000);

  // for (int i = 0; i < n; ++i) {
  //   std::cout << A[i] << ' ';
  // } std::cout << std::endl;
}

template <class T>
void _quicksort(T* A, T* B, int* ps, int n) {
  //timer time;
  if (n <= 10000000) {
    std::sort(A, A+n);

    //time.stop();
    //std::cout << "sort n = " << n << ". time: " << time.total_time() << std::endl;
    //base.fetch_add(time.total_time()*10000, std::memory_order_relaxed);
    return;
  }
  // std::sort(A, A + 10000);
  // T pivot = A[10000/2];

  T pivot = find_pivot(A, 100000);

  //timer t1;
  std::pair<int, int> t = partition(A, B, ps, n, pivot);
  //t1.stop();
  //_partition.fetch_add(t1.total_time()*10000, std::memory_order_relaxed);
  
  //timer t2;
  parallel_for(0, n/BLOCK + 1, [&](int _i) {
    for (int i = _i*BLOCK; i < (_i+1)*BLOCK && i < n; ++i) {
      A[i] = B[i];
    }
  });
  //t2.stop();
  //copy.fetch_add(t1.total_time()*10000, std::memory_order_relaxed);

  //time.stop();
  //std::cout << "n = " << n << ". time: " << time.total_time() <<
  //  ". partition: " << t1.total_time() << 
  //  ". copy: " << t2.total_time() << std::endl;

  auto f1 = [&]() { _quicksort(A, B, ps, t.first); };
  auto f2 = [&]() { _quicksort(A + t.second, B + t.second, ps+(2*t.second), n-t.second); };
  par_do(f1, f2); 
}

template <class T>
T find_pivot(T *A, int n) {
  if (n < 10000) {
    std::sort(A, A+n);
    return A[n/2];
  } 
  int t = n/3;

  T * mid = new T [3];

  parallel_for(0, 3, [&](int i) {
    mid[i] = find_pivot(A + t*i, t);
  });

  if ((mid[0] < mid[1] && mid[1] < mid[2]) || (mid[2] < mid[1] && mid[1] < mid[0]))
    return mid[1];
  else if ((mid[1] < mid[0] && mid[0] < mid[2]) || (mid[2] < mid[0] && mid[0] < mid[1]))
    return mid[0];
  else
    return mid[2];
}

template <class T>
std::pair<int, int> partition(T *A, T *B, int* __ps, int n, T pivot) {
  auto less    = [&](int i) { return (int)(A[i] < pivot); };
  auto greater = [&](int i) { return (int)(A[i] > pivot); };

  int ** ps = new int * [2];
  ps[0] = __ps; ps[1] = __ps + n;

  auto f1 = [&]() { scan(less,    ps[0], n); };
  auto f2 = [&]() { scan(greater, ps[1], n); };
  par_do(f1, f2);

  auto _ps = [&](int i, int j) { 
    if (j < n-1) {
      return ps[i][j + 1];
    } else {
      return ps[i][j] + 
        (i == 0 ? less(j) : greater(j));
    }
  };

  int num_equal = n - _ps(0, n-1) - _ps(1, n-1);
  int equal_offset = _ps(0, n-1);
  int greater_offset = equal_offset + num_equal;

  auto f3 = [&] () {
    parallel_for(0, n/BLOCK + 1, [&](int _i) {
      for (int i = _i*BLOCK; i < (_i+1)*BLOCK && i < n; ++i) {
        if (A[i] < pivot) {
          B[_ps(0, i)-1] = A[i];
        } else if (A[i] > pivot) {
          B[_ps(1, i)-1+greater_offset] = A[i];
        }
      }
    });
  };
  auto f4 = [&] () {
    parallel_for(0, (greater_offset-equal_offset)/BLOCK + 1, [&](int _i) {
      for (int i = equal_offset + _i*BLOCK; i < equal_offset + (_i+1)*BLOCK && i < greater_offset; ++i) {
        B[i] = pivot;
      }
    });
  };
  par_do(f3, f4);

  return std::make_pair(_ps(0, n-1), greater_offset);   
}

template <class Func>
void scan(const Func& A, int* ps, int n) {
  //timer time;
  up_sweep(A, ps, 0, n-1);
  down_sweep(ps, 0, n-1, 0);
  //time.stop();
  //std::cout << "scan n = " << n << ". time: " << time.total_time() << std::endl;
  //_scan.fetch_add(time.total_time()*10000, std::memory_order_relaxed);
}

template <class Func>
void up_sweep(const Func& A, int* ps, int s, int t) {
  if (s == t) {
    ps[s] = A(s);
    return;
  }

  auto f1 = [&]() { up_sweep(A, ps, s, (s+t)/2); };
  auto f2 = [&]() { up_sweep(A, ps, (s+t)/2 + 1, t); };

  if (t - s <= SCAN_CONTROL) {
    f1(); f2();
  } else {
    par_do(f1, f2);
  }

  ps[t] = ps[(s+t)/2] + ps[t];
}

void down_sweep(int* ps, int s, int t, int p) {
  if (s == t) {
    ps[s] = p;
    return;
  }
  int m = (s+t)/2;
  int left_sum = ps[m];
  auto f1 = [&]() { down_sweep(ps, s, m, p); };
  auto f2 = [&]() { down_sweep(ps, m + 1, t, p+left_sum); };

  if (t - s <= SCAN_CONTROL) {
    f1(); f2();
  } else {
    par_do(f1, f2);
  }
}
Editor is loading...