percentile_removal
unknown
c_cpp
2 years ago
2.4 kB
24
Indexable
Never
#include <algorithm> // std::sort, std::stable_sort
#include <cstddef>   // std::size_t, std::ptrdiff_t
#include <cstdlib>   // std::rand, std::srand, RAND_MAX
#include <ctime>     // std::time
#include <iomanip>   // std::setprecision
#include <iostream>
#include <numeric>   // std::iota
#include <vector>

// Returns the indices of `v` arranged so that the referenced values are in
// ascending order, i.e. v[result[0]] <= v[result[1]] <= ...
// `v` itself is not modified. An empty input yields an empty result.
//
// std::stable_sort (rather than std::sort) is used so that indices of equal
// values keep their original relative order.
template <typename T>
std::vector<std::size_t> sort_indexes(const std::vector<T> &v) {
    // start from the identity permutation 0, 1, ..., v.size() - 1
    std::vector<std::size_t> idx(v.size());
    std::iota(idx.begin(), idx.end(), std::size_t{0});

    // order the indices by the values they refer to
    std::stable_sort(idx.begin(), idx.end(),
                     [&v](std::size_t i1, std::size_t i2) { return v[i1] < v[i2]; });

    return idx;
}

// Prints every element of `values` on its own line as "<label>: <value>".
static void print_values(const char *label, const std::vector<double> &values) {
    for (double d : values) {
        std::cout << label << ": " << d << "\n";
    }
}

// Demo: fills two equally sized vectors with random values in [0, 1], then
// removes from BOTH vectors the positions holding the lowest 20% of distr1's
// values. The surviving elements keep their original relative order.
int main() {
    const std::size_t sample_count = 5;
    const double cull_fraction = 0.2; // 20%

    std::vector<double> distr1;
    std::vector<double> distr2;
    distr1.reserve(sample_count);
    distr2.reserve(sample_count);

    std::cout << std::setprecision(2); // just to make the printout more readable
    std::srand(static_cast<unsigned int>(std::time(nullptr))); // seed from the clock

    // fill both vectors randomly
    for (std::size_t i = 0; i < sample_count; ++i) {
        distr1.push_back(static_cast<double>(std::rand()) / RAND_MAX);
        distr2.push_back(static_cast<double>(std::rand()) / RAND_MAX);
    }

    print_values("distribution 1", distr1);
    print_values("distribution 2", distr2);

    // indices of distr1 in ascending order of value
    std::vector<std::size_t> kept_idx = sort_indexes(distr1);

    // number of lowest-valued elements to drop (fraction of the size, rounded down)
    const std::size_t cull_count =
        static_cast<std::size_t>(cull_fraction * static_cast<double>(distr1.size()));

    // The first surviving element in sorted order is the cut-off value.
    // Guarded so an empty sample cannot index out of range.
    if (cull_count < kept_idx.size()) {
        std::cout << "deleting based on distr1's 20th percentile, which is "
                  << distr1[kept_idx[cull_count]] << "\n";
    }

    // drop the indices of the elements below the cut-off ...
    kept_idx.erase(kept_idx.begin(),
                   kept_idx.begin() + static_cast<std::ptrdiff_t>(cull_count));
    // ... and put the survivors back into their original positional order
    std::sort(kept_idx.begin(), kept_idx.end());

    // apply the same selection to both distributions
    std::vector<double> culled_vec1;
    std::vector<double> culled_vec2;
    culled_vec1.reserve(kept_idx.size());
    culled_vec2.reserve(kept_idx.size());
    for (std::size_t i : kept_idx) {
        culled_vec1.push_back(distr1[i]);
        culled_vec2.push_back(distr2[i]);
    }

    print_values("culled vector 1", culled_vec1);
    print_values("culled vector 2", culled_vec2);

    return 0;
}