// percentile_removal
// Paste-site metadata (not part of the program): unknown · c_cpp · 4 years ago · 2.4 kB · 37 · Indexable
#include <algorithm> // std::sort, std::stable_sort
#include <cstddef>   // std::size_t
#include <cstdlib>   // std::rand, std::srand, RAND_MAX
#include <ctime>     // std::time
#include <iomanip>   // std::setprecision
#include <iostream>
#include <numeric>   // std::iota
#include <vector>
/// Returns the indices of `v` arranged so that the values they refer to are in
/// ascending order; `v` itself is left unchanged.
///
/// Element k of the result is the position in `v` of the k-th smallest value.
/// Indices of elements that compare equal keep their original relative order,
/// because the sort used is stable.
///
/// (Adapted from a snippet found on Stack Overflow.)
///
/// @tparam T  element type; must support `operator<`.
/// @param  v  values to rank.
/// @return a vector of `v.size()` indices (empty when `v` is empty).
template <typename T>
std::vector<std::size_t> sort_indexes(const std::vector<T> &v) {
  // Start from the identity permutation 0, 1, ..., v.size() - 1. The seed is a
  // std::size_t so the running counter has the same type as the elements.
  std::vector<std::size_t> idx(v.size());
  std::iota(idx.begin(), idx.end(), std::size_t{0});

  // Order the indices by the values they point at. std::stable_sort is used
  // instead of std::sort to avoid needless re-ordering of indices whose values
  // are equal. Both algorithm calls are std::-qualified so name lookup does
  // not depend on the namespace of the vector's iterator type.
  std::stable_sort(idx.begin(), idx.end(),
                   [&v](std::size_t lhs, std::size_t rhs) { return v[lhs] < v[rhs]; });
  return idx;
}
// Demo program: fills two equally sized vectors with random values in [0, 1],
// then removes from BOTH vectors the positions that hold the lowest 20% of
// distr1's values. The surviving elements keep their original relative order.
// Every intermediate result is printed to stdout. Command-line arguments are
// not used.
int main(int argc, const char * argv[]) {
  const int sample_count = 5;        // number of random samples per distribution
  const double cull_fraction = 0.2;  // share of lowest distr1 values to drop (the "20th percentile" in the message below)

  // Two distributions; elements of BOTH are removed based on the percentile of distr1 only.
  std::vector<double> distr1;
  std::vector<double> distr2;

  std::cout << std::setprecision(2);  // just to make the printout more readable
  std::srand(static_cast<unsigned int>(std::time(nullptr)));  // seed from the current time

  // Fill both vectors, alternating draws between them.
  for (int i = 0; i < sample_count; ++i) {
    distr1.push_back(static_cast<double>(std::rand()) / RAND_MAX);
    distr2.push_back(static_cast<double>(std::rand()) / RAND_MAX);
  }
  for (double d : distr1) {
    std::cout << "distribution 1: " << d << "\n";
  }
  for (double d : distr2) {
    std::cout << "distribution 2: " << d << "\n";
  }

  // Indices of distr1 in ascending order of value.
  std::vector<std::size_t> sorted_idx = sort_indexes(distr1);

  // Number of lowest-ranked elements to drop (truncated toward zero). With
  // cull_fraction < 1 and a non-empty distr1 this is always a valid index.
  const std::size_t cull_count = static_cast<std::size_t>(cull_fraction * distr1.size());
  std::cout << "deleting based on distr1's 20th percentile, which is " << distr1[sorted_idx[cull_count]] << "\n";

  // Discard the indices of the elements below the percentile, then restore
  // ascending index order so the survivors come out in their original order.
  sorted_idx.erase(sorted_idx.begin(), sorted_idx.begin() + cull_count);
  std::sort(sorted_idx.begin(), sorted_idx.end());

  // Copy the surviving positions out of both distributions.
  std::vector<double> culled_vec1;
  std::vector<double> culled_vec2;
  culled_vec1.reserve(sorted_idx.size());
  culled_vec2.reserve(sorted_idx.size());
  for (std::size_t keep : sorted_idx) {
    culled_vec1.push_back(distr1[keep]);
    culled_vec2.push_back(distr2[keep]);
  }

  for (double d : culled_vec1) {
    std::cout << "culled vector 1: " << d << "\n";
  }
  for (double d : culled_vec2) {
    std::cout << "culled vector 2: " << d << "\n";  // label fixed: was misspelled "celled"
  }
  return 0;
}
// (paste-site UI residue, not part of the program: "Editor is loading...")