[NOTE] This question can be deprecated in favor of version 0.3.
This is a code revision of a previous post and works well.
Code has been reworked to be far more clear and concise, thanks to reviewer suggestions. Redundant and unnecessary data structures have been removed, and code has been made more consistent with C++11.
The purpose of this code is to produce a universe of points, randomly generated around predetermined centroids, which are provided as a vector of vectors. The final product is a file of sample points, to be used for fake data analysis in another program. The objective here was brevity and speed - I feel the code could be made far more concise, but it is working.
A detailed description of the primary algorithm's parameters is in the header file (clustergen.h).
My goal is two-fold: speed and conciseness (in code). Speed takes precedence over conciseness, but thankfully they tend to go hand in hand.
clustergen.h
#include <string>
#include <vector>
#include <iostream>
#include <fstream>
#include <sstream>
#include <random>
#include <chrono>
#ifndef CLUSTERGEN_H
#define CLUSTERGEN_H

// ---- Point generation: user-tunable constants ----
// UNIFORM mode: each coordinate is drawn within +/- PT_BOUND of the centroid coordinate
constexpr double PT_BOUND = 10.0;
// NORMAL mode: each coordinate is drawn with this standard deviation around the centroid coordinate
constexpr double PT_SD = 5.0;

// ---- Primary algorithm ----
// Writes [k] generated points to [file_out], one point per line.
//   [c]        : vector of centroids (each centroid is a vector of coordinates);
//                its size is the number of centroids
//   [file_out] : destination file for the generated points
//   [file_rpt] : destination file for the status report (used instead of console output)
//   [csv]      : false -> coordinates of a point are whitespace-separated
//                true  -> coordinates of a point are comma-separated
//   [norm]     : true  -> coordinates are NORMALLY distributed around the centroid (see PT_SD)
//                false -> coordinates are UNIFORMLY distributed around the centroid (see PT_BOUND)
// Centroids are expected to share the dimensionality of the first centroid in [c].
// NOTE(review): the implementation does not appear to check for dimensional mismatches - confirm.
void clustergen(unsigned int k, std::vector<std::vector<double>> &c, std::string file_out, std::string file_rpt, bool csv, bool norm);

#endif // CLUSTERGEN_H
clustergen.cpp
#include "clustergen.h"
// Generates [k] random points around the centroids in [c] and writes them to [file_out],
// one point per line (no trailing line break after the last point). Centroids are used
// round-robin: point 1 -> centroid 1, point 2 -> centroid 2, ... wrapping back to the first.
//   [c]        : centroids; each inner vector holds one centroid's coordinates. Each point is
//                written with as many coordinates as its own centroid has (no mismatch check).
//   [file_out] : receives the generated points
//   [file_rpt] : receives a status report (per-centroid counts and the total)
//   [csv]      : true -> comma-separated coordinates, false -> single-space-separated
//   [norm]     : true -> normal distribution (stddev PT_SD) around each centroid coordinate,
//                false -> uniform distribution within +/- PT_BOUND of each centroid coordinate
// If [c] is empty no points are generated and the report shows a total of 0.
// If [file_out] cannot be opened, the failure is noted in the report and nothing is generated.
void clustergen(unsigned int k, std::vector<std::vector<double>> &c, std::string file_out, std::string file_rpt, bool csv, bool norm) {
    // Time-based seed; the cast makes the narrowing to the engine's seed type explicit
    using seed_type = std::default_random_engine::result_type;
    std::default_random_engine gen(static_cast<seed_type>(std::chrono::system_clock::now().time_since_epoch().count()));

    std::ofstream fout(file_out); // This is the useful output of all points
    std::ofstream rout(file_rpt); // Report file to avoid console output

    // '\n' instead of std::endl throughout: std::endl forces a flush on every line
    rout << "CLUSTERGEN STATUS REPORT FOLLOWS..." << '\n';

    if (!fout) {
        // Do not report points as "assigned" when nothing could be written
        rout << '\n' << "ERROR: could not open output file \"" << file_out << "\"; no points generated.";
        return;
    }

    // The distributions are loop-invariant: draw a centroid-independent offset and add it to
    // the centroid coordinate. N(m, sd) == m + sd * N(0, 1) and U(m - b, m + b) == m + U(-b, b).
    std::normal_distribution<double> unit_normal(0.0, 1.0);
    std::uniform_real_distribution<double> uniform_offset(-PT_BOUND, PT_BOUND);
    const char sep = csv ? ',' : ' ';

    std::vector<unsigned int> ct(c.size(), 0); // Points generated per centroid, for the report

    // Guard: with no centroids there is nothing to cycle through (dereferencing would be UB)
    if (!c.empty()) {
        decltype(c.size()) idx = 0; // Index of the centroid receiving the current point
        for (; k > 0; --k) {
            const std::vector<double> &centroid = c[idx];
            bool first_dim = true;
            for (const double center : centroid) {
                if (!first_dim) { fout << sep; } // Separator only between coordinates
                first_dim = false;
                const double offset = norm ? PT_SD * unit_normal(gen) : uniform_offset(gen);
                fout << (center + offset);
            }
            // Line break on all but the last line; a centroid with no coordinates emits nothing
            if (!centroid.empty() && k > 1) { fout << '\n'; }
            ++ct[idx];
            if (++idx == c.size()) { idx = 0; } // Wrap around to the first centroid
        }
    }

    // Reporting to file follows
    unsigned int ct_tot = 0;
    unsigned int i = 0;
    for (const unsigned int n : ct) {
        rout << '\n' << n << " points " << (norm ? "normally" : "uniformly")
             << " distributed around centroid " << ++i << " ...";
        ct_tot += n;
    }
    rout << '\n' << '\n' << ct_tot << " total points assigned.";
}
main.cpp
#include "clustergen.h"
int main() {
    // Three 2-D centroids around which the sample points are generated
    std::vector<std::vector<double>> centroids = {{0, 0}, {50, 30}, {100, 120}};

    // Output format and distribution, named so the call below is self-explanatory
    const bool comma_separated = true;  // CSV output
    const bool use_normal = false;      // UNIFORM distribution

    // Generates 11 points around the three centroids above
    clustergen(11, centroids, "clustergen_out.dat", "clustergen_report.dat", comma_separated, use_normal);
    return 0;
}
