I'm processing about 3.5gb/15minutes, and I was wondering if I could get this down to a better time...
Note that this is for a very specific CSV file, so I am not concerned about generality - only speed of implementation and optimization.
int main(int argc, char *argv[]) { std::string filename; std::cout << "Please type the file name: " << std::endl; std::cin >> filename; std::string ticker; std::cout << "Please enter the ticker: " << std::endl; std::cin >> ticker; std::ifstream instream(filename.c_str()); std::string ask_filename = ticker + "_ASK.NIT"; std::ofstream askstream(ask_filename.c_str()); std::string bid_filename = ticker + "_BID.NIT"; std::ofstream bidstream(bid_filename.c_str()); std::string line; std::getline(instream,line); while(std::getline(instream,line)) { std::stringstream lineStream(line); std::string cell; std::string new_line; std::vector<std::string> my_str_vec; while(std::getline(lineStream,cell,',')) { my_str_vec.push_back(cell); //new_line.append(cell.append(";")); } // works on date std::string my_date = my_str_vec[0]; std::string::iterator my_iter; std::string processed_date = ""; for(my_iter = my_date.begin(); my_iter != my_date.end(); ++my_iter) { if(std::isalnum(*my_iter) || *my_iter == ' ') processed_date.append(1,(*my_iter)); } my_str_vec[0] = processed_date; std::vector<std::string>::iterator my_vec_iter; for(my_vec_iter = my_str_vec.begin() + 1; my_vec_iter != my_str_vec.end(); ++my_vec_iter) { std::string my_semicol = ";"; *my_vec_iter = my_semicol.append(*my_vec_iter); } askstream << my_str_vec[0] << my_str_vec[1] << my_str_vec[3] << std::endl; bidstream << my_str_vec[0] << my_str_vec[2] << my_str_vec[4] << std::endl; } askstream.close(); bidstream.close(); return 0; }