I am a beginner in C++. I recently wrote a dimacs parser in C++ as a learning exercise. Can you suggest improvements in my code?
#include<cstdlib> #include<cstdio> #include<vector> #include<cstring> #define vector std::vector #define memcmp std::memcmp typedef struct { char *current; int line; int nvars; int nclauses; } Parser; int parse_literal(Parser *parser); void skip(Parser *parser); void parse_header(Parser *parser); vector<int> parse_clause(Parser *parser); vector<vector<int>> parse_dimacs(Parser *parser); void print_clause(vector<int> clause); int main(int argc, char *argv[]) { if(argc < 2) { printf("Usage: ./dimacs_parser <dimacs_file>\n"); exit(EXIT_FAILURE); } // read file and allocate large enough string FILE *file = fopen(argv[1], "rb"); fseek(file, 0L, SEEK_END); size_t filesize = ftell(file); rewind(file); char *dimacs = (char *)malloc(filesize); size_t bytes_read = fread(dimacs, sizeof(char), filesize, file); dimacs[bytes_read] = '\0'; // EOF // parse Parser *parser = (Parser *)malloc(sizeof(Parser)); parser->current = dimacs; vector<vector<int>> clauses = parse_dimacs(parser); for(auto &clause: clauses) { print_clause(clause); putchar('\n'); } // free the resources fclose(file); free(dimacs); free(parser); } void print_clause(vector<int> clause) { printf("["); for(int i = 0; i < clause.size() - 1; i++) printf("%d, ", clause[i]); printf("%d]", clause.back()); } int parse_literal(Parser *parser) { skip(parser); bool is_negative = false; int literal = 0; if(*parser->current == '-') { is_negative = true; parser->current++; } while(*parser->current != ' ' && *parser->current != '\n') { switch(*parser->current) { case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9': literal = 10 * literal + (*parser->current - '0'); parser->current++; break; default: printf("Unexpected character %c at [line: %d]", *parser->current, parser->line); exit(EXIT_FAILURE); } } return is_negative ? -literal : literal; } void skip(Parser *parser) { switch(*parser->current) { case ' ': case '\t': case '\r': parser->current++; skip(parser); break; case '\n': parser->current++; parser->line++; skip(parser); break; case 'c': while(*parser->current != '\n') parser->current++; skip(parser); break; default: return; } } void parse_header(Parser *parser) { #define skip_whitespace while(*parser->current++ != ' '); skip(parser); if(*parser->current != 'p') { printf("expect header at [line: %d]", parser->line); exit(EXIT_FAILURE); } parser->current++; skip_whitespace; #undef skip_whitespace if(memcmp(parser->current, "cnf", 3) != 0) { printf("expect 'cnf' in header at [line: %d]", parser->line); exit(EXIT_FAILURE); } parser->current += 3; skip(parser); parser->nvars = parse_literal(parser); skip(parser); parser->nclauses = parse_literal(parser); } vector<int> parse_clause(Parser *parser) { skip(parser); vector<int> clause; while(*parser->current != '\n' && *parser->current != '\0') { int literal = parse_literal(parser); if(abs(literal) > parser->nvars) { printf("literal at [line: %d] exceeds total no.of vars", parser->line); exit(EXIT_FAILURE); } clause.push_back(literal); } if(clause.empty()) { printf("Unexpected EOF at [line: %d]", parser->line); exit(EXIT_FAILURE); } if(clause.back() != 0) { printf("expect '0' at the end of the clause at [line: %d]", parser->line); exit(EXIT_FAILURE); } clause.pop_back(); return clause; } vector<vector<int>> parse_dimacs(Parser *parser) { vector<vector<int>> clauses; parser->line = 1; parse_header(parser); clauses.reserve(parser->nclauses); skip(parser); if(*parser->current == '\0') { printf("expect clauses at [line: %d]", parser->line); exit(EXIT_FAILURE); } while(*parser->current != '\0' && clauses.size() < parser->nclauses) { vector<int> clause = parse_clause(parser); clauses.push_back(clause); } skip(parser); return clauses; }
#define vector std::vector
rather thanusing std::vector;
, along withmalloc
, manual memory management and pointers. You might want to provide context for why you've avoided most standard C++ idioms here in favor of a "C with vectors" flavor.\$\endgroup\$