4
\$\begingroup\$

I recently wrote a mathematical expression parser in C++. The software can read valid mathematical expressions and evaluate them. An example of an expression the code can parse is (sin(pi)*e^(-3)). I would love to receive constructive feedback if possible for the code. Such as compiler optimizations, memory optimization and algorithms improvements.

The important methods are:

  1. tokenize
  2. pre_process_trig_and_constants
  3. evaluate
  4. eval_with_braces

The important structs are:

  1. toks_and_ops
  2. expr_stack

tokenize

toks_and_ops parser::tokenize(string expr){ /** * This method tokenizes a string (without braces) into numbers and operands. The string must be a valid mathematical expression * It is recommended to be called from evaluate() method since there is no support for braces. * For evaluation of expressions with braces check out struct expr_stack. * */ const int len= expr.size(); string tok =""; char current_char; // struct toks_and_ops is used here and the following vector<> members are for the struct feilds. vector<double> toks; vector<char> ops ; int current_index=0; while(current_index<len){ current_char=expr.at(current_index); /** * check if the character is a number a.k.a between values 57 and 48 in ASCII *'.' is 46 in ASCII and - is 45 *This method is faster than cross referencing character with every other numbers */ if((current_char<58 && current_char>44) && current_char != 47){ if(current_char == MINUS){ if(expr.at(current_index -1) > 47 && expr.at(current_index -1) <58){ /** * Pure subtraction is considered as addition of a negative value. * if the character before the minus sign is a number its a pure subtraction * if the character before is an operation it is a normal operation * It is guaranteed that there will be always one character before minus sign * evaluate() method will append '0' before an expression if the first character is '-' * Furthermore evaluate() will only work with expressions without braces so an error is not possible */ ops.push_back(PLUS); toks.push_back(get_num(tok)); tok=""; } } tok +=current_char; }else{ /** * If the character is not a number , '.' or '-' */ toks.push_back( get_num(tok)); ops.push_back(current_char); tok=""; } current_index++; } toks.push_back(get_num(tok)); toks_and_ops res ={toks,ops}; return res; } 

pre_process_trig_and_constants

string parser::pre_process_trig_and_constants(string source){ source =replace_expr(source,"sin","s"); source =replace_expr(source,"cos", "c"); source =replace_expr(source,"tan", "t"); source =replace_expr(source,"e", to_string(exp(1))); source =replace_expr(source,"pi", to_string(M_PI)); return source; } 

evaluate
WARNING: This is long.

double parser::evaluate(string expr){ /** * Central method for evaluation. * This method is not directly called by the user * This method serves as a helper for the structure expr_stack to evaluate expressions with braces * This method can be called if required to evaluate simple expressions i.ewithout any braces. */ if(expr.empty()){ return 1; }if(expr.at(0) == MINUS){ /** * preventing an error for tokenize() method */ expr ="0" +expr; }if(expr.size() ==1 ){ return get_num(expr); } toks_and_ops r =tokenize(expr); int ops_index=0; /** * The operations use BEDMAS * In this context we exclude braces since this method does not evaluate expression with brace * Power takes precedence then * ->/ -> + * Indirectly expressions inside brackets are evaluated first by the expression_stack */ for(auto i = r.ops.begin(); i< r.ops.end();){ if(*i == POWER){ r.toks[ops_index] = pow(r.toks[ops_index] , r.toks[ops_index+1]); remov(ops_index+1, r.toks); remov(ops_index, r.ops); }else{ i++; ops_index++; } } if(r.toks.size() ==1){ return r.toks[0]; } ops_index=0; for(auto i = r.ops.begin(); i< r.ops.end();){ if(*i == MULTI){ r.toks[ops_index] =r.toks[ops_index+1] * r.toks[ops_index]; remov(ops_index+1, r.toks); remov(ops_index, r.ops); }else{ i++; ops_index++; } } if(r.toks.size() ==1){ return r.toks[0]; } ops_index=0; for(auto i = r.ops.begin(); i< r.ops.end();){ if(*i == DIV){ r.toks[ops_index] = r.toks[ops_index] / r.toks[ops_index+1]; remov(ops_index+1, r.toks); remov(ops_index, r.ops); }else{ i++; ops_index++; } } if(r.toks.size() ==1){ return r.toks[0]; } ops_index=0; for(auto i = r.ops.begin(); i< r.ops.end();){ if(*i == PLUS){ r.toks[ops_index] = r.toks[ops_index+1] + r.toks[ops_index]; remov(ops_index+1, r.toks); remov(ops_index, r.ops); }else{ i++; ops_index++; } } return r.toks[0]; }; 

eval_with_braces

double parser::eval_with_braces(string expr){ /** * evaluates expressions with braces * see expr_stack structure for more information on evaluation of expressions with braces */ expr_stack eval; expr_stack trig_eval; int ind=0; int trig_ind; string temp=""; string sec_temp=""; expr.erase( remove(expr.begin(),expr.end(), ' '), expr.end()); expr = pre_process_trig_and_constants(expr); expr =expr+"+0"; for(auto i =expr.begin();i<expr.end();){ if((*i!=SIN && *i !=COS) && *i != TAN){ eval.push(*i); i++; ind++; }else{ trig_ind =ind+1; //isolates the immediate valid expression after trig indicator i.e sin, cos or tan while(!trig_eval.expr_done){ trig_eval.push(expr.at(trig_ind)); trig_ind++; } if(*i== SIN){ temp= to_string(round_val(sin(evaluate(trig_eval.expr)))); }else if(*i== COS){ temp= to_string(round_val(cos(evaluate(trig_eval.expr)))); }else{ temp= to_string(round_val(tan(evaluate(trig_eval.expr)))); } sec_temp =expr.substr(0,ind) ; sec_temp+= temp; sec_temp+=expr.substr(ind+ trig_eval.push_count +1); expr=sec_temp; sec_temp=""; temp=""; trig_eval.recycle(); trig_ind=0; } } return evaluate(eval.expr); }; 

toks_and_ops

 struct toks_and_ops{ /** * compound data type for conveninece */ vector<double> toks; vector<char> ops; }; 

expr_stack

struct expr_stack{ /** * member fields * */ bool expr_done =false; int ind=0; int prev= -1; int push_count=0; vector<int> prev_l_bracs; string expr=""; string ref; /** * for re-initializing this stack */ void recycle(){ /** * sets all members fields to initial value */ expr_done =false; ind=0; prev= -1; push_count=0; prev_l_bracs.clear(); expr=""; ref=""; } /** * method for the stack * */ void push(char i){ /** * The algorithm for push() dynamically checks for complete braces ( complete braces are a pair of adjacent ( and ) ) * If more left braces are found the current starting index of a brace to be completed is updated as the index of most recent left brace * While there is a left brace and a right brace is found , it denotes a valid brace expression and the contents inside it is evaluated as * a mathematical expression by calling evaluate() * After this the current starting index for a brace to be completed is updates as the most recent one before the previousleft brace * The previous valid brace expression is replaced by the result of the evaluation * * Once a full valid brace expression is completely pushed inside this stack there will not be any braces left and * evaluate() method can be called to evaluate it. * * expr_stack acts like a pre-processor for expressions */ push_count++; if(i == LBRAC){ prev_l_bracs.push_back(ind); prev= ind; expr+= i; ind++; }else if(i == RBRAC && prev>=0){ ref=expr.substr(prev +1 , ind -prev ); ref=to_string(evaluate(ref)); expr = expr.substr(0, prev)+ ref; ind =prev+ ref.size(); remov(prev_l_bracs.size() -1 ,prev_l_bracs); if(!prev_l_bracs.empty()){ prev = (prev_l_bracs.at(prev_l_bracs.size()-1)); }else{ prev =-1; expr_done = true; } }else{ expr+= i; ind++; } }; }; 

Please ask or comment if any clarification is needed, or if anything is ambiguous :)

\$\endgroup\$

    1 Answer 1

    2
    \$\begingroup\$

    Constant parameters

    parser::tokenize(string expr){ 

    would be better off as

    parser::tokenize(const string &expr) { 

    Similarly for pre_process_trig_and_constants, which should use an intermediate variable for the reassignment statements in that function.

    Encapsulation

    toks_and_ops res ={toks,ops}; 

    Rather than structures that don't know how to initialize themselves - such as this toks_and_ops - I would sooner see a constructor for this toks_and_ops as class that accepts a const string &expr and does most of what tokenize is doing now.

    ASCII symbols

    if((current_char<58 && current_char>44) && current_char != 47){ 

    is very difficult to understand and maintain. Given that you say you're already assuming ASCII, as long as your compiler is configured to also apply ASCII to literals, you should just be using character literals like '.' .

    For-loop

    int current_index=0; while(current_index<len){ // ... current_index++; } 

    should just be

    for (int current_index = 0; current_index < len; current_index++) { 
    \$\endgroup\$

      Start asking to get answers

      Find the answer to your question by asking.

      Ask question

      Explore related questions

      See similar questions with these tags.