Recently, I've been wanting to do some larger C projects than I'm used to, since I really like the language. So, as a first step, I decided to implement a nice calculator. My end goal is implementing continued-fraction arithmetic, since I've never really seen that used anywhere before, but currently, it just does int
operations - and I thought it'd be nice to know what I can do better before I actually do more.
The interface is pretty simple: You type in expressions to calculate, and it prints the result, like so (user input is indented)
1 + 1 2 114 - 10 + (_2 ** 3 + 1) * 5 69 * 42 2898 :binary 101101010010 :exit
So far, it supports plus, minus, times, divide and remainder (same symbols as in C), powers with **
, negation with _
, as well as parentheses.
Additionally, you can use the previous result by leaving out a number at the start of a line, and display it in binary or hex using a command starting with :.
#include <stdio.h> #include <stdbool.h> #include <string.h> #define MAX_LINE_LENGTH 256 #define EVAL_STACK_SIZE 256 typedef enum { RES_GOOD, RES_ERROR, RES_EXIT } EvalRes; typedef struct { enum { PLUS = 0, MINUS = 1, MULT = 2, DIV = 3, MOD = 4, EXP = 5, NEG = 6, INT = 7, OPEN = 8, CLOSE = 9, END = 10, NONE = 11 } type; union { int num; }; } Token; #ifdef DEBUG char *TOKEN_NAMES[] = {"PLUS", "MINUS", "MULT", "DIV", "MOD", "EXP", "NEG", "INT", "OPEN", "CLOSE", "END", "::"}; #endif // + - * / % ** _ int precedence[] = {0, 0, 1, 1, 1, 2, 3}; bool right_assoc[] = {0, 0, 0, 0, 0, 1, 0}; bool is_op(Token t) { return t.type < INT; } char get(char **s) { return **s; } char next(char **s) { return **s ? *(*s)++ : 0; } void unget(char **s) { (*s)--; } bool is_blank(char c) { return c == ' ' || c == '\t' || c == '\n' || c == '\r'; } // return next token, advance *line until position directly after Token next_token(char **line) { while(is_blank(get(line))) next(line); Token token; char c = next(line); if(c == '+') token = (Token) {PLUS}; else if(c == '-') token = (Token) {MINUS}; else if(c == '*') { if(get(line) == '*') { next(line); token = (Token) {EXP}; } else token = (Token) {MULT}; } else if(c == '/') token = (Token) {DIV}; else if(c == '%') token = (Token) {MOD}; else if(c == '_') token = (Token) {NEG}; else if(c >= '0' && c <= '9') { // just return 1 place in line, then parse from there unget(line); int num = 0; while((c = get(line)) >= '0' && c <= '9') { num = 10 * num + (c - '0'); next(line); } token = (Token) {INT, {num}}; } else if(c == '(') token = (Token) {OPEN}; else if(c == ')') token = (Token) {CLOSE}; else if(!c) return (Token) {END}; else return (Token) {NONE}; return token; } // push `t` onto `*stack` void push(Token t, Token **stack) { *(++*stack) = t; } // pop token off `*stack` Token pull(Token **stack) { return *((*stack)--); } // get depth'th token off `*stack`; depth=0 is TOS Token peek(int depth, Token **stack) { return (*stack)[-depth]; } #ifdef DEBUG void print_stack(Token *stack) { int ix = 0; while(stack[-ix].type != NONE) ix++; for(int i = ix; i >= 0; i--) printf("%s ", TOKEN_NAMES[stack[-i].type]); printf("\n"); } #endif // signal value; use as `base_op` in reduce() to reduce all const int ALL_OP = -111111; // returns true if reduced successfully, false on error // this is where the actual computation takes place bool reduce(Token **stack, int base_op) { if(peek(0, stack).type != INT) return false; int n = pull(stack).num; while(is_op(peek(0, stack))) { Token op = pull(stack); // reduce all with higher precedence // and ones with same precedence unless right-associative if(base_op != ALL_OP && ( precedence[op.type] < precedence[base_op] || (op.type == base_op && right_assoc[op.type]) )) { push(op, stack); break; } // evaluate ... _ n if(op.type == NEG) n = -n; // evaluate ... m (op) n else { Token t = pull(stack); if(t.type != INT) return false; int m = t.num; switch(op.type) { case PLUS: n = m + n; break; case MINUS: n = m - n; break; case MULT: n = m * n; break; case DIV: n = m / n; break; case MOD: n = m % n; break; case EXP: { int pow = n; n = 1; while(pow-- > 0) n *= m; break; } default: return false; } } } push((Token) {INT, {n}}, stack); return true; } EvalRes eval(char *line) { // previous answer static int ans = 0; #ifdef DEBUG printf("%zu: \"%.*s\"\n", strlen(line), (int)strlen(line), line); #endif // no input -> no output if(*line == 0) return RES_GOOD; // commands if(*line == ':') { #define lineis(s) !strncmp(line, s, sizeof(s)) if(lineis(":exit") || lineis(":quit")) return RES_EXIT; if(lineis(":ans")) printf("%d\n", ans); // print previous answer in hex signedly if(lineis(":hex")) printf("%s%x\n", ans < 0 ? "-" : "", ans < 0 ? -ans : ans); // print previous answer in binary; sadly, no formatting option for that if(lineis(":binary")) { int n = ans; if(n < 0) { printf("-"); n = -n; } int digits = 1; while(n >= 1 << digits) digits++; while(--digits + 1) printf("%d", n >> digits & 1); printf("\n"); } return RES_GOOD; } Token stack_array[EVAL_STACK_SIZE] = {{NONE}, {INT, {ans}}}; // stack for tokens; the pointer always points to the top element Token *stack = stack_array + 1; #ifdef DEBUG print_stack(stack); #endif bool finished = false; while(!finished) { Token token = next_token(&line); #ifdef DEBUG printf("Token %s\n", TOKEN_NAMES[token.type]); #endif switch(token.type) { // +,- reduce +,-,*,/,%,**,_, then push self case PLUS: case MINUS: // *,/,% reduce *,/,%,**,_, then push self case MULT: case DIV: case MOD: // ** reduces _, then pushes self case EXP: if(!reduce(&stack, token.type)) return RES_ERROR; push(token, &stack); break; // negation, int, open parens are just pushed case NEG: case INT: case OPEN: push(token, &stack); break; // closing paren finished evaluating its sub-expression, pulls open paren off stack case CLOSE: if(!reduce(&stack, ALL_OP)) return RES_ERROR; if(peek(1, &stack).type != OPEN) return RES_ERROR; stack[-1] = stack[0]; stack--; break; case END: finished = true; break; case NONE: return RES_ERROR; } #ifdef DEBUG print_stack(stack); #endif } if(!reduce(&stack, ALL_OP)) return RES_ERROR; // print result, and make it the new previous answer int result = pull(&stack).num; printf("%d\n", result); ans = result; return RES_GOOD; } int main() { char line[MAX_LINE_LENGTH]; while(true) { printf(" "); fgets(line, MAX_LINE_LENGTH, stdin); // remove all blanks (notably, \n and \r) from the end size_t len = strlen(line); while(is_blank(line[len - 1])) line[--len] = 0; EvalRes res = eval(line); if(res == RES_EXIT) break; if(res == RES_ERROR) printf("Invalid Input\n"); } }
Instead of having separate parsing and evaluation stages, lines are tokenized and evaluated on-the-fly using a stack machine.
On start, the stack contains a sentinel - in lieu of tracking the stack height separately to avoid underflow - and the previous result. Then, for every token:
- if it's a number, it's pushed on the stack
- if it's an operation, then first it tries to evaluate any existing expression on the stack - for example, if the stack contains tokens
.. 1 + 2 * 3
and the next token is a+
, then it will reduce this to.. 1 + 6
, then.. 7
, before pushing the+
to get.. 7 +
. Here, we also achieve operator precedence by not reducing operations with lower precedence than the current one - for example, if the stack contains.. 1 + 2
, then a following*
will not reduce this to.. 3
. All this is implemented inreduce()
above. - if it's an open paren, it's just pushed, and acts like the sentinel value at the bottom for any following operations; if it's a closing paren, then it does a
reduce
, before removing the open paren it was preceded by.
In the end, the result of the evaluation (after one more reduce
) is what's on top of the stack. I know that this means something like 1 2 3 4 5
is valid input with result 5, but for now I'm deciding that's a feature :)