I'm just starting with C++ (I have worked with C for more than a year), and as a first program I wanted to write a cleaner version of a Z80 assembler I had previously written in C. This time I tried to do it the C++ way, because I did not want to use C++ as just 'plain C with some extra features' (and I clearly failed). I copied some of the C code and modified it heavily to translate it to C++ (I surely made a lot of mistakes, as I tried different approaches and messed up a lot of the code along the way), but I really could not organize my code or express my ideas in C++.
I tried with std::istringstream but ended up using a string and an index variable. Right now the objective of the code is just to separate each statement into tokens.
The rules for the assembler are the following:
- Registers start with % and can have spaces between % and the first char (e.g. ADD % A, %B)
- Numbers start with $ and follow the same rules as registers
- Strings and chars are between double and single quotes respectively, as in C/C++
- Indirect operands are written between parentheses, as are indirect address numbers
- Everything else is considered as a label
For include: push the new file onto the file stack and return from read statements. If the file we are pushing is already on the stack, then we have an include cycle: abort.
Algorithm:
1.- Read every line, adding each statement to a vector of statements.
2.- For each label, add it to a map and set it as ready.
3.- For each macro, read all its statements and store them inside the macro struct.
4.- If we read an undefined opcode, check whether it is a macro and, if so, copy all of its statements to the statements vector.
5.- Resolve all equ labels that do not have a value; if one references an address label, move it to the address labels group and copy the index of the statement after its referenced label.
6.- After reading a complete file, pass through all statements checking args and adding up sizes; give each address label the total size of the statements before its own statement, plus an offset if the user requested one.
7.- Pass through the statements again, this time assembling them, as by now we should have every label.
So how should someone implement this? Or how could I improve this?
#include <unordered_map>
#include <iostream>
#include <string>
#include <vector>
#include <cctype>
#include <cstddef>
#include <sstream>

/// Classification of a single operand of a Z80 statement.
enum class OperandType {
    // Registers
    rB, rC, rD, rE, rH, rL, rA, rI, rR,
    // Double Registers
    rrBC, rrDE, rrHL, rrSP, rrIX, rrIY, rrAF, rrAFp,
    // Indirect Registers
    iBC, iDE, iHL, iSP, iIX, iIY,
    // Numbers and others
    NN, STRING, LABEL,
    // Flags
    fNZ, fZ, fNC, fC, fPO, fPE, fP, fM
};

/// A parsed operand. Only its classification is recorded at this stage.
struct Operand {
    OperandType type;
};

/// Every mnemonic and directive the tokenizer recognises.
enum class Opcode {
    LD, EX, PUSH, POP,
    ADD, ADC, SBC, SUB, AND, OR, XOR, CP,
    INC, DEC, BIT, SET, RES, IM,
    RLC, RL, RRC, RR, SLA, SRA, SRL,
    JP, JR, DJNZ, CALL, RET, RST, IN, OUT,
    LDI, LDIR, LDD, LDDR, CPI, CPIR, CPD, CPDR,
    NEG, RLD, RRD, INI, INIR, IND, INDR,
    OUTI, OTIR, OUTD, OTDR, RETI, RETN,
    EXX, DAA, CPL, CCF, SCF, NOP, HALT, DI, EI,
    RLCA, RLA, RRCA, RRA,
    DB, DS, EQU, INCLUDE, MACRO
};

/// Returns true for the opcodes whose first operand may be a condition flag.
static inline bool uses_flags(Opcode opcode)
{
    return (opcode == Opcode::JP || opcode == Opcode::JR ||
            opcode == Opcode::CALL || opcode == Opcode::RET);
}

/// Returns true when `c` may appear inside a bare word (label, mnemonic, ...).
///
/// `c` is normally a promoted `char`; it is narrowed to `unsigned char` before
/// calling `std::isalnum`, which has undefined behaviour for negative values.
static inline bool is_valid_char(int c)
{
    const unsigned char ch = static_cast<unsigned char>(c);
    return (std::isalnum(ch) || c == '_' || c == '-' ||
            c == '%' || c == '$' || c == '\'');
}

/// Mnemonic text -> opcode.
const std::unordered_map<std::string, Opcode> opcodes = {
    {"LD", Opcode::LD}, {"EX", Opcode::EX}, {"PUSH", Opcode::PUSH},
    {"POP", Opcode::POP}, {"ADD", Opcode::ADD}, {"ADC", Opcode::ADC},
    {"SBC", Opcode::SBC}, {"SUB", Opcode::SUB}, {"AND", Opcode::AND},
    {"OR", Opcode::OR}, {"XOR", Opcode::XOR}, {"CP", Opcode::CP},
    {"INC", Opcode::INC}, {"DEC", Opcode::DEC}, {"BIT", Opcode::BIT},
    {"SET", Opcode::SET}, {"RES", Opcode::RES}, {"IM", Opcode::IM},
    {"RLC", Opcode::RLC}, {"RL", Opcode::RL}, {"RRC", Opcode::RRC},
    {"RR", Opcode::RR}, {"SLA", Opcode::SLA}, {"SRA", Opcode::SRA},
    {"SRL", Opcode::SRL}, {"JP", Opcode::JP}, {"JR", Opcode::JR},
    {"DJNZ", Opcode::DJNZ}, {"CALL", Opcode::CALL}, {"RET", Opcode::RET},
    {"RST", Opcode::RST}, {"IN", Opcode::IN}, {"OUT", Opcode::OUT},
    {"LDI", Opcode::LDI}, {"LDIR", Opcode::LDIR}, {"LDD", Opcode::LDD},
    {"LDDR", Opcode::LDDR}, {"CPI", Opcode::CPI}, {"CPIR", Opcode::CPIR},
    {"CPD", Opcode::CPD}, {"CPDR", Opcode::CPDR}, {"NEG", Opcode::NEG},
    {"RLD", Opcode::RLD}, {"RRD", Opcode::RRD}, {"INI", Opcode::INI},
    {"INIR", Opcode::INIR}, {"IND", Opcode::IND}, {"INDR", Opcode::INDR},
    {"OUTI", Opcode::OUTI}, {"OTIR", Opcode::OTIR}, {"OUTD", Opcode::OUTD},
    {"OTDR", Opcode::OTDR}, {"RETI", Opcode::RETI}, {"RETN", Opcode::RETN},
    {"EXX", Opcode::EXX}, {"DAA", Opcode::DAA}, {"CPL", Opcode::CPL},
    {"CCF", Opcode::CCF}, {"SCF", Opcode::SCF}, {"NOP", Opcode::NOP},
    {"HALT", Opcode::HALT}, {"DI", Opcode::DI}, {"EI", Opcode::EI},
    {"RLCA", Opcode::RLCA}, {"RLA", Opcode::RLA}, {"RRCA", Opcode::RRCA},
    {"RRA", Opcode::RRA}, {"DB", Opcode::DB}, {"DS", Opcode::DS},
    {"EQU", Opcode::EQU}, {"INCLUDE",Opcode::INCLUDE}, {"MACRO", Opcode::MACRO}
};

/// Register name (without the leading '%') -> operand type.
const std::unordered_map<std::string, OperandType> registers = {
    {"A", OperandType::rA}, {"B", OperandType::rB}, {"C", OperandType::rC},
    {"D", OperandType::rD}, {"E", OperandType::rE}, {"H", OperandType::rH},
    {"L", OperandType::rL}, {"I", OperandType::rI}, {"R", OperandType::rR},
    {"BC", OperandType::rrBC}, {"DE", OperandType::rrDE},
    {"HL", OperandType::rrHL}, {"SP", OperandType::rrSP},
    {"AF", OperandType::rrAF}, {"IX", OperandType::rrIX},
    {"IY", OperandType::rrIY}, {"AF'", OperandType::rrAFp}
};

/// Condition flag name -> operand type.
const std::unordered_map<std::string, OperandType> flags = {
    {"NZ", OperandType::fNZ}, {"Z", OperandType::fZ},
    {"NC", OperandType::fNC}, {"C", OperandType::fC},
    {"PO", OperandType::fPO}, {"PE", OperandType::fPE},
    {"P", OperandType::fP}, {"M", OperandType::fM}
};

/// Tokens collected so far; read_opcode() and read_operands() append to it.
std::vector<std::string> tokens;

/// Returns true when `it` does not index a character of `line`.
static inline bool at_end(const std::string& line, int it)
{
    return it < 0 || static_cast<std::size_t>(it) >= line.size();
}

/// Returns the character at `it`, or '\0' when `it` is past the end.
static inline char peek(const std::string& line, int it)
{
    return at_end(line, it) ? '\0' : line[static_cast<std::size_t>(it)];
}

/// Advances `it` past any whitespace in `line`.
inline void eat_spaces(const std::string& line, int& it)
{
    while (!at_end(line, it) &&
           std::isspace(static_cast<unsigned char>(peek(line, it))))
        ++it;
}

/// Splits the comma-separated operand list starting at `it` into `tokens`.
///
/// Recognised operand shapes:
///  - quoted literals ('x' / "text") and parenthesised indirect operands,
///    stored with their delimiters;
///  - '%' registers and '$' numbers, where spaces between the sigil and the
///    first character are skipped;
///  - anything else made of is_valid_char() characters (labels, flags).
///
/// Parsing stops at end of line or at a ';' comment. On malformed input a
/// diagnostic is written to std::cout and parsing of the line is abandoned.
///
/// Limitations kept from the original design: a ';' inside a quoted literal
/// is treated as the start of a comment, and escape sequences are not
/// interpreted.
void read_operands(const std::string& line, int& it)
{
    while (!at_end(line, it)) {
        std::string token;
        const char first = peek(line, it);

        switch (first) {
        case ';':
            return;

        case '\'':
        case '\"':
        case '(': {
            // Quotes close with the same character, a parenthesis with ')'.
            const char closer = (first == '(') ? ')' : first;
            const std::string stops{';', closer};
            const std::size_t start = static_cast<std::size_t>(it);
            const std::size_t next = line.find_first_of(stops, start + 1);
            if (next == std::string::npos || line[next] == ';') {
                // Unterminated operand: there is no valid position to resume at.
                std::cout << "Error Operands" << '\n';
                return;
            }
            token = line.substr(start, next - start + 1);
            it = static_cast<int>(next + 1);
            break;
        }

        case '%':
        case '$':
            token += first;
            ++it;
            eat_spaces(line, it);
            while (std::isalnum(static_cast<unsigned char>(peek(line, it)))) {
                token += peek(line, it);
                ++it;
            }
            // Allow the trailing apostrophe of the shadow register (%AF').
            if (first == '%' && peek(line, it) == '\'') {
                token += '\'';
                ++it;
            }
            break;

        default:
            while (is_valid_char(peek(line, it))) {
                token += peek(line, it);
                ++it;
            }
            break;
        }

        if (token.empty()) {
            // The current character cannot start an operand.
            std::cout << "Garbage1:" << peek(line, it) << '\n';
            return;
        }
        tokens.push_back(token);

        eat_spaces(line, it);
        if (at_end(line, it) || peek(line, it) == ';')
            break;
        if (peek(line, it) != ',') {
            std::cout << "Garbage1:" << peek(line, it) << '\n';
            return;
        }

        // Eat comma
        ++it;

        // A comma must be followed by another operand.
        eat_spaces(line, it);
        if (at_end(line, it) || peek(line, it) == ';') {
            std::cout << "Garbage2:";
            if (!at_end(line, it))
                std::cout << peek(line, it);
            std::cout << '\n';
            return;
        }
    }
}

/// Reads an optional "label:" followed by the opcode word.
///
/// Only the opcode word is appended to `tokens`; a label is consumed and
/// skipped. Nothing is appended for blank, comment-only or label-only lines,
/// or when the word does not start with a letter (a diagnostic is printed in
/// that last case).
void read_opcode(const std::string& line, int& it)
{
    // At most two words are examined: a possible label and the opcode.
    for (int pass = 0; pass < 2; ++pass) {
        eat_spaces(line, it);
        if (at_end(line, it) || peek(line, it) == ';')
            return;

        if (!std::isalpha(static_cast<unsigned char>(peek(line, it)))) {
            std::cout << "Error Opcode\n";
            return;
        }

        std::string token;
        while (is_valid_char(peek(line, it))) {
            token += peek(line, it);
            ++it;
        }

        if (peek(line, it) == ':') {
            ++it;   // it was a label; look for the opcode next
        } else {
            tokens.push_back(token);
            break;
        }
    }
    eat_spaces(line, it);
}

/// Tokenizes one hard-coded sample statement and prints every token.
///
/// The opcode is looked up in `opcodes`; for RET with any operand, or for a
/// flag-using opcode with more than one operand, the first operand is looked
/// up in `flags`. Failed lookups are reported on std::cout.
void read_statements(void)
{
    const std::string line = "RET \"go\", b ;comment";
    std::vector<Operand> operands;
    int it = 0;

    // Index relative to what is already in `tokens`, which is global.
    const std::size_t opcode_index = tokens.size();
    read_opcode(line, it);

    if (tokens.size() > opcode_index) {
        const auto opcode_it = opcodes.find(tokens[opcode_index]);
        if (opcode_it == opcodes.end())
            std::cout << "Unknown Opcode: " << tokens[opcode_index] << '\n';

        read_operands(line, it);
        const std::size_t operand_count = tokens.size() - opcode_index - 1;

        if (opcode_it != opcodes.end()) {
            const Opcode opcode = opcode_it->second;
            if ((opcode == Opcode::RET && operand_count > 0) ||
                (uses_flags(opcode) && operand_count > 1)) {
                const std::string& flag_text = tokens[opcode_index + 1];
                const auto flag_it = flags.find(flag_text);
                if (flag_it != flags.end())
                    operands.push_back(Operand{flag_it->second});
                else
                    std::cout << "Unknown Flag:" << flag_text << '\n';
            }
        }
    }

    for (const std::string& token : tokens)
        std::cout << token << '\n';
}

int main()
{
    read_statements();
}
What is `Operand`? An `enum`? A `namespace`? Something else?
`OperandType`.