diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..b497ff1 --- /dev/null +++ b/.gitignore @@ -0,0 +1,2 @@ +bin/ +.vscode/ \ No newline at end of file diff --git a/Makefile b/Makefile index f8d850c..3d0c7ad 100644 --- a/Makefile +++ b/Makefile @@ -1,3 +1,38 @@ -main: - mkdir -p bin/ - g++ src/construct.cpp src/deconstruct.cpp src/construct_debug.cpp src/reconstruct.cpp src/construct_flags.cpp -o bin/construct +CC = g++ +CFLAGS = -std=c++11 -Wall --pedantic-errors -g +SDIR = src +BDIR = bin +_OBJS = construct_debug.o construct_flags.o deconstruct.o reconstruct.o construct.o +OBJS = $(patsubst %,$(BDIR)/%,$(_OBJS)) +PROG = construct + +.PHONY: all clean + +all: $(OBJS) $(BDIR)/$(PROG) + +$(BDIR)/$(PROG): $(OBJS) + mkdir -p $(BDIR) + $(CC) $(OBJS) -o $(BDIR)/$(PROG) + +$(BDIR)/construct.o: $(SDIR)/construct.cpp $(SDIR)/deconstruct.h $(SDIR)/reconstruct.h $(SDIR)/construct_flags.h $(SDIR)/construct_types.h + mkdir -p $(BDIR) + $(CC) -c $(SDIR)/construct.cpp -o $(BDIR)/construct.o $(CFLAGS) + +$(BDIR)/construct_debug.o: $(SDIR)/construct_debug.cpp $(SDIR)/construct_debug.h $(SDIR)/construct_types.h $(SDIR)/reconstruct.h + mkdir -p $(BDIR) + $(CC) -c $(SDIR)/construct_debug.cpp -o $(BDIR)/construct_debug.o $(CFLAGS) + +$(BDIR)/construct_flags.o: $(SDIR)/construct_flags.cpp $(SDIR)/construct_flags.h $(SDIR)/construct_types.h + mkdir -p $(BDIR) + $(CC) -c $(SDIR)/construct_flags.cpp -o $(BDIR)/construct_flags.o $(CFLAGS) + +$(BDIR)/deconstruct.o: $(SDIR)/deconstruct.cpp $(SDIR)/deconstruct.h $(SDIR)/construct_types.h + mkdir -p $(BDIR) + $(CC) -c $(SDIR)/deconstruct.cpp -o $(BDIR)/deconstruct.o $(CFLAGS) + +$(BDIR)/reconstruct.o: $(SDIR)/reconstruct.cpp $(SDIR)/reconstruct.h $(SDIR)/construct_types.h + mkdir -p $(BDIR) + $(CC) -c $(SDIR)/reconstruct.cpp -o $(BDIR)/reconstruct.o $(CFLAGS) + +clean: + rm -rf $(BDIR) diff --git a/README.md b/README.md index a3d1036..e88bc63 100644 --- a/README.md +++ b/README.md @@ -37,7 +37,7 @@ fmt: db "%s", 10, 0 - If 
statements: If statements, like while loops, take a single [conditional](#conditionals) statement - Functions: Functions are declared with the "function" keyword, a "ret" instruction is added to functions in post-processing, so functions will not flow into eachother. -- Function calls: Functions can be called with any number of arguments, independent of the function decleration. +- Function calls: Functions can be called with any number of arguments, independent of the function declaration. If a function is called with more arguments than its declaration states, the extra arguments can be accessed as usual through their respective registers / stack address. Construct function calls, like NASM, use the "call" keyword. Functions can still be called without parentheses or arguments, NASM-style. - Macros: Construct macros can only be used in their respective scopes. Construct macros are declared with the '!' character and cannot contain whitespaces. @@ -58,3 +58,4 @@ Neither side of the comparison can contains whitespaces. ### Required flags - `-f (format)`: Can be either "elf64", "elf32", "elf16", "elf8" and decides the registers used for funcion calls. 
- `-i (input file)`: Specifies the input file to be compiled (-i is not neccesary) +- `-o (output file)`: Specifies the output file to be created diff --git a/examples/factorial.con b/examples/factorial.con index a388f49..18c99f9 100644 --- a/examples/factorial.con +++ b/examples/factorial.con @@ -10,7 +10,7 @@ function factorial(num): while i le num: mul i inc i - + function main(): call factorial(3) !result rax diff --git a/examples/strchr.con b/examples/strchr.con index 4ff263d..81534cf 100644 --- a/examples/strchr.con +++ b/examples/strchr.con @@ -3,10 +3,9 @@ extern printf section .text function strchr(str, chr): !ptrresult rax - !findchr sil mov ptrresult, 0 while byte[str] ne 0: - if byte[str] e findchr: + if byte[str] e chr: mov ptrresult, str ret inc str diff --git a/src/construct.cpp b/src/construct.cpp index f771c5c..b229b07 100644 --- a/src/construct.cpp +++ b/src/construct.cpp @@ -1,17 +1,22 @@ -#include "deconstruct.h" -#include "reconstruct.h" -#include "construct_flags.h" -#include -#include +#include +#include +#include +#include +#include +#include "construct_types.h" +#include "deconstruct.h" // parse_construct() +#include "reconstruct.h" // linearize_tokens() +#include "construct_flags.h" // handle_flags() -int main(int argc, char** argv) { +int main(int argc, char** argv) +{ std::string path; std::string outpath; - if(handle_flags(argc, argv, &path, &outpath) != 0) { + if (handle_flags(argc, argv, &path, &outpath) != 0) { std::cout << "Some flag(s) not set" << std::endl; return 0; } - if(path.empty()) { + if (path.empty()) { std::cout << "No input file specified" << std::endl; return 0; } diff --git a/src/construct_debug.cpp b/src/construct_debug.cpp index a3368cb..5098a7a 100644 --- a/src/construct_debug.cpp +++ b/src/construct_debug.cpp @@ -1,7 +1,13 @@ +#include +#include +#include #include "construct_debug.h" +#include "construct_types.h" +#include "reconstruct.h" // comparison_to_string() -std::string tokentype_to_string(CON_TOKENTYPE 
type) { - switch(type) { +std::string tokentype_to_string(CON_TOKENTYPE type) +{ + switch (type) { case SECTION: return "section"; case TAG: @@ -16,13 +22,16 @@ std::string tokentype_to_string(CON_TOKENTYPE type) { return "cmd"; case MACRO: return "macro"; + case FUNCALL: + return "funcall"; } - return "unknown"; + throw std::invalid_argument("Invalid token type: "+std::to_string(static_cast(type))); } -std::string token_to_string(con_token token) { +std::string token_to_string(con_token token) +{ std::string tokstring = "type: " + tokentype_to_string(token.tok_type); - switch(token.tok_type) { + switch (token.tok_type) { case SECTION: tokstring += ", name: " + token.tok_section->name; break; @@ -37,19 +46,19 @@ std::string token_to_string(con_token token) { break; case FUNCTION: tokstring += ", function: " + token.tok_function->name + ", arguments: "; - for(int i = 0; i < token.tok_function->arguments.size(); i++) { - if(i != 0) { + for (size_t i = 0; i < token.tok_function->arguments.size(); i++) { + if (i != 0) { tokstring += ", "; } tokstring += token.tok_function->arguments[i]; } break; case CMD: - if(!token.tok_cmd->arg1.empty() && !token.tok_cmd->arg2.empty()) { + if (!token.tok_cmd->arg1.empty() && !token.tok_cmd->arg2.empty()) { tokstring += ", cmd: " + token.tok_cmd->command + " " + token.tok_cmd->arg1 + ", " + token.tok_cmd->arg2; break; } - if(!token.tok_cmd->arg1.empty()) { + if (!token.tok_cmd->arg1.empty()) { tokstring += ", cmd: " + token.tok_cmd->command + " " + token.tok_cmd->arg1; break; } @@ -58,10 +67,12 @@ std::string token_to_string(con_token token) { case MACRO: tokstring += ", macro: " + token.tok_macro->macro + ", value: " + token.tok_macro->value; break; + default: // FUNCALL + break; } - if(token.tokens.size() > 0) { + if (token.tokens.size() > 0) { tokstring += ", tokens: {\n"; - for(int i = 0; i < token.tokens.size(); i++) { + for (size_t i = 0; i < token.tokens.size(); i++) { tokstring += token_to_string(*token.tokens[i]) + "\n"; } 
tokstring += "}"; diff --git a/src/construct_debug.h b/src/construct_debug.h index d9b605a..9ff034a 100644 --- a/src/construct_debug.h +++ b/src/construct_debug.h @@ -1,7 +1,10 @@ -#include -#include +#ifndef CONSTRUCT_DEBUG_H_ +#define CONSTRUCT_DEBUG_H_ + +#include #include "construct_types.h" -#include "reconstruct.h" std::string tokentype_to_string(CON_TOKENTYPE type); std::string token_to_string(con_token token); + +#endif // CONSTRUCT_DEBUG_H_ diff --git a/src/construct_flags.cpp b/src/construct_flags.cpp index 741dbc7..5da4f90 100644 --- a/src/construct_flags.cpp +++ b/src/construct_flags.cpp @@ -1,22 +1,27 @@ +#include +#include #include "construct_flags.h" #include "construct_types.h" +extern CON_BITWIDTH bitwidth; + using namespace std; -int set_bitwidth(char* argv) { - if(strcmp(argv, "elf64") == 0) { +int set_bitwidth(char* argv) +{ + if (string(argv) == "elf64") { bitwidth = BIT64; return 0; } - if(strcmp(argv, "elf32") == 0) { + if (string(argv) == "elf32") { bitwidth = BIT32; return 0; } - if(strcmp(argv, "elf16") == 0) { + if (string(argv) == "elf16") { bitwidth = BIT16; return 0; } - if(strcmp(argv, "elf8") == 0) { + if (string(argv) == "elf8") { bitwidth = BIT8; return 0; } @@ -24,42 +29,43 @@ int set_bitwidth(char* argv) { return -1; } -int handle_flags(int argc, char** argv, string* path, string* outpath) { +int handle_flags(int argc, char** argv, string* path, string* outpath) +{ bool bitwidth_set = false; bool path_set = false; bool outpath_set = false; - for(int i = 1; i < argc; i++) { - if(strcmp(argv[i], "-f") == 0 && set_bitwidth(argv[i+1]) == 0) { + for (int i = 1; i < argc; i++) { + if (string(argv[i]) == "-f" && set_bitwidth(argv[i+1]) == 0) { bitwidth_set = true; i++; continue; } - if(strcmp(argv[i], "-i") == 0) { + if (string(argv[i]) == "-i") { path_set = true; i++; (*path) = argv[i]; continue; } - if(strcmp(argv[i], "-o") == 0) { + if (string(argv[i]) == "-o") { outpath_set = true; i++; (*outpath) = argv[i]; continue; } - if(path != 
NULL) { + if (path != NULL) { path_set = true; (*path) = argv[i]; } } - if(!bitwidth_set) { + if (!bitwidth_set) { cout << "flag -f (format) not set" << endl; return -1; } - if(!path_set) { + if (!path_set) { cout << "flag -i (input file) not set" << endl; return -1; } - if(!outpath_set) { + if (!outpath_set) { cout << "flag -o (output file) not set" << endl; return -1; } diff --git a/src/construct_flags.h b/src/construct_flags.h index 9777d64..b7a1e28 100644 --- a/src/construct_flags.h +++ b/src/construct_flags.h @@ -1,7 +1,9 @@ -#include -#include -#include -#include "reconstruct.h" +#ifndef CONSTRUCT_FLAGS_H_ +#define CONSTRUCT_FLAGS_H_ + +#include int set_bitwidth(char* argv); int handle_flags(int argc, char** argv, std::string* path, std::string* outpath); + +#endif // CONSTRUCT_FLAGS_H_ diff --git a/src/construct_types.h b/src/construct_types.h index 489d9da..f3259c6 100644 --- a/src/construct_types.h +++ b/src/construct_types.h @@ -1,8 +1,8 @@ -#ifndef CON_TYPES_H -#define CON_TYPES_H +#ifndef CONSTRUCT_TYPES_H_ +#define CONSTRUCT_TYPES_H_ -#include -#include +#include +#include enum CON_BITWIDTH { BIT8, @@ -86,7 +86,7 @@ struct con_cmd { struct con_funcall { std::string funcname; - std:: vector arguments; + std::vector arguments; }; -#endif +#endif // CONSTRUCT_TYPES_H_ diff --git a/src/deconstruct.cpp b/src/deconstruct.cpp index 87143c6..71b28bf 100644 --- a/src/deconstruct.cpp +++ b/src/deconstruct.cpp @@ -1,12 +1,75 @@ -#include "construct_types.h" +#include +#include +#include +#include #include "deconstruct.h" -#include +#include "construct_types.h" using namespace std; -int get_line_indentation(string line) { + +static const char& FIRST_UPPERCASE_LETTER = 'A'; +static const char& LAST_UPPERCASE_LETTER = 'Z'; +static bool is_upper(const char& c) +{ + return c >= FIRST_UPPERCASE_LETTER && c <= LAST_UPPERCASE_LETTER; +} +static char to_lower(const char& c) +{ + return is_upper(c) ? 
c - FIRST_UPPERCASE_LETTER + 'a' : c; /* rebase the 0..25 offset onto 'a'; the bare difference produced control characters */ +} +static void to_lower(string& str) +{ + string tmp; + for (string::iterator it = str.begin(); it != str.end(); ++it) { + tmp.push_back(to_lower(*it)); + } + str = tmp; +} + +class IsAnyOf +{ +private: + string chars; +public: + IsAnyOf() = default; + IsAnyOf(const string& _chars) : chars(_chars) {} + IsAnyOf(const char* _chars) : chars(_chars) {} /* by value: a reference-to-pointer parameter cannot bind to string literals */ + ~IsAnyOf() = default; + IsAnyOf(const IsAnyOf& other) = delete; // should save unique sorted chars for that + IsAnyOf& operator=(const IsAnyOf& other) = delete; + + bool operator()(const char& c) const { + for (string::const_iterator it = chars.cbegin(); it != chars.cend(); ++it) { + if (*it == c) return true; + } + return false; + } +}; + +template +static void split(vector& result, const string& input, const Predicate& pred, const bool& compress_adj_delims = false) +{ + string tmp; + bool prev_is_delim = false; + for (string::const_iterator it = input.cbegin(); it != input.cend(); ++it) { + if (pred(*it)) { + if (prev_is_delim && compress_adj_delims) continue; + result.push_back(tmp); + tmp.clear(); + prev_is_delim = true; + } else { + tmp.push_back(*it); + prev_is_delim = false; + } + } + result.push_back(tmp); +} + +int get_line_indentation(string line) +{ int indentation = 0; - for(int i = 0; i < line.size(); i++) { - if(line[i] == '\t') { + for (size_t i = 0; i < line.size(); i++) { + if (line[i] == '\t') { indentation++; continue; } @@ -16,42 +79,45 @@ int get_line_indentation(string line) { } // Expects formatted line -CON_TOKENTYPE get_token_type(string line) { - if(line[0] == '!') +CON_TOKENTYPE get_token_type(string line) +{ + if (line[0] == '!') return MACRO; - if(line.substr(0, 3) == "if ") + if (line.substr(0, 3) == "if ") return IF; - if(line.substr(0, 6) == "while ") + if (line.substr(0, 6) == "while ") return WHILE; - if(line.substr(0, 9) == "function ") + if (line.substr(0, 9) == "function ") return FUNCTION; - if(line.substr(0, 8) == "section ") + if (line.substr(0, 
8) == "section ") return SECTION; - if(line.substr(0, 5) == "call " && line.find('(') != string::npos && line.find(')') != string::npos) + if (line.substr(0, 5) == "call " && line.find('(') != string::npos && line.find(')') != string::npos) return FUNCALL; - if(line.find(' ') == string::npos && line[line.size()-1] == ':') + if (line.find(' ') == string::npos && line[line.size()-1] == ':') return TAG; return CMD; } -CON_COMPARISON str_to_comparison(string comp) { - if(comp == "e") +CON_COMPARISON str_to_comparison(string comp) +{ + if (comp == "e") return E; - if(comp == "ne") + if (comp == "ne") return NE; - if(comp == "l") + if (comp == "l") return L; - if(comp == "g") + if (comp == "g") return G; - if(comp == "le") + if (comp == "le") return LE; - if(comp == "ge") + if (comp == "ge") return GE; - //ERROR + throw invalid_argument("Invalid comparison sing: "+comp); } -vector delinearize_tokens(std::vector tokens) { +vector delinearize_tokens(std::vector tokens) +{ vector dl_tokens; // Serves as parent "section" where all tokens belong to, convenient for algo @@ -72,17 +138,17 @@ vector delinearize_tokens(std::vector tokens) { // If token is while, if or function it is pushed to stack and becomes new parent. // if indentation goes up, new token is pushed to stack, when indentation goes down, // tops of stack are popped off by how much it decreased. 
- for(int i = 0; i < tokens.size(); i++) { - if(parent_stack.top()->indentation - tokens[i]->indentation >= 0) { + for (size_t i = 0; i < tokens.size(); i++) { + if (parent_stack.top()->indentation - tokens[i]->indentation >= 0) { int indentation_diff = parent_stack.top()->indentation - tokens[i]->indentation+1; - for(int j = 0; j < indentation_diff; j++) { + for (int j = 0; j < indentation_diff; j++) { parent_stack.pop(); } } - if(tokens[i]->indentation == parent_stack.top()->indentation+1) { + if (tokens[i]->indentation == parent_stack.top()->indentation+1) { parent_stack.top()->tokens.push_back(tokens[i]); } - if(tokens[i]->tok_type == FUNCTION || tokens[i]->tok_type == IF || tokens[i]->tok_type == WHILE) { + if (tokens[i]->tok_type == FUNCTION || tokens[i]->tok_type == IF || tokens[i]->tok_type == WHILE) { parent_stack.push(tokens[i]); } } @@ -95,7 +161,8 @@ vector delinearize_tokens(std::vector tokens) { return delinearized_tokens; } -con_macro* parse_macro(string line) { +con_macro* parse_macro(string line) +{ con_macro* tok_macro = new con_macro(); int spacepos = line.find(' '); tok_macro->macro = line.substr(1, spacepos-1); @@ -103,67 +170,74 @@ con_macro* parse_macro(string line) { return tok_macro; } -con_if* parse_if(string line) { +con_if* parse_if(string line) +{ con_if* tok_if = new con_if(); vector line_split; - boost::split(line_split, line, boost::is_any_of(" ")); + split(line_split, line, IsAnyOf(" ")); tok_if->condition.arg1 = line_split[1]; tok_if->condition.op = str_to_comparison(line_split[2]); tok_if->condition.arg2 = line_split[3].substr(0, line_split[3].size()-1); return tok_if; } -con_while* parse_while(string line) { +con_while* parse_while(string line) +{ con_while* tok_while = new con_while(); vector line_split; - boost::split(line_split, line, boost::is_any_of(" ")); + split(line_split, line, IsAnyOf(" ")); tok_while->condition.arg1 = line_split[1]; tok_while->condition.op = str_to_comparison(line_split[2]); tok_while->condition.arg2 = 
line_split[3].substr(0, line_split[3].size()-1); // to remove : return tok_while; } -con_section* parse_section(string line) { +con_section* parse_section(string line) +{ con_section* tok_section = new con_section(); vector line_split; - boost::split(line_split, line, boost::is_any_of(" ")); + split(line_split, line, IsAnyOf(" ")); tok_section->name = line_split[1]; return tok_section; } -con_tag* parse_tag(string line) { +con_tag* parse_tag(string line) +{ con_tag* tok_tag = new con_tag(); tok_tag->name = line.substr(0, line.size()-1); return tok_tag; } -con_cmd* parse_cmd(string line) { +con_cmd* parse_cmd(string line) +{ con_cmd* tok_cmd = new con_cmd(); vector line_split; - boost::split(line_split, line, boost::is_any_of(" ,")); + split(line_split, line, IsAnyOf(" ,")); tok_cmd->command = line_split[0]; - if(line_split.size() > 1) + if (line_split.size() > 1) tok_cmd->arg1 = line_split[1]; - if(line_split.size() > 3) + if (line_split.size() > 3) tok_cmd->arg2 = line_split[3]; return tok_cmd; } -con_function* parse_function(string line) { +con_function* parse_function(string line) +{ con_function* tok_function = new con_function(); vector line_split; - boost::split(line_split, line, boost::is_any_of("():,")); + split(line_split, line, IsAnyOf("():,")); tok_function->name = line_split[0].substr(9, line_split[0].size()-9); - for(int i = 1; i < line_split.size()-2; i++) { - if(line_split[i].empty()) { + for (size_t i = 1; i < line_split.size()-2; i++) { + if (line_split[i].empty()) { continue; } tok_function->arguments.push_back(line_split[i]); // macros filter out spaces anyway when applied } return tok_function; } -con_funcall* parse_funcall(string line) { +con_funcall* parse_funcall(string line) +{ con_funcall* tok_funcall = new con_funcall(); vector line_split; - boost::split(line_split, line, boost::is_any_of("(),")); + split(line_split, line, IsAnyOf("(),")); tok_funcall->funcname = line_split[0].substr(5, line_split[0].size()-5); - for(int i = 1; i < 
line_split.size()-1; i++) { - if(line_split[i].empty()) { + for (size_t i = 1; i < line_split.size()-1; i++) { + if (line_split[i].empty()) { continue; } tok_funcall->arguments.push_back(line_split[i]); // macros filter out spaces anyway when applied @@ -172,26 +246,27 @@ con_funcall* parse_funcall(string line) { } // Does not expect formatted line, only lowercase -con_token* parse_line(string line) { +con_token* parse_line(string line) +{ con_token* token = new con_token; //remove multiple spaces from line string f_line = ""; bool caught_space = false; - for(int i = 0; i < line.size(); i++) { - if(line[i] == ' ') { - if(!caught_space) { + for (size_t i = 0; i < line.size(); i++) { + if (line[i] == ' ') { + if (!caught_space) { f_line += line[i]; caught_space = true; } } else { - if(line[i] != '\t') { + if (line[i] != '\t') { f_line += line[i]; } caught_space = false; - } + } } token->tok_type = get_token_type(f_line); - switch(token->tok_type) { + switch (token->tok_type) { case MACRO: token->tok_macro = parse_macro(f_line); break; @@ -218,25 +293,26 @@ con_token* parse_line(string line) { } return token; } -vector parse_construct(string code) { +vector parse_construct(string code) +{ vector code_split; - boost::split(code_split, code, boost::is_any_of("\n"), boost::token_compress_on); - boost::to_lower(code); + split(code_split, code, IsAnyOf("\n"), true); + to_lower(code); vector tokens; bool in_data = false; - for(int i = 0; i < code_split.size(); i++) { + for (size_t i = 0; i < code_split.size(); i++) { // Check if it contains any alphabet chars - if(code_split[i].find_first_of("abcdefghijklmnopqrstuvwxyz!") == std::string::npos) { + if (code_split[i].find_first_of("abcdefghijklmnopqrstuvwxyz!") == std::string::npos) { continue; } con_token* new_token = parse_line(code_split[i]); new_token->indentation = get_line_indentation(code_split[i]); tokens.push_back(new_token); - if(new_token->tok_type == SECTION && (new_token->tok_section->name == ".data" || 
new_token->tok_section->name == ".bss")) { + if (new_token->tok_type == SECTION && (new_token->tok_section->name == ".data" || new_token->tok_section->name == ".bss")) { in_data = true; - } else if(new_token->tok_type == SECTION && new_token->tok_section->name == ".text") { + } else if (new_token->tok_type == SECTION && new_token->tok_section->name == ".text") { in_data = false; - } else if(in_data) { + } else if (in_data) { //TODO free original con_x con_cmd* data_cmd = new con_cmd; data_cmd->command = code_split[i]; diff --git a/src/deconstruct.h b/src/deconstruct.h index be95614..cb7ba19 100644 --- a/src/deconstruct.h +++ b/src/deconstruct.h @@ -1,11 +1,9 @@ -#include "construct_debug.h" -#include -#include -#include -#include -#include -#include -#include +#ifndef DECONSTRUCT_H_ +#define DECONSTRUCT_H_ + +#include +#include +#include "construct_types.h" int get_line_indentation(std::string line); CON_TOKENTYPE get_token_type(std::string line); @@ -23,3 +21,5 @@ con_function* parse_function(std::string line); con_funcall* parse_funcall(std::string line); con_token* parse_line(std::string line); std::vector parse_construct(std::string code); + +#endif // DECONSTRUCT_H_ diff --git a/src/reconstruct.cpp b/src/reconstruct.cpp index d8bfaa5..ddd1d56 100644 --- a/src/reconstruct.cpp +++ b/src/reconstruct.cpp @@ -1,3 +1,6 @@ +#include +#include +#include #include "reconstruct.h" #include "construct_types.h" @@ -6,10 +9,11 @@ int if_amnt = 0; int while_amnt = 0; CON_BITWIDTH bitwidth = BIT64; -string reg_to_str(uint8_t call_num, CON_BITWIDTH bitwidth) { - switch(bitwidth) { +string reg_to_str(uint8_t call_num) +{ + switch (bitwidth) { case BIT8: - switch(call_num) { + switch (call_num) { case 0: return "dil"; break; @@ -31,7 +35,7 @@ string reg_to_str(uint8_t call_num, CON_BITWIDTH bitwidth) { } break; case BIT16: - switch(call_num) { + switch (call_num) { case 0: return "di"; break; @@ -53,7 +57,7 @@ string reg_to_str(uint8_t call_num, CON_BITWIDTH bitwidth) { } break; 
case BIT32: - switch(call_num) { + switch (call_num) { case 0: return "edi"; break; @@ -75,7 +79,7 @@ string reg_to_str(uint8_t call_num, CON_BITWIDTH bitwidth) { } break; case BIT64: - switch(call_num) { + switch (call_num) { case 0: return "rdi"; break; @@ -97,10 +101,12 @@ string reg_to_str(uint8_t call_num, CON_BITWIDTH bitwidth) { } break; } + throw invalid_argument("Invalid bitwidth or call_num: bitwidth="+to_string(static_cast(bitwidth))+" call_num="+to_string(static_cast(call_num))); } -string comparison_to_string(CON_COMPARISON condition) { - switch(condition) { +string comparison_to_string(CON_COMPARISON condition) +{ + switch (condition) { case E: return "e"; case NE: @@ -114,11 +120,12 @@ string comparison_to_string(CON_COMPARISON condition) { case GE: return "ge"; } - return "unknown"; + throw invalid_argument("Invalid comparison value: "+to_string(static_cast(condition))); } -CON_COMPARISON get_comparison_inverse(CON_COMPARISON condition) { - switch(condition) { +CON_COMPARISON get_comparison_inverse(CON_COMPARISON condition) +{ + switch (condition) { case E: return NE; case NE: @@ -132,83 +139,83 @@ CON_COMPARISON get_comparison_inverse(CON_COMPARISON condition) { case GE: return L; } + throw invalid_argument("Invalid comparison value: "+to_string(static_cast(condition))); } -void apply_macro_to_token(con_token& token, vector macros) { - if(token.tok_type != WHILE && token.tok_type != IF && token.tok_type != CMD) { +void apply_macro_to_token(con_token& token, vector macros) +{ + if (token.tok_type != WHILE && token.tok_type != IF && token.tok_type != CMD) { return; } // Unoptimal, but more clear imo - for(int i = 0; i < macros.size(); i++) { + for (size_t i = 0; i < macros.size(); i++) { con_macro* crntmacro = &macros[i]; size_t pos; - switch(token.tok_type) { + switch (token.tok_type) { case WHILE: - if(!token.tok_while->condition.arg1.empty() && + if (!token.tok_while->condition.arg1.empty() && (pos = 
token.tok_while->condition.arg1.find(crntmacro->macro)) != string::npos && (pos == 0 || !isalpha(token.tok_while->condition.arg1[pos-1])) && (pos == token.tok_while->condition.arg1.size()-1 || !isalpha(token.tok_while->condition.arg1[pos+crntmacro->macro.size()]))) { token.tok_while->condition.arg1.replace(pos, crntmacro->macro.size(), crntmacro->value); } - if(!token.tok_while->condition.arg2.empty() && + if (!token.tok_while->condition.arg2.empty() && (pos = token.tok_while->condition.arg2.find(crntmacro->macro)) != string::npos && (pos == 0 || !isalpha(token.tok_while->condition.arg2[pos-1])) && (pos == token.tok_while->condition.arg2.size()-1 || !isalpha(token.tok_while->condition.arg2[pos+crntmacro->macro.size()]))) { - token.tok_while->condition.arg2.replace(pos, crntmacro->macro.size(), crntmacro->value); } break; case IF: - if(!token.tok_if->condition.arg1.empty() && + if (!token.tok_if->condition.arg1.empty() && (pos = token.tok_if->condition.arg1.find(crntmacro->macro)) != string::npos && (pos == 0 || !isalpha(token.tok_if->condition.arg1[pos-1])) && (pos == token.tok_if->condition.arg1.size()-1 || !isalpha(token.tok_if->condition.arg1[pos+crntmacro->macro.size()]))) { - token.tok_if->condition.arg1.replace(pos, crntmacro->macro.size(), crntmacro->value); } - if(!token.tok_if->condition.arg2.empty() && + if (!token.tok_if->condition.arg2.empty() && (pos = token.tok_if->condition.arg2.find(crntmacro->macro)) != string::npos && (pos == 0 || !isalpha(token.tok_if->condition.arg2[pos-1])) && (pos == token.tok_if->condition.arg2.size()-1 || !isalpha(token.tok_if->condition.arg2[pos+crntmacro->macro.size()]))) { - token.tok_if->condition.arg2.replace(pos, crntmacro->macro.size(), crntmacro->value); } break; case CMD: - if(!token.tok_cmd->arg1.empty() && + if (!token.tok_cmd->arg1.empty() && (pos = token.tok_cmd->arg1.find(crntmacro->macro)) != string::npos && (pos == 0 || !isalpha(token.tok_cmd->arg1[pos-1])) && (pos == token.tok_cmd->arg1.size()-1 || 
!isalpha(token.tok_cmd->arg1[pos+crntmacro->macro.size()]))) { - token.tok_cmd->arg1.replace(pos, crntmacro->macro.size(), crntmacro->value); } - if(!token.tok_cmd->arg2.empty() && + if (!token.tok_cmd->arg2.empty() && (pos = token.tok_cmd->arg2.find(crntmacro->macro)) != string::npos && (pos == 0 || !isalpha(token.tok_cmd->arg2[pos-1])) && (pos == token.tok_cmd->arg2.size()-1 || !isalpha(token.tok_cmd->arg2[pos+crntmacro->macro.size()]))) { - token.tok_cmd->arg2.replace(pos, crntmacro->macro.size(), crntmacro->value); } break; + default: + break; } } } -void apply_funcalls(std::vector& tokens) { - for(int i = 0; i < tokens.size(); i++) { +void apply_funcalls(std::vector& tokens) +{ + for (size_t i = 0; i < tokens.size(); i++) { apply_funcalls(tokens[i]->tokens); - if(tokens[i]->tok_type != FUNCALL) { + if (tokens[i]->tok_type != FUNCALL) { continue; } vector* args = &tokens[i]->tok_funcall->arguments; vector arg_tokens; - for(int j = 0; j < args->size(); j++) { + for (size_t j = 0; j < args->size(); j++) { con_token* arg_tok = new con_token(); arg_tok->tok_type = CMD; con_cmd* arg_cmd = new con_cmd(); arg_tok->tok_cmd = arg_cmd; arg_cmd->command = "mov"; - arg_cmd->arg1 = reg_to_str(j, bitwidth); + arg_cmd->arg1 = reg_to_str(j); arg_cmd->arg2 = (*args)[j]; arg_tokens.push_back(arg_tok); } @@ -224,14 +231,15 @@ void apply_funcalls(std::vector& tokens) { } } -void apply_functions(std::vector& tokens) { +void apply_functions(std::vector& tokens) +{ vector* subtokens = &tokens; - for(int i = 0; i < subtokens->size(); i++) { - if((*subtokens)[i]->tok_type != FUNCTION) { + for (size_t i = 0; i < subtokens->size(); i++) { + if ((*subtokens)[i]->tok_type != FUNCTION) { continue; } con_function* crntfunc = (*subtokens)[i]->tok_function; - if(crntfunc->name == "main") { + if (crntfunc->name == "main") { crntfunc->name = "_start"; } @@ -240,11 +248,11 @@ void apply_functions(std::vector& tokens) { con_tag* functag = new con_tag; tag_tok->tok_tag = functag; functag->name = 
crntfunc->name; - for(int j = 0; j < crntfunc->arguments.size(); j++) { + for (size_t j = 0; j < crntfunc->arguments.size(); j++) { con_token* arg_tok = new con_token; arg_tok->tok_type = MACRO; con_macro* arg_macro = new con_macro; - arg_macro->value = reg_to_str(j, bitwidth); + arg_macro->value = reg_to_str(j); arg_macro->macro = crntfunc->arguments[j]; arg_tok->tok_macro = arg_macro; @@ -259,20 +267,21 @@ void apply_functions(std::vector& tokens) { (*subtokens)[i]->tokens.push_back(ret_tok); } } -void apply_macros(vector& tokens, vector knownmacros) { - for(int i = 0; i < tokens.size(); i++) { - if(tokens[i]->tok_type == MACRO) { +void apply_macros(vector& tokens, vector knownmacros) +{ + for (size_t i = 0; i < tokens.size(); i++) { + if (tokens[i]->tok_type == MACRO) { // Filter spaces from macro and value pair - con_macro* f_macro = new con_macro(); + con_macro* f_macro = new con_macro(); f_macro->macro = ""; f_macro->value = ""; - for(int j = 0; j < tokens[i]->tok_macro->macro.size(); j++) { - if(tokens[i]->tok_macro->macro[j] != ' ') { + for (size_t j = 0; j < tokens[i]->tok_macro->macro.size(); j++) { + if (tokens[i]->tok_macro->macro[j] != ' ') { f_macro->macro += tokens[i]->tok_macro->macro[j]; } } - for(int j = 0; j < tokens[i]->tok_macro->value.size(); j++) { - if(tokens[i]->tok_macro->value[j] != ' ') + for (size_t j = 0; j < tokens[i]->tok_macro->value.size(); j++) { + if (tokens[i]->tok_macro->value[j] != ' ') f_macro->value += tokens[i]->tok_macro->value[j]; } knownmacros.push_back(*f_macro); @@ -280,15 +289,16 @@ void apply_macros(vector& tokens, vector knownmacros) { continue; } apply_macro_to_token(*tokens[i], knownmacros); - if(tokens[i]->tok_type == IF || tokens[i]->tok_type == WHILE || tokens[i]->tok_type == FUNCTION) { + if (tokens[i]->tok_type == IF || tokens[i]->tok_type == WHILE || tokens[i]->tok_type == FUNCTION) { apply_macros(tokens[i]->tokens, knownmacros); } } } -void apply_whiles(vector& tokens) { - for(int i = 0; i< tokens.size(); 
i++) { +void apply_whiles(vector& tokens) +{ + for (size_t i = 0; i< tokens.size(); i++) { apply_whiles(tokens[i]->tokens); - if(tokens[i]->tok_type != WHILE) { + if (tokens[i]->tok_type != WHILE) { continue; } con_token* cmp_tok = new con_token; @@ -320,13 +330,13 @@ void apply_whiles(vector& tokens) { endwhile_tok->tok_type = TAG; con_tag* endwhile_tag = new con_tag; endwhile_tok->tok_tag = endwhile_tag; - endwhile_tag->name = endtag_name; + endwhile_tag->name = endtag_name; con_token* startwhile_tok = new con_token; startwhile_tok->tok_type = TAG; con_tag* startwhile_tag = new con_tag; startwhile_tok->tok_tag = startwhile_tag; - startwhile_tag->name = starttag_name; + startwhile_tag->name = starttag_name; while_amnt++; tokens[i]->tokens.insert(tokens[i]->tokens.begin(), jmp_tok); @@ -337,10 +347,11 @@ void apply_whiles(vector& tokens) { // so: starttag, cmp, jmp ... jmp, endtag } } -void apply_ifs(vector& tokens) { - for(int i = 0; i< tokens.size(); i++) { +void apply_ifs(vector& tokens) +{ + for (size_t i = 0; i< tokens.size(); i++) { apply_ifs(tokens[i]->tokens); - if(tokens[i]->tok_type != IF) { + if (tokens[i]->tok_type != IF) { continue; } con_token* cmp_tok = new con_token; @@ -350,7 +361,7 @@ void apply_ifs(vector& tokens) { cmp_cmd->command = "cmp"; cmp_cmd->arg1 = tokens[i]->tok_if->condition.arg1; cmp_cmd->arg2 = tokens[i]->tok_if->condition.arg2; - + string tagname = "endif" + to_string(if_amnt); con_token* jmp_tok = new con_token; @@ -364,7 +375,7 @@ void apply_ifs(vector& tokens) { endif_tok->tok_type = TAG; con_tag* endif_tag = new con_tag; endif_tok->tok_tag = endif_tag; - endif_tag->name = tagname; + endif_tag->name = tagname; if_amnt++; tokens[i]->tokens.insert(tokens[i]->tokens.begin(), jmp_tok); @@ -372,9 +383,10 @@ void apply_ifs(vector& tokens) { tokens[i]->tokens.push_back(endif_tok); } } -void linearize_tokens(vector& tokens) { - for(int i = 0; i < tokens.size(); i++) { - if(tokens[i]->tok_type != IF && tokens[i]->tok_type != WHILE && 
tokens[i]->tok_type != FUNCTION) { +void linearize_tokens(vector& tokens) +{ + for (size_t i = 0; i < tokens.size(); i++) { + if (tokens[i]->tok_type != IF && tokens[i]->tok_type != WHILE && tokens[i]->tok_type != FUNCTION) { continue; } vector* subtokens = &tokens[i]->tokens; @@ -384,28 +396,31 @@ void linearize_tokens(vector& tokens) { } } -std::string tokens_to_nasm(std::vector& tokens) { +std::string tokens_to_nasm(std::vector& tokens) +{ string output = ""; - for(int i = 0; i < tokens.size(); i++) { - if(tokens[i]->tok_type == IF || tokens[i]->tok_type == WHILE || tokens[i]->tok_type == FUNCTION || tokens[i]->tok_type == MACRO || tokens[i]->tok_type == FUNCALL) { + for (size_t i = 0; i < tokens.size(); i++) { + if (tokens[i]->tok_type == IF || tokens[i]->tok_type == WHILE + || tokens[i]->tok_type == FUNCTION || tokens[i]->tok_type == MACRO + || tokens[i]->tok_type == FUNCALL) { continue; } output += "\n"; - if(tokens[i]->tok_type == CMD) { + if (tokens[i]->tok_type == CMD) { output += tokens[i]->tok_cmd->command; - if(!tokens[i]->tok_cmd->arg1.empty()) { + if (!tokens[i]->tok_cmd->arg1.empty()) { output += " " + tokens[i]->tok_cmd->arg1; } - if(!tokens[i]->tok_cmd->arg2.empty()) { + if (!tokens[i]->tok_cmd->arg2.empty()) { output += ", " + tokens[i]->tok_cmd->arg2; } continue; } - if(tokens[i]->tok_type == TAG) { + if (tokens[i]->tok_type == TAG) { output += tokens[i]->tok_tag->name + ":"; continue; } - if(tokens[i]->tok_type == SECTION) { + if (tokens[i]->tok_type == SECTION) { output += "section " + tokens[i]->tok_section->name; continue; } diff --git a/src/reconstruct.h b/src/reconstruct.h index 3147859..330a96c 100644 --- a/src/reconstruct.h +++ b/src/reconstruct.h @@ -1,6 +1,8 @@ -#include -#include -#include +#ifndef RECONSTRUCT_H_ +#define RECONSTRUCT_H_ + +#include +#include #include "construct_types.h" // Used for naming tags @@ -8,7 +10,7 @@ extern int if_amnt; extern int while_amnt; extern CON_BITWIDTH bitwidth; -std::string reg_to_str(uint8_t 
call_num, CON_BITWIDTH bitwidth); +std::string reg_to_str(uint8_t call_num); std::string comparison_to_string(CON_COMPARISON condition); CON_COMPARISON get_comparison_inverse(CON_COMPARISON condition); @@ -16,7 +18,7 @@ CON_COMPARISON get_comparison_inverse(CON_COMPARISON condition); void linearize_tokens(std::vector& tokens); void apply_macro_to_token(con_token& token, std::vector macros); -// The following functions transform the construct specific tokens to nasm ones, +// The following functions transform the construct specific tokens to nasm ones, // the parent construct tokens remain, but are removed during linearization // Converts args to macros and adds tag with same name to child tokens @@ -27,3 +29,5 @@ void apply_ifs(std::vector& tokens); void apply_macros(std::vector& tokens, std::vector macros); std::string tokens_to_nasm(std::vector& tokens); + +#endif // RECONSTRUCT_H_