diff --git a/src/construct.cpp b/src/construct.cpp index b229b07..53c61ab 100644 --- a/src/construct.cpp +++ b/src/construct.cpp @@ -34,6 +34,7 @@ int main(int argc, char** argv) glob_cmd->command = "global _start"; tokens.insert(tokens.begin(), glob_tok); + // Order dependent: some tokens are replaced with macros, so apply_macros() must be at the end. tokens = delinearize_tokens(tokens); apply_functions(tokens); apply_ifs(tokens); diff --git a/src/construct_types.h b/src/construct_types.h index f3259c6..5f76bbc 100644 --- a/src/construct_types.h +++ b/src/construct_types.h @@ -40,15 +40,16 @@ struct con_token { struct con_while* tok_while; struct con_if* tok_if; struct con_function* tok_function; - struct con_funcall* tok_funcall; struct con_cmd* tok_cmd; struct con_macro* tok_macro; + struct con_funcall* tok_funcall; std::vector tokens; // Only non-empty for if, while and function tokens }; -struct con_macro { - std::string value; - std::string macro; +struct _con_condition { + CON_COMPARISON op; + std::string arg1; + std::string arg2; }; struct con_section { @@ -59,18 +60,12 @@ struct con_tag { std::string name; }; -struct con_condition { - CON_COMPARISON op; - std::string arg1; - std::string arg2; -}; - struct con_while { - con_condition condition; + _con_condition condition; }; struct con_if { - con_condition condition; + _con_condition condition; }; struct con_function { @@ -84,6 +79,11 @@ struct con_cmd { std::string arg2; }; +struct con_macro { + std::string value; + std::string macro; +}; + struct con_funcall { std::string funcname; std::vector arguments; diff --git a/src/deconstruct.cpp b/src/deconstruct.cpp index 71b28bf..7b22553 100644 --- a/src/deconstruct.cpp +++ b/src/deconstruct.cpp @@ -81,20 +81,20 @@ int get_line_indentation(string line) // Expects formatted line CON_TOKENTYPE get_token_type(string line) { - if (line[0] == '!') - return MACRO; - if (line.substr(0, 3) == "if ") - return IF; + if (line.substr(0, 8) == "section ") + return SECTION; + 
if (line.find(' ') == string::npos && line[line.size()-1] == ':') + return TAG; if (line.substr(0, 6) == "while ") return WHILE; + if (line.substr(0, 3) == "if ") + return IF; if (line.substr(0, 9) == "function ") return FUNCTION; - if (line.substr(0, 8) == "section ") - return SECTION; + if (line[0] == '!') + return MACRO; if (line.substr(0, 5) == "call " && line.find('(') != string::npos && line.find(')') != string::npos) return FUNCALL; - if (line.find(' ') == string::npos && line[line.size()-1] == ':') - return TAG; return CMD; } @@ -161,24 +161,19 @@ vector delinearize_tokens(std::vector tokens) return delinearized_tokens; } -con_macro* parse_macro(string line) -{ - con_macro* tok_macro = new con_macro(); - int spacepos = line.find(' '); - tok_macro->macro = line.substr(1, spacepos-1); - tok_macro->value = line.substr(spacepos+1, line.size()-spacepos-1); - return tok_macro; -} - -con_if* parse_if(string line) +con_section* parse_section(string line) { - con_if* tok_if = new con_if(); + con_section* tok_section = new con_section(); vector line_split; split(line_split, line, IsAnyOf(" ")); - tok_if->condition.arg1 = line_split[1]; - tok_if->condition.op = str_to_comparison(line_split[2]); - tok_if->condition.arg2 = line_split[3].substr(0, line_split[3].size()-1); - return tok_if; + tok_section->name = line_split[1]; + return tok_section; +} +con_tag* parse_tag(string line) +{ + con_tag* tok_tag = new con_tag(); + tok_tag->name = line.substr(0, line.size()-1); + return tok_tag; } con_while* parse_while(string line) { @@ -190,19 +185,29 @@ con_while* parse_while(string line) tok_while->condition.arg2 = line_split[3].substr(0, line_split[3].size()-1); // to remove : return tok_while; } -con_section* parse_section(string line) +con_if* parse_if(string line) { - con_section* tok_section = new con_section(); + con_if* tok_if = new con_if(); vector line_split; split(line_split, line, IsAnyOf(" ")); - tok_section->name = line_split[1]; - return tok_section; + 
tok_if->condition.arg1 = line_split[1]; + tok_if->condition.op = str_to_comparison(line_split[2]); + tok_if->condition.arg2 = line_split[3].substr(0, line_split[3].size()-1); + return tok_if; } -con_tag* parse_tag(string line) +con_function* parse_function(string line) { - con_tag* tok_tag = new con_tag(); - tok_tag->name = line.substr(0, line.size()-1); - return tok_tag; + con_function* tok_function = new con_function(); + vector line_split; + split(line_split, line, IsAnyOf("():,")); + tok_function->name = line_split[0].substr(9, line_split[0].size()-9); + for (size_t i = 1; i < line_split.size()-2; i++) { + if (line_split[i].empty()) { + continue; + } + tok_function->arguments.push_back(line_split[i]); // macros filter out spaces anyway when applied + } + return tok_function; } con_cmd* parse_cmd(string line) { @@ -216,19 +221,13 @@ con_cmd* parse_cmd(string line) tok_cmd->arg2 = line_split[3]; return tok_cmd; } -con_function* parse_function(string line) +con_macro* parse_macro(string line) { - con_function* tok_function = new con_function(); - vector line_split; - split(line_split, line, IsAnyOf("():,")); - tok_function->name = line_split[0].substr(9, line_split[0].size()-9); - for (size_t i = 1; i < line_split.size()-2; i++) { - if (line_split[i].empty()) { - continue; - } - tok_function->arguments.push_back(line_split[i]); // macros filter out spaces anyway when applied - } - return tok_function; + con_macro* tok_macro = new con_macro(); + int spacepos = line.find(' '); + tok_macro->macro = line.substr(1, spacepos-1); + tok_macro->value = line.substr(spacepos+1, line.size()-spacepos-1); + return tok_macro; } con_funcall* parse_funcall(string line) { @@ -267,29 +266,29 @@ con_token* parse_line(string line) } token->tok_type = get_token_type(f_line); switch (token->tok_type) { - case MACRO: - token->tok_macro = parse_macro(f_line); + case SECTION: + token->tok_section = parse_section(f_line); break; - case IF: - token->tok_if = parse_if(f_line); + case TAG: + 
token->tok_tag = parse_tag(f_line); break; case WHILE: token->tok_while = parse_while(f_line); break; + case IF: + token->tok_if = parse_if(f_line); + break; case FUNCTION: token->tok_function = parse_function(f_line); break; - case FUNCALL: - token->tok_funcall = parse_funcall(f_line); - case SECTION: - token->tok_section = parse_section(f_line); - break; - case TAG: - token->tok_tag = parse_tag(f_line); - break; case CMD: token->tok_cmd = parse_cmd(f_line); break; + case MACRO: + token->tok_macro = parse_macro(f_line); + break; + case FUNCALL: + token->tok_funcall = parse_funcall(f_line); } return token; } diff --git a/src/deconstruct.h b/src/deconstruct.h index cb7ba19..7139980 100644 --- a/src/deconstruct.h +++ b/src/deconstruct.h @@ -11,14 +11,15 @@ CON_COMPARISON str_to_comparison(std::string comp); std::vector delinearize_tokens(std::vector tokens); -con_macro* parse_macro(std::string line); -con_if* parse_if(std::string line); -con_while* parse_while(std::string line); con_section* parse_section(std::string line); con_tag* parse_tag(std::string line); -con_cmd* parse_cmd(std::string line); +con_while* parse_while(std::string line); +con_if* parse_if(std::string line); con_function* parse_function(std::string line); +con_cmd* parse_cmd(std::string line); +con_macro* parse_macro(std::string line); con_funcall* parse_funcall(std::string line); + con_token* parse_line(std::string line); std::vector parse_construct(std::string code); diff --git a/src/reconstruct.cpp b/src/reconstruct.cpp index ddd1d56..f0e0f7f 100644 --- a/src/reconstruct.cpp +++ b/src/reconstruct.cpp @@ -5,6 +5,7 @@ #include "construct_types.h" using namespace std; + int if_amnt = 0; int while_amnt = 0; CON_BITWIDTH bitwidth = BIT64; @@ -103,7 +104,6 @@ string reg_to_str(uint8_t call_num) } throw invalid_argument("Invalid bitwidth or call_num: bitwidth="+to_string(static_cast(bitwidth))+" call_num="+to_string(static_cast(call_num))); } - string comparison_to_string(CON_COMPARISON condition) { 
switch (condition) { @@ -122,7 +122,6 @@ string comparison_to_string(CON_COMPARISON condition) } throw invalid_argument("Invalid comparison value: "+to_string(static_cast(condition))); } - CON_COMPARISON get_comparison_inverse(CON_COMPARISON condition) { switch (condition) { @@ -142,7 +141,7 @@ CON_COMPARISON get_comparison_inverse(CON_COMPARISON condition) throw invalid_argument("Invalid comparison value: "+to_string(static_cast(condition))); } -void apply_macro_to_token(con_token& token, vector macros) +static void apply_macro_to_token(con_token& token, vector macros) { if (token.tok_type != WHILE && token.tok_type != IF && token.tok_type != CMD) { return; @@ -200,100 +199,7 @@ void apply_macro_to_token(con_token& token, vector macros) } } } -void apply_funcalls(std::vector& tokens) -{ - for (size_t i = 0; i < tokens.size(); i++) { - apply_funcalls(tokens[i]->tokens); - if (tokens[i]->tok_type != FUNCALL) { - continue; - } - vector* args = &tokens[i]->tok_funcall->arguments; - vector arg_tokens; - for (size_t j = 0; j < args->size(); j++) { - con_token* arg_tok = new con_token(); - arg_tok->tok_type = CMD; - con_cmd* arg_cmd = new con_cmd(); - arg_tok->tok_cmd = arg_cmd; - arg_cmd->command = "mov"; - arg_cmd->arg1 = reg_to_str(j); - arg_cmd->arg2 = (*args)[j]; - arg_tokens.push_back(arg_tok); - } - con_token* call_tok = new con_token(); - call_tok->tok_type = CMD; - con_cmd* call_cmd = new con_cmd(); - call_tok->tok_cmd = call_cmd; - call_cmd->command = "call"; - call_cmd->arg1 = tokens[i]->tok_funcall->funcname; - arg_tokens.push_back(call_tok); - - tokens.insert(tokens.begin()+i+1, arg_tokens.begin(), arg_tokens.end()); - } -} - -void apply_functions(std::vector& tokens) -{ - vector* subtokens = &tokens; - for (size_t i = 0; i < subtokens->size(); i++) { - if ((*subtokens)[i]->tok_type != FUNCTION) { - continue; - } - con_function* crntfunc = (*subtokens)[i]->tok_function; - if (crntfunc->name == "main") { - crntfunc->name = "_start"; - } - con_token* tag_tok = 
new con_token; - tag_tok->tok_type = TAG; - con_tag* functag = new con_tag; - tag_tok->tok_tag = functag; - functag->name = crntfunc->name; - for (size_t j = 0; j < crntfunc->arguments.size(); j++) { - con_token* arg_tok = new con_token; - arg_tok->tok_type = MACRO; - con_macro* arg_macro = new con_macro; - arg_macro->value = reg_to_str(j); - arg_macro->macro = crntfunc->arguments[j]; - arg_tok->tok_macro = arg_macro; - - (*subtokens)[i]->tokens.insert((*subtokens)[i]->tokens.begin(), arg_tok); - } - (*subtokens)[i]->tokens.insert((*subtokens)[i]->tokens.begin(), tag_tok); - con_token* ret_tok = new con_token; - ret_tok->tok_type = CMD; - con_cmd* ret_cmd = new con_cmd; - ret_tok->tok_cmd = ret_cmd; - ret_cmd->command = "ret"; - (*subtokens)[i]->tokens.push_back(ret_tok); - } -} -void apply_macros(vector& tokens, vector knownmacros) -{ - for (size_t i = 0; i < tokens.size(); i++) { - if (tokens[i]->tok_type == MACRO) { - // Filter spaces from macro and value pair - con_macro* f_macro = new con_macro(); - f_macro->macro = ""; - f_macro->value = ""; - for (size_t j = 0; j < tokens[i]->tok_macro->macro.size(); j++) { - if (tokens[i]->tok_macro->macro[j] != ' ') { - f_macro->macro += tokens[i]->tok_macro->macro[j]; - } - } - for (size_t j = 0; j < tokens[i]->tok_macro->value.size(); j++) { - if (tokens[i]->tok_macro->value[j] != ' ') - f_macro->value += tokens[i]->tok_macro->value[j]; - } - knownmacros.push_back(*f_macro); - delete f_macro; - continue; - } - apply_macro_to_token(*tokens[i], knownmacros); - if (tokens[i]->tok_type == IF || tokens[i]->tok_type == WHILE || tokens[i]->tok_type == FUNCTION) { - apply_macros(tokens[i]->tokens, knownmacros); - } - } -} void apply_whiles(vector& tokens) { for (size_t i = 0; i< tokens.size(); i++) { @@ -383,6 +289,100 @@ void apply_ifs(vector& tokens) tokens[i]->tokens.push_back(endif_tok); } } +void apply_functions(std::vector& tokens) +{ + vector* subtokens = &tokens; + for (size_t i = 0; i < subtokens->size(); i++) { + if 
((*subtokens)[i]->tok_type != FUNCTION) { + continue; + } + con_function* crntfunc = (*subtokens)[i]->tok_function; + if (crntfunc->name == "main") { + crntfunc->name = "_start"; + } + + con_token* tag_tok = new con_token; + tag_tok->tok_type = TAG; + con_tag* functag = new con_tag; + tag_tok->tok_tag = functag; + functag->name = crntfunc->name; + for (size_t j = 0; j < crntfunc->arguments.size(); j++) { + con_token* arg_tok = new con_token; + arg_tok->tok_type = MACRO; + con_macro* arg_macro = new con_macro; + arg_macro->value = reg_to_str(j); + arg_macro->macro = crntfunc->arguments[j]; + arg_tok->tok_macro = arg_macro; + + (*subtokens)[i]->tokens.insert((*subtokens)[i]->tokens.begin(), arg_tok); + } + (*subtokens)[i]->tokens.insert((*subtokens)[i]->tokens.begin(), tag_tok); + con_token* ret_tok = new con_token; + ret_tok->tok_type = CMD; + con_cmd* ret_cmd = new con_cmd; + ret_tok->tok_cmd = ret_cmd; + ret_cmd->command = "ret"; + (*subtokens)[i]->tokens.push_back(ret_tok); + } +} +void apply_macros(vector& tokens, vector knownmacros) +{ + for (size_t i = 0; i < tokens.size(); i++) { + if (tokens[i]->tok_type == MACRO) { + // Filter spaces from macro and value pair + con_macro* f_macro = new con_macro(); + f_macro->macro = ""; + f_macro->value = ""; + for (size_t j = 0; j < tokens[i]->tok_macro->macro.size(); j++) { + if (tokens[i]->tok_macro->macro[j] != ' ') { + f_macro->macro += tokens[i]->tok_macro->macro[j]; + } + } + for (size_t j = 0; j < tokens[i]->tok_macro->value.size(); j++) { + if (tokens[i]->tok_macro->value[j] != ' ') + f_macro->value += tokens[i]->tok_macro->value[j]; + } + knownmacros.push_back(*f_macro); + delete f_macro; + continue; + } + apply_macro_to_token(*tokens[i], knownmacros); + if (tokens[i]->tok_type == IF || tokens[i]->tok_type == WHILE || tokens[i]->tok_type == FUNCTION) { + apply_macros(tokens[i]->tokens, knownmacros); + } + } +} +void apply_funcalls(std::vector& tokens) +{ + for (size_t i = 0; i < tokens.size(); i++) { + 
apply_funcalls(tokens[i]->tokens); + if (tokens[i]->tok_type != FUNCALL) { + continue; + } + vector* args = &tokens[i]->tok_funcall->arguments; + vector arg_tokens; + for (size_t j = 0; j < args->size(); j++) { + con_token* arg_tok = new con_token(); + arg_tok->tok_type = CMD; + con_cmd* arg_cmd = new con_cmd(); + arg_tok->tok_cmd = arg_cmd; + arg_cmd->command = "mov"; + arg_cmd->arg1 = reg_to_str(j); + arg_cmd->arg2 = (*args)[j]; + arg_tokens.push_back(arg_tok); + } + con_token* call_tok = new con_token(); + call_tok->tok_type = CMD; + con_cmd* call_cmd = new con_cmd(); + call_tok->tok_cmd = call_cmd; + call_cmd->command = "call"; + call_cmd->arg1 = tokens[i]->tok_funcall->funcname; + arg_tokens.push_back(call_tok); + + tokens.insert(tokens.begin()+i+1, arg_tokens.begin(), arg_tokens.end()); + } +} + void linearize_tokens(vector& tokens) { for (size_t i = 0; i < tokens.size(); i++) { diff --git a/src/reconstruct.h b/src/reconstruct.h index 330a96c..4dd2c02 100644 --- a/src/reconstruct.h +++ b/src/reconstruct.h @@ -14,19 +14,18 @@ std::string reg_to_str(uint8_t call_num); std::string comparison_to_string(CON_COMPARISON condition); CON_COMPARISON get_comparison_inverse(CON_COMPARISON condition); -// During linearization, the construct parent tokens are removed -void linearize_tokens(std::vector& tokens); -void apply_macro_to_token(con_token& token, std::vector macros); - // The following functions transform the construct specific tokens to nasm ones, // the parent construct tokens remain, but are removed during linearization // Converts args to macros and adds tag with same name to child tokens -void apply_functions(std::vector& tokens); -void apply_funcalls(std::vector& tokens); void apply_whiles(std::vector& tokens); void apply_ifs(std::vector& tokens); +void apply_functions(std::vector& tokens); void apply_macros(std::vector& tokens, std::vector macros); +void apply_funcalls(std::vector& tokens); + +// During linearization, the construct parent tokens are removed 
+void linearize_tokens(std::vector& tokens); std::string tokens_to_nasm(std::vector& tokens);