From ee695311885e5f6b632522e0a97dfd8e7900223b Mon Sep 17 00:00:00 2001 From: NoneSince <88423501+NoneSince@users.noreply.github.com> Date: Wed, 31 Jan 2024 18:23:51 +0000 Subject: [PATCH 1/7] better split function. reg_to_str() now takes bitwidth as argument --- src/deconstruct.cpp | 110 +++++++++++++++++++------------------------- 1 file changed, 48 insertions(+), 62 deletions(-) diff --git a/src/deconstruct.cpp b/src/deconstruct.cpp index 7b22553..fd47b44 100644 --- a/src/deconstruct.cpp +++ b/src/deconstruct.cpp @@ -7,63 +7,10 @@ using namespace std; -static const char& FIRST_UPPERCASE_LETTER = 'A'; -static const char& LAST_UPPERCASE_LETTER = 'Z'; -static bool is_upper(const char& c) -{ - return c >= FIRST_UPPERCASE_LETTER && c <= LAST_UPPERCASE_LETTER; -} -static char to_lower(const char& c) -{ - return is_upper(c) ? c - FIRST_UPPERCASE_LETTER : c; -} -static void to_lower(string& str) -{ - string tmp; - for (string::iterator it = str.begin(); it != str.end(); ++it) { - tmp.push_back(to_lower(*it)); - } - str = tmp; -} +static void to_lower(string& str); -class IsAnyOf -{ -private: - string chars; -public: - IsAnyOf() = default; - IsAnyOf(const string& _chars) : chars(_chars) {} - IsAnyOf(const char*& _chars) : chars(_chars) {} - ~IsAnyOf() = default; - IsAnyOf(const IsAnyOf& other) = delete; // should save unique sorted chars for that - IsAnyOf& operator=(const IsAnyOf& other) = delete; +static void split(vector& result, const string& input, const string& chars); - bool operator()(const char& c) const { - for (string::const_iterator it = chars.cbegin(); it != chars.cend(); ++it) { - if (*it == c) return true; - } - return false; - } -}; - -template -static void split(vector& result, const string& input, const Predicate& pred, const bool& compress_adj_delims = false) -{ - string tmp; - bool prev_is_delim = false; - for (string::const_iterator it = input.cbegin(); it != input.cend(); ++it) { - if (pred(*it)) { - if (prev_is_delim && compress_adj_delims) continue; - result.push_back(tmp); - tmp.clear(); - prev_is_delim = true; - } else { - tmp.push_back(*it); - prev_is_delim = false; - } - } - result.push_back(tmp); -} int get_line_indentation(string line) { @@ -165,7 +112,7 @@ con_section* parse_section(string line) { con_section* tok_section = new con_section(); vector line_split; - split(line_split, line, IsAnyOf(" ")); + split(line_split, line, " "); tok_section->name = line_split[1]; return tok_section; } @@ -179,7 +126,7 @@ con_while* parse_while(string line) { con_while* tok_while = new con_while(); vector line_split; - split(line_split, line, IsAnyOf(" ")); + split(line_split, line, " "); tok_while->condition.arg1 = line_split[1]; tok_while->condition.op = str_to_comparison(line_split[2]); tok_while->condition.arg2 = line_split[3].substr(0, line_split[3].size()-1); // to remove : @@ -189,7 +136,7 @@ con_if* parse_if(string line) { con_if* tok_if = new con_if(); vector line_split; - split(line_split, line, IsAnyOf(" ")); + split(line_split, line, " "); tok_if->condition.arg1 = line_split[1]; tok_if->condition.op = str_to_comparison(line_split[2]); tok_if->condition.arg2 = line_split[3].substr(0, line_split[3].size()-1); @@ -199,7 +146,7 @@ con_function* parse_function(string line) { con_function* tok_function = new con_function(); vector line_split; - split(line_split, line, IsAnyOf("():,")); + split(line_split, line, "():,"); tok_function->name = line_split[0].substr(9, line_split[0].size()-9); for (size_t i = 1; i < line_split.size()-2; i++) { if (line_split[i].empty()) { @@ -213,7 +160,7 @@ con_cmd* parse_cmd(string line) { con_cmd* tok_cmd = new con_cmd(); vector line_split; - split(line_split, line, IsAnyOf(" ,")); + split(line_split, line, " ,"); tok_cmd->command = line_split[0]; if (line_split.size() > 1) tok_cmd->arg1 = line_split[1]; @@ -233,7 +180,7 @@ con_funcall* parse_funcall(string line) { con_funcall* tok_funcall = new con_funcall(); vector line_split; - split(line_split, line, IsAnyOf("(),")); + split(line_split, line, "(),"); tok_funcall->funcname = line_split[0].substr(5, line_split[0].size()-5); for (size_t i = 1; i < line_split.size()-1; i++) { if (line_split[i].empty()) { @@ -295,7 +242,7 @@ con_token* parse_line(string line) vector parse_construct(string code) { vector code_split; - split(code_split, code, IsAnyOf("\n"), true); + split(code_split, code, "\n"); to_lower(code); vector tokens; bool in_data = false; @@ -321,3 +268,42 @@ vector parse_construct(string code) } return tokens; } + +// ----- ----- ----- ----- ----- ----- helper functions impl ----- ----- ----- ----- ----- + +void to_lower(string& str) +{ + for (string::iterator it = str.begin(); it != str.end(); ++it) { + if (*it >= 'A' && *it <= 'Z') { + *it -= 'A'; + *it += 'a'; + } + } +} + +void split(vector& result, const string& input, const string& chars) +{ + string tmp; + bool prev_is_delim = false; + for (string::const_iterator input_it = input.cbegin(); input_it != input.cend(); ++input_it) { + bool is_in_chars = false; + for (string::const_iterator chars_it = chars.cbegin(); chars_it != chars.cend(); ++chars_it) { + if (*chars_it == *input_it) { + is_in_chars = true; + break; + } + } + if (is_in_chars) { + if (prev_is_delim) continue; + if (!tmp.empty()) + result.push_back(tmp); + tmp.clear(); + prev_is_delim = true; + } else { + tmp.push_back(*input_it); + prev_is_delim = false; + } + } + if (!tmp.empty()) + result.push_back(tmp); +} From 50b3033cfcdcaf682cef28f966e680b922bbfcd4 Mon Sep 17 00:00:00 2001 From: NoneSince <88423501+NoneSince@users.noreply.github.com> Date: Wed, 31 Jan 2024 18:24:30 +0000 Subject: [PATCH 2/7] forgot to push this --- src/reconstruct.cpp | 6 +++--- src/reconstruct.h | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/src/reconstruct.cpp b/src/reconstruct.cpp index 03a7054..3068cd0 100644 --- a/src/reconstruct.cpp +++ b/src/reconstruct.cpp @@ -10,7 +10,7 @@ int if_amnt = 0; int while_amnt = 0; CON_BITWIDTH bitwidth = BIT64; -string reg_to_str(uint8_t call_num) +string reg_to_str(uint8_t call_num, CON_BITWIDTH bitwidth) { switch (bitwidth) { case BIT8: @@ -309,7 +309,7 @@ void apply_functions(std::vector& tokens) con_token* arg_tok = new con_token; arg_tok->tok_type = MACRO; con_macro* arg_macro = new con_macro; - arg_macro->value = reg_to_str(j); + arg_macro->value = reg_to_str(j, bitwidth); arg_macro->macro = crntfunc->arguments[j]; arg_tok->tok_macro = arg_macro; @@ -366,7 +366,7 @@ void apply_funcalls(std::vector& tokens) con_cmd* arg_cmd = new con_cmd(); arg_tok->tok_cmd = arg_cmd; arg_cmd->command = "mov"; - arg_cmd->arg1 = reg_to_str(j); + arg_cmd->arg1 = reg_to_str(j, bitwidth); arg_cmd->arg2 = (*args)[j]; arg_tokens.push_back(arg_tok); } diff --git a/src/reconstruct.h b/src/reconstruct.h index 4dd2c02..cc9f025 100644 --- a/src/reconstruct.h +++ b/src/reconstruct.h @@ -10,7 +10,7 @@ extern int if_amnt; extern int while_amnt; extern CON_BITWIDTH bitwidth; -std::string reg_to_str(uint8_t call_num); +std::string reg_to_str(uint8_t call_num, CON_BITWIDTH bitwidth); std::string comparison_to_string(CON_COMPARISON condition); CON_COMPARISON get_comparison_inverse(CON_COMPARISON condition); From 905921639486d4d74f45851cc6d6ee496eec4948 Mon Sep 17 00:00:00 2001 From: NoneSince <88423501+NoneSince@users.noreply.github.com> Date: Wed, 31 Jan 2024 18:29:47 +0000 Subject: [PATCH 3/7] reverted strchr.con change: chr is 64bit while findchr is 8bit --- examples/strchr.con | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/examples/strchr.con b/examples/strchr.con index 81534cf..4ff263d 100644 --- a/examples/strchr.con +++ b/examples/strchr.con @@ -3,9 +3,10 @@ extern printf section .text function strchr(str, chr): !ptrresult rax + !findchr sil mov ptrresult, 0 while byte[str] ne 0: - if byte[str] e chr: + if byte[str] e findchr: mov ptrresult, str ret inc str From 9b4be387042e7e287a965cdfe843ce9ccd4547a7 Mon Sep 17 00:00:00 2001 From: NoneSince <88423501+NoneSince@users.noreply.github.com> Date: Wed, 31 Jan 2024 20:44:46 +0000 Subject: [PATCH 4/7] better split args --- src/deconstruct.cpp | 27 +++++++++++---------------- 1 file changed, 11 insertions(+), 16 deletions(-) diff --git a/src/deconstruct.cpp b/src/deconstruct.cpp index fd47b44..e500c35 100644 --- a/src/deconstruct.cpp +++ b/src/deconstruct.cpp @@ -9,7 +9,7 @@ using namespace std; static void to_lower(string& str); -static void split(vector& result, const string& input, const string& chars); +static vector split(const string& input, const string& chars); int get_line_indentation(string line) @@ -111,8 +111,7 @@ vector delinearize_tokens(std::vector tokens) con_section* parse_section(string line) { con_section* tok_section = new con_section(); - vector line_split; - split(line_split, line, " "); + vector line_split = split(line, " "); tok_section->name = line_split[1]; return tok_section; } @@ -125,8 +124,7 @@ con_tag* parse_tag(string line) con_while* parse_while(string line) { con_while* tok_while = new con_while(); - vector line_split; - split(line_split, line, " "); + vector line_split = split(line, " "); tok_while->condition.arg1 = line_split[1]; tok_while->condition.op = str_to_comparison(line_split[2]); tok_while->condition.arg2 = line_split[3].substr(0, line_split[3].size()-1); // to remove : @@ -135,8 +133,7 @@ con_while* parse_while(string line) con_if* parse_if(string line) { con_if* tok_if = new con_if(); - vector line_split; - split(line_split, line, " "); + vector line_split = split(line, " "); tok_if->condition.arg1 = line_split[1]; tok_if->condition.op = str_to_comparison(line_split[2]); tok_if->condition.arg2 = line_split[3].substr(0, line_split[3].size()-1); @@ -145,8 +142,7 @@ con_if* parse_if(string line) con_function* parse_function(string line) { con_function* tok_function = new con_function(); - vector line_split; - split(line_split, line, "():,"); + vector line_split = split(line, "():,"); tok_function->name = line_split[0].substr(9, line_split[0].size()-9); for (size_t i = 1; i < line_split.size()-2; i++) { if (line_split[i].empty()) { @@ -159,8 +155,7 @@ con_function* parse_function(string line) con_cmd* parse_cmd(string line) { con_cmd* tok_cmd = new con_cmd(); - vector line_split; - split(line_split, line, " ,"); + vector line_split = split(line, " ,"); tok_cmd->command = line_split[0]; if (line_split.size() > 1) tok_cmd->arg1 = line_split[1]; @@ -179,8 +174,7 @@ con_macro* parse_macro(string line) con_funcall* parse_funcall(string line) { con_funcall* tok_funcall = new con_funcall(); - vector line_split; - split(line_split, line, "(),"); + vector line_split = split(line, "(),"); tok_funcall->funcname = line_split[0].substr(5, line_split[0].size()-5); for (size_t i = 1; i < line_split.size()-1; i++) { if (line_split[i].empty()) { @@ -241,8 +235,7 @@ con_token* parse_line(string line) } vector parse_construct(string code) { - vector code_split; - split(code_split, code, "\n"); + vector code_split = split(code, "\n"); to_lower(code); vector tokens; bool in_data = false; @@ -281,8 +274,9 @@ void to_lower(string& str) } } -void split(vector& result, const string& input, const string& chars) +vector split(const string& input, const string& chars) { + vector result; string tmp; bool prev_is_delim = false; for (string::const_iterator input_it = input.cbegin(); input_it != input.cend(); ++input_it) { @@ -306,4 +300,5 @@ void split(vector& result, const string& input, const string& chars) } if (!tmp.empty()) result.push_back(tmp); + return result; } From ec626cb2e812357a394e1093351236cc14028192 Mon Sep 17 00:00:00 2001 From: NoneSince <88423501+NoneSince@users.noreply.github.com> Date: Thu, 1 Feb 2024 15:21:06 +0000 Subject: [PATCH 5/7] apllied wanted format for functions --- src/construct.cpp | 3 +-- src/construct_debug.cpp | 6 ++---- src/construct_flags.cpp | 6 ++---- src/deconstruct.cpp | 48 ++++++++++++++--------------------------- src/reconstruct.cpp | 33 ++++++++++------------------ 5 files changed, 32 insertions(+), 64 deletions(-) diff --git a/src/construct.cpp b/src/construct.cpp index 53c61ab..c11b113 100644 --- a/src/construct.cpp +++ b/src/construct.cpp @@ -8,8 +8,7 @@ #include "reconstruct.h" // linearize_tokens() #include "construct_flags.h" // handle_flags() -int main(int argc, char** argv) -{ +int main(int argc, char** argv) { std::string path; std::string outpath; if (handle_flags(argc, argv, &path, &outpath) != 0) { diff --git a/src/construct_debug.cpp b/src/construct_debug.cpp index 5098a7a..d1c08b7 100644 --- a/src/construct_debug.cpp +++ b/src/construct_debug.cpp @@ -5,8 +5,7 @@ #include "construct_types.h" #include "reconstruct.h" // comparison_to_string() -std::string tokentype_to_string(CON_TOKENTYPE type) -{ +std::string tokentype_to_string(CON_TOKENTYPE type) { switch (type) { case SECTION: return "section"; @@ -28,8 +27,7 @@ std::string tokentype_to_string(CON_TOKENTYPE type) throw std::invalid_argument("Invalid token type: "+std::to_string(static_cast(type))); } -std::string token_to_string(con_token token) -{ +std::string token_to_string(con_token token) { std::string tokstring = "type: " + tokentype_to_string(token.tok_type); switch (token.tok_type) { case SECTION: diff --git a/src/construct_flags.cpp b/src/construct_flags.cpp index 33f3dc9..f034d27 100644 --- a/src/construct_flags.cpp +++ b/src/construct_flags.cpp @@ -7,8 +7,7 @@ extern CON_BITWIDTH bitwidth; using namespace std; -int set_bitwidth(char* argv) -{ +int set_bitwidth(char* argv) { if (string(argv) == "elf8") { bitwidth = BIT8; return 0; @@ -29,8 +28,7 @@ int set_bitwidth(char* argv) return -1; } -int handle_flags(int argc, char** argv, string* path, string* outpath) -{ +int handle_flags(int argc, char** argv, string* path, string* outpath) { bool bitwidth_set = false; bool path_set = false; bool outpath_set = false; diff --git a/src/deconstruct.cpp b/src/deconstruct.cpp index e500c35..0962f0d 100644 --- a/src/deconstruct.cpp +++ b/src/deconstruct.cpp @@ -12,8 +12,7 @@ static void to_lower(string& str); static vector split(const string& input, const string& chars); -int get_line_indentation(string line) -{ +int get_line_indentation(string line) { int indentation = 0; for (size_t i = 0; i < line.size(); i++) { if (line[i] == '\t') { @@ -26,8 +25,7 @@ int get_line_indentation(string line) } // Expects formatted line -CON_TOKENTYPE get_token_type(string line) -{ +CON_TOKENTYPE get_token_type(string line) { if (line.substr(0, 8) == "section ") return SECTION; if (line.find(' ') == string::npos && line[line.size()-1] == ':') @@ -45,8 +43,7 @@ CON_TOKENTYPE get_token_type(string line) return CMD; } -CON_COMPARISON str_to_comparison(string comp) -{ +CON_COMPARISON str_to_comparison(string comp) { if (comp == "e") return E; if (comp == "ne") @@ -63,8 +60,7 @@ CON_COMPARISON str_to_comparison(string comp) } -vector delinearize_tokens(std::vector tokens) -{ +vector delinearize_tokens(std::vector tokens) { vector dl_tokens; // Serves as parent "section" where all tokens belong to, convenient for algo @@ -108,21 +104,18 @@ vector delinearize_tokens(std::vector tokens) return delinearized_tokens; } -con_section* parse_section(string line) -{ +con_section* parse_section(string line) { con_section* tok_section = new con_section(); vector line_split = split(line, " "); tok_section->name = line_split[1]; return tok_section; } -con_tag* parse_tag(string line) -{ +con_tag* parse_tag(string line) { con_tag* tok_tag = new con_tag(); tok_tag->name = line.substr(0, line.size()-1); return tok_tag; } -con_while* parse_while(string line) -{ +con_while* parse_while(string line) { con_while* tok_while = new con_while(); vector line_split = split(line, " "); tok_while->condition.arg1 = line_split[1]; @@ -130,8 +123,7 @@ con_while* parse_while(string line) tok_while->condition.arg2 = line_split[3].substr(0, line_split[3].size()-1); // to remove : return tok_while; } -con_if* parse_if(string line) -{ +con_if* parse_if(string line) { con_if* tok_if = new con_if(); vector line_split = split(line, " "); tok_if->condition.arg1 = line_split[1]; @@ -139,8 +131,7 @@ con_if* parse_if(string line) tok_if->condition.arg2 = line_split[3].substr(0, line_split[3].size()-1); return tok_if; } -con_function* parse_function(string line) -{ +con_function* parse_function(string line) { con_function* tok_function = new con_function(); vector line_split = split(line, "():,"); tok_function->name = line_split[0].substr(9, line_split[0].size()-9); @@ -152,8 +143,7 @@ con_function* parse_function(string line) } return tok_function; } -con_cmd* parse_cmd(string line) -{ +con_cmd* parse_cmd(string line) { con_cmd* tok_cmd = new con_cmd(); vector line_split = split(line, " ,"); tok_cmd->command = line_split[0]; @@ -163,16 +153,14 @@ con_cmd* parse_cmd(string line) tok_cmd->arg2 = line_split[3]; return tok_cmd; } -con_macro* parse_macro(string line) -{ +con_macro* parse_macro(string line) { con_macro* tok_macro = new con_macro(); int spacepos = line.find(' '); tok_macro->macro = line.substr(1, spacepos-1); tok_macro->value = line.substr(spacepos+1, line.size()-spacepos-1); return tok_macro; } -con_funcall* parse_funcall(string line) -{ +con_funcall* parse_funcall(string line) { con_funcall* tok_funcall = new con_funcall(); vector line_split = split(line, "(),"); tok_funcall->funcname = line_split[0].substr(5, line_split[0].size()-5); @@ -186,8 +174,7 @@ con_funcall* parse_funcall(string line) } // Does not expect formatted line, only lowercase -con_token* parse_line(string line) -{ +con_token* parse_line(string line) { con_token* token = new con_token; //remove multiple spaces from line string f_line = ""; @@ -233,8 +220,7 @@ con_token* parse_line(string line) } return token; } -vector parse_construct(string code) -{ +vector parse_construct(string code) { vector code_split = split(code, "\n"); to_lower(code); vector tokens; @@ -264,8 +250,7 @@ vector parse_construct(string code) // ----- ----- ----- ----- ----- ----- helper functions impl ----- ----- ----- ----- ----- -void to_lower(string& str) -{ +void to_lower(string& str) { for (string::iterator it = str.begin(); it != str.end(); ++it) { if (*it >= 'A' && *it <= 'Z') { *it -= 'A'; @@ -274,8 +259,7 @@ void to_lower(string& str) } } -vector split(const string& input, const string& chars) -{ +vector split(const string& input, const string& chars) { vector result; string tmp; bool prev_is_delim = false; diff --git a/src/reconstruct.cpp b/src/reconstruct.cpp index 3068cd0..8e25a83 100644 --- a/src/reconstruct.cpp +++ b/src/reconstruct.cpp @@ -10,8 +10,7 @@ int if_amnt = 0; int while_amnt = 0; CON_BITWIDTH bitwidth = BIT64; -string reg_to_str(uint8_t call_num, CON_BITWIDTH bitwidth) -{ +string reg_to_str(uint8_t call_num, CON_BITWIDTH bitwidth) { switch (bitwidth) { case BIT8: switch (call_num) { @@ -104,8 +103,7 @@ string reg_to_str(uint8_t call_num, CON_BITWIDTH bitwidth) } throw invalid_argument("Invalid bitwidth or call_num: bitwidth="+to_string(static_cast(bitwidth))+" call_num="+to_string(static_cast(call_num))); } -string comparison_to_string(CON_COMPARISON condition) -{ +string comparison_to_string(CON_COMPARISON condition) { switch (condition) { case E: return "e"; @@ -122,8 +120,7 @@ string comparison_to_string(CON_COMPARISON condition) } throw invalid_argument("Invalid comparison value: "+to_string(static_cast(condition))); } -CON_COMPARISON get_comparison_inverse(CON_COMPARISON condition) -{ +CON_COMPARISON get_comparison_inverse(CON_COMPARISON condition) { switch (condition) { case E: return NE; @@ -141,8 +138,7 @@ CON_COMPARISON get_comparison_inverse(CON_COMPARISON condition) throw invalid_argument("Invalid comparison value: "+to_string(static_cast(condition))); } -static void apply_macro_to_token(con_token& token, vector macros) -{ +static void apply_macro_to_token(con_token& token, vector macros) { if (token.tok_type != WHILE && token.tok_type != IF && token.tok_type != CMD) { return; } @@ -199,8 +195,7 @@ static void apply_macro_to_token(con_token& token, vector macros) } } -void apply_whiles(vector& tokens) -{ +void apply_whiles(vector& tokens) { for (size_t i = 0; i< tokens.size(); i++) { apply_whiles(tokens[i]->tokens); if (tokens[i]->tok_type != WHILE) { @@ -252,8 +247,7 @@ void apply_whiles(vector& tokens) // so: starttag, cmp, jmp ... jmp, endtag } } -void apply_ifs(vector& tokens) -{ +void apply_ifs(vector& tokens) { for (size_t i = 0; i< tokens.size(); i++) { apply_ifs(tokens[i]->tokens); if (tokens[i]->tok_type != IF) { @@ -288,8 +282,7 @@ void apply_ifs(vector& tokens) tokens[i]->tokens.push_back(endif_tok); } } -void apply_functions(std::vector& tokens) -{ +void apply_functions(std::vector& tokens) { vector* subtokens = &tokens; for (size_t i = 0; i < subtokens->size(); i++) { if ((*subtokens)[i]->tok_type != FUNCTION) { @@ -324,8 +317,7 @@ void apply_functions(std::vector& tokens) (*subtokens)[i]->tokens.push_back(ret_tok); } } -void apply_macros(vector& tokens, vector knownmacros) -{ +void apply_macros(vector& tokens, vector knownmacros) { for (size_t i = 0; i < tokens.size(); i++) { if (tokens[i]->tok_type == MACRO) { // Filter spaces from macro and value pair @@ -351,8 +343,7 @@ void apply_macros(vector& tokens, vector knownmacros) } } } -void apply_funcalls(std::vector& tokens) -{ +void apply_funcalls(std::vector& tokens) { for (size_t i = 0; i < tokens.size(); i++) { apply_funcalls(tokens[i]->tokens); if (tokens[i]->tok_type != FUNCALL) { @@ -382,8 +373,7 @@ void apply_funcalls(std::vector& tokens) } } -void linearize_tokens(vector& tokens) -{ +void linearize_tokens(vector& tokens) { for (size_t i = 0; i < tokens.size(); i++) { if (tokens[i]->tok_type != IF && tokens[i]->tok_type != WHILE && tokens[i]->tok_type != FUNCTION) { continue; @@ -395,8 +385,7 @@ void linearize_tokens(vector& tokens) } } -std::string tokens_to_nasm(std::vector& tokens) -{ +std::string tokens_to_nasm(std::vector& tokens) { string output = ""; for (size_t i = 0; i < tokens.size(); i++) { if (tokens[i]->tok_type == IF || tokens[i]->tok_type == WHILE From 6d0e6c4774e6148a276072b82302952e5a542692 Mon Sep 17 00:00:00 2001 From: NoneSince Date: Fri, 2 Feb 2024 14:58:10 +0200 Subject: [PATCH 6/7] revert merge master into mabofoul/step_by_step --- examples/factorial.asm | 25 --- examples/factorial.con | 3 +- examples/strchr.asm | 33 --- examples/strchr.con | 14 +- examples/strlwr.asm | 32 --- examples/strlwr.con | 21 +- src/construct.cpp | 19 +- src/construct_debug.cpp | 10 +- src/construct_types.h | 72 ++---- src/deconstruct.cpp | 485 ++++++---------------------------------- src/deconstruct.h | 1 - src/reconstruct.cpp | 340 ++++++++++------------------ src/reconstruct.h | 4 +- 13 files changed, 223 insertions(+), 836 deletions(-) delete mode 100644 examples/factorial.asm delete mode 100644 examples/strchr.asm delete mode 100644 examples/strlwr.asm diff --git a/examples/factorial.asm b/examples/factorial.asm deleted file mode 100644 index 23a002c..0000000 --- a/examples/factorial.asm +++ /dev/null @@ -1,25 +0,0 @@ -global _start -extern printf -section .text -factorial: -mov rsi, 2 -mov rax, 1 -startwhile0: -cmp rsi, rdi -jg endwhile0 -mul rsi -inc rsi -jmp startwhile0 -endwhile0: -ret -_start: -mov rdi, 3 -call factorial -mov rdi, fmt -mov rsi, rax -call printf -mov rax, 60 -syscall -ret -section .data -fmt: db "%d", 10, 0 diff --git a/examples/factorial.con b/examples/factorial.con index 6aae732..18c99f9 100644 --- a/examples/factorial.con +++ b/examples/factorial.con @@ -16,7 +16,8 @@ function main(): !result rax call printf(fmt, result) - syscall exit() + mov rax, 60 + syscall section .data fmt: db "%d", 10, 0 diff --git a/examples/strchr.asm b/examples/strchr.asm deleted file mode 100644 index ea70278..0000000 --- a/examples/strchr.asm +++ /dev/null @@ -1,33 +0,0 @@ -global _start -extern printf -section .text -strchr: -mov rax, 0 -startwhile0: -cmp byte[rdi], 0 -je endwhile0 -cmp byte[rdi], sil -jne endif0 -mov rax, rdi -ret -endif0: -inc rdi -jmp startwhile0 -endwhile0: -ret -_start: -mov rdi, teststr -mov rsi, 87 -call strchr -mov rdi, fmt -mov rsi, rax -call printf -mov rdi, fmt -mov rsi, teststr -call printf -mov rax, 60 -syscall -ret -section .data -teststr: db "Hello World!", 0 -fmt: db "%p", 10, 0 diff --git a/examples/strchr.con b/examples/strchr.con index df25e65..4ff263d 100644 --- a/examples/strchr.con +++ b/examples/strchr.con @@ -4,25 +4,23 @@ section .text function strchr(str, chr): !ptrresult rax !findchr sil - !end_of_str 0 - !nullptr 0 - mov ptrresult, nullptr - while byte[str] ne end_of_str: + mov ptrresult, 0 + while byte[str] ne 0: if byte[str] e findchr: mov ptrresult, str ret inc str function main(): - !W_letter 87 - call strchr(teststr, W_letter) + call strchr(teststr, 87) !result rax call printf(fmt, result) call printf(fmt, teststr) - syscall exit() + mov rax, 60 + syscall section .data -teststr: db "Hello World!", 0 fmt: db "%p", 10, 0 +teststr: db "Hello World!", 0 diff --git a/examples/strlwr.asm b/examples/strlwr.asm deleted file mode 100644 index b1ebd7d..0000000 --- a/examples/strlwr.asm +++ /dev/null @@ -1,32 +0,0 @@ -global _start -extern printf -section .text -strlwr: -startwhile0: -cmp byte[rdi], 0 -je endwhile0 -cmp byte[rdi], 65 -jl endif1 -cmp byte[rdi], 90 -jg endif0 -mov sil, byte[rdi] -add sil, 32 -mov byte[rdi], sil -endif0: -endif1: -inc rdi -jmp startwhile0 -endwhile0: -ret -_start: -mov rdi, teststr -call strlwr -mov rdi, fmt -mov rsi, teststr -call printf -mov rax, 60 -syscall -ret -section .data -teststr: db "HeLlO WoRlD", 0 -fmt: db "%s", 10, 0 diff --git a/examples/strlwr.con b/examples/strlwr.con index c50ad7a..3cc54e1 100644 --- a/examples/strlwr.con +++ b/examples/strlwr.con @@ -2,27 +2,24 @@ extern printf section .text function strlwr(str): - !A_letter 65 - !Z_letter 90 - !end_of_str 0 - !A_to_a 32 - while byte[str] ne end_of_str: - if byte[str] ge A_letter: - if byte[str] le Z_letter: + while byte[str] ne 0: + if byte[str] ge 65: + if byte[str] le 90: !crntchr sil mov crntchr, byte[str] - add crntchr, A_to_a + add crntchr, 32 mov byte[str], crntchr inc str function main(): - call strlwr(teststr) - call printf(fmt, teststr) + call strlwr(teststring) + call printf(fmt, teststring) - syscall exit() + mov rax, 60 + syscall section .data -teststr: db "HeLlO WoRlD", 0 +teststring db "HeLlO WoRlD", 0 fmt: db "%s", 10, 0 diff --git a/src/construct.cpp b/src/construct.cpp index bdeb64d..c11b113 100644 --- a/src/construct.cpp +++ b/src/construct.cpp @@ -4,9 +4,9 @@ #include #include #include "construct_types.h" -#include "deconstruct.h" -#include "reconstruct.h" -#include "construct_flags.h" +#include "deconstruct.h" // parse_construct() +#include "reconstruct.h" // linearize_tokens() +#include "construct_flags.h" // handle_flags() int main(int argc, char** argv) { std::string path; @@ -39,23 +39,12 @@ int main(int argc, char** argv) { apply_ifs(tokens); apply_whiles(tokens); apply_funcalls(tokens); - apply_syscalls(tokens); - std::vector empty_macros; // pointer to con_macros in tokens, not a copy + std::vector empty_macros; apply_macros(tokens, empty_macros); - empty_macros.clear(); // remove the pointers to con_macro, not the con_macro objects themselves linearize_tokens(tokens); std::ofstream outfile; outfile.open(outpath); outfile << tokens_to_nasm(tokens); outfile.close(); - - for (std::vector::reverse_iterator r_it = tokens.rbegin(); r_it != tokens.rend(); ++r_it) { - delete *r_it; - *r_it = nullptr; - } - tokens.clear(); - glob_cmd = nullptr; // deleted in tokens vector - glob_tok = nullptr; // deleted in tokens vector - return 0; } diff --git a/src/construct_debug.cpp b/src/construct_debug.cpp index 894f20b..d1c08b7 100644 --- a/src/construct_debug.cpp +++ b/src/construct_debug.cpp @@ -16,15 +16,13 @@ std::string tokentype_to_string(CON_TOKENTYPE type) { case IF: return "if"; case FUNCTION: - return "function"; + return "func"; case CMD: return "cmd"; case MACRO: return "macro"; case FUNCALL: return "funcall"; - case SYSCALL: - return "syscall"; } throw std::invalid_argument("Invalid token type: "+std::to_string(static_cast(type))); } @@ -39,12 +37,10 @@ std::string token_to_string(con_token token) { tokstring += ", name: " + token.tok_tag->name; break; case WHILE: - tokstring += ", condition: " + token.tok_while->condition.arg1 + " " - + comparison_to_string(token.tok_while->condition.op) + " " + token.tok_while->condition.arg2; + tokstring += ", condition: " + token.tok_while->condition.arg1 + " " + comparison_to_string(token.tok_while->condition.op) + " " + token.tok_while->condition.arg2; break; case IF: - tokstring += ", condition: " + token.tok_if->condition.arg1 + " " - + comparison_to_string(token.tok_if->condition.op) + " " + token.tok_if->condition.arg2; + tokstring += ", condition: " + token.tok_if->condition.arg1 + " " + comparison_to_string(token.tok_if->condition.op) + " " + token.tok_if->condition.arg2; break; case FUNCTION: tokstring += ", function: " + token.tok_function->name + ", arguments: "; diff --git a/src/construct_types.h b/src/construct_types.h index ca4497e..5f76bbc 100644 --- a/src/construct_types.h +++ b/src/construct_types.h @@ -28,8 +28,22 @@ enum CON_TOKENTYPE { FUNCTION, CMD, MACRO, - FUNCALL, - SYSCALL + FUNCALL +}; + + +struct con_token { + CON_TOKENTYPE tok_type; + int indentation; + struct con_section* tok_section; + struct con_tag* tok_tag; + struct con_while* tok_while; + struct con_if* tok_if; + struct con_function* tok_function; + struct con_cmd* tok_cmd; + struct con_macro* tok_macro; + struct con_funcall* tok_funcall; + std::vector tokens; // Only non-empty for if, while and function tokens }; struct _con_condition { @@ -38,7 +52,6 @@ struct _con_condition { std::string arg2; }; - struct con_section { std::string name; }; @@ -76,57 +89,4 @@ struct con_funcall { std::vector arguments; }; -struct con_syscall { - uint16_t number; - std::vector arguments; -}; - - -struct con_token { - CON_TOKENTYPE tok_type; - int indentation; - con_section* tok_section = nullptr; - con_tag* tok_tag = nullptr; - con_while* tok_while = nullptr; - con_if* tok_if = nullptr; - con_function* tok_function = nullptr; - con_cmd* tok_cmd = nullptr; - con_macro* tok_macro = nullptr; - con_funcall* tok_funcall = nullptr; - con_syscall* tok_syscall = nullptr; - std::vector tokens; // relevant to "if", "while", "function" and "syscall" tokens - - ~con_token() { - switch (tok_type) { - case SECTION: - if (tok_section != nullptr) delete tok_section; - break; - case TAG: - if (tok_tag != nullptr) delete tok_tag; - break; - case WHILE: - if (tok_while != nullptr) delete tok_while; - break; - case IF: - if (tok_if != nullptr) delete tok_if; - break; - case FUNCTION: - if (tok_function != nullptr) delete tok_function; - break; - case CMD: - if (tok_cmd != nullptr) delete tok_cmd; - break; - case MACRO: - if (tok_macro != nullptr) delete tok_macro; - break; - case FUNCALL: - if (tok_funcall != nullptr) delete tok_funcall; - break; - case SYSCALL: - if (tok_syscall != nullptr) delete tok_syscall; - break; - } - } -}; - #endif // CONSTRUCT_TYPES_H_ diff --git a/src/deconstruct.cpp b/src/deconstruct.cpp index d2b2960..0962f0d 100644 --- a/src/deconstruct.cpp +++ b/src/deconstruct.cpp @@ -1,15 +1,16 @@ #include #include #include -#include #include #include "deconstruct.h" #include "construct_types.h" using namespace std; +static void to_lower(string& str); + static vector split(const string& input, const string& chars); -static uint16_t get_syscall_number(const std::string& syscall_name); + int get_line_indentation(string line) { int indentation = 0; @@ -25,23 +26,20 @@ int get_line_indentation(string line) { // Expects formatted line CON_TOKENTYPE get_token_type(string line) { - vector line_split = split(line, " "); // line_split is not empty - if (line_split[0] == "section") + if (line.substr(0, 8) == "section ") return SECTION; if (line.find(' ') == string::npos && line[line.size()-1] == ':') return TAG; - if (line_split[0] == "while") + if (line.substr(0, 6) == "while ") return WHILE; - if (line_split[0] == "if") + if (line.substr(0, 3) == "if ") return IF; - if (line_split[0] == "function") + if (line.substr(0, 9) == "function ") return FUNCTION; if (line[0] == '!') return MACRO; - if (line_split[0] == "call" && line.find('(') != string::npos && line.find(')') != string::npos) + if (line.substr(0, 5) == "call " && line.find('(') != string::npos && line.find(')') != string::npos) return FUNCALL; - if (line_split[0] == "syscall" && line.find('(') != string::npos && line.find(')') != string::npos) - return SYSCALL; return CMD; } @@ -66,9 +64,10 @@ vector delinearize_tokens(std::vector tokens) { vector dl_tokens; // Serves as parent "section" where all tokens belong to, convenient for algo + con_section* parent_section = new con_section; con_token* parent_token = new con_token; + parent_token->tok_section = parent_section; parent_token->tok_type = SECTION; - parent_token->tok_section = new con_section; // is deleted inside parent_token parent_token->indentation = -1; stack parent_stack; @@ -76,67 +75,67 @@ vector delinearize_tokens(std::vector tokens) { dl_tokens.push_back(parent_token); - // When a new while, if or function is encountered it is pushed to the top of the parent_stack + // When a new when, if or function is encountered it is pushed to the top of the parent_stack // All tokens with the indentation of the top of the parent_stack+1 // are then added to the elem at the top of the stack (ptr so also to elem in vector). // If token is while, if or function it is pushed to stack and becomes new parent. // if indentation goes up, new token is pushed to stack, when indentation goes down, // tops of stack are popped off by how much it decreased. for (size_t i = 0; i < tokens.size(); i++) { - if (tokens[i]->indentation - parent_stack.top()->indentation <= 0) { - int indentation_diff = parent_stack.top()->indentation - tokens[i]->indentation + 1; + if (parent_stack.top()->indentation - tokens[i]->indentation >= 0) { + int indentation_diff = parent_stack.top()->indentation - tokens[i]->indentation+1; for (int j = 0; j < indentation_diff; j++) { parent_stack.pop(); } } - if (tokens[i]->indentation - parent_stack.top()->indentation == 1) { + if (tokens[i]->indentation == parent_stack.top()->indentation+1) { parent_stack.top()->tokens.push_back(tokens[i]); } - if (tokens[i]->tok_type == WHILE || tokens[i]->tok_type == IF || tokens[i]->tok_type == FUNCTION) { + if (tokens[i]->tok_type == FUNCTION || tokens[i]->tok_type == IF || tokens[i]->tok_type == WHILE) { parent_stack.push(tokens[i]); } } vector delinearized_tokens = parent_token->tokens; + delete parent_section; delete parent_token; - parent_token = nullptr; return delinearized_tokens; } -con_section* parse_section(string line) { // section name // section . name ?? +con_section* parse_section(string line) { con_section* tok_section = new con_section(); vector line_split = split(line, " "); tok_section->name = line_split[1]; return tok_section; } -con_tag* parse_tag(string line) { // name: // name : ?? +con_tag* parse_tag(string line) { con_tag* tok_tag = new con_tag(); tok_tag->name = line.substr(0, line.size()-1); return tok_tag; } -con_while* parse_while(string line) { // while val1 comp val2: +con_while* parse_while(string line) { con_while* tok_while = new con_while(); - vector line_split = split(line, " :"); + vector line_split = split(line, " "); tok_while->condition.arg1 = line_split[1]; tok_while->condition.op = str_to_comparison(line_split[2]); - tok_while->condition.arg2 = line_split[3]; + tok_while->condition.arg2 = line_split[3].substr(0, line_split[3].size()-1); // to remove : return tok_while; } -con_if* parse_if(string line) { // if val1 comp val2: +con_if* parse_if(string line) { con_if* tok_if = new con_if(); - vector line_split = split(line, " :"); + vector line_split = split(line, " "); tok_if->condition.arg1 = line_split[1]; tok_if->condition.op = str_to_comparison(line_split[2]); - tok_if->condition.arg2 = line_split[3]; + tok_if->condition.arg2 = line_split[3].substr(0, line_split[3].size()-1); return tok_if; } -con_function* parse_function(string line) { // function func(arg1, arg2, ...): +con_function* parse_function(string line) { con_function* tok_function = new con_function(); - vector line_split = split(line, " ():,"); - tok_function->name = line_split[1]; - for (size_t i = 2; i < line_split.size(); i++) { + vector line_split = split(line, "():,"); + tok_function->name = line_split[0].substr(9, line_split[0].size()-9); + for (size_t i = 1; i < line_split.size()-2; i++) { if (line_split[i].empty()) { continue; } @@ -144,44 +143,35 @@ con_function* parse_function(string line) { // function func(arg1, arg2, ...): } return tok_function; } -con_cmd* parse_cmd(string line) { // op // op arg1 // op arg1, arg2 +con_cmd* parse_cmd(string line) { con_cmd* tok_cmd = new con_cmd(); vector line_split = split(line, " ,"); tok_cmd->command = line_split[0]; if (line_split.size() > 1) tok_cmd->arg1 = line_split[1]; - if (line_split.size() > 2) - tok_cmd->arg2 = line_split[2]; + if (line_split.size() > 3) + tok_cmd->arg2 = line_split[3]; return tok_cmd; } -con_macro* parse_macro(string line) { // !name reg +con_macro* parse_macro(string line) { con_macro* tok_macro = new con_macro(); - vector line_split = split(line, " !"); - tok_macro->macro = line_split[0]; - tok_macro->value = line_split[1]; + int spacepos = line.find(' '); + tok_macro->macro = line.substr(1, spacepos-1); + tok_macro->value = line.substr(spacepos+1, line.size()-spacepos-1); return tok_macro; } -con_funcall* parse_funcall(string line) { // call func(arg1, arg2, ...) +con_funcall* parse_funcall(string line) { con_funcall* tok_funcall = new con_funcall(); - vector line_split = split(line, " (),"); - tok_funcall->funcname = line_split[1]; - for (size_t i = 2; i < line_split.size(); i++) { - if (line_split[i].empty()) throw invalid_argument("Invalid syntax"); - tok_funcall->arguments.push_back(line_split[i]); + vector line_split = split(line, "(),"); + tok_funcall->funcname = line_split[0].substr(5, line_split[0].size()-5); + for (size_t i = 1; i < line_split.size()-1; i++) { + if (line_split[i].empty()) { + continue; + } + tok_funcall->arguments.push_back(line_split[i]); // macros filter out spaces anyway when applied } return tok_funcall; } -con_syscall* parse_syscall(string line) // syscall sysc(arg1, arg2, ...) -{ - vector line_split = split(line, " (),"); - con_syscall* tok_syscall = new con_syscall(); - tok_syscall->number = get_syscall_number(line_split[1]); - for (size_t i = 2; i < line_split.size(); i++) { - if (line_split[i].empty()) throw invalid_argument("Invalid syntax"); - tok_syscall->arguments.push_back(line_split[i]); - } - return tok_syscall; -} // Does not expect formatted line, only lowercase con_token* parse_line(string line) { @@ -189,11 +179,18 @@ con_token* parse_line(string line) { //remove multiple spaces from line string f_line = ""; bool caught_space = false; - for (string::iterator it = line.begin(); it != line.end(); ++it) { - bool is_space = (*it == ' '); - if (*it == '\t' || (is_space && caught_space)) continue; - f_line += *it; - caught_space = is_space; + for (size_t i = 0; i < line.size(); i++) { + if (line[i] == ' ') { + if (!caught_space) { + f_line += line[i]; + caught_space = true; + } + } else { + if (line[i] != '\t') { + f_line += line[i]; + } + caught_space = false; + } } token->tok_type = get_token_type(f_line); switch (token->tok_type) { @@ -220,15 +217,12 @@ con_token* parse_line(string line) { break; case FUNCALL: token->tok_funcall = parse_funcall(f_line); - break; - case SYSCALL: - token->tok_syscall = parse_syscall(f_line); - break; } return token; } vector parse_construct(string code) { vector code_split = split(code, "\n"); + to_lower(code); vector tokens; bool in_data = false; for (size_t i = 0; i < code_split.size(); i++) { @@ -236,17 +230,10 @@ vector parse_construct(string code) { if (code_split[i].find_first_of("abcdefghijklmnopqrstuvwxyz!") == std::string::npos) { continue; } - con_token* new_token = nullptr; - try { - new_token = parse_line(code_split[i]); - } - catch (const std::exception& e) { - throw std::runtime_error("Line "+to_string(i)+" ["+code_split[i]+"] :"+e.what()); - } + con_token* new_token = parse_line(code_split[i]); new_token->indentation = get_line_indentation(code_split[i]); tokens.push_back(new_token); - if (new_token->tok_type == SECTION - && (new_token->tok_section->name == ".data" || new_token->tok_section->name == ".bss")) { + if (new_token->tok_type == SECTION && (new_token->tok_section->name == ".data" || new_token->tok_section->name == ".bss")) { in_data = true; } else if (new_token->tok_type == SECTION && new_token->tok_section->name == ".text") { in_data = false; @@ -263,8 +250,16 @@ vector parse_construct(string code) { // ----- ----- ----- ----- ----- ----- helper functions impl ----- ----- ----- ----- ----- -vector split(const string& input, const string& chars) { +void to_lower(string& str) { + for (string::iterator it = str.begin(); it != str.end(); ++it) { + if (*it >= 'A' && *it <= 'Z') { + *it -= 'A'; + *it += 'a'; + } + } +} +vector split(const string& input, const string& chars) { vector result; string tmp; bool prev_is_delim = false; @@ -291,349 +286,3 @@ vector split(const string& input, const string& chars) { result.push_back(tmp); return result; } - -uint16_t get_syscall_number(const std::string& syscall_name) -{ - static const map& name_to_num = { - {"read" , 0 }, - {"write" , 1 }, - {"open" , 2 }, - {"close" , 3 }, - {"stat" , 4 }, - {"fstat" , 5 }, - {"lstat" , 6 }, - {"poll" , 7 }, - {"lseek" , 8 }, - {"mmap" , 9 }, - {"mprotect" , 10 }, - {"munmap" , 11 }, - {"brk" , 12 }, - {"rt_sigaction" , 13 }, - {"rt_sigprocmask" , 14 }, - {"rt_sigreturn" , 15 }, - {"ioctl" , 16 }, - {"pread64" , 17 }, - {"pwrite64" , 18 }, - {"readv" , 19 }, - {"writev" , 20 }, - {"access" , 21 }, - {"pipe" , 22 }, - {"select" , 23 }, - {"sched_yield" , 24 }, - {"mremap" , 25 }, - {"msync" , 26 }, - {"mincore" , 27 }, - {"madvise" , 28 }, - {"shmget" , 29 }, - {"shmat" , 30 }, - {"shmctl" , 31 }, - {"dup" , 32 }, - {"dup2" , 33 }, - {"pause" , 34 }, - {"nanosleep" , 35 }, - {"getitimer" , 36 }, - {"alarm" , 37 }, - {"setitimer" , 38 }, - {"getpid" , 39 }, - {"sendfile" , 40 }, - {"socket" , 41 }, - {"connect" , 42 }, - {"accept" , 43 }, - {"sendto" , 44 }, - {"recvfrom" , 45 }, - {"sendmsg" , 46 }, - {"recvmsg" , 47 }, - {"shutdown" , 48 }, - {"bind" , 49 }, - {"listen" , 50 }, - {"getsockname" , 51 }, - {"getpeername" , 52 }, - {"socketpair" , 53 }, - {"setsockopt" , 54 }, - {"getsockopt" , 55 }, - {"clone" , 56 }, - {"fork" , 57 }, - {"vfork" , 58 }, - {"execve" , 59 }, - {"exit" , 60 }, - {"wait4" , 61 }, - {"kill" , 62 }, - {"uname" , 63 }, - {"semget" , 64 }, - {"semop" , 65 }, - {"semctl" , 66 }, - {"shmdt" , 67 }, - {"msgget" , 68 }, - {"msgsnd" , 69 }, - {"msgrcv" , 70 }, - {"msgctl" , 71 }, - {"fcntl" , 72 }, - {"flock" , 73 }, - {"fsync" , 74 }, - {"fdatasync" , 75 }, - {"truncate" , 76 }, - {"ftruncate" , 77 }, - {"getdents" , 78 }, - {"getcwd" , 79 }, - {"chdir" , 80 }, - {"fchdir" , 81 }, - {"rename" , 82 }, - {"mkdir" , 83 }, - {"rmdir" , 84 }, - {"creat" , 85 }, - {"link" , 86 }, - {"unlink" , 87 }, - {"symlink" , 88 }, - {"readlink" , 89 }, - {"chmod" , 90 }, - {"fchmod" , 91 }, - {"chown" , 92 }, - {"fchown" , 93 }, - {"lchown" , 94 }, - {"umask" , 95 }, - {"gettimeofday" , 96 }, - {"getrlimit" , 97 }, - {"getrusage" , 98 }, - {"sysinfo" , 99 }, - {"times" , 100}, - {"ptrace" , 101}, - {"getuid" , 102}, - {"syslog" , 103}, - {"getgid" , 104}, - {"setuid" , 105}, - {"setgid" , 106}, - {"geteuid" , 107}, - {"getegid" , 108}, - {"setpgid" , 109}, - {"getppid" , 110}, - {"getpgrp" , 111}, - {"setsid" , 112}, - {"setreuid" , 113}, - {"setregid" , 114}, - {"getgroups" , 115}, - {"setgroups" , 116}, - {"setresuid" , 117}, - {"getresuid" , 118}, - {"setresgid" , 119}, - {"getresgid" , 120}, - {"getpgid" , 121}, - {"setfsuid" , 122}, - {"setfsgid" , 123}, - {"getsid" , 124}, - {"capget" , 125}, - {"capset" , 126}, - {"rt_sigpending" , 127}, - {"rt_sigtimedwait" , 128}, - {"rt_sigqueueinfo" , 129}, - {"rt_sigsuspend" , 130}, - {"sigaltstack" , 131}, - {"utime" , 132}, - {"mknod" , 133}, - {"uselib" , 134}, - {"personality" , 135}, - {"ustat" , 136}, - {"statfs" , 137}, - {"fstatfs" , 138}, - {"sysfs" , 139}, - {"getpriority" , 140}, - {"setpriority" , 141}, - {"sched_setparam" , 142}, - {"sched_getparam" , 143}, - {"sched_setscheduler" , 144}, - {"sched_getscheduler" , 145}, - {"sched_get_priority_max", 146}, - {"sched_get_priority_min", 147}, - {"sched_rr_get_interval" , 148}, - {"mlock" , 149}, - {"munlock" , 150}, - {"mlockall" , 151}, - {"munlockall" , 152}, - {"vhangup" , 153}, - {"modify_ldt" , 154}, - {"pivot_root" , 155}, - {"_sysctl" , 156}, - {"prctl" , 157}, - {"arch_prctl" , 158}, - {"adjtimex" , 159}, - {"setrlimit" , 160}, - {"chroot" , 161}, - {"sync" , 162}, - {"acct" , 163}, - {"settimeofday" , 164}, - {"mount" , 165}, - {"umount2" , 166}, - {"swapon" , 167}, - {"swapoff" , 168}, - {"reboot" , 169}, - {"sethostname" , 170}, - {"setdomainname" , 171}, - {"iopl" , 172}, - {"ioperm" , 173}, - {"create_module" , 174}, - {"init_module" , 175}, - {"delete_module" , 176}, - {"get_kernel_syms" , 177}, - {"query_module" , 178}, - {"quotactl" , 179}, - {"nfsservctl" , 180}, - {"getpmsg" , 181}, - {"putpmsg" , 182}, - {"afs_syscall" , 183}, - {"tuxcall" , 184}, - {"security" , 185}, - {"gettid" , 186}, - {"readahead" , 187}, - {"setxattr" , 188}, - {"lsetxattr" , 189}, - {"fsetxattr" , 190}, - {"getxattr" , 191}, - {"lgetxattr" , 192}, - {"fgetxattr" , 193}, - {"listxattr" , 194}, - {"llistxattr" , 195}, - {"flistxattr" , 196}, - {"removexattr" , 197}, - {"lremovexattr" , 198}, - {"fremovexattr" , 199}, - {"tkill" , 200}, - {"time" , 201}, - {"futex" , 202}, - {"sched_setaffinity" , 203}, - {"sched_getaffinity" , 204}, - {"set_thread_area" , 205}, - {"io_setup" , 206}, - {"io_destroy" , 207}, - {"io_getevents" , 208}, - {"io_submit" , 209}, - {"io_cancel" , 210}, - {"get_thread_area" , 211}, - {"lookup_dcookie" , 212}, - {"epoll_create" , 213}, - {"epoll_ctl_old" , 214}, - {"epoll_wait_old" , 215}, - {"remap_file_pages" , 216}, - {"getdents64" , 217}, - {"set_tid_address" , 218}, - {"restart_syscall" , 219}, - {"semtimedop" , 220}, - {"fadvise64" , 221}, - {"timer_create" , 222}, - {"timer_settime" , 223}, - {"timer_gettime" , 224}, - {"timer_getoverrun" , 225}, - {"timer_delete" , 226}, - {"clock_settime" , 227}, - {"clock_gettime" , 228}, - {"clock_getres" , 229}, - {"clock_nanosleep" , 230}, - {"exit_group" , 231}, - {"epoll_wait" , 232}, - {"epoll_ctl" , 233}, - {"tgkill" , 234}, - {"utimes" , 235}, - {"vserver" , 236}, - {"mbind" , 237}, - {"set_mempolicy" , 238}, - {"get_mempolicy" , 239}, - {"mq_open" , 240}, - {"mq_unlink" , 241}, - {"mq_timedsend" , 242}, - {"mq_timedreceive" , 243}, - {"mq_notify" , 244}, - {"mq_getsetattr" , 245}, - {"kexec_load" , 246}, - {"waitid" , 247}, - {"add_key" , 248}, - {"request_key" , 249}, - {"keyctl" , 250}, - {"ioprio_set" , 251}, - {"ioprio_get" , 252}, - {"inotify_init" , 253}, - {"inotify_add_watch" , 254}, - {"inotify_rm_watch" , 255}, - {"migrate_pages" , 256}, - {"openat" , 257}, - {"mkdirat" , 258}, - {"mknodat" , 259}, - {"fchownat" , 260}, - {"futimesat" , 261}, - {"newfstatat" , 262}, - {"unlinkat" , 263}, - {"renameat" , 264}, - {"linkat" , 265}, - {"symlinkat" , 266}, - {"readlinkat" , 267}, - {"fchmodat" , 268}, - {"faccessat" , 269}, - {"pselect6" , 270}, - {"ppoll" , 271}, - {"unshare" , 272}, - {"set_robust_list" , 273}, - {"get_robust_list" , 274}, - {"splice" , 275}, - {"tee" , 276}, - {"sync_file_range" , 277}, - {"vmsplice" , 278}, - {"move_pages" , 279}, - {"utimensat" , 280}, - {"epoll_pwait" , 281}, - {"signalfd" , 282}, - {"timerfd_create" , 283}, - {"eventfd" , 284}, - {"fallocate" , 285}, - {"timerfd_settime" , 286}, - {"timerfd_gettime" , 287}, - {"accept4" , 288}, - {"signalfd4" , 289}, - {"eventfd2" , 290}, - {"epoll_create1" , 291}, - {"dup3" , 292}, - {"pipe2" , 293}, - {"inotify_init1" , 294}, - {"preadv" , 295}, - {"pwritev" , 296}, - {"rt_tgsigqueueinfo" , 297}, - {"perf_event_open" , 298}, - {"recvmmsg" , 299}, - {"fanotify_init" , 300}, - {"fanotify_mark" , 301}, - {"prlimit64" , 302}, - {"name_to_handle_at" , 303}, - {"open_by_handle_at" , 304}, - {"clock_adjtime" , 305}, - {"syncfs" , 306}, - {"sendmmsg" , 307}, - {"setns" , 308}, - {"getcpu" , 309}, - {"process_vm_readv" , 310}, - {"process_vm_writev" , 311}, - {"kcmp" , 312}, - {"finit_module" , 313}, - {"sched_setattr" , 314}, - {"sched_getattr" , 315}, - {"renameat2" , 316}, - {"seccomp" , 317}, - {"getrandom" , 318}, - {"memfd_create" , 319}, - {"kexec_file_load" , 320}, - {"bpf" , 321}, - {"execveat" , 322}, - {"userfaultfd" , 323}, - {"membarrier" , 324}, - {"mlock2" , 325}, - {"copy_file_range" , 326}, - {"preadv2" , 327}, - {"pwritev2" , 328}, - {"pkey_mprotect" , 329}, - {"pkey_alloc" , 330}, - {"pkey_free" , 331}, - {"statx" , 332} - }; - - try { - return name_to_num.at(syscall_name); - } - catch(const std::out_of_range& e) { - throw std::invalid_argument("Unknown syscall name: "+syscall_name); - } -} diff --git a/src/deconstruct.h b/src/deconstruct.h index 4a853aa..7139980 100644 --- a/src/deconstruct.h +++ b/src/deconstruct.h @@ -19,7 +19,6 @@ con_function* parse_function(std::string line); con_cmd* parse_cmd(std::string line); con_macro* parse_macro(std::string line); con_funcall* parse_funcall(std::string line); -con_syscall* parse_syscall(std::string line); con_token* parse_line(std::string line); std::vector parse_construct(std::string code); diff --git a/src/reconstruct.cpp b/src/reconstruct.cpp index e27e896..8e25a83 100644 --- a/src/reconstruct.cpp +++ b/src/reconstruct.cpp @@ -6,48 +6,11 @@ using namespace std; -#define min(a,b) ((a)<=(b) ? (a) : (b)) - int if_amnt = 0; int while_amnt = 0; CON_BITWIDTH bitwidth = BIT64; -string comparison_to_string(CON_COMPARISON condition) { - switch (condition) { - case E: - return "e"; - case NE: - return "ne"; - case L: - return "l"; - case G: - return "g"; - case LE: - return "le"; - case GE: - return "ge"; - } - throw invalid_argument("Invalid comparison value: "+to_string(static_cast(condition))); -} -CON_COMPARISON get_comparison_inverse(CON_COMPARISON condition) { - switch (condition) { - case E: - return NE; - case NE: - return E; - case L: - return GE; - case G: - return LE; - case LE: - return G; - case GE: - return L; - } - throw invalid_argument("Invalid comparison value: "+to_string(static_cast(condition))); -} - -static string reg_to_str(uint8_t call_num, CON_BITWIDTH bitwidth) { +string reg_to_str(uint8_t call_num, CON_BITWIDTH bitwidth) { switch (bitwidth) { case BIT8: switch (call_num) { @@ -138,79 +101,92 @@ static string reg_to_str(uint8_t call_num, CON_BITWIDTH bitwidth) { } break; } - throw invalid_argument("Invalid bitwidth or call_num: bitwidth="+to_string(static_cast(bitwidth)) - +" call_num="+to_string(static_cast(call_num))); + throw invalid_argument("Invalid bitwidth or call_num: bitwidth="+to_string(static_cast(bitwidth))+" call_num="+to_string(static_cast(call_num))); } -static uint8_t str_to_reg(string reg_name) -{ - if (reg_name=="dil" ||reg_name=="di" || reg_name=="edi" || reg_name=="rdi") - return 0; - if (reg_name=="sil" ||reg_name=="si" || reg_name=="esi" || reg_name=="rsi") - return 1; - if (reg_name=="dl" || reg_name=="dx" || reg_name=="edx" || reg_name=="rdx") - return 2; - if (reg_name=="cl" || reg_name=="cx" || reg_name=="ecx" || reg_name=="rcx") - return 3; - if (reg_name=="r8b" || reg_name=="r8w" || reg_name=="r8d" || reg_name=="r8") - return 4; - if (reg_name=="r9b" || reg_name=="r9w" || reg_name=="r9d" || reg_name=="r9") - return 5; - return 6; +string comparison_to_string(CON_COMPARISON condition) { + switch (condition) { + case E: + return "e"; + case NE: + return "ne"; + case L: + return "l"; + case G: + return "g"; + case LE: + return "le"; + case GE: + return "ge"; + } + throw invalid_argument("Invalid comparison value: "+to_string(static_cast(condition))); } - -static size_t find_macro_in_arg(string arg, string macro) -{ - size_t pos = arg.find(macro); - if ((pos == 0 || (arg[pos-1]!='_' && !isalpha(arg[pos-1]))) - && (pos+macro.size()-1 == arg.size()-1 || (arg[pos+macro.size()]!='_' && !isalpha(arg[pos+macro.size()])))) { - return pos; +CON_COMPARISON get_comparison_inverse(CON_COMPARISON condition) { + switch (condition) { + case E: + return NE; + case NE: + return E; + case L: + return GE; + case G: + return LE; + case LE: + return G; + case GE: + return L; } - return string::npos; + throw invalid_argument("Invalid comparison value: "+to_string(static_cast(condition))); } -static void apply_macro_to_token(con_token& token, const vector& macros) { + +static void apply_macro_to_token(con_token& token, vector macros) { if (token.tok_type != WHILE && token.tok_type != IF && token.tok_type != CMD) { return; } // Unoptimal, but more clear imo for (size_t i = 0; i < macros.size(); i++) { - const string& macro = macros[i]->macro; - const string& value = macros[i]->value; + con_macro* crntmacro = ¯os[i]; size_t pos; switch (token.tok_type) { case WHILE: - pos = find_macro_in_arg(token.tok_while->condition.arg1, macro); - while (pos != string::npos) { - token.tok_while->condition.arg1.replace(pos, macro.size(), value); - pos = find_macro_in_arg(token.tok_while->condition.arg1, macro); + if (!token.tok_while->condition.arg1.empty() && + (pos = token.tok_while->condition.arg1.find(crntmacro->macro)) != string::npos && + (pos == 0 || !isalpha(token.tok_while->condition.arg1[pos-1])) && + (pos == token.tok_while->condition.arg1.size()-1 || !isalpha(token.tok_while->condition.arg1[pos+crntmacro->macro.size()]))) { + token.tok_while->condition.arg1.replace(pos, crntmacro->macro.size(), crntmacro->value); } - pos = find_macro_in_arg(token.tok_while->condition.arg2, macro); - while (pos != string::npos) { - token.tok_while->condition.arg2.replace(pos, macro.size(), value); - pos = find_macro_in_arg(token.tok_while->condition.arg2, macro); + if (!token.tok_while->condition.arg2.empty() && + (pos = token.tok_while->condition.arg2.find(crntmacro->macro)) != string::npos && + (pos == 0 || !isalpha(token.tok_while->condition.arg2[pos-1])) && + (pos == token.tok_while->condition.arg2.size()-1 || !isalpha(token.tok_while->condition.arg2[pos+crntmacro->macro.size()]))) { + token.tok_while->condition.arg2.replace(pos, crntmacro->macro.size(), crntmacro->value); } break; case IF: - pos = find_macro_in_arg(token.tok_if->condition.arg1, macro); - while (pos != string::npos) { - token.tok_if->condition.arg1.replace(pos, macro.size(), value); - pos = find_macro_in_arg(token.tok_if->condition.arg1, macro); + if (!token.tok_if->condition.arg1.empty() && + (pos = token.tok_if->condition.arg1.find(crntmacro->macro)) != string::npos && + (pos == 0 || !isalpha(token.tok_if->condition.arg1[pos-1])) && + (pos == token.tok_if->condition.arg1.size()-1 || !isalpha(token.tok_if->condition.arg1[pos+crntmacro->macro.size()]))) { + token.tok_if->condition.arg1.replace(pos, crntmacro->macro.size(), crntmacro->value); } - pos = find_macro_in_arg(token.tok_if->condition.arg2, macro); - while (pos != string::npos) { - token.tok_if->condition.arg2.replace(pos, macro.size(), value); - pos = find_macro_in_arg(token.tok_if->condition.arg2, macro); + if (!token.tok_if->condition.arg2.empty() && + (pos = token.tok_if->condition.arg2.find(crntmacro->macro)) != string::npos && + (pos == 0 || !isalpha(token.tok_if->condition.arg2[pos-1])) && + (pos == token.tok_if->condition.arg2.size()-1 || !isalpha(token.tok_if->condition.arg2[pos+crntmacro->macro.size()]))) { + token.tok_if->condition.arg2.replace(pos, crntmacro->macro.size(), crntmacro->value); } break; case CMD: - pos = find_macro_in_arg(token.tok_cmd->arg1, macro); - while (pos != string::npos) { - token.tok_cmd->arg1.replace(pos, macro.size(), value); - pos = find_macro_in_arg(token.tok_cmd->arg1, macro); + if (!token.tok_cmd->arg1.empty() && + (pos = token.tok_cmd->arg1.find(crntmacro->macro)) != string::npos && + (pos == 0 || !isalpha(token.tok_cmd->arg1[pos-1])) && + (pos == token.tok_cmd->arg1.size()-1 || !isalpha(token.tok_cmd->arg1[pos+crntmacro->macro.size()]))) { + token.tok_cmd->arg1.replace(pos, crntmacro->macro.size(), crntmacro->value); } - pos = find_macro_in_arg(token.tok_cmd->arg2, macro); - while (pos != string::npos) { - token.tok_cmd->arg2.replace(pos, macro.size(), value); - pos = find_macro_in_arg(token.tok_cmd->arg2, macro); + if (!token.tok_cmd->arg2.empty() && + (pos = token.tok_cmd->arg2.find(crntmacro->macro)) != string::npos && + (pos == 0 || !isalpha(token.tok_cmd->arg2[pos-1])) && + (pos == token.tok_cmd->arg2.size()-1 || !isalpha(token.tok_cmd->arg2[pos+crntmacro->macro.size()]))) { + token.tok_cmd->arg2.replace(pos, crntmacro->macro.size(), crntmacro->value); } break; default: @@ -218,99 +194,6 @@ static void apply_macro_to_token(con_token& token, const vector& mac } } } -static vector push_args(vector& args, CON_BITWIDTH bitwidth) -{ - vector arg_tokens; - - // stack args; - for (size_t i = 6; i < args.size() ; ++i) { - size_t i_rev = args.size()+5 - i; - con_token* arg_tok = new con_token(); - arg_tok->tok_type = CMD; - con_cmd* arg_cmd = new con_cmd(); - arg_tok->tok_cmd = arg_cmd; - arg_cmd->command = "pushq"; // bitwidth - arg_cmd->arg1 = args[i_rev]; - arg_tokens.push_back(arg_tok); - } - - // register args; - size_t reg_args_size = min(args.size(),6); - uint8_t first_read[7] = {6,6,6,6,6,6,6}; // cell 6 is garbage to hold not special-regs - for (size_t i = 0; i < reg_args_size; ++i) { - uint8_t reg_num = str_to_reg(args[i]); - first_read[reg_num] = min(first_read[reg_num],i); - } - // sort regs by first-read - uint8_t read_order[6] = {6,6,6,6,6,6}; - for (size_t fr = 0; fr < reg_args_size; ++fr) { - for (size_t reg = 0; reg < reg_args_size; ++reg) { - if ((fr == first_read[reg]) && (first_read[reg] > reg)) { //next in turn and will be pushed to stack - read_order[fr]=reg; - } - } - } - // push reversed to pop order - for (size_t fr = 0; fr < 6; ++fr) { - size_t fr_rev = 5 - fr; // reverse the order - if (read_order[fr_rev] != 6) { // there is a regester first read i arg number fr, and will be deleted before - con_token* arg_tok = new con_token(); - arg_tok->tok_type = CMD; - con_cmd* arg_cmd = new con_cmd(); - arg_tok->tok_cmd = arg_cmd; - arg_cmd->command = "push"; - arg_cmd->arg1 = reg_to_str(read_order[fr_rev], bitwidth); - arg_tokens.push_back(arg_tok); - } - } - // set each arg and track values places - uint8_t current_val_place[6] = {0,1,2,3,4,5}; // 6 means stack - for (size_t reg = 0; reg < reg_args_size; ++reg) { - if (first_read[reg] > reg) { - current_val_place[reg] = 6; - } - } - for (size_t i = 0; i < reg_args_size; i++) { - con_token* arg_tok = new con_token(); - arg_tok->tok_type = CMD; - con_cmd* arg_cmd = new con_cmd(); - arg_tok->tok_cmd = arg_cmd; - uint8_t wanted_reg = str_to_reg(args[i]); - if (wanted_reg==6) { - arg_cmd->command = "mov"; - arg_cmd->arg1 = reg_to_str(i, bitwidth); - arg_cmd->arg2 = args[i]; - // if regi was read before, then current_val_place[i] is a previous register (correct) - // if regi isn't read yet, then current_val_place[i] is stack (correct) - } else { - if (current_val_place[wanted_reg] == 6) { - arg_cmd->command = "pop"; - arg_cmd->arg1 = reg_to_str(i, bitwidth); - current_val_place[wanted_reg] = i; // wanted_reg moved from stack to regi - } else { - if (i != current_val_place[wanted_reg]) { - arg_cmd->command = "mov"; - arg_cmd->arg1 = reg_to_str(i, bitwidth); - arg_cmd->arg2 = reg_to_str(current_val_place[wanted_reg], bitwidth); - // if regi was read before, then current_val_place[i] is a previous register (correct) - // if regi isn't read yet, then current_val_place[i] is stack (correct) - current_val_place[wanted_reg] = min(current_val_place[wanted_reg],i); - } else { - arg_cmd->command = "nop"; - } - } - } - if (arg_cmd->command == "nop") { - delete arg_cmd; - arg_cmd = nullptr; - delete arg_tok; - arg_tok = nullptr; - } else { - arg_tokens.push_back(arg_tok); - } - } - return arg_tokens; -} void apply_whiles(vector& tokens) { for (size_t i = 0; i< tokens.size(); i++) { @@ -400,11 +283,12 @@ void apply_ifs(vector& tokens) { } } void apply_functions(std::vector& tokens) { - for (size_t i = 0; i < tokens.size(); i++) { - if (tokens[i]->tok_type != FUNCTION) { + vector* subtokens = &tokens; + for (size_t i = 0; i < subtokens->size(); i++) { + if ((*subtokens)[i]->tok_type != FUNCTION) { continue; } - con_function* crntfunc = tokens[i]->tok_function; + con_function* crntfunc = (*subtokens)[i]->tok_function; if (crntfunc->name == "main") { crntfunc->name = "_start"; } @@ -422,21 +306,35 @@ void apply_functions(std::vector& tokens) { arg_macro->macro = crntfunc->arguments[j]; arg_tok->tok_macro = arg_macro; - tokens[i]->tokens.insert(tokens[i]->tokens.begin(), arg_tok); + (*subtokens)[i]->tokens.insert((*subtokens)[i]->tokens.begin(), arg_tok); } - tokens[i]->tokens.insert(tokens[i]->tokens.begin(), tag_tok); + (*subtokens)[i]->tokens.insert((*subtokens)[i]->tokens.begin(), tag_tok); con_token* ret_tok = new con_token; ret_tok->tok_type = CMD; con_cmd* ret_cmd = new con_cmd; ret_tok->tok_cmd = ret_cmd; ret_cmd->command = "ret"; - tokens[i]->tokens.push_back(ret_tok); + (*subtokens)[i]->tokens.push_back(ret_tok); } } -void apply_macros(vector& tokens, vector& knownmacros) { +void apply_macros(vector& tokens, vector knownmacros) { for (size_t i = 0; i < tokens.size(); i++) { if (tokens[i]->tok_type == MACRO) { - knownmacros.push_back(tokens[i]->tok_macro); + // Filter spaces from macro and value pair + con_macro* f_macro = new con_macro(); + f_macro->macro = ""; + f_macro->value = ""; + for (size_t j = 0; j < tokens[i]->tok_macro->macro.size(); j++) { + if (tokens[i]->tok_macro->macro[j] != ' ') { + f_macro->macro += tokens[i]->tok_macro->macro[j]; + } + } + for (size_t j = 0; j < tokens[i]->tok_macro->value.size(); j++) { + if (tokens[i]->tok_macro->value[j] != ' ') + f_macro->value += tokens[i]->tok_macro->value[j]; + } + knownmacros.push_back(*f_macro); + delete f_macro; continue; } apply_macro_to_token(*tokens[i], knownmacros); @@ -451,7 +349,18 @@ void apply_funcalls(std::vector& tokens) { if (tokens[i]->tok_type != FUNCALL) { continue; } - vector arg_tokens = push_args(tokens[i]->tok_funcall->arguments, bitwidth); + vector* args = &tokens[i]->tok_funcall->arguments; + vector arg_tokens; + for (size_t j = 0; j < args->size(); j++) { + con_token* arg_tok = new con_token(); + arg_tok->tok_type = CMD; + con_cmd* arg_cmd = new con_cmd(); + arg_tok->tok_cmd = arg_cmd; + arg_cmd->command = "mov"; + arg_cmd->arg1 = reg_to_str(j, bitwidth); + arg_cmd->arg2 = (*args)[j]; + arg_tokens.push_back(arg_tok); + } con_token* call_tok = new con_token(); call_tok->tok_type = CMD; con_cmd* call_cmd = new con_cmd(); @@ -463,32 +372,6 @@ void apply_funcalls(std::vector& tokens) { tokens.insert(tokens.begin()+i+1, arg_tokens.begin(), arg_tokens.end()); } } -void apply_syscalls(std::vector& tokens) -{ - for (size_t i = 0; i < tokens.size(); i++) { - apply_syscalls(tokens[i]->tokens); - if (tokens[i]->tok_type != SYSCALL) { - continue; - } - vector arg_tokens = push_args(tokens[i]->tok_syscall->arguments, bitwidth); - con_token* call_tok1 = new con_token(); - call_tok1->tok_type = CMD; - con_cmd* call_cmd = new con_cmd(); - call_tok1->tok_cmd = call_cmd; - call_cmd->command = "mov"; - call_cmd->arg1 = "rax"; - call_cmd->arg2 = to_string(tokens[i]->tok_syscall->number); - arg_tokens.push_back(call_tok1); - con_token* call_tok2 = new con_token(); - call_tok2->tok_type = CMD; - con_cmd* call_sys = new con_cmd(); - call_tok2->tok_cmd = call_sys; - call_sys->command = "syscall"; - arg_tokens.push_back(call_tok2); - - tokens.insert(tokens.begin()+i+1, arg_tokens.begin(), arg_tokens.end()); - } -} void linearize_tokens(vector& tokens) { for (size_t i = 0; i < tokens.size(); i++) { @@ -505,16 +388,13 @@ void linearize_tokens(vector& tokens) { std::string tokens_to_nasm(std::vector& tokens) { string output = ""; for (size_t i = 0; i < tokens.size(); i++) { - if (tokens[i]->tok_type == WHILE || tokens[i]->tok_type == IF + if (tokens[i]->tok_type == IF || tokens[i]->tok_type == WHILE || tokens[i]->tok_type == FUNCTION || tokens[i]->tok_type == MACRO - || tokens[i]->tok_type == FUNCALL || tokens[i]->tok_type == SYSCALL) { + || tokens[i]->tok_type == FUNCALL) { continue; } - if (tokens[i]->tok_type == SECTION) { - output += "section " + tokens[i]->tok_section->name; - } else if (tokens[i]->tok_type == TAG) { - output += tokens[i]->tok_tag->name + ":"; - } else if (tokens[i]->tok_type == CMD) { + output += "\n"; + if (tokens[i]->tok_type == CMD) { output += tokens[i]->tok_cmd->command; if (!tokens[i]->tok_cmd->arg1.empty()) { output += " " + tokens[i]->tok_cmd->arg1; @@ -522,8 +402,16 @@ std::string tokens_to_nasm(std::vector& tokens) { if (!tokens[i]->tok_cmd->arg2.empty()) { output += ", " + tokens[i]->tok_cmd->arg2; } + continue; + } + if (tokens[i]->tok_type == TAG) { + output += tokens[i]->tok_tag->name + ":"; + continue; + } + if (tokens[i]->tok_type == SECTION) { + output += "section " + tokens[i]->tok_section->name; + continue; } - output += "\n"; } return output; } diff --git a/src/reconstruct.h b/src/reconstruct.h index cbd936f..cc9f025 100644 --- a/src/reconstruct.h +++ b/src/reconstruct.h @@ -10,6 +10,7 @@ extern int if_amnt; extern int while_amnt; extern CON_BITWIDTH bitwidth; +std::string reg_to_str(uint8_t call_num, CON_BITWIDTH bitwidth); std::string comparison_to_string(CON_COMPARISON condition); CON_COMPARISON get_comparison_inverse(CON_COMPARISON condition); @@ -20,9 +21,8 @@ CON_COMPARISON get_comparison_inverse(CON_COMPARISON condition); void apply_whiles(std::vector& tokens); void apply_ifs(std::vector& tokens); void apply_functions(std::vector& tokens); -void apply_macros(std::vector& tokens, std::vector& macros); +void apply_macros(std::vector& tokens, std::vector macros); void apply_funcalls(std::vector& tokens); -void apply_syscalls(std::vector& tokens); // During linearization, the construct parent tokens are removed void linearize_tokens(std::vector& tokens); From 269c189b520d34f7f60447e1f151d48693258df8 Mon Sep 17 00:00:00 2001 From: NoneSince Date: Mon, 5 Feb 2024 20:16:50 +0200 Subject: [PATCH 7/7] fixed split --- src/deconstruct.cpp | 29 ++++++++++++++--------------- 1 file changed, 14 insertions(+), 15 deletions(-) diff --git a/src/deconstruct.cpp b/src/deconstruct.cpp index 0962f0d..3debcb9 100644 --- a/src/deconstruct.cpp +++ b/src/deconstruct.cpp @@ -9,8 +9,7 @@ using namespace std; static void to_lower(string& str); -static vector split(const string& input, const string& chars); - +static vector split(const string& input, const string& delims, const bool& compress_adj_delims = false); int get_line_indentation(string line) { int indentation = 0; @@ -253,28 +252,29 @@ vector parse_construct(string code) { void to_lower(string& str) { for (string::iterator it = str.begin(); it != str.end(); ++it) { if (*it >= 'A' && *it <= 'Z') { - *it -= 'A'; - *it += 'a'; + *it -= 'A'; + *it += 'a'; } } } -vector split(const string& input, const string& chars) { +vector split(const string& input, const string& delims, const bool& compress_adj_delims) +{ vector result; string tmp; bool prev_is_delim = false; for (string::const_iterator input_it = input.cbegin(); input_it != input.cend(); ++input_it) { - bool is_in_chars = false; - for (string::const_iterator chars_it = chars.cbegin(); chars_it != chars.cend(); ++chars_it) { - if (*chars_it == *input_it) { - is_in_chars = true; + bool is_delim = false; + for (string::const_iterator delim_it = delims.cbegin(); delim_it != delims.cend(); ++delim_it) { + if (*delim_it == *input_it) { + is_delim = true; break; } } - if (is_in_chars) { - if (prev_is_delim) continue; - if (!tmp.empty()) - result.push_back(tmp); + + if (is_delim) { + if (prev_is_delim && compress_adj_delims) continue; + result.push_back(tmp); tmp.clear(); prev_is_delim = true; } else { @@ -282,7 +282,6 @@ vector split(const string& input, const string& chars) { prev_is_delim = false; } } - if (!tmp.empty()) - result.push_back(tmp); + result.push_back(tmp); return result; }