diff --git a/examples/factorial.asm b/examples/factorial.asm new file mode 100644 index 0000000..23a002c --- /dev/null +++ b/examples/factorial.asm @@ -0,0 +1,25 @@ +global _start +extern printf +section .text +factorial: +mov rsi, 2 +mov rax, 1 +startwhile0: +cmp rsi, rdi +jg endwhile0 +mul rsi +inc rsi +jmp startwhile0 +endwhile0: +ret +_start: +mov rdi, 3 +call factorial +mov rdi, fmt +mov rsi, rax +call printf +mov rax, 60 +syscall +ret +section .data +fmt: db "%d", 10, 0 diff --git a/examples/strchr.asm b/examples/strchr.asm new file mode 100644 index 0000000..ea70278 --- /dev/null +++ b/examples/strchr.asm @@ -0,0 +1,33 @@ +global _start +extern printf +section .text +strchr: +mov rax, 0 +startwhile0: +cmp byte[rdi], 0 +je endwhile0 +cmp byte[rdi], sil +jne endif0 +mov rax, rdi +ret +endif0: +inc rdi +jmp startwhile0 +endwhile0: +ret +_start: +mov rdi, teststr +mov rsi, 87 +call strchr +mov rdi, fmt +mov rsi, rax +call printf +mov rdi, fmt +mov rsi, teststr +call printf +mov rax, 60 +syscall +ret +section .data +teststr: db "Hello World!", 0 +fmt: db "%p", 10, 0 diff --git a/examples/strchr.con b/examples/strchr.con index 4ff263d..df25e65 100644 --- a/examples/strchr.con +++ b/examples/strchr.con @@ -4,23 +4,25 @@ section .text function strchr(str, chr): !ptrresult rax !findchr sil - mov ptrresult, 0 - while byte[str] ne 0: + !end_of_str 0 + !nullptr 0 + mov ptrresult, nullptr + while byte[str] ne end_of_str: if byte[str] e findchr: mov ptrresult, str ret inc str function main(): - call strchr(teststr, 87) + !W_letter 87 + call strchr(teststr, W_letter) !result rax call printf(fmt, result) call printf(fmt, teststr) - mov rax, 60 - syscall + syscall exit() section .data -fmt: db "%p", 10, 0 teststr: db "Hello World!", 0 +fmt: db "%p", 10, 0 diff --git a/examples/strlwr.asm b/examples/strlwr.asm new file mode 100644 index 0000000..b1ebd7d --- /dev/null +++ b/examples/strlwr.asm @@ -0,0 +1,32 @@ +global _start +extern printf +section .text +strlwr: +startwhile0: 
+cmp byte[rdi], 0 +je endwhile0 +cmp byte[rdi], 65 +jl endif1 +cmp byte[rdi], 90 +jg endif0 +mov sil, byte[rdi] +add sil, 32 +mov byte[rdi], sil +endif0: +endif1: +inc rdi +jmp startwhile0 +endwhile0: +ret +_start: +mov rdi, teststr +call strlwr +mov rdi, fmt +mov rsi, teststr +call printf +mov rax, 60 +syscall +ret +section .data +teststr: db "HeLlO WoRlD", 0 +fmt: db "%s", 10, 0 diff --git a/examples/strlwr.con b/examples/strlwr.con index 3cc54e1..c50ad7a 100644 --- a/examples/strlwr.con +++ b/examples/strlwr.con @@ -2,24 +2,27 @@ extern printf section .text function strlwr(str): - while byte[str] ne 0: - if byte[str] ge 65: - if byte[str] le 90: + !A_letter 65 + !Z_letter 90 + !end_of_str 0 + !A_to_a 32 + while byte[str] ne end_of_str: + if byte[str] ge A_letter: + if byte[str] le Z_letter: !crntchr sil mov crntchr, byte[str] - add crntchr, 32 + add crntchr, A_to_a mov byte[str], crntchr inc str function main(): - call strlwr(teststring) - call printf(fmt, teststring) + call strlwr(teststr) + call printf(fmt, teststr) - mov rax, 60 - syscall + syscall exit() section .data -teststring db "HeLlO WoRlD", 0 +teststr: db "HeLlO WoRlD", 0 fmt: db "%s", 10, 0 diff --git a/src/construct.cpp b/src/construct.cpp index e909da2..edfacc4 100644 --- a/src/construct.cpp +++ b/src/construct.cpp @@ -41,12 +41,22 @@ int main(int argc, char** argv) apply_whiles(tokens); apply_funcalls(tokens); apply_syscalls(tokens); - std::vector empty_macros; + std::vector empty_macros; // pointer to con_macros in tokens, not a copy apply_macros(tokens, empty_macros); + empty_macros.clear(); // remove the pointers to con_macro, not the con_macro objects themselves linearize_tokens(tokens); std::ofstream outfile; outfile.open(outpath); outfile << tokens_to_nasm(tokens); outfile.close(); + + for (std::vector::reverse_iterator r_it = tokens.rbegin(); r_it != tokens.rend(); ++r_it) { + delete *r_it; + *r_it = nullptr; + } + tokens.clear(); + glob_cmd = nullptr; // deleted in tokens vector + glob_tok 
= nullptr; // deleted in tokens vector + return 0; } diff --git a/src/construct_debug.cpp b/src/construct_debug.cpp index 252b042..63ad58b 100644 --- a/src/construct_debug.cpp +++ b/src/construct_debug.cpp @@ -17,7 +17,7 @@ std::string tokentype_to_string(CON_TOKENTYPE type) case IF: return "if"; case FUNCTION: - return "func"; + return "function"; case CMD: return "cmd"; case MACRO: diff --git a/src/construct_types.h b/src/construct_types.h index 823ac29..ca4497e 100644 --- a/src/construct_types.h +++ b/src/construct_types.h @@ -32,28 +32,13 @@ enum CON_TOKENTYPE { SYSCALL }; - -struct con_token { - CON_TOKENTYPE tok_type; - int indentation; - struct con_section* tok_section; - struct con_tag* tok_tag; - struct con_while* tok_while; - struct con_if* tok_if; - struct con_function* tok_function; - struct con_cmd* tok_cmd; - struct con_macro* tok_macro; - struct con_funcall* tok_funcall; - struct con_syscall* tok_syscall; - std::vector tokens; // relevant to "if", "while", "function" and "syscall" tokens -}; - struct _con_condition { CON_COMPARISON op; std::string arg1; std::string arg2; }; + struct con_section { std::string name; }; @@ -96,4 +81,52 @@ struct con_syscall { std::vector arguments; }; + +struct con_token { + CON_TOKENTYPE tok_type; + int indentation; + con_section* tok_section = nullptr; + con_tag* tok_tag = nullptr; + con_while* tok_while = nullptr; + con_if* tok_if = nullptr; + con_function* tok_function = nullptr; + con_cmd* tok_cmd = nullptr; + con_macro* tok_macro = nullptr; + con_funcall* tok_funcall = nullptr; + con_syscall* tok_syscall = nullptr; + std::vector tokens; // relevant to "if", "while", "function" and "syscall" tokens + + ~con_token() { + switch (tok_type) { + case SECTION: + if (tok_section != nullptr) delete tok_section; + break; + case TAG: + if (tok_tag != nullptr) delete tok_tag; + break; + case WHILE: + if (tok_while != nullptr) delete tok_while; + break; + case IF: + if (tok_if != nullptr) delete tok_if; + break; + case 
FUNCTION: + if (tok_function != nullptr) delete tok_function; + break; + case CMD: + if (tok_cmd != nullptr) delete tok_cmd; + break; + case MACRO: + if (tok_macro != nullptr) delete tok_macro; + break; + case FUNCALL: + if (tok_funcall != nullptr) delete tok_funcall; + break; + case SYSCALL: + if (tok_syscall != nullptr) delete tok_syscall; + break; + } + } +}; + #endif // CONSTRUCT_TYPES_H_ diff --git a/src/deconstruct.cpp b/src/deconstruct.cpp index fa5cf3a..0032f65 100644 --- a/src/deconstruct.cpp +++ b/src/deconstruct.cpp @@ -8,7 +8,6 @@ using namespace std; -static void to_lower(string& str); static vector split(const string& input, const string& chars); static uint16_t get_syscall_number(const std::string& syscall_name); @@ -71,10 +70,9 @@ vector delinearize_tokens(std::vector tokens) vector dl_tokens; // Serves as parent "section" where all tokens belong to, convenient for algo - con_section* parent_section = new con_section; con_token* parent_token = new con_token; - parent_token->tok_section = parent_section; parent_token->tok_type = SECTION; + parent_token->tok_section = new con_section; // is deleted inside parent_token parent_token->indentation = -1; stack parent_stack; @@ -105,8 +103,8 @@ vector delinearize_tokens(std::vector tokens) vector delinearized_tokens = parent_token->tokens; - delete parent_section; delete parent_token; + parent_token = nullptr; return delinearized_tokens; } @@ -244,7 +242,6 @@ con_token* parse_line(string line) } vector parse_construct(string code) { - to_lower(code); vector code_split = split(code, "\n"); vector tokens; bool in_data = false; @@ -280,16 +277,6 @@ vector parse_construct(string code) // ----- ----- ----- ----- ----- ----- helper functions impl ----- ----- ----- ----- ----- -void to_lower(string& str) -{ - for (string::iterator it = str.begin(); it != str.end(); ++it) { - if (*it >= 'A' && *it <= 'Z') { - *it -= 'A'; - *it += 'a'; - } - } -} - vector split(const string& input, const string& chars) { vector 
result; diff --git a/src/reconstruct.cpp b/src/reconstruct.cpp index 5c20295..928af66 100644 --- a/src/reconstruct.cpp +++ b/src/reconstruct.cpp @@ -160,60 +160,61 @@ static uint8_t str_to_reg(string reg_name) return 5; return 6; } -static void apply_macro_to_token(con_token& token, vector macros) + +static size_t find_macro_in_arg(string arg, string macro) // index of the first occurrence of macro in arg if it is not adjacent to a letter or '_', else string::npos +{ + size_t pos = arg.find(macro); // string::npos when macro is absent; tested first below so arg[pos-1] is never read out of range + if (pos != string::npos && (pos == 0 || (arg[pos-1]!='_' && !isalpha(arg[pos-1]))) + && (pos+macro.size()-1 == arg.size()-1 || (arg[pos+macro.size()]!='_' && !isalpha(arg[pos+macro.size()])))) { + return pos; + } + return string::npos; +} +static void apply_macro_to_token(con_token& token, const vector& macros) { if (token.tok_type != WHILE && token.tok_type != IF && token.tok_type != CMD) { return; } // Unoptimal, but more clear imo for (size_t i = 0; i < macros.size(); i++) { - con_macro* crntmacro = ¯os[i]; + const string& macro = macros[i]->macro; + const string& value = macros[i]->value; size_t pos; switch (token.tok_type) { case WHILE: - if (!token.tok_while->condition.arg1.empty() && - (pos = token.tok_while->condition.arg1.find(crntmacro->macro)) != string::npos && - (pos == 0 || !isalpha(token.tok_while->condition.arg1[pos-1])) && - (pos == token.tok_while->condition.arg1.size()-1 - ||
!isalpha(token.tok_while->condition.arg2[pos+crntmacro->macro.size()]))) { - token.tok_while->condition.arg2.replace(pos, crntmacro->macro.size(), crntmacro->value); + pos = find_macro_in_arg(token.tok_while->condition.arg2, macro); + while (pos != string::npos) { + token.tok_while->condition.arg2.replace(pos, macro.size(), value); + pos = find_macro_in_arg(token.tok_while->condition.arg2, macro); } break; case IF: - if (!token.tok_if->condition.arg1.empty() && - (pos = token.tok_if->condition.arg1.find(crntmacro->macro)) != string::npos && - (pos == 0 || !isalpha(token.tok_if->condition.arg1[pos-1])) && - (pos == token.tok_if->condition.arg1.size()-1 - || !isalpha(token.tok_if->condition.arg1[pos+crntmacro->macro.size()]))) { - token.tok_if->condition.arg1.replace(pos, crntmacro->macro.size(), crntmacro->value); + pos = find_macro_in_arg(token.tok_if->condition.arg1, macro); + while (pos != string::npos) { + token.tok_if->condition.arg1.replace(pos, macro.size(), value); + pos = find_macro_in_arg(token.tok_if->condition.arg1, macro); } - if (!token.tok_if->condition.arg2.empty() && - (pos = token.tok_if->condition.arg2.find(crntmacro->macro)) != string::npos && - (pos == 0 || !isalpha(token.tok_if->condition.arg2[pos-1])) && - (pos == token.tok_if->condition.arg2.size()-1 - || !isalpha(token.tok_if->condition.arg2[pos+crntmacro->macro.size()]))) { - token.tok_if->condition.arg2.replace(pos, crntmacro->macro.size(), crntmacro->value); + pos = find_macro_in_arg(token.tok_if->condition.arg2, macro); + while (pos != string::npos) { + token.tok_if->condition.arg2.replace(pos, macro.size(), value); + pos = find_macro_in_arg(token.tok_if->condition.arg2, macro); } break; case CMD: - if (!token.tok_cmd->arg1.empty() && - (pos = token.tok_cmd->arg1.find(crntmacro->macro)) != string::npos && - (pos == 0 || !isalpha(token.tok_cmd->arg1[pos-1])) && - (pos == token.tok_cmd->arg1.size()-1 || !isalpha(token.tok_cmd->arg1[pos+crntmacro->macro.size()]))) { - 
token.tok_cmd->arg1.replace(pos, crntmacro->macro.size(), crntmacro->value); + pos = find_macro_in_arg(token.tok_cmd->arg1, macro); + while (pos != string::npos) { + token.tok_cmd->arg1.replace(pos, macro.size(), value); + pos = find_macro_in_arg(token.tok_cmd->arg1, macro); } - if (!token.tok_cmd->arg2.empty() && - (pos = token.tok_cmd->arg2.find(crntmacro->macro)) != string::npos && - (pos == 0 || !isalpha(token.tok_cmd->arg2[pos-1])) && - (pos == token.tok_cmd->arg2.size()-1 || !isalpha(token.tok_cmd->arg2[pos+crntmacro->macro.size()]))) { - token.tok_cmd->arg2.replace(pos, crntmacro->macro.size(), crntmacro->value); + pos = find_macro_in_arg(token.tok_cmd->arg2, macro); + while (pos != string::npos) { + token.tok_cmd->arg2.replace(pos, macro.size(), value); + pos = find_macro_in_arg(token.tok_cmd->arg2, macro); } break; default: @@ -440,25 +441,11 @@ void apply_functions(std::vector& tokens) tokens[i]->tokens.push_back(ret_tok); } } -void apply_macros(vector& tokens, vector knownmacros) +void apply_macros(vector& tokens, vector& knownmacros) { for (size_t i = 0; i < tokens.size(); i++) { if (tokens[i]->tok_type == MACRO) { - // Filter spaces from macro and value pair - con_macro* f_macro = new con_macro(); - f_macro->macro = ""; - f_macro->value = ""; - for (size_t j = 0; j < tokens[i]->tok_macro->macro.size(); j++) { - if (tokens[i]->tok_macro->macro[j] != ' ') { - f_macro->macro += tokens[i]->tok_macro->macro[j]; - } - } - for (size_t j = 0; j < tokens[i]->tok_macro->value.size(); j++) { - if (tokens[i]->tok_macro->value[j] != ' ') - f_macro->value += tokens[i]->tok_macro->value[j]; - } - knownmacros.push_back(*f_macro); - delete f_macro; + knownmacros.push_back(tokens[i]->tok_macro); continue; } apply_macro_to_token(*tokens[i], knownmacros); diff --git a/src/reconstruct.h b/src/reconstruct.h index 05970fa..cbd936f 100644 --- a/src/reconstruct.h +++ b/src/reconstruct.h @@ -20,7 +20,7 @@ CON_COMPARISON get_comparison_inverse(CON_COMPARISON condition); void 
apply_whiles(std::vector& tokens); void apply_ifs(std::vector& tokens); void apply_functions(std::vector& tokens); -void apply_macros(std::vector& tokens, std::vector macros); +void apply_macros(std::vector& tokens, std::vector& macros); void apply_funcalls(std::vector& tokens); void apply_syscalls(std::vector& tokens);