diff --git a/examples/factorial.con b/examples/factorial.con index 18c99f9..6aae732 100644 --- a/examples/factorial.con +++ b/examples/factorial.con @@ -16,8 +16,7 @@ function main(): !result rax call printf(fmt, result) - mov rax, 60 - syscall + syscall exit() section .data fmt: db "%d", 10, 0 diff --git a/examples/strchr.con b/examples/strchr.con index 81534cf..4ff263d 100644 --- a/examples/strchr.con +++ b/examples/strchr.con @@ -3,9 +3,10 @@ extern printf section .text function strchr(str, chr): !ptrresult rax + !findchr sil mov ptrresult, 0 while byte[str] ne 0: - if byte[str] e chr: + if byte[str] e findchr: mov ptrresult, str ret inc str diff --git a/src/construct.cpp b/src/construct.cpp index 53c61ab..e909da2 100644 --- a/src/construct.cpp +++ b/src/construct.cpp @@ -4,9 +4,9 @@ #include #include #include "construct_types.h" -#include "deconstruct.h" // parse_construct() -#include "reconstruct.h" // linearize_tokens() -#include "construct_flags.h" // handle_flags() +#include "deconstruct.h" +#include "reconstruct.h" +#include "construct_flags.h" int main(int argc, char** argv) { @@ -40,6 +40,7 @@ int main(int argc, char** argv) apply_ifs(tokens); apply_whiles(tokens); apply_funcalls(tokens); + apply_syscalls(tokens); std::vector empty_macros; apply_macros(tokens, empty_macros); linearize_tokens(tokens); diff --git a/src/construct_debug.cpp b/src/construct_debug.cpp index 5098a7a..252b042 100644 --- a/src/construct_debug.cpp +++ b/src/construct_debug.cpp @@ -24,6 +24,8 @@ std::string tokentype_to_string(CON_TOKENTYPE type) return "macro"; case FUNCALL: return "funcall"; + case SYSCALL: + return "syscall"; } throw std::invalid_argument("Invalid token type: "+std::to_string(static_cast(type))); } @@ -39,10 +41,12 @@ std::string token_to_string(con_token token) tokstring += ", name: " + token.tok_tag->name; break; case WHILE: - tokstring += ", condition: " + token.tok_while->condition.arg1 + " " + comparison_to_string(token.tok_while->condition.op) + " " + token.tok_while->condition.arg2; + tokstring += ", condition: " + token.tok_while->condition.arg1 + " " + + comparison_to_string(token.tok_while->condition.op) + " " + token.tok_while->condition.arg2; break; case IF: - tokstring += ", condition: " + token.tok_if->condition.arg1 + " " + comparison_to_string(token.tok_if->condition.op) + " " + token.tok_if->condition.arg2; + tokstring += ", condition: " + token.tok_if->condition.arg1 + " " + + comparison_to_string(token.tok_if->condition.op) + " " + token.tok_if->condition.arg2; break; case FUNCTION: tokstring += ", function: " + token.tok_function->name + ", arguments: "; diff --git a/src/construct_types.h b/src/construct_types.h index 5f76bbc..823ac29 100644 --- a/src/construct_types.h +++ b/src/construct_types.h @@ -28,7 +28,8 @@ enum CON_TOKENTYPE { FUNCTION, CMD, MACRO, - FUNCALL + FUNCALL, + SYSCALL }; @@ -43,7 +44,8 @@ struct con_token { struct con_cmd* tok_cmd; struct con_macro* tok_macro; struct con_funcall* tok_funcall; - std::vector tokens; // Only non-empty for if, while and function tokens + struct con_syscall* tok_syscall; + std::vector tokens; // relevant to "if", "while", "function" and "syscall" tokens }; struct _con_condition { @@ -89,4 +91,9 @@ struct con_funcall { std::vector arguments; }; +struct con_syscall { + uint16_t number; + std::vector arguments; +}; + #endif // CONSTRUCT_TYPES_H_ diff --git a/src/deconstruct.cpp b/src/deconstruct.cpp index e9386d6..fa5cf3a 100644 --- a/src/deconstruct.cpp +++ b/src/deconstruct.cpp @@ -1,75 +1,16 @@ #include #include #include +#include #include #include "deconstruct.h" #include "construct_types.h" using namespace std; -static const char& FIRST_UPPERCASE_LETTER = 'A'; -static const char& LAST_UPPERCASE_LETTER = 'Z'; -static bool is_upper(const char& c) -{ - return c >= FIRST_UPPERCASE_LETTER && c <= LAST_UPPERCASE_LETTER; -} -static char to_lower(const char& c) -{ - return is_upper(c) ? c - FIRST_UPPERCASE_LETTER : c; -} -static void to_lower(string& str) -{ - string tmp; - for (string::iterator it = str.begin(); it != str.end(); ++it) { - tmp.push_back(to_lower(*it)); - } - str = tmp; -} - -class IsAnyOf -{ -private: - string chars; -public: - IsAnyOf() = default; - IsAnyOf(const string& _chars) : chars(_chars) {} - IsAnyOf(const char*& _chars) : chars(_chars) {} - ~IsAnyOf() = default; - IsAnyOf(const IsAnyOf& other) = delete; // should save unique sorted chars for that - IsAnyOf& operator=(const IsAnyOf& other) = delete; - - bool operator()(const char& c) const { - for (string::const_iterator it = chars.cbegin(); it != chars.cend(); ++it) { - if (*it == c) return true; - } - return false; - } -}; - -template -static void split(vector& result, const string& input, const Predicate& pred, const bool& compress_adj_delims=true, const bool& strip=true) -{ - string tmp; - bool prev_is_delim = false; - for (string::const_iterator it = input.cbegin(); it != input.cend(); ++it) { - if (pred(*it)) { - if (prev_is_delim && compress_adj_delims) continue; - result.push_back(tmp); - tmp.clear(); - prev_is_delim = true; - } else { - tmp.push_back(*it); - prev_is_delim = false; - } - } - result.push_back(tmp); - if (strip) { - if (tmp.empty()) - result.pop_back(); - if (!result.empty() && result[0].empty()) - result.erase(result.begin()); - } -} +static void to_lower(string& str); +static vector split(const string& input, const string& chars); +static uint16_t get_syscall_number(const std::string& syscall_name); int get_line_indentation(string line) { @@ -87,20 +28,23 @@ int get_line_indentation(string line) // Expects formatted line CON_TOKENTYPE get_token_type(string line) { - if (line.substr(0, 8) == "section ") + vector line_split = split(line, " "); // line_split is not empty + if (line_split[0] == "section") return SECTION; if (line.find(' ') == string::npos && line[line.size()-1] == ':') return TAG; - if (line.substr(0, 6) == "while ") + if (line_split[0] == "while") return WHILE; - if (line.substr(0, 3) == "if ") + if (line_split[0] == "if") return IF; - if (line.substr(0, 9) == "function ") + if (line_split[0] == "function") return FUNCTION; if (line[0] == '!') return MACRO; - if (line.substr(0, 5) == "call " && line.find('(') != string::npos && line.find(')') != string::npos) + if (line_split[0] == "call" && line.find('(') != string::npos && line.find(')') != string::npos) return FUNCALL; + if (line_split[0] == "syscall" && line.find('(') != string::npos && line.find(')') != string::npos) + return SYSCALL; return CMD; } @@ -138,23 +82,23 @@ vector delinearize_tokens(std::vector tokens) dl_tokens.push_back(parent_token); - // When a new when, if or function is encountered it is pushed to the top of the parent_stack + // When a new while, if or function is encountered it is pushed to the top of the parent_stack // All tokens with the indentation of the top of the parent_stack+1 // are then added to the elem at the top of the stack (ptr so also to elem in vector). // If token is while, if or function it is pushed to stack and becomes new parent. // if indentation goes up, new token is pushed to stack, when indentation goes down, // tops of stack are popped off by how much it decreased. for (size_t i = 0; i < tokens.size(); i++) { - if (parent_stack.top()->indentation - tokens[i]->indentation >= 0) { - int indentation_diff = parent_stack.top()->indentation - tokens[i]->indentation+1; + if (tokens[i]->indentation - parent_stack.top()->indentation <= 0) { + int indentation_diff = parent_stack.top()->indentation - tokens[i]->indentation + 1; for (int j = 0; j < indentation_diff; j++) { parent_stack.pop(); } } - if (tokens[i]->indentation == parent_stack.top()->indentation+1) { + if (tokens[i]->indentation - parent_stack.top()->indentation == 1) { parent_stack.top()->tokens.push_back(tokens[i]); } - if (tokens[i]->tok_type == FUNCTION || tokens[i]->tok_type == IF || tokens[i]->tok_type == WHILE) { + if (tokens[i]->tok_type == WHILE || tokens[i]->tok_type == IF || tokens[i]->tok_type == FUNCTION) { parent_stack.push(tokens[i]); } } @@ -170,8 +114,7 @@ vector delinearize_tokens(std::vector tokens) con_section* parse_section(string line) // section name // section . name ?? { con_section* tok_section = new con_section(); - vector line_split; - split(line_split, line, IsAnyOf(" ")); + vector line_split = split(line, " "); tok_section->name = line_split[1]; return tok_section; } @@ -184,8 +127,7 @@ con_tag* parse_tag(string line) // name: // name : ?? con_while* parse_while(string line) // while val1 comp val2: { con_while* tok_while = new con_while(); - vector line_split; - split(line_split, line, IsAnyOf(" :")); + vector line_split = split(line, " :"); tok_while->condition.arg1 = line_split[1]; tok_while->condition.op = str_to_comparison(line_split[2]); tok_while->condition.arg2 = line_split[3]; @@ -194,8 +136,7 @@ con_while* parse_while(string line) // while val1 comp val2: con_if* parse_if(string line) // if val1 comp val2: { con_if* tok_if = new con_if(); - vector line_split; - split(line_split, line, IsAnyOf(" :")); + vector line_split = split(line, " :"); tok_if->condition.arg1 = line_split[1]; tok_if->condition.op = str_to_comparison(line_split[2]); tok_if->condition.arg2 = line_split[3]; @@ -204,8 +145,7 @@ con_if* parse_if(string line) // if val1 comp val2: con_function* parse_function(string line) // function func(arg1, arg2, ...): { con_function* tok_function = new con_function(); - vector line_split; - split(line_split, line, IsAnyOf(" ():,")); + vector line_split = split(line, " ():,"); tok_function->name = line_split[1]; for (size_t i = 2; i < line_split.size(); i++) { if (line_split[i].empty()) { @@ -218,8 +158,7 @@ con_function* parse_function(string line) // function func(arg1, arg2, ...): con_cmd* parse_cmd(string line) // op // op arg1 // op arg1, arg2 { con_cmd* tok_cmd = new con_cmd(); - vector line_split; - split(line_split, line, IsAnyOf(" ,")); + vector line_split = split(line, " ,"); tok_cmd->command = line_split[0]; if (line_split.size() > 1) tok_cmd->arg1 = line_split[1]; @@ -230,8 +169,7 @@ con_cmd* parse_cmd(string line) // op // op arg1 // op arg1, arg2 con_macro* parse_macro(string line) // !name reg { con_macro* tok_macro = new con_macro(); - vector line_split; - split(line_split, line, IsAnyOf(" !")); + vector line_split = split(line, " !"); tok_macro->macro = line_split[0]; tok_macro->value = line_split[1]; return tok_macro; @@ -239,17 +177,25 @@ con_macro* parse_macro(string line) // !name reg con_funcall* parse_funcall(string line) // call func(arg1, arg2, ...) { con_funcall* tok_funcall = new con_funcall(); - vector line_split; - split(line_split, line, IsAnyOf(" (),")); + vector line_split = split(line, " (),"); tok_funcall->funcname = line_split[1]; for (size_t i = 2; i < line_split.size(); i++) { - if (line_split[i].empty()) { - continue; - } - tok_funcall->arguments.push_back(line_split[i]); // macros filter out spaces anyway when applied + if (line_split[i].empty()) throw invalid_argument("Invalid syntax"); + tok_funcall->arguments.push_back(line_split[i]); } return tok_funcall; } +con_syscall* parse_syscall(string line) // syscall sysc(arg1, arg2, ...) +{ + vector line_split = split(line, " (),"); + con_syscall* tok_syscall = new con_syscall(); + tok_syscall->number = get_syscall_number(line_split[1]); + for (size_t i = 2; i < line_split.size(); i++) { + if (line_split[i].empty()) throw invalid_argument("Invalid syntax"); + tok_syscall->arguments.push_back(line_split[i]); + } + return tok_syscall; +} // Does not expect formatted line, only lowercase con_token* parse_line(string line) @@ -258,18 +204,11 @@ con_token* parse_line(string line) //remove multiple spaces from line string f_line = ""; bool caught_space = false; - for (size_t i = 0; i < line.size(); i++) { - if (line[i] == ' ') { - if (!caught_space) { - f_line += line[i]; - caught_space = true; - } - } else { - if (line[i] != '\t') { - f_line += line[i]; - } - caught_space = false; - } + for (string::iterator it = line.begin(); it != line.end(); ++it) { + bool is_space = (*it == ' '); + if (*it == '\t' || (is_space && caught_space)) continue; + f_line += *it; + caught_space = is_space; } token->tok_type = get_token_type(f_line); switch (token->tok_type) { @@ -296,14 +235,17 @@ con_token* parse_line(string line) break; case FUNCALL: token->tok_funcall = parse_funcall(f_line); + break; + case SYSCALL: + token->tok_syscall = parse_syscall(f_line); + break; } return token; } vector parse_construct(string code) { - vector code_split; - split(code_split, code, IsAnyOf("\n")); to_lower(code); + vector code_split = split(code, "\n"); vector tokens; bool in_data = false; for (size_t i = 0; i < code_split.size(); i++) { @@ -311,10 +253,17 @@ vector parse_construct(string code) if (code_split[i].find_first_of("abcdefghijklmnopqrstuvwxyz!") == std::string::npos) { continue; } - con_token* new_token = parse_line(code_split[i]); + con_token* new_token = nullptr; + try { + new_token = parse_line(code_split[i]); + } + catch (const std::exception& e) { + throw std::runtime_error("Line "+to_string(i)+" ["+code_split[i]+"] :"+e.what()); + } new_token->indentation = get_line_indentation(code_split[i]); tokens.push_back(new_token); - if (new_token->tok_type == SECTION && (new_token->tok_section->name == ".data" || new_token->tok_section->name == ".bss")) { + if (new_token->tok_type == SECTION + && (new_token->tok_section->name == ".data" || new_token->tok_section->name == ".bss")) { in_data = true; } else if (new_token->tok_type == SECTION && new_token->tok_section->name == ".text") { in_data = false; @@ -328,3 +277,390 @@ vector parse_construct(string code) } return tokens; } + +// ----- ----- ----- ----- ----- ----- helper functions impl ----- ----- ----- ----- ----- + +void to_lower(string& str) +{ + for (string::iterator it = str.begin(); it != str.end(); ++it) { + if (*it >= 'A' && *it <= 'Z') { + *it -= 'A'; + *it += 'a'; + } + } +} + +vector split(const string& input, const string& chars) +{ + vector result; + string tmp; + bool prev_is_delim = false; + for (string::const_iterator input_it = input.cbegin(); input_it != input.cend(); ++input_it) { + bool is_in_chars = false; + for (string::const_iterator chars_it = chars.cbegin(); chars_it != chars.cend(); ++chars_it) { + if (*chars_it == *input_it) { + is_in_chars = true; + break; + } + } + if (is_in_chars) { + if (prev_is_delim) continue; + if (!tmp.empty()) + result.push_back(tmp); + tmp.clear(); + prev_is_delim = true; + } else { + tmp.push_back(*input_it); + prev_is_delim = false; + } + } + if (!tmp.empty()) + result.push_back(tmp); + return result; +} + +uint16_t get_syscall_number(const std::string& syscall_name) +{ + static const map& name_to_num = { + {"read" , 0 }, + {"write" , 1 }, + {"open" , 2 }, + {"close" , 3 }, + {"stat" , 4 }, + {"fstat" , 5 }, + {"lstat" , 6 }, + {"poll" , 7 }, + {"lseek" , 8 }, + {"mmap" , 9 }, + {"mprotect" , 10 }, + {"munmap" , 11 }, + {"brk" , 12 }, + {"rt_sigaction" , 13 }, + {"rt_sigprocmask" , 14 }, + {"rt_sigreturn" , 15 }, + {"ioctl" , 16 }, + {"pread64" , 17 }, + {"pwrite64" , 18 }, + {"readv" , 19 }, + {"writev" , 20 }, + {"access" , 21 }, + {"pipe" , 22 }, + {"select" , 23 }, + {"sched_yield" , 24 }, + {"mremap" , 25 }, + {"msync" , 26 }, + {"mincore" , 27 }, + {"madvise" , 28 }, + {"shmget" , 29 }, + {"shmat" , 30 }, + {"shmctl" , 31 }, + {"dup" , 32 }, + {"dup2" , 33 }, + {"pause" , 34 }, + {"nanosleep" , 35 }, + {"getitimer" , 36 }, + {"alarm" , 37 }, + {"setitimer" , 38 }, + {"getpid" , 39 }, + {"sendfile" , 40 }, + {"socket" , 41 }, + {"connect" , 42 }, + {"accept" , 43 }, + {"sendto" , 44 }, + {"recvfrom" , 45 }, + {"sendmsg" , 46 }, + {"recvmsg" , 47 }, + {"shutdown" , 48 }, + {"bind" , 49 }, + {"listen" , 50 }, + {"getsockname" , 51 }, + {"getpeername" , 52 }, + {"socketpair" , 53 }, + {"setsockopt" , 54 }, + {"getsockopt" , 55 }, + {"clone" , 56 }, + {"fork" , 57 }, + {"vfork" , 58 }, + {"execve" , 59 }, + {"exit" , 60 }, + {"wait4" , 61 }, + {"kill" , 62 }, + {"uname" , 63 }, + {"semget" , 64 }, + {"semop" , 65 }, + {"semctl" , 66 }, + {"shmdt" , 67 }, + {"msgget" , 68 }, + {"msgsnd" , 69 }, + {"msgrcv" , 70 }, + {"msgctl" , 71 }, + {"fcntl" , 72 }, + {"flock" , 73 }, + {"fsync" , 74 }, + {"fdatasync" , 75 }, + {"truncate" , 76 }, + {"ftruncate" , 77 }, + {"getdents" , 78 }, + {"getcwd" , 79 }, + {"chdir" , 80 }, + {"fchdir" , 81 }, + {"rename" , 82 }, + {"mkdir" , 83 }, + {"rmdir" , 84 }, + {"creat" , 85 }, + {"link" , 86 }, + {"unlink" , 87 }, + {"symlink" , 88 }, + {"readlink" , 89 }, + {"chmod" , 90 }, + {"fchmod" , 91 }, + {"chown" , 92 }, + {"fchown" , 93 }, + {"lchown" , 94 }, + {"umask" , 95 }, + {"gettimeofday" , 96 }, + {"getrlimit" , 97 }, + {"getrusage" , 98 }, + {"sysinfo" , 99 }, + {"times" , 100}, + {"ptrace" , 101}, + {"getuid" , 102}, + {"syslog" , 103}, + {"getgid" , 104}, + {"setuid" , 105}, + {"setgid" , 106}, + {"geteuid" , 107}, + {"getegid" , 108}, + {"setpgid" , 109}, + {"getppid" , 110}, + {"getpgrp" , 111}, + {"setsid" , 112}, + {"setreuid" , 113}, + {"setregid" , 114}, + {"getgroups" , 115}, + {"setgroups" , 116}, + {"setresuid" , 117}, + {"getresuid" , 118}, + {"setresgid" , 119}, + {"getresgid" , 120}, + {"getpgid" , 121}, + {"setfsuid" , 122}, + {"setfsgid" , 123}, + {"getsid" , 124}, + {"capget" , 125}, + {"capset" , 126}, + {"rt_sigpending" , 127}, + {"rt_sigtimedwait" , 128}, + {"rt_sigqueueinfo" , 129}, + {"rt_sigsuspend" , 130}, + {"sigaltstack" , 131}, + {"utime" , 132}, + {"mknod" , 133}, + {"uselib" , 134}, + {"personality" , 135}, + {"ustat" , 136}, + {"statfs" , 137}, + {"fstatfs" , 138}, + {"sysfs" , 139}, + {"getpriority" , 140}, + {"setpriority" , 141}, + {"sched_setparam" , 142}, + {"sched_getparam" , 143}, + {"sched_setscheduler" , 144}, + {"sched_getscheduler" , 145}, + {"sched_get_priority_max", 146}, + {"sched_get_priority_min", 147}, + {"sched_rr_get_interval" , 148}, + {"mlock" , 149}, + {"munlock" , 150}, + {"mlockall" , 151}, + {"munlockall" , 152}, + {"vhangup" , 153}, + {"modify_ldt" , 154}, + {"pivot_root" , 155}, + {"_sysctl" , 156}, + {"prctl" , 157}, + {"arch_prctl" , 158}, + {"adjtimex" , 159}, + {"setrlimit" , 160}, + {"chroot" , 161}, + {"sync" , 162}, + {"acct" , 163}, + {"settimeofday" , 164}, + {"mount" , 165}, + {"umount2" , 166}, + {"swapon" , 167}, + {"swapoff" , 168}, + {"reboot" , 169}, + {"sethostname" , 170}, + {"setdomainname" , 171}, + {"iopl" , 172}, + {"ioperm" , 173}, + {"create_module" , 174}, + {"init_module" , 175}, + {"delete_module" , 176}, + {"get_kernel_syms" , 177}, + {"query_module" , 178}, + {"quotactl" , 179}, + {"nfsservctl" , 180}, + {"getpmsg" , 181}, + {"putpmsg" , 182}, + {"afs_syscall" , 183}, + {"tuxcall" , 184}, + {"security" , 185}, + {"gettid" , 186}, + {"readahead" , 187}, + {"setxattr" , 188}, + {"lsetxattr" , 189}, + {"fsetxattr" , 190}, + {"getxattr" , 191}, + {"lgetxattr" , 192}, + {"fgetxattr" , 193}, + {"listxattr" , 194}, + {"llistxattr" , 195}, + {"flistxattr" , 196}, + {"removexattr" , 197}, + {"lremovexattr" , 198}, + {"fremovexattr" , 199}, + {"tkill" , 200}, + {"time" , 201}, + {"futex" , 202}, + {"sched_setaffinity" , 203}, + {"sched_getaffinity" , 204}, + {"set_thread_area" , 205}, + {"io_setup" , 206}, + {"io_destroy" , 207}, + {"io_getevents" , 208}, + {"io_submit" , 209}, + {"io_cancel" , 210}, + {"get_thread_area" , 211}, + {"lookup_dcookie" , 212}, + {"epoll_create" , 213}, + {"epoll_ctl_old" , 214}, + {"epoll_wait_old" , 215}, + {"remap_file_pages" , 216}, + {"getdents64" , 217}, + {"set_tid_address" , 218}, + {"restart_syscall" , 219}, + {"semtimedop" , 220}, + {"fadvise64" , 221}, + {"timer_create" , 222}, + {"timer_settime" , 223}, + {"timer_gettime" , 224}, + {"timer_getoverrun" , 225}, + {"timer_delete" , 226}, + {"clock_settime" , 227}, + {"clock_gettime" , 228}, + {"clock_getres" , 229}, + {"clock_nanosleep" , 230}, + {"exit_group" , 231}, + {"epoll_wait" , 232}, + {"epoll_ctl" , 233}, + {"tgkill" , 234}, + {"utimes" , 235}, + {"vserver" , 236}, + {"mbind" , 237}, + {"set_mempolicy" , 238}, + {"get_mempolicy" , 239}, + {"mq_open" , 240}, + {"mq_unlink" , 241}, + {"mq_timedsend" , 242}, + {"mq_timedreceive" , 243}, + {"mq_notify" , 244}, + {"mq_getsetattr" , 245}, + {"kexec_load" , 246}, + {"waitid" , 247}, + {"add_key" , 248}, + {"request_key" , 249}, + {"keyctl" , 250}, + {"ioprio_set" , 251}, + {"ioprio_get" , 252}, + {"inotify_init" , 253}, + {"inotify_add_watch" , 254}, + {"inotify_rm_watch" , 255}, + {"migrate_pages" , 256}, + {"openat" , 257}, + {"mkdirat" , 258}, + {"mknodat" , 259}, + {"fchownat" , 260}, + {"futimesat" , 261}, + {"newfstatat" , 262}, + {"unlinkat" , 263}, + {"renameat" , 264}, + {"linkat" , 265}, + {"symlinkat" , 266}, + {"readlinkat" , 267}, + {"fchmodat" , 268}, + {"faccessat" , 269}, + {"pselect6" , 270}, + {"ppoll" , 271}, + {"unshare" , 272}, + {"set_robust_list" , 273}, + {"get_robust_list" , 274}, + {"splice" , 275}, + {"tee" , 276}, + {"sync_file_range" , 277}, + {"vmsplice" , 278}, + {"move_pages" , 279}, + {"utimensat" , 280}, + {"epoll_pwait" , 281}, + {"signalfd" , 282}, + {"timerfd_create" , 283}, + {"eventfd" , 284}, + {"fallocate" , 285}, + {"timerfd_settime" , 286}, + {"timerfd_gettime" , 287}, + {"accept4" , 288}, + {"signalfd4" , 289}, + {"eventfd2" , 290}, + {"epoll_create1" , 291}, + {"dup3" , 292}, + {"pipe2" , 293}, + {"inotify_init1" , 294}, + {"preadv" , 295}, + {"pwritev" , 296}, + {"rt_tgsigqueueinfo" , 297}, + {"perf_event_open" , 298}, + {"recvmmsg" , 299}, + {"fanotify_init" , 300}, + {"fanotify_mark" , 301}, + {"prlimit64" , 302}, + {"name_to_handle_at" , 303}, + {"open_by_handle_at" , 304}, + {"clock_adjtime" , 305}, + {"syncfs" , 306}, + {"sendmmsg" , 307}, + {"setns" , 308}, + {"getcpu" , 309}, + {"process_vm_readv" , 310}, + {"process_vm_writev" , 311}, + {"kcmp" , 312}, + {"finit_module" , 313}, + {"sched_setattr" , 314}, + {"sched_getattr" , 315}, + {"renameat2" , 316}, + {"seccomp" , 317}, + {"getrandom" , 318}, + {"memfd_create" , 319}, + {"kexec_file_load" , 320}, + {"bpf" , 321}, + {"execveat" , 322}, + {"userfaultfd" , 323}, + {"membarrier" , 324}, + {"mlock2" , 325}, + {"copy_file_range" , 326}, + {"preadv2" , 327}, + {"pwritev2" , 328}, + {"pkey_mprotect" , 329}, + {"pkey_alloc" , 330}, + {"pkey_free" , 331}, + {"statx" , 332} + }; + + try { + return name_to_num.at(syscall_name); + } + catch(const std::out_of_range& e) { + throw std::invalid_argument("Unknown syscall name: "+syscall_name); + } +} diff --git a/src/deconstruct.h b/src/deconstruct.h index 7139980..4a853aa 100644 --- a/src/deconstruct.h +++ b/src/deconstruct.h @@ -19,6 +19,7 @@ con_function* parse_function(std::string line); con_cmd* parse_cmd(std::string line); con_macro* parse_macro(std::string line); con_funcall* parse_funcall(std::string line); +con_syscall* parse_syscall(std::string line); con_token* parse_line(std::string line); std::vector parse_construct(std::string code); diff --git a/src/reconstruct.cpp b/src/reconstruct.cpp index 4456011..5c20295 100644 --- a/src/reconstruct.cpp +++ b/src/reconstruct.cpp @@ -6,11 +6,50 @@ using namespace std; +#define min(a,b) ((a)<=(b) ? (a) : (b)) + int if_amnt = 0; int while_amnt = 0; CON_BITWIDTH bitwidth = BIT64; -string reg_to_str(uint8_t call_num) +string comparison_to_string(CON_COMPARISON condition) +{ + switch (condition) { + case E: + return "e"; + case NE: + return "ne"; + case L: + return "l"; + case G: + return "g"; + case LE: + return "le"; + case GE: + return "ge"; + } + throw invalid_argument("Invalid comparison value: "+to_string(static_cast(condition))); +} +CON_COMPARISON get_comparison_inverse(CON_COMPARISON condition) +{ + switch (condition) { + case E: + return NE; + case NE: + return E; + case L: + return GE; + case G: + return LE; + case LE: + return G; + case GE: + return L; + } + throw invalid_argument("Invalid comparison value: "+to_string(static_cast(condition))); +} + +static string reg_to_str(uint8_t call_num, CON_BITWIDTH bitwidth) { switch (bitwidth) { case BIT8: @@ -102,45 +141,25 @@ string reg_to_str(uint8_t call_num) } break; } - throw invalid_argument("Invalid bitwidth or call_num: bitwidth="+to_string(static_cast(bitwidth))+" call_num="+to_string(static_cast(call_num))); + throw invalid_argument("Invalid bitwidth or call_num: bitwidth="+to_string(static_cast(bitwidth)) + +" call_num="+to_string(static_cast(call_num))); } -string comparison_to_string(CON_COMPARISON condition) +static uint8_t str_to_reg(string reg_name) { - switch (condition) { - case E: - return "e"; - case NE: - return "ne"; - case L: - return "l"; - case G: - return "g"; - case LE: - return "le"; - case GE: - return "ge"; - } - throw invalid_argument("Invalid comparison value: "+to_string(static_cast(condition))); -} -CON_COMPARISON get_comparison_inverse(CON_COMPARISON condition) -{ - switch (condition) { - case E: - return NE; - case NE: - return E; - case L: - return GE; - case G: - return LE; - case LE: - return G; - case GE: - return L; - } - throw invalid_argument("Invalid comparison value: "+to_string(static_cast(condition))); + if (reg_name=="dil" ||reg_name=="di" || reg_name=="edi" || reg_name=="rdi") + return 0; + if (reg_name=="sil" ||reg_name=="si" || reg_name=="esi" || reg_name=="rsi") + return 1; + if (reg_name=="dl" || reg_name=="dx" || reg_name=="edx" || reg_name=="rdx") + return 2; + if (reg_name=="cl" || reg_name=="cx" || reg_name=="ecx" || reg_name=="rcx") + return 3; + if (reg_name=="r8b" || reg_name=="r8w" || reg_name=="r8d" || reg_name=="r8") + return 4; + if (reg_name=="r9b" || reg_name=="r9w" || reg_name=="r9d" || reg_name=="r9") + return 5; + return 6; } - static void apply_macro_to_token(con_token& token, vector macros) { if (token.tok_type != WHILE && token.tok_type != IF && token.tok_type != CMD) { @@ -155,13 +174,15 @@ static void apply_macro_to_token(con_token& token, vector macros) if (!token.tok_while->condition.arg1.empty() && (pos = token.tok_while->condition.arg1.find(crntmacro->macro)) != string::npos && (pos == 0 || !isalpha(token.tok_while->condition.arg1[pos-1])) && - (pos == token.tok_while->condition.arg1.size()-1 || !isalpha(token.tok_while->condition.arg1[pos+crntmacro->macro.size()]))) { + (pos == token.tok_while->condition.arg1.size()-1 + || !isalpha(token.tok_while->condition.arg1[pos+crntmacro->macro.size()]))) { token.tok_while->condition.arg1.replace(pos, crntmacro->macro.size(), crntmacro->value); } if (!token.tok_while->condition.arg2.empty() && (pos = token.tok_while->condition.arg2.find(crntmacro->macro)) != string::npos && (pos == 0 || !isalpha(token.tok_while->condition.arg2[pos-1])) && - (pos == token.tok_while->condition.arg2.size()-1 || !isalpha(token.tok_while->condition.arg2[pos+crntmacro->macro.size()]))) { + (pos == token.tok_while->condition.arg2.size()-1 + || !isalpha(token.tok_while->condition.arg2[pos+crntmacro->macro.size()]))) { token.tok_while->condition.arg2.replace(pos, crntmacro->macro.size(), crntmacro->value); } break; @@ -169,13 +190,15 @@ static void apply_macro_to_token(con_token& token, vector macros) if (!token.tok_if->condition.arg1.empty() && (pos = token.tok_if->condition.arg1.find(crntmacro->macro)) != string::npos && (pos == 0 || !isalpha(token.tok_if->condition.arg1[pos-1])) && - (pos == token.tok_if->condition.arg1.size()-1 || !isalpha(token.tok_if->condition.arg1[pos+crntmacro->macro.size()]))) { + (pos == token.tok_if->condition.arg1.size()-1 + || !isalpha(token.tok_if->condition.arg1[pos+crntmacro->macro.size()]))) { token.tok_if->condition.arg1.replace(pos, crntmacro->macro.size(), crntmacro->value); } if (!token.tok_if->condition.arg2.empty() && (pos = token.tok_if->condition.arg2.find(crntmacro->macro)) != string::npos && (pos == 0 || !isalpha(token.tok_if->condition.arg2[pos-1])) && - (pos == token.tok_if->condition.arg2.size()-1 || !isalpha(token.tok_if->condition.arg2[pos+crntmacro->macro.size()]))) { + (pos == token.tok_if->condition.arg2.size()-1 + || !isalpha(token.tok_if->condition.arg2[pos+crntmacro->macro.size()]))) { token.tok_if->condition.arg2.replace(pos, crntmacro->macro.size(), crntmacro->value); } break; @@ -199,6 +222,100 @@ static void apply_macro_to_token(con_token& token, vector macros) } } +static vector push_args(vector& args, CON_BITWIDTH bitwidth) +{ + vector arg_tokens; + + // stack args; + for (size_t i = 6; i < args.size() ; ++i) { + size_t i_rev = args.size()+5 - i; + con_token* arg_tok = new con_token(); + arg_tok->tok_type = CMD; + con_cmd* arg_cmd = new con_cmd(); + arg_tok->tok_cmd = arg_cmd; + arg_cmd->command = "pushq"; // bitwidth + arg_cmd->arg1 = args[i_rev]; + arg_tokens.push_back(arg_tok); + } + + // register args; + size_t reg_args_size = min(args.size(),6); + uint8_t first_read[7] = {6,6,6,6,6,6,6}; // cell 6 is garbage to hold not special-regs + for (size_t i = 0; i < reg_args_size; ++i) { + uint8_t reg_num = str_to_reg(args[i]); + first_read[reg_num] = min(first_read[reg_num],i); + } + // sort regs by first-read + uint8_t read_order[6] = {6,6,6,6,6,6}; + for (size_t fr = 0; fr < reg_args_size; ++fr) { + for (size_t reg = 0; reg < reg_args_size; ++reg) { + if ((fr == first_read[reg]) && (first_read[reg] > reg)) { //next in turn and will be pushed to stack + read_order[fr]=reg; + } + } + } + // push reversed to pop order + for (size_t fr = 0; fr < 6; ++fr) { + size_t fr_rev = 5 - fr; // reverse the order + if (read_order[fr_rev] != 6) { // there is a regester first read i arg number fr, and will be deleted before + con_token* arg_tok = new con_token(); + arg_tok->tok_type = CMD; + con_cmd* arg_cmd = new con_cmd(); + arg_tok->tok_cmd = arg_cmd; + arg_cmd->command = "push"; + arg_cmd->arg1 = reg_to_str(read_order[fr_rev], bitwidth); + arg_tokens.push_back(arg_tok); + } + } + // set each arg and track values places + uint8_t current_val_place[6] = {0,1,2,3,4,5}; // 6 means stack + for (size_t reg = 0; reg < reg_args_size; ++reg) { + if (first_read[reg] > reg) { + current_val_place[reg] = 6; + } + } + for (size_t i = 0; i < reg_args_size; i++) { + con_token* arg_tok = new con_token(); + arg_tok->tok_type = CMD; + con_cmd* arg_cmd = new con_cmd(); + arg_tok->tok_cmd = arg_cmd; + uint8_t wanted_reg = str_to_reg(args[i]); + if (wanted_reg==6) { + arg_cmd->command = "mov"; + arg_cmd->arg1 = reg_to_str(i, bitwidth); + arg_cmd->arg2 = args[i]; + // if regi was read before, then current_val_place[i] is a previous register (correct) + // if regi isn't read yet, then current_val_place[i] is stack (correct) + } else { + if (current_val_place[wanted_reg] == 6) { + arg_cmd->command = "pop"; + arg_cmd->arg1 = reg_to_str(i, bitwidth); + current_val_place[wanted_reg] = i; // wanted_reg moved from stack to regi + } else { + if (i != current_val_place[wanted_reg]) { + arg_cmd->command = "mov"; + arg_cmd->arg1 = reg_to_str(i, bitwidth); + arg_cmd->arg2 = reg_to_str(current_val_place[wanted_reg], bitwidth); + // if regi was read before, then current_val_place[i] is a previous register (correct) + // if regi isn't read yet, then current_val_place[i] is stack (correct) + current_val_place[wanted_reg] = min(current_val_place[wanted_reg],i); + } else { + arg_cmd->command = "nop"; + } + } + } + if (arg_cmd->command == "nop") { + delete arg_cmd; + arg_cmd = nullptr; + delete arg_tok; + arg_tok = nullptr; + } else { + arg_tokens.push_back(arg_tok); + } + } + return arg_tokens; +} + void apply_whiles(vector& tokens) { for (size_t i = 0; i< tokens.size(); i++) { @@ -290,12 +407,11 @@ void apply_ifs(vector& tokens) } void apply_functions(std::vector& tokens) { - vector* subtokens = &tokens; - for (size_t i = 0; i < subtokens->size(); i++) { - if ((*subtokens)[i]->tok_type != FUNCTION) { + for (size_t i = 0; i < tokens.size(); i++) { + if (tokens[i]->tok_type != FUNCTION) { continue; } - con_function* crntfunc = (*subtokens)[i]->tok_function; + con_function* crntfunc = tokens[i]->tok_function; if (crntfunc->name == "main") { crntfunc->name = "_start"; } @@ -309,19 +425,19 @@ void apply_functions(std::vector& tokens) con_token* arg_tok = new con_token; arg_tok->tok_type = MACRO; con_macro* arg_macro = new con_macro; - arg_macro->value = reg_to_str(j); + arg_macro->value = reg_to_str(j, bitwidth); arg_macro->macro = crntfunc->arguments[j]; arg_tok->tok_macro = arg_macro; - (*subtokens)[i]->tokens.insert((*subtokens)[i]->tokens.begin(), arg_tok); + tokens[i]->tokens.insert(tokens[i]->tokens.begin(), arg_tok); } - (*subtokens)[i]->tokens.insert((*subtokens)[i]->tokens.begin(), tag_tok); + tokens[i]->tokens.insert(tokens[i]->tokens.begin(), tag_tok); con_token* ret_tok = new con_token; ret_tok->tok_type = CMD; con_cmd* ret_cmd = new con_cmd; ret_tok->tok_cmd = ret_cmd; ret_cmd->command = "ret"; - (*subtokens)[i]->tokens.push_back(ret_tok); + tokens[i]->tokens.push_back(ret_tok); } } void apply_macros(vector& tokens, vector knownmacros) @@ -358,18 +474,7 @@ void apply_funcalls(std::vector& tokens) if (tokens[i]->tok_type != FUNCALL) { continue; } - vector& args = tokens[i]->tok_funcall->arguments; - vector arg_tokens; - for (size_t j = 0; j < args.size(); j++) { - con_token* arg_tok = new con_token(); - arg_tok->tok_type = CMD; - con_cmd* arg_cmd = new con_cmd(); - arg_tok->tok_cmd = arg_cmd; - arg_cmd->command = "mov"; - arg_cmd->arg1 = reg_to_str(j); - arg_cmd->arg2 = args[j]; - arg_tokens.push_back(arg_tok); - } + vector arg_tokens = push_args(tokens[i]->tok_funcall->arguments, bitwidth); con_token* call_tok = new con_token(); call_tok->tok_type = CMD; con_cmd* call_cmd = new con_cmd(); @@ -381,6 +486,32 @@ void apply_funcalls(std::vector& tokens) tokens.insert(tokens.begin()+i+1, arg_tokens.begin(), arg_tokens.end()); } } +void apply_syscalls(std::vector& tokens) +{ + for (size_t i = 0; i < tokens.size(); i++) { + apply_syscalls(tokens[i]->tokens); + if (tokens[i]->tok_type != SYSCALL) { + continue; + } + vector arg_tokens = push_args(tokens[i]->tok_syscall->arguments, bitwidth); + con_token* call_tok1 = new con_token(); + call_tok1->tok_type = CMD; + con_cmd* call_cmd = new con_cmd(); + call_tok1->tok_cmd = call_cmd; + call_cmd->command = "mov"; + call_cmd->arg1 = "rax"; + call_cmd->arg2 = to_string(tokens[i]->tok_syscall->number); + arg_tokens.push_back(call_tok1); + con_token* call_tok2 = new con_token(); + call_tok2->tok_type = CMD; + con_cmd* call_sys = new con_cmd(); + call_tok2->tok_cmd = call_sys; + call_sys->command = "syscall"; + arg_tokens.push_back(call_tok2); + + tokens.insert(tokens.begin()+i+1, arg_tokens.begin(), arg_tokens.end()); + } +} void linearize_tokens(vector& tokens) { @@ -399,9 +530,9 @@ std::string tokens_to_nasm(std::vector& tokens) { string output = ""; for (size_t i = 0; i < tokens.size(); i++) { - if (tokens[i]->tok_type == IF || tokens[i]->tok_type == WHILE + if (tokens[i]->tok_type == WHILE || tokens[i]->tok_type == IF || tokens[i]->tok_type == FUNCTION || tokens[i]->tok_type == MACRO - || tokens[i]->tok_type == FUNCALL) { + || tokens[i]->tok_type == FUNCALL || tokens[i]->tok_type == SYSCALL) { continue; } if (tokens[i]->tok_type == SECTION) { diff --git a/src/reconstruct.h b/src/reconstruct.h index 4dd2c02..05970fa 100644 --- a/src/reconstruct.h +++ b/src/reconstruct.h @@ -10,7 +10,6 @@ extern int if_amnt; extern int while_amnt; extern CON_BITWIDTH bitwidth; -std::string reg_to_str(uint8_t call_num); std::string comparison_to_string(CON_COMPARISON condition); CON_COMPARISON get_comparison_inverse(CON_COMPARISON condition); @@ -23,6 +22,7 @@ void apply_ifs(std::vector& tokens); void apply_functions(std::vector& tokens); void apply_macros(std::vector& tokens, std::vector macros); void apply_funcalls(std::vector& tokens); +void apply_syscalls(std::vector& tokens); // During linearization, the construct parent tokens are removed void linearize_tokens(std::vector& tokens);