From c78612cc87d8f89a7738c947f9061759e9b6d363 Mon Sep 17 00:00:00 2001 From: NoneSince Date: Thu, 25 Jan 2024 18:53:53 +0200 Subject: [PATCH 01/10] revert NoneSince's PR #2 in Thomas-de-Bock/master. The reason: I want to push the changes one step at a time to make it easier to review. --- .gitignore | 2 - Makefile | 41 +----- README.md | 3 +- examples/factorial.con | 2 +- examples/strchr.con | 3 +- src/construct.cpp | 22 ++- src/construct_debug.cpp | 33 ++--- src/construct_debug.h | 9 +- src/construct_flags.cpp | 34 ++--- src/construct_flags.h | 10 +- src/construct_types.h | 36 ++--- src/deconstruct.cpp | 283 ++++++++++++++------------------------ src/deconstruct.h | 25 ++-- src/reconstruct.cpp | 293 +++++++++++++++++++--------------------- src/reconstruct.h | 25 ++-- 15 files changed, 331 insertions(+), 490 deletions(-) delete mode 100644 .gitignore diff --git a/.gitignore b/.gitignore deleted file mode 100644 index b497ff1..0000000 --- a/.gitignore +++ /dev/null @@ -1,2 +0,0 @@ -bin/ -.vscode/ \ No newline at end of file diff --git a/Makefile b/Makefile index 3d0c7ad..f8d850c 100644 --- a/Makefile +++ b/Makefile @@ -1,38 +1,3 @@ -CC = g++ -CFLAGS = -std=c++11 -Wall --pedantic-errors -g -SDIR = src -BDIR = bin -_OBJS = construct_debug.o construct_flags.o deconstruct.o reconstruct.o construct.o -OBJS = $(patsubst %,$(BDIR)/%,$(_OBJS)) -PROG = construct - -.PHONY: all clean - -all: $(OBJS) $(BDIR)/$(PROG) - -$(BDIR)/$(PROG): $(OBJS) - mkdir -p $(BDIR) - $(CC) $(OBJS) -o $(BDIR)/$(PROG) - -$(BDIR)/construct.o: $(SDIR)/construct.cpp $(SDIR)/deconstruct.h $(SDIR)/reconstruct.h $(SDIR)/construct_flags.h $(SDIR)/construct_types.h - mkdir -p $(BDIR) - $(CC) -c $(SDIR)/construct.cpp -o $(BDIR)/construct.o $(CFLAGS) - -$(BDIR)/construct_debug.o: $(SDIR)/construct_debug.cpp $(SDIR)/construct_debug.h $(SDIR)/construct_types.h $(SDIR)/reconstruct.h - mkdir -p $(BDIR) - $(CC) -c $(SDIR)/construct_debug.cpp -o $(BDIR)/construct_debug.o $(CFLAGS) - -$(BDIR)/construct_flags.o: 
$(SDIR)/construct_flags.cpp $(SDIR)/construct_flags.h $(SDIR)/construct_types.h - mkdir -p $(BDIR) - $(CC) -c $(SDIR)/construct_flags.cpp -o $(BDIR)/construct_flags.o $(CFLAGS) - -$(BDIR)/deconstruct.o: $(SDIR)/deconstruct.cpp $(SDIR)/deconstruct.h $(SDIR)/construct_types.h - mkdir -p $(BDIR) - $(CC) -c $(SDIR)/deconstruct.cpp -o $(BDIR)/deconstruct.o $(CFLAGS) - -$(BDIR)/reconstruct.o: $(SDIR)/reconstruct.cpp $(SDIR)/reconstruct.h $(SDIR)/construct_types.h - mkdir -p $(BDIR) - $(CC) -c $(SDIR)/reconstruct.cpp -o $(BDIR)/reconstruct.o $(CFLAGS) - -clean: - rm -rf $(BDIR) +main: + mkdir -p bin/ + g++ src/construct.cpp src/deconstruct.cpp src/construct_debug.cpp src/reconstruct.cpp src/construct_flags.cpp -o bin/construct diff --git a/README.md b/README.md index e88bc63..a3d1036 100644 --- a/README.md +++ b/README.md @@ -37,7 +37,7 @@ fmt: db "%s", 10, 0 - If statements: If statements, like while loops, take a single [conditional](#conditionals) statement - Functions: Functions are declared with the "function" keyword, a "ret" instruction is added to functions in post-processing, so functions will not flow into eachother. -- Function calls: Functions can be called with any number of arguments, independent of the function decleration. +- Function calls: Functions can be called with any number of arguments, independent of the function decleration. If the amount of arguments used to call a function is more than its decleration states, they can be accessed like normal with their respective registers / stack address. Construct function calls, like NASM, use the "call" keyword. Functions can still be called without parentheses or arguments, NASM-style. - Macros: Construct macros can only be used in their respective scopes. Construct macros are declared with the '!' character and cannot contain whitespaces. @@ -58,4 +58,3 @@ Neither side of the comparison can contains whitespaces. 
### Required flags - `-f (format)`: Can be either "elf64", "elf32", "elf16", "elf8" and decides the registers used for funcion calls. - `-i (input file)`: Specifies the input file to be compiled (-i is not neccesary) -- `-o (output file)`: Specifies the output file to be created diff --git a/examples/factorial.con b/examples/factorial.con index 18c99f9..a388f49 100644 --- a/examples/factorial.con +++ b/examples/factorial.con @@ -10,7 +10,7 @@ function factorial(num): while i le num: mul i inc i - + function main(): call factorial(3) !result rax diff --git a/examples/strchr.con b/examples/strchr.con index 81534cf..4ff263d 100644 --- a/examples/strchr.con +++ b/examples/strchr.con @@ -3,9 +3,10 @@ extern printf section .text function strchr(str, chr): !ptrresult rax + !findchr sil mov ptrresult, 0 while byte[str] ne 0: - if byte[str] e chr: + if byte[str] e findchr: mov ptrresult, str ret inc str diff --git a/src/construct.cpp b/src/construct.cpp index 53c61ab..f771c5c 100644 --- a/src/construct.cpp +++ b/src/construct.cpp @@ -1,22 +1,17 @@ -#include -#include -#include -#include -#include -#include "construct_types.h" -#include "deconstruct.h" // parse_construct() -#include "reconstruct.h" // linearize_tokens() -#include "construct_flags.h" // handle_flags() +#include "deconstruct.h" +#include "reconstruct.h" +#include "construct_flags.h" +#include +#include -int main(int argc, char** argv) -{ +int main(int argc, char** argv) { std::string path; std::string outpath; - if (handle_flags(argc, argv, &path, &outpath) != 0) { + if(handle_flags(argc, argv, &path, &outpath) != 0) { std::cout << "Some flag(s) not set" << std::endl; return 0; } - if (path.empty()) { + if(path.empty()) { std::cout << "No input file specified" << std::endl; return 0; } @@ -34,7 +29,6 @@ int main(int argc, char** argv) glob_cmd->command = "global _start"; tokens.insert(tokens.begin(), glob_tok); - // Order dependant: some tokens are replaced with macros, so apply_macro() must be at the end. 
tokens = delinearize_tokens(tokens); apply_functions(tokens); apply_ifs(tokens); diff --git a/src/construct_debug.cpp b/src/construct_debug.cpp index 5098a7a..a3368cb 100644 --- a/src/construct_debug.cpp +++ b/src/construct_debug.cpp @@ -1,13 +1,7 @@ -#include -#include -#include #include "construct_debug.h" -#include "construct_types.h" -#include "reconstruct.h" // comparison_to_string() -std::string tokentype_to_string(CON_TOKENTYPE type) -{ - switch (type) { +std::string tokentype_to_string(CON_TOKENTYPE type) { + switch(type) { case SECTION: return "section"; case TAG: @@ -22,16 +16,13 @@ std::string tokentype_to_string(CON_TOKENTYPE type) return "cmd"; case MACRO: return "macro"; - case FUNCALL: - return "funcall"; } - throw std::invalid_argument("Invalid token type: "+std::to_string(static_cast(type))); + return "unknown"; } -std::string token_to_string(con_token token) -{ +std::string token_to_string(con_token token) { std::string tokstring = "type: " + tokentype_to_string(token.tok_type); - switch (token.tok_type) { + switch(token.tok_type) { case SECTION: tokstring += ", name: " + token.tok_section->name; break; @@ -46,19 +37,19 @@ std::string token_to_string(con_token token) break; case FUNCTION: tokstring += ", function: " + token.tok_function->name + ", arguments: "; - for (size_t i = 0; i < token.tok_function->arguments.size(); i++) { - if (i != 0) { + for(int i = 0; i < token.tok_function->arguments.size(); i++) { + if(i != 0) { tokstring += ", "; } tokstring += token.tok_function->arguments[i]; } break; case CMD: - if (!token.tok_cmd->arg1.empty() && !token.tok_cmd->arg2.empty()) { + if(!token.tok_cmd->arg1.empty() && !token.tok_cmd->arg2.empty()) { tokstring += ", cmd: " + token.tok_cmd->command + " " + token.tok_cmd->arg1 + ", " + token.tok_cmd->arg2; break; } - if (!token.tok_cmd->arg1.empty()) { + if(!token.tok_cmd->arg1.empty()) { tokstring += ", cmd: " + token.tok_cmd->command + " " + token.tok_cmd->arg1; break; } @@ -67,12 +58,10 @@ 
std::string token_to_string(con_token token) case MACRO: tokstring += ", macro: " + token.tok_macro->macro + ", value: " + token.tok_macro->value; break; - default: // FUNCALL - break; } - if (token.tokens.size() > 0) { + if(token.tokens.size() > 0) { tokstring += ", tokens: {\n"; - for (size_t i = 0; i < token.tokens.size(); i++) { + for(int i = 0; i < token.tokens.size(); i++) { tokstring += token_to_string(*token.tokens[i]) + "\n"; } tokstring += "}"; diff --git a/src/construct_debug.h b/src/construct_debug.h index 9ff034a..d9b605a 100644 --- a/src/construct_debug.h +++ b/src/construct_debug.h @@ -1,10 +1,7 @@ -#ifndef CONSTRUCT_DEBUG_H_ -#define CONSTRUCT_DEBUG_H_ - -#include +#include +#include #include "construct_types.h" +#include "reconstruct.h" std::string tokentype_to_string(CON_TOKENTYPE type); std::string token_to_string(con_token token); - -#endif // CONSTRUCT_DEBUG_H_ diff --git a/src/construct_flags.cpp b/src/construct_flags.cpp index 5da4f90..741dbc7 100644 --- a/src/construct_flags.cpp +++ b/src/construct_flags.cpp @@ -1,27 +1,22 @@ -#include -#include #include "construct_flags.h" #include "construct_types.h" -extern CON_BITWIDTH bitwidth; - using namespace std; -int set_bitwidth(char* argv) -{ - if (string(argv) == "elf64") { +int set_bitwidth(char* argv) { + if(strcmp(argv, "elf64") == 0) { bitwidth = BIT64; return 0; } - if (string(argv) == "elf32") { + if(strcmp(argv, "elf32") == 0) { bitwidth = BIT32; return 0; } - if (string(argv) == "elf16") { + if(strcmp(argv, "elf16") == 0) { bitwidth = BIT16; return 0; } - if (string(argv) == "elf8") { + if(strcmp(argv, "elf8") == 0) { bitwidth = BIT8; return 0; } @@ -29,43 +24,42 @@ int set_bitwidth(char* argv) return -1; } -int handle_flags(int argc, char** argv, string* path, string* outpath) -{ +int handle_flags(int argc, char** argv, string* path, string* outpath) { bool bitwidth_set = false; bool path_set = false; bool outpath_set = false; - for (int i = 1; i < argc; i++) { - if (string(argv[i]) == 
"-f" && set_bitwidth(argv[i+1]) == 0) { + for(int i = 1; i < argc; i++) { + if(strcmp(argv[i], "-f") == 0 && set_bitwidth(argv[i+1]) == 0) { bitwidth_set = true; i++; continue; } - if (string(argv[i]) == "-i") { + if(strcmp(argv[i], "-i") == 0) { path_set = true; i++; (*path) = argv[i]; continue; } - if (string(argv[i]) == "-o") { + if(strcmp(argv[i], "-o") == 0) { outpath_set = true; i++; (*outpath) = argv[i]; continue; } - if (path != NULL) { + if(path != NULL) { path_set = true; (*path) = argv[i]; } } - if (!bitwidth_set) { + if(!bitwidth_set) { cout << "flag -f (format) not set" << endl; return -1; } - if (!path_set) { + if(!path_set) { cout << "flag -i (input file) not set" << endl; return -1; } - if (!outpath_set) { + if(!outpath_set) { cout << "flag -o (output file) not set" << endl; return -1; } diff --git a/src/construct_flags.h b/src/construct_flags.h index b7a1e28..9777d64 100644 --- a/src/construct_flags.h +++ b/src/construct_flags.h @@ -1,9 +1,7 @@ -#ifndef CONSTRUCT_FLAGS_H_ -#define CONSTRUCT_FLAGS_H_ - -#include +#include +#include +#include +#include "reconstruct.h" int set_bitwidth(char* argv); int handle_flags(int argc, char** argv, std::string* path, std::string* outpath); - -#endif // CONSTRUCT_FLAGS_H_ diff --git a/src/construct_types.h b/src/construct_types.h index 5f76bbc..489d9da 100644 --- a/src/construct_types.h +++ b/src/construct_types.h @@ -1,8 +1,8 @@ -#ifndef CONSTRUCT_TYPES_H_ -#define CONSTRUCT_TYPES_H_ +#ifndef CON_TYPES_H +#define CON_TYPES_H -#include -#include +#include +#include enum CON_BITWIDTH { BIT8, @@ -40,16 +40,15 @@ struct con_token { struct con_while* tok_while; struct con_if* tok_if; struct con_function* tok_function; + struct con_funcall* tok_funcall; struct con_cmd* tok_cmd; struct con_macro* tok_macro; - struct con_funcall* tok_funcall; std::vector tokens; // Only non-empty for if, while and function tokens }; -struct _con_condition { - CON_COMPARISON op; - std::string arg1; - std::string arg2; +struct con_macro { 
+ std::string value; + std::string macro; }; struct con_section { @@ -60,12 +59,18 @@ struct con_tag { std::string name; }; +struct con_condition { + CON_COMPARISON op; + std::string arg1; + std::string arg2; +}; + struct con_while { - _con_condition condition; + con_condition condition; }; struct con_if { - _con_condition condition; + con_condition condition; }; struct con_function { @@ -79,14 +84,9 @@ struct con_cmd { std::string arg2; }; -struct con_macro { - std::string value; - std::string macro; -}; - struct con_funcall { std::string funcname; - std::vector arguments; + std:: vector arguments; }; -#endif // CONSTRUCT_TYPES_H_ +#endif diff --git a/src/deconstruct.cpp b/src/deconstruct.cpp index 7b22553..87143c6 100644 --- a/src/deconstruct.cpp +++ b/src/deconstruct.cpp @@ -1,75 +1,12 @@ -#include -#include -#include -#include -#include "deconstruct.h" #include "construct_types.h" +#include "deconstruct.h" +#include using namespace std; - -static const char& FIRST_UPPERCASE_LETTER = 'A'; -static const char& LAST_UPPERCASE_LETTER = 'Z'; -static bool is_upper(const char& c) -{ - return c >= FIRST_UPPERCASE_LETTER && c <= LAST_UPPERCASE_LETTER; -} -static char to_lower(const char& c) -{ - return is_upper(c) ? 
c - FIRST_UPPERCASE_LETTER : c; -} -static void to_lower(string& str) -{ - string tmp; - for (string::iterator it = str.begin(); it != str.end(); ++it) { - tmp.push_back(to_lower(*it)); - } - str = tmp; -} - -class IsAnyOf -{ -private: - string chars; -public: - IsAnyOf() = default; - IsAnyOf(const string& _chars) : chars(_chars) {} - IsAnyOf(const char*& _chars) : chars(_chars) {} - ~IsAnyOf() = default; - IsAnyOf(const IsAnyOf& other) = delete; // should save unique sorted chars for that - IsAnyOf& operator=(const IsAnyOf& other) = delete; - - bool operator()(const char& c) const { - for (string::const_iterator it = chars.cbegin(); it != chars.cend(); ++it) { - if (*it == c) return true; - } - return false; - } -}; - -template -static void split(vector& result, const string& input, const Predicate& pred, const bool& compress_adj_delims = false) -{ - string tmp; - bool prev_is_delim = false; - for (string::const_iterator it = input.cbegin(); it != input.cend(); ++it) { - if (pred(*it)) { - if (prev_is_delim && compress_adj_delims) continue; - result.push_back(tmp); - tmp.clear(); - prev_is_delim = true; - } else { - tmp.push_back(*it); - prev_is_delim = false; - } - } - result.push_back(tmp); -} - -int get_line_indentation(string line) -{ +int get_line_indentation(string line) { int indentation = 0; - for (size_t i = 0; i < line.size(); i++) { - if (line[i] == '\t') { + for(int i = 0; i < line.size(); i++) { + if(line[i] == '\t') { indentation++; continue; } @@ -79,45 +16,42 @@ int get_line_indentation(string line) } // Expects formatted line -CON_TOKENTYPE get_token_type(string line) -{ - if (line.substr(0, 8) == "section ") - return SECTION; - if (line.find(' ') == string::npos && line[line.size()-1] == ':') - return TAG; - if (line.substr(0, 6) == "while ") - return WHILE; - if (line.substr(0, 3) == "if ") +CON_TOKENTYPE get_token_type(string line) { + if(line[0] == '!') + return MACRO; + if(line.substr(0, 3) == "if ") return IF; - if (line.substr(0, 9) == 
"function ") + if(line.substr(0, 6) == "while ") + return WHILE; + if(line.substr(0, 9) == "function ") return FUNCTION; - if (line[0] == '!') - return MACRO; - if (line.substr(0, 5) == "call " && line.find('(') != string::npos && line.find(')') != string::npos) + if(line.substr(0, 8) == "section ") + return SECTION; + if(line.substr(0, 5) == "call " && line.find('(') != string::npos && line.find(')') != string::npos) return FUNCALL; + if(line.find(' ') == string::npos && line[line.size()-1] == ':') + return TAG; return CMD; } -CON_COMPARISON str_to_comparison(string comp) -{ - if (comp == "e") +CON_COMPARISON str_to_comparison(string comp) { + if(comp == "e") return E; - if (comp == "ne") + if(comp == "ne") return NE; - if (comp == "l") + if(comp == "l") return L; - if (comp == "g") + if(comp == "g") return G; - if (comp == "le") + if(comp == "le") return LE; - if (comp == "ge") + if(comp == "ge") return GE; - throw invalid_argument("Invalid comparison sing: "+comp); + //ERROR } -vector delinearize_tokens(std::vector tokens) -{ +vector delinearize_tokens(std::vector tokens) { vector dl_tokens; // Serves as parent "section" where all tokens belong to, convenient for algo @@ -138,17 +72,17 @@ vector delinearize_tokens(std::vector tokens) // If token is while, if or function it is pushed to stack and becomes new parent. // if indentation goes up, new token is pushed to stack, when indentation goes down, // tops of stack are popped off by how much it decreased. 
- for (size_t i = 0; i < tokens.size(); i++) { - if (parent_stack.top()->indentation - tokens[i]->indentation >= 0) { + for(int i = 0; i < tokens.size(); i++) { + if(parent_stack.top()->indentation - tokens[i]->indentation >= 0) { int indentation_diff = parent_stack.top()->indentation - tokens[i]->indentation+1; - for (int j = 0; j < indentation_diff; j++) { + for(int j = 0; j < indentation_diff; j++) { parent_stack.pop(); } } - if (tokens[i]->indentation == parent_stack.top()->indentation+1) { + if(tokens[i]->indentation == parent_stack.top()->indentation+1) { parent_stack.top()->tokens.push_back(tokens[i]); } - if (tokens[i]->tok_type == FUNCTION || tokens[i]->tok_type == IF || tokens[i]->tok_type == WHILE) { + if(tokens[i]->tok_type == FUNCTION || tokens[i]->tok_type == IF || tokens[i]->tok_type == WHILE) { parent_stack.push(tokens[i]); } } @@ -161,82 +95,75 @@ vector delinearize_tokens(std::vector tokens) return delinearized_tokens; } -con_section* parse_section(string line) -{ - con_section* tok_section = new con_section(); - vector line_split; - split(line_split, line, IsAnyOf(" ")); - tok_section->name = line_split[1]; - return tok_section; +con_macro* parse_macro(string line) { + con_macro* tok_macro = new con_macro(); + int spacepos = line.find(' '); + tok_macro->macro = line.substr(1, spacepos-1); + tok_macro->value = line.substr(spacepos+1, line.size()-spacepos-1); + return tok_macro; } -con_tag* parse_tag(string line) -{ - con_tag* tok_tag = new con_tag(); - tok_tag->name = line.substr(0, line.size()-1); - return tok_tag; + +con_if* parse_if(string line) { + con_if* tok_if = new con_if(); + vector line_split; + boost::split(line_split, line, boost::is_any_of(" ")); + tok_if->condition.arg1 = line_split[1]; + tok_if->condition.op = str_to_comparison(line_split[2]); + tok_if->condition.arg2 = line_split[3].substr(0, line_split[3].size()-1); + return tok_if; } -con_while* parse_while(string line) -{ +con_while* parse_while(string line) { con_while* 
tok_while = new con_while(); vector line_split; - split(line_split, line, IsAnyOf(" ")); + boost::split(line_split, line, boost::is_any_of(" ")); tok_while->condition.arg1 = line_split[1]; tok_while->condition.op = str_to_comparison(line_split[2]); tok_while->condition.arg2 = line_split[3].substr(0, line_split[3].size()-1); // to remove : return tok_while; } -con_if* parse_if(string line) -{ - con_if* tok_if = new con_if(); +con_section* parse_section(string line) { + con_section* tok_section = new con_section(); vector line_split; - split(line_split, line, IsAnyOf(" ")); - tok_if->condition.arg1 = line_split[1]; - tok_if->condition.op = str_to_comparison(line_split[2]); - tok_if->condition.arg2 = line_split[3].substr(0, line_split[3].size()-1); - return tok_if; + boost::split(line_split, line, boost::is_any_of(" ")); + tok_section->name = line_split[1]; + return tok_section; } -con_function* parse_function(string line) -{ - con_function* tok_function = new con_function(); - vector line_split; - split(line_split, line, IsAnyOf("():,")); - tok_function->name = line_split[0].substr(9, line_split[0].size()-9); - for (size_t i = 1; i < line_split.size()-2; i++) { - if (line_split[i].empty()) { - continue; - } - tok_function->arguments.push_back(line_split[i]); // macros filter out spaces anyway when applied - } - return tok_function; +con_tag* parse_tag(string line) { + con_tag* tok_tag = new con_tag(); + tok_tag->name = line.substr(0, line.size()-1); + return tok_tag; } -con_cmd* parse_cmd(string line) -{ +con_cmd* parse_cmd(string line) { con_cmd* tok_cmd = new con_cmd(); vector line_split; - split(line_split, line, IsAnyOf(" ,")); + boost::split(line_split, line, boost::is_any_of(" ,")); tok_cmd->command = line_split[0]; - if (line_split.size() > 1) + if(line_split.size() > 1) tok_cmd->arg1 = line_split[1]; - if (line_split.size() > 3) + if(line_split.size() > 3) tok_cmd->arg2 = line_split[3]; return tok_cmd; } -con_macro* parse_macro(string line) -{ - con_macro* 
tok_macro = new con_macro(); - int spacepos = line.find(' '); - tok_macro->macro = line.substr(1, spacepos-1); - tok_macro->value = line.substr(spacepos+1, line.size()-spacepos-1); - return tok_macro; +con_function* parse_function(string line) { + con_function* tok_function = new con_function(); + vector line_split; + boost::split(line_split, line, boost::is_any_of("():,")); + tok_function->name = line_split[0].substr(9, line_split[0].size()-9); + for(int i = 1; i < line_split.size()-2; i++) { + if(line_split[i].empty()) { + continue; + } + tok_function->arguments.push_back(line_split[i]); // macros filter out spaces anyway when applied + } + return tok_function; } -con_funcall* parse_funcall(string line) -{ +con_funcall* parse_funcall(string line) { con_funcall* tok_funcall = new con_funcall(); vector line_split; - split(line_split, line, IsAnyOf("(),")); + boost::split(line_split, line, boost::is_any_of("(),")); tok_funcall->funcname = line_split[0].substr(5, line_split[0].size()-5); - for (size_t i = 1; i < line_split.size()-1; i++) { - if (line_split[i].empty()) { + for(int i = 1; i < line_split.size()-1; i++) { + if(line_split[i].empty()) { continue; } tok_funcall->arguments.push_back(line_split[i]); // macros filter out spaces anyway when applied @@ -245,73 +172,71 @@ con_funcall* parse_funcall(string line) } // Does not expect formatted line, only lowercase -con_token* parse_line(string line) -{ +con_token* parse_line(string line) { con_token* token = new con_token; //remove multiple spaces from line string f_line = ""; bool caught_space = false; - for (size_t i = 0; i < line.size(); i++) { - if (line[i] == ' ') { - if (!caught_space) { + for(int i = 0; i < line.size(); i++) { + if(line[i] == ' ') { + if(!caught_space) { f_line += line[i]; caught_space = true; } } else { - if (line[i] != '\t') { + if(line[i] != '\t') { f_line += line[i]; } caught_space = false; - } + } } token->tok_type = get_token_type(f_line); - switch (token->tok_type) { - case SECTION: - 
token->tok_section = parse_section(f_line); + switch(token->tok_type) { + case MACRO: + token->tok_macro = parse_macro(f_line); break; - case TAG: - token->tok_tag = parse_tag(f_line); + case IF: + token->tok_if = parse_if(f_line); break; case WHILE: token->tok_while = parse_while(f_line); break; - case IF: - token->tok_if = parse_if(f_line); - break; case FUNCTION: token->tok_function = parse_function(f_line); break; + case FUNCALL: + token->tok_funcall = parse_funcall(f_line); + case SECTION: + token->tok_section = parse_section(f_line); + break; + case TAG: + token->tok_tag = parse_tag(f_line); + break; case CMD: token->tok_cmd = parse_cmd(f_line); break; - case MACRO: - token->tok_macro = parse_macro(f_line); - break; - case FUNCALL: - token->tok_funcall = parse_funcall(f_line); } return token; } -vector parse_construct(string code) -{ +vector parse_construct(string code) { vector code_split; - split(code_split, code, IsAnyOf("\n"), true); - to_lower(code); + boost::split(code_split, code, boost::is_any_of("\n"), boost::token_compress_on); + boost::to_lower(code); vector tokens; bool in_data = false; - for (size_t i = 0; i < code_split.size(); i++) { + for(int i = 0; i < code_split.size(); i++) { // Check if it contains any alphabet chars - if (code_split[i].find_first_of("abcdefghijklmnopqrstuvwxyz!") == std::string::npos) { + if(code_split[i].find_first_of("abcdefghijklmnopqrstuvwxyz!") == std::string::npos) { continue; } con_token* new_token = parse_line(code_split[i]); new_token->indentation = get_line_indentation(code_split[i]); tokens.push_back(new_token); - if (new_token->tok_type == SECTION && (new_token->tok_section->name == ".data" || new_token->tok_section->name == ".bss")) { + if(new_token->tok_type == SECTION && (new_token->tok_section->name == ".data" || new_token->tok_section->name == ".bss")) { in_data = true; - } else if (new_token->tok_type == SECTION && new_token->tok_section->name == ".text") { + } else if(new_token->tok_type == SECTION && 
new_token->tok_section->name == ".text") { in_data = false; - } else if (in_data) { + } else if(in_data) { //TODO free original con_x con_cmd* data_cmd = new con_cmd; data_cmd->command = code_split[i]; diff --git a/src/deconstruct.h b/src/deconstruct.h index 7139980..be95614 100644 --- a/src/deconstruct.h +++ b/src/deconstruct.h @@ -1,9 +1,11 @@ -#ifndef DECONSTRUCT_H_ -#define DECONSTRUCT_H_ - -#include -#include -#include "construct_types.h" +#include "construct_debug.h" +#include +#include +#include +#include +#include +#include +#include int get_line_indentation(std::string line); CON_TOKENTYPE get_token_type(std::string line); @@ -11,16 +13,13 @@ CON_COMPARISON str_to_comparison(std::string comp); std::vector delinearize_tokens(std::vector tokens); +con_macro* parse_macro(std::string line); +con_if* parse_if(std::string line); +con_while* parse_while(std::string line); con_section* parse_section(std::string line); con_tag* parse_tag(std::string line); -con_while* parse_while(std::string line); -con_if* parse_if(std::string line); -con_function* parse_function(std::string line); con_cmd* parse_cmd(std::string line); -con_macro* parse_macro(std::string line); +con_function* parse_function(std::string line); con_funcall* parse_funcall(std::string line); - con_token* parse_line(std::string line); std::vector parse_construct(std::string code); - -#endif // DECONSTRUCT_H_ diff --git a/src/reconstruct.cpp b/src/reconstruct.cpp index f0e0f7f..d8bfaa5 100644 --- a/src/reconstruct.cpp +++ b/src/reconstruct.cpp @@ -1,20 +1,15 @@ -#include -#include -#include #include "reconstruct.h" #include "construct_types.h" using namespace std; - int if_amnt = 0; int while_amnt = 0; CON_BITWIDTH bitwidth = BIT64; -string reg_to_str(uint8_t call_num) -{ - switch (bitwidth) { +string reg_to_str(uint8_t call_num, CON_BITWIDTH bitwidth) { + switch(bitwidth) { case BIT8: - switch (call_num) { + switch(call_num) { case 0: return "dil"; break; @@ -36,7 +31,7 @@ string reg_to_str(uint8_t 
call_num) } break; case BIT16: - switch (call_num) { + switch(call_num) { case 0: return "di"; break; @@ -58,7 +53,7 @@ string reg_to_str(uint8_t call_num) } break; case BIT32: - switch (call_num) { + switch(call_num) { case 0: return "edi"; break; @@ -80,7 +75,7 @@ string reg_to_str(uint8_t call_num) } break; case BIT64: - switch (call_num) { + switch(call_num) { case 0: return "rdi"; break; @@ -102,11 +97,10 @@ string reg_to_str(uint8_t call_num) } break; } - throw invalid_argument("Invalid bitwidth or call_num: bitwidth="+to_string(static_cast(bitwidth))+" call_num="+to_string(static_cast(call_num))); } -string comparison_to_string(CON_COMPARISON condition) -{ - switch (condition) { + +string comparison_to_string(CON_COMPARISON condition) { + switch(condition) { case E: return "e"; case NE: @@ -120,11 +114,11 @@ string comparison_to_string(CON_COMPARISON condition) case GE: return "ge"; } - throw invalid_argument("Invalid comparison value: "+to_string(static_cast(condition))); + return "unknown"; } -CON_COMPARISON get_comparison_inverse(CON_COMPARISON condition) -{ - switch (condition) { + +CON_COMPARISON get_comparison_inverse(CON_COMPARISON condition) { + switch(condition) { case E: return NE; case NE: @@ -138,73 +132,163 @@ CON_COMPARISON get_comparison_inverse(CON_COMPARISON condition) case GE: return L; } - throw invalid_argument("Invalid comparison value: "+to_string(static_cast(condition))); } -static void apply_macro_to_token(con_token& token, vector macros) -{ - if (token.tok_type != WHILE && token.tok_type != IF && token.tok_type != CMD) { +void apply_macro_to_token(con_token& token, vector macros) { + if(token.tok_type != WHILE && token.tok_type != IF && token.tok_type != CMD) { return; } // Unoptimal, but more clear imo - for (size_t i = 0; i < macros.size(); i++) { + for(int i = 0; i < macros.size(); i++) { con_macro* crntmacro = &macros[i]; size_t pos; - switch (token.tok_type) { + switch(token.tok_type) { case WHILE: - if 
(!token.tok_while->condition.arg1.empty() && + if(!token.tok_while->condition.arg1.empty() && (pos = token.tok_while->condition.arg1.find(crntmacro->macro)) != string::npos && (pos == 0 || !isalpha(token.tok_while->condition.arg1[pos-1])) && (pos == token.tok_while->condition.arg1.size()-1 || !isalpha(token.tok_while->condition.arg1[pos+crntmacro->macro.size()]))) { token.tok_while->condition.arg1.replace(pos, crntmacro->macro.size(), crntmacro->value); } - if (!token.tok_while->condition.arg2.empty() && + if(!token.tok_while->condition.arg2.empty() && (pos = token.tok_while->condition.arg2.find(crntmacro->macro)) != string::npos && (pos == 0 || !isalpha(token.tok_while->condition.arg2[pos-1])) && (pos == token.tok_while->condition.arg2.size()-1 || !isalpha(token.tok_while->condition.arg2[pos+crntmacro->macro.size()]))) { + token.tok_while->condition.arg2.replace(pos, crntmacro->macro.size(), crntmacro->value); } break; case IF: - if (!token.tok_if->condition.arg1.empty() && + if(!token.tok_if->condition.arg1.empty() && (pos = token.tok_if->condition.arg1.find(crntmacro->macro)) != string::npos && (pos == 0 || !isalpha(token.tok_if->condition.arg1[pos-1])) && (pos == token.tok_if->condition.arg1.size()-1 || !isalpha(token.tok_if->condition.arg1[pos+crntmacro->macro.size()]))) { + token.tok_if->condition.arg1.replace(pos, crntmacro->macro.size(), crntmacro->value); } - if (!token.tok_if->condition.arg2.empty() && + if(!token.tok_if->condition.arg2.empty() && (pos = token.tok_if->condition.arg2.find(crntmacro->macro)) != string::npos && (pos == 0 || !isalpha(token.tok_if->condition.arg2[pos-1])) && (pos == token.tok_if->condition.arg2.size()-1 || !isalpha(token.tok_if->condition.arg2[pos+crntmacro->macro.size()]))) { + token.tok_if->condition.arg2.replace(pos, crntmacro->macro.size(), crntmacro->value); } break; case CMD: - if (!token.tok_cmd->arg1.empty() && + if(!token.tok_cmd->arg1.empty() && (pos = token.tok_cmd->arg1.find(crntmacro->macro)) != string::npos && 
(pos == 0 || !isalpha(token.tok_cmd->arg1[pos-1])) && (pos == token.tok_cmd->arg1.size()-1 || !isalpha(token.tok_cmd->arg1[pos+crntmacro->macro.size()]))) { + token.tok_cmd->arg1.replace(pos, crntmacro->macro.size(), crntmacro->value); } - if (!token.tok_cmd->arg2.empty() && + if(!token.tok_cmd->arg2.empty() && (pos = token.tok_cmd->arg2.find(crntmacro->macro)) != string::npos && (pos == 0 || !isalpha(token.tok_cmd->arg2[pos-1])) && (pos == token.tok_cmd->arg2.size()-1 || !isalpha(token.tok_cmd->arg2[pos+crntmacro->macro.size()]))) { + token.tok_cmd->arg2.replace(pos, crntmacro->macro.size(), crntmacro->value); } break; - default: - break; } } } +void apply_funcalls(std::vector& tokens) { + for(int i = 0; i < tokens.size(); i++) { + apply_funcalls(tokens[i]->tokens); + if(tokens[i]->tok_type != FUNCALL) { + continue; + } + vector* args = &tokens[i]->tok_funcall->arguments; + vector arg_tokens; + for(int j = 0; j < args->size(); j++) { + con_token* arg_tok = new con_token(); + arg_tok->tok_type = CMD; + con_cmd* arg_cmd = new con_cmd(); + arg_tok->tok_cmd = arg_cmd; + arg_cmd->command = "mov"; + arg_cmd->arg1 = reg_to_str(j, bitwidth); + arg_cmd->arg2 = (*args)[j]; + arg_tokens.push_back(arg_tok); + } + con_token* call_tok = new con_token(); + call_tok->tok_type = CMD; + con_cmd* call_cmd = new con_cmd(); + call_tok->tok_cmd = call_cmd; + call_cmd->command = "call"; + call_cmd->arg1 = tokens[i]->tok_funcall->funcname; + arg_tokens.push_back(call_tok); + + tokens.insert(tokens.begin()+i+1, arg_tokens.begin(), arg_tokens.end()); + } +} -void apply_whiles(vector& tokens) -{ - for (size_t i = 0; i< tokens.size(); i++) { +void apply_functions(std::vector& tokens) { + vector* subtokens = &tokens; + for(int i = 0; i < subtokens->size(); i++) { + if((*subtokens)[i]->tok_type != FUNCTION) { + continue; + } + con_function* crntfunc = (*subtokens)[i]->tok_function; + if(crntfunc->name == "main") { + crntfunc->name = "_start"; + } + + con_token* tag_tok = new con_token; + 
tag_tok->tok_type = TAG; + con_tag* functag = new con_tag; + tag_tok->tok_tag = functag; + functag->name = crntfunc->name; + for(int j = 0; j < crntfunc->arguments.size(); j++) { + con_token* arg_tok = new con_token; + arg_tok->tok_type = MACRO; + con_macro* arg_macro = new con_macro; + arg_macro->value = reg_to_str(j, bitwidth); + arg_macro->macro = crntfunc->arguments[j]; + arg_tok->tok_macro = arg_macro; + + (*subtokens)[i]->tokens.insert((*subtokens)[i]->tokens.begin(), arg_tok); + } + (*subtokens)[i]->tokens.insert((*subtokens)[i]->tokens.begin(), tag_tok); + con_token* ret_tok = new con_token; + ret_tok->tok_type = CMD; + con_cmd* ret_cmd = new con_cmd; + ret_tok->tok_cmd = ret_cmd; + ret_cmd->command = "ret"; + (*subtokens)[i]->tokens.push_back(ret_tok); + } +} +void apply_macros(vector& tokens, vector knownmacros) { + for(int i = 0; i < tokens.size(); i++) { + if(tokens[i]->tok_type == MACRO) { + // Filter spaces from macro and value pair + con_macro* f_macro = new con_macro(); + f_macro->macro = ""; + f_macro->value = ""; + for(int j = 0; j < tokens[i]->tok_macro->macro.size(); j++) { + if(tokens[i]->tok_macro->macro[j] != ' ') { + f_macro->macro += tokens[i]->tok_macro->macro[j]; + } + } + for(int j = 0; j < tokens[i]->tok_macro->value.size(); j++) { + if(tokens[i]->tok_macro->value[j] != ' ') + f_macro->value += tokens[i]->tok_macro->value[j]; + } + knownmacros.push_back(*f_macro); + delete f_macro; + continue; + } + apply_macro_to_token(*tokens[i], knownmacros); + if(tokens[i]->tok_type == IF || tokens[i]->tok_type == WHILE || tokens[i]->tok_type == FUNCTION) { + apply_macros(tokens[i]->tokens, knownmacros); + } + } +} +void apply_whiles(vector& tokens) { + for(int i = 0; i< tokens.size(); i++) { apply_whiles(tokens[i]->tokens); - if (tokens[i]->tok_type != WHILE) { + if(tokens[i]->tok_type != WHILE) { continue; } con_token* cmp_tok = new con_token; @@ -236,13 +320,13 @@ void apply_whiles(vector& tokens) endwhile_tok->tok_type = TAG; con_tag* 
endwhile_tag = new con_tag; endwhile_tok->tok_tag = endwhile_tag; - endwhile_tag->name = endtag_name; + endwhile_tag->name = endtag_name; con_token* startwhile_tok = new con_token; startwhile_tok->tok_type = TAG; con_tag* startwhile_tag = new con_tag; startwhile_tok->tok_tag = startwhile_tag; - startwhile_tag->name = starttag_name; + startwhile_tag->name = starttag_name; while_amnt++; tokens[i]->tokens.insert(tokens[i]->tokens.begin(), jmp_tok); @@ -253,11 +337,10 @@ void apply_whiles(vector& tokens) // so: starttag, cmp, jmp ... jmp, endtag } } -void apply_ifs(vector& tokens) -{ - for (size_t i = 0; i< tokens.size(); i++) { +void apply_ifs(vector& tokens) { + for(int i = 0; i< tokens.size(); i++) { apply_ifs(tokens[i]->tokens); - if (tokens[i]->tok_type != IF) { + if(tokens[i]->tok_type != IF) { continue; } con_token* cmp_tok = new con_token; @@ -267,7 +350,7 @@ void apply_ifs(vector& tokens) cmp_cmd->command = "cmp"; cmp_cmd->arg1 = tokens[i]->tok_if->condition.arg1; cmp_cmd->arg2 = tokens[i]->tok_if->condition.arg2; - + string tagname = "endif" + to_string(if_amnt); con_token* jmp_tok = new con_token; @@ -281,7 +364,7 @@ void apply_ifs(vector& tokens) endif_tok->tok_type = TAG; con_tag* endif_tag = new con_tag; endif_tok->tok_tag = endif_tag; - endif_tag->name = tagname; + endif_tag->name = tagname; if_amnt++; tokens[i]->tokens.insert(tokens[i]->tokens.begin(), jmp_tok); @@ -289,104 +372,9 @@ void apply_ifs(vector& tokens) tokens[i]->tokens.push_back(endif_tok); } } -void apply_functions(std::vector& tokens) -{ - vector* subtokens = &tokens; - for (size_t i = 0; i < subtokens->size(); i++) { - if ((*subtokens)[i]->tok_type != FUNCTION) { - continue; - } - con_function* crntfunc = (*subtokens)[i]->tok_function; - if (crntfunc->name == "main") { - crntfunc->name = "_start"; - } - - con_token* tag_tok = new con_token; - tag_tok->tok_type = TAG; - con_tag* functag = new con_tag; - tag_tok->tok_tag = functag; - functag->name = crntfunc->name; - for (size_t j = 0; j < 
crntfunc->arguments.size(); j++) { - con_token* arg_tok = new con_token; - arg_tok->tok_type = MACRO; - con_macro* arg_macro = new con_macro; - arg_macro->value = reg_to_str(j); - arg_macro->macro = crntfunc->arguments[j]; - arg_tok->tok_macro = arg_macro; - - (*subtokens)[i]->tokens.insert((*subtokens)[i]->tokens.begin(), arg_tok); - } - (*subtokens)[i]->tokens.insert((*subtokens)[i]->tokens.begin(), tag_tok); - con_token* ret_tok = new con_token; - ret_tok->tok_type = CMD; - con_cmd* ret_cmd = new con_cmd; - ret_tok->tok_cmd = ret_cmd; - ret_cmd->command = "ret"; - (*subtokens)[i]->tokens.push_back(ret_tok); - } -} -void apply_macros(vector& tokens, vector knownmacros) -{ - for (size_t i = 0; i < tokens.size(); i++) { - if (tokens[i]->tok_type == MACRO) { - // Filter spaces from macro and value pair - con_macro* f_macro = new con_macro(); - f_macro->macro = ""; - f_macro->value = ""; - for (size_t j = 0; j < tokens[i]->tok_macro->macro.size(); j++) { - if (tokens[i]->tok_macro->macro[j] != ' ') { - f_macro->macro += tokens[i]->tok_macro->macro[j]; - } - } - for (size_t j = 0; j < tokens[i]->tok_macro->value.size(); j++) { - if (tokens[i]->tok_macro->value[j] != ' ') - f_macro->value += tokens[i]->tok_macro->value[j]; - } - knownmacros.push_back(*f_macro); - delete f_macro; - continue; - } - apply_macro_to_token(*tokens[i], knownmacros); - if (tokens[i]->tok_type == IF || tokens[i]->tok_type == WHILE || tokens[i]->tok_type == FUNCTION) { - apply_macros(tokens[i]->tokens, knownmacros); - } - } -} -void apply_funcalls(std::vector& tokens) -{ - for (size_t i = 0; i < tokens.size(); i++) { - apply_funcalls(tokens[i]->tokens); - if (tokens[i]->tok_type != FUNCALL) { - continue; - } - vector* args = &tokens[i]->tok_funcall->arguments; - vector arg_tokens; - for (size_t j = 0; j < args->size(); j++) { - con_token* arg_tok = new con_token(); - arg_tok->tok_type = CMD; - con_cmd* arg_cmd = new con_cmd(); - arg_tok->tok_cmd = arg_cmd; - arg_cmd->command = "mov"; - 
arg_cmd->arg1 = reg_to_str(j); - arg_cmd->arg2 = (*args)[j]; - arg_tokens.push_back(arg_tok); - } - con_token* call_tok = new con_token(); - call_tok->tok_type = CMD; - con_cmd* call_cmd = new con_cmd(); - call_tok->tok_cmd = call_cmd; - call_cmd->command = "call"; - call_cmd->arg1 = tokens[i]->tok_funcall->funcname; - arg_tokens.push_back(call_tok); - - tokens.insert(tokens.begin()+i+1, arg_tokens.begin(), arg_tokens.end()); - } -} - -void linearize_tokens(vector& tokens) -{ - for (size_t i = 0; i < tokens.size(); i++) { - if (tokens[i]->tok_type != IF && tokens[i]->tok_type != WHILE && tokens[i]->tok_type != FUNCTION) { +void linearize_tokens(vector& tokens) { + for(int i = 0; i < tokens.size(); i++) { + if(tokens[i]->tok_type != IF && tokens[i]->tok_type != WHILE && tokens[i]->tok_type != FUNCTION) { continue; } vector* subtokens = &tokens[i]->tokens; @@ -396,31 +384,28 @@ void linearize_tokens(vector& tokens) } } -std::string tokens_to_nasm(std::vector& tokens) -{ +std::string tokens_to_nasm(std::vector& tokens) { string output = ""; - for (size_t i = 0; i < tokens.size(); i++) { - if (tokens[i]->tok_type == IF || tokens[i]->tok_type == WHILE - || tokens[i]->tok_type == FUNCTION || tokens[i]->tok_type == MACRO - || tokens[i]->tok_type == FUNCALL) { + for(int i = 0; i < tokens.size(); i++) { + if(tokens[i]->tok_type == IF || tokens[i]->tok_type == WHILE || tokens[i]->tok_type == FUNCTION || tokens[i]->tok_type == MACRO || tokens[i]->tok_type == FUNCALL) { continue; } output += "\n"; - if (tokens[i]->tok_type == CMD) { + if(tokens[i]->tok_type == CMD) { output += tokens[i]->tok_cmd->command; - if (!tokens[i]->tok_cmd->arg1.empty()) { + if(!tokens[i]->tok_cmd->arg1.empty()) { output += " " + tokens[i]->tok_cmd->arg1; } - if (!tokens[i]->tok_cmd->arg2.empty()) { + if(!tokens[i]->tok_cmd->arg2.empty()) { output += ", " + tokens[i]->tok_cmd->arg2; } continue; } - if (tokens[i]->tok_type == TAG) { + if(tokens[i]->tok_type == TAG) { output += tokens[i]->tok_tag->name + 
":"; continue; } - if (tokens[i]->tok_type == SECTION) { + if(tokens[i]->tok_type == SECTION) { output += "section " + tokens[i]->tok_section->name; continue; } diff --git a/src/reconstruct.h b/src/reconstruct.h index 4dd2c02..3147859 100644 --- a/src/reconstruct.h +++ b/src/reconstruct.h @@ -1,8 +1,6 @@ -#ifndef RECONSTRUCT_H_ -#define RECONSTRUCT_H_ - -#include -#include +#include +#include +#include #include "construct_types.h" // Used for naming tags @@ -10,23 +8,22 @@ extern int if_amnt; extern int while_amnt; extern CON_BITWIDTH bitwidth; -std::string reg_to_str(uint8_t call_num); +std::string reg_to_str(uint8_t call_num, CON_BITWIDTH bitwidth); std::string comparison_to_string(CON_COMPARISON condition); CON_COMPARISON get_comparison_inverse(CON_COMPARISON condition); -// The following functions transform the construct specific tokens to nasm ones, +// During linearization, the construct parent tokens are removed +void linearize_tokens(std::vector& tokens); +void apply_macro_to_token(con_token& token, std::vector macros); + +// The following functions transform the construct specific tokens to nasm ones, // the parent construct tokens remain, but are removed during linearization // Converts args to macros and adds tag with same name to child tokens +void apply_functions(std::vector& tokens); +void apply_funcalls(std::vector& tokens); void apply_whiles(std::vector& tokens); void apply_ifs(std::vector& tokens); -void apply_functions(std::vector& tokens); void apply_macros(std::vector& tokens, std::vector macros); -void apply_funcalls(std::vector& tokens); - -// During linearization, the construct parent tokens are removed -void linearize_tokens(std::vector& tokens); std::string tokens_to_nasm(std::vector& tokens); - -#endif // RECONSTRUCT_H_ From 4ff5a9856443e803f217af6f5fc58f7ce5a46ba5 Mon Sep 17 00:00:00 2001 From: NoneSince Date: Thu, 25 Jan 2024 20:32:33 +0200 Subject: [PATCH 02/10] added gitignore andwrote better (not the best) Makefile --- .gitignore | 2 
++ Makefile | 41 ++++++++++++++++++++++++++++++++++++++--- 2 files changed, 40 insertions(+), 3 deletions(-) create mode 100644 .gitignore diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..b497ff1 --- /dev/null +++ b/.gitignore @@ -0,0 +1,2 @@ +bin/ +.vscode/ \ No newline at end of file diff --git a/Makefile b/Makefile index f8d850c..3d0c7ad 100644 --- a/Makefile +++ b/Makefile @@ -1,3 +1,38 @@ -main: - mkdir -p bin/ - g++ src/construct.cpp src/deconstruct.cpp src/construct_debug.cpp src/reconstruct.cpp src/construct_flags.cpp -o bin/construct +CC = g++ +CFLAGS = -std=c++11 -Wall --pedantic-errors -g +SDIR = src +BDIR = bin +_OBJS = construct_debug.o construct_flags.o deconstruct.o reconstruct.o construct.o +OBJS = $(patsubst %,$(BDIR)/%,$(_OBJS)) +PROG = construct + +.PHONY: all clean + +all: $(OBJS) $(BDIR)/$(PROG) + +$(BDIR)/$(PROG): $(OBJS) + mkdir -p $(BDIR) + $(CC) $(OBJS) -o $(BDIR)/$(PROG) + +$(BDIR)/construct.o: $(SDIR)/construct.cpp $(SDIR)/deconstruct.h $(SDIR)/reconstruct.h $(SDIR)/construct_flags.h $(SDIR)/construct_types.h + mkdir -p $(BDIR) + $(CC) -c $(SDIR)/construct.cpp -o $(BDIR)/construct.o $(CFLAGS) + +$(BDIR)/construct_debug.o: $(SDIR)/construct_debug.cpp $(SDIR)/construct_debug.h $(SDIR)/construct_types.h $(SDIR)/reconstruct.h + mkdir -p $(BDIR) + $(CC) -c $(SDIR)/construct_debug.cpp -o $(BDIR)/construct_debug.o $(CFLAGS) + +$(BDIR)/construct_flags.o: $(SDIR)/construct_flags.cpp $(SDIR)/construct_flags.h $(SDIR)/construct_types.h + mkdir -p $(BDIR) + $(CC) -c $(SDIR)/construct_flags.cpp -o $(BDIR)/construct_flags.o $(CFLAGS) + +$(BDIR)/deconstruct.o: $(SDIR)/deconstruct.cpp $(SDIR)/deconstruct.h $(SDIR)/construct_types.h + mkdir -p $(BDIR) + $(CC) -c $(SDIR)/deconstruct.cpp -o $(BDIR)/deconstruct.o $(CFLAGS) + +$(BDIR)/reconstruct.o: $(SDIR)/reconstruct.cpp $(SDIR)/reconstruct.h $(SDIR)/construct_types.h + mkdir -p $(BDIR) + $(CC) -c $(SDIR)/reconstruct.cpp -o $(BDIR)/reconstruct.o $(CFLAGS) + +clean: + rm -rf $(BDIR) 
From 199c96035e4428a69841fde8ba4f540c360f843f Mon Sep 17 00:00:00 2001 From: NoneSince Date: Thu, 25 Jan 2024 21:25:49 +0200 Subject: [PATCH 03/10] strchr.con: usless macro - appears in your youtube video src/*: whitespaces src/construct.cpp: added a note you explained in the youtube video README.md: added eexplaination for "-o" flag --- README.md | 3 +- examples/factorial.con | 2 +- examples/strchr.con | 3 +- src/construct.cpp | 12 ++-- src/construct_debug.cpp | 22 ++++--- src/construct_debug.h | 4 +- src/construct_flags.cpp | 30 +++++---- src/construct_flags.h | 6 +- src/construct_types.h | 6 +- src/deconstruct.cpp | 117 ++++++++++++++++++--------------- src/deconstruct.h | 15 +++-- src/reconstruct.cpp | 140 +++++++++++++++++++++------------------- src/reconstruct.h | 8 +-- 13 files changed, 197 insertions(+), 171 deletions(-) diff --git a/README.md b/README.md index a3d1036..e88bc63 100644 --- a/README.md +++ b/README.md @@ -37,7 +37,7 @@ fmt: db "%s", 10, 0 - If statements: If statements, like while loops, take a single [conditional](#conditionals) statement - Functions: Functions are declared with the "function" keyword, a "ret" instruction is added to functions in post-processing, so functions will not flow into eachother. -- Function calls: Functions can be called with any number of arguments, independent of the function decleration. +- Function calls: Functions can be called with any number of arguments, independent of the function decleration. If the amount of arguments used to call a function is more than its decleration states, they can be accessed like normal with their respective registers / stack address. Construct function calls, like NASM, use the "call" keyword. Functions can still be called without parentheses or arguments, NASM-style. - Macros: Construct macros can only be used in their respective scopes. Construct macros are declared with the '!' character and cannot contain whitespaces. 
@@ -58,3 +58,4 @@ Neither side of the comparison can contains whitespaces. ### Required flags - `-f (format)`: Can be either "elf64", "elf32", "elf16", "elf8" and decides the registers used for funcion calls. - `-i (input file)`: Specifies the input file to be compiled (-i is not neccesary) +- `-o (output file)`: Specifies the output file to be created diff --git a/examples/factorial.con b/examples/factorial.con index a388f49..18c99f9 100644 --- a/examples/factorial.con +++ b/examples/factorial.con @@ -10,7 +10,7 @@ function factorial(num): while i le num: mul i inc i - + function main(): call factorial(3) !result rax diff --git a/examples/strchr.con b/examples/strchr.con index 4ff263d..81534cf 100644 --- a/examples/strchr.con +++ b/examples/strchr.con @@ -3,10 +3,9 @@ extern printf section .text function strchr(str, chr): !ptrresult rax - !findchr sil mov ptrresult, 0 while byte[str] ne 0: - if byte[str] e findchr: + if byte[str] e chr: mov ptrresult, str ret inc str diff --git a/src/construct.cpp b/src/construct.cpp index f771c5c..73ea30a 100644 --- a/src/construct.cpp +++ b/src/construct.cpp @@ -1,17 +1,18 @@ #include "deconstruct.h" #include "reconstruct.h" #include "construct_flags.h" -#include -#include +#include +#include -int main(int argc, char** argv) { +int main(int argc, char** argv) +{ std::string path; std::string outpath; - if(handle_flags(argc, argv, &path, &outpath) != 0) { + if (handle_flags(argc, argv, &path, &outpath) != 0) { std::cout << "Some flag(s) not set" << std::endl; return 0; } - if(path.empty()) { + if (path.empty()) { std::cout << "No input file specified" << std::endl; return 0; } @@ -29,6 +30,7 @@ int main(int argc, char** argv) { glob_cmd->command = "global _start"; tokens.insert(tokens.begin(), glob_tok); + // Order dependant: some tokens are replaced with macros, so apply_macro() must be at the end. 
tokens = delinearize_tokens(tokens); apply_functions(tokens); apply_ifs(tokens); diff --git a/src/construct_debug.cpp b/src/construct_debug.cpp index a3368cb..48a443b 100644 --- a/src/construct_debug.cpp +++ b/src/construct_debug.cpp @@ -1,7 +1,8 @@ #include "construct_debug.h" -std::string tokentype_to_string(CON_TOKENTYPE type) { - switch(type) { +std::string tokentype_to_string(CON_TOKENTYPE type) +{ + switch (type) { case SECTION: return "section"; case TAG: @@ -20,9 +21,10 @@ std::string tokentype_to_string(CON_TOKENTYPE type) { return "unknown"; } -std::string token_to_string(con_token token) { +std::string token_to_string(con_token token) +{ std::string tokstring = "type: " + tokentype_to_string(token.tok_type); - switch(token.tok_type) { + switch (token.tok_type) { case SECTION: tokstring += ", name: " + token.tok_section->name; break; @@ -37,19 +39,19 @@ std::string token_to_string(con_token token) { break; case FUNCTION: tokstring += ", function: " + token.tok_function->name + ", arguments: "; - for(int i = 0; i < token.tok_function->arguments.size(); i++) { - if(i != 0) { + for (int i = 0; i < token.tok_function->arguments.size(); i++) { + if (i != 0) { tokstring += ", "; } tokstring += token.tok_function->arguments[i]; } break; case CMD: - if(!token.tok_cmd->arg1.empty() && !token.tok_cmd->arg2.empty()) { + if (!token.tok_cmd->arg1.empty() && !token.tok_cmd->arg2.empty()) { tokstring += ", cmd: " + token.tok_cmd->command + " " + token.tok_cmd->arg1 + ", " + token.tok_cmd->arg2; break; } - if(!token.tok_cmd->arg1.empty()) { + if (!token.tok_cmd->arg1.empty()) { tokstring += ", cmd: " + token.tok_cmd->command + " " + token.tok_cmd->arg1; break; } @@ -59,9 +61,9 @@ std::string token_to_string(con_token token) { tokstring += ", macro: " + token.tok_macro->macro + ", value: " + token.tok_macro->value; break; } - if(token.tokens.size() > 0) { + if (token.tokens.size() > 0) { tokstring += ", tokens: {\n"; - for(int i = 0; i < token.tokens.size(); i++) { + for 
(int i = 0; i < token.tokens.size(); i++) { tokstring += token_to_string(*token.tokens[i]) + "\n"; } tokstring += "}"; diff --git a/src/construct_debug.h b/src/construct_debug.h index d9b605a..df7bbc2 100644 --- a/src/construct_debug.h +++ b/src/construct_debug.h @@ -1,5 +1,5 @@ -#include -#include +#include +#include #include "construct_types.h" #include "reconstruct.h" diff --git a/src/construct_flags.cpp b/src/construct_flags.cpp index 741dbc7..a4559e6 100644 --- a/src/construct_flags.cpp +++ b/src/construct_flags.cpp @@ -3,20 +3,21 @@ using namespace std; -int set_bitwidth(char* argv) { - if(strcmp(argv, "elf64") == 0) { +int set_bitwidth(char* argv) +{ + if (strcmp(argv, "elf64") == 0) { bitwidth = BIT64; return 0; } - if(strcmp(argv, "elf32") == 0) { + if (strcmp(argv, "elf32") == 0) { bitwidth = BIT32; return 0; } - if(strcmp(argv, "elf16") == 0) { + if (strcmp(argv, "elf16") == 0) { bitwidth = BIT16; return 0; } - if(strcmp(argv, "elf8") == 0) { + if (strcmp(argv, "elf8") == 0) { bitwidth = BIT8; return 0; } @@ -24,42 +25,43 @@ int set_bitwidth(char* argv) { return -1; } -int handle_flags(int argc, char** argv, string* path, string* outpath) { +int handle_flags(int argc, char** argv, string* path, string* outpath) +{ bool bitwidth_set = false; bool path_set = false; bool outpath_set = false; - for(int i = 1; i < argc; i++) { - if(strcmp(argv[i], "-f") == 0 && set_bitwidth(argv[i+1]) == 0) { + for (int i = 1; i < argc; i++) { + if (strcmp(argv[i], "-f") == 0 && set_bitwidth(argv[i+1]) == 0) { bitwidth_set = true; i++; continue; } - if(strcmp(argv[i], "-i") == 0) { + if (strcmp(argv[i], "-i") == 0) { path_set = true; i++; (*path) = argv[i]; continue; } - if(strcmp(argv[i], "-o") == 0) { + if (strcmp(argv[i], "-o") == 0) { outpath_set = true; i++; (*outpath) = argv[i]; continue; } - if(path != NULL) { + if (path != NULL) { path_set = true; (*path) = argv[i]; } } - if(!bitwidth_set) { + if (!bitwidth_set) { cout << "flag -f (format) not set" << endl; return -1; 
} - if(!path_set) { + if (!path_set) { cout << "flag -i (input file) not set" << endl; return -1; } - if(!outpath_set) { + if (!outpath_set) { cout << "flag -o (output file) not set" << endl; return -1; } diff --git a/src/construct_flags.h b/src/construct_flags.h index 9777d64..a0df1b3 100644 --- a/src/construct_flags.h +++ b/src/construct_flags.h @@ -1,6 +1,6 @@ -#include -#include -#include +#include +#include +#include #include "reconstruct.h" int set_bitwidth(char* argv); diff --git a/src/construct_types.h b/src/construct_types.h index 489d9da..212187a 100644 --- a/src/construct_types.h +++ b/src/construct_types.h @@ -1,8 +1,8 @@ #ifndef CON_TYPES_H #define CON_TYPES_H -#include -#include +#include +#include enum CON_BITWIDTH { BIT8, @@ -86,7 +86,7 @@ struct con_cmd { struct con_funcall { std::string funcname; - std:: vector arguments; + std::vector arguments; }; #endif diff --git a/src/deconstruct.cpp b/src/deconstruct.cpp index 87143c6..e6d28e6 100644 --- a/src/deconstruct.cpp +++ b/src/deconstruct.cpp @@ -3,10 +3,11 @@ #include using namespace std; -int get_line_indentation(string line) { +int get_line_indentation(string line) +{ int indentation = 0; - for(int i = 0; i < line.size(); i++) { - if(line[i] == '\t') { + for (int i = 0; i < line.size(); i++) { + if (line[i] == '\t') { indentation++; continue; } @@ -16,42 +17,45 @@ int get_line_indentation(string line) { } // Expects formatted line -CON_TOKENTYPE get_token_type(string line) { - if(line[0] == '!') +CON_TOKENTYPE get_token_type(string line) +{ + if (line[0] == '!') return MACRO; - if(line.substr(0, 3) == "if ") + if (line.substr(0, 3) == "if ") return IF; - if(line.substr(0, 6) == "while ") + if (line.substr(0, 6) == "while ") return WHILE; - if(line.substr(0, 9) == "function ") + if (line.substr(0, 9) == "function ") return FUNCTION; - if(line.substr(0, 8) == "section ") + if (line.substr(0, 8) == "section ") return SECTION; - if(line.substr(0, 5) == "call " && line.find('(') != string::npos && 
line.find(')') != string::npos) + if (line.substr(0, 5) == "call " && line.find('(') != string::npos && line.find(')') != string::npos) return FUNCALL; - if(line.find(' ') == string::npos && line[line.size()-1] == ':') + if (line.find(' ') == string::npos && line[line.size()-1] == ':') return TAG; return CMD; } -CON_COMPARISON str_to_comparison(string comp) { - if(comp == "e") +CON_COMPARISON str_to_comparison(string comp) +{ + if (comp == "e") return E; - if(comp == "ne") + if (comp == "ne") return NE; - if(comp == "l") + if (comp == "l") return L; - if(comp == "g") + if (comp == "g") return G; - if(comp == "le") + if (comp == "le") return LE; - if(comp == "ge") + if (comp == "ge") return GE; //ERROR } -vector delinearize_tokens(std::vector tokens) { +vector delinearize_tokens(std::vector tokens) +{ vector dl_tokens; // Serves as parent "section" where all tokens belong to, convenient for algo @@ -72,17 +76,17 @@ vector delinearize_tokens(std::vector tokens) { // If token is while, if or function it is pushed to stack and becomes new parent. // if indentation goes up, new token is pushed to stack, when indentation goes down, // tops of stack are popped off by how much it decreased. 
- for(int i = 0; i < tokens.size(); i++) { - if(parent_stack.top()->indentation - tokens[i]->indentation >= 0) { + for (int i = 0; i < tokens.size(); i++) { + if (parent_stack.top()->indentation - tokens[i]->indentation >= 0) { int indentation_diff = parent_stack.top()->indentation - tokens[i]->indentation+1; - for(int j = 0; j < indentation_diff; j++) { + for (int j = 0; j < indentation_diff; j++) { parent_stack.pop(); } } - if(tokens[i]->indentation == parent_stack.top()->indentation+1) { + if (tokens[i]->indentation == parent_stack.top()->indentation+1) { parent_stack.top()->tokens.push_back(tokens[i]); } - if(tokens[i]->tok_type == FUNCTION || tokens[i]->tok_type == IF || tokens[i]->tok_type == WHILE) { + if (tokens[i]->tok_type == FUNCTION || tokens[i]->tok_type == IF || tokens[i]->tok_type == WHILE) { parent_stack.push(tokens[i]); } } @@ -95,15 +99,16 @@ vector delinearize_tokens(std::vector tokens) { return delinearized_tokens; } -con_macro* parse_macro(string line) { +con_macro* parse_macro(string line) +{ con_macro* tok_macro = new con_macro(); int spacepos = line.find(' '); tok_macro->macro = line.substr(1, spacepos-1); tok_macro->value = line.substr(spacepos+1, line.size()-spacepos-1); return tok_macro; } - -con_if* parse_if(string line) { +con_if* parse_if(string line) +{ con_if* tok_if = new con_if(); vector line_split; boost::split(line_split, line, boost::is_any_of(" ")); @@ -112,7 +117,8 @@ con_if* parse_if(string line) { tok_if->condition.arg2 = line_split[3].substr(0, line_split[3].size()-1); return tok_if; } -con_while* parse_while(string line) { +con_while* parse_while(string line) +{ con_while* tok_while = new con_while(); vector line_split; boost::split(line_split, line, boost::is_any_of(" ")); @@ -121,49 +127,54 @@ con_while* parse_while(string line) { tok_while->condition.arg2 = line_split[3].substr(0, line_split[3].size()-1); // to remove : return tok_while; } -con_section* parse_section(string line) { +con_section* parse_section(string 
line) +{ con_section* tok_section = new con_section(); vector line_split; boost::split(line_split, line, boost::is_any_of(" ")); tok_section->name = line_split[1]; return tok_section; } -con_tag* parse_tag(string line) { +con_tag* parse_tag(string line) +{ con_tag* tok_tag = new con_tag(); tok_tag->name = line.substr(0, line.size()-1); return tok_tag; } -con_cmd* parse_cmd(string line) { +con_cmd* parse_cmd(string line) +{ con_cmd* tok_cmd = new con_cmd(); vector line_split; boost::split(line_split, line, boost::is_any_of(" ,")); tok_cmd->command = line_split[0]; - if(line_split.size() > 1) + if (line_split.size() > 1) tok_cmd->arg1 = line_split[1]; - if(line_split.size() > 3) + if (line_split.size() > 3) tok_cmd->arg2 = line_split[3]; return tok_cmd; } -con_function* parse_function(string line) { +con_function* parse_function(string line) +{ con_function* tok_function = new con_function(); vector line_split; boost::split(line_split, line, boost::is_any_of("():,")); tok_function->name = line_split[0].substr(9, line_split[0].size()-9); - for(int i = 1; i < line_split.size()-2; i++) { - if(line_split[i].empty()) { + for (int i = 1; i < line_split.size()-2; i++) { + if (line_split[i].empty()) { continue; } tok_function->arguments.push_back(line_split[i]); // macros filter out spaces anyway when applied } return tok_function; } -con_funcall* parse_funcall(string line) { +con_funcall* parse_funcall(string line) +{ con_funcall* tok_funcall = new con_funcall(); vector line_split; boost::split(line_split, line, boost::is_any_of("(),")); tok_funcall->funcname = line_split[0].substr(5, line_split[0].size()-5); - for(int i = 1; i < line_split.size()-1; i++) { - if(line_split[i].empty()) { + for (int i = 1; i < line_split.size()-1; i++) { + if (line_split[i].empty()) { continue; } tok_funcall->arguments.push_back(line_split[i]); // macros filter out spaces anyway when applied @@ -172,26 +183,27 @@ con_funcall* parse_funcall(string line) { } // Does not expect formatted line, 
only lowercase -con_token* parse_line(string line) { +con_token* parse_line(string line) +{ con_token* token = new con_token; //remove multiple spaces from line string f_line = ""; bool caught_space = false; - for(int i = 0; i < line.size(); i++) { - if(line[i] == ' ') { - if(!caught_space) { + for (int i = 0; i < line.size(); i++) { + if (line[i] == ' ') { + if (!caught_space) { f_line += line[i]; caught_space = true; } } else { - if(line[i] != '\t') { + if (line[i] != '\t') { f_line += line[i]; } caught_space = false; - } + } } token->tok_type = get_token_type(f_line); - switch(token->tok_type) { + switch (token->tok_type) { case MACRO: token->tok_macro = parse_macro(f_line); break; @@ -218,25 +230,26 @@ con_token* parse_line(string line) { } return token; } -vector parse_construct(string code) { +vector parse_construct(string code) +{ vector code_split; boost::split(code_split, code, boost::is_any_of("\n"), boost::token_compress_on); boost::to_lower(code); vector tokens; bool in_data = false; - for(int i = 0; i < code_split.size(); i++) { + for (int i = 0; i < code_split.size(); i++) { // Check if it contains any alphabet chars - if(code_split[i].find_first_of("abcdefghijklmnopqrstuvwxyz!") == std::string::npos) { + if (code_split[i].find_first_of("abcdefghijklmnopqrstuvwxyz!") == std::string::npos) { continue; } con_token* new_token = parse_line(code_split[i]); new_token->indentation = get_line_indentation(code_split[i]); tokens.push_back(new_token); - if(new_token->tok_type == SECTION && (new_token->tok_section->name == ".data" || new_token->tok_section->name == ".bss")) { + if (new_token->tok_type == SECTION && (new_token->tok_section->name == ".data" || new_token->tok_section->name == ".bss")) { in_data = true; - } else if(new_token->tok_type == SECTION && new_token->tok_section->name == ".text") { + } else if (new_token->tok_type == SECTION && new_token->tok_section->name == ".text") { in_data = false; - } else if(in_data) { + } else if (in_data) { //TODO 
free original con_x con_cmd* data_cmd = new con_cmd; data_cmd->command = code_split[i]; diff --git a/src/deconstruct.h b/src/deconstruct.h index be95614..d8c5cf1 100644 --- a/src/deconstruct.h +++ b/src/deconstruct.h @@ -1,11 +1,11 @@ #include "construct_debug.h" -#include -#include -#include -#include -#include -#include -#include +#include +#include +#include +#include +#include +#include +#include int get_line_indentation(std::string line); CON_TOKENTYPE get_token_type(std::string line); @@ -21,5 +21,6 @@ con_tag* parse_tag(std::string line); con_cmd* parse_cmd(std::string line); con_function* parse_function(std::string line); con_funcall* parse_funcall(std::string line); + con_token* parse_line(std::string line); std::vector parse_construct(std::string code); diff --git a/src/reconstruct.cpp b/src/reconstruct.cpp index d8bfaa5..c320830 100644 --- a/src/reconstruct.cpp +++ b/src/reconstruct.cpp @@ -2,14 +2,16 @@ #include "construct_types.h" using namespace std; + int if_amnt = 0; int while_amnt = 0; CON_BITWIDTH bitwidth = BIT64; -string reg_to_str(uint8_t call_num, CON_BITWIDTH bitwidth) { - switch(bitwidth) { +string reg_to_str(uint8_t call_num, CON_BITWIDTH bitwidth) +{ + switch (bitwidth) { case BIT8: - switch(call_num) { + switch (call_num) { case 0: return "dil"; break; @@ -31,7 +33,7 @@ string reg_to_str(uint8_t call_num, CON_BITWIDTH bitwidth) { } break; case BIT16: - switch(call_num) { + switch (call_num) { case 0: return "di"; break; @@ -53,7 +55,7 @@ string reg_to_str(uint8_t call_num, CON_BITWIDTH bitwidth) { } break; case BIT32: - switch(call_num) { + switch (call_num) { case 0: return "edi"; break; @@ -75,7 +77,7 @@ string reg_to_str(uint8_t call_num, CON_BITWIDTH bitwidth) { } break; case BIT64: - switch(call_num) { + switch (call_num) { case 0: return "rdi"; break; @@ -98,9 +100,9 @@ string reg_to_str(uint8_t call_num, CON_BITWIDTH bitwidth) { break; } } - -string comparison_to_string(CON_COMPARISON condition) { - switch(condition) { +string 
comparison_to_string(CON_COMPARISON condition) +{ + switch (condition) { case E: return "e"; case NE: @@ -116,9 +118,9 @@ string comparison_to_string(CON_COMPARISON condition) { } return "unknown"; } - -CON_COMPARISON get_comparison_inverse(CON_COMPARISON condition) { - switch(condition) { +CON_COMPARISON get_comparison_inverse(CON_COMPARISON condition) +{ + switch (condition) { case E: return NE; case NE: @@ -134,75 +136,71 @@ CON_COMPARISON get_comparison_inverse(CON_COMPARISON condition) { } } -void apply_macro_to_token(con_token& token, vector macros) { - if(token.tok_type != WHILE && token.tok_type != IF && token.tok_type != CMD) { +void apply_macro_to_token(con_token& token, vector macros) +{ + if (token.tok_type != WHILE && token.tok_type != IF && token.tok_type != CMD) { return; } // Unoptimal, but more clear imo - for(int i = 0; i < macros.size(); i++) { + for (int i = 0; i < macros.size(); i++) { con_macro* crntmacro = ¯os[i]; size_t pos; - switch(token.tok_type) { + switch (token.tok_type) { case WHILE: - if(!token.tok_while->condition.arg1.empty() && + if (!token.tok_while->condition.arg1.empty() && (pos = token.tok_while->condition.arg1.find(crntmacro->macro)) != string::npos && (pos == 0 || !isalpha(token.tok_while->condition.arg1[pos-1])) && (pos == token.tok_while->condition.arg1.size()-1 || !isalpha(token.tok_while->condition.arg1[pos+crntmacro->macro.size()]))) { - token.tok_while->condition.arg1.replace(pos, crntmacro->macro.size(), crntmacro->value); } - if(!token.tok_while->condition.arg2.empty() && + if (!token.tok_while->condition.arg2.empty() && (pos = token.tok_while->condition.arg2.find(crntmacro->macro)) != string::npos && (pos == 0 || !isalpha(token.tok_while->condition.arg2[pos-1])) && (pos == token.tok_while->condition.arg2.size()-1 || !isalpha(token.tok_while->condition.arg2[pos+crntmacro->macro.size()]))) { - token.tok_while->condition.arg2.replace(pos, crntmacro->macro.size(), crntmacro->value); } break; case IF: - 
if(!token.tok_if->condition.arg1.empty() && + if (!token.tok_if->condition.arg1.empty() && (pos = token.tok_if->condition.arg1.find(crntmacro->macro)) != string::npos && (pos == 0 || !isalpha(token.tok_if->condition.arg1[pos-1])) && (pos == token.tok_if->condition.arg1.size()-1 || !isalpha(token.tok_if->condition.arg1[pos+crntmacro->macro.size()]))) { - token.tok_if->condition.arg1.replace(pos, crntmacro->macro.size(), crntmacro->value); } - if(!token.tok_if->condition.arg2.empty() && + if (!token.tok_if->condition.arg2.empty() && (pos = token.tok_if->condition.arg2.find(crntmacro->macro)) != string::npos && (pos == 0 || !isalpha(token.tok_if->condition.arg2[pos-1])) && (pos == token.tok_if->condition.arg2.size()-1 || !isalpha(token.tok_if->condition.arg2[pos+crntmacro->macro.size()]))) { - token.tok_if->condition.arg2.replace(pos, crntmacro->macro.size(), crntmacro->value); } break; case CMD: - if(!token.tok_cmd->arg1.empty() && + if (!token.tok_cmd->arg1.empty() && (pos = token.tok_cmd->arg1.find(crntmacro->macro)) != string::npos && (pos == 0 || !isalpha(token.tok_cmd->arg1[pos-1])) && (pos == token.tok_cmd->arg1.size()-1 || !isalpha(token.tok_cmd->arg1[pos+crntmacro->macro.size()]))) { - token.tok_cmd->arg1.replace(pos, crntmacro->macro.size(), crntmacro->value); } - if(!token.tok_cmd->arg2.empty() && + if (!token.tok_cmd->arg2.empty() && (pos = token.tok_cmd->arg2.find(crntmacro->macro)) != string::npos && (pos == 0 || !isalpha(token.tok_cmd->arg2[pos-1])) && (pos == token.tok_cmd->arg2.size()-1 || !isalpha(token.tok_cmd->arg2[pos+crntmacro->macro.size()]))) { - token.tok_cmd->arg2.replace(pos, crntmacro->macro.size(), crntmacro->value); } break; } } } -void apply_funcalls(std::vector& tokens) { - for(int i = 0; i < tokens.size(); i++) { +void apply_funcalls(std::vector& tokens) +{ + for (int i = 0; i < tokens.size(); i++) { apply_funcalls(tokens[i]->tokens); - if(tokens[i]->tok_type != FUNCALL) { + if (tokens[i]->tok_type != FUNCALL) { continue; } vector* 
args = &tokens[i]->tok_funcall->arguments; vector arg_tokens; - for(int j = 0; j < args->size(); j++) { + for (int j = 0; j < args->size(); j++) { con_token* arg_tok = new con_token(); arg_tok->tok_type = CMD; con_cmd* arg_cmd = new con_cmd(); @@ -224,14 +222,15 @@ void apply_funcalls(std::vector& tokens) { } } -void apply_functions(std::vector& tokens) { +void apply_functions(std::vector& tokens) +{ vector* subtokens = &tokens; - for(int i = 0; i < subtokens->size(); i++) { - if((*subtokens)[i]->tok_type != FUNCTION) { + for (int i = 0; i < subtokens->size(); i++) { + if ((*subtokens)[i]->tok_type != FUNCTION) { continue; } con_function* crntfunc = (*subtokens)[i]->tok_function; - if(crntfunc->name == "main") { + if (crntfunc->name == "main") { crntfunc->name = "_start"; } @@ -240,7 +239,7 @@ void apply_functions(std::vector& tokens) { con_tag* functag = new con_tag; tag_tok->tok_tag = functag; functag->name = crntfunc->name; - for(int j = 0; j < crntfunc->arguments.size(); j++) { + for (int j = 0; j < crntfunc->arguments.size(); j++) { con_token* arg_tok = new con_token; arg_tok->tok_type = MACRO; con_macro* arg_macro = new con_macro; @@ -259,20 +258,21 @@ void apply_functions(std::vector& tokens) { (*subtokens)[i]->tokens.push_back(ret_tok); } } -void apply_macros(vector& tokens, vector knownmacros) { - for(int i = 0; i < tokens.size(); i++) { - if(tokens[i]->tok_type == MACRO) { +void apply_macros(vector& tokens, vector knownmacros) +{ + for (int i = 0; i < tokens.size(); i++) { + if (tokens[i]->tok_type == MACRO) { // Filter spaces from macro and value pair - con_macro* f_macro = new con_macro(); + con_macro* f_macro = new con_macro(); f_macro->macro = ""; f_macro->value = ""; - for(int j = 0; j < tokens[i]->tok_macro->macro.size(); j++) { - if(tokens[i]->tok_macro->macro[j] != ' ') { + for (int j = 0; j < tokens[i]->tok_macro->macro.size(); j++) { + if (tokens[i]->tok_macro->macro[j] != ' ') { f_macro->macro += tokens[i]->tok_macro->macro[j]; } } - for(int j 
= 0; j < tokens[i]->tok_macro->value.size(); j++) { - if(tokens[i]->tok_macro->value[j] != ' ') + for (int j = 0; j < tokens[i]->tok_macro->value.size(); j++) { + if (tokens[i]->tok_macro->value[j] != ' ') f_macro->value += tokens[i]->tok_macro->value[j]; } knownmacros.push_back(*f_macro); @@ -280,15 +280,16 @@ void apply_macros(vector& tokens, vector knownmacros) { continue; } apply_macro_to_token(*tokens[i], knownmacros); - if(tokens[i]->tok_type == IF || tokens[i]->tok_type == WHILE || tokens[i]->tok_type == FUNCTION) { + if (tokens[i]->tok_type == IF || tokens[i]->tok_type == WHILE || tokens[i]->tok_type == FUNCTION) { apply_macros(tokens[i]->tokens, knownmacros); } } } -void apply_whiles(vector& tokens) { - for(int i = 0; i< tokens.size(); i++) { +void apply_whiles(vector& tokens) +{ + for (int i = 0; i< tokens.size(); i++) { apply_whiles(tokens[i]->tokens); - if(tokens[i]->tok_type != WHILE) { + if (tokens[i]->tok_type != WHILE) { continue; } con_token* cmp_tok = new con_token; @@ -320,13 +321,13 @@ void apply_whiles(vector& tokens) { endwhile_tok->tok_type = TAG; con_tag* endwhile_tag = new con_tag; endwhile_tok->tok_tag = endwhile_tag; - endwhile_tag->name = endtag_name; + endwhile_tag->name = endtag_name; con_token* startwhile_tok = new con_token; startwhile_tok->tok_type = TAG; con_tag* startwhile_tag = new con_tag; startwhile_tok->tok_tag = startwhile_tag; - startwhile_tag->name = starttag_name; + startwhile_tag->name = starttag_name; while_amnt++; tokens[i]->tokens.insert(tokens[i]->tokens.begin(), jmp_tok); @@ -337,10 +338,11 @@ void apply_whiles(vector& tokens) { // so: starttag, cmp, jmp ... 
jmp, endtag } } -void apply_ifs(vector& tokens) { - for(int i = 0; i< tokens.size(); i++) { +void apply_ifs(vector& tokens) +{ + for (int i = 0; i< tokens.size(); i++) { apply_ifs(tokens[i]->tokens); - if(tokens[i]->tok_type != IF) { + if (tokens[i]->tok_type != IF) { continue; } con_token* cmp_tok = new con_token; @@ -350,7 +352,7 @@ void apply_ifs(vector& tokens) { cmp_cmd->command = "cmp"; cmp_cmd->arg1 = tokens[i]->tok_if->condition.arg1; cmp_cmd->arg2 = tokens[i]->tok_if->condition.arg2; - + string tagname = "endif" + to_string(if_amnt); con_token* jmp_tok = new con_token; @@ -364,7 +366,7 @@ void apply_ifs(vector& tokens) { endif_tok->tok_type = TAG; con_tag* endif_tag = new con_tag; endif_tok->tok_tag = endif_tag; - endif_tag->name = tagname; + endif_tag->name = tagname; if_amnt++; tokens[i]->tokens.insert(tokens[i]->tokens.begin(), jmp_tok); @@ -372,9 +374,10 @@ void apply_ifs(vector& tokens) { tokens[i]->tokens.push_back(endif_tok); } } -void linearize_tokens(vector& tokens) { - for(int i = 0; i < tokens.size(); i++) { - if(tokens[i]->tok_type != IF && tokens[i]->tok_type != WHILE && tokens[i]->tok_type != FUNCTION) { +void linearize_tokens(vector& tokens) +{ + for (int i = 0; i < tokens.size(); i++) { + if (tokens[i]->tok_type != IF && tokens[i]->tok_type != WHILE && tokens[i]->tok_type != FUNCTION) { continue; } vector* subtokens = &tokens[i]->tokens; @@ -384,28 +387,31 @@ void linearize_tokens(vector& tokens) { } } -std::string tokens_to_nasm(std::vector& tokens) { +std::string tokens_to_nasm(std::vector& tokens) +{ string output = ""; - for(int i = 0; i < tokens.size(); i++) { - if(tokens[i]->tok_type == IF || tokens[i]->tok_type == WHILE || tokens[i]->tok_type == FUNCTION || tokens[i]->tok_type == MACRO || tokens[i]->tok_type == FUNCALL) { + for (int i = 0; i < tokens.size(); i++) { + if (tokens[i]->tok_type == IF || tokens[i]->tok_type == WHILE + || tokens[i]->tok_type == FUNCTION || tokens[i]->tok_type == MACRO + || tokens[i]->tok_type == FUNCALL) { 
continue; } output += "\n"; - if(tokens[i]->tok_type == CMD) { + if (tokens[i]->tok_type == CMD) { output += tokens[i]->tok_cmd->command; - if(!tokens[i]->tok_cmd->arg1.empty()) { + if (!tokens[i]->tok_cmd->arg1.empty()) { output += " " + tokens[i]->tok_cmd->arg1; } - if(!tokens[i]->tok_cmd->arg2.empty()) { + if (!tokens[i]->tok_cmd->arg2.empty()) { output += ", " + tokens[i]->tok_cmd->arg2; } continue; } - if(tokens[i]->tok_type == TAG) { + if (tokens[i]->tok_type == TAG) { output += tokens[i]->tok_tag->name + ":"; continue; } - if(tokens[i]->tok_type == SECTION) { + if (tokens[i]->tok_type == SECTION) { output += "section " + tokens[i]->tok_section->name; continue; } diff --git a/src/reconstruct.h b/src/reconstruct.h index 3147859..e84f94b 100644 --- a/src/reconstruct.h +++ b/src/reconstruct.h @@ -1,6 +1,6 @@ -#include -#include -#include +#include +#include +#include #include "construct_types.h" // Used for naming tags @@ -16,7 +16,7 @@ CON_COMPARISON get_comparison_inverse(CON_COMPARISON condition); void linearize_tokens(std::vector& tokens); void apply_macro_to_token(con_token& token, std::vector macros); -// The following functions transform the construct specific tokens to nasm ones, +// The following functions transform the construct specific tokens to nasm ones, // the parent construct tokens remain, but are removed during linearization // Converts args to macros and adds tag with same name to child tokens From 7eee68660f1189abeca4994e054292e53d82c8be Mon Sep 17 00:00:00 2001 From: NoneSince Date: Thu, 25 Jan 2024 21:47:06 +0200 Subject: [PATCH 04/10] reordered switch cases and function definitions that are related to the enums in src/construct_types.h --- src/construct_flags.cpp | 16 ++-- src/construct_types.h | 20 ++--- src/deconstruct.cpp | 108 +++++++++++------------ src/deconstruct.h | 8 +- src/reconstruct.cpp | 187 ++++++++++++++++++++-------------------- src/reconstruct.h | 12 +-- 6 files changed, 176 insertions(+), 175 deletions(-) diff --git 
a/src/construct_flags.cpp b/src/construct_flags.cpp index a4559e6..a620ab3 100644 --- a/src/construct_flags.cpp +++ b/src/construct_flags.cpp @@ -5,20 +5,20 @@ using namespace std; int set_bitwidth(char* argv) { - if (strcmp(argv, "elf64") == 0) { - bitwidth = BIT64; - return 0; - } - if (strcmp(argv, "elf32") == 0) { - bitwidth = BIT32; + if (strcmp(argv, "elf8") == 0) { + bitwidth = BIT8; return 0; } if (strcmp(argv, "elf16") == 0) { bitwidth = BIT16; return 0; } - if (strcmp(argv, "elf8") == 0) { - bitwidth = BIT8; + if (strcmp(argv, "elf32") == 0) { + bitwidth = BIT32; + return 0; + } + if (strcmp(argv, "elf64") == 0) { + bitwidth = BIT64; return 0; } cout << "\"" << argv << "\" not a supported format" << endl; diff --git a/src/construct_types.h b/src/construct_types.h index 212187a..7707e56 100644 --- a/src/construct_types.h +++ b/src/construct_types.h @@ -40,15 +40,16 @@ struct con_token { struct con_while* tok_while; struct con_if* tok_if; struct con_function* tok_function; - struct con_funcall* tok_funcall; struct con_cmd* tok_cmd; struct con_macro* tok_macro; + struct con_funcall* tok_funcall; std::vector tokens; // Only non-empty for if, while and function tokens }; -struct con_macro { - std::string value; - std::string macro; +struct con_condition { + CON_COMPARISON op; + std::string arg1; + std::string arg2; }; struct con_section { @@ -59,12 +60,6 @@ struct con_tag { std::string name; }; -struct con_condition { - CON_COMPARISON op; - std::string arg1; - std::string arg2; -}; - struct con_while { con_condition condition; }; @@ -84,6 +79,11 @@ struct con_cmd { std::string arg2; }; +struct con_macro { + std::string value; + std::string macro; +}; + struct con_funcall { std::string funcname; std::vector arguments; diff --git a/src/deconstruct.cpp b/src/deconstruct.cpp index e6d28e6..b03d340 100644 --- a/src/deconstruct.cpp +++ b/src/deconstruct.cpp @@ -19,20 +19,20 @@ int get_line_indentation(string line) // Expects formatted line CON_TOKENTYPE 
get_token_type(string line) { - if (line[0] == '!') - return MACRO; - if (line.substr(0, 3) == "if ") - return IF; + if (line.substr(0, 8) == "section ") + return SECTION; + if (line.find(' ') == string::npos && line[line.size()-1] == ':') + return TAG; if (line.substr(0, 6) == "while ") return WHILE; + if (line.substr(0, 3) == "if ") + return IF; if (line.substr(0, 9) == "function ") return FUNCTION; - if (line.substr(0, 8) == "section ") - return SECTION; + if (line[0] == '!') + return MACRO; if (line.substr(0, 5) == "call " && line.find('(') != string::npos && line.find(')') != string::npos) return FUNCALL; - if (line.find(' ') == string::npos && line[line.size()-1] == ':') - return TAG; return CMD; } @@ -99,23 +99,19 @@ vector delinearize_tokens(std::vector tokens) return delinearized_tokens; } -con_macro* parse_macro(string line) -{ - con_macro* tok_macro = new con_macro(); - int spacepos = line.find(' '); - tok_macro->macro = line.substr(1, spacepos-1); - tok_macro->value = line.substr(spacepos+1, line.size()-spacepos-1); - return tok_macro; -} -con_if* parse_if(string line) +con_section* parse_section(string line) { - con_if* tok_if = new con_if(); + con_section* tok_section = new con_section(); vector line_split; boost::split(line_split, line, boost::is_any_of(" ")); - tok_if->condition.arg1 = line_split[1]; - tok_if->condition.op = str_to_comparison(line_split[2]); - tok_if->condition.arg2 = line_split[3].substr(0, line_split[3].size()-1); - return tok_if; + tok_section->name = line_split[1]; + return tok_section; +} +con_tag* parse_tag(string line) +{ + con_tag* tok_tag = new con_tag(); + tok_tag->name = line.substr(0, line.size()-1); + return tok_tag; } con_while* parse_while(string line) { @@ -127,19 +123,29 @@ con_while* parse_while(string line) tok_while->condition.arg2 = line_split[3].substr(0, line_split[3].size()-1); // to remove : return tok_while; } -con_section* parse_section(string line) +con_if* parse_if(string line) { - con_section* 
tok_section = new con_section(); + con_if* tok_if = new con_if(); vector line_split; boost::split(line_split, line, boost::is_any_of(" ")); - tok_section->name = line_split[1]; - return tok_section; + tok_if->condition.arg1 = line_split[1]; + tok_if->condition.op = str_to_comparison(line_split[2]); + tok_if->condition.arg2 = line_split[3].substr(0, line_split[3].size()-1); + return tok_if; } -con_tag* parse_tag(string line) +con_function* parse_function(string line) { - con_tag* tok_tag = new con_tag(); - tok_tag->name = line.substr(0, line.size()-1); - return tok_tag; + con_function* tok_function = new con_function(); + vector line_split; + boost::split(line_split, line, boost::is_any_of("():,")); + tok_function->name = line_split[0].substr(9, line_split[0].size()-9); + for (int i = 1; i < line_split.size()-2; i++) { + if (line_split[i].empty()) { + continue; + } + tok_function->arguments.push_back(line_split[i]); // macros filter out spaces anyway when applied + } + return tok_function; } con_cmd* parse_cmd(string line) { @@ -153,19 +159,13 @@ con_cmd* parse_cmd(string line) tok_cmd->arg2 = line_split[3]; return tok_cmd; } -con_function* parse_function(string line) +con_macro* parse_macro(string line) { - con_function* tok_function = new con_function(); - vector line_split; - boost::split(line_split, line, boost::is_any_of("():,")); - tok_function->name = line_split[0].substr(9, line_split[0].size()-9); - for (int i = 1; i < line_split.size()-2; i++) { - if (line_split[i].empty()) { - continue; - } - tok_function->arguments.push_back(line_split[i]); // macros filter out spaces anyway when applied - } - return tok_function; + con_macro* tok_macro = new con_macro(); + int spacepos = line.find(' '); + tok_macro->macro = line.substr(1, spacepos-1); + tok_macro->value = line.substr(spacepos+1, line.size()-spacepos-1); + return tok_macro; } con_funcall* parse_funcall(string line) { @@ -204,29 +204,29 @@ con_token* parse_line(string line) } token->tok_type = 
get_token_type(f_line); switch (token->tok_type) { - case MACRO: - token->tok_macro = parse_macro(f_line); + case SECTION: + token->tok_section = parse_section(f_line); break; - case IF: - token->tok_if = parse_if(f_line); + case TAG: + token->tok_tag = parse_tag(f_line); break; case WHILE: token->tok_while = parse_while(f_line); break; + case IF: + token->tok_if = parse_if(f_line); + break; case FUNCTION: token->tok_function = parse_function(f_line); break; - case FUNCALL: - token->tok_funcall = parse_funcall(f_line); - case SECTION: - token->tok_section = parse_section(f_line); - break; - case TAG: - token->tok_tag = parse_tag(f_line); - break; case CMD: token->tok_cmd = parse_cmd(f_line); break; + case MACRO: + token->tok_macro = parse_macro(f_line); + break; + case FUNCALL: + token->tok_funcall = parse_funcall(f_line); } return token; } diff --git a/src/deconstruct.h b/src/deconstruct.h index d8c5cf1..1f00e56 100644 --- a/src/deconstruct.h +++ b/src/deconstruct.h @@ -13,13 +13,13 @@ CON_COMPARISON str_to_comparison(std::string comp); std::vector delinearize_tokens(std::vector tokens); -con_macro* parse_macro(std::string line); -con_if* parse_if(std::string line); -con_while* parse_while(std::string line); con_section* parse_section(std::string line); con_tag* parse_tag(std::string line); -con_cmd* parse_cmd(std::string line); +con_while* parse_while(std::string line); +con_if* parse_if(std::string line); con_function* parse_function(std::string line); +con_cmd* parse_cmd(std::string line); +con_macro* parse_macro(std::string line); con_funcall* parse_funcall(std::string line); con_token* parse_line(std::string line); diff --git a/src/reconstruct.cpp b/src/reconstruct.cpp index c320830..31f5872 100644 --- a/src/reconstruct.cpp +++ b/src/reconstruct.cpp @@ -191,100 +191,7 @@ void apply_macro_to_token(con_token& token, vector macros) } } } -void apply_funcalls(std::vector& tokens) -{ - for (int i = 0; i < tokens.size(); i++) { - apply_funcalls(tokens[i]->tokens); 
- if (tokens[i]->tok_type != FUNCALL) { - continue; - } - vector* args = &tokens[i]->tok_funcall->arguments; - vector arg_tokens; - for (int j = 0; j < args->size(); j++) { - con_token* arg_tok = new con_token(); - arg_tok->tok_type = CMD; - con_cmd* arg_cmd = new con_cmd(); - arg_tok->tok_cmd = arg_cmd; - arg_cmd->command = "mov"; - arg_cmd->arg1 = reg_to_str(j, bitwidth); - arg_cmd->arg2 = (*args)[j]; - arg_tokens.push_back(arg_tok); - } - con_token* call_tok = new con_token(); - call_tok->tok_type = CMD; - con_cmd* call_cmd = new con_cmd(); - call_tok->tok_cmd = call_cmd; - call_cmd->command = "call"; - call_cmd->arg1 = tokens[i]->tok_funcall->funcname; - arg_tokens.push_back(call_tok); - - tokens.insert(tokens.begin()+i+1, arg_tokens.begin(), arg_tokens.end()); - } -} - -void apply_functions(std::vector& tokens) -{ - vector* subtokens = &tokens; - for (int i = 0; i < subtokens->size(); i++) { - if ((*subtokens)[i]->tok_type != FUNCTION) { - continue; - } - con_function* crntfunc = (*subtokens)[i]->tok_function; - if (crntfunc->name == "main") { - crntfunc->name = "_start"; - } - - con_token* tag_tok = new con_token; - tag_tok->tok_type = TAG; - con_tag* functag = new con_tag; - tag_tok->tok_tag = functag; - functag->name = crntfunc->name; - for (int j = 0; j < crntfunc->arguments.size(); j++) { - con_token* arg_tok = new con_token; - arg_tok->tok_type = MACRO; - con_macro* arg_macro = new con_macro; - arg_macro->value = reg_to_str(j, bitwidth); - arg_macro->macro = crntfunc->arguments[j]; - arg_tok->tok_macro = arg_macro; - (*subtokens)[i]->tokens.insert((*subtokens)[i]->tokens.begin(), arg_tok); - } - (*subtokens)[i]->tokens.insert((*subtokens)[i]->tokens.begin(), tag_tok); - con_token* ret_tok = new con_token; - ret_tok->tok_type = CMD; - con_cmd* ret_cmd = new con_cmd; - ret_tok->tok_cmd = ret_cmd; - ret_cmd->command = "ret"; - (*subtokens)[i]->tokens.push_back(ret_tok); - } -} -void apply_macros(vector& tokens, vector knownmacros) -{ - for (int i = 0; i < 
tokens.size(); i++) { - if (tokens[i]->tok_type == MACRO) { - // Filter spaces from macro and value pair - con_macro* f_macro = new con_macro(); - f_macro->macro = ""; - f_macro->value = ""; - for (int j = 0; j < tokens[i]->tok_macro->macro.size(); j++) { - if (tokens[i]->tok_macro->macro[j] != ' ') { - f_macro->macro += tokens[i]->tok_macro->macro[j]; - } - } - for (int j = 0; j < tokens[i]->tok_macro->value.size(); j++) { - if (tokens[i]->tok_macro->value[j] != ' ') - f_macro->value += tokens[i]->tok_macro->value[j]; - } - knownmacros.push_back(*f_macro); - delete f_macro; - continue; - } - apply_macro_to_token(*tokens[i], knownmacros); - if (tokens[i]->tok_type == IF || tokens[i]->tok_type == WHILE || tokens[i]->tok_type == FUNCTION) { - apply_macros(tokens[i]->tokens, knownmacros); - } - } -} void apply_whiles(vector& tokens) { for (int i = 0; i< tokens.size(); i++) { @@ -374,6 +281,100 @@ void apply_ifs(vector& tokens) tokens[i]->tokens.push_back(endif_tok); } } +void apply_functions(std::vector& tokens) +{ + vector* subtokens = &tokens; + for (int i = 0; i < subtokens->size(); i++) { + if ((*subtokens)[i]->tok_type != FUNCTION) { + continue; + } + con_function* crntfunc = (*subtokens)[i]->tok_function; + if (crntfunc->name == "main") { + crntfunc->name = "_start"; + } + + con_token* tag_tok = new con_token; + tag_tok->tok_type = TAG; + con_tag* functag = new con_tag; + tag_tok->tok_tag = functag; + functag->name = crntfunc->name; + for (int j = 0; j < crntfunc->arguments.size(); j++) { + con_token* arg_tok = new con_token; + arg_tok->tok_type = MACRO; + con_macro* arg_macro = new con_macro; + arg_macro->value = reg_to_str(j, bitwidth); + arg_macro->macro = crntfunc->arguments[j]; + arg_tok->tok_macro = arg_macro; + + (*subtokens)[i]->tokens.insert((*subtokens)[i]->tokens.begin(), arg_tok); + } + (*subtokens)[i]->tokens.insert((*subtokens)[i]->tokens.begin(), tag_tok); + con_token* ret_tok = new con_token; + ret_tok->tok_type = CMD; + con_cmd* ret_cmd = new 
con_cmd; + ret_tok->tok_cmd = ret_cmd; + ret_cmd->command = "ret"; + (*subtokens)[i]->tokens.push_back(ret_tok); + } +} +void apply_macros(vector& tokens, vector knownmacros) +{ + for (int i = 0; i < tokens.size(); i++) { + if (tokens[i]->tok_type == MACRO) { + // Filter spaces from macro and value pair + con_macro* f_macro = new con_macro(); + f_macro->macro = ""; + f_macro->value = ""; + for (int j = 0; j < tokens[i]->tok_macro->macro.size(); j++) { + if (tokens[i]->tok_macro->macro[j] != ' ') { + f_macro->macro += tokens[i]->tok_macro->macro[j]; + } + } + for (int j = 0; j < tokens[i]->tok_macro->value.size(); j++) { + if (tokens[i]->tok_macro->value[j] != ' ') + f_macro->value += tokens[i]->tok_macro->value[j]; + } + knownmacros.push_back(*f_macro); + delete f_macro; + continue; + } + apply_macro_to_token(*tokens[i], knownmacros); + if (tokens[i]->tok_type == IF || tokens[i]->tok_type == WHILE || tokens[i]->tok_type == FUNCTION) { + apply_macros(tokens[i]->tokens, knownmacros); + } + } +} +void apply_funcalls(std::vector& tokens) +{ + for (int i = 0; i < tokens.size(); i++) { + apply_funcalls(tokens[i]->tokens); + if (tokens[i]->tok_type != FUNCALL) { + continue; + } + vector* args = &tokens[i]->tok_funcall->arguments; + vector arg_tokens; + for (int j = 0; j < args->size(); j++) { + con_token* arg_tok = new con_token(); + arg_tok->tok_type = CMD; + con_cmd* arg_cmd = new con_cmd(); + arg_tok->tok_cmd = arg_cmd; + arg_cmd->command = "mov"; + arg_cmd->arg1 = reg_to_str(j, bitwidth); + arg_cmd->arg2 = (*args)[j]; + arg_tokens.push_back(arg_tok); + } + con_token* call_tok = new con_token(); + call_tok->tok_type = CMD; + con_cmd* call_cmd = new con_cmd(); + call_tok->tok_cmd = call_cmd; + call_cmd->command = "call"; + call_cmd->arg1 = tokens[i]->tok_funcall->funcname; + arg_tokens.push_back(call_tok); + + tokens.insert(tokens.begin()+i+1, arg_tokens.begin(), arg_tokens.end()); + } +} + void linearize_tokens(vector& tokens) { for (int i = 0; i < tokens.size(); i++) 
{ diff --git a/src/reconstruct.h b/src/reconstruct.h index e84f94b..e2fdb54 100644 --- a/src/reconstruct.h +++ b/src/reconstruct.h @@ -12,18 +12,18 @@ std::string reg_to_str(uint8_t call_num, CON_BITWIDTH bitwidth); std::string comparison_to_string(CON_COMPARISON condition); CON_COMPARISON get_comparison_inverse(CON_COMPARISON condition); -// During linearization, the construct parent tokens are removed -void linearize_tokens(std::vector& tokens); -void apply_macro_to_token(con_token& token, std::vector macros); - // The following functions transform the construct specific tokens to nasm ones, // the parent construct tokens remain, but are removed during linearization +void apply_macro_to_token(con_token& token, std::vector macros); // Converts args to macros and adds tag with same name to child tokens -void apply_functions(std::vector& tokens); -void apply_funcalls(std::vector& tokens); void apply_whiles(std::vector& tokens); void apply_ifs(std::vector& tokens); +void apply_functions(std::vector& tokens); void apply_macros(std::vector& tokens, std::vector macros); +void apply_funcalls(std::vector& tokens); + +// During linearization, the construct parent tokens are removed +void linearize_tokens(std::vector& tokens); std::string tokens_to_nasm(std::vector& tokens); From 4d03685d240ce579e0812411e7eee79e6877dd54 Mon Sep 17 00:00:00 2001 From: NoneSince Date: Thu, 25 Jan 2024 22:04:52 +0200 Subject: [PATCH 05/10] warning 1: switch isn't handling all cases warning 2: functions missing return statements warning 3: for loop index is of type int but is compared to a value of type size_t --- src/construct_debug.cpp | 8 ++++++-- src/deconstruct.cpp | 14 +++++++------- src/reconstruct.cpp | 28 ++++++++++++++++------------ 3 files changed, 29 insertions(+), 21 deletions(-) diff --git a/src/construct_debug.cpp b/src/construct_debug.cpp index 48a443b..7631bb5 100644 --- a/src/construct_debug.cpp +++ b/src/construct_debug.cpp @@ -17,6 +17,8 @@ std::string 
tokentype_to_string(CON_TOKENTYPE type) return "cmd"; case MACRO: return "macro"; + case FUNCALL: + return "funcall"; } return "unknown"; } @@ -39,7 +41,7 @@ std::string token_to_string(con_token token) break; case FUNCTION: tokstring += ", function: " + token.tok_function->name + ", arguments: "; - for (int i = 0; i < token.tok_function->arguments.size(); i++) { + for (size_t i = 0; i < token.tok_function->arguments.size(); i++) { if (i != 0) { tokstring += ", "; } @@ -60,10 +62,12 @@ std::string token_to_string(con_token token) case MACRO: tokstring += ", macro: " + token.tok_macro->macro + ", value: " + token.tok_macro->value; break; + default: // FUNCALL + break; } if (token.tokens.size() > 0) { tokstring += ", tokens: {\n"; - for (int i = 0; i < token.tokens.size(); i++) { + for (size_t i = 0; i < token.tokens.size(); i++) { tokstring += token_to_string(*token.tokens[i]) + "\n"; } tokstring += "}"; diff --git a/src/deconstruct.cpp b/src/deconstruct.cpp index b03d340..e71f571 100644 --- a/src/deconstruct.cpp +++ b/src/deconstruct.cpp @@ -6,7 +6,7 @@ using namespace std; int get_line_indentation(string line) { int indentation = 0; - for (int i = 0; i < line.size(); i++) { + for (size_t i = 0; i < line.size(); i++) { if (line[i] == '\t') { indentation++; continue; @@ -50,7 +50,7 @@ CON_COMPARISON str_to_comparison(string comp) return LE; if (comp == "ge") return GE; - //ERROR + return E; //ERROR but there is no error value in the enum } @@ -76,7 +76,7 @@ vector delinearize_tokens(std::vector tokens) // If token is while, if or function it is pushed to stack and becomes new parent. // if indentation goes up, new token is pushed to stack, when indentation goes down, // tops of stack are popped off by how much it decreased. 
- for (int i = 0; i < tokens.size(); i++) { + for (size_t i = 0; i < tokens.size(); i++) { if (parent_stack.top()->indentation - tokens[i]->indentation >= 0) { int indentation_diff = parent_stack.top()->indentation - tokens[i]->indentation+1; for (int j = 0; j < indentation_diff; j++) { @@ -139,7 +139,7 @@ con_function* parse_function(string line) vector line_split; boost::split(line_split, line, boost::is_any_of("():,")); tok_function->name = line_split[0].substr(9, line_split[0].size()-9); - for (int i = 1; i < line_split.size()-2; i++) { + for (size_t i = 1; i < line_split.size()-2; i++) { if (line_split[i].empty()) { continue; } @@ -173,7 +173,7 @@ con_funcall* parse_funcall(string line) vector line_split; boost::split(line_split, line, boost::is_any_of("(),")); tok_funcall->funcname = line_split[0].substr(5, line_split[0].size()-5); - for (int i = 1; i < line_split.size()-1; i++) { + for (size_t i = 1; i < line_split.size()-1; i++) { if (line_split[i].empty()) { continue; } @@ -189,7 +189,7 @@ con_token* parse_line(string line) //remove multiple spaces from line string f_line = ""; bool caught_space = false; - for (int i = 0; i < line.size(); i++) { + for (size_t i = 0; i < line.size(); i++) { if (line[i] == ' ') { if (!caught_space) { f_line += line[i]; @@ -237,7 +237,7 @@ vector parse_construct(string code) boost::to_lower(code); vector tokens; bool in_data = false; - for (int i = 0; i < code_split.size(); i++) { + for (size_t i = 0; i < code_split.size(); i++) { // Check if it contains any alphabet chars if (code_split[i].find_first_of("abcdefghijklmnopqrstuvwxyz!") == std::string::npos) { continue; diff --git a/src/reconstruct.cpp b/src/reconstruct.cpp index 31f5872..a99ac34 100644 --- a/src/reconstruct.cpp +++ b/src/reconstruct.cpp @@ -99,6 +99,7 @@ string reg_to_str(uint8_t call_num, CON_BITWIDTH bitwidth) } break; } + return "unknown"; } string comparison_to_string(CON_COMPARISON condition) { @@ -134,6 +135,7 @@ CON_COMPARISON 
get_comparison_inverse(CON_COMPARISON condition) case GE: return L; } + return E; //ERROR but there is no error value in the enum } void apply_macro_to_token(con_token& token, vector macros) @@ -142,7 +144,7 @@ void apply_macro_to_token(con_token& token, vector macros) return; } // Unoptimal, but more clear imo - for (int i = 0; i < macros.size(); i++) { + for (size_t i = 0; i < macros.size(); i++) { con_macro* crntmacro = ¯os[i]; size_t pos; switch (token.tok_type) { @@ -188,13 +190,15 @@ void apply_macro_to_token(con_token& token, vector macros) token.tok_cmd->arg2.replace(pos, crntmacro->macro.size(), crntmacro->value); } break; + default: + break; } } } void apply_whiles(vector& tokens) { - for (int i = 0; i< tokens.size(); i++) { + for (size_t i = 0; i< tokens.size(); i++) { apply_whiles(tokens[i]->tokens); if (tokens[i]->tok_type != WHILE) { continue; @@ -247,7 +251,7 @@ void apply_whiles(vector& tokens) } void apply_ifs(vector& tokens) { - for (int i = 0; i< tokens.size(); i++) { + for (size_t i = 0; i< tokens.size(); i++) { apply_ifs(tokens[i]->tokens); if (tokens[i]->tok_type != IF) { continue; @@ -284,7 +288,7 @@ void apply_ifs(vector& tokens) void apply_functions(std::vector& tokens) { vector* subtokens = &tokens; - for (int i = 0; i < subtokens->size(); i++) { + for (size_t i = 0; i < subtokens->size(); i++) { if ((*subtokens)[i]->tok_type != FUNCTION) { continue; } @@ -298,7 +302,7 @@ void apply_functions(std::vector& tokens) con_tag* functag = new con_tag; tag_tok->tok_tag = functag; functag->name = crntfunc->name; - for (int j = 0; j < crntfunc->arguments.size(); j++) { + for (size_t j = 0; j < crntfunc->arguments.size(); j++) { con_token* arg_tok = new con_token; arg_tok->tok_type = MACRO; con_macro* arg_macro = new con_macro; @@ -319,18 +323,18 @@ void apply_functions(std::vector& tokens) } void apply_macros(vector& tokens, vector knownmacros) { - for (int i = 0; i < tokens.size(); i++) { + for (size_t i = 0; i < tokens.size(); i++) { if 
(tokens[i]->tok_type == MACRO) { // Filter spaces from macro and value pair con_macro* f_macro = new con_macro(); f_macro->macro = ""; f_macro->value = ""; - for (int j = 0; j < tokens[i]->tok_macro->macro.size(); j++) { + for (size_t j = 0; j < tokens[i]->tok_macro->macro.size(); j++) { if (tokens[i]->tok_macro->macro[j] != ' ') { f_macro->macro += tokens[i]->tok_macro->macro[j]; } } - for (int j = 0; j < tokens[i]->tok_macro->value.size(); j++) { + for (size_t j = 0; j < tokens[i]->tok_macro->value.size(); j++) { if (tokens[i]->tok_macro->value[j] != ' ') f_macro->value += tokens[i]->tok_macro->value[j]; } @@ -346,14 +350,14 @@ void apply_macros(vector& tokens, vector knownmacros) } void apply_funcalls(std::vector& tokens) { - for (int i = 0; i < tokens.size(); i++) { + for (size_t i = 0; i < tokens.size(); i++) { apply_funcalls(tokens[i]->tokens); if (tokens[i]->tok_type != FUNCALL) { continue; } vector* args = &tokens[i]->tok_funcall->arguments; vector arg_tokens; - for (int j = 0; j < args->size(); j++) { + for (size_t j = 0; j < args->size(); j++) { con_token* arg_tok = new con_token(); arg_tok->tok_type = CMD; con_cmd* arg_cmd = new con_cmd(); @@ -377,7 +381,7 @@ void apply_funcalls(std::vector& tokens) void linearize_tokens(vector& tokens) { - for (int i = 0; i < tokens.size(); i++) { + for (size_t i = 0; i < tokens.size(); i++) { if (tokens[i]->tok_type != IF && tokens[i]->tok_type != WHILE && tokens[i]->tok_type != FUNCTION) { continue; } @@ -391,7 +395,7 @@ void linearize_tokens(vector& tokens) std::string tokens_to_nasm(std::vector& tokens) { string output = ""; - for (int i = 0; i < tokens.size(); i++) { + for (size_t i = 0; i < tokens.size(); i++) { if (tokens[i]->tok_type == IF || tokens[i]->tok_type == WHILE || tokens[i]->tok_type == FUNCTION || tokens[i]->tok_type == MACRO || tokens[i]->tok_type == FUNCALL) { From 164ee3a4ae765dc20ca9e9a64cfb24d37e836f2c Mon Sep 17 00:00:00 2001 From: NoneSince Date: Thu, 25 Jan 2024 22:24:51 +0200 Subject: [PATCH 
06/10] bitwidth is global so no need to pass its value as a function argument. also it is used in src/construct_flags.cpp so I added a declaration. con_condition looks like the token types while also being only used in the file itself so I renamed it to _con_condition. apply_macro_to_token() is used only in the file itself, so I set it as static and removed it from the header. --- src/construct_flags.cpp | 2 ++ src/construct_types.h | 6 +++--- src/reconstruct.cpp | 8 ++++---- src/reconstruct.h | 3 +-- 4 files changed, 10 insertions(+), 9 deletions(-) diff --git a/src/construct_flags.cpp b/src/construct_flags.cpp index a620ab3..c268eb7 100644 --- a/src/construct_flags.cpp +++ b/src/construct_flags.cpp @@ -1,6 +1,8 @@ #include "construct_flags.h" #include "construct_types.h" +extern CON_BITWIDTH bitwidth; + using namespace std; int set_bitwidth(char* argv) diff --git a/src/construct_types.h b/src/construct_types.h index 7707e56..15b9989 100644 --- a/src/construct_types.h +++ b/src/construct_types.h @@ -46,7 +46,7 @@ struct con_token { std::vector tokens; // Only non-empty for if, while and function tokens }; -struct con_condition { +struct _con_condition { CON_COMPARISON op; std::string arg1; std::string arg2; @@ -61,11 +61,11 @@ struct con_tag { }; struct con_while { - con_condition condition; + _con_condition condition; }; struct con_if { - con_condition condition; + _con_condition condition; }; struct con_function { diff --git a/src/reconstruct.cpp b/src/reconstruct.cpp index a99ac34..bccbd30 100644 --- a/src/reconstruct.cpp +++ b/src/reconstruct.cpp @@ -7,7 +7,7 @@ int if_amnt = 0; int while_amnt = 0; CON_BITWIDTH bitwidth = BIT64; -string reg_to_str(uint8_t call_num, CON_BITWIDTH bitwidth) +string reg_to_str(uint8_t call_num) { switch (bitwidth) { case BIT8: @@ -138,7 +138,7 @@ CON_COMPARISON get_comparison_inverse(CON_COMPARISON condition) return E; //ERROR but there is no error value in the enum } -void apply_macro_to_token(con_token& token, vector macros) +static void
apply_macro_to_token(con_token& token, vector macros) { if (token.tok_type != WHILE && token.tok_type != IF && token.tok_type != CMD) { return; @@ -306,7 +306,7 @@ void apply_functions(std::vector& tokens) con_token* arg_tok = new con_token; arg_tok->tok_type = MACRO; con_macro* arg_macro = new con_macro; - arg_macro->value = reg_to_str(j, bitwidth); + arg_macro->value = reg_to_str(j); arg_macro->macro = crntfunc->arguments[j]; arg_tok->tok_macro = arg_macro; @@ -363,7 +363,7 @@ void apply_funcalls(std::vector& tokens) con_cmd* arg_cmd = new con_cmd(); arg_tok->tok_cmd = arg_cmd; arg_cmd->command = "mov"; - arg_cmd->arg1 = reg_to_str(j, bitwidth); + arg_cmd->arg1 = reg_to_str(j); arg_cmd->arg2 = (*args)[j]; arg_tokens.push_back(arg_tok); } diff --git a/src/reconstruct.h b/src/reconstruct.h index e2fdb54..15c9150 100644 --- a/src/reconstruct.h +++ b/src/reconstruct.h @@ -8,13 +8,12 @@ extern int if_amnt; extern int while_amnt; extern CON_BITWIDTH bitwidth; -std::string reg_to_str(uint8_t call_num, CON_BITWIDTH bitwidth); +std::string reg_to_str(uint8_t call_num); std::string comparison_to_string(CON_COMPARISON condition); CON_COMPARISON get_comparison_inverse(CON_COMPARISON condition); // The following functions transform the construct specific tokens to nasm ones, // the parent construct tokens remain, but are removed during linearization -void apply_macro_to_token(con_token& token, std::vector macros); // Converts args to macros and adds tag with same name to child tokens void apply_whiles(std::vector& tokens); From 01eac186fa10b66d9632a07963420f0bb07120ed Mon Sep 17 00:00:00 2001 From: NoneSince Date: Thu, 25 Jan 2024 22:26:28 +0200 Subject: [PATCH 07/10] added include guards to header files --- src/construct_debug.h | 5 +++++ src/construct_flags.h | 5 +++++ src/construct_types.h | 6 +++--- src/deconstruct.h | 5 +++++ src/reconstruct.h | 5 +++++ 5 files changed, 23 insertions(+), 3 deletions(-) diff --git a/src/construct_debug.h b/src/construct_debug.h index 
df7bbc2..5cfa4c6 100644 --- a/src/construct_debug.h +++ b/src/construct_debug.h @@ -1,3 +1,6 @@ +#ifndef CONSTRUCT_DEBUG_H_ +#define CONSTRUCT_DEBUG_H_ + #include #include #include "construct_types.h" @@ -5,3 +8,5 @@ std::string tokentype_to_string(CON_TOKENTYPE type); std::string token_to_string(con_token token); + +#endif // CONSTRUCT_DEBUG_H_ diff --git a/src/construct_flags.h b/src/construct_flags.h index a0df1b3..2c672b8 100644 --- a/src/construct_flags.h +++ b/src/construct_flags.h @@ -1,3 +1,6 @@ +#ifndef CONSTRUCT_FLAGS_H_ +#define CONSTRUCT_FLAGS_H_ + #include #include #include @@ -5,3 +8,5 @@ int set_bitwidth(char* argv); int handle_flags(int argc, char** argv, std::string* path, std::string* outpath); + +#endif // CONSTRUCT_FLAGS_H_ diff --git a/src/construct_types.h b/src/construct_types.h index 15b9989..5f76bbc 100644 --- a/src/construct_types.h +++ b/src/construct_types.h @@ -1,5 +1,5 @@ -#ifndef CON_TYPES_H -#define CON_TYPES_H +#ifndef CONSTRUCT_TYPES_H_ +#define CONSTRUCT_TYPES_H_ #include #include @@ -89,4 +89,4 @@ struct con_funcall { std::vector arguments; }; -#endif +#endif // CONSTRUCT_TYPES_H_ diff --git a/src/deconstruct.h b/src/deconstruct.h index 1f00e56..2a390fa 100644 --- a/src/deconstruct.h +++ b/src/deconstruct.h @@ -1,3 +1,6 @@ +#ifndef DECONSTRUCT_H_ +#define DECONSTRUCT_H_ + #include "construct_debug.h" #include #include @@ -24,3 +27,5 @@ con_funcall* parse_funcall(std::string line); con_token* parse_line(std::string line); std::vector parse_construct(std::string code); + +#endif // DECONSTRUCT_H_ diff --git a/src/reconstruct.h b/src/reconstruct.h index 15c9150..87501c9 100644 --- a/src/reconstruct.h +++ b/src/reconstruct.h @@ -1,3 +1,6 @@ +#ifndef RECONSTRUCT_H_ +#define RECONSTRUCT_H_ + #include #include #include @@ -26,3 +29,5 @@ void apply_funcalls(std::vector& tokens); void linearize_tokens(std::vector& tokens); std::string tokens_to_nasm(std::vector& tokens); + +#endif // RECONSTRUCT_H_ From 
6a08c68ca3f3f0f934e0317a439bbfbec092dc86 Mon Sep 17 00:00:00 2001 From: NoneSince Date: Thu, 25 Jan 2024 22:33:54 +0200 Subject: [PATCH 08/10] removed cstring and boost dependencies: cstring is included for strcmp. so I replaced it with std::string comparison. boost is used for split, is_any_of, and to_lower. so I implemented the functionality we need. --- src/construct_flags.cpp | 14 ++++---- src/construct_flags.h | 1 - src/deconstruct.cpp | 76 ++++++++++++++++++++++++++++++++++++----- src/deconstruct.h | 3 -- 4 files changed, 74 insertions(+), 20 deletions(-) diff --git a/src/construct_flags.cpp b/src/construct_flags.cpp index c268eb7..d1148a8 100644 --- a/src/construct_flags.cpp +++ b/src/construct_flags.cpp @@ -7,19 +7,19 @@ using namespace std; int set_bitwidth(char* argv) { - if (strcmp(argv, "elf8") == 0) { + if (string(argv) == "elf8") { bitwidth = BIT8; return 0; } - if (strcmp(argv, "elf16") == 0) { + if (string(argv) == "elf16") { bitwidth = BIT16; return 0; } - if (strcmp(argv, "elf32") == 0) { + if (string(argv) == "elf32") { bitwidth = BIT32; return 0; } - if (strcmp(argv, "elf64") == 0) { + if (string(argv) == "elf64") { bitwidth = BIT64; return 0; } @@ -33,18 +33,18 @@ int handle_flags(int argc, char** argv, string* path, string* outpath) bool path_set = false; bool outpath_set = false; for (int i = 1; i < argc; i++) { - if (strcmp(argv[i], "-f") == 0 && set_bitwidth(argv[i+1]) == 0) { + if (string(argv[i]) == "-f" && set_bitwidth(argv[i+1]) == 0) { bitwidth_set = true; i++; continue; } - if (strcmp(argv[i], "-i") == 0) { + if (string(argv[i]) == "-i") { path_set = true; i++; (*path) = argv[i]; continue; } - if (strcmp(argv[i], "-o") == 0) { + if (string(argv[i]) == "-o") { outpath_set = true; i++; (*outpath) = argv[i]; diff --git a/src/construct_flags.h b/src/construct_flags.h index 2c672b8..dc71290 100644 --- a/src/construct_flags.h +++ b/src/construct_flags.h @@ -2,7 +2,6 @@ #define CONSTRUCT_FLAGS_H_ #include -#include #include #include
"reconstruct.h" diff --git a/src/deconstruct.cpp b/src/deconstruct.cpp index e71f571..af4fcfb 100644 --- a/src/deconstruct.cpp +++ b/src/deconstruct.cpp @@ -1,8 +1,66 @@ #include "construct_types.h" #include "deconstruct.h" -#include using namespace std; + +static const char& FIRST_UPPERCASE_LETTER = 'A'; +static const char& LAST_UPPERCASE_LETTER = 'Z'; +static bool is_upper(const char& c) +{ + return c >= FIRST_UPPERCASE_LETTER && c <= LAST_UPPERCASE_LETTER; +} +static char to_lower(const char& c) +{ + return is_upper(c) ? c - FIRST_UPPERCASE_LETTER : c; +} +static void to_lower(string& str) +{ + string tmp; + for (string::iterator it = str.begin(); it != str.end(); ++it) { + tmp.push_back(to_lower(*it)); + } + str = tmp; +} + +class IsAnyOf +{ +private: + string chars; +public: + IsAnyOf() = default; + IsAnyOf(const string& _chars) : chars(_chars) {} + IsAnyOf(const char*& _chars) : chars(_chars) {} + ~IsAnyOf() = default; + IsAnyOf(const IsAnyOf& other) = delete; // should save unique sorted chars for that + IsAnyOf& operator=(const IsAnyOf& other) = delete; + + bool operator()(const char& c) const { + for (string::const_iterator it = chars.cbegin(); it != chars.cend(); ++it) { + if (*it == c) return true; + } + return false; + } +}; + +template +static void split(vector& result, const string& input, const Predicate& pred, const bool& compress_adj_delims = false) +{ + string tmp; + bool prev_is_delim = false; + for (string::const_iterator it = input.cbegin(); it != input.cend(); ++it) { + if (pred(*it)) { + if (prev_is_delim && compress_adj_delims) continue; + result.push_back(tmp); + tmp.clear(); + prev_is_delim = true; + } else { + tmp.push_back(*it); + prev_is_delim = false; + } + } + result.push_back(tmp); +} + int get_line_indentation(string line) { int indentation = 0; @@ -103,7 +161,7 @@ con_section* parse_section(string line) { con_section* tok_section = new con_section(); vector line_split; - boost::split(line_split, line, boost::is_any_of(" ")); + 
split(line_split, line, IsAnyOf(" ")); tok_section->name = line_split[1]; return tok_section; } @@ -117,7 +175,7 @@ con_while* parse_while(string line) { con_while* tok_while = new con_while(); vector line_split; - boost::split(line_split, line, boost::is_any_of(" ")); + split(line_split, line, IsAnyOf(" ")); tok_while->condition.arg1 = line_split[1]; tok_while->condition.op = str_to_comparison(line_split[2]); tok_while->condition.arg2 = line_split[3].substr(0, line_split[3].size()-1); // to remove : @@ -127,7 +185,7 @@ con_if* parse_if(string line) { con_if* tok_if = new con_if(); vector line_split; - boost::split(line_split, line, boost::is_any_of(" ")); + split(line_split, line, IsAnyOf(" ")); tok_if->condition.arg1 = line_split[1]; tok_if->condition.op = str_to_comparison(line_split[2]); tok_if->condition.arg2 = line_split[3].substr(0, line_split[3].size()-1); @@ -137,7 +195,7 @@ con_function* parse_function(string line) { con_function* tok_function = new con_function(); vector line_split; - boost::split(line_split, line, boost::is_any_of("():,")); + split(line_split, line, IsAnyOf("():,")); tok_function->name = line_split[0].substr(9, line_split[0].size()-9); for (size_t i = 1; i < line_split.size()-2; i++) { if (line_split[i].empty()) { @@ -151,7 +209,7 @@ con_cmd* parse_cmd(string line) { con_cmd* tok_cmd = new con_cmd(); vector line_split; - boost::split(line_split, line, boost::is_any_of(" ,")); + split(line_split, line, IsAnyOf(" ,")); tok_cmd->command = line_split[0]; if (line_split.size() > 1) tok_cmd->arg1 = line_split[1]; @@ -171,7 +229,7 @@ con_funcall* parse_funcall(string line) { con_funcall* tok_funcall = new con_funcall(); vector line_split; - boost::split(line_split, line, boost::is_any_of("(),")); + split(line_split, line, IsAnyOf("(),")); tok_funcall->funcname = line_split[0].substr(5, line_split[0].size()-5); for (size_t i = 1; i < line_split.size()-1; i++) { if (line_split[i].empty()) { @@ -233,8 +291,8 @@ con_token* parse_line(string line) 
vector parse_construct(string code) { vector code_split; - boost::split(code_split, code, boost::is_any_of("\n"), boost::token_compress_on); - boost::to_lower(code); + split(code_split, code, IsAnyOf("\n"), true); + to_lower(code); vector tokens; bool in_data = false; for (size_t i = 0; i < code_split.size(); i++) { diff --git a/src/deconstruct.h b/src/deconstruct.h index 2a390fa..7c95370 100644 --- a/src/deconstruct.h +++ b/src/deconstruct.h @@ -2,9 +2,6 @@ #define DECONSTRUCT_H_ #include "construct_debug.h" -#include -#include -#include #include #include #include From fed28d013f5e2e289c652afd6b9035bba50c36f7 Mon Sep 17 00:00:00 2001 From: NoneSince Date: Thu, 25 Jan 2024 22:46:46 +0200 Subject: [PATCH 09/10] managed local and standard header files inclusion: you should include the types and functions you directly use, and not depend on the current header files to complete the needed includes. for example: the file src/construct_flags.h only needs to know the type std::string, so only #include , and since the function implementation appears in src/construct_flags.cpp, it also needs an #include . you should put that same include in both .h and .cpp files, since technically the .h file can be empty, then it won't have includes for arguments types, the .cpp has to do the includes. explanation for the importance of this rule: I remember something like this: under one compiler / operating system does #include inside the header, so the program in the story ran fine without noticing the need of direct #include . once the same code was compiled with a different compiler / OS, it failed compilation since was not included.
--- src/construct.cpp | 10 +++++++--- src/construct_debug.cpp | 4 ++++ src/construct_debug.h | 2 -- src/construct_flags.cpp | 2 ++ src/construct_flags.h | 2 -- src/deconstruct.cpp | 5 ++++- src/deconstruct.h | 8 +++----- src/reconstruct.cpp | 2 ++ src/reconstruct.h | 3 +-- 9 files changed, 23 insertions(+), 15 deletions(-) diff --git a/src/construct.cpp b/src/construct.cpp index 73ea30a..53c61ab 100644 --- a/src/construct.cpp +++ b/src/construct.cpp @@ -1,8 +1,12 @@ -#include "deconstruct.h" -#include "reconstruct.h" -#include "construct_flags.h" +#include +#include #include #include +#include +#include "construct_types.h" +#include "deconstruct.h" // parse_construct() +#include "reconstruct.h" // linearize_tokens() +#include "construct_flags.h" // handle_flags() int main(int argc, char** argv) { diff --git a/src/construct_debug.cpp b/src/construct_debug.cpp index 7631bb5..83f1b20 100644 --- a/src/construct_debug.cpp +++ b/src/construct_debug.cpp @@ -1,4 +1,8 @@ +#include +#include #include "construct_debug.h" +#include "construct_types.h" +#include "reconstruct.h" // comparison_to_string() std::string tokentype_to_string(CON_TOKENTYPE type) { diff --git a/src/construct_debug.h b/src/construct_debug.h index 5cfa4c6..9ff034a 100644 --- a/src/construct_debug.h +++ b/src/construct_debug.h @@ -1,10 +1,8 @@ #ifndef CONSTRUCT_DEBUG_H_ #define CONSTRUCT_DEBUG_H_ -#include #include #include "construct_types.h" -#include "reconstruct.h" std::string tokentype_to_string(CON_TOKENTYPE type); std::string token_to_string(con_token token); diff --git a/src/construct_flags.cpp b/src/construct_flags.cpp index d1148a8..33f3dc9 100644 --- a/src/construct_flags.cpp +++ b/src/construct_flags.cpp @@ -1,3 +1,5 @@ +#include +#include #include "construct_flags.h" #include "construct_types.h" diff --git a/src/construct_flags.h b/src/construct_flags.h index dc71290..b7a1e28 100644 --- a/src/construct_flags.h +++ b/src/construct_flags.h @@ -2,8 +2,6 @@ #define CONSTRUCT_FLAGS_H_ #include 
-#include -#include "reconstruct.h" int set_bitwidth(char* argv); int handle_flags(int argc, char** argv, std::string* path, std::string* outpath); diff --git a/src/deconstruct.cpp b/src/deconstruct.cpp index af4fcfb..c8abcf0 100644 --- a/src/deconstruct.cpp +++ b/src/deconstruct.cpp @@ -1,5 +1,8 @@ -#include "construct_types.h" +#include +#include +#include #include "deconstruct.h" +#include "construct_types.h" using namespace std; diff --git a/src/deconstruct.h b/src/deconstruct.h index 7c95370..7139980 100644 --- a/src/deconstruct.h +++ b/src/deconstruct.h @@ -1,11 +1,9 @@ #ifndef DECONSTRUCT_H_ #define DECONSTRUCT_H_ -#include "construct_debug.h" -#include -#include -#include -#include +#include +#include +#include "construct_types.h" int get_line_indentation(std::string line); CON_TOKENTYPE get_token_type(std::string line); diff --git a/src/reconstruct.cpp b/src/reconstruct.cpp index bccbd30..8f38792 100644 --- a/src/reconstruct.cpp +++ b/src/reconstruct.cpp @@ -1,3 +1,5 @@ +#include +#include #include "reconstruct.h" #include "construct_types.h" diff --git a/src/reconstruct.h b/src/reconstruct.h index 87501c9..4dd2c02 100644 --- a/src/reconstruct.h +++ b/src/reconstruct.h @@ -2,8 +2,7 @@ #define RECONSTRUCT_H_ #include -#include -#include +#include #include "construct_types.h" // Used for naming tags From 7c625739514f8907eb8a191fd42b8150d1c7e2ab Mon Sep 17 00:00:00 2001 From: NoneSince Date: Thu, 25 Jan 2024 22:52:11 +0200 Subject: [PATCH 10/10] used exceptions for returning error: replaced "return error;" with "throw error;".
before, an error could go unnoticed, but now the program will end automatically (I didn't do try-catch) --- src/construct_debug.cpp | 3 ++- src/deconstruct.cpp | 3 ++- src/reconstruct.cpp | 7 ++++--- 3 files changed, 8 insertions(+), 5 deletions(-) diff --git a/src/construct_debug.cpp b/src/construct_debug.cpp index 83f1b20..5098a7a 100644 --- a/src/construct_debug.cpp +++ b/src/construct_debug.cpp @@ -1,5 +1,6 @@ #include #include +#include #include "construct_debug.h" #include "construct_types.h" #include "reconstruct.h" // comparison_to_string() @@ -24,7 +25,7 @@ std::string tokentype_to_string(CON_TOKENTYPE type) case FUNCALL: return "funcall"; } - return "unknown"; + throw std::invalid_argument("Invalid token type: "+std::to_string(static_cast(type))); } std::string token_to_string(con_token token) diff --git a/src/deconstruct.cpp b/src/deconstruct.cpp index c8abcf0..7b22553 100644 --- a/src/deconstruct.cpp +++ b/src/deconstruct.cpp @@ -1,6 +1,7 @@ #include #include #include +#include #include "deconstruct.h" #include "construct_types.h" @@ -111,7 +112,7 @@ CON_COMPARISON str_to_comparison(string comp) return LE; if (comp == "ge") return GE; - return E; //ERROR but there is no error value in the enum + throw invalid_argument("Invalid comparison sing: "+comp); } diff --git a/src/reconstruct.cpp b/src/reconstruct.cpp index 8f38792..03a7054 100644 --- a/src/reconstruct.cpp +++ b/src/reconstruct.cpp @@ -1,5 +1,6 @@ #include #include +#include #include "reconstruct.h" #include "construct_types.h" @@ -101,7 +102,7 @@ string reg_to_str(uint8_t call_num) } break; } - return "unknown"; + throw invalid_argument("Invalid bitwidth or call_num: bitwidth="+to_string(static_cast(bitwidth))+" call_num="+to_string(static_cast(call_num))); } string comparison_to_string(CON_COMPARISON condition) { @@ -119,7 +120,7 @@ string comparison_to_string(CON_COMPARISON condition) case GE: return "ge"; } - return "unknown"; + throw invalid_argument("Invalid comparison value: 
"+to_string(static_cast(condition))); } CON_COMPARISON get_comparison_inverse(CON_COMPARISON condition) { @@ -137,7 +138,7 @@ CON_COMPARISON get_comparison_inverse(CON_COMPARISON condition) case GE: return L; } - return E; //ERROR but there is no error value in the enum + throw invalid_argument("Invalid comparison value: "+to_string(static_cast(condition))); } static void apply_macro_to_token(con_token& token, vector macros)