From 99fb7f24ba2955f1869066f442f995efe8a04ed7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Xenxo=20Espasand=C3=ADn?= Date: Fri, 17 Feb 2023 16:44:38 +0100 Subject: [PATCH] feature(preprocessor): function-like macros (#70) --- gen/gsc/parser.ypp | 2 + src/gsc/lexer.cpp | 11 ++ src/gsc/misc/define.hpp | 3 +- src/gsc/misc/token.cpp | 109 ++++++++++++++ src/gsc/misc/token.hpp | 7 +- src/gsc/preprocessor.cpp | 303 +++++++++++++++++++++++++++++++++++++-- src/gsc/preprocessor.hpp | 1 + 7 files changed, 418 insertions(+), 18 deletions(-) diff --git a/gen/gsc/parser.ypp b/gen/gsc/parser.ypp index 2a72e966..c3664011 100644 --- a/gen/gsc/parser.ypp +++ b/gen/gsc/parser.ypp @@ -105,6 +105,7 @@ namespace xsk::gsc %token RBRACKET "]" %token COMMA "," %token DOT "." +%token DOUBLEDOT ".." %token ELLIPSIS "..." %token DOUBLECOLON "::" %token COLON ":" @@ -1124,6 +1125,7 @@ std::unordered_map const tok_to_par { token::DEC, parser::token::DECREMENT }, { token::QMARK, parser::token::QMARK }, { token::DOT, parser::token::DOT }, + { token::DOUBLEDOT, parser::token::DOUBLEDOT }, { token::ELLIPSIS, parser::token::ELLIPSIS }, { token::COMMA, parser::token::COMMA }, { token::COLON, parser::token::COLON }, diff --git a/src/gsc/lexer.cpp b/src/gsc/lexer.cpp index adb6ee37..ab192686 100644 --- a/src/gsc/lexer.cpp +++ b/src/gsc/lexer.cpp @@ -185,6 +185,17 @@ auto lexer::lex() -> token case '"': goto lex_string; case '.': + if (curr == '.') + { + advance(); + + if (curr != '.') + return token{ token::DOUBLEDOT, spacing_, loc_ }; /* ".." is its own token; DOUBLECOLON is "::" */ + + advance(); + return token{ token::ELLIPSIS, spacing_, loc_ }; + } + if (curr < '0' || curr > '9') return token{ token::DOT, spacing_, loc_ }; goto lex_number; diff --git a/src/gsc/misc/define.hpp b/src/gsc/misc/define.hpp index e924c7ba..45459aac 100644 --- a/src/gsc/misc/define.hpp +++ b/src/gsc/misc/define.hpp @@ -10,9 +10,10 @@ namespace xsk::gsc struct define { - enum kind { PLAIN, BUILTIN, OBJECT, FUNCTION }; + enum kind : u8 { PLAIN, BUILTIN, OBJECT, FUNCTION }; 
kind type; +// bool vararg; std::vector args; std::vector exp; }; diff --git a/src/gsc/misc/token.cpp b/src/gsc/misc/token.cpp index 2f936c3e..d0430534 100644 --- a/src/gsc/misc/token.cpp +++ b/src/gsc/misc/token.cpp @@ -7,8 +7,117 @@ #include "location.hpp" #include "space.hpp" #include "token.hpp" +#include "utils/string.hpp" namespace xsk::gsc { +auto token::to_string() -> std::string +{ + switch (type) + { + case token::PLUS: return "+"; + case token::MINUS: return "-"; + case token::STAR: return "*"; + case token::DIV: return "/"; + case token::MOD: return "%"; + case token::BITOR: return "|"; + case token::BITAND: return "&"; + case token::BITEXOR: return "^"; + case token::SHL: return "<<"; + case token::SHR: return ">>"; + case token::ASSIGN: return "="; + case token::PLUSEQ: return "+="; + case token::MINUSEQ: return "-="; + case token::STAREQ: return "*="; + case token::DIVEQ: return "/="; + case token::MODEQ: return "%="; + case token::BITOREQ: return "|="; + case token::BITANDEQ: return "&="; + case token::BITEXOREQ: return "^="; + case token::SHLEQ: return "<<="; + case token::SHREQ: return ">>="; + case token::INC: return "++"; + case token::DEC: return "--"; + case token::GT: return ">"; + case token::LT: return "<"; + case token::GE: return ">="; + case token::LE: return "<="; + case token::NE: return "!="; + case token::EQ: return "=="; + case token::OR: return "||"; + case token::AND: return "&&"; + case token::TILDE: return "~"; + case token::BANG: return "!"; + case token::QMARK: return "?"; + case token::COLON: return ":"; + case token::SHARP: return "#"; + case token::COMMA: return ","; + case token::DOT: return "."; + case token::DOUBLEDOT: return ".."; + case token::ELLIPSIS: return "..."; + case token::SEMICOLON: return ";"; + case token::DOUBLECOLON: return "::"; + case token::LBRACKET: return "{"; + case token::RBRACKET: return "}"; + case token::LBRACE: return "["; + case token::RBRACE: return "]"; + case token::LPAREN: return "("; + 
case token::RPAREN: return ")"; + case token::NAME: return data; + case token::PATH: return data; + case token::STRING: return data; + case token::ISTRING: return data; + case token::INT: return data; + case token::FLT: return data; + case token::DEVBEGIN: return "/#"; + case token::DEVEND: return "#/"; + case token::INLINE: return "#inline"; + case token::INCLUDE: return "#include"; + case token::USINGTREE: return "#using_animtree"; + case token::ANIMTREE: return "#animtree"; + case token::ENDON: return "endon"; + case token::NOTIFY: return "notify"; + case token::WAIT: return "wait"; + case token::WAITTILL: return "waittill"; + case token::WAITTILLMATCH: return "waittillmatch"; + case token::WAITTILLFRAMEEND: return "waittillframeend"; + case token::WAITFRAME: return "waitframe"; + case token::IF: return "if"; + case token::ELSE: return "else"; + case token::DO: return "do"; + case token::WHILE: return "while"; + case token::FOR: return "for"; + case token::FOREACH: return "foreach"; + case token::IN: return "in"; + case token::SWITCH: return "switch"; + case token::CASE: return "case"; + case token::DEFAULT: return "default"; + case token::BREAK: return "break"; + case token::CONTINUE: return "continue"; + case token::RETURN: return "return"; + case token::BREAKPOINT: return "breakpoint"; + case token::PROFBEGIN: return "prof_begin"; + case token::PROFEND: return "prof_end"; + case token::ASSERT: return "assert"; + case token::ASSERTEX: return "assertex"; + case token::ASSERTMSG: return "assertmsg"; + case token::THREAD: return "thread"; + case token::CHILDTHREAD: return "childthread"; + case token::THISTHREAD: return "thisthread"; + case token::CALL: return "call"; + case token::TRUE: return "true"; + case token::FALSE: return "false"; + case token::UNDEFINED: return "undefined"; + case token::SIZE: return "size"; + case token::GAME: return "game"; + case token::SELF: return "self"; + case token::ANIM: return "anim"; + case token::LEVEL: return "level"; + case 
token::ISDEFINED: return "isdefined"; + case token::ISTRUE: return "istrue"; + default: return "*INTERNAL*"; + } +} + } // namespace xsk::gsc diff --git a/src/gsc/misc/token.hpp b/src/gsc/misc/token.hpp index 3e8868a3..64f1bcac 100644 --- a/src/gsc/misc/token.hpp +++ b/src/gsc/misc/token.hpp @@ -14,8 +14,8 @@ struct token { PLUS, MINUS, STAR, DIV, MOD, BITOR, BITAND, BITEXOR, SHL, SHR, ASSIGN, PLUSEQ, MINUSEQ, STAREQ, DIVEQ, MODEQ, BITOREQ, BITANDEQ, BITEXOREQ, SHLEQ, SHREQ, - INC, DEC, GT, LT, GE, LE, NE, EQ, OR, AND, TILDE, BANG, QMARK, COLON, SHARP, COMMA, - DOT, ELLIPSIS, SEMICOLON, DOUBLECOLON, LBRACKET, RBRACKET, LBRACE, RBRACE, LPAREN, RPAREN, + INC, DEC, GT, LT, GE, LE, NE, EQ, OR, AND, TILDE, BANG, QMARK, COLON, SHARP, COMMA, DOT, + DOUBLEDOT, ELLIPSIS, SEMICOLON, DOUBLECOLON, LBRACKET, RBRACKET, LBRACE, RBRACE, LPAREN, RPAREN, NAME, PATH, STRING, ISTRING, INT, FLT, @@ -25,7 +25,7 @@ struct token PROFBEGIN, PROFEND, ASSERT, ASSERTEX, ASSERTMSG, THREAD, CHILDTHREAD, THISTHREAD, CALL, TRUE, FALSE, UNDEFINED, SIZE, GAME, SELF, ANIM, LEVEL, ISDEFINED, ISTRUE, - HASH, NEWLINE, EOS, DEFINED, MACROBEGIN, MACROEND, + HASH, NEWLINE, EOS, DEFINED, MACROBEGIN, MACROEND, MACROARG, MACROVAOPT, MACROVAARGS, STRINGIZE, PASTE }; kind type; @@ -35,6 +35,7 @@ struct token token(kind type, spacing space, location pos) : type{ type }, space{ space }, pos{ pos }, data{} {} token(kind type, spacing space, location pos, std::string data) : type{ type }, space{ space }, pos{ pos }, data{ std::move(data) } {} + auto to_string() -> std::string; }; } // namespace xsk::gsc diff --git a/src/gsc/preprocessor.cpp b/src/gsc/preprocessor.cpp index 33270332..19b12fd2 100644 --- a/src/gsc/preprocessor.cpp +++ b/src/gsc/preprocessor.cpp @@ -15,10 +15,10 @@ preprocessor::preprocessor(context* ctx, std::string const& name, char const* da { lexer_.push(lexer{ ctx, name, data, size }); defines_.reserve(4); - defines_.insert({ "__FILE__", { define::BUILTIN, {}, {} }}); - defines_.insert({ 
"__LINE__", { define::BUILTIN, {}, {} }}); - defines_.insert({ "__DATE__", { define::BUILTIN, {}, {} }}); - defines_.insert({ "__TIME__", { define::BUILTIN, {}, {} }}); + defines_.insert({ "__FILE__", { define::BUILTIN,/* false,*/ {}, {} }}); + defines_.insert({ "__LINE__", { define::BUILTIN,/* false,*/ {}, {} }}); + defines_.insert({ "__DATE__", { define::BUILTIN,/* false,*/ {}, {} }}); + defines_.insert({ "__TIME__", { define::BUILTIN,/* false,*/ {}, {} }}); directives_.reserve(15); directives_.insert({ "if", directive::IF }); directives_.insert({ "ifdef", directive::IFDEF }); @@ -464,12 +464,140 @@ auto preprocessor::read_directive_define(token&) -> void switch (next.type) { case token::NEWLINE: - defines_.insert({ name, define{ define::PLAIN, {}, {} }}); + defines_.insert({ name, define{ define::PLAIN,/* false,*/ {}, {} }}); break; case token::LPAREN: if (next.space == spacing::none) { - throw ppr_error(next.pos, "function-like macros not supported"); + auto params = std::vector{}; + auto last_comma = true; + auto last_elips = false; + + while (true) + { + next = read_token(); + + if (next.type == token::RPAREN) + { + if (last_comma && !params.empty()) + throw ppr_error(next.pos, "misplaced comma in macro param list"); + + break; + } + else if (next.type == token::NAME) + { + if (last_elips) + throw ppr_error(next.pos, "elipsis must be last in macro param list"); + else if (!last_comma) + throw ppr_error(next.pos, "misplaced name in macro param list"); + else + { + auto it = std::find_if(params.begin(), params.end(), [&next](token const& v) { return v.data == next.data; }); + + if (it != params.end()) + { + throw ppr_error(next.pos, "duplicate macro parameter name"); + } + + params.push_back(next); + last_comma = false; + } + } + else if (next.type == token::ELLIPSIS) + { + // TODO: disabled + throw ppr_error(next.pos, "variadic macros not supported"); + // + + if (!last_comma || last_elips) + throw ppr_error(next.pos, "misplaced elipsis in macro param list"); 
+ + last_elips = true; + last_comma = false; + } + else if (next.type == token::COMMA) + { + if (last_elips) + throw ppr_error(next.pos, "elipsis must be last in macro param list"); + if (last_comma) + throw ppr_error(next.pos, "misplaced comma in macro param list"); + else + last_comma = true; + } + else + throw ppr_error(next.pos, "unexpected token in macro param list"); + } + + auto exp = std::vector{}; + auto last_sharp = false; + next = read_token(); + + while (next.type != token::NEWLINE) + { + if (next.type == token::NAME) + { + auto it = std::find_if(params.begin(), params.end(), [&next](token const& v) { return v.data == next.data; }); + + if (it != params.end()) + { + if (last_sharp) + exp.back().type = token::STRINGIZE; + + next.type = token::MACROARG; + exp.push_back(std::move(next)); + } + else + { + // check for #animtree ?? + if (last_sharp) + throw ppr_error(next.pos, "'#' is not followed by a macro parameter"); + + exp.push_back(std::move(next)); + } + // TODO: VAARGS, VAOPT + } + else if (next.type == token::SHARP) + { + if (!last_sharp) + { + last_sharp = true; + exp.push_back(std::move(next)); + } + else if (next.space == spacing::none) + { + exp.back().type = token::PASTE; + } + else + { + throw ppr_error(next.pos, "'#' is not followed by a macro parameter"); + } + } + else + { + exp.push_back(std::move(next)); + } + + if (exp.back().type != token::SHARP) + last_sharp = false; + + next = read_token(); + } + + expect(next, token::NEWLINE); + + if (!exp.empty()) + { + if (exp.front().type == token::PASTE) + throw ppr_error(next.pos, "'##' cannot appear at start of macro expansion"); + + if (exp.back().type == token::PASTE) + throw ppr_error(next.pos, "'##' cannot appear at end of macro expansion"); + + if (exp.back().type == token::SHARP) + throw ppr_error(next.pos, "'#' is not followed by a macro parameter"); + } + + defines_.insert({ name, define{ define::FUNCTION, /*last_elips,*/ params, exp }}); break; } default: @@ -487,7 +615,7 @@ auto 
preprocessor::read_directive_define(token&) -> void expect(next, token::NEWLINE); - defines_.insert({ name, define{ define::OBJECT, {}, exp }}); + defines_.insert({ name, define{ define::OBJECT,/* false,*/ {}, exp }}); } else { @@ -627,20 +755,167 @@ auto preprocessor::expand(token& tok, define& def) -> void else if (def.type == define::OBJECT) { tokens_.push_front(token{ token::MACROEND, tok.space, tok.pos, tok.data }); - - for (auto it = def.exp.rbegin(); it != def.exp.rend(); ++it) - { - tokens_.push_front(*it); - } - + for (auto it = def.exp.rbegin(); it != def.exp.rend(); ++it) tokens_.push_front(*it); tokens_.push_front(token{ token::MACROBEGIN, tok.space, tok.pos, tok.data }); } else if (def.type == define::FUNCTION) { - // TODO! + auto next = next_token(); + + if (next.type != token::LPAREN) + { + tokens_.push_front(next); + tokens_.push_front(token{ token::MACROEND, tok.space, tok.pos, tok.data }); + tokens_.push_front(tok); + tokens_.push_front(token{ token::MACROBEGIN, tok.space, tok.pos, tok.data }); + return; + } + + auto args = expand_params(tok, def); + + auto exp = std::vector{}; + exp.reserve(def.exp.size()); + + for (auto i = 0u; i < def.exp.size(); i++) + { + if (def.exp[i].type == token::MACROARG) + { + auto const& name = def.exp[i].data; + + for (auto n = 0u; n < def.args.size(); n++) + { + if (def.args[n].data == name) + { + for (auto t : args.at(n)) exp.push_back(t); + break; + } + } + } + else if (def.exp[i].type == token::MACROVAARGS) + { + // TODO: + // if (!def.vararg) + // throw ppr_error(def.exp[i].pos, "__VA_ARGS__ can only appear in the expansion of a variadic macro"); + + // for (auto t : args.back()) exp.push_back(t); + } + else if (def.exp[i].type == token::MACROVAOPT) + { + // TODO: + // if (!def.vararg) + // throw ppr_error(def.exp[i].pos, "__VA_OPT__ can only appear in the expansion of a variadic macro"); + + // + // if (!args.back().empty()) + // { + // // paste opt + // } + } + else if (def.exp[i].type == token::STRINGIZE) + { 
+ auto name = def.exp[i + 1].data; + auto str = std::string{}; + + for (auto n = 0u; n < def.args.size(); n++) + { + if (def.args[n].data == name) + { + for (size_t idx = 0; auto t : args.at(n)) + { + if (idx != 0 && t.space == spacing::back) + str.append(" "); + str.append(t.to_string()); + idx++; + } + break; + } + } + + exp.push_back(token{ token::STRING, def.exp[i].space, def.exp[i].pos, str }); + i++; + } + else if (def.exp[i].type == token::PASTE) + { + if (!exp.empty() && exp.back().type == token::NAME && def.exp[i+1].type == token::NAME) /* exp is empty when the left operand expanded to no tokens */ + { + exp.back().data.append(def.exp[i+1].data); + } + else + { + throw ppr_error(def.exp[i].pos, "paste can only be applied to identifiers"); + } + i++; + } + else + { + exp.push_back(def.exp[i]); + } + } + + tokens_.push_front(token{ token::MACROEND, tok.space, tok.pos, tok.data }); + for (auto it = exp.rbegin(); it != exp.rend(); ++it) tokens_.push_front(*it); + tokens_.push_front(token{ token::MACROBEGIN, tok.space, tok.pos, tok.data }); } } +auto preprocessor::expand_params(token& tok, define& def) -> std::vector> +{ + auto nest_paren = 0; + auto args = std::vector>{}; + args.push_back({}); + + while (true) + { + auto next = next_token(); + + if (next.type == token::EOS) + { + throw ppr_error(tok.pos, "unterminated function-like macro invocation"); + } + else if (next.type == token::LPAREN) + { + nest_paren++; + args.back().push_back(next); + } + else if (next.type == token::RPAREN) + { + if (nest_paren == 0) + break; + else + { + nest_paren--; + args.back().push_back(next); + } + } + else if (next.type == token::COMMA && nest_paren == 0 /*&& !(def.vararg && args.size() > def.args.size())*/) + { + args.push_back({}); + } + else + { + args.back().push_back(next); + } + } + + if (def.args.empty() && args.size() == 1 && args[0].empty()) + { + args.pop_back(); + } + + if (args.size() < def.args.size()) + { + throw ppr_error(tok.pos, "too few arguments provided to function-like macro invocation"); + } + + if (/*!def.vararg &&*/ args.size() 
> def.args.size()) + { + throw ppr_error(tok.pos, "too many arguments provided to function-like macro invocation"); + } + + // TODO: expand args + return args; +} + auto preprocessor::expect(token& tok, token::kind expected, spacing) -> void { if (tok.type != expected) diff --git a/src/gsc/preprocessor.hpp b/src/gsc/preprocessor.hpp index 3db34f50..50bc5e69 100644 --- a/src/gsc/preprocessor.hpp +++ b/src/gsc/preprocessor.hpp @@ -58,6 +58,7 @@ private: auto read_hashtoken(token& hash) -> void; auto read_hashtoken_animtree(token& hash, token& name) -> void; auto expand(token& tok, define& def) -> void; + auto expand_params(token& tok, define& def) -> std::vector>; auto expect(token& tok, token::kind expected, spacing space = spacing::none) -> void; auto evaluate() -> bool; auto eval_next() -> token&;