feature(preprocessor): function-like macros (#70)

This commit is contained in:
Xenxo Espasandín 2023-02-17 16:44:38 +01:00 committed by GitHub
parent 4f83e351d8
commit 99fb7f24ba
7 changed files with 418 additions and 18 deletions

View File

@ -105,6 +105,7 @@ namespace xsk::gsc
%token RBRACKET "]"
%token COMMA ","
%token DOT "."
%token DOUBLEDOT ".."
%token ELLIPSIS "..."
%token DOUBLECOLON "::"
%token COLON ":"
@ -1124,6 +1125,7 @@ std::unordered_map<token::kind, parser::token::token_kind_type> const tok_to_par
{ token::DEC, parser::token::DECREMENT },
{ token::QMARK, parser::token::QMARK },
{ token::DOT, parser::token::DOT },
{ token::DOUBLEDOT, parser::token::DOUBLEDOT },
{ token::ELLIPSIS, parser::token::ELLIPSIS },
{ token::COMMA, parser::token::COMMA },
{ token::COLON, parser::token::COLON },

View File

@ -185,6 +185,17 @@ auto lexer::lex() -> token
case '"':
goto lex_string;
case '.':
    // Tell apart ".", "..", "..." and a numeric literal that starts with a dot.
    if (curr == '.')
    {
        advance();
        // Two dots not followed by a third form the ".." token.
        if (curr != '.')
            return token{ token::DOUBLEDOT, spacing_, loc_ };
        advance();
        return token{ token::ELLIPSIS, spacing_, loc_ };
    }
    // A dot that is not followed by a decimal digit is the plain "." token;
    // otherwise control continues at the number lexer.
    if (curr < '0' || curr > '9')
        return token{ token::DOT, spacing_, loc_ };
    goto lex_number;

View File

@ -10,9 +10,10 @@ namespace xsk::gsc
// Record stored in the preprocessor's define table for one macro name.
struct define
{
// Category of the definition. As used by the preprocessor in this change:
// PLAIN    - name defined with nothing after it,
// BUILTIN  - __FILE__, __LINE__, __DATE__, __TIME__,
// OBJECT   - name with a replacement token list,
// FUNCTION - name with a parameter list and a replacement token list.
// NOTE(review): the two enum lines below look like the before/after pair of
// this diff hunk (the second adds the u8 underlying type) - confirm only one
// of them exists in the real header.
enum kind { PLAIN, BUILTIN, OBJECT, FUNCTION };
enum kind : u8 { PLAIN, BUILTIN, OBJECT, FUNCTION };
// Which of the categories above this definition belongs to.
kind type;
// Placeholder for variadic macro support, currently disabled.
// bool vararg;
// Parameter name tokens; filled for FUNCTION macros, empty otherwise.
std::vector<token> args;
// Replacement token list pushed back into the token stream on expansion.
std::vector<token> exp;
};

View File

@ -7,8 +7,117 @@
#include "location.hpp"
#include "space.hpp"
#include "token.hpp"
#include "utils/string.hpp"
namespace xsk::gsc
{
// Returns the source spelling of this token.
//
// Punctuators, directives and keywords map to their fixed text. NAME, PATH,
// STRING, ISTRING, INT and FLT return the token's `data` member. Every other
// kind (preprocessor-internal markers and the like) yields "*INTERNAL*".
auto token::to_string() -> std::string
{
    switch (type)
    {
        // arithmetic / bitwise operators
        case token::PLUS: return "+";
        case token::MINUS: return "-";
        case token::STAR: return "*";
        case token::DIV: return "/";
        case token::MOD: return "%";
        case token::BITOR: return "|";
        case token::BITAND: return "&";
        case token::BITEXOR: return "^";
        case token::SHL: return "<<";
        case token::SHR: return ">>";
        // assignment operators
        case token::ASSIGN: return "=";
        case token::PLUSEQ: return "+=";
        case token::MINUSEQ: return "-=";
        case token::STAREQ: return "*=";
        case token::DIVEQ: return "/=";
        case token::MODEQ: return "%=";
        case token::BITOREQ: return "|=";
        case token::BITANDEQ: return "&=";
        case token::BITEXOREQ: return "^=";
        case token::SHLEQ: return "<<=";
        case token::SHREQ: return ">>=";
        case token::INC: return "++";
        case token::DEC: return "--";
        // comparison / logical operators
        case token::GT: return ">";
        case token::LT: return "<";
        case token::GE: return ">=";
        case token::LE: return "<=";
        case token::NE: return "!=";
        case token::EQ: return "==";
        case token::OR: return "||";
        case token::AND: return "&&";
        case token::TILDE: return "~";
        case token::BANG: return "!";
        // punctuation
        case token::QMARK: return "?";
        case token::COLON: return ":";
        case token::SHARP: return "#";
        case token::COMMA: return ",";
        case token::DOT: return ".";
        case token::DOUBLEDOT: return "..";
        case token::ELLIPSIS: return "...";
        case token::SEMICOLON: return ";";
        case token::DOUBLECOLON: return "::";
        // brackets are '[' ']' and braces are '{' '}', matching the grammar's
        // token declarations (RBRACKET "]")
        case token::LBRACKET: return "[";
        case token::RBRACKET: return "]";
        case token::LBRACE: return "{";
        case token::RBRACE: return "}";
        case token::LPAREN: return "(";
        case token::RPAREN: return ")";
        // tokens that carry their own text
        case token::NAME: return data;
        case token::PATH: return data;
        case token::STRING: return data;
        case token::ISTRING: return data;
        case token::INT: return data;
        case token::FLT: return data;
        // directives
        case token::DEVBEGIN: return "/#";
        case token::DEVEND: return "#/";
        case token::INLINE: return "#inline";
        case token::INCLUDE: return "#include";
        case token::USINGTREE: return "#using_animtree";
        case token::ANIMTREE: return "#animtree";
        // keywords
        case token::ENDON: return "endon";
        case token::NOTIFY: return "notify";
        case token::WAIT: return "wait";
        case token::WAITTILL: return "waittill";
        case token::WAITTILLMATCH: return "waittillmatch";
        case token::WAITTILLFRAMEEND: return "waittillframeend";
        case token::WAITFRAME: return "waitframe";
        case token::IF: return "if";
        case token::ELSE: return "else";
        case token::DO: return "do";
        case token::WHILE: return "while";
        case token::FOR: return "for";
        case token::FOREACH: return "foreach";
        case token::IN: return "in";
        case token::SWITCH: return "switch";
        case token::CASE: return "case";
        case token::DEFAULT: return "default";
        case token::BREAK: return "break";
        case token::CONTINUE: return "continue";
        case token::RETURN: return "return";
        case token::BREAKPOINT: return "breakpoint";
        case token::PROFBEGIN: return "prof_begin";
        case token::PROFEND: return "prof_end";
        case token::ASSERT: return "assert";
        case token::ASSERTEX: return "assertex";
        case token::ASSERTMSG: return "assertmsg";
        case token::THREAD: return "thread";
        case token::CHILDTHREAD: return "childthread";
        case token::THISTHREAD: return "thisthread";
        case token::CALL: return "call";
        case token::TRUE: return "true";
        case token::FALSE: return "false";
        case token::UNDEFINED: return "undefined";
        case token::SIZE: return "size";
        case token::GAME: return "game";
        case token::SELF: return "self";
        case token::ANIM: return "anim";
        case token::LEVEL: return "level";
        case token::ISDEFINED: return "isdefined";
        case token::ISTRUE: return "istrue";
        default: return "*INTERNAL*";
    }
}
} // namespace xsk::gsc

View File

@ -14,8 +14,8 @@ struct token
{
PLUS, MINUS, STAR, DIV, MOD, BITOR, BITAND, BITEXOR, SHL, SHR,
ASSIGN, PLUSEQ, MINUSEQ, STAREQ, DIVEQ, MODEQ, BITOREQ, BITANDEQ, BITEXOREQ, SHLEQ, SHREQ,
INC, DEC, GT, LT, GE, LE, NE, EQ, OR, AND, TILDE, BANG, QMARK, COLON, SHARP, COMMA,
DOT, ELLIPSIS, SEMICOLON, DOUBLECOLON, LBRACKET, RBRACKET, LBRACE, RBRACE, LPAREN, RPAREN,
INC, DEC, GT, LT, GE, LE, NE, EQ, OR, AND, TILDE, BANG, QMARK, COLON, SHARP, COMMA, DOT,
DOUBLEDOT, ELLIPSIS, SEMICOLON, DOUBLECOLON, LBRACKET, RBRACKET, LBRACE, RBRACE, LPAREN, RPAREN,
NAME, PATH, STRING, ISTRING, INT, FLT,
@ -25,7 +25,7 @@ struct token
PROFBEGIN, PROFEND, ASSERT, ASSERTEX, ASSERTMSG, THREAD, CHILDTHREAD, THISTHREAD,
CALL, TRUE, FALSE, UNDEFINED, SIZE, GAME, SELF, ANIM, LEVEL, ISDEFINED, ISTRUE,
HASH, NEWLINE, EOS, DEFINED, MACROBEGIN, MACROEND,
HASH, NEWLINE, EOS, DEFINED, MACROBEGIN, MACROEND, MACROARG, MACROVAOPT, MACROVAARGS, STRINGIZE, PASTE
};
kind type;
@ -35,6 +35,7 @@ struct token
token(kind type, spacing space, location pos) : type{ type }, space{ space }, pos{ pos }, data{} {}
token(kind type, spacing space, location pos, std::string data) : type{ type }, space{ space }, pos{ pos }, data{ std::move(data) } {}
auto to_string() -> std::string;
};
} // namespace xsk::gsc

View File

@ -15,10 +15,10 @@ preprocessor::preprocessor(context* ctx, std::string const& name, char const* da
{
lexer_.push(lexer{ ctx, name, data, size });
defines_.reserve(4);
defines_.insert({ "__FILE__", { define::BUILTIN, {}, {} }});
defines_.insert({ "__LINE__", { define::BUILTIN, {}, {} }});
defines_.insert({ "__DATE__", { define::BUILTIN, {}, {} }});
defines_.insert({ "__TIME__", { define::BUILTIN, {}, {} }});
defines_.insert({ "__FILE__", { define::BUILTIN,/* false,*/ {}, {} }});
defines_.insert({ "__LINE__", { define::BUILTIN,/* false,*/ {}, {} }});
defines_.insert({ "__DATE__", { define::BUILTIN,/* false,*/ {}, {} }});
defines_.insert({ "__TIME__", { define::BUILTIN,/* false,*/ {}, {} }});
directives_.reserve(15);
directives_.insert({ "if", directive::IF });
directives_.insert({ "ifdef", directive::IFDEF });
@ -464,12 +464,140 @@ auto preprocessor::read_directive_define(token&) -> void
switch (next.type)
{
case token::NEWLINE:
defines_.insert({ name, define{ define::PLAIN, {}, {} }});
defines_.insert({ name, define{ define::PLAIN,/* false,*/ {}, {} }});
break;
case token::LPAREN:
if (next.space == spacing::none)
{
throw ppr_error(next.pos, "function-like macros not supported");
auto params = std::vector<token>{};
auto last_comma = true;
auto last_elips = false;
while (true)
{
next = read_token();
if (next.type == token::RPAREN)
{
if (last_comma && !params.empty())
throw ppr_error(next.pos, "misplaced comma in macro param list");
break;
}
else if (next.type == token::NAME)
{
if (last_elips)
throw ppr_error(next.pos, "elipsis must be last in macro param list");
else if (!last_comma)
throw ppr_error(next.pos, "misplaced name in macro param list");
else
{
auto it = std::find_if(params.begin(), params.end(), [&next](token const& v) { return v.data == next.data; });
if (it != params.end())
{
throw ppr_error(next.pos, "duplicate macro parameter name");
}
params.push_back(next);
last_comma = false;
}
}
else if (next.type == token::ELLIPSIS)
{
// TODO: disabled
throw ppr_error(next.pos, "variadic macros not supported");
//
if (!last_comma || last_elips)
throw ppr_error(next.pos, "misplaced elipsis in macro param list");
last_elips = true;
last_comma = false;
}
else if (next.type == token::COMMA)
{
if (last_elips)
throw ppr_error(next.pos, "elipsis must be last in macro param list");
if (last_comma)
throw ppr_error(next.pos, "misplaced comma in macro param list");
else
last_comma = true;
}
else
throw ppr_error(next.pos, "unexpected token in macro param list");
}
auto exp = std::vector<token>{};
auto last_sharp = false;
next = read_token();
while (next.type != token::NEWLINE)
{
if (next.type == token::NAME)
{
auto it = std::find_if(params.begin(), params.end(), [&next](token const& v) { return v.data == next.data; });
if (it != params.end())
{
if (last_sharp)
exp.back().type = token::STRINGIZE;
next.type = token::MACROARG;
exp.push_back(std::move(next));
}
else
{
// check for #animtree ??
if (last_sharp)
throw ppr_error(next.pos, "'#' is not followed by a macro parameter");
exp.push_back(std::move(next));
}
// TODO: VAARGS, VAOPT
}
else if (next.type == token::SHARP)
{
if (!last_sharp)
{
last_sharp = true;
exp.push_back(std::move(next));
}
else if (next.space == spacing::none)
{
exp.back().type = token::PASTE;
}
else
{
throw ppr_error(next.pos, "'#' is not followed by a macro parameter");
}
}
else
{
exp.push_back(std::move(next));
}
if (exp.back().type != token::SHARP)
last_sharp = false;
next = read_token();
}
expect(next, token::NEWLINE);
if (!exp.empty())
{
if (exp.front().type == token::PASTE)
throw ppr_error(next.pos, "'##' cannot appear at start of macro expansion");
if (exp.back().type == token::PASTE)
throw ppr_error(next.pos, "'##' cannot appear at end of macro expansion");
if (exp.back().type == token::SHARP)
throw ppr_error(next.pos, "'#' is not followed by a macro parameter");
}
defines_.insert({ name, define{ define::FUNCTION, /*last_elips,*/ params, exp }});
break;
}
default:
@ -487,7 +615,7 @@ auto preprocessor::read_directive_define(token&) -> void
expect(next, token::NEWLINE);
defines_.insert({ name, define{ define::OBJECT, {}, exp }});
defines_.insert({ name, define{ define::OBJECT,/* false,*/ {}, exp }});
}
else
{
@ -627,18 +755,165 @@ auto preprocessor::expand(token& tok, define& def) -> void
else if (def.type == define::OBJECT)
{
tokens_.push_front(token{ token::MACROEND, tok.space, tok.pos, tok.data });
for (auto it = def.exp.rbegin(); it != def.exp.rend(); ++it)
{
tokens_.push_front(*it);
}
for (auto it = def.exp.rbegin(); it != def.exp.rend(); ++it) tokens_.push_front(*it);
tokens_.push_front(token{ token::MACROBEGIN, tok.space, tok.pos, tok.data });
}
else if (def.type == define::FUNCTION)
{
    // A function-like macro is only invoked when its name is followed by '('.
    // Otherwise the look-ahead token is put back and the name itself is
    // queued between MACROBEGIN/MACROEND markers.
    auto next = next_token();

    if (next.type != token::LPAREN)
    {
        tokens_.push_front(next);
        tokens_.push_front(token{ token::MACROEND, tok.space, tok.pos, tok.data });
        tokens_.push_front(tok);
        tokens_.push_front(token{ token::MACROBEGIN, tok.space, tok.pos, tok.data });
        return;
    }

    // One token list per macro parameter, in declaration order.
    auto args = expand_params(tok, def);

    auto exp = std::vector<token>{};
    exp.reserve(def.exp.size());

    for (auto i = 0u; i < def.exp.size(); i++)
    {
        auto const& cur = def.exp[i];

        if (cur.type == token::MACROARG)
        {
            // Replace the parameter with the tokens of the matching argument.
            for (auto n = 0u; n < def.args.size(); n++)
            {
                if (def.args[n].data == cur.data)
                {
                    auto const& arg = args.at(n);
                    exp.insert(exp.end(), arg.begin(), arg.end());
                    break;
                }
            }
        }
        else if (cur.type == token::MACROVAARGS)
        {
            // TODO:
            // if (!def.vararg)
            //      throw ppr_error(def.exp[i].pos, "__VA_ARGS__ can only appear in the expansion of a variadic macro");
            // for (auto t : args.back()) exp.push_back(t);
        }
        else if (cur.type == token::MACROVAOPT)
        {
            // TODO:
            // if (!def.vararg)
            //      throw ppr_error(def.exp[i].pos, "__VA_OPT__ can only appear in the expansion of a variadic macro");
            //
            // if (!args.back().empty())
            // {
            //     // paste opt
            // }
        }
        else if (cur.type == token::STRINGIZE)
        {
            // '#param': the define parser only marks a '#' as STRINGIZE when
            // the parameter token is stored right after it, so i + 1 is valid.
            auto const& name = def.exp[i + 1].data;
            auto str = std::string{};

            for (auto n = 0u; n < def.args.size(); n++)
            {
                if (def.args[n].data == name)
                {
                    // Join the argument's spellings, keeping a single space
                    // before any non-leading token flagged spacing::back.
                    for (size_t idx = 0; auto& t : args.at(n))
                    {
                        if (idx != 0 && t.space == spacing::back)
                            str.append(" ");
                        str.append(t.to_string());
                        idx++;
                    }
                    break;
                }
            }

            exp.push_back(token{ token::STRING, cur.space, cur.pos, str });
            i++; // skip the parameter token consumed by the stringize
        }
        else if (cur.type == token::PASTE)
        {
            // 'lhs##rhs': only identifier ## identifier is supported. The left
            // operand is the last token produced so far; it does not exist
            // when the preceding parameter was given an empty argument, so
            // check before touching exp.back().
            auto const has_lhs = !exp.empty() && exp.back().type == token::NAME;
            auto const has_rhs = i + 1 < def.exp.size() && def.exp[i + 1].type == token::NAME;

            if (!has_lhs || !has_rhs)
            {
                throw ppr_error(cur.pos, "paste can only be applied to identifiers");
            }

            exp.back().data.append(def.exp[i + 1].data);
            i++; // skip the right-hand identifier that was merged
        }
        else
        {
            exp.push_back(cur);
        }
    }

    // tokens_ is filled from the front, so push the result in reverse order:
    // MACROBEGIN, expansion..., MACROEND.
    tokens_.push_front(token{ token::MACROEND, tok.space, tok.pos, tok.data });
    for (auto it = exp.rbegin(); it != exp.rend(); ++it) tokens_.push_front(*it);
    tokens_.push_front(token{ token::MACROBEGIN, tok.space, tok.pos, tok.data });
}
}
// Reads the argument list of a function-like macro invocation, starting right
// after the opening '(' and consuming tokens up to the matching ')'.
//
// tok - the macro name token; its position is used for error reporting.
// def - the macro definition; only the number of parameters is consulted.
//
// Returns one token list per argument. Arguments are split on commas that are
// not nested inside parentheses; nested parentheses are kept in the argument.
// Throws ppr_error when the input ends before the closing ')' or when the
// argument count differs from the parameter count.
auto preprocessor::expand_params(token& tok, define& def) -> std::vector<std::vector<token>>
{
    auto result = std::vector<std::vector<token>>{};
    result.emplace_back();

    auto depth = 0;

    for (auto closed = false; !closed; )
    {
        auto cur = next_token();

        switch (cur.type)
        {
            case token::EOS:
                throw ppr_error(tok.pos, "unterminated function-like macro invocation");

            case token::LPAREN:
                depth++;
                result.back().push_back(cur);
                break;

            case token::RPAREN:
                if (depth == 0)
                {
                    // closing paren of the invocation itself
                    closed = true;
                }
                else
                {
                    depth--;
                    result.back().push_back(cur);
                }
                break;

            case token::COMMA:
                // only a top-level comma starts a new argument
                if (depth == 0 /*&& !(def.vararg && result.size() > def.args.size())*/)
                    result.emplace_back();
                else
                    result.back().push_back(cur);
                break;

            default:
                result.back().push_back(cur);
                break;
        }
    }

    // "NAME()" for a macro without parameters carries no arguments at all
    auto const only_blank = result.size() == 1 && result[0].empty();

    if (only_blank && def.args.empty())
    {
        result.pop_back();
    }

    auto const expected = def.args.size();

    if (result.size() < expected)
    {
        throw ppr_error(tok.pos, "too few arguments provided to function-like macro invocation");
    }

    if (/*!def.vararg &&*/ result.size() > expected)
    {
        throw ppr_error(tok.pos, "too many arguments provided to function-like macro invocation");
    }

    // TODO: expand args

    return result;
}
auto preprocessor::expect(token& tok, token::kind expected, spacing) -> void

View File

@ -58,6 +58,7 @@ private:
auto read_hashtoken(token& hash) -> void;
auto read_hashtoken_animtree(token& hash, token& name) -> void;
auto expand(token& tok, define& def) -> void;
auto expand_params(token& tok, define& def) -> std::vector<std::vector<token>>;
auto expect(token& tok, token::kind expected, spacing space = spacing::none) -> void;
auto evaluate() -> bool;
auto eval_next() -> token&;