preprocessor line wrapping

This commit is contained in:
xensik 2022-02-04 15:39:47 +01:00
parent 30aab69283
commit a220b47daa
36 changed files with 2629 additions and 1538 deletions

View File

@ -41,14 +41,14 @@ using namespace xsk::gsc;
xsk::gsc::h1::parser::symbol_type H1lex(xsk::gsc::h1::lexer& lexer); xsk::gsc::h1::parser::symbol_type H1lex(xsk::gsc::h1::lexer& lexer);
} }
%token HSDEFINE "#define" %token SH_DEFINE "#define"
%token HSUNDEF "#undef" %token SH_UNDEF "#undef"
%token HSIFDEF "#ifdef" %token SH_IFDEF "#ifdef"
%token HSIFNDEF "#ifndef" %token SH_IFNDEF "#ifndef"
%token HSIF "#if" %token SH_IF "#if"
%token HSELIF "#elif" %token SH_ELIF "#elif"
%token HSELSE "#else" %token SH_ELSE "#else"
%token HSENDIF "#endif" %token SH_ENDIF "#endif"
%token DEVBEGIN "/#" %token DEVBEGIN "/#"
%token DEVEND "#/" %token DEVEND "#/"
%token INLINE "#inline" %token INLINE "#inline"

View File

@ -39,14 +39,14 @@ using namespace xsk::gsc;
xsk::gsc::h2::parser::symbol_type H2lex(xsk::gsc::h2::lexer& lexer); xsk::gsc::h2::parser::symbol_type H2lex(xsk::gsc::h2::lexer& lexer);
} }
%token HSDEFINE "#define" %token SH_DEFINE "#define"
%token HSUNDEF "#undef" %token SH_UNDEF "#undef"
%token HSIFDEF "#ifdef" %token SH_IFDEF "#ifdef"
%token HSIFNDEF "#ifndef" %token SH_IFNDEF "#ifndef"
%token HSIF "#if" %token SH_IF "#if"
%token HSELIF "#elif" %token SH_ELIF "#elif"
%token HSELSE "#else" %token SH_ELSE "#else"
%token HSENDIF "#endif" %token SH_ENDIF "#endif"
%token DEVBEGIN "/#" %token DEVBEGIN "/#"
%token DEVEND "#/" %token DEVEND "#/"
%token INLINE "#inline" %token INLINE "#inline"

View File

@ -39,14 +39,14 @@ using namespace xsk::gsc;
xsk::gsc::iw5::parser::symbol_type IW5lex(xsk::gsc::iw5::lexer& lexer); xsk::gsc::iw5::parser::symbol_type IW5lex(xsk::gsc::iw5::lexer& lexer);
} }
%token HSDEFINE "#define" %token SH_DEFINE "#define"
%token HSUNDEF "#undef" %token SH_UNDEF "#undef"
%token HSIFDEF "#ifdef" %token SH_IFDEF "#ifdef"
%token HSIFNDEF "#ifndef" %token SH_IFNDEF "#ifndef"
%token HSIF "#if" %token SH_IF "#if"
%token HSELIF "#elif" %token SH_ELIF "#elif"
%token HSELSE "#else" %token SH_ELSE "#else"
%token HSENDIF "#endif" %token SH_ENDIF "#endif"
%token DEVBEGIN "/#" %token DEVBEGIN "/#"
%token DEVEND "#/" %token DEVEND "#/"
%token INLINE "#inline" %token INLINE "#inline"

View File

@ -39,14 +39,14 @@ using namespace xsk::gsc;
xsk::gsc::iw6::parser::symbol_type IW6lex(xsk::gsc::iw6::lexer& lexer); xsk::gsc::iw6::parser::symbol_type IW6lex(xsk::gsc::iw6::lexer& lexer);
} }
%token HSDEFINE "#define" %token SH_DEFINE "#define"
%token HSUNDEF "#undef" %token SH_UNDEF "#undef"
%token HSIFDEF "#ifdef" %token SH_IFDEF "#ifdef"
%token HSIFNDEF "#ifndef" %token SH_IFNDEF "#ifndef"
%token HSIF "#if" %token SH_IF "#if"
%token HSELIF "#elif" %token SH_ELIF "#elif"
%token HSELSE "#else" %token SH_ELSE "#else"
%token HSENDIF "#endif" %token SH_ENDIF "#endif"
%token DEVBEGIN "/#" %token DEVBEGIN "/#"
%token DEVEND "#/" %token DEVEND "#/"
%token INLINE "#inline" %token INLINE "#inline"

View File

@ -39,14 +39,14 @@ using namespace xsk::gsc;
xsk::gsc::iw7::parser::symbol_type IW7lex(xsk::gsc::iw7::lexer& lexer); xsk::gsc::iw7::parser::symbol_type IW7lex(xsk::gsc::iw7::lexer& lexer);
} }
%token HSDEFINE "#define" %token SH_DEFINE "#define"
%token HSUNDEF "#undef" %token SH_UNDEF "#undef"
%token HSIFDEF "#ifdef" %token SH_IFDEF "#ifdef"
%token HSIFNDEF "#ifndef" %token SH_IFNDEF "#ifndef"
%token HSIF "#if" %token SH_IF "#if"
%token HSELIF "#elif" %token SH_ELIF "#elif"
%token HSELSE "#else" %token SH_ELSE "#else"
%token HSENDIF "#endif" %token SH_ENDIF "#endif"
%token DEVBEGIN "/#" %token DEVBEGIN "/#"
%token DEVEND "#/" %token DEVEND "#/"
%token INLINE "#inline" %token INLINE "#inline"

View File

@ -39,14 +39,14 @@ using namespace xsk::gsc;
xsk::gsc::iw8::parser::symbol_type IW8lex(xsk::gsc::iw8::lexer& lexer); xsk::gsc::iw8::parser::symbol_type IW8lex(xsk::gsc::iw8::lexer& lexer);
} }
%token HSDEFINE "#define" %token SH_DEFINE "#define"
%token HSUNDEF "#undef" %token SH_UNDEF "#undef"
%token HSIFDEF "#ifdef" %token SH_IFDEF "#ifdef"
%token HSIFNDEF "#ifndef" %token SH_IFNDEF "#ifndef"
%token HSIF "#if" %token SH_IF "#if"
%token HSELIF "#elif" %token SH_ELIF "#elif"
%token HSELSE "#else" %token SH_ELSE "#else"
%token HSENDIF "#endif" %token SH_ENDIF "#endif"
%token DEVBEGIN "/#" %token DEVBEGIN "/#"
%token DEVEND "#/" %token DEVEND "#/"
%token INLINE "#inline" %token INLINE "#inline"

View File

@ -39,14 +39,14 @@ using namespace xsk::gsc;
xsk::gsc::s1::parser::symbol_type S1lex(xsk::gsc::s1::lexer& lexer); xsk::gsc::s1::parser::symbol_type S1lex(xsk::gsc::s1::lexer& lexer);
} }
%token HSDEFINE "#define" %token SH_DEFINE "#define"
%token HSUNDEF "#undef" %token SH_UNDEF "#undef"
%token HSIFDEF "#ifdef" %token SH_IFDEF "#ifdef"
%token HSIFNDEF "#ifndef" %token SH_IFNDEF "#ifndef"
%token HSIF "#if" %token SH_IF "#if"
%token HSELIF "#elif" %token SH_ELIF "#elif"
%token HSELSE "#else" %token SH_ELSE "#else"
%token HSENDIF "#endif" %token SH_ENDIF "#endif"
%token DEVBEGIN "/#" %token DEVBEGIN "/#"
%token DEVEND "#/" %token DEVEND "#/"
%token INLINE "#inline" %token INLINE "#inline"

View File

@ -39,14 +39,14 @@ using namespace xsk::gsc;
xsk::gsc::s2::parser::symbol_type S2lex(xsk::gsc::s2::lexer& lexer); xsk::gsc::s2::parser::symbol_type S2lex(xsk::gsc::s2::lexer& lexer);
} }
%token HSDEFINE "#define" %token SH_DEFINE "#define"
%token HSUNDEF "#undef" %token SH_UNDEF "#undef"
%token HSIFDEF "#ifdef" %token SH_IFDEF "#ifdef"
%token HSIFNDEF "#ifndef" %token SH_IFNDEF "#ifndef"
%token HSIF "#if" %token SH_IF "#if"
%token HSELIF "#elif" %token SH_ELIF "#elif"
%token HSELSE "#else" %token SH_ELSE "#else"
%token HSENDIF "#endif" %token SH_ENDIF "#endif"
%token DEVBEGIN "/#" %token DEVBEGIN "/#"
%token DEVEND "#/" %token DEVEND "#/"
%token INLINE "#inline" %token INLINE "#inline"

View File

@ -39,14 +39,14 @@ using namespace xsk::gsc;
xsk::gsc::s4::parser::symbol_type S4lex(xsk::gsc::s4::lexer& lexer); xsk::gsc::s4::parser::symbol_type S4lex(xsk::gsc::s4::lexer& lexer);
} }
%token HSDEFINE "#define" %token SH_DEFINE "#define"
%token HSUNDEF "#undef" %token SH_UNDEF "#undef"
%token HSIFDEF "#ifdef" %token SH_IFDEF "#ifdef"
%token HSIFNDEF "#ifndef" %token SH_IFNDEF "#ifndef"
%token HSIF "#if" %token SH_IF "#if"
%token HSELIF "#elif" %token SH_ELIF "#elif"
%token HSELSE "#else" %token SH_ELSE "#else"
%token HSENDIF "#endif" %token SH_ENDIF "#endif"
%token DEVBEGIN "/#" %token DEVBEGIN "/#"
%token DEVEND "#/" %token DEVEND "#/"
%token INLINE "#inline" %token INLINE "#inline"

View File

@ -16,6 +16,56 @@ xsk::gsc::h1::parser::symbol_type H1lex(xsk::gsc::h1::lexer& lexer)
namespace xsk::gsc::h1 namespace xsk::gsc::h1
{ {
const std::unordered_map<std::string_view, parser::token::token_kind_type> keyword_map
{{
{ "#define", parser::token::SH_DEFINE },
{ "#undef", parser::token::SH_UNDEF },
{ "#ifdef", parser::token::SH_IFDEF },
{ "#ifndef", parser::token::SH_IFNDEF },
{ "#if", parser::token::SH_IF },
{ "#elif", parser::token::SH_ELIF },
{ "#else", parser::token::SH_ELSE },
{ "#endif", parser::token::SH_ENDIF },
{ "#inline", parser::token::INLINE },
{ "#include", parser::token::INCLUDE },
{ "#using_animtree", parser::token::USINGTREE },
{ "#animtree", parser::token::ANIMTREE },
{ "endon", parser::token::ENDON },
{ "notify", parser::token::NOTIFY },
{ "wait", parser::token::WAIT },
{ "waittill", parser::token::WAITTILL },
{ "waittillmatch", parser::token::WAITTILLMATCH },
{ "waittillframeend", parser::token::WAITTILLFRAMEEND },
{ "waitframe", parser::token::WAITFRAME },
{ "if", parser::token::IF },
{ "else", parser::token::ELSE },
{ "do", parser::token::DO },
{ "while", parser::token::WHILE },
{ "for", parser::token::FOR },
{ "foreach", parser::token::FOREACH },
{ "in", parser::token::IN },
{ "switch", parser::token::SWITCH },
{ "case", parser::token::CASE },
{ "default", parser::token::DEFAULT },
{ "break", parser::token::BREAK },
{ "continue", parser::token::CONTINUE },
{ "return", parser::token::RETURN },
{ "breakpoint", parser::token::BREAKPOINT },
{ "prof_begin", parser::token::PROFBEGIN },
{ "prof_end", parser::token::PROFEND },
{ "thread", parser::token::THREAD },
{ "childthread", parser::token::CHILDTHREAD },
{ "thisthread", parser::token::THISTHREAD },
{ "call", parser::token::CALL },
{ "true", parser::token::TRUE },
{ "false", parser::token::FALSE },
{ "undefined", parser::token::UNDEFINED },
{ "game", parser::token::GAME },
{ "self", parser::token::SELF },
{ "anim", parser::token::ANIM },
{ "level", parser::token::LEVEL },
}};
buffer::buffer() : length(0) buffer::buffer() : length(0)
{ {
data = static_cast<char*>(std::malloc(max_buf_size)); data = static_cast<char*>(std::malloc(max_buf_size));
@ -35,10 +85,8 @@ bool buffer::push(char c)
return true; return true;
} }
// Default-constructs a reader in the `end` state with its buffer position,
// remaining-byte count and both byte registers set to zero.
reader::reader()
    : state(reader::end)
    , buffer_pos(0)
    , bytes_remaining(0)
    , last_byte(0)
    , current_byte(0)
{
}
void reader::init(const char* data, size_t size) void reader::init(const char* data, size_t size)
{ {
@ -78,7 +126,7 @@ void reader::advance()
} }
} }
lexer::lexer(const std::string& name, const char* data, size_t size) : indev_(false), loc_(location(&name)), lexer::lexer(const std::string& name, const char* data, size_t size) : indev_(false), clean_(true), loc_(location(&name)),
mode_(build::dev), header_top_(0), locs_(std::stack<location>()), readers_(std::stack<reader>()) mode_(build::dev), header_top_(0), locs_(std::stack<location>()), readers_(std::stack<reader>())
{ {
reader_.init(data, size); reader_.init(data, size);
@ -97,6 +145,7 @@ void lexer::push_header(const std::string& file)
locs_.push(loc_); locs_.push(loc_);
loc_.initialize(std::get<0>(data)); loc_.initialize(std::get<0>(data));
reader_.init(std::get<1>(data), std::get<2>(data)); reader_.init(std::get<1>(data), std::get<2>(data));
clean_ = true;
} }
catch (const std::exception& e) catch (const std::exception& e)
{ {
@ -145,7 +194,10 @@ auto lexer::lex() -> parser::symbol_type
return parser::make_H1EOF(loc_); return parser::make_H1EOF(loc_);
} }
reader_.advance(); if (clean_ && last != 0 && last != ' ' && last != '\t' && last != '\n')
clean_ = false;
advance();
switch (last) switch (last)
{ {
@ -157,12 +209,15 @@ auto lexer::lex() -> parser::symbol_type
case '\n': case '\n':
loc_.lines(); loc_.lines();
loc_.step(); loc_.step();
clean_ = true;
continue; continue;
case '\\':
throw comp_error(loc_, "invalid token ('\\')");
case '/': case '/':
if (curr != '/' && curr != '*' && curr != '#' && curr != '=') if (curr != '/' && curr != '*' && curr != '#' && curr != '=')
return parser::make_DIV(loc_); return parser::make_DIV(loc_);
reader_.advance(); advance();
if (last == '=') if (last == '=')
return parser::make_ASSIGN_DIV(loc_); return parser::make_ASSIGN_DIV(loc_);
@ -224,7 +279,30 @@ auto lexer::lex() -> parser::symbol_type
{ {
while (true) while (true)
{ {
if (state == reader::end || curr == '\n') if (state == reader::end)
break;
// Line continuation inside the skip loop (presumably the '//' line-comment
// loop -- confirm against the full function): a backslash directly followed
// by a line break keeps the loop going on the next line.
//
// The line-break test is parenthesised on purpose. Without the parentheses
// the condition parses as (last == '\\' && curr == '\r') || curr == '\n',
// which treats EVERY newline as a continuation and never reaches the
// `curr == '\n'` termination check that follows this statement.
if (last == '\\' && (curr == '\r' || curr == '\n'))
{
    reader_.advance();

    if (state == reader::end)
        break;

    if (last == '\r')
    {
        // a carriage return is only accepted as the first half of CRLF
        if (curr != '\n')
            throw comp_error(loc_, "invalid token ('\\')");

        reader_.advance();
    }

    // account for the consumed line break in the source location
    loc_.lines();
    loc_.step();
    continue;
}
if (curr == '\n')
break; break;
reader_.advance(); reader_.advance();
@ -238,15 +316,23 @@ auto lexer::lex() -> parser::symbol_type
throw comp_error(loc_, "unmatched devblock end ('#/')"); throw comp_error(loc_, "unmatched devblock end ('#/')");
indev_ = false; indev_ = false;
reader_.advance(); advance();
return parser::make_DEVEND(loc_); return parser::make_DEVEND(loc_);
} }
buffer_.push(last); buffer_.push(last);
reader_.advance(); advance();
// Skip spaces and tabs between '#' and the directive name.
// The two inequalities must be joined with &&: `last != ' ' || last != '\t'`
// holds for every possible character, so the loop used to stop immediately
// and no blank was ever skipped.
while (state == reader::ok)
{
    if (last != ' ' && last != '\t')
        break;

    advance();
}
if (state == reader::end || !((last > 64 && last < 91) || (last > 96 && last < 123))) if (state == reader::end || !((last > 64 && last < 91) || (last > 96 && last < 123)))
throw comp_error(loc_, "unterminated preprocessor directive ('#')"); throw comp_error(loc_, "invalid preprocessor directive ('#')");
state_ = state::preprocessor; state_ = state::preprocessor;
goto lex_name; goto lex_name;
@ -254,7 +340,7 @@ auto lexer::lex() -> parser::symbol_type
if (curr != '/' && curr != '=') if (curr != '/' && curr != '=')
return parser::make_MUL(loc_); return parser::make_MUL(loc_);
reader_.advance(); advance();
if (last == '=') if (last == '=')
return parser::make_ASSIGN_MUL(loc_); return parser::make_ASSIGN_MUL(loc_);
@ -264,9 +350,9 @@ auto lexer::lex() -> parser::symbol_type
state_ = state::string; state_ = state::string;
goto lex_string; goto lex_string;
case '.': case '.':
reader_.advance(); advance();
if(state == reader::end) if (state == reader::end)
throw comp_error(loc_, "unterminated field ('.')"); throw comp_error(loc_, "unterminated field ('.')");
state_ = state::field; state_ = state::field;
@ -291,7 +377,7 @@ auto lexer::lex() -> parser::symbol_type
if (curr != ':') if (curr != ':')
return parser::make_COLON(loc_); return parser::make_COLON(loc_);
reader_.advance(); advance();
return parser::make_DOUBLECOLON(loc_); return parser::make_DOUBLECOLON(loc_);
case '?': case '?':
return parser::make_QMARK(loc_); return parser::make_QMARK(loc_);
@ -299,13 +385,13 @@ auto lexer::lex() -> parser::symbol_type
if (curr != '=') if (curr != '=')
return parser::make_ASSIGN(loc_); return parser::make_ASSIGN(loc_);
reader_.advance(); advance();
return parser::make_EQUALITY(loc_); return parser::make_EQUALITY(loc_);
case '+': case '+':
if (curr != '+' && curr != '=') if (curr != '+' && curr != '=')
return parser::make_ADD(loc_); return parser::make_ADD(loc_);
reader_.advance(); advance();
if (last == '+') if (last == '+')
return parser::make_INCREMENT(loc_); return parser::make_INCREMENT(loc_);
@ -315,7 +401,7 @@ auto lexer::lex() -> parser::symbol_type
if (curr != '-' && curr != '=') if (curr != '-' && curr != '=')
return parser::make_SUB(loc_); return parser::make_SUB(loc_);
reader_.advance(); advance();
if (last == '-') if (last == '-')
return parser::make_DECREMENT(loc_); return parser::make_DECREMENT(loc_);
@ -325,24 +411,24 @@ auto lexer::lex() -> parser::symbol_type
if (curr != '=') if (curr != '=')
return parser::make_MOD(loc_); return parser::make_MOD(loc_);
reader_.advance(); advance();
return parser::make_ASSIGN_MOD(loc_); return parser::make_ASSIGN_MOD(loc_);
case '|': case '|':
if (curr != '|' && curr != '=') if (curr != '|' && curr != '=')
return parser::make_BITWISE_OR(loc_); return parser::make_BITWISE_OR(loc_);
reader_.advance(); advance();
if (last == '|') if (last == '|')
return parser::make_OR(loc_); return parser::make_OR(loc_);
return parser::make_ASSIGN_BW_OR(loc_); return parser::make_ASSIGN_BW_OR(loc_);
case '&': case '&':
if (curr != '&' && curr != '=' && curr != '"' && curr != '\'') if (curr != '&' && curr != '=' && curr != '"')
return parser::make_BITWISE_AND(loc_); return parser::make_BITWISE_AND(loc_);
reader_.advance(); advance();
if (last == '&') if (last == '&')
return parser::make_AND(loc_); return parser::make_AND(loc_);
@ -356,13 +442,13 @@ auto lexer::lex() -> parser::symbol_type
if (curr != '=') if (curr != '=')
return parser::make_BITWISE_EXOR(loc_); return parser::make_BITWISE_EXOR(loc_);
reader_.advance(); advance();
return parser::make_ASSIGN_BW_EXOR(loc_); return parser::make_ASSIGN_BW_EXOR(loc_);
case '!': case '!':
if (curr != '=') if (curr != '=')
return parser::make_NOT(loc_); return parser::make_NOT(loc_);
reader_.advance(); advance();
return parser::make_INEQUALITY(loc_); return parser::make_INEQUALITY(loc_);
case '~': case '~':
return parser::make_COMPLEMENT(loc_); return parser::make_COMPLEMENT(loc_);
@ -370,20 +456,20 @@ auto lexer::lex() -> parser::symbol_type
if (curr != '<' && curr != '=') if (curr != '<' && curr != '=')
return parser::make_LESS(loc_); return parser::make_LESS(loc_);
reader_.advance(); advance();
if (last == '=') if (last == '=')
return parser::make_LESS_EQUAL(loc_); return parser::make_LESS_EQUAL(loc_);
if (curr != '=') if (curr != '=')
return parser::make_LSHIFT(loc_); return parser::make_LSHIFT(loc_);
reader_.advance(); advance();
return parser::make_ASSIGN_LSHIFT(loc_); return parser::make_ASSIGN_LSHIFT(loc_);
case '>': case '>':
if (curr != '>' && curr != '=') if (curr != '>' && curr != '=')
return parser::make_GREATER(loc_); return parser::make_GREATER(loc_);
reader_.advance(); advance();
if (last == '=') if (last == '=')
return parser::make_GREATER_EQUAL(loc_); return parser::make_GREATER_EQUAL(loc_);
@ -391,7 +477,7 @@ auto lexer::lex() -> parser::symbol_type
if (curr != '=') if (curr != '=')
return parser::make_RSHIFT(loc_); return parser::make_RSHIFT(loc_);
reader_.advance(); advance();
return parser::make_ASSIGN_RSHIFT(loc_); return parser::make_ASSIGN_RSHIFT(loc_);
default: default:
lex_name_or_number: lex_name_or_number:
@ -404,24 +490,27 @@ lex_name_or_number:
} }
lex_string: lex_string:
if (state == reader::end)
throw comp_error(loc_, "unmatched string start ('\"')");
reader_.advance();
while (true) while (true)
{ {
if (last == '"')
break;
if (last == '\n')
throw comp_error(loc_, "unterminated string literal");
if (state == reader::end) if (state == reader::end)
throw comp_error(loc_, "unmatched string start ('\"')"); throw comp_error(loc_, "unmatched string start ('\"')");
if (last == '\\') if (curr == '"')
{ {
advance();
break;
}
if (curr == '\n')
throw comp_error(loc_, "unterminated string literal");
if (curr == '\\')
{
advance();
// Input ended right after the escape backslash. The backslash in the message
// is written as '\\'; the previous "('\')" used the \' escape and printed (').
if (state == reader::end)
    throw comp_error(loc_, "invalid token ('\\')");
char c = curr; char c = curr;
switch (curr) switch (curr)
{ {
@ -435,13 +524,11 @@ lex_string:
if (!buffer_.push(c)) if (!buffer_.push(c))
throw comp_error(loc_, "max string size exceeded"); throw comp_error(loc_, "max string size exceeded");
reader_.advance();
} }
else if (!buffer_.push(last)) else if (!buffer_.push(curr))
throw comp_error(loc_, "max string size exceeded"); throw comp_error(loc_, "max string size exceeded");
reader_.advance(); advance();
} }
if (state_ == state::localize) if (state_ == state::localize)
@ -472,7 +559,7 @@ lex_name:
else if (!buffer_.push(curr)) else if (!buffer_.push(curr))
throw comp_error(loc_, "max string size exceeded"); throw comp_error(loc_, "max string size exceeded");
reader_.advance(); advance();
} }
if(state_ == state::field) if(state_ == state::field)
@ -491,24 +578,23 @@ lex_name:
{ {
if (path) if (path)
throw comp_error(loc_, "invalid preprocessor directive"); throw comp_error(loc_, "invalid preprocessor directive");
auto token = parser::token::H1UNDEF; auto token = parser::token::H1UNDEF;
if (buffer_.length < 16) if (buffer_.length < 16)
{ {
const auto& itr = keyword_map.find(std::string_view(buffer_.data, buffer_.length)); const auto& itr = keyword_map.find(std::string_view(buffer_.data, buffer_.length));
if(itr != keyword_map.end()) if (itr != keyword_map.end())
{ {
if (itr->second > parser::token::HSENDIF) if (itr->second > parser::token::SH_ENDIF)
return parser::symbol_type(itr->second, loc_); return parser::symbol_type(itr->second, loc_);
token = itr->second; token = itr->second;
} }
} }
// TODO: call preprocessor(token); preprocessor(token);
throw comp_error(loc_, "unknown preprocessor directive");
state_ = state::start; state_ = state::start;
continue; continue;
} }
@ -518,7 +604,7 @@ lex_name:
{ {
const auto& itr = keyword_map.find(std::string_view(buffer_.data, buffer_.length)); const auto& itr = keyword_map.find(std::string_view(buffer_.data, buffer_.length));
if(itr != keyword_map.end()) if (itr != keyword_map.end())
return parser::symbol_type(itr->second, loc_); return parser::symbol_type(itr->second, loc_);
} }
@ -557,7 +643,7 @@ lex_number:
if (curr == '\'') if (curr == '\'')
{ {
reader_.advance(); advance();
continue; continue;
} }
@ -571,7 +657,7 @@ lex_number:
if (!buffer_.push(curr)) if (!buffer_.push(curr))
throw comp_error(loc_, "number literal size exceeded"); throw comp_error(loc_, "number literal size exceeded");
reader_.advance(); advance();
} }
if (last == '\'') if (last == '\'')
@ -587,7 +673,7 @@ lex_number:
} }
else if (curr == 'o') else if (curr == 'o')
{ {
reader_.advance(); advance();
while (true) while (true)
{ {
@ -599,7 +685,7 @@ lex_number:
if (curr == '\'') if (curr == '\'')
{ {
reader_.advance(); advance();
continue; continue;
} }
@ -609,7 +695,7 @@ lex_number:
if (!buffer_.push(curr)) if (!buffer_.push(curr))
throw error("gsc lexer: out of memory!"); throw error("gsc lexer: out of memory!");
reader_.advance(); advance();
} }
if (last == '\'' || buffer_.length <= 0) if (last == '\'' || buffer_.length <= 0)
@ -621,7 +707,7 @@ lex_number:
{ {
buffer_.push(last); buffer_.push(last);
buffer_.push(curr); buffer_.push(curr);
reader_.advance(); advance();
while (true) while (true)
{ {
@ -633,7 +719,7 @@ lex_number:
if (curr == '\'') if (curr == '\'')
{ {
reader_.advance(); advance();
continue; continue;
} }
@ -643,7 +729,7 @@ lex_number:
if (!buffer_.push(curr)) if (!buffer_.push(curr))
throw comp_error(loc_, "number literal size exceeded"); throw comp_error(loc_, "number literal size exceeded");
reader_.advance(); advance();
} }
if (last == '\'' || buffer_.length < 3) if (last == '\'' || buffer_.length < 3)
@ -655,7 +741,7 @@ lex_number:
{ {
buffer_.push(last); buffer_.push(last);
buffer_.push(curr); buffer_.push(curr);
reader_.advance(); advance();
while (true) while (true)
{ {
@ -667,7 +753,7 @@ lex_number:
if (curr == '\'') if (curr == '\'')
{ {
reader_.advance(); advance();
continue; continue;
} }
@ -677,7 +763,7 @@ lex_number:
if (!buffer_.push(curr)) if (!buffer_.push(curr))
throw error("gsc lexer: out of memory!"); throw error("gsc lexer: out of memory!");
reader_.advance(); advance();
} }
if (last == '\'' || buffer_.length < 3) if (last == '\'' || buffer_.length < 3)
@ -689,54 +775,89 @@ lex_number:
} }
} }
const std::unordered_map<std::string_view, parser::token::token_kind_type> lexer::keyword_map void lexer::advance()
{{ {
{ "#define", parser::token::HSDEFINE }, reader_.advance();
{ "#undef", parser::token::HSUNDEF },
{ "#ifdef", parser::token::HSIFDEF }, // dont wrap comment marks '/\/' '/\*' outside strings
{ "#ifndef", parser::token::HSIFNDEF }, if (state_ == state::start && reader_.last_byte == '/')
{ "#if", parser::token::HSIF }, return;
{ "#elif", parser::token::HSELIF },
{ "#else", parser::token::HSELSE }, while (reader_.current_byte == '\\')
{ "#endif", parser::token::HSENDIF }, {
{ "#inline", parser::token::INLINE }, if (reader_.bytes_remaining == 1)
{ "#include", parser::token::INCLUDE }, throw comp_error(loc_, "invalid token ('\\')");
{ "#using_animtree", parser::token::USINGTREE },
{ "#animtree", parser::token::ANIMTREE }, if (reader_.buffer_pos[1] != '\r' && reader_.buffer_pos[1] != '\n')
{ "endon", parser::token::ENDON }, break;
{ "notify", parser::token::NOTIFY },
{ "wait", parser::token::WAIT }, if (reader_.buffer_pos[1] == '\r')
{ "waittill", parser::token::WAITTILL }, {
{ "waittillmatch", parser::token::WAITTILLMATCH }, if (reader_.bytes_remaining <= 3 || reader_.buffer_pos[2] != '\n')
{ "waittillframeend", parser::token::WAITTILLFRAMEEND }, throw comp_error(loc_, "invalid token ('\\')");
{ "waitframe", parser::token::WAITFRAME },
{ "if", parser::token::IF }, reader_.buffer_pos += 3;
{ "else", parser::token::ELSE }, reader_.bytes_remaining -= 3;
{ "do", parser::token::DO }, }
{ "while", parser::token::WHILE },
{ "for", parser::token::FOR }, if ((reader_.buffer_pos[1] == '\n'))
{ "foreach", parser::token::FOREACH }, {
{ "in", parser::token::IN }, if (reader_.bytes_remaining == 2)
{ "switch", parser::token::SWITCH }, throw comp_error(loc_, "invalid token ('\\')");
{ "case", parser::token::CASE },
{ "default", parser::token::DEFAULT }, reader_.buffer_pos += 2;
{ "break", parser::token::BREAK }, reader_.bytes_remaining -= 2;
{ "continue", parser::token::CONTINUE }, }
{ "return", parser::token::RETURN },
{ "breakpoint", parser::token::BREAKPOINT }, if (reader_.bytes_remaining == 0)
{ "prof_begin", parser::token::PROFBEGIN }, {
{ "prof_end", parser::token::PROFEND }, reader_.state = reader::end;
{ "thread", parser::token::THREAD }, reader_.current_byte = 0;
{ "childthread", parser::token::CHILDTHREAD }, }
{ "thisthread", parser::token::THISTHREAD }, else
{ "call", parser::token::CALL }, {
{ "true", parser::token::TRUE }, reader_.current_byte = *reader_.buffer_pos;
{ "false", parser::token::FALSE }, }
{ "undefined", parser::token::UNDEFINED },
{ "game", parser::token::GAME }, loc_.lines();
{ "self", parser::token::SELF }, loc_.step();
{ "anim", parser::token::ANIM }, }
{ "level", parser::token::LEVEL }, }
}};
// Entry point for '#' directives that are handled by the lexer itself.
// No directive is implemented yet, so every call ends in a comp_error:
//   - "invalid token ('#')" when clean_ is false (lex() clears clean_ once a
//     character other than space, tab or newline has been read on the line),
//   - "unimplemented preprocessor directive" for the SH_* tokens,
//   - "unknown preprocessor directive" for any other token.
void lexer::preprocessor(parser::token::token_kind_type token)
{
    // checked first: a misplaced '#' is reported regardless of the directive
    if (!clean_)
        throw comp_error(loc_, "invalid token ('#')");

    switch (token)
    {
        case parser::token::SH_DEFINE:
        case parser::token::SH_UNDEF:
        case parser::token::SH_IFDEF:
        case parser::token::SH_IFNDEF:
        case parser::token::SH_IF:
        case parser::token::SH_ELIF:
        case parser::token::SH_ELSE:
        case parser::token::SH_ENDIF:
            throw comp_error(loc_, "unimplemented preprocessor directive");

        default:
            throw comp_error(loc_, "unknown preprocessor directive");
    }
}
} // namespace xsk::gsc::h1 } // namespace xsk::gsc::h1

View File

@ -5,8 +5,6 @@
#pragma once #pragma once
#include "h1.hpp"
namespace xsk::gsc::h1 namespace xsk::gsc::h1
{ {
@ -57,6 +55,7 @@ class lexer
state state_; state state_;
build mode_; build mode_;
bool indev_; bool indev_;
bool clean_;
public: public:
lexer(const std::string& name, const char* data, size_t size); lexer(const std::string& name, const char* data, size_t size);
@ -66,7 +65,8 @@ public:
void ban_header(const location& loc); void ban_header(const location& loc);
private: private:
static const std::unordered_map<std::string_view, parser::token::token_kind_type> keyword_map; void advance();
void preprocessor(parser::token::token_kind_type token);
}; };
} // namespace xsk::gsc::h1 } // namespace xsk::gsc::h1

View File

@ -679,14 +679,14 @@ namespace xsk { namespace gsc { namespace h1 {
H1EOF = 0, // "end of file" H1EOF = 0, // "end of file"
H1error = 1, // error H1error = 1, // error
H1UNDEF = 2, // "invalid token" H1UNDEF = 2, // "invalid token"
HSDEFINE = 3, // "#define" SH_DEFINE = 3, // "#define"
HSUNDEF = 4, // "#undef" SH_UNDEF = 4, // "#undef"
HSIFDEF = 5, // "#ifdef" SH_IFDEF = 5, // "#ifdef"
HSIFNDEF = 6, // "#ifndef" SH_IFNDEF = 6, // "#ifndef"
HSIF = 7, // "#if" SH_IF = 7, // "#if"
HSELIF = 8, // "#elif" SH_ELIF = 8, // "#elif"
HSELSE = 9, // "#else" SH_ELSE = 9, // "#else"
HSENDIF = 10, // "#endif" SH_ENDIF = 10, // "#endif"
DEVBEGIN = 11, // "/#" DEVBEGIN = 11, // "/#"
DEVEND = 12, // "#/" DEVEND = 12, // "#/"
INLINE = 13, // "#inline" INLINE = 13, // "#inline"
@ -811,14 +811,14 @@ namespace xsk { namespace gsc { namespace h1 {
S_YYEOF = 0, // "end of file" S_YYEOF = 0, // "end of file"
S_YYerror = 1, // error S_YYerror = 1, // error
S_YYUNDEF = 2, // "invalid token" S_YYUNDEF = 2, // "invalid token"
S_HSDEFINE = 3, // "#define" S_SH_DEFINE = 3, // "#define"
S_HSUNDEF = 4, // "#undef" S_SH_UNDEF = 4, // "#undef"
S_HSIFDEF = 5, // "#ifdef" S_SH_IFDEF = 5, // "#ifdef"
S_HSIFNDEF = 6, // "#ifndef" S_SH_IFNDEF = 6, // "#ifndef"
S_HSIF = 7, // "#if" S_SH_IF = 7, // "#if"
S_HSELIF = 8, // "#elif" S_SH_ELIF = 8, // "#elif"
S_HSELSE = 9, // "#else" S_SH_ELSE = 9, // "#else"
S_HSENDIF = 10, // "#endif" S_SH_ENDIF = 10, // "#endif"
S_DEVBEGIN = 11, // "/#" S_DEVBEGIN = 11, // "/#"
S_DEVEND = 12, // "#/" S_DEVEND = 12, // "#/"
S_INLINE = 13, // "#inline" S_INLINE = 13, // "#inline"
@ -2800,121 +2800,121 @@ switch (yykind)
#if 201103L <= YY_CPLUSPLUS #if 201103L <= YY_CPLUSPLUS
static static
symbol_type symbol_type
make_HSDEFINE (location_type l) make_SH_DEFINE (location_type l)
{ {
return symbol_type (token::HSDEFINE, std::move (l)); return symbol_type (token::SH_DEFINE, std::move (l));
} }
#else #else
static static
symbol_type symbol_type
make_HSDEFINE (const location_type& l) make_SH_DEFINE (const location_type& l)
{ {
return symbol_type (token::HSDEFINE, l); return symbol_type (token::SH_DEFINE, l);
} }
#endif #endif
#if 201103L <= YY_CPLUSPLUS #if 201103L <= YY_CPLUSPLUS
static static
symbol_type symbol_type
make_HSUNDEF (location_type l) make_SH_UNDEF (location_type l)
{ {
return symbol_type (token::HSUNDEF, std::move (l)); return symbol_type (token::SH_UNDEF, std::move (l));
} }
#else #else
static static
symbol_type symbol_type
make_HSUNDEF (const location_type& l) make_SH_UNDEF (const location_type& l)
{ {
return symbol_type (token::HSUNDEF, l); return symbol_type (token::SH_UNDEF, l);
} }
#endif #endif
#if 201103L <= YY_CPLUSPLUS #if 201103L <= YY_CPLUSPLUS
static static
symbol_type symbol_type
make_HSIFDEF (location_type l) make_SH_IFDEF (location_type l)
{ {
return symbol_type (token::HSIFDEF, std::move (l)); return symbol_type (token::SH_IFDEF, std::move (l));
} }
#else #else
static static
symbol_type symbol_type
make_HSIFDEF (const location_type& l) make_SH_IFDEF (const location_type& l)
{ {
return symbol_type (token::HSIFDEF, l); return symbol_type (token::SH_IFDEF, l);
} }
#endif #endif
#if 201103L <= YY_CPLUSPLUS #if 201103L <= YY_CPLUSPLUS
static static
symbol_type symbol_type
make_HSIFNDEF (location_type l) make_SH_IFNDEF (location_type l)
{ {
return symbol_type (token::HSIFNDEF, std::move (l)); return symbol_type (token::SH_IFNDEF, std::move (l));
} }
#else #else
static static
symbol_type symbol_type
make_HSIFNDEF (const location_type& l) make_SH_IFNDEF (const location_type& l)
{ {
return symbol_type (token::HSIFNDEF, l); return symbol_type (token::SH_IFNDEF, l);
} }
#endif #endif
#if 201103L <= YY_CPLUSPLUS #if 201103L <= YY_CPLUSPLUS
static static
symbol_type symbol_type
make_HSIF (location_type l) make_SH_IF (location_type l)
{ {
return symbol_type (token::HSIF, std::move (l)); return symbol_type (token::SH_IF, std::move (l));
} }
#else #else
static static
symbol_type symbol_type
make_HSIF (const location_type& l) make_SH_IF (const location_type& l)
{ {
return symbol_type (token::HSIF, l); return symbol_type (token::SH_IF, l);
} }
#endif #endif
#if 201103L <= YY_CPLUSPLUS #if 201103L <= YY_CPLUSPLUS
static static
symbol_type symbol_type
make_HSELIF (location_type l) make_SH_ELIF (location_type l)
{ {
return symbol_type (token::HSELIF, std::move (l)); return symbol_type (token::SH_ELIF, std::move (l));
} }
#else #else
static static
symbol_type symbol_type
make_HSELIF (const location_type& l) make_SH_ELIF (const location_type& l)
{ {
return symbol_type (token::HSELIF, l); return symbol_type (token::SH_ELIF, l);
} }
#endif #endif
#if 201103L <= YY_CPLUSPLUS #if 201103L <= YY_CPLUSPLUS
static static
symbol_type symbol_type
make_HSELSE (location_type l) make_SH_ELSE (location_type l)
{ {
return symbol_type (token::HSELSE, std::move (l)); return symbol_type (token::SH_ELSE, std::move (l));
} }
#else #else
static static
symbol_type symbol_type
make_HSELSE (const location_type& l) make_SH_ELSE (const location_type& l)
{ {
return symbol_type (token::HSELSE, l); return symbol_type (token::SH_ELSE, l);
} }
#endif #endif
#if 201103L <= YY_CPLUSPLUS #if 201103L <= YY_CPLUSPLUS
static static
symbol_type symbol_type
make_HSENDIF (location_type l) make_SH_ENDIF (location_type l)
{ {
return symbol_type (token::HSENDIF, std::move (l)); return symbol_type (token::SH_ENDIF, std::move (l));
} }
#else #else
static static
symbol_type symbol_type
make_HSENDIF (const location_type& l) make_SH_ENDIF (const location_type& l)
{ {
return symbol_type (token::HSENDIF, l); return symbol_type (token::SH_ENDIF, l);
} }
#endif #endif
#if 201103L <= YY_CPLUSPLUS #if 201103L <= YY_CPLUSPLUS

View File

@ -16,6 +16,56 @@ xsk::gsc::h2::parser::symbol_type H2lex(xsk::gsc::h2::lexer& lexer)
namespace xsk::gsc::h2 namespace xsk::gsc::h2
{ {
// Lookup table from source spelling to parser token kind, covering both the
// '#'-prefixed directives and the plain-word keywords recognised by this lexer.
// Keys are string_views over string literals, so they remain valid for the
// lifetime of the table.
//
// The first eight entries ("#define" .. "#endif") are the SH_* preprocessor
// directives. lexer::lex() compares a looked-up directive token against
// parser::token::SH_ENDIF: anything greater is returned to the parser as a
// normal symbol, everything else is handed to lexer::preprocessor().
const std::unordered_map<std::string_view, parser::token::token_kind_type> keyword_map
{{
// preprocessor directives (handled inside the lexer)
{ "#define", parser::token::SH_DEFINE },
{ "#undef", parser::token::SH_UNDEF },
{ "#ifdef", parser::token::SH_IFDEF },
{ "#ifndef", parser::token::SH_IFNDEF },
{ "#if", parser::token::SH_IF },
{ "#elif", parser::token::SH_ELIF },
{ "#else", parser::token::SH_ELSE },
{ "#endif", parser::token::SH_ENDIF },
// '#' keywords that are forwarded to the parser
{ "#inline", parser::token::INLINE },
{ "#include", parser::token::INCLUDE },
{ "#using_animtree", parser::token::USINGTREE },
{ "#animtree", parser::token::ANIMTREE },
// plain-word keywords
{ "endon", parser::token::ENDON },
{ "notify", parser::token::NOTIFY },
{ "wait", parser::token::WAIT },
{ "waittill", parser::token::WAITTILL },
{ "waittillmatch", parser::token::WAITTILLMATCH },
{ "waittillframeend", parser::token::WAITTILLFRAMEEND },
{ "waitframe", parser::token::WAITFRAME },
{ "if", parser::token::IF },
{ "else", parser::token::ELSE },
{ "do", parser::token::DO },
{ "while", parser::token::WHILE },
{ "for", parser::token::FOR },
{ "foreach", parser::token::FOREACH },
{ "in", parser::token::IN },
{ "switch", parser::token::SWITCH },
{ "case", parser::token::CASE },
{ "default", parser::token::DEFAULT },
{ "break", parser::token::BREAK },
{ "continue", parser::token::CONTINUE },
{ "return", parser::token::RETURN },
{ "breakpoint", parser::token::BREAKPOINT },
{ "prof_begin", parser::token::PROFBEGIN },
{ "prof_end", parser::token::PROFEND },
{ "thread", parser::token::THREAD },
{ "childthread", parser::token::CHILDTHREAD },
{ "thisthread", parser::token::THISTHREAD },
{ "call", parser::token::CALL },
{ "true", parser::token::TRUE },
{ "false", parser::token::FALSE },
{ "undefined", parser::token::UNDEFINED },
{ "game", parser::token::GAME },
{ "self", parser::token::SELF },
{ "anim", parser::token::ANIM },
{ "level", parser::token::LEVEL },
}};
buffer::buffer() : length(0) buffer::buffer() : length(0)
{ {
data = static_cast<char*>(std::malloc(max_buf_size)); data = static_cast<char*>(std::malloc(max_buf_size));
@ -35,10 +85,8 @@ bool buffer::push(char c)
return true; return true;
} }
reader::reader() : state(reader::end), buffer_pos(0), bytes_remaining(0), last_byte(0), current_byte(0) reader::reader() : state(reader::end), buffer_pos(0), bytes_remaining(0),
{ last_byte(0), current_byte(0) { }
}
void reader::init(const char* data, size_t size) void reader::init(const char* data, size_t size)
{ {
@ -78,7 +126,7 @@ void reader::advance()
} }
} }
lexer::lexer(const std::string& name, const char* data, size_t size) : indev_(false), loc_(location(&name)), lexer::lexer(const std::string& name, const char* data, size_t size) : indev_(false), clean_(true), loc_(location(&name)),
mode_(build::dev), header_top_(0), locs_(std::stack<location>()), readers_(std::stack<reader>()) mode_(build::dev), header_top_(0), locs_(std::stack<location>()), readers_(std::stack<reader>())
{ {
reader_.init(data, size); reader_.init(data, size);
@ -97,6 +145,7 @@ void lexer::push_header(const std::string& file)
locs_.push(loc_); locs_.push(loc_);
loc_.initialize(std::get<0>(data)); loc_.initialize(std::get<0>(data));
reader_.init(std::get<1>(data), std::get<2>(data)); reader_.init(std::get<1>(data), std::get<2>(data));
clean_ = true;
} }
catch (const std::exception& e) catch (const std::exception& e)
{ {
@ -145,7 +194,10 @@ auto lexer::lex() -> parser::symbol_type
return parser::make_H2EOF(loc_); return parser::make_H2EOF(loc_);
} }
reader_.advance(); if (clean_ && last != 0 && last != ' ' && last != '\t' && last != '\n')
clean_ = false;
advance();
switch (last) switch (last)
{ {
@ -157,12 +209,15 @@ auto lexer::lex() -> parser::symbol_type
case '\n': case '\n':
loc_.lines(); loc_.lines();
loc_.step(); loc_.step();
clean_ = true;
continue; continue;
case '\\':
throw comp_error(loc_, "invalid token ('\\')");
case '/': case '/':
if (curr != '/' && curr != '*' && curr != '#' && curr != '=') if (curr != '/' && curr != '*' && curr != '#' && curr != '=')
return parser::make_DIV(loc_); return parser::make_DIV(loc_);
reader_.advance(); advance();
if (last == '=') if (last == '=')
return parser::make_ASSIGN_DIV(loc_); return parser::make_ASSIGN_DIV(loc_);
@ -224,7 +279,30 @@ auto lexer::lex() -> parser::symbol_type
{ {
while (true) while (true)
{ {
if (state == reader::end || curr == '\n') if (state == reader::end)
break;
if (last == '\\' && curr == '\r' || curr == '\n')
{
reader_.advance();
if (state == reader::end)
break;
if (last == '\r')
{
if (curr != '\n')
throw comp_error(loc_, "invalid token ('\')");
reader_.advance();
}
loc_.lines();
loc_.step();
continue;
}
if (curr == '\n')
break; break;
reader_.advance(); reader_.advance();
@ -238,15 +316,23 @@ auto lexer::lex() -> parser::symbol_type
throw comp_error(loc_, "unmatched devblock end ('#/')"); throw comp_error(loc_, "unmatched devblock end ('#/')");
indev_ = false; indev_ = false;
reader_.advance(); advance();
return parser::make_DEVEND(loc_); return parser::make_DEVEND(loc_);
} }
buffer_.push(last); buffer_.push(last);
reader_.advance(); advance();
while (state == reader::ok)
{
if (last != ' ' || last != '\t')
break;
advance();
}
if (state == reader::end || !((last > 64 && last < 91) || (last > 96 && last < 123))) if (state == reader::end || !((last > 64 && last < 91) || (last > 96 && last < 123)))
throw comp_error(loc_, "unterminated preprocessor directive ('#')"); throw comp_error(loc_, "invalid preprocessor directive ('#')");
state_ = state::preprocessor; state_ = state::preprocessor;
goto lex_name; goto lex_name;
@ -254,7 +340,7 @@ auto lexer::lex() -> parser::symbol_type
if (curr != '/' && curr != '=') if (curr != '/' && curr != '=')
return parser::make_MUL(loc_); return parser::make_MUL(loc_);
reader_.advance(); advance();
if (last == '=') if (last == '=')
return parser::make_ASSIGN_MUL(loc_); return parser::make_ASSIGN_MUL(loc_);
@ -264,9 +350,9 @@ auto lexer::lex() -> parser::symbol_type
state_ = state::string; state_ = state::string;
goto lex_string; goto lex_string;
case '.': case '.':
reader_.advance(); advance();
if(state == reader::end) if (state == reader::end)
throw comp_error(loc_, "unterminated field ('.')"); throw comp_error(loc_, "unterminated field ('.')");
state_ = state::field; state_ = state::field;
@ -291,7 +377,7 @@ auto lexer::lex() -> parser::symbol_type
if (curr != ':') if (curr != ':')
return parser::make_COLON(loc_); return parser::make_COLON(loc_);
reader_.advance(); advance();
return parser::make_DOUBLECOLON(loc_); return parser::make_DOUBLECOLON(loc_);
case '?': case '?':
return parser::make_QMARK(loc_); return parser::make_QMARK(loc_);
@ -299,13 +385,13 @@ auto lexer::lex() -> parser::symbol_type
if (curr != '=') if (curr != '=')
return parser::make_ASSIGN(loc_); return parser::make_ASSIGN(loc_);
reader_.advance(); advance();
return parser::make_EQUALITY(loc_); return parser::make_EQUALITY(loc_);
case '+': case '+':
if (curr != '+' && curr != '=') if (curr != '+' && curr != '=')
return parser::make_ADD(loc_); return parser::make_ADD(loc_);
reader_.advance(); advance();
if (last == '+') if (last == '+')
return parser::make_INCREMENT(loc_); return parser::make_INCREMENT(loc_);
@ -315,7 +401,7 @@ auto lexer::lex() -> parser::symbol_type
if (curr != '-' && curr != '=') if (curr != '-' && curr != '=')
return parser::make_SUB(loc_); return parser::make_SUB(loc_);
reader_.advance(); advance();
if (last == '-') if (last == '-')
return parser::make_DECREMENT(loc_); return parser::make_DECREMENT(loc_);
@ -325,24 +411,24 @@ auto lexer::lex() -> parser::symbol_type
if (curr != '=') if (curr != '=')
return parser::make_MOD(loc_); return parser::make_MOD(loc_);
reader_.advance(); advance();
return parser::make_ASSIGN_MOD(loc_); return parser::make_ASSIGN_MOD(loc_);
case '|': case '|':
if (curr != '|' && curr != '=') if (curr != '|' && curr != '=')
return parser::make_BITWISE_OR(loc_); return parser::make_BITWISE_OR(loc_);
reader_.advance(); advance();
if (last == '|') if (last == '|')
return parser::make_OR(loc_); return parser::make_OR(loc_);
return parser::make_ASSIGN_BW_OR(loc_); return parser::make_ASSIGN_BW_OR(loc_);
case '&': case '&':
if (curr != '&' && curr != '=' && curr != '"' && curr != '\'') if (curr != '&' && curr != '=' && curr != '"')
return parser::make_BITWISE_AND(loc_); return parser::make_BITWISE_AND(loc_);
reader_.advance(); advance();
if (last == '&') if (last == '&')
return parser::make_AND(loc_); return parser::make_AND(loc_);
@ -356,13 +442,13 @@ auto lexer::lex() -> parser::symbol_type
if (curr != '=') if (curr != '=')
return parser::make_BITWISE_EXOR(loc_); return parser::make_BITWISE_EXOR(loc_);
reader_.advance(); advance();
return parser::make_ASSIGN_BW_EXOR(loc_); return parser::make_ASSIGN_BW_EXOR(loc_);
case '!': case '!':
if (curr != '=') if (curr != '=')
return parser::make_NOT(loc_); return parser::make_NOT(loc_);
reader_.advance(); advance();
return parser::make_INEQUALITY(loc_); return parser::make_INEQUALITY(loc_);
case '~': case '~':
return parser::make_COMPLEMENT(loc_); return parser::make_COMPLEMENT(loc_);
@ -370,20 +456,20 @@ auto lexer::lex() -> parser::symbol_type
if (curr != '<' && curr != '=') if (curr != '<' && curr != '=')
return parser::make_LESS(loc_); return parser::make_LESS(loc_);
reader_.advance(); advance();
if (last == '=') if (last == '=')
return parser::make_LESS_EQUAL(loc_); return parser::make_LESS_EQUAL(loc_);
if (curr != '=') if (curr != '=')
return parser::make_LSHIFT(loc_); return parser::make_LSHIFT(loc_);
reader_.advance(); advance();
return parser::make_ASSIGN_LSHIFT(loc_); return parser::make_ASSIGN_LSHIFT(loc_);
case '>': case '>':
if (curr != '>' && curr != '=') if (curr != '>' && curr != '=')
return parser::make_GREATER(loc_); return parser::make_GREATER(loc_);
reader_.advance(); advance();
if (last == '=') if (last == '=')
return parser::make_GREATER_EQUAL(loc_); return parser::make_GREATER_EQUAL(loc_);
@ -391,7 +477,7 @@ auto lexer::lex() -> parser::symbol_type
if (curr != '=') if (curr != '=')
return parser::make_RSHIFT(loc_); return parser::make_RSHIFT(loc_);
reader_.advance(); advance();
return parser::make_ASSIGN_RSHIFT(loc_); return parser::make_ASSIGN_RSHIFT(loc_);
default: default:
lex_name_or_number: lex_name_or_number:
@ -404,24 +490,27 @@ lex_name_or_number:
} }
lex_string: lex_string:
if (state == reader::end)
throw comp_error(loc_, "unmatched string start ('\"')");
reader_.advance();
while (true) while (true)
{ {
if (last == '"')
break;
if (last == '\n')
throw comp_error(loc_, "unterminated string literal");
if (state == reader::end) if (state == reader::end)
throw comp_error(loc_, "unmatched string start ('\"')"); throw comp_error(loc_, "unmatched string start ('\"')");
if (last == '\\') if (curr == '"')
{ {
advance();
break;
}
if (curr == '\n')
throw comp_error(loc_, "unterminated string literal");
if (curr == '\\')
{
advance();
if (state == reader::end)
throw comp_error(loc_, "invalid token ('\')");
char c = curr; char c = curr;
switch (curr) switch (curr)
{ {
@ -435,13 +524,11 @@ lex_string:
if (!buffer_.push(c)) if (!buffer_.push(c))
throw comp_error(loc_, "max string size exceeded"); throw comp_error(loc_, "max string size exceeded");
reader_.advance();
} }
else if (!buffer_.push(last)) else if (!buffer_.push(curr))
throw comp_error(loc_, "max string size exceeded"); throw comp_error(loc_, "max string size exceeded");
reader_.advance(); advance();
} }
if (state_ == state::localize) if (state_ == state::localize)
@ -472,7 +559,7 @@ lex_name:
else if (!buffer_.push(curr)) else if (!buffer_.push(curr))
throw comp_error(loc_, "max string size exceeded"); throw comp_error(loc_, "max string size exceeded");
reader_.advance(); advance();
} }
if(state_ == state::field) if(state_ == state::field)
@ -491,24 +578,23 @@ lex_name:
{ {
if (path) if (path)
throw comp_error(loc_, "invalid preprocessor directive"); throw comp_error(loc_, "invalid preprocessor directive");
auto token = parser::token::H2UNDEF; auto token = parser::token::H2UNDEF;
if (buffer_.length < 16) if (buffer_.length < 16)
{ {
const auto& itr = keyword_map.find(std::string_view(buffer_.data, buffer_.length)); const auto& itr = keyword_map.find(std::string_view(buffer_.data, buffer_.length));
if(itr != keyword_map.end()) if (itr != keyword_map.end())
{ {
if (itr->second > parser::token::HSENDIF) if (itr->second > parser::token::SH_ENDIF)
return parser::symbol_type(itr->second, loc_); return parser::symbol_type(itr->second, loc_);
token = itr->second; token = itr->second;
} }
} }
// TODO: call preprocessor(token); preprocessor(token);
throw comp_error(loc_, "unknown preprocessor directive");
state_ = state::start; state_ = state::start;
continue; continue;
} }
@ -518,7 +604,7 @@ lex_name:
{ {
const auto& itr = keyword_map.find(std::string_view(buffer_.data, buffer_.length)); const auto& itr = keyword_map.find(std::string_view(buffer_.data, buffer_.length));
if(itr != keyword_map.end()) if (itr != keyword_map.end())
return parser::symbol_type(itr->second, loc_); return parser::symbol_type(itr->second, loc_);
} }
@ -557,7 +643,7 @@ lex_number:
if (curr == '\'') if (curr == '\'')
{ {
reader_.advance(); advance();
continue; continue;
} }
@ -571,7 +657,7 @@ lex_number:
if (!buffer_.push(curr)) if (!buffer_.push(curr))
throw comp_error(loc_, "number literal size exceeded"); throw comp_error(loc_, "number literal size exceeded");
reader_.advance(); advance();
} }
if (last == '\'') if (last == '\'')
@ -587,7 +673,7 @@ lex_number:
} }
else if (curr == 'o') else if (curr == 'o')
{ {
reader_.advance(); advance();
while (true) while (true)
{ {
@ -599,7 +685,7 @@ lex_number:
if (curr == '\'') if (curr == '\'')
{ {
reader_.advance(); advance();
continue; continue;
} }
@ -609,7 +695,7 @@ lex_number:
if (!buffer_.push(curr)) if (!buffer_.push(curr))
throw error("gsc lexer: out of memory!"); throw error("gsc lexer: out of memory!");
reader_.advance(); advance();
} }
if (last == '\'' || buffer_.length <= 0) if (last == '\'' || buffer_.length <= 0)
@ -621,7 +707,7 @@ lex_number:
{ {
buffer_.push(last); buffer_.push(last);
buffer_.push(curr); buffer_.push(curr);
reader_.advance(); advance();
while (true) while (true)
{ {
@ -633,7 +719,7 @@ lex_number:
if (curr == '\'') if (curr == '\'')
{ {
reader_.advance(); advance();
continue; continue;
} }
@ -643,7 +729,7 @@ lex_number:
if (!buffer_.push(curr)) if (!buffer_.push(curr))
throw comp_error(loc_, "number literal size exceeded"); throw comp_error(loc_, "number literal size exceeded");
reader_.advance(); advance();
} }
if (last == '\'' || buffer_.length < 3) if (last == '\'' || buffer_.length < 3)
@ -655,7 +741,7 @@ lex_number:
{ {
buffer_.push(last); buffer_.push(last);
buffer_.push(curr); buffer_.push(curr);
reader_.advance(); advance();
while (true) while (true)
{ {
@ -667,7 +753,7 @@ lex_number:
if (curr == '\'') if (curr == '\'')
{ {
reader_.advance(); advance();
continue; continue;
} }
@ -677,7 +763,7 @@ lex_number:
if (!buffer_.push(curr)) if (!buffer_.push(curr))
throw error("gsc lexer: out of memory!"); throw error("gsc lexer: out of memory!");
reader_.advance(); advance();
} }
if (last == '\'' || buffer_.length < 3) if (last == '\'' || buffer_.length < 3)
@ -689,54 +775,89 @@ lex_number:
} }
} }
const std::unordered_map<std::string_view, parser::token::token_kind_type> lexer::keyword_map void lexer::advance()
{{ {
{ "#define", parser::token::HSDEFINE }, reader_.advance();
{ "#undef", parser::token::HSUNDEF },
{ "#ifdef", parser::token::HSIFDEF }, // dont wrap comment marks '/\/' '/\*' outside strings
{ "#ifndef", parser::token::HSIFNDEF }, if (state_ == state::start && reader_.last_byte == '/')
{ "#if", parser::token::HSIF }, return;
{ "#elif", parser::token::HSELIF },
{ "#else", parser::token::HSELSE }, while (reader_.current_byte == '\\')
{ "#endif", parser::token::HSENDIF }, {
{ "#inline", parser::token::INLINE }, if (reader_.bytes_remaining == 1)
{ "#include", parser::token::INCLUDE }, throw comp_error(loc_, "invalid token ('\\')");
{ "#using_animtree", parser::token::USINGTREE },
{ "#animtree", parser::token::ANIMTREE }, if (reader_.buffer_pos[1] != '\r' && reader_.buffer_pos[1] != '\n')
{ "endon", parser::token::ENDON }, break;
{ "notify", parser::token::NOTIFY },
{ "wait", parser::token::WAIT }, if (reader_.buffer_pos[1] == '\r')
{ "waittill", parser::token::WAITTILL }, {
{ "waittillmatch", parser::token::WAITTILLMATCH }, if (reader_.bytes_remaining <= 3 || reader_.buffer_pos[2] != '\n')
{ "waittillframeend", parser::token::WAITTILLFRAMEEND }, throw comp_error(loc_, "invalid token ('\\')");
{ "waitframe", parser::token::WAITFRAME },
{ "if", parser::token::IF }, reader_.buffer_pos += 3;
{ "else", parser::token::ELSE }, reader_.bytes_remaining -= 3;
{ "do", parser::token::DO }, }
{ "while", parser::token::WHILE },
{ "for", parser::token::FOR }, if ((reader_.buffer_pos[1] == '\n'))
{ "foreach", parser::token::FOREACH }, {
{ "in", parser::token::IN }, if (reader_.bytes_remaining == 2)
{ "switch", parser::token::SWITCH }, throw comp_error(loc_, "invalid token ('\\')");
{ "case", parser::token::CASE },
{ "default", parser::token::DEFAULT }, reader_.buffer_pos += 2;
{ "break", parser::token::BREAK }, reader_.bytes_remaining -= 2;
{ "continue", parser::token::CONTINUE }, }
{ "return", parser::token::RETURN },
{ "breakpoint", parser::token::BREAKPOINT }, if (reader_.bytes_remaining == 0)
{ "prof_begin", parser::token::PROFBEGIN }, {
{ "prof_end", parser::token::PROFEND }, reader_.state = reader::end;
{ "thread", parser::token::THREAD }, reader_.current_byte = 0;
{ "childthread", parser::token::CHILDTHREAD }, }
{ "thisthread", parser::token::THISTHREAD }, else
{ "call", parser::token::CALL }, {
{ "true", parser::token::TRUE }, reader_.current_byte = *reader_.buffer_pos;
{ "false", parser::token::FALSE }, }
{ "undefined", parser::token::UNDEFINED },
{ "game", parser::token::GAME }, loc_.lines();
{ "self", parser::token::SELF }, loc_.step();
{ "anim", parser::token::ANIM }, }
{ "level", parser::token::LEVEL }, }
}};
// Handles a preprocessor directive token recognised by lexer::lex().
//
// token: the directive's token kind, or the "invalid token" kind when the
//        spelling was not found in the keyword table.
//
// Always throws comp_error at the current location:
//   - "invalid token ('#')" when clean_ is not set (lex() clears clean_ once a
//     non-blank character has been consumed on the current line),
//   - "unimplemented preprocessor directive" for any of the SH_* directives,
//   - "unknown preprocessor directive" for every other token.
void lexer::preprocessor(parser::token::token_kind_type token)
{
    if (!clean_)
        throw comp_error(loc_, "invalid token ('#')");

    switch (token)
    {
        // every recognised directive shares the same "not yet supported" outcome
        case parser::token::SH_DEFINE:
        case parser::token::SH_UNDEF:
        case parser::token::SH_IFDEF:
        case parser::token::SH_IFNDEF:
        case parser::token::SH_IF:
        case parser::token::SH_ELIF:
        case parser::token::SH_ELSE:
        case parser::token::SH_ENDIF:
            throw comp_error(loc_, "unimplemented preprocessor directive");
        default:
            throw comp_error(loc_, "unknown preprocessor directive");
    }
}
} // namespace xsk::gsc::h2 } // namespace xsk::gsc::h2

View File

@ -55,6 +55,7 @@ class lexer
state state_; state state_;
build mode_; build mode_;
bool indev_; bool indev_;
bool clean_;
public: public:
lexer(const std::string& name, const char* data, size_t size); lexer(const std::string& name, const char* data, size_t size);
@ -64,7 +65,8 @@ public:
void ban_header(const location& loc); void ban_header(const location& loc);
private: private:
static const std::unordered_map<std::string_view, parser::token::token_kind_type> keyword_map; void advance();
void preprocessor(parser::token::token_kind_type token);
}; };
} // namespace xsk::gsc::h2 } // namespace xsk::gsc::h2

View File

@ -677,14 +677,14 @@ namespace xsk { namespace gsc { namespace h2 {
H2EOF = 0, // "end of file" H2EOF = 0, // "end of file"
H2error = 1, // error H2error = 1, // error
H2UNDEF = 2, // "invalid token" H2UNDEF = 2, // "invalid token"
HSDEFINE = 3, // "#define" SH_DEFINE = 3, // "#define"
HSUNDEF = 4, // "#undef" SH_UNDEF = 4, // "#undef"
HSIFDEF = 5, // "#ifdef" SH_IFDEF = 5, // "#ifdef"
HSIFNDEF = 6, // "#ifndef" SH_IFNDEF = 6, // "#ifndef"
HSIF = 7, // "#if" SH_IF = 7, // "#if"
HSELIF = 8, // "#elif" SH_ELIF = 8, // "#elif"
HSELSE = 9, // "#else" SH_ELSE = 9, // "#else"
HSENDIF = 10, // "#endif" SH_ENDIF = 10, // "#endif"
DEVBEGIN = 11, // "/#" DEVBEGIN = 11, // "/#"
DEVEND = 12, // "#/" DEVEND = 12, // "#/"
INLINE = 13, // "#inline" INLINE = 13, // "#inline"
@ -809,14 +809,14 @@ namespace xsk { namespace gsc { namespace h2 {
S_YYEOF = 0, // "end of file" S_YYEOF = 0, // "end of file"
S_YYerror = 1, // error S_YYerror = 1, // error
S_YYUNDEF = 2, // "invalid token" S_YYUNDEF = 2, // "invalid token"
S_HSDEFINE = 3, // "#define" S_SH_DEFINE = 3, // "#define"
S_HSUNDEF = 4, // "#undef" S_SH_UNDEF = 4, // "#undef"
S_HSIFDEF = 5, // "#ifdef" S_SH_IFDEF = 5, // "#ifdef"
S_HSIFNDEF = 6, // "#ifndef" S_SH_IFNDEF = 6, // "#ifndef"
S_HSIF = 7, // "#if" S_SH_IF = 7, // "#if"
S_HSELIF = 8, // "#elif" S_SH_ELIF = 8, // "#elif"
S_HSELSE = 9, // "#else" S_SH_ELSE = 9, // "#else"
S_HSENDIF = 10, // "#endif" S_SH_ENDIF = 10, // "#endif"
S_DEVBEGIN = 11, // "/#" S_DEVBEGIN = 11, // "/#"
S_DEVEND = 12, // "#/" S_DEVEND = 12, // "#/"
S_INLINE = 13, // "#inline" S_INLINE = 13, // "#inline"
@ -2798,121 +2798,121 @@ switch (yykind)
#if 201103L <= YY_CPLUSPLUS #if 201103L <= YY_CPLUSPLUS
static static
symbol_type symbol_type
make_HSDEFINE (location_type l) make_SH_DEFINE (location_type l)
{ {
return symbol_type (token::HSDEFINE, std::move (l)); return symbol_type (token::SH_DEFINE, std::move (l));
} }
#else #else
static static
symbol_type symbol_type
make_HSDEFINE (const location_type& l) make_SH_DEFINE (const location_type& l)
{ {
return symbol_type (token::HSDEFINE, l); return symbol_type (token::SH_DEFINE, l);
} }
#endif #endif
#if 201103L <= YY_CPLUSPLUS #if 201103L <= YY_CPLUSPLUS
static static
symbol_type symbol_type
make_HSUNDEF (location_type l) make_SH_UNDEF (location_type l)
{ {
return symbol_type (token::HSUNDEF, std::move (l)); return symbol_type (token::SH_UNDEF, std::move (l));
} }
#else #else
static static
symbol_type symbol_type
make_HSUNDEF (const location_type& l) make_SH_UNDEF (const location_type& l)
{ {
return symbol_type (token::HSUNDEF, l); return symbol_type (token::SH_UNDEF, l);
} }
#endif #endif
#if 201103L <= YY_CPLUSPLUS #if 201103L <= YY_CPLUSPLUS
static static
symbol_type symbol_type
make_HSIFDEF (location_type l) make_SH_IFDEF (location_type l)
{ {
return symbol_type (token::HSIFDEF, std::move (l)); return symbol_type (token::SH_IFDEF, std::move (l));
} }
#else #else
static static
symbol_type symbol_type
make_HSIFDEF (const location_type& l) make_SH_IFDEF (const location_type& l)
{ {
return symbol_type (token::HSIFDEF, l); return symbol_type (token::SH_IFDEF, l);
} }
#endif #endif
#if 201103L <= YY_CPLUSPLUS #if 201103L <= YY_CPLUSPLUS
static static
symbol_type symbol_type
make_HSIFNDEF (location_type l) make_SH_IFNDEF (location_type l)
{ {
return symbol_type (token::HSIFNDEF, std::move (l)); return symbol_type (token::SH_IFNDEF, std::move (l));
} }
#else #else
static static
symbol_type symbol_type
make_HSIFNDEF (const location_type& l) make_SH_IFNDEF (const location_type& l)
{ {
return symbol_type (token::HSIFNDEF, l); return symbol_type (token::SH_IFNDEF, l);
} }
#endif #endif
#if 201103L <= YY_CPLUSPLUS #if 201103L <= YY_CPLUSPLUS
static static
symbol_type symbol_type
make_HSIF (location_type l) make_SH_IF (location_type l)
{ {
return symbol_type (token::HSIF, std::move (l)); return symbol_type (token::SH_IF, std::move (l));
} }
#else #else
static static
symbol_type symbol_type
make_HSIF (const location_type& l) make_SH_IF (const location_type& l)
{ {
return symbol_type (token::HSIF, l); return symbol_type (token::SH_IF, l);
} }
#endif #endif
#if 201103L <= YY_CPLUSPLUS #if 201103L <= YY_CPLUSPLUS
static static
symbol_type symbol_type
make_HSELIF (location_type l) make_SH_ELIF (location_type l)
{ {
return symbol_type (token::HSELIF, std::move (l)); return symbol_type (token::SH_ELIF, std::move (l));
} }
#else #else
static static
symbol_type symbol_type
make_HSELIF (const location_type& l) make_SH_ELIF (const location_type& l)
{ {
return symbol_type (token::HSELIF, l); return symbol_type (token::SH_ELIF, l);
} }
#endif #endif
#if 201103L <= YY_CPLUSPLUS #if 201103L <= YY_CPLUSPLUS
static static
symbol_type symbol_type
make_HSELSE (location_type l) make_SH_ELSE (location_type l)
{ {
return symbol_type (token::HSELSE, std::move (l)); return symbol_type (token::SH_ELSE, std::move (l));
} }
#else #else
static static
symbol_type symbol_type
make_HSELSE (const location_type& l) make_SH_ELSE (const location_type& l)
{ {
return symbol_type (token::HSELSE, l); return symbol_type (token::SH_ELSE, l);
} }
#endif #endif
#if 201103L <= YY_CPLUSPLUS #if 201103L <= YY_CPLUSPLUS
static static
symbol_type symbol_type
make_HSENDIF (location_type l) make_SH_ENDIF (location_type l)
{ {
return symbol_type (token::HSENDIF, std::move (l)); return symbol_type (token::SH_ENDIF, std::move (l));
} }
#else #else
static static
symbol_type symbol_type
make_HSENDIF (const location_type& l) make_SH_ENDIF (const location_type& l)
{ {
return symbol_type (token::HSENDIF, l); return symbol_type (token::SH_ENDIF, l);
} }
#endif #endif
#if 201103L <= YY_CPLUSPLUS #if 201103L <= YY_CPLUSPLUS

View File

@ -16,6 +16,55 @@ xsk::gsc::iw5::parser::symbol_type IW5lex(xsk::gsc::iw5::lexer& lexer)
namespace xsk::gsc::iw5 namespace xsk::gsc::iw5
{ {
// Lookup table from source spelling to parser token kind for the IW5 lexer:
// the '#'-prefixed directives are listed first, followed by the plain-word
// keywords. Keys are string_views over string literals, so they remain valid
// for the lifetime of the table.
// NOTE(review): the lexer presumably tells the SH_* preprocessor directives
// apart from the other '#' keywords by comparing token values against
// SH_ENDIF - confirm before reordering the token declarations.
const std::unordered_map<std::string_view, parser::token::token_kind_type> keyword_map
{{
// preprocessor directives
{ "#define", parser::token::SH_DEFINE },
{ "#undef", parser::token::SH_UNDEF },
{ "#ifdef", parser::token::SH_IFDEF },
{ "#ifndef", parser::token::SH_IFNDEF },
{ "#if", parser::token::SH_IF },
{ "#elif", parser::token::SH_ELIF },
{ "#else", parser::token::SH_ELSE },
{ "#endif", parser::token::SH_ENDIF },
// other '#' keywords
{ "#inline", parser::token::INLINE },
{ "#include", parser::token::INCLUDE },
{ "#using_animtree", parser::token::USINGTREE },
{ "#animtree", parser::token::ANIMTREE },
// plain-word keywords
{ "endon", parser::token::ENDON },
{ "notify", parser::token::NOTIFY },
{ "wait", parser::token::WAIT },
{ "waittill", parser::token::WAITTILL },
{ "waittillmatch", parser::token::WAITTILLMATCH },
{ "waittillframeend", parser::token::WAITTILLFRAMEEND },
{ "if", parser::token::IF },
{ "else", parser::token::ELSE },
{ "do", parser::token::DO },
{ "while", parser::token::WHILE },
{ "for", parser::token::FOR },
{ "foreach", parser::token::FOREACH },
{ "in", parser::token::IN },
{ "switch", parser::token::SWITCH },
{ "case", parser::token::CASE },
{ "default", parser::token::DEFAULT },
{ "break", parser::token::BREAK },
{ "continue", parser::token::CONTINUE },
{ "return", parser::token::RETURN },
{ "breakpoint", parser::token::BREAKPOINT },
{ "prof_begin", parser::token::PROFBEGIN },
{ "prof_end", parser::token::PROFEND },
{ "thread", parser::token::THREAD },
{ "childthread", parser::token::CHILDTHREAD },
{ "thisthread", parser::token::THISTHREAD },
{ "call", parser::token::CALL },
{ "true", parser::token::TRUE },
{ "false", parser::token::FALSE },
{ "undefined", parser::token::UNDEFINED },
{ "game", parser::token::GAME },
{ "self", parser::token::SELF },
{ "anim", parser::token::ANIM },
{ "level", parser::token::LEVEL },
}};
buffer::buffer() : length(0) buffer::buffer() : length(0)
{ {
data = static_cast<char*>(std::malloc(max_buf_size)); data = static_cast<char*>(std::malloc(max_buf_size));
@ -35,10 +84,8 @@ bool buffer::push(char c)
return true; return true;
} }
reader::reader() : state(reader::end), buffer_pos(0), bytes_remaining(0), last_byte(0), current_byte(0) reader::reader() : state(reader::end), buffer_pos(0), bytes_remaining(0),
{ last_byte(0), current_byte(0) { }
}
void reader::init(const char* data, size_t size) void reader::init(const char* data, size_t size)
{ {
@ -78,7 +125,7 @@ void reader::advance()
} }
} }
lexer::lexer(const std::string& name, const char* data, size_t size) : indev_(false), loc_(location(&name)), lexer::lexer(const std::string& name, const char* data, size_t size) : indev_(false), clean_(true), loc_(location(&name)),
mode_(build::dev), header_top_(0), locs_(std::stack<location>()), readers_(std::stack<reader>()) mode_(build::dev), header_top_(0), locs_(std::stack<location>()), readers_(std::stack<reader>())
{ {
reader_.init(data, size); reader_.init(data, size);
@ -97,6 +144,7 @@ void lexer::push_header(const std::string& file)
locs_.push(loc_); locs_.push(loc_);
loc_.initialize(std::get<0>(data)); loc_.initialize(std::get<0>(data));
reader_.init(std::get<1>(data), std::get<2>(data)); reader_.init(std::get<1>(data), std::get<2>(data));
clean_ = true;
} }
catch (const std::exception& e) catch (const std::exception& e)
{ {
@ -145,7 +193,10 @@ auto lexer::lex() -> parser::symbol_type
return parser::make_IW5EOF(loc_); return parser::make_IW5EOF(loc_);
} }
reader_.advance(); if (clean_ && last != 0 && last != ' ' && last != '\t' && last != '\n')
clean_ = false;
advance();
switch (last) switch (last)
{ {
@ -157,12 +208,15 @@ auto lexer::lex() -> parser::symbol_type
case '\n': case '\n':
loc_.lines(); loc_.lines();
loc_.step(); loc_.step();
clean_ = true;
continue; continue;
case '\\':
throw comp_error(loc_, "invalid token ('\\')");
case '/': case '/':
if (curr != '/' && curr != '*' && curr != '#' && curr != '=') if (curr != '/' && curr != '*' && curr != '#' && curr != '=')
return parser::make_DIV(loc_); return parser::make_DIV(loc_);
reader_.advance(); advance();
if (last == '=') if (last == '=')
return parser::make_ASSIGN_DIV(loc_); return parser::make_ASSIGN_DIV(loc_);
@ -224,7 +278,30 @@ auto lexer::lex() -> parser::symbol_type
{ {
while (true) while (true)
{ {
if (state == reader::end || curr == '\n') if (state == reader::end)
break;
if (last == '\\' && curr == '\r' || curr == '\n')
{
reader_.advance();
if (state == reader::end)
break;
if (last == '\r')
{
if (curr != '\n')
throw comp_error(loc_, "invalid token ('\')");
reader_.advance();
}
loc_.lines();
loc_.step();
continue;
}
if (curr == '\n')
break; break;
reader_.advance(); reader_.advance();
@ -238,15 +315,23 @@ auto lexer::lex() -> parser::symbol_type
throw comp_error(loc_, "unmatched devblock end ('#/')"); throw comp_error(loc_, "unmatched devblock end ('#/')");
indev_ = false; indev_ = false;
reader_.advance(); advance();
return parser::make_DEVEND(loc_); return parser::make_DEVEND(loc_);
} }
buffer_.push(last); buffer_.push(last);
reader_.advance(); advance();
while (state == reader::ok)
{
if (last != ' ' || last != '\t')
break;
advance();
}
if (state == reader::end || !((last > 64 && last < 91) || (last > 96 && last < 123))) if (state == reader::end || !((last > 64 && last < 91) || (last > 96 && last < 123)))
throw comp_error(loc_, "unterminated preprocessor directive ('#')"); throw comp_error(loc_, "invalid preprocessor directive ('#')");
state_ = state::preprocessor; state_ = state::preprocessor;
goto lex_name; goto lex_name;
@ -254,7 +339,7 @@ auto lexer::lex() -> parser::symbol_type
if (curr != '/' && curr != '=') if (curr != '/' && curr != '=')
return parser::make_MUL(loc_); return parser::make_MUL(loc_);
reader_.advance(); advance();
if (last == '=') if (last == '=')
return parser::make_ASSIGN_MUL(loc_); return parser::make_ASSIGN_MUL(loc_);
@ -264,9 +349,9 @@ auto lexer::lex() -> parser::symbol_type
state_ = state::string; state_ = state::string;
goto lex_string; goto lex_string;
case '.': case '.':
reader_.advance(); advance();
if(state == reader::end) if (state == reader::end)
throw comp_error(loc_, "unterminated field ('.')"); throw comp_error(loc_, "unterminated field ('.')");
state_ = state::field; state_ = state::field;
@ -291,7 +376,7 @@ auto lexer::lex() -> parser::symbol_type
if (curr != ':') if (curr != ':')
return parser::make_COLON(loc_); return parser::make_COLON(loc_);
reader_.advance(); advance();
return parser::make_DOUBLECOLON(loc_); return parser::make_DOUBLECOLON(loc_);
case '?': case '?':
return parser::make_QMARK(loc_); return parser::make_QMARK(loc_);
@ -299,13 +384,13 @@ auto lexer::lex() -> parser::symbol_type
if (curr != '=') if (curr != '=')
return parser::make_ASSIGN(loc_); return parser::make_ASSIGN(loc_);
reader_.advance(); advance();
return parser::make_EQUALITY(loc_); return parser::make_EQUALITY(loc_);
case '+': case '+':
if (curr != '+' && curr != '=') if (curr != '+' && curr != '=')
return parser::make_ADD(loc_); return parser::make_ADD(loc_);
reader_.advance(); advance();
if (last == '+') if (last == '+')
return parser::make_INCREMENT(loc_); return parser::make_INCREMENT(loc_);
@ -315,7 +400,7 @@ auto lexer::lex() -> parser::symbol_type
if (curr != '-' && curr != '=') if (curr != '-' && curr != '=')
return parser::make_SUB(loc_); return parser::make_SUB(loc_);
reader_.advance(); advance();
if (last == '-') if (last == '-')
return parser::make_DECREMENT(loc_); return parser::make_DECREMENT(loc_);
@ -325,24 +410,24 @@ auto lexer::lex() -> parser::symbol_type
if (curr != '=') if (curr != '=')
return parser::make_MOD(loc_); return parser::make_MOD(loc_);
reader_.advance(); advance();
return parser::make_ASSIGN_MOD(loc_); return parser::make_ASSIGN_MOD(loc_);
case '|': case '|':
if (curr != '|' && curr != '=') if (curr != '|' && curr != '=')
return parser::make_BITWISE_OR(loc_); return parser::make_BITWISE_OR(loc_);
reader_.advance(); advance();
if (last == '|') if (last == '|')
return parser::make_OR(loc_); return parser::make_OR(loc_);
return parser::make_ASSIGN_BW_OR(loc_); return parser::make_ASSIGN_BW_OR(loc_);
case '&': case '&':
if (curr != '&' && curr != '=' && curr != '"' && curr != '\'') if (curr != '&' && curr != '=' && curr != '"')
return parser::make_BITWISE_AND(loc_); return parser::make_BITWISE_AND(loc_);
reader_.advance(); advance();
if (last == '&') if (last == '&')
return parser::make_AND(loc_); return parser::make_AND(loc_);
@ -356,13 +441,13 @@ auto lexer::lex() -> parser::symbol_type
if (curr != '=') if (curr != '=')
return parser::make_BITWISE_EXOR(loc_); return parser::make_BITWISE_EXOR(loc_);
reader_.advance(); advance();
return parser::make_ASSIGN_BW_EXOR(loc_); return parser::make_ASSIGN_BW_EXOR(loc_);
case '!': case '!':
if (curr != '=') if (curr != '=')
return parser::make_NOT(loc_); return parser::make_NOT(loc_);
reader_.advance(); advance();
return parser::make_INEQUALITY(loc_); return parser::make_INEQUALITY(loc_);
case '~': case '~':
return parser::make_COMPLEMENT(loc_); return parser::make_COMPLEMENT(loc_);
@ -370,20 +455,20 @@ auto lexer::lex() -> parser::symbol_type
if (curr != '<' && curr != '=') if (curr != '<' && curr != '=')
return parser::make_LESS(loc_); return parser::make_LESS(loc_);
reader_.advance(); advance();
if (last == '=') if (last == '=')
return parser::make_LESS_EQUAL(loc_); return parser::make_LESS_EQUAL(loc_);
if (curr != '=') if (curr != '=')
return parser::make_LSHIFT(loc_); return parser::make_LSHIFT(loc_);
reader_.advance(); advance();
return parser::make_ASSIGN_LSHIFT(loc_); return parser::make_ASSIGN_LSHIFT(loc_);
case '>': case '>':
if (curr != '>' && curr != '=') if (curr != '>' && curr != '=')
return parser::make_GREATER(loc_); return parser::make_GREATER(loc_);
reader_.advance(); advance();
if (last == '=') if (last == '=')
return parser::make_GREATER_EQUAL(loc_); return parser::make_GREATER_EQUAL(loc_);
@ -391,7 +476,7 @@ auto lexer::lex() -> parser::symbol_type
if (curr != '=') if (curr != '=')
return parser::make_RSHIFT(loc_); return parser::make_RSHIFT(loc_);
reader_.advance(); advance();
return parser::make_ASSIGN_RSHIFT(loc_); return parser::make_ASSIGN_RSHIFT(loc_);
default: default:
lex_name_or_number: lex_name_or_number:
@ -404,24 +489,27 @@ lex_name_or_number:
} }
lex_string: lex_string:
if (state == reader::end)
throw comp_error(loc_, "unmatched string start ('\"')");
reader_.advance();
while (true) while (true)
{ {
if (last == '"')
break;
if (last == '\n')
throw comp_error(loc_, "unterminated string literal");
if (state == reader::end) if (state == reader::end)
throw comp_error(loc_, "unmatched string start ('\"')"); throw comp_error(loc_, "unmatched string start ('\"')");
if (last == '\\') if (curr == '"')
{ {
advance();
break;
}
if (curr == '\n')
throw comp_error(loc_, "unterminated string literal");
if (curr == '\\')
{
advance();
if (state == reader::end)
throw comp_error(loc_, "invalid token ('\')");
char c = curr; char c = curr;
switch (curr) switch (curr)
{ {
@ -435,13 +523,11 @@ lex_string:
if (!buffer_.push(c)) if (!buffer_.push(c))
throw comp_error(loc_, "max string size exceeded"); throw comp_error(loc_, "max string size exceeded");
reader_.advance();
} }
else if (!buffer_.push(last)) else if (!buffer_.push(curr))
throw comp_error(loc_, "max string size exceeded"); throw comp_error(loc_, "max string size exceeded");
reader_.advance(); advance();
} }
if (state_ == state::localize) if (state_ == state::localize)
@ -472,7 +558,7 @@ lex_name:
else if (!buffer_.push(curr)) else if (!buffer_.push(curr))
throw comp_error(loc_, "max string size exceeded"); throw comp_error(loc_, "max string size exceeded");
reader_.advance(); advance();
} }
if(state_ == state::field) if(state_ == state::field)
@ -491,24 +577,23 @@ lex_name:
{ {
if (path) if (path)
throw comp_error(loc_, "invalid preprocessor directive"); throw comp_error(loc_, "invalid preprocessor directive");
auto token = parser::token::IW5UNDEF; auto token = parser::token::IW5UNDEF;
if (buffer_.length < 16) if (buffer_.length < 16)
{ {
const auto& itr = keyword_map.find(std::string_view(buffer_.data, buffer_.length)); const auto& itr = keyword_map.find(std::string_view(buffer_.data, buffer_.length));
if(itr != keyword_map.end()) if (itr != keyword_map.end())
{ {
if (itr->second > parser::token::HSENDIF) if (itr->second > parser::token::SH_ENDIF)
return parser::symbol_type(itr->second, loc_); return parser::symbol_type(itr->second, loc_);
token = itr->second; token = itr->second;
} }
} }
// TODO: call preprocessor(token); preprocessor(token);
throw comp_error(loc_, "unknown preprocessor directive");
state_ = state::start; state_ = state::start;
continue; continue;
} }
@ -518,7 +603,7 @@ lex_name:
{ {
const auto& itr = keyword_map.find(std::string_view(buffer_.data, buffer_.length)); const auto& itr = keyword_map.find(std::string_view(buffer_.data, buffer_.length));
if(itr != keyword_map.end()) if (itr != keyword_map.end())
return parser::symbol_type(itr->second, loc_); return parser::symbol_type(itr->second, loc_);
} }
@ -557,7 +642,7 @@ lex_number:
if (curr == '\'') if (curr == '\'')
{ {
reader_.advance(); advance();
continue; continue;
} }
@ -571,7 +656,7 @@ lex_number:
if (!buffer_.push(curr)) if (!buffer_.push(curr))
throw comp_error(loc_, "number literal size exceeded"); throw comp_error(loc_, "number literal size exceeded");
reader_.advance(); advance();
} }
if (last == '\'') if (last == '\'')
@ -587,7 +672,7 @@ lex_number:
} }
else if (curr == 'o') else if (curr == 'o')
{ {
reader_.advance(); advance();
while (true) while (true)
{ {
@ -599,7 +684,7 @@ lex_number:
if (curr == '\'') if (curr == '\'')
{ {
reader_.advance(); advance();
continue; continue;
} }
@ -609,7 +694,7 @@ lex_number:
if (!buffer_.push(curr)) if (!buffer_.push(curr))
throw error("gsc lexer: out of memory!"); throw error("gsc lexer: out of memory!");
reader_.advance(); advance();
} }
if (last == '\'' || buffer_.length <= 0) if (last == '\'' || buffer_.length <= 0)
@ -621,7 +706,7 @@ lex_number:
{ {
buffer_.push(last); buffer_.push(last);
buffer_.push(curr); buffer_.push(curr);
reader_.advance(); advance();
while (true) while (true)
{ {
@ -633,7 +718,7 @@ lex_number:
if (curr == '\'') if (curr == '\'')
{ {
reader_.advance(); advance();
continue; continue;
} }
@ -643,7 +728,7 @@ lex_number:
if (!buffer_.push(curr)) if (!buffer_.push(curr))
throw comp_error(loc_, "number literal size exceeded"); throw comp_error(loc_, "number literal size exceeded");
reader_.advance(); advance();
} }
if (last == '\'' || buffer_.length < 3) if (last == '\'' || buffer_.length < 3)
@ -655,7 +740,7 @@ lex_number:
{ {
buffer_.push(last); buffer_.push(last);
buffer_.push(curr); buffer_.push(curr);
reader_.advance(); advance();
while (true) while (true)
{ {
@ -667,7 +752,7 @@ lex_number:
if (curr == '\'') if (curr == '\'')
{ {
reader_.advance(); advance();
continue; continue;
} }
@ -677,7 +762,7 @@ lex_number:
if (!buffer_.push(curr)) if (!buffer_.push(curr))
throw error("gsc lexer: out of memory!"); throw error("gsc lexer: out of memory!");
reader_.advance(); advance();
} }
if (last == '\'' || buffer_.length < 3) if (last == '\'' || buffer_.length < 3)
@ -689,53 +774,89 @@ lex_number:
} }
} }
const std::unordered_map<std::string_view, parser::token::token_kind_type> lexer::keyword_map void lexer::advance()
{{ {
{ "#define", parser::token::HSDEFINE }, reader_.advance();
{ "#undef", parser::token::HSUNDEF },
{ "#ifdef", parser::token::HSIFDEF }, // dont wrap comment marks '/\/' '/\*' outside strings
{ "#ifndef", parser::token::HSIFNDEF }, if (state_ == state::start && reader_.last_byte == '/')
{ "#if", parser::token::HSIF }, return;
{ "#elif", parser::token::HSELIF },
{ "#else", parser::token::HSELSE }, while (reader_.current_byte == '\\')
{ "#endif", parser::token::HSENDIF }, {
{ "#inline", parser::token::INLINE }, if (reader_.bytes_remaining == 1)
{ "#include", parser::token::INCLUDE }, throw comp_error(loc_, "invalid token ('\\')");
{ "#using_animtree", parser::token::USINGTREE },
{ "#animtree", parser::token::ANIMTREE }, if (reader_.buffer_pos[1] != '\r' && reader_.buffer_pos[1] != '\n')
{ "endon", parser::token::ENDON }, break;
{ "notify", parser::token::NOTIFY },
{ "wait", parser::token::WAIT }, if (reader_.buffer_pos[1] == '\r')
{ "waittill", parser::token::WAITTILL }, {
{ "waittillmatch", parser::token::WAITTILLMATCH }, if (reader_.bytes_remaining <= 3 || reader_.buffer_pos[2] != '\n')
{ "waittillframeend", parser::token::WAITTILLFRAMEEND }, throw comp_error(loc_, "invalid token ('\\')");
{ "if", parser::token::IF },
{ "else", parser::token::ELSE }, reader_.buffer_pos += 3;
{ "do", parser::token::DO }, reader_.bytes_remaining -= 3;
{ "while", parser::token::WHILE }, }
{ "for", parser::token::FOR },
{ "foreach", parser::token::FOREACH }, if ((reader_.buffer_pos[1] == '\n'))
{ "in", parser::token::IN }, {
{ "switch", parser::token::SWITCH }, if (reader_.bytes_remaining == 2)
{ "case", parser::token::CASE }, throw comp_error(loc_, "invalid token ('\\')");
{ "default", parser::token::DEFAULT },
{ "break", parser::token::BREAK }, reader_.buffer_pos += 2;
{ "continue", parser::token::CONTINUE }, reader_.bytes_remaining -= 2;
{ "return", parser::token::RETURN }, }
{ "breakpoint", parser::token::BREAKPOINT },
{ "prof_begin", parser::token::PROFBEGIN }, if (reader_.bytes_remaining == 0)
{ "prof_end", parser::token::PROFEND }, {
{ "thread", parser::token::THREAD }, reader_.state = reader::end;
{ "childthread", parser::token::CHILDTHREAD }, reader_.current_byte = 0;
{ "thisthread", parser::token::THISTHREAD }, }
{ "call", parser::token::CALL }, else
{ "true", parser::token::TRUE }, {
{ "false", parser::token::FALSE }, reader_.current_byte = *reader_.buffer_pos;
{ "undefined", parser::token::UNDEFINED }, }
{ "game", parser::token::GAME },
{ "self", parser::token::SELF }, loc_.lines();
{ "anim", parser::token::ANIM }, loc_.step();
{ "level", parser::token::LEVEL }, }
}}; }
// Handles a preprocessor directive token.
//
// No directive is implemented yet, so every path throws comp_error:
//   - "invalid token ('#')" when clean_ is false,
//   - "unimplemented preprocessor directive" for the SH_* directive kinds,
//   - "unknown preprocessor directive" for any other token kind.
//
// NOTE(review): clean_ appears to record that only whitespace precedes the
// '#' on the current line - confirm against lexer::lex().
void lexer::preprocessor(parser::token::token_kind_type token)
{
    if (!clean_)
        throw comp_error(loc_, "invalid token ('#')");

    switch (token)
    {
        // Recognised directives share one placeholder until each gets its own
        // handling; split a label out of this group when implementing it.
        case parser::token::SH_DEFINE:
        case parser::token::SH_UNDEF:
        case parser::token::SH_IFDEF:
        case parser::token::SH_IFNDEF:
        case parser::token::SH_IF:
        case parser::token::SH_ELIF:
        case parser::token::SH_ELSE:
        case parser::token::SH_ENDIF:
            throw comp_error(loc_, "unimplemented preprocessor directive");
        default:
            throw comp_error(loc_, "unknown preprocessor directive");
    }
}
} // namespace xsk::gsc::iw5 } // namespace xsk::gsc::iw5

View File

@ -5,8 +5,6 @@
#pragma once #pragma once
#include "iw5.hpp"
namespace xsk::gsc::iw5 namespace xsk::gsc::iw5
{ {
@ -57,6 +55,7 @@ class lexer
state state_; state state_;
build mode_; build mode_;
bool indev_; bool indev_;
bool clean_;
public: public:
lexer(const std::string& name, const char* data, size_t size); lexer(const std::string& name, const char* data, size_t size);
@ -66,7 +65,8 @@ public:
void ban_header(const location& loc); void ban_header(const location& loc);
private: private:
static const std::unordered_map<std::string_view, parser::token::token_kind_type> keyword_map; void advance();
void preprocessor(parser::token::token_kind_type token);
}; };
} // namespace xsk::gsc::iw5 } // namespace xsk::gsc::iw5

View File

@ -674,14 +674,14 @@ namespace xsk { namespace gsc { namespace iw5 {
IW5EOF = 0, // "end of file" IW5EOF = 0, // "end of file"
IW5error = 1, // error IW5error = 1, // error
IW5UNDEF = 2, // "invalid token" IW5UNDEF = 2, // "invalid token"
HSDEFINE = 3, // "#define" SH_DEFINE = 3, // "#define"
HSUNDEF = 4, // "#undef" SH_UNDEF = 4, // "#undef"
HSIFDEF = 5, // "#ifdef" SH_IFDEF = 5, // "#ifdef"
HSIFNDEF = 6, // "#ifndef" SH_IFNDEF = 6, // "#ifndef"
HSIF = 7, // "#if" SH_IF = 7, // "#if"
HSELIF = 8, // "#elif" SH_ELIF = 8, // "#elif"
HSELSE = 9, // "#else" SH_ELSE = 9, // "#else"
HSENDIF = 10, // "#endif" SH_ENDIF = 10, // "#endif"
DEVBEGIN = 11, // "/#" DEVBEGIN = 11, // "/#"
DEVEND = 12, // "#/" DEVEND = 12, // "#/"
INLINE = 13, // "#inline" INLINE = 13, // "#inline"
@ -805,14 +805,14 @@ namespace xsk { namespace gsc { namespace iw5 {
S_YYEOF = 0, // "end of file" S_YYEOF = 0, // "end of file"
S_YYerror = 1, // error S_YYerror = 1, // error
S_YYUNDEF = 2, // "invalid token" S_YYUNDEF = 2, // "invalid token"
S_HSDEFINE = 3, // "#define" S_SH_DEFINE = 3, // "#define"
S_HSUNDEF = 4, // "#undef" S_SH_UNDEF = 4, // "#undef"
S_HSIFDEF = 5, // "#ifdef" S_SH_IFDEF = 5, // "#ifdef"
S_HSIFNDEF = 6, // "#ifndef" S_SH_IFNDEF = 6, // "#ifndef"
S_HSIF = 7, // "#if" S_SH_IF = 7, // "#if"
S_HSELIF = 8, // "#elif" S_SH_ELIF = 8, // "#elif"
S_HSELSE = 9, // "#else" S_SH_ELSE = 9, // "#else"
S_HSENDIF = 10, // "#endif" S_SH_ENDIF = 10, // "#endif"
S_DEVBEGIN = 11, // "/#" S_DEVBEGIN = 11, // "/#"
S_DEVEND = 12, // "#/" S_DEVEND = 12, // "#/"
S_INLINE = 13, // "#inline" S_INLINE = 13, // "#inline"
@ -2770,121 +2770,121 @@ switch (yykind)
#if 201103L <= YY_CPLUSPLUS #if 201103L <= YY_CPLUSPLUS
static static
symbol_type symbol_type
make_HSDEFINE (location_type l) make_SH_DEFINE (location_type l)
{ {
return symbol_type (token::HSDEFINE, std::move (l)); return symbol_type (token::SH_DEFINE, std::move (l));
} }
#else #else
static static
symbol_type symbol_type
make_HSDEFINE (const location_type& l) make_SH_DEFINE (const location_type& l)
{ {
return symbol_type (token::HSDEFINE, l); return symbol_type (token::SH_DEFINE, l);
} }
#endif #endif
#if 201103L <= YY_CPLUSPLUS #if 201103L <= YY_CPLUSPLUS
static static
symbol_type symbol_type
make_HSUNDEF (location_type l) make_SH_UNDEF (location_type l)
{ {
return symbol_type (token::HSUNDEF, std::move (l)); return symbol_type (token::SH_UNDEF, std::move (l));
} }
#else #else
static static
symbol_type symbol_type
make_HSUNDEF (const location_type& l) make_SH_UNDEF (const location_type& l)
{ {
return symbol_type (token::HSUNDEF, l); return symbol_type (token::SH_UNDEF, l);
} }
#endif #endif
#if 201103L <= YY_CPLUSPLUS #if 201103L <= YY_CPLUSPLUS
static static
symbol_type symbol_type
make_HSIFDEF (location_type l) make_SH_IFDEF (location_type l)
{ {
return symbol_type (token::HSIFDEF, std::move (l)); return symbol_type (token::SH_IFDEF, std::move (l));
} }
#else #else
static static
symbol_type symbol_type
make_HSIFDEF (const location_type& l) make_SH_IFDEF (const location_type& l)
{ {
return symbol_type (token::HSIFDEF, l); return symbol_type (token::SH_IFDEF, l);
} }
#endif #endif
#if 201103L <= YY_CPLUSPLUS #if 201103L <= YY_CPLUSPLUS
static static
symbol_type symbol_type
make_HSIFNDEF (location_type l) make_SH_IFNDEF (location_type l)
{ {
return symbol_type (token::HSIFNDEF, std::move (l)); return symbol_type (token::SH_IFNDEF, std::move (l));
} }
#else #else
static static
symbol_type symbol_type
make_HSIFNDEF (const location_type& l) make_SH_IFNDEF (const location_type& l)
{ {
return symbol_type (token::HSIFNDEF, l); return symbol_type (token::SH_IFNDEF, l);
} }
#endif #endif
#if 201103L <= YY_CPLUSPLUS #if 201103L <= YY_CPLUSPLUS
static static
symbol_type symbol_type
make_HSIF (location_type l) make_SH_IF (location_type l)
{ {
return symbol_type (token::HSIF, std::move (l)); return symbol_type (token::SH_IF, std::move (l));
} }
#else #else
static static
symbol_type symbol_type
make_HSIF (const location_type& l) make_SH_IF (const location_type& l)
{ {
return symbol_type (token::HSIF, l); return symbol_type (token::SH_IF, l);
} }
#endif #endif
#if 201103L <= YY_CPLUSPLUS #if 201103L <= YY_CPLUSPLUS
static static
symbol_type symbol_type
make_HSELIF (location_type l) make_SH_ELIF (location_type l)
{ {
return symbol_type (token::HSELIF, std::move (l)); return symbol_type (token::SH_ELIF, std::move (l));
} }
#else #else
static static
symbol_type symbol_type
make_HSELIF (const location_type& l) make_SH_ELIF (const location_type& l)
{ {
return symbol_type (token::HSELIF, l); return symbol_type (token::SH_ELIF, l);
} }
#endif #endif
#if 201103L <= YY_CPLUSPLUS #if 201103L <= YY_CPLUSPLUS
static static
symbol_type symbol_type
make_HSELSE (location_type l) make_SH_ELSE (location_type l)
{ {
return symbol_type (token::HSELSE, std::move (l)); return symbol_type (token::SH_ELSE, std::move (l));
} }
#else #else
static static
symbol_type symbol_type
make_HSELSE (const location_type& l) make_SH_ELSE (const location_type& l)
{ {
return symbol_type (token::HSELSE, l); return symbol_type (token::SH_ELSE, l);
} }
#endif #endif
#if 201103L <= YY_CPLUSPLUS #if 201103L <= YY_CPLUSPLUS
static static
symbol_type symbol_type
make_HSENDIF (location_type l) make_SH_ENDIF (location_type l)
{ {
return symbol_type (token::HSENDIF, std::move (l)); return symbol_type (token::SH_ENDIF, std::move (l));
} }
#else #else
static static
symbol_type symbol_type
make_HSENDIF (const location_type& l) make_SH_ENDIF (const location_type& l)
{ {
return symbol_type (token::HSENDIF, l); return symbol_type (token::SH_ENDIF, l);
} }
#endif #endif
#if 201103L <= YY_CPLUSPLUS #if 201103L <= YY_CPLUSPLUS

View File

@ -16,6 +16,55 @@ xsk::gsc::iw6::parser::symbol_type IW6lex(xsk::gsc::iw6::lexer& lexer)
namespace xsk::gsc::iw6 namespace xsk::gsc::iw6
{ {
const std::unordered_map<std::string_view, parser::token::token_kind_type> keyword_map
{{
{ "#define", parser::token::SH_DEFINE },
{ "#undef", parser::token::SH_UNDEF },
{ "#ifdef", parser::token::SH_IFDEF },
{ "#ifndef", parser::token::SH_IFNDEF },
{ "#if", parser::token::SH_IF },
{ "#elif", parser::token::SH_ELIF },
{ "#else", parser::token::SH_ELSE },
{ "#endif", parser::token::SH_ENDIF },
{ "#inline", parser::token::INLINE },
{ "#include", parser::token::INCLUDE },
{ "#using_animtree", parser::token::USINGTREE },
{ "#animtree", parser::token::ANIMTREE },
{ "endon", parser::token::ENDON },
{ "notify", parser::token::NOTIFY },
{ "wait", parser::token::WAIT },
{ "waittill", parser::token::WAITTILL },
{ "waittillmatch", parser::token::WAITTILLMATCH },
{ "waittillframeend", parser::token::WAITTILLFRAMEEND },
{ "if", parser::token::IF },
{ "else", parser::token::ELSE },
{ "do", parser::token::DO },
{ "while", parser::token::WHILE },
{ "for", parser::token::FOR },
{ "foreach", parser::token::FOREACH },
{ "in", parser::token::IN },
{ "switch", parser::token::SWITCH },
{ "case", parser::token::CASE },
{ "default", parser::token::DEFAULT },
{ "break", parser::token::BREAK },
{ "continue", parser::token::CONTINUE },
{ "return", parser::token::RETURN },
{ "breakpoint", parser::token::BREAKPOINT },
{ "prof_begin", parser::token::PROFBEGIN },
{ "prof_end", parser::token::PROFEND },
{ "thread", parser::token::THREAD },
{ "childthread", parser::token::CHILDTHREAD },
{ "thisthread", parser::token::THISTHREAD },
{ "call", parser::token::CALL },
{ "true", parser::token::TRUE },
{ "false", parser::token::FALSE },
{ "undefined", parser::token::UNDEFINED },
{ "game", parser::token::GAME },
{ "self", parser::token::SELF },
{ "anim", parser::token::ANIM },
{ "level", parser::token::LEVEL },
}};
buffer::buffer() : length(0) buffer::buffer() : length(0)
{ {
data = static_cast<char*>(std::malloc(max_buf_size)); data = static_cast<char*>(std::malloc(max_buf_size));
@ -35,10 +84,8 @@ bool buffer::push(char c)
return true; return true;
} }
reader::reader() : state(reader::end), buffer_pos(0), bytes_remaining(0), last_byte(0), current_byte(0) reader::reader() : state(reader::end), buffer_pos(0), bytes_remaining(0),
{ last_byte(0), current_byte(0) { }
}
void reader::init(const char* data, size_t size) void reader::init(const char* data, size_t size)
{ {
@ -78,7 +125,7 @@ void reader::advance()
} }
} }
lexer::lexer(const std::string& name, const char* data, size_t size) : indev_(false), loc_(location(&name)), lexer::lexer(const std::string& name, const char* data, size_t size) : indev_(false), clean_(true), loc_(location(&name)),
mode_(build::dev), header_top_(0), locs_(std::stack<location>()), readers_(std::stack<reader>()) mode_(build::dev), header_top_(0), locs_(std::stack<location>()), readers_(std::stack<reader>())
{ {
reader_.init(data, size); reader_.init(data, size);
@ -97,6 +144,7 @@ void lexer::push_header(const std::string& file)
locs_.push(loc_); locs_.push(loc_);
loc_.initialize(std::get<0>(data)); loc_.initialize(std::get<0>(data));
reader_.init(std::get<1>(data), std::get<2>(data)); reader_.init(std::get<1>(data), std::get<2>(data));
clean_ = true;
} }
catch (const std::exception& e) catch (const std::exception& e)
{ {
@ -145,7 +193,10 @@ auto lexer::lex() -> parser::symbol_type
return parser::make_IW6EOF(loc_); return parser::make_IW6EOF(loc_);
} }
reader_.advance(); if (clean_ && last != 0 && last != ' ' && last != '\t' && last != '\n')
clean_ = false;
advance();
switch (last) switch (last)
{ {
@ -157,12 +208,15 @@ auto lexer::lex() -> parser::symbol_type
case '\n': case '\n':
loc_.lines(); loc_.lines();
loc_.step(); loc_.step();
clean_ = true;
continue; continue;
case '\\':
throw comp_error(loc_, "invalid token ('\\')");
case '/': case '/':
if (curr != '/' && curr != '*' && curr != '#' && curr != '=') if (curr != '/' && curr != '*' && curr != '#' && curr != '=')
return parser::make_DIV(loc_); return parser::make_DIV(loc_);
reader_.advance(); advance();
if (last == '=') if (last == '=')
return parser::make_ASSIGN_DIV(loc_); return parser::make_ASSIGN_DIV(loc_);
@ -224,7 +278,30 @@ auto lexer::lex() -> parser::symbol_type
{ {
while (true) while (true)
{ {
if (state == reader::end || curr == '\n') if (state == reader::end)
break;
if (last == '\\' && curr == '\r' || curr == '\n')
{
reader_.advance();
if (state == reader::end)
break;
if (last == '\r')
{
if (curr != '\n')
throw comp_error(loc_, "invalid token ('\')");
reader_.advance();
}
loc_.lines();
loc_.step();
continue;
}
if (curr == '\n')
break; break;
reader_.advance(); reader_.advance();
@ -238,15 +315,23 @@ auto lexer::lex() -> parser::symbol_type
throw comp_error(loc_, "unmatched devblock end ('#/')"); throw comp_error(loc_, "unmatched devblock end ('#/')");
indev_ = false; indev_ = false;
reader_.advance(); advance();
return parser::make_DEVEND(loc_); return parser::make_DEVEND(loc_);
} }
buffer_.push(last); buffer_.push(last);
reader_.advance(); advance();
while (state == reader::ok)
{
if (last != ' ' || last != '\t')
break;
advance();
}
if (state == reader::end || !((last > 64 && last < 91) || (last > 96 && last < 123))) if (state == reader::end || !((last > 64 && last < 91) || (last > 96 && last < 123)))
throw comp_error(loc_, "unterminated preprocessor directive ('#')"); throw comp_error(loc_, "invalid preprocessor directive ('#')");
state_ = state::preprocessor; state_ = state::preprocessor;
goto lex_name; goto lex_name;
@ -254,7 +339,7 @@ auto lexer::lex() -> parser::symbol_type
if (curr != '/' && curr != '=') if (curr != '/' && curr != '=')
return parser::make_MUL(loc_); return parser::make_MUL(loc_);
reader_.advance(); advance();
if (last == '=') if (last == '=')
return parser::make_ASSIGN_MUL(loc_); return parser::make_ASSIGN_MUL(loc_);
@ -264,9 +349,9 @@ auto lexer::lex() -> parser::symbol_type
state_ = state::string; state_ = state::string;
goto lex_string; goto lex_string;
case '.': case '.':
reader_.advance(); advance();
if(state == reader::end) if (state == reader::end)
throw comp_error(loc_, "unterminated field ('.')"); throw comp_error(loc_, "unterminated field ('.')");
state_ = state::field; state_ = state::field;
@ -291,7 +376,7 @@ auto lexer::lex() -> parser::symbol_type
if (curr != ':') if (curr != ':')
return parser::make_COLON(loc_); return parser::make_COLON(loc_);
reader_.advance(); advance();
return parser::make_DOUBLECOLON(loc_); return parser::make_DOUBLECOLON(loc_);
case '?': case '?':
return parser::make_QMARK(loc_); return parser::make_QMARK(loc_);
@ -299,13 +384,13 @@ auto lexer::lex() -> parser::symbol_type
if (curr != '=') if (curr != '=')
return parser::make_ASSIGN(loc_); return parser::make_ASSIGN(loc_);
reader_.advance(); advance();
return parser::make_EQUALITY(loc_); return parser::make_EQUALITY(loc_);
case '+': case '+':
if (curr != '+' && curr != '=') if (curr != '+' && curr != '=')
return parser::make_ADD(loc_); return parser::make_ADD(loc_);
reader_.advance(); advance();
if (last == '+') if (last == '+')
return parser::make_INCREMENT(loc_); return parser::make_INCREMENT(loc_);
@ -315,7 +400,7 @@ auto lexer::lex() -> parser::symbol_type
if (curr != '-' && curr != '=') if (curr != '-' && curr != '=')
return parser::make_SUB(loc_); return parser::make_SUB(loc_);
reader_.advance(); advance();
if (last == '-') if (last == '-')
return parser::make_DECREMENT(loc_); return parser::make_DECREMENT(loc_);
@ -325,24 +410,24 @@ auto lexer::lex() -> parser::symbol_type
if (curr != '=') if (curr != '=')
return parser::make_MOD(loc_); return parser::make_MOD(loc_);
reader_.advance(); advance();
return parser::make_ASSIGN_MOD(loc_); return parser::make_ASSIGN_MOD(loc_);
case '|': case '|':
if (curr != '|' && curr != '=') if (curr != '|' && curr != '=')
return parser::make_BITWISE_OR(loc_); return parser::make_BITWISE_OR(loc_);
reader_.advance(); advance();
if (last == '|') if (last == '|')
return parser::make_OR(loc_); return parser::make_OR(loc_);
return parser::make_ASSIGN_BW_OR(loc_); return parser::make_ASSIGN_BW_OR(loc_);
case '&': case '&':
if (curr != '&' && curr != '=' && curr != '"' && curr != '\'') if (curr != '&' && curr != '=' && curr != '"')
return parser::make_BITWISE_AND(loc_); return parser::make_BITWISE_AND(loc_);
reader_.advance(); advance();
if (last == '&') if (last == '&')
return parser::make_AND(loc_); return parser::make_AND(loc_);
@ -356,13 +441,13 @@ auto lexer::lex() -> parser::symbol_type
if (curr != '=') if (curr != '=')
return parser::make_BITWISE_EXOR(loc_); return parser::make_BITWISE_EXOR(loc_);
reader_.advance(); advance();
return parser::make_ASSIGN_BW_EXOR(loc_); return parser::make_ASSIGN_BW_EXOR(loc_);
case '!': case '!':
if (curr != '=') if (curr != '=')
return parser::make_NOT(loc_); return parser::make_NOT(loc_);
reader_.advance(); advance();
return parser::make_INEQUALITY(loc_); return parser::make_INEQUALITY(loc_);
case '~': case '~':
return parser::make_COMPLEMENT(loc_); return parser::make_COMPLEMENT(loc_);
@ -370,20 +455,20 @@ auto lexer::lex() -> parser::symbol_type
if (curr != '<' && curr != '=') if (curr != '<' && curr != '=')
return parser::make_LESS(loc_); return parser::make_LESS(loc_);
reader_.advance(); advance();
if (last == '=') if (last == '=')
return parser::make_LESS_EQUAL(loc_); return parser::make_LESS_EQUAL(loc_);
if (curr != '=') if (curr != '=')
return parser::make_LSHIFT(loc_); return parser::make_LSHIFT(loc_);
reader_.advance(); advance();
return parser::make_ASSIGN_LSHIFT(loc_); return parser::make_ASSIGN_LSHIFT(loc_);
case '>': case '>':
if (curr != '>' && curr != '=') if (curr != '>' && curr != '=')
return parser::make_GREATER(loc_); return parser::make_GREATER(loc_);
reader_.advance(); advance();
if (last == '=') if (last == '=')
return parser::make_GREATER_EQUAL(loc_); return parser::make_GREATER_EQUAL(loc_);
@ -391,7 +476,7 @@ auto lexer::lex() -> parser::symbol_type
if (curr != '=') if (curr != '=')
return parser::make_RSHIFT(loc_); return parser::make_RSHIFT(loc_);
reader_.advance(); advance();
return parser::make_ASSIGN_RSHIFT(loc_); return parser::make_ASSIGN_RSHIFT(loc_);
default: default:
lex_name_or_number: lex_name_or_number:
@ -404,24 +489,27 @@ lex_name_or_number:
} }
lex_string: lex_string:
if (state == reader::end)
throw comp_error(loc_, "unmatched string start ('\"')");
reader_.advance();
while (true) while (true)
{ {
if (last == '"')
break;
if (last == '\n')
throw comp_error(loc_, "unterminated string literal");
if (state == reader::end) if (state == reader::end)
throw comp_error(loc_, "unmatched string start ('\"')"); throw comp_error(loc_, "unmatched string start ('\"')");
if (last == '\\') if (curr == '"')
{ {
advance();
break;
}
if (curr == '\n')
throw comp_error(loc_, "unterminated string literal");
if (curr == '\\')
{
advance();
if (state == reader::end)
throw comp_error(loc_, "invalid token ('\')");
char c = curr; char c = curr;
switch (curr) switch (curr)
{ {
@ -435,13 +523,11 @@ lex_string:
if (!buffer_.push(c)) if (!buffer_.push(c))
throw comp_error(loc_, "max string size exceeded"); throw comp_error(loc_, "max string size exceeded");
reader_.advance();
} }
else if (!buffer_.push(last)) else if (!buffer_.push(curr))
throw comp_error(loc_, "max string size exceeded"); throw comp_error(loc_, "max string size exceeded");
reader_.advance(); advance();
} }
if (state_ == state::localize) if (state_ == state::localize)
@ -472,7 +558,7 @@ lex_name:
else if (!buffer_.push(curr)) else if (!buffer_.push(curr))
throw comp_error(loc_, "max string size exceeded"); throw comp_error(loc_, "max string size exceeded");
reader_.advance(); advance();
} }
if(state_ == state::field) if(state_ == state::field)
@ -491,24 +577,23 @@ lex_name:
{ {
if (path) if (path)
throw comp_error(loc_, "invalid preprocessor directive"); throw comp_error(loc_, "invalid preprocessor directive");
auto token = parser::token::IW6UNDEF; auto token = parser::token::IW6UNDEF;
if (buffer_.length < 16) if (buffer_.length < 16)
{ {
const auto& itr = keyword_map.find(std::string_view(buffer_.data, buffer_.length)); const auto& itr = keyword_map.find(std::string_view(buffer_.data, buffer_.length));
if(itr != keyword_map.end()) if (itr != keyword_map.end())
{ {
if (itr->second > parser::token::HSENDIF) if (itr->second > parser::token::SH_ENDIF)
return parser::symbol_type(itr->second, loc_); return parser::symbol_type(itr->second, loc_);
token = itr->second; token = itr->second;
} }
} }
// TODO: call preprocessor(token); preprocessor(token);
throw comp_error(loc_, "unknown preprocessor directive");
state_ = state::start; state_ = state::start;
continue; continue;
} }
@ -518,7 +603,7 @@ lex_name:
{ {
const auto& itr = keyword_map.find(std::string_view(buffer_.data, buffer_.length)); const auto& itr = keyword_map.find(std::string_view(buffer_.data, buffer_.length));
if(itr != keyword_map.end()) if (itr != keyword_map.end())
return parser::symbol_type(itr->second, loc_); return parser::symbol_type(itr->second, loc_);
} }
@ -557,7 +642,7 @@ lex_number:
if (curr == '\'') if (curr == '\'')
{ {
reader_.advance(); advance();
continue; continue;
} }
@ -571,7 +656,7 @@ lex_number:
if (!buffer_.push(curr)) if (!buffer_.push(curr))
throw comp_error(loc_, "number literal size exceeded"); throw comp_error(loc_, "number literal size exceeded");
reader_.advance(); advance();
} }
if (last == '\'') if (last == '\'')
@ -587,7 +672,7 @@ lex_number:
} }
else if (curr == 'o') else if (curr == 'o')
{ {
reader_.advance(); advance();
while (true) while (true)
{ {
@ -599,7 +684,7 @@ lex_number:
if (curr == '\'') if (curr == '\'')
{ {
reader_.advance(); advance();
continue; continue;
} }
@ -609,7 +694,7 @@ lex_number:
if (!buffer_.push(curr)) if (!buffer_.push(curr))
throw error("gsc lexer: out of memory!"); throw error("gsc lexer: out of memory!");
reader_.advance(); advance();
} }
if (last == '\'' || buffer_.length <= 0) if (last == '\'' || buffer_.length <= 0)
@ -621,7 +706,7 @@ lex_number:
{ {
buffer_.push(last); buffer_.push(last);
buffer_.push(curr); buffer_.push(curr);
reader_.advance(); advance();
while (true) while (true)
{ {
@ -633,7 +718,7 @@ lex_number:
if (curr == '\'') if (curr == '\'')
{ {
reader_.advance(); advance();
continue; continue;
} }
@ -643,7 +728,7 @@ lex_number:
if (!buffer_.push(curr)) if (!buffer_.push(curr))
throw comp_error(loc_, "number literal size exceeded"); throw comp_error(loc_, "number literal size exceeded");
reader_.advance(); advance();
} }
if (last == '\'' || buffer_.length < 3) if (last == '\'' || buffer_.length < 3)
@ -655,7 +740,7 @@ lex_number:
{ {
buffer_.push(last); buffer_.push(last);
buffer_.push(curr); buffer_.push(curr);
reader_.advance(); advance();
while (true) while (true)
{ {
@ -667,7 +752,7 @@ lex_number:
if (curr == '\'') if (curr == '\'')
{ {
reader_.advance(); advance();
continue; continue;
} }
@ -677,7 +762,7 @@ lex_number:
if (!buffer_.push(curr)) if (!buffer_.push(curr))
throw error("gsc lexer: out of memory!"); throw error("gsc lexer: out of memory!");
reader_.advance(); advance();
} }
if (last == '\'' || buffer_.length < 3) if (last == '\'' || buffer_.length < 3)
@ -689,53 +774,89 @@ lex_number:
} }
} }
const std::unordered_map<std::string_view, parser::token::token_kind_type> lexer::keyword_map void lexer::advance()
{{ {
{ "#define", parser::token::HSDEFINE }, reader_.advance();
{ "#undef", parser::token::HSUNDEF },
{ "#ifdef", parser::token::HSIFDEF }, // dont wrap comment marks '/\/' '/\*' outside strings
{ "#ifndef", parser::token::HSIFNDEF }, if (state_ == state::start && reader_.last_byte == '/')
{ "#if", parser::token::HSIF }, return;
{ "#elif", parser::token::HSELIF },
{ "#else", parser::token::HSELSE }, while (reader_.current_byte == '\\')
{ "#endif", parser::token::HSENDIF }, {
{ "#inline", parser::token::INLINE }, if (reader_.bytes_remaining == 1)
{ "#include", parser::token::INCLUDE }, throw comp_error(loc_, "invalid token ('\\')");
{ "#using_animtree", parser::token::USINGTREE },
{ "#animtree", parser::token::ANIMTREE }, if (reader_.buffer_pos[1] != '\r' && reader_.buffer_pos[1] != '\n')
{ "endon", parser::token::ENDON }, break;
{ "notify", parser::token::NOTIFY },
{ "wait", parser::token::WAIT }, if (reader_.buffer_pos[1] == '\r')
{ "waittill", parser::token::WAITTILL }, {
{ "waittillmatch", parser::token::WAITTILLMATCH }, if (reader_.bytes_remaining <= 3 || reader_.buffer_pos[2] != '\n')
{ "waittillframeend", parser::token::WAITTILLFRAMEEND }, throw comp_error(loc_, "invalid token ('\\')");
{ "if", parser::token::IF },
{ "else", parser::token::ELSE }, reader_.buffer_pos += 3;
{ "do", parser::token::DO }, reader_.bytes_remaining -= 3;
{ "while", parser::token::WHILE }, }
{ "for", parser::token::FOR },
{ "foreach", parser::token::FOREACH }, if ((reader_.buffer_pos[1] == '\n'))
{ "in", parser::token::IN }, {
{ "switch", parser::token::SWITCH }, if (reader_.bytes_remaining == 2)
{ "case", parser::token::CASE }, throw comp_error(loc_, "invalid token ('\\')");
{ "default", parser::token::DEFAULT },
{ "break", parser::token::BREAK }, reader_.buffer_pos += 2;
{ "continue", parser::token::CONTINUE }, reader_.bytes_remaining -= 2;
{ "return", parser::token::RETURN }, }
{ "breakpoint", parser::token::BREAKPOINT },
{ "prof_begin", parser::token::PROFBEGIN }, if (reader_.bytes_remaining == 0)
{ "prof_end", parser::token::PROFEND }, {
{ "thread", parser::token::THREAD }, reader_.state = reader::end;
{ "childthread", parser::token::CHILDTHREAD }, reader_.current_byte = 0;
{ "thisthread", parser::token::THISTHREAD }, }
{ "call", parser::token::CALL }, else
{ "true", parser::token::TRUE }, {
{ "false", parser::token::FALSE }, reader_.current_byte = *reader_.buffer_pos;
{ "undefined", parser::token::UNDEFINED }, }
{ "game", parser::token::GAME },
{ "self", parser::token::SELF }, loc_.lines();
{ "anim", parser::token::ANIM }, loc_.step();
{ "level", parser::token::LEVEL }, }
}}; }
// Handles a '#' directive token handed over by the lexer.
//
// token: the token kind the lexer resolved for the directive name.
//
// Always throws comp_error:
//   - "invalid token ('#')" when clean_ is false;
//   - "unimplemented preprocessor directive" for the eight SH_* directives;
//   - "unknown preprocessor directive" for any other token kind.
void lexer::preprocessor(parser::token::token_kind_type token)
{
    // The clean_ check comes first, so it takes priority over the
    // directive-specific errors below.
    if (!clean_)
    {
        throw comp_error(loc_, "invalid token ('#')");
    }

    switch (token)
    {
        // Recognised directives that have no implementation yet all report
        // the same error, so they share a single arm.
        case parser::token::SH_DEFINE:
        case parser::token::SH_UNDEF:
        case parser::token::SH_IFDEF:
        case parser::token::SH_IFNDEF:
        case parser::token::SH_IF:
        case parser::token::SH_ELIF:
        case parser::token::SH_ELSE:
        case parser::token::SH_ENDIF:
            throw comp_error(loc_, "unimplemented preprocessor directive");

        default:
            throw comp_error(loc_, "unknown preprocessor directive");
    }
}
} // namespace xsk::gsc::iw6 } // namespace xsk::gsc::iw6

View File

@ -5,8 +5,6 @@
#pragma once #pragma once
#include "iw6.hpp"
namespace xsk::gsc::iw6 namespace xsk::gsc::iw6
{ {
@ -57,6 +55,7 @@ class lexer
state state_; state state_;
build mode_; build mode_;
bool indev_; bool indev_;
bool clean_;
public: public:
lexer(const std::string& name, const char* data, size_t size); lexer(const std::string& name, const char* data, size_t size);
@ -66,7 +65,8 @@ public:
void ban_header(const location& loc); void ban_header(const location& loc);
private: private:
static const std::unordered_map<std::string_view, parser::token::token_kind_type> keyword_map; void advance();
void preprocessor(parser::token::token_kind_type token);
}; };
} // namespace xsk::gsc::iw6 } // namespace xsk::gsc::iw6

View File

@ -674,14 +674,14 @@ namespace xsk { namespace gsc { namespace iw6 {
IW6EOF = 0, // "end of file" IW6EOF = 0, // "end of file"
IW6error = 1, // error IW6error = 1, // error
IW6UNDEF = 2, // "invalid token" IW6UNDEF = 2, // "invalid token"
HSDEFINE = 3, // "#define" SH_DEFINE = 3, // "#define"
HSUNDEF = 4, // "#undef" SH_UNDEF = 4, // "#undef"
HSIFDEF = 5, // "#ifdef" SH_IFDEF = 5, // "#ifdef"
HSIFNDEF = 6, // "#ifndef" SH_IFNDEF = 6, // "#ifndef"
HSIF = 7, // "#if" SH_IF = 7, // "#if"
HSELIF = 8, // "#elif" SH_ELIF = 8, // "#elif"
HSELSE = 9, // "#else" SH_ELSE = 9, // "#else"
HSENDIF = 10, // "#endif" SH_ENDIF = 10, // "#endif"
DEVBEGIN = 11, // "/#" DEVBEGIN = 11, // "/#"
DEVEND = 12, // "#/" DEVEND = 12, // "#/"
INLINE = 13, // "#inline" INLINE = 13, // "#inline"
@ -805,14 +805,14 @@ namespace xsk { namespace gsc { namespace iw6 {
S_YYEOF = 0, // "end of file" S_YYEOF = 0, // "end of file"
S_YYerror = 1, // error S_YYerror = 1, // error
S_YYUNDEF = 2, // "invalid token" S_YYUNDEF = 2, // "invalid token"
S_HSDEFINE = 3, // "#define" S_SH_DEFINE = 3, // "#define"
S_HSUNDEF = 4, // "#undef" S_SH_UNDEF = 4, // "#undef"
S_HSIFDEF = 5, // "#ifdef" S_SH_IFDEF = 5, // "#ifdef"
S_HSIFNDEF = 6, // "#ifndef" S_SH_IFNDEF = 6, // "#ifndef"
S_HSIF = 7, // "#if" S_SH_IF = 7, // "#if"
S_HSELIF = 8, // "#elif" S_SH_ELIF = 8, // "#elif"
S_HSELSE = 9, // "#else" S_SH_ELSE = 9, // "#else"
S_HSENDIF = 10, // "#endif" S_SH_ENDIF = 10, // "#endif"
S_DEVBEGIN = 11, // "/#" S_DEVBEGIN = 11, // "/#"
S_DEVEND = 12, // "#/" S_DEVEND = 12, // "#/"
S_INLINE = 13, // "#inline" S_INLINE = 13, // "#inline"
@ -2770,121 +2770,121 @@ switch (yykind)
#if 201103L <= YY_CPLUSPLUS #if 201103L <= YY_CPLUSPLUS
static static
symbol_type symbol_type
make_HSDEFINE (location_type l) make_SH_DEFINE (location_type l)
{ {
return symbol_type (token::HSDEFINE, std::move (l)); return symbol_type (token::SH_DEFINE, std::move (l));
} }
#else #else
static static
symbol_type symbol_type
make_HSDEFINE (const location_type& l) make_SH_DEFINE (const location_type& l)
{ {
return symbol_type (token::HSDEFINE, l); return symbol_type (token::SH_DEFINE, l);
} }
#endif #endif
#if 201103L <= YY_CPLUSPLUS #if 201103L <= YY_CPLUSPLUS
static static
symbol_type symbol_type
make_HSUNDEF (location_type l) make_SH_UNDEF (location_type l)
{ {
return symbol_type (token::HSUNDEF, std::move (l)); return symbol_type (token::SH_UNDEF, std::move (l));
} }
#else #else
static static
symbol_type symbol_type
make_HSUNDEF (const location_type& l) make_SH_UNDEF (const location_type& l)
{ {
return symbol_type (token::HSUNDEF, l); return symbol_type (token::SH_UNDEF, l);
} }
#endif #endif
#if 201103L <= YY_CPLUSPLUS #if 201103L <= YY_CPLUSPLUS
static static
symbol_type symbol_type
make_HSIFDEF (location_type l) make_SH_IFDEF (location_type l)
{ {
return symbol_type (token::HSIFDEF, std::move (l)); return symbol_type (token::SH_IFDEF, std::move (l));
} }
#else #else
static static
symbol_type symbol_type
make_HSIFDEF (const location_type& l) make_SH_IFDEF (const location_type& l)
{ {
return symbol_type (token::HSIFDEF, l); return symbol_type (token::SH_IFDEF, l);
} }
#endif #endif
#if 201103L <= YY_CPLUSPLUS #if 201103L <= YY_CPLUSPLUS
static static
symbol_type symbol_type
make_HSIFNDEF (location_type l) make_SH_IFNDEF (location_type l)
{ {
return symbol_type (token::HSIFNDEF, std::move (l)); return symbol_type (token::SH_IFNDEF, std::move (l));
} }
#else #else
static static
symbol_type symbol_type
make_HSIFNDEF (const location_type& l) make_SH_IFNDEF (const location_type& l)
{ {
return symbol_type (token::HSIFNDEF, l); return symbol_type (token::SH_IFNDEF, l);
} }
#endif #endif
#if 201103L <= YY_CPLUSPLUS #if 201103L <= YY_CPLUSPLUS
static static
symbol_type symbol_type
make_HSIF (location_type l) make_SH_IF (location_type l)
{ {
return symbol_type (token::HSIF, std::move (l)); return symbol_type (token::SH_IF, std::move (l));
} }
#else #else
static static
symbol_type symbol_type
make_HSIF (const location_type& l) make_SH_IF (const location_type& l)
{ {
return symbol_type (token::HSIF, l); return symbol_type (token::SH_IF, l);
} }
#endif #endif
#if 201103L <= YY_CPLUSPLUS #if 201103L <= YY_CPLUSPLUS
static static
symbol_type symbol_type
make_HSELIF (location_type l) make_SH_ELIF (location_type l)
{ {
return symbol_type (token::HSELIF, std::move (l)); return symbol_type (token::SH_ELIF, std::move (l));
} }
#else #else
static static
symbol_type symbol_type
make_HSELIF (const location_type& l) make_SH_ELIF (const location_type& l)
{ {
return symbol_type (token::HSELIF, l); return symbol_type (token::SH_ELIF, l);
} }
#endif #endif
#if 201103L <= YY_CPLUSPLUS #if 201103L <= YY_CPLUSPLUS
static static
symbol_type symbol_type
make_HSELSE (location_type l) make_SH_ELSE (location_type l)
{ {
return symbol_type (token::HSELSE, std::move (l)); return symbol_type (token::SH_ELSE, std::move (l));
} }
#else #else
static static
symbol_type symbol_type
make_HSELSE (const location_type& l) make_SH_ELSE (const location_type& l)
{ {
return symbol_type (token::HSELSE, l); return symbol_type (token::SH_ELSE, l);
} }
#endif #endif
#if 201103L <= YY_CPLUSPLUS #if 201103L <= YY_CPLUSPLUS
static static
symbol_type symbol_type
make_HSENDIF (location_type l) make_SH_ENDIF (location_type l)
{ {
return symbol_type (token::HSENDIF, std::move (l)); return symbol_type (token::SH_ENDIF, std::move (l));
} }
#else #else
static static
symbol_type symbol_type
make_HSENDIF (const location_type& l) make_SH_ENDIF (const location_type& l)
{ {
return symbol_type (token::HSENDIF, l); return symbol_type (token::SH_ENDIF, l);
} }
#endif #endif
#if 201103L <= YY_CPLUSPLUS #if 201103L <= YY_CPLUSPLUS

View File

@ -16,6 +16,55 @@ xsk::gsc::iw7::parser::symbol_type IW7lex(xsk::gsc::iw7::lexer& lexer)
namespace xsk::gsc::iw7 namespace xsk::gsc::iw7
{ {
// Table mapping the spelling of each '#' directive and reserved word to its
// parser token kind. The lexer looks the buffered name up here with
// keyword_map.find(). While lexing a preprocessor directive, a hit whose token
// value is greater than SH_ENDIF is returned to the parser as a symbol; any
// other result is passed on to lexer::preprocessor().
const std::unordered_map<std::string_view, parser::token::token_kind_type> keyword_map
{{
// '#' directives
{ "#define", parser::token::SH_DEFINE },
{ "#undef", parser::token::SH_UNDEF },
{ "#ifdef", parser::token::SH_IFDEF },
{ "#ifndef", parser::token::SH_IFNDEF },
{ "#if", parser::token::SH_IF },
{ "#elif", parser::token::SH_ELIF },
{ "#else", parser::token::SH_ELSE },
{ "#endif", parser::token::SH_ENDIF },
{ "#inline", parser::token::INLINE },
{ "#include", parser::token::INCLUDE },
{ "#using_animtree", parser::token::USINGTREE },
{ "#animtree", parser::token::ANIMTREE },
// reserved words
{ "endon", parser::token::ENDON },
{ "notify", parser::token::NOTIFY },
{ "wait", parser::token::WAIT },
{ "waittill", parser::token::WAITTILL },
{ "waittillmatch", parser::token::WAITTILLMATCH },
{ "waittillframeend", parser::token::WAITTILLFRAMEEND },
{ "if", parser::token::IF },
{ "else", parser::token::ELSE },
{ "do", parser::token::DO },
{ "while", parser::token::WHILE },
{ "for", parser::token::FOR },
{ "foreach", parser::token::FOREACH },
{ "in", parser::token::IN },
{ "switch", parser::token::SWITCH },
{ "case", parser::token::CASE },
{ "default", parser::token::DEFAULT },
{ "break", parser::token::BREAK },
{ "continue", parser::token::CONTINUE },
{ "return", parser::token::RETURN },
{ "breakpoint", parser::token::BREAKPOINT },
{ "prof_begin", parser::token::PROFBEGIN },
{ "prof_end", parser::token::PROFEND },
{ "thread", parser::token::THREAD },
{ "childthread", parser::token::CHILDTHREAD },
{ "thisthread", parser::token::THISTHREAD },
{ "call", parser::token::CALL },
{ "true", parser::token::TRUE },
{ "false", parser::token::FALSE },
{ "undefined", parser::token::UNDEFINED },
{ "game", parser::token::GAME },
{ "self", parser::token::SELF },
{ "anim", parser::token::ANIM },
{ "level", parser::token::LEVEL },
}};
buffer::buffer() : length(0) buffer::buffer() : length(0)
{ {
data = static_cast<char*>(std::malloc(max_buf_size)); data = static_cast<char*>(std::malloc(max_buf_size));
@ -35,10 +84,8 @@ bool buffer::push(char c)
return true; return true;
} }
reader::reader() : state(reader::end), buffer_pos(0), bytes_remaining(0), last_byte(0), current_byte(0) reader::reader() : state(reader::end), buffer_pos(0), bytes_remaining(0),
{ last_byte(0), current_byte(0) { }
}
void reader::init(const char* data, size_t size) void reader::init(const char* data, size_t size)
{ {
@ -78,7 +125,7 @@ void reader::advance()
} }
} }
lexer::lexer(const std::string& name, const char* data, size_t size) : indev_(false), loc_(location(&name)), lexer::lexer(const std::string& name, const char* data, size_t size) : indev_(false), clean_(true), loc_(location(&name)),
mode_(build::dev), header_top_(0), locs_(std::stack<location>()), readers_(std::stack<reader>()) mode_(build::dev), header_top_(0), locs_(std::stack<location>()), readers_(std::stack<reader>())
{ {
reader_.init(data, size); reader_.init(data, size);
@ -97,6 +144,7 @@ void lexer::push_header(const std::string& file)
locs_.push(loc_); locs_.push(loc_);
loc_.initialize(std::get<0>(data)); loc_.initialize(std::get<0>(data));
reader_.init(std::get<1>(data), std::get<2>(data)); reader_.init(std::get<1>(data), std::get<2>(data));
clean_ = true;
} }
catch (const std::exception& e) catch (const std::exception& e)
{ {
@ -145,7 +193,10 @@ auto lexer::lex() -> parser::symbol_type
return parser::make_IW7EOF(loc_); return parser::make_IW7EOF(loc_);
} }
reader_.advance(); if (clean_ && last != 0 && last != ' ' && last != '\t' && last != '\n')
clean_ = false;
advance();
switch (last) switch (last)
{ {
@ -157,12 +208,15 @@ auto lexer::lex() -> parser::symbol_type
case '\n': case '\n':
loc_.lines(); loc_.lines();
loc_.step(); loc_.step();
clean_ = true;
continue; continue;
case '\\':
throw comp_error(loc_, "invalid token ('\\')");
case '/': case '/':
if (curr != '/' && curr != '*' && curr != '#' && curr != '=') if (curr != '/' && curr != '*' && curr != '#' && curr != '=')
return parser::make_DIV(loc_); return parser::make_DIV(loc_);
reader_.advance(); advance();
if (last == '=') if (last == '=')
return parser::make_ASSIGN_DIV(loc_); return parser::make_ASSIGN_DIV(loc_);
@ -224,7 +278,30 @@ auto lexer::lex() -> parser::symbol_type
{ {
while (true) while (true)
{ {
if (state == reader::end || curr == '\n') if (state == reader::end)
break;
if (last == '\\' && curr == '\r' || curr == '\n')
{
reader_.advance();
if (state == reader::end)
break;
if (last == '\r')
{
if (curr != '\n')
throw comp_error(loc_, "invalid token ('\')");
reader_.advance();
}
loc_.lines();
loc_.step();
continue;
}
if (curr == '\n')
break; break;
reader_.advance(); reader_.advance();
@ -238,15 +315,23 @@ auto lexer::lex() -> parser::symbol_type
throw comp_error(loc_, "unmatched devblock end ('#/')"); throw comp_error(loc_, "unmatched devblock end ('#/')");
indev_ = false; indev_ = false;
reader_.advance(); advance();
return parser::make_DEVEND(loc_); return parser::make_DEVEND(loc_);
} }
buffer_.push(last); buffer_.push(last);
reader_.advance(); advance();
while (state == reader::ok)
{
if (last != ' ' || last != '\t')
break;
advance();
}
if (state == reader::end || !((last > 64 && last < 91) || (last > 96 && last < 123))) if (state == reader::end || !((last > 64 && last < 91) || (last > 96 && last < 123)))
throw comp_error(loc_, "unterminated preprocessor directive ('#')"); throw comp_error(loc_, "invalid preprocessor directive ('#')");
state_ = state::preprocessor; state_ = state::preprocessor;
goto lex_name; goto lex_name;
@ -254,7 +339,7 @@ auto lexer::lex() -> parser::symbol_type
if (curr != '/' && curr != '=') if (curr != '/' && curr != '=')
return parser::make_MUL(loc_); return parser::make_MUL(loc_);
reader_.advance(); advance();
if (last == '=') if (last == '=')
return parser::make_ASSIGN_MUL(loc_); return parser::make_ASSIGN_MUL(loc_);
@ -264,9 +349,9 @@ auto lexer::lex() -> parser::symbol_type
state_ = state::string; state_ = state::string;
goto lex_string; goto lex_string;
case '.': case '.':
reader_.advance(); advance();
if(state == reader::end) if (state == reader::end)
throw comp_error(loc_, "unterminated field ('.')"); throw comp_error(loc_, "unterminated field ('.')");
state_ = state::field; state_ = state::field;
@ -291,7 +376,7 @@ auto lexer::lex() -> parser::symbol_type
if (curr != ':') if (curr != ':')
return parser::make_COLON(loc_); return parser::make_COLON(loc_);
reader_.advance(); advance();
return parser::make_DOUBLECOLON(loc_); return parser::make_DOUBLECOLON(loc_);
case '?': case '?':
return parser::make_QMARK(loc_); return parser::make_QMARK(loc_);
@ -299,13 +384,13 @@ auto lexer::lex() -> parser::symbol_type
if (curr != '=') if (curr != '=')
return parser::make_ASSIGN(loc_); return parser::make_ASSIGN(loc_);
reader_.advance(); advance();
return parser::make_EQUALITY(loc_); return parser::make_EQUALITY(loc_);
case '+': case '+':
if (curr != '+' && curr != '=') if (curr != '+' && curr != '=')
return parser::make_ADD(loc_); return parser::make_ADD(loc_);
reader_.advance(); advance();
if (last == '+') if (last == '+')
return parser::make_INCREMENT(loc_); return parser::make_INCREMENT(loc_);
@ -315,7 +400,7 @@ auto lexer::lex() -> parser::symbol_type
if (curr != '-' && curr != '=') if (curr != '-' && curr != '=')
return parser::make_SUB(loc_); return parser::make_SUB(loc_);
reader_.advance(); advance();
if (last == '-') if (last == '-')
return parser::make_DECREMENT(loc_); return parser::make_DECREMENT(loc_);
@ -325,24 +410,24 @@ auto lexer::lex() -> parser::symbol_type
if (curr != '=') if (curr != '=')
return parser::make_MOD(loc_); return parser::make_MOD(loc_);
reader_.advance(); advance();
return parser::make_ASSIGN_MOD(loc_); return parser::make_ASSIGN_MOD(loc_);
case '|': case '|':
if (curr != '|' && curr != '=') if (curr != '|' && curr != '=')
return parser::make_BITWISE_OR(loc_); return parser::make_BITWISE_OR(loc_);
reader_.advance(); advance();
if (last == '|') if (last == '|')
return parser::make_OR(loc_); return parser::make_OR(loc_);
return parser::make_ASSIGN_BW_OR(loc_); return parser::make_ASSIGN_BW_OR(loc_);
case '&': case '&':
if (curr != '&' && curr != '=' && curr != '"' && curr != '\'') if (curr != '&' && curr != '=' && curr != '"')
return parser::make_BITWISE_AND(loc_); return parser::make_BITWISE_AND(loc_);
reader_.advance(); advance();
if (last == '&') if (last == '&')
return parser::make_AND(loc_); return parser::make_AND(loc_);
@ -356,13 +441,13 @@ auto lexer::lex() -> parser::symbol_type
if (curr != '=') if (curr != '=')
return parser::make_BITWISE_EXOR(loc_); return parser::make_BITWISE_EXOR(loc_);
reader_.advance(); advance();
return parser::make_ASSIGN_BW_EXOR(loc_); return parser::make_ASSIGN_BW_EXOR(loc_);
case '!': case '!':
if (curr != '=') if (curr != '=')
return parser::make_NOT(loc_); return parser::make_NOT(loc_);
reader_.advance(); advance();
return parser::make_INEQUALITY(loc_); return parser::make_INEQUALITY(loc_);
case '~': case '~':
return parser::make_COMPLEMENT(loc_); return parser::make_COMPLEMENT(loc_);
@ -370,20 +455,20 @@ auto lexer::lex() -> parser::symbol_type
if (curr != '<' && curr != '=') if (curr != '<' && curr != '=')
return parser::make_LESS(loc_); return parser::make_LESS(loc_);
reader_.advance(); advance();
if (last == '=') if (last == '=')
return parser::make_LESS_EQUAL(loc_); return parser::make_LESS_EQUAL(loc_);
if (curr != '=') if (curr != '=')
return parser::make_LSHIFT(loc_); return parser::make_LSHIFT(loc_);
reader_.advance(); advance();
return parser::make_ASSIGN_LSHIFT(loc_); return parser::make_ASSIGN_LSHIFT(loc_);
case '>': case '>':
if (curr != '>' && curr != '=') if (curr != '>' && curr != '=')
return parser::make_GREATER(loc_); return parser::make_GREATER(loc_);
reader_.advance(); advance();
if (last == '=') if (last == '=')
return parser::make_GREATER_EQUAL(loc_); return parser::make_GREATER_EQUAL(loc_);
@ -391,7 +476,7 @@ auto lexer::lex() -> parser::symbol_type
if (curr != '=') if (curr != '=')
return parser::make_RSHIFT(loc_); return parser::make_RSHIFT(loc_);
reader_.advance(); advance();
return parser::make_ASSIGN_RSHIFT(loc_); return parser::make_ASSIGN_RSHIFT(loc_);
default: default:
lex_name_or_number: lex_name_or_number:
@ -404,24 +489,27 @@ lex_name_or_number:
} }
lex_string: lex_string:
if (state == reader::end)
throw comp_error(loc_, "unmatched string start ('\"')");
reader_.advance();
while (true) while (true)
{ {
if (last == '"')
break;
if (last == '\n')
throw comp_error(loc_, "unterminated string literal");
if (state == reader::end) if (state == reader::end)
throw comp_error(loc_, "unmatched string start ('\"')"); throw comp_error(loc_, "unmatched string start ('\"')");
if (last == '\\') if (curr == '"')
{ {
advance();
break;
}
if (curr == '\n')
throw comp_error(loc_, "unterminated string literal");
if (curr == '\\')
{
advance();
if (state == reader::end)
throw comp_error(loc_, "invalid token ('\')");
char c = curr; char c = curr;
switch (curr) switch (curr)
{ {
@ -435,13 +523,11 @@ lex_string:
if (!buffer_.push(c)) if (!buffer_.push(c))
throw comp_error(loc_, "max string size exceeded"); throw comp_error(loc_, "max string size exceeded");
reader_.advance();
} }
else if (!buffer_.push(last)) else if (!buffer_.push(curr))
throw comp_error(loc_, "max string size exceeded"); throw comp_error(loc_, "max string size exceeded");
reader_.advance(); advance();
} }
if (state_ == state::localize) if (state_ == state::localize)
@ -472,7 +558,7 @@ lex_name:
else if (!buffer_.push(curr)) else if (!buffer_.push(curr))
throw comp_error(loc_, "max string size exceeded"); throw comp_error(loc_, "max string size exceeded");
reader_.advance(); advance();
} }
if(state_ == state::field) if(state_ == state::field)
@ -491,24 +577,23 @@ lex_name:
{ {
if (path) if (path)
throw comp_error(loc_, "invalid preprocessor directive"); throw comp_error(loc_, "invalid preprocessor directive");
auto token = parser::token::IW7UNDEF; auto token = parser::token::IW7UNDEF;
if (buffer_.length < 16) if (buffer_.length < 16)
{ {
const auto& itr = keyword_map.find(std::string_view(buffer_.data, buffer_.length)); const auto& itr = keyword_map.find(std::string_view(buffer_.data, buffer_.length));
if(itr != keyword_map.end()) if (itr != keyword_map.end())
{ {
if (itr->second > parser::token::HSENDIF) if (itr->second > parser::token::SH_ENDIF)
return parser::symbol_type(itr->second, loc_); return parser::symbol_type(itr->second, loc_);
token = itr->second; token = itr->second;
} }
} }
// TODO: call preprocessor(token); preprocessor(token);
throw comp_error(loc_, "unknown preprocessor directive");
state_ = state::start; state_ = state::start;
continue; continue;
} }
@ -518,7 +603,7 @@ lex_name:
{ {
const auto& itr = keyword_map.find(std::string_view(buffer_.data, buffer_.length)); const auto& itr = keyword_map.find(std::string_view(buffer_.data, buffer_.length));
if(itr != keyword_map.end()) if (itr != keyword_map.end())
return parser::symbol_type(itr->second, loc_); return parser::symbol_type(itr->second, loc_);
} }
@ -557,7 +642,7 @@ lex_number:
if (curr == '\'') if (curr == '\'')
{ {
reader_.advance(); advance();
continue; continue;
} }
@ -571,7 +656,7 @@ lex_number:
if (!buffer_.push(curr)) if (!buffer_.push(curr))
throw comp_error(loc_, "number literal size exceeded"); throw comp_error(loc_, "number literal size exceeded");
reader_.advance(); advance();
} }
if (last == '\'') if (last == '\'')
@ -587,7 +672,7 @@ lex_number:
} }
else if (curr == 'o') else if (curr == 'o')
{ {
reader_.advance(); advance();
while (true) while (true)
{ {
@ -599,7 +684,7 @@ lex_number:
if (curr == '\'') if (curr == '\'')
{ {
reader_.advance(); advance();
continue; continue;
} }
@ -609,7 +694,7 @@ lex_number:
if (!buffer_.push(curr)) if (!buffer_.push(curr))
throw error("gsc lexer: out of memory!"); throw error("gsc lexer: out of memory!");
reader_.advance(); advance();
} }
if (last == '\'' || buffer_.length <= 0) if (last == '\'' || buffer_.length <= 0)
@ -621,7 +706,7 @@ lex_number:
{ {
buffer_.push(last); buffer_.push(last);
buffer_.push(curr); buffer_.push(curr);
reader_.advance(); advance();
while (true) while (true)
{ {
@ -633,7 +718,7 @@ lex_number:
if (curr == '\'') if (curr == '\'')
{ {
reader_.advance(); advance();
continue; continue;
} }
@ -643,7 +728,7 @@ lex_number:
if (!buffer_.push(curr)) if (!buffer_.push(curr))
throw comp_error(loc_, "number literal size exceeded"); throw comp_error(loc_, "number literal size exceeded");
reader_.advance(); advance();
} }
if (last == '\'' || buffer_.length < 3) if (last == '\'' || buffer_.length < 3)
@ -655,7 +740,7 @@ lex_number:
{ {
buffer_.push(last); buffer_.push(last);
buffer_.push(curr); buffer_.push(curr);
reader_.advance(); advance();
while (true) while (true)
{ {
@ -667,7 +752,7 @@ lex_number:
if (curr == '\'') if (curr == '\'')
{ {
reader_.advance(); advance();
continue; continue;
} }
@ -677,7 +762,7 @@ lex_number:
if (!buffer_.push(curr)) if (!buffer_.push(curr))
throw error("gsc lexer: out of memory!"); throw error("gsc lexer: out of memory!");
reader_.advance(); advance();
} }
if (last == '\'' || buffer_.length < 3) if (last == '\'' || buffer_.length < 3)
@ -689,53 +774,89 @@ lex_number:
} }
} }
const std::unordered_map<std::string_view, parser::token::token_kind_type> lexer::keyword_map void lexer::advance()
{{ {
{ "#define", parser::token::HSDEFINE }, reader_.advance();
{ "#undef", parser::token::HSUNDEF },
{ "#ifdef", parser::token::HSIFDEF }, // dont wrap comment marks '/\/' '/\*' outside strings
{ "#ifndef", parser::token::HSIFNDEF }, if (state_ == state::start && reader_.last_byte == '/')
{ "#if", parser::token::HSIF }, return;
{ "#elif", parser::token::HSELIF },
{ "#else", parser::token::HSELSE }, while (reader_.current_byte == '\\')
{ "#endif", parser::token::HSENDIF }, {
{ "#inline", parser::token::INLINE }, if (reader_.bytes_remaining == 1)
{ "#include", parser::token::INCLUDE }, throw comp_error(loc_, "invalid token ('\\')");
{ "#using_animtree", parser::token::USINGTREE },
{ "#animtree", parser::token::ANIMTREE }, if (reader_.buffer_pos[1] != '\r' && reader_.buffer_pos[1] != '\n')
{ "endon", parser::token::ENDON }, break;
{ "notify", parser::token::NOTIFY },
{ "wait", parser::token::WAIT }, if (reader_.buffer_pos[1] == '\r')
{ "waittill", parser::token::WAITTILL }, {
{ "waittillmatch", parser::token::WAITTILLMATCH }, if (reader_.bytes_remaining <= 3 || reader_.buffer_pos[2] != '\n')
{ "waittillframeend", parser::token::WAITTILLFRAMEEND }, throw comp_error(loc_, "invalid token ('\\')");
{ "if", parser::token::IF },
{ "else", parser::token::ELSE }, reader_.buffer_pos += 3;
{ "do", parser::token::DO }, reader_.bytes_remaining -= 3;
{ "while", parser::token::WHILE }, }
{ "for", parser::token::FOR },
{ "foreach", parser::token::FOREACH }, if ((reader_.buffer_pos[1] == '\n'))
{ "in", parser::token::IN }, {
{ "switch", parser::token::SWITCH }, if (reader_.bytes_remaining == 2)
{ "case", parser::token::CASE }, throw comp_error(loc_, "invalid token ('\\')");
{ "default", parser::token::DEFAULT },
{ "break", parser::token::BREAK }, reader_.buffer_pos += 2;
{ "continue", parser::token::CONTINUE }, reader_.bytes_remaining -= 2;
{ "return", parser::token::RETURN }, }
{ "breakpoint", parser::token::BREAKPOINT },
{ "prof_begin", parser::token::PROFBEGIN }, if (reader_.bytes_remaining == 0)
{ "prof_end", parser::token::PROFEND }, {
{ "thread", parser::token::THREAD }, reader_.state = reader::end;
{ "childthread", parser::token::CHILDTHREAD }, reader_.current_byte = 0;
{ "thisthread", parser::token::THISTHREAD }, }
{ "call", parser::token::CALL }, else
{ "true", parser::token::TRUE }, {
{ "false", parser::token::FALSE }, reader_.current_byte = *reader_.buffer_pos;
{ "undefined", parser::token::UNDEFINED }, }
{ "game", parser::token::GAME },
{ "self", parser::token::SELF }, loc_.lines();
{ "anim", parser::token::ANIM }, loc_.step();
{ "level", parser::token::LEVEL }, }
}}; }
// Handles a '#' directive token handed over by the lexer.
//
// token: the token kind the lexer resolved for the directive name.
//
// Always throws comp_error:
//   - "invalid token ('#')" when clean_ is false;
//   - "unimplemented preprocessor directive" for the eight SH_* directives;
//   - "unknown preprocessor directive" for any other token kind.
void lexer::preprocessor(parser::token::token_kind_type token)
{
    // The clean_ check comes first, so it takes priority over the
    // directive-specific errors below.
    if (!clean_)
    {
        throw comp_error(loc_, "invalid token ('#')");
    }

    switch (token)
    {
        // Recognised directives that have no implementation yet all report
        // the same error, so they share a single arm.
        case parser::token::SH_DEFINE:
        case parser::token::SH_UNDEF:
        case parser::token::SH_IFDEF:
        case parser::token::SH_IFNDEF:
        case parser::token::SH_IF:
        case parser::token::SH_ELIF:
        case parser::token::SH_ELSE:
        case parser::token::SH_ENDIF:
            throw comp_error(loc_, "unimplemented preprocessor directive");

        default:
            throw comp_error(loc_, "unknown preprocessor directive");
    }
}
} // namespace xsk::gsc::iw7 } // namespace xsk::gsc::iw7

View File

@ -5,8 +5,6 @@
#pragma once #pragma once
#include "iw7.hpp"
namespace xsk::gsc::iw7 namespace xsk::gsc::iw7
{ {
@ -57,6 +55,7 @@ class lexer
state state_; state state_;
build mode_; build mode_;
bool indev_; bool indev_;
bool clean_;
public: public:
lexer(const std::string& name, const char* data, size_t size); lexer(const std::string& name, const char* data, size_t size);
@ -66,7 +65,8 @@ public:
void ban_header(const location& loc); void ban_header(const location& loc);
private: private:
static const std::unordered_map<std::string_view, parser::token::token_kind_type> keyword_map; void advance();
void preprocessor(parser::token::token_kind_type token);
}; };
} // namespace xsk::gsc::iw7 } // namespace xsk::gsc::iw7

View File

@ -674,14 +674,14 @@ namespace xsk { namespace gsc { namespace iw7 {
IW7EOF = 0, // "end of file" IW7EOF = 0, // "end of file"
IW7error = 1, // error IW7error = 1, // error
IW7UNDEF = 2, // "invalid token" IW7UNDEF = 2, // "invalid token"
HSDEFINE = 3, // "#define" SH_DEFINE = 3, // "#define"
HSUNDEF = 4, // "#undef" SH_UNDEF = 4, // "#undef"
HSIFDEF = 5, // "#ifdef" SH_IFDEF = 5, // "#ifdef"
HSIFNDEF = 6, // "#ifndef" SH_IFNDEF = 6, // "#ifndef"
HSIF = 7, // "#if" SH_IF = 7, // "#if"
HSELIF = 8, // "#elif" SH_ELIF = 8, // "#elif"
HSELSE = 9, // "#else" SH_ELSE = 9, // "#else"
HSENDIF = 10, // "#endif" SH_ENDIF = 10, // "#endif"
DEVBEGIN = 11, // "/#" DEVBEGIN = 11, // "/#"
DEVEND = 12, // "#/" DEVEND = 12, // "#/"
INLINE = 13, // "#inline" INLINE = 13, // "#inline"
@ -805,14 +805,14 @@ namespace xsk { namespace gsc { namespace iw7 {
S_YYEOF = 0, // "end of file" S_YYEOF = 0, // "end of file"
S_YYerror = 1, // error S_YYerror = 1, // error
S_YYUNDEF = 2, // "invalid token" S_YYUNDEF = 2, // "invalid token"
S_HSDEFINE = 3, // "#define" S_SH_DEFINE = 3, // "#define"
S_HSUNDEF = 4, // "#undef" S_SH_UNDEF = 4, // "#undef"
S_HSIFDEF = 5, // "#ifdef" S_SH_IFDEF = 5, // "#ifdef"
S_HSIFNDEF = 6, // "#ifndef" S_SH_IFNDEF = 6, // "#ifndef"
S_HSIF = 7, // "#if" S_SH_IF = 7, // "#if"
S_HSELIF = 8, // "#elif" S_SH_ELIF = 8, // "#elif"
S_HSELSE = 9, // "#else" S_SH_ELSE = 9, // "#else"
S_HSENDIF = 10, // "#endif" S_SH_ENDIF = 10, // "#endif"
S_DEVBEGIN = 11, // "/#" S_DEVBEGIN = 11, // "/#"
S_DEVEND = 12, // "#/" S_DEVEND = 12, // "#/"
S_INLINE = 13, // "#inline" S_INLINE = 13, // "#inline"
@ -2770,121 +2770,121 @@ switch (yykind)
#if 201103L <= YY_CPLUSPLUS #if 201103L <= YY_CPLUSPLUS
static static
symbol_type symbol_type
make_HSDEFINE (location_type l) make_SH_DEFINE (location_type l)
{ {
return symbol_type (token::HSDEFINE, std::move (l)); return symbol_type (token::SH_DEFINE, std::move (l));
} }
#else #else
static static
symbol_type symbol_type
make_HSDEFINE (const location_type& l) make_SH_DEFINE (const location_type& l)
{ {
return symbol_type (token::HSDEFINE, l); return symbol_type (token::SH_DEFINE, l);
} }
#endif #endif
#if 201103L <= YY_CPLUSPLUS #if 201103L <= YY_CPLUSPLUS
static static
symbol_type symbol_type
make_HSUNDEF (location_type l) make_SH_UNDEF (location_type l)
{ {
return symbol_type (token::HSUNDEF, std::move (l)); return symbol_type (token::SH_UNDEF, std::move (l));
} }
#else #else
static static
symbol_type symbol_type
make_HSUNDEF (const location_type& l) make_SH_UNDEF (const location_type& l)
{ {
return symbol_type (token::HSUNDEF, l); return symbol_type (token::SH_UNDEF, l);
} }
#endif #endif
#if 201103L <= YY_CPLUSPLUS #if 201103L <= YY_CPLUSPLUS
static static
symbol_type symbol_type
make_HSIFDEF (location_type l) make_SH_IFDEF (location_type l)
{ {
return symbol_type (token::HSIFDEF, std::move (l)); return symbol_type (token::SH_IFDEF, std::move (l));
} }
#else #else
static static
symbol_type symbol_type
make_HSIFDEF (const location_type& l) make_SH_IFDEF (const location_type& l)
{ {
return symbol_type (token::HSIFDEF, l); return symbol_type (token::SH_IFDEF, l);
} }
#endif #endif
#if 201103L <= YY_CPLUSPLUS #if 201103L <= YY_CPLUSPLUS
static static
symbol_type symbol_type
make_HSIFNDEF (location_type l) make_SH_IFNDEF (location_type l)
{ {
return symbol_type (token::HSIFNDEF, std::move (l)); return symbol_type (token::SH_IFNDEF, std::move (l));
} }
#else #else
static static
symbol_type symbol_type
make_HSIFNDEF (const location_type& l) make_SH_IFNDEF (const location_type& l)
{ {
return symbol_type (token::HSIFNDEF, l); return symbol_type (token::SH_IFNDEF, l);
} }
#endif #endif
#if 201103L <= YY_CPLUSPLUS #if 201103L <= YY_CPLUSPLUS
static static
symbol_type symbol_type
make_HSIF (location_type l) make_SH_IF (location_type l)
{ {
return symbol_type (token::HSIF, std::move (l)); return symbol_type (token::SH_IF, std::move (l));
} }
#else #else
static static
symbol_type symbol_type
make_HSIF (const location_type& l) make_SH_IF (const location_type& l)
{ {
return symbol_type (token::HSIF, l); return symbol_type (token::SH_IF, l);
} }
#endif #endif
#if 201103L <= YY_CPLUSPLUS #if 201103L <= YY_CPLUSPLUS
static static
symbol_type symbol_type
make_HSELIF (location_type l) make_SH_ELIF (location_type l)
{ {
return symbol_type (token::HSELIF, std::move (l)); return symbol_type (token::SH_ELIF, std::move (l));
} }
#else #else
static static
symbol_type symbol_type
make_HSELIF (const location_type& l) make_SH_ELIF (const location_type& l)
{ {
return symbol_type (token::HSELIF, l); return symbol_type (token::SH_ELIF, l);
} }
#endif #endif
#if 201103L <= YY_CPLUSPLUS #if 201103L <= YY_CPLUSPLUS
static static
symbol_type symbol_type
make_HSELSE (location_type l) make_SH_ELSE (location_type l)
{ {
return symbol_type (token::HSELSE, std::move (l)); return symbol_type (token::SH_ELSE, std::move (l));
} }
#else #else
static static
symbol_type symbol_type
make_HSELSE (const location_type& l) make_SH_ELSE (const location_type& l)
{ {
return symbol_type (token::HSELSE, l); return symbol_type (token::SH_ELSE, l);
} }
#endif #endif
#if 201103L <= YY_CPLUSPLUS #if 201103L <= YY_CPLUSPLUS
static static
symbol_type symbol_type
make_HSENDIF (location_type l) make_SH_ENDIF (location_type l)
{ {
return symbol_type (token::HSENDIF, std::move (l)); return symbol_type (token::SH_ENDIF, std::move (l));
} }
#else #else
static static
symbol_type symbol_type
make_HSENDIF (const location_type& l) make_SH_ENDIF (const location_type& l)
{ {
return symbol_type (token::HSENDIF, l); return symbol_type (token::SH_ENDIF, l);
} }
#endif #endif
#if 201103L <= YY_CPLUSPLUS #if 201103L <= YY_CPLUSPLUS

View File

@ -16,6 +16,58 @@ xsk::gsc::iw8::parser::symbol_type IW8lex(xsk::gsc::iw8::lexer& lexer)
namespace xsk::gsc::iw8 namespace xsk::gsc::iw8
{ {
// Lookup table from source spelling to parser token kind, covering both
// '#'-prefixed directives and reserved words.
// The lexer queries it with find() on a string_view over its name buffer.
// For directives, a token kind greater than SH_ENDIF is returned to the
// parser as a symbol; the remaining ones are handed to lexer::preprocessor().
const std::unordered_map<std::string_view, parser::token::token_kind_type> keyword_map
{{
// preprocessor directives (SH_DEFINE .. SH_ENDIF)
{ "#define", parser::token::SH_DEFINE },
{ "#undef", parser::token::SH_UNDEF },
{ "#ifdef", parser::token::SH_IFDEF },
{ "#ifndef", parser::token::SH_IFNDEF },
{ "#if", parser::token::SH_IF },
{ "#elif", parser::token::SH_ELIF },
{ "#else", parser::token::SH_ELSE },
{ "#endif", parser::token::SH_ENDIF },
// '#' directives that are emitted as parser tokens
{ "#inline", parser::token::INLINE },
{ "#include", parser::token::INCLUDE },
{ "#using_animtree", parser::token::USINGTREE },
{ "#animtree", parser::token::ANIMTREE },
// reserved words
{ "endon", parser::token::ENDON },
{ "notify", parser::token::NOTIFY },
{ "wait", parser::token::WAIT },
{ "waittill", parser::token::WAITTILL },
{ "waittillmatch", parser::token::WAITTILLMATCH },
{ "waittillframeend", parser::token::WAITTILLFRAMEEND },
{ "waitframe", parser::token::WAITFRAME },
{ "if", parser::token::IF },
{ "else", parser::token::ELSE },
{ "do", parser::token::DO },
{ "while", parser::token::WHILE },
{ "for", parser::token::FOR },
{ "foreach", parser::token::FOREACH },
{ "in", parser::token::IN },
{ "switch", parser::token::SWITCH },
{ "case", parser::token::CASE },
{ "default", parser::token::DEFAULT },
{ "break", parser::token::BREAK },
{ "continue", parser::token::CONTINUE },
{ "return", parser::token::RETURN },
{ "breakpoint", parser::token::BREAKPOINT },
{ "prof_begin", parser::token::PROFBEGIN },
{ "prof_end", parser::token::PROFEND },
{ "thread", parser::token::THREAD },
{ "childthread", parser::token::CHILDTHREAD },
{ "thisthread", parser::token::THISTHREAD },
{ "call", parser::token::CALL },
{ "true", parser::token::TRUE },
{ "false", parser::token::FALSE },
{ "undefined", parser::token::UNDEFINED },
{ "game", parser::token::GAME },
{ "self", parser::token::SELF },
{ "anim", parser::token::ANIM },
{ "level", parser::token::LEVEL },
{ "isdefined", parser::token::ISDEFINED },
{ "istrue", parser::token::ISTRUE },
}};
buffer::buffer() : length(0) buffer::buffer() : length(0)
{ {
data = static_cast<char*>(std::malloc(max_buf_size)); data = static_cast<char*>(std::malloc(max_buf_size));
@ -35,10 +87,8 @@ bool buffer::push(char c)
return true; return true;
} }
reader::reader() : state(reader::end), buffer_pos(0), bytes_remaining(0), last_byte(0), current_byte(0) reader::reader() : state(reader::end), buffer_pos(0), bytes_remaining(0),
{ last_byte(0), current_byte(0) { }
}
void reader::init(const char* data, size_t size) void reader::init(const char* data, size_t size)
{ {
@ -78,7 +128,7 @@ void reader::advance()
} }
} }
lexer::lexer(const std::string& name, const char* data, size_t size) : indev_(false), loc_(location(&name)), lexer::lexer(const std::string& name, const char* data, size_t size) : indev_(false), clean_(true), loc_(location(&name)),
mode_(build::dev), header_top_(0), locs_(std::stack<location>()), readers_(std::stack<reader>()) mode_(build::dev), header_top_(0), locs_(std::stack<location>()), readers_(std::stack<reader>())
{ {
reader_.init(data, size); reader_.init(data, size);
@ -97,6 +147,7 @@ void lexer::push_header(const std::string& file)
locs_.push(loc_); locs_.push(loc_);
loc_.initialize(std::get<0>(data)); loc_.initialize(std::get<0>(data));
reader_.init(std::get<1>(data), std::get<2>(data)); reader_.init(std::get<1>(data), std::get<2>(data));
clean_ = true;
} }
catch (const std::exception& e) catch (const std::exception& e)
{ {
@ -145,7 +196,10 @@ auto lexer::lex() -> parser::symbol_type
return parser::make_IW8EOF(loc_); return parser::make_IW8EOF(loc_);
} }
reader_.advance(); if (clean_ && last != 0 && last != ' ' && last != '\t' && last != '\n')
clean_ = false;
advance();
switch (last) switch (last)
{ {
@ -157,12 +211,15 @@ auto lexer::lex() -> parser::symbol_type
case '\n': case '\n':
loc_.lines(); loc_.lines();
loc_.step(); loc_.step();
clean_ = true;
continue; continue;
case '\\':
throw comp_error(loc_, "invalid token ('\\')");
case '/': case '/':
if (curr != '/' && curr != '*' && curr != '#' && curr != '=') if (curr != '/' && curr != '*' && curr != '#' && curr != '=')
return parser::make_DIV(loc_); return parser::make_DIV(loc_);
reader_.advance(); advance();
if (last == '=') if (last == '=')
return parser::make_ASSIGN_DIV(loc_); return parser::make_ASSIGN_DIV(loc_);
@ -224,7 +281,30 @@ auto lexer::lex() -> parser::symbol_type
{ {
while (true) while (true)
{ {
if (state == reader::end || curr == '\n') if (state == reader::end)
break;
if (last == '\\' && curr == '\r' || curr == '\n')
{
reader_.advance();
if (state == reader::end)
break;
if (last == '\r')
{
if (curr != '\n')
throw comp_error(loc_, "invalid token ('\')");
reader_.advance();
}
loc_.lines();
loc_.step();
continue;
}
if (curr == '\n')
break; break;
reader_.advance(); reader_.advance();
@ -238,15 +318,23 @@ auto lexer::lex() -> parser::symbol_type
throw comp_error(loc_, "unmatched devblock end ('#/')"); throw comp_error(loc_, "unmatched devblock end ('#/')");
indev_ = false; indev_ = false;
reader_.advance(); advance();
return parser::make_DEVEND(loc_); return parser::make_DEVEND(loc_);
} }
buffer_.push(last); buffer_.push(last);
reader_.advance(); advance();
while (state == reader::ok)
{
if (last != ' ' || last != '\t')
break;
advance();
}
if (state == reader::end || !((last > 64 && last < 91) || (last > 96 && last < 123))) if (state == reader::end || !((last > 64 && last < 91) || (last > 96 && last < 123)))
throw comp_error(loc_, "unterminated preprocessor directive ('#')"); throw comp_error(loc_, "invalid preprocessor directive ('#')");
state_ = state::preprocessor; state_ = state::preprocessor;
goto lex_name; goto lex_name;
@ -254,7 +342,7 @@ auto lexer::lex() -> parser::symbol_type
if (curr != '/' && curr != '=') if (curr != '/' && curr != '=')
return parser::make_MUL(loc_); return parser::make_MUL(loc_);
reader_.advance(); advance();
if (last == '=') if (last == '=')
return parser::make_ASSIGN_MUL(loc_); return parser::make_ASSIGN_MUL(loc_);
@ -264,9 +352,9 @@ auto lexer::lex() -> parser::symbol_type
state_ = state::string; state_ = state::string;
goto lex_string; goto lex_string;
case '.': case '.':
reader_.advance(); advance();
if(state == reader::end) if (state == reader::end)
throw comp_error(loc_, "unterminated field ('.')"); throw comp_error(loc_, "unterminated field ('.')");
state_ = state::field; state_ = state::field;
@ -291,7 +379,7 @@ auto lexer::lex() -> parser::symbol_type
if (curr != ':') if (curr != ':')
return parser::make_COLON(loc_); return parser::make_COLON(loc_);
reader_.advance(); advance();
return parser::make_DOUBLECOLON(loc_); return parser::make_DOUBLECOLON(loc_);
case '?': case '?':
return parser::make_QMARK(loc_); return parser::make_QMARK(loc_);
@ -299,13 +387,13 @@ auto lexer::lex() -> parser::symbol_type
if (curr != '=') if (curr != '=')
return parser::make_ASSIGN(loc_); return parser::make_ASSIGN(loc_);
reader_.advance(); advance();
return parser::make_EQUALITY(loc_); return parser::make_EQUALITY(loc_);
case '+': case '+':
if (curr != '+' && curr != '=') if (curr != '+' && curr != '=')
return parser::make_ADD(loc_); return parser::make_ADD(loc_);
reader_.advance(); advance();
if (last == '+') if (last == '+')
return parser::make_INCREMENT(loc_); return parser::make_INCREMENT(loc_);
@ -315,7 +403,7 @@ auto lexer::lex() -> parser::symbol_type
if (curr != '-' && curr != '=') if (curr != '-' && curr != '=')
return parser::make_SUB(loc_); return parser::make_SUB(loc_);
reader_.advance(); advance();
if (last == '-') if (last == '-')
return parser::make_DECREMENT(loc_); return parser::make_DECREMENT(loc_);
@ -325,24 +413,24 @@ auto lexer::lex() -> parser::symbol_type
if (curr != '=') if (curr != '=')
return parser::make_MOD(loc_); return parser::make_MOD(loc_);
reader_.advance(); advance();
return parser::make_ASSIGN_MOD(loc_); return parser::make_ASSIGN_MOD(loc_);
case '|': case '|':
if (curr != '|' && curr != '=') if (curr != '|' && curr != '=')
return parser::make_BITWISE_OR(loc_); return parser::make_BITWISE_OR(loc_);
reader_.advance(); advance();
if (last == '|') if (last == '|')
return parser::make_OR(loc_); return parser::make_OR(loc_);
return parser::make_ASSIGN_BW_OR(loc_); return parser::make_ASSIGN_BW_OR(loc_);
case '&': case '&':
if (curr != '&' && curr != '=' && curr != '"' && curr != '\'') if (curr != '&' && curr != '=' && curr != '"')
return parser::make_BITWISE_AND(loc_); return parser::make_BITWISE_AND(loc_);
reader_.advance(); advance();
if (last == '&') if (last == '&')
return parser::make_AND(loc_); return parser::make_AND(loc_);
@ -356,13 +444,13 @@ auto lexer::lex() -> parser::symbol_type
if (curr != '=') if (curr != '=')
return parser::make_BITWISE_EXOR(loc_); return parser::make_BITWISE_EXOR(loc_);
reader_.advance(); advance();
return parser::make_ASSIGN_BW_EXOR(loc_); return parser::make_ASSIGN_BW_EXOR(loc_);
case '!': case '!':
if (curr != '=') if (curr != '=')
return parser::make_NOT(loc_); return parser::make_NOT(loc_);
reader_.advance(); advance();
return parser::make_INEQUALITY(loc_); return parser::make_INEQUALITY(loc_);
case '~': case '~':
return parser::make_COMPLEMENT(loc_); return parser::make_COMPLEMENT(loc_);
@ -370,20 +458,20 @@ auto lexer::lex() -> parser::symbol_type
if (curr != '<' && curr != '=') if (curr != '<' && curr != '=')
return parser::make_LESS(loc_); return parser::make_LESS(loc_);
reader_.advance(); advance();
if (last == '=') if (last == '=')
return parser::make_LESS_EQUAL(loc_); return parser::make_LESS_EQUAL(loc_);
if (curr != '=') if (curr != '=')
return parser::make_LSHIFT(loc_); return parser::make_LSHIFT(loc_);
reader_.advance(); advance();
return parser::make_ASSIGN_LSHIFT(loc_); return parser::make_ASSIGN_LSHIFT(loc_);
case '>': case '>':
if (curr != '>' && curr != '=') if (curr != '>' && curr != '=')
return parser::make_GREATER(loc_); return parser::make_GREATER(loc_);
reader_.advance(); advance();
if (last == '=') if (last == '=')
return parser::make_GREATER_EQUAL(loc_); return parser::make_GREATER_EQUAL(loc_);
@ -391,7 +479,7 @@ auto lexer::lex() -> parser::symbol_type
if (curr != '=') if (curr != '=')
return parser::make_RSHIFT(loc_); return parser::make_RSHIFT(loc_);
reader_.advance(); advance();
return parser::make_ASSIGN_RSHIFT(loc_); return parser::make_ASSIGN_RSHIFT(loc_);
default: default:
lex_name_or_number: lex_name_or_number:
@ -404,24 +492,27 @@ lex_name_or_number:
} }
lex_string: lex_string:
if (state == reader::end)
throw comp_error(loc_, "unmatched string start ('\"')");
reader_.advance();
while (true) while (true)
{ {
if (last == '"')
break;
if (last == '\n')
throw comp_error(loc_, "unterminated string literal");
if (state == reader::end) if (state == reader::end)
throw comp_error(loc_, "unmatched string start ('\"')"); throw comp_error(loc_, "unmatched string start ('\"')");
if (last == '\\') if (curr == '"')
{ {
advance();
break;
}
if (curr == '\n')
throw comp_error(loc_, "unterminated string literal");
if (curr == '\\')
{
advance();
if (state == reader::end)
throw comp_error(loc_, "invalid token ('\')");
char c = curr; char c = curr;
switch (curr) switch (curr)
{ {
@ -435,13 +526,11 @@ lex_string:
if (!buffer_.push(c)) if (!buffer_.push(c))
throw comp_error(loc_, "max string size exceeded"); throw comp_error(loc_, "max string size exceeded");
reader_.advance();
} }
else if (!buffer_.push(last)) else if (!buffer_.push(curr))
throw comp_error(loc_, "max string size exceeded"); throw comp_error(loc_, "max string size exceeded");
reader_.advance(); advance();
} }
if (state_ == state::localize) if (state_ == state::localize)
@ -472,7 +561,7 @@ lex_name:
else if (!buffer_.push(curr)) else if (!buffer_.push(curr))
throw comp_error(loc_, "max string size exceeded"); throw comp_error(loc_, "max string size exceeded");
reader_.advance(); advance();
} }
if(state_ == state::field) if(state_ == state::field)
@ -491,24 +580,23 @@ lex_name:
{ {
if (path) if (path)
throw comp_error(loc_, "invalid preprocessor directive"); throw comp_error(loc_, "invalid preprocessor directive");
auto token = parser::token::IW8UNDEF; auto token = parser::token::IW8UNDEF;
if (buffer_.length < 16) if (buffer_.length < 16)
{ {
const auto& itr = keyword_map.find(std::string_view(buffer_.data, buffer_.length)); const auto& itr = keyword_map.find(std::string_view(buffer_.data, buffer_.length));
if(itr != keyword_map.end()) if (itr != keyword_map.end())
{ {
if (itr->second > parser::token::HSENDIF) if (itr->second > parser::token::SH_ENDIF)
return parser::symbol_type(itr->second, loc_); return parser::symbol_type(itr->second, loc_);
token = itr->second; token = itr->second;
} }
} }
// TODO: call preprocessor(token); preprocessor(token);
throw comp_error(loc_, "unknown preprocessor directive");
state_ = state::start; state_ = state::start;
continue; continue;
} }
@ -529,7 +617,7 @@ lex_name:
{ {
const auto& itr = keyword_map.find(std::string_view(buffer_.data, buffer_.length)); const auto& itr = keyword_map.find(std::string_view(buffer_.data, buffer_.length));
if(itr != keyword_map.end()) if (itr != keyword_map.end())
return parser::symbol_type(itr->second, loc_); return parser::symbol_type(itr->second, loc_);
} }
@ -568,7 +656,7 @@ lex_number:
if (curr == '\'') if (curr == '\'')
{ {
reader_.advance(); advance();
continue; continue;
} }
@ -582,7 +670,7 @@ lex_number:
if (!buffer_.push(curr)) if (!buffer_.push(curr))
throw comp_error(loc_, "number literal size exceeded"); throw comp_error(loc_, "number literal size exceeded");
reader_.advance(); advance();
} }
if (last == '\'') if (last == '\'')
@ -598,7 +686,7 @@ lex_number:
} }
else if (curr == 'o') else if (curr == 'o')
{ {
reader_.advance(); advance();
while (true) while (true)
{ {
@ -610,7 +698,7 @@ lex_number:
if (curr == '\'') if (curr == '\'')
{ {
reader_.advance(); advance();
continue; continue;
} }
@ -620,7 +708,7 @@ lex_number:
if (!buffer_.push(curr)) if (!buffer_.push(curr))
throw error("gsc lexer: out of memory!"); throw error("gsc lexer: out of memory!");
reader_.advance(); advance();
} }
if (last == '\'' || buffer_.length <= 0) if (last == '\'' || buffer_.length <= 0)
@ -632,7 +720,7 @@ lex_number:
{ {
buffer_.push(last); buffer_.push(last);
buffer_.push(curr); buffer_.push(curr);
reader_.advance(); advance();
while (true) while (true)
{ {
@ -644,7 +732,7 @@ lex_number:
if (curr == '\'') if (curr == '\'')
{ {
reader_.advance(); advance();
continue; continue;
} }
@ -654,7 +742,7 @@ lex_number:
if (!buffer_.push(curr)) if (!buffer_.push(curr))
throw comp_error(loc_, "number literal size exceeded"); throw comp_error(loc_, "number literal size exceeded");
reader_.advance(); advance();
} }
if (last == '\'' || buffer_.length < 3) if (last == '\'' || buffer_.length < 3)
@ -666,7 +754,7 @@ lex_number:
{ {
buffer_.push(last); buffer_.push(last);
buffer_.push(curr); buffer_.push(curr);
reader_.advance(); advance();
while (true) while (true)
{ {
@ -678,7 +766,7 @@ lex_number:
if (curr == '\'') if (curr == '\'')
{ {
reader_.advance(); advance();
continue; continue;
} }
@ -688,7 +776,7 @@ lex_number:
if (!buffer_.push(curr)) if (!buffer_.push(curr))
throw error("gsc lexer: out of memory!"); throw error("gsc lexer: out of memory!");
reader_.advance(); advance();
} }
if (last == '\'' || buffer_.length < 3) if (last == '\'' || buffer_.length < 3)
@ -700,56 +788,89 @@ lex_number:
} }
} }
const std::unordered_map<std::string_view, parser::token::token_kind_type> lexer::keyword_map void lexer::advance()
{{ {
{ "#define", parser::token::HSDEFINE }, reader_.advance();
{ "#undef", parser::token::HSUNDEF },
{ "#ifdef", parser::token::HSIFDEF }, // dont wrap comment marks '/\/' '/\*' outside strings
{ "#ifndef", parser::token::HSIFNDEF }, if (state_ == state::start && reader_.last_byte == '/')
{ "#if", parser::token::HSIF }, return;
{ "#elif", parser::token::HSELIF },
{ "#else", parser::token::HSELSE }, while (reader_.current_byte == '\\')
{ "#endif", parser::token::HSENDIF }, {
{ "#inline", parser::token::INLINE }, if (reader_.bytes_remaining == 1)
{ "#include", parser::token::INCLUDE }, throw comp_error(loc_, "invalid token ('\\')");
{ "#using_animtree", parser::token::USINGTREE },
{ "#animtree", parser::token::ANIMTREE }, if (reader_.buffer_pos[1] != '\r' && reader_.buffer_pos[1] != '\n')
{ "endon", parser::token::ENDON }, break;
{ "notify", parser::token::NOTIFY },
{ "wait", parser::token::WAIT }, if (reader_.buffer_pos[1] == '\r')
{ "waittill", parser::token::WAITTILL }, {
{ "waittillmatch", parser::token::WAITTILLMATCH }, if (reader_.bytes_remaining <= 3 || reader_.buffer_pos[2] != '\n')
{ "waittillframeend", parser::token::WAITTILLFRAMEEND }, throw comp_error(loc_, "invalid token ('\\')");
{ "waitframe", parser::token::WAITFRAME },
{ "if", parser::token::IF }, reader_.buffer_pos += 3;
{ "else", parser::token::ELSE }, reader_.bytes_remaining -= 3;
{ "do", parser::token::DO }, }
{ "while", parser::token::WHILE },
{ "for", parser::token::FOR }, if ((reader_.buffer_pos[1] == '\n'))
{ "foreach", parser::token::FOREACH }, {
{ "in", parser::token::IN }, if (reader_.bytes_remaining == 2)
{ "switch", parser::token::SWITCH }, throw comp_error(loc_, "invalid token ('\\')");
{ "case", parser::token::CASE },
{ "default", parser::token::DEFAULT }, reader_.buffer_pos += 2;
{ "break", parser::token::BREAK }, reader_.bytes_remaining -= 2;
{ "continue", parser::token::CONTINUE }, }
{ "return", parser::token::RETURN },
{ "breakpoint", parser::token::BREAKPOINT }, if (reader_.bytes_remaining == 0)
{ "prof_begin", parser::token::PROFBEGIN }, {
{ "prof_end", parser::token::PROFEND }, reader_.state = reader::end;
{ "thread", parser::token::THREAD }, reader_.current_byte = 0;
{ "childthread", parser::token::CHILDTHREAD }, }
{ "thisthread", parser::token::THISTHREAD }, else
{ "call", parser::token::CALL }, {
{ "true", parser::token::TRUE }, reader_.current_byte = *reader_.buffer_pos;
{ "false", parser::token::FALSE }, }
{ "undefined", parser::token::UNDEFINED },
{ "game", parser::token::GAME }, loc_.lines();
{ "self", parser::token::SELF }, loc_.step();
{ "anim", parser::token::ANIM }, }
{ "level", parser::token::LEVEL }, }
{ "isdefined", parser::token::ISDEFINED },
{ "istrue", parser::token::ISTRUE }, void lexer::preprocessor(parser::token::token_kind_type token)
}}; {
if (!clean_)
throw comp_error(loc_, "invalid token ('#')");
switch (token)
{
case parser::token::SH_DEFINE:
throw comp_error(loc_, "unimplemented preprocessor directive");
break;
case parser::token::SH_UNDEF:
throw comp_error(loc_, "unimplemented preprocessor directive");
break;
case parser::token::SH_IFDEF:
throw comp_error(loc_, "unimplemented preprocessor directive");
break;
case parser::token::SH_IFNDEF:
throw comp_error(loc_, "unimplemented preprocessor directive");
break;
case parser::token::SH_IF:
throw comp_error(loc_, "unimplemented preprocessor directive");
break;
case parser::token::SH_ELIF:
throw comp_error(loc_, "unimplemented preprocessor directive");
break;
case parser::token::SH_ELSE:
throw comp_error(loc_, "unimplemented preprocessor directive");
break;
case parser::token::SH_ENDIF:
throw comp_error(loc_, "unimplemented preprocessor directive");
break;
default:
throw comp_error(loc_, "unknown preprocessor directive");
}
}
} // namespace xsk::gsc::iw8 } // namespace xsk::gsc::iw8

View File

@ -5,8 +5,6 @@
#pragma once #pragma once
#include "iw8.hpp"
namespace xsk::gsc::iw8 namespace xsk::gsc::iw8
{ {
@ -57,6 +55,7 @@ class lexer
state state_; state state_;
build mode_; build mode_;
bool indev_; bool indev_;
bool clean_;
public: public:
lexer(const std::string& name, const char* data, size_t size); lexer(const std::string& name, const char* data, size_t size);
@ -66,7 +65,8 @@ public:
void ban_header(const location& loc); void ban_header(const location& loc);
private: private:
static const std::unordered_map<std::string_view, parser::token::token_kind_type> keyword_map; void advance();
void preprocessor(parser::token::token_kind_type token);
}; };
} // namespace xsk::gsc::iw8 } // namespace xsk::gsc::iw8

View File

@ -683,14 +683,14 @@ namespace xsk { namespace gsc { namespace iw8 {
IW8EOF = 0, // "end of file" IW8EOF = 0, // "end of file"
IW8error = 1, // error IW8error = 1, // error
IW8UNDEF = 2, // "invalid token" IW8UNDEF = 2, // "invalid token"
HSDEFINE = 3, // "#define" SH_DEFINE = 3, // "#define"
HSUNDEF = 4, // "#undef" SH_UNDEF = 4, // "#undef"
HSIFDEF = 5, // "#ifdef" SH_IFDEF = 5, // "#ifdef"
HSIFNDEF = 6, // "#ifndef" SH_IFNDEF = 6, // "#ifndef"
HSIF = 7, // "#if" SH_IF = 7, // "#if"
HSELIF = 8, // "#elif" SH_ELIF = 8, // "#elif"
HSELSE = 9, // "#else" SH_ELSE = 9, // "#else"
HSENDIF = 10, // "#endif" SH_ENDIF = 10, // "#endif"
DEVBEGIN = 11, // "/#" DEVBEGIN = 11, // "/#"
DEVEND = 12, // "#/" DEVEND = 12, // "#/"
INLINE = 13, // "#inline" INLINE = 13, // "#inline"
@ -817,14 +817,14 @@ namespace xsk { namespace gsc { namespace iw8 {
S_YYEOF = 0, // "end of file" S_YYEOF = 0, // "end of file"
S_YYerror = 1, // error S_YYerror = 1, // error
S_YYUNDEF = 2, // "invalid token" S_YYUNDEF = 2, // "invalid token"
S_HSDEFINE = 3, // "#define" S_SH_DEFINE = 3, // "#define"
S_HSUNDEF = 4, // "#undef" S_SH_UNDEF = 4, // "#undef"
S_HSIFDEF = 5, // "#ifdef" S_SH_IFDEF = 5, // "#ifdef"
S_HSIFNDEF = 6, // "#ifndef" S_SH_IFNDEF = 6, // "#ifndef"
S_HSIF = 7, // "#if" S_SH_IF = 7, // "#if"
S_HSELIF = 8, // "#elif" S_SH_ELIF = 8, // "#elif"
S_HSELSE = 9, // "#else" S_SH_ELSE = 9, // "#else"
S_HSENDIF = 10, // "#endif" S_SH_ENDIF = 10, // "#endif"
S_DEVBEGIN = 11, // "/#" S_DEVBEGIN = 11, // "/#"
S_DEVEND = 12, // "#/" S_DEVEND = 12, // "#/"
S_INLINE = 13, // "#inline" S_INLINE = 13, // "#inline"
@ -2854,121 +2854,121 @@ switch (yykind)
#if 201103L <= YY_CPLUSPLUS #if 201103L <= YY_CPLUSPLUS
static static
symbol_type symbol_type
make_HSDEFINE (location_type l) make_SH_DEFINE (location_type l)
{ {
return symbol_type (token::HSDEFINE, std::move (l)); return symbol_type (token::SH_DEFINE, std::move (l));
} }
#else #else
static static
symbol_type symbol_type
make_HSDEFINE (const location_type& l) make_SH_DEFINE (const location_type& l)
{ {
return symbol_type (token::HSDEFINE, l); return symbol_type (token::SH_DEFINE, l);
} }
#endif #endif
#if 201103L <= YY_CPLUSPLUS #if 201103L <= YY_CPLUSPLUS
static static
symbol_type symbol_type
make_HSUNDEF (location_type l) make_SH_UNDEF (location_type l)
{ {
return symbol_type (token::HSUNDEF, std::move (l)); return symbol_type (token::SH_UNDEF, std::move (l));
} }
#else #else
static static
symbol_type symbol_type
make_HSUNDEF (const location_type& l) make_SH_UNDEF (const location_type& l)
{ {
return symbol_type (token::HSUNDEF, l); return symbol_type (token::SH_UNDEF, l);
} }
#endif #endif
#if 201103L <= YY_CPLUSPLUS #if 201103L <= YY_CPLUSPLUS
static static
symbol_type symbol_type
make_HSIFDEF (location_type l) make_SH_IFDEF (location_type l)
{ {
return symbol_type (token::HSIFDEF, std::move (l)); return symbol_type (token::SH_IFDEF, std::move (l));
} }
#else #else
static static
symbol_type symbol_type
make_HSIFDEF (const location_type& l) make_SH_IFDEF (const location_type& l)
{ {
return symbol_type (token::HSIFDEF, l); return symbol_type (token::SH_IFDEF, l);
} }
#endif #endif
#if 201103L <= YY_CPLUSPLUS #if 201103L <= YY_CPLUSPLUS
static static
symbol_type symbol_type
make_HSIFNDEF (location_type l) make_SH_IFNDEF (location_type l)
{ {
return symbol_type (token::HSIFNDEF, std::move (l)); return symbol_type (token::SH_IFNDEF, std::move (l));
} }
#else #else
static static
symbol_type symbol_type
make_HSIFNDEF (const location_type& l) make_SH_IFNDEF (const location_type& l)
{ {
return symbol_type (token::HSIFNDEF, l); return symbol_type (token::SH_IFNDEF, l);
} }
#endif #endif
#if 201103L <= YY_CPLUSPLUS #if 201103L <= YY_CPLUSPLUS
static static
symbol_type symbol_type
make_HSIF (location_type l) make_SH_IF (location_type l)
{ {
return symbol_type (token::HSIF, std::move (l)); return symbol_type (token::SH_IF, std::move (l));
} }
#else #else
static static
symbol_type symbol_type
make_HSIF (const location_type& l) make_SH_IF (const location_type& l)
{ {
return symbol_type (token::HSIF, l); return symbol_type (token::SH_IF, l);
} }
#endif #endif
#if 201103L <= YY_CPLUSPLUS #if 201103L <= YY_CPLUSPLUS
static static
symbol_type symbol_type
make_HSELIF (location_type l) make_SH_ELIF (location_type l)
{ {
return symbol_type (token::HSELIF, std::move (l)); return symbol_type (token::SH_ELIF, std::move (l));
} }
#else #else
static static
symbol_type symbol_type
make_HSELIF (const location_type& l) make_SH_ELIF (const location_type& l)
{ {
return symbol_type (token::HSELIF, l); return symbol_type (token::SH_ELIF, l);
} }
#endif #endif
#if 201103L <= YY_CPLUSPLUS #if 201103L <= YY_CPLUSPLUS
static static
symbol_type symbol_type
make_HSELSE (location_type l) make_SH_ELSE (location_type l)
{ {
return symbol_type (token::HSELSE, std::move (l)); return symbol_type (token::SH_ELSE, std::move (l));
} }
#else #else
static static
symbol_type symbol_type
make_HSELSE (const location_type& l) make_SH_ELSE (const location_type& l)
{ {
return symbol_type (token::HSELSE, l); return symbol_type (token::SH_ELSE, l);
} }
#endif #endif
#if 201103L <= YY_CPLUSPLUS #if 201103L <= YY_CPLUSPLUS
static static
symbol_type symbol_type
make_HSENDIF (location_type l) make_SH_ENDIF (location_type l)
{ {
return symbol_type (token::HSENDIF, std::move (l)); return symbol_type (token::SH_ENDIF, std::move (l));
} }
#else #else
static static
symbol_type symbol_type
make_HSENDIF (const location_type& l) make_SH_ENDIF (const location_type& l)
{ {
return symbol_type (token::HSENDIF, l); return symbol_type (token::SH_ENDIF, l);
} }
#endif #endif
#if 201103L <= YY_CPLUSPLUS #if 201103L <= YY_CPLUSPLUS

View File

@ -16,6 +16,56 @@ xsk::gsc::s1::parser::symbol_type S1lex(xsk::gsc::s1::lexer& lexer)
namespace xsk::gsc::s1 namespace xsk::gsc::s1
{ {
const std::unordered_map<std::string_view, parser::token::token_kind_type> keyword_map
{{
{ "#define", parser::token::SH_DEFINE },
{ "#undef", parser::token::SH_UNDEF },
{ "#ifdef", parser::token::SH_IFDEF },
{ "#ifndef", parser::token::SH_IFNDEF },
{ "#if", parser::token::SH_IF },
{ "#elif", parser::token::SH_ELIF },
{ "#else", parser::token::SH_ELSE },
{ "#endif", parser::token::SH_ENDIF },
{ "#inline", parser::token::INLINE },
{ "#include", parser::token::INCLUDE },
{ "#using_animtree", parser::token::USINGTREE },
{ "#animtree", parser::token::ANIMTREE },
{ "endon", parser::token::ENDON },
{ "notify", parser::token::NOTIFY },
{ "wait", parser::token::WAIT },
{ "waittill", parser::token::WAITTILL },
{ "waittillmatch", parser::token::WAITTILLMATCH },
{ "waittillframeend", parser::token::WAITTILLFRAMEEND },
{ "waitframe", parser::token::WAITFRAME },
{ "if", parser::token::IF },
{ "else", parser::token::ELSE },
{ "do", parser::token::DO },
{ "while", parser::token::WHILE },
{ "for", parser::token::FOR },
{ "foreach", parser::token::FOREACH },
{ "in", parser::token::IN },
{ "switch", parser::token::SWITCH },
{ "case", parser::token::CASE },
{ "default", parser::token::DEFAULT },
{ "break", parser::token::BREAK },
{ "continue", parser::token::CONTINUE },
{ "return", parser::token::RETURN },
{ "breakpoint", parser::token::BREAKPOINT },
{ "prof_begin", parser::token::PROFBEGIN },
{ "prof_end", parser::token::PROFEND },
{ "thread", parser::token::THREAD },
{ "childthread", parser::token::CHILDTHREAD },
{ "thisthread", parser::token::THISTHREAD },
{ "call", parser::token::CALL },
{ "true", parser::token::TRUE },
{ "false", parser::token::FALSE },
{ "undefined", parser::token::UNDEFINED },
{ "game", parser::token::GAME },
{ "self", parser::token::SELF },
{ "anim", parser::token::ANIM },
{ "level", parser::token::LEVEL },
}};
buffer::buffer() : length(0) buffer::buffer() : length(0)
{ {
data = static_cast<char*>(std::malloc(max_buf_size)); data = static_cast<char*>(std::malloc(max_buf_size));
@ -35,10 +85,8 @@ bool buffer::push(char c)
return true; return true;
} }
reader::reader() : state(reader::end), buffer_pos(0), bytes_remaining(0), last_byte(0), current_byte(0) reader::reader() : state(reader::end), buffer_pos(0), bytes_remaining(0),
{ last_byte(0), current_byte(0) { }
}
void reader::init(const char* data, size_t size) void reader::init(const char* data, size_t size)
{ {
@ -78,7 +126,7 @@ void reader::advance()
} }
} }
lexer::lexer(const std::string& name, const char* data, size_t size) : indev_(false), loc_(location(&name)), lexer::lexer(const std::string& name, const char* data, size_t size) : indev_(false), clean_(true), loc_(location(&name)),
mode_(build::dev), header_top_(0), locs_(std::stack<location>()), readers_(std::stack<reader>()) mode_(build::dev), header_top_(0), locs_(std::stack<location>()), readers_(std::stack<reader>())
{ {
reader_.init(data, size); reader_.init(data, size);
@ -97,6 +145,7 @@ void lexer::push_header(const std::string& file)
locs_.push(loc_); locs_.push(loc_);
loc_.initialize(std::get<0>(data)); loc_.initialize(std::get<0>(data));
reader_.init(std::get<1>(data), std::get<2>(data)); reader_.init(std::get<1>(data), std::get<2>(data));
clean_ = true;
} }
catch (const std::exception& e) catch (const std::exception& e)
{ {
@ -145,7 +194,10 @@ auto lexer::lex() -> parser::symbol_type
return parser::make_S1EOF(loc_); return parser::make_S1EOF(loc_);
} }
reader_.advance(); if (clean_ && last != 0 && last != ' ' && last != '\t' && last != '\n')
clean_ = false;
advance();
switch (last) switch (last)
{ {
@ -157,12 +209,15 @@ auto lexer::lex() -> parser::symbol_type
case '\n': case '\n':
loc_.lines(); loc_.lines();
loc_.step(); loc_.step();
clean_ = true;
continue; continue;
case '\\':
throw comp_error(loc_, "invalid token ('\\')");
case '/': case '/':
if (curr != '/' && curr != '*' && curr != '#' && curr != '=') if (curr != '/' && curr != '*' && curr != '#' && curr != '=')
return parser::make_DIV(loc_); return parser::make_DIV(loc_);
reader_.advance(); advance();
if (last == '=') if (last == '=')
return parser::make_ASSIGN_DIV(loc_); return parser::make_ASSIGN_DIV(loc_);
@ -224,7 +279,30 @@ auto lexer::lex() -> parser::symbol_type
{ {
while (true) while (true)
{ {
if (state == reader::end || curr == '\n') if (state == reader::end)
break;
// Line continuation: a backslash immediately followed by a line break
// (LF or CRLF) keeps the current construct going onto the next line.
// The parentheses matter: without them '&&' binds tighter than '||', so
// every '\n' took this path and the plain end-of-line check that follows
// this block could never fire.
if (last == '\\' && (curr == '\r' || curr == '\n'))
{
    reader_.advance();

    if (state == reader::end)
        break;

    if (last == '\r')
    {
        // A carriage return must be the first half of a CRLF pair.
        if (curr != '\n')
            throw comp_error(loc_, "invalid token ('\\')");

        reader_.advance();
    }

    // Account for the consumed line break in the source location.
    loc_.lines();
    loc_.step();
    continue;
}
if (curr == '\n')
break; break;
reader_.advance(); reader_.advance();
@ -238,15 +316,23 @@ auto lexer::lex() -> parser::symbol_type
throw comp_error(loc_, "unmatched devblock end ('#/')"); throw comp_error(loc_, "unmatched devblock end ('#/')");
indev_ = false; indev_ = false;
reader_.advance(); advance();
return parser::make_DEVEND(loc_); return parser::make_DEVEND(loc_);
} }
buffer_.push(last); buffer_.push(last);
reader_.advance(); advance();
// Skip spaces and tabs that follow the '#' so the directive name may be
// separated from it. The test must use '&&': "not a space OR not a tab" is
// true for every byte, which made the loop exit immediately without ever
// skipping anything.
while (state == reader::ok)
{
    if (last != ' ' && last != '\t')
        break;

    advance();
}
if (state == reader::end || !((last > 64 && last < 91) || (last > 96 && last < 123))) if (state == reader::end || !((last > 64 && last < 91) || (last > 96 && last < 123)))
throw comp_error(loc_, "unterminated preprocessor directive ('#')"); throw comp_error(loc_, "invalid preprocessor directive ('#')");
state_ = state::preprocessor; state_ = state::preprocessor;
goto lex_name; goto lex_name;
@ -254,7 +340,7 @@ auto lexer::lex() -> parser::symbol_type
if (curr != '/' && curr != '=') if (curr != '/' && curr != '=')
return parser::make_MUL(loc_); return parser::make_MUL(loc_);
reader_.advance(); advance();
if (last == '=') if (last == '=')
return parser::make_ASSIGN_MUL(loc_); return parser::make_ASSIGN_MUL(loc_);
@ -264,9 +350,9 @@ auto lexer::lex() -> parser::symbol_type
state_ = state::string; state_ = state::string;
goto lex_string; goto lex_string;
case '.': case '.':
reader_.advance(); advance();
if(state == reader::end) if (state == reader::end)
throw comp_error(loc_, "unterminated field ('.')"); throw comp_error(loc_, "unterminated field ('.')");
state_ = state::field; state_ = state::field;
@ -291,7 +377,7 @@ auto lexer::lex() -> parser::symbol_type
if (curr != ':') if (curr != ':')
return parser::make_COLON(loc_); return parser::make_COLON(loc_);
reader_.advance(); advance();
return parser::make_DOUBLECOLON(loc_); return parser::make_DOUBLECOLON(loc_);
case '?': case '?':
return parser::make_QMARK(loc_); return parser::make_QMARK(loc_);
@ -299,13 +385,13 @@ auto lexer::lex() -> parser::symbol_type
if (curr != '=') if (curr != '=')
return parser::make_ASSIGN(loc_); return parser::make_ASSIGN(loc_);
reader_.advance(); advance();
return parser::make_EQUALITY(loc_); return parser::make_EQUALITY(loc_);
case '+': case '+':
if (curr != '+' && curr != '=') if (curr != '+' && curr != '=')
return parser::make_ADD(loc_); return parser::make_ADD(loc_);
reader_.advance(); advance();
if (last == '+') if (last == '+')
return parser::make_INCREMENT(loc_); return parser::make_INCREMENT(loc_);
@ -315,7 +401,7 @@ auto lexer::lex() -> parser::symbol_type
if (curr != '-' && curr != '=') if (curr != '-' && curr != '=')
return parser::make_SUB(loc_); return parser::make_SUB(loc_);
reader_.advance(); advance();
if (last == '-') if (last == '-')
return parser::make_DECREMENT(loc_); return parser::make_DECREMENT(loc_);
@ -325,24 +411,24 @@ auto lexer::lex() -> parser::symbol_type
if (curr != '=') if (curr != '=')
return parser::make_MOD(loc_); return parser::make_MOD(loc_);
reader_.advance(); advance();
return parser::make_ASSIGN_MOD(loc_); return parser::make_ASSIGN_MOD(loc_);
case '|': case '|':
if (curr != '|' && curr != '=') if (curr != '|' && curr != '=')
return parser::make_BITWISE_OR(loc_); return parser::make_BITWISE_OR(loc_);
reader_.advance(); advance();
if (last == '|') if (last == '|')
return parser::make_OR(loc_); return parser::make_OR(loc_);
return parser::make_ASSIGN_BW_OR(loc_); return parser::make_ASSIGN_BW_OR(loc_);
case '&': case '&':
if (curr != '&' && curr != '=' && curr != '"' && curr != '\'') if (curr != '&' && curr != '=' && curr != '"')
return parser::make_BITWISE_AND(loc_); return parser::make_BITWISE_AND(loc_);
reader_.advance(); advance();
if (last == '&') if (last == '&')
return parser::make_AND(loc_); return parser::make_AND(loc_);
@ -356,13 +442,13 @@ auto lexer::lex() -> parser::symbol_type
if (curr != '=') if (curr != '=')
return parser::make_BITWISE_EXOR(loc_); return parser::make_BITWISE_EXOR(loc_);
reader_.advance(); advance();
return parser::make_ASSIGN_BW_EXOR(loc_); return parser::make_ASSIGN_BW_EXOR(loc_);
case '!': case '!':
if (curr != '=') if (curr != '=')
return parser::make_NOT(loc_); return parser::make_NOT(loc_);
reader_.advance(); advance();
return parser::make_INEQUALITY(loc_); return parser::make_INEQUALITY(loc_);
case '~': case '~':
return parser::make_COMPLEMENT(loc_); return parser::make_COMPLEMENT(loc_);
@ -370,20 +456,20 @@ auto lexer::lex() -> parser::symbol_type
if (curr != '<' && curr != '=') if (curr != '<' && curr != '=')
return parser::make_LESS(loc_); return parser::make_LESS(loc_);
reader_.advance(); advance();
if (last == '=') if (last == '=')
return parser::make_LESS_EQUAL(loc_); return parser::make_LESS_EQUAL(loc_);
if (curr != '=') if (curr != '=')
return parser::make_LSHIFT(loc_); return parser::make_LSHIFT(loc_);
reader_.advance(); advance();
return parser::make_ASSIGN_LSHIFT(loc_); return parser::make_ASSIGN_LSHIFT(loc_);
case '>': case '>':
if (curr != '>' && curr != '=') if (curr != '>' && curr != '=')
return parser::make_GREATER(loc_); return parser::make_GREATER(loc_);
reader_.advance(); advance();
if (last == '=') if (last == '=')
return parser::make_GREATER_EQUAL(loc_); return parser::make_GREATER_EQUAL(loc_);
@ -391,7 +477,7 @@ auto lexer::lex() -> parser::symbol_type
if (curr != '=') if (curr != '=')
return parser::make_RSHIFT(loc_); return parser::make_RSHIFT(loc_);
reader_.advance(); advance();
return parser::make_ASSIGN_RSHIFT(loc_); return parser::make_ASSIGN_RSHIFT(loc_);
default: default:
lex_name_or_number: lex_name_or_number:
@ -404,24 +490,27 @@ lex_name_or_number:
} }
lex_string: lex_string:
if (state == reader::end)
throw comp_error(loc_, "unmatched string start ('\"')");
reader_.advance();
while (true) while (true)
{ {
if (last == '"')
break;
if (last == '\n')
throw comp_error(loc_, "unterminated string literal");
if (state == reader::end) if (state == reader::end)
throw comp_error(loc_, "unmatched string start ('\"')"); throw comp_error(loc_, "unmatched string start ('\"')");
if (last == '\\') if (curr == '"')
{ {
advance();
break;
}
if (curr == '\n')
throw comp_error(loc_, "unterminated string literal");
if (curr == '\\')
{
advance();
// Input ended right after the escape introducer. The backslash in the
// message has to be written as '\\'; a lone \' is just an escaped quote and
// would print ('') instead.
if (state == reader::end)
    throw comp_error(loc_, "invalid token ('\\')");
char c = curr; char c = curr;
switch (curr) switch (curr)
{ {
@ -435,13 +524,11 @@ lex_string:
if (!buffer_.push(c)) if (!buffer_.push(c))
throw comp_error(loc_, "max string size exceeded"); throw comp_error(loc_, "max string size exceeded");
reader_.advance();
} }
else if (!buffer_.push(last)) else if (!buffer_.push(curr))
throw comp_error(loc_, "max string size exceeded"); throw comp_error(loc_, "max string size exceeded");
reader_.advance(); advance();
} }
if (state_ == state::localize) if (state_ == state::localize)
@ -472,7 +559,7 @@ lex_name:
else if (!buffer_.push(curr)) else if (!buffer_.push(curr))
throw comp_error(loc_, "max string size exceeded"); throw comp_error(loc_, "max string size exceeded");
reader_.advance(); advance();
} }
if(state_ == state::field) if(state_ == state::field)
@ -491,24 +578,23 @@ lex_name:
{ {
if (path) if (path)
throw comp_error(loc_, "invalid preprocessor directive"); throw comp_error(loc_, "invalid preprocessor directive");
auto token = parser::token::S1UNDEF; auto token = parser::token::S1UNDEF;
if (buffer_.length < 16) if (buffer_.length < 16)
{ {
const auto& itr = keyword_map.find(std::string_view(buffer_.data, buffer_.length)); const auto& itr = keyword_map.find(std::string_view(buffer_.data, buffer_.length));
if(itr != keyword_map.end()) if (itr != keyword_map.end())
{ {
if (itr->second > parser::token::HSENDIF) if (itr->second > parser::token::SH_ENDIF)
return parser::symbol_type(itr->second, loc_); return parser::symbol_type(itr->second, loc_);
token = itr->second; token = itr->second;
} }
} }
// TODO: call preprocessor(token); preprocessor(token);
throw comp_error(loc_, "unknown preprocessor directive");
state_ = state::start; state_ = state::start;
continue; continue;
} }
@ -518,7 +604,7 @@ lex_name:
{ {
const auto& itr = keyword_map.find(std::string_view(buffer_.data, buffer_.length)); const auto& itr = keyword_map.find(std::string_view(buffer_.data, buffer_.length));
if(itr != keyword_map.end()) if (itr != keyword_map.end())
return parser::symbol_type(itr->second, loc_); return parser::symbol_type(itr->second, loc_);
} }
@ -557,7 +643,7 @@ lex_number:
if (curr == '\'') if (curr == '\'')
{ {
reader_.advance(); advance();
continue; continue;
} }
@ -571,7 +657,7 @@ lex_number:
if (!buffer_.push(curr)) if (!buffer_.push(curr))
throw comp_error(loc_, "number literal size exceeded"); throw comp_error(loc_, "number literal size exceeded");
reader_.advance(); advance();
} }
if (last == '\'') if (last == '\'')
@ -587,7 +673,7 @@ lex_number:
} }
else if (curr == 'o') else if (curr == 'o')
{ {
reader_.advance(); advance();
while (true) while (true)
{ {
@ -599,7 +685,7 @@ lex_number:
if (curr == '\'') if (curr == '\'')
{ {
reader_.advance(); advance();
continue; continue;
} }
@ -609,7 +695,7 @@ lex_number:
if (!buffer_.push(curr)) if (!buffer_.push(curr))
throw error("gsc lexer: out of memory!"); throw error("gsc lexer: out of memory!");
reader_.advance(); advance();
} }
if (last == '\'' || buffer_.length <= 0) if (last == '\'' || buffer_.length <= 0)
@ -621,7 +707,7 @@ lex_number:
{ {
buffer_.push(last); buffer_.push(last);
buffer_.push(curr); buffer_.push(curr);
reader_.advance(); advance();
while (true) while (true)
{ {
@ -633,7 +719,7 @@ lex_number:
if (curr == '\'') if (curr == '\'')
{ {
reader_.advance(); advance();
continue; continue;
} }
@ -643,7 +729,7 @@ lex_number:
if (!buffer_.push(curr)) if (!buffer_.push(curr))
throw comp_error(loc_, "number literal size exceeded"); throw comp_error(loc_, "number literal size exceeded");
reader_.advance(); advance();
} }
if (last == '\'' || buffer_.length < 3) if (last == '\'' || buffer_.length < 3)
@ -655,7 +741,7 @@ lex_number:
{ {
buffer_.push(last); buffer_.push(last);
buffer_.push(curr); buffer_.push(curr);
reader_.advance(); advance();
while (true) while (true)
{ {
@ -667,7 +753,7 @@ lex_number:
if (curr == '\'') if (curr == '\'')
{ {
reader_.advance(); advance();
continue; continue;
} }
@ -677,7 +763,7 @@ lex_number:
if (!buffer_.push(curr)) if (!buffer_.push(curr))
throw error("gsc lexer: out of memory!"); throw error("gsc lexer: out of memory!");
reader_.advance(); advance();
} }
if (last == '\'' || buffer_.length < 3) if (last == '\'' || buffer_.length < 3)
@ -689,54 +775,89 @@ lex_number:
} }
} }
const std::unordered_map<std::string_view, parser::token::token_kind_type> lexer::keyword_map void lexer::advance()
{{ {
{ "#define", parser::token::HSDEFINE }, reader_.advance();
{ "#undef", parser::token::HSUNDEF },
{ "#ifdef", parser::token::HSIFDEF }, // dont wrap comment marks '/\/' '/\*' outside strings
{ "#ifndef", parser::token::HSIFNDEF }, if (state_ == state::start && reader_.last_byte == '/')
{ "#if", parser::token::HSIF }, return;
{ "#elif", parser::token::HSELIF },
{ "#else", parser::token::HSELSE }, while (reader_.current_byte == '\\')
{ "#endif", parser::token::HSENDIF }, {
{ "#inline", parser::token::INLINE }, if (reader_.bytes_remaining == 1)
{ "#include", parser::token::INCLUDE }, throw comp_error(loc_, "invalid token ('\\')");
{ "#using_animtree", parser::token::USINGTREE },
{ "#animtree", parser::token::ANIMTREE }, if (reader_.buffer_pos[1] != '\r' && reader_.buffer_pos[1] != '\n')
{ "endon", parser::token::ENDON }, break;
{ "notify", parser::token::NOTIFY },
{ "wait", parser::token::WAIT }, if (reader_.buffer_pos[1] == '\r')
{ "waittill", parser::token::WAITTILL }, {
{ "waittillmatch", parser::token::WAITTILLMATCH }, if (reader_.bytes_remaining <= 3 || reader_.buffer_pos[2] != '\n')
{ "waittillframeend", parser::token::WAITTILLFRAMEEND }, throw comp_error(loc_, "invalid token ('\\')");
{ "waitframe", parser::token::WAITFRAME },
{ "if", parser::token::IF }, reader_.buffer_pos += 3;
{ "else", parser::token::ELSE }, reader_.bytes_remaining -= 3;
{ "do", parser::token::DO }, }
{ "while", parser::token::WHILE },
{ "for", parser::token::FOR }, if ((reader_.buffer_pos[1] == '\n'))
{ "foreach", parser::token::FOREACH }, {
{ "in", parser::token::IN }, if (reader_.bytes_remaining == 2)
{ "switch", parser::token::SWITCH }, throw comp_error(loc_, "invalid token ('\\')");
{ "case", parser::token::CASE },
{ "default", parser::token::DEFAULT }, reader_.buffer_pos += 2;
{ "break", parser::token::BREAK }, reader_.bytes_remaining -= 2;
{ "continue", parser::token::CONTINUE }, }
{ "return", parser::token::RETURN },
{ "breakpoint", parser::token::BREAKPOINT }, if (reader_.bytes_remaining == 0)
{ "prof_begin", parser::token::PROFBEGIN }, {
{ "prof_end", parser::token::PROFEND }, reader_.state = reader::end;
{ "thread", parser::token::THREAD }, reader_.current_byte = 0;
{ "childthread", parser::token::CHILDTHREAD }, }
{ "thisthread", parser::token::THISTHREAD }, else
{ "call", parser::token::CALL }, {
{ "true", parser::token::TRUE }, reader_.current_byte = *reader_.buffer_pos;
{ "false", parser::token::FALSE }, }
{ "undefined", parser::token::UNDEFINED },
{ "game", parser::token::GAME }, loc_.lines();
{ "self", parser::token::SELF }, loc_.step();
{ "anim", parser::token::ANIM }, }
{ "level", parser::token::LEVEL }, }
}};
// Handles a preprocessor directive token found by the lexer.
//
// token: the directive's token kind, or any other value when the directive
//        name was not recognised.
//
// Currently every path throws comp_error:
//   - "invalid token ('#')" when clean_ is false, i.e. something other than
//     blanks was already lexed on the current line;
//   - "unimplemented preprocessor directive" for the known conditional and
//     macro directives;
//   - "unknown preprocessor directive" for anything else.
void lexer::preprocessor(parser::token::token_kind_type token)
{
    if (!clean_)
        throw comp_error(loc_, "invalid token ('#')");

    switch (token)
    {
        // None of the recognised directives has an implementation yet, so
        // they all share a single outcome.
        case parser::token::SH_DEFINE:
        case parser::token::SH_UNDEF:
        case parser::token::SH_IFDEF:
        case parser::token::SH_IFNDEF:
        case parser::token::SH_IF:
        case parser::token::SH_ELIF:
        case parser::token::SH_ELSE:
        case parser::token::SH_ENDIF:
            throw comp_error(loc_, "unimplemented preprocessor directive");

        default:
            throw comp_error(loc_, "unknown preprocessor directive");
    }
}
} // namespace xsk::gsc::s1 } // namespace xsk::gsc::s1

View File

@ -5,8 +5,6 @@
#pragma once #pragma once
#include "s1.hpp"
namespace xsk::gsc::s1 namespace xsk::gsc::s1
{ {
@ -57,6 +55,7 @@ class lexer
state state_; state state_;
build mode_; build mode_;
bool indev_; bool indev_;
bool clean_;
public: public:
lexer(const std::string& name, const char* data, size_t size); lexer(const std::string& name, const char* data, size_t size);
@ -66,7 +65,8 @@ public:
void ban_header(const location& loc); void ban_header(const location& loc);
private: private:
static const std::unordered_map<std::string_view, parser::token::token_kind_type> keyword_map; void advance();
void preprocessor(parser::token::token_kind_type token);
}; };
} // namespace xsk::gsc::s1 } // namespace xsk::gsc::s1

View File

@ -677,14 +677,14 @@ namespace xsk { namespace gsc { namespace s1 {
S1EOF = 0, // "end of file" S1EOF = 0, // "end of file"
S1error = 1, // error S1error = 1, // error
S1UNDEF = 2, // "invalid token" S1UNDEF = 2, // "invalid token"
HSDEFINE = 3, // "#define" SH_DEFINE = 3, // "#define"
HSUNDEF = 4, // "#undef" SH_UNDEF = 4, // "#undef"
HSIFDEF = 5, // "#ifdef" SH_IFDEF = 5, // "#ifdef"
HSIFNDEF = 6, // "#ifndef" SH_IFNDEF = 6, // "#ifndef"
HSIF = 7, // "#if" SH_IF = 7, // "#if"
HSELIF = 8, // "#elif" SH_ELIF = 8, // "#elif"
HSELSE = 9, // "#else" SH_ELSE = 9, // "#else"
HSENDIF = 10, // "#endif" SH_ENDIF = 10, // "#endif"
DEVBEGIN = 11, // "/#" DEVBEGIN = 11, // "/#"
DEVEND = 12, // "#/" DEVEND = 12, // "#/"
INLINE = 13, // "#inline" INLINE = 13, // "#inline"
@ -809,14 +809,14 @@ namespace xsk { namespace gsc { namespace s1 {
S_YYEOF = 0, // "end of file" S_YYEOF = 0, // "end of file"
S_YYerror = 1, // error S_YYerror = 1, // error
S_YYUNDEF = 2, // "invalid token" S_YYUNDEF = 2, // "invalid token"
S_HSDEFINE = 3, // "#define" S_SH_DEFINE = 3, // "#define"
S_HSUNDEF = 4, // "#undef" S_SH_UNDEF = 4, // "#undef"
S_HSIFDEF = 5, // "#ifdef" S_SH_IFDEF = 5, // "#ifdef"
S_HSIFNDEF = 6, // "#ifndef" S_SH_IFNDEF = 6, // "#ifndef"
S_HSIF = 7, // "#if" S_SH_IF = 7, // "#if"
S_HSELIF = 8, // "#elif" S_SH_ELIF = 8, // "#elif"
S_HSELSE = 9, // "#else" S_SH_ELSE = 9, // "#else"
S_HSENDIF = 10, // "#endif" S_SH_ENDIF = 10, // "#endif"
S_DEVBEGIN = 11, // "/#" S_DEVBEGIN = 11, // "/#"
S_DEVEND = 12, // "#/" S_DEVEND = 12, // "#/"
S_INLINE = 13, // "#inline" S_INLINE = 13, // "#inline"
@ -2798,121 +2798,121 @@ switch (yykind)
#if 201103L <= YY_CPLUSPLUS #if 201103L <= YY_CPLUSPLUS
static static
symbol_type symbol_type
make_HSDEFINE (location_type l) make_SH_DEFINE (location_type l)
{ {
return symbol_type (token::HSDEFINE, std::move (l)); return symbol_type (token::SH_DEFINE, std::move (l));
} }
#else #else
static static
symbol_type symbol_type
make_HSDEFINE (const location_type& l) make_SH_DEFINE (const location_type& l)
{ {
return symbol_type (token::HSDEFINE, l); return symbol_type (token::SH_DEFINE, l);
} }
#endif #endif
#if 201103L <= YY_CPLUSPLUS #if 201103L <= YY_CPLUSPLUS
static static
symbol_type symbol_type
make_HSUNDEF (location_type l) make_SH_UNDEF (location_type l)
{ {
return symbol_type (token::HSUNDEF, std::move (l)); return symbol_type (token::SH_UNDEF, std::move (l));
} }
#else #else
static static
symbol_type symbol_type
make_HSUNDEF (const location_type& l) make_SH_UNDEF (const location_type& l)
{ {
return symbol_type (token::HSUNDEF, l); return symbol_type (token::SH_UNDEF, l);
} }
#endif #endif
#if 201103L <= YY_CPLUSPLUS #if 201103L <= YY_CPLUSPLUS
static static
symbol_type symbol_type
make_HSIFDEF (location_type l) make_SH_IFDEF (location_type l)
{ {
return symbol_type (token::HSIFDEF, std::move (l)); return symbol_type (token::SH_IFDEF, std::move (l));
} }
#else #else
static static
symbol_type symbol_type
make_HSIFDEF (const location_type& l) make_SH_IFDEF (const location_type& l)
{ {
return symbol_type (token::HSIFDEF, l); return symbol_type (token::SH_IFDEF, l);
} }
#endif #endif
#if 201103L <= YY_CPLUSPLUS #if 201103L <= YY_CPLUSPLUS
static static
symbol_type symbol_type
make_HSIFNDEF (location_type l) make_SH_IFNDEF (location_type l)
{ {
return symbol_type (token::HSIFNDEF, std::move (l)); return symbol_type (token::SH_IFNDEF, std::move (l));
} }
#else #else
static static
symbol_type symbol_type
make_HSIFNDEF (const location_type& l) make_SH_IFNDEF (const location_type& l)
{ {
return symbol_type (token::HSIFNDEF, l); return symbol_type (token::SH_IFNDEF, l);
} }
#endif #endif
#if 201103L <= YY_CPLUSPLUS #if 201103L <= YY_CPLUSPLUS
static static
symbol_type symbol_type
make_HSIF (location_type l) make_SH_IF (location_type l)
{ {
return symbol_type (token::HSIF, std::move (l)); return symbol_type (token::SH_IF, std::move (l));
} }
#else #else
static static
symbol_type symbol_type
make_HSIF (const location_type& l) make_SH_IF (const location_type& l)
{ {
return symbol_type (token::HSIF, l); return symbol_type (token::SH_IF, l);
} }
#endif #endif
#if 201103L <= YY_CPLUSPLUS #if 201103L <= YY_CPLUSPLUS
static static
symbol_type symbol_type
make_HSELIF (location_type l) make_SH_ELIF (location_type l)
{ {
return symbol_type (token::HSELIF, std::move (l)); return symbol_type (token::SH_ELIF, std::move (l));
} }
#else #else
static static
symbol_type symbol_type
make_HSELIF (const location_type& l) make_SH_ELIF (const location_type& l)
{ {
return symbol_type (token::HSELIF, l); return symbol_type (token::SH_ELIF, l);
} }
#endif #endif
#if 201103L <= YY_CPLUSPLUS #if 201103L <= YY_CPLUSPLUS
static static
symbol_type symbol_type
make_HSELSE (location_type l) make_SH_ELSE (location_type l)
{ {
return symbol_type (token::HSELSE, std::move (l)); return symbol_type (token::SH_ELSE, std::move (l));
} }
#else #else
static static
symbol_type symbol_type
make_HSELSE (const location_type& l) make_SH_ELSE (const location_type& l)
{ {
return symbol_type (token::HSELSE, l); return symbol_type (token::SH_ELSE, l);
} }
#endif #endif
#if 201103L <= YY_CPLUSPLUS #if 201103L <= YY_CPLUSPLUS
static static
symbol_type symbol_type
make_HSENDIF (location_type l) make_SH_ENDIF (location_type l)
{ {
return symbol_type (token::HSENDIF, std::move (l)); return symbol_type (token::SH_ENDIF, std::move (l));
} }
#else #else
static static
symbol_type symbol_type
make_HSENDIF (const location_type& l) make_SH_ENDIF (const location_type& l)
{ {
return symbol_type (token::HSENDIF, l); return symbol_type (token::SH_ENDIF, l);
} }
#endif #endif
#if 201103L <= YY_CPLUSPLUS #if 201103L <= YY_CPLUSPLUS

View File

@ -16,6 +16,56 @@ xsk::gsc::s2::parser::symbol_type S2lex(xsk::gsc::s2::lexer& lexer)
namespace xsk::gsc::s2 namespace xsk::gsc::s2
{ {
const std::unordered_map<std::string_view, parser::token::token_kind_type> keyword_map
{{
{ "#define", parser::token::SH_DEFINE },
{ "#undef", parser::token::SH_UNDEF },
{ "#ifdef", parser::token::SH_IFDEF },
{ "#ifndef", parser::token::SH_IFNDEF },
{ "#if", parser::token::SH_IF },
{ "#elif", parser::token::SH_ELIF },
{ "#else", parser::token::SH_ELSE },
{ "#endif", parser::token::SH_ENDIF },
{ "#inline", parser::token::INLINE },
{ "#include", parser::token::INCLUDE },
{ "#using_animtree", parser::token::USINGTREE },
{ "#animtree", parser::token::ANIMTREE },
{ "endon", parser::token::ENDON },
{ "notify", parser::token::NOTIFY },
{ "wait", parser::token::WAIT },
{ "waittill", parser::token::WAITTILL },
{ "waittillmatch", parser::token::WAITTILLMATCH },
{ "waittillframeend", parser::token::WAITTILLFRAMEEND },
{ "waitframe", parser::token::WAITFRAME },
{ "if", parser::token::IF },
{ "else", parser::token::ELSE },
{ "do", parser::token::DO },
{ "while", parser::token::WHILE },
{ "for", parser::token::FOR },
{ "foreach", parser::token::FOREACH },
{ "in", parser::token::IN },
{ "switch", parser::token::SWITCH },
{ "case", parser::token::CASE },
{ "default", parser::token::DEFAULT },
{ "break", parser::token::BREAK },
{ "continue", parser::token::CONTINUE },
{ "return", parser::token::RETURN },
{ "breakpoint", parser::token::BREAKPOINT },
{ "prof_begin", parser::token::PROFBEGIN },
{ "prof_end", parser::token::PROFEND },
{ "thread", parser::token::THREAD },
{ "childthread", parser::token::CHILDTHREAD },
{ "thisthread", parser::token::THISTHREAD },
{ "call", parser::token::CALL },
{ "true", parser::token::TRUE },
{ "false", parser::token::FALSE },
{ "undefined", parser::token::UNDEFINED },
{ "game", parser::token::GAME },
{ "self", parser::token::SELF },
{ "anim", parser::token::ANIM },
{ "level", parser::token::LEVEL },
}};
buffer::buffer() : length(0) buffer::buffer() : length(0)
{ {
data = static_cast<char*>(std::malloc(max_buf_size)); data = static_cast<char*>(std::malloc(max_buf_size));
@ -35,10 +85,8 @@ bool buffer::push(char c)
return true; return true;
} }
reader::reader() : state(reader::end), buffer_pos(0), bytes_remaining(0), last_byte(0), current_byte(0) reader::reader() : state(reader::end), buffer_pos(0), bytes_remaining(0),
{ last_byte(0), current_byte(0) { }
}
void reader::init(const char* data, size_t size) void reader::init(const char* data, size_t size)
{ {
@ -78,7 +126,7 @@ void reader::advance()
} }
} }
lexer::lexer(const std::string& name, const char* data, size_t size) : indev_(false), loc_(location(&name)), lexer::lexer(const std::string& name, const char* data, size_t size) : indev_(false), clean_(true), loc_(location(&name)),
mode_(build::dev), header_top_(0), locs_(std::stack<location>()), readers_(std::stack<reader>()) mode_(build::dev), header_top_(0), locs_(std::stack<location>()), readers_(std::stack<reader>())
{ {
reader_.init(data, size); reader_.init(data, size);
@ -97,6 +145,7 @@ void lexer::push_header(const std::string& file)
locs_.push(loc_); locs_.push(loc_);
loc_.initialize(std::get<0>(data)); loc_.initialize(std::get<0>(data));
reader_.init(std::get<1>(data), std::get<2>(data)); reader_.init(std::get<1>(data), std::get<2>(data));
clean_ = true;
} }
catch (const std::exception& e) catch (const std::exception& e)
{ {
@ -145,7 +194,10 @@ auto lexer::lex() -> parser::symbol_type
return parser::make_S2EOF(loc_); return parser::make_S2EOF(loc_);
} }
reader_.advance(); if (clean_ && last != 0 && last != ' ' && last != '\t' && last != '\n')
clean_ = false;
advance();
switch (last) switch (last)
{ {
@ -157,12 +209,15 @@ auto lexer::lex() -> parser::symbol_type
case '\n': case '\n':
loc_.lines(); loc_.lines();
loc_.step(); loc_.step();
clean_ = true;
continue; continue;
case '\\':
throw comp_error(loc_, "invalid token ('\\')");
case '/': case '/':
if (curr != '/' && curr != '*' && curr != '#' && curr != '=') if (curr != '/' && curr != '*' && curr != '#' && curr != '=')
return parser::make_DIV(loc_); return parser::make_DIV(loc_);
reader_.advance(); advance();
if (last == '=') if (last == '=')
return parser::make_ASSIGN_DIV(loc_); return parser::make_ASSIGN_DIV(loc_);
@ -224,7 +279,30 @@ auto lexer::lex() -> parser::symbol_type
{ {
while (true) while (true)
{ {
if (state == reader::end || curr == '\n') if (state == reader::end)
break;
// Line continuation: a backslash immediately followed by a line break
// (LF or CRLF) keeps the current construct going onto the next line.
// The parentheses matter: without them '&&' binds tighter than '||', so
// every '\n' took this path and the plain end-of-line check that follows
// this block could never fire.
if (last == '\\' && (curr == '\r' || curr == '\n'))
{
    reader_.advance();

    if (state == reader::end)
        break;

    if (last == '\r')
    {
        // A carriage return must be the first half of a CRLF pair.
        if (curr != '\n')
            throw comp_error(loc_, "invalid token ('\\')");

        reader_.advance();
    }

    // Account for the consumed line break in the source location.
    loc_.lines();
    loc_.step();
    continue;
}
if (curr == '\n')
break; break;
reader_.advance(); reader_.advance();
@ -238,15 +316,23 @@ auto lexer::lex() -> parser::symbol_type
throw comp_error(loc_, "unmatched devblock end ('#/')"); throw comp_error(loc_, "unmatched devblock end ('#/')");
indev_ = false; indev_ = false;
reader_.advance(); advance();
return parser::make_DEVEND(loc_); return parser::make_DEVEND(loc_);
} }
buffer_.push(last); buffer_.push(last);
reader_.advance(); advance();
// Skip spaces and tabs that follow the '#' so the directive name may be
// separated from it. The test must use '&&': "not a space OR not a tab" is
// true for every byte, which made the loop exit immediately without ever
// skipping anything.
while (state == reader::ok)
{
    if (last != ' ' && last != '\t')
        break;

    advance();
}
if (state == reader::end || !((last > 64 && last < 91) || (last > 96 && last < 123))) if (state == reader::end || !((last > 64 && last < 91) || (last > 96 && last < 123)))
throw comp_error(loc_, "unterminated preprocessor directive ('#')"); throw comp_error(loc_, "invalid preprocessor directive ('#')");
state_ = state::preprocessor; state_ = state::preprocessor;
goto lex_name; goto lex_name;
@ -254,7 +340,7 @@ auto lexer::lex() -> parser::symbol_type
if (curr != '/' && curr != '=') if (curr != '/' && curr != '=')
return parser::make_MUL(loc_); return parser::make_MUL(loc_);
reader_.advance(); advance();
if (last == '=') if (last == '=')
return parser::make_ASSIGN_MUL(loc_); return parser::make_ASSIGN_MUL(loc_);
@ -264,9 +350,9 @@ auto lexer::lex() -> parser::symbol_type
state_ = state::string; state_ = state::string;
goto lex_string; goto lex_string;
case '.': case '.':
reader_.advance(); advance();
if(state == reader::end) if (state == reader::end)
throw comp_error(loc_, "unterminated field ('.')"); throw comp_error(loc_, "unterminated field ('.')");
state_ = state::field; state_ = state::field;
@ -291,7 +377,7 @@ auto lexer::lex() -> parser::symbol_type
if (curr != ':') if (curr != ':')
return parser::make_COLON(loc_); return parser::make_COLON(loc_);
reader_.advance(); advance();
return parser::make_DOUBLECOLON(loc_); return parser::make_DOUBLECOLON(loc_);
case '?': case '?':
return parser::make_QMARK(loc_); return parser::make_QMARK(loc_);
@ -299,13 +385,13 @@ auto lexer::lex() -> parser::symbol_type
if (curr != '=') if (curr != '=')
return parser::make_ASSIGN(loc_); return parser::make_ASSIGN(loc_);
reader_.advance(); advance();
return parser::make_EQUALITY(loc_); return parser::make_EQUALITY(loc_);
case '+': case '+':
if (curr != '+' && curr != '=') if (curr != '+' && curr != '=')
return parser::make_ADD(loc_); return parser::make_ADD(loc_);
reader_.advance(); advance();
if (last == '+') if (last == '+')
return parser::make_INCREMENT(loc_); return parser::make_INCREMENT(loc_);
@ -315,7 +401,7 @@ auto lexer::lex() -> parser::symbol_type
if (curr != '-' && curr != '=') if (curr != '-' && curr != '=')
return parser::make_SUB(loc_); return parser::make_SUB(loc_);
reader_.advance(); advance();
if (last == '-') if (last == '-')
return parser::make_DECREMENT(loc_); return parser::make_DECREMENT(loc_);
@ -325,24 +411,24 @@ auto lexer::lex() -> parser::symbol_type
if (curr != '=') if (curr != '=')
return parser::make_MOD(loc_); return parser::make_MOD(loc_);
reader_.advance(); advance();
return parser::make_ASSIGN_MOD(loc_); return parser::make_ASSIGN_MOD(loc_);
case '|': case '|':
if (curr != '|' && curr != '=') if (curr != '|' && curr != '=')
return parser::make_BITWISE_OR(loc_); return parser::make_BITWISE_OR(loc_);
reader_.advance(); advance();
if (last == '|') if (last == '|')
return parser::make_OR(loc_); return parser::make_OR(loc_);
return parser::make_ASSIGN_BW_OR(loc_); return parser::make_ASSIGN_BW_OR(loc_);
case '&': case '&':
if (curr != '&' && curr != '=' && curr != '"' && curr != '\'') if (curr != '&' && curr != '=' && curr != '"')
return parser::make_BITWISE_AND(loc_); return parser::make_BITWISE_AND(loc_);
reader_.advance(); advance();
if (last == '&') if (last == '&')
return parser::make_AND(loc_); return parser::make_AND(loc_);
@ -356,13 +442,13 @@ auto lexer::lex() -> parser::symbol_type
if (curr != '=') if (curr != '=')
return parser::make_BITWISE_EXOR(loc_); return parser::make_BITWISE_EXOR(loc_);
reader_.advance(); advance();
return parser::make_ASSIGN_BW_EXOR(loc_); return parser::make_ASSIGN_BW_EXOR(loc_);
case '!': case '!':
if (curr != '=') if (curr != '=')
return parser::make_NOT(loc_); return parser::make_NOT(loc_);
reader_.advance(); advance();
return parser::make_INEQUALITY(loc_); return parser::make_INEQUALITY(loc_);
case '~': case '~':
return parser::make_COMPLEMENT(loc_); return parser::make_COMPLEMENT(loc_);
@ -370,20 +456,20 @@ auto lexer::lex() -> parser::symbol_type
if (curr != '<' && curr != '=') if (curr != '<' && curr != '=')
return parser::make_LESS(loc_); return parser::make_LESS(loc_);
reader_.advance(); advance();
if (last == '=') if (last == '=')
return parser::make_LESS_EQUAL(loc_); return parser::make_LESS_EQUAL(loc_);
if (curr != '=') if (curr != '=')
return parser::make_LSHIFT(loc_); return parser::make_LSHIFT(loc_);
reader_.advance(); advance();
return parser::make_ASSIGN_LSHIFT(loc_); return parser::make_ASSIGN_LSHIFT(loc_);
case '>': case '>':
if (curr != '>' && curr != '=') if (curr != '>' && curr != '=')
return parser::make_GREATER(loc_); return parser::make_GREATER(loc_);
reader_.advance(); advance();
if (last == '=') if (last == '=')
return parser::make_GREATER_EQUAL(loc_); return parser::make_GREATER_EQUAL(loc_);
@ -391,7 +477,7 @@ auto lexer::lex() -> parser::symbol_type
if (curr != '=') if (curr != '=')
return parser::make_RSHIFT(loc_); return parser::make_RSHIFT(loc_);
reader_.advance(); advance();
return parser::make_ASSIGN_RSHIFT(loc_); return parser::make_ASSIGN_RSHIFT(loc_);
default: default:
lex_name_or_number: lex_name_or_number:
@ -404,24 +490,27 @@ lex_name_or_number:
} }
lex_string: lex_string:
if (state == reader::end)
throw comp_error(loc_, "unmatched string start ('\"')");
reader_.advance();
while (true) while (true)
{ {
if (last == '"')
break;
if (last == '\n')
throw comp_error(loc_, "unterminated string literal");
if (state == reader::end) if (state == reader::end)
throw comp_error(loc_, "unmatched string start ('\"')"); throw comp_error(loc_, "unmatched string start ('\"')");
if (last == '\\') if (curr == '"')
{ {
advance();
break;
}
if (curr == '\n')
throw comp_error(loc_, "unterminated string literal");
if (curr == '\\')
{
advance();
if (state == reader::end)
throw comp_error(loc_, "invalid token ('\')");
char c = curr; char c = curr;
switch (curr) switch (curr)
{ {
@ -435,13 +524,11 @@ lex_string:
if (!buffer_.push(c)) if (!buffer_.push(c))
throw comp_error(loc_, "max string size exceeded"); throw comp_error(loc_, "max string size exceeded");
reader_.advance();
} }
else if (!buffer_.push(last)) else if (!buffer_.push(curr))
throw comp_error(loc_, "max string size exceeded"); throw comp_error(loc_, "max string size exceeded");
reader_.advance(); advance();
} }
if (state_ == state::localize) if (state_ == state::localize)
@ -472,7 +559,7 @@ lex_name:
else if (!buffer_.push(curr)) else if (!buffer_.push(curr))
throw comp_error(loc_, "max string size exceeded"); throw comp_error(loc_, "max string size exceeded");
reader_.advance(); advance();
} }
if(state_ == state::field) if(state_ == state::field)
@ -491,24 +578,23 @@ lex_name:
{ {
if (path) if (path)
throw comp_error(loc_, "invalid preprocessor directive"); throw comp_error(loc_, "invalid preprocessor directive");
auto token = parser::token::S2UNDEF; auto token = parser::token::S2UNDEF;
if (buffer_.length < 16) if (buffer_.length < 16)
{ {
const auto& itr = keyword_map.find(std::string_view(buffer_.data, buffer_.length)); const auto& itr = keyword_map.find(std::string_view(buffer_.data, buffer_.length));
if(itr != keyword_map.end()) if (itr != keyword_map.end())
{ {
if (itr->second > parser::token::HSENDIF) if (itr->second > parser::token::SH_ENDIF)
return parser::symbol_type(itr->second, loc_); return parser::symbol_type(itr->second, loc_);
token = itr->second; token = itr->second;
} }
} }
// TODO: call preprocessor(token); preprocessor(token);
throw comp_error(loc_, "unknown preprocessor directive");
state_ = state::start; state_ = state::start;
continue; continue;
} }
@ -518,7 +604,7 @@ lex_name:
{ {
const auto& itr = keyword_map.find(std::string_view(buffer_.data, buffer_.length)); const auto& itr = keyword_map.find(std::string_view(buffer_.data, buffer_.length));
if(itr != keyword_map.end()) if (itr != keyword_map.end())
return parser::symbol_type(itr->second, loc_); return parser::symbol_type(itr->second, loc_);
} }
@ -557,7 +643,7 @@ lex_number:
if (curr == '\'') if (curr == '\'')
{ {
reader_.advance(); advance();
continue; continue;
} }
@ -571,7 +657,7 @@ lex_number:
if (!buffer_.push(curr)) if (!buffer_.push(curr))
throw comp_error(loc_, "number literal size exceeded"); throw comp_error(loc_, "number literal size exceeded");
reader_.advance(); advance();
} }
if (last == '\'') if (last == '\'')
@ -587,7 +673,7 @@ lex_number:
} }
else if (curr == 'o') else if (curr == 'o')
{ {
reader_.advance(); advance();
while (true) while (true)
{ {
@ -599,7 +685,7 @@ lex_number:
if (curr == '\'') if (curr == '\'')
{ {
reader_.advance(); advance();
continue; continue;
} }
@ -609,7 +695,7 @@ lex_number:
if (!buffer_.push(curr)) if (!buffer_.push(curr))
throw error("gsc lexer: out of memory!"); throw error("gsc lexer: out of memory!");
reader_.advance(); advance();
} }
if (last == '\'' || buffer_.length <= 0) if (last == '\'' || buffer_.length <= 0)
@ -621,7 +707,7 @@ lex_number:
{ {
buffer_.push(last); buffer_.push(last);
buffer_.push(curr); buffer_.push(curr);
reader_.advance(); advance();
while (true) while (true)
{ {
@ -633,7 +719,7 @@ lex_number:
if (curr == '\'') if (curr == '\'')
{ {
reader_.advance(); advance();
continue; continue;
} }
@ -643,7 +729,7 @@ lex_number:
if (!buffer_.push(curr)) if (!buffer_.push(curr))
throw comp_error(loc_, "number literal size exceeded"); throw comp_error(loc_, "number literal size exceeded");
reader_.advance(); advance();
} }
if (last == '\'' || buffer_.length < 3) if (last == '\'' || buffer_.length < 3)
@ -655,7 +741,7 @@ lex_number:
{ {
buffer_.push(last); buffer_.push(last);
buffer_.push(curr); buffer_.push(curr);
reader_.advance(); advance();
while (true) while (true)
{ {
@ -667,7 +753,7 @@ lex_number:
if (curr == '\'') if (curr == '\'')
{ {
reader_.advance(); advance();
continue; continue;
} }
@ -677,7 +763,7 @@ lex_number:
if (!buffer_.push(curr)) if (!buffer_.push(curr))
throw error("gsc lexer: out of memory!"); throw error("gsc lexer: out of memory!");
reader_.advance(); advance();
} }
if (last == '\'' || buffer_.length < 3) if (last == '\'' || buffer_.length < 3)
@ -689,54 +775,89 @@ lex_number:
} }
} }
const std::unordered_map<std::string_view, parser::token::token_kind_type> lexer::keyword_map void lexer::advance()
{{ {
{ "#define", parser::token::HSDEFINE }, reader_.advance();
{ "#undef", parser::token::HSUNDEF },
{ "#ifdef", parser::token::HSIFDEF }, // dont wrap comment marks '/\/' '/\*' outside strings
{ "#ifndef", parser::token::HSIFNDEF }, if (state_ == state::start && reader_.last_byte == '/')
{ "#if", parser::token::HSIF }, return;
{ "#elif", parser::token::HSELIF },
{ "#else", parser::token::HSELSE }, while (reader_.current_byte == '\\')
{ "#endif", parser::token::HSENDIF }, {
{ "#inline", parser::token::INLINE }, if (reader_.bytes_remaining == 1)
{ "#include", parser::token::INCLUDE }, throw comp_error(loc_, "invalid token ('\\')");
{ "#using_animtree", parser::token::USINGTREE },
{ "#animtree", parser::token::ANIMTREE }, if (reader_.buffer_pos[1] != '\r' && reader_.buffer_pos[1] != '\n')
{ "endon", parser::token::ENDON }, break;
{ "notify", parser::token::NOTIFY },
{ "wait", parser::token::WAIT }, if (reader_.buffer_pos[1] == '\r')
{ "waittill", parser::token::WAITTILL }, {
{ "waittillmatch", parser::token::WAITTILLMATCH }, if (reader_.bytes_remaining <= 3 || reader_.buffer_pos[2] != '\n')
{ "waittillframeend", parser::token::WAITTILLFRAMEEND }, throw comp_error(loc_, "invalid token ('\\')");
{ "waitframe", parser::token::WAITFRAME },
{ "if", parser::token::IF }, reader_.buffer_pos += 3;
{ "else", parser::token::ELSE }, reader_.bytes_remaining -= 3;
{ "do", parser::token::DO }, }
{ "while", parser::token::WHILE },
{ "for", parser::token::FOR }, if ((reader_.buffer_pos[1] == '\n'))
{ "foreach", parser::token::FOREACH }, {
{ "in", parser::token::IN }, if (reader_.bytes_remaining == 2)
{ "switch", parser::token::SWITCH }, throw comp_error(loc_, "invalid token ('\\')");
{ "case", parser::token::CASE },
{ "default", parser::token::DEFAULT }, reader_.buffer_pos += 2;
{ "break", parser::token::BREAK }, reader_.bytes_remaining -= 2;
{ "continue", parser::token::CONTINUE }, }
{ "return", parser::token::RETURN },
{ "breakpoint", parser::token::BREAKPOINT }, if (reader_.bytes_remaining == 0)
{ "prof_begin", parser::token::PROFBEGIN }, {
{ "prof_end", parser::token::PROFEND }, reader_.state = reader::end;
{ "thread", parser::token::THREAD }, reader_.current_byte = 0;
{ "childthread", parser::token::CHILDTHREAD }, }
{ "thisthread", parser::token::THISTHREAD }, else
{ "call", parser::token::CALL }, {
{ "true", parser::token::TRUE }, reader_.current_byte = *reader_.buffer_pos;
{ "false", parser::token::FALSE }, }
{ "undefined", parser::token::UNDEFINED },
{ "game", parser::token::GAME }, loc_.lines();
{ "self", parser::token::SELF }, loc_.step();
{ "anim", parser::token::ANIM }, }
{ "level", parser::token::LEVEL }, }
}};
// Dispatches a '#'-directive token found by the lexer.
//
// token: the directive's token kind, or the parser's "invalid token" kind
//        when the directive name was not found in the keyword table.
//
// Throws comp_error in every case for now:
//   - "invalid token ('#')" when clean_ is false
//     (NOTE(review): clean_ presumably means only whitespace has been seen
//     on the current line so far - confirm against lexer::lex()),
//   - "unimplemented preprocessor directive" for the known conditional and
//     macro directives, none of which are handled yet,
//   - "unknown preprocessor directive" for anything else.
void lexer::preprocessor(parser::token::token_kind_type token)
{
    if (!clean_)
    {
        throw comp_error(loc_, "invalid token ('#')");
    }

    switch (token)
    {
        // Recognised directives that have no implementation yet; they all
        // report the same error, so the labels share a single body.
        case parser::token::SH_DEFINE:
        case parser::token::SH_UNDEF:
        case parser::token::SH_IFDEF:
        case parser::token::SH_IFNDEF:
        case parser::token::SH_IF:
        case parser::token::SH_ELIF:
        case parser::token::SH_ELSE:
        case parser::token::SH_ENDIF:
            throw comp_error(loc_, "unimplemented preprocessor directive");

        default:
            throw comp_error(loc_, "unknown preprocessor directive");
    }
}
} // namespace xsk::gsc::s2 } // namespace xsk::gsc::s2

View File

@ -5,8 +5,6 @@
#pragma once #pragma once
#include "s2.hpp"
namespace xsk::gsc::s2 namespace xsk::gsc::s2
{ {
@ -57,6 +55,7 @@ class lexer
state state_; state state_;
build mode_; build mode_;
bool indev_; bool indev_;
bool clean_;
public: public:
lexer(const std::string& name, const char* data, size_t size); lexer(const std::string& name, const char* data, size_t size);
@ -66,7 +65,8 @@ public:
void ban_header(const location& loc); void ban_header(const location& loc);
private: private:
static const std::unordered_map<std::string_view, parser::token::token_kind_type> keyword_map; void advance();
void preprocessor(parser::token::token_kind_type token);
}; };
} // namespace xsk::gsc::s2 } // namespace xsk::gsc::s2

View File

@ -677,14 +677,14 @@ namespace xsk { namespace gsc { namespace s2 {
S2EOF = 0, // "end of file" S2EOF = 0, // "end of file"
S2error = 1, // error S2error = 1, // error
S2UNDEF = 2, // "invalid token" S2UNDEF = 2, // "invalid token"
HSDEFINE = 3, // "#define" SH_DEFINE = 3, // "#define"
HSUNDEF = 4, // "#undef" SH_UNDEF = 4, // "#undef"
HSIFDEF = 5, // "#ifdef" SH_IFDEF = 5, // "#ifdef"
HSIFNDEF = 6, // "#ifndef" SH_IFNDEF = 6, // "#ifndef"
HSIF = 7, // "#if" SH_IF = 7, // "#if"
HSELIF = 8, // "#elif" SH_ELIF = 8, // "#elif"
HSELSE = 9, // "#else" SH_ELSE = 9, // "#else"
HSENDIF = 10, // "#endif" SH_ENDIF = 10, // "#endif"
DEVBEGIN = 11, // "/#" DEVBEGIN = 11, // "/#"
DEVEND = 12, // "#/" DEVEND = 12, // "#/"
INLINE = 13, // "#inline" INLINE = 13, // "#inline"
@ -809,14 +809,14 @@ namespace xsk { namespace gsc { namespace s2 {
S_YYEOF = 0, // "end of file" S_YYEOF = 0, // "end of file"
S_YYerror = 1, // error S_YYerror = 1, // error
S_YYUNDEF = 2, // "invalid token" S_YYUNDEF = 2, // "invalid token"
S_HSDEFINE = 3, // "#define" S_SH_DEFINE = 3, // "#define"
S_HSUNDEF = 4, // "#undef" S_SH_UNDEF = 4, // "#undef"
S_HSIFDEF = 5, // "#ifdef" S_SH_IFDEF = 5, // "#ifdef"
S_HSIFNDEF = 6, // "#ifndef" S_SH_IFNDEF = 6, // "#ifndef"
S_HSIF = 7, // "#if" S_SH_IF = 7, // "#if"
S_HSELIF = 8, // "#elif" S_SH_ELIF = 8, // "#elif"
S_HSELSE = 9, // "#else" S_SH_ELSE = 9, // "#else"
S_HSENDIF = 10, // "#endif" S_SH_ENDIF = 10, // "#endif"
S_DEVBEGIN = 11, // "/#" S_DEVBEGIN = 11, // "/#"
S_DEVEND = 12, // "#/" S_DEVEND = 12, // "#/"
S_INLINE = 13, // "#inline" S_INLINE = 13, // "#inline"
@ -2798,121 +2798,121 @@ switch (yykind)
#if 201103L <= YY_CPLUSPLUS #if 201103L <= YY_CPLUSPLUS
static static
symbol_type symbol_type
make_HSDEFINE (location_type l) make_SH_DEFINE (location_type l)
{ {
return symbol_type (token::HSDEFINE, std::move (l)); return symbol_type (token::SH_DEFINE, std::move (l));
} }
#else #else
static static
symbol_type symbol_type
make_HSDEFINE (const location_type& l) make_SH_DEFINE (const location_type& l)
{ {
return symbol_type (token::HSDEFINE, l); return symbol_type (token::SH_DEFINE, l);
} }
#endif #endif
#if 201103L <= YY_CPLUSPLUS #if 201103L <= YY_CPLUSPLUS
static static
symbol_type symbol_type
make_HSUNDEF (location_type l) make_SH_UNDEF (location_type l)
{ {
return symbol_type (token::HSUNDEF, std::move (l)); return symbol_type (token::SH_UNDEF, std::move (l));
} }
#else #else
static static
symbol_type symbol_type
make_HSUNDEF (const location_type& l) make_SH_UNDEF (const location_type& l)
{ {
return symbol_type (token::HSUNDEF, l); return symbol_type (token::SH_UNDEF, l);
} }
#endif #endif
#if 201103L <= YY_CPLUSPLUS #if 201103L <= YY_CPLUSPLUS
static static
symbol_type symbol_type
make_HSIFDEF (location_type l) make_SH_IFDEF (location_type l)
{ {
return symbol_type (token::HSIFDEF, std::move (l)); return symbol_type (token::SH_IFDEF, std::move (l));
} }
#else #else
static static
symbol_type symbol_type
make_HSIFDEF (const location_type& l) make_SH_IFDEF (const location_type& l)
{ {
return symbol_type (token::HSIFDEF, l); return symbol_type (token::SH_IFDEF, l);
} }
#endif #endif
#if 201103L <= YY_CPLUSPLUS #if 201103L <= YY_CPLUSPLUS
static static
symbol_type symbol_type
make_HSIFNDEF (location_type l) make_SH_IFNDEF (location_type l)
{ {
return symbol_type (token::HSIFNDEF, std::move (l)); return symbol_type (token::SH_IFNDEF, std::move (l));
} }
#else #else
static static
symbol_type symbol_type
make_HSIFNDEF (const location_type& l) make_SH_IFNDEF (const location_type& l)
{ {
return symbol_type (token::HSIFNDEF, l); return symbol_type (token::SH_IFNDEF, l);
} }
#endif #endif
#if 201103L <= YY_CPLUSPLUS #if 201103L <= YY_CPLUSPLUS
static static
symbol_type symbol_type
make_HSIF (location_type l) make_SH_IF (location_type l)
{ {
return symbol_type (token::HSIF, std::move (l)); return symbol_type (token::SH_IF, std::move (l));
} }
#else #else
static static
symbol_type symbol_type
make_HSIF (const location_type& l) make_SH_IF (const location_type& l)
{ {
return symbol_type (token::HSIF, l); return symbol_type (token::SH_IF, l);
} }
#endif #endif
#if 201103L <= YY_CPLUSPLUS #if 201103L <= YY_CPLUSPLUS
static static
symbol_type symbol_type
make_HSELIF (location_type l) make_SH_ELIF (location_type l)
{ {
return symbol_type (token::HSELIF, std::move (l)); return symbol_type (token::SH_ELIF, std::move (l));
} }
#else #else
static static
symbol_type symbol_type
make_HSELIF (const location_type& l) make_SH_ELIF (const location_type& l)
{ {
return symbol_type (token::HSELIF, l); return symbol_type (token::SH_ELIF, l);
} }
#endif #endif
#if 201103L <= YY_CPLUSPLUS #if 201103L <= YY_CPLUSPLUS
static static
symbol_type symbol_type
make_HSELSE (location_type l) make_SH_ELSE (location_type l)
{ {
return symbol_type (token::HSELSE, std::move (l)); return symbol_type (token::SH_ELSE, std::move (l));
} }
#else #else
static static
symbol_type symbol_type
make_HSELSE (const location_type& l) make_SH_ELSE (const location_type& l)
{ {
return symbol_type (token::HSELSE, l); return symbol_type (token::SH_ELSE, l);
} }
#endif #endif
#if 201103L <= YY_CPLUSPLUS #if 201103L <= YY_CPLUSPLUS
static static
symbol_type symbol_type
make_HSENDIF (location_type l) make_SH_ENDIF (location_type l)
{ {
return symbol_type (token::HSENDIF, std::move (l)); return symbol_type (token::SH_ENDIF, std::move (l));
} }
#else #else
static static
symbol_type symbol_type
make_HSENDIF (const location_type& l) make_SH_ENDIF (const location_type& l)
{ {
return symbol_type (token::HSENDIF, l); return symbol_type (token::SH_ENDIF, l);
} }
#endif #endif
#if 201103L <= YY_CPLUSPLUS #if 201103L <= YY_CPLUSPLUS

View File

@ -16,6 +16,58 @@ xsk::gsc::s4::parser::symbol_type S4lex(xsk::gsc::s4::lexer& lexer)
namespace xsk::gsc::s4 namespace xsk::gsc::s4
{ {
// Lookup table from keyword spelling to parser token kind.
// The lexer queries it with a string_view over its name buffer
// (keyword_map.find(...)) when lexing a preprocessor directive or a name.
// Keys are string_views over string literals, so they stay valid for the
// lifetime of the program.
const std::unordered_map<std::string_view, parser::token::token_kind_type> keyword_map
{{
// '#' directives. The lexer compares the mapped token against SH_ENDIF:
// tokens not greater than SH_ENDIF are passed to lexer::preprocessor().
// NOTE(review): presumably that is exactly these eight entries - confirm
// against the s4 token enum ordering.
{ "#define", parser::token::SH_DEFINE },
{ "#undef", parser::token::SH_UNDEF },
{ "#ifdef", parser::token::SH_IFDEF },
{ "#ifndef", parser::token::SH_IFNDEF },
{ "#if", parser::token::SH_IF },
{ "#elif", parser::token::SH_ELIF },
{ "#else", parser::token::SH_ELSE },
{ "#endif", parser::token::SH_ENDIF },
// '#'-prefixed keywords that are returned to the parser as ordinary tokens
// (NOTE(review): assumes their token values are greater than SH_ENDIF).
{ "#inline", parser::token::INLINE },
{ "#include", parser::token::INCLUDE },
{ "#using_animtree", parser::token::USINGTREE },
{ "#animtree", parser::token::ANIMTREE },
// Plain language keywords.
{ "endon", parser::token::ENDON },
{ "notify", parser::token::NOTIFY },
{ "wait", parser::token::WAIT },
{ "waittill", parser::token::WAITTILL },
{ "waittillmatch", parser::token::WAITTILLMATCH },
{ "waittillframeend", parser::token::WAITTILLFRAMEEND },
{ "waitframe", parser::token::WAITFRAME },
{ "if", parser::token::IF },
{ "else", parser::token::ELSE },
{ "do", parser::token::DO },
{ "while", parser::token::WHILE },
{ "for", parser::token::FOR },
{ "foreach", parser::token::FOREACH },
{ "in", parser::token::IN },
{ "switch", parser::token::SWITCH },
{ "case", parser::token::CASE },
{ "default", parser::token::DEFAULT },
{ "break", parser::token::BREAK },
{ "continue", parser::token::CONTINUE },
{ "return", parser::token::RETURN },
{ "breakpoint", parser::token::BREAKPOINT },
{ "prof_begin", parser::token::PROFBEGIN },
{ "prof_end", parser::token::PROFEND },
{ "thread", parser::token::THREAD },
{ "childthread", parser::token::CHILDTHREAD },
{ "thisthread", parser::token::THISTHREAD },
{ "call", parser::token::CALL },
{ "true", parser::token::TRUE },
{ "false", parser::token::FALSE },
{ "undefined", parser::token::UNDEFINED },
{ "game", parser::token::GAME },
{ "self", parser::token::SELF },
{ "anim", parser::token::ANIM },
{ "level", parser::token::LEVEL },
{ "isdefined", parser::token::ISDEFINED },
{ "istrue", parser::token::ISTRUE },
}};
buffer::buffer() : length(0) buffer::buffer() : length(0)
{ {
data = static_cast<char*>(std::malloc(max_buf_size)); data = static_cast<char*>(std::malloc(max_buf_size));
@ -35,10 +87,8 @@ bool buffer::push(char c)
return true; return true;
} }
reader::reader() : state(reader::end), buffer_pos(0), bytes_remaining(0), last_byte(0), current_byte(0) reader::reader() : state(reader::end), buffer_pos(0), bytes_remaining(0),
{ last_byte(0), current_byte(0) { }
}
void reader::init(const char* data, size_t size) void reader::init(const char* data, size_t size)
{ {
@ -78,7 +128,7 @@ void reader::advance()
} }
} }
lexer::lexer(const std::string& name, const char* data, size_t size) : indev_(false), loc_(location(&name)), lexer::lexer(const std::string& name, const char* data, size_t size) : indev_(false), clean_(true), loc_(location(&name)),
mode_(build::dev), header_top_(0), locs_(std::stack<location>()), readers_(std::stack<reader>()) mode_(build::dev), header_top_(0), locs_(std::stack<location>()), readers_(std::stack<reader>())
{ {
reader_.init(data, size); reader_.init(data, size);
@ -97,6 +147,7 @@ void lexer::push_header(const std::string& file)
locs_.push(loc_); locs_.push(loc_);
loc_.initialize(std::get<0>(data)); loc_.initialize(std::get<0>(data));
reader_.init(std::get<1>(data), std::get<2>(data)); reader_.init(std::get<1>(data), std::get<2>(data));
clean_ = true;
} }
catch (const std::exception& e) catch (const std::exception& e)
{ {
@ -145,7 +196,10 @@ auto lexer::lex() -> parser::symbol_type
return parser::make_S4EOF(loc_); return parser::make_S4EOF(loc_);
} }
reader_.advance(); if (clean_ && last != 0 && last != ' ' && last != '\t' && last != '\n')
clean_ = false;
advance();
switch (last) switch (last)
{ {
@ -157,12 +211,15 @@ auto lexer::lex() -> parser::symbol_type
case '\n': case '\n':
loc_.lines(); loc_.lines();
loc_.step(); loc_.step();
clean_ = true;
continue; continue;
case '\\':
throw comp_error(loc_, "invalid token ('\\')");
case '/': case '/':
if (curr != '/' && curr != '*' && curr != '#' && curr != '=') if (curr != '/' && curr != '*' && curr != '#' && curr != '=')
return parser::make_DIV(loc_); return parser::make_DIV(loc_);
reader_.advance(); advance();
if (last == '=') if (last == '=')
return parser::make_ASSIGN_DIV(loc_); return parser::make_ASSIGN_DIV(loc_);
@ -224,7 +281,30 @@ auto lexer::lex() -> parser::symbol_type
{ {
while (true) while (true)
{ {
if (state == reader::end || curr == '\n') if (state == reader::end)
break;
if (last == '\\' && curr == '\r' || curr == '\n')
{
reader_.advance();
if (state == reader::end)
break;
if (last == '\r')
{
if (curr != '\n')
throw comp_error(loc_, "invalid token ('\')");
reader_.advance();
}
loc_.lines();
loc_.step();
continue;
}
if (curr == '\n')
break; break;
reader_.advance(); reader_.advance();
@ -238,15 +318,23 @@ auto lexer::lex() -> parser::symbol_type
throw comp_error(loc_, "unmatched devblock end ('#/')"); throw comp_error(loc_, "unmatched devblock end ('#/')");
indev_ = false; indev_ = false;
reader_.advance(); advance();
return parser::make_DEVEND(loc_); return parser::make_DEVEND(loc_);
} }
buffer_.push(last); buffer_.push(last);
reader_.advance(); advance();
while (state == reader::ok)
{
if (last != ' ' || last != '\t')
break;
advance();
}
if (state == reader::end || !((last > 64 && last < 91) || (last > 96 && last < 123))) if (state == reader::end || !((last > 64 && last < 91) || (last > 96 && last < 123)))
throw comp_error(loc_, "unterminated preprocessor directive ('#')"); throw comp_error(loc_, "invalid preprocessor directive ('#')");
state_ = state::preprocessor; state_ = state::preprocessor;
goto lex_name; goto lex_name;
@ -254,7 +342,7 @@ auto lexer::lex() -> parser::symbol_type
if (curr != '/' && curr != '=') if (curr != '/' && curr != '=')
return parser::make_MUL(loc_); return parser::make_MUL(loc_);
reader_.advance(); advance();
if (last == '=') if (last == '=')
return parser::make_ASSIGN_MUL(loc_); return parser::make_ASSIGN_MUL(loc_);
@ -264,9 +352,9 @@ auto lexer::lex() -> parser::symbol_type
state_ = state::string; state_ = state::string;
goto lex_string; goto lex_string;
case '.': case '.':
reader_.advance(); advance();
if(state == reader::end) if (state == reader::end)
throw comp_error(loc_, "unterminated field ('.')"); throw comp_error(loc_, "unterminated field ('.')");
state_ = state::field; state_ = state::field;
@ -291,7 +379,7 @@ auto lexer::lex() -> parser::symbol_type
if (curr != ':') if (curr != ':')
return parser::make_COLON(loc_); return parser::make_COLON(loc_);
reader_.advance(); advance();
return parser::make_DOUBLECOLON(loc_); return parser::make_DOUBLECOLON(loc_);
case '?': case '?':
return parser::make_QMARK(loc_); return parser::make_QMARK(loc_);
@ -299,13 +387,13 @@ auto lexer::lex() -> parser::symbol_type
if (curr != '=') if (curr != '=')
return parser::make_ASSIGN(loc_); return parser::make_ASSIGN(loc_);
reader_.advance(); advance();
return parser::make_EQUALITY(loc_); return parser::make_EQUALITY(loc_);
case '+': case '+':
if (curr != '+' && curr != '=') if (curr != '+' && curr != '=')
return parser::make_ADD(loc_); return parser::make_ADD(loc_);
reader_.advance(); advance();
if (last == '+') if (last == '+')
return parser::make_INCREMENT(loc_); return parser::make_INCREMENT(loc_);
@ -315,7 +403,7 @@ auto lexer::lex() -> parser::symbol_type
if (curr != '-' && curr != '=') if (curr != '-' && curr != '=')
return parser::make_SUB(loc_); return parser::make_SUB(loc_);
reader_.advance(); advance();
if (last == '-') if (last == '-')
return parser::make_DECREMENT(loc_); return parser::make_DECREMENT(loc_);
@ -325,24 +413,24 @@ auto lexer::lex() -> parser::symbol_type
if (curr != '=') if (curr != '=')
return parser::make_MOD(loc_); return parser::make_MOD(loc_);
reader_.advance(); advance();
return parser::make_ASSIGN_MOD(loc_); return parser::make_ASSIGN_MOD(loc_);
case '|': case '|':
if (curr != '|' && curr != '=') if (curr != '|' && curr != '=')
return parser::make_BITWISE_OR(loc_); return parser::make_BITWISE_OR(loc_);
reader_.advance(); advance();
if (last == '|') if (last == '|')
return parser::make_OR(loc_); return parser::make_OR(loc_);
return parser::make_ASSIGN_BW_OR(loc_); return parser::make_ASSIGN_BW_OR(loc_);
case '&': case '&':
if (curr != '&' && curr != '=' && curr != '"' && curr != '\'') if (curr != '&' && curr != '=' && curr != '"')
return parser::make_BITWISE_AND(loc_); return parser::make_BITWISE_AND(loc_);
reader_.advance(); advance();
if (last == '&') if (last == '&')
return parser::make_AND(loc_); return parser::make_AND(loc_);
@ -356,13 +444,13 @@ auto lexer::lex() -> parser::symbol_type
if (curr != '=') if (curr != '=')
return parser::make_BITWISE_EXOR(loc_); return parser::make_BITWISE_EXOR(loc_);
reader_.advance(); advance();
return parser::make_ASSIGN_BW_EXOR(loc_); return parser::make_ASSIGN_BW_EXOR(loc_);
case '!': case '!':
if (curr != '=') if (curr != '=')
return parser::make_NOT(loc_); return parser::make_NOT(loc_);
reader_.advance(); advance();
return parser::make_INEQUALITY(loc_); return parser::make_INEQUALITY(loc_);
case '~': case '~':
return parser::make_COMPLEMENT(loc_); return parser::make_COMPLEMENT(loc_);
@ -370,20 +458,20 @@ auto lexer::lex() -> parser::symbol_type
if (curr != '<' && curr != '=') if (curr != '<' && curr != '=')
return parser::make_LESS(loc_); return parser::make_LESS(loc_);
reader_.advance(); advance();
if (last == '=') if (last == '=')
return parser::make_LESS_EQUAL(loc_); return parser::make_LESS_EQUAL(loc_);
if (curr != '=') if (curr != '=')
return parser::make_LSHIFT(loc_); return parser::make_LSHIFT(loc_);
reader_.advance(); advance();
return parser::make_ASSIGN_LSHIFT(loc_); return parser::make_ASSIGN_LSHIFT(loc_);
case '>': case '>':
if (curr != '>' && curr != '=') if (curr != '>' && curr != '=')
return parser::make_GREATER(loc_); return parser::make_GREATER(loc_);
reader_.advance(); advance();
if (last == '=') if (last == '=')
return parser::make_GREATER_EQUAL(loc_); return parser::make_GREATER_EQUAL(loc_);
@ -391,7 +479,7 @@ auto lexer::lex() -> parser::symbol_type
if (curr != '=') if (curr != '=')
return parser::make_RSHIFT(loc_); return parser::make_RSHIFT(loc_);
reader_.advance(); advance();
return parser::make_ASSIGN_RSHIFT(loc_); return parser::make_ASSIGN_RSHIFT(loc_);
default: default:
lex_name_or_number: lex_name_or_number:
@ -404,24 +492,27 @@ lex_name_or_number:
} }
lex_string: lex_string:
if (state == reader::end)
throw comp_error(loc_, "unmatched string start ('\"')");
reader_.advance();
while (true) while (true)
{ {
if (last == '"')
break;
if (last == '\n')
throw comp_error(loc_, "unterminated string literal");
if (state == reader::end) if (state == reader::end)
throw comp_error(loc_, "unmatched string start ('\"')"); throw comp_error(loc_, "unmatched string start ('\"')");
if (last == '\\') if (curr == '"')
{ {
advance();
break;
}
if (curr == '\n')
throw comp_error(loc_, "unterminated string literal");
if (curr == '\\')
{
advance();
if (state == reader::end)
throw comp_error(loc_, "invalid token ('\')");
char c = curr; char c = curr;
switch (curr) switch (curr)
{ {
@ -435,13 +526,11 @@ lex_string:
if (!buffer_.push(c)) if (!buffer_.push(c))
throw comp_error(loc_, "max string size exceeded"); throw comp_error(loc_, "max string size exceeded");
reader_.advance();
} }
else if (!buffer_.push(last)) else if (!buffer_.push(curr))
throw comp_error(loc_, "max string size exceeded"); throw comp_error(loc_, "max string size exceeded");
reader_.advance(); advance();
} }
if (state_ == state::localize) if (state_ == state::localize)
@ -472,7 +561,7 @@ lex_name:
else if (!buffer_.push(curr)) else if (!buffer_.push(curr))
throw comp_error(loc_, "max string size exceeded"); throw comp_error(loc_, "max string size exceeded");
reader_.advance(); advance();
} }
if(state_ == state::field) if(state_ == state::field)
@ -491,24 +580,23 @@ lex_name:
{ {
if (path) if (path)
throw comp_error(loc_, "invalid preprocessor directive"); throw comp_error(loc_, "invalid preprocessor directive");
auto token = parser::token::S4UNDEF; auto token = parser::token::S4UNDEF;
if (buffer_.length < 16) if (buffer_.length < 16)
{ {
const auto& itr = keyword_map.find(std::string_view(buffer_.data, buffer_.length)); const auto& itr = keyword_map.find(std::string_view(buffer_.data, buffer_.length));
if(itr != keyword_map.end()) if (itr != keyword_map.end())
{ {
if (itr->second > parser::token::HSENDIF) if (itr->second > parser::token::SH_ENDIF)
return parser::symbol_type(itr->second, loc_); return parser::symbol_type(itr->second, loc_);
token = itr->second; token = itr->second;
} }
} }
// TODO: call preprocessor(token); preprocessor(token);
throw comp_error(loc_, "unknown preprocessor directive");
state_ = state::start; state_ = state::start;
continue; continue;
} }
@ -529,7 +617,7 @@ lex_name:
{ {
const auto& itr = keyword_map.find(std::string_view(buffer_.data, buffer_.length)); const auto& itr = keyword_map.find(std::string_view(buffer_.data, buffer_.length));
if(itr != keyword_map.end()) if (itr != keyword_map.end())
return parser::symbol_type(itr->second, loc_); return parser::symbol_type(itr->second, loc_);
} }
@ -568,7 +656,7 @@ lex_number:
if (curr == '\'') if (curr == '\'')
{ {
reader_.advance(); advance();
continue; continue;
} }
@ -582,7 +670,7 @@ lex_number:
if (!buffer_.push(curr)) if (!buffer_.push(curr))
throw comp_error(loc_, "number literal size exceeded"); throw comp_error(loc_, "number literal size exceeded");
reader_.advance(); advance();
} }
if (last == '\'') if (last == '\'')
@ -598,7 +686,7 @@ lex_number:
} }
else if (curr == 'o') else if (curr == 'o')
{ {
reader_.advance(); advance();
while (true) while (true)
{ {
@ -610,7 +698,7 @@ lex_number:
if (curr == '\'') if (curr == '\'')
{ {
reader_.advance(); advance();
continue; continue;
} }
@ -620,7 +708,7 @@ lex_number:
if (!buffer_.push(curr)) if (!buffer_.push(curr))
throw error("gsc lexer: out of memory!"); throw error("gsc lexer: out of memory!");
reader_.advance(); advance();
} }
if (last == '\'' || buffer_.length <= 0) if (last == '\'' || buffer_.length <= 0)
@ -632,7 +720,7 @@ lex_number:
{ {
buffer_.push(last); buffer_.push(last);
buffer_.push(curr); buffer_.push(curr);
reader_.advance(); advance();
while (true) while (true)
{ {
@ -644,7 +732,7 @@ lex_number:
if (curr == '\'') if (curr == '\'')
{ {
reader_.advance(); advance();
continue; continue;
} }
@ -654,7 +742,7 @@ lex_number:
if (!buffer_.push(curr)) if (!buffer_.push(curr))
throw comp_error(loc_, "number literal size exceeded"); throw comp_error(loc_, "number literal size exceeded");
reader_.advance(); advance();
} }
if (last == '\'' || buffer_.length < 3) if (last == '\'' || buffer_.length < 3)
@ -666,7 +754,7 @@ lex_number:
{ {
buffer_.push(last); buffer_.push(last);
buffer_.push(curr); buffer_.push(curr);
reader_.advance(); advance();
while (true) while (true)
{ {
@ -678,7 +766,7 @@ lex_number:
if (curr == '\'') if (curr == '\'')
{ {
reader_.advance(); advance();
continue; continue;
} }
@ -688,7 +776,7 @@ lex_number:
if (!buffer_.push(curr)) if (!buffer_.push(curr))
throw error("gsc lexer: out of memory!"); throw error("gsc lexer: out of memory!");
reader_.advance(); advance();
} }
if (last == '\'' || buffer_.length < 3) if (last == '\'' || buffer_.length < 3)
@ -700,56 +788,89 @@ lex_number:
} }
} }
const std::unordered_map<std::string_view, parser::token::token_kind_type> lexer::keyword_map void lexer::advance()
{{ {
{ "#define", parser::token::HSDEFINE }, reader_.advance();
{ "#undef", parser::token::HSUNDEF },
{ "#ifdef", parser::token::HSIFDEF }, // dont wrap comment marks '/\/' '/\*' outside strings
{ "#ifndef", parser::token::HSIFNDEF }, if (state_ == state::start && reader_.last_byte == '/')
{ "#if", parser::token::HSIF }, return;
{ "#elif", parser::token::HSELIF },
{ "#else", parser::token::HSELSE }, while (reader_.current_byte == '\\')
{ "#endif", parser::token::HSENDIF }, {
{ "#inline", parser::token::INLINE }, if (reader_.bytes_remaining == 1)
{ "#include", parser::token::INCLUDE }, throw comp_error(loc_, "invalid token ('\\')");
{ "#using_animtree", parser::token::USINGTREE },
{ "#animtree", parser::token::ANIMTREE }, if (reader_.buffer_pos[1] != '\r' && reader_.buffer_pos[1] != '\n')
{ "endon", parser::token::ENDON }, break;
{ "notify", parser::token::NOTIFY },
{ "wait", parser::token::WAIT }, if (reader_.buffer_pos[1] == '\r')
{ "waittill", parser::token::WAITTILL }, {
{ "waittillmatch", parser::token::WAITTILLMATCH }, if (reader_.bytes_remaining <= 3 || reader_.buffer_pos[2] != '\n')
{ "waittillframeend", parser::token::WAITTILLFRAMEEND }, throw comp_error(loc_, "invalid token ('\\')");
{ "waitframe", parser::token::WAITFRAME },
{ "if", parser::token::IF }, reader_.buffer_pos += 3;
{ "else", parser::token::ELSE }, reader_.bytes_remaining -= 3;
{ "do", parser::token::DO }, }
{ "while", parser::token::WHILE },
{ "for", parser::token::FOR }, if ((reader_.buffer_pos[1] == '\n'))
{ "foreach", parser::token::FOREACH }, {
{ "in", parser::token::IN }, if (reader_.bytes_remaining == 2)
{ "switch", parser::token::SWITCH }, throw comp_error(loc_, "invalid token ('\\')");
{ "case", parser::token::CASE },
{ "default", parser::token::DEFAULT }, reader_.buffer_pos += 2;
{ "break", parser::token::BREAK }, reader_.bytes_remaining -= 2;
{ "continue", parser::token::CONTINUE }, }
{ "return", parser::token::RETURN },
{ "breakpoint", parser::token::BREAKPOINT }, if (reader_.bytes_remaining == 0)
{ "prof_begin", parser::token::PROFBEGIN }, {
{ "prof_end", parser::token::PROFEND }, reader_.state = reader::end;
{ "thread", parser::token::THREAD }, reader_.current_byte = 0;
{ "childthread", parser::token::CHILDTHREAD }, }
{ "thisthread", parser::token::THISTHREAD }, else
{ "call", parser::token::CALL }, {
{ "true", parser::token::TRUE }, reader_.current_byte = *reader_.buffer_pos;
{ "false", parser::token::FALSE }, }
{ "undefined", parser::token::UNDEFINED },
{ "game", parser::token::GAME }, loc_.lines();
{ "self", parser::token::SELF }, loc_.step();
{ "anim", parser::token::ANIM }, }
{ "level", parser::token::LEVEL }, }
{ "isdefined", parser::token::ISDEFINED },
{ "istrue", parser::token::ISTRUE }, void lexer::preprocessor(parser::token::token_kind_type token)
}}; {
if (!clean_)
throw comp_error(loc_, "invalid token ('#')");
switch (token)
{
case parser::token::SH_DEFINE:
throw comp_error(loc_, "unimplemented preprocessor directive");
break;
case parser::token::SH_UNDEF:
throw comp_error(loc_, "unimplemented preprocessor directive");
break;
case parser::token::SH_IFDEF:
throw comp_error(loc_, "unimplemented preprocessor directive");
break;
case parser::token::SH_IFNDEF:
throw comp_error(loc_, "unimplemented preprocessor directive");
break;
case parser::token::SH_IF:
throw comp_error(loc_, "unimplemented preprocessor directive");
break;
case parser::token::SH_ELIF:
throw comp_error(loc_, "unimplemented preprocessor directive");
break;
case parser::token::SH_ELSE:
throw comp_error(loc_, "unimplemented preprocessor directive");
break;
case parser::token::SH_ENDIF:
throw comp_error(loc_, "unimplemented preprocessor directive");
break;
default:
throw comp_error(loc_, "unknown preprocessor directive");
}
}
} // namespace xsk::gsc::s4 } // namespace xsk::gsc::s4

View File

@ -5,8 +5,6 @@
#pragma once #pragma once
#include "s4.hpp"
namespace xsk::gsc::s4 namespace xsk::gsc::s4
{ {
@ -57,6 +55,7 @@ class lexer
state state_; state state_;
build mode_; build mode_;
bool indev_; bool indev_;
bool clean_;
public: public:
lexer(const std::string& name, const char* data, size_t size); lexer(const std::string& name, const char* data, size_t size);
@ -66,7 +65,8 @@ public:
void ban_header(const location& loc); void ban_header(const location& loc);
private: private:
static const std::unordered_map<std::string_view, parser::token::token_kind_type> keyword_map; void advance();
void preprocessor(parser::token::token_kind_type token);
}; };
} // namespace xsk::gsc::s4 } // namespace xsk::gsc::s4

View File

@ -683,14 +683,14 @@ namespace xsk { namespace gsc { namespace s4 {
S4EOF = 0, // "end of file" S4EOF = 0, // "end of file"
S4error = 1, // error S4error = 1, // error
S4UNDEF = 2, // "invalid token" S4UNDEF = 2, // "invalid token"
HSDEFINE = 3, // "#define" SH_DEFINE = 3, // "#define"
HSUNDEF = 4, // "#undef" SH_UNDEF = 4, // "#undef"
HSIFDEF = 5, // "#ifdef" SH_IFDEF = 5, // "#ifdef"
HSIFNDEF = 6, // "#ifndef" SH_IFNDEF = 6, // "#ifndef"
HSIF = 7, // "#if" SH_IF = 7, // "#if"
HSELIF = 8, // "#elif" SH_ELIF = 8, // "#elif"
HSELSE = 9, // "#else" SH_ELSE = 9, // "#else"
HSENDIF = 10, // "#endif" SH_ENDIF = 10, // "#endif"
DEVBEGIN = 11, // "/#" DEVBEGIN = 11, // "/#"
DEVEND = 12, // "#/" DEVEND = 12, // "#/"
INLINE = 13, // "#inline" INLINE = 13, // "#inline"
@ -817,14 +817,14 @@ namespace xsk { namespace gsc { namespace s4 {
S_YYEOF = 0, // "end of file" S_YYEOF = 0, // "end of file"
S_YYerror = 1, // error S_YYerror = 1, // error
S_YYUNDEF = 2, // "invalid token" S_YYUNDEF = 2, // "invalid token"
S_HSDEFINE = 3, // "#define" S_SH_DEFINE = 3, // "#define"
S_HSUNDEF = 4, // "#undef" S_SH_UNDEF = 4, // "#undef"
S_HSIFDEF = 5, // "#ifdef" S_SH_IFDEF = 5, // "#ifdef"
S_HSIFNDEF = 6, // "#ifndef" S_SH_IFNDEF = 6, // "#ifndef"
S_HSIF = 7, // "#if" S_SH_IF = 7, // "#if"
S_HSELIF = 8, // "#elif" S_SH_ELIF = 8, // "#elif"
S_HSELSE = 9, // "#else" S_SH_ELSE = 9, // "#else"
S_HSENDIF = 10, // "#endif" S_SH_ENDIF = 10, // "#endif"
S_DEVBEGIN = 11, // "/#" S_DEVBEGIN = 11, // "/#"
S_DEVEND = 12, // "#/" S_DEVEND = 12, // "#/"
S_INLINE = 13, // "#inline" S_INLINE = 13, // "#inline"
@ -2854,121 +2854,121 @@ switch (yykind)
#if 201103L <= YY_CPLUSPLUS #if 201103L <= YY_CPLUSPLUS
static static
symbol_type symbol_type
make_HSDEFINE (location_type l) make_SH_DEFINE (location_type l)
{ {
return symbol_type (token::HSDEFINE, std::move (l)); return symbol_type (token::SH_DEFINE, std::move (l));
} }
#else #else
static static
symbol_type symbol_type
make_HSDEFINE (const location_type& l) make_SH_DEFINE (const location_type& l)
{ {
return symbol_type (token::HSDEFINE, l); return symbol_type (token::SH_DEFINE, l);
} }
#endif #endif
#if 201103L <= YY_CPLUSPLUS #if 201103L <= YY_CPLUSPLUS
static static
symbol_type symbol_type
make_HSUNDEF (location_type l) make_SH_UNDEF (location_type l)
{ {
return symbol_type (token::HSUNDEF, std::move (l)); return symbol_type (token::SH_UNDEF, std::move (l));
} }
#else #else
static static
symbol_type symbol_type
make_HSUNDEF (const location_type& l) make_SH_UNDEF (const location_type& l)
{ {
return symbol_type (token::HSUNDEF, l); return symbol_type (token::SH_UNDEF, l);
} }
#endif #endif
#if 201103L <= YY_CPLUSPLUS #if 201103L <= YY_CPLUSPLUS
static static
symbol_type symbol_type
make_HSIFDEF (location_type l) make_SH_IFDEF (location_type l)
{ {
return symbol_type (token::HSIFDEF, std::move (l)); return symbol_type (token::SH_IFDEF, std::move (l));
} }
#else #else
static static
symbol_type symbol_type
make_HSIFDEF (const location_type& l) make_SH_IFDEF (const location_type& l)
{ {
return symbol_type (token::HSIFDEF, l); return symbol_type (token::SH_IFDEF, l);
} }
#endif #endif
#if 201103L <= YY_CPLUSPLUS #if 201103L <= YY_CPLUSPLUS
static static
symbol_type symbol_type
make_HSIFNDEF (location_type l) make_SH_IFNDEF (location_type l)
{ {
return symbol_type (token::HSIFNDEF, std::move (l)); return symbol_type (token::SH_IFNDEF, std::move (l));
} }
#else #else
static static
symbol_type symbol_type
make_HSIFNDEF (const location_type& l) make_SH_IFNDEF (const location_type& l)
{ {
return symbol_type (token::HSIFNDEF, l); return symbol_type (token::SH_IFNDEF, l);
} }
#endif #endif
#if 201103L <= YY_CPLUSPLUS #if 201103L <= YY_CPLUSPLUS
static static
symbol_type symbol_type
make_HSIF (location_type l) make_SH_IF (location_type l)
{ {
return symbol_type (token::HSIF, std::move (l)); return symbol_type (token::SH_IF, std::move (l));
} }
#else #else
static static
symbol_type symbol_type
make_HSIF (const location_type& l) make_SH_IF (const location_type& l)
{ {
return symbol_type (token::HSIF, l); return symbol_type (token::SH_IF, l);
} }
#endif #endif
#if 201103L <= YY_CPLUSPLUS #if 201103L <= YY_CPLUSPLUS
static static
symbol_type symbol_type
make_HSELIF (location_type l) make_SH_ELIF (location_type l)
{ {
return symbol_type (token::HSELIF, std::move (l)); return symbol_type (token::SH_ELIF, std::move (l));
} }
#else #else
static static
symbol_type symbol_type
make_HSELIF (const location_type& l) make_SH_ELIF (const location_type& l)
{ {
return symbol_type (token::HSELIF, l); return symbol_type (token::SH_ELIF, l);
} }
#endif #endif
#if 201103L <= YY_CPLUSPLUS #if 201103L <= YY_CPLUSPLUS
static static
symbol_type symbol_type
make_HSELSE (location_type l) make_SH_ELSE (location_type l)
{ {
return symbol_type (token::HSELSE, std::move (l)); return symbol_type (token::SH_ELSE, std::move (l));
} }
#else #else
static static
symbol_type symbol_type
make_HSELSE (const location_type& l) make_SH_ELSE (const location_type& l)
{ {
return symbol_type (token::HSELSE, l); return symbol_type (token::SH_ELSE, l);
} }
#endif #endif
#if 201103L <= YY_CPLUSPLUS #if 201103L <= YY_CPLUSPLUS
static static
symbol_type symbol_type
make_HSENDIF (location_type l) make_SH_ENDIF (location_type l)
{ {
return symbol_type (token::HSENDIF, std::move (l)); return symbol_type (token::SH_ENDIF, std::move (l));
} }
#else #else
static static
symbol_type symbol_type
make_HSENDIF (const location_type& l) make_SH_ENDIF (const location_type& l)
{ {
return symbol_type (token::HSENDIF, l); return symbol_type (token::SH_ENDIF, l);
} }
#endif #endif
#if 201103L <= YY_CPLUSPLUS #if 201103L <= YY_CPLUSPLUS