lexer & grammar fixes

This commit is contained in:
xensik
2022-02-20 12:10:21 +01:00
parent e491993f93
commit 6980506e88
54 changed files with 10312 additions and 10375 deletions

View File

@ -60,6 +60,7 @@ const std::unordered_map<std::string_view, parser::token::token_kind_type> keywo
{ "true", parser::token::TRUE },
{ "false", parser::token::FALSE },
{ "undefined", parser::token::UNDEFINED },
{ "size", parser::token::SIZE },
{ "game", parser::token::GAME },
{ "self", parser::token::SELF },
{ "anim", parser::token::ANIM },
@ -85,8 +86,8 @@ bool buffer::push(char c)
return true;
}
// Default constructor: state is set to reader::end, and buffer_pos,
// bytes_remaining, last_byte and current_byte are all initialised to 0.
reader::reader() : state(reader::end), buffer_pos(0), bytes_remaining(0),
last_byte(0), current_byte(0) { }
reader::reader() : state(reader::end), buffer_pos(0),
bytes_remaining(0), last_byte(0), current_byte(0) {}
void reader::init(const char* data, size_t size)
{
@ -174,7 +175,6 @@ auto lexer::lex() -> parser::symbol_type
{
buffer_.length = 0;
state_ = state::start;
loc_.step();
while (true)
{
@ -182,6 +182,7 @@ auto lexer::lex() -> parser::symbol_type
auto& last = reader_.last_byte;
auto& curr = reader_.current_byte;
auto path = false;
loc_.step();
if (state == reader::end)
{
@ -214,7 +215,7 @@ auto lexer::lex() -> parser::symbol_type
case '\\':
throw comp_error(loc_, "invalid token ('\\')");
case '/':
if (curr != '/' && curr != '*' && curr != '#' && curr != '=')
if (curr != '=' && curr != '#' && curr != '@' && curr != '*' && curr != '/')
return parser::make_DIV(loc_);
advance();
@ -246,14 +247,35 @@ auto lexer::lex() -> parser::symbol_type
}
else if (last == '#' && curr == '/')
{
reader_.advance();
advance();
break;
}
reader_.advance();
advance();
}
}
}
else if (last == '@')
{
while (true)
{
if (state == reader::end)
throw comp_error(loc_, "unmatched script doc comment start ('/@')");
if (curr == '\n')
{
loc_.lines();
loc_.step();
}
else if (last == '@' && curr == '/')
{
advance();
break;
}
advance();
}
}
else if (last == '*')
{
while (true)
@ -268,11 +290,11 @@ auto lexer::lex() -> parser::symbol_type
}
else if (last == '*' && curr == '/')
{
reader_.advance();
advance();
break;
}
reader_.advance();
advance();
}
}
else if (last == '/')
@ -282,30 +304,10 @@ auto lexer::lex() -> parser::symbol_type
if (state == reader::end)
break;
if (last == '\\' && (curr == '\r' || curr == '\n'))
{
reader_.advance();
if (state == reader::end)
break;
if (last == '\r')
{
if (curr != '\n')
throw comp_error(loc_, "invalid token ('\')");
reader_.advance();
}
loc_.lines();
loc_.step();
continue;
}
if (curr == '\n')
break;
reader_.advance();
advance();
}
}
continue;
@ -315,8 +317,8 @@ auto lexer::lex() -> parser::symbol_type
if (!indev_)
throw comp_error(loc_, "unmatched devblock end ('#/')");
indev_ = false;
advance();
indev_ = false;
return parser::make_DEVEND(loc_);
}
@ -337,7 +339,7 @@ auto lexer::lex() -> parser::symbol_type
state_ = state::preprocessor;
goto lex_name;
case '*':
if (curr != '/' && curr != '=')
if (curr != '=' && curr != '/')
return parser::make_MUL(loc_);
advance();
@ -350,13 +352,9 @@ auto lexer::lex() -> parser::symbol_type
state_ = state::string;
goto lex_string;
case '.':
advance();
if (state == reader::end)
throw comp_error(loc_, "unterminated field ('.')");
state_ = state::field;
goto lex_name_or_number;
if (curr < '0' || curr > '9')
return parser::make_DOT(loc_);
goto lex_number;
case '(':
return parser::make_LPAREN(loc_);
case ')':
@ -480,7 +478,6 @@ auto lexer::lex() -> parser::symbol_type
advance();
return parser::make_ASSIGN_RSHIFT(loc_);
default:
lex_name_or_number:
if (last >= '0' && last <= '9')
goto lex_number;
else if (last == '_' || last >= 'A' && last <= 'Z' || last >= 'a' && last <= 'z')
@ -562,23 +559,8 @@ lex_name:
advance();
}
if (state_ == state::field)
if (state_ == state::preprocessor)
{
if (path)
throw comp_error(loc_, "invalid field token '\\'");
if (std::string_view(buffer_.data, buffer_.length) == "size")
{
return parser::make_SIZE(loc_);
}
return parser::make_FIELD(std::string(buffer_.data, buffer_.length), loc_);
}
else if (state_ == state::preprocessor)
{
if (path)
throw comp_error(loc_, "invalid preprocessor directive");
auto token = parser::token::S1UNDEF;
if (buffer_.length < 16)
@ -594,7 +576,8 @@ lex_name:
}
}
preprocessor(token);
preprocessor_run(token);
state_ = state::start;
continue;
}
@ -620,14 +603,11 @@ lex_name:
}
lex_number:
if (state_ == state::field)
buffer_.push('.');
if (state_ == state::field || last == '.' || last != '0' || (last == '0' && (curr != 'o' && curr != 'b' && curr != 'x')))
if (last == '.' || last != '0' || (last == '0' && (curr != 'o' && curr != 'b' && curr != 'x')))
{
buffer_.push(last);
auto dot = 0;
auto dot = last == '.' ? 1 : 0;
auto flt = 0;
while (true)
@ -663,10 +643,10 @@ lex_number:
if (last == '\'')
throw comp_error(loc_, "invalid number literal");
if (state_ == state::field && dot || dot > 1 || flt > 1 || flt && buffer_.data[buffer_.length - 1] != 'f')
if (dot > 1 || flt > 1 || flt && buffer_.data[buffer_.length - 1] != 'f')
throw comp_error(loc_, "invalid number literal");
if (state_ == state::field || dot || flt)
if (dot || flt)
return parser::make_FLOAT(std::string(buffer_.data, buffer_.length), loc_);
return parser::make_INTEGER(std::string(buffer_.data, buffer_.length), loc_);
@ -681,7 +661,7 @@ lex_number:
break;
if (curr == '\'' && (last == '\'' || last == 'o') || (curr == 'o' && last == '\''))
throw comp_error(loc_, "invalid octal literal");
throw comp_error(loc_, "invalid octal literal");
if (curr == '\'')
{
@ -771,18 +751,22 @@ lex_number:
return parser::make_INTEGER(xsk::utils::string::hex_to_dec(buffer_.data), loc_);
}
// cant get here!
throw error("UNEXPECTED LEXER INTERNAL ERROR!");
}
}
// Advances the underlying reader by one step and increments loc_.end.column.
// Afterwards, if reader_.current_byte is a backslash, preprocessor_wrap() is
// called -- except when state_ is state::start and reader_.last_byte is '/',
// in which case the function returns without calling it.
void lexer::advance()
{
reader_.advance();
loc_.end.column++;
// don't wrap comment marks '/\/' '/\*' outside strings
if (state_ == state::start && reader_.last_byte == '/')
return;
// a backslash here is annotated as the unlikely path
if (reader_.current_byte == '\\') [[unlikely]]
preprocessor_wrap();
}
void lexer::preprocessor_wrap()
{
while (reader_.current_byte == '\\')
{
if (reader_.bytes_remaining == 1)
@ -824,7 +808,7 @@ void lexer::advance()
}
}
void lexer::preprocessor(parser::token::token_kind_type token)
void lexer::preprocessor_run(parser::token::token_kind_type token)
{
if (!clean_)
throw comp_error(loc_, "invalid token ('#')");

View File

@ -44,7 +44,7 @@ struct reader
class lexer
{
enum class state : std::uint8_t { start, string, localize, field, preprocessor };
enum class state : std::uint8_t { start, string, localize, preprocessor };
reader reader_;
buffer buffer_;
@ -66,7 +66,8 @@ public:
private:
void advance();
void preprocessor(parser::token::token_kind_type token);
void preprocessor_wrap();
void preprocessor_run(parser::token::token_kind_type token);
};
} // namespace xsk::gsc::s1

File diff suppressed because it is too large Load Diff

View File

@ -474,6 +474,7 @@ namespace xsk { namespace gsc { namespace s1 {
// expr_game
char dummy19[sizeof (ast::expr_game::ptr)];
// expr_identifier_nosize
// expr_identifier
char dummy20[sizeof (ast::expr_identifier::ptr)];
@ -618,7 +619,6 @@ namespace xsk { namespace gsc { namespace s1 {
// stmt_while
char dummy66[sizeof (ast::stmt_while::ptr)];
// "field"
// "path"
// "identifier"
// "string literal"
@ -721,7 +721,7 @@ namespace xsk { namespace gsc { namespace s1 {
TRUE = 44, // "true"
FALSE = 45, // "false"
UNDEFINED = 46, // "undefined"
SIZE = 47, // ".size"
SIZE = 47, // "size"
GAME = 48, // "game"
SELF = 49, // "self"
ANIM = 50, // "anim"
@ -771,13 +771,13 @@ namespace xsk { namespace gsc { namespace s1 {
MUL = 94, // "*"
DIV = 95, // "/"
MOD = 96, // "%"
FIELD = 97, // "field"
PATH = 98, // "path"
IDENTIFIER = 99, // "identifier"
STRING = 100, // "string literal"
ISTRING = 101, // "localized string"
FLOAT = 102, // "float"
INTEGER = 103, // "integer"
PATH = 97, // "path"
IDENTIFIER = 98, // "identifier"
STRING = 99, // "string literal"
ISTRING = 100, // "localized string"
FLOAT = 101, // "float"
INTEGER = 102, // "integer"
SIZEOF = 103, // SIZEOF
ADD_ARRAY = 104, // ADD_ARRAY
THEN = 105, // THEN
TERN = 106, // TERN
@ -852,7 +852,7 @@ namespace xsk { namespace gsc { namespace s1 {
S_TRUE = 44, // "true"
S_FALSE = 45, // "false"
S_UNDEFINED = 46, // "undefined"
S_SIZE = 47, // ".size"
S_SIZE = 47, // "size"
S_GAME = 48, // "game"
S_SELF = 49, // "self"
S_ANIM = 50, // "anim"
@ -902,13 +902,13 @@ namespace xsk { namespace gsc { namespace s1 {
S_MUL = 94, // "*"
S_DIV = 95, // "/"
S_MOD = 96, // "%"
S_FIELD = 97, // "field"
S_PATH = 98, // "path"
S_IDENTIFIER = 99, // "identifier"
S_STRING = 100, // "string literal"
S_ISTRING = 101, // "localized string"
S_FLOAT = 102, // "float"
S_INTEGER = 103, // "integer"
S_PATH = 97, // "path"
S_IDENTIFIER = 98, // "identifier"
S_STRING = 99, // "string literal"
S_ISTRING = 100, // "localized string"
S_FLOAT = 101, // "float"
S_INTEGER = 102, // "integer"
S_SIZEOF = 103, // SIZEOF
S_ADD_ARRAY = 104, // ADD_ARRAY
S_THEN = 105, // THEN
S_TERN = 106, // TERN
@ -991,15 +991,16 @@ namespace xsk { namespace gsc { namespace s1 {
S_expr_level = 183, // expr_level
S_expr_animation = 184, // expr_animation
S_expr_animtree = 185, // expr_animtree
S_expr_identifier = 186, // expr_identifier
S_expr_path = 187, // expr_path
S_expr_istring = 188, // expr_istring
S_expr_string = 189, // expr_string
S_expr_vector = 190, // expr_vector
S_expr_float = 191, // expr_float
S_expr_integer = 192, // expr_integer
S_expr_false = 193, // expr_false
S_expr_true = 194 // expr_true
S_expr_identifier_nosize = 186, // expr_identifier_nosize
S_expr_identifier = 187, // expr_identifier
S_expr_path = 188, // expr_path
S_expr_istring = 189, // expr_istring
S_expr_string = 190, // expr_string
S_expr_vector = 191, // expr_vector
S_expr_float = 192, // expr_float
S_expr_integer = 193, // expr_integer
S_expr_false = 194, // expr_false
S_expr_true = 195 // expr_true
};
};
@ -1122,6 +1123,7 @@ namespace xsk { namespace gsc { namespace s1 {
value.move< ast::expr_game::ptr > (std::move (that.value));
break;
case symbol_kind::S_expr_identifier_nosize: // expr_identifier_nosize
case symbol_kind::S_expr_identifier: // expr_identifier
value.move< ast::expr_identifier::ptr > (std::move (that.value));
break;
@ -1313,7 +1315,6 @@ namespace xsk { namespace gsc { namespace s1 {
value.move< ast::stmt_while::ptr > (std::move (that.value));
break;
case symbol_kind::S_FIELD: // "field"
case symbol_kind::S_PATH: // "path"
case symbol_kind::S_IDENTIFIER: // "identifier"
case symbol_kind::S_STRING: // "string literal"
@ -2392,6 +2393,7 @@ switch (yykind)
value.template destroy< ast::expr_game::ptr > ();
break;
case symbol_kind::S_expr_identifier_nosize: // expr_identifier_nosize
case symbol_kind::S_expr_identifier: // expr_identifier
value.template destroy< ast::expr_identifier::ptr > ();
break;
@ -2583,7 +2585,6 @@ switch (yykind)
value.template destroy< ast::stmt_while::ptr > ();
break;
case symbol_kind::S_FIELD: // "field"
case symbol_kind::S_PATH: // "path"
case symbol_kind::S_IDENTIFIER: // "identifier"
case symbol_kind::S_STRING: // "string literal"
@ -2689,7 +2690,7 @@ switch (yykind)
{
S1_ASSERT (tok == token::S1EOF
|| (token::S1error <= tok && tok <= token::MOD)
|| (token::ADD_ARRAY <= tok && tok <= token::POSTDEC));
|| (token::SIZEOF <= tok && tok <= token::POSTDEC));
}
#if 201103L <= YY_CPLUSPLUS
symbol_type (int tok, std::string v, location_type l)
@ -2699,7 +2700,7 @@ switch (yykind)
: super_type(token_type (tok), v, l)
#endif
{
S1_ASSERT ((token::FIELD <= tok && tok <= token::INTEGER));
S1_ASSERT ((token::PATH <= tok && tok <= token::INTEGER));
}
};
@ -4204,21 +4205,6 @@ switch (yykind)
return symbol_type (token::MOD, l);
}
#endif
// Factory for a FIELD symbol: builds a symbol_type from token::FIELD, the
// string value v and the location l. Exactly one of the two overloads below
// is compiled, chosen by the YY_CPLUSPLUS check.
#if 201103L <= YY_CPLUSPLUS
// Used when YY_CPLUSPLUS >= 201103L: takes v and l by value and moves them on.
static
symbol_type
make_FIELD (std::string v, location_type l)
{
return symbol_type (token::FIELD, std::move (v), std::move (l));
}
#else
// Fallback: takes v and l by const reference and passes them through as-is.
static
symbol_type
make_FIELD (const std::string& v, const location_type& l)
{
return symbol_type (token::FIELD, v, l);
}
#endif
#if 201103L <= YY_CPLUSPLUS
static
symbol_type
@ -4309,6 +4295,21 @@ switch (yykind)
return symbol_type (token::INTEGER, v, l);
}
#endif
// Factory for a SIZEOF symbol: builds a symbol_type from token::SIZEOF and
// the location l (no semantic value is attached). Exactly one of the two
// overloads below is compiled, chosen by the YY_CPLUSPLUS check.
#if 201103L <= YY_CPLUSPLUS
// Used when YY_CPLUSPLUS >= 201103L: takes l by value and moves it on.
static
symbol_type
make_SIZEOF (location_type l)
{
return symbol_type (token::SIZEOF, std::move (l));
}
#else
// Fallback: takes l by const reference and passes it through as-is.
static
symbol_type
make_SIZEOF (const location_type& l)
{
return symbol_type (token::SIZEOF, l);
}
#endif
#if 201103L <= YY_CPLUSPLUS
static
symbol_type
@ -4788,9 +4789,9 @@ switch (yykind)
/// Constants.
enum
{
yylast_ = 2300, ///< Last index in yytable_.
yynnts_ = 82, ///< Number of nonterminal symbols.
yyfinal_ = 21 ///< Termination state number.
yylast_ = 2251, ///< Last index in yytable_.
yynnts_ = 83, ///< Number of nonterminal symbols.
yyfinal_ = 22 ///< Termination state number.
};
@ -4902,6 +4903,7 @@ switch (yykind)
value.copy< ast::expr_game::ptr > (YY_MOVE (that.value));
break;
case symbol_kind::S_expr_identifier_nosize: // expr_identifier_nosize
case symbol_kind::S_expr_identifier: // expr_identifier
value.copy< ast::expr_identifier::ptr > (YY_MOVE (that.value));
break;
@ -5093,7 +5095,6 @@ switch (yykind)
value.copy< ast::stmt_while::ptr > (YY_MOVE (that.value));
break;
case symbol_kind::S_FIELD: // "field"
case symbol_kind::S_PATH: // "path"
case symbol_kind::S_IDENTIFIER: // "identifier"
case symbol_kind::S_STRING: // "string literal"
@ -5218,6 +5219,7 @@ switch (yykind)
value.move< ast::expr_game::ptr > (YY_MOVE (s.value));
break;
case symbol_kind::S_expr_identifier_nosize: // expr_identifier_nosize
case symbol_kind::S_expr_identifier: // expr_identifier
value.move< ast::expr_identifier::ptr > (YY_MOVE (s.value));
break;
@ -5409,7 +5411,6 @@ switch (yykind)
value.move< ast::stmt_while::ptr > (YY_MOVE (s.value));
break;
case symbol_kind::S_FIELD: // "field"
case symbol_kind::S_PATH: // "path"
case symbol_kind::S_IDENTIFIER: // "identifier"
case symbol_kind::S_STRING: // "string literal"
@ -5482,7 +5483,7 @@ switch (yykind)
#line 13 "parser.ypp"
} } } // xsk::gsc::s1
#line 5486 "parser.hpp"
#line 5487 "parser.hpp"